[med-svn] [netepi-analysis] 11/13: New upstream version 0.9.0
Andreas Tille
tille at debian.org
Tue Dec 26 13:49:29 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository netepi-analysis.
commit 78eebf719cc2e0ba6674e95c5aec408fcc6aade0
Author: Andreas Tille <tille at debian.org>
Date: Tue Dec 26 14:43:34 2017 +0100
New upstream version 0.9.0
---
CHANGES | 113 +
LICENSE | 546 ++
MANIFEST.in | 27 +
PKG-INFO | 10 +
README | 439 ++
SOOMv0/Analysis/PopRate.py | 727 +++
SOOMv0/Analysis/__init__.py | 16 +
SOOMv0/Analysis/twobytwo.py | 1336 +++++
SOOMv0/BaseDataset.py | 323 ++
SOOMv0/CachingLoader.py | 246 +
SOOMv0/ChunkingLoader.py | 148 +
SOOMv0/ColTypes/Discrete.py | 332 ++
SOOMv0/ColTypes/Identity.py | 22 +
SOOMv0/ColTypes/RowOrdinal.py | 108 +
SOOMv0/ColTypes/Scalar.py | 99 +
SOOMv0/ColTypes/SearchableText.py | 221 +
SOOMv0/ColTypes/__init__.py | 17 +
SOOMv0/ColTypes/base.py | 569 +++
SOOMv0/CrossTab.py | 323 ++
SOOMv0/Cstats.pyx | 123 +
SOOMv0/DataSourceColumn.py | 128 +
SOOMv0/DataTypes.py | 317 ++
SOOMv0/Dataset.py | 441 ++
SOOMv0/DatasetColumn.py | 55 +
SOOMv0/DatasetSummary.py | 419 ++
SOOMv0/Datasets.py | 137 +
SOOMv0/Describe.py | 64 +
SOOMv0/Filter.py | 486 ++
SOOMv0/Makefile | 25 +
SOOMv0/Plot/README | 56 +
SOOMv0/Plot/__init__.py | 17 +
SOOMv0/Plot/_output.py | 257 +
SOOMv0/Plot/panelfn.py | 119 +
SOOMv0/Plot/plotmethods.py | 337 ++
SOOMv0/Plot/raxis.py | 415 ++
SOOMv0/Plot/rconv.py | 126 +
SOOMv0/Plot/rplot.py | 663 +++
SOOMv0/PlotRegistry.py | 70 +
SOOMv0/PrintDataset.py | 88 +
SOOMv0/Search.py | 236 +
SOOMv0/Soom.py | 195 +
SOOMv0/SourceDataTypes.py | 163 +
SOOMv0/Sources/All.py | 20 +
SOOMv0/Sources/CSV.py | 79 +
SOOMv0/Sources/Columns.py | 48 +
SOOMv0/Sources/DB.py | 81 +
SOOMv0/Sources/__init__.py | 16 +
SOOMv0/Sources/common.py | 206 +
SOOMv0/Stats.py | 1595 ++++++
SOOMv0/SummaryCond.py | 554 ++
SOOMv0/SummaryProp.py | 131 +
SOOMv0/SummaryStats.py | 501 ++
SOOMv0/Timers.py | 59 +
SOOMv0/TransformFN.py | 64 +
SOOMv0/Utils.py | 318 ++
SOOMv0/__init__.py | 57 +
SOOMv0/common.py | 58 +
SOOMv0/interactive_hook.py | 41 +
SOOMv0/soomparse.g | 390 ++
SOOMv0/soomparse.py | 953 ++++
SOOMv0/xvfb_spawn.py | 201 +
SOOMv0/yappsrt.py | 304 ++
TODO | 288 ++
debian/README.Debian | 7 -
debian/changelog | 5 -
debian/compat | 1 -
debian/control | 78 -
debian/copyright | 548 --
debian/docs | 2 -
debian/netepi-analysis.examples | 1 -
debian/netepi-analysis.install | 1 -
debian/rules | 23 -
debian/source/format | 1 -
debian/watch | 8 -
demo/SOOM_demo_data_load.py | 90 +
demo/api_demo.py | 930 ++++
demo/demo.txt | 8474 +++++++++++++++++++++++++++++++
demo/loaders/__init__.py | 16 +
demo/loaders/epitools.py | 283 ++
demo/loaders/make_icd9cm_fmt.py | 114 +
demo/loaders/nhds.py | 699 +++
demo/loaders/nhds_population.py | 201 +
demo/loaders/nhmrc.py | 137 +
demo/loaders/rtfparse.py | 112 +
demo/loaders/syndeath.py | 390 ++
demo/loaders/syndeath_expand.py | 94 +
demo/loaders/urlfetch.py | 76 +
demo/loaders/whopop.py | 194 +
demo/loaders/whotext.py | 96 +
demo/plot_demo.py | 368 ++
demo/rawdata/README_NHMRC.txt | 2 +
demo/rawdata/README_WHO.txt | 18 +
demo/rawdata/aus01stdpop.csv | 19 +
demo/rawdata/aus01stdpop_mf.csv | 37 +
demo/rawdata/nhmrc_grantdata.csv.gz | Bin 0 -> 770619 bytes
demo/rawdata/synthetic_deaths_comp.gz | Bin 0 -> 111642 bytes
demo/rawdata/synthetic_pops.csv.gz | Bin 0 -> 48970 bytes
demo/rawdata/who2000.csv | 38 +
demo/rawdata/who2001.csv | 38 +
demo/rawdata/who2002.csv | 38 +
demo/rawdata/whoreps.csv.gz | Bin 0 -> 274026 bytes
demo/testrunner.py | 144 +
docs/README.searchabletext | 230 +
liccheck.py | 79 +
sandbox/PopRate.py | 110 +
sandbox/cacheloadmeta.py | 51 +
sandbox/ci_bars.py | 446 ++
sandbox/dobson.lst | 134 +
sandbox/dobson.sas | 161 +
sandbox/martinstats.py | 857 ++++
sandbox/poprate.py | 203 +
sandbox/reformat_icd9.py | 54 +
sandbox/sander.py | 286 ++
sandbox/soomload-report.py | 238 +
sandbox/source_bench.py | 32 +
sandbox/summ_measure.py | 81 +
sandbox/twobytwo.output | 2371 +++++++++
sandbox/wordidx.py | 338 ++
setup.py | 42 +
simpleinst/.cvsignore | 2 +
simpleinst/__init__.py | 66 +
simpleinst/config_register.py | 185 +
simpleinst/defaults.py | 25 +
simpleinst/filter.py | 33 +
simpleinst/glob.py | 49 +
simpleinst/install_files.py | 190 +
simpleinst/platform.py | 65 +
simpleinst/pyinstaller.py | 30 +
simpleinst/usergroup.py | 47 +
simpleinst/utils.py | 112 +
soomext/bad.py | 7 +
soomext/blob.c | 409 ++
soomext/blobstore.c | 253 +
soomext/blobstore.h | 44 +
soomext/doc/Makefile | 52 +
soomext/doc/blobstore.tex | 178 +
soomext/doc/copyright.tex | 17 +
soomext/doc/installation.tex | 58 +
soomext/doc/soom.pdf | Bin 0 -> 50274 bytes
soomext/doc/soom.tex | 72 +
soomext/doc/soomarray.tex | 163 +
soomext/doc/soomfunc.tex | 170 +
soomext/doc/storage.dia | Bin 0 -> 1474 bytes
soomext/doc/storage.tex | 244 +
soomext/matest.py | 71 +
soomext/mmaparray.c | 277 +
soomext/setup.py | 43 +
soomext/soomarray.py | 542 ++
soomext/soomfunc.c | 1517 ++++++
soomext/storage.c | 623 +++
soomext/storage.h | 69 +
soomext/test/Makefile | 12 +
soomext/test/all.py | 39 +
soomext/test/soomarraytest.py | 228 +
soomext/test/soomfunctest.py | 313 ++
test.py | 64 +
tests/SAS/Stats_py_test.sas | 436 ++
tests/SAS/indirect_std_check.sas | 335 ++
tests/SAS/summ_higher_order.sas | 321 ++
tests/__init__.py | 16 +
tests/column.py | 335 ++
tests/csv_source.py | 61 +
tests/data/csv_data | 4 +
tests/data/smr_results_37_37_CL0.90.csv | 35 +
tests/data/smr_results_37_37_CL0.99.csv | 35 +
tests/data/smr_results_37_95_CL0.90.csv | 35 +
tests/data/smr_results_37_95_CL0.99.csv | 35 +
tests/data/smr_results_95_37_CL0.90.csv | 35 +
tests/data/smr_results_95_37_CL0.99.csv | 35 +
tests/data/smr_results_95_95_CL0.90.csv | 35 +
tests/data/smr_results_95_95_CL0.99.csv | 35 +
tests/db_source.py | 74 +
tests/filters.py | 649 +++
tests/indirect_std_SAS.py | 176 +
tests/poprate.py | 298 ++
tests/source_datatypes.py | 179 +
tests/stats.py | 2894 +++++++++++
tests/summ.py | 423 ++
tests/summ_higher_order.py | 340 ++
web/install.py | 80 +
web/libsoomexplorer/__init__.py | 16 +
web/libsoomexplorer/colvals.py | 190 +
web/libsoomexplorer/common.py | 64 +
web/libsoomexplorer/condcol.py | 222 +
web/libsoomexplorer/dsparams.py | 177 +
web/libsoomexplorer/fields.py | 735 +++
web/libsoomexplorer/filter.py | 662 +++
web/libsoomexplorer/filterparse.py | 635 +++
web/libsoomexplorer/filterstore.py | 184 +
web/libsoomexplorer/output/__init__.py | 16 +
web/libsoomexplorer/output/base.py | 147 +
web/libsoomexplorer/output/plot.py | 59 +
web/libsoomexplorer/output/table.py | 382 ++
web/libsoomexplorer/output/twobytwo.py | 105 +
web/libsoomexplorer/parameters.py | 83 +
web/libsoomexplorer/paramstore.py | 179 +
web/libsoomexplorer/plotform.py | 606 +++
web/libsoomexplorer/plottypes.py | 456 ++
web/libsoomexplorer/twobytwoparams.py | 213 +
web/libsoomexplorer/undo.py | 105 +
web/libsoomexplorer/workspace.py | 124 +
web/libsoomexplorer/yappsrt.py | 328 ++
web/nea-standalone.py | 90 +
web/nea.py | 583 +++
web/pages/colvalselect.html | 113 +
web/pages/condcolparams.html | 134 +
web/pages/explore.html | 82 +
web/pages/fields.html | 445 ++
web/pages/filter.html | 320 ++
web/pages/macros.html | 123 +
web/pages/newanalysis.html | 42 +
web/pages/output_crosstab.html | 105 +
web/pages/output_dsrows.html | 82 +
web/pages/output_image.html | 25 +
web/pages/output_table.html | 42 +
web/pages/output_twobytwo.html | 138 +
web/pages/params.html | 57 +
web/pages/paramsave.html | 43 +
web/pages/result.html | 56 +
web/pages/start.html | 70 +
web/pages/twobytwoparams.html | 137 +
web/static/Netepi_logo_m.png | Bin 0 -> 1541 bytes
web/static/Netepi_logo_s.png | Bin 0 -> 447 bytes
web/static/add.xcf | Bin 0 -> 2642 bytes
web/static/button-add.png | Bin 0 -> 1010 bytes
web/static/button-del.png | Bin 0 -> 892 bytes
web/static/button-down.png | Bin 0 -> 246 bytes
web/static/button-l.png | Bin 0 -> 955 bytes
web/static/button-r.png | Bin 0 -> 957 bytes
web/static/button-up.png | Bin 0 -> 245 bytes
web/static/button.xcf | Bin 0 -> 3350 bytes
web/static/close.png | Bin 0 -> 314 bytes
web/static/close.xcf | Bin 0 -> 1228 bytes
web/static/copyright.html | 567 +++
web/static/favicon.ico | Bin 0 -> 318 bytes
web/static/help.html | 418 ++
web/static/help.png | Bin 0 -> 2851 bytes
web/static/netepi-2x2.svg | 112 +
web/static/style.css | 610 +++
web/static/target.png | Bin 0 -> 822 bytes
web/static/target.xcf | Bin 0 -> 2211 bytes
241 files changed, 58096 insertions(+), 675 deletions(-)
diff --git a/CHANGES b/CHANGES
new file mode 100644
index 0000000..0f3c510
--- /dev/null
+++ b/CHANGES
@@ -0,0 +1,113 @@
+All material associated with "NetEpi Analysis" is Copyright (C) 2004, 2005
+Health Administration Corporation (New South Wales Department of Health).
+
+NetEpi Analysis is licensed under the terms of the Health
+Administration Corporation Open Source License Version 1.2 (HACOS License
+V1.2), the full text of which can be found in the LICENSE file provided
+with NetEpi Analysis.
+
+Current for NetEpi-Analysis-0-1-20050223-01
+
+Since NetEpi-Analysis-0-1-20050216-01:
+
+- Copyright updated to include 2005.
+
+- Note about Numeric build problems added to README
+
+- added new column datatype "recodedate", which uses the new RecodeArray
+ storage method (see below), but also has date formatting support. NHDS
+ demo "randomdate" column changed to "recodedate" datatype.
+
+- disabled subset() method, which returned a realised subset of a
+ dataset, as it had not been maintained for some time. Future work will
+ add per-user workspaces, and at that time a realise() or deepcopy()
+ method will be added.
+
+- made FilteredDataset instances more dataset-like so they would be
+ printed correctly. Also fixed several bugs in their implementation.
+
+- demo data had nhds proceedure_all column as a string type, rather than
+ a tuple type (diagnosis_all was fixed in the previous snapshot).
+
+- soomarray changes:
+
+ - many more soomarray unit tests.
+
+ - ArrayString would cast values to str, which was hiding errors
+ elsewhere (for example, the incorrect datatype on diagnosis_all and
+ proceedure_all). Changed to only accept str or None (which becomes
+ a null string).
+
+ - arrays would raise exceptions when garbage collected under obscure
+ circumstances - trap and ignore.
+
+ - Also fixed typos in previous slice changes.
+
+ - ArrayTime never worked - fixed implementation problems.
+
+- fixed masking bug introduced in previous snapshot.
+
+- several plot demos that raised exceptions have been disabled.
+
+Since NetEpi-Analysis-0-1-20050211-01:
+
+- Filter and DatasetSummary require that we fetch column data based
+ on a list of record ids (scatter-gather) - the Numeric "take()"
+ operation. Previously, the code to do this was duplicated in both
+ modules. It has now been pushed down into soomarray classes (where
+ appropriate), with the column datatype deciding which scheme should
+ be used. Soomarray types are now free to implement more efficient
+ scatter-gather methods.
+
+- soomarray types were still using the __getslice__ special method, even
+ though this was deprecated in Python 2.0. They have now been updated
+ to support slice objects passed to __getitem__.
+
+- a new column datatype has been added - "recode". Columns of this
+ type are mapped to a numeric code, and the result stored in a
+ Numeric.array(). Elements are allowed to be any hashable python type
+ (and no type checking is performed). This datatype is most useful
+ for low-cardinality discrete columns containing string or DateTime
+ data. The demo diagnosis[1-7] and proceedure[1-4] columns have been
+ changed to the 'recode' datatype.
+
+- soomext tests are now run by the main test driver.
+
+- the ChunkingLoader has been changed to yield column values via a
+  generator, rather than assembling a list and returning it, and the
+  DatasetColumn.store_column() method has been updated to allow it to
+ work with iterables. Depending on the back-end data storage scheme,
+ this allows us to write the column data incrementally, which can result
+ in a significant reduction in process size.
+
+- bugfix: summary "_type_" column was 'int' when it should have been 'str'.
+
+Since NetEpi-Analysis-0-1-20041221-02:
+
+- dataset importing no longer attempts to keep all column data
+ loaded. Column data is unloaded after masking and saving, and will be
+ demand-loaded if need be.
+
+- fine-grained Timer code has been removed (performance impact was too
+ great) - column loading is now about 3x faster. Replaced with simpler
+  reporting of per-column masking, indexing and saving timers.
+
+- extracting icd9cm map from RTF files consumes about 150MB in building
+ the parse tree. Due to memory fragmentation, this memory is largely
+ lost. The icd9cm rtf parsing process has been split into a separate
+ process to allow the memory to be reclaimed and not impact the loading
+ of the demo data.
+
+- icd9cm rtf parser reimplemented as a generator (via re.finditer),
+ rather than using re.split. The result is slightly slower, but the code
+ is easier to read and maintain and could potentially use less memory.
+
+- bugfix: FilteredDataset view was not recognising MmapArray as a
+ Numeric-like array type and was using slower list comprehension
+ filtering, rather than a Numeric.take.
+
+- bugfix: demo data had nhds diagnosis_all column as a string type,
+ rather than a tuple type.
+
+- New stats functions (including some Pyrex implementation) and associated
+ tests (!!NOTE!! not surfaced as stats methods yet)
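
The soomarray entries above describe three mechanisms that fit together:
the "recode" datatype (distinct values mapped to small integer codes),
slice objects handled by __getitem__, and the scatter-gather take()
operation. The following is a minimal sketch of the combined idea in
plain Python; the RecodeArray class shown here and its method signatures
are illustrative assumptions only, not the actual SOOMv0 soomarray API
(which stores its codes in Numeric arrays):

    # Sketch only: a "recode"-style column keeps each distinct value
    # once, stores one small integer code per row, answers __getitem__
    # for both single indexes and slice objects, and exposes take()
    # for scatter-gather fetches by row id.
    class RecodeArray:
        def __init__(self, values):
            self._code_of = {}              # value -> integer code
            self._value_of = []             # integer code -> value
            self._codes = []                # one code per row
            for v in values:
                code = self._code_of.get(v)
                if code is None:
                    code = self._code_of[v] = len(self._value_of)
                    self._value_of.append(v)
                self._codes.append(code)

        def __len__(self):
            return len(self._codes)

        def __getitem__(self, i):
            # Slices arrive as slice objects (__getslice__ is deprecated)
            if isinstance(i, slice):
                return [self._value_of[c] for c in self._codes[i]]
            return self._value_of[self._codes[i]]

        def take(self, row_ids):
            # Scatter-gather: fetch values for an arbitrary list of row ids
            return [self._value_of[self._codes[r]] for r in row_ids]

    col = RecodeArray(['asthma', 'influenza', 'asthma', 'asthma'])
    assert col[1:3] == ['influenza', 'asthma']
    assert col.take([0, 3]) == ['asthma', 'asthma']

For low-cardinality string or date columns this stores each distinct
value only once, which is why the changelog adopts it for the diagnosis
and procedure columns.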
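The ChunkingLoader entry above (yielding column values via a generator
so they can be written incrementally) can be sketched the same way. The
chunked-pickle file used below is a stand-in back-end invented for the
example; the real SOOMv0 storage scheme differs:

    # Sketch only: store_column() consumes any iterable and writes
    # fixed-size chunks as they arrive, so the whole column never
    # needs to be assembled as a single in-memory list.
    import itertools
    import pickle

    def store_column(path, values, chunksize=4096):
        values = iter(values)
        with open(path, 'wb') as f:
            while True:
                chunk = list(itertools.islice(values, chunksize))
                if not chunk:
                    break
                pickle.dump(chunk, f)

    def generate_rows():
        # Stand-in for a ChunkingLoader yielding one column's values
        for rownum in range(100000):
            yield rownum % 7

    store_column('demo_column.dat', generate_rows())
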
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..d4c2d20
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,546 @@
+COPYRIGHT AND LICENSING ARRANGEMENTS
+
+All material is copyright 2004, 2005 Health Administration Corporation
+(New South Wales Department of Health).
+
+NetEpi Analysis is licensed under the terms of the Health
+Administration Corporation Open Source Licence V1.2 (HACOS License V1.2),
+the complete text of which appears below.
+
+HEALTH ADMINISTRATION CORPORATION OPEN SOURCE LICENSE VERSION 1.2
+
+1. DEFINITIONS.
+
+ "Commercial Use" shall mean distribution or otherwise making the
+ Covered Software available to a third party.
+
+ "Contributor" shall mean each entity that creates or contributes to
+ the creation of Modifications.
+
+ "Contributor Version" shall mean in case of any Contributor the
+ combination of the Original Software, prior Modifications used by a
+ Contributor, and the Modifications made by that particular Contributor
+ and in case of Health Administration Corporation in addition the
+ Original Software in any form, including the form as Executable.
+
+ "Covered Software" shall mean the Original Software or Modifications
+ or the combination of the Original Software and Modifications, in
+ each case including portions thereof.
+
+ "Electronic Distribution Mechanism" shall mean a mechanism generally
+ accepted in the software development community for the electronic
+ transfer of data.
+
+ "Executable" shall mean Covered Software in any form other than
+ Source Code.
+
+ "Initial Developer" shall mean the individual or entity identified as
+ the Initial Developer in the Source Code notice required by Exhibit A.
+
+ "Health Administration Corporation" shall mean the Health
+ Administration Corporation as established by the Health Administration
+ Act 1982, as amended, of the State of New South Wales, Australia. The
+ Health Administration Corporation has its offices at 73 Miller Street,
+ North Sydney, New South Wales 2059, Australia.
+
+ "Larger Work" shall mean a work, which combines Covered Software or
+ portions thereof with code not governed by the terms of this License.
+
+ "License" shall mean this document.
+
+ "Licensable" shall mean having the right to grant, to the maximum
+ extent possible, whether at the time of the initial grant or
+ subsequently acquired, any and all of the rights conveyed herein.
+
+ "Modifications" shall mean any addition to or deletion from the
+ substance or structure of either the Original Software or any previous
+ Modifications. When Covered Software is released as a series of files,
+ a Modification is:
+
+ a) Any addition to or deletion from the contents of a file
+ containing Original Software or previous Modifications.
+
+ b) Any new file that contains any part of the Original Software or
+ previous Modifications.
+
+ "Original Software" shall mean the Source Code of computer software
+ code which is described in the Source Code notice required by Exhibit
+ A as Original Software, and which, at the time of its release under
+ this License is not already Covered Software governed by this License.
+
+ "Patent Claims" shall mean any patent claim(s), now owned or hereafter
+ acquired, including without limitation, method, process, and apparatus
+ claims, in any patent Licensable by grantor.
+
+ "Source Code" shall mean the preferred form of the Covered Software
+ for making modifications to it, including all modules it contains,
+ plus any associated interface definition files, scripts used to
+ control compilation and installation of an Executable, or source
+ code differential comparisons against either the Original Software or
+ another well known, available Covered Software of the Contributor's
+ choice. The Source Code can be in a compressed or archival form,
+ provided the appropriate decompression or de-archiving software is
+ widely available for no charge.
+
+ "You" (or "Your") shall mean an individual or a legal entity exercising
+ rights under, and complying with all of the terms of, this License or
+ a future version of this License issued under Section 6.1. For legal
+ entities, "You" includes an entity which controls, is controlled
+ by, or is under common control with You. For the purposes of this
+ definition, "control" means (a) the power, direct or indirect,
+ to cause the direction or management of such entity, whether by
+ contract or otherwise, or (b) ownership of more than fifty per cent
+ (50%) of the outstanding shares or beneficial ownership of such entity.
+
+2. SOURCE CODE LICENSE.
+
+2.1 Health Administration Corporation Grant.
+
+Subject to the terms of this License, Health Administration Corporation
+hereby grants You a world-wide, royalty-free, non-exclusive license,
+subject to third party intellectual property claims:
+
+a) under copyrights Licensable by Health Administration Corporation
+ to use, reproduce, modify, display, perform, sublicense and
+ distribute the Original Software (or portions thereof) with or without
+ Modifications, and/or as part of a Larger Work;
+
+b) and under Patents Claims infringed by the making, using or selling
+ of Original Software, to make, have made, use, practice, sell, and
+ offer for sale, and/or otherwise dispose of the Original Software
+ (or portions thereof).
+
+c) The licenses granted in this Section 2.1(a) and (b) are effective
+ on the date Health Administration Corporation first distributes
+ Original Software under the terms of this License.
+
+d) Notwithstanding Section 2.1(b) above, no patent license is granted:
+ 1) for code that You delete from the Original Software; 2) separate
+ from the Original Software; or 3) for infringements caused by: i)
+ the modification of the Original Software or ii) the combination of
+ the Original Software with other software or devices.
+
+2.2 Contributor Grant.
+
+Subject to the terms of this License and subject to third party
+intellectual property claims, each Contributor hereby grants You a
+world-wide, royalty-free, non-exclusive license:
+
+a) under copyrights Licensable by Contributor, to use, reproduce,
+ modify, display, perform, sublicense and distribute the Modifications
+ created by such Contributor (or portions thereof) either on an
+ unmodified basis, with other Modifications, as Covered Software and/or
+ as part of a Larger Work; and
+
+b) under Patent Claims necessarily infringed by the making, using,
+ or selling of Modifications made by that Contributor either alone
+ and/or in combination with its Contributor Version (or portions of
+ such combination), to make, use, sell, offer for sale, have made,
+ and/or otherwise dispose of: 1) Modifications made by that Contributor
+ (or portions thereof); and 2) the combination of Modifications made
+ by that Contributor with its Contributor Version (or portions of
+ such combination).
+
+c) The licenses granted in Sections 2.2(a) and 2.2(b) are effective
+ on the date Contributor first makes Commercial Use of the Covered
+ Software.
+
+d) Notwithstanding Section 2.2(b) above, no patent license is granted:
+ 1) for any code that Contributor has deleted from the Contributor
+ Version; 2) separate from the Contributor Version; 3) for infringements
+ caused by: i) third party modifications of Contributor Version or ii)
+ the combination of Modifications made by that Contributor with other
+ software (except as part of the Contributor Version) or other devices;
+ or 4) under Patent Claims infringed by Covered Software in the absence
+ of Modifications made by that Contributor.
+
+3. DISTRIBUTION OBLIGATIONS.
+
+3.1 Application of License.
+
+The Modifications which You create or to which You contribute are governed
+by the terms of this License, including without limitation Section
+2.2. The Source Code version of Covered Software may be distributed
+only under the terms of this License or a future version of this License
+released under Section 6.1, and You must include a copy of this License
+with every copy of the Source Code You distribute. You may not offer or
+impose any terms on any Source Code version that alters or restricts the
+applicable version of this License or the recipients' rights hereunder.
+
+3.2 Availability of Source Code.
+
+Any Modification which You create or to which You contribute must be made
+available in Source Code form under the terms of this License either on
+the same media as an Executable version or via an accepted Electronic
+Distribution Mechanism to anyone to whom you made an Executable version
+available; and if made available via Electronic Distribution Mechanism,
+must remain available for at least twelve (12) months after the date it
+initially became available, or at least six (6) months after a subsequent
+version of that particular Modification has been made available to
+such recipients. You are responsible for ensuring that the Source Code
+version remains available even if the Electronic Distribution Mechanism
+is maintained by a third party.
+
+3.3 Description of Modifications.
+
+You must cause all Covered Software to which You contribute to contain
+a file documenting the changes You made to create that Covered Software
+and the date of any change. You must include a prominent statement that
+the Modification is derived, directly or indirectly, from Original
+Software provided by Health Administration Corporation and including
+the name of Health Administration Corporation in (a) the Source Code,
+and (b) in any notice in an Executable version or related documentation
+in which You describe the origin or ownership of the Covered Software.
+
+3.4 Intellectual Property Matters
+
+a) Third Party Claims.
+
+ If Contributor has knowledge that a license under a third party's
+ intellectual property rights is required to exercise the rights
+ granted by such Contributor under Sections 2.1 or 2.2, Contributor
+ must include a text file with the Source Code distribution titled
+ "LEGAL'' which describes the claim and the party making the claim
+ in sufficient detail that a recipient will know whom to contact. If
+ Contributor obtains such knowledge after the Modification is made
+ available as described in Section 3.2, Contributor shall promptly
+ modify the LEGAL file in all copies Contributor makes available
+ thereafter and shall take other steps (such as notifying appropriate
+ mailing lists or newsgroups) reasonably calculated to inform those
+ who received the Covered Software that new knowledge has been obtained.
+
+b) Contributor APIs.
+
+ If Contributor's Modifications include an application programming
+ interface (API) and Contributor has knowledge of patent licenses
+ which are reasonably necessary to implement that API, Contributor
+ must also include this information in the LEGAL file.
+
+c) Representations.
+
+ Contributor represents that, except as disclosed pursuant to Section
+ 3.4(a) above, Contributor believes that Contributor's Modifications are
+ Contributor's original creation(s) and/or Contributor has sufficient
+ rights to grant the rights conveyed by this License.
+
+3.5 Required Notices.
+
+You must duplicate the notice in Exhibit A in each file of the Source
+Code. If it is not possible to put such notice in a particular Source
+Code file due to its structure, then You must include such notice in a
+location (such as a relevant directory) where a user would be likely to
+look for such a notice. If You created one or more Modification(s) You
+may add your name as a Contributor to the notice described in Exhibit
+A. You must also duplicate this License in any documentation for the
+Source Code where You describe recipients' rights or ownership rights
+relating to Covered Software. You may choose to offer, and to charge a
+fee for, warranty, support, indemnity or liability obligations to one or
+more recipients of Covered Software. However, You may do so only on Your
+own behalf, and not on behalf of Health Administration Corporation or any
+Contributor. You must make it absolutely clear that any such warranty,
+support, indemnity or liability obligation is offered by You alone,
+and You hereby agree to indemnify Health Administration Corporation and
+every Contributor for any liability incurred by Health Administration
+Corporation or such Contributor as a result of warranty, support,
+indemnity or liability terms You offer.
+
+3.6 Distribution of Executable Versions.
+
+You may distribute Covered Software in Executable form only if the
+requirements of Sections 3.1-3.5 have been met for that Covered Software,
+and if You include a notice stating that the Source Code version of the
+Covered Software is available under the terms of this License, including
+a description of how and where You have fulfilled the obligations of
+Section 3.2. The notice must be conspicuously included in any notice in
+an Executable version, related documentation or collateral in which You
+describe recipients' rights relating to the Covered Software. You may
+distribute the Executable version of Covered Software or ownership rights
+under a license of Your choice, which may contain terms different from
+this License, provided that You are in compliance with the terms of this
+License and that the license for the Executable version does not attempt
+to limit or alter the recipient's rights in the Source Code version from
+the rights set forth in this License. If You distribute the Executable
+version under a different license You must make it absolutely clear
+that any terms which differ from this License are offered by You alone,
+not by Health Administration Corporation or any Contributor. You hereby
+agree to indemnify Health Administration Corporation and every Contributor
+for any liability incurred by Health Administration Corporation or such
+Contributor as a result of any such terms You offer.
+
+3.7 Larger Works.
+
+You may create a Larger Work by combining Covered Software with other
+software not governed by the terms of this License and distribute the
+Larger Work as a single product. In such a case, You must make sure the
+requirements of this License are fulfilled for the Covered Software.
+
+4. INABILITY TO COMPLY DUE TO STATUTE OR REGULATION.
+
+If it is impossible for You to comply with any of the terms of this
+License with respect to some or all of the Covered Software due to
+statute, judicial order, or regulation then You must: (a) comply with the
+terms of this License to the maximum extent possible; and (b) describe the
+limitations and the code they affect. Such description must be included
+in the LEGAL file described in Section 3.4 and must be included with all
+distributions of the Source Code. Except to the extent prohibited by
+statute or regulation, such description must be sufficiently detailed
+for a recipient of ordinary skill to be able to understand it.
+
+5. APPLICATION OF THIS LICENSE.
+
+This License applies to code to which Health Administration Corporation
+has attached the notice in Exhibit A and to related Covered Software.
+
+6. VERSIONS OF THE LICENSE.
+
+6.1 New Versions.
+
+Health Administration Corporation may publish revised and/or new
+versions of the License from time to time. Each version will be given
+a distinguishing version number.
+
+6.2 Effect of New Versions.
+
+Once Covered Software has been published under a particular version
+of the License, You may always continue to use it under the terms of
+that version. You may also choose to use such Covered Software under
+the terms of any subsequent version of the License published by Health
+Administration Corporation. No one other than Health Administration
+Corporation has the right to modify the terms applicable to Covered
+Software created under this License.
+
+7. DISCLAIMER OF WARRANTY.
+
+COVERED SOFTWARE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS'' BASIS,
+WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
+WITHOUT LIMITATION, WARRANTIES THAT THE COVERED SOFTWARE IS FREE OF
+DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING. THE
+ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE COVERED SOFTWARE IS
+WITH YOU. SHOULD ANY COVERED SOFTWARE PROVE DEFECTIVE IN ANY RESPECT, YOU
+(NOT HEALTH ADMINISTRATION CORPORATION, ITS LICENSORS OR AFFILIATES OR
+ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY SERVICING, REPAIR
+OR CORRECTION. THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART
+OF THIS LICENSE. NO USE OF ANY COVERED SOFTWARE IS AUTHORIZED HEREUNDER
+EXCEPT UNDER THIS DISCLAIMER.
+
+8. TERMINATION.
+
+8.1 This License and the rights granted hereunder will terminate
+automatically if You fail to comply with terms herein and fail to
+cure such breach within 30 days of becoming aware of the breach. All
+sublicenses to the Covered Software which are properly granted shall
+survive any termination of this License. Provisions which, by their
+nature, must remain in effect beyond the termination of this License
+shall survive.
+
+8.2 If You initiate litigation by asserting a patent infringement claim
+(excluding declaratory judgment actions) against Health Administration
+Corporation or a Contributor (Health Administration Corporation
+or Contributor against whom You file such action is referred to as
+"Participant") alleging that:
+
+a) such Participant's Contributor Version directly or indirectly
+ infringes any patent, then any and all rights granted by such
+ Participant to You under Sections 2.1 and/or 2.2 of this License
+ shall, upon 60 days notice from Participant terminate prospectively,
+ unless if within 60 days after receipt of notice You either: (i)
+ agree in writing to pay Participant a mutually agreeable reasonable
+ royalty for Your past and future use of Modifications made by such
+ Participant, or (ii) withdraw Your litigation claim with respect to
+ the Contributor Version against such Participant. If within 60 days
+ of notice, a reasonable royalty and payment arrangement are not
+ mutually agreed upon in writing by the parties or the litigation
+ claim is not withdrawn, the rights granted by Participant to
+ You under Sections 2.1 and/or 2.2 automatically terminate at the
+ expiration of the 60 day notice period specified above.
+
+b) any software, hardware, or device, other than such Participant's
+ Contributor Version, directly or indirectly infringes any patent,
+ then any rights granted to You by such Participant under Sections
+ 2.1(b) and 2.2(b) are revoked effective as of the date You first
+ made, used, sold, distributed, or had made, Modifications made by
+ that Participant.
+
+8.3 If You assert a patent infringement claim against Participant
+alleging that such Participant's Contributor Version directly or
+indirectly infringes any patent where such claim is resolved (such as by
+license or settlement) prior to the initiation of patent infringement
+litigation, then the reasonable value of the licenses granted by such
+Participant under Sections 2.1 or 2.2 shall be taken into account in
+determining the amount or value of any payment or license.
+
+8.4 In the event of termination under Sections 8.1 or 8.2 above, all
+end user license agreements (excluding distributors and resellers) which
+have been validly granted by You or any distributor hereunder prior to
+termination shall survive termination.
+
+9. LIMITATION OF LIABILITY.
+
+9.1 UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT
+(INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, HEALTH
+ADMINISTRATION CORPORATION, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR
+OF COVERED SOFTWARE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE
+TO ANY PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL
+DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS
+OF GOODWILL, WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND
+ALL OTHER COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE
+BEEN INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF
+LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL INJURY
+RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT APPLICABLE LAW
+PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION
+OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, BUT MAY ALLOW
+LIABILITY TO BE LIMITED; IN SUCH CASES, A PARTY'S, ITS EMPLOYEES',
+LICENSORS' OR AFFILIATES' LIABILITY SHALL BE LIMITED TO AUD$100. NOTHING
+CONTAINED IN THIS LICENSE SHALL PREJUDICE THE STATUTORY RIGHTS OF ANY
+PARTY DEALING AS A CONSUMER.
+
+9.2 Notwithstanding any other clause in the licence, and to the extent
+permitted by law:
+
+(a) Health Administration Corporation ("the Corporation") excludes all
+ conditions and warranties which would otherwise be implied into
+ a supply of goods or services arising out of or in relation to
+ the granting of this licence by the Corporation or any associated
+ acquisition of software to which this licence relates;
+
+(b) Where a condition or warranty is implied into such a supply and
+ that condition or warranty cannot be excluded by law that warranty
+ or condition is implied into that supply and the liability of the
+ Health Administration Corporation for a breach of that condition or
+ warranty is limited to the fullest extent permitted by law and, in
+ respect of conditions and warranties implied by the Trade Practices
+ Act (Commonwealth of Australia) 1974, is limited, to the extent
+ permitted by law, to one or more of the following at the election
+ of the Corporation:
+
+ (A) In the case of goods: (i) the replacement of the goods or the
+ supply of equivalent goods; (ii) the repair of the goods; (iii)
+ the payment of the cost of replacing the goods or of acquiring
+ equivalent goods; (iv) the payment of the cost of having the
+ goods repaired; and
+
+ (B) in the case of services: (i) the supplying of the services again;
+ or (ii) the payment of the cost of having the services supplied
+ again.
+
+10. MISCELLANEOUS.
+
+This License represents the complete agreement concerning subject matter
+hereof. All rights in the Covered Software not expressly granted under
+this License are reserved. Nothing in this License shall grant You any
+rights to use any of the trademarks of Health Administration Corporation
+or any of its Affiliates, even if any of such trademarks are included
+in any part of Covered Software and/or documentation to it.
+
+This License is governed by the laws of the State of New South Wales,
+Australia excluding its conflict-of-law provisions. All disputes or
+litigation arising from or relating to this Agreement shall be subject
+to the jurisdiction of the Supreme Court of New South Wales. If any part
+of this Agreement is found void and unenforceable, it will not affect
+the validity of the balance of the Agreement, which shall remain valid
+and enforceable according to its terms.
+
+11. RESPONSIBILITY FOR CLAIMS.
+
+As between Health Administration Corporation and the Contributors,
+each party is responsible for claims and damages arising, directly or
+indirectly, out of its utilisation of rights under this License and You
+agree to work with Health Administration Corporation and Contributors
+to distribute such responsibility on an equitable basis. Nothing herein
+is intended or shall be deemed to constitute any admission of liability.
+
+EXHIBIT A
+
+The contents of this file are subject to the HACOS License Version 1.2
+(the "License"); you may not use this file except in compliance with
+the License.
+
+Software distributed under the License is distributed on an "AS IS"
+basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+License for the specific language governing rights and limitations under
+the License.
+
+The Original Software is "NetEpi Analysis". The Initial Developer
+of the Original Software is the Health Administration Corporation,
+incorporated in the State of New South Wales, Australia.
+
+Copyright (C) 2004, 2005 Health Administration Corporation.
+All Rights Reserved.
+
+Contributors:
+
+ Tomas Aragon <aragon at berkeley.edu> (or.midp code)
+
+
+APPENDIX 1. DIFFERENCES BETWEEN THE HACOS LICENSE VERSION 1.2, THE
+MOZILLA PUBLIC LICENSE VERSION 1.1 AND THE NOKIA OPEN SOURCE LICENSE
+(NOKOS LICENSE) VERSION 1.0A
+
+The HACOS License Version 1.2 was derived from the Mozilla Public
+License Version 1.1 using some of the changes to the Mozilla Public
+License embodied in the Nokia Open Source License (NOKOS License)
+Version 1.0a. The differences between the HACOS License Version 1.2
+(this document), the Mozilla Public License and the NOKOS License are
+as follows:
+
+i. The title of the license was changed to "Health Administration
+ Corporation Open Source License Version 1.2".
+
+ii. Globally, all references to "Netscape Communications Corporation",
+ "Mozilla", "Nokia" and "Nokia Corporation" were changed to "Health
+ Administration Corporation".
+
+iii. Globally, the words "means" and "Covered Code" as used in the
+     Mozilla Public License were changed to "shall mean" and "Covered
+     Software" respectively, as used in the NOKOS License.
+
+iv. In Section 1 (Definitions), a definition of "Health Administration
+ Corporation" was added.
+
+v. In Section 2, the term "intellectual property rights" used in the
+ Mozilla Public License was replaced by the term "copyrights"
+ as used in the NOKOS License.
+
+vi. In Section 2.2 (Contributor Grant), the words "Subject to the
+ terms of this License" which appear in the NOKOS License were
+ added to the Mozilla Public License.
+
+vii. The sentence "However, You may include an additional document
+ offering the additional rights described in Section 3.5." which
+ appears in the Mozilla Public License was omitted.
+
+viii. Section 6.3 (Derivative Works) of the Mozilla Public License,
+ which permits modifications to the Mozilla Public License,
+ was omitted.
+
+ix. The original Section 9 (Limitation of Liability) was renumbered
+ as Section 9.1, a maximum liability of AUD$100 was specified
+ for those jurisdictions which do not allow complete exclusion of
+ liability but which do allow limitation of liability. The sentence
+ "NOTHING CONTAINED IN THE LICENSE SHALL PREJUDICE THE STATUTORY
+ RIGHTS OF ANY PARTY DEALING AS A CONSUMER.", which appears in the
+ NOKOS License but not in the Mozilla Public License, was added.
+
+x. Section 9.2 was added in order to further limit liability to the
+ maximum extent permitted by the Commonwealth of Australia Trade
+ Practices Act 1974.
+
+xi. Section 10 of the Mozilla Public License, which provides additional
+ conditions for United States Government End Users, was omitted.
+
+xii. The governing law and jurisdiction for the settlement of disputes
+ in Section 11 of the Mozilla Public License and Section 10 of the
+ NOKOS License was changed to the laws of the State of New South
+ Wales and the Supreme Court of New South Wales respectively. The
+ exclusion of the application of the United Nations Convention on
+ Contracts for the International Sale of Goods which appears in
+ the Mozilla Public License was omitted.
+
+xiii. Section 13 (Multiple-Licensed Code) of the Mozilla Public License
+ was omitted.
+
+xiv. The provisions for alternative licensing arrangement for contributed
+ code which appear in Exhibit A of the Mozilla Public License
+ were omitted.
+
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..ffbb32d
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,27 @@
+exclude SOOM_objects
+include liccheck.py
+include LICENSE
+include MANIFEST.in
+include test.py
+include TODO
+include CHANGES
+include CONTRIBUTERS
+global-include .cvsignore
+recursive-include SOOMv0 *.py *.pyx *.g Makefile README
+recursive-include soomext *.py *.c *.h *.tex *.dia soom.pdf Makefile
+prune soomext/build
+recursive-include tests *.py *.sas
+recursive-include web *.py *.html *.png *.ico *.xcf *.svg *.css
+recursive-include simpleinst *
+prune simpleinst/build
+recursive-include demo *.py Makefile
+include demo/demo.txt
+include demo/rawdata/*
+include demo/scratch
+prune demo/regression
+recursive-include docs *
+recursive-include sandbox *
+include tests/data/*
+exclude web/dynamic/soom*
+prune web/static/.xvpics
+global-exclude *.pyc *.swp *.bak *.orig .\#* .xvpics/*
diff --git a/PKG-INFO b/PKG-INFO
new file mode 100644
index 0000000..7fb87d4
--- /dev/null
+++ b/PKG-INFO
@@ -0,0 +1,10 @@
+Metadata-Version: 1.0
+Name: NetEpi-Analysis
+Version: 0-9-0
+Summary: Network-enabled tools for epidemiology and public health practice
+Home-page: http://netepi.info/
+Author: NSW Department of Health
+Author-email: Tim CHURCHES <TCHUR at doh.health.nsw.gov.au>
+License: Health Administration Corporation Open Source License Version 1.2
+Description: UNKNOWN
+Platform: UNKNOWN
diff --git a/README b/README
new file mode 100644
index 0000000..3a89d6b
--- /dev/null
+++ b/README
@@ -0,0 +1,439 @@
+NetEpi Analysis V0.1 README
+===========================
+
+LICENSE
+=======
+
+All material associated with "NetEpi Analysis" is Copyright (C) 2004, 2005
+Health Administration Corporation (New South Wales Department of Health).
+
+NetEpi Analysis is licensed under the terms of the Health
+Administration Corporation Open Source License Version 1.2 (HACOS License
+V1.2), the full text of which can be found in the LICENSE file provided
+with NetEpi Analysis.
+
+Status of this release
+======================
+
+Version 0.1 should be considered a "technology preview" or "pre-alpha"
+release, intended to demonstrate the overall direction of the project.
+It should not be used in a production setting. Many aspects of the API and
+Web interface are likely to change in future releases.
+
+Bug reports, feature requests and general discussion
+====================================================
+
+Please report all bugs, problems, feature requests and ideas to the
+NetEpi-discuss mailing list. You need to subscribe to this list in
+order to post messages to it - see the list management Web page at:
+
+ http://lists.sourceforge.net/mailman/listinfo/netepi-discuss
+
+SOOM
+====
+
+NetEpi Analysis uses several (somewhat) unusual techniques to ensure
+reasonable performance when dealing with moderately-sized datasets
+(up to about 10 million records), despite being programmed in a highly
+dynamic, late-binding, object-oriented programming language (Python). In
+particular, all datasets are stored in vertically partitioned form
+- that is, column-wise, not row-wise, and dataset summarisation is
+achieved using set-theoretic operations on ordinal mappings - that is,
+the ordinal row position in each column is used as an implicit row ID
+and set intersections (and other set operations) are performed on these
+row IDs. This approach, referred to as "SOOM" for short (Set Operations
+on Ordinal Mappings), differs from more commonly used bit-mapped indexes
+in that value indexes are stored as vectors of sorted integer indexes -
+an approach which sacrifices some performance on very large datasets,
+but which retains storage and processing efficiency even for columns with
+very high cardinality, without having to use complex bitmap compression
+schemes. High cardinality data are routinely encountered in health
+and epidemiological datasets. A peer-reviewed paper describing these
+techniques appeared in 2003 - please see: Churches T. Exploratory data
+analysis using set operations and ordinal mapping. Comput Methods Programs
+Biomed 2003; 71(1):11-23. Preprint copies of the paper in PDF format are
+available from the author on request (email: tchur at doh.health.nsw.gov.au)
+if your institution or organisation does not have access to this journal.
+
+Prerequisites
+=============
+
+NetEpi Analysis only runs under POSIX-compatible operating systems:
+Linux, Unix or Mac OS X at present. Later versions may run under
+Microsoft Windows. However, the demonstration Web interface can be
+accessed from any type of computer using a relatively recent Web browser.
+
+
+For the Analysis core, you'll need to have the following installed. Unless
+otherwise stated, these can either be packages provided by your operating
+system vendor, or source downloaded from the project web page.
+
+ * Python 2.3.2 or later
+
+    NOTE: Python 2.4.0 is not suitable as it has a defect in its regular
+    expression library (bug #1088891), although this only affects the
+    web interface.
+
+ NOTE that the python 2.3.5 shipped with Mac OS X does not supply
+ the bsddb module, which is required for NetEpi Analysis. The
+ prebuilt binary Python packages at the following site do provide bsddb
+ (and a number of other advantages):
+
+ http://pythonmac.org/packages/
+
+ * Numeric Python
+
+ Version 24.2 is recommended (23.8 and 23.1 are known to work also,
+ but 23.5 and 23.6 had build problems under RedHat linux)
+
+ NOTE - this is **NOT** numarray or numpy, which are available from
+ the same web site - conversion to use numpy will be undertaken for
+ a future version of NetEpi Analysis.
+
+ http://sourceforge.net/projects/numpy
+
+ If building Numeric from source, you will require development versions
+ of a number of scientific packages. If using Fedora Core Linux,
+ installing all development and scientific packages is recommended.
+
+ * mx.DateTime
+
+ http://www.egenix.com/files/python/mxDateTime.html
+
+ * Oracle Berkeley DB (formerly Sleepycat DB)
+
+ http://www.oracle.com/database/berkeley-db/index.html
+
+ * the "SOOM extensions", which are included in the "soomext"
+ directory of the NetEpi Analysis distribution. To install,
+ change into the "soomext" directory, and as root, run:
+
+ python setup.py install
+
+*** Note that order matters: the soomext blobstore relies on the internal
+    structure of Numeric, and if the two get out of sync, core dumps
+    will result.
+
+ * Psyco (optional - speeds up some operations on 32-bit X86 systems only)
+
+ http://psyco.sourceforge.net/
+
+ * R (Debian: r-base) v2.0.0 or later (v2.4.0 or later recommended)
+
+ http://www.r-project.org/
+
+ NOTE: the RPM packages on the r-project.org site do not contain the
+ R shared library, which is needed for building RPy (see below) -
+ unless this is rectified, you will need to build R from source.
+
+    NOTE: when building R from source, configure should be given the
+    "--enable-R-shlib" option to indicate that you want the R shared
+    libraries to be built (necessary for building RPy).
+
+ NOTE: OS X does not supply g77 (a fortran compiler), which is
+ necessary for building R from source. There are instructions on the
+ R web site for building from scratch under OS X, but the pre-built
+ binaries should suffice.
+
+ * RPy v0.4 or later (v0.4.6 preferred)
+
+ http://rpy.sourceforge.net/
+
+ NOTE: OS X uses an unusual shared library scheme - at this time, rpy
+ does not invoke the linker correctly. If the setup.py script
+ fails, copy the last link line and replace "-L<r_path> -lR" with
+ "-framework R".
+
+ NOTE: R 2.3(?) and later on Unix-like platforms introduce a stack
+ checking mechanism that triggers prematurely when R is embedded
+ in Python. A patch has been committed to rpy to disable the stack
+ checking, although the fixed version has not been released. Either
+ use R 2.2, or use the SVN version of rpy until a version later than
+ 0.4.6 is released [May 2007].
+
+ * Xvfb - part of X11, Xvfb provides a virtual frame buffer device,
+ to allow R to generate raster graphics on a headless server etc.
+
+ * the RSvgDevice library (optional - allows the R plots to produce SVG
+ files as output).
+
+ http://cran.r-project.org/
+
+ * yapps2 (only needed if the filter grammar in soomparse.g is changed):
+
+ http://theory.stanford.edu/~amitp/Yapps/
+
+For the web interface to NetEpi Analysis, in addition to the above,
+you will also need:
+
+ * the Albatross web application framework from Object Craft Pty Ltd
+
+ http://www.object-craft.com.au/projects/albatross/
+
+
+Installation
+============
+
+After installing all of the prerequisites, install the SOOMv0 library
+by running, as root, "python setup.py install" in the base directory of
+the installation.
+
+Unit tests
+==========
+
+Unit tests for most aspects of the SOOM data summarisation engine which
+underlies NetEpi Analysis are provided, although test coverage is not
+complete in this version.
+
+The tests, which may take up to 10 minutes to complete, can be run by
+issuing the following command:
+
+ python test.py
+
+Note that the "soomext" extensions have their own test suite. This can
+be run by issuing the following command:
+
+ python soomext/test/all.py
+
+
+Installing the Web interface
+============================
+
+The NetEpi Analysis web interface can either be run as a simple
+stand-alone Python web server, or it can be run as a CGI or FastCGI
+script under a web server (for example Apache). The stand-alone server is
+quick and easy to deploy, but only services one request at a time. CGI
+deployment will handle any number of requests concurrently, but suffers
+due to the cost of starting a new NEA instance for each request. FastCGI
+is considerably harder to initially deploy, but NEA service processes
+are started on demand and continue to service web requests, amortising
+the startup cost.
+
+Stand-alone Web interface
+-------------------------
+
+To start the stand-alone server, run
+
+ python web/nea-standalone.py
+
+The script accepts the following options:
+
+ -pPORT, --port=PORT listen on PORT (default: 8080)
+ -SSOOMPATH, --soompath=SOOMPATH
+ SOOM search path
+ -NAPPNAME, --appname=APPNAME
+                        application name (affects paths)
+ -TAPPTITLE, --apptitle=APPTITLE
+ web application title
+ --datadir=DATA_DIR A writable directory NOT published by the web server
+ (contains private data)
+ --dynamicdir=DYNAMIC_TARGET
+ A writable directory published by the web server,
+ contains files generated by the application
+ --staticdir=STATIC_TARGET
+                        An UNwritable directory published by the web server,
+ contains static content used by the application (CSS,
+ images)
+
+Typically only the --soompath and --port options will be used. The
+other options might be used if the script is installed, rather than
+being run from the source directory, although this is not a recommended
+configuration at this time.
+
+
+CGI or FastCGI installation
+---------------------------
+
+The script "web/install.py" is responsible for installing the web
+components of NEA. The install scripts know default paths and web
+user names for:
+
+ RedHat Linux
+ Debian Linux
+ Apple OS X
+
+Additional platforms can be added by editing simpleinst/platform.py
+
+ Configurable parameters include:
+
+ appname
+        Application name - affects install paths, cookies,
+        etc. Unfortunately, some application resources have to be
+        hard-wired due to limitations in the Albatross templating
+        system (which will be fixed, eventually), so changing this
+        option is not recommended at this time.
+
+ apptitle
+ User visible application name
+
+ cgi_dir
+ Application scripts and data will be placed into a
+ subdirectory "appname" off this directory.
+
+ html_dir
+ Fixed application content (images, help text, style sheets)
+ will be installed in an "appname" subdirectory of this
+ directory.
+
+ session_secret
+ This string is mixed with any session data that makes a
+ round trip via the user's browser to prevent unauthorised
+ modifications being made. The string must be kept secret,
+ and should not be shared with other applications.
+
+ soompath
+ A colon separated list of paths to directories containing
+ SOOM datasets.
+
+ web_user
+ User name to install files as - this should match the user
+ id your web server runs CGI scripts as.
+
+Click on the HELP link in the banner of the Web interface for a brief
+introduction to its capabilities. You can also access this introduction
+separately in the file web/static/help.html
+
+Loading test data
+=================
+
+The demo/SOOM_demo_data_load.py script automatically downloads a number
+of freely available datasets from a US CDC National Center for Health
+Statistics FTP server and loads them as NetEpi Analysis datasets. The
+datasets are:
+
+ nhds US CDC National Hospital Discharge Surveys 1996-2002
+
+Please be sure to observe the data use restrictions which the National
+Center for Health Statistics attaches to these files. See the various
+README files at:
+
+ ftp://ftp.cdc.gov/pub/Health_Statistics/NCHS/Dataset_Documentation/NHDS/
+
+For example, the data use restrictions for the 2002 data file are
+as follows:
+
+--- begin excerpt from US CDC NCHS data use restrictions ---
+!WARNING -- DATA USE RESTRICTIONS!
+READ CAREFULLY BEFORE USING
+
+The Public Health Service Act (Section 308(d)) provides that the data
+collected by the National Center for Health Statistics (NCHS), Centers
+for Disease Control and Prevention (CDC), may be used only for the
+purpose of health statistical reporting and analysis. Any effort to
+determine the identity of any reported case is prohibited by this law.
+
+NCHS does all it can to assure that the identity of data subjects
+cannot be disclosed. All direct identifiers, as well as any
+characteristics that might lead to identification, are omitted from the
+dataset. Any intentional identification or disclosure of a person or
+establishment violates the assurances of confidentiality given to the
+providers of the information. Therefore, users will:
+
+1. Use the data in this dataset for statistical reporting and analysis
+ only.
+2. Make no use of the identity of any person or establishment discovered
+ inadvertently and advise the Director, NCHS, of any such discovery.
+3. Not link this dataset with individually identifiable data from other
+ NCHS or non-NCHS datasets.
+
+BY USING THESE DATA, YOU SIGNIFY YOUR AGREEMENT TO COMPLY WITH
+THE ABOVE-STATED STATUTORILY-BASED AGREEMENTS."
+---end excerpt from US CDC NCHS data use restrictions ---
+
+In addition, copies of the following data are included under copyright
+fair dealing provisions:
+
+ who_indicators WHO national health indicators 2000-2002
+ worldpop WHO World Standard Population Proportions
+
+Instructions for obtaining the latest version of the WHO indicators
+can be found in the README file in the demo subdirectory.
+
+To run the loader:
+
+ python demo/SOOM_demo_data_load.py
+
+Running the script with no arguments will load the datasets in full,
+which can take around two hours (the nhds dataset contains slightly more
+than two million rows). For testing purposes, however, the --rowlimit
+option can be used to stop loading after the specified number of rows,
+for example --rowlimit=10000 will stop loading after the first ten
+thousand rows of each dataset.
+
+The --datasets option allows you to load only the NHDS data
+(--datasets=nhds), or the WHO data (--datasets=whopop) - the default is
+"all".
+
+To see all the options, run:
+
+ python demo/SOOM_demo_data_load.py --help
+
+Note that the script automatically downloads the NHDS data files from the
+CDC Web site via FTP, so it will fail if you do not have an Internet
+connection. However, you can download the files manually and place them
+in the demo/rawdata directory. The URLs for the required files can be
+gleaned from the source code of the programmes in the demo/loaders
+directory. The files occupy about 10 MB in compressed form.
+
+The script also downloads two RTF (Rich Text Format) files containing the US
+ICD-9-CM codes and labels, in order to construct an output translation for the
+diagnosis and procedure codes in the NHDS datasets. This is intended as a
+demonstration only, and you should be aware that the code labels may not be
+complete or appropriate for the version of the US ICD-9-CM codes used when the
+NHDS data was collected - this may be corrected in future versions.
+
+Demo Applications
+=================
+
+Two simple applications are provided to demonstrate some of the
+capabilities of NetEpi Analysis. These applications operate on the NHDS
+demonstration data, which should be loaded prior to their use (see the
+previous section).
+
+API Demo
+--------
+
+The script demo/api_demo.py is intended to demonstrate the Python
+programming interface to NetEpi Analysis. The NetEpi Analysis API can
+either be used interactively from a Python interpreter, or used from
+Python programmes.
+
+To run the demo:
+
+ python demo/api_demo.py
+
+The demo accepts several command line options:
+
+ --soompath=SOOMPATH SOOM dataset path
+ --writepath=WRITEPATH
+ Dataset write path (for temporary objects, defaults to
+ SOOMPATH)
+ --nopause Don't pause for user after each step
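+
+As a taste of the API, the following minimal sketch (assuming the NHDS
+demo data has already been loaded as described above, and that the
+default dataset path is in use) tabulates discharge counts by sex from
+an interactive Python session:
+
+    from SOOMv0 import datasets, SummaryStats
+    nhds = datasets.dsload('nhds')
+    summ = nhds.summ('sex', SummaryStats.freq())
+    print summ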
+
+Plot Demo
+---------
+
+The script demo/plot_demo.py demonstrates a range of graphical plots of
+the NHDS demonstration dataset. This demonstration requires an X11 session.
+
+To run the demo:
+
+ python demo/plot_demo.py
+
+The demo will display some explanatory text and diagnostics in the
+terminal window, then open a new window and display a plot. The user
+can then step to the next or previous test by using their cursor keys,
+or quit by pressing 'q', in the terminal window. The user can also skip
+directly to a test by entering its number and pressing the Enter key.
+
+Note that some of the plot demonstrations fail - we know this, and it
+serves to remind us that certain features which used to work need to be
+attended to. Both the api_demo.py and plot_demo.py programmes serve as
+convenient, informal regression tests.
+
+Web Interface
+-------------
+
+See the help.html file in the web/static subdirectory of the NetEpi Analysis
+distribution for a brief introduction to the Web interface.
+
diff --git a/SOOMv0/Analysis/PopRate.py b/SOOMv0/Analysis/PopRate.py
new file mode 100644
index 0000000..13173d8
--- /dev/null
+++ b/SOOMv0/Analysis/PopRate.py
@@ -0,0 +1,727 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+
+# ToDo: add confidence interval methods for recurring events (eg hospital
+# admission) as suggested in:
+# http://www.doh.wa.gov/Data/Guidelines/ConfIntguide.htm
+
+# Standard Python libraries
+import math
+import time
+import sys
+
+# http://sourceforge.net/projects/numpy
+import Numeric, MA
+sumaxis = MA.add.reduce
+
+# SOOM
+from SOOMv0.common import *
+from SOOMv0.CrossTab import CrossTab, shape_union, dims
+from SOOMv0.Soom import soom
+
+__all__ = (
+ 'calc_directly_std_rates',
+ 'calc_stratified_rates',
+ 'calc_indirectly_std_ratios',
+)
+
+class Vars:
+ "Debugging aid"
+ def __init__(self, vars):
+ self.__dict__.update(vars)
+
+def just_freq_tables(crosstab):
+ for table in crosstab.tables():
+ name = table.name
+ if name == '_freq_':
+ yield table, name, '', ''
+ elif name.startswith('freq_wgtd_by'):
+ yield (table, name, name[len('freq'):],
+ crosstab[name].label[len('Frequency'):])
+
+
+def get_alpha(conflev):
+ if conflev is None:
+ return None
+ if 0.0 < conflev < 1.0:
+ return 1.0 - conflev
+ raise Error("conflev must be greater than 0 and less than 1")
+
+
+def calc_directly_std_rates(summset, popset, stdpopset=None,
+                            conflev=0.95, basepop=100000,
+                            timeinterval='years',
+                            ci_method='dobson', popset_popcol='_freq_',
+                            stdpopset_popcol='_stdpop_',
+                            axis=0,
+ debug=False):
+
+ """
+ Calculate Directly Standardised Population Rates
+
+ summset is a summary dataset of counts of events for the
+ population-of-interest being compared to the standard
+ population.
+ popset is the stratified population counts for the
+ population-of-interest
+ stdpopset is the stratified population counts for the standard
+              population (if omitted, only crude rates are calculated)
+ """
+ from rpy import r, get_default_mode, set_default_mode, BASIC_CONVERSION
+
+ alpha = get_alpha(conflev)
+
+ if ci_method not in ('dobson','ff'):
+ raise Error('Only Dobson et al. (dobson) and Fay-Feuer (ff) methods '
+ 'for confidence intervals currently implemented')
+ if not popset.has_column(popset_popcol):
+ raise Error('Denominator population dataset %r does not have a '
+ '%r column' % (popset.label or popset.name, popset_popcol))
+ if stdpopset is not None and not stdpopset.has_column(stdpopset_popcol):
+ raise Error('Standard population dataset %r does not have a '
+ '%r column' % (stdpopset.label or stdpopset.name, stdpopset_popcol))
+
+ st = time.time()
+ r_mode = get_default_mode()
+ try:
+ set_default_mode(BASIC_CONVERSION)
+
+ # We turn the summset into an Ncondcols-dimensional matrix
+ summtab = CrossTab.from_summset(summset)
+
+ if stdpopset is not None:
+ # Then attempt to do the same to the stdpop data, summing any
+ # axes not required and replicate any missing until we have an
+ # array the same shape as the summtab array.
+ stdtab = CrossTab.from_summset(stdpopset, shaped_like=summtab)
+ stdtab.collapse_axes_not_in(summtab)
+ stdtab.replicate_axes(summtab)
+ stdpop = stdtab[stdpopset_popcol].data.astype(Numeric.Float64)
+
+        # The population dataset must have at least as many dimensions
+        # as the summary dataset. Any additional axes are eliminated by
+        # summing; any missing axes are created by replication.
+ poptab = CrossTab.from_summset(popset, shaped_like=summtab)
+ poptab.collapse_axes_not_in(summtab)
+ poptab.replicate_axes(summtab)
+ popfreq = poptab[popset_popcol].data.astype(Numeric.Float64)
+
+        # Manufacture a CrossTab for the result, with one less axis (the axis summed over)
+ result = summtab.empty_copy()
+ del result.axes[axis]
+
+ if stdpopset is not None:
+ sum_stdpop = sumaxis(stdpop)
+ stdwgts = stdpop / sum_stdpop
+ stdpop_sq = stdpop**2
+ sum_stdpop_sq = sum_stdpop**2
+ ffwi = stdwgts / popfreq
+ ffwm = MA.maximum(MA.ravel(ffwi))
+
+ basepop = float(basepop)
+
+ for table, name, n_add, l_add in just_freq_tables(summtab):
+
+ # avoid integer overflows...
+ summfreq = table.data.astype(Numeric.Float64)
+ strata_rate = summfreq / popfreq
+
+ result.add_table('summfreq'+n_add,
+ data=sumaxis(summfreq, axis),
+ label='Total events'+l_add)
+ result.add_table('popfreq'+n_add,
+ data=sumaxis(popfreq, axis),
+ label='Total person-'+timeinterval+' at risk'+l_add)
+
+ if stdpopset is not None:
+ std_strata_summfreq = summfreq * Numeric.where(MA.getmask(stdwgts),0.,1.)
+ wgtrate = strata_rate * stdwgts
+ result.add_table('std_strata_summfreq'+n_add,
+ data=sumaxis(std_strata_summfreq, axis),
+ label="Total events in standard strata"+l_add)
+
+ # Crude rate
+ cr = sumaxis(summfreq, axis) / sumaxis(popfreq, axis) * basepop
+ result.add_table('cr'+n_add, data=cr,
+ label='Crude Rate per '+'%d' % basepop +' person-'+timeinterval+l_add)
+
+ if alpha is not None:
+ # CIs for crude rate
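+                # These are exact (Garwood) Poisson limits, via the
+                # chi-square identity: lower = qchisq(alpha/2, 2v)/2,
+                # upper = qchisq(1-alpha/2, 2(v+1))/2 for v events,
+                # each divided by person-time and scaled to basepop.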
+ count = sumaxis(summfreq, axis)
+ count_shape = count.shape
+ count_flat = MA.ravel(count)
+ totpop = sumaxis(popfreq, axis)
+ assert totpop.shape == count.shape
+ totpop_flat = MA.ravel(totpop)
+
+ cr_ll = Numeric.empty(len(count_flat), typecode=Numeric.Float64)
+ cr_ul = Numeric.empty(len(count_flat), typecode=Numeric.Float64)
+ cr_ll_mask = Numeric.zeros(len(count_flat), typecode=Numeric.Int8)
+ cr_ul_mask = Numeric.zeros(len(count_flat), typecode=Numeric.Int8)
+
+ for i, v in enumerate(count_flat):
+ try:
+ if v == 0:
+ cr_ll[i] = 0.0
+ else:
+ cr_ll[i] = ((r.qchisq(alpha/2., df=2.0*v)/2.0) / totpop_flat[i]) * basepop
+ cr_ul[i] = ((r.qchisq(1. - alpha/2., df=2.0*(v + 1))/2.0) / totpop_flat[i]) * basepop
+ except:
+ cr_ll[i] = 0.0
+ cr_ul[i] = 0.0
+ cr_ll_mask[i] = 1
+ cr_ul_mask[i] = 1
+
+ cr_ll = MA.array(cr_ll, mask=cr_ll_mask, typecode=MA.Float64)
+ cr_ul = MA.array(cr_ul, mask=cr_ul_mask, typecode=MA.Float64)
+ cr_ll.shape = count_shape
+ cr_ul.shape = count_shape
+
+ cr_base = 'Crude rate %d%%' % (100.0*conflev)
+ result.add_table('cr_ll'+n_add, data=cr_ll,
+ label=cr_base+' lower confidence limit '+l_add)
+ result.add_table('cr_ul'+n_add, data=cr_ul,
+ label=cr_base+' upper confidence limit '+l_add)
+
+ if stdpopset is not None:
+
+ # Directly Standardised Rate
+ dsr = sumaxis(wgtrate, axis)
+ result.add_table('dsr'+n_add, data=dsr*basepop,
+ label='Directly Standardised Rate per '+'%d' % basepop +' person-'+timeinterval+l_add)
+
+ # Confidence Intervals
+ if alpha is None or name != '_freq_':
+ # Can only calculate confidence intervals on freq cols
+ continue
+
+ if ci_method == 'dobson':
+ # Dobson et al method
+ # see: Dobson A, Kuulasmaa K, Eberle E, Schere J. Confidence intervals for weighted sums
+ # of Poisson parameters, Statistics in Medicine, Vol. 10, 1991, pp. 457-62.
+ # se_wgtrate = summfreq*((stdwgts/(popfreq/basepop))**2)
+ se_wgtrate = summfreq*((stdwgts/(popfreq))**2)
+ stderr = stdpop_sq * strata_rate * (1.0 - strata_rate)
+ se_rate = sumaxis(se_wgtrate, axis)
+ sumsei = sumaxis(stderr, axis)
+ total_freq = sumaxis(std_strata_summfreq, axis)
+ # get shape of total_freq
+ total_freq_shape = total_freq.shape
+
+ total_freq_flat = MA.ravel(total_freq)
+
+ # flat arrays to hold results and associated masks
+ l_lam = Numeric.empty(len(total_freq_flat), typecode=Numeric.Float64)
+ u_lam = Numeric.empty(len(total_freq_flat), typecode=Numeric.Float64)
+ l_lam_mask = Numeric.zeros(len(total_freq_flat), typecode=Numeric.Int8)
+ u_lam_mask = Numeric.zeros(len(total_freq_flat), typecode=Numeric.Int8)
+
+ conflev_l = (1 - conflev) / 2.0
+ conflev_u = (1 + conflev) / 2.0
+
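+                    # l_lam and u_lam below are exact gamma (Poisson)
+                    # limits for the total standard-strata count; the
+                    # Dobson method shifts them around the DSR, scaled
+                    # by sqrt(se_rate/total_freq).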
+ for i, v in enumerate(total_freq_flat):
+ try:
+ if v == 0.:
+ u_lam[i] = -math.log(1 - conflev)
+ l_lam[i] = 0.0
+ else:
+ l_lam[i] = r.qgamma(conflev_l, v, scale = 1.)
+ u_lam[i] = r.qgamma(conflev_u, v + 1., scale = 1.)
+ except:
+ l_lam[i] = 0.0
+ u_lam[i] = 0.0
+ l_lam_mask[i] = 1
+ u_lam_mask[i] = 1
+
+ l_lam = MA.array(l_lam, mask=l_lam_mask, typecode=MA.Float64)
+ u_lam = MA.array(u_lam, mask=u_lam_mask, typecode=MA.Float64)
+ l_lam.shape = total_freq_shape
+ u_lam.shape = total_freq_shape
+ dsr_ll = dsr + (((se_rate/total_freq)**0.5)*(l_lam - total_freq))
+ dsr_ul = dsr + (((se_rate/total_freq)**0.5)*(u_lam - total_freq))
+
+ elif ci_method == 'ff':
+ # Fay and Feuer method
+ # see: Fay MP, Feuer EJ. Confidence intervals for directly standardized rates:
+ # a method based on the gamma distribution. Statistics in Medicine 1997 Apr 15;16(7):791-801.
+
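+                    # ffwi holds the per-stratum ratios of standard
+                    # weight to person-time; their maximum, ffwm, is
+                    # used to widen the upper limit, as per Fay and
+                    # Feuer.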
+ ffvari = summfreq * ffwi**2.0
+ ffvar = sumaxis(ffvari,axis)
+
+ dsr_flat = Numeric.ravel(MA.filled(dsr,0))
+ dsr_shape = dsr.shape
+
+ ffvar_flat = Numeric.ravel(MA.filled(ffvar,0))
+
+ # flat arrays to hold results and associated masks
+ dsr_ll = Numeric.empty(len(dsr_flat), typecode=Numeric.Float64)
+ dsr_ul = Numeric.empty(len(dsr_flat), typecode=Numeric.Float64)
+ dsr_ll_mask = Numeric.zeros(len(dsr_flat), typecode=Numeric.Int8)
+ dsr_ul_mask = Numeric.zeros(len(dsr_flat), typecode=Numeric.Int8)
+
+ for i, y in enumerate(dsr_flat):
+ try:
+ dsr_ll[i] = (ffvar_flat[i] / (2.0*y)) * r.qchisq(alpha/2., df= (2.0*(y**2.)/ffvar_flat[i]))
+ dsr_ul[i] = ((ffvar_flat[i] + (ffwm**2.0))/ (2.0*(y + ffwm))) * r.qchisq(1. - alpha/2., df = ((2.0*((y + ffwm)**2.0))/(ffvar_flat[i] + ffwm**2.0)))
+ except:
+ dsr_ll[i] = 0.0
+ dsr_ul[i] = 0.0
+ dsr_ll_mask[i] = 1
+ dsr_ul_mask[i] = 1
+ dsr_ll = MA.array(dsr_ll,mask=dsr_ll_mask,typecode=MA.Float64)
+ dsr_ul = MA.array(dsr_ul,mask=dsr_ul_mask,typecode=MA.Float64)
+ dsr_ll.shape = dsr_shape
+ dsr_ul.shape = dsr_shape
+
+ result.add_table('dsr_ll'+n_add, data=dsr_ll*basepop,
+ label='DSR '+ '%d' % (100.0*conflev)+'% lower confidence limit'+l_add)
+ result.add_table('dsr_ul'+n_add, data=dsr_ul*basepop,
+ label='DSR '+ '%d' % (100.0*conflev)+'% upper confidence limit'+l_add)
+
+ finally:
+ set_default_mode(r_mode)
+ soom.info('calc_directly_std_rates took %.03f' % (time.time() - st))
+ if stdpopset is not None:
+ name = 'dir_std_rates_' + summset.name
+ label = 'Directly Standardised Rates for '+(summset.label or summset.name)
+ else:
+ name = 'crude_rates_' + summset.name
+ label = 'Crude Rates for '+(summset.label or summset.name)
+ if conflev:
+ label += ' (%g%% conf. limits)' % (conflev * 100)
+ if debug:
+ global vars
+ vars = Vars(locals())
+ return result.to_summset(name, label=label)
+
+
+def calc_stratified_rates(summset, popset,
+                          conflev=0.95, basepop=100000,
+ timeinterval='years',
+ ci_method='dobson', popset_popcol='_freq_',
+ debug=False):
+
+ """
+ Calculate stratified population rates
+
+    summset is a stratified summary dataset of counts of events for
+ the population-of-interest
+ popset is the stratified population counts for the
+ population-of-interest
+ """
+ from rpy import r, get_default_mode, set_default_mode, BASIC_CONVERSION
+
+ alpha = get_alpha(conflev)
+
+ if ci_method not in ('dobson','ff'):
+ raise Error('Only Dobson et al. (dobson) and Fay-Feuer (ff) '
+ 'methods for confidence intervals currently '
+ 'implemented')
+ if not popset.has_column(popset_popcol):
+ raise Error('Denominator population dataset %r does not have a '
+ '%r column' % (popset.label or popset.name, popset_popcol))
+
+ st = time.time()
+ r_mode = get_default_mode()
+ try:
+ set_default_mode(BASIC_CONVERSION)
+
+ # We turn the summset into an Ncondcols-dimensional matrix
+ summtab = CrossTab.from_summset(summset)
+
+        # The population dataset must have at least as many dimensions
+        # as the summary dataset. Any additional axes are eliminated by
+        # summing; any missing axes are created by replication.
+ poptab = CrossTab.from_summset(popset, shaped_like=summtab)
+ poptab.collapse_axes_not_in(summtab)
+ poptab.replicate_axes(summtab)
+ popfreq = poptab[popset_popcol].data.astype(Numeric.Float64)
+
+ # Manufacture a CrossTab for the result
+ result = summtab.empty_copy()
+
+ basepop = float(basepop)
+
+ for table, name, n_add, l_add in just_freq_tables(summtab):
+ # avoid integer overflows...
+ summfreq = table.data.astype(Numeric.Float64)
+
+ strata_rate = summfreq / popfreq
+
+ result.add_table('summfreq'+n_add,
+ data=summfreq,
+ label='Events'+l_add)
+ result.add_table('popfreq'+n_add,
+ data=popfreq,
+ label='Person-'+timeinterval+' at risk'+l_add)
+ result.add_table('sr'+n_add,
+ data=strata_rate * basepop,
+ label='Strata-specific Rate per '+'%d' % basepop +' person-'+timeinterval+l_add)
+
+ if alpha is not None:
+ # CIs for stratified rates
+ summfreq_shape = summfreq.shape
+ summfreq_flat = MA.ravel(summfreq)
+ assert popfreq.shape == summfreq.shape
+ popfreq_flat = MA.ravel(popfreq)
+
+ sr_ll = Numeric.empty(len(summfreq_flat), typecode=Numeric.Float64)
+ sr_ul = Numeric.empty(len(summfreq_flat), typecode=Numeric.Float64)
+ sr_ll_mask = Numeric.zeros(len(summfreq_flat), typecode=Numeric.Int8)
+ sr_ul_mask = Numeric.zeros(len(summfreq_flat), typecode=Numeric.Int8)
+
+ for i, v in enumerate(summfreq_flat):
+ try:
+ if v == 0:
+ sr_ll[i] = 0.0
+ else:
+ sr_ll[i] = ((r.qchisq(alpha/2., df=2.0*v)/2.0) / popfreq_flat[i]) * basepop
+ sr_ul[i] = ((r.qchisq(1. - alpha/2., df=2.0*(v + 1))/2.0) / popfreq_flat[i]) * basepop
+ except:
+ sr_ll[i] = 0.0
+ sr_ul[i] = 0.0
+ sr_ll_mask[i] = 1
+ sr_ul_mask[i] = 1
+
+ sr_ll = MA.array(sr_ll, mask=sr_ll_mask, typecode=MA.Float64)
+ sr_ul = MA.array(sr_ul, mask=sr_ul_mask, typecode=MA.Float64)
+ sr_ll.shape = summfreq_shape
+ sr_ul.shape = summfreq_shape
+
+                sr_base = 'Stratified rate %d%%' % (100.0*conflev)
+ result.add_table('sr_ll'+n_add, data=sr_ll,
+ label=sr_base+' lower confidence limit '+l_add)
+ result.add_table('sr_ul'+n_add, data=sr_ul,
+ label=sr_base+' upper confidence limit '+l_add)
+
+ finally:
+ set_default_mode(r_mode)
+ soom.info('calc_stratified_rates took %.03f' % (time.time() - st))
+ name = 'stratified_rates_' + summset.name
+ label = 'Stratified Rates for '+(summset.label or summset.name)
+ if conflev:
+ label += ' (%g%% conf. limits)' % (conflev * 100)
+ if debug:
+ global vars
+ vars = Vars(locals())
+ return result.to_summset(name, label=label)
+
+
+def calc_indirectly_std_ratios(summset, popset, stdsummset, stdpopset,
+                conflev=0.95, baseratio=100, timeinterval='years',
+                popset_popcol='_freq_', stdpopset_popcol='_stdpop_',
+                ci_method='daly', debug=False):
+
+ """
+ Calculate Indirectly Standardised Population Event Ratios
+
+ - summset is a summary dataset of counts of events for the
+ population-of-interest being compared to the standard population.
+ - popset is the stratified population counts for the
+ population-of-interest
+ - stdsummset is a summary dataset of counts of events for the
+ standard population
+ - stdpopset is the stratified population counts for the standard
+ population
+ """
+ from rpy import r, get_default_mode, set_default_mode, BASIC_CONVERSION
+
+ alpha = get_alpha(conflev)
+
+ if ci_method != 'daly':
+ raise Error("Only Daly method for confidence intervals "
+ "currently implemented")
+ if not popset.has_column(popset_popcol):
+ raise Error('Denominator population dataset %r does not have a '
+ '%r column' % (popset.label or popset.name, popset_popcol))
+ if not stdpopset.has_column(stdpopset_popcol):
+ raise Error('Standard population dataset %r does not have a '
+ '%r column' % (stdpopset.label or stdpopset.name, stdpopset_popcol))
+
+ st = time.time()
+ r_mode = get_default_mode()
+ try:
+ set_default_mode(BASIC_CONVERSION)
+
+ shape = shape_union(stdsummset, summset)
+
+ summtab = CrossTab.from_summset(summset, shaped_like=shape)
+
+ stdsummtab = CrossTab.from_summset(stdsummset, shaped_like=shape)
+
+ stdpoptab = CrossTab.from_summset(stdpopset, shaped_like=shape)
+ stdpoptab.collapse_axes_not_in(stdsummtab)
+
+ stdsummtab.replicate_axes(shape)
+ stdpoptab.replicate_axes(shape)
+
+ poptab = CrossTab.from_summset(popset, shaped_like=shape)
+ poptab.collapse_axes_not_in(shape)
+ if poptab.get_shape() != stdsummtab.get_shape():
+ raise Error('Observed population does not have all the required columns')
+ popfreq = poptab[popset_popcol].data.astype(MA.Float64)
+
+ result = stdsummtab.empty_copy()
+ result.add_table('popfreq', data=popfreq,
+ label='Total person-'+timeinterval+' at risk')
+
+ expected_cols = []
+ for table, name, n_add, l_add in just_freq_tables(stdsummtab):
+ stdsummfreq = stdsummtab[name].data.astype(MA.Float64)
+ stdpopfreq = stdpoptab[stdpopset_popcol].data.astype(MA.Float64)
+ std_strata_rates = stdsummfreq / stdpopfreq
+ strata_expected_freq = std_strata_rates * popfreq
+# print stdsummfreq[0,0,0], stdpopfreq[0,0,0], popfreq[0,0,0]
+ result.add_table('expected'+n_add, data=strata_expected_freq,
+ label='Expected events'+l_add)
+ expected_cols.append('expected'+n_add)
+
+ result.collapse_axes_not_in(summtab)
+
+ axis = 0
+ baseratio = float(baseratio)
+
+ for table, name, n_add, l_add in just_freq_tables(summtab):
+ observed = table.data.astype(Numeric.Float64)
+ result.add_table('observed'+n_add,
+ data=observed,
+ label='Observed events'+l_add)
+
+ expected = result['expected'+n_add].data
+
+ isr = observed / expected
+ result.add_table('isr'+n_add, data=isr*baseratio,
+ label='Indirectly Standardised Event Ratio')
+
+ # Confidence Intervals
+ if alpha is None or name != '_freq_':
+ # Can only calculate confidence intervals on freq cols
+ continue
+
+ conflev_l = (1 - conflev) / 2.0
+ conflev_u = (1 + conflev) / 2.0
+
+            # get shape of observed
+            observed_shape = observed.shape
+            # flattened version
+            observed_flat = MA.ravel(observed)
+
+            # sanity check on shapes - should be the same!
+            assert expected.shape == observed.shape
+
+            # flattened version of expected
+            expected_flat = MA.ravel(expected)
+
+ # lists to hold results
+ isr_ll = Numeric.empty(len(observed_flat), typecode=Numeric.Float64)
+ isr_ul = Numeric.empty(len(observed_flat), typecode=Numeric.Float64)
+ isr_ll_mask = Numeric.zeros(len(observed_flat), typecode=Numeric.Int8)
+ isr_ul_mask = Numeric.zeros(len(observed_flat), typecode=Numeric.Int8)
+
+ obs_mask = MA.getmaskarray(observed_flat)
+ exp_mask = MA.getmaskarray(expected_flat)
+
+ for i, v in enumerate(observed_flat):
+ if obs_mask[i] or exp_mask[i]:
+ isr_ll[i] = 0.0
+ isr_ul[i] = 0.0
+ isr_ll_mask[i] = 1
+ isr_ul_mask[i] = 1
+ else:
+ if v == 0.:
+ obs_ll = 0.0
+ obs_ul = -math.log(1 - conflev)
+ else:
+ obs_ll = r.qgamma(conflev_l, v, scale = 1.)
+ obs_ul = r.qgamma(conflev_u, v + 1., scale = 1.)
+ isr_ll[i] = obs_ll / expected_flat[i]
+ isr_ul[i] = obs_ul / expected_flat[i]
+
+ isr_ll = MA.array(isr_ll, typecode=MA.Float64, mask=isr_ll_mask)
+ isr_ul = MA.array(isr_ul, typecode=MA.Float64, mask=isr_ul_mask)
+ isr_ll.shape = observed_shape
+ isr_ul.shape = observed_shape
+
+ isr_base = 'ISR %d%%' % (100.0*conflev)
+ result.add_table('isr_ll'+n_add, data=isr_ll*baseratio,
+ label=isr_base+' lower confidence limit'+l_add)
+ result.add_table('isr_ul'+n_add, data=isr_ul*baseratio,
+ label=isr_base+' upper confidence limit'+l_add)
+ finally:
+ set_default_mode(r_mode)
+ soom.info('calc_indirectly_std_ratios took %.03f' % (time.time() - st))
+ name = 'indir_std_ratios_' + summset.name
+ label = 'Indirectly Standardised Ratios for '+(summset.label or summset.name)
+ if conflev:
+ label += ' (%g%% conf. limits)' % (conflev * 100)
+
+ if debug:
+ global vars
+ vars = Vars(locals())
+ return result.to_summset(name, label=label)
+
+if __name__ == '__main__':
+ from SOOMv0 import datasets, soom, SummaryStats, plot
+
+ soom.messages = False
+ path = '../SOOM_objects'
+
+ ds = datasets.dsload('syndeath', path=path)
+ pop = datasets.dsload('synpop', path=path)
+ stdpop_mf = datasets.dsload("aus01stdpop_mf", path=path)
+ stdpop = datasets.dsload("aus01stdpop", path=path)
+
+ if 1:
+ print "Directly Age-Standardised Rates by Sex for 1997"
+ print
+ s = ds.summ('agegrp', 'sex', SummaryStats.freq(), filterexpr='year=1997', zeros=1)
+ p = calc_directly_std_rates(s, pop, stdpopset=stdpop, popset_popcol='pop',
+ stdpopset_popcol='pop')
+ print p
+ print
+ raw_input('Hit <ENTER> to continue')
+
+ if 1:
+ print "Crude Rates by Sex for 1997"
+ print
+ s = ds.summ('agegrp', 'sex', SummaryStats.freq(), filterexpr='year=1997', zeros=1)
+ p = calc_directly_std_rates(s, pop, popset_popcol='pop')
+ print p
+ print
+ raw_input('Hit <ENTER> to continue')
+
+ if 1:
+ print "Age-specific Rates by Sex for 1997"
+ print
+ s = ds.summ('agegrp', 'sex', SummaryStats.freq(), filterexpr='year=1997', zeros=1)
+ p = calc_stratified_rates(s, pop, popset_popcol='pop')
+ print p
+ print
+ raw_input('Hit <ENTER> to continue')
+
+ if 1:
+ print "Region-specific Rates by Sex for 1997"
+ print
+ s = ds.summ('region', 'sex', SummaryStats.freq(), filterexpr='year=1997', zeros=1)
+ p = calc_stratified_rates(s, pop, popset_popcol='pop')
+ print p
+ print
+ raw_input('Hit <ENTER> to continue')
+
+ if 1:
+ print "Directly Age/sex-Standardised Rates by Year by Sex"
+ print
+ s = ds.summ('agegrp', 'sex', 'year', SummaryStats.freq(), zeros=1)
+        p = calc_directly_std_rates(s, pop, stdpopset=stdpop_mf, popset_popcol='pop',
+                                    stdpopset_popcol='pop', conflev=0.99)
+ print p
+ print
+ plot.lineplot(p, 'year', 'sex', measure='dsr', xlabelrotate=45)
+ raw_input('See graph - hit <ENTER> to continue')
+
+ if 1:
+ print "Directly Age/sex-Standardised Rates by Year by Sex"
+ print
+ s = ds.summ('agegrp', 'year', 'sex', SummaryStats.freq(), zeros=1)
+        p = calc_directly_std_rates(s, pop, stdpopset=stdpop_mf, popset_popcol='pop',
+                                    stdpopset_popcol='pop', conflev=0.99)
+ print p
+ print
+ raw_input('Hit <ENTER> to continue')
+
+ if 1:
+ print "Directly Age/sex-Standardised Rates by Sex by Year (Dobson CI method)"
+ print
+ s = ds.summ('agegrp', 'sex', 'year', SummaryStats.freq(), zeros=1)
+        p = calc_directly_std_rates(s, pop, stdpopset=stdpop_mf, popset_popcol='pop',
+                                    stdpopset_popcol='pop', conflev=0.99, ci_method='dobson')
+ print p
+ print
+ raw_input('Hit <ENTER> to continue')
+
+ if 1:
+ print "Directly Age/sex-Standardised Rates by Sex by Year (Fay-Feuer CI method)"
+ print
+ s = ds.summ('agegrp', 'sex', 'year', SummaryStats.freq(), zeros=1)
+        p = calc_directly_std_rates(s, pop, stdpopset=stdpop_mf, popset_popcol='pop',
+                                    stdpopset_popcol='pop', conflev=0.99, ci_method='ff')
+ print p
+ print
+ raw_input('Hit <ENTER> to continue')
+
+ if 1:
+ print "Directly Age-Standardised Rates by Sex by Year"
+ print
+ s = ds.summ('agegrp', 'sex', 'year', SummaryStats.freq(), zeros=1)
+        p = calc_directly_std_rates(s, pop, stdpopset=stdpop, popset_popcol='pop',
+                                    stdpopset_popcol='pop', conflev=0.99)
+ print p
+ raw_input('Hit <ENTER> to continue')
+
+ if 1:
+ print "Directly Age/sex-Standardised Rates by Sex by Region"
+ print
+ s = ds.summ('agegrp', 'sex', 'region', SummaryStats.freq(), zeros=1)
+        p = calc_directly_std_rates(s, pop, stdpopset=stdpop_mf, popset_popcol='pop',
+                                    stdpopset_popcol='pop', conflev=0.99)
+ print p
+ print
+ plot.dotchart(p, 'region', 'sex', measure='dsr',horizontal=True)
+ raw_input('See graph - hit <ENTER> to continue')
+
+ if 1:
+ print "Directly Age/sex-Standardised Rates by Year by Region by Sex"
+ print "First 15 lines only:"
+ print
+ s = ds.summ('agegrp', 'sex', 'year', 'region', SummaryStats.freq(), zeros=1)
+        p = calc_directly_std_rates(s, pop, stdpopset=stdpop_mf, popset_popcol='pop',
+                                    stdpopset_popcol='pop', conflev=0.99, ci_method='dobson')
+ print p[:15]
+ print
+ raw_input('Hit <ENTER> to continue')
+
+ if 1:
+ print "Directly Age-Standardised Rates"
+ print
+ s = ds.summ('agegrp', 'year', SummaryStats.freq(), zeros=1)
+        p = calc_directly_std_rates(s, pop, stdpopset=stdpop, popset_popcol='pop',
+                                    stdpopset_popcol='pop', conflev=0.99)
+ print p
+ print
+ raw_input('Hit <ENTER> to continue')
+
+ if 1:
+ print "Directly Age-Standardised Rates by Year by Cause-of-death"
+ print
+        s = ds.summ('agegrp', 'year', 'causeofdeath', SummaryStats.freq(),
+                    filterexpr='causeofdeath in (1,2,3,4)', zeros=1)
+        p = calc_directly_std_rates(s, pop, stdpopset=stdpop, popset_popcol='pop',
+                                    stdpopset_popcol='pop', conflev=0.99, debug=True)
+ print p
+ print
+ raw_input('Hit <ENTER> to continue')
+
+ if 1:
+ print "Directly Age-Standardised Rates by Cause-of-death by Year"
+ print
+        s = ds.summ('agegrp', 'causeofdeath', 'year', SummaryStats.freq(),
+                    filterexpr='causeofdeath in (1,2,3,4)', zeros=1)
+        p = calc_directly_std_rates(s, pop, stdpopset=stdpop, popset_popcol='pop',
+                                    stdpopset_popcol='pop', conflev=0.99, debug=True)
+ print p
+ print
+ raw_input('Hit <ENTER> to continue')
+
+ if 1:
+ print "Indirectly Age-Standardised Mortality Ratios by Region"
+ print
+ s = ds.summ('agegrp', 'sex', SummaryStats.freq(), zeros=1)
+ t = ds.summ('sex', 'region', SummaryStats.freq(), zeros=1)
+        stdpop = pop.summ('agegrp', 'sex', SummaryStats.asum('pop'))
+ #pop2 = pop.summ('agegrp','sex','region',SummaryStats.asum('pop'))
+ #p=calc_indirectly_std_ratios(t, pop2, s, stdpop, popset_popcol='sum_of_pop', stdpopset_popcol='sum_of_pop',conflev=0.99,debug=True)
+        p = calc_indirectly_std_ratios(t, pop, s, pop, popset_popcol='pop',
+                                       stdpopset_popcol='pop', conflev=0.99, debug=True)
+ print p
+ print
+ raw_input('Hit <ENTER> to continue')
+
diff --git a/SOOMv0/Analysis/__init__.py b/SOOMv0/Analysis/__init__.py
new file mode 100644
index 0000000..afa6c4f
--- /dev/null
+++ b/SOOMv0/Analysis/__init__.py
@@ -0,0 +1,16 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+from SOOMv0.Analysis.PopRate import *
+from SOOMv0.Analysis.twobytwo import *
diff --git a/SOOMv0/Analysis/twobytwo.py b/SOOMv0/Analysis/twobytwo.py
new file mode 100644
index 0000000..0c66fb8
--- /dev/null
+++ b/SOOMv0/Analysis/twobytwo.py
@@ -0,0 +1,1336 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: twobytwo.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/Analysis/twobytwo.py,v $
+#
+# twobytwo.py - calculates epidemiological measures of association for
+# two-by-two tables. Largely, but not entirely, based on a translation of
+# JavaScript code from the OpenEpi project (see http://www.openepi.com) - such
+# translation and re-use is permitted by the open-source license under which
+# OpenEpi is made available.
+
+from SOOMv0 import Stats
+from rpy import *
+import math
+import Numeric
+use_cstats = True
+try:
+ import Cstats
+except ImportError:
+ use_cstats = False
+
+__all__ = (
+ 'twobytwotable',
+)
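+
+# A typical use (a sketch; the counts are hypothetical):
+#   t = twobytwotable(conflev=0.95)
+#   t.add_stratum(10, 5, 90, 95)     # e1d1, e0d1, e1d0, e0d0
+#   t.add_stratum(20, 12, 80, 88)
+#   t.calc()    # computes per-stratum, crude and M-H adjusted measures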
+
+class NotAvailable(Exception): pass
+
+MathError = (RException, ZeroDivisionError, NotAvailable)
+
+class _ReportBase:
+ float_fmt = '%.5f'
+ rule = None
+
+ def __init__(self, label):
+ self.label = label
+ self.contents = []
+
+ def fmt(self, fmt, *args):
+ strargs = []
+ for arg in args:
+ if isinstance(arg, float):
+ strargs.append(self.float_fmt % arg)
+ elif arg is None:
+ strargs.append('undefined')
+ else:
+ strargs.append(str(arg))
+ self.contents.append(fmt % tuple(strargs))
+
+ def _lines(self):
+ return [str(node) for node in self.contents]
+
+ def __str__(self):
+ return '\n'.join(self._lines())
+
+class _ReportSubSection(_ReportBase):
+ role = 'subsection'
+
+ def _lines(self):
+ lines = _ReportBase._lines(self)
+ lines.insert(0, self.label+':')
+ lines.append('-----------------------')
+ return lines
+
+class _ReportSection(_ReportBase):
+ role = 'section'
+ rule = '='
+
+ def new_subsection(self, label):
+ subsection = _ReportSubSection(label)
+ self.contents.append(subsection)
+ return subsection
+
+ def _lines(self):
+ lines = _ReportBase._lines(self)
+ lines[0:0] = ['', self.label, '=' * len(self.label)]
+ return lines
+
+
+# Wrapper function around R's fisher.exact() function to retry with various
+# options if it fails due to the values being too large.
+def fisher_exact_test(rmatrix, conf_level=0.95, conf_int=True,
+ alternative='two.sided'):
+ kwargs = dict(conf_level=conf_level, conf_int=conf_int,
+ alternative=alternative)
+ while True:
+ try:
+ return r.fisher_test(rmatrix, **kwargs)
+ except RException, e:
+ if 'workspace' in kwargs:
+ del kwargs['workspace']
+ kwargs['hybrid'] = True
+ elif 'hybrid' in kwargs:
+ # run out of options
+ return None
+ elif 'Out of workspace' in str(e) or 'FEXACT error 40' in str(e):
+ # increase network algorithm workspace
+ kwargs['workspace'] = 2000000
+ else:
+ kwargs['hybrid'] = True
+
+
+# create a function in the R environment to calculate mid-p exact OR and
+# confidence limits. This code is adapted from the EpiTools package, used with
+# permission from Tomas Aragon
+# See http://www.medepi.net/epitools/
+r("""midpcl <- function(x, conf.level=0.95, interval=c(0, 1000)) {
+ mm<-x
+ mue <- function(mm, or) {
+ fisher.test(mm, or=or, alternative="less")$p -
+ fisher.test(x=x, or=or, alternative="greater")$p }
+ midp <- function(mm, or=1) {
+ lteqtoa1 <- fisher.test(mm, or=or, alternative="less")$p.val
+ gteqtoa1 <- fisher.test(mm, or=or, alternative="greater")$p.val
+ 0.5 * (lteqtoa1 - gteqtoa1 + 1.0) }
+ alpha <- 1.0 - conf.level
+ EST <- uniroot(function(or) {
+ mue(mm, or)
+ }, interval = interval)$root
+ LCL <- uniroot(function(or) {
+ 1.0 - midp(mm, or) - alpha/2.0
+ }, interval = interval)$root
+ UCL <- 1.0/uniroot(function(or) {
+ midp(mm, or = 1.0/or) - alpha/2.0
+ }, interval = interval)$root
+ midporcl<-c(EST, LCL, UCL)
+ return(midporcl) }
+ """)
+
+class TwoByTwoStratum(object):
+
+ def __init__(self, e1d1, e0d1, e1d0, e0d0,
+ label=None,
+ conflev=0.95, add_half_if_zeros=False, khan=False):
+ self.label = label
+ self.conflev = conflev
+ self.a = e1d1 # exposed and diseased
+ self.b = e0d1 # unexposed and diseased
+ self.c = e1d0 # exposed and undiseased
+ self.d = e0d0 # unexposed and undiseased
+ if self.a < 0 or self.b < 0 or self.c < 0 or self.d < 0:
+ raise ValueError('a, b, c and d must all be greater than or equal to zero')
+ self.khan = khan
+ self.added_half = False
+ self.has_cell_zeros = False
+ if self.a == 0 or self.b == 0 or self.c == 0 or self.d == 0:
+ if add_half_if_zeros:
+ self.a += 0.5
+ self.b += 0.5
+ self.c += 0.5
+ self.d += 0.5
+ self.added_half = True
+ self.has_cell_zeros = False
+ else:
+ self.has_cell_zeros = True
+
+ self.r1 = self.a + self.b # total diseased
+ self.r2 = self.c + self.d # total undiseased
+ self.c1 = self.a + self.c # total exposed
+ self.c2 = self.b + self.d # total unexposed
+ self.t = self.a + self.b + self.c + self.d # grand total
+
+        self.has_marginal_zeros = (self.c1 == 0 or self.c2 == 0 or
+                                   self.r1 == 0 or self.r2 == 0)
+
+        # Set flag for expected counts less than 5
+        if self.t == 0:
+            self.anyExpLT5 = True
+        else:
+            self.anyExpLT5 = (float(self.r1*self.c1)/float(self.t) < 5
+                              or float(self.r1*self.c2)/float(self.t) < 5
+                              or float(self.r2*self.c1)/float(self.t) < 5
+                              or float(self.r2*self.c2)/float(self.t) < 5)
+
+ # chi square calculations
+ if not self.has_marginal_zeros:
+ self.cs = float(self.t * ((self.a*self.d) - (self.b*self.c))**2) / float(self.c1*self.c2*self.r1*self.r2)
+ self.csc = (self.t * (abs((self.a*self.d) - (self.b*self.c)) - (float(self.t)/2.0))**2) \
+ / float(self.c1*self.c2*self.r1*self.r2)
+ self.mhcs = (float((self.t - 1)*((self.a*self.d) - (self.b*self.c))**2)) / float(self.c1*self.c2*self.r1*self.r2)
+ self.pcs = 1.0 - r.pchisq(self.cs, 1)
+ self.pcsc = 1.0 - r.pchisq(self.csc, 1)
+ self.pmhcs = 1.0 - r.pchisq(self.mhcs, 1)
+ else:
+ self.cs = None
+ self.csc = None
+ self.mhcs = None
+ self.pcs = None
+ self.pcsc = None
+ self.pmhcs = None
+
+ # critical value
+ self.z = -Stats.probit((1.0 - self.conflev)/2)
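+        # e.g. conflev=0.95 gives the familiar z of about 1.96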
+
+ # risk/prevalence
+ self.risk_exposed, self.risk_exposed_lower, self.risk_exposed_upper = self._modwald(self.a, self.c1, self.z)
+ self.risk_unexposed, self.risk_unexposed_lower, self.risk_unexposed_upper = self._modwald(self.b, self.c2, self.z)
+ self.risk_overall, self.risk_overall_lower, self.risk_overall_upper = self._modwald(self.r1, self.t, self.z)
+
+ # risk/prevalence ratio
+ if self.c1 == 0 or self.c2 == 0 or self.b == 0:
+ self.rr = None
+ self.rr_lower = None
+ self.rr_upper = None
+ else:
+ self.rr = (float(self.a)/float(self.c1))/(float(self.b)/float(self.c2))
+ # confidence limits for risk/prevalence ratio - Taylor series
+ if self.a == 0:
+ self.rr_lower = None
+ self.rr_upper = None
+ else:
+ self.rr_lower = self.rr * math.exp(-self.z * (((1.0 - float(self.a)/float(self.c1))/float(self.a)) + \
+ ((1.0 - float(self.b)/float(self.c2)) / float(self.b)))**0.5)
+ self.rr_upper = self.rr * math.exp( self.z * (((1.0 - float(self.a)/float(self.c1))/float(self.a)) + \
+ ((1.0 - float(self.b)/float(self.c2)) / float(self.b)))**0.5)
+
+ # risk/prevalence difference
+ if self.c1 == 0 or self.c2 == 0:
+ self.rd = None
+ self.rd_lower = None
+ self.rd_upper = None
+ else:
+ self.rd = (float(self.a)/float(self.c1)) - (float(self.b)/float(self.c2) )
+ # confidence limits for the risk/prevalence difference - Taylor
+ # series
+ rd_bound = ((float(self.a)*float(self.c)/(self.c1**3)) + (float(self.b)*float(self.d)/(self.c2**3)))**0.5
+ self.rd_lower = self.rd - (self.z * rd_bound)
+ self.rd_upper = self.rd + (self.z * rd_bound)
+
+ # aetiological fraction in the population based on the risk/prevalance
+ # ratio
+ if self.t == 0 or self.c2 == 0 or self.r1 == 0:
+ self.aefp = None
+ self.aefp_upper = None
+ self.aefp_lower = None
+ else:
+ self.aefp = (float(self.r1)/float(self.t) - float(self.b)/float(self.c2)) / (float(self.r1) / float(self.t))
+            if self.aefp < -1.0:
+                self.aefp = -1.0
+            elif self.aefp > 1.0:
+                self.aefp = 1.0
+ # confidence limits for the aetiological fraction in the
+ # population, based on the risk/prevalance ratio - Kahn/Sempos
+ # method
+ aefp_bound_num = self.b * self.t * (self.a * self.d * (self.t - self.b) + (self.c * self.b**2))
+ aefp_bound_den = ((self.r1**3) * (self.c2**3))
+ aefp_bound = self.z * ((float(aefp_bound_num) / float(aefp_bound_den))**0.5)
+ self.aefp_lower = self.aefp - aefp_bound
+ if self.aefp_lower < -1.0:
+ self.aefp_lower = -1.0
+ self.aefp_upper = self.aefp + aefp_bound
+ if self.aefp_upper > 1.0:
+ self.aefp_upper = 1.0
+
+ # aetiological fraction in the exposed based on the risk/prevalance
+ # ratio
+ if self.rr in (0.0, None):
+ self.aefe = None
+ self.aefe_lower = None
+ self.aefe_upper = None
+ else:
+ self.aefe = float(self.rr - 1.0) / float(self.rr)
+            if self.aefe < -1.0:
+                self.aefe = -1.0
+            elif self.aefe > 1.0:
+                self.aefe = 1.0
+ # confidence limits for the aetiological fraction in the exposed,
+ # based on the risk/prevalance ratio
+            if self.rr_lower == 0 or self.rr_lower == None:
+                self.aefe_lower = None
+            else:
+                self.aefe_lower = float(self.rr_lower - 1.0) / self.rr_lower
+                if self.aefe_lower < -1.0:
+                    self.aefe_lower = -1.0
+ if self.rr_upper ==0 or self.rr_upper == None:
+ self.aefe_upper = None
+ else:
+ self.aefe_upper = float(self.rr_upper - 1.0) / self.rr_upper
+ if self.aefe_upper > 1.0:
+ self.aefe_upper = 1.0
+
+ # prevented fraction in the population based on the risk/prevalance
+ # ratio
+ if self.risk_unexposed in (0, None) or self.risk_overall == None:
+ self.pfp = None
+ self.pfp_lower = None
+ self.pfp_upper = None
+ else:
+ self.pfp = float(self.risk_unexposed - self.risk_overall) / float(self.risk_unexposed)
+ # confidence limits for the prevented fraction in the population,
+ # based on the risk/prevalance ratio
+ if 1.0 - self.aefp_upper == 0.0:
+ self.pfp_lower = None
+ else:
+ self.pfp_lower = (1.0 - (1.0 / (1.0 - self.aefp_upper)))
+ if 1.0 - self.aefp_lower == 0.0:
+ self.pfp_upper = None
+ else:
+ self.pfp_upper = (1.0 - (1.0 / (1.0 - self.aefp_lower)))
+
+ # prevented fraction in the exposed based on the risk/prevalance ratio
+ if self.risk_unexposed in (0, None) or self.risk_exposed == None:
+ self.pfe = None
+ self.pfe_lower = None
+ self.pfe_upper = None
+ else:
+ self.pfe = float(self.risk_unexposed - self.risk_exposed) / float(self.risk_unexposed)
+ # confidence limits for the prevented fraction in the exposed,
+ # based on the risk/prevalance ratio
+ if self.rr_upper == None or self.rr_lower == None:
+ self.pfe_lower = None
+ self.pfe_upper = None
+ else:
+ self.pfe_lower = 1.0 - self.rr_upper
+ self.pfe_upper = 1.0 - self.rr_lower
+
+ # odds ratios
+ if self.b == 0 or self.c == 0:
+ self.oddsratio = None
+ self.or_lower = None
+ self.or_upper = None
+ else:
+ self.oddsratio = float(self.a*self.d) / float(self.b*self.c)
+ # confidence limits for odds ratio - Taylor series
+ if self.a == 0 or self.b == 0 or self.c == 0 or self.d == 0:
+ self.or_lower = None
+ self.or_upper = None
+ else:
+ self.or_lower = self.oddsratio * math.exp(-self.z * (1.0/float(self.a) + 1.0/float(self.b) + 1.0/float(self.c) + \
+ 1.0/float(self.d))**0.5)
+ self.or_upper = self.oddsratio * math.exp( self.z * (1.0/float(self.a) + 1.0/float(self.b) + 1.0/float(self.c) + \
+ 1.0/float(self.d))**0.5)
+
+ # aetiological fraction in the population based on the odds ratio
+ # if self.c == 0 or self.r2 == 0 or self.oddsratio == None:
+ self.aefpor = None
+ self.aefpor_lower = None
+ self.aefpor_upper = None
+ if self.oddsratio is not None:
+ try:
+ self.aefpor = ((float(self.c)/float(self.r2))*(self.oddsratio - 1.0)) \
+ / ((float(self.c)/float(self.r2))*(self.oddsratio - 1.0) + 1.0)
+                if self.aefpor < -1.0:
+                    self.aefpor = -1.0
+                elif self.aefpor > 1.0:
+                    self.aefpor = 1.0
+ # confidence limits for the aetiological fraction in the
+ # population, based on the odds ratio
+ if self.b != 0 and self.d != 0 and self.r1 != 0:
+ aefpor_bound = self.z*(((float(self.b*self.r2)/float(self.d*self.r1))**2)*((float(self.a)/float(self.b*self.r1)) \
+ + (float(self.c)/float(self.d*self.r2))))**0.5
+ self.aefpor_lower = self.aefpor - aefpor_bound
+ if self.aefpor_lower < -1.0:
+ self.aefpor_lower = -1.0
+ self.aefpor_upper = self.aefpor + aefpor_bound
+ if self.aefpor_upper > 1.0:
+ self.aefpor_upper = 1.0
+ except MathError:
+ pass
+
+ # aetiological fraction in the exposed based on the odds ratio
+ if self.oddsratio == 0.0 or self.oddsratio == None:
+ self.aefeor = None
+ self.aefeor_lower = None
+ self.aefeor_upper = None
+ else:
+ self.aefeor = (self.oddsratio - 1.0) / float(self.oddsratio)
+            if self.aefeor < -1.0:
+                self.aefeor = -1.0
+            elif self.aefeor > 1.0:
+                self.aefeor = 1.0
+ # confidence limits for the aetiological fraction in the exposed,
+ # based on the odds ratio
+ if self.or_lower == 0 or self.or_lower == None:
+ self.aefeor_lower = None
+ else:
+ self.aefeor_lower = float(self.or_lower - 1.0) / self.or_lower
+ if self.aefeor_lower < -1.0:
+ self.aefeor_lower = -1.0
+ if self.or_upper == 0 or self.or_upper == None:
+ self.aefeor_upper = None
+ else:
+ self.aefeor_upper = float(self.or_upper - 1.0) / self.or_upper
+ if self.aefeor_upper > 1.0:
+ self.aefeor_upper = 1.0
+
+ # prevented fraction in the population based on the odds ratio
+ self.pfpor = None
+ self.pfpor_lower = None
+ self.pfpor_upper = None
+ if self.r2 != 0 and self.oddsratio is not None:
+ try:
+ self.pfpor = (float(self.c)/float(self.r2))*(1.0 - self.oddsratio)
+ # confidence limits for the prevented fraction in the population,
+ # based on the odds ratio
+ if self.aefpor_upper == None or (1.0 - self.aefpor_upper == 0.0):
+ self.pfpor_lower = None
+ else:
+ self.pfpor_lower = (1.0 - (1.0 / float(1.0 - self.aefpor_upper)))
+ if self.aefpor_lower == None or (1.0 - self.aefpor_lower == 0.0):
+ self.pfpor_upper = None
+ else:
+ self.pfpor_upper = (1.0 - (1.0 / float(1.0 - self.aefpor_lower)))
+ except MathError:
+ pass
+
+ # prevented fraction in the exposed based on the odds ratio
+ if self.oddsratio == None:
+ self.pfeor = None
+ self.pfeor_lower = None
+ self.pfeor_upper = None
+ else:
+ self.pfeor = 1.0 - self.oddsratio
+ # confidence limits for the prevented fraction in the exposed,
+ # based on the odds ratio
+ if self.or_lower == None or self.or_upper == None:
+ self.pfeor_lower = None
+ self.pfeor_upper = None
+ else:
+ self.pfeor_lower = 1.0 - self.or_upper
+ self.pfeor_upper = 1.0 - self.or_lower
+
+ # Don't perform exact tests if numbers too large.
+ if max(self.a, self.b, self.c, self.d) < 1000 or \
+ (max(self.a, self.b, self.c, self.d) < 10000 and min(self.a, self.b, self.c, self.d) < 50):
+ # Fisher's exact test and conditional MLE odds ratio via fisher.test()
+ # in R
+ if self.added_half:
+ a = int(self.a - 0.5)
+ b = int(self.b - 0.5)
+ c = int(self.c - 0.5)
+ d = int(self.d - 0.5)
+ else:
+ a = int(self.a)
+ b = int(self.b)
+ c = int(self.c)
+ d = int(self.d)
+
+ tab = with_mode(NO_CONVERSION, r.matrix)([a, b, c, d], nr=2)
+
+ ft = fisher_exact_test(tab, conf_level=self.conflev, conf_int=True, alternative='two.sided')
+ if ft is not None:
+ self.exact_p_twosided_asextreme = ft['p.value']
+ self.cmle_or = ft['estimate']['odds ratio']
+ self.cmle_or_lower = ft['conf.int'][0]
+ self.cmle_or_upper = ft['conf.int'][1]
+ else:
+ self.exact_p_twosided_asextreme = None
+ self.cmle_or = None
+ self.cmle_or_lower = None
+ self.cmle_or_upper = None
+
+ p_less = p_greater = None
+ ft = fisher_exact_test(tab, conf_level=self.conflev, conf_int=False, alternative='less')
+ if ft is not None:
+ p_less = ft['p.value']
+ else:
+ p_less = None
+ ft = fisher_exact_test(tab, conf_level=self.conflev, conf_int=False, alternative='greater')
+ if ft is not None:
+ p_greater = ft['p.value']
+ else:
+ p_greater = None
+
+ if self.cmle_or <= 1.0:
+ self.exact_p_onesided = p_less
+ else:
+ self.exact_p_onesided = p_greater
+
+ if self.exact_p_onesided is not None:
+ self.exact_p_twosided_twiceonesided = 2.0 * self.exact_p_onesided
+ else:
+ self.exact_p_twosided_twiceonesided = None
+
+ if p_less is not None and p_greater is not None:
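+                # one-sided mid-p = (P(X<=x) + P(X<x)) / 2
+                #                 = 0.5 * (p_less + (1.0 - p_greater))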
+ pval1 = (0.5 * (p_less - (1.0 - p_greater))) + (1.0 - p_greater)
+ self.mid_p_onesided = min(pval1, 1.0 - pval1)
+ self.mid_p_twosided = 2.0 * self.mid_p_onesided
+ del pval1
+ else:
+ self.mid_p_onesided = None
+ self.mid_p_twosided = None
+ del ft
+
+ # calculate mid-p confidence limits and median-unbiased estimate of
+ # OR. code adapted from EpiTools package, used with permission
+ # from Tomas Aragon
+ try:
+ self.midp_or, self.midp_or_lower, self.midp_or_upper = r.midpcl(tab, conf_level=self.conflev)
+ except MathError:
+ self.midp_or, self.midp_or_lower, self.midp_or_upper = None, None, None
+ else:
+ self.exact_p_twosided_twiceonesided = None
+ self.exact_p_onesided = None
+ self.exact_p_twosided_asextreme = None
+ self.cmle_or = None
+ self.cmle_or_lower = None
+ self.cmle_or_upper = None
+ self.mid_p_onesided = None
+ self.mid_p_twosided = None
+ self.midp_or, self.midp_or_lower, self.midp_or_upper = None, None, None
+
+ if khan:
+ # Fisher's exact using method of Khan, HA. A Visual Basic Software
+ # for computing Fisher's exact Probability. Journal of Statistical
+ # Software 2003; 8(21) (available at http://www.jstatsoft.org)
+ # First find the minimum value in the table
+ if self.added_half:
+ a = self.a - 0.5
+ b = self.b - 0.5
+ c = self.c - 0.5
+ d = self.d - 0.5
+ else:
+ a = self.a
+ b = self.b
+ c = self.c
+ d = self.d
+ x1 = int(a)
+ x2 = int(b)
+ x3 = int(c)
+ x4 = int(d)
+ t1 = int(x1 + x2)
+ t2 = int(x3 + x4)
+ t3 = int(x1 + x3)
+ t4 = int(x2 + x4)
+ x = int(x1 + x2 + x3 + x4)
+ minval = min(x1, x2, x3, x4)
+        if use_cstats:
+ self.khan_p1 = Cstats.exactcalc(minval, t1, t2, t3, t4, x, x1, x2, x3, x4, a, b, c, d)
+        else:
+ mini = minval
+ total_p = 0.0
+ while minval >= 0.0:
+ subtotal = (self._khancalc(t1) + self._khancalc(t2) + self._khancalc(t3) + self._khancalc(t4)) \
+ - (self._khancalc(x) + self._khancalc(x1) + self._khancalc(x2) + self._khancalc(x3) + self._khancalc(x4))
+ delta = math.exp(subtotal)
+ if abs(delta) < 0.000001:
+ break
+ else:
+ total_p += delta
+ # print minval, subtotal, math.exp(subtotal), total_p
+ if mini == a:
+ x1 = x1 - 1
+ x2 = t1 - x1
+ x3 = t3 - x1
+ x4 = t2 - x3
+ if mini == b:
+ x2 = x2 - 1
+ x1 = t1 - x2
+ x4 = t4 - x2
+ x3 = t2 - x4
+ if mini == c:
+ x3 = x3 - 1
+ x1 = t3 - x3
+ x4 = t2 - x3
+ x2 = t4 - x4
+ if mini == d:
+ x4 = x4 - 1
+ x2 = t4 - x4
+ x3 = t2 - x4
+ x1 = t3 - x3
+ minval -= 1
+ self.khan_p1 = total_p
+ self.khan_p = self.khan_p1 * 2.0
+ if self.khan_p > 1.0:
+ self.khan_p = 0.9999999999999999999999999999999999999
+
+ def _modwald(self, num, den, z):
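+        # Modified Wald interval: shift the point estimate to
+        # (x + z^2/2)/(n + z^2) (Agresti-Coull style) before applying
+        # the usual Wald bound, then clip to [0, 1].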
+ if den > 0:
+ pointestimate = float(num) / float(den)
+ vpp = (num + (z**2)/2.0) / (den + z**2)
+ bound = z*((vpp*(1.0 - vpp) / (den + z**2))**0.5)
+ lower = vpp - bound
+ if lower < 0.0:
+ lower = 0.0
+ upper = vpp + bound
+ if upper > 1.0:
+ upper = 1.0
+ return pointestimate, lower, upper
+ else:
+ return None, None, None
+
+    def _khancalc(self, value):
+        # log(value!), computed as a sum of logs
+        value = int(value)
+        logvalue = 0.0
+        for i in range(value):
+            logvalue += math.log(i + 1)
+        return logvalue
+
+ def informative(self):
+ return (self.a * self.d != 0) or (self.b * self.c != 0)
+
+ def report_counts(self, subsec, crude=False):
+ if self.added_half:
+            subsec.fmt('Note: 0.5 added to each table cell due to one or more zero counts')
+ subsec.fmt('a (exposed, disease): %s', self.a)
+ subsec.fmt('b (unexposed, disease): %s', self.b)
+ subsec.fmt('c (exposed, no disease): %s', self.c)
+ subsec.fmt('d (unexposed, no disease): %s', self.d)
+
+ def report_measures_of_association(self, subsec, crude=False):
+ subsec.fmt('Chi sq: %s, p=%s', self.cs, self.pcs)
+ subsec.fmt('Yates-corrected Chi sq: %s, p=%s', self.csc, self.pcsc)
+ subsec.fmt('M-H Chi sq: %s, p=%s', self.mhcs, self.pmhcs)
+ if self.khan:
+ subsec.fmt('Fisher\'s exact test (Khan method): one-sided p=%s, two-sided p=%s', self.khan_p1, self.khan_p)
+ subsec.fmt('Fisher\'s exact test: one-sided p=%s, two-sided (twice one-sided): p=%s, two-sided (as extreme): p=%s', self.exact_p_onesided, self.exact_p_twosided_twiceonesided, self.exact_p_twosided_asextreme)
+ subsec.fmt('mid-p: one-sided p=%s, two-sided p=%s', self.mid_p_onesided, self.mid_p_twosided)
+
+ def report_risk_based(self, subsec, crude=False):
+ subsec.fmt('Risk in exposed: %s (%s, %s)', self.risk_exposed, self.risk_exposed_lower, self.risk_exposed_upper)
+ subsec.fmt('Risk in unexposed: %s (%s, %s)', self.risk_unexposed, self.risk_unexposed_lower, self.risk_unexposed_upper)
+ subsec.fmt('Risk in overall population: %s (%s, %s)', self.risk_overall, self.risk_overall_lower, self.risk_overall_upper)
+ subsec.fmt('Risk ratio: %s (%s, %s)', self.rr, self.rr_lower, self.rr_upper)
+ subsec.fmt('Risk difference: %s (%s, %s)', self.rd, self.rd_lower, self.rd_upper)
+ if crude:
+ subsec.fmt('Aetiological fraction in the population: %s (%s, %s)', self.aefp, self.aefp_lower, self.aefp_upper)
+ subsec.fmt('Aetiological fraction in the exposed: %s (%s, %s)', self.aefe, self.aefe_lower, self.aefe_upper)
+ subsec.fmt('Prevented fraction in the population: %s (%s, %s)', self.pfp, self.pfp_lower, self.pfp_upper)
+ subsec.fmt('Prevented fraction in the exposed: %s (%s, %s)', self.pfe, self.pfe_lower, self.pfe_upper)
+
+ def report_odds_based(self, subsec, crude=False):
+ subsec.fmt('Sample odds ratio: %s (%s, %s)', self.oddsratio, self.or_lower, self.or_upper)
+ subsec.fmt('CMLE odds ratio: %s (%s, %s)', self.cmle_or, self.cmle_or_lower, self.cmle_or_upper)
+ subsec.fmt('mid-p CMLE odds ratio: %s (%s, %s)', self.midp_or, self.midp_or_lower, self.midp_or_upper)
+ if crude:
+ subsec.fmt('Aetiological fraction in the population: %s (%s, %s)', self.aefpor, self.aefpor_lower, self.aefpor_upper)
+ subsec.fmt('Aetiological fraction in the exposed: %s (%s, %s)', self.aefeor, self.aefeor_lower, self.aefeor_upper)
+ subsec.fmt('Prevented fraction in the population: %s (%s, %s)', self.pfpor, self.pfpor_lower, self.pfpor_upper)
+ subsec.fmt('Prevented fraction in the exposed :%s (%s, %s)', self.pfeor, self.pfeor_lower, self.pfeor_upper)
+
+ def report(self, section, report, crude=False):
+ subsec = report.new_subsection(self.label)
+ getattr(self, 'report_' + section)(subsec, crude)
+
+
+class twobytwotable(object):
+
+ def __init__(self, conflev=0.95):
+ self.conflev = conflev
+ self.z = -Stats.probit((1.0 - conflev)/2)
+ self.strata = [] # vector of strata
+ self.unstratified = None
+
+ def add_stratum(self, e1d1, e0d1, e1d0, e0d0, label=None):
+ if label is None:
+ label = 'Stratum %d' % (len(self.strata) + 1)
+ self.strata.append(TwoByTwoStratum(e1d1, e0d1, e1d0, e0d0,
+ label=label,
+ conflev=self.conflev))
+
+ def calc(self):
+ # initialise variables
+
+ # for crude table
+ crude_e1d1, crude_e0d1, crude_e1d0, crude_e0d0 = 0, 0, 0, 0
+
+ # for Mantel-Haenszel summary chi square
+ sumMhChiSqNum = 0.0
+ sumMhChiSqDen = 0.0
+
+ self.expLT5 = []
+
+ # for adjusted risk ratio
+ RRarr = []
+ wRRarr = []
+ sumRRDirectwlnRR = 0.0
+ sumwRR = 0.0
+
+ # for Mantel-Haenszel adjusted risk ratio
+ sumMhRRnum = 0.0
+ sumMhRRden = 0.0
+ sumRgbRRnum = 0.0
+ sumRgbRRSE = 0.0
+
+ # for risk difference
+ RDarr = []
+ wRDarr = []
+ sumwtimesRD = 0.0
+ sumwRD = 0.0
+
+ # for directly adjusted OR
+ ORarr = []
+ wORarr = []
+ sumwtimesOR = 0.0
+ sumwOR = 0.0
+
+ # for Mantel-Haenszel adjusted OR
+ sumMhORnum = 0.0
+ sumMhORden = 0.0
+
+ # for Robins, Greenland and Breslow
+ sumRgbPR = 0.0
+ sumRgbPSplusQR = 0.0
+ sumRgbQS = 0.0
+ sumRgbSumR = 0.0
+ sumRgbSumS = 0.0
+
+ # Initialise flags which indicate whether various statistics can be
+ # calculated
+ MhChiSq_flag = True
+ RRDirect_flag = True
+ MhRR_flag = True
+ RD_flag = True
+ ORda_flag = True
+ MhOR_flag = True
+ RgbOR_flag = True
+ exactOR_flag = True
+
+ # accumulate various quantities across strata
+ for stratum in self.strata:
+
+ # for crude strata
+ crude_e1d1 += stratum.a
+ crude_e0d1 += stratum.b
+ crude_e1d0 += stratum.c
+ crude_e0d0 += stratum.d
+
+ # aliases for compatibility with published formulae
+ m1 = stratum.a + stratum.b
+ m0 = stratum.c + stratum.d
+ n1 = stratum.a + stratum.c
+ n0 = stratum.b + stratum.d
+ t = stratum.a + stratum.b + stratum.c + stratum.d
+
+ self.expLT5.append(stratum.anyExpLT5)
+
+            # for Mantel-Haenszel uncorrected Chi square across strata
+ try:
+ sumMhChiSqNum += float((stratum.a * stratum.d) - (stratum.b * stratum.c)) / float(t)
+ sumMhChiSqDen += float(n0 * n1 * m0 * m1) / float((t - 1) * t**2)
+ except MathError:
+                MhChiSq_flag = False
+
+ # for directly adjusted risk ratio
+ try:
+ RR = (float(stratum.a)/float(n1)) / (float(stratum.b)/float(n0))
+ RRarr.append(RR)
+ w = 1.0 / ((float(stratum.c) / float(stratum.a * n1)) + (float(stratum.d) / (stratum.b * n0)))
+ wRRarr.append(w)
+ sumRRDirectwlnRR += w * math.log(RR)
+ sumwRR += w
+ except MathError:
+ RRDirect_flag = False
+
+ # for Mantel-Haenszel adjusted risk ratio
+ try:
+ sumMhRRnum += float(stratum.a * n0) / float(t)
+ sumMhRRden += float(stratum.b * n1) / float(t)
+ sumRgbRRnum += float((m1*n1*n0) - (stratum.a*stratum.b*t)) / float(t**2)
+ sumRgbRRSE = 0.0
+ except MathError:
+ MhRR_flag = False
+
+ # for risk difference
+ try:
+ RD = (float(stratum.a)/float(n1)) - (float(stratum.b)/float(n0))
+ w = 1.0 / ((float(stratum.a*stratum.c)/float(n1**3)) + (float(stratum.b*stratum.d) / float(n0**3)))
+ RDarr.append(RD)
+ wRDarr.append(w)
+ sumwtimesRD += w * RD
+ sumwRD += w
+ except MathError:
+ RD_flag = False
+
+ # for directly adjusted OR
+ try:
+ OR = float(stratum.a * stratum.d) / float(stratum.b * stratum.c)
+ w = 1.0 / (1.0/float(stratum.a) + 1.0/float(stratum.b) + 1.0/float(stratum.c) + 1.0/float(stratum.d))
+ ORarr.append(OR)
+ wORarr.append(w)
+ sumwtimesOR += w * math.log(OR)
+ sumwOR += w
+ except MathError:
+ ORda_flag = False
+
+ # for Mantel-Haenszel adjusted OR
+ try:
+ sumMhORnum += float(stratum.a) * float(stratum.d) / float(t)
+ sumMhORden += float(stratum.b) * float(stratum.c) / float(t)
+ except MathError:
+ MhOR_flag = False
+
+ # for Robins, Greenland and Breslow
+ try:
+ P = float(stratum.a + stratum.d) / float(t)
+ Q = float(stratum.b + stratum.c) / float(t)
+ R = float(stratum.a) * float(stratum.d) / float(t)
+ S = float(stratum.b) * float(stratum.c) / float(t)
+ sumRgbPR += P*R
+ sumRgbPSplusQR += (P*S) + (Q*R)
+ sumRgbQS += Q*S
+ sumRgbSumR += R
+ sumRgbSumS += S
+ except MathError:
+ RgbOR_flag = False
+
+ # create a "crude" table if more than one strata
+ if len(self.strata) > 1:
+ self.unstratified = TwoByTwoStratum(crude_e1d1, crude_e0d1, crude_e1d0, crude_e0d0, conflev=self.conflev, label='Unstratified (crude)')
+
+        # now calculate summary and adjusted values if more than one stratum
+ if len(self.strata) > 1:
+
+ # uncorrected M-H summary
+            if MhChiSq_flag and abs(sumMhChiSqDen) > 0.0:
+ self.MhUncorrSummaryChiSq = float(sumMhChiSqNum**2) / sumMhChiSqDen
+ self.pMhUncorrSummaryChiSq = 1.0 - r.pchisq(self.MhUncorrSummaryChiSq, 1)
+ else:
+ self.MhUncorrSummaryChiSq = None
+ self.pMhUncorrSummaryChiSq = None
+
+ # directly adjusted RR and RD
+ if RRDirect_flag:
+ self.adjRRdirect = math.exp(float(sumRRDirectwlnRR) / float(sumwRR))
+ self.lowerRRdirect = self.adjRRdirect * math.exp(-(self.z / sumwRR**0.5))
+ self.upperRRdirect = self.adjRRdirect * math.exp((self.z / sumwRR**0.5))
+ else:
+ self.adjRRdirect = None
+ self.lowerRRdirect = None
+ self.upperRRdirect = None
+
+ if RD_flag:
+ self.adjRDdirect = float(sumwtimesRD) / float(sumwRD)
+ self.lowerRDdirect = self.adjRDdirect - (self.z / sumwRD**0.5)
+ self.upperRDdirect = self.adjRDdirect + (self.z / sumwRD**0.5)
+ else:
+ self.adjRDdirect = None
+ self.lowerRDdirect = None
+ self.upperRDdirect = None
+
+ # Mantel-Haenszel adjusted RR
+ if MhRR_flag:
+ try:
+ self.adjRRmh = sumMhRRnum / float(sumMhRRden)
+ except MathError:
+ self.adjRRmh = None
+
+ try:
+ adjRRmhSE = (float(sumRgbRRnum) / float(sumMhRRnum * sumMhRRden))**0.5
+ self.lowerRRmh = self.adjRRmh * math.exp(-self.z * adjRRmhSE)
+ self.upperRRmh = self.adjRRmh * math.exp( self.z * adjRRmhSE)
+ except MathError:
+ self.adjRRmh = None
+ self.lowerRRmh = None
+ self.upperRRmh = None
+ else:
+ self.adjRRmh = None
+ self.lowerRRmh = None
+ self.upperRRmh = None
+
+ # Breslow-Day chi square test for homogeneity of RR across strata
+ try:
+ if self.adjRRdirect is None:
+ raise NotAvailable
+ self.BDchisqRR = 0.0
+ for i in range(len(self.strata)):
+ self.BDchisqRR += float((math.log(RRarr[i]) - math.log(self.adjRRdirect))**2) / (1.0 / wRRarr[i])
+ self.pBDchisqRR = 1.0 - r.pchisq(self.BDchisqRR, len(RRarr)-1)
+ except MathError:
+ self.BDchisqRR = None
+ self.pBDchisqRR = None
+
+ # Breslow-Day chi square test for homogeneity of RD across strata
+ try:
+ if self.adjRDdirect is None:
+ raise NotAvailable
+ self.BDchisqRD = 0.0
+ for i in range(len(self.strata)):
+ self.BDchisqRD += ((RDarr[i] - self.adjRDdirect)**2) / (1.0 / wRDarr[i])
+ self.pBDchisqRD = 1.0 - r.pchisq(self.BDchisqRD, len(RDarr)-1)
+ except MathError:
+ self.BDchisqRD = None
+ self.pBDchisqRD = None
+
+ # Mantel-Haenszel adjusted odds ratios
+ self.adjORmh = None
+ self.lowerORmh = None
+ self.upperORmh = None
+ if MhOR_flag and RgbOR_flag:
+ try:
+ self.adjORmh = float(sumMhORnum) / float(sumMhORden)
+ ORmhSE = ((float(sumRgbPR) / (2.0 * sumRgbSumR**2)) \
+ + (float(sumRgbPSplusQR) / (2.0 * sumRgbSumR * sumRgbSumS )) \
+ + (float(sumRgbQS) / (2.0 * sumRgbSumS**2)))**0.5
+ self.lowerORmh = self.adjORmh * math.exp(-self.z * ORmhSE)
+ self.upperORmh = self.adjORmh * math.exp( self.z * ORmhSE)
+ except MathError:
+ pass
+
+ # make an array for passing to mantelhaen.test() in R
+ mh_array = Numeric.array([0]*4*len(self.strata), typecode=Numeric.Int)
+ mh_array.shape = (2, 2, len(self.strata))
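+        # layout: mh_array[i, j, k] holds, for stratum k, cell a at
+        # [0, 0, k], b at [0, 1, k], c at [1, 0, k] and d at [1, 1, k]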
+ stratum_number = -1
+ zero_stratum_flag = False
+ for stratum in self.strata:
+ stratum_number += 1
+ mh_array[0, 0, stratum_number] = stratum.a
+ mh_array[0, 1, stratum_number] = stratum.b
+ mh_array[1, 0, stratum_number] = stratum.c
+ mh_array[1, 1, stratum_number] = stratum.d
+ if stratum.a + stratum.b + stratum.c + stratum.d < 1:
+ zero_stratum_flag = True
+
+ self.MH_chisq_contcorr_statistic = None
+ self.MH_chisq_contcorr_pvalue = None
+ self.MH_commonOR = None
+ self.MH_commonOR_ll = None
+ self.MH_commonOR_ul = None
+ self.MH_chisq_nocontcorr_statistic = None
+ self.MH_chisq_nocontcorr_pvalue = None
+ self.MH_exact_pvalue_twosided_asextreme = None
+ self.MH_exact_pvalue_twosided_twiceonesided = None
+ self.MH_exact_pvalue_onesided = None
+ self.MH_mid_p_onesided = None
+ self.MH_mid_p_twosided = None
+ self.MH_exact_commonOR = None
+ self.MH_exact_commonOR_ll = None
+ self.MH_exact_commonOR_ul = None
+
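+        # Only attempt the expensive exact tests when no stratum is empty
+        # and the crude cell counts are small enough for R's exact
+        # mantelhaen.test to be tractable.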
+ if exactOR_flag and zero_stratum_flag is False and \
+ (max(crude_e1d1, crude_e0d1, crude_e1d0, crude_e0d0) < 10000 or \
+ (max(crude_e1d1, crude_e0d1, crude_e1d0, crude_e0d0) < 20000 and \
+ min(crude_e1d1, crude_e0d1, crude_e1d0, crude_e0d0) < 50)):
+ try:
+ r_mh = r.mantelhaen_test(mh_array, correct=True, conf_level=self.conflev)
+ self.MH_chisq_contcorr_statistic = r_mh['statistic']['Mantel-Haenszel X-squared']
+ self.MH_chisq_contcorr_pvalue = r_mh['p.value']
+ self.MH_commonOR = r_mh['estimate']['common odds ratio']
+ self.MH_commonOR_ll = r_mh['conf.int'][0]
+ self.MH_commonOR_ul = r_mh['conf.int'][1]
+ r_mh = r.mantelhaen_test(mh_array, correct=False, conf_level=self.conflev)
+ self.MH_chisq_nocontcorr_statistic = r_mh['statistic']['Mantel-Haenszel X-squared']
+ self.MH_chisq_nocontcorr_pvalue = r_mh['p.value']
+
+ r_mh = r.mantelhaen_test(mh_array, exact=True, alternative='two.sided', conf_level=self.conflev)
+ self.MH_exact_pvalue_twosided_asextreme = r_mh['p.value']
+ self.MH_exact_commonOR = r_mh['estimate']['common odds ratio']
+ self.MH_exact_commonOR_ll = r_mh['conf.int'][0]
+ self.MH_exact_commonOR_ul = r_mh['conf.int'][1]
+
+ r_mh = r.mantelhaen_test(mh_array, exact=True, alternative='less', conf_level=self.conflev)
+ p_less = r_mh['p.value']
+ r_mh = r.mantelhaen_test(mh_array, exact=True, alternative='greater', conf_level=self.conflev)
+ p_greater = r_mh['p.value']
+
+ if self.MH_exact_commonOR <= 1.0:
+ self.MH_exact_pvalue_onesided = p_less
+ else:
+ self.MH_exact_pvalue_onesided = p_greater
+
+ self.MH_exact_pvalue_twosided_twiceonesided = 2.0 * self.MH_exact_pvalue_onesided
+
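+                # mid-p: count only half the point probability
+                # (p_less + p_greater - 1.0) in the one-sided tail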
+ pval1 = (0.5 * (p_less - (1.0 - p_greater))) + (1.0 - p_greater)
+ self.MH_mid_p_onesided = min(pval1, 1.0 - pval1)
+ self.MH_mid_p_twosided = 2.0 * self.MH_mid_p_onesided
+ except MathError:
+ pass
+
+ try:
+ # Woolf chi square test for homogeneity of OR across
+ # strata
+ r.library('vcd')
+ r("woolf.test<-woolf_test")
+ Wor = r.woolf_test(mh_array)
+ self.WchisqOR = Wor['statistic']['X-squared']
+ self.dfWchisqOR = Wor['parameter']['df']
+ self.pWchisqOR = Wor['p.value']
+ except MathError:
+ self.WchisqOR = None
+ self.pWchisqOR = None
+ self.dfWchisqOR = None
+
+ # directly adjusted odds ratio
+ if ORda_flag:
+ self.ORda = math.exp(float(sumwtimesOR) / float(sumwOR))
+ # print "self.ORda:", self.ORda
+ self.lowerORda = self.ORda * math.exp(-self.z / sumwOR**0.5)
+ self.upperORda = self.ORda * math.exp( self.z / sumwOR**0.5)
+ else:
+ self.ORda = None
+ self.lowerORda = None
+ self.upperORda = None
+
+ try:
+ # Breslow-Day chi square test for homogeneity of OR across
+ # strata
+ if self.ORda is None:
+ raise NotAvailable
+ self.BDchisqOR = 0.0
+ for i in range(len(ORarr)):
+ # print ORarr[i], self.ORda, wORarr[i]
+ self.BDchisqOR += ((math.log(ORarr[i]) - math.log(self.ORda))**2) / (1.0 / wORarr[i])
+ # print r.pchisq(self.BDchisqOR, len(ORarr)-1)
+ self.pBDchisqOR = 1.0 - r.pchisq(self.BDchisqOR, len(ORarr)-1)
+ except MathError:
+ self.BDchisqOR = None
+ self.pBDchisqOR = None
+
+ def _repr_adjusted(self, res_section, section):
+ assert len(self.strata) > 1
+ if section == 'counts':
+ return
+ subsec = res_section.new_subsection('Adjusted')
+ if section == 'measures_of_association':
+ if True in self.expLT5:
+ subsec.fmt('Warning: expected values in some strata are < 5: use of exact statistics recommended.')
+ subsec.fmt('Mantel-Haenszel chi square with continuity correction: %s (p=%s)', self.MH_chisq_contcorr_statistic, self.MH_chisq_contcorr_pvalue)
+ # subsec.fmt('Mantel-Haenszel chi square without continuity correction: %s (p=%s)', self.MhUncorrSummaryChiSq, self.pMhUncorrSummaryChiSq)
+ subsec.fmt('Mantel-Haenszel chi square without continuity correction: %s (p=%s)', self.MH_chisq_nocontcorr_statistic, self.MH_chisq_nocontcorr_pvalue)
+ subsec.fmt('Fisher exact test: one-sided: p=%s, two-sided (twice one-sided): p=%s, two-sided (as extreme): p=%s', self.MH_exact_pvalue_onesided, self.MH_exact_pvalue_twosided_twiceonesided, self.MH_exact_pvalue_twosided_asextreme, )
+ subsec.fmt('Mid-p exact test: one-sided: p=%s, two-sided: p=%s', self.MH_mid_p_onesided, self.MH_mid_p_twosided)
+ elif section == 'risk_based':
+ subsec.fmt('Directly adjusted risk ratio: %s (%s, %s)', self.adjRRdirect, self.lowerRRdirect, self.upperRRdirect)
+ subsec.fmt('Mantel-Haenszel adjusted risk ratio: %s (%s, %s)', self.adjRRmh, self.lowerRRmh, self.upperRRmh)
+ subsec.fmt('Breslow-Day chi square test for homogeneity of RR across strata: %s (p=%s)', self.BDchisqRR, self.pBDchisqRR)
+ subsec.fmt('Directly adjusted risk difference: %s (%s, %s)', self.adjRDdirect, self.lowerRDdirect, self.upperRDdirect)
+ subsec.fmt('Breslow-Day chi square test for homogeneity of RD across strata: %s (p=%s)', self.BDchisqRD, self.pBDchisqRD)
+ elif section == 'odds_based':
+ subsec.fmt('Directly adjusted common odds ratio: %s (%s, %s)', self.ORda, self.lowerORda, self.upperORda)
+ # subsec.fmt('Mantel-Haenszel common odds ratio: %s (%s, %s)', self.adjORmh, self.lowerORmh, self.upperORmh)
+ subsec.fmt('Mantel-Haenszel common odds ratio: %s (%s, %s)', self.MH_commonOR, self.MH_commonOR_ll, self.MH_commonOR_ul)
+ subsec.fmt('CMLE common odds ratio: %s (%s, %s)', self.MH_exact_commonOR, self.MH_exact_commonOR_ll, self.MH_exact_commonOR_ul)
+ subsec.fmt('Breslow-Day chi square test for homogeneity of OR across strata: %s (p=%s)', self.BDchisqOR, self.pBDchisqOR)
+ subsec.fmt('Woolf chi square test for homogeneity of OR across strata: %s, df=%s (p=%s)', self.WchisqOR, self.dfWchisqOR, self.pWchisqOR)
+
+ _sections = [
+ ('counts', 'Tabulated values'),
+ ('measures_of_association', 'Measures of association'),
+ ('risk_based', 'Risk-based measures'),
+ ('odds_based', 'Odds-based measures'),
+ ]
+ sections = [s[0] for s in _sections]
+ section_labels = dict(_sections)
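+    # e.g. x.report(sections=['odds_based']) yields one _ReportSection
+    # per requested section; str(x) renders them all.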
+
+ def report(self, sections=None):
+ if sections is None:
+ sections = self.sections
+ r.sink("/dev/null")
+ try:
+ self.calc()
+ finally:
+ r.sink()
+ for section in sections:
+ label = self.section_labels[section]
+ if section in ('risk_based', 'odds_based'):
+ label += ' (%g%% conf. limits)' % (self.conflev * 100)
+ res_section = _ReportSection(label)
+ for stratum in self.strata:
+ stratum.report(section, res_section)
+ if len(self.strata) > 1:
+ self.unstratified.report(section, res_section, crude=True)
+ self._repr_adjusted(res_section, section=section)
+ yield res_section
+
+ def __str__(self):
+ lines = []
+ for res_section in self.report():
+ lines.append(str(res_section))
+ return '\n'.join(lines)
+
+
+if __name__ == '__main__':
+ import time
+
+ starttime = time.time()
+ x = twobytwotable()
+ # OpenEpi example data
+ x.add_stratum(66,36,28,32)
+ x.add_stratum(139,93,61,54)
+ print "OpenEpi example values"
+ print x
+ elapsed = time.time() - starttime
+ print '%.3f seconds' % elapsed
+
+ starttime = time.time()
+ x = twobytwotable()
+ # example from Armitage and Berry
+ x.add_stratum(1,21,4,16)
+ print "Armitage and Berry example"
+ print x
+ elapsed = time.time() - starttime
+ print '%.3f seconds' % elapsed
+
+ starttime = time.time()
+ x = twobytwotable()
+ # StatExact homogeneity of OR example 14.4.1 Alcohol and Oesophageal cancer
+ x.add_stratum(1,0,9,106)
+ x.add_stratum(4,5,26,164)
+ x.add_stratum(25,21,29,138)
+ x.add_stratum(42,34,27,139)
+ x.add_stratum(19,36,18,88)
+ x.add_stratum(5,8,0,31)
+ print
+ print "=============================================================="
+ print "StatExact example 14.4.1 Alcohol and Oesophageal cancer values"
+    print "Breslow-Day homogeneity of OR chi-sq should be 9.323, p=0.0968"
+ print "CMLE common OR should be 5.251 with exact CI of (3.572, 7.757)"
+ print "and mid-p exact CI of (3.630, 7.629)"
+    print "Mantel-Haenszel common OR should be 5.158 with RGB CI of (3.562, 7.468)"
+ print "All p-values should be < 0.0000"
+ print "=============================================================="
+ print x
+ elapsed = time.time() - starttime
+ print '%.3f seconds' % elapsed
+
+ starttime = time.time()
+ x = twobytwotable()
+ # StatExact example
+ x.add_stratum(1,0,9,106)
+ # x.add_stratum(4,5,26,164)
+ print
+ print "StatExact example values"
+ print x
+ elapsed = time.time() - starttime
+ print '%.3f seconds' % elapsed
+
+
+ starttime = time.time()
+ x = twobytwotable()
+ # StatExact extremely ill-conditioned data example
+ x.add_stratum(4,0,16,7)
+ x.add_stratum(4,0,13,7)
+ x.add_stratum(2,0,13,8)
+ x.add_stratum(1,0,17,8)
+ x.add_stratum(1,0,17,8)
+ x.add_stratum(1,0,29,10)
+ x.add_stratum(2,0,29,10)
+ x.add_stratum(1,0,30,10)
+ x.add_stratum(1,0,30,10)
+ x.add_stratum(1,0,33,13)
+ print
+ print "=============================================================="
+ print "StatExact example 14.5.3 Extremely imbalanced minority hiring"
+ print "CMLE common OR should be +Inf with exact CI of (1.819, +Inf)"
+ print "and mid-p exact CI of (3.069, +Inf)"
+ print "Mantel-Haenszel common OR cannot be estimated"
+ print "One-sided exact p-value for common OR=1.0 should be 0.0022"
+ print "Two-sided exact p-value for common OR=1.0 should be 0.0043 or 0.0044"
+ print "MH two-sided p-value for common OR=1.0 should be 0.0063"
+ print "=============================================================="
+ print x
+ elapsed = time.time() - starttime
+ print '%.3f seconds' % elapsed
+
+ starttime = time.time()
+ x = twobytwotable()
+ # Other extremely ill-conditioned data example
+ x.add_stratum(0,4,16,7)
+ x.add_stratum(0,4,13,7)
+ x.add_stratum(0,2,13,8)
+ x.add_stratum(0,1,17,8)
+ x.add_stratum(0,1,17,8)
+ x.add_stratum(0,1,29,10)
+ x.add_stratum(0,2,29,10)
+ x.add_stratum(0,1,30,10)
+ x.add_stratum(0,1,30,10)
+ x.add_stratum(0,1,33,13)
+ print
+ print "=============================================================="
+ print "Another extremely ill-conditioned data example - all zeros in cell A"
+ print "=============================================================="
+ print x
+ elapsed = time.time() - starttime
+ print '%.3f seconds' % elapsed
+
+ starttime = time.time()
+ x = twobytwotable()
+ # Other extremely ill-conditioned data example
+ x.add_stratum(16,4,0,7)
+ x.add_stratum(13,4,0,7)
+ x.add_stratum(13,2,0,8)
+ x.add_stratum(17,1,0,8)
+ x.add_stratum(17,1,0,8)
+ x.add_stratum(29,1,0,10)
+ x.add_stratum(29,2,0,10)
+ x.add_stratum(30,1,0,10)
+ x.add_stratum(30,1,0,10)
+ x.add_stratum(33,1,0,13)
+ print
+ print "=============================================================="
+ print "Another extremely ill-conditioned data example - all zeros in cell C"
+ print "=============================================================="
+ print x
+ elapsed = time.time() - starttime
+ print '%.3f seconds' % elapsed
+
+ starttime = time.time()
+ x = twobytwotable()
+ # Other extremely ill-conditioned data example
+ x.add_stratum(16,4,7,0)
+ x.add_stratum(13,4,7,0)
+ x.add_stratum(13,2,8,0)
+ x.add_stratum(17,1,8,0)
+ x.add_stratum(17,1,8,0)
+ x.add_stratum(29,1,10,0)
+ x.add_stratum(29,2,10,0)
+ x.add_stratum(30,1,10,0)
+ x.add_stratum(30,1,10,0)
+ x.add_stratum(33,1,13,0)
+ print
+ print "=============================================================="
+ print "Another extremely ill-conditioned data example - all zeros in cell D"
+ print "=============================================================="
+ print x
+ elapsed = time.time() - starttime
+ print '%.3f seconds' % elapsed
+
+ starttime = time.time()
+ x = twobytwotable()
+ # Other extremely ill-conditioned data example
+ x.add_stratum(16,4,7,7)
+ x.add_stratum(0,0,0,0)
+ x.add_stratum(13,2,8,8)
+ print
+ print "=============================================================="
+ print "Another extremely ill-conditioned data example - zeros in all cells in one stratum"
+ print "=============================================================="
+ print x
+ elapsed = time.time() - starttime
+ print '%.3f seconds' % elapsed
+
+ starttime = time.time()
+ x = twobytwotable()
+ # Other extremely ill-conditioned data example
+ x.add_stratum(0,0,0,0)
+ x.add_stratum(0,0,0,0)
+ print
+ print "=============================================================="
+ print "Another extremely ill-conditioned data example - zeros in all cells in all strata"
+ print "=============================================================="
+ print x
+ elapsed = time.time() - starttime
+ print '%.3f seconds' % elapsed
+
+ starttime = time.time()
+ x = twobytwotable()
+ # Large single stratum
+ x.add_stratum(950,999,234,789)
+ print
+ print "=============================================================="
+ print "Large single stratum"
+ print "=============================================================="
+ print x
+ elapsed = time.time() - starttime
+ print '%.3f seconds' % elapsed
+
+ starttime = time.time()
+ x = twobytwotable()
+ # Large single stratum with one small cell
+ x.add_stratum(950,999,23,789)
+ print
+ print "=============================================================="
+ print "Large single stratum with one small cell"
+ print "=============================================================="
+ print x
+ elapsed = time.time() - starttime
+ print '%.3f seconds' % elapsed
+
+ starttime = time.time()
+ x = twobytwotable()
+ # Very large single stratum with a small cell
+ x.add_stratum(9504,8997,43,7892)
+ print
+ print "=============================================================="
+ print "Very large single stratum with a small cell"
+ print "=============================================================="
+ print x
+ elapsed = time.time() - starttime
+ print '%.3f seconds' % elapsed
+
+ starttime = time.time()
+ x = twobytwotable()
+ # Very large single stratum
+ x.add_stratum(9504,8997,8943,7892)
+ print
+ print "=============================================================="
+ print "Very large single stratum"
+ print "=============================================================="
+ print x
+ elapsed = time.time() - starttime
+ print '%.3f seconds' % elapsed
+
+ starttime = time.time()
+ x = twobytwotable()
+ # Two very large strata with small cells
+ x.add_stratum(9504,8997,43,7892)
+ x.add_stratum(9763,8345,27,8765)
+ print
+ print "=============================================================="
+ print "Two very large single strata with small cells"
+ print "=============================================================="
+ print x
+ elapsed = time.time() - starttime
+ print '%.3f seconds' % elapsed
+
+ starttime = time.time()
+ x = twobytwotable()
+ # Two very large strata
+ x.add_stratum(9504,8997,8943,7892)
+ x.add_stratum(9763,8345,7827,8765)
+ print
+ print "=============================================================="
+ print "Two very large single strata"
+ print "=============================================================="
+ print x
+ elapsed = time.time() - starttime
+ print '%.3f seconds' % elapsed
diff --git a/SOOMv0/BaseDataset.py b/SOOMv0/BaseDataset.py
new file mode 100644
index 0000000..58a2b19
--- /dev/null
+++ b/SOOMv0/BaseDataset.py
@@ -0,0 +1,323 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: BaseDataset.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/BaseDataset.py,v $
+
+from mx import DateTime
+
+from SOOMv0.common import *
+from SOOMv0.Soom import soom
+from SOOMv0.DatasetColumn import is_dataset_col, get_dataset_col, RowOrdinalColumn
+from SOOMv0.Describe import Describe
+from SOOMv0.PrintDataset import DSFormatter
+
+class BaseDataset(object):
+ """
+ Base class for data set definition
+
+ Keyword arguments include:
+ name, label, desc, path, backed, rowsas, printcols, nonprintcols,
+ weightcol, generations, date_created, date_updated.
+
+ Attributes:
+ name data set name
+ label Longer descriptive label for this dataset
+ desc Full description of this dataset
+ path Path to saved datasets
+ backed if true, dataset backed by disk files,
+ otherwise kept in memory.
+ summary dataset is summarised
+ rowsas return rows as a dict or list or tuple?
+ 'dict' is used by default.
+ printcols column names to return in each row when
+ printing
+ nonprintcols column names not to be returned in each
+ row when printing
+ weightcol default weighting column
+ generations The number of past dataset generations to keep.
+ generation Update generation count
+ date_created When the dataset was first created (mx.DateTime)
+ date_updated When the dataset was last updated
+ length Number of records in the dataset (use len(ds) instead)
+
+ """
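+    # A minimal usage sketch (hypothetical data, assuming a concrete
+    # subclass such as Dataset):
+    #
+    #     ds = Dataset('deaths', label='Synthetic deaths')
+    #     ds.addcolumnfromseq('age', data=[34, 61, 57], datatype='int')
+    #     len(ds)        # -> 3
+    #     ds['age']      # -> the age column object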
+ filter_label = None
+
+ def __init__(self, name, label=None, desc=None,
+ weightcol=None,
+ summary=False,
+ printcols=None, nonprintcols=None,
+ date_created=None, date_updated=None):
+ soom.check_name_ok(name, 'Dataset')
+ self.name = name
+ self.label = label
+ self.desc = desc
+ self.backed = False
+ self.summary = summary
+ self.weightcol = weightcol
+ self.printcols = printcols
+ self.nonprintcols = nonprintcols
+ if date_created is None:
+ date_created = DateTime.now()
+ self.date_created = date_created
+ if date_updated is None:
+ date_updated = DateTime.now()
+ self.date_updated = date_updated
+ self.clear()
+
+ attrs = (
+ 'name', 'label', 'desc', 'weightcol', 'printcols', 'nonprintcols',
+ 'date_created', 'date_updated',
+ )
+ def get_metadata(self):
+ m = {}
+ for attr in self.attrs:
+ m[attr] = getattr(self, attr)
+ return m
+
+ def is_summarised(self):
+ return bool(getattr(self, 'summary', False))
+
+ def clear(self):
+ """
+ Clear columns without erasing metadata - also called to
+ initialise new dataset objects.
+ """
+ self.soom_version = version
+ self.soom_version_info = version_info
+ self._column_ord = []
+ self._column_dict = {}
+ self.length = 0
+ # add the row_ordinal column as the very first column
+ self.addcolumn(RowOrdinalColumn(self))
+
+ def rename_dataset(self, newname):
+ """
+ Rename the dataset
+ """
+        soom.check_name_ok(newname, 'Dataset')
+ self.name = newname
+
+ def delete_dataset(self):
+ pass
+
+ def rename_column(self, oldname, newname):
+ try:
+ col = self._column_dict.pop(oldname)
+ except KeyError:
+ raise Error('Unknown column %r' % (oldname,))
+ col.rename_column(newname)
+ self._column_dict[col.name] = col
+
+ def delete_column(self, name):
+ """
+ Remove a column from a Dataset
+ """
+ try:
+ col = self._column_dict.pop(name)
+ except KeyError:
+ raise Error('Unknown column %r' % (name,))
+ self._column_ord.remove(col)
+ col.delete_column()
+
+ def addcolumn(self, name, **kwargs):
+ if is_dataset_col(name):
+ col = name
+ else:
+ col = get_dataset_col(self, name, **kwargs)
+ try:
+ self.delete_column(col.name)
+ except Error:
+ pass
+ self._column_ord.append(col)
+ self._column_dict[col.name] = col
+ return col
+
+ def addcolumnfromseq(self, name, data, mask=None, **kwargs):
+ """
+ Creates a new column from a supplied vector (sequence)
+ or iterable.
+ """
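+        # e.g. (hypothetical kwargs):
+        #     ds.addcolumnfromseq('sex', data=['M', 'F'],
+        #                         coltype='categorical', datatype='str')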
+ # now create the DatasetColumn instance to hold the actual data
+ # create the instance for the new column
+ col = get_dataset_col(self, name, **kwargs)
+ try:
+ col_len = len(data)
+ except TypeError:
+ # because data vector may be an iterable, rather than a list
+ pass
+ else:
+ if self.length == 0:
+ self.length = col_len
+ else:
+ if self.length != col_len:
+ raise Error('The length (%d) of the data vector '
+ 'supplied for column %r does not equal '
+ 'the length of other columns in the '
+ 'dataset (%d).' %
+ (col_len, name, self.length))
+ col.store_column(data, mask)
+ return self.addcolumn(col)
+
+
+ def get_column(self, name):
+ try:
+ return self._column_dict[name]
+ except KeyError:
+ raise ColumnNotFound('Unknown column %r' % (name,))
+
+ def has_column(self, name):
+ return name in self._column_dict
+
+ def get_columns(self, names = None):
+ if names is not None:
+ return [self.get_column(n) for n in names]
+ else:
+ return list(self._column_ord)
+
+ def get_column_names(self):
+ return [col.name for col in self._column_ord]
+
+ def get_print_columns(self):
+ cols = self.get_columns(self.printcols)
+ if self.nonprintcols:
+ cols = [col for col in cols if col.name not in self.nonprintcols]
+ return cols
+
+ def get_print_column_names(self):
+ return [col.name for col in self.get_print_columns()]
+
+ def __len__(self):
+ return self.length
+
+
+ def __getitem__(self, index_or_slice):
+ if isinstance(index_or_slice, basestring):
+ try:
+ return self._column_dict[index_or_slice]
+ except KeyError:
+ raise KeyError(index_or_slice)
+ if isinstance(index_or_slice, slice):
+ from SOOMv0.Filter import sliced_ds
+ return sliced_ds(self, index_or_slice)
+ return dict([(col.name, col.do_outtrans(col[index_or_slice]))
+ for col in self.get_print_columns()])
+
+ def describe(self, detail=ALL_DETAIL, date_fmt=None):
+ """
+ Return a description of the dataset as a Description object.
+
+ "detail" controls what is included, and should take one of
+ the following values:
+
+ NO_DETAIL dataset label, date and active filter
+ SOME_DETAIL metadata for naive users
+ ALL_DETAIL metadata for expert users/dataset admins
+ """
+
+ if date_fmt is None:
+ date_fmt = '%Y-%m-%d %H:%M:%S'
+ d = Describe(detail, 'name', 'prov', 'ds', 'cols')
+ if detail < SOME_DETAIL:
+ d.add('name', NO_DETAIL, 'Dataset', self.label or self.name)
+ else:
+ d.add('name', NO_DETAIL, 'Name', self.name)
+ d.add('name', NO_DETAIL, 'Label', self.label)
+ d.add('ds', SOME_DETAIL, 'Description', self.desc)
+ d.add('ds', SOME_DETAIL, 'Record Count', len(self))
+ if self.weightcol:
+ col = self.get_column(self.weightcol)
+ desc = '%s (%s)' % (col.name, col.label)
+ d.add('ds', SOME_DETAIL, 'Default weighting column', desc)
+ if self.date_updated is not None:
+ d.add('ds', SUB_DETAIL, 'Updated',
+ self.date_updated.strftime(date_fmt))
+ if self.date_created is not None:
+ d.add('ds', SOME_DETAIL, 'Created',
+ self.date_created.strftime(date_fmt))
+ return d
+
+ def short_description(self):
+ return str(self.describe(NO_DETAIL))
+
+ def describe_cols(self, sortby='label'):
+ """
+ Collect short-form column metadata as a list of lists.
+ """
+ colslabel = None
+ colsmeta = None
+ cols = [(getattr(col, sortby), col) for col in self.get_columns()]
+ cols.sort()
+ for sortbyval, col in cols:
+ meta = col.describe(NO_DETAIL).describe_tuples()
+ if colslabel is None:
+ colslabel = [m[0] for m in meta]
+ colsmeta = [[] for m in meta]
+ for colmeta, (label, value) in zip(colsmeta, meta):
+ colmeta.append(str(value))
+ return colslabel, colsmeta
+
+ def describe_with_cols(self):
+ lines = ['%s: %s' % kv for kv in self.describe().describe_tuples()]
+ lines.append("Containing the following columns:")
+ colslabel, colsmeta = self.describe_cols(sortby = 'name')
+ # Shove the column label at the top of the metadata columns
+ for collabel, colmeta in zip(colslabel, colsmeta):
+ colmeta.insert(0, collabel)
+ # Determine maximum column width for each column
+ colwidths = [max([len(v) for v in colmeta]) for colmeta in colsmeta]
+ # Rotate data from list per column to tuple per row
+ metabyrows = zip(*colsmeta)
+ # Insert a ruler line
+ metabyrows.insert(1, ['-' * w for w in colwidths])
+ # Now format up the rows
+ for rowvals in metabyrows:
+ line = ' '.join([val.ljust(width)
+ for width, val in zip(colwidths, rowvals)])
+ lines.append(line)
+ return '\n'.join(lines)
+
+ def _display_hook(self):
+ if self.is_summarised():
+ print self
+ else:
+ print self.describe_with_cols()
+
+ def print_cols(self, *cols):
+ if not cols:
+ cols = None
+ return '\n'.join(DSFormatter(self, cols))
+
+ def __str__(self):
+ """Prints a DataSet instance contents"""
+ return self.print_cols()
+
+ def show(self, *args):
+ for line in DSFormatter(self, args):
+ print line
+
+ def summ(self, *args, **kwargs):
+ from SOOMv0.DatasetSummary import summ
+ return summ(self, *args, **kwargs)
+
+ def filter(self, expr=None, **kwargs):
+ from SOOMv0.Filter import filter_dataset
+ return filter_dataset(self, expr=expr, **kwargs)
+
+ def crosstab(self, shaped_like=None):
+ from SOOMv0.CrossTab import CrossTab
+ if not self.summary:
+ raise Error('dataset must contain summary data')
+ return CrossTab.from_summset(self, shaped_like)
diff --git a/SOOMv0/CachingLoader.py b/SOOMv0/CachingLoader.py
new file mode 100644
index 0000000..be3e81e
--- /dev/null
+++ b/SOOMv0/CachingLoader.py
@@ -0,0 +1,246 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+
+# $Id: CachingLoader.py 2901 2007-11-20 04:52:21Z andrewm $
+# $HeadURL$
+
+# Standard Library
+import os
+import gzip
+import errno
+import array
+import cPickle
+from time import time
+
+# Application modules
+from common import *
+from SOOMv0.Utils import quiet_unlink
+from SOOMv0.Soom import soom
+from ChunkingLoader import ChunkingLoader
+
+class _CLState:
+ def __init__(self, il):
+ self.earliest = il.earliest
+ self.latest = il.latest
+ self.key_to_recno = il.key_to_recno
+ self.colnames = [col.name for col, data in il.columns]
+ self.numchunks = il.numchunks
+ self.load_count = il.load_count
+ assert len(il.recno_chunk) == len(il.recno_chunk_offs)
+ self.rowcount = len(il.recno_chunk)
+
+ def apply(self, il):
+ il.earliest = self.earliest
+ il.latest = self.latest
+ il.key_to_recno = self.key_to_recno
+ il.numchunks = self.numchunks
+ il.load_count = self.load_count + 1
+ il.rowcount = self.rowcount
+
+class CachingLoader(ChunkingLoader):
+ """
+ The CachingLoader is an elaboration on the ChunkingLoader. It allows
+ previously loaded data to be retained in the "chunks", with subsequent
+    source data superseding the chunk data (based on the key_column). This
+ means that the source only needs to supply rows that have been updated
+ since the last load, rather than the full dataset. For slow sources
+ (such as relational databases), this can be a big win.
+
+ It works by retaining the column "chunks", with an index that maps from
+ the datasource primary key to a "record number" (key_to_recno). Two arrays
+ then map this to a chunk number (recno_chunk) and an offset within the
+ chunk (recno_chunk_offs).
+ """
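+    # Index sketch (hypothetical key 'K42'): if the row for 'K42' was
+    # last written as the 7th entry of chunk 3, then with
+    # r = key_to_recno['K42'] we have recno_chunk[r] == 3 and
+    # recno_chunk_offs[r] == 6. A later source row with the same key
+    # simply overwrites these two entries, so unchunk_columns() yields
+    # only the newest version of each record.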
+ FN_MD = 'metadata.pkl.gz'
+ FN_CM = 'recno_chunk.array'
+ FN_CO = 'recno_chunk_offs.array'
+ CHECKPOINT_INTERVAL = 600
+
+ def __init__(self, columns, basepath, key_column, update_time_column=None):
+ ChunkingLoader.__init__(self, columns, basepath)
+ self.key_column = key_column
+ self.update_time_column = update_time_column
+ self._init()
+ self.load()
+ if not self.data_okay:
+ self.clear()
+
+ def _metadata_filename(self, name):
+ return os.path.join(self.basepath, name)
+
+ def _init(self):
+ ChunkingLoader._init(self)
+ self.key_to_recno = {}
+ self.recno_chunk = array.array('L')
+ self.recno_chunk_offs = array.array('L')
+ self.earliest = None
+ self.latest = None
+ self.data_okay = False
+ self.load_count = 1
+ self.last_save = None
+
+ def clear(self):
+ self._init()
+ quiet_unlink(self._metadata_filename(self.FN_MD))
+ quiet_unlink(self._metadata_filename(self.FN_CM))
+ quiet_unlink(self._metadata_filename(self.FN_CO))
+ ChunkingLoader.clear(self)
+
+ def _load_array(self, fn, n):
+ fn = self._metadata_filename(fn)
+ try:
+ f = open(fn, 'rb')
+ except IOError, (eno, estr):
+ if eno == errno.ENOENT:
+ return
+ raise
+ data = array.array('L')
+ try:
+ data.fromfile(f, n)
+ finally:
+ f.close()
+ return data
+
+ def _load_meta(self):
+ fn = self._metadata_filename(self.FN_MD)
+ try:
+ f = gzip.open(fn, 'rb')
+ except IOError, (eno, estr):
+ if eno == errno.ENOENT:
+ return None
+ raise
+ try:
+ return cPickle.load(f)
+ finally:
+ f.close()
+
+ def load(self):
+ t0 = time()
+ state = self._load_meta()
+ if not state:
+ return
+ this_cols = set([col.name for col, data in self.columns])
+ load_cols = set(state.colnames)
+ if this_cols == load_cols:
+ # Check that all existing chunks are available
+ for colname in state.colnames:
+ for chunknum in range(state.numchunks):
+ fn = self._chunk_filename(colname, chunknum)
+ if not os.access(fn, os.R_OK):
+ return
+ t1 = time()
+ recno_chunk = self._load_array(self.FN_CM, state.rowcount)
+ recno_chunk_offs = self._load_array(self.FN_CO, state.rowcount)
+ now = time()
+ soom.info('Caching load, generation %s, %s keys, took %.1fs '
+ '(%.1fs metadata, %.1fs index)' %
+ (state.load_count, len(state.key_to_recno),
+ now - t0, t1 - t0, now - t1))
+ self.data_okay = True
+ state.apply(self)
+ self.recno_chunk = recno_chunk
+ self.recno_chunk_offs = recno_chunk_offs
+
+ def _save_array(self, fn, data):
+ tmpfn = os.path.join(self.basepath, '.%s.tmp' % fn)
+ f = open(tmpfn, 'wb')
+ try:
+ data.tofile(f)
+ f.close()
+ os.rename(tmpfn, self._metadata_filename(fn))
+        except:
+            f.close()
+            os.unlink(tmpfn)
+            raise
+
+ def _save_meta(self):
+ fn = self._metadata_filename(self.FN_MD)
+ tmpfn = os.path.join(self.basepath, '.loader_state.tmp')
+ f = gzip.open(tmpfn, 'wb')
+ try:
+ cPickle.dump(_CLState(self), f, -1)
+ f.close()
+ os.rename(tmpfn, fn)
+        except:
+            f.close()
+            os.unlink(tmpfn)
+            raise
+
+ def save(self):
+ t0 = time()
+ self._save_array(self.FN_CM, self.recno_chunk)
+ self._save_array(self.FN_CO, self.recno_chunk_offs)
+ t1 = time()
+ self._save_meta()
+ t2 = time()
+ el = t2 - t0
+ soom.info('Save caching loader state took %.1fs (%.1fs array, %.1fs meta)' % (el, t1-t0, t2-t1))
+ return el
+
+ def flush(self):
+ flush_el = ChunkingLoader.flush(self)
+ now = time()
+ if self.last_save + self.CHECKPOINT_INTERVAL < now:
+ # Checkpoint
+ flush_el += self.save()
+ self.last_save = time()
+ return flush_el
+
+ def loadrows(self, sourcename, source, chunkrows=0, rowlimit=0):
+ self.last_save = time()
+ def _mapsource(source):
+ for row in source:
+ key = row.get(self.key_column)
+ if self.update_time_column is not None:
+ update_time = row.get(self.update_time_column)
+ if self.earliest is None or update_time < self.earliest:
+ self.earliest = update_time
+ if self.latest is None or update_time > self.latest:
+ self.latest = update_time
+ index = self.key_to_recno.get(key)
+ if index is None:
+ self.key_to_recno[key] = len(self.recno_chunk)
+ self.recno_chunk.append(self.numchunks)
+ self.recno_chunk_offs.append(self.chunklen)
+ else:
+ self.recno_chunk[index] = self.numchunks
+ self.recno_chunk_offs[index] = self.chunklen
+ yield row
+ return ChunkingLoader.loadrows(self, sourcename, _mapsource(source),
+ chunkrows, rowlimit)
+
+ def load_completed(self):
+ self.save()
+ ChunkingLoader.load_completed(self)
+ soom.info('Caching load yields %s rows (%s new) from %s generations' %
+ (len(self.key_to_recno), self.rownum, self.load_count))
+ return len(self.key_to_recno)
+
+ def unchunk_columns(self):
+ t0 = time()
+ soom.mem_report()
+ chunk_offs = zip(self.recno_chunk, self.recno_chunk_offs)
+ chunk_offs.sort()
+ soom.info('Caching unchunk sort took %.1fs' % (time() - t0))
+ soom.mem_report()
+ def _col_generator(colname):
+ last_chunknum = -1
+ data = None
+ for chunknum, offset in chunk_offs:
+ if chunknum != last_chunknum:
+ data = self.get_chunk(colname, chunknum)
+ last_chunknum = chunknum
+ yield data[offset]
+
+ for col, data in self.columns:
+ yield col, _col_generator(col.name)
diff --git a/SOOMv0/ChunkingLoader.py b/SOOMv0/ChunkingLoader.py
new file mode 100644
index 0000000..d861d39
--- /dev/null
+++ b/SOOMv0/ChunkingLoader.py
@@ -0,0 +1,148 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: ChunkingLoader.py 2901 2007-11-20 04:52:21Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/ChunkingLoader.py,v $
+
+import os
+import zlib
+import cPickle
+from time import time
+from SOOMv0.Soom import soom
+
+class ChunkingLoader:
+ """
+ "Rotate" row-wise data to column-wise.
+
+ Do rotation on disk if number of rows is excessive
+ """
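+    # Sketch (hypothetical rows): feeding loadrows() the dicts
+    #   {'age': 30, 'sex': 'M'} and {'age': 40, 'sex': 'F'}
+    # appends to per-column lists, age -> [30, 40], sex -> ['M', 'F'],
+    # which flush() pickles to disk once chunkrows rows accumulate.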
+ compress_chunk = True
+ chunk_ext = 'SOOMchunk'
+
+ def __init__(self, columns, basepath):
+ self.basepath = basepath
+ self.columns = []
+ self._init()
+ for col in columns:
+ if col.name != 'row_ordinal': # XXX
+ self.columns.append((col, []))
+
+ def _chunk_filename(self, colname, chunknum):
+ return os.path.join(self.basepath,
+ '%s.%s.%s' % (colname, chunknum, self.chunk_ext))
+
+ def _init(self):
+ self.numchunks = 0
+ self.rownum = 0
+ self.chunklen = 0
+ self.flushtime = 0.0
+ self.sourcetime = 0.0
+ self.nsources = 0
+
+ def clear(self):
+ for fn in os.listdir(self.basepath):
+ if fn.endswith(self.chunk_ext):
+ os.unlink(os.path.join(self.basepath, fn))
+
+ def flush(self):
+ t0 = time()
+ for col, data in self.columns:
+ fn = self._chunk_filename(col.name, self.numchunks)
+ f = open(fn, 'wb')
+ try:
+ if self.compress_chunk:
+ f.write(zlib.compress(cPickle.dumps(data, -1)))
+ else:
+ cPickle.dump(data, f, -1)
+ finally:
+ f.close()
+ del data[:]
+ self.chunklen = 0
+ self.numchunks += 1
+ soom.mem_report()
+ flush_el = time() - t0
+ self.flushtime += flush_el
+ return flush_el
+
+ def get_chunk(self, colname, chunknum):
+ filename = self._chunk_filename(colname, chunknum)
+ f = open(filename, 'rb')
+ try:
+ if self.compress_chunk:
+ return cPickle.loads(zlib.decompress(f.read()))
+ else:
+ return cPickle.load(f)
+ finally:
+ f.close()
+
+ def loadrows(self, sourcename, source, chunkrows=0, rowlimit=0):
+ source_rownum = 0
+ t0 = t1 = time()
+ initial_flushtime = self.flushtime
+ if not rowlimit:
+ rowlimit = -1
+ self.nsources += 1
+ for row in source:
+ source_rownum += 1
+ for col, data in self.columns:
+ data.append(row.get(col.name, None))
+ self.rownum += 1
+ self.chunklen += 1
+ if source_rownum == rowlimit:
+ break
+ if chunkrows and self.chunklen >= chunkrows:
+ el = self.flush()
+ t1 += el # Credit flushtime
+ if source_rownum and source_rownum % 1000 == 0:
+ t2 = time()
+ el = t2 - t1
+ t1 = t2
+ soom.info('%s rows from source %r (%.1f rows/s)' %
+ (source_rownum, sourcename, 1000 / el))
+ if self.chunklen:
+ self.flush()
+ flushtime = self.flushtime - initial_flushtime
+ sourcetime = time() - t0 - flushtime
+ self.sourcetime += sourcetime
+ rps = 0.0
+ if sourcetime:
+ rps = source_rownum / sourcetime
+ soom.info('%s rows from source %r in %.1fs '
+ '(%.1fs source, %.1fs chunking, %.1f rows/s)' %
+ (source_rownum, sourcename,
+ flushtime + sourcetime, sourcetime, flushtime, rps))
+ return source_rownum
+
+ def load_completed(self):
+ rps = 0.0
+ if self.sourcetime:
+ rps = self.rownum / self.sourcetime
+ soom.info('%s rows from %s source(s) in %.1fs '
+ '(%.1fs source, %.1fs chunking, %.1f rows/s)' %
+ (self.rownum, self.nsources,
+ self.flushtime + self.sourcetime,
+ self.sourcetime, self.flushtime,
+ rps))
+ return self.rownum
+
+ def unchunk_columns(self):
+ def _col_generator(self, col):
+ for chunknum in xrange(self.numchunks):
+ data = self.get_chunk(col.name, chunknum)
+ os.remove(self._chunk_filename(col.name, chunknum))
+ for v in data:
+ yield v
+
+ for col, data in self.columns:
+ yield col, _col_generator(self, col)
diff --git a/SOOMv0/ColTypes/Discrete.py b/SOOMv0/ColTypes/Discrete.py
new file mode 100644
index 0000000..2214c69
--- /dev/null
+++ b/SOOMv0/ColTypes/Discrete.py
@@ -0,0 +1,332 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: Discrete.py 3693 2009-02-10 05:36:00Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/ColTypes/Discrete.py,v $
+
+import time
+import sets
+import operator
+import re
+try:
+ set
+except NameError:
+ from sets import Set as set
+
+import Numeric, MA
+import soomfunc
+from soomarray import ArrayDict
+from SOOMv0.common import *
+from SOOMv0.Soom import soom
+from SOOMv0.ColTypes.base import DatasetColumnBase
+
+class _DiscreteDatasetColumn(DatasetColumnBase):
+ loadables = ['data', 'inverted']
+
+ def __init__(self, parent_dataset, name,
+ all_value=None, all_label=None,
+ **kwargs):
+ DatasetColumnBase.__init__(self, parent_dataset, name, **kwargs)
+ self._inverted = {}
+ if all_label is None:
+ all_label = '<All>'
+ self.all_label = all_label
+ # the datatype of the value used to represent "all" must be
+ # consistent with the datatype of the column...
+ if all_value is None:
+ self.all_value = self.datatype.default_all_value
+ else:
+ try:
+ self.all_value = self.datatype.as_pytype(all_value)
+ except (ValueError, TypeError):
+ raise Error('The all_value given, %r, for column %s does not match datatype %s' % (all_value, self.name, self.datatype.name))
+
+ def do_outtrans(self, v):
+ try:
+ if not isinstance(v, MA.MaskedScalar) and v == self.all_value:
+ return self.all_label
+ except:
+ # mx.DateTime can raise mx.DateTime.Error here, sigh.
+ pass
+ return DatasetColumnBase.do_outtrans(self, v)
+
+ def is_discrete(self):
+ return True
+
+ def cardinality(self):
+ """Method to report the cardinality of a categorical column"""
+ return len(self.inverted)
+
+ def load_inverted(self):
+ if self.parent_dataset.backed:
+ if self._inverted is None:
+ starttime = time.time()
+ filename = self.object_path('inverted', 'SOOMblobstore')
+ self._inverted = ArrayDict(filename, 'r')
+ elapsed = time.time() - starttime
+ soom.info('load of %r index took %.3f seconds.' %\
+ (self.name, elapsed))
+ else:
+ # we need to build the inverted index!
+ self._build_inverted()
+
+ def unload_inverted(self):
+ self._inverted = None
+
+ def get_inverted(self):
+ if self._inverted is None:
+ self.load_inverted()
+ return self._inverted
+ inverted = property(get_inverted)
+
+ def _build_inverted(self):
+ """
+ Build an inverted index
+
+ NOTE - This is now only used where there is no on-disk
+ inverted index, but the column is discrete. For persistent
+ discrete columns, the inverted index is built as the data
+ is filtered, and the inverted index is saved along with
+ the data.
+ """
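+        # Resulting structure (hypothetical data): for a column holding
+        # ['a', 'b', 'a'] the index maps 'a' -> array([0, 2]) and
+        # 'b' -> array([1]), i.e. value -> row ordinals.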
+ starttime = time.time() # keep track of time
+ inverted_dict = {}
+ # Use fast NumPy methods if the column type is numeric
+ if self.is_numerictype():
+ # first get all the unique values
+ uniquevalues = soomfunc.unique(Numeric.sort(self._data.compressed()))
+ ordinals = Numeric.array(range(len(self._data)),
+ typecode=Numeric.Int)
+ for value in uniquevalues:
+ inverted = Numeric.compress(Numeric.where(Numeric.equal(self._data,value),1,0),ordinals)
+ inverted_dict[value] = inverted
+ else:
+ # loop over each element
+ for rownum, value in enumerate(self._data):
+ if type(value) is tuple:
+ for v in value:
+ row_nums = inverted_dict.setdefault(v, [])
+ row_nums.append(rownum)
+ else:
+ row_nums = inverted_dict.setdefault(value, [])
+ row_nums.append(rownum)
+ for value, row_nums in inverted_dict.iteritems():
+ row_array = Numeric.array(row_nums, typecode=Numeric.Int)
+ if self.datatype.name == 'tuple':
+ row_array = soomfunc.unique(Numeric.sort(row_array))
+ inverted_dict[value] = row_array
+ self._inverted = inverted_dict
+ soom.info('Building inverted index for column %s in dataset %s took %.3f seconds' % (self.name, self.parent_dataset.name, time.time() - starttime))
+
+ def _store_inverted(self, inverted=None):
+ """
+ Stores the passed inverted index as a memory-mapped dict
+ of NumPy ID vectors
+ """
+ indexfilename = None
+ inverted_blob = {}
+ if self.parent_dataset.backed:
+ indexfilename = self.object_path('inverted', 'SOOMblobstore',
+ mkdirs=True)
+ inverted_blob = ArrayDict(indexfilename, 'w+')
+ # now write out the Numpy array for each value in the column to a file
+ for value, rownums in inverted.iteritems():
+ # TO DO: need to determine the smallest Numpy integer type required
+ # to hold all the row ids
+ row_array = Numeric.array(rownums, Numeric.Int)
+ if self.datatype.name == 'tuple':
+ row_array = soomfunc.unique(Numeric.sort(row_array))
+ inverted_blob[value] = row_array
+ if self.heterosourcecols is not None:
+ # we need to assemble an output translation dict
+ self.outtrans = {}
+            for keytuple in inverted.keys():
+                # get_columns() returns column objects, not names
+                for pcol in self.parent_dataset.get_columns():
+                    if pcol.columnid == keytuple[0]:
+                        clabel = pcol.label
+                        if callable(pcol.outtrans):
+                            cdesc = pcol.outtrans(keytuple[1])
+                        else:
+                            cdesc = pcol.outtrans[keytuple[1]]
+                        self.outtrans[keytuple] = clabel + ":" + cdesc
+ del inverted # Not needed anymore
+ if indexfilename:
+ inverted_blob = None # Closes and flushes to disk
+ self._inverted = inverted_blob
+
+ def _inverted_gen(self, src):
+ inverted = {}
+ for rownum, value in enumerate(src):
+ if type(value) is tuple:
+ for v in value:
+ if v is not None or not self.ignorenone:
+ row_nums = inverted.setdefault(v, [])
+ row_nums.append(rownum)
+ else:
+ try:
+ row_nums = inverted.setdefault(value, [])
+ except TypeError, e:
+ raise Error('column %r: Bad value: %r %s: %s' %
+ (self.name, value, type(value), e))
+ row_nums.append(rownum)
+ yield value
+ self._store_inverted(inverted)
+
+ def get_store_chain(self, data, mask=None):
+ src = DatasetColumnBase.get_store_chain(self, data, mask)
+ src = self._inverted_gen(src)
+ return src
+
+ def describe(self, detail=ALL_DETAIL):
+ d = DatasetColumnBase.describe(self, detail)
+ if detail >= SOME_DETAIL: # Don't load .inverted otherwise
+ d.add('data', SOME_DETAIL, 'Cardinality', self.cardinality())
+ d.add('data', SOME_DETAIL, 'Label for <All>', self.all_label)
+ d.add('data', SOME_DETAIL, 'Value for <All>', str(self.all_value))
+ return d
+
+ def _op_general(self, fn, discardnull=True):
+        # Handle the general case: evaluate fn over the distinct values in
+        # the inverted index and union the matching row-ordinal vectors.
+ possible_keys = set(self.inverted.keys())
+ if discardnull:
+ possible_keys.discard(None)
+ rows = [self.inverted[v] for v in possible_keys if fn(v)]
+ if len(rows) == 1:
+ vectors = rows[0]
+ elif len(rows) > 1:
+ vectors = soomfunc.union(*rows)
+ else:
+ vectors = []
+ return vectors
+
+ def op_between(self, value):
+ try:
+ start, end = value
+ except (ValueError, TypeError):
+ raise ExpressionError('between(start, end)')
+ possible_keys = set(self.inverted.keys())
+ rows = [self.inverted[v]
+ for v in possible_keys
+ if start <= v < end]
+ if len(rows) == 0:
+ vectors = []
+ elif len(rows) == 1:
+ vectors = rows[0]
+ else:
+ vectors = soomfunc.union(*rows)
+ return vectors
+
+ def op_equal(self, value):
+ # special case for operator equal as we can just do a direct lookup of
+ # the inverted index.
+ return self.inverted.get(value, [])
+
+ def op_less_than(self, value):
+ return self._op_general(lambda v: v < value)
+
+ def op_less_equal(self, value):
+ return self._op_general(lambda v: v <= value)
+
+ def op_greater_than(self, value):
+ return self._op_general(lambda v: v > value)
+
+ def op_greater_equal(self, value):
+ return self._op_general(lambda v: v >= value)
+
+ def op_not_equal(self, value):
+ return self._op_general(lambda v: v != value, discardnull=False)
+
+ def op_equal_col(self, value):
+ return self._op_general(self.prefix_match(operator.eq, value))
+
+ def op_not_equal_col(self, value):
+ return self._op_general(self.prefix_match(operator.ne, value))
+
+ def op_less_than_col(self, value):
+ return self._op_general(self.prefix_match(operator.lt, value))
+
+ def op_less_equal_col(self, value):
+ return self._op_general(self.prefix_match(operator.le, value))
+
+ def op_greater_than_col(self, value):
+ return self._op_general(self.prefix_match(operator.gt, value))
+
+ def op_greater_equal_col(self, value):
+ return self._op_general(self.prefix_match(operator.ge, value))
+
+ def value_set(self, value):
+ if type(value) not in (list, tuple):
+ raise ExpressionError('"in" operator must be followed by a list')
+ return set(value)
+
+ def op_in(self, value):
+ value = self.value_set(value)
+ return self._op_general(lambda v: v in value, discardnull=False)
+
+ def op_not_in(self, value):
+ value = self.value_set(value)
+ return self._op_general(lambda v: v not in value, discardnull=False)
+
+ def op_in_col(self, value):
+ return self._op_general(self.prefix_match_in(value))
+
+ def op_not_in_col(self, value):
+ match_fn = self.prefix_match_in(value)
+ return self._op_general(lambda v: not match_fn(v))
+
+ def op_regexp(self, value):
+ return self._op_general(self.regexp_match(value))
+
+ def op_not_regexp(self, value):
+ match_fn = self.regexp_match(value)
+ return self._op_general(lambda v: not match_fn(v))
+
+ def regexp_match(self, value):
+ pat = re.compile(value, re.IGNORECASE)
+ def _op_regexp(v):
+ return pat.search(self.do_format(self.do_outtrans(v)))
+ return _op_regexp
+
+ def prefix_match(self, match_fn, value):
+ value = self.do_format(value)
+ value_len = len(value)
+ def _op_prefix(v):
+ return match_fn(self.do_format(self.do_outtrans(v))[:value_len], value)
+ return _op_prefix
+
+ def prefix_match_in(self, value):
+ if type(value) not in (list, tuple):
+ raise ExpressionError('"in" operator must be followed by a list')
+ values = [self.do_format(v) for v in value]
+ def _op_prefix(v):
+ v = self.do_format(self.do_outtrans(v))
+ for value in values:
+ if v.startswith(value):
+ return True
+ return False
+ return _op_prefix
+
+
+class CategoricalDatasetColumn(_DiscreteDatasetColumn):
+ coltype = 'categorical'
+
+
+class OrdinalDatasetColumn(_DiscreteDatasetColumn):
+ coltype = 'ordinal'
+
+ def is_ordered(self):
+ return True
diff --git a/SOOMv0/ColTypes/Identity.py b/SOOMv0/ColTypes/Identity.py
new file mode 100644
index 0000000..5b86a62
--- /dev/null
+++ b/SOOMv0/ColTypes/Identity.py
@@ -0,0 +1,22 @@
+
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: Identity.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/ColTypes/Identity.py,v $
+
+from SOOMv0.ColTypes.base import DatasetColumnBase
+
+class IdentityDatasetColumn(DatasetColumnBase):
+ coltype = 'identity'
diff --git a/SOOMv0/ColTypes/RowOrdinal.py b/SOOMv0/ColTypes/RowOrdinal.py
new file mode 100644
index 0000000..f3aa846
--- /dev/null
+++ b/SOOMv0/ColTypes/RowOrdinal.py
@@ -0,0 +1,108 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: RowOrdinal.py 3690 2009-02-09 05:58:21Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/ColTypes/RowOrdinal.py,v $
+
+import itertools
+
+import Numeric
+
+from SOOMv0.ColTypes.base import SimpleDatasetColumnBase
+
+class _RowOrdinalColumn(SimpleDatasetColumnBase):
+ coltype = 'ordinal'
+
+ def __init__(self, parent_dataset):
+ SimpleDatasetColumnBase.__init__(self, parent_dataset, 'row_ordinal',
+ label='Ordinal', datatype='int')
+
+ def is_numerictype(self):
+ return True
+
+ def is_ordered(self):
+ return True
+
+ def op_less_than(self, value):
+ return Numeric.arrayrange(value)
+
+ def op_less_equal(self, value):
+ return Numeric.arrayrange(value+1)
+
+ def op_greater_than(self, value):
+ return Numeric.arrayrange(value+1, len(self))
+
+ def op_greater_equal(self, value):
+ return Numeric.arrayrange(value, len(self))
+
+ def op_equal(self, value):
+ return [value]
+
+ def op_between(self, value):
+ return Numeric.arrayrange(*value)
+
+ def op_in(self, value):
+ return value
+
+
+class RowOrdinalColumn(_RowOrdinalColumn):
+ """
+    The row_ordinal column contains the record number of the row by
+    convention. For continuous datasets (an example of a non-continuous
+    dataset is the result of a filter), the data is synthesized
+    on demand.
+ """
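+    # Sketch: for a 5-row dataset, col[1:4] synthesizes
+    # Numeric.arrayrange(1, 4) (ordinals 1, 2, 3) without storing data.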
+ def __len__(self):
+ return len(self.parent_dataset)
+
+ def get_data(self):
+ return self
+ data = property(get_data)
+
+ def __iter__(self):
+ return iter(xrange(len(self.parent_dataset)))
+
+ def __getitem__(self, i):
+ if type(i) is slice:
+ start, stop, stride = i.indices(len(self.parent_dataset))
+ if stride > 1:
+ return Numeric.arrayrange(start, stop, stride)
+ else:
+ return Numeric.arrayrange(start, stop)
+ else:
+ if i < len(self.parent_dataset):
+ return i
+ raise IndexError
+
+ def take(self, rows):
+ return rows
+
+class FilteredRowOrdinalColumn(_RowOrdinalColumn):
+ coltype = 'ordinal'
+
+ def __init__(self, parent_dataset, record_ids):
+ _RowOrdinalColumn.__init__(self, parent_dataset)
+ self.data = record_ids
+
+ def __len__(self):
+ return len(self.data)
+
+ def __iter__(self):
+ return iter(self.data)
+
+ def __getitem__(self, i):
+ return self.data[i]
+
+ def take(self, rows):
+ return Numeric.take(self.data, rows)
diff --git a/SOOMv0/ColTypes/Scalar.py b/SOOMv0/ColTypes/Scalar.py
new file mode 100644
index 0000000..1bcb847
--- /dev/null
+++ b/SOOMv0/ColTypes/Scalar.py
@@ -0,0 +1,99 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: Scalar.py 3691 2009-02-09 07:18:13Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/ColTypes/Scalar.py,v $
+
+import Numeric, MA
+import soomfunc
+from SOOMv0.common import ExpressionError
+from SOOMv0.ColTypes.base import DatasetColumnBase
+
+class _ScalarDatasetColumn(DatasetColumnBase):
+
+ def is_scalar(self):
+ return True
+
+ def _op_general(self, op, value):
+        # Handle the general case: apply the named Numeric/MA comparison
+        # across the whole column and return the ordinals of the non-zero
+        # results.
+        #
+        # NB: use of filled() in Numeric ops is a dangerous hack and may give
+        # wrong answers if columns contain values <= 0
+ if type(self.data) is MA.MaskedArray:
+ numeric_fn = getattr(MA, op)
+ resmap = numeric_fn(self.data, value).filled()
+ else:
+ numeric_fn = getattr(Numeric, op)
+ resmap = numeric_fn(self.data, value)
+ return Numeric.nonzero(resmap)
+
+ def op_less_than(self, value):
+ return self._op_general('less', value)
+
+ def op_less_equal(self, value):
+ return self._op_general('less_equal', value)
+
+ def op_greater_than(self, value):
+ return self._op_general('greater', value)
+
+ def op_greater_equal(self, value):
+ return self._op_general('greater_equal', value)
+
+ def op_not_equal(self, value):
+ if type(self.data) is MA.MaskedArray:
+ if value is None:
+ resmap = Numeric.not_equal(self.data.mask(), 1)
+ else:
+ resmap = MA.not_equal(self.data, value).filled()
+ resmap = Numeric.logical_or(resmap, self.data.mask())
+ else:
+ if value is None:
+ return Numeric.arange(len(self.data))
+ resmap = Numeric.not_equal(self.data, value)
+ return Numeric.nonzero(resmap)
+
+ def op_equal(self, value):
+ if value is None:
+ if type(self.data) is MA.MaskedArray:
+ return Numeric.nonzero(self.data.mask())
+ return []
+ return self._op_general('equal', value)
+
+ def op_between(self, value):
+ try:
+ start, end = value
+ except (ValueError, TypeError):
+ raise ExpressionError('between(start, end)')
+ if type(self.data) is MA.MaskedArray:
+            resmap_ge = MA.greater_equal(self.data, start).filled()
+            resmap_lt = MA.less(self.data, end).filled()
+ else:
+ resmap_ge = Numeric.greater_equal(self.data, start)
+ resmap_lt = Numeric.less(self.data, end)
+ vectors = soomfunc.intersect(Numeric.nonzero(resmap_ge),
+                                     Numeric.nonzero(resmap_lt))
+ return vectors
+
+
+class ScalarDatasetColumn(_ScalarDatasetColumn):
+ coltype = 'scalar'
+
+
+class WeightingDatasetColumn(_ScalarDatasetColumn):
+ coltype = 'weighting'
+
+ def is_weighting(self):
+ return True
+
+
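+# Illustrative sketch (not part of the original source): how the scalar
+# comparison operators resolve to row indices. With a plain Numeric data
+# vector, _op_general() applies the named ufunc and returns the indices of
+# the true comparison results:
+#
+#   import Numeric
+#   data = Numeric.array([1.5, 3.0, 2.2, 9.9])
+#   resmap = Numeric.less(data, 3.0)    # -> array([1, 0, 1, 0])
+#   Numeric.nonzero(resmap)             # -> array([0, 2]), rows matching "< 3.0"
+#
+# For MA.MaskedArray data the MA ufuncs plus .filled() are used instead, so
+# masked (missing) rows are handled via the fill value - imperfectly, as the
+# NB in _op_general() warns.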
diff --git a/SOOMv0/ColTypes/SearchableText.py b/SOOMv0/ColTypes/SearchableText.py
new file mode 100644
index 0000000..713442f
--- /dev/null
+++ b/SOOMv0/ColTypes/SearchableText.py
@@ -0,0 +1,221 @@
+# vim: set sw=4 et ai:
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: SearchableText.py 3690 2009-02-09 05:58:21Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/ColTypes/SearchableText.py,v $
+
+# Standard Libraries
+import os
+import re
+import struct
+import array
+from time import time
+from bsddb import db
+
+# Application modules
+from soomfunc import strip_word
+
+from SOOMv0.common import *
+from SOOMv0.Soom import soom
+from SOOMv0.ColTypes.base import DatasetColumnBase
+
+
+ITEMFMT = 'L'
+
+class WordInfo(object):
+ __slots__ = ('overflow_blocks', 'occurrences')
+
+ def __init__(self):
+ self.overflow_blocks = None
+ self.occurrences = array.array(ITEMFMT)
+
+
+class SearchableTextDatasetColumn(DatasetColumnBase):
+
+ coltype = 'searchabletext'
+ loadables = DatasetColumnBase.loadables + ('wordidx', 'wordidx_overflow')
+ OVERFLOW_BLOCKSIZE = 4096
+ OVERFLOW_ITEMSIZE = array.array(ITEMFMT).itemsize
+ OVERFLOW_MAGIC = 0x567eadbe
+ OVERFLOW_HDR = '>LLL'
+
+ def is_searchabletext(self):
+ return True
+
+ WORD_RE = re.compile(r"[A-Z0-9][A-Z0-9']+", re.I)
+
+ def __init__(self, *args, **kw):
+ """
+ Initialise the cache
+ """
+ DatasetColumnBase.__init__(self, *args, **kw)
+ self._wordidx = None
+ self._wordidx_overflow = None
+ self._overflow_blocksize = None
+ self._overflow_blockitems = None
+
+ def _wordidx_open(self, mode='r'):
+ fn = self.object_path('wordidx', 'db', mkdirs=(mode == 'c'))
+ worddb = db.DB()
+ if mode == 'c':
+ worddb.set_cachesize(0, 8<<20)
+ worddb.open(fn, db.DB_HASH, db.DB_CREATE, 0666)
+ else:
+ worddb.open(fn, db.DB_HASH, db.DB_RDONLY)
+ return worddb
+
+ def _overflow_open(self, mode='r'):
+ fn = self.object_path('wordidx_overflow', 'data', mkdirs=(mode == 'c'))
+ if mode == 'c':
+ return open(fn, 'wb')
+ else:
+ return open(fn, 'rb')
+
+ def _overflow_hdr(self, f, count):
+ hdr = struct.pack(self.OVERFLOW_HDR, self.OVERFLOW_MAGIC,
+ self.OVERFLOW_BLOCKSIZE, count)
+ hdr += '\0' * (self.OVERFLOW_BLOCKSIZE - len(hdr))
+ f.seek(0, 0)
+ f.write(hdr)
+
+ def _wordidx_gen(self, src):
+ # Keep a record of the location of each occurrence of a word
+ self.unload_wordidx()
+ self.unload_wordidx_overflow()
+ t0 = time()
+ worddb = self._wordidx_open('c')
+ overflow = self._overflow_open('c')
+ self._overflow_hdr(overflow, 0)
+ overflow_block = 1
+ block_items = self.OVERFLOW_BLOCKSIZE / self.OVERFLOW_ITEMSIZE
+ wrecs = {}
+ for rownum, value in enumerate(src):
+ if value:
+ for wordnum, match in enumerate(self.WORD_RE.finditer(value)):
+ word = strip_word(match.group())
+ try:
+ wrec = wrecs[word]
+ except KeyError:
+ wrec = wrecs[word] = WordInfo()
+ occurrences = wrec.occurrences
+ occurrences.append(rownum)
+ occurrences.append(wordnum)
+ if len(occurrences) == block_items:
+ if wrec.overflow_blocks is None:
+ wrec.overflow_blocks = array.array(ITEMFMT)
+ occurrences.tofile(overflow)
+ wrec.overflow_blocks.append(overflow_block)
+ overflow_block += 1
+ del occurrences[:]
+ yield value
+ t1 = time()
+ for word, wrec in wrecs.iteritems():
+ data = array.array(ITEMFMT)
+ if wrec.overflow_blocks is not None:
+ data.append(len(wrec.overflow_blocks))
+ data.extend(wrec.overflow_blocks)
+ else:
+ data.append(0)
+ data.extend(wrec.occurrences)
+ worddb[word] = data.tostring()
+ worddb.close()
+ self._overflow_hdr(overflow, overflow_block)
+ overflow.close()
+ t2 = time()
+ overflow_size = overflow_block * self.OVERFLOW_BLOCKSIZE
+ soom.info('word index for %r took %.1fs (%.1fs+%.1fs), %d words, '
+ '%d overflow blocks (%.1fMB)' %
+ (self.name, t2-t0, t1-t0, t2-t1, len(wrecs),
+ overflow_block, overflow_size / 1024.0 / 1024.0))
+
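+    # On-disk layout sketch (descriptive comment, not part of the original
+    # source): the overflow file starts with one OVERFLOW_BLOCKSIZE-byte
+    # header block - struct.pack('>LLL', magic, blocksize, blockcount)
+    # padded with NULs - followed by fixed-size blocks of 'L' array items
+    # holding (rownum, wordnum) occurrence pairs for a single word:
+    #
+    #   import struct, array
+    #   hdr = struct.pack('>LLL', 0x567eadbe, 4096, blockcount)
+    #   hdr += '\0' * (4096 - len(hdr))         # block 0: the header
+    #   occ = array.array('L', [0, 2, 5, 0])    # (row 0, word 2), (row 5, word 0)
+    #
+    # Each word's db record then stores the count of its overflow blocks,
+    # the block numbers, and finally any residual in-memory occurrences.
+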
+ def get_store_chain(self, data, mask=None):
+ src = iter(data)
+ if mask is not None:
+ src = self._mask_gen(src, mask)
+ if self.missingvalues:
+ src = self._missing_gen(src, self.missingvalues)
+ src = self._storedata_gen(src)
+ src = self._wordidx_gen(src)
+ return src
+
+# def _store_wordidx(self):
+# """
+# Make sure the accumulated word information is flushed
+# """
+# if not self.parent_dataset.backed:
+# raise Error('Searchable text requires a "backed" dataset')
+# self.flush()
+
+ def get_wordidx(self):
+ if self._wordidx is None:
+ self._wordidx = self._wordidx_open()
+ return self._wordidx
+ wordidx = property(get_wordidx)
+
+ def get_wordidx_overflow(self):
+        if self._wordidx_overflow is None:
+            fn = self.object_path('wordidx_overflow', 'data')
+            self._wordidx_overflow = self._overflow_open()
+ hdr_size = struct.calcsize(self.OVERFLOW_HDR)
+ hdr = self._wordidx_overflow.read(hdr_size)
+ try:
+ magic, blocksize, blockcount =\
+ struct.unpack(self.OVERFLOW_HDR, hdr)
+ except struct.error, e:
+ raise DatasetError('%s: header: %s' % (fn, e))
+ if magic != self.OVERFLOW_MAGIC:
+ raise DatasetError('%s: incorrect magic' % (fn))
+ self._wordidx_overflow.seek(0, 2)
+ filesize = self._wordidx_overflow.tell()
+ if filesize != blockcount * blocksize:
+ raise DatasetError('%s: incorrect file size (expect %s, got %s)'
+ % (fn, blockcount * blocksize, filesize))
+ self._overflow_blocksize = blocksize
+ self._overflow_blockitems = blocksize / self.OVERFLOW_ITEMSIZE
+ return self._wordidx_overflow
+ wordidx_overflow = property(get_wordidx_overflow)
+
+ def unload_wordidx(self):
+ if self._wordidx is not None:
+ self._wordidx.close()
+ self._wordidx = None
+
+ def unload_wordidx_overflow(self):
+ if self._wordidx_overflow is not None:
+ self._wordidx_overflow.close()
+ self._wordidx_overflow = None
+ self._overflow_blocksize = None
+ self._overflow_blockitems = None
+
+ def op_contains(self, sexpr):
+ return sexpr(self)[0]
+
+ def word_occurrences(self, word):
+ # first look for the word itself in the index
+ try:
+ buf = self.wordidx[strip_word(word)]
+ except KeyError:
+ return None
+ info = array.array(ITEMFMT)
+ info.fromstring(buf)
+ overflow_block_count = info[0]
+ overflow_blocks = info[1:overflow_block_count+1]
+ residual_occurrences = info[overflow_block_count+1:]
+ occurrences = array.array(ITEMFMT)
+ for block in overflow_blocks:
+ self.wordidx_overflow.seek(block * self._overflow_blocksize, 0)
+ occurrences.fromfile(self.wordidx_overflow,
+ self._overflow_blockitems)
+ occurrences.extend(residual_occurrences)
+ return occurrences
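+
+# Illustrative decoding sketch (not part of the original source): a word's
+# index record unpacks as [n_blocks, block_1..block_n, row_1, word_1, ...].
+# Assuming a record with no overflow blocks:
+#
+#   import array
+#   info = array.array('L')
+#   info.fromstring(buf)    # e.g. [0, 3, 0, 3, 1]
+#   n = info[0]             # 0 overflow blocks, so the occurrences are
+#   pairs = info[n+1:]      # (row 3, word 0) and (row 3, word 1)
+#
+# word_occurrences() above rebuilds the full list by reading each overflow
+# block from the data file before appending these residual pairs.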
diff --git a/SOOMv0/ColTypes/__init__.py b/SOOMv0/ColTypes/__init__.py
new file mode 100644
index 0000000..1493625
--- /dev/null
+++ b/SOOMv0/ColTypes/__init__.py
@@ -0,0 +1,17 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: __init__.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/ColTypes/__init__.py,v $
+
diff --git a/SOOMv0/ColTypes/base.py b/SOOMv0/ColTypes/base.py
new file mode 100644
index 0000000..e884796
--- /dev/null
+++ b/SOOMv0/ColTypes/base.py
@@ -0,0 +1,569 @@
+
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: base.py 3690 2009-02-09 05:58:21Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/ColTypes/base.py,v $
+
+import sys
+import os
+import time
+import itertools
+
+import MA, Numeric
+
+from SOOMv0.common import *
+from SOOMv0.Soom import soom
+from SOOMv0.Describe import Describe
+from SOOMv0.DataTypes import get_datatype_by_name
+
+"""
+Base classes for dataset columns
+
+Arguments are:
+ desc longer description of the column
+ label label for the column when printing output
+ coltype
+ datatype
+ all_label label for summaries of this column
+ all_value value used to represent summaries of this
+ column
+ maxoutlen maximum length of column value strings for
+ printing - not currently used
+ use_outtrans whether to use the out translation
+ outtrans output translation of coded values for
+ presentation (either a dict or a callable).
+ ignorenone flag for multivalues (tuples) to ignore the None value
+ calculatedby If not None then this is a calculated column
+ calculatedargs Sequence or dictionary of arguments to be
+ passed to the calculatedby function
+ missingvalues a dictionary of values which represent
+ missing data
+    multisourcecols contains a homogeneous sequence of other
+                        categorical or ordinal columns (of the same
+                        type of data) if a multicolumn - multicolumns
+                        are multi-valued attributes (tricky!)
+    heterosourcecols contains a heterogeneous sequence of other
+                        categorical or ordinal columns
+
+Attributes are:
+ data data values in row order - referred to as
+ the data fork
+ mask mask of missing (0) and non-missing (1)
+ values for the data fork
+ inverted mapped to lists of record IDs - referred
+ to as the inverted fork
+
+Column types are:
+ categorical discrete values, inverted index provided
+ ordinal discrete *ordered* values, inverted index provided
+ scalar continuous values, no inverted index
+ identity discrete values, no inverted index (also called
+ "noncategorical")
+ weighting continuous weighting to be applied to other columns
+ searchabletext text strings which may be broken into words and indexed
+ for text searches
+
+"""
+
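+# Illustrative sketch (not part of the original source): a minimal use of
+# the arguments described above, assuming a dataset "ds" exposing the
+# addcolumnfromseq() method used elsewhere in this package:
+#
+#   ds.addcolumnfromseq('sex', data=[1, 2, 1],
+#                       label='Sex', coltype='categorical', datatype='int',
+#                       outtrans={1: 'Male', 2: 'Female'})
+#
+# The column stores the coded values as its data fork; do_outtrans() maps
+# the codes to presentation values at output time.
+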
+class AbstractDatasetColumnBase(object):
+ __slots__ = ()
+
+class SimpleDatasetColumnBase(AbstractDatasetColumnBase):
+ coltype = None
+
+ def __init__(self, parent_dataset, name, label=None, desc=None,
+ datatype=None, format_str=None):
+ soom.check_name_ok(name, 'DatasetColumn')
+ self.parent_dataset = parent_dataset
+ self.name = name
+ self.label = label or name
+ self.desc = desc
+ self.datatype = get_datatype_by_name(datatype)
+ self.format_str = format_str
+ if self.format_str is None:
+ self.format_str = self.datatype.default_format_str
+
+ def get_metadata(self):
+ """
+        Returns the column "metadata" - basically the column
+        attributes, minus data or indices. The result is a
+ dictionary suitable for use as kwargs in creating a new
+ shallow copy of the column.
+ """
+ m = dict(vars(self))
+ del m['parent_dataset']
+ m['coltype'] = self.coltype # Class attribute
+ m['datatype'] = self.datatype.name
+ return m
+
+ def rename_column(self, newname):
+ soom.check_name_ok(newname, 'DatasetColumn')
+ self.name = newname
+
+ def delete_column(self):
+ pass
+
+ def is_discrete(self):
+ """
+ Coltype: Is the column indexable?
+ """
+ return False
+
+ def is_scalar(self):
+ """
+ Coltype: Does the column contain continuous values (typically floats)
+ """
+ return False
+
+ def is_ordered(self):
+ """
+ Coltype: Is there a natural ordering for the column?
+ """
+ return False
+
+ def is_weighting(self):
+ """
+ Coltype: Can the column be used for weighting frequency counts?
+ """
+ return False
+
+ def is_searchabletext(self):
+ """
+ Coltype: Does the column support free text searches?
+ """
+ return False
+
+ def is_multivalue(self):
+ """
+ Datatype: Is the column made up of multiple values per row?
+ """
+ return self.datatype.is_multivalue
+
+ def is_datetimetype(self):
+ """
+ Datatype: Is the column data dates or times?
+
+ Date and time columns are handled differently when being
+ plotted, and require alternate import schemes in web
+ interfaces.
+ """
+ return self.datatype.is_datetime
+
+ def is_numerictype(self):
+ """
+ Datatype: Is the column data numeric?
+
+ Typically this is right justified on output, whereas other
+ data types are left justified
+ """
+ return self.datatype.is_numeric
+
+ def will_outtrans(self):
+ """
+ Is output translation enabled?
+ """
+ return False
+
+ def do_outtrans(self, v):
+ """
+ Translate the given value (if needed)
+ """
+ return v
+
+ def do_format(self, v):
+ """
+ Convert the value into a string, applying column-specific
+ (or datatype specific) formatting conventions.
+ """
+ if self.format_str is None:
+ self.format_str = self.datatype.default_format_str
+ return self.datatype.do_format(v, self.format_str)
+
+
+ def load(self, *args):
+ """
+ Load column data
+ """
+ pass
+
+ def unload(self, *args):
+ """
+ Unload column data
+ """
+ pass
+
+ def describe(self, detail=ALL_DETAIL):
+ """
+ Return a "Describe" object, which contains a string
+ description of the object metadata.
+ """
+ d = Describe(detail, 'col', 'data', 'out', 'misc')
+ d.add('col', NO_DETAIL, 'Name', self.name)
+ d.add('col', NO_DETAIL, 'Label', self.label)
+ d.add('col', SOME_DETAIL, 'Description', self.desc)
+ d.add('col', NO_DETAIL, 'Column Type', self.coltype)
+ d.add('col', NO_DETAIL, 'Data Type', self.datatype.name)
+ d.add('out', SOME_DETAIL, 'Format String', self.format_str)
+ return d
+
+ def filter_op(self, op, value):
+ """
+ Execute comparisons from expression parser for DatasetFilter.
+
+ Each returns a list of rows indexes that match the condition.
+ """
+
+ try:
+ opmeth = getattr(self, op)
+ except AttributeError:
+ op_name = op.replace('_', ' ')
+ if op_name.startswith('op '):
+ op_name = op_name[3:]
+ if op_name.endswith(' col'):
+ op_name = op_name[:-3] + 'starting'
+ raise ExpressionError('%r operator not supported on %s column %r' %
+ (op_name, self.coltype, self.name))
+ else:
+ return opmeth(value)
+
+
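+# Dispatch sketch (descriptive comment, not part of the original source):
+# the expression parser passes filter_op() method names such as
+# 'op_less_than' or 'op_between', which resolve to the op_* methods on the
+# concrete column classes, e.g. for a scalar column:
+#
+#   col.filter_op('op_between', (10, 20))
+#   # equivalent to getattr(col, 'op_between')((10, 20))
+#
+# A column class lacking the method raises ExpressionError with a readable
+# operator name rather than leaking an AttributeError.
+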
+class DatasetColumnBase(SimpleDatasetColumnBase):
+ # The primary difference between this class and its parent is that this
+ # class has additional loadable components (data, index, etc), as well as
+ # supporting data loading and output translation.
+ coltype = '[unknown]'
+ loadables = ('data',)
+
+ def __init__(self, parent_dataset, name, label=None, desc=None,
+ datatype=None, format_str=None,
+ calculatedby=None, calculatedargs=(),
+ ignorenone=True,
+ missingvalues=None,
+ use_outtrans=True, outtrans=None,
+ multisourcecols=None, heterosourcecols=None,
+ trace_load=False):
+ SimpleDatasetColumnBase.__init__(self, parent_dataset,
+ name, label, desc, datatype,
+ format_str)
+ self.calculatedby = calculatedby
+ self.calculatedargs = calculatedargs
+ self.ignorenone = ignorenone
+ self.missingvalues = missingvalues
+ self.outtrans = outtrans
+ self.use_outtrans = use_outtrans
+ if self.outtrans is None:
+ self.use_outtrans = False
+ self.outtrans = {}
+ self.multisourcecols = multisourcecols
+ self.heterosourcecols = heterosourcecols
+ self.trace_load = trace_load
+ self._data = []
+
+ def get_metadata(self):
+ m = SimpleDatasetColumnBase.get_metadata(self)
+ for attr in self.loadables:
+ del m['_' + attr]
+ return m
+
+ def __getstate__(self):
+ """
+ Returns a copy of the DatasetColumn's state but with the
+ loadable elements "unloaded".
+ """
+ if self.parent_dataset.backed:
+ # copy the dict since we will be changing it
+ odict = self.__dict__.copy()
+ for attr in self.loadables:
+ odict['_' + attr] = None
+ return odict
+ else:
+ return self.__dict__
+
+ def __setstate__(self, odict):
+ self.__dict__.update(odict)
+
+ def rename_column(self, newname):
+ raise NotImplementedError # XXX future work
+ SimpleDatasetColumnBase.rename_column(self, newname)
+
+ def delete_column(self):
+ # XXX this is wrong in the context of versioned datasets
+        soom.warning('deleting column %r (hope there are no concurrent users!)' %
+                     self.name)
+ SimpleDatasetColumnBase.delete_column(self)
+
+ def do_outtrans(self, v):
+ if self.use_outtrans and self.outtrans:
+ if callable(self.outtrans):
+ if isinstance(v, tuple):
+ return tuple([self.outtrans(vv) for vv in v])
+ else:
+ return self.outtrans(v)
+ else:
+ if isinstance(v, tuple):
+ return tuple([self.outtrans.get(vv, vv) for vv in v])
+ else:
+ return self.outtrans.get(v, v)
+ else:
+ return v
+
+ def will_outtrans(self):
+ """
+ Is an outtrans active?
+ """
+ if not self.use_outtrans or not self.outtrans:
+ return False
+ return True
+
+ def get_inverted(self):
+ # This is not strictly necessary, but helps to give a more useful
+ # diagnostic to the end user.
+ raise Error('%r is a %s, not a discrete column (no index)' %
+ (self.name, self.coltype))
+ inverted = property(get_inverted)
+
+ def _loadables(self, op, want):
+ if self.parent_dataset.backed:
+ if not want:
+ want = self.loadables
+ for loadable in want:
+ try:
+ meth = getattr(self, '%s_%s' % (op, loadable))
+ except AttributeError:
+ pass
+ else:
+ meth()
+
+ def load(self, *want):
+ self._loadables('load', want)
+
+ def unload(self, *want):
+ self._loadables('unload', want)
+
+ def load_data(self):
+ if self.parent_dataset.backed:
+ if self._data is None:
+ starttime = time.time()
+ self._data = self.datatype.load_data(self.object_path('data'))
+ elapsed = time.time() - starttime
+ soom.info('load of %r data vector took %.3f seconds.' %\
+ (self.name, elapsed))
+
+ def unload_data(self):
+ self._data = None
+
+ def get_data(self):
+ if self._data is None:
+ self.load_data()
+ return self._data
+ data = property(get_data)
+
+ def __getitem__(self, index):
+ return self.data[index]
+
+ def take(self, rows):
+ return self.datatype.take(self.data, rows)
+
+ def __len__(self):
+ return len(self.data)
+
+ def describe(self, detail=ALL_DETAIL):
+ d = SimpleDatasetColumnBase.describe(self, detail)
+ if detail >= SOME_DETAIL: # otherwise would load .data
+ d.add('data', SOME_DETAIL, 'Data Vector Length', len(self.data))
+ d.add('data', SOME_DETAIL, 'Values calculated by', self.calculatedby)
+            d.add('data', SOME_DETAIL, 'Calculated-by arguments', self.calculatedargs)
+ d.add('data', SOME_DETAIL, 'Missing Values', self.missingvalues)
+ if self.is_multivalue():
+ d.add('data', SOME_DETAIL, 'Ignore None values', self.ignorenone)
+
+ d.add('data', SOME_DETAIL, 'Multi-source cols', self.multisourcecols)
+ d.add('data', SOME_DETAIL, 'Hetero-source cols', self.heterosourcecols)
+ d.add('out', SOME_DETAIL, 'Use Output Translation', yesno(self.use_outtrans))
+ d.add('out', SOME_DETAIL, 'Output Translation', self.outtrans)
+ return d
+
+ def __str__(self):
+ return self.parent_dataset.print_cols(self.name)
+
+ def object_path(self, objtype, filetype=None, mkdirs=False):
+ object_name = os.path.join(self.name, objtype)
+ if filetype:
+ object_name = '%s.%s' % (object_name, filetype)
+ return self.parent_dataset.object_path(object_name,
+ gen=True, mkdirs=mkdirs)
+
+ def _display_hook(self):
+ print str(self.describe())
+
+ # Generator functions for processing newly loaded columns
+ def _mask_gen(self, src, mask):
+ for value, mask in itertools.izip(src, mask):
+ if mask:
+ value = None
+ yield value
+
+ def _calcby_gen(self, src, calcfn, calcargs):
+ if type(calcargs) is dict:
+ for value in src:
+ if value is None:
+ value = calcfn(**calcargs)
+ yield value
+ else:
+ for value in src:
+ if value is None:
+ value = calcfn(*calcargs)
+ yield value
+
+ def _missing_gen(self, src, missingvalues):
+ for value in src:
+ if value in missingvalues:
+ value = None
+ yield value
+
+ def _multisrc_gen(self, multisourcecols):
+ srcs = []
+ for colname in multisourcecols:
+ srcs.append(self.parent_dataset[colname].data)
+ return itertools.izip(*srcs)
+
+ def _tuple_gen(self, src, ignorenone):
+ for value in src:
+ if value is None:
+ value = ()
+ elif ignorenone:
+ value = tuple([v for v in value if v is not None])
+ yield value
+
+ def _tuplemissing_gen(self, src, missingvalues):
+ for value in src:
+ yield tuple([v for v in value if v not in missingvalues])
+
+ def _storedata_gen(self, src):
+        # If persistent, set the data file name
+ datafilename = None
+ if self.parent_dataset.backed:
+ datafilename = self.object_path('data', mkdirs=True)
+ ds_len = len(self.parent_dataset)
+ store_data = self.datatype.get_array(datafilename, ds_len)
+ maskval = self.datatype.masked_value
+ store_mask = self.datatype.get_mask(ds_len)
+ for rownum, value in enumerate(src):
+ if value is None:
+ store_value = maskval
+ if store_mask is not None:
+ store_mask[rownum] = 1
+ else:
+ store_value = value
+ try:
+ store_data[rownum] = store_value
+ except TypeError:
+ raise Error('bad data type, column %r at index %d, '
+ 'value %r, should be datatype %r' %\
+ (self.name, rownum, value,
+ self.datatype.name))
+ yield value
+        # If we have a mask, but no values are masked, use Numeric rather than MA
+ if store_mask is not None and not Numeric.sometrue(store_mask):
+ store_mask = None
+ self._data = self.datatype.store_data(store_data, store_mask,
+ datafilename)
+ # Give the datatype a chance to set the format string if none
+ # specified
+ if self.format_str is self.datatype.default_format_str:
+ format_str = self.datatype.derive_format_str(store_data)
+ if format_str:
+ self.format_str = format_str
+
+ def get_store_chain(self, data, mask=None):
+ """
+ Return a chain of generators for processing the src data,
+ which can either be a list or an iterable.
+ """
+ multisourcecols = self.multisourcecols or self.heterosourcecols
+ if multisourcecols:
+ src = self._multisrc_gen(multisourcecols)
+ else:
+ src = iter(data)
+ if MA.isMaskedArray(data) and mask is None:
+ mask = data.mask()
+ if mask is not None:
+ src = self._mask_gen(src, mask)
+ if self.is_multivalue():
+ src = self._tuple_gen(src, self.ignorenone)
+ if self.missingvalues:
+ src = self._tuplemissing_gen(src, self.missingvalues)
+ else:
+ if self.calculatedby:
+ src = self._calcby_gen(src, self.calculatedby,
+ self.calculatedargs)
+ if self.missingvalues:
+ src = self._missing_gen(src, self.missingvalues)
+ src = self._storedata_gen(src)
+ return src
+
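+    # Chain sketch (descriptive comment, not part of the original source):
+    # for a multivalue column with missing values, get_store_chain()
+    # composes the generators roughly as
+    #
+    #   _storedata_gen(_tuplemissing_gen(_tuple_gen(iter(data), ignorenone),
+    #                                    missingvalues))
+    #
+    # so each source value is masked, normalised and stored in a single
+    # pass when store_column() below drains the chain.
+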
+ def store_column(self, data, mask=None):
+ st = time.time()
+ # The chain of generators returned by get_store_chain does the actual
+ # processing.
+ if not getattr(self, 'trace_load', False):
+ for value in self.get_store_chain(data, mask):
+ pass
+ else:
+ tracer = store_trace()
+ for value in self.get_store_chain(data, mask):
+ tracer.flush()
+ tracer.done()
+ soom.info('Stored data for column %s in dataset %s (%.3fs)' %
+ (self.name, self.parent_dataset.name, time.time() - st))
+ if self.multisourcecols:
+ # We unload source cols to contain mapped memory use on 32 bit plats
+ for colname in self.multisourcecols:
+ self.parent_dataset[colname].unload()
+ soom.mem_report()
+
+
+class store_trace:
+ """
+    A debugging aid for the store generator chain that enables tracing
+    and reports (any) generator return values.
+ """
+ def __init__(self):
+ sys.settrace(self.trace)
+ self.frames = []
+
+ def trace(self, frame, event, arg):
+ def line_trace(frame, event, arg):
+ if event == 'return':
+ self.frames.append((frame, arg))
+ return line_trace
+ if frame.f_code.co_flags & 0x20: # Generator
+ return line_trace
+
+ def flush(self):
+ res = []
+ for frame, arg in self.frames:
+ res.append(frame.f_code.co_name)
+ res.append(repr(arg))
+ print '->'.join(res)
+ self.frames = []
+
+ def done(self):
+ sys.settrace(None)
+ self.frames = []
+
+
+def is_dataset_col(obj):
+ return isinstance(obj, AbstractDatasetColumnBase)
+
diff --git a/SOOMv0/CrossTab.py b/SOOMv0/CrossTab.py
new file mode 100644
index 0000000..776e0ed
--- /dev/null
+++ b/SOOMv0/CrossTab.py
@@ -0,0 +1,323 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+import time
+import sets
+import itertools
+import Numeric, MA
+from SOOMv0.common import *
+from SOOMv0.Soom import soom
+from SOOMv0.BaseDataset import BaseDataset
+
+def fully_masked(shape, typecode='i'):
+ return MA.array(Numeric.empty(shape, typecode=typecode),
+ mask=Numeric.ones(shape, typecode='b', savespace=1))
+
+def sum_axis(data, axis):
+ # We cast to float to avoid integer overflow
+ data = data.astype(Numeric.Float64)
+ return MA.add.reduce(data, axis)
+
+def replicate_axis(data, axis, size):
+ """
+ Returns a new array with an additional axis of the specified size.
+
+ The new axis is formed by replicating along the other axes.
+ """
+ assert axis > 0
+ repeat_map = size * Numeric.ones(data.shape[axis-1])
+ shape = list(data.shape)
+ data = MA.repeat(data, repeat_map, axis=axis-1)
+ shape.insert(axis, size)
+ data.shape = tuple(shape)
+ return data
+
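+# Worked example (descriptive comment, not part of the original source):
+# replicating axis 1 of a (2, 3) array with size 4 yields shape (2, 4, 3),
+# each original row repeated four times along the new axis:
+#
+#   a = MA.array(Numeric.arange(6))
+#   a.shape = (2, 3)
+#   b = replicate_axis(a, 1, 4)
+#   assert b.shape == (2, 4, 3)
+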
+class CrossTabAxis:
+ def __init__(self, name, label, values=None, col=None):
+ self.name = name
+ self.label = label
+ if values is not None:
+ self.values = values
+ self.col = col
+
+ def from_col(cls, col, values=None):
+ self = cls(col.name, col.label, col=col)
+ if values is not None:
+# this_values = sets.Set(col.inverted.keys())
+# wanted_values = sets.Set(values)
+# if not wanted_values.issubset(this_values):
+# missing_vals = [repr(v) for v in (wanted_values - this_values)]
+# raise Error('column %r values mismatch - %r' %
+# (self.name, ', '.join(missing_vals)))
+ self.values = values
+ else:
+ self.values = col.inverted.keys()
+ self.values.sort()
+ self.indices = fully_masked(len(col))
+ if 0:
+ # AM - the .inverted attribute of filtered datasets is returning
+ # invalid row indices, and the summ code relies on these indices,
+ # so the following code cannot be used until they are fixed.
+ self.axis_map = {}
+ for i, v in enumerate(self.values):
+ self.axis_map[v] = i
+ vec = col.inverted.get(v)
+ if vec is not None:
+ for idx in vec:
+ try:
+ self.indices[idx] = i
+ except IndexError:
+ print col.name, idx
+ raise
+ MA.put(self.indices, vec, i)
+ else:
+ axis_map = dict([(v, i) for i, v in enumerate(self.values)])
+ for i, v in enumerate(col.data):
+ try:
+ self.indices[i] = axis_map[v]
+ except KeyError:
+ pass
+ self.axis_map = axis_map
+ return self
+ from_col = classmethod(from_col)
+
+ def copy(self):
+ return self.__class__(self.name, self.label, self.values, self.col)
+
+ def __len__(self):
+ return len(self.values)
+
+ def __iter__(self):
+ return iter(self.values)
+
+ def __repr__(self):
+ lines = []
+ lines.append('Axis name: %r, label: %r' % (self.name, self.label))
+ if hasattr(self, 'axis_map'):
+ for value in self.values:
+ lines.append(' %5r: %5r: %s' % (self.axis_map[value], value,
+ self.col.do_outtrans(value)))
+ else:
+ lines.append(repr(self.values))
+ lines.append('')
+ return '\n'.join(lines)
+
+class CrossTabData:
+ def __init__(self, name, data=None, label=None):
+ self.name = name
+ self.data = data
+ self.label = label
+
+def xtab_axes(xtab_or_axes):
+ if isinstance(xtab_or_axes, CrossTab):
+ return xtab_or_axes.axes
+ elif hasattr(xtab_or_axes, 'get_columns'):
+ return [CrossTabAxis.from_col(col)
+ for col in xtab_or_axes.get_columns()
+ if not col.name.startswith('_') and col.is_discrete()]
+ else:
+ return xtab_or_axes
+
+def dims(xtab_or_axes):
+ return tuple([len(s) for s in xtab_axes(xtab_or_axes)])
+
+def shape_union(a, b):
+ a = xtab_axes(a)
+ b = xtab_axes(b)
+ b_remainder = list(b)
+ a_remainder = []
+ common = []
+ for a_axis in a:
+ for b_axis in b:
+ if a_axis.name == b_axis.name:
+ if a_axis.values != b_axis.values:
+ raise Error('%s column values not compatible: %r vs %r' %
+ (a_axis.name, a_axis.values, b_axis.values))
+ b_remainder.remove(b_axis)
+ common.append(a_axis.copy())
+ break
+ else:
+ a_remainder.append(a_axis.copy())
+ b_remainder = [b_axis.copy() for b_axis in b_remainder]
+ return a_remainder + common + b_remainder
+
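+# Illustrative sketch (not part of the original source): shape_union()
+# merges two axis lists, keeping shared axes once (their values must match)
+# and ordering the result as a-only, common, then b-only axes. For crosstabs
+# with axes named ('agegrp', 'sex') and ('sex', 'year'):
+#
+#   shape_union(a, b)    # -> axes named ['agegrp', 'sex', 'year']
+#
+# Mismatched values on a shared axis raise Error rather than guessing.
+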
+class CrossTab:
+ """
+ The CrossTab class provides an alternate N-dimensional array
+ representation of summary datasets.
+ """
+ def __init__(self, name=None):
+ self.name = name
+ self.table_dict = {}
+ self.table_order = []
+ self.axes = []
+
+ def from_summset(cls, ds, shaped_like=None):
+ self = cls(ds.name)
+ st = time.time()
+ cols = ds.get_columns()
+ if shaped_like is not None:
+ for axis in xtab_axes(shaped_like):
+ try:
+ col = ds[axis.name]
+ except KeyError:
+ pass
+ else:
+ self.axes.append(CrossTabAxis.from_col(col, axis.values))
+ cols.remove(col)
+ for col in cols:
+ if col.is_discrete() and not col.name.startswith('_'):
+ self.axes.append(CrossTabAxis.from_col(col))
+ if not self.axes:
+ raise Error('dataset %r must have at least one discrete column' %
+ (ds.name,))
+ indices = [axis.indices.filled() for axis in self.axes]
+ masks = [axis.indices.mask() for axis in self.axes]
+ map = MA.transpose(MA.array(indices, mask=masks))
+ shape = self.get_shape()
+ for col in ds.get_columns():
+ if col.is_scalar():
+ self.add_table(col.name,
+ data=self.from_vector(map, col.data, shape),
+ label=col.label)
+ elapsed = time.time() - st
+ soom.info('%r crosstab generation took %.3f, %.1f rows/s' %
+ (self.name, elapsed, len(map) / elapsed))
+ return self
+ from_summset = classmethod(from_summset)
+
+ def to_summset(self, name, **kwargs):
+ ds = BaseDataset(name, summary=True, **kwargs)
+ axes = self.axes[:]
+ axes.reverse()
+ colsdata = []
+ colsindex = []
+ colslen = 1
+ for axis in axes:
+ data = []
+ index = []
+ datalen = len(axis.values)
+ for i, value in enumerate(axis.values):
+ data.extend([value] * colslen)
+ index.extend([i] * colslen)
+ colsdata = [coldata * datalen for coldata in colsdata]
+ colsindex = [colindex * datalen for colindex in colsindex]
+ colsdata.append(data)
+ colsindex.append(index)
+ colslen *= datalen
+ colsdata.reverse()
+ colsindex.reverse()
+ map = zip(*colsindex)
+ for data, axis in zip(colsdata, self.axes):
+ col = axis.col
+ # This nonsense needs to be replaced with a common way of
+ # extracting column metadata.
+ ds.addcolumnfromseq(axis.name, data=data, label=axis.label,
+ coltype=col.coltype, datatype=col.datatype.name,
+ outtrans=col.outtrans,
+ use_outtrans=col.use_outtrans,
+ format_str=col.format_str,
+ all_value=col.all_value)
+ for table in self.tables():
+ ds.addcolumnfromseq(table.name,
+ data=self.to_vector(map, table.data),
+ label=table.label,
+ coltype='scalar', datatype='float')
+ return ds
+
+ def empty_copy(self):
+ """Returns an empty crosstab with the same shape"""
+ crosstab = self.__class__()
+ crosstab.axes = list(self.axes)
+ return crosstab
+
+ def get_shape(self):
+ return dims(self)
+
+ def from_vector(self, map, data, shape):
+ table = fully_masked(shape, typecode=data.typecode())
+ for idx, v in itertools.izip(map, data):
+ if not Numeric.sometrue(idx.mask()):
+ table[idx] = v
+ return table
+
+ def to_vector(self, map, table):
+ size = Numeric.multiply.reduce(self.get_shape())
+ if size != len(map):
+ raise AssertionError('size/shape %r/%d != map len %d' %
+ (self.get_shape(), size, len(map)))
+ v = fully_masked(size, typecode=table.typecode())
+ for i, idx in enumerate(map):
+ v[i] = table[idx]
+ return v
+
+ def collapse_axes_not_in(self, shaped_like):
+ """
+        This method sums frequency columns along any axes not
+ appearing in the target dataset.
+ """
+ foreign_axes = xtab_axes(shaped_like)
+ i = 0
+ while i < len(self.axes):
+ if (i < len(foreign_axes)
+ and self.axes[i].name == foreign_axes[i].name):
+ i += 1
+ else:
+ if len(self.axes) == 1:
+ raise Error('crosstab %r: cannot collapse last axis: %r' %
+ (self.name, self.axes[i].name))
+ del self.axes[i]
+ for table in self.tables():
+ table.data = sum_axis(table.data, i)
+
+ def replicate_axes(self, shaped_like):
+ for i, foreign_axis in enumerate(xtab_axes(shaped_like)):
+ if i == len(self.axes) or self.axes[i].name != foreign_axis.name:
+ self.axes.insert(i, foreign_axis.copy())
+ for table in self.tables():
+ table.data = replicate_axis(table.data,i,len(foreign_axis))
+ assert dims(self) == dims(shaped_like)
+
+ def __getitem__(self, key):
+ return self.table_dict[key]
+
+ def add_table(self, name, data, label=None):
+ shape = self.get_shape()
+ assert shape == data.shape, 'data shape %r != crosstab shape %r' %\
+ (data.shape, shape)
+ table = CrossTabData(name, data, label)
+ self.table_dict[name] = table
+ self.table_order.append(table)
+
+ def tables(self):
+ return self.table_order
+
+ def _display_hook(self):
+ res = []
+ res.append('Axes')
+ for i, axis in enumerate(self.axes):
+ res.append('%3d:%s' % (i, axis.label))
+ for table in self.tables():
+ res.append(table.label or 'None')
+ lines = str(table.data).split('\n')
+ for line in lines:
+ res.append(' ' + line)
+ print '\n'.join(res)
+
+if __name__ == '__main__':
+ from SOOMv0 import datasets
+
+ ds = datasets.dsload('nhds', path='/usr/src/oc/health/SOOM_objects_real')
+ s = ds.summ('agegrp', 'sex')
+    t = CrossTab.from_summset(s)
diff --git a/SOOMv0/Cstats.pyx b/SOOMv0/Cstats.pyx
new file mode 100644
index 0000000..a763faa
--- /dev/null
+++ b/SOOMv0/Cstats.pyx
@@ -0,0 +1,123 @@
+# Except where otherwise stated in comments below (text contained in triple quotes),
+# the contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+
+import math
+
+cdef double _fx(x):
+ """
+    Support function for cdf_gauss(), which returns the probability that an observation
+ from the standard normal distribution is less than or equal to x (that is, x must be
+ a standard normal deviate).
+
+ The following function was adapted from Python code which appears in a paper by Dridi (2003).
+ The programme code is copyright 2003 Dridi and Regional Economics Applications Laboratory,
+ University of Illinois and is used here with the permission of its author.
+
+    This function uses Gauss-Legendre quadrature to provide good accuracy in the tails of the
+    distribution, at the expense of speed - this function is slower than the cdf_norm_RA() function
+    in the accompanying Stats.py module, which uses rational approximations and is faster but
+    quite inaccurate.
+
+ References:
+
+    Dridi, C. (2003): A Short Note on the Numerical Approximation of the Standard Normal
+    Cumulative Distribution and its Inverse, Regional Economics Applications Laboratory,
+    University of Illinois and Federal Reserve Bank of Chicago. Available at
+ http://www2.uiuc.edu/unit/real/d-paper/real03-t-7.pdf or at
+ http://econwpa.wustl.edu/eps/comp/papers/0212/0212001.pdf
+ """
+    # (2/sqrt(pi)) * exp(-x**2): integrating this from 0 to z gives erf(z)
+    return 1.1283791670955125585606992899556644260883*(2.718281828459045090795598298428**(-x**2.0))
+
+cdef double _GLx(double a, double b):
+ """
+    Support function for cdf_gauss(), which returns the probability that an observation
+ from the standard normal distribution is less than or equal to x (that is, x must be
+ a standard normal deviate).
+
+ The following function was adapted from Python code which appears in a paper by Dridi (2003).
+ The programme code is copyright 2003 Dridi and Regional Economics Applications Laboratory,
+ University of Illinois and is used here with the permission of its author.
+
+    This function uses Gauss-Legendre quadrature to provide good accuracy in the tails of the
+    distribution, at the expense of speed - this function is slower than the cdf_norm_RA() function
+    in the accompanying Stats.py module, which uses rational approximations and is faster but
+    quite inaccurate.
+
+ References:
+
+    Dridi, C. (2003): A Short Note on the Numerical Approximation of the Standard Normal
+    Cumulative Distribution and its Inverse, Regional Economics Applications Laboratory,
+    University of Illinois and Federal Reserve Bank of Chicago. Available at
+ http://www2.uiuc.edu/unit/real/d-paper/real03-t-7.pdf or at
+ http://econwpa.wustl.edu/eps/comp/papers/0212/0212001.pdf
+ """
+ cdef double y1, y2, y3, y4, y5, x1, x2, x3, x4, x5, w1, w2, w3, w4, w5, s, h
+ cdef int n, i
+
+ y1=0.0
+ y2=0.0
+ y3=0.0
+ y4=0.0
+ y5=0.0
+
+ x1=-(245.0 + 14.0 * (70.0**0.5))**0.5 / 21.0
+ x2=-(245.0 - 14.0 * (70.0**0.5))**0.5 / 21.0
+ x3=0.0
+ x4=-x2
+ x5=-x1
+
+ w1=(322.0 - 13.0 * (70.0**0.5)) / 900.0
+ w2=(322.0 + 13.0 * (70.0**0.5)) / 900.0
+ w3=128.0/225.0
+ w4=w2
+ w5=w1
+
+ # n=4800
+ n = 120
+ s=0.0
+ h=(b-a)/n
+
+ for i from 0 <= i < n:
+ y1=h*x1/2.0+(h+2.0*(a+i*h))/2.0
+ y2=h*x2/2.0+(h+2.0*(a+i*h))/2.0
+ y3=h*x3/2.0+(h+2.0*(a+i*h))/2.0
+ y4=h*x4/2.0+(h+2.0*(a+i*h))/2.0
+ y5=h*x5/2.0+(h+2.0*(a+i*h))/2.0
+        s=s+h*(w1*_fx(y1)+w2*_fx(y2)+w3*_fx(y3)+w4*_fx(y4)+w5*_fx(y5))/2.0
+    return s
+
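+# Quadrature note (descriptive comment, not part of the original source):
+# _GLx() integrates _fx over [a, b] with a composite 5-point Gauss-Legendre
+# rule: the interval is split into n subintervals of width h, and each
+# contributes
+#
+#   (h/2) * sum_k w_k * _fx(h*x_k/2 + midpoint_i)
+#
+# where x_k are the roots of the degree-5 Legendre polynomial and w_k the
+# matching weights (hard-coded above). Since _fx(t) = (2/sqrt(pi))*exp(-t**2),
+# _GLx(0, z) approximates erf(z).
+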
+def cdf_gauss(double x):
+ """
+ Returns the probability that an observation from the standard normal distribution
+ is less than or equal to x (that is, x must be a standard normal deviate).
+
+ The following function was adapted from Python code which appears in a paper by Dridi (2003).
+ The programme code is copyright 2003 Dridi and Regional Economics Applications Laboratory,
+ University of Illinois and is used here with the permission of its author.
+
+    This function uses Gauss-Legendre quadrature to provide good accuracy in the tails of the
+    distribution, at the expense of speed - this function is slower than the cdf_norm_RA() function
+    in the accompanying Stats.py module, which uses rational approximations and is faster but
+    quite inaccurate.
+
+ References:
+
+    Dridi, C. (2003): A Short Note on the Numerical Approximation of the Standard Normal
+    Cumulative Distribution and its Inverse, Regional Economics Applications Laboratory,
+    University of Illinois and Federal Reserve Bank of Chicago. Available at
+ http://www2.uiuc.edu/unit/real/d-paper/real03-t-7.pdf or at
+ http://econwpa.wustl.edu/eps/comp/papers/0212/0212001.pdf
+ """
+ if x >= 0.0:
+ return (1.0 + _GLx(0, x/(2.0**0.5))) / 2.0
+ else:
+ return (1.0 - _GLx(0, -x/(2.0**0.5))) / 2.0
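+
+# Usage sketch (descriptive comment, not part of the original source):
+# cdf_gauss() evaluates the standard normal CDF via (1 +/- erf(|x|/sqrt(2)))/2
+# using the _GLx() quadrature above, e.g.:
+#
+#   cdf_gauss(0.0)         # -> 0.5 exactly, by symmetry
+#   cdf_gauss(1.959964)    # -> approximately 0.975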
diff --git a/SOOMv0/DataSourceColumn.py b/SOOMv0/DataSourceColumn.py
new file mode 100644
index 0000000..7af9be2
--- /dev/null
+++ b/SOOMv0/DataSourceColumn.py
@@ -0,0 +1,128 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+"""
+Classes and functions for loading SOOM datasets from external
+sources. Currently data is loaded from one or more instances of
+the DataSource class - defined below Each DataSource object contains
+a number of DataSourceColumn instances
+"""
+
+# $Id: DataSourceColumn.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/DataSourceColumn.py,v $
+
+import os
+from SOOMv0.Soom import soom
+from SOOMv0 import SourceDataTypes
+
+__all__ = 'DataSourceColumn',
+
+class DataSourceColumn:
+ """
+ Column definition class for DataSource class
+
+ Attributes:
+
+ name Column name [I]
+ datatype FIXME XXX this seems to be unused XXX
+ coltype See Soom.py coltypes
+ desc longer description [I]
+ label column label [I][P]
+ blankval character(s) used to represent missing
+ values in source data
+ errorval character(s) used to represent error values
+ in source data
+
+ For column formatted ASCII files:
+ posbase column positions are zero-based or one-based
+ startpos starting column position for this column
+ of data
+ length length of this column
+
+ For CSV ascii files:
+ ordinalpos the ordinal position of the column (zero
+ or one based)
+
+ For DB queries:
+ dbname if database column name differs from "name"
+
+ Key: [I] inherited by DatasetColumn
+ [P] used for presentation of data
+ """
+
+ def __init__(self,
+ name,
+ label = None,
+# datatype = 'str',
+ coltype = 'categorical',
+ posbase = 0,
+ desc = None,
+ startpos = None,
+ length = None,
+ blankval = None,
+ errorval = None,
+ ordinalpos = None,
+ dbname = None,
+ format = None):
+ # make sure the new DataSourceColumn has a name
+ soom.check_name_ok(name, 'DataSourceColumn')
+ # DatasetColumn definitions
+ # validate some keyword arguments
+# Rules are more complicated than they used to be - fix this later
+# if coltype not in soom.coltypes:
+# raise ValueError, '%s is not a valid column type' % coltype
+
+ if posbase < 0 or posbase > 1:
+ raise ValueError, '%s - posbase must be 0 or 1' % posbase
+
+ self.name = name
+ self.coltype = coltype
+ self.posbase = posbase
+ self.desc = desc
+ self.label = label
+ self.startpos = startpos
+ self.length = length
+ self.blankval = blankval
+ self.errorval = errorval
+ self.ordinalpos = ordinalpos
+ self.dbname = dbname
+ self.format = format
+
+ def set_datatype(self, datatype):
+ self.datatype = datatype
+ if self.format and self.datatype in ('date', 'datetime', 'time'):
+ self.conversion = SourceDataTypes.get_format(self.datatype,
+ self.format)
+ else:
+ self.conversion = SourceDataTypes.get_conversion(self.datatype)
+
+ def __str__(self):
+ """
+ String method to print the definition of a DataSourceColumn
+ """
+ rep = []
+ rep.append("DataSourceColumn definition: %s" % self.name)
+ if self.label is not None:
+ rep.append(" Label: %s" % self.label)
+ if self.desc is not None:
+            rep.append("    Description: %s" % self.desc)
+ if self.startpos is not None:
+ rep.append(" Starting at column position: %s (%s-based)" % \
+ (self.startpos, self.posbase))
+ if self.length is not None:
+ rep.append(" Extending for: %s bytes" % self.length)
+ if self.ordinalpos is not None:
+ rep.append(" Ordinal position: %s" % self.ordinalpos)
+ rep.append(" Column Type: %s" % self.coltype)
+ return os.linesep.join(rep)
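+
+# Illustrative sketch (not part of the original source): defining a column
+# of a fixed-width ASCII source with the attributes documented above:
+#
+#   col = DataSourceColumn('agegrp',
+#                          label='Age group',
+#                          coltype='ordinal',
+#                          startpos=12, length=2, posbase=0,
+#                          blankval='  ')
+#   col.set_datatype('int')    # selects the source conversion function
+#   print col                  # prints the definition via __str__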
diff --git a/SOOMv0/DataTypes.py b/SOOMv0/DataTypes.py
new file mode 100644
index 0000000..7a75442
--- /dev/null
+++ b/SOOMv0/DataTypes.py
@@ -0,0 +1,317 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+"""
+Column datatype specific functionality
+
+Object hierarchy is:
+
+ _BaseDataType
+ _NumericBaseDataType
+ _IntBaseDataType
+ IntDataType
+ LongDataType
+ FloatDataType
+ StrDataType
+ TupleDataType
+ RecodeDataType
+ _DateTimeBaseDataType
+ DateDataType
+ TimeDataType
+ DateTimeDataType
+
+ Classes implement the following attributes and methods:
+
+ pytype Python type, if applicable
+ default_all_value For discrete columns, default all_value
+ default_coltype If not specified by user, gives column
+ type: categorical, ordinal or scalar
+ masked_value Types that use numpy for storage use this
+ for rows with no value.
+ soomarray_type Types that use soomarray for storage use the
+ given soomarray class.
+ file_extension Extension used on persistent data files.
+
+ get_array(filename, size)
+ Return an array-like object of the appropriate type.
+ get_mask(size) Return a numpy mask, for types that use numpy
+ as_pytype(value) Cast value to an appropriate pytype or
+ raise ValueError
+ store_data(data, mask, filename)
+ If filename is not None, write data (and mask)
+ to permanent storage, return pointer to
+ persistent storage object
+ load_data(filename) return pointer to persistent storage object
+ take(data, want) extract the values associated with record ids in
+ "want" list.
+
+$Id: DataTypes.py 2860 2007-10-19 04:37:25Z andrewm $
+$Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/DataTypes.py,v $
+"""
+
+import sys
+import os
+import mx
+import math
+# implements memory-mapped Numpy arrays stored in BLOBs
+from soomarray import ArrayDict, ArrayFile, ArrayString, ArrayTuple,\
+ ArrayDateTime, ArrayDate, ArrayTime, get_recode_array
+import Numeric
+import MA
+from SOOMv0 import common
+
+class _BaseDataType:
+ pytype = None
+ default_all_value = '_all_'
+ default_coltype = 'categorical'
+ masked_value = None
+ soomarray_type = None
+ default_format_str = '%s'
+ is_numeric = False
+ is_datetime = False
+ is_multivalue = False
+
+ def _fnext(self, filename):
+ if filename:
+ return '%s.%s' % (filename, self.file_extension)
+
+ def get_array(self, filename, size):
+ if filename:
+ try:
+ os.unlink(self._fnext(filename))
+ except OSError:
+ pass
+ return self.soomarray_type(self._fnext(filename), 'w')
+ else:
+ return [None] * size
+
+ def get_mask(self, size):
+ return None # No mask needed for this type
+
+ def as_pytype(self, value):
+ if self.pytype is None:
+ return value
+ else:
+ return self.pytype(value)
+
+ def do_format(self, v, format_str):
+ if v is None:
+ return str(None)
+ else:
+ try:
+ return format_str % v
+ except TypeError:
+ return str(v)
+ except ValueError, e:
+ raise ValueError('%r %% %r: %s' % (format_str, v, e))
+
+ def derive_format_str(self, data):
+ return None
+
+ def store_data(self, data, mask, filename=None):
+ if filename:
+ return None # Flag load on demand
+ else:
+ return data
+
+ def load_data(self, filename):
+ return self.soomarray_type(self._fnext(filename), 'r')
+
+ def take(self, data, want):
+ if hasattr(data, 'take'):
+ return data.take(want)
+ else:
+ return [data[i] for i in want]
+
+ def __str__(self):
+ return self.name # Historical
+
+class _NumericBaseDataType(_BaseDataType):
+ file_extension = 'SOOMblobstore'
+ masked_value = 0
+ is_numeric = True
+
+ def get_array(self, filename, size):
+ return Numeric.zeros(size, typecode=self.numeric_type)
+
+ def get_mask(self, size):
+ return Numeric.zeros(size, typecode=MA.MaskType)
+
+ def store_data(self, data, mask, filename=None):
+ if mask is None:
+ data = Numeric.array(data, typecode=self.numeric_type)
+ else:
+ data = MA.array(data, typecode=self.numeric_type, mask=mask)
+ if filename:
+ try:
+ os.unlink(self._fnext(filename))
+ except OSError:
+ pass
+ data_blob = ArrayDict(self._fnext(filename), 'w+')
+ data_blob['data'] = data
+ del data_blob # this writes the data to disc -
+ # we really need a .sync() method...
+ return None # Flag for load on demand
+ else:
+ return data
+
+ def load_data(self, filename):
+ return ArrayDict(self._fnext(filename), 'r')['data']
+
+ def take(self, data, want):
+ if type(data) is MA.MaskedArray:
+ return MA.take(data, want)
+ elif type(data) is Numeric.ArrayType:
+ return Numeric.take(data, want)
+ else:
+ return _BaseDataType.take(self, data, want)
+
+class _IntBaseDataType(_NumericBaseDataType):
+ pytype = int
+ default_all_value = -sys.maxint
+ default_coltype = 'categorical'
+ numeric_type = Numeric.Int
+ default_format_str = '%d'
+
+ def as_pytype(self, value):
+ try:
+ return self.pytype(value)
+ except ValueError:
+ return self.pytype(round(float(value)))
+
+class IntDataType(_IntBaseDataType):
+ name = 'int'
+
+class LongDataType(_IntBaseDataType):
+ name = 'long'
+
+class FloatDataType(_NumericBaseDataType):
+ name = 'float'
+ pytype = float
+ default_all_value = float(-sys.maxint) # AM - ewww
+ default_coltype = 'scalar'
+ masked_value = 0.0
+ numeric_type = Numeric.Float
+ default_format_str = '%10.10g'
+
+ def derive_format_str(self, data):
+ if len(data) == 0:
+ return self.default_format_str
+ width = 10
+ max = MA.maximum(MA.absolute(data))
+ if max > 0:
+ decimals = width - math.log(max, 10) - 2
+ if decimals < 0:
+ decimals = 0
+ elif max >= 0.00001:
+ decimals = 8
+ else:
+ return '%10.10g'
+ return '%%%d.%df' % (width, decimals)
+
+class StrDataType(_BaseDataType):
+ name = 'str'
+ masked_value = ''
+ soomarray_type = ArrayString
+ file_extension = 'SOOMstringarray'
+
+class TupleDataType(_BaseDataType):
+ name = 'tuple'
+ pytype = None
+ masked_value = ()
+ soomarray_type = ArrayTuple
+ file_extension = 'SOOMtuplearray'
+ is_multivalue = True
+
+class RecodeDataType(_BaseDataType):
+ name = 'recode'
+ file_extension = 'SOOMrecodearray'
+
+ def get_array(self, filename, size):
+ return get_recode_array(size, self._fnext(filename), 'w')
+
+ def load_data(self, filename):
+ return get_recode_array(0, self._fnext(filename))
+
+class _DateTimeBaseDataType(_BaseDataType):
+ pytype = mx.DateTime.DateTimeType
+ default_all_value = mx.DateTime.DateTime(0,1,1,0,0,0.0)
+ default_coltype = 'ordinal'
+ is_datetime = True
+
+ def as_pytype(self, value):
+ if type(value) == self.pytype:
+ return value
+ raise ValueError('bad data type')
+
+ def do_format(self, v, format_str):
+ try:
+ return v.strftime(format_str)
+ except AttributeError:
+ return str(v)
+
+class DateDataType(_DateTimeBaseDataType):
+ name = 'date'
+ soomarray_type = ArrayDate
+ file_extension = 'SOOMdatearray'
+ default_format_str = '%Y-%m-%d'
+
+class TimeDataType(_DateTimeBaseDataType):
+ name = 'time'
+ soomarray_type = ArrayTime
+ file_extension = 'SOOMtimearray'
+ default_format_str = '%H:%M:%S'
+
+class DateTimeDataType(_DateTimeBaseDataType):
+ name = 'datetime'
+ soomarray_type = ArrayDateTime
+ file_extension = 'SOOMdatetimearray'
+ default_coltype = 'scalar'
+ default_format_str = '%Y-%m-%d %H:%M:%S'
+
+class RecodeDateDataType(DateDataType):
+ name = 'recodedate'
+ file_extension = 'SOOMrecodearray'
+
+ def get_array(self, filename, size):
+ return get_recode_array(size, self._fnext(filename), 'w')
+
+ def load_data(self, filename):
+ return get_recode_array(0, self._fnext(filename))
+
+class Datatypes(list):
+ def _display_hook(self):
+ for dt in self:
+ print '%-12r: pytype %r, file extension: %r' %\
+ (dt.name, dt.pytype, dt.file_extension)
+ print '%12s default coltype: %r' %\
+ ('', dt.default_coltype)
+
+datatypes = Datatypes((
+ IntDataType, LongDataType, FloatDataType,
+ StrDataType, TupleDataType, RecodeDataType,
+ DateDataType, TimeDataType, DateTimeDataType, RecodeDateDataType,
+))
+
+datatype_by_name = dict([(c.name, c) for c in datatypes])
+
+def get_datatype_by_name(datatype):
+ if type(datatype) is type:
+ datatype = datatype.__name__ # Historical bumph.
+ elif isinstance(datatype, _BaseDataType):
+ return datatype.__class__()
+ try:
+ return datatype_by_name[datatype.lower()]()
+ except KeyError:
+ raise common.Error('%r is not a valid column data type' % datatype)
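+
+# Usage sketch (descriptive comment, not part of the original source):
+# columns resolve their datatype through get_datatype_by_name(), which
+# accepts a name string, a Python type, or a datatype instance, and always
+# returns a fresh instance:
+#
+#   dt = get_datatype_by_name('float')
+#   dt.do_format(3.14159, dt.default_format_str)    # formats via '%10.10g'
+#   dt = get_datatype_by_name(int)                  # type objects also accepted
+#
+# Unknown names raise common.Error rather than silently defaulting.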
diff --git a/SOOMv0/Dataset.py b/SOOMv0/Dataset.py
new file mode 100644
index 0000000..88eb305
--- /dev/null
+++ b/SOOMv0/Dataset.py
@@ -0,0 +1,441 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+"""
+Classes to define DataSets and their columns. Each Dataset instance
+contains a number of DatasetColumn instances, which hold both
+metadata about themselves (no separate metadata class for columns
+any more) as well as their actual data and inverted indexes on it.
+"""
+# $Id: Dataset.py 2901 2007-11-20 04:52:21Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/Dataset.py,v $
+
+import sys
+import os
+import cPickle
+import time
+import tempfile
+import fcntl
+import errno
+import shutil
+from mx import DateTime
+from MA import Numeric
+from SOOMv0.Soom import soom
+from SOOMv0 import Utils
+from SOOMv0.BaseDataset import BaseDataset
+from SOOMv0.DatasetSummary import Summarise
+from SOOMv0.DatasetColumn import get_dataset_col
+from SOOMv0.Filter import DatasetFilters, sliced_ds
+from soomarray import ArrayDict
+from SOOMv0.SummaryStats import stat_method_help
+from SOOMv0.common import *
+
+
+__all__ = 'Dataset',
+
+class Dataset(BaseDataset):
+ def __init__(self, name,
+ label=None, desc=None, path=None,
+ backed = False,
+ rowsas = 'dict',
+ generations = 24, **kwargs):
+ self.generation = 0
+ self.locked = False
+ self.generations = generations
+ self.path = path
+ BaseDataset.__init__(self, name, label, desc, **kwargs)
+ self.backed = backed
+ self.rowsas = rowsas
+ self.filters = DatasetFilters(self)
+ self.loader = None
+
+ def __getstate__(self):
+ odict = self.__dict__.copy() # copy the dict since we may be changing it
+ odict['locked'] = False
+ odict['filters'] = None
+ odict['loader'] = None
+ return odict
+
+ def __setstate__(self, dict):
+ self.__dict__.update(dict)
+ self.loader = None
+ self.filters = DatasetFilters(self)
+
+ def load_notify(self, path):
+ self.path = path
+ self.filters.load_metadata()
+
+ def object_path(self, object_name, mkdirs=False, gen=False, *names):
+ if gen:
+ path = os.path.join(self.path, self.name,
+ str(self.generation), object_name)
+ else:
+ path = os.path.join(self.path, self.name, object_name)
+ if mkdirs:
+ Utils.helpful_mkdir(os.path.dirname(path))
+ return path
+
+ def lock(self):
+ if not soom.writepath:
+ raise Error('soom.writepath not set, cannot lock dataset')
+ if not self.locked:
+ self.path = soom.writepath
+ Utils.helpful_mkdir(os.path.join(self.path, self.name))
+ lock_file_name = os.path.join(self.path, self.name, '.update_lck')
+ fd = os.open(lock_file_name, os.O_WRONLY|os.O_CREAT, 0666)
+ try:
+ fcntl.lockf(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+ except IOError, (eno, estr):
+ os.close(fd)
+ if eno in (errno.EAGAIN, errno.EACCES, errno.EWOULDBLOCK):
+ raise Error('Dataset already locked')
+ raise
+ self.locked, self._lock_fd = True, fd
+
+ def unlock(self):
+ if self.locked:
+ os.close(self._lock_fd)
+ self.locked = False
+ del self._lock_fd
+
+ def assert_locked(self):
+ if self.backed and not self.locked:
+ raise Error('dataset must be locked for this operation')
+
+ def new_generation(self):
+ if self.backed:
+ self.assert_locked()
+ BaseDataset.clear(self)
+ self.date_created = DateTime.now()
+ self.generation += 1
+ if self.backed:
+ if self.generation - self.generations >= 0:
+ gendir = os.path.join(self.path, self.name,
+ str(self.generation - self.generations))
+ shutil.rmtree(gendir, ignore_errors=True)
+
+ def rename_dataset(self, newname):
+ self.assert_locked()
+ BaseDataset.rename_dataset(self, newname)
+ os.rename(os.path.join(self.path, self.name),
+ os.path.join(self.path, newname))
+ self.save()
+
+ def delete_dataset(self):
+ # XXX Future work
+ raise NotImplementedError
+
+ def save(self):
+ if self.locked and soom.writepath:
+ self.date_updated = DateTime.now()
+ self.path = soom.writepath
+ Utils.helpful_mkdir(os.path.join(self.path, self.name))
+ fd, filename = tempfile.mkstemp('', '.soom', self.path)
+ try:
+ # We could use dumps() and os.write, but cPickle.dumps()
+ # uses cStringIO, which seems like a waste.
+ f = os.fdopen(fd, 'w+b')
+ try:
+ cPickle.dump(self, f, -1)
+ finally:
+ f.close()
+ real_filename = self.object_path(soom.metadata_filename)
+ os.chmod(filename, 0444)
+ os.rename(filename, real_filename)
+ soom.info('Dataset %r saved to filename %s' %\
+ (self.name, real_filename))
+ finally:
+ try:
+ os.close(fd)
+ except OSError:
+ pass
+ try:
+ os.unlink(filename)
+ except OSError:
+ pass
+ else:
+ raise Error("No lock on dataset %r or no soom.writepath - not saved." % self.name)
+
+ def derivedcolumn(self, dername,
+ dercols=None, derargs=None, derfunc=None, **kwargs):
+ """
+ Method to create a new, derived (calculated) column, using
+ a supplied function and other columns as arguments.
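+
+ A minimal sketch (column names hypothetical): derfunc receives the
+ data vectors of the dercols, in order, and may return either a
+ data vector or a (data, mask) pair:
+
+ ds.derivedcolumn('bmi', dercols=('weight', 'height'),
+ derfunc=lambda w, h: w / h ** 2,
+ label='Body mass index',
+ coltype='scalar', datatype='float')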
+ """
+ st = time.time()
+ data = derfunc(*[self.get_column(name).data for name in dercols])
+ dt = time.time()
+ if isinstance(data, tuple) and len(data) == 2:
+ data, mask = data
+ else:
+ mask = None
+ col = self.addcolumnfromseq(dername, data, mask, **kwargs)
+ for name in dercols:
+ # Contain memory usage
+ self.get_column(name).unload()
+ et = time.time()
+ if (dt - st) < 0.001:
+ soom.info('Creating and storing derived column %s in dataset %s took %.3f' %\
+ (dername, self.name, et - st))
+ else:
+ soom.info('Creating derived column %s in dataset %s took %.3f, store took %.3f' %\
+ (dername, self.name, dt - st, et - dt))
+ soom.mem_report()
+ return col
+
+ def describe(self, detail=ALL_DETAIL, date_fmt=None):
+ d = BaseDataset.describe(self, detail, date_fmt=date_fmt)
+ d.add('ds', SOME_DETAIL, 'Disc backed', yesno(self.backed))
+ if self.backed and hasattr(self, 'path'):
+ d.add('ds', SOME_DETAIL, 'Path', self.path)
+ d.add('ds', SOME_DETAIL, 'Generation', self.generation)
+ d.add('ds', SOME_DETAIL, 'Generations retained', self.generations)
+ d.add('ds', SOME_DETAIL, 'Created by SOOM version', self.soom_version)
+ return d
+
+ def __getitem__(self, index):
+ if type(index) is int:
+ return dict([(col.name, col.do_outtrans(col[index]))
+ for col in self.get_print_columns()])
+ elif type(index) is slice:
+ return sliced_ds(self, index)
+ else:
+ try:
+ return self._column_dict[index]
+ except KeyError:
+ raise KeyError(index)
+
+ def unload(self):
+ """Unload data and inverted forks for all columns."""
+ for col in self.get_columns():
+ col.unload()
+
+ def filter(self, expr=None, **kwargs):
+ """Create a new (optionally named) dataset filter"""
+ return self.filters.filter(expr=expr, **kwargs)
+
+ def makefilter(self, name, expr, **kwargs):
+ """Legacy filter interface - use ds.filter(...) instead"""
+ return self.filters.filter(name=name, expr=expr, **kwargs)
+
+ def delfilter(self, filtername):
+ """
+ Method to remove a filter's metadata and record_ids from
+ a DataSet.
+ """
+ self.filters.delete(filtername)
+
+ def load_filter(self, filtername):
+ """
+ Method to load (and return) a named filter for this dataset.
+ """
+ try:
+ return self.filters[filtername]
+ except KeyError:
+ raise Error('load_filter(): no filter %r' % filtername)
+
+ def initialise(self):
+ import warnings
+ warnings.warn('.initialise() method is deprecated, use .loaddata_initialise() instead', DeprecationWarning, stacklevel=2)
+ self.loaddata_initialise()
+
+ def loaddata_initialise(self, key_column=None, update_time_column=None):
+ """
+ Initialise a DataSet's loader in preparation for loading data into it.
+
+ Arguments (depends on loader being used)
+
+ key_column for incremental loading, the unique primary
+ key column. Source data is kept between loads,
+ with new data overlaying old based on key.
+
+ update_time_column if specified, the incremental loader will
+ record the range of dates covered by
+ the cumulative sources.
+
+ """
+ if not self.locked:
+ raise Error('dataset must be locked for this operation')
+ if self.length > 0:
+ raise Error('dataset must be empty for this operation')
+ if self.loader is not None:
+ raise Error('Another load is already in progress?')
+ if key_column:
+ from SOOMv0.CachingLoader import CachingLoader
+ dir = self.object_path('load_cache', mkdirs=True)
+ Utils.helpful_mkdir(dir)
+ self.loader = CachingLoader(self.get_columns(), dir,
+ key_column=key_column,
+ update_time_column=update_time_column)
+ else:
+ from SOOMv0.ChunkingLoader import ChunkingLoader
+ dir = self.object_path('chunks', mkdirs=True)
+ Utils.helpful_mkdir(dir)
+ self.loader = ChunkingLoader(self.get_columns(), dir)
+
+ def finalise(self):
+ import warnings
+ warnings.warn('.finalise() method is deprecated, use .loaddata_finalise() instead', DeprecationWarning, stacklevel=2)
+ self.loaddata_finalise()
+
+ def loaddata_finalise(self):
+ """
+ Finalise the loading of a DataSet: reprocess all the chunks,
+ column by column, into their final form as Numeric arrays. Must
+ be called after the last .loaddata() call.
+ """
+ starttime = time.time() # a slowish operation, so let's time it
+ self.length = self.loader.load_completed()
+ nproc = soom.nproc
+ if nproc < 2:
+ for col, data in self.loader.unchunk_columns():
+ try:
+ col.store_column(data)
+ except:
+ print >> sys.stderr, 'While processing column %r:' % col.name
+ raise
+ else:
+ running = 0
+ for col, data in self.loader.unchunk_columns():
+ if running == nproc:
+ pid, status = os.wait()
+ if not os.WIFEXITED(status) or os.WEXITSTATUS(status):
+ sys.exit(1)
+ running -= 1
+ pid = os.fork()
+ if not pid:
+ try:
+ try:
+ col.store_column(data)
+ os._exit(0)
+ except:
+ print >> sys.stderr, 'While processing column %r:' % col.name
+ raise
+ finally:
+ os._exit(1)
+ else:
+ running += 1
+ col.unload()
+ while 1:
+ try:
+ pid, status = os.wait()
+ except OSError, (eno, estr):
+ if eno == errno.ECHILD:
+ break
+ raise
+
+ self.loader = None
+ stoptime = time.time() # how long did that take?
+ elapsed = stoptime - starttime
+ soom.info('loaddata_finalise() took %.3fs' % elapsed)
+
+ def loaddata(self, datasource,
+ initialise=False, finalise=False,
+ rowlimit=None, chunkrows=0, close=True, **kwargs):
+ """
+ Load rows of data into a DataSet from a data source. The dataset
+ loader must be initialised prior to this, and must be finalised
+ after all data sources are loaded.
+
+ Optional arguments:
+
+ initialise initialise dataset prior to loading (call
+ the loaddata_initialise method).
+
+ close close dataset after loading (prior to
+ finalising)
+
+ finalise finalise dataset after loading (call the
+ loaddata_finalise method).
+
+ chunkrows flush columns to disc every time this many
+ rows have been read.
+
+ rowlimit only load /rowlimit/ rows from this source
+
+ """
+ if initialise:
+ self.loaddata_initialise(**kwargs)
+ datasource.register_dataset_types(self.get_columns())
+ self.length = self.loader.loadrows(datasource.name, datasource,
+ chunkrows, rowlimit)
+ if close:
+ datasource.close()
+ if finalise:
+ self.loaddata_finalise()
+
+
+ # AM - Subsetting is currently not functional. Filtered Datasets should
+ # largely replace them. At some future point, the ability to deep copy
+ # datasets will be added (but first we need per-user workspaces).
+# def subset(self, subsetname, label=None, keepcols=None, **kwargs):
+# """
+# This method creates a complete physical subset of a
+# DataSet, both row-wise using a filter and column-wise
+# if required. It should really use a subclass of Dataset,
+# which incorporates extra metadata about where the Subset
+# was subsetted from. The principle is that every data object
+# (including filters etc) should know where it came from, so
+# it can update itself if its parent(s) have updated themselves
+# (as well as for data documentation/audit trail purposes).
+# """
+# filterset = self.filters.filter_dataset(kwargs)
+# newsubset = Dataset(subsetname, label=label, **kwargs)
+# newsubset.all_record_ids = Numeric.arrayrange(len(filterset),
+# typecode=Numeric.Int)
+# # This is bad - we should have a "make_column_from_column" method
+# # on the column class.
+# copy_attrs = ('label', 'all_value', 'all_label', 'datatype', 'coltype',
+# 'outtrans', 'use_outtrans', 'maxoutlen', 'missingvalues',
+# 'calculatedby' , 'calculatedargs')
+# for col in self.get_columns(keepcols):
+# try:
+# data = col.data.filled()
+# mask = col.data.mask()
+# except:
+# data = col.data
+# mask = None
+#
+# colargs = dict([(attr, getattr(col, attr)) for attr in copy_attrs])
+# newsubset.addcolumnfromseq(col.name, data, mask, **colargs)
+# return newsubset
+
+
+def load_filter(dataset, filtername):
+ """
+ Function to load a filter vector for a data set. Delegates to
+ load_filter method of the DataSet class
+ """
+ return dataset.load_filter(filtername)
+
+
+class SummarisedDataset(Dataset):
+ """
+ A Dataset with some additional info about how the summary
+ was generated.
+ """
+ def __init__(self, name, summ_label=None, filter_label=None, **kwargs):
+ Dataset.__init__(self, name, summary=True, **kwargs)
+ self.summ_label = summ_label
+ self.filter_label = filter_label
+ self.stat_methods = None
+
+ def get_method_statcolname(self, method):
+ return self.stat_methods.get_method_statcolname(method)
+
+ def describe(self, detail=ALL_DETAIL, date_fmt=None):
+ d = Dataset.describe(self, detail, date_fmt)
+ d.add('prov', SUB_DETAIL, 'Filter', self.filter_label)
+ d.add('prov', SOME_DETAIL, 'Summarised', self.summ_label)
+ return d
+
diff --git a/SOOMv0/DatasetColumn.py b/SOOMv0/DatasetColumn.py
new file mode 100644
index 0000000..71794b8
--- /dev/null
+++ b/SOOMv0/DatasetColumn.py
@@ -0,0 +1,55 @@
+# vim: set ts=4 sw=4 et:
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: DatasetColumn.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/DatasetColumn.py,v $
+
+from SOOMv0.common import *
+from SOOMv0.DataTypes import get_datatype_by_name
+
+from SOOMv0.ColTypes.base import is_dataset_col
+from SOOMv0.ColTypes.RowOrdinal import RowOrdinalColumn
+from SOOMv0.ColTypes.SearchableText import SearchableTextDatasetColumn
+from SOOMv0.ColTypes.Identity import IdentityDatasetColumn
+from SOOMv0.ColTypes.Scalar import ScalarDatasetColumn, WeightingDatasetColumn
+from SOOMv0.ColTypes.Discrete import CategoricalDatasetColumn, OrdinalDatasetColumn
+
+
+column_types = [
+ IdentityDatasetColumn, CategoricalDatasetColumn, OrdinalDatasetColumn,
+ ScalarDatasetColumn, WeightingDatasetColumn, SearchableTextDatasetColumn,
+]
+
+coltype_by_name = dict([(c.coltype, c) for c in column_types])
+coltype_by_name['noncategorical'] = coltype_by_name['identity']
+
+def get_coltype(coltype):
+ try:
+ return coltype_by_name[coltype]
+ except KeyError:
+ raise Error('%r is not a valid column type' % coltype)
+
+def get_dataset_col(*args, **kwargs):
+ """
+ Factory to produce appropriate dataset column instance, given coltype.
+ """
+ datatype = get_datatype_by_name(kwargs.get('datatype', 'str'))
+ coltype = kwargs.pop('coltype', None)
+ if coltype is None or coltype in ('date', 'time', 'datetime'):
+ coltype = datatype.default_coltype
+ coltype = get_coltype(coltype)
+ kwargs['datatype'] = datatype
+ return coltype(*args, **kwargs)
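+
+# A usage sketch (keywords illustrative; remaining kwargs are passed
+# through to the column class selected by 'coltype'):
+#
+# col = get_dataset_col(name='age', label='Age (years)',
+# coltype='scalar', datatype='int')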
+
diff --git a/SOOMv0/DatasetSummary.py b/SOOMv0/DatasetSummary.py
new file mode 100644
index 0000000..4a677aa
--- /dev/null
+++ b/SOOMv0/DatasetSummary.py
@@ -0,0 +1,419 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: DatasetSummary.py 2859 2007-10-18 07:45:37Z andrewm $
+
+# Standard Library
+import time
+# 3rd Party
+import Numeric
+# Application
+from SOOMv0 import Utils
+from SOOMv0 import SummaryStats
+from SOOMv0 import SummaryCond
+from SOOMv0.common import *
+from SOOMv0.Soom import soom
+from SOOMv0.SummaryProp import calc_props
+import soomfunc
+
+class DatasetTakeCol:
+ """
+ Column object returned by DatasetTake
+ """
+ def __init__(self, col, cellrows):
+ if len(col.data) > 0 and len(cellrows) > 0:
+ self.data = col.take(cellrows)
+ else:
+ self.data = []
+ self.name = col.name
+ self.label = col.label
+
+
+class DatasetTake:
+ """
+ Caching lazy "take" of a dataset
+ """
+ def __init__(self, dataset, cellrows):
+ self.dataset = dataset
+ self.cellrows = cellrows
+ self.colvectors = {}
+
+ def __getitem__(self, colname):
+ try:
+ return self.colvectors[colname]
+ except KeyError:
+ col = self.dataset.get_column(colname)
+ coltake = DatasetTakeCol(col, self.cellrows)
+ self.colvectors[colname] = coltake
+ return coltake
+
+ def __len__(self):
+ return len(self.cellrows)
+
+
+class SummCondCol:
+ """
+ Summary helper class to manage the values associated with a
+ discrete conditioning column.
+ """
+ def __init__(self, dataset, index, condcol):
+ self.name = condcol.colname
+ self.index = index
+ self.col = dataset.get_column(self.name)
+ self.use_outtrans = True
+ self.suppress_set = None
+ self.key_order = None
+ condcol.apply(self)
+ if self.suppress_set is None:
+ self.suppress_set = {}
+ for v in self.inverted.keys():
+ if v not in self.outtrans:
+ trans = self.col.do_outtrans(v)
+ if trans != v:
+ self.outtrans[v] = trans
+
+ def get_metadata(self):
+ meta = {}
+ for attr, srccol_value in self.col.get_metadata().items():
+ if attr in ('multisourcecols', 'heterosourcecols'):
+ continue
+ condcol_value = getattr(self, attr, None)
+ if attr == 'datatype':
+ if self.col.is_multivalue():
+ condcol_value = 'recode'
+ meta[attr] = condcol_value or srccol_value
+ return meta
+
+ def veckey_pairs(self, zeros):
+ """ Returns a list of (value, row ids, suppress, condcol) tuples """
+ return [(key, self.inverted[key], key in self.suppress_set, self)
+ for key in self.key_order
+ if zeros or len(self.inverted[key]) > 0]
+
+
+class SummCondCols(list):
+ """
+ A list representing the summary "conditioning columns"
+ """
+ def extract_args(self, dataset, args):
+ args_remain = []
+ cols_seen = {}
+ for arg in args:
+ if type(arg) in (unicode, str):
+ arg = SummaryCond.condcol(arg)
+ if isinstance(arg, SummaryCond.condcol):
+ if cols_seen.has_key(arg.colname):
+ raise Error('Column %r appears more than once' %
+ arg.colname)
+ cols_seen[arg.colname] = True
+ self.append(SummCondCol(dataset, len(self), arg))
+ elif isinstance(arg, SummaryCond.CondColArg):
+ raise Error("Use condcol('colname', %s(...)) instead!" %
+ arg.__class__.__name__)
+ else:
+ args_remain.append(arg)
+ return self, args_remain
+
+ def cols(self):
+ return [condcol.col for condcol in self]
+
+ def names(self):
+ return [condcol.name for condcol in self]
+
+ def veckey_pairs(self, zeros):
+ return [condcol.veckey_pairs(zeros) for condcol in self]
+
+ def __repr__(self):
+ return '%s(%s)' % (self.__class__.__name__,
+ ', '.join([c.name for c in self]))
+
+
+class TempSummaryColumn:
+ """
+ Temporary column object for results of summarisation
+ """
+ def __init__(self, metadata):
+ self.__dict__.update(metadata)
+ self.data = []
+
+ def fromargs(cls, name, label, datatype='float', coltype='scalar', **kwargs):
+ return cls(dict(name=name, label=label, datatype=datatype,
+ coltype=coltype, **kwargs))
+ fromargs = classmethod(fromargs)
+
+ def fromcondcol(cls, condcol):
+ return cls(condcol.get_metadata())
+ fromcondcol = classmethod(fromcondcol)
+
+ def filter_rows(self, vector):
+ self.data = [self.data[i] for i in vector]
+
+ def todict(self):
+ metadata = self.__dict__.copy()
+ if not metadata['label']:
+ metadata['label'] = metadata['name']
+ metadata['mask'] = [v is None for v in self.data]
+ return metadata
+
+
+class TempSummarySet(dict):
+ """
+ Temporary dataset for results of summarisation prior to the
+ result being turned into a real dataset object.
+ """
+ __slots__ = ('marginal_total_idx', 'marginal_total_rows',
+ 'suppressed_rows', 'colorder')
+ def __init__(self):
+ self.marginal_total_idx = {}
+ self.marginal_total_rows = []
+ self.suppressed_rows = []
+ self.colorder = []
+
+ def _addcol(self, col):
+ self[col.name] = col
+ self.colorder.append(col.name)
+ return col
+
+ def addcolumn(self, name, *args, **kwargs):
+ return self._addcol(TempSummaryColumn.fromargs(name, *args, **kwargs))
+
+ def addcolumnfromcondcol(self, condcol):
+ return self._addcol(TempSummaryColumn.fromcondcol(condcol))
+
+ def suppress_rows(self, suppress_marginal_totals=False):
+ """ Remove marginal totals and suppressed rows from vectors """
+ if (not (suppress_marginal_totals and self.marginal_total_rows)
+ and not self.suppressed_rows):
+ return
+ all_rows = Numeric.arrayrange(len(self.values()[0].data))
+ suppressed_rows = self.suppressed_rows
+ if suppress_marginal_totals:
+ suppressed_rows = soomfunc.union(suppressed_rows,
+ self.marginal_total_rows)
+ non_mt_rows = soomfunc.outersect(all_rows, suppressed_rows)
+ for col in self.values():
+ col.filter_rows(non_mt_rows)
+
+ def columntodataset(self, dataset):
+ for colname in self.colorder:
+ dataset.addcolumnfromseq(**self[colname].todict())
+
+
+class SummaryRow(object):
+ """
+ A temporary object yielded by the Summarise.yield_rows() method
+ """
+ __slots__ = (
+ 'colnames', 'colvalues', 'count', 'extract', 'level',
+ 'suppress', 'type_string',
+ )
+
+ def __str__(self):
+ colvals = ['%s=%s' % (c, v)
+ for c, v in zip(self.colnames, self.colvalues)]
+ return 'lvl %d %s (%d rows)' % (self.level, ', '.join(colvals),
+ self.count)
+
+
+class Summarise:
+ def __init__(self, dataset, *args, **kwargs):
+ self.dataset = dataset
+ # process keyword args
+ self.levels = kwargs.pop('levels', None)
+ self.allcalc = kwargs.pop('allcalc', False)
+ self.nomt = kwargs.pop('nomt', False) # Suppress marginal totals
+ self.proportions = kwargs.pop('proportions', False)
+ self.default_weightcol = kwargs.pop('weightcol', self.dataset.weightcol)
+ self.suppress_by_col = None
+ self.zeros = kwargs.pop('zeros', False)
+ suppress = kwargs.pop('suppress', None)
+ if suppress:
+ self.suppress_by_col = {}
+ for colname, col_values in suppress.items():
+ value_map = dict([(v, None) for v in col_values])
+ self.suppress_by_col[colname] = value_map
+ self.filtered_ds = self.dataset.filter(kwargs=kwargs)
+
+ # process positional args, separating conditioning columns from
+ # statistical methods.
+ self.stat_methods, args = SummaryStats.extract(args,
+ self.default_weightcol)
+ self.stat_methods.check_args(dataset)
+ self.condcols, args = SummCondCols().extract_args(self.filtered_ds,
+ args)
+ Utils.assert_args_exhausted(args)
+
+ if self.proportions:
+ self.allcalc = True
+ if self.allcalc:
+ self.levels = range(len(self.condcols)+1)
+ elif self.levels is None:
+ self.levels = [len(self.condcols)]
+
+ def yield_rows(self):
+ row = SummaryRow()
+ isect_time = 0.0
+ for item in Utils.combinations(*self.condcols.veckey_pairs(self.zeros)):
+ row.level = len(item)
+ row.suppress = False
+ if row.level in self.levels:
+ row.type_string = ['0'] * len(self.condcols)
+ colnames = []
+ colvalues = []
+ intersect_rows = []
+ for var_val, var_rows, suppress, condcol in item:
+ row.type_string[condcol.index] = '1'
+ intersect_rows.append(var_rows)
+ colnames.append(condcol.name)
+ colvalues.append(var_val)
+ if suppress:
+ row.suppress = True
+ isect_start = time.time()
+ if len(intersect_rows) == 0:
+ row.count = len(self.filtered_ds)
+ row.extract = self.filtered_ds
+ else:
+ if len(intersect_rows) == 1:
+ cellrows = intersect_rows[0]
+ else:
+ cellrows = soomfunc.intersect(*intersect_rows)
+ row.count = len(cellrows)
+ row.extract = DatasetTake(self.dataset, cellrows)
+ isect_time += time.time() - isect_start
+ row.colnames = tuple(colnames)
+ row.colvalues = tuple(colvalues)
+ yield row
+ soom.info('Summarise intersect() time: %.3f' % isect_time)
+
+ def as_dict(self):
+ start_time = time.time()
+
+ freqcol = '_freq_'
+ if self.proportions and self.default_weightcol:
+ # proportions code needs to know weighted frequency
+ wgtfreq_method = SummaryStats.freq()
+ self.stat_methods.append(wgtfreq_method)
+ freqcol = self.stat_methods.get_method_statcolname(wgtfreq_method)
+
+ summaryset = TempSummarySet()
+ summaryset.addcolumn('_freq_', 'Frequency', 'int', 'weighting')
+ summaryset.addcolumn('_level_', 'Level', 'int', 'scalar')
+ summaryset.addcolumn('_type_', 'Summary type', 'str', 'categorical')
+ summaryset.addcolumn('_condcols_', 'Conditioning Columns',
+ 'tuple', 'categorical')
+ for condcol in self.condcols:
+ summaryset.addcolumnfromcondcol(condcol)
+ _freq = summaryset['_freq_'].data
+ _level = summaryset['_level_'].data
+ _type = summaryset['_type_'].data
+ _condcols = summaryset['_condcols_'].data
+ self.stat_methods.add_statcols(self.dataset, summaryset)
+ row_ordinal = -1
+ for row in self.yield_rows():
+ row_ordinal += 1
+ _freq.append(row.count)
+ _level.append(row.level)
+ _type.append(''.join(row.type_string))
+ _condcols.append(row.colnames)
+ for colname, colvalue in zip(row.colnames, row.colvalues):
+ summaryset[colname].data.append(colvalue)
+ if row.suppress:
+ summaryset.suppressed_rows.append(row_ordinal)
+ if row.level != len(self.condcols):
+ mtvals = []
+ for condcol in self.condcols:
+ if condcol.name not in row.colnames:
+ colvalue = condcol.col.all_value
+ summaryset[condcol.name].data.append(colvalue)
+ mtvals.append(summaryset[condcol.name].data[-1])
+ summaryset.marginal_total_idx[tuple(mtvals)] = row_ordinal
+ summaryset.marginal_total_rows.append(row_ordinal)
+ self.stat_methods.calc(summaryset, row.extract)
+
+ if self.proportions:
+ allvals = [col.all_value for col in self.condcols.cols()]
+ calc_props(summaryset, self.condcols.names(), allvals, freqcol)
+ summaryset.suppress_rows(suppress_marginal_totals=self.nomt)
+ soom.info('Summarise as_dict() time: %.3f' % (time.time() - start_time))
+ return summaryset
+
+
+def summ(self, *args, **kwargs):
+ '''Summarise a Dataset
+
+ summ(conditioning_columns..., stat_methods..., options...)
+
+For example:
+
+ summary_set = dataset.summ('sex', 'agegrp',
+ mean('age'), median('age'),
+ allcalc = True)
+
+Options include:
+
+ name name of summary set
+ label summary set label
+ allcalc calculate all combinations
+ datasetpath for persistent summary sets,
+ the dataset path.
+ filtername apply the named filter
+ levels calculate combinations only at the
+ specified levels, e.g. [2, 3]
+ permanent resulting summary dataset should
+ be written to disk.
+ proportions also calculate proportions of the
+ marginal totals (implies allcalc).
+ zeros include rows for combinations of
+ conditioning column values that
+ match no records.
+ suppress map from column name to values whose
+ rows should be suppressed.
+
+'''
+ from SOOMv0.Dataset import SummarisedDataset
+
+ starttime = time.time()
+ # Method argument parsing
+ label = kwargs.pop('label', None)
+# datasetpath = kwargs.pop('datasetpath', soom.default_object_path)
+ name = kwargs.pop('name', None)
+# permanent = kwargs.pop('permanent', False)
+
+ summarise = Summarise(self, *args, **kwargs)
+ summaryset = summarise.as_dict()
+
+ # print "summaryset:", # debug
+ # print summaryset # debug
+
+ soom.info('Summarise took %.3fs' % (time.time() - starttime))
+
+ if not name:
+ by = ['_by_%s' % condcol.name for condcol in summarise.condcols]
+ name = 'sumof_%s%s' % (self.name, ''.join(by))
+ if not label:
+ label = self.label
+
+ by = [' by %s' % condcol.col.label
+ for condcol in summarise.condcols]
+ summ_label = ''.join(by)
+
+ starttime = time.time()
+ sumset = SummarisedDataset(name, label=label,
+ summ_label=summ_label,
+ filter_label=summarise.filtered_ds.filter_label,
+# path=datasetpath, backed=permanent,
+ weightcol="_freq_",
+ date_created=summarise.filtered_ds.date_created,
+ date_updated=summarise.filtered_ds.date_updated)
+ summaryset.columntodataset(sumset)
+ sumset.stat_methods = summarise.stat_methods
+ sumset.nonprintcols = ('_level_', '_type_', '_condcols_')
+ soom.info('summary dict into dataset took %.3f' % (time.time() - starttime))
+ return sumset
diff --git a/SOOMv0/Datasets.py b/SOOMv0/Datasets.py
new file mode 100644
index 0000000..e0c07ad
--- /dev/null
+++ b/SOOMv0/Datasets.py
@@ -0,0 +1,137 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: Datasets.py 3653 2008-12-24 03:06:36Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/Datasets.py,v $
+
+import os
+import cPickle
+import errno
+from SOOMv0.Soom import soom
+from SOOMv0.Dataset import Dataset
+from SOOMv0.common import *
+
+class FileInfo:
+ def __init__(self, st):
+ self.dev = st.st_dev
+ self.ino = st.st_ino
+ self.mtime = st.st_mtime
+
+ def __eq__(self, other):
+ if other is None:
+ return False
+ return (self.dev == other.dev
+ and self.ino == other.ino
+ and self.mtime == other.mtime)
+
+
+class Datasets:
+ """
+ A cache for loaded datasets
+ """
+
+ def __init__(self):
+ self.datasets = {}
+
+ def _dsload(self, dsname, path):
+ metadata_file = os.path.join(dsname, soom.metadata_filename)
+ path = soom.object_path(metadata_file, path)
+ if not path:
+ raise DatasetNotFound('Unknown dataset %r' % dsname)
+ f = open(os.path.join(path, metadata_file), 'rb')
+ try:
+ c_ds, c_info = self.datasets.get(dsname.lower(), (None, None))
+ f_info = FileInfo(os.fstat(f.fileno()))
+ if f_info == c_info:
+ # Return cached ds if file has not changed
+ return c_ds
+ # Otherwise load the new ds
+ f_ds = cPickle.load(f)
+ finally:
+ f.close()
+ soom.info('Dataset %r loaded.' % dsname)
+ if (hasattr(f_ds, 'soom_version_info')
+ and f_ds.soom_version_info[:2] != version_info[:2]):
+ soom.warning('Dataset created by SOOM %s, this is SOOM %s' %\
+ (f_ds.soom_version, version))
+ f_ds.load_notify(path)
+ self.datasets[dsname.lower()] = f_ds, f_info
+ return f_ds
+
+ def dsload(self, dsname, path=None):
+ """
+ Function to load a stored data set definition (but not all
+ its data) from disc. The data is loaded column by column only
+ as required. The function returns a DataSet object instance.
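+
+ e.g. (hypothetical dataset name, given a registry instance):
+
+ datasets = Datasets()
+ ds = datasets.dsload('nhds')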
+ """
+ ds = self._dsload(dsname, path)
+ # now load all the columns if soom.lazy_column_loading is turned off
+ if not soom.lazy_column_loading:
+ soom.info('Loading columns for dataset %r' % ds.name)
+ for col in ds.get_columns():
+ col.load('data')
+ col.load('inverted')
+ return ds
+
+ def dsunload(self, dsname):
+ """Unloads dataset (and purge from cache)"""
+ if isinstance(dsname, Dataset):
+ dsname = dsname.name
+ try:
+ ds, info = self.datasets.pop(dsname.lower())
+ except KeyError:
+ pass
+ else:
+ ds.unload()
+ soom.info('Dataset %r unloaded.' % dsname)
+
+ def makedataset(self, dsname, path=None, **kwargs):
+ """
+ Factory function to create a new DataSet instance (inheriting
+ metadata from any existing dataset with the same name). The
+ returned dataset is locked for update.
+ """
+ kwargs['backed'] = True
+ try:
+ ds = self._dsload(dsname, path)
+ except DatasetNotFound:
+ ds = Dataset(dsname, **kwargs)
+ ds.lock()
+ soom.info('Dataset %r created.' % dsname)
+ self.datasets[dsname.lower()] = ds, None
+ else:
+ ds.lock()
+ ds.new_generation()
+ return ds
+
+ def subset(self, ds, subsetname, label=None, **kwargs):
+ subset = ds.subset(subsetname, label=label, **kwargs)
+ self.datasets[subset.name.lower()] = subset, None
+ return subset
+
+ def __str__(self):
+ """Prints information about current loaded SOOM datasets/objects"""
+ # To-do: print information about unloaded SOOM datasets present in the
+ # default_object_path
+ rep = ['SOOM datasets currently loaded:']
+ if self.datasets:
+ for ds, info in self.datasets.values():
+ rep.append(' %s (%s)' % (ds.name, ds.label))
+ else:
+ rep.append(' None')
+ return '\n'.join(rep)
+
+ def _display_hook(self):
+ print str(self)
+
diff --git a/SOOMv0/Describe.py b/SOOMv0/Describe.py
new file mode 100644
index 0000000..dd1e7d6
--- /dev/null
+++ b/SOOMv0/Describe.py
@@ -0,0 +1,64 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: Describe.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/Describe.py,v $
+
+from SOOMv0.common import *
+
+class Describe:
+ """
+ Generate a structured string description of some object
+ """
+
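+ # A usage sketch, mirroring how Dataset.describe() drives this
+ # class:
+ #
+ # d = Describe(SOME_DETAIL, 'ds', 'prov')
+ # d.add('ds', SOME_DETAIL, 'Path', '/data/soom')
+ # print str(d)
+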
+ def __init__(self, detail, *sections):
+ self.detail = detail
+ self.sections_order = sections
+ self.sections = {}
+ for name in sections:
+ self.new_section(name)
+
+ def new_section(self, name):
+ self.sections.setdefault(name, [])
+
+ def add(self, section, prio, label, text):
+ if self.detail >= prio and text is not None:
+ # Add some minimal pretty-printing of common types
+ if callable(text):
+ text = '%s.%s()' % (text.__module__, text.__name__)
+ elif hasattr(text, 'items'):
+ if text:
+ limit = 40
+ trans = text.items()
+ trans.sort()
+ trans = ['%s -> %s' % kv for kv in trans[:limit]]
+ if len(trans) < len(text):
+ trans.append('... and %d more values' %
+ (len(text) - len(trans)))
+ text = ', '.join(trans)
+ else:
+ text = '<empty map>'
+ self.sections[section].append((label, text))
+
+ def describe_tuples(self):
+ lines = []
+ for section_name in self.sections_order:
+ lines.extend(self.sections[section_name])
+ return lines
+
+ def describe_str(self):
+ return '\n'.join(['%s: %s' % kv for kv in self.describe_tuples()])
+
+ def __str__(self):
+ return self.describe_str()
diff --git a/SOOMv0/Filter.py b/SOOMv0/Filter.py
new file mode 100644
index 0000000..282e194
--- /dev/null
+++ b/SOOMv0/Filter.py
@@ -0,0 +1,486 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: Filter.py 3701 2009-02-26 05:56:34Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/Filter.py,v $
+
+import os
+import new
+import time
+import errno
+import cPickle
+
+import Numeric, RandomArray
+# implements memory-mapped Numpy arrays stored in BLOBs
+from soomarray import ArrayDict, MmapArray
+import soomfunc
+
+from SOOMv0 import soomparse, Utils
+from SOOMv0.Soom import soom
+from SOOMv0.common import *
+from SOOMv0.PrintDataset import DSFormatter
+from SOOMv0.BaseDataset import BaseDataset
+from SOOMv0.ColTypes.RowOrdinal import FilteredRowOrdinalColumn
+
+from SOOMv0.DatasetColumn import column_types
+
+class FilteredColumnMixin(object):
+ """
+ A column proxy that presents a filtered view of the target
+ column. The class is not a true DatasetColumn object in its own
+ right, rather it is intended to be a lightweight layer on top of
+ a column object (either a real column, or another filter proxy).
+ """
+ __slots__ = ('_src_col', '_all_record_ids')
+
+ def __init__(self, src_col, record_ids):
+ self._src_col = src_col
+ self._all_record_ids = record_ids
+
+ def get_data(self):
+ return self._src_col.take(self._all_record_ids)
+ data = property(get_data)
+
+ def get_inverted(self):
+ inverted = {}
+ for value in self._src_col.inverted.keys():
+ inverted[value] = soomfunc.intersect(self._src_col.inverted[value],
+ self._all_record_ids)
+ return inverted
+ inverted = property(get_inverted)
+
+ def describe(self, detail=ALL_DETAIL):
+ return self._src_col.describe(detail)
+
+ def take(self, rows):
+ return self._src_col.take(Numeric.take(self._all_record_ids, rows))
+
+ def __len__(self):
+ return len(self._all_record_ids)
+
+ def __getitem__(self, i):
+ if type(i) is slice:
+ return self._src_col.take(self._all_record_ids[i])
+ else:
+ return self._src_col.data[self._all_record_ids[i]]
+
+# Magic Be Here - dynamically create versions of coltypes with a
+# FilteredColumnMixin layered on top. The FilteredColumnMixin proxies
+# access to .data and .inverted attributes so as to provide a filtered
+# view of them.
+namespace = globals()
+filter_coltypes_map = {}
+for coltype in column_types:
+ name = 'Filtered' + coltype.__name__
+ cls = new.classobj(name, (FilteredColumnMixin, coltype), {})
+ namespace[name] = filter_coltypes_map[coltype] = cls
+
+def get_filtered_col(col, record_ids):
+ cls = filter_coltypes_map[col.__class__]
+ inst = cls(col, record_ids)
+ inst.__dict__.update(col.__dict__)
+ return inst
+
+class FilteredDataset(BaseDataset):
+ def __init__(self, parent_dataset, record_ids, filter_label=None, **kwargs):
+ m = parent_dataset.get_metadata()
+ m.update(kwargs)
+ BaseDataset.__init__(self, **m)
+ if isinstance(parent_dataset, FilteredDataset):
+ record_ids = Numeric.take(parent_dataset.record_ids, record_ids)
+ filter_label = '%s, %s' % (filter_label, parent_dataset.filter_label)
+ parent_dataset = parent_dataset.parent_dataset
+ self.parent_dataset = parent_dataset
+ self.record_ids = record_ids
+ self.filter_label = filter_label
+ self.addcolumn(FilteredRowOrdinalColumn(self, record_ids))
+ for col in parent_dataset.get_columns():
+ if col.name == 'row_ordinal':
+ continue
+ self.addcolumn(get_filtered_col(col, self.record_ids))
+
+# def __getattr__(self, name):
+# if name[0] == '_':
+# raise AttributeError(name)
+# value = getattr(self.parent_dataset, name)
+# if hasattr(value, 'im_func'):
+# # Rebind bound methods
+# value = new.instancemethod(value.im_func, self, self.__class__)
+# return value
+#
+ def __len__(self):
+ return len(self.record_ids)
+
+
+ def describe(self, detail=ALL_DETAIL, date_fmt=None):
+ d = BaseDataset.describe(self, detail, date_fmt)
+ d.add('prov', SUB_DETAIL, 'Filter', self.filter_label)
+ return d
+
+
+def filtered_ds(parent_dataset, record_ids, name=None, **kwargs):
+ if name is None:
+ name = '%s_filtered' % parent_dataset.name
+ return FilteredDataset(parent_dataset, record_ids, name=name, **kwargs)
+
+
+def sampled_ds(parent_dataset, sample, name=None, filter_label=None, **kwargs):
+ parent_len = len(parent_dataset)
+ samp_len = int(parent_len * sample)
+ record_ids = Numeric.sort(RandomArray.randint(0, parent_len, samp_len))
+ if name is None:
+ name = 'samp%02d_%s' % (sample * 100, parent_dataset.name)
+ if filter_label is None:
+ filter_label = '%.3g%% sample' % (sample * 100)
+ return FilteredDataset(parent_dataset, record_ids, name=name,
+ filter_label=filter_label, **kwargs)
+
+
+def sliced_ds(parent_dataset, idx, name=None, filter_label=None, **kwargs):
+ assert isinstance(idx, slice)
+ indices = idx.indices(len(parent_dataset))
+ record_ids = Numeric.arrayrange(*indices)
+ if name is None:
+ name = 'slice_%d_%d_%d_%s' % (indices + (parent_dataset.name,))
+ if filter_label is None:
+ filter_label = '[%d:%d:%d] slice' % indices
+ return FilteredDataset(parent_dataset, record_ids, name=name,
+ filter_label=filter_label, **kwargs)
+
+
+def sorted_ds(parent_dataset, *args, **kwargs):
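+ """
+ Return a FilteredDataset view of parent_dataset sorted by the
+ given column specs, e.g. (hypothetical columns):
+
+ sorted_ds(ds, 'age desc', 'sex')
+ sorted_ds(ds, 'age desc, sex') # equivalent single-string form
+ """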
+ def _rowcmp(i, j):
+ for coldata, dir in cols:
+ if dir:
+ d = cmp(coldata[i], coldata[j])
+ else:
+ d = cmp(coldata[j], coldata[i])
+ if d:
+ return d
+ return 0
+ name = kwargs.pop('name', None)
+ filter_label = kwargs.pop('filter_label', None)
+ if len(args) == 1:
+ args = args[0].split(',')
+ cols = []
+ namefrag = ['sorted']
+ labelfrag = []
+ if not args:
+ return parent_dataset
+ t0 = time.time()
+ for arg in args:
+ words = arg.split()
+ nwords = len(words)
+ if nwords == 0 or nwords > 2:
+ raise ExpressionError('Unknown sort %r' % arg)
+ elif nwords == 2:
+ dir = words[1].lower()
+ if dir not in ('asc', 'desc'):
+ raise ExpressionError('Unknown sort direction %r' % arg)
+ else:
+ dir = 'asc'
+ colname = words[0]
+ namefrag.append('%s_%s' % (colname, dir))
+ labelfrag.append('%s %s' % (colname, dir))
+ coldata = parent_dataset.get_column(colname).data
+ cols.append((coldata, dir == 'asc'))
+ if name is None:
+ namefrag.append(parent_dataset.name)
+ name = '_'.join(namefrag)
+ if filter_label is None:
+ filter_label = '%s, ordered by %s' % (parent_dataset.label, ', '.join(labelfrag))
+ record_ids = range(len(parent_dataset))
+ t1 = time.time()
+ soom.info('Actualise data vectors took %.1fs' % (t1 - t0))
+ record_ids.sort(_rowcmp)
+ soom.info('Sort took %.1fs' % (time.time() - t1))
+ record_ids = Numeric.array(record_ids)
+ return FilteredDataset(parent_dataset, record_ids, name=name,
+ filter_label=filter_label, **kwargs)
+
+ # AM Feb-09 - also tried compiling comparison function on the fly - not
+ # much faster. Essentially:
+ # eval('lambda i, j: cmp(cols[0][i], cols[0][j]) # col 0, asc
+ # or cmp(cols[1][j], cols[1][i])') # col 1, desc
+
+
+class DatasetFilters:
+ '''
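+ A minimal end-to-end sketch (assuming a Datasets registry instance
+ 'datasets' and hypothetical names; makedataset() returns the
+ dataset already locked):
+
+ ds = datasets.makedataset('mydata')
+ ds.loaddata(src, initialise=True, finalise=True, chunkrows=5000)
+ ds.save()
+ ds.unlock()
+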
+ Dataset filter registry
+ '''
+ def __init__(self, dataset):
+ self.dataset = dataset
+ self.filters = {}
+
+ def load_metadata(self):
+ # Parent dataset has just been loaded, scan for applicable filters
+ paths = soom.find_metadata(os.path.join(self.dataset.name, 'filters'))
+ for path in paths:
+ filter = load_filter(self.dataset, path)
+ self.filters[filter.name] = filter
+
+ def _display_hook(self):
+ filters = self.filters.items()
+ filters.sort()
+ print 'Filters:'
+ if filters:
+ for name, filter in filters:
+ desc = filter.desc
+ if not desc:
+ desc = '(no description)'
+ print ' %-12s %s, %d elements\n%16s expr: %s' % \
+ (filter.name, desc, len(filter),
+ '', filter.expr)
+ else:
+ print ' (no filters defined)'
+
+ def delete(self, name):
+ filter = self.filters.pop(name, None)
+ if filter:
+ filter.delete()
+ soom.info('deleted filter %r' % name)
+
+ def add(self, name, filter):
+ assert name not in self.filters
+ self.filters[name] = filter
+
+ def __getitem__(self, name):
+ filter = self.filters[name]
+ filter.load()
+ return filter
+
+ def filter(self, **kwargs):
+ return filter_dataset(self.dataset, filters=self, **kwargs)
+
+
+def filter_dataset(dataset, expr=None, name=None, label=None,
+ kwargs=None, filters=None):
+ '''
+ Parse filter method arguments and return a filtered dataset
+ if a filter is active:
+
+ - If "nofilter" - returns raw dataset
+ - If "filtername", use pre-defined named filter,
+ return dataset extract
+ - If "filterexpr", creates an anonymous filter,
+ return dataset extract
+ - Else return raw dataset
+ '''
+ if kwargs:
+ nofilter = kwargs.pop('nofilter', False)
+ if nofilter:
+ return dataset
+ name = kwargs.pop('filtername', None)
+ expr = kwargs.pop('filterexpr', None)
+ label = kwargs.pop('filterlabel', None)
+ if expr or name:
+ if expr:
+ if name and filters:
+ filters.delete(name)
+ filter = DatasetFilter(dataset, name,
+ expr, backed=bool(name),
+ label=label)
+ if name and filters:
+ filters.add(name, filter)
+ else:
+ try:
+ if filters:
+ filter = filters[name]
+ else:
+ raise KeyError
+ except KeyError:
+ raise Error('Unknown filter %s' % repr(name))
+ return filter.get_filtered_ds()
+ return dataset
+
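+# A usage sketch (column name and expression syntax illustrative;
+# expressions are parsed by SOOMv0.soomparse):
+#
+# ds.filter(expr='agegrp >= 5') # anonymous filter
+# ds.filter(name='older', expr='agegrp >= 5') # named, saved to disc
+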
+class DatasetFilter(object):
+ """
+ Base class for data set filters (resolved record IDs and definition)
+ """
+ def __init__(self, parent_dataset, name, expr,
+ desc=None, label=None, backed=True):
+ """
+ Method to define and evaluate a dataset filter i.e. a
+ where clause.
+
+ Arguments:
+ parent_dataset
+ name short name for filter
+ expr filter expression
+ desc longer description of the filter
+ label label for the filter when printing
+ output
+ """
+
+ self.parent_dataset = parent_dataset
+ self.name = name
+ self.desc = desc
+ self.label = label
+ self.record_ids = None # "list" of matching row/record IDs
+ self.generation = None
+ self.length = 0
+ self.expr = expr
+ self.path = None
+ self.backed = bool(backed and name)
+ self.filter()
+
+ def __getstate__(self):
+ """
+ Returns a copy of the DatasetFilter's state but with the
+ .record_ids attributes set to None, instead of BLOBstore
+ object instances, and parent_dataset set to None (so we don't
+ save that as well).
+ """
+ odict = self.__dict__.copy()
+ odict['filter_blob'] = None
+ odict['record_ids'] = None
+ odict['parent_dataset'] = None
+ return odict
+
+ def __setstate__(self, state):
+ self.__dict__.update(state)
+
+ def _fixpath(self):
+ self.path = os.path.join(soom.writepath,
+ self.parent_dataset.name,
+ 'filters',
+ self.name)
+ Utils.helpful_mkdir(self.path)
+
+ def save_metadata(self):
+ if self.backed and soom.writepath:
+ self._fixpath()
+ filename = os.path.join(self.path, soom.metadata_filename)
+ f = open(filename, 'wb')
+ try:
+ cPickle.dump(self, f, -1)
+ finally:
+ f.close()
+
+ def _blobstore_filename(self, mkdirs=False):
+ if self.backed and soom.writepath:
+ self._fixpath()
+ if self.path:
+ return os.path.join(self.path, 'record_ids.SOOMblobstore')
+
+ def filter(self):
+ starttime = time.time()
+ parser = soomparse.SoomFilterParse(self.parent_dataset, self.expr)
+ record_ids = parser.filter()
+ # Empty filter?
+ if record_ids is None or len(record_ids) == 0:
+ record_ids = []
+ self.record_ids = Numeric.array(record_ids, typecode=Numeric.Int)
+ del record_ids
+ self.generation = self.parent_dataset.generation
+ self.length = len(self.record_ids)
+
+ filename = self._blobstore_filename(mkdirs=True)
+ if self.backed and filename:
+ # initialise a BLOB dict to hold filter record ID vector
+ self.filter_blob = ArrayDict(filename, 'w+')
+ self.filter_blob['vector'] = self.record_ids
+ # this syncs the data to disc - EVIL - relying on cyclic GC to
+ # reap immediately.
+ del self.filter_blob
+ # re-instate the reference to the BLOBstore
+ self.filter_blob = ArrayDict(filename, 'r')
+ self.record_ids = self.filter_blob['vector']
+ else:
+ self.filter_blob = None
+ self.save_metadata()
+
+ soom.info('Assembling filter %s containing %d elements took %.3fs' %\
+ (self.name, len(self.record_ids), time.time() - starttime))
+
+ def get_filtered_ds(self):
+ return filtered_ds(self.parent_dataset, self.record_ids,
+ name=self.name,
+ filter_label=self.label or self.expr,
+ desc=self.desc)
+
+ def __len__(self):
+ if self.generation != self.parent_dataset.generation:
+ self.filter()
+ return self.length
+
+ def describe(self):
+ """Method to print metadata about a data set filter instance"""
+ m = ["","-"*20]
+ m.append("Metadata for dataset filter: %s" % self.name)
+ if self.label:
+ m.append("Label: %s" % self.label)
+ if self.desc:
+ m.append("Description: %s" % self.desc)
+ m.append("Parent dataset: %s" % self.parent_dataset.name)
+ m.append("Number of records returned by filter: %i" % self.length)
+ m.append("Definition: %s" % self.expr)
+ m.append("-"* 20)
+ m.append("")
+ return os.linesep.join(m)
+
+ def _display_hook(self):
+ """
+ Prints a DatasetFilter's metadata
+ """
+ print self.describe()
+
+ def load(self, verbose=1):
+ """
+ Function to load a filter vector for a data set, regenerating
+ it if the parent dataset has changed.
+
+ It checks to see if the filter has already been loaded,
+ and does nothing if it has.
+ """
+ if self.generation != self.parent_dataset.generation:
+ self.filter()
+ elif self.record_ids is None and self.backed:
+ filename = self._blobstore_filename()
+ starttime = time.time()
+ try:
+ self.filter_blob = ArrayDict(filename, 'r+')
+ except IOError, e:
+ raise IOError, "couldn't open filter \"%s\" blobstore: %s" %\
+ (self.name, e)
+ self.record_ids = self.filter_blob["vector"]
+ elapsed = time.time() - starttime
+ if verbose:
+ print "load_filter(): memory mapping of \"%s\" containing %d elements took %.3f seconds." % (self.name, len(self.record_ids), elapsed)
+
+ def unload(self):
+ self.filter_blob = None
+ self.record_ids = None
+
+ def delete(self):
+ self.unload()
+ if self.backed and self.path:
+ try:
+ os.unlink(os.path.join(self.path, soom.metadata_filename))
+ os.unlink(self._blobstore_filename())
+ os.rmdir(self.path)
+ except OSError, (eno, estr):
+ if eno != errno.ENOENT:
+ raise
+
+def load_filter(dataset, path):
+ f = open(os.path.join(path, soom.metadata_filename), 'rb')
+ try:
+ filter = cPickle.load(f)
+ filter.path = path
+ filter.parent_dataset = dataset
+ finally:
+ f.close()
+ return filter
diff --git a/SOOMv0/Makefile b/SOOMv0/Makefile
new file mode 100644
index 0000000..b5d796f
--- /dev/null
+++ b/SOOMv0/Makefile
@@ -0,0 +1,25 @@
+
+PYTHON = python
+YAPPSDIR = ../yapps2
+
+.PHONY: soomparse
+.DELETE_ON_ERROR:
+
+soomparse: soomparse.py yappsrt.py
+
+# create the soomparse.py module to parse DatasetFilter expressions
+soomparse.py: soomparse.g $(YAPPSDIR)/yapps2.py $(YAPPSDIR)/yappsrt.py
+ $(PYTHON) $(YAPPSDIR)/yapps2.py soomparse.g
+
+# Merge yapps2 LICENSE into yappsrt.py source
+yappsrt.py: $(YAPPSDIR)/yappsrt.py $(YAPPSDIR)/LICENSE
+ (sed -e '/^$$/q' $(YAPPSDIR)/yappsrt.py && \
+ echo '# Copyright 2004 Amit J. Patel' && \
+ echo '# see: http://theory.stanford.edu/~amitp/Yapps/' && \
+ echo '#' && \
+ sed -e 's/^/# /' $(YAPPSDIR)/LICENSE && \
+ sed -ne '/^$$/,$$p' $(YAPPSDIR)/yappsrt.py) > $@
+
+clean:
+ rm -f *.pyc soomparse.py
+
diff --git a/SOOMv0/Plot/README b/SOOMv0/Plot/README
new file mode 100644
index 0000000..801e31a
--- /dev/null
+++ b/SOOMv0/Plot/README
@@ -0,0 +1,56 @@
+A unified Python interface to R plots.
+
+The objects in the plotmethods module form the "user interface".
+Typically, a plot object is instantiated, filters, parameters and axes
+are added to it, and then the plot is rendered. This can either be done
+piecemeal, or by the __init__ method.
+
+An example of immediate plotting:
+
+ histogram(dataset, 'age', bins=5)
+
+By instantiating the plot object explicitly, users can interact with it:
+
+ >>> plot = histogram(dataset)
+ >>> plot.procargs(dataset, 'age', bins=8, sample=0.05)
+ >>> plot.get_title()
+ 'Sampled (5%) Distribution of Age (years)'
+ >>> plot.set_params(title='Distribution of Age')
+ >>> plot.plot(dataset)
+
+The plotmethods module uses a class "KWArgs" as a registry for plot
+method keyword arguments. This class assists in demultiplexing keyword
+arguments supplied by the user to the appropriate underlying methods.
+
+The plotmethods classes are built on the base classes in the rplot
+module. The class hierarchy looks like:
+
+ RPlotBase
+ RPlot
+ (piechart)
+ RLatticePlotBase
+ RLatticePlot
+ RLatticeMatrixPlot
+ (scattermatrix)
+ RLatticeBinPlot
+ (histogram)
+ (densityplot)
+ RLatticeBoxPlot
+ (boxplot)
+ (scatterplot)
+ RLatticeSummPlot
+ RLatticeCatPlot
+ (barchart)
+ (dotchart)
+ (lineplot)
+
+Axis object hierarchy:
+
+ RCondCol
+ RPercentAxisCol
+ RDensityAxisCol
+ RContinuousCondCol
+ RMeasureCondCol
+ RPropnMeasureCol
+ RDiscreteCondCol
+ RGroupByCol
diff --git a/SOOMv0/Plot/__init__.py b/SOOMv0/Plot/__init__.py
new file mode 100644
index 0000000..09f1bec
--- /dev/null
+++ b/SOOMv0/Plot/__init__.py
@@ -0,0 +1,17 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: __init__.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/Plot/__init__.py,v $
+
diff --git a/SOOMv0/Plot/_output.py b/SOOMv0/Plot/_output.py
new file mode 100644
index 0000000..0d6deb8
--- /dev/null
+++ b/SOOMv0/Plot/_output.py
@@ -0,0 +1,257 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: _output.py 2757 2007-07-26 07:07:10Z tchur $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/Plot/_output.py,v $
+
+import os
+from SOOMv0.common import *
+from SOOMv0 import soom
+import atexit
+from rpy import *
+
+# R creates litter in /tmp, and rpy doesn't call R's cleanup function
+# because that would kill the interpreter (preventing normal python
+# object cleanup from occurring). So we attempt a minimal workaround
+# here (it will not remove directories with contents, and will not be
+# called if the interpreter dies):
+def rmdirignore(dir):
+ try:
+ os.rmdir(dir)
+ except OSError:
+ pass
+rtmp = r.tempdir()
+if rtmp.startswith('/tmp/Rtmp'): # Safety
+ atexit.register(rmdirignore, rtmp)
+
+def default_init(device, **kwargs):
+ """Make standard dev init functions look like r.trellis_device, etc"""
+ try:
+ init_fn = getattr(r, device)
+ except AttributeError:
+ raise Error('Unknown device')
+ return init_fn(**kwargs)
+
+class _OutputBase(object):
+ def __init__(self, **kwargs):
+ self.init_args = kwargs
+
+ def done(self):
+ while r.dev_list():
+ r.dev_off()
+
+ def new_page(self, init_fn = default_init):
+ r.plot_new()
+
+class _OutputNull(_OutputBase):
+ def done(self):
+ pass
+
+class _OutputScreenBase(_OutputBase):
+ def __init__(self, **kwargs):
+ super(_OutputScreenBase, self).__init__(**kwargs)
+ self.initialised = False
+ self.done()
+
+ def new_page(self, init_fn = default_init):
+ if not self.initialised:
+ init_fn(self.devname, **self.init_args)
+ self.initialised = True
+ super(_OutputScreenBase, self).new_page()
+
+ def done(self):
+ pass
+
+class _OutputFileBase(_OutputBase):
+ def new_page(self, init_fn = default_init):
+ self.done()
+ init_fn(self.devname, **self.init_args)
+ super(_OutputFileBase, self).new_page()
+
+class _OutputX11(_OutputScreenBase):
+ devname = 'X11'
+
+ def __init__(self, **kwargs):
+ kwargs.setdefault('width', 10)
+ kwargs.setdefault('height', 7.333)
+ super(_OutputX11, self).__init__(**kwargs)
+
+if 'Cairo' in r._packages(all=True):
+ have_Cairo = True
+else:
+ have_Cairo = False
+
+if have_Cairo:
+ # The optional Cairo package produces superior PNG and JPEG output, and does
+ # not require a connection to an X11 server, as the built-in PNG and JPEG
+ # renderers do, so we use this if it is available. Cairo is available from:
+ #
+ # http://www.rforge.net/Cairo/index.html
+
+ class _OutputCairoBase(_OutputFileBase):
+ def __init__(self, **kwargs):
+ kwargs.setdefault('file', self.default_file)
+ kwargs.setdefault('height', 550)
+ kwargs.setdefault('width', 750)
+ super(_OutputCairoBase, self).__init__(**kwargs)
+
+ def new_page(self, init_fn_ignored = None):
+ self.done()
+ r.library('Cairo')
+ # Cairo, unlike GDD below, accepts 'height' and 'width' directly
+ r.Cairo(type=self.devname, **dict(self.init_args))
+ r.plot_new()
+
+ class _OutputPNG(_OutputCairoBase):
+ devname = 'png'
+ default_file = '/tmp/soom.png'
+
+ class _OutputJPEG(_OutputCairoBase):
+ devname = 'jpeg'
+ default_file = '/tmp/soom.jpeg'
+
+
+elif 'GDD' in r._packages(all=True):
+ # The optional GDD package produces superior PNG and JPEG output, and does
+ # not require a connection to an X11 server, as the built-in PNG and JPEG
+ # renderers do, so we use this if it is available. GDD is available from:
+ #
+ # http://www.rosuda.org/R/GDD/
+
+ class _OutputGDDBase(_OutputFileBase):
+ def __init__(self, **kwargs):
+ kwargs.setdefault('file', self.default_file)
+ kwargs.setdefault('height', 550)
+ kwargs.setdefault('width', 750)
+ super(_OutputGDDBase, self).__init__(**kwargs)
+
+ def new_page(self, init_fn_ignored = None):
+ self.done()
+ r.library('GDD')
+ args = dict(self.init_args)
+ args['h'] = args.pop('height')
+ args['w'] = args.pop('width')
+ r.GDD(type=self.devname, **args)
+ r.plot_new()
+
+ class _OutputPNG(_OutputGDDBase):
+ devname = 'png'
+ default_file = '/tmp/soom.png'
+
+ class _OutputJPEG(_OutputGDDBase):
+ devname = 'jpeg'
+ default_file = '/tmp/soom.jpeg'
+else:
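+ # Neither Cairo nor GDD is available: fall back to R's built-in
+ # png/jpeg devices. These need an X11 display, so a private Xvfb
+ # server is spawned if necessary (see SOOMv0.xvfb_spawn).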
+ class _OutputPNG(_OutputFileBase):
+ devname = 'png'
+
+ def __init__(self, **kwargs):
+ from SOOMv0 import xvfb_spawn
+ xauth_file = None
+ if soom.writepath:
+ xauth_file = os.path.join(soom.writepath, '.xvfb-auth')
+ xvfb_spawn.spawn_if_necessary(xauth_file)
+ kwargs.setdefault('file', '/tmp/soom.png')
+ kwargs.setdefault('height', 550)
+ kwargs.setdefault('width', 750)
+ super(_OutputPNG, self).__init__(**kwargs)
+
+ class _OutputJPEG(_OutputFileBase):
+ devname = 'jpeg'
+
+ def __init__(self, **kwargs):
+ from SOOMv0 import xvfb_spawn
+ xauth_file = None
+ if soom.writepath:
+ xauth_file = os.path.join(soom.writepath, '.xvfb-auth')
+ xvfb_spawn.spawn_if_necessary(xauth_file)
+ kwargs.setdefault('file', '/tmp/soom.jpeg')
+ kwargs.setdefault('height', 550)
+ kwargs.setdefault('width', 750)
+ super(_OutputJPEG, self).__init__(**kwargs)
+
+class _OutputPS(_OutputFileBase):
+    devname = 'postscript'
+
+    def __init__(self, **kwargs):
+        kwargs.setdefault('file', '/tmp/soom.ps')
+        kwargs.setdefault('horizontal', True)
+        kwargs.setdefault('paper', 'a4')
+        super(_OutputPS, self).__init__(**kwargs)
+
+class _OutputPDF(_OutputFileBase):
+    devname = 'pdf'
+
+    def __init__(self, **kwargs):
+        kwargs.setdefault('file', '/tmp/soom.pdf')
+        kwargs.setdefault('horizontal', True)
+        kwargs.setdefault('paper', 'a4')
+        if kwargs['horizontal']:
+            kwargs['height'] = kwargs.get('height', 20.9) / 2.54
+            kwargs['width'] = kwargs.get('width', 29.6) / 2.54
+        else:
+            kwargs['width'] = kwargs.get('width', 20.9) / 2.54
+            kwargs['height'] = kwargs.get('height', 29.6) / 2.54
+        super(_OutputPDF, self).__init__(**kwargs)
+
+class _OutputSVG(_OutputFileBase):
+    devname = 'devSVG'
+
+    def __init__(self, **kwargs):
+        r.library('RSvgDevice')
+        super(_OutputSVG, self).__init__(**kwargs)
+
+
+_devices = {
+ 'x11': _OutputX11,
+ 'png': _OutputPNG,
+ 'jpeg': _OutputJPEG,
+ 'postscript': _OutputPS,
+ 'pdf': _OutputPDF,
+ 'svg': _OutputSVG,
+}
+
+dev = _OutputNull()
+
+class Output:
+ """Set Plot output device
+
+ First argument is one of 'X11, 'PNG', 'JPEG', 'Postscript',
+ or 'PDF'. Other kwargs are passed to the R output device,
+ and typically include 'file', 'height', 'width', 'paper',
+ 'horizontal'
+ """
+ def __call__(self, device, **kwargs):
+ global dev
+ try:
+ dev_cls = _devices[device.lower()]
+ except KeyError:
+ raise Error('Unknown output device %r' % device)
+ dev = dev_cls(**kwargs)
+
+ def _display_hook(self):
+ lines = [self.__doc__]
+ devices = _devices.keys()
+ devices.sort()
+ lines.append('Available devices: %s' % ', '.join(devices))
+ for name, cls in _devices.items():
+ if isinstance(dev, cls):
+ lines.append('Currently selected: %s' % name)
+ break
+ print '\n'.join(lines)
+
+output = Output()
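+
+# Example use (hypothetical file names; device names are case-insensitive):
+#
+#     output('PNG', file='/tmp/myplot.png', width=800, height=600)
+#     output('pdf', file='/tmp/myplot.pdf', horizontal=False)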
+
diff --git a/SOOMv0/Plot/panelfn.py b/SOOMv0/Plot/panelfn.py
new file mode 100644
index 0000000..120ae3e
--- /dev/null
+++ b/SOOMv0/Plot/panelfn.py
@@ -0,0 +1,119 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: panelfn.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/Plot/panelfn.py,v $
+
+from rpy import *
+
+from SOOMv0.common import PlotError
+
+panel_macros = {
+ 'barchart':
+ '''\
+ function(x, y, ..., box.ratio=1, subscripts, groups=NULL, horizontal=TRUE)
+ {
+ panel.barchart(x, y, box.ratio=box.ratio,
+ subscripts=subscripts, groups=groups,
+ horizontal=horizontal, ...)
+
+ if (is.null(groups)) {
+ COL_ll <- plotframe$COL.ll[subscripts]
+ COL_ul <- plotframe$COL.ul[subscripts]
+ ticks <- as.numeric(TICKAXIS)
+ ARROWS
+ }
+ else {
+ TICKAXIS <- as.numeric(TICKAXIS)
+ groupSub <- function(groups, subscripts, ...) groups[subscripts]
+ groups <- as.numeric(groups)
+ vals <- sort(unique(groups))
+ nvals <- length(vals)
+ groups <- groupSub(groups, subscripts, ...)
+ width <- box.ratio/(1 + nvals * box.ratio)
+ for (i in unique(TICKAXIS)) {
+ ok <- TICKAXIS == i
+ ticks <- (i + width * (groups[ok] - (nvals + 1)/2))
+ COL_ll <- plotframe$COL.ll[subscripts][ok]
+ COL_ul <- plotframe$COL.ul[subscripts][ok]
+ ARROWS
+ }
+ }
+ }
+ ''',
+
+ 'xyplot':
+ '''\
+ function(x, y, ..., subscripts, groups=NULL)
+ {
+ COL_ll <- plotframe$COL.ll[subscripts]
+ COL_ul <- plotframe$COL.ul[subscripts]
+ ticks <- as.numeric(TICKAXIS)
+ ARROWS
+ if (is.null(groups))
+ panel.xyplot(x, y, subscripts, ...)
+ else
+ panel.superpose(x, y, subscripts, groups, ...)
+ }
+ ''',
+
+ 'dotplot':
+ '''\
+ function(x, y, ..., subscripts, groups=NULL)
+ {
+ COL_ll <- plotframe$COL.ll[subscripts]
+ COL_ul <- plotframe$COL.ul[subscripts]
+ ticks <- as.numeric(TICKAXIS)
+ ARROWS
+ panel.dotplot(x, y, subscripts=subscripts, groups=groups, ...)
+ }
+ ''',
+}
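+
+# The function bodies above are templates rather than valid R as written:
+# ci_panel() below substitutes COL with the measure column's R name,
+# TICKAXIS with the categorical axis ('x' or 'y'), and ARROWS with a
+# panel.arrows() call that draws the confidence interval bars.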
+
+
+def ci_panel(colname, rmethod, axis, grouping):
+    try:
+        macro = panel_macros[rmethod]
+    except KeyError:
+        raise PlotError('%r plot method does not support confidence '
+                        'intervals' % rmethod)
+
+ macro = macro.replace('COL', colname)
+
+ if axis == 'x':
+ args = [colname+'_ll', 'ticks', colname+'_ul', 'ticks']
+ macro = macro.replace('TICKAXIS', 'y')
+ else:
+ args = ['ticks', colname+'_ll', 'ticks', colname+'_ul']
+ macro = macro.replace('TICKAXIS', 'x')
+ args.extend([
+ 'length=0.02',
+ 'code=3',
+ 'angle=90',
+ "col='red'",
+ 'lwd=2',
+ ])
+ arrows = 'panel.arrows(%s)' % ',\n '.join(args)
+ macro = macro.replace('ARROWS', arrows)
+
+# print macro
+ return r(macro)
+
+def violin_panel():
+ return r('''\
+ function(..., box.ratio) {
+ dots <- c(list(...), list(col = "transparent",
+ box.ratio = box.ratio))
+ dots$varwidth = FALSE
+ do.call("panel.violin", dots)
+ panel.bwplot(..., fill = NULL, box.ratio = .1)
+ }
+ ''')
diff --git a/SOOMv0/Plot/plotmethods.py b/SOOMv0/Plot/plotmethods.py
new file mode 100644
index 0000000..182685b
--- /dev/null
+++ b/SOOMv0/Plot/plotmethods.py
@@ -0,0 +1,337 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: plotmethods.py 2673 2007-06-04 07:22:53Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/Plot/plotmethods.py,v $
+
+# Standard Python modules
+import sys
+
+# 3rd Party modules
+try:
+ from rpy_options import rpy_options
+except ImportError:
+ pass
+else:
+ rpy_options['VERBOSE'] = False
+from rpy import *
+
+# SOOM bits
+from SOOMv0.common import *
+
+# SOOM.Plot bits
+import rplot
+
+class KWArgMap:
+ def __init__(self, *args, **mapped):
+ self.args = args
+ self.mapped = mapped
+
+ def apply(self, dest, source, unused):
+ def _set(d, s):
+ try:
+ dest[d] = source[s]
+ except KeyError:
+ pass
+ else:
+ try:
+ del unused[s]
+ except KeyError:
+ pass
+ for v in self.args:
+ if isinstance(v, KWArgMap):
+ v.apply(dest, source, unused)
+ else:
+ _set(v, v)
+ for k, v in self.mapped.items():
+ _set(k, v)
+ return dest
+
+
+class KWArgs:
+ """
+ Magic repository of keyword args
+
+ When instances are called, they yield up (potentially remapped)
+ keyword args. When all arg processing is complete, assert_all_used
+ can be called to check that there are no unused arguments.
+ """
+
+ def __init__(self, kwargs):
+ self.kwargs = kwargs
+ self.unused = dict(kwargs) # Copy
+
+ def __call__(self, *args, **mapped):
+ return KWArgMap(*args, **mapped).apply({}, self.kwargs, self.unused)
+
+ def assert_all_used(self):
+ if self.unused:
+ params = self.unused.keys()
+ params.sort()
+ raise TypeError('Unknown parameter(s): %s' % ', '.join(params))
+
+# "Canned" argument sets
+filter_args = KWArgMap('nofilter', 'filtername', 'filterexpr', 'filterlabel')
+common_args = KWArgMap('title', 'footer', 'debug')
+compan_args = KWArgMap('layout', 'xlim', 'ylim')
+yaxis_args = KWArgMap(ticks='yticks',
+ labelrotate='ylabelrotate')
+xaxis_args = KWArgMap(ticks='xticks',
+ labelrotate='xlabelrotate')
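+
+# Illustrative sketch of the remapping machinery (hypothetical values):
+#
+#     kwargs = KWArgs({'yticks': 5, 'title': 'Rates'})
+#     kwargs(yaxis_args)        # -> {'ticks': 5}
+#     kwargs(common_args)       # -> {'title': 'Rates'}
+#     kwargs.assert_all_used()  # raises TypeError if anything is left over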
+
+
+class histogram(rplot.RLatticeBinPlot):
+ label = 'Distribution'
+ rmethod = 'histogram'
+ autosample = True
+ # Complete
+
+ def __init__(self, ds, *args, **kwargs):
+ super(histogram, self).__init__(ds)
+ if args:
+ self.procargs(ds, *args, **kwargs)
+ self.plot(ds)
+
+ def procargs(self, ds, measurecol, *condcols, **kwargs):
+ kwargs = KWArgs(kwargs)
+ self.add_filter(**kwargs(filter_args))
+ self.set_params(**kwargs(common_args, compan_args, 'bins', 'sample'))
+ self.add_percent(ds, **kwargs('hist_type', yaxis_args))
+ self.add_continuous(ds, measurecol, **kwargs(xaxis_args))
+ for colname in condcols:
+ self.add_discrete(ds, colname)
+ kwargs.assert_all_used()
+
+
+class densityplot(rplot.RLatticeBinPlot):
+ label = 'Density'
+ rmethod = 'densityplot'
+ autosample = True
+ # Complete?
+
+ def __init__(self, ds, *args, **kwargs):
+ super(densityplot, self).__init__(ds)
+ if args:
+ self.procargs(ds, *args, **kwargs)
+ self.plot(ds)
+
+ def procargs(self, ds, measurecol, *condcols, **kwargs):
+ kwargs = KWArgs(kwargs)
+ self.add_filter(**kwargs(filter_args))
+ self.set_params(**kwargs(common_args, compan_args,
+ 'bins', 'line_width', 'line_style', 'sample'))
+ self.add_density(ds, **kwargs('plot_points', 'ref', yaxis_args))
+ self.add_continuous(ds, measurecol, **kwargs(xaxis_args))
+ for colname in condcols:
+ self.add_discrete(ds, colname)
+ kwargs.assert_all_used()
+
+
+class scatterplot(rplot.RLatticePlot):
+ label = 'Scatter plot'
+ rmethod = 'xyplot'
+ autosample = True
+
+ def __init__(self, ds, *args, **kwargs):
+ super(scatterplot, self).__init__(ds)
+ if args:
+ self.procargs(ds, *args, **kwargs)
+ self.plot(ds)
+
+ def procargs(self, ds, xcolname, ycolname, *condcols, **kwargs):
+ kwargs = KWArgs(kwargs)
+ self.add_filter(**kwargs(filter_args))
+ self.set_params(**kwargs(common_args, compan_args,
+ 'horizontal', 'vertical',
+ 'point_style', 'sample'))
+ self.add_continuous(ds, ycolname, **kwargs(yaxis_args,
+ logscale='logyscale'))
+ self.add_continuous(ds, xcolname, **kwargs(xaxis_args,
+ logscale='logxscale'))
+ for colname in condcols:
+ self.add_discrete(ds, colname)
+ kwargs.assert_all_used()
+
+
+class scattermatrix(rplot.RLatticeMatrixPlot):
+ label = 'Scatter plot matrices'
+ rmethod = 'splom'
+ autosample = True
+
+ def __init__(self, ds, *args, **kwargs):
+ super(scattermatrix, self).__init__(ds)
+ if args:
+ self.procargs(ds, *args, **kwargs)
+ self.plot(ds)
+
+ def procargs(self, ds, *cols, **kwargs):
+ kwargs = KWArgs(kwargs)
+ self.add_filter(**kwargs(filter_args))
+ self.set_params(**kwargs(common_args, compan_args,
+ 'horizontal', 'vertical',
+ 'point_style', 'sample'))
+ if len(cols) < 2:
+ raise PlotError('must specify at least 2 continuous columns')
+ for colname in cols:
+ self.add_any(ds, colname)
+ self.add_groupby(ds, **kwargs('groupby'))
+ kwargs.assert_all_used()
+
+
+class boxplot(rplot.RLatticeBoxPlot):
+ label = 'Box and whisker plot'
+ rmethod = 'bwplot'
+ autosample = True
+
+ def __init__(self, ds, *args, **kwargs):
+ super(boxplot, self).__init__(ds)
+ if args:
+ self.procargs(ds, *args, **kwargs)
+ self.plot(ds)
+
+ def procargs(self, ds, measurecol, xcolname, *condcols, **kwargs):
+ kwargs = KWArgs(kwargs)
+ self.set_params(**kwargs(common_args, compan_args,
+ 'horizontal', 'vertical',
+ 'line_width', 'line_style', 'sample',
+ 'notches', 'outliers', 'variable_width',
+ 'violins'))
+ self.add_filter(**kwargs(filter_args))
+ self.add_continuous(ds, measurecol, **kwargs(yaxis_args,
+ logscale='logyscale'))
+ self.add_discrete(ds, xcolname, **kwargs(labelrotate='xlabelrotate'))
+ for colname in condcols:
+ self.add_discrete(ds, colname)
+ kwargs.assert_all_used()
+
+
+class lineplot(rplot.RLatticeSummPlot):
+ label = 'Line plot'
+ rmethod = 'xyplot'
+
+ def __init__(self, ds, *args, **kwargs):
+ super(lineplot, self).__init__(ds)
+ self.horizontal = True
+ self.r_args['type'] = 'l'
+ if args:
+ self.procargs(ds, *args, **kwargs)
+ self.plot(ds)
+
+ def procargs(self, ds, xcolname, *condcols, **kwargs):
+ kwargs = KWArgs(kwargs)
+ self.set_params(**kwargs(common_args, compan_args,
+ 'weightcol', 'horizontal', 'vertical',
+ 'line_width', 'line_style', 'conflev'))
+ self.add_filter(**kwargs(filter_args))
+ self.add_discrete(ds, xcolname,
+ **kwargs(labelrotate='xlabelrotate'))
+ self.add_measure(ds, **kwargs('measure', yaxis_args,
+ logscale='logyscale'))
+ self.add_groupby(ds, **kwargs('groupby'))
+ for colname in condcols:
+ self.add_discrete(ds, colname)
+ kwargs.assert_all_used()
+
+
+class barchart(rplot.RLatticeCatPlot):
+ label = 'Barchart'
+ rmethod = 'barchart'
+
+ def __init__(self, ds, *args, **kwargs):
+ super(barchart, self).__init__(ds)
+ if args:
+ self.procargs(ds, *args, **kwargs)
+ self.plot(ds)
+
+ def procargs(self, ds, xcolname, *condcols, **kwargs):
+ kwargs = KWArgs(kwargs)
+ self.set_params(**kwargs(common_args, compan_args,
+ 'weightcol', 'horizontal', 'vertical',
+ 'origin', 'reference', 'pack', 'conflev'))
+ self.add_filter(**kwargs(filter_args))
+ self.add_measure(ds, **kwargs('measure', yaxis_args,
+ logscale='logyscale'))
+ self.add_discrete(ds, xcolname,
+ **kwargs(labelrotate='xlabelrotate'))
+ self.add_groupby(ds, **kwargs('groupby', 'stackby'))
+ for colname in condcols:
+ self.add_discrete(ds, colname)
+ kwargs.assert_all_used()
+
+
+class dotchart(rplot.RLatticeCatPlot):
+ label = 'Dotplot'
+ rmethod = 'dotplot'
+
+ def __init__(self, ds, *args, **kwargs):
+ super(dotchart, self).__init__(ds)
+ if args:
+ self.procargs(ds, *args, **kwargs)
+ self.plot(ds)
+
+ def procargs(self, ds, xcolname, *condcols, **kwargs):
+ kwargs = KWArgs(kwargs)
+ self.set_params(**kwargs(common_args, compan_args,
+ 'weightcol', 'horizontal', 'vertical',
+ 'point_size', 'point_style',
+ 'origin', 'reference', 'conflev'))
+ self.add_filter(**kwargs(filter_args))
+ self.add_measure(ds, **kwargs('measure', yaxis_args,
+ logscale='logyscale'))
+ self.add_discrete(ds, xcolname,
+ **kwargs(labelrotate='xlabelrotate'))
+ self.add_groupby(ds, **kwargs('groupby'))
+ for colname in condcols:
+ self.add_discrete(ds, colname)
+ kwargs.assert_all_used()
+
+
+class piechart(rplot.RPlot):
+ label = 'Pie Chart'
+ rmethod = 'pie'
+
+    def __init__(self, ds, *args, **kwargs):
+        # Not yet implemented - the code below is an unreachable sketch
+        # (_plotutils, condcols and measurecol are not defined here).
+        raise NotImplementedError
+        super(piechart, self).__init__(ds)
+        if args:
+            plotargs = _plotutils.PlotArgs(ds, *condcols,
+                **dict(kwargs, measure=measurecol))
+            ds = plotargs.summarised_dataset
+            kwargs = KWArgs(kwargs)
+            self.add_discrete(ds, measurecol)
+            kwargs.assert_all_used()
+            self.plot(ds)
+
+class fourfold(rplot.TwoByTwoPlot):
+ label = 'Fourfold 2 x 2 x k'
+ rmethod = 'fourfold'
+
+ def __init__(self, ds, *args, **kwargs):
+ super(fourfold, self).__init__(ds)
+ if args:
+ self.procargs(ds, *args, **kwargs)
+ self.plot(ds)
+
+ def procargs(self, ds, sidecol, topcol, stratacol=None, **kwargs):
+ kwargs = KWArgs(kwargs)
+ self.set_params(**kwargs(common_args, 'weightcol', 'margin', 'conflev',
+ 'extended', 'std'))
+ self.add_filter(**kwargs(filter_args))
+ self.add_discrete(ds, topcol,
+ **kwargs(labelrotate='xlabelrotate'))
+ self.add_discrete(ds, sidecol,
+ **kwargs(labelrotate='ylabelrotate'))
+ if stratacol is not None:
+ self.add_discrete(ds, stratacol)
+ kwargs.assert_all_used()
diff --git a/SOOMv0/Plot/raxis.py b/SOOMv0/Plot/raxis.py
new file mode 100644
index 0000000..723c3b4
--- /dev/null
+++ b/SOOMv0/Plot/raxis.py
@@ -0,0 +1,415 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: raxis.py 2832 2007-09-28 00:41:03Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/Plot/raxis.py,v $
+
+# Standard Python modules
+import sys
+import math
+import types
+import textwrap
+
+# 3rd Party modules
+from rpy import *
+import MA, Numeric
+from mx import DateTime
+
+# SOOM bits
+from soomfunc import union
+from SOOMv0.common import *
+from SOOMv0.SummaryProp import proportion_label
+
+# SOOM.Plot bits
+from SOOMv0.Plot import rconv
+from SOOMv0.Plot import panelfn
+
+class RCondCol(object):
+ def __init__(self, ds, colname, axislabel=None,
+ ticks=None, labelrotate=None):
+ self.summ_arg = colname
+ try:
+ self.colname = colname.get_colname()
+ except AttributeError:
+ self.colname = colname
+ self.axislabel = axislabel
+ self.axisscale = {}
+ if ticks is not None:
+ self.setscale('tick.number', ticks)
+ self.labelrotate = labelrotate
+ self.long_labels = False
+
+ def rname(self):
+ """
+ Return the SOOM column name in a form acceptable to R
+
+ If None or a null string is returned, the column will not
+ be included in the resulting R data frame.
+
+ If None is returned, the column will not be included
+ in the resulting R formula. A null string will result
+ in a null formula field.
+ """
+ if self.colname is not None:
+ return rconv.rname(self.colname)
+
+ def title(self, ds):
+ if self.axislabel:
+ return self.axislabel
+ else:
+ return ds.get_column(self.colname).label
+
+ def setscale(self, attr, value):
+ self.axisscale[attr] = value
+
+ def setscaledefault(self, attr, value):
+ self.axisscale.setdefault(attr, value)
+
+ def date_ticks(self, ds, npanels):
+ """
+ Generate tickmarks for date/time axis
+
+ Attempts to be intelligent about the number and alignment
+ of the tickmarks, reducing the tickmarks when more panels
+ are in use, and align the tickmarks with natural boundaries
+ (years, months, weeks, days, etc).
+ """
+ col = ds[self.colname]
+ if not col.is_datetimetype():
+ return
+ def reldate(**kwargs):
+ if not kwargs.has_key('hours'):
+ kwargs['hour'] = 0
+ if not kwargs.has_key('minutes'):
+ kwargs['minute'] = 0
+ kwargs['second'] = 0
+ return DateTime.RelativeDateTime(**kwargs)
+
+ # need to account for possible None values, hence can't use
+ # min() & max()
+ mindatetime = None
+ maxdatetime = None
+ for datetime in col:
+ if datetime:
+ if maxdatetime is None or datetime > maxdatetime:
+ maxdatetime = datetime
+ if mindatetime is None or datetime < mindatetime:
+ mindatetime = datetime
+ if mindatetime is None or maxdatetime is None:
+ raise PlotError('No date/time data to plot')
+ range = maxdatetime - mindatetime
+ step = range / (20 / math.sqrt(npanels))
+ pad = step / 3
+
+ dateformat = '%d-%b-%y'
+ if step.days > 300:
+ # Years
+ step = reldate(years=round(step.days / 365.25), month=1, day=1)
+ elif step.days >= 28:
+ # Months
+ step = reldate(months=round(step.days / 30.4375), day=1)
+ elif step.days > 5:
+ # Weeks
+ step = reldate(days=7*round(step.days / 7), weekday=(0,0))
+ elif step.days >= 1:
+ # Days
+ step = reldate(days=1)
+ elif step.hours >= 1:
+ # Hours
+ step = reldate(hours=round(step.hours))
+ dateformat = '%d-%b-%y %H:%M'
+ else:
+ dateformat = '%d-%b-%y %H:%M'
+ dates = []
+ dates.append(mindatetime)
+ date = mindatetime + pad + step
+ while date < maxdatetime - pad:
+ dates.append(date)
+ date += step
+ dates.append(maxdatetime)
+ self.long_labels = True
+ self.setscale('at', [rconv.Date_to_R(dates)] * npanels)
+ self.setscale('relation', 'free')
+ self.setscale('format', dateformat)
+
+ def to_r_args(self, ds, plot, r_args, axisname, npanels):
+ """
+ Collect R plot method kwargs
+ """
+ if axisname:
+ if self.colname and plot.rmethod not in ('barchart', 'dotplot'):
+ self.date_ticks(ds, npanels)
+ if self.labelrotate is not None:
+ self.setscale('rot', self.labelrotate)
+ elif self.long_labels:
+ if axisname == 'x':
+ self.setscale('rot', 90)
+ else:
+ self.setscale('rot', 0)
+ if self.axisscale:
+ scales = r_args.setdefault('scales', {})
+ scales[axisname] = self.axisscale
+ label = self.title(ds)
+ if label:
+ r_args[axisname + 'lab'] = label
+
+ def to_summ_args(self, summargs):
+ """
+ Collect SOOM summ() args and kwargs (for methods using summ)
+ """
+ summargs.add_condcol(self.summ_arg)
+
+ def to_frame(self, frame, ds, **kwargs):
+ rname = self.rname()
+ if rname:
+            frame[rname] = self.to_R(ds, **kwargs)
+
+
+ def __repr__(self):
+ return '%s(%r, axislabel=%r)' % (self.__class__.__name__,
+ self.colname, self.axislabel)
+
+class RPercentAxisCol(RCondCol):
+ axislabel_map = {
+ 'percent': 'Per cent',
+ 'count': 'Frequency',
+ 'density': 'Probability density',
+ }
+
+ def __init__(self, ds, hist_type = None, **kwargs):
+ super(RPercentAxisCol, self).__init__(ds, None, **kwargs)
+ if hist_type is None:
+ hist_type = 'percent'
+ try:
+ self.axislabel = self.axislabel_map[hist_type]
+ except KeyError:
+ raise KeyError('Unsupported histogram type: %r' % hist_type)
+ self.hist_type = hist_type
+
+ def rname(self):
+ return ''
+
+ def title(self, ds):
+ return None
+
+ def to_r_args(self, ds, plot, r_args, axisname, npanels):
+ super(RPercentAxisCol, self).to_r_args(ds, plot, r_args, axisname, npanels)
+ r_args['type'] = self.hist_type
+
+
+class RDensityAxisCol(RCondCol):
+ # Closely related to RPercentAxisCol
+
+ def __init__(self, ds, plot_points = False, ref = True, **kwargs):
+ super(RDensityAxisCol, self).__init__(ds, None, **kwargs)
+ self.plot_points = plot_points
+ self.ref = ref
+ self.axislabel = 'Probability density'
+
+ def rname(self):
+ return ''
+
+ def title(self, ds):
+ return None
+
+ def to_r_args(self, ds, plot, r_args, axisname, npanels):
+ super(RDensityAxisCol, self).to_r_args(ds, plot, r_args, axisname, npanels)
+ r_args['plot_points'] = self.plot_points
+ r_args['ref'] = self.ref
+
+
+class RContinuousCondCol(RCondCol):
+ def __init__(self, ds, colname, logscale=None, **kwargs):
+ super(RContinuousCondCol, self).__init__(ds, colname, **kwargs)
+ if logscale:
+ self.setscale('log', logscale)
+
+ def to_R(self, ds, dates_as_factor, **kwargs):
+ col = ds[self.colname]
+ if MA.isMaskedArray(col.data):
+ if Numeric.alltrue(col.data.mask()):
+ raise PlotError('%r: all data-points masked' % col.name)
+ return rconv.MA_to_R(col.data)
+ elif col.is_datetimetype():
+ return rconv.Missing_Date_to_R(col.data)
+ else:
+ return col.data
+
+
+class RMeasureCondCol(RContinuousCondCol):
+ """
+ Simple scalar measure with optional associated confidence
+ interval columns.
+ """
+
+ def __init__(self, ds, colname, measure, **kwargs):
+ super(RMeasureCondCol, self).__init__(ds, colname, **kwargs)
+ self.summ_arg = measure
+ self.max = None
+ self.min = None
+
+ def limit_cols(self, ds):
+ colname_ul = self.colname + '_ul'
+ colname_ll = self.colname + '_ll'
+ if (ds.has_column(colname_ul) and ds.has_column(colname_ll)):
+ return colname_ul, colname_ll
+ else:
+ return None
+
+ def to_frame(self, frame, ds, **kwargs):
+ limit_cols = self.limit_cols(ds)
+ if limit_cols:
+ self.max = 0
+ self.min = 0
+ for colname in (self.colname,) + limit_cols:
+ col = ds[colname]
+ if MA.isMaskedArray(col.data):
+ if Numeric.alltrue(col.data.mask()):
+ raise PlotError('%r: all data-points masked' % col.name)
+ data = rconv.MA_to_R(col.data)
+ self.max = max(MA.maximum(col.data), self.max)
+ self.min = min(MA.minimum(col.data), self.min)
+ else:
+ data = col.data
+ self.max = max(max(col.data), self.max)
+ self.min = min(min(col.data), self.min)
+ frame[rconv.rname(colname)] = data
+ else:
+ super(RMeasureCondCol, self).to_frame(frame, ds, **kwargs)
+
+ def to_r_args(self, ds, plot, r_args, axisname, npanels):
+ super(RMeasureCondCol, self).to_r_args(ds, plot, r_args, axisname,
+ npanels)
+ if self.limit_cols(ds):
+ r_args['panel'] = panelfn.ci_panel(self.rname(), plot.rmethod,
+ axisname, plot.grouping)
+ if self.max is not None and self.min is not None:
+ range = self.max - self.min
+ r_args[axisname + 'lim'] = (self.min - range * 0.1,
+ self.max + range * 0.1)
+
+
+class RPropnMeasureCol(RContinuousCondCol):
+ def __init__(self, ds, propncols, axislabel=None, **kwargs):
+ colname, label = proportion_label(ds, propncols)
+ if axislabel is None:
+ axislabel = label
+ super(RPropnMeasureCol, self).__init__(ds, colname,
+ axislabel=axislabel, **kwargs)
+ self.propncols = propncols
+
+ def to_summ_args(self, summargs):
+ for propncol in self.propncols:
+ summargs.add_condcol(propncol)
+ summargs.set_kw('proportions', True)
+
+
+class RDiscreteCondCol(RCondCol):
+ def __init__(self, ds, colname,
+ **kwargs):
+ # Logscale?
+ super(RDiscreteCondCol, self).__init__(ds, colname, **kwargs)
+
+ def discrete_col_to_R(self, col, rdata,
+ suppress_all_value=False, suppress_missing=False):
+ levels = []
+ labels = []
+ col_levels = col.inverted.keys()
+ if suppress_all_value and col.all_value in col.inverted:
+ col_levels.remove(col.all_value)
+ if col.is_ordered():
+ col_levels.sort()
+ for l in col_levels:
+ if l is not None or not suppress_missing:
+ labels.append(col.do_outtrans(l))
+ levels.append(l)
+ if col.is_datetimetype():
+ rdata = r.as_POSIXct(rdata)
+ levels = r.as_POSIXct(rconv.Date_to_R(levels))
+ labels = [col.do_format(l) for l in labels]
+ if labels:
+ wrapper = textwrap.TextWrapper(width=20)
+ labels = [wrapper.wrap(str(label))[:2]
+ for label in labels]
+ maxlen = max([len(line)
+ for label in labels
+ for line in label])
+ labels = ['\n'.join(label) for label in labels]
+ if maxlen > 5:
+ self.long_labels = True
+ return r.factor(rdata, levels=levels, labels=labels,
+ ordered=col.is_ordered())
+
+ def continuous_col_to_R(self, col, rdata):
+ # equal.count returns an R shingle
+ return r.equal_count(rdata)
+
+ def to_R(self, ds, dates_as_factor, suppress_all_value=True, **kwargs):
+ if not self.colname:
+ return
+ col = ds[self.colname]
+ if col.is_datetimetype():
+ rdata = rconv.Missing_Date_to_R(col)
+ if not dates_as_factor:
+ return rdata
+ elif MA.isMaskedArray(col.data):
+ if Numeric.alltrue(col.data.mask()):
+ raise PlotError('%r: all data-points masked' % col.name)
+ rdata = rconv.MA_to_R(col.data)
+ else:
+ rdata = col.data
+ if col.is_discrete():
+ if 0 and suppress_all_value:
+ row_vecs = [vec for value, vec in col.inverted.items()
+ if value != col.all_value]
+ row_vec = union(*row_vecs)
+ rdata = MA.take(rdata, row_vec)
+ return self.discrete_col_to_R(col, rdata, suppress_all_value)
+ else:
+ return self.continuous_col_to_R(col, rdata)
+
+
+class RGroupByCol(RDiscreteCondCol):
+ keyarg_map = {
+ 'xyplot': {'rectangles': False, 'lines': True, 'points': False},
+ 'barchart': {'rectangles': True, 'lines': False, 'points': False},
+ 'dotplot': {'rectangles': False, 'lines': False, 'points': True},
+ 'splom': {'rectangles': False, 'lines': False, 'points': True},
+ }
+
+ def __init__(self, ds, groupby=None, stackby=None, **kwargs):
+ assert groupby or stackby
+ super(RGroupByCol, self).__init__(ds, stackby or groupby, **kwargs)
+ self.stack = bool(stackby)
+
+ def to_r_args(self, ds, plot, r_args, axisname, npanels):
+ super(RGroupByCol, self).to_r_args(ds, plot, r_args, axisname, npanels)
+ rdata = super(RGroupByCol, self).to_R(ds, dates_as_factor=True)
+ key_args = dict(self.keyarg_map[plot.rmethod])
+ levels = r.levels(rdata)
+ if len(levels) >= 15:
+ key_args['columns'] = 3
+ elif len(levels) >= 10:
+ key_args['columns'] = 2
+ r_args['groups'] = rdata
+ r_args['key'] = r.simpleKey(levels, **key_args)
+ if self.stack:
+ r_args['stack'] = True
+
+ def rname(self):
+ return None
+
diff --git a/SOOMv0/Plot/rconv.py b/SOOMv0/Plot/rconv.py
new file mode 100644
index 0000000..2981a50
--- /dev/null
+++ b/SOOMv0/Plot/rconv.py
@@ -0,0 +1,126 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: rconv.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/Plot/rconv.py,v $
+
+# 3rd Party modules
+from rpy import *
+import MA, Numeric
+from mx import DateTime
+
+# SOOM bits
+from SOOMv0.common import *
+from SOOMv0 import DataTypes
+
+r_datefmt = '%Y-%m-%d %H:%M:%S'
+MissingDate = DateTime.DateTime(0).strftime(r_datefmt)
+
+def rname(name):
+ """
+ Make a Python symbol name safe as an R symbol name
+ """
+ return name.replace('_', '.').replace('-', '.')
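+# e.g. rname('age_grp_5yr') -> 'age.grp.5yr'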
+
+def MA_to_R(data):
+ """
+ Given an MA array, return an R array with "missing" values
+ """
+ mask = data.mask()
+ if mask is None:
+ return data.filled()
+ try:
+ # We have to do this within R because the assignment to is.na is a
+ # very strange beast, and there is no obvious way to do it directly
+ # via rpy.
+ r.assign('mask', Numeric.nonzero(mask) + 1)
+ r.assign('data', data.filled())
+ r('is.na(data)<-mask')
+ return r('data')
+ finally:
+ r.remove('data')
+ r.remove('mask')
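+
+# For example (hypothetical data):
+#     MA_to_R(MA.masked_values([1., 2., 3.], 2.))
+# returns the R vector c(1, NA, 3).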
+
+def Date_to_R(data):
+ return r.strptime([d.strftime(r_datefmt) for d in data], r_datefmt)
+
+def Missing_Date_to_R(data):
+ """
+ Given an array (list) of mx.DateTime or None objects, return
+ an R time array with missing values.
+ """
+ dates = []
+ mask = []
+ for i in xrange(len(data)):
+ d = data[i]
+ if d is None:
+ mask.append(i+1)
+ dates.append(MissingDate)
+ else:
+ dates.append(d.strftime(r_datefmt))
+ try:
+ r.assign('mask', mask)
+ r.assign('data', r.strptime(dates, r_datefmt))
+ r('is.na(data)<-mask')
+ return r('data')
+ finally:
+ r.remove('data')
+ r.remove('mask')
+
+def rwrap(text, cex=1):
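+    # Wrap text using R's own strwidth() measurements so the wrapping
+    # adapts to the current device and font: a line is broken once its
+    # accumulated width (scaled by cex) would reach ~0.8 figure widths.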
+ lines = []
+ rmode = get_default_mode()
+ try:
+ set_default_mode(BASIC_CONVERSION)
+ for line in text.splitlines():
+ words = line.split()
+ wordwidth = r.strwidth(words, units='fig')
+ if not isinstance(wordwidth, list):
+ wordwidth = [wordwidth]
+ partial = []
+ partial_width = 0
+ for word, width in zip(words, wordwidth):
+ if partial_width + width * cex >= 0.8:
+ lines.append(' '.join(partial))
+ partial = []
+ partial_width = 0
+ partial.append(word)
+ partial_width += width * cex
+ if partial:
+ lines.append(' '.join(partial))
+ finally:
+ set_default_mode(rmode)
+ return '\n'.join(lines)
+
+def summ_to_array(ds, measure, debug=False):
+ from SOOMv0.CrossTab import CrossTab
+
+ ct = CrossTab.from_summset(ds)
+ data = ct[measure].data
+ if MA.isMaskedArray(data):
+ data = data.filled() # Dealing with frequencies, so this is valid.
+ data = data.astype(Numeric.Float64)
+ if debug: print type(data)
+ if debug: print data
+ array = r.as_array(data)
+ dimnames = []
+ labels = []
+ for axis in ct.axes:
+ dimnames.append([axis.col.do_outtrans(v) for v in axis.values])
+ labels.append(axis.col.label or axis.col.name)
+ dimnames = r.list(*dimnames)
+ dimnames = r.names__(dimnames, labels)
+ array = r.dimnames__(array, dimnames)
+ if debug: r.print_(array)
+ return array
diff --git a/SOOMv0/Plot/rplot.py b/SOOMv0/Plot/rplot.py
new file mode 100644
index 0000000..4a27921
--- /dev/null
+++ b/SOOMv0/Plot/rplot.py
@@ -0,0 +1,663 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: rplot.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/Plot/rplot.py,v $
+
+# Standard Python modules
+import sys
+import textwrap
+import sets
+
+# 3rd Party modules
+import RandomArray
+from rpy import *
+
+# SOOM bits
+from SOOMv0.SummaryStats import freq
+from SOOMv0.Filter import sampled_ds
+from SOOMv0.common import *
+
+# SOOM.Plot bits
+from SOOMv0.Plot import panelfn, rconv
+import raxis
+import _output
+
+class RArgs(dict):
+ """
+ Container for arguments to be passed to the R plot function.
+ """
+
+
+class SummArgs:
+ """
+ Container for arguments to be passed to the SOOM summ() method.
+ """
+ def __init__(self, *args, **kwargs):
+ self.condcols = []
+ self.kwargs = dict(*args, **kwargs)
+
+ def set_kw(self, name, value):
+ self.kwargs[name] = value
+
+ def add_condcol(self, condcol):
+ if condcol and condcol != '_freq_':
+ try:
+ i = self.condcols.index(condcol)
+ except ValueError:
+ self.condcols.append(condcol)
+ else:
+            # condcol() objects override plain-string specifications
+ if not isinstance(condcol, basestring):
+ self.condcols[i] = condcol
+
+
+class RPlotBase(object):
+ dates_as_factor = False
+
+ def __init__(self, ds, debug=False):
+ if not hasattr(ds, 'name') or not hasattr(ds, 'get_column'):
+ raise AttributeError('first argument must be a dataset')
+ if not ds:
+ raise PlotError('dataset is empty')
+ self.debug = debug
+ self.r_args = RArgs()
+ self.filter_args = {}
+ self.condcols = []
+ self.title = None
+ self.footer = None
+
+ def add_percent(self, ds, **kwargs):
+ self.condcols.append(raxis.RPercentAxisCol(ds, **kwargs))
+
+ def add_density(self, ds, **kwargs):
+ self.condcols.append(raxis.RDensityAxisCol(ds, **kwargs))
+
+ def add_continuous(self, ds, colname, **kwargs):
+ self.condcols.append(raxis.RContinuousCondCol(ds, colname, **kwargs))
+
+ def add_discrete(self, ds, colname, **kwargs):
+ self.condcols.append(raxis.RDiscreteCondCol(ds, colname, **kwargs))
+
+ def add_any(self, ds, colname, **kwargs):
+ if ds[colname].is_discrete():
+ self.add_discrete(ds, colname, **kwargs)
+ else:
+ self.add_continuous(ds, colname, **kwargs)
+
+ def add_filter(self, **kwargs):
+ self.filter_args = kwargs
+
+ def set_params(self, title=None, footer=None, debug=None):
+ if title is not None:
+ self.title = title.replace('\r', '')
+ if footer is not None:
+ self.footer = footer.replace('\r', '')
+ if debug is not None:
+ self.debug = debug
+
+ def get_title(self, ds):
+ if self.title is not None:
+ return self.title
+ labels = []
+ cols = sets.Set()
+ for condcol in self.condcols:
+ if condcol.colname not in cols:
+ label = condcol.title(ds)
+ if label:
+ cols.add(condcol.colname)
+ labels.append(label)
+ title = ' by '.join(labels)
+ if self.label:
+ title = '%s of %s' % (self.label, title)
+ return title
+
+ def get_footer(self, ds):
+ if self.footer is not None:
+ return self.footer
+ return str(ds.describe(NO_DETAIL))
+
+ def get_filtered_ds(self, ds):
+ filtered_ds = ds.filter(kwargs=dict(self.filter_args))
+ if not filtered_ds:
+ raise PlotError('filter returns no records')
+ return filtered_ds
+
+ def to_r_args(self, ds):
+ title = str(self.get_title(ds))
+ if title:
+ self.r_args['main'] = rconv.rwrap(title, 1.2)
+ footer = self.get_footer(ds)
+ if footer:
+ self.r_args['sub'] = rconv.rwrap(footer)
+
+ def get_rmethod(self):
+ return getattr(r, self.rmethod)
+
+
+class SummPlotMixin(object):
+ def __init__(self, ds):
+ super(SummPlotMixin, self).__init__(ds)
+ self.weightcol = None
+
+ def set_params(self, weightcol=None, **kwargs):
+ super(SummPlotMixin, self).set_params(**kwargs)
+ if weightcol:
+ self.weightcol = weightcol
+
+ def get_weightcol(self, ds):
+ if self.weightcol:
+ return self.weightcol
+ return ds.weightcol
+
+ def get_filtered_ds(self, ds):
+ if ds.is_summarised():
+ summ_ds = ds
+ else:
+ summargs = SummArgs(self.filter_args,
+ weightcol=self.get_weightcol(ds),
+ nomt=True)
+ for condcol in self.condcols:
+ condcol.to_summ_args(summargs)
+ if self.debug: print 'summ:', summargs
+ summ_ds = ds.summ(*summargs.condcols, **summargs.kwargs)
+ if not summ_ds:
+ raise PlotError('summarisation yields an empty dataset')
+ return summ_ds
+
+
+class RPlot(RPlotBase):
+ """
+ Interface to traditional R plots (no panelling)
+ """
+
+ def plot(self, ds):
+ _output.dev.new_page()
+
+ rpy_mode = get_default_mode()
+ try:
+ # Don't convert R objects to Python objects by default
+ set_default_mode(NO_CONVERSION)
+ rmethod = self.get_rmethod()
+ self.to_r_args(ds)
+            # NB: no current plot method exercises this path (piechart,
+            # the only RPlot subclass, is unimplemented). Build the data
+            # frame from the condition columns and pass it to the R
+            # plotting function.
+            frame = {}
+            for condcol in self.condcols:
+                condcol.to_frame(frame, ds,
+                                 dates_as_factor=self.dates_as_factor)
+            r.print_(rmethod(data=r.data_frame(**frame), **self.r_args))
+ finally:
+ set_default_mode(rpy_mode)
+
+
+class RLatticePlotBase(RPlotBase):
+ """
+ Interface to R lattice (panelling) plots
+ """
+ def __init__(self, ds):
+ super(RLatticePlotBase, self).__init__(ds)
+ self.line_width = 2
+ self.line_style = 1
+ self.point_style = 19
+ self.horizontal = False
+ self.layout = None
+ self.xlim = self.ylim = None
+ self.grouping = False
+ self.conflev = None
+
+ def get_title(self, ds):
+ title = super(RLatticePlotBase, self).get_title(ds)
+ if self.conflev is not None:
+ title += ' (%g%% conf. limits)' % (self.conflev * 100)
+ return title
+
+ def _get_model(self):
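+        # Builds an R formula such as 'a ~ b | c * d' from the condcol
+        # rnames; the first two terms are swapped for horizontal plots,
+        # and a null rname yields a one-sided formula such as ' ~ b'
+        # (as used by histogram and densityplot).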
+ mod_str = []
+ for condcol in self.condcols:
+ rname = condcol.rname()
+ if rname is None:
+ continue
+ if len(mod_str) == 1:
+ mod_str.append('~')
+ elif len(mod_str) == 3:
+ mod_str.append('|')
+ elif len(mod_str) > 3:
+ mod_str.append('*')
+ mod_str.append(rname)
+ if self.horizontal:
+ mod_str[0], mod_str[2] = mod_str[2], mod_str[0]
+ mod_str = ' '.join(mod_str)
+ if self.debug: print >> sys.stderr, 'model: %r' % mod_str
+ try:
+ return r(mod_str)
+ except:
+ exc_type, exc_val, exc_tb = sys.exc_info()
+ try:
+ raise exc_type, '%s (mod_str: %s)' % (exc_val, mod_str), exc_tb
+ finally:
+ del exc_type, exc_val, exc_tb
+
+ def _get_frame(self, ds):
+ frame = {}
+ for i, condcol in enumerate(self.condcols):
+ dates_as_factor = i > 1 or self.dates_as_factor
+ condcol.to_frame(frame, ds, dates_as_factor=dates_as_factor)
+ if self.debug:
+ for key, value in frame.items():
+ if isinstance(value, type(r.eval)):
+ print >> sys.stderr, 'frame: %r: ' % key
+ r.print_(value)
+ else:
+ print >> sys.stderr, 'frame: %r: %r' % (key, value)
+ try:
+ return r.data_frame(**frame)
+        except Exception:
+            print >> sys.stderr, 'Frame:'
+            cols = frame.keys()
+            cols.sort()
+            for col in cols:
+                print >> sys.stderr, '--- %r ---' % col
+                r.print_(frame[col])
+            raise
+
+ def _set_trellis_params(self, value, attr, *params):
+ trellis_params = with_mode(BASIC_CONVERSION, r.trellis_par_get)()
+ for param_name in params:
+ param = trellis_params[param_name]
+ if isinstance(param[attr], list):
+ param[attr] = [value] * len(param[attr])
+ else:
+ param[attr] = value
+ with_mode(BASIC_CONVERSION, r.trellis_par_set)(param_name, param)
+
+ def _set_line_width(self, line_width=2):
+ self._set_trellis_params(float(line_width), 'lwd',
+ 'plot.line', 'superpose.line', 'add.line',
+ 'box.rectangle', 'box.umbrella')
+
+ def _set_line_style(self, line_style=1):
+ self._set_trellis_params(line_style, 'lty',
+ 'plot.line', 'superpose.line', 'add.line',
+ 'box.rectangle', 'box.umbrella')
+
+ def _set_point_style(self, point_style=19):
+ self._set_trellis_params(point_style, 'pch',
+ 'dot.symbol', 'plot.symbol',
+ 'superpose.symbol')
+
+ def set_params(self, line_width=None, line_style=None, point_style=None,
+ horizontal=None, vertical=None, layout=None,
+ xlim=None, ylim=None, conflev=None, **kwargs):
+ super(RLatticePlotBase, self).set_params(**kwargs)
+ if line_width is not None:
+ self.line_width = line_width
+ if line_style is not None:
+ self.line_style = line_style
+ if point_style is not None:
+ self.point_style = point_style
+ if horizontal is not None:
+ self.horizontal = horizontal
+ if vertical is not None:
+ self.horizontal = not vertical
+ if layout is not None:
+ self.layout = layout
+ if xlim is not None:
+ self.xlim = xlim
+ if ylim is not None:
+ self.ylim = ylim
+ if conflev is not None:
+ if conflev <= 0 or conflev > 1:
+ raise PlotError('conflev argument must be between 0 and 1')
+ self.conflev = conflev
+
+ def to_r_args(self, ds):
+ super(RLatticePlotBase, self).to_r_args(ds)
+ npanels = 1
+ for condcol in self.condcols[2:]:
+ if condcol.colname and ds[condcol.colname].is_discrete():
+ npanels *= ds[condcol.colname].cardinality()
+ if self.layout is not None:
+ self.r_args['layout'] = self.layout
+ if self.xlim is not None:
+ self.r_args['xlim'] = self.xlim
+ if self.ylim is not None:
+ self.r_args['ylim'] = self.ylim
+        self.r_args['par_strip_text'] = r.list(lines=2, cex=0.8)
+ for i, condcol in enumerate(self.condcols):
+ axis = None
+ if self.horizontal:
+ if i == 0: axis = 'x'
+ elif i == 1: axis = 'y'
+ else:
+ if i == 0: axis = 'y'
+ elif i == 1: axis = 'x'
+ condcol.to_r_args(ds, self, self.r_args, axis, npanels)
+
+ def _lattice_init(self):
+ r.library('lattice')
+ _output.dev.new_page(r.trellis_device)
+        # set the background to white rather than the default grey
+ r('trellis.par.set(theme=list(background = list(col = "white")))')
+
+ self._set_line_width(self.line_width)
+ self._set_line_style(self.line_style)
+ self._set_point_style(self.point_style)
+
+
+ def _rplot(self, model, *args, **kwargs):
+ rmethod = self.get_rmethod()
+ try:
+ r.print_(rmethod(model, *args, **kwargs), newpage=False)
+ except RException, e:
+ if self.debug:
+ print 'Method:', self.rmethod
+ print 'KWargs:', kwargs
+ print 'Model:'
+ r.print_(model)
+ #print 'Data:'
+ #r.print_(bcd)
+ raise PlotError('R: %s' % e)
+
+
+ def plot(self, ds):
+ if not self.condcols:
+ raise PlotError('No columns to plot?')
+ self._lattice_init()
+ filtered_ds = self.get_filtered_ds(ds)
+
+ rpy_mode = get_default_mode()
+ try:
+ set_default_mode(NO_CONVERSION)
+ model = self._get_model()
+ frame = self._get_frame(filtered_ds)
+ self.to_r_args(filtered_ds)
+ if self.debug: print 'r_args:', self.r_args
+ # Most plot types are satisfied with the frame being passed via the
+ # "data" argument, but matrix scatter plots and CI bars need to
+ # access it directly, so we put it into the R environment.
+ r.assign('plotframe', frame)
+ try:
+ self._rplot(model, data=frame, **self.r_args)
+ finally:
+ r.remove('plotframe')
+ _output.dev.done()
+ finally:
+ set_default_mode(rpy_mode)
+
+
+class RLatticePlot(RLatticePlotBase):
+ autosample = False # Sample if len(ds) > sample_target
+ sample_target = 100000 # Biggest vector R will handle quickly
+
+ def __init__(self, ds):
+ super(RLatticePlot, self).__init__(ds)
+ self.sample = None
+
+ def set_params(self, sample=None, conflev=None, **kwargs):
+ super(RLatticePlot, self).set_params(**kwargs)
+ if sample is not None:
+ if sample <= 0 or sample > 1:
+ raise PlotError('sample argument must be between 0 and 1')
+ self.sample = sample
+
+ def get_filtered_ds(self, ds):
+ ds = super(RLatticePlot, self).get_filtered_ds(ds)
+ if self.sample is not None:
+ if self.sample < 1:
+ ds = sampled_ds(ds, self.sample)
+ elif self.autosample:
+ if len(ds) > self.sample_target:
+ ds = sampled_ds(ds, float(self.sample_target) / len(ds))
+ return ds
+
+
+class RLatticeMatrixPlot(RLatticePlot):
+
+ def add_groupby(self, ds, groupby=None, stackby=None, **kwargs):
+ if groupby or stackby:
+ self.condcols.append(raxis.RGroupByCol(ds, groupby=groupby,
+ stackby=stackby, **kwargs))
+
+ def to_r_args(self, ds):
+ super(RLatticeMatrixPlot, self).to_r_args(ds)
+        # xlab and ylab don't make sense for this plot type
+        self.r_args.pop('xlab', None)
+        self.r_args.pop('ylab', None)
+ self.r_args['varnames'] = [condcol.title(ds)
+ for condcol in self.condcols
+ if condcol.rname()]
+
+ def _get_model(self):
+ return r('~plotframe')
+
+
+class RLatticeBinPlot(RLatticePlot):
+
+ def __init__(self, ds):
+ super(RLatticeBinPlot, self).__init__(ds)
+ self.bins = None
+
+    def set_params(self, bins=None, **kwargs):
+        if kwargs.has_key('horizontal'):
+            raise AttributeError('"horizontal" not supported')
+        super(RLatticeBinPlot, self).set_params(**kwargs)
+        if bins is not None:
+            self.bins = bins
+
+ def to_r_args(self, ds):
+ super(RLatticeBinPlot, self).to_r_args(ds)
+ if self.bins is not None:
+ self.r_args['n'] = int(self.bins)
+
+
+class RLatticeBoxPlot(RLatticePlot):
+
+ def __init__(self, ds):
+ super(RLatticeBoxPlot, self).__init__(ds)
+ self.notches = True
+ self.outliers = True
+ self.variable_width = True
+ self.violins = False
+
+ def set_params(self, notches=None, outliers=None,
+ variable_width=None, violins=None, **kwargs):
+ super(RLatticeBoxPlot, self).set_params(**kwargs)
+ if notches is not None:
+ self.notches = notches
+ if outliers is not None:
+ self.outliers = outliers
+ if variable_width is not None:
+ self.variable_width = variable_width
+ if violins is not None:
+ self.violins = violins
+
+ def to_r_args(self, ds):
+ super(RLatticeBoxPlot, self).to_r_args(ds)
+ self.r_args['varwidth'] = bool(self.variable_width)
+ self.r_args['horizontal'] = bool(self.horizontal)
+        # The following two don't work - presumably bwplot doesn't
+        # support them, although the core boxplot does.
+ self.r_args['do_conf'] = bool(self.notches)
+ self.r_args['do_out'] = bool(self.outliers)
+ if self.violins:
+ self.r_args['panel'] = panelfn.violin_panel()
+
+
+class RLatticeSummPlot(SummPlotMixin,RLatticePlotBase):
+
+ def __init__(self, ds):
+ super(RLatticeSummPlot, self).__init__(ds)
+ self.pack = False
+
+ def set_params(self, pack=None, **kwargs):
+ super(RLatticeSummPlot, self).set_params(**kwargs)
+ if pack is not None:
+ self.pack = pack
+
+# def get_footer(self, ds):
+# # We need to find a way to get the source dataset description,
+# # but include the filter description (i.e., the description of
+# # the filtered_ds internal to the summ method).
+# return ds.short_description()
+
+ def add_measure(self, ds, measure=None, axislabel=None, **kwargs):
+ if measure is None:
+ measure = '_freq_'
+ # AM - This was a mistake:
+ # if type(measure) in (str, unicode) and measure != '_freq_':
+ # measure = [measure] # implicit proportion
+ weightcol=self.get_weightcol(ds)
+ if type(measure) in (list, tuple):
+ measurecol = raxis.RPropnMeasureCol(ds, measure,
+ axislabel=axislabel,
+ **kwargs)
+ else:
+ if isinstance(measure, basestring):
+ colname = measure
+ else:
+ # should be a stat method
+ try:
+ get_statcolname = measure.get_statcolname
+ except AttributeError:
+ raise AttributeError('measure column should be a stat '
+ 'method, string column name, or '
+ 'proportion list')
+ else:
+ colname = get_statcolname(weightcol)
+ if not axislabel:
+ axislabel = measure.get_label(ds, weightcol)
+ try:
+ self.conflev = measure.conflev
+ except AttributeError:
+ pass
+
+ measurecol = raxis.RMeasureCondCol(ds, colname, measure,
+ axislabel=axislabel, **kwargs)
+ self.condcols.append(measurecol)
+
+ def add_groupby(self, ds, groupby=None, stackby=None, **kwargs):
+ if groupby or stackby:
+ self.condcols.append(raxis.RGroupByCol(ds, groupby=groupby,
+ stackby=stackby, **kwargs))
+ self.grouping = True
+
+ def to_r_args(self, ds):
+ super(RLatticeSummPlot, self).to_r_args(ds)
+ self.r_args['horizontal'] = bool(self.horizontal)
+ if self.pack:
+ self.r_args['box_ratio'] = sys.maxint
+
+
+class RLatticeCatPlot(RLatticeSummPlot):
+ dates_as_factor = True
+
+ def __init__(self, ds):
+ super(RLatticeCatPlot, self).__init__(ds)
+ self.origin = 0
+ self.reference = True
+ self.point_size = 2
+
+ def set_params(self, origin=None, reference=None,
+ point_size=None, **kwargs):
+ super(RLatticeCatPlot, self).set_params(**kwargs)
+ if origin is not None:
+ self.origin = origin
+ if reference is not None:
+ self.reference = reference
+ if point_size is not None:
+ self.point_size = point_size
+
+ def to_r_args(self, ds):
+ super(RLatticeCatPlot, self).to_r_args(ds)
+        # These only work for barchart
+ self.r_args['origin'] = float(self.origin)
+ self.r_args['reference'] = bool(self.reference)
+ if 'stack' in self.r_args and 'panel' in self.r_args:
+ raise PlotError('%s plot does not allow stacking and confidence '
+ 'limits simultaneously' % self.label)
+
+ # Doesn't work
+ self._set_trellis_params(self.point_size, 'cex', 'dot.symbol')
+
+
+class TwoByTwoPlot(SummPlotMixin,RPlotBase):
+ def __init__(self, ds):
+ super(TwoByTwoPlot, self).__init__(ds)
+ self.margin = None
+ self.conflev = None
+ self.extended = None
+ self.std = None
+
+ def set_params(self, margin=None, conflev=None, extended=None,
+ std=None, **kwargs):
+ super(TwoByTwoPlot, self).set_params(**kwargs)
+ if margin is not None:
+ self.margin = margin
+ if conflev is not None:
+ self.conflev = conflev
+ if extended is not None:
+ self.extended = extended
+ if std is not None:
+ self.std = std
+
+ def to_r_args(self, ds):
+ super(TwoByTwoPlot, self).to_r_args(ds)
+ if self.margin is not None:
+ self.r_args['margin'] = self.margin
+ if self.conflev is not None:
+ self.r_args['conf_level'] = self.conflev
+ if self.extended is not None:
+ self.r_args['extended'] = bool(self.extended)
+ if self.std is not None:
+ self.r_args['std'] = self.std
+ self.r_args['newpage'] = False
+ if 'sub' in self.r_args:
+ # VCD plots currently don't support subtitles - hopefully will be
+ # fixed soon.
+ del self.r_args['sub']
+
+ def _vcd_init(self):
+ if 'vcd' not in r._packages(all=True):
+ raise PlotError('The required R "vcd" package is not installed')
+ try:
+ # The vcd import produces a lot of noise on startup - deadly to CGI
+ r.sink("/dev/null")
+ try:
+ r.library('vcd')
+ finally:
+ r.sink()
+ except RException, e:
+ raise PlotError('R: %s' % e)
+
+ def get_rmethod(self):
+ # Necessary because we can't otherwise specify arguments with
+ # underscores in their name.
+ return r('function(x, conf.level=0.95, ...) { %s(x, conf_level=conf.level, ...)}' % self.rmethod)
+
+ def plot(self, ds):
+ self._vcd_init()
+ _output.dev.new_page()
+ filtered_ds = self.get_filtered_ds(ds)
+
+ rpy_mode = get_default_mode()
+ try:
+ set_default_mode(NO_CONVERSION)
+ rmethod = self.get_rmethod()
+ self.to_r_args(filtered_ds)
+ data = rconv.summ_to_array(filtered_ds, '_freq_')
+ try:
+ r.print_(rmethod(data, **self.r_args))
+ except RException, e:
+ raise PlotError('R: %s' % e)
+ finally:
+ set_default_mode(rpy_mode)
+ _output.dev.done()
+
+
diff --git a/SOOMv0/PlotRegistry.py b/SOOMv0/PlotRegistry.py
new file mode 100644
index 0000000..cdfeb9e
--- /dev/null
+++ b/SOOMv0/PlotRegistry.py
@@ -0,0 +1,70 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: PlotRegistry.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/PlotRegistry.py,v $
+
+class _PlotMethod(object):
+ def __init__(self, name):
+ self.name = name
+ self._method = None
+
+ def get_method(self):
+ if self._method is None:
+ # Demand load - rpy import is slow
+ import SOOMv0.Plot.plotmethods
+ self._method = getattr(SOOMv0.Plot.plotmethods, self.name)
+ return self._method
+ method = property(get_method)
+
+ def __cmp__(self, other):
+ return cmp(self.method.label, other.method.label)
+
+class _PlotRegistry(object):
+ def __init__(self):
+ self.methods = {}
+
+ def get_output(self):
+ # Demand load - rpy import is slow
+ import SOOMv0.Plot._output
+ return SOOMv0.Plot._output.output
+ output = property(get_output)
+
+ def register(self, *args):
+ method = _PlotMethod(*args)
+ self.methods[method.name] = method
+
+ def _display_hook(self):
+ methods = self.methods.values()
+ methods.sort()
+ for method in methods:
+ print '%-20s %s' % (method.name, method.method.label)
+
+ def __getattr__(self, name):
+ try:
+ return self.methods[name].method
+ except KeyError:
+ raise AttributeError(name)
+
+
+plot = _PlotRegistry()
+plot.register('scatterplot')
+plot.register('scattermatrix')
+plot.register('boxplot')
+plot.register('histogram')
+plot.register('densityplot')
+plot.register('lineplot')
+plot.register('barchart')
+plot.register('dotchart')
+plot.register('fourfold')
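+
+# Plot methods are looked up as attributes of the registry, deferring the
+# slow rpy import until first use, e.g. (hypothetical dataset 'ds'):
+#
+#     from SOOMv0.PlotRegistry import plot
+#     plot.histogram(ds, 'age')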
diff --git a/SOOMv0/PrintDataset.py b/SOOMv0/PrintDataset.py
new file mode 100644
index 0000000..426dffb
--- /dev/null
+++ b/SOOMv0/PrintDataset.py
@@ -0,0 +1,88 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+"""
+Magic to take a dataset and pretty-print it in a form suitable for
+a terminal device.
+"""
+# $Id: PrintDataset.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/PrintDataset.py,v $
+
+import textwrap
+
+from SOOMv0 import soom
+
+__all__ = 'DSFormatter',
+
+class ColFormatter:
+ def __init__(self, col, low, high):
+ col.load("data")
+ # Format the column data as strings, determine if it should
+ # be right or left justified, and the maximum column width
+ self.right_just = col.is_numerictype()
+ self.fmtrows = []
+ for v in col[low:high]:
+ v = col.do_outtrans(v)
+ if self.right_just and type(v) in (str, unicode):
+ self.right_just = False
+ self.fmtrows.append(col.do_format(v))
+ colwidth = 0
+ if self.fmtrows:
+ colwidth = max([len(v) for v in self.fmtrows])
+ # Now format up the column label, attempting to minimise its
+ # length, even if that means wrapping it over multiple rows
+ labelwrap = textwrap.TextWrapper(width=max(colwidth, 16))
+ self.labelrows = labelwrap.wrap(col.label or col.name)
+ labelwidth = max([len(v) for v in self.labelrows])
+ self.colwidth = max(colwidth, labelwidth)
+
+ def insert_label_rows(self, label_row_count):
+ # After we've collected all the columns, we work out how many
+ # header rows will be required, then insert them before the data
+ # rows.
+ pad_needed = label_row_count - len(self.labelrows)
+ rule = '-' * self.colwidth
+ self.fmtrows[:0] = self.labelrows + [''] * pad_needed + [rule]
+
+ def __getitem__(self, i):
+ if self.right_just:
+ return self.fmtrows[i].rjust(self.colwidth)
+ else:
+ return self.fmtrows[i].ljust(self.colwidth)
+
+class DSFormatter:
+ """
+ Instances of this object are iterables that return pretty-printed
+ dataset rows.
+ """
+
+ def __init__(self, ds, colnames=None, low=None, high=None):
+ if colnames is None:
+ cols = ds.get_print_columns()
+ else:
+ cols = ds.get_columns(colnames)
+ cols = [c for c in cols if c.name != 'row_ordinal']
+ if soom.row_ordinals:
+ cols.insert(0, ds.get_column('row_ordinal'))
+ self.fmtcols = [ColFormatter(col, low, high) for col in cols]
+ label_row_count = max([len(fmtcol.labelrows)
+ for fmtcol in self.fmtcols])
+ for fmtcol in self.fmtcols:
+ fmtcol.insert_label_rows(label_row_count)
+
+ def __iter__(self):
+ rowcnt = min([len(colfmt.fmtrows) for colfmt in self.fmtcols])
+ for rownum in xrange(rowcnt):
+ row = [fmtcol[rownum] for fmtcol in self.fmtcols]
+ yield ' '.join(row)
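+
+# Typical use (illustrative):
+#
+#     for line in DSFormatter(ds, low=0, high=20):
+#         print line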
diff --git a/SOOMv0/Search.py b/SOOMv0/Search.py
new file mode 100644
index 0000000..8e5bba2
--- /dev/null
+++ b/SOOMv0/Search.py
@@ -0,0 +1,236 @@
+# vim: set ts=4 sw=4 et:
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: Search.py 3653 2008-12-24 03:06:36Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/Search.py,v $
+
+import re
+import sets
+import Numeric
+import soomfunc
+
+# A search expression tree is built up by the parser when the query is first
+# parsed, and is evaluated by the contains operator.
+#
+# Intermediate results are passed up the tree as a pair whose first element
+# is a Numeric.array of rows which have been hit, and whose second element is
+# a dictionary keyed by row, each value being a Numeric.array of the word
+# positions hit in that row. This allows the use of some of the soomfunc
+# operations on Numeric.array structures.
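+#
+# For example (illustrative), a query hitting rows 2 and 5, at word positions
+# 0 and 3 in row 2 and position 7 in row 5, would be represented as the pair:
+#
+#     (Numeric.array([2, 5]),
+#      {2: Numeric.array([0, 3]), 5: Numeric.array([7])})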
+
+class Disjunction:
+ # Used below to make rowwise unions easier
+ _EMPTY = Numeric.array([], Numeric.Int)
+
+ def __init__(self, part):
+ self.parts = [part]
+
+ def append(self, part):
+ self.parts.append(part)
+
+ def __call__(self, datasetColumn):
+ # compute the union of the part hit sets
+ rows, words = self.parts[0](datasetColumn)
+ for part in self.parts[1:]:
+ other_rows, other_words = part(datasetColumn)
+ rows = soomfunc.union(rows, other_rows)
+ # amalgamate word hits
+ new_words = {}
+ for r in rows:
+ new_words[r] = soomfunc.union(words.get(r, self._EMPTY),
+ other_words.get(r, self._EMPTY))
+ words = new_words
+ return rows, words
+
+ def __str__(self):
+ return "(%s)" % " | ".join(map(str, self.parts))
+
+class Conjunction:
+
+ # nearness masks
+ _BEFORE = 1
+ _AFTER = 2
+
+ DEFAULT_NEARNESS = 10
+
+ def __init__(self, op, lhs, rhs, nearness = DEFAULT_NEARNESS):
+ self.op = op
+ self.lhs = lhs
+ self.rhs = rhs
+ self.nearness = nearness
+
+ def __call__(self, datasetColumn):
+ lhsrowwords = self.lhs(datasetColumn)
+ rhsrowwords = self.rhs(datasetColumn)
+ if self.op == '&':
+ # & is rowwise intersection
+ return self.intersect(lhsrowwords, rhsrowwords)
+ if self.op == '&!':
+ # &! is rowwise set difference
+ return self.difference(lhsrowwords, rhsrowwords)
+ # before and after are a variation on near
+ if self.op == '<':
+ return self.near(lhsrowwords, rhsrowwords, mask = self._BEFORE)
+ if self.op == '>':
+ return self.near(lhsrowwords, rhsrowwords, mask = self._AFTER)
+ if self.op == '~':
+ return self.near(lhsrowwords, rhsrowwords)
+ raise NotImplementedError
+
+ def __str__(self):
+ if self.nearness != self.DEFAULT_NEARNESS:
+ op = "%s[%d]" % (self.op, self.nearness)
+ else:
+ op = self.op
+ return "(%s %s %s)" % (self.lhs, op, self.rhs)
+
+ def intersect(self, lhsrowwords, rhsrowwords):
+ # find matching rows
+ rows = soomfunc.intersect(lhsrowwords[0], rhsrowwords[0])
+ lhswords = lhsrowwords[1]
+ rhswords = rhsrowwords[1]
+ # amalgamate word hits
+ words = {}
+ for r in rows:
+ words[r] = soomfunc.union(lhswords[r], rhswords[r])
+ return rows, words
+
+ def difference(self, lhsrowwords, rhsrowwords):
+ # find residual rows
+ rows = soomfunc.difference(lhsrowwords[0], rhsrowwords[0])
+ # use only lhs word hits
+ lhswords = lhsrowwords[1]
+ words = {}
+ for r in rows:
+ words[r] = lhswords[r]
+ return rows, words
+
+ def near(self, lhsrowwords, rhsrowwords, mask = _BEFORE | _AFTER):
+ # only check matching rows
+ rows = soomfunc.intersect(lhsrowwords[0], rhsrowwords[0])
+ lhswords = lhsrowwords[1]
+ rhswords = rhsrowwords[1]
+ words = {}
+ _BEFORE = self._BEFORE
+ _AFTER = self._AFTER
+ nearness = self.nearness
+ for row in rows:
+ hits = sets.Set()
+ # this is O(n*n) and could be improved
+ # find all hits and then remove duplicates
+ for left in lhswords[row]:
+ for right in rhswords[row]:
+ if (mask & _BEFORE) and right - nearness <= left <= right:
+ hits.add(left)
+ hits.add(right)
+ if (mask & _AFTER) and right <= left <= right + nearness:
+ hits.add(left)
+ hits.add(right)
+ if hits:
+ hits = list(hits)
+ hits.sort()
+ words[row] = Numeric.array(hits, Numeric.Int)
+ # remove rows that have no hits left
+ rows = Numeric.array(filter(lambda r: r in words, rows), Numeric.Int)
+ return rows, words
+
+class Phrase:
+ def __init__(self, words):
+ self.words = words
+
+ def __call__(self, datasetColumn):
+ # a phrase is a conjunction with the added constraint that words
+ # must exactly follow one another
+ rowwords = self.words[0](datasetColumn)
+ for word in self.words[1:]:
+ otherwords = word(datasetColumn)
+ rowwords = self.follow(rowwords, otherwords)
+ if not rowwords:
+ # nothing left
+ break
+ return rowwords
+
+ def follow(self, lhsrowwords, rhsrowwords):
+ # find matching rows
+ rows = soomfunc.intersect(lhsrowwords[0], rhsrowwords[0])
+ lhswords = lhsrowwords[1]
+ rhswords = rhsrowwords[1]
+ # amalgamate word hits
+ words = {}
+ for row in rows:
+ hits = sets.Set()
+ # this is O(n*n) and could be improved
+ # find all hits and remove duplicates
+ for left in lhswords[row]:
+ for right in rhswords[row]:
+ if right == left + 1:
+ hits.add(left)
+ hits.add(right)
+ if hits:
+ hits = list(hits)
+ hits.sort()
+ words[row] = Numeric.array(hits, Numeric.Int)
+ # remove rows that have no hits left
+ rows = Numeric.array(filter(lambda r: r in words, rows), Numeric.Int)
+ return rows, words
+
+ def __str__(self):
+ return '"%s"' % ' '.join(map(str, self.words))
+
+class Word:
+ def __init__(self, word):
+ self.word = soomfunc.strip_word(word)
+ if '*' in word:
+ self.re = re.compile('%s$' % self.word.replace('*', '.*'))
+ self.wildcard = True
+ else:
+ self.wildcard = False
+
+ def __call__(self, datasetColumn):
+ if self.wildcard:
+ return self.wild(datasetColumn)
+ v = datasetColumn.word_occurrences(self.word)
+ if v is None:
+ return [], {}
+ rows = []
+ wordOccurrences = {}
+ currentRow = None
+ for n in range(0, len(v), 2):
+ row, word = v[n:n + 2]
+ if row != currentRow:
+ rows.append(row)
+ wordOccurrences[row] = []
+ currentRow = row
+ wordOccurrences[row].append(word)
+ rows = Numeric.array(rows, Numeric.Int)
+ words = {}
+ for k, v in wordOccurrences.iteritems():
+ words[k] = Numeric.array(v, Numeric.Int)
+ return rows, words
+
+ def wild(self, datasetColumn):
+ # find all words matching this pattern and turn the word
+ # list into a disjunction
+ words = [ w for w in datasetColumn.wordidx.keys() if self.re.match(w) ]
+ if not words:
+ return [], {}
+ expr = Disjunction(Word(words[0]))
+ for word in words[1:]:
+ expr.append(Word(word))
+ return expr(datasetColumn)
+
+ def __str__(self):
+ return self.word
+
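+# Illustrative query examples (assuming a searchable text column `col`):
+#
+#     Word('asthma')(col)            # (rows, word positions) for "asthma"
+#     Word('wheez*')(col)            # wildcard expands to a Disjunction
+#     Phrase([Word('chest'), Word('pain')])(col)  # adjacent words only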
diff --git a/SOOMv0/Soom.py b/SOOMv0/Soom.py
new file mode 100644
index 0000000..099b876
--- /dev/null
+++ b/SOOMv0/Soom.py
@@ -0,0 +1,195 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: Soom.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/Soom.py,v $
+
+import sys
+import os
+import errno
+import re
+import logging
+from SOOMv0 import common, Utils
+
+class Soom(object):
+ """
+ Top-level SOOM configuration object
+
+ Attributes include:
+
+ messages If True, informational messages are emitted
+ via the SOOM logger.
+ row_ordinals If True, the row ordinal column is printed.
+ lazy_column_loading If True, column data is loaded on demand,
+ otherwise it is loaded with the dataset.
+ """
+
+ version_info = common.version_info
+ version = common.version
+
+ # AM - I'm not happy allowing '$' through, but it's used by the
+ # summ() method when generating a summary Dataset.
+ valid_identifier_re = re.compile('^[a-z_-][a-z0-9_$-]*$', re.IGNORECASE)
+
+ metadata_filename = 'Metadata.pickle'
+
+ # default path for persistent storage of SOOM objects
+ default_object_path = 'SOOM_objects'
+
+ def __init__(self):
+ self.init_logger()
+ self.messages = False
+ self.row_ordinals = True
+ self.lazy_column_loading = True
+ self.searchpath = [self.default_object_path]
+ self.writepath = None
+ self.nproc = 1
+ if os.access(self.searchpath[0], os.W_OK | os.X_OK):
+ self.writepath = self.searchpath[0]
+
+ def check_name_ok(self, name, msg):
+ if not name or not name.strip():
+ raise common.Error, '%s has no name' % msg
+ if not self.valid_identifier_re.match(name):
+ raise common.Error, \
+ '%s name "%s" contains illegal characters' % (msg, name)
+
+ def setpath(self, path, writepath = None):
+ if path is not None:
+ self.searchpath = [os.path.normpath(os.path.expanduser(p))
+ for p in path.split(':')]
+ if writepath:
+ self.writepath = writepath
+
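+ # Example (illustrative): search two object stores, writing to the first:
+ # soom.setpath('/data/soom_objects:/shared/soom_objects', '/data/soom_objects')
+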
+ def object_path(self, name, path = None):
+ if path is not None:
+ self.setpath(path)
+ for pathdir in self.searchpath:
+ if os.path.exists(os.path.join(pathdir, name)):
+ return pathdir
+
+ def find_metadata(self, prefix):
+ paths = []
+ for pathdir in self.searchpath:
+ pathdir = os.path.join(pathdir, prefix)
+ try:
+ dirlist = os.listdir(pathdir)
+ except OSError:
+ continue
+ for filename in dirlist:
+ pd = os.path.join(pathdir, filename)
+ if os.path.exists(os.path.join(pd, soom.metadata_filename)):
+ paths.append(pd)
+ return paths
+
+ def available_datasets(self):
+ datasets = []
+ for pathdir in self.searchpath:
+ try:
+ files = os.listdir(pathdir)
+ except OSError, (eno, estr):
+ if eno == errno.ENOENT:
+ self.warning('soompath: %r: %s - skipped' % (pathdir, estr))
+ continue
+ raise
+ for dsname in files:
+ metadatapath = os.path.join(pathdir, dsname,
+ soom.metadata_filename)
+ if os.access(metadatapath, os.R_OK):
+ datasets.append(dsname)
+ datasets.sort()
+ return datasets
+
+ def _display_hook(self):
+ print 'SOOM version (soom.version): %s' % self.version
+ print 'Informational messages (soom.messages): %s' % bool(self.messages)
+ print 'Print row ordinals (soom.row_ordinals): %s' % bool(self.row_ordinals)
+ print 'Lazy column loading (soom.lazy_column_loading): %s' % bool(self.lazy_column_loading)
+
+ def init_logger(self):
+ self.logger = logging.getLogger('SOOM')
+ self.default_handler = logging.StreamHandler()
+ if not hasattr(sys, 'ps1'):
+ # Not interactive, add timestamp
+ fmt_str = '%(asctime)s %(name)s %(levelname)s %(message)s'
+ else:
+ fmt_str = '%(name)s %(levelname)s %(message)s'
+ formatter = logging.Formatter(fmt_str)
+ self.default_handler.setFormatter(formatter)
+ self.logger.addHandler(self.default_handler)
+
+ def _set_messages(self, v):
+ if v:
+ self.logger.setLevel(logging.INFO)
+ else:
+ self.logger.setLevel(logging.WARNING)
+
+ def _get_messages(self):
+ return self.logger.getEffectiveLevel() < logging.WARNING
+
+ messages = property(_get_messages, _set_messages)
+
+ def add_logging_handler(self, handler):
+ self.logger.removeHandler(self.default_handler)
+ self.logger.addHandler(handler)
+
+ def critical(self, *args):
+ self.logger.critical(*args)
+
+ def exception(self, *args):
+ self.logger.exception(*args)
+
+ def error(self, *args):
+ self.logger.error(*args)
+
+ def warning(self, *args):
+ self.logger.warning(*args)
+
+ def info(self, *args):
+ self.logger.info(*args)
+
+ def debug(self, *args):
+ self.logger.debug(*args)
+
+ if sys.platform == 'linux2':
+ _pagesize = os.sysconf('SC_PAGESIZE')
+ _lastsz = 0
+ def mem_report(self):
+ if self.messages:
+ f = open('/proc/%d/statm' % os.getpid())
+ try:
+ statm = [int(n) * self._pagesize for n in f.read().split()]
+ finally:
+ f.close()
+ delta = statm[0] - self._lastsz
+ self._lastsz = statm[0]
+ self.info('mem delta: %dk, total: %dk' % (delta / 1024, self._lastsz / 1024))
+ else:
+ def mem_report(self):
+ pass
+ if 0:
+ def mem_report(self):
+ if self.messages:
+ labels = 'sz', 'res', 'share', 'txt', 'data', 'lib', 'dirty'
+ f = open('/proc/%d/statm' % os.getpid())
+ try:
+ statm = [int(n) * self._pagesize for n in f.read().split()]
+ finally:
+ f.close()
+ fields = ['%s %6.2fk' % (f, n / 1024)
+ for f, n in zip(labels, statm)]
+ self.info('Mem: %s' % ', '.join(fields))
+
+
+soom = Soom()
diff --git a/SOOMv0/SourceDataTypes.py b/SOOMv0/SourceDataTypes.py
new file mode 100644
index 0000000..bb55f5a
--- /dev/null
+++ b/SOOMv0/SourceDataTypes.py
@@ -0,0 +1,163 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+"""
+Support for converting data from source files into python types
+"""
+# $Id: SourceDataTypes.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/SourceDataTypes.py,v $
+
+import re
+import mx.DateTime
+
+__all__ = 'add_datatype', 'is_valid_datatype', 'convert_datatype', \
+ 'DateConvert'
+
+datatypes = {}
+
+def add_datatype(name, conversion_fn):
+ """
+ Register a datatype "name"
+
+ The supplied conversion function should accept a single
+ argument, being the data in source type, and return a python
+ type (in particular, a type supported by soomarray - the basic
+ python types int, float, str, tuple, list and dict, as well as
+ mx.DateTime types.
+ """
+ datatypes[name] = conversion_fn
+
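+# Example (illustrative): registering and using a custom datatype:
+#
+#     add_datatype('upperstr', lambda v: str(v).upper())
+#     convert_datatype('upperstr', 'icd9')        # -> 'ICD9'
+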
+def is_valid_datatype(name):
+ if not datatypes.has_key(name):
+ raise ValueError, 'invalid datatype: %s' % name
+
+def convert_datatype(datatype, value):
+ try:
+ cnvt = datatypes[datatype]
+ except KeyError:
+ raise TypeError, 'Unknown datatype: %s' % datatype
+ else:
+ return cnvt(value)
+
+def get_conversion(datatype):
+ return datatypes[datatype]
+
+def no_conversion(value):
+ return value
+
+add_datatype('int', int)
+add_datatype('long', long)
+add_datatype('str', str)
+add_datatype('float', float)
+add_datatype('tuple', tuple)
+add_datatype('recode', no_conversion)
+
+class DateTimeMagicBase:
+ rewrite_re = re.compile(r'(d+|m+|y+|H+|M+|S+|U+)')
+ def __init__(self, fmt):
+ def repl(match):
+ s = match.group(0)
+ return r'(?P<%s>\d{1,%d})' % (s[0], len(s))
+ self.fmt = fmt
+ self.extract_re = self.rewrite_re.sub(repl, self.fmt)
+ self.extract_re_c = re.compile('^' + self.extract_re)
+
+ def extract(self, s):
+ match = self.extract_re_c.match(s)
+ if not match:
+ raise ValueError, \
+ 'unable to parse date/time "%s" with format %s' % (s, self.fmt)
+ return match
+
+class DateConvert(DateTimeMagicBase):
+ def __call__(self, s):
+ if not s:
+ return None
+ if isinstance(s, mx.DateTime.DateTimeType):
+ return s
+ match = self.extract(s)
+ day, month, year = [int(match.group(f)) for f in 'dmy']
+ if year < 50:
+ year += 2000
+ elif year < 100:
+ year += 1900
+ return mx.DateTime.Date(year, month, day)
+
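+# Illustrative example: DateConvert('dd/mm/yyyy') rewrites its format into
+# the regexp (?P<d>\d{1,2})/(?P<m>\d{1,2})/(?P<y>\d{1,4}), so that:
+#
+#     DateConvert('dd/mm/yyyy')('25/12/2004')
+#     # -> mx.DateTime.Date(2004, 12, 25)
+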
+class DateTimeConvert(DateTimeMagicBase):
+ def __call__(self, s):
+ if not s:
+ return None
+ if isinstance(s, mx.DateTime.DateTimeType):
+ return s
+ match = self.extract(s)
+ day, month, year, hour, minute = [int(match.group(f)) for f in 'dmyHM']
+ try:
+ msec = '.' + match.group('U')
+ except IndexError:
+ msec = ''
+ try:
+ second = float(match.group('S') + msec)
+ except IndexError:
+ second = 0.0
+ if year < 50:
+ year += 2000
+ elif year < 100:
+ year += 1900
+ return mx.DateTime.DateTime(year, month, day, hour, minute, second)
+
+class TimeConvert(DateTimeMagicBase):
+ def __call__(self, s):
+ if not s:
+ return None
+ if isinstance(s, mx.DateTime.DateTimeType):
+ return s
+ match = self.extract(s)
+ hour, minute = int(match.group('H')), int(match.group('M'))
+ try:
+ msec = '.' + match.group('U')
+ except IndexError:
+ msec = ''
+ try:
+ second = float(match.group('S') + msec)
+ except IndexError:
+ second = 0.0
+ return mx.DateTime.Time(hour, minute, second)
+
+def get_format(datatype, format):
+ ctor_map = {
+ 'date': DateConvert,
+ 'datetime': DateTimeConvert,
+ 'time': TimeConvert,
+ }
+ try:
+ return datatypes[format]
+ except KeyError:
+ datatypes[format] = conversion = ctor_map[datatype](format)
+ return conversion
+
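+# Example (illustrative): get_format() builds a converter on first use and
+# caches it in the datatypes registry, so repeated calls with the same
+# format string reuse one converter:
+#
+#     cnvt = get_format('date', 'yyyy.mm.dd')
+#     cnvt('2004.12.25')          # -> mx.DateTime.Date(2004, 12, 25)
+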
+add_datatype('date', DateConvert('dd/mm/yyyy'))
+add_datatype('datetime', DateTimeConvert('dd/mm/yyyy HH:MM:SS'))
+add_datatype('recodedate', DateConvert('dd/mm/yyyy'))
+add_datatype('iso-date', DateConvert('yyyy-mm-dd'))
+add_datatype('us-date', DateConvert('mm/dd/yyyy'))
+add_datatype('iso-datetime', DateTimeConvert(r'yyyy-mm-dd\s+HH:MM:SS.UU'))
+add_datatype('iso-time', TimeConvert('HH:MM:SS.UU'))
+
+# AM - I thought the performance cost of the magic above might be too much, so
+# I tried the following - the difference was barely measurable.
+#
+#def simpledate(value):
+# day, month, year = value.split('/')
+# return mx.DateTime.Date(int(year), int(month), int(day))
+#add_datatype('date', simpledate)
diff --git a/SOOMv0/Sources/All.py b/SOOMv0/Sources/All.py
new file mode 100644
index 0000000..f20d2f0
--- /dev/null
+++ b/SOOMv0/Sources/All.py
@@ -0,0 +1,20 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: All.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/Sources/All.py,v $
+
+# for backward compatibility - you probably don't want to use this
+from SOOMv0.Sources.CSV import CSVDataSource
+from SOOMv0.Sources.Columns import ColumnDataSource
diff --git a/SOOMv0/Sources/CSV.py b/SOOMv0/Sources/CSV.py
new file mode 100644
index 0000000..029b070
--- /dev/null
+++ b/SOOMv0/Sources/CSV.py
@@ -0,0 +1,79 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: CSV.py 2733 2007-07-13 07:28:35Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/Sources/CSV.py,v $
+
+# Standard modules
+import csv
+
+# SOOMv0 modules
+from SOOMv0.Sources.common import *
+
+__all__ = 'CSVDataSource', 'HeaderCSVDataSource'
+
+class CSVDataSource(TextDataSourceBase):
+ """
+ Iterable class to load DataSource into a DataSet from a CSV file
+ """
+
+ def __init__(self, name, columns, filename, **kwargs):
+ # Separate csv.reader args from data source args:
+ readerkw = {}
+ for arg in dir(csv.Dialect):
+ if arg.startswith('_') or arg not in kwargs:
+ continue
+ readerkw[arg] = kwargs.pop(arg)
+ TextDataSourceBase.__init__(self, name, columns, filename, **kwargs)
+ self.type = 'comma-separated values text file'
+ self.csv_reader = csv.reader(self.get_file_iter(), **readerkw)
+ self.skip_header_rows(self.csv_reader)
+
+ def next_rowdict(self):
+ """Method to read a row from an initialised csv data source"""
+ # read a line
+ fields = self.csv_reader.next()
+ rowdict = {}
+ if fields:
+ for col in self.columns:
+ if col.ordinalpos is not None:
+ i = col.ordinalpos - col.posbase
+ try:
+ rowdict[col.name] = fields[i]
+ except IndexError, e:
+ raise IndexError('%s: %s (want %d, have %d fields)' %
+ (col.name, e, i, len(fields)))
+ return rowdict
+
+ def close(self):
+ self.csv_reader = None
+
+
+class HeaderCSVDataSource(CSVDataSource):
+ """
+ Iterable class to load DataSource into a DataSet from a CSV
+ file where the first line of the csv file defines column names.
+ """
+
+ def __init__(self, name, columns = [], **kwargs):
+ CSVDataSource.__init__(self, name, columns, **kwargs)
+ self.col_map = None
+
+ def next_rowdict(self):
+ """Method to read a row from an initialised csv data source"""
+ # read a line
+ if self.col_map is None:
+ self.col_map = self.csv_reader.next()
+ fields = self.csv_reader.next()
+ return dict(zip(self.col_map, fields))
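+
+# Usage sketch (illustrative; assumes a list of DataSourceColumn
+# definitions `cols` describing the fields of interest):
+#
+#     src = CSVDataSource('deaths', cols, filename='deaths.csv',
+#                         header_rows=1)
+#     for rowdict in src:
+#         pass # feed each row dictionary to a dataset loader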
diff --git a/SOOMv0/Sources/Columns.py b/SOOMv0/Sources/Columns.py
new file mode 100644
index 0000000..834bc26
--- /dev/null
+++ b/SOOMv0/Sources/Columns.py
@@ -0,0 +1,48 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: Columns.py 2733 2007-07-13 07:28:35Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/Sources/Columns.py,v $
+
+# SOOMv0 modules
+from SOOMv0.Sources.common import *
+
+__all__ = 'ColumnDataSource',
+
+class ColumnDataSource(TextDataSourceBase):
+ def __init__(self, name, columns, **kwargs):
+ TextDataSourceBase.__init__(self, name, columns, **kwargs)
+ self.type = 'columnar text file'
+ self.file_iter = self.get_file_iter()
+ self.skip_header_rows(self.file_iter)
+ self.columns = columns
+
+ def next_rowdict(self):
+ """Method to read a row from an initialised data source"""
+ line = self.file_iter.next()
+ if line == chr(26):
+ raise StopIteration
+ row = {}
+ for col in self.columns:
+ if col.startpos is not None:
+ startpos = col.startpos - col.posbase
+ val = line[startpos:startpos + col.length]
+ if val.lstrip():
+ row[col.name] = val
+ else:
+ row[col.name] = col.blankval
+ return row
+
+ def close(self):
+ self.file_iter = None
diff --git a/SOOMv0/Sources/DB.py b/SOOMv0/Sources/DB.py
new file mode 100644
index 0000000..4c3a8c2
--- /dev/null
+++ b/SOOMv0/Sources/DB.py
@@ -0,0 +1,81 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: DB.py 2733 2007-07-13 07:28:35Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/Sources/DB.py,v $
+
+# SOOMv0 modules
+from SOOMv0.Sources.common import *
+
+__all__ = 'DBDataSource',
+
+class DBDataSource(DataSourceBase):
+ """
+ Iterable class to load DataSource into a DataSet from a DB API
+ v2.0 connection.
+
+ Note: pyPgSQL will perform faster if PgSQL.fetchReturnsList and
+ PgSQL.noPostgresCursor are set to 1 (these make pyPgSQL behave
+ like dumber adapters - we duplicate much of those smarts anyway).
+ """
+
+ def __init__(self, name, columns,
+ db, table, where = None, whereargs = None, fetchcount = 1000,
+ **kwargs):
+ DataSourceBase.__init__(self, name, columns, **kwargs)
+ self.db = db
+ self.fetchcount = fetchcount
+ self.rows = []
+ colmap = {}
+ dbcols = []
+ for col in columns:
+ if col.dbname:
+ dbname = col.dbname
+ else:
+ dbname = col.name
+ colmap[dbname.lower()] = col
+ dbcols.append(dbname)
+ self.type = 'database connection'
+ query = 'select %s from %s' % (', '.join(dbcols), table)
+ if where:
+ query += ' where ' + where
+ self.curs = self.db.cursor()
+ if whereargs is None:
+ self.curs.execute(query)
+ else:
+ self.curs.execute(query, whereargs)
+ self.col_ord = [colmap.get(d[0].lower(), None)
+ for d in self.curs.description]
+
+ def next_rowdict(self):
+ if self.rows is None:
+ raise StopIteration
+ if not self.rows:
+ self.rows = self.curs.fetchmany(self.fetchcount)
+ try:
+ row = self.rows.pop(0)
+ except IndexError:
+ self.rows = None
+ raise StopIteration
+ row_dict = {}
+ for col, value in zip(self.col_ord, row):
+ if col:
+ row_dict[col.name] = value
+ return row_dict
+
+ def close(self):
+ if self.curs is not None:
+ self.curs.close()
+ self.curs = None
+ self.db = None
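+
+# Usage sketch (illustrative; assumes a DB API v2.0 connection `conn` and
+# DataSourceColumn definitions `cols`; the placeholder style in the where
+# clause depends on the adapter's paramstyle):
+#
+#     src = DBDataSource('deaths', cols, db=conn, table='deaths',
+#                        where='year >= %s', whereargs=(2000,))
+#     for rowdict in src:
+#         pass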
diff --git a/SOOMv0/Sources/__init__.py b/SOOMv0/Sources/__init__.py
new file mode 100644
index 0000000..a6b1454
--- /dev/null
+++ b/SOOMv0/Sources/__init__.py
@@ -0,0 +1,16 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: __init__.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/Sources/__init__.py,v $
diff --git a/SOOMv0/Sources/common.py b/SOOMv0/Sources/common.py
new file mode 100644
index 0000000..51e74cd
--- /dev/null
+++ b/SOOMv0/Sources/common.py
@@ -0,0 +1,206 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+"""
+Classes and functions for loading SOOM datasets from external
+sources. Currently data is loaded from one or more instances of
+the DataSource class (defined below). Each DataSource object
+contains a number of DataSourceColumn instances.
+"""
+
+# $Id: common.py 2733 2007-07-13 07:28:35Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/Sources/common.py,v $
+
+import sys
+import os
+from SOOMv0.Soom import soom
+from SOOMv0.DataSourceColumn import DataSourceColumn
+
+class DataSourceBase:
+ """
+ Definitions of sources of data.
+
+ Arguments:
+ name name of data source definition
+ columns list of DataSourceColumns
+ type ?
+ label data source label [I]
+ desc descriptive label [I]
+
+ Key:
+ [I] inherited by the DataSet(s) created from this data
+ source definition.
+
+ Subclasses are intended to be iterable.
+ """
+
+ def __init__(self, name, columns,
+ type = None, label = None, desc = None,
+ missing = None,
+ xformpre = None, xformpost = None):
+ soom.check_name_ok(name, 'DataSource')
+ self.name = name
+ self.type = type
+ self.label = label
+ self.desc = desc
+ self.missing = missing
+ self.col_dict = dict([(c.name, c) for c in columns])
+ self.columns = columns
+ self.xformpre = xformpre
+ self.xformpost = xformpost
+
+ def register_dataset_types(self, dataset_cols):
+ """
+ The Dataset tells us what columns and datatypes it's
+ expecting before it starts.
+ """
+ self.columns = []
+ for ds_col in dataset_cols:
+ try:
+ datatype = ds_col.datatype.name
+ except AttributeError:
+ continue
+ try:
+ column = self.col_dict[ds_col.name]
+ except KeyError:
+ column = DataSourceColumn(ds_col.name, ds_col.label)
+ self.col_dict[ds_col.name] = column
+ column.set_datatype(datatype)
+ self.columns.append(column)
+
+ def __str__(self):
+ """
+ String method to print out the definition of a DataSource
+ including the DataSourceColumns contained in it.
+ """
+ # TO DO: improve the layout of the definition print out
+ m = [""]
+ m.append("DataSource definition: %s" % self.name)
+ if self.label != None:
+ m.append(" Label: %s" % self.label)
+ if self.desc != None:
+ m.append(" Description: %s" % self.desc)
+ m.append(" Type: %s" % self.type)
+ m.append(" Containing the following DataSourceColumns:")
+ for col in self.columns:
+ # column definitions know how to print themselves
+ m.extend([' ' * 8 + l for l in str(col).split('\n')])
+ return '\n'.join(m)
+
+ def __iter__(self):
+ return self
+
+ def next(self):
+ while 1:
+ rowdict = self.next_rowdict()
+ if not rowdict:
+ continue
+ if self.xformpre:
+ rowdict = self.xformpre(rowdict)
+ if rowdict is None:
+ continue
+ for col in self.columns:
+ v = rowdict.get(col.name)
+ if v is not None:
+ if v == '' or self.missing == v:
+ v = None
+ else:
+ try:
+ v = col.conversion(v)
+ except (TypeError, ValueError):
+ exc = sys.exc_info()
+ try:
+ raise exc[0], 'column "%s": %s, value was "%r", datatype "%s"' % (col.name, exc[1], v, col.datatype), exc[2]
+ finally:
+ del exc
+ rowdict[col.name] = v
+ if self.xformpost:
+ rowdict = self.xformpost(rowdict)
+ if rowdict is None:
+ continue
+ return rowdict
+
+ def close(self):
+ pass
+
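+# Illustrative example of the xformpre/xformpost hooks used by next():
+# either hook may rewrite the row dictionary, or return None to have the
+# row skipped (the 'status' field here is hypothetical):
+#
+#     def drop_test_records(rowdict):
+#         if rowdict.get('status') == 'TEST':
+#             return None
+#         return rowdict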
+
+class TextDataSourceBase(DataSourceBase):
+ """
+ Hold common methods for accessing text file DataSources. Note:
+ opens file in constructor.
+
+ As well as the arguments supported by DataSourceBase, TextDataSourceBase
+ subclasses also support the following:
+
+ filename name of file which contains source data
+ path path to file which contains source data
+ header_rows number of rows to skip in source data file
+ before actual data starts
+
+ TODO: other sources such as XML and DBMS data in future.
+ """
+
+ def __init__(self, name, columns, filename, path = None,
+ header_rows = 0, **kwargs):
+ DataSourceBase.__init__(self, name, columns, **kwargs)
+ self.filename = filename
+ self.path = path
+ self.header_rows = header_rows
+ self.type = 'text file'
+ if self.path:
+ self.filepath = os.path.join(self.path, self.filename)
+ else:
+ self.filepath = self.filename
+
+ def __str__(self):
+ return '%s\n Filename: %s' % \
+ (DataSourceBase.__str__(self), self.filepath)
+
+ def get_file_iter(self):
+ def _zipiter(filepath, member=None):
+ try:
+ import zipfile
+ except ImportError:
+ raise IOError, '%s: zip support not available' % self.filename
+ f = zipfile.ZipFile(filepath)
+ if member is None:
+ if len(f.namelist()) != 1:
+ raise IOError('%s: zip file member not specified' %
+ self.filename)
+ member = f.namelist()[0]
+ try:
+ return iter(f.read(member).splitlines())
+ finally:
+ f.close()
+
+ if self.filename.endswith('.gz'):
+ try:
+ import gzip
+ except ImportError:
+ raise IOError, '%s: gzip support not available' % self.filename
+ return iter(gzip.GzipFile(self.filepath))
+ elif self.filename.endswith('.zip'):
+ return _zipiter(self.filepath)
+ else:
+ dirname = os.path.dirname(self.filename)
+ if dirname.endswith('.zip'):
+ return _zipiter(dirname, os.path.basename(self.filename))
+ else:
+ return iter(open(self.filepath))
+
+ def skip_header_rows(self, i):
+ if self.header_rows:
+ for n in range(self.header_rows):
+ i.next()
diff --git a/SOOMv0/Stats.py b/SOOMv0/Stats.py
new file mode 100644
index 0000000..5cf7dcf
--- /dev/null
+++ b/SOOMv0/Stats.py
@@ -0,0 +1,1595 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: Stats.py 2692 2007-06-08 03:03:15Z tchur $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/Stats.py,v $
+
+import Numeric, MA, math
+
+def mask_nonpositive_weights(wgtvector):
+ if MA.isMaskedArray(wgtvector):
+ return MA.masked_where(MA.less_equal(wgtvector,0.0),wgtvector)
+ else:
+ return MA.masked_where(Numeric.less_equal(wgtvector,0.0),wgtvector)
+
+def zero_nonpositive_weights(wgtvector):
+ if MA.isMaskedArray(wgtvector):
+ return MA.choose(MA.less(wgtvector,0.0),(wgtvector,0.0))
+ else:
+ return Numeric.choose(Numeric.less(wgtvector,0.0),(wgtvector,0.0))
+
+def mask_where_masked(a, b):
+ """
+ Apply b's mask to a if b has a mask.
+ """
+ if MA.isMaskedArray(b):
+ mask = b.mask()
+ if mask is not None:
+ return MA.masked_where(mask, a)
+ return a
+
+def mask_and_compress(datavector, wgtvector):
+ """
+ Mutually apply each other's masks and compress the resulting arrays.
+ """
+ datavector = mask_where_masked(datavector, wgtvector)
+ wgtvector = mask_where_masked(wgtvector, datavector)
+ if MA.isMaskedArray(datavector):
+ datavector = datavector.compressed()
+ if MA.isMaskedArray(wgtvector):
+ wgtvector = wgtvector.compressed()
+ assert len(wgtvector) == len(datavector), 'datavector and wgtvector are different lengths after compressing masked values'
+ return datavector, wgtvector
+
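+# Example (illustrative): if datavector is MA.masked_values([1., -9., 3.], -9.)
+# and wgtvector is Numeric.array([2., 2., 0.]), mask_and_compress returns
+# the pair ([1., 3.], [2., 0.]) - the data mask is applied to the weights too.
+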
+def get_alpha(conflev):
+ try:
+ conflev = float(conflev)
+ if conflev < 0.5 or conflev >= 1.0:
+ raise ValueError
+ except (TypeError, ValueError):
+ raise ValueError('conflev (confidence level) must be a proportion between 0.5 and 1.0')
+ return 1.0 - conflev
+
+def nonmissing(datavector):
+ """
+ Returns the number of non-missing elements in the rank-1 Numpy
+ or MA array (vector) passed as the only argument.
+ """
+ if MA.isMaskedArray(datavector):
+ # print datavector.count() # debug
+ return datavector.count()
+ else:
+ # print len(datavector) # debug
+ return len(datavector)
+
+def missing(datavector):
+ """
+ Returns the number of missing elements in the rank-1 MA array
+ (vector) passed as the only argument. Numpy arrays do not have
+ missing values by definition.
+ """
+ if MA.isMaskedArray(datavector):
+ return datavector.size() - datavector.count()
+ else:
+ return 0
+
+def assert_same_shape(datavector, wgtvector):
+ if datavector.shape[0] != wgtvector.shape[0]:
+ raise ValueError('datavector and wgtvector must have the same length (%d vs %d)' %
+ (datavector.shape[0], wgtvector.shape[0]))
+
+
+def wnonmissing(datavector,wgtvector,exclude_nonpositive_weights=False):
+ """
+ Returns the number of non-missing elements in the rank-1 Numpy
+ or MA array (vector) passed as the first argument. Elements of
+ datavector are regarded as missing if they are masked as missing
+ (in the case of an MA array) or if the associated weight is
+ missing or negative. The optional Boolean argument
+ exclude_nonpositive_weights causes non-positive weights to be
+ regarded as missing.
+ """
+ assert_same_shape(datavector, wgtvector)
+
+ if exclude_nonpositive_weights:
+ wgtvector = mask_nonpositive_weights(wgtvector)
+ else:
+ wgtvector = zero_nonpositive_weights(wgtvector)
+
+ datavector = mask_where_masked(datavector, wgtvector)
+
+ if MA.isMaskedArray(datavector):
+ # print datavector.count() # debug
+ return datavector.count()
+ else:
+ # print len(datavector) # debug
+ return len(datavector)
+
+def wmissing(datavector,wgtvector,exclude_nonpositive_weights=False):
+ """
+ Returns the number of missing elements in the rank-1 Numpy or
+ MA array (vector) passed as the first argument. Elements of
+ datavector are regarded as missing if they are masked as missing
+ (in the case of an MA array) or if the associated weight is
+ missing or negative. The optional Boolean argument
+ exclude_nonpositive_weights causes non-positive weights to be
+ regarded as missing.
+ """
+ assert_same_shape(datavector, wgtvector)
+
+ if exclude_nonpositive_weights:
+ wgtvector = mask_nonpositive_weights(wgtvector)
+ else:
+ wgtvector = zero_nonpositive_weights(wgtvector)
+
+ datavector = mask_where_masked(datavector, wgtvector)
+
+ if MA.isMaskedArray(datavector):
+ return datavector.size() - datavector.count()
+ else:
+ return 0
+
+def aminimum(datavector):
+ """
+ Returns the minimum value of non-missing elements in the rank-1
+ Numpy or MA array (vector) passed as the only argument.
+
+ Note - Numeric.minimum compares two or more arrays returning
+ an array, whereas MA.minimum works like the builtin min()
+ and returns a scalar, but also copes with missing values,
+ which is what we want.
+ """
+ if nonmissing(datavector) == 0:
+ return None
+ return float(MA.minimum(datavector))
+
+def amaximum(datavector):
+ """
+ Returns the maximum value of non-missing elements in the rank-1
+ Numpy array (vector) passed as the only argument.
+
+ Note - see aminimum comment.
+ """
+ if nonmissing(datavector) == 0:
+ return None
+ return float(MA.maximum(datavector))
+
+def wminimum(datavector,wgtvector,exclude_nonpositive_weights=False):
+ """
+ Returns the minimum value of non-missing elements in the rank-1
+ Numpy or MA array (vector) passed as the first argument.
+
+ Note - Numeric.minimum compares two or more arrays returning
+ an array, whereas MA.minimum works like the builtin min()
+ and returns a scalar, but also copes with missing values,
+ which is what we want.
+
+ Elements of datavector are
+ regarded as missing if they are masked as missing (in the case
+ of an MA array) or if the associated weight is missing or negative.
+ The optional Boolean argument exclude_nonpositive_weights causes
+ non-positive weights to be regarded as missing.
+ """
+ assert_same_shape(datavector, wgtvector)
+
+ if exclude_nonpositive_weights:
+ wgtvector = mask_nonpositive_weights(wgtvector)
+ else:
+ wgtvector = zero_nonpositive_weights(wgtvector)
+
+ datavector = mask_where_masked(datavector, wgtvector)
+
+ if nonmissing(datavector) == 0:
+ return None
+ return float(MA.minimum(datavector))
+
+def wmaximum(datavector,wgtvector,exclude_nonpositive_weights=False):
+ """
+ Returns the maximum value of non-missing elements in the rank-1
+ Numpy or MA array (vector) passed as the first argument.
+
+ Note - see aminimum comment.
+
+ Elements of datavector are
+ regarded as missing if they are masked as missing (in the case
+ of an MA array) or if the associated weight is missing or negative.
+ The optional Boolean argument exclude_nonpositive_weights causes
+ non-positive weights to be regarded as missing.
+ """
+ assert_same_shape(datavector, wgtvector)
+
+ if exclude_nonpositive_weights:
+ wgtvector = mask_nonpositive_weights(wgtvector)
+ else:
+ wgtvector = zero_nonpositive_weights(wgtvector)
+
+ datavector = mask_where_masked(datavector, wgtvector)
+
+ if nonmissing(datavector) == 0:
+ return None
+ return float(MA.maximum(datavector))
+
+def arange(datavector):
+ "Returns the differences betwen the maximum and the minimum"
+ if nonmissing(datavector) == 0:
+ return None
+ return float(MA.maximum(datavector)) - float(MA.minimum(datavector))
+
+def wrange(datavector,wgtvector,exclude_nonpositive_weights=False):
+ """
+ Returns the difference between the maximum and the minimum
+
+ Elements of datavector are
+ regarded as missing if they are masked as missing (in the case
+ of an MA array) or if the associated weight is missing or negative.
+ The optional Boolean argument exclude_nonpositive_weights causes
+ non-positive weights to be regarded as missing.
+ """
+ assert_same_shape(datavector, wgtvector)
+
+ if exclude_nonpositive_weights:
+ wgtvector = mask_nonpositive_weights(wgtvector)
+ else:
+ wgtvector = zero_nonpositive_weights(wgtvector)
+
+ datavector = mask_where_masked(datavector, wgtvector)
+
+ if nonmissing(datavector) == 0:
+ return None
+
+ return float(MA.maximum(datavector)) - float(MA.minimum(datavector))
+
+def asum(datavector):
+ """
+ Returns the sum of non-missing values in the Numpy or MA rank-1
+ array passed as the only argument.
+ """
+ if nonmissing(datavector) == 0:
+ return None
+ return MA.add.reduce(datavector)
+
+def wsum(datavector,wgtvector,exclude_nonpositive_weights=False):
+ """
+ Returns the weighted sum of the rank-1 array (vector) passed
+ as the first argument, weighted by the vector passed as the
+ second argument (which must have the same number of elements).
+
+ Elements of datavector are regarded as missing if they are
+ masked as missing (in the case of an MA array) or if the
+ associated weight is missing or negative. The optional Boolean
+ argument exclude_nonpositive_weights causes non-positive weights
+ to be regarded as missing.
+
+ """
+ assert_same_shape(datavector, wgtvector)
+
+ if exclude_nonpositive_weights:
+ wgtvector = mask_nonpositive_weights(wgtvector)
+ else:
+ wgtvector = zero_nonpositive_weights(wgtvector)
+
+ datavector = mask_where_masked(datavector, wgtvector)
+
+ if nonmissing(datavector) == 0:
+ return None
+
+ if MA.isMaskedArray(datavector) or MA.isMaskedArray(wgtvector):
+ return MA.add.reduce(datavector * wgtvector)
+ else:
+ return Numeric.add.reduce(datavector * wgtvector)
+
+def amean(datavector):
+ """
+ Returns the arithmetic mean of the rank-1 Numpy array (vector)
+ passed as the only argument.
+ """
+ n = nonmissing(datavector)
+ if n == 0:
+ return None
+ return float(MA.add.reduce(datavector))/float(n)
+
+def ameancl(datavector,conflev=0.95):
+ """
+ Returns a 3-tuple of a) the arithmetic mean of the rank-1 Numpy
+ array (vector) passed as the first argument, b) and c) the lower
+ and upper two-sided confidence limits at confidence level conflev.
+ """
+ alpha = get_alpha(conflev)
+
+ n = nonmissing(datavector)
+ if n == 0:
+ return (None,None,None)
+ elif n == 1:
+ x = float(MA.add.reduce(datavector))/float(n)
+ return (x,None,None)
+ else:
+ import rpy
+ s = stderr(datavector)
+ x = float(MA.add.reduce(datavector))/float(n)
+ t = rpy.r.qt(1.0 - alpha/2.0, n - 1)
+ return (x, x - t*s, x + t*s)
+
+def geomean(datavector):
+ """
+ Returns the geometric mean of the rank-1 Numpy array (vector)
+ passed as the only argument.
+ """
+ n = nonmissing(datavector)
+ if n == 0:
+ return None
+ sum = MA.add.reduce(MA.log(datavector))
+ return MA.power(MA.e,float(sum)/float(n))
+
+
+def wamean(datavector,wgtvector,exclude_nonpositive_weights=False):
+ """
+ Returns the weighted arithmetic mean of the rank-1 array (vector)
+ passed as the first argument, weighted by the vector passed as
+ the second argument (which must have the same number of elements).
+
+ Elements of datavector are regarded as missing if they are masked
+ as missing (in the case of an MA array) or if the associated
+ weight is missing or negative. The optional Boolean argument
+ exclude_nonpositive_weights causes non-positive weights to be
+ regarded as missing.
+ """
+ assert_same_shape(datavector, wgtvector)
+
+ if exclude_nonpositive_weights:
+ wgtvector = mask_nonpositive_weights(wgtvector)
+ else:
+ wgtvector = zero_nonpositive_weights(wgtvector)
+
+ datavector, wgtvector = mask_and_compress(datavector, wgtvector)
+
+ if len(datavector) == 0:
+ return None
+
+ sum = float(Numeric.add.reduce(datavector * wgtvector))
+ sumwgt = float(Numeric.add.reduce(wgtvector))
+ if sumwgt != 0.0:
+ return float(sum)/float(sumwgt)
+ else:
+ return None
+
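+# Example (illustrative):
+#
+#     wamean(Numeric.array([1., 2., 3.]), Numeric.array([1., 1., 2.]))
+#     # -> (1*1 + 2*1 + 3*2) / (1 + 1 + 2) = 2.25
+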
+def wameancl(datavector,wgtvector,conflev=0.95,exclude_nonpositive_weights=False):
+ """
+ Returns a 3-tuple of a) the weighted arithmetic mean of the
+ rank-1 array (vector) passed as the first argument, weighted
+ by the vector passed as the second argument (which must have
+ the same number of elements), and b) and c) the lower and
+ upper confidence limits for the mean at the confidence level
+ specified by conflev.
+
+ Elements of datavector are regarded as missing if they are masked
+ as missing (in the case of an MA array) or if the associated
+ weight is missing or negative. The optional Boolean argument
+ exclude_nonpositive_weights causes non-positive weights to be
+ regarded as missing.
+ """
+ alpha = get_alpha(conflev)
+
+ assert_same_shape(datavector, wgtvector)
+
+ if exclude_nonpositive_weights:
+ wgtvector = mask_nonpositive_weights(wgtvector)
+ else:
+ wgtvector = zero_nonpositive_weights(wgtvector)
+
+ datavector, wgtvector = mask_and_compress(datavector, wgtvector)
+
+ n = len(datavector)
+ if n == 0:
+ return (None,None,None)
+
+ sum = float(Numeric.add.reduce(datavector * wgtvector))
+ sumwgt = float(Numeric.add.reduce(wgtvector))
+ if sumwgt == 0.0:
+ return (None,None,None)
+ if n == 1:
+ x = float(sum)/float(sumwgt)
+ return (x,None,None)
+ else:
+ x = float(sum)/float(sumwgt)
+ import rpy
+ s = wstderr(datavector,wgtvector,exclude_nonpositive_weights=exclude_nonpositive_weights)
+ t = rpy.r.qt(1.0 - alpha/2.0, n - 1)
+ return (x, x - t*s, x + t*s)
+
+def wgeomean(datavector,wgtvector,exclude_nonpositive_weights=False):
+ """
+ Returns the weighted geometric mean of the rank-1 array (vector)
+ passed as the first argument, weighted by the vector passed as
+ the second argument (which must have the same number of elements).
+
+ Elements of datavector are regarded as missing if they are masked
+ as missing (in the case of an MA array) or if the associated
+ weight is missing or negative. The optional Boolean argument
+ exclude_nonpositive_weights causes non-positive weights to be
+ regarded as missing.
+
+ """
+ assert_same_shape(datavector, wgtvector)
+
+ if exclude_nonpositive_weights:
+ wgtvector = mask_nonpositive_weights(wgtvector)
+ else:
+ wgtvector = zero_nonpositive_weights(wgtvector)
+
+ datavector, wgtvector = mask_and_compress(datavector, wgtvector)
+
+ n = len(datavector)
+ if n == 0:
+ return None
+
+ # weighted geometric mean: exp(sum(w * ln(x)) / sum(w))
+ sum = float(Numeric.add.reduce(wgtvector * Numeric.log(datavector)))
+ sumwgt = float(Numeric.add.reduce(wgtvector))
+ if sumwgt == 0.0:
+ return None
+ return math.exp(sum/sumwgt)
+
+def wn(wgtvector,exclude_nonpositive_weights=False):
+ """
+ Returns the sum of the non-negative weights passed as the
+ first argument. The optional Boolean argument
+ exclude_nonpositive_weights causes non-positive weights to be
+ treated as missing rather than zero, which should give the
+ same result.
+ """
+
+ if exclude_nonpositive_weights:
+ wgtvector = mask_nonpositive_weights(wgtvector)
+ else:
+ wgtvector = zero_nonpositive_weights(wgtvector)
+
+ if nonmissing(wgtvector) == 0:
+ return 0
+
+ return float(MA.add.reduce(wgtvector))
+
+
+def quantiles(datavector,p=None,defn=5):
+ """
+ Returns the p quantiles of the rank-1 array passed as the
+ first argument.
+
+ p is a list or tuple of values between 0 and 1 (inclusive);
+ a tuple of corresponding quantiles is returned.
+
+ The optional defn argument selects one of the five quantile
+ definitions used by SAS; it defaults to definition 5 (the
+ SAS default).
+ See http://v9doc.sas.com/cgi-bin/sasdoc/cgigdoc?file=../proc.hlp/tw5520statapp-formulas.htm
+ """
+
+ p_error = "p argument must be a number between 0 and 1 (inclusive), or a list or tuple of such numbers."
+ pvals = []
+ for p_val in p:
+ try:
+ pval = float(p_val)
+ except:
+ raise ValueError, p_error
+ if pval < 0.0 or pval > 1.0:
+ raise ValueError, p_error
+ pvals.append(pval)
+ quantiles = [None] * len(pvals)
+
+ defn_error = "defn argument must be an integer between 1 and 5 inclusive"
+ try:
+ defn = int(defn)
+ except:
+ raise ValueError, defn_error
+ if defn < 1 or defn > 5:
+ raise ValueError, defn_error
+
+
+ if MA.isMaskedArray(datavector):
+ datavector = datavector.compressed()
+
+ datavector = Numeric.sort(datavector)
+
+ n = datavector.shape[0]
+ if n == 0:
+ return tuple(quantiles)
+ if n == 1:
+ d = datavector[0]
+ return tuple([d for i in quantiles])
+
+ for p_index, p in enumerate(pvals):
+ if defn == 4:
+ np = (n + 1) * p
+ else:
+ np = n * p
+
+ j = int(np)
+ g = np - j
+
+ if defn == 1:
+ if j > 0:
+ j -= 1
+ if j <= n - 2:
+ quantile = float(((1-g)*datavector[j]) + (g*datavector[j+1]))
+ else:
+ quantile = float(datavector[j])
+ else:
+ # quantile = float(((1-g)*datavector[j]) + (g*datavector[j]))
+ quantile = float(datavector[j])
+ elif defn == 2:
+ i = int(np + 0.5)
+ if i > 0:
+ i -= 1
+ if g != 0.5:
+ quantile = float(datavector[i])
+ elif g == 0.5 and j % 2 == 0:
+ if j > 0:
+ j -= 1
+ quantile = float(datavector[j])
+ elif g == 0.5 and j % 2 != 0:
+ quantile = float(datavector[j])
+ elif defn == 3:
+ if g == 0.0:
+ if j > 0:
+ j -= 1
+ quantile = float(datavector[j])
+ else:
+ quantile = float(datavector[j])
+ elif defn == 4:
+ if j > 0:
+ j -= 1
+ if j <= n - 2:
+ quantile = float(((1-g)*datavector[j]) + (g*datavector[j+1]))
+ elif j <= n-1:
+ quantile = float(datavector[j])
+ else:
+ quantile = float(datavector[j-1])
+ else:
+ quantile = float(datavector[j])
+ elif defn == 5:
+ if j > 0:
+ j -= 1
+ if j <= n - 2:
+ if g == 0.0:
+ quantile = 0.5 * ( datavector[j] + datavector[j+1] )
+ else:
+ quantile = float(datavector[j+1])
+ else:
+ quantile = float(datavector[j])
+ else:
+ quantile = float(datavector[j])
+ quantiles[p_index] = quantile
+ return tuple(quantiles)
+
+def quantile(datavector,p=None,defn=5):
+ """
+ Returns the p quantile of the rank-1 array passed as the
+ first argument.
+
+ p is a single value between 0 and 1 (inclusive); use
+ quantiles() to obtain several quantiles in one call.
+
+ The optional defn argument selects one of the five quantile
+ definitions used by SAS; it defaults to definition 5 (the
+ SAS default).
+ See http://v9doc.sas.com/cgi-bin/sasdoc/cgigdoc?file=../proc.hlp/tw5520statapp-formulas.htm
+ """
+ return quantiles(datavector,p=(p,),defn=defn)[0]
+
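+# Example (illustrative), using the default SAS definition 5, which averages
+# the two middle values when n*p falls on an observation boundary:
+#
+#     quantile(Numeric.array([1., 2., 3., 4.]), p=0.5)    # -> 2.5
+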
+def median(datavector,defn=5):
+ return quantile(datavector,p=0.5,defn=defn)
+
+def wquantiles(datavector,wgtvector,p=None,exclude_nonpositive_weights=False):
+ """
+ Returns the weighted p quantiles of the rank-1 array passed
+ as the first argument, where wgtvector gives the weights.
+
+ p is a list or tuple of values between 0 and 1 (inclusive);
+ a tuple of corresponding quantiles is returned.
+
+ Formula is as used by SAS.
+ See http://v9doc.sas.com/cgi-bin/sasdoc/cgigdoc?file=../proc.hlp/tw5520statapp-formulas.htm
+
+ The optional Boolean argument exclude_nonpositive_weights
+ causes non-positive weights to be treated as missing if True.
+ The default (False) treats negative weights as zero weights
+ and includes them in the calculations.
+ """
+ p_error = "p argument must be a number between 0 and 1 (inclusive), or a list or tuple of such numbers."
+ pvals = []
+ for p_val in p:
+ try:
+ pval = float(p_val)
+ except:
+ raise ValueError, p_error
+ if pval < 0.0 or pval > 1.0:
+ raise ValueError, p_error
+ pvals.append(pval)
+ quantiles = [None] * len(pvals)
+
+ assert_same_shape(datavector, wgtvector)
+
+ if datavector.shape[0] == 0:
+ return quantiles
+
+ if exclude_nonpositive_weights:
+ wgtvector = mask_nonpositive_weights(wgtvector)
+ else:
+ wgtvector = zero_nonpositive_weights(wgtvector)
+
+ datavector, wgtvector = mask_and_compress(datavector, wgtvector)
+
+ sort_arg = Numeric.argsort(datavector)
+ datavector = Numeric.take(datavector,sort_arg)
+ wgtvector = Numeric.take(wgtvector,sort_arg)
+
+ n = datavector.shape[0]
+
+ if n == 1:
+ d = float(datavector[0])
+ return tuple([d for i in quantiles])
+
+ # Now accumulate the weights
+ accumulated_wgtvector = Numeric.add.accumulate(wgtvector)
+ if type(accumulated_wgtvector) not in (float,int,long):
+ W = accumulated_wgtvector[-1] # sum of weights
+ else:
+ W = accumulated_wgtvector
+
+ for p_index in range(len(pvals)):
+ p = pvals[p_index]
+
+ if float(W) == 0.0:
+ quantiles[p_index] = None
+ else:
+
+ pW = p * W
+
+ ge_vector = Numeric.greater_equal(accumulated_wgtvector, pW)
+ le_vector = Numeric.less_equal(accumulated_wgtvector, pW)
+
+ # print datavector, wgtvector
+ # print accumulated_wgtvector, pW
+ # print len(ge_vector), ge_vector
+ # print len(le_vector), le_vector
+ # print Numeric.compress(le_vector,accumulated_wgtvector)
+ # print len(Numeric.compress(le_vector,accumulated_wgtvector))
+
+ if Numeric.sum(ge_vector) > 0:
+ # we have an exact match...
+ below_wgts = Numeric.compress(le_vector,accumulated_wgtvector)
+ above_wgts = Numeric.compress(ge_vector,accumulated_wgtvector)
+ below_values = Numeric.compress(le_vector,datavector)
+ above_values = Numeric.compress(ge_vector,datavector)
+ # Not defined in SAS docs, assume lower value
+ if len(below_wgts) == 0 and len(above_wgts) and above_values[0]:
+ quantiles[p_index] = float(above_values[0])
+ # assume upper value
+ elif len(above_wgts) == 0 and len(below_wgts) and below_values[-1]:
+ quantiles[p_index] = float(below_values[-1])
+ else:
+ first_two_accum_wgts = (below_wgts[-1],
+ above_wgts[0])
+ first_two_accum_values = (below_values[-1],
+ above_values[0])
+ if first_two_accum_wgts[0] == pW:
+ quantiles[p_index] = 0.5 * float(Numeric.sum(first_two_accum_values))
+ elif first_two_accum_wgts[0] < pW and first_two_accum_wgts[1]:
+ quantiles[p_index] = float(first_two_accum_values[1])
+ return tuple(quantiles)
+
+def wquantile(datavector,wgtvector,p=None,exclude_nonpositive_weights=False):
+ """
+ Returns the weighted p quantile of the rank-1 array passed
+ as the first argument, where wgtvector gives the weights.
+
+ Formula is as used by SAS.
+ See http://v9doc.sas.com/cgi-bin/sasdoc/cgigdoc?file=../proc.hlp/tw5520statapp-formulas.htm
+
+ The optional Boolean argument exclude_nonpositive_weights
+ causes non-positive weights to be treated as missing if True.
+ The default (False) treats negative weights as zero weights
+ and includes them in the calculations.
+ """
+ return wquantiles(datavector,wgtvector,p=(p,),exclude_nonpositive_weights=exclude_nonpositive_weights)[0]
+
+def wmedian(datavector,wgtvector,exclude_nonpositive_weights=False):
+ return wquantile(datavector,wgtvector,p=0.5,exclude_nonpositive_weights=exclude_nonpositive_weights)
+
+def variance(datavector,df=None):
+ """
+ Returns the sample or population variance - same as variance
+ in SAS with VARDEF=DF or VARDEF=N, depending on whether df='DF'
+ (sample) or 'N' (population)
+ """
+ if df not in ('DF','N'):
+ raise ValueError, 'DF argument must be DF or N'
+
+ # first calculate n
+ n = nonmissing(datavector)
+ if (n < 2 and df == 'DF') or n is None:
+ return None
+ elif n < 1:
+ return None
+ else:
+ pass
+
+ # then calculate the mean
+ mean = amean(datavector)
+
+ if mean is None:
+ return None
+
+ # now the sum of squares about the mean
+
+ if MA.isMaskedArray(datavector):
+ sumsquares = MA.add.reduce(MA.power((datavector - mean),2))
+ else:
+ sumsquares = Numeric.add.reduce(Numeric.power((datavector - mean),2))
+
+ if df == 'DF':
+ return sumsquares/float((n-1))
+ else:
+ return sumsquares/float(n)
+
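+# Example (illustrative): for Numeric.array([1., 2., 3.]) the sample variance
+# (df='DF') is 2/2 = 1.0, while the population variance (df='N') is 2/3.
+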
+def samplevar(datavector):
+ """
+ Returns sample variance - same as variance in SAS with VARDEF=DF
+ """
+ return variance(datavector,df='DF')
+
+def populationvar(datavector):
+ """
+ Returns population variance - same as variance in SAS with VARDEF=N
+ """
+ return variance(datavector,df='N')
+
+def wvariance(datavector,wgtvector,df=None,exclude_nonpositive_weights=False):
+ """
+ Returns the weighted sample or population variance - same as
+ variance in SAS with VARDEF=WDF or VARDEF=WEIGHT, depending on
+ whether df = 'WDF' (sample) or 'WEIGHT' (population)
+ """
+
+ if df not in ('WDF','WEIGHT'):
+ raise ValueError, 'DF argument must be WDF or WEIGHT'
+
+ assert_same_shape(datavector, wgtvector)
+
+ if exclude_nonpositive_weights:
+ wgtvector = mask_nonpositive_weights(wgtvector)
+ else:
+ wgtvector = zero_nonpositive_weights(wgtvector)
+
+ datavector, wgtvector = mask_and_compress(datavector, wgtvector)
+
+ # first calculate n
+ n = len(datavector)
+ if n < 1 or n is None:
+ return None
+
+ # then calculate the weighted mean
+ sum = float(Numeric.add.reduce(datavector * wgtvector))
+ sumwgt = float(Numeric.add.reduce(wgtvector))
+ if sumwgt != 0.0:
+ wmean = float(sum)/float(sumwgt)
+ else:
+ return None
+ # now the squares about the mean
+ squaresaboutmean = Numeric.power((datavector - wmean),2)
+ # Now the weighted squares about the mean
+ sumsquares = Numeric.add.reduce(wgtvector * squaresaboutmean)
+ # now d
+ if df == 'WDF':
+ d = float(Numeric.add.reduce(wgtvector)) - 1.0
+ else:
+ d = float(Numeric.add.reduce(wgtvector))
+ if d > 0:
+ return float(sumsquares)/d
+ else:
+ return None
+
+def wsamplevar(datavector,wgtvector,exclude_nonpositive_weights=False):
+ """
+ Returns the weighted sample variance - same as variance in
+ SAS with VARDEF=WDF
+ """
+ return wvariance(datavector,wgtvector,df='WDF',exclude_nonpositive_weights=exclude_nonpositive_weights)
+
+def wpopulationvar(datavector,wgtvector,exclude_nonpositive_weights=False):
+ """
+ Returns the weighted population variance - same as variance
+ in SAS with VARDEF=WEIGHT
+ """
+ return wvariance(datavector,wgtvector,df='WEIGHT',exclude_nonpositive_weights=exclude_nonpositive_weights)
+
+def sample_stddev(datavector):
+ svar = variance(datavector,df='DF')
+ if svar is None:
+ return None
+ else:
+ return svar**0.5
+
+def wsample_stddev(datavector,wgtvector,exclude_nonpositive_weights=False):
+
+ wsvar = wvariance(datavector,wgtvector,df='WDF',exclude_nonpositive_weights=exclude_nonpositive_weights)
+
+ if wsvar is None:
+ return None
+ else:
+ return wsvar**0.5
+
+def population_stddev(datavector):
+ pvar = variance(datavector,df='N')
+ if pvar is None:
+ return None
+ else:
+ return pvar**0.5
+
+def wpopulation_stddev(datavector,wgtvector,exclude_nonpositive_weights=False):
+
+ wpvar = wvariance(datavector,wgtvector,df='WEIGHT',exclude_nonpositive_weights=exclude_nonpositive_weights)
+
+ if wpvar is None:
+ return None
+ else:
+ return wpvar**0.5
+
+def cv(datavector,df=None):
+ """
+ Returns the percent co-efficient of variation - same as CV in
+ SAS with VARDEF=DF or VARDEF=N, depending on whether df='DF'
+ (sample) or 'N' (population)
+ """
+ if df not in ('DF','N'):
+ raise ValueError, 'DF argument must be DF or N'
+
+ # first calculate s
+ s2 = variance(datavector,df=df)
+ if s2 is None:
+ return None
+ else:
+ s = s2**0.5
+
+ # then calculate the mean
+ mean = amean(datavector)
+
+ if mean is None or mean == 0:
+ return None
+
+ # now the CV
+ return (100.0 * s)/float(mean)
+
+def sample_cv(datavector):
+ return cv(datavector,df="DF")
+
+def population_cv(datavector):
+ return cv(datavector,df="N")
+
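+# Editor's sketch (not upstream code): continuing the example above, the
+# population standard deviation is sqrt(4.0) = 2.0 and the mean is 5.0, so
+# the population CV is 100 * 2.0 / 5.0 = 40 percent:
+#
+#     >>> population_cv(Numeric.array([2., 4., 4., 4., 5., 5., 7., 9.]))
+#     40.0
+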
+def wcv(datavector,wgtvector,df=None,exclude_nonpositive_weights=True):
+
+ if df not in ('WDF','WEIGHT'):
+ raise ValueError, 'DF argument must be WDF or WEIGHT'
+
+ # first calculate s
+ s2 = wvariance(datavector,wgtvector,df=df,exclude_nonpositive_weights=exclude_nonpositive_weights)
+ if s2 is None:
+ return None
+ else:
+ s = s2**0.5
+
+ # then calculate the mean
+ mean = wamean(datavector,wgtvector,exclude_nonpositive_weights=exclude_nonpositive_weights)
+
+ if mean is None or mean == 0:
+ return None
+
+ # now the CV
+ return (100.0 * s)/float(mean)
+
+def wsample_cv(datavector,wgtvector,exclude_nonpositive_weights=False):
+ return wcv(datavector,wgtvector,df="WDF",exclude_nonpositive_weights=exclude_nonpositive_weights)
+
+def wpopulation_cv(datavector,wgtvector,exclude_nonpositive_weights=False):
+ return wcv(datavector,wgtvector,df="WEIGHT",exclude_nonpositive_weights=exclude_nonpositive_weights)
+
+def stderr(datavector):
+ """
+ Returns standard error of the sample mean.
+ """
+ s2 = variance(datavector,df="DF")
+ n = nonmissing(datavector)
+ if s2 is None or n is None or n == 0:
+ return None
+ else:
+ return s2**0.5 / n**0.5
+
+def wstderr(datavector,wgtvector,exclude_nonpositive_weights=False):
+ """
+ Returns weighted standard error of the sample mean.
+ """
+ assert_same_shape(datavector, wgtvector)
+
+ if exclude_nonpositive_weights:
+ wgtvector = mask_nonpositive_weights(wgtvector)
+ else:
+ wgtvector = zero_nonpositive_weights(wgtvector)
+
+ datavector, wgtvector = mask_and_compress(datavector, wgtvector)
+
+ # first calculate n
+ n = len(datavector)
+ if n < 1:
+ return None
+
+ # then calculate the weighted mean
+ sum = float(Numeric.add.reduce(datavector * wgtvector))
+ sumwgt = float(Numeric.add.reduce(wgtvector))
+ if sumwgt != 0.0:
+ wmean = float(sum)/float(sumwgt)
+ else:
+ return None
+ # now the squares about the mean
+ squaresaboutmean = Numeric.power((datavector - wmean),2)
+ # Now the weighted squares about the mean
+ sumsquares = Numeric.add.reduce(wgtvector * squaresaboutmean)
+ # now d
+ d = float(len(wgtvector)) - 1.0
+ if d > 0:
+ s2 = float(sumsquares)/d
+ else:
+ return None
+
+ sumwgt = float(Numeric.add.reduce(wgtvector))
+
+ if s2 is None or n is None or sumwgt == 0:
+ return None
+ else:
+ return s2**0.5 / sumwgt**0.5
+
+def t(datavector,muzero=0.0):
+ """
+ Returns Student's T statistic for datavector, given muzero
+ (optional, defaults to zero).
+ """
+ se = stderr(datavector)
+ if se is None or se == 0:
+ return None
+ mean = amean(datavector)
+ return float(mean - muzero) / se
+
+def probit(p):
+ """
+ Returns the inverse of the standard normal cumulative
+ distribution function. That is, given probability p (between
+ 0.0 and 1.0 inclusive), returns standardised deviate z.
+
+ This function was adapted from Python code which appears in a
+ paper by Dridi (2003). The programme code is copyright 2003
+ Dridi and Regional Economics Applications Laboratory, University
+ of Illinois and is used here with the permission of its author.
+
+ The code in the above paper was in turn based on an algorithm
+ developed by Odeh and Evans (1974) as described by Kennedy and
+ Gentle (1980). The method uses rational fractions derived from
+ Taylor series to approximate the desired value.
+
+ References:
+
+ Dridi, C. (2003): A Short Note on the Numerical Approximation of
+ the Standard Normal Cumulative Distribution and its Inverse,
+ Regional Economics Applications Laboratory, University of
+ Illinois and Federal Reserve Bank of Chicago. Available at
+ http://www2.uiuc.edu/unit/real/d-paper/real03-t-7.pdf or at
+ http://econwpa.wustl.edu/eps/comp/papers/0212/0212001.pdf
+
+ Kennedy, W.J and Gentle J.E. (1980): Statistical Computing,
+ Marcel Dekker, New York. p93-95.
+
+ Odeh, R.E. and Evans, J.O. (1974): "Algorithm AS 70: Percentage
+ Points of the Normal Distribution", Applied Statistics 23, 96-97
+ """
+
+ xp = 0.0
+ lim = 1.e-20
+ p0 = -0.322232431088
+ p1 = -1.0
+ p2 = -0.342242088547
+ p3 = -0.0204231210245
+ p4 = -0.453642210148e-4
+ q0 = 0.0993484626060
+ q1 = 0.588581570495
+ q2 = 0.531103462366
+ q3 = 0.103537752850
+ q4 = 0.38560700634e-2
+ p = float(p)
+ if p < 0.0 or p > 1.0:
+ raise ValueError, "p must be between 0.0 and 1.0 inclusive"
+ elif p < lim or p == 1.0:
+ xp = -1.0 / lim
+ elif p == 0.5:
+ xp = 0.0
+ elif p > 0.5:
+ p = 1.0 - p
+ y = math.sqrt(math.log(1.0 / p**2.0))
+ xp = y + ((((y*p4+p3)*y+p2)*y+p1)*y+p0)/((((y*q4+q3)*y+q2)*y+q1)*y+q0)
+ elif p < 0.5:
+ y = math.sqrt(math.log(1.0 / p**2.0))
+ xp = -(y + ((((y*p4+p3)*y+p2)*y+p1)*y+p0)/((((y*q4+q3)*y+q2)*y+q1)*y+q0))
+ else:
+ raise ValueError, "p must be between 0.0 and 1.0 inclusive"
+
+ if p > 0.5:
+ return -xp
+ else:
+ return xp
+
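+# Editor's sanity check (not upstream code): probit() reproduces the
+# familiar two-sided 95% critical value to roughly four decimal places:
+#
+#     >>> probit(0.5)
+#     0.0
+#     >>> round(probit(0.975), 2)
+#     1.96
+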
+def cdf_gauss_GL(x):
+ """
+ Returns the probability that an observation from the standard
+ normal distribution is less than or equal to x (that is, x must
+ be a standard normal deviate).
+
+ The following function was adapted from Python code which appears
+ in a paper by Dridi (2003). The programme code is copyright 2003
+ Dridi and Regional Economics Applications Laboratory, University
+ of Illinois and is used here with the permission of its author.
+
+ This function uses Gauss-Legendre quadrature to provide good
+ accuracy in the tails of the distribution, at the expense of
+ speed - this function is very slow due to its iterative nature.
+ A faster version can be found in the Cstats.pyx module which
+ accompanies this (Stats.py) module.
+
+ References:
+
+ Dridi, C. (2003): A Short Note on the Numerical Approximation of
+ the Standard Normal Cumulative Distribution and its Inverse,
+ Regional Economics Applications Laboratory, University of
+ Illinois and Federal Reserve Bank of Chicago. Available at
+ http://www2.uiuc.edu/unit/real/d-paper/real03-t-7.pdf or at
+ http://econwpa.wustl.edu/eps/comp/papers/0212/0212001.pdf
+ """
+ if x >= 0.0:
+ return (1.0 + _GL(0, x/math.sqrt(2.0))) / 2.0
+ else:
+ return (1.0 - _GL(0, -x/math.sqrt(2.0))) / 2.0
+
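+# Editor's sketch (not upstream code): cdf_gauss_GL() and probit() are
+# approximate inverses of each other, and the CDF at zero is exactly 0.5:
+#
+#     >>> cdf_gauss_GL(0.0)
+#     0.5
+#     >>> round(cdf_gauss_GL(1.959964), 3)
+#     0.975
+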
+def _GL(a,b):
+ """
+ Support function for the cdf_gauss_GL() function, which returns
+ the standard normal cumulative distribution using Gauss-Legendre
+ quadrature.
+
+ The following function was adapted from Python code which appears
+ in a paper by Dridi (2003). The programme code is copyright 2003
+ Dridi and Regional Economics Applications Laboratory, University
+ of Illinois and is used here with the permission of its author.
+
+ References:
+
+ Dridi, C. (2003): A Short Note on the Numerical Approximation of
+ the Standard Normal Cumulative Distribution and its Inverse,
+ Regional Economics Applications Laboratory, University of
+ Illinois and Federal Reserve Bank of Chicago. Available at
+ http://www2.uiuc.edu/unit/real/d-paper/real03-t-7.pdf or at
+ http://econwpa.wustl.edu/eps/comp/papers/0212/0212001.pdf
+ """
+ y1=0.0
+ y2=0.0
+ y3=0.0
+ y4=0.0
+ y5=0.0
+
+ x1=-math.sqrt(245.0 + 14.0 * math.sqrt(70.0)) / 21.0
+ x2=-math.sqrt(245.0 - 14.0 * math.sqrt(70.0)) / 21.0
+ x3=0.0
+ x4=-x2
+ x5=-x1
+
+ w1=(322.0 - 13.0 * math.sqrt(70.0)) / 900.0
+ w2=(322.0 + 13.0 * math.sqrt(70.0)) / 900.0
+ w3=128.0/225.0
+ w4=w2
+ w5=w1
+
+ # n=4800 # Original number of iterations used by Dridi
+ n = 120
+ s=0.0
+ h=(b-a)/n
+
+ for i in range(0,n,1):
+ y1=h*x1/2.0+(h+2.0*(a+i*h))/2.0
+ y2=h*x2/2.0+(h+2.0*(a+i*h))/2.0
+ y3=h*x3/2.0+(h+2.0*(a+i*h))/2.0
+ y4=h*x4/2.0+(h+2.0*(a+i*h))/2.0
+ y5=h*x5/2.0+(h+2.0*(a+i*h))/2.0
+ s = s + h*(w1*_f(y1)+w2*_f(y2)+w3*_f(y3)+w4*_f(y4)+w5*_f(y5))/2.0
+ return s
+
+def _f(x):
+ """
+ Support function for the cdf_gauss_GL() function, which returns
+ the standard normal cumulative distribution using Gauss-Legendre
+ quadrature.
+
+ The following function was adapted from Python code which appears
+ in a paper by Dridi (2003). The programme code is copyright 2003
+ Dridi and Regional Economics Applications Laboratory, University
+ of Illinois and is used here with the permission of its author.
+
+ References:
+
+ Dridi, C. (2003): A Short Note on the Numerical Approximation of
+ the Standard Normal Cumulative Distribution and its Inverse,
+ Regional Economics Applications Laboratory, University of
+ Illinois and Federal Reserve Bank of Chicago. Available at
+ http://www2.uiuc.edu/unit/real/d-paper/real03-t-7.pdf or at
+ http://econwpa.wustl.edu/eps/comp/papers/0212/0212001.pdf
+ """
+ return (2.0/math.sqrt(math.pi))*math.exp(-x**2.0)
+
+def cdf_gauss_RA(x):
+ """
+ Returns the standard normal cumulative distribution
+ function. That is, given a standardised deviate z, returns a
+ probability. The cdf is 0 for all z < -8.29314441 and is 1 for
+ all z > 8.29314441.
+
+ The following function was adapted from Python code which appears
+ in a paper by Dridi (2003). The programme code is copyright 2003
+ Dridi and Regional Economics Applications Laboratory, University
+ of Illinois and is used here with the permission of its author.
+
+ This function uses rational fraction approximations given by
+ Cody (1969), using co-efficients provided by Kennedy and Gentle
+ (1980, pp91-92). This function is fast, but is only accurate
+ for values between -0.5 and 0.75.
+
+ References:
+
+ Cody, W.J. (1969): "Rational Chebyshev Approximations for the
+ Error Function", Mathematical Computation 23, 631-638
+
+ Dridi, C. (2003): A Short Note on the Numerical Approximation of
+ the Standard Normal Cumulative Distribution and its Inverse,
+ Regional Economics Applications Laboratory, University of
+ Illinois and Federal Reserve Bank of Chicago. Available at
+ http://www2.uiuc.edu/unit/real/d-paper/real03-t-7.pdf or at
+ http://econwpa.wustl.edu/eps/comp/papers/0212/0212001.pdf
+
+ Kennedy, W.J and Gentle J.E. (1980): Statistical Computing,
+ Marcel Dekker, New York. p93-95.
+ """
+ if x > 0.0:
+ y=x
+ else:
+ y=-x
+
+ if y >= 0.0 and y <= 1.5:
+ p=(1.0 + erf(y/math.sqrt(2.0)))/2.0
+ if y > 1.5:
+ p=1.0 - erfc(y/math.sqrt(2.0))/2.0
+ if x > 0.0:
+ return p
+ else:
+ return 1.0-p
+
+def erf(x):
+ " for 0<x<=0.5 "
+ return x*_R1(x)
+
+def erfc(x):
+ " for 0.46875<=x<=4.0 "
+ if x > 0.46875 and x < 4.0:
+ return math.exp(-x**2.0)*(0.5*_R1(x**2.0)+0.2*_R2(x**2.0)+0.3*_R3(x**2.0))
+ if x >= 4.0:
+ " for x>=4.0 "
+ return (math.exp(-x**2.0)/x)*(1.0/math.sqrt(math.pi)+_R3(x**-2.0)/(x**2.0))
+
+def _R1(x):
+ N=0.0
+ D=0.0
+ p=[2.4266795523053175e2,2.1979261618294152e1,6.9963834886191355,-3.5609843701815385e-2]
+ q=[2.1505887586986120e2,9.1164905404514901e1,1.5082797630407787e1,1.]
+ for i in range(len(p)): # use all four coefficient pairs
+ N=N+p[i]*x**(2.0*i)
+ D=D+q[i]*x**(2.0*i)
+ return N/D
+
+def _R2(x):
+ N=0.0
+ D=0.0
+ p=[3.004592610201616005e2,4.519189537118729422e2,3.393208167343436870e2,
+ 1.529892850469404039e2,4.316222722205673530e1,7.211758250883093659,
+ 5.641955174789739711e-1,-1.368648573827167067e-7]
+ q=[3.004592609569832933e2,7.909509253278980272e2,
+ 9.313540948506096211e2,6.389802644656311665e2,
+ 2.775854447439876434e2,7.700015293522947295e1,1.278272731962942351e1,1.]
+ for i in range(len(p)): # use all eight coefficient pairs
+ N=N+p[i]*x**(-2.0*i)
+ D=D+q[i]*x**(-2.0*i)
+ return N/D
+
+def _R3(x):
+ N=0.0
+ D=0.0
+ p=[-2.99610707703542174e-3,-4.94730910623250734e-2,
+ -2.26956593539686930e-1,-2.78661308609647788e-1,-2.23192459734184686e-2]
+ q=[1.06209230528467918e-2,1.91308926107829841e-1,1.05167510706793207,1.98733201817135256,1.]
+ for i in range(len(p)): # use all five coefficient pairs
+ N=N+p[i]*x**(-2.0*i)
+ D=D+q[i]*x**(-2.0*i)
+ return N/D
+
+def acceptbin(x, n, p):
+ """Support function used by Blaker method in clprop()
+ - computes Blaker acceptibility of p when x is observed and X is bin(n, p).
+ based on R code by Alan Agresti at
+ http://www.stat.ufl.edu/~aa/cda/R/one_sample/R1/index.html
+ """
+ import rpy
+ p1 = 1.0 - rpy.r.pbinom(x - 1.0, n, p)
+ p2 = rpy.r.pbinom(x, n, p)
+ a1 = p1 + rpy.r.pbinom(rpy.r.qbinom(p1, n, p) - 1.0, n, p)
+ a2 = p2 + 1.0 - rpy.r.pbinom(rpy.r.qbinom(1.0 - p2, n, p), n, p)
+ return min(a1, a2)
+
+def propcl(num, den, conflev=0.95, method='blaker',noninteger='reject',epsilon=1e-05):
+ """
+ Returns a three-tuple of proportion and lower and upper
+ confidence limits for a proportion.
+
+ num is the numerator for the proportion, must be zero or a
+ positive value
+
+ den is the denominator for the proportion, must be a positive
+ value
+
+ noninteger= controls what types of values are acceptable for
+ num and den. If noninteger='reject' (the default), then both
+ num and den must be integers. If noninteger='accept', then
+ floating point values are accepted; if noninteger='round',
+ then values are rounded to the nearest integer; and if
+ noninteger='truncate', they are truncated towards zero.
+
+ conflev= is the two-sided confidence level as a proportion
+ (defaults to 0.95). Must be between 0.5 and 1.0 exclusive
+
+ method= selects the calculation method, must be one of 'wald',
+ 'modwald', 'wilsonscore', 'fleissquadratic', 'exact',
+ 'geigy' or 'blaker'. Default is 'blaker'
+
+ epsilon= comparison tolerance for Blaker method
+
+ Method details:
+ wald - Wald normal approximation
+
+ modwald - Agresti-Coull modifications of Wald approximation -
+ see Agresti A, Coull BA. Approximate is better than exact
+ for interval estimation of binomial proportions. The
+ American Statistician, 1998.
+
+ wilsonscore - Wilson score approximation,
+ based on R code by Alan Agresti at
+ http://www.stat.ufl.edu/~aa/cda/R/one_sample/R1/index.html
+
+ fleissquadratic - Score with continuity correction -
+ see Fleiss JL. Statistical methods for rates and
+ proportions, 2nd Ed. New York: John Wiley and Sons,
+ 1981. pp (get page numbers)
+
+ exact - Fisher's exact method, based
+ on R code by Alan Agresti at
+ http://www.stat.ufl.edu/~aa/cda/R/one_sample/R1/index.html
+
+ blaker - method described in Blaker, H. Confidence curves
+ and improved exact confidence intervals for discrete
+ distributions. Canadian Journal of Statistics 2000;
+ 28(4):783-798 based on R code by Alan Agresti at
+ http://www.stat.ufl.edu/~aa/cda/R/one_sample/R1/index.html
+
+ geigy - method given in Geigy Scientific Tables Vol
+ 2 (edited by C. Lentner), Ciba-Geigy Ltd, Basle,
+ Switzerland, p221, as reported in Daly, L. Simple SAS
+ macros for the calculation of exact binomial and Poisson
+ confidence limits. Comput Biol Med 1992; 22(5):351-61
+
+ """
+ try:
+ if num < 0 or den <= 0 or den < num :
+ raise ValueError, "num (numerator) must be zero or a positive integer, and den (denominator) must be a positive integer, and den must be greater than or equal to num"
+ num = float(num)
+ den = float(den)
+ except:
+ raise ValueError, "num (numerator) must be zero or a positive integer, and den (denominator) must be a positive integer, and den must be greater than or equal to num"
+ nonint = num - int(num) != 0 or den - int(den) != 0
+ if noninteger=='reject' and nonint:
+ raise ValueError, "When noninteger='reject', num (numerator) and den (denominator) must be integers"
+ elif noninteger=='round' and nonint:
+ num = round(num)
+ den = round(den)
+ elif noninteger=='truncate' and nonint:
+ num = float(int(num))
+ den = float(int(den))
+ elif noninteger not in ('accept','reject','round','truncate'):
+ raise ValueError, "noninteger argument must be one of 'reject', 'accept', 'round' or 'truncate'"
+ else:
+ pass
+
+ alpha = get_alpha(conflev)
+ phat = num / den
+
+ if method=='blaker':
+ import rpy
+ lower = 0.0
+ upper = 1.0
+ if num != 0.0:
+ lower = rpy.r.qbeta(alpha/2.0, num, den - num + 1.0)
+ while acceptbin(num, den, lower + epsilon) < alpha:
+ lower += epsilon
+ if num != den:
+ upper = rpy.r.qbeta(1.0 - (alpha/2.0), num + 1.0, den - num)
+ while acceptbin(num, den, upper - epsilon) < alpha:
+ upper -= epsilon
+ return (phat, lower, upper)
+ if method=='geigy':
+ alpha /= 2.0
+ if num == 0.0:
+ LL = 0.0
+ UL = 1.0 - 10.0**(math.log10(alpha) / den)
+ return (phat, LL, UL)
+ elif num == den:
+ LL = 10.0**(math.log10(alpha) / den)
+ UL = 1.0
+ return (phat, LL, UL)
+ else:
+ calpha = probit(1.0 - alpha)
+ calphasq = calpha**2
+ calpha4thpower = calpha**4
+ a = calpha4thpower/18.0 + calphasq*(2.0*den + 1.0)/6.0 + (den + 1.0/3.0)**2.0
+ al = num
+ ar = num + 1.0
+ bl = calpha4thpower/18.0 + calphasq*(4.0*(num - al) + 3.0)/6.0 + 2.0*(al*(3.0*den + 1.0)-den)/3.0 - 2.0/9.0
+ cl = calpha4thpower/18.0 + (al-1.0/3.0)**2.0 - calphasq*(2.0*al - 1.0)/6.0
+ LL = bl/(2.0*a) - ((bl/(2.0*a))**2.0 - cl/a)**0.5
+ br = calpha4thpower/18.0 + (ar-1.0/3.0)**2.0 - calphasq*(2.0*ar-1)/6.0 + 2.0*(ar*(3.0*den + 1.0)-den)/3.0 - 2.0/9.0
+ cr = calpha4thpower/18.0 + (ar - 1.0/3.0)**2.0 - calphasq*(2.0*ar - 1)/6.0
+ UL = br/(2.0*a) + ((br/(2.0*a))**2.0 - cr/a)**0.5
+ return (phat, LL, UL)
+ elif method=='exact':
+ import rpy
+ if num == 0.0:
+ LL = 0.0
+ UL = 1.0 - (alpha/2.0)**(1.0/den)
+ elif num == den:
+ LL = (alpha/2.0)**(1.0/den)
+ UL = 1.0
+ else:
+ LL = 1.0 / (1.0 + (den - num + 1) / (num * rpy.r.qf(alpha/2.0, 2.0 * num, 2.0 * (den - num + 1.0))))
+ UL = 1.0 / (1.0 + (den - num) / ((num + 1.0) * rpy.r.qf(1.0 - alpha/2.0, 2.0 * (num + 1.0), 2.0 * (den - num))))
+ if LL < 0.0:
+ LL = 0.0
+ if UL > 1.0:
+ UL = 1.0
+ return (phat, LL, UL)
+ elif method=='wilsonscore':
+ zalpha = abs(probit(alpha/2.0))
+ zalphasq = zalpha**2
+ bound = (zalpha*((phat*(1-phat)+zalphasq/(4*den))/den)**0.5)/(1+zalphasq/den)
+ midpoint = (phat+zalphasq/(2*den))/(1+zalphasq/den)
+ LL = midpoint - bound
+ UL = midpoint + bound
+ if LL < 0.0:
+ LL = 0.0
+ if UL > 1.0:
+ UL = 1.0
+ return (phat, LL, UL)
+ elif method=='fleissquadratic':
+ zalpha = abs(probit(alpha/2.0))
+ zalphasq = zalpha**2
+ LL = ((2.0*den*phat + zalphasq - 1.0) - zalpha*(zalphasq - (2.0 + 1.0/den) + 4.0*phat*(den*(1.0-phat) + 1.0))**0.5) / \
+ (2.0*(den + zalphasq))
+ UL = ((2.0*den*phat + zalphasq + 1.0) + zalpha*(zalphasq + (2.0 - 1.0/den) + 4.0*phat*(den*(1.0-phat) - 1.0))**0.5) / \
+ (2.0*(den + zalphasq))
+ if LL < 0.0 or phat == 0.0:
+ LL = 0.0
+ if UL > 1.0 or phat == 1.0:
+ UL = 1.0
+ return (phat, LL, UL)
+ elif method=='modwald':
+ zed = abs(probit(alpha/2.0))
+ zedsq = zed**2
+ mphat = (num + (zedsq/2.0)) / (den + zedsq)
+ qhat = 1.0 - mphat
+ semiinterval = zed * ((mphat * qhat / (den + zedsq))**0.5)
+ LL = mphat - semiinterval
+ UL = mphat + semiinterval
+ if LL < 0.0:
+ LL = 0.0
+ if UL > 1.0:
+ UL = 1.0
+ return (phat, LL, UL)
+ elif method=='wald':
+ zed = abs(probit(alpha/2.0))
+ qhat = 1.0 - phat
+ semiinterval = zed * ((phat * qhat / den)**0.5)
+ LL = phat - semiinterval
+ UL = phat + semiinterval
+ if LL < 0.0:
+ LL = 0.0
+ if UL > 1.0:
+ UL = 1.0
+ return (phat, LL, UL)
+ else:
+ raise ValueError, "method parameter must be one of 'wald', 'modwald', 'wilsonscore', 'fleissquadratic', 'exact', 'geigy' or 'blaker'"
+
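+# Editor's usage sketch (not upstream code; values are indicative and
+# rounded): a 95% Wilson score interval for 15 events in 100 trials - this
+# method, unlike 'blaker' and 'exact', needs no rpy at runtime:
+#
+#     >>> phat, ll, ul = propcl(15, 100, method='wilsonscore')
+#     >>> phat, round(ll, 3), round(ul, 3)
+#     (0.15, 0.093, 0.233)
+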
+def ratecl(num, den, conflev=0.95, basepop=100000, method='daly'):
+ """
+ Returns a 3-tuple of rate and lower and upper confidence limits for a rate (that is,
+ a number of events divided by the person-time during which the events were observed).
+ num is the numerator for the rate, must be zero or a positive value
+ den is the denominator for the rate, must be greater than zero
+ conflev= is the two-sided confidence level as a proportion (defaults to 0.95). Must be between 0.5 and 1.0 exclusive
+ method= selects the calculation method, must be one of 'rg', 'byar','daly' or 'normal'
+ Default is 'daly'
+ basepop= person-time multiplier, defaults to 100000
+
+ Method details:
+ rg - Rothman-Greenland method - see Rothman KJ, Greenland S. Modern Epidemiology. 2nd Ed. Philadelphia:
+ Lippincott-Raven, 1998. pp. 247-248
+ byar - Poisson approximation by Byar, as described in Rothman KJ and Boice JD. Epidemiologic analysis with a
+ programmable calculator. US National Institutes of Health Publication No. 79 (find complete reference)
+ normal - normal approximation as described in every textbook of epidemiology and biostatistics
+ daly - exact Poisson method as described in: Daly, L. Simple SAS macros for the calculation
+ of exact binomial and Poisson confidence limits. Comput Biol Med 1992; 22(5):351-61
+ """
+ try:
+ if num - int(num) != 0 or den - int(den) != 0 or num < 0 or den <= 0.0:
+ raise ValueError, "num (numerator) must be an integer greater than or equal to zero, and den (denominator) must be an integer greater than zero"
+ num = float(num)
+ den = float(den)
+ except:
+ raise ValueError, "num (numerator) must be an integer greater than or equal to zero, and den (denominator) must be an integer greater than zero"
+
+ alpha = get_alpha(conflev)
+ try:
+ if basepop <= 0:
+ raise ValueError, "basepop must be a greater than zero"
+ except:
+ raise ValueError, "basepop must be a greater than zero"
+
+ zalpha = abs(probit(1.0 - alpha/2.0))
+
+ pt = basepop
+
+ rate = (num / den) * pt
+
+ if method=='rg':
+ unitrate = num / den
+ bound = zalpha * (1.0 / num**0.5)
+ lograte = math.log(unitrate)
+ LL = math.e**(lograte - bound) * pt
+ if LL < 0.0:
+ LL = 0.0
+ UL = math.e**(lograte + bound) * pt
+ return (rate, LL, UL)
+ elif method=='byar':
+ Lnum = num * (1.0 - (1.0/(9.0*num)) - ((zalpha / 3.0) * (1.0 / num)**0.5))**3.0
+ Unum = (num + 1.0) * (1.0 - (1.0/(9.0*(num+1.0))) + ((zalpha / 3.0) * (1.0 / (num + 1.0))**0.5))**3.0
+ LL = (Lnum / den) * pt
+ if LL < 0.0:
+ LL = 0.0
+ UL = (Unum / den) * pt
+ return (rate, LL, UL)
+ elif method=='normal':
+ unitrate = num / den
+ bound = zalpha * ((num / den**2.)**0.5)
+ LL = (unitrate - bound) * pt
+ if LL < 0.0:
+ LL = 0.0
+ UL = (unitrate + bound) * pt
+ return (rate, LL, UL)
+ elif method=='daly':
+ import rpy
+ alpha = alpha / 2.0
+ if num > 0.0:
+ Lnum = rpy.r.qgamma(alpha,num)
+ Unum = rpy.r.qgamma(1.0 - alpha, num + 1.0)
+ elif num == 0.0:
+ Lnum = 0.0
+ Unum = -math.log(alpha)
+ else:
+ raise ValueError, "num (numerator) must be greater than or equal to zero, and den (denominator) must be greater than zero"
+ return (rate, (Lnum/den)*pt, (Unum/den)*pt)
+ else:
+ raise ValueError, "method parameter must be one of 'rg', 'byar', 'daly' or 'normal'"
+
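+# Editor's usage sketch (not upstream code): 25 events over 50,000
+# person-years, expressed per 100,000 person-years; Byar's approximation
+# avoids the rpy dependency of the default 'daly' method:
+#
+#     >>> rate, ll, ul = ratecl(25, 50000, method='byar')
+#     >>> rate
+#     50.0
+#     >>> 32 < ll < 50 < ul < 74    # close to the exact Poisson limits
+#     True
+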
+def freqcl(num,conflev=0.95,method='daly'):
+ """
+ Returns a 3-tuple of a Poisson count and lower and upper confidence limits for that count
+ - num is the count of events - must be zero or a positive value
+ - conflev= is the two-sided confidence level as a proportion (defaults to 0.95). Must be between 0.5 and 1.0 exclusive
+ - method= selects the calculation method, must be one of 'byar' or 'daly', default is 'daly'
+
+ Method details:
+ byar - Poisson approximation by Byar, as described in Rothman KJ and Boice JD. Epidemiologic analysis with a
+ programmable calculator. US National Institutes of Health Publication No. 79 (find complete reference)
+ daly - exact Poisson method as described in: Daly, L. Simple SAS macros for the calculation
+ of exact binomial and Poisson confidence limits. Comput Biol Med 1992; 22(5):351-61
+ """
+ try:
+ if num - int(num) != 0 or num < 0:
+ raise ValueError, "num (count) must be an integer greater than or equal to zero"
+ num = float(num)
+ except:
+ raise ValueError, "num (numerator) must be an integer greater than or equal to zero"
+
+ alpha = get_alpha(conflev)
+
+ if method=='byar':
+ zalpha = abs(probit(1.0 - alpha/2.0))
+ if num > 0:
+ Lnum = num * (1.0 - (1.0/(9.0*num)) - ((zalpha / 3.0) * (1.0 / num)**0.5))**3.0
+ else:
+ Lnum = 0.0
+ Unum = (num + 1.0) * (1.0 - (1.0/(9.0*(num+1.0))) + ((zalpha / 3.0) * (1.0 / (num + 1.0))**0.5))**3.0
+ if Lnum < 0.0:
+ Lnum = 0.0
+ return (num, Lnum, Unum)
+ elif method=='daly':
+ import rpy
+ alpha = alpha / 2.0
+ if num > 0.0:
+ Lnum = rpy.r.qgamma(alpha,num)
+ Unum = rpy.r.qgamma(1.0 - alpha, num + 1.0)
+ elif num == 0.0:
+ Lnum = 0.0
+ Unum = -math.log(alpha)
+ else:
+ raise ValueError, "num (numerator) must be an integer greater than or equal to zero"
+ return (num, Lnum, Unum)
+ else:
+ raise ValueError, "method parameter must be 'byar' or 'daly'"
+
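+# Editor's usage sketch (not upstream code): approximate 95% limits around
+# a Poisson count of 10; the exact limits are about 4.80 and 18.39, and
+# Byar's method lands very close without needing rpy:
+#
+#     >>> num, lnum, unum = freqcl(10, method='byar')
+#     >>> num
+#     10.0
+#     >>> 4.7 < lnum < 4.9 and 18.3 < unum < 18.5
+#     True
+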
+def wncl(wgtvector,conflev=0.95,method='daly',exclude_nonpositive_weights=False):
+ """
+ Returns a 3-tuple of the sum of the non-negative weights passed
+ as the first argument, and the lower and upper Poisson confidence limits around that sum
+ at the alpha level specified by conflev. The optional exclude_nonpositive_weights Boolean
+ argument causes non-positive weights to be treated as missing - it should give the same answers either way.
+
+ - conflev= is the two-sided confidence level as a proportion (defaults to 0.95). Must be between 0.5 and 1.0 exclusive
+ - method= selects the calculation method, must be one of 'byar' or 'daly', default is 'daly'
+
+ Method details:
+ byar - Poisson approximation by Byar, as described in Rothman KJ and Boice JD. Epidemiologic analysis with a
+ programmable calculator. US National Institutes of Health Publication No. 79 (find complete reference)
+ daly - exact Poisson method as described in: Daly, L. Simple SAS macros for the calculation
+ of exact binomial and Poisson confidence limits. Comput Biol Med 1992; 22(5):351-61
+ """
+
+ if exclude_nonpositive_weights:
+ wgtvector = mask_nonpositive_weights(wgtvector)
+ else:
+ wgtvector = zero_nonpositive_weights(wgtvector)
+
+ n = nonmissing(wgtvector)
+
+ unwgted_results = freqcl(n,conflev=conflev,method=method)
+
+ # The following is incorrect, but a rough approximation pending resolution.
+ if n == 0:
+ return (0.0, 0.0, 0.0)
+ else:
+ sumwgts = float(MA.add.reduce(wgtvector))
+ return (sumwgts, (unwgted_results[1]/unwgted_results[0])*sumwgts, (unwgted_results[2]/unwgted_results[0])*sumwgts)
+
+def wfreqcl(wgtvector,conflev=0.95, method='daly',exclude_nonpositive_weights=False):
+ """Same as wncl()"""
+ return wncl(wgtvector,conflev=conflev,method=method,exclude_nonpositive_weights=exclude_nonpositive_weights)
diff --git a/SOOMv0/SummaryCond.py b/SOOMv0/SummaryCond.py
new file mode 100644
index 0000000..46b6704
--- /dev/null
+++ b/SOOMv0/SummaryCond.py
@@ -0,0 +1,554 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: SummaryCond.py 2859 2007-10-18 07:45:37Z andrewm $
+
+# Standard Library
+import sys
+import time
+import bisect
+# 3rd Party
+import MA, Numeric
+# Application
+from SOOMv0.common import *
+from SOOMv0.Soom import soom
+import soomfunc
+
+__all__ = (
+ 'numcmp',
+ 'suppress', 'retain', 'order', 'reversed', 'coalesce', 'bins',
+ 'condcol',
+)
+
+def numcmp(a, b):
+ import math
+ try:
+ a = float(a)
+ b = float(b)
+ except ValueError:
+ return 0
+ else:
+ return int(math.ceil(a - b))
+
+
+class CondColArg(object):
+ """
+ Subclasses of this abstract class contain parameters that are
+ later applied to the summary conditioning columns. Instances of
+ these classes may be pickled or shared between summary calls,
+ so members should be considered R/O.
+ """
+ def apply(self, summcondcol):
+ raise NotImplementedError
+
+ def usage(self, msg):
+ raise Error('%s: %s' % (self.__class__.__name__, msg))
+
+ def __repr__(self):
+ return '%s(%s)' % (self.__class__.__name__,
+ ', '.join([repr(v) for v in self.values]))
+
+
+class CondColArgValues(CondColArg):
+
+ def __init__(self, *values):
+ self.callable = None
+ if values:
+ if callable(values[0]):
+ if len(values) > 1:
+ self.usage('only one callable argument')
+ self.callable = values[0]
+ if isinstance(self.callable, type):
+ # Instantiate class
+ self.callable = self.callable()
+ values = ()
+ elif type(values[0]) in (list, tuple):
+ if len(values) > 1:
+ self.usage('only one list argument')
+ values = values[0]
+ self.values = values
+
+ def calc_values(self, available):
+ if self.callable:
+ return [v for v in available if self.callable(v)]
+ return self.values
+
+ def __repr__(self):
+ return '%s(%s)' % (self.__class__.__name__,
+ ', '.join([repr(v) for v in self.values]))
+
+
+class suppress(CondColArgValues):
+
+ def apply(self, summcondcol):
+ if summcondcol.suppress_set is None:
+ summcondcol.suppress_set = set()
+ values = self.calc_values(summcondcol.inverted)
+ summcondcol.suppress_set.update(values)
+
+
+class retain(CondColArgValues):
+
+ def apply(self, summcondcol):
+ if summcondcol.suppress_set is None:
+ # start from the full set of observed values, then remove the retained ones
+ summcondcol.suppress_set = set(summcondcol.inverted)
+ values = self.calc_values(summcondcol.inverted)
+ summcondcol.suppress_set.difference_update(values)
+
+
+class order(CondColArgValues):
+
+ def apply(self, summcondcol):
+ if self.callable:
+ summcondcol.key_order.sort(self.callable)
+ else:
+ summcondcol.key_order.sort()
+ if self.values:
+ prepend = []
+ for v in self.values:
+ try:
+ summcondcol.key_order.remove(v)
+ except ValueError:
+ pass
+ else:
+ prepend.append(v)
+ summcondcol.key_order = prepend + summcondcol.key_order
+
+
+class reversed(order):
+
+ def apply(self, summcondcol):
+ order.apply(self, summcondcol)
+ summcondcol.key_order.reverse()
+
+
+class coalesce(CondColArgValues):
+ def __init__(self, *values, **kwargs):
+ CondColArgValues.__init__(self, *values)
+ self.label = kwargs.pop('label', None)
+ self.value = kwargs.pop('value', None)
+ if kwargs:
+ raise TypeError('coalesce: unknown keyword arg(s): %s' %
+ ', '.join(kwargs.keys()))
+
+ def apply_callable(self, summcondcol):
+ for v in summcondcol.inverted.keys():
+ agv = self.callable(v)
+ if agv != v:
+ v_rows = summcondcol.inverted.pop(v)
+ agv_rows = summcondcol.inverted.get(agv)
+ if agv_rows is not None:
+ v_rows = soomfunc.union(agv_rows, v_rows)
+ summcondcol.inverted[agv] = v_rows
+
+ def apply_values(self, summcondcol):
+ if self.value is None:
+ newvalue = self.values[0]
+ else:
+ newvalue = self.value
+ if newvalue not in self.values and newvalue in summcondcol.inverted:
+ raise Error('coalesce target value %r conflicts with an '
+ 'existing value' % newvalue)
+ valrows = []
+ vals = []
+ for v in self.values:
+ rows = summcondcol.inverted.pop(v, None)
+ if rows is not None:
+ valrows.append(rows)
+ vals.append(v)
+ if len(valrows) == 0:
+ summcondcol.inverted[newvalue] = []
+ elif len(valrows) == 1:
+ summcondcol.inverted[newvalue] = valrows[0]
+ else:
+ summcondcol.inverted[newvalue] = soomfunc.union(*valrows)
+ if self.label:
+ summcondcol.outtrans[newvalue] = self.label
+ else:
+ vals = [summcondcol.col.do_format(summcondcol.col.do_outtrans(v))
+ for v in vals]
+ summcondcol.outtrans[newvalue] = ', '.join(vals)
+
+ def apply(self, summcondcol):
+ summcondcol.use_outtrans = True
+ if self.callable:
+ self.apply_callable(summcondcol)
+ elif self.values:
+ self.apply_values(summcondcol)
+
+ def __repr__(self):
+ repr = CondColArgValues.__repr__(self)
+ if self.label:
+ repr = '%s, value=%r, label=%r)' % (repr[:-1], self.value,
+ self.label)
+ return repr
+
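+# Editor's usage sketch (not upstream code; the codes and label are
+# hypothetical): merge category codes 8 and 9 into the single output value
+# 8 with a combined label, or fold values together with a callable:
+#
+#     coalesce(8, 9, value=8, label='Rural/Remote')
+#     coalesce(lambda v: v.upper())    # fold case variants of string codes
+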
+
+class BinFunction(object):
+ pass
+
+
+class DateBinFunction(BinFunction):
+
+ def get_nbins(self, col):
+ if not col.is_datetimetype():
+ raise Error('%s binning but %r is not a date column' %
+ (self.__class__.__name__, col.name))
+ return self.nbins
+
+ def get_outtrans(self, col):
+ return self.outtrans
+
+
+class bin_dayofweek(DateBinFunction):
+ nbins = 8 # 1 based
+ label = 'Day of Week'
+ outtrans = {
+ 1: 'Mon',
+ 2: 'Tue',
+ 3: 'Wed',
+ 4: 'Thu',
+ 5: 'Fri',
+ 6: 'Sat',
+ 7: 'Sun',
+ }
+ def bin_fn(self, v):
+ return v.iso_week[2]
+
+
+class bin_weekofyear(DateBinFunction):
+ nbins = 53 # 1 based
+ outtrans = {}
+ label = 'Week of Year'
+
+ def bin_fn(self, v):
+ return v.iso_week[1]
+
+
+class bin_monthofyear(DateBinFunction):
+ nbins = 13 # 1 based
+ label = 'Month'
+ outtrans = {
+ 1: 'Jan',
+ 2: 'Feb',
+ 3: 'Mar',
+ 4: 'Apr',
+ 5: 'May',
+ 6: 'Jun',
+ 7: 'Jul',
+ 8: 'Aug',
+ 9: 'Sep',
+ 10: 'Oct',
+ 11: 'Nov',
+ 12: 'Dec',
+ }
+
+ def bin_fn(self, v):
+ return v.month
+
+
+class bin_year(DateBinFunction):
+ nbins = 1 # Unknown
+ outtrans = {}
+ label = 'Year'
+
+ def bin_fn(self, v):
+ return v.year
+
+class bin_yearmonth(DateBinFunction):
+ nbins = 1 # Unknown
+ outtrans = {}
+ label = 'Year+Month'
+
+ def bin_fn(self, v):
+ return v.year * 100 + v.month
+
+
+class bin_yearweek(DateBinFunction):
+ nbins = 1 # Unknown
+ outtrans = {}
+ label = 'Year+Week'
+
+ def bin_fn(self, v):
+ return v.year * 100 + v.iso_week[1]
+
+
+class bin_dayofyear(DateBinFunction):
+ nbins = 366 # 1 based
+ outtrans = {}
+ label = 'Day of Year'
+
+ def bin_fn(self, v):
+ return v.day_of_year
+
+
+class bin_dayofmonth(DateBinFunction):
+ nbins = 32 # 1 based
+ outtrans = {}
+ label = 'Day of Month'
+
+ def bin_fn(self, v):
+ return v.day
+
+
+class bin_hour(DateBinFunction):
+ nbins = 24
+ outtrans = {}
+ label = 'Hour of Day'
+
+ def bin_fn(self, v):
+ return v.hour
+
+
+class bin_time(DateBinFunction):
+ nbins = 0
+ outtrans = {}
+ label = 'Time of Day'
+
+ def get_outtrans(self, col):
+ outtrans = {}
+ for t in xrange(0, 24 * 60):
+ outtrans[t] = '%02d:%02d' % (t / 60, t % 60)
+ return outtrans
+
+ def bin_fn(self, v):
+ return int(v.abstime) / 60
+
+
+class bins(CondColArgValues):
+ named_bin_fns = {
+ 'weekofyear': bin_weekofyear,
+ 'month': bin_monthofyear,
+ 'dayofweek': bin_dayofweek,
+ 'weekday': bin_dayofweek,
+ 'year': bin_year,
+ 'yearmonth': bin_yearmonth,
+ 'yearweek': bin_yearweek,
+ 'dayofyear': bin_dayofyear,
+ 'dayofmonth': bin_dayofmonth,
+ 'day': bin_dayofmonth,
+ 'hour': bin_hour,
+ 'timeofday': bin_time,
+ 'time': bin_time,
+ }
+
+ def __init__(self, *args, **kwargs):
+ values = []
+ for a in args:
+ if isinstance(a, basestring):
+ try:
+ a = self.named_bin_fns[a]
+ except KeyError:
+ raise Error('Unknown named binning function: %r' % a)
+ values.append(a)
+ CondColArgValues.__init__(self, *values)
+ self.n = kwargs.pop('n', None)
+ if self.n is not None:
+ if self.values:
+ raise Error('Specify either bin breaks or number of bins, '
+ 'not both')
+ if self.n < 2:
+ raise Error('Specify at least 2 bins')
+
+ def even_breaks(self, col):
+ """
+ Scans the column values to derive self.n even breaks.
+ """
+ st = time.time()
+ missing = None
+ if col.is_discrete():
+ data = [v for v, vec in col.inverted.items() if len(vec) > 0]
+ else:
+ data = col.data
+ if type(data) is MA.MaskedArray:
+ # This isn't very nice, but it makes a huge difference to speed.
+ missing = -sys.maxint
+ data = data.filled(missing)
+ min = max = None
+ for v in data:
+ if v != missing:
+ if min is None or v < min:
+ min = v
+ if max is None or v > max:
+ max = v
+ span = (max - min) / float(self.n)
+ breaks = [min + span * n for n in xrange(1, self.n)]
+ soom.info('Calculated %s breaks in %.2fs, min %s, max %s, span %s' %
+ (self.n, time.time() - st, min, max, span))
+ return breaks
+
+ def get_bins(self, bins, nbins):
+ for i in xrange(nbins):
+ bins.append([])
+ return bins
+
+ def discrete_bin(self, col, value_bin, bins):
+ _bisect = bisect.bisect # make it a local for perf.
+ for v, vec in col.inverted.items():
+ if v is None:
+ bin = -1
+ else:
+ bin = value_bin(v)
+ try:
+ bins[bin].append(vec)
+ except IndexError:
+ self.get_bins(bins, bin)
+ bins[bin].append(vec)
+ for i, vecs in enumerate(bins):
+ n = len(vecs)
+ if not n:
+ bins[i] = []
+ elif n == 1:
+ bins[i] = vecs[0]
+ else:
+ bins[i] = soomfunc.union(*vecs)
+
+ def continuous_bin(self, col, value_bin, bins):
+ missing = None
+ data = col.data
+ if type(data) is MA.MaskedArray:
+ # This isn't very nice, but it makes a huge difference to speed.
+ missing = -sys.maxint
+ data = data.filled(missing)
+ for i, v in enumerate(data):
+ if v == missing:
+ bin = -1
+ else:
+ bin = value_bin(v)
+ try:
+ bins[bin].append(i)
+ except IndexError:
+ self.get_bins(bins, bin)
+ bins[bin].append(i)
+
+ def bins_to_inverted(self, bins):
+ inverted = {}
+ missing_bin = len(bins) - 1
+ for v, vec in enumerate(bins):
+ if v == missing_bin:
+ v = None
+ # This makes it much slower... why?
+# inverted[v] = Numeric.array(vec, typecode=Numeric.Int)
+ inverted[v] = vec
+ return inverted
+
+ def make_outtrans(self, col, inverted, breaks):
+ outtrans = {}
+ format = col.do_format
+ last_bin = len(breaks)
+ for v, vec in inverted.iteritems():
+ if v == 0:
+ outtrans[v] = '< %s' % format(breaks[0])
+ elif v == last_bin:
+ outtrans[v] = '>= %s' % format(breaks[-1])
+ elif v is None:
+ outtrans[v] = '--'
+ else:
+ outtrans[v] = '%s - %s' % (format(breaks[v-1]),
+ format(breaks[v]))
+ return outtrans
+
+ def break_bin(self, summcondcol):
+ if self.n is not None:
+ breaks = self.even_breaks(summcondcol.col)
+ else:
+ breaks = sorted(self.values)
+ _bisect = bisect.bisect # make it a local for perf.
+ value_bin = lambda v: _bisect(breaks, v)
+ bins = self.get_bins([], len(breaks) + 2) # breaks+1 bins, plus 1 "missing" bin
+ if summcondcol.col.is_discrete():
+ self.discrete_bin(summcondcol.col, value_bin, bins)
+ else:
+ self.continuous_bin(summcondcol.col, value_bin, bins)
+ summcondcol.inverted = self.bins_to_inverted(bins)
+ summcondcol.outtrans = self.make_outtrans(summcondcol.col, summcondcol.inverted, breaks)
+
+ def fn_bin(self, summcondcol):
+ nbins = self.callable.get_nbins(summcondcol.col)
+ value_bin = self.callable.bin_fn
+ summcondcol.outtrans = self.callable.get_outtrans(summcondcol.col)
+ summcondcol.outtrans.setdefault(None, 'N/A')
+ bins = self.get_bins([], nbins + 1) # + 1 for "missing" bin
+ if summcondcol.col.is_discrete():
+ self.discrete_bin(summcondcol.col, value_bin, bins)
+ else:
+ self.continuous_bin(summcondcol.col, value_bin, bins)
+ summcondcol.inverted = self.bins_to_inverted(bins)
+ fn_label = getattr(self.callable, 'label', None)
+ if fn_label:
+ col_label = summcondcol.col.label or summcondcol.col.name
+ summcondcol.label = '%s (%s)' % (col_label, fn_label)
+
+ def apply(self, summcondcol):
+ if self.callable:
+ return self.fn_bin(summcondcol)
+ else:
+ return self.break_bin(summcondcol)
+
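+# Editor's usage sketch (not upstream code; 'age' is a hypothetical column):
+# bins() accepts explicit break points, a number of even-width bins via n=,
+# or one of the named date binning functions above:
+#
+#     bins(15, 25, 45, 65)    # breaks: <15, 15-25, 25-45, 45-65, >=65
+#     bins(n=10)              # ten even-width bins over the data range
+#     bins('month')           # date column binned with bin_monthofyear
+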
+
+class condcol:
+ def __init__(self, colname, *args, **kwargs):
+ self.colname = colname
+ self.args = []
+ self.order = None
+ self.binning = None
+ self.label = kwargs.pop('label', None)
+ for arg in args:
+ if isinstance(arg, (order, reversed)):
+ if self.order is not None:
+ raise Error('only one "order" per condcol allowed')
+ self.order = arg
+ elif isinstance(arg, bins):
+ if self.binning is not None:
+ raise Error('only one "bins" per condcol allowed')
+ self.binning = arg
+ elif not isinstance(arg, CondColArg):
+ raise Error('Unknown condcol() argument: %r' % arg)
+ else:
+ self.args.append(arg)
+
+ def apply(self, summcondcol):
+ if self.binning:
+ st = time.time()
+ self.binning.apply(summcondcol)
+ soom.info("Summarise binning took %.2fs" % (time.time() - st))
+ elif summcondcol.col.is_discrete():
+ summcondcol.inverted = dict(summcondcol.col.inverted)
+ summcondcol.outtrans = dict(summcondcol.col.outtrans)
+ else:
+ raise Error('%r is not a discrete column' % self.colname)
+ for arg in self.args:
+ arg.apply(summcondcol)
+ summcondcol.key_order = summcondcol.inverted.keys()
+ if self.order is None:
+ summcondcol.key_order.sort()
+ else:
+ self.order.apply(summcondcol)
+
+ def __cmp__(self, other):
+ if isinstance(other, self.__class__):
+ return cmp(self.colname, other.colname)
+ # This allows others to compare new-style condcol() args with old-style
+ # string column name args (we convert all to condcol internally).
+ return cmp(self.colname, other)
+
+ def get_colname(self):
+ return self.colname
+
+ def __repr__(self):
+ res = []
+ res.append(repr(self.colname))
+ for arg in self.args:
+ res.append(repr(arg))
+ return 'condcol(%s)' % (', '.join(res))
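+
+# Editor's usage sketch (not upstream code; column and category names are
+# hypothetical): condcol() bundles a column name with conditioning
+# arguments for use in summary calls:
+#
+#     condcol('sex', order('Male', 'Female'), suppress('Unknown'))
+#     condcol('age', bins(n=5), label='Age group')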
diff --git a/SOOMv0/SummaryProp.py b/SOOMv0/SummaryProp.py
new file mode 100644
index 0000000..e87bca9
--- /dev/null
+++ b/SOOMv0/SummaryProp.py
@@ -0,0 +1,131 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: SummaryProp.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/SummaryProp.py,v $
+
+"""
+Calculate proportions of summary sets
+"""
+
+import copy
+from SOOMv0 import common, Utils
+
+_colname_prefix = '_prop_of_all-'
+
+def proportion_label(dataset, colnames, marginal_colnames = None):
+ """
+ Given a dataset, a list of conditioning column names, and a
+ list of marginal column names, return proportion column name
+ and label.
+ """
+ if marginal_colnames is None:
+ marginal_colnames = colnames
+ if 0:
+ if len(marginal_colnames) == len(colnames):
+ label_parts = ['All']
+ else:
+ label_parts = []
+ for colname in colnames:
+ label = dataset[colname].label
+ if colname in marginal_colnames:
+ label_parts.append('all ' + Utils.pluralise(label))
+ else:
+ label_parts.append('same ' + label)
+
+ if 1:
+ label_parts = []
+ for colname in colnames:
+ label = dataset[colname].label
+ if colname in marginal_colnames:
+ label_parts.append('all ' + Utils.pluralise(label))
+
+ if 0:
+ if len(marginal_colnames) == len(colnames):
+ label_parts = ['All']
+ else:
+ label_parts = []
+ for colname in colnames:
+ label = dataset[colname].label
+ if colname not in marginal_colnames:
+ label_parts.append('same ' + label)
+
+ return (_colname_prefix + '-'.join(marginal_colnames),
+ 'Propn. of ' + ', '.join(label_parts))
+
+
+def _yield_prop_combos(summaryset, colnames):
+ colnames = tuple(colnames)
+ for a in range(len(colnames)):
+ for non_marginal_colnames in Utils.xcombinations(colnames,a):
+ marginal_colnames = list(colnames) # copy or cast
+ for d in non_marginal_colnames:
+ marginal_colnames.remove(d)
+ yield marginal_colnames
+
+def extract_propn_cols(name):
+ """
+ Given a propn column name, extract the names of the "All" columns
+ """
+ if name.startswith(_colname_prefix):
+ return name[len(_colname_prefix):].split('-')
+
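+# Editor's sketch (not upstream code): proportion column names round-trip
+# through the '_prop_of_all-' prefix; non-proportion names yield None:
+#
+#     >>> extract_propn_cols('_prop_of_all-sex-agegrp')
+#     ['sex', 'agegrp']
+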
+def propn_names_and_labels(dataset, colnames):
+ return [proportion_label(dataset, colnames, marginal_colnames)
+ for marginal_colnames in _yield_prop_combos(dataset, colnames)]
+
+def calc_props(summaryset, colnames, allvals, freqcol):
+ """
+ Add proportions columns to a summaryset
+
+ If the conditioning columns are a, b, and c, the resulting proportions
+ columns will be:
+ '_prop_of_all-a-b-c',
+ '_prop_of_all-b-c',
+ '_prop_of_all-a-c',
+ '_prop_of_all-a-b',
+ '_prop_of_all-c',
+ '_prop_of_all-b',
+ '_prop_of_all-a'
+
+ summaryset is a temporary summary dataset (still subtly
+ different from a real dataset at this time).
+ colnames is a list of the conditioning column names.
+ allvals is a list containing the allvalues for the above columns
+ freqcol is the frequency column, typically _freq_
+ """
+
+ allvals_dict = dict(zip(colnames, allvals))
+ for marginal_colnames in _yield_prop_combos(summaryset, colnames):
+ props = []
+ for i, freq in enumerate(summaryset[freqcol].data):
+ val_list = []
+ for colname in colnames:
+ if colname in marginal_colnames:
+ val_list.append(allvals_dict[colname])
+ else:
+ val_list.append(summaryset[colname].data[i])
+ mt = summaryset.marginal_total_idx[tuple(val_list)]
+ den = summaryset[freqcol].data[mt]
+ if den > 0:
+ prop = freq / float(den)
+ else:
+ prop = None
+# if marginal_colnames == ['race']:
+# print '%2d: %8d %8d %8d %8.4g %s' % (i, mt, freq, den, prop, val_list)
+ props.append(prop)
+ propname, proplabel = proportion_label(summaryset, colnames,
+ marginal_colnames)
+ col = summaryset.addcolumn(propname, proplabel)
+ col.data = props
diff --git a/SOOMv0/SummaryStats.py b/SOOMv0/SummaryStats.py
new file mode 100644
index 0000000..269d7c7
--- /dev/null
+++ b/SOOMv0/SummaryStats.py
@@ -0,0 +1,501 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: SummaryStats.py 2859 2007-10-18 07:45:37Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/SummaryStats.py,v $
+
+from MA import Numeric
+from SOOMv0 import Stats
+from SOOMv0.common import *
+
+__all__ = (
+ 'applyto', 'arange', 'asum', 'extract', 'freq', 'freqcl', 'geomean',
+ 'isstatmethod', 'maximum', 'mean', 'meancl', 'median', 'minimum',
+ 'missing', 'nonmissing', 'p10', 'p25', 'p75', 'p90', 'popcv',
+ 'popstddev', 'popvar', 'quantile', 'samplecv', 'samplevar',
+ 'stat_methods', 'stddev', 'stderr', 'studentt',
+)
+
+def label_or_name(col):
+ if col.label:
+ return col.label
+ return col.name
+
+class _UseDefault: pass
+
+class _SummaryStatBase:
+ usage = '(col, <weightcol=...>)'
+ short_label = None
+
+ def __init__(self, srccolname, weightcol = _UseDefault, **kwargs):
+ self.srccolname = srccolname
+ self.wgtcolname = weightcol
+ self.args = []
+ self.kwargs = kwargs
+
+ def select_weightcol(self, default_weightcol):
+ if self.wgtcolname == _UseDefault:
+ return default_weightcol
+ else:
+ return self.wgtcolname
+
+ def get_statcolname(self, default_weightcol):
+ wgtcolname = self.select_weightcol(default_weightcol)
+ colname = self.name_fmt % self.srccolname
+ if wgtcolname:
+ colname = '%s_wgtd_by_%s' % (colname, wgtcolname)
+ return colname
+
+ def get_label(self, dataset, default_weightcol):
+ col = dataset[self.srccolname]
+ label = '%s of %s' % (self.short_label or self.__doc__,
+ label_or_name(col))
+ wgtcolname = self.select_weightcol(default_weightcol)
+ if wgtcolname:
+ wgtcol = dataset[wgtcolname]
+ label = '%s weighted by %s' % (label, label_or_name(wgtcol))
+ return label
+
+ def args_okay(self, dataset, default_weightcol):
+ col = dataset.get_column(self.srccolname)
+ if not col.is_scalar():
+ raise Error('%s of %s column must be scalar' %
+ (self.__doc__, label_or_name(col)))
+ wgtcolname = self.select_weightcol(default_weightcol)
+ if wgtcolname:
+ wgtcol = dataset.get_column(wgtcolname)
+ if not wgtcol.is_scalar():
+ raise Error('%s weighting column %r must be scalar' %
+ (self.__doc__, label_or_name(wgtcol)))
+ if not self.statcol_fns[1]:
+ raise Error('%s method does not support weighting' %
+ self.__doc__)
+
+ def add_statcol(self, dataset, statcolname, summaryset, default_weightcol):
+ label = self.get_label(dataset, default_weightcol)
+ summaryset.addcolumn(statcolname, label)
+
+ def _calc(self, statcolname, summaryset, colvectors, default_weightcol):
+ col = colvectors[self.srccolname]
+ wgtcolname = self.select_weightcol(default_weightcol)
+ if wgtcolname:
+ wgtcol = colvectors[wgtcolname]
+ if col.data:
+ statcol_fn, wgtd_statcol_fn = self.statcol_fns
+ try:
+ if wgtcolname:
+ return wgtd_statcol_fn(col.data, wgtcol.data,
+ *self.args, **self.kwargs)
+ else:
+ return statcol_fn(col.data, *self.args, **self.kwargs)
+ except:
+ import sys
+ exc_type, exc_value, exc_tb = sys.exc_info()
+ try:
+ if wgtcolname:
+ exc_str = '%s: %s (%r,%r,%r,%r)' % (statcolname,
+ exc_value, col.data,
+ wgtcol.data,
+ self.args, self.kwargs)
+ else:
+ exc_str = '%s: %s (%r,%r,%r)' % (statcolname,
+ exc_value, col.data,
+ self.args, self.kwargs)
+ raise exc_type, exc_str, exc_tb
+ finally:
+ del exc_type, exc_value, exc_tb
+ return None
+
+ def calc(self, statcolname, summaryset, colvectors, default_weightcol):
+ statdata = self._calc(statcolname, summaryset, colvectors,
+ default_weightcol)
+ summaryset[statcolname].data.append(statdata)
+
+ def __repr__(self):
+ params = []
+ if hasattr(self, 'srccolname'):
+ params.append(repr(self.srccolname))
+ if self.wgtcolname != _UseDefault:
+ params.append('wgtcolname=%r' % self.wgtcolname)
+ if hasattr(self, 'args'):
+ for arg in self.args:
+ params.append(repr(arg))
+ for k, v in self.kwargs.items():
+ params.append('%s=%r' % (k, v))
+ return '%s(%s)' % (self.__class__.__name__, ', '.join(params))
+
+
+class _CISummaryStatBase(_SummaryStatBase):
+ def __init__(self, srccolname, weightcol=_UseDefault,
+ conflev=0.95, **kwargs):
+ _SummaryStatBase.__init__(self, srccolname, weightcol=weightcol, **kwargs)
+ self.kwargs['conflev'] = self.conflev = conflev
+
+ def add_statcol(self, dataset, statcolname, summaryset, default_weightcol):
+ label = self.get_label(dataset, default_weightcol)
+ conflev_label = ' (%g%% conf. limit)' % (self.conflev * 100)
+ summaryset.addcolumn(statcolname + '_ll',
+ 'Lower limit of ' + label + conflev_label)
+ summaryset.addcolumn(statcolname, label)
+ summaryset.addcolumn(statcolname + '_ul',
+ 'Upper limit of ' + label + conflev_label)
+
+ def calc(self, statcolname, summaryset, colvectors, default_weightcol):
+ statdata = self._calc(statcolname, summaryset,
+ colvectors, default_weightcol)
+ if statdata:
+ statdata, lower, upper = statdata
+ else:
+ statdata = lower = upper = None
+ summaryset[statcolname].data.append(statdata)
+ summaryset[statcolname+'_ll'].data.append(lower)
+ summaryset[statcolname+'_ul'].data.append(upper)
+
+
+class freq(_SummaryStatBase):
+ 'Frequency'
+ usage = '(<weightcol=...>)'
+
+ def __init__(self, weightcol = _UseDefault, **kwargs):
+ self.wgtcolname = weightcol
+ self.kwargs = kwargs
+
+ def args_okay(self, dataset, default_weightcol):
+ wgtcolname = self.select_weightcol(default_weightcol)
+ if wgtcolname:
+ wgtcol = dataset.get_column(wgtcolname)
+ if not wgtcol.is_scalar():
+ raise Error('%s weighting column %r must be scalar' %
+ (self.__doc__, label_or_name(wgtcol)))
+
+ def get_statcolname(self, default_weightcol):
+ wgtcolname = self.select_weightcol(default_weightcol)
+ if wgtcolname:
+ return 'freq_wgtd_by_%s' % wgtcolname
+ return '_freq_'
+
+ def get_label(self, dataset, default_weightcol):
+ wgtcolname = self.select_weightcol(default_weightcol)
+ label = 'Frequency'
+ if wgtcolname:
+ wgtcol = dataset[wgtcolname]
+ label += ' weighted by %s' % label_or_name(wgtcol)
+ return label
+
+ def add_statcol(self, dataset, statcolname, summaryset, default_weightcol):
+ wgtcolname = self.select_weightcol(default_weightcol)
+ if wgtcolname:
+ label = self.get_label(dataset, default_weightcol)
+ summaryset.addcolumn(statcolname, label)
+
+ def calc(self, statcolname, summaryset, colvectors, default_weightcol):
+ # Odd one out - if not weighted, just reuse core _freq_ column.
+ wgtcolname = self.select_weightcol(default_weightcol)
+ if wgtcolname:
+ wgtcol = colvectors[wgtcolname]
+ summaryset[statcolname].data.append(Stats.wn(wgtcol.data,
+ **self.kwargs))
+
+class freqcl(freq):
+ 'Frequency (with confidence limits)'
+ short_label = 'Frequency'
+
+ def __init__(self, weightcol=_UseDefault, conflev=0.95, **kwargs):
+ freq.__init__(self, weightcol=weightcol, **kwargs)
+ self.kwargs['conflev'] = self.conflev = conflev
+
+ def add_statcol(self, dataset, statcolname, summaryset, default_weightcol):
+ label = self.get_label(dataset, default_weightcol)
+ wgtcolname = self.select_weightcol(default_weightcol)
+ if wgtcolname:
+ summaryset.addcolumn(statcolname, label)
+ conflev_label = ' (%g%% conf. limit)' % (self.conflev * 100)
+ summaryset.addcolumn(statcolname + '_ll',
+ 'Lower limit of ' + label + conflev_label)
+ summaryset.addcolumn(statcolname + '_ul',
+ 'Upper limit of ' + label + conflev_label)
+
+ def calc(self, statcolname, summaryset, colvectors, default_weightcol):
+ wgtcolname = self.select_weightcol(default_weightcol)
+ if wgtcolname:
+ wgtcol = colvectors[wgtcolname]
+ statdata = Stats.wfreqcl(wgtcol.data, **self.kwargs)
+ else:
+ statdata = Stats.freqcl(len(colvectors), **self.kwargs)
+ if statdata:
+ statdata, lower, upper = statdata
+ else:
+ statdata = lower = upper = None
+ if wgtcolname:
+ summaryset[statcolname].data.append(statdata)
+ summaryset[statcolname+'_ll'].data.append(lower)
+ summaryset[statcolname+'_ul'].data.append(upper)
+
+class asum(_SummaryStatBase):
+ 'Sum'
+
+ name_fmt = 'sum_of_%s'
+ statcol_fns = Stats.asum, Stats.wsum
+
+class mean(_SummaryStatBase):
+ 'Mean'
+
+ name_fmt = 'mean_of_%s'
+ statcol_fns = Stats.amean, Stats.wamean
+
+class meancl(_CISummaryStatBase):
+ 'Mean (with confidence limits)'
+ short_label = 'Mean'
+
+ name_fmt = 'mean_of_%s'
+ statcol_fns = Stats.ameancl, Stats.wameancl
+
+class geomean(_SummaryStatBase):
+ 'Geometric Mean'
+
+ name_fmt = 'geomean_of_%s'
+ statcol_fns = Stats.geomean, Stats.wgeomean
+
+class minimum(_SummaryStatBase):
+ 'Minimum'
+
+ name_fmt = 'minimum_of_%s'
+ statcol_fns = Stats.aminimum, Stats.wminimum
+
+class maximum(_SummaryStatBase):
+ 'Maximum'
+
+ name_fmt = 'maximum_of_%s'
+ statcol_fns = Stats.amaximum, Stats.wmaximum
+
+class arange(_SummaryStatBase):
+ 'Range'
+
+ name_fmt = 'range_of_%s'
+ statcol_fns = Stats.arange, Stats.wrange
+
+class median(_SummaryStatBase):
+ 'Median'
+
+ name_fmt = 'median_of_%s'
+ statcol_fns = Stats.median, Stats.wmedian
+
+class p10(_SummaryStatBase):
+ '10th Percentile'
+
+ name_fmt = 'p10_of_%s'
+ statcol_fns = Stats.quantile, Stats.wquantile
+
+ def __init__(self, srccolname, **kwargs):
+ _SummaryStatBase.__init__(self, srccolname, **kwargs)
+ self.args.append(0.10)
+
+
+class p25(_SummaryStatBase):
+ '25th Percentile'
+
+ name_fmt = 'p25_of_%s'
+ statcol_fns = Stats.quantile, Stats.wquantile
+
+ def __init__(self, srccolname, **kwargs):
+ _SummaryStatBase.__init__(self, srccolname, **kwargs)
+ self.args.append(0.25)
+
+
+class p75(_SummaryStatBase):
+ '75th Percentile'
+
+ name_fmt = 'p75_of_%s'
+ statcol_fns = Stats.quantile, Stats.wquantile
+
+ def __init__(self, srccolname, **kwargs):
+ _SummaryStatBase.__init__(self, srccolname, **kwargs)
+ self.args.append(0.75)
+
+
+class p90(_SummaryStatBase):
+ '90th Percentile'
+
+ name_fmt = 'p90_of_%s'
+ statcol_fns = Stats.quantile, Stats.wquantile
+
+ def __init__(self, srccolname, **kwargs):
+ _SummaryStatBase.__init__(self, srccolname, **kwargs)
+ self.args.append(0.90)
+
+
+class quantile(_SummaryStatBase):
+ 'Percentile'
+
+ statcol_fns = Stats.quantile, Stats.wquantile
+
+ def __init__(self, srccolname, p, **kwargs):
+ _SummaryStatBase.__init__(self, srccolname, **kwargs)
+        percent = int(round(p * 100))
+        sufidx = percent % 10
+        suffix = 'th'
+        # 11th, 12th and 13th take 'th', not 'st'/'nd'/'rd'
+        if sufidx <= 3 and percent % 100 not in (11, 12, 13):
+            suffix = ['th', 'st', 'nd', 'rd'][sufidx]
+        self.name_fmt = 'p%02d_of_%%s' % percent
+        self.__doc__ = '%d%s %s' % (percent, suffix, self.__doc__)
+ self.args.append(p)
+
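A quick sketch of the ordinal-suffix logic above (not upstream code; 'age'
is a hypothetical column name):

    q = quantile('age', 0.33)
    q.name_fmt % 'age'             # -> 'p33_of_age'
    q.__doc__                      # -> '33rd Percentile'
    quantile('age', 0.11).__doc__  # -> '11th Percentile', not '11st'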
+class samplevar(_SummaryStatBase):
+ 'Sample Variance'
+
+ name_fmt = 'samplevar_of_%s'
+ statcol_fns = Stats.samplevar, Stats.wsamplevar
+
+class popvar(_SummaryStatBase):
+ 'Population Variance'
+
+ name_fmt = 'popvar_of_%s'
+ statcol_fns = Stats.populationvar, Stats.wpopulationvar
+
+class stddev(_SummaryStatBase):
+ 'Sample Standard Deviation'
+
+ name_fmt = 'samplestddev_of_%s'
+ statcol_fns = Stats.sample_stddev, Stats.wsample_stddev
+
+class popstddev(_SummaryStatBase):
+ 'Population Standard Deviation'
+
+ name_fmt = 'popstddev_of_%s'
+ statcol_fns = Stats.population_stddev, Stats.wpopulation_stddev
+
+class samplecv(_SummaryStatBase):
+    'Sample Coefficient of Variation'
+
+ name_fmt = 'samplecv_of_%s'
+ statcol_fns = Stats.sample_cv, Stats.wsample_cv
+
+class popcv(_SummaryStatBase):
+    'Population Coefficient of Variation'
+
+ name_fmt = 'popcv_of_%s'
+ statcol_fns = Stats.population_cv, Stats.wpopulation_cv
+
+class stderr(_SummaryStatBase):
+ 'Standard Error'
+
+ name_fmt = 'stderr_of_%s'
+ statcol_fns = Stats.stderr, Stats.wstderr
+
+class nonmissing(_SummaryStatBase):
+ 'Count of non-missing values'
+
+ name_fmt = 'nonmissing_of_%s'
+ statcol_fns = Stats.nonmissing, Stats.wnonmissing
+
+class missing(_SummaryStatBase):
+ 'Count of missing values'
+
+ name_fmt = 'missing_of_%s'
+ statcol_fns = Stats.missing, Stats.wmissing
+
+class studentt(_SummaryStatBase):
+ 'Student\'s T'
+
+ name_fmt = 't_of_%s'
+ statcol_fns = Stats.t, None
+
+class applyto(_SummaryStatBase):
+ 'Apply method(s) to column(s)'
+ usage = '(cols..., methods..., options...)'
+
+ def __init__(self, *args, **kwargs):
+ stat_classes = []
+ cols = []
+ for arg in args:
+            if isinstance(arg, basestring):
+ cols.append(arg)
+ else:
+ stat_classes.append(arg)
+ self.stat_methods = [cls(col, **kwargs)
+ for cls in stat_classes
+ for col in cols]
+
+def isstatmethod(arg):
+ return isinstance(arg, _SummaryStatBase)
+
+class StatMethods:
+ """
+ A collection of statistical methods (and associated parameters)
+ """
+ def __init__(self, default_weightcol):
+ self.by_statcolname = {}
+ self.in_order = []
+ self.default_weightcol = default_weightcol
+
+ def append(self, method):
+ statcolname = method.get_statcolname(self.default_weightcol)
+ if not self.by_statcolname.has_key(statcolname):
+ self.by_statcolname[statcolname] = method
+ self.in_order.append((statcolname, method))
+
+ def __getitem__(self, statcolname):
+ return self.by_statcolname[statcolname]
+
+ def __iter__(self):
+ return iter(self.in_order)
+
+ def get_method_statcolname(self, method):
+ return method.get_statcolname(self.default_weightcol)
+
+ def check_args(self, dataset):
+ for statcolname, stat_method in self:
+ stat_method.args_okay(dataset, self.default_weightcol)
+
+ def add_statcols(self, dataset, summaryset):
+ for statcolname, stat_method in self:
+ stat_method.add_statcol(dataset, statcolname, summaryset,
+ self.default_weightcol)
+
+ def calc(self, summaryset, colvectors):
+ for statcolname, stat_method in self:
+ stat_method.calc(statcolname, summaryset, colvectors,
+ self.default_weightcol)
+
+def extract(args, default_weightcol = None):
+ args_remain = []
+ stat_methods = StatMethods(default_weightcol)
+ for arg in args:
+ if isinstance(arg, applyto):
+ for stat_method in arg.stat_methods:
+ stat_methods.append(stat_method)
+ elif isstatmethod(arg):
+ stat_methods.append(arg)
+ else:
+ args_remain.append(arg)
+ return stat_methods, args_remain
+
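A minimal sketch of how extract() is intended to be called (hypothetical
column names; not upstream code):

    # plain column names pass through untouched; stat-method
    # instances are gathered into a StatMethods collection
    methods, rest = extract(['agegrp', mean('age'), freq()],
                            default_weightcol='popwgt')
    # rest == ['agegrp']; iterating methods yields
    # (statcolname, method) pairs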
+def stat_methods():
+ items = globals().items()
+ items.sort()
+ methods = []
+ for name, obj in items:
+ try:
+ if not name.startswith('_') and issubclass(obj, _SummaryStatBase):
+ methods.append(obj)
+ except TypeError:
+ pass
+ return methods
+
+def stat_method_help():
+ help = ['Stat methods are optional. They include:']
+ for obj in stat_methods():
+ usage = obj.__name__ + obj.usage
+ help.append(' %-40s %s' % (usage, obj.__doc__))
+ return '\n'.join(help)
diff --git a/SOOMv0/Timers.py b/SOOMv0/Timers.py
new file mode 100644
index 0000000..b433713
--- /dev/null
+++ b/SOOMv0/Timers.py
@@ -0,0 +1,59 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: Timers.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/Timers.py,v $
+
+import time
+
+class Timers(object):
+ __slots__ = 'label', 'start_time', 'cumulative'
+ timers = {}
+
+ def __new__(cls, label):
+ try:
+ return cls.timers[label]
+ except KeyError:
+ self = object.__new__(cls)
+ self.label = label
+ self.cumulative = 0.0
+ cls.timers[label] = self
+ return self
+
+ def __init__(self, label):
+ self.start_time = time.time()
+
+ def start(self):
+ self.start_time = time.time()
+
+ def stop(self):
+ self.cumulative += time.time() - self.start_time
+
+ def reset(cls):
+ for timer in cls.timers.values():
+ timer.cumulative = 0.0
+ reset = classmethod(reset)
+
+ def report(cls):
+ lines = []
+ timers = cls.timers.items()
+ timers.sort()
+ total = 0.0
+ for label, timer in timers:
+ if timer.cumulative >= 0.001:
+ lines.append('%8.3fs %s' % (timer.cumulative, label))
+ total += timer.cumulative
+ lines.append('%8.3fs TOTAL' % (total))
+ return '\n'.join(lines)
+ report = classmethod(report)
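A usage sketch (not upstream code): Timers instances are interned by
label, so repeated construction accumulates into a single counter.

    t = Timers('load csv')       # created (and started)
    do_work()                    # hypothetical timed section
    t.stop()
    Timers('load csv').start()   # same object as t
    Timers('load csv').stop()
    print Timers.report()        # per-label lines plus a TOTAL line
    Timers.reset()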
diff --git a/SOOMv0/TransformFN.py b/SOOMv0/TransformFN.py
new file mode 100644
index 0000000..8f3dbef
--- /dev/null
+++ b/SOOMv0/TransformFN.py
@@ -0,0 +1,64 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: TransformFN.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/TransformFN.py,v $
+
+import os
+import cPickle
+
+from SOOMv0.Soom import soom
+
+__all__ = 'maketransform', 'loadtransform'
+
+def maketransform(name,code=None,path=soom.default_object_path):
+ """
+ Function to take Python code and compile and store it for use
+ in transforming data eg coded values to fuller descriptions
+ for presentation.
+
+ TO DO: integrate the code which implements SAS-style PROC
+ FORMAT syntax as well as supporting Python code. Also need to
+ capture metadata about the transform function etc - needs to
+ be a Transform class.
+
+    Question: how else does one persistently store Python
+    functions except by storing the source code and recompiling
+    it when reloaded from disc? Functions are objects, but how to
+    serialise them **and** re-instantiate them into the global
+    namespace? ANSWER: use marshal, not pickle! TO-DO: change
+    to using marshal.
+ """
+ try:
+ exec code
+ filename = os.path.join(path, "%s_transform.SOOMpickle" % name)
+ f = open(filename, 'w+')
+ f.write(cPickle.dumps(code, 1))
+ f.close()
+ except:
+ raise ValueError, "Transform function code won't compile"
+ return None
+
+def loadtransform(name,path=soom.default_object_path):
+    """
+    Function to load a predefined transform function from disc.
+    """
+    try:
+        filename = os.path.join(path, "%s_transform.SOOMpickle" % name)
+        f = open(filename, 'r')
+        code = cPickle.loads(f.read())
+        f.close()
+        exec code
+    except:
+        raise ValueError, "Transform function code won't load or compile"
+ return None
diff --git a/SOOMv0/Utils.py b/SOOMv0/Utils.py
new file mode 100644
index 0000000..4958448
--- /dev/null
+++ b/SOOMv0/Utils.py
@@ -0,0 +1,318 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: Utils.py 2901 2007-11-20 04:52:21Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/Utils.py,v $
+
+from MA import *
+import soomfunc
+import textwrap # used to wrap column headings
+import errno
+import os
+import copy
+import sets
+from SOOMv0 import common
+
+# from Python Cookbook p 4
+def makedict(**kwargs):
+ return kwargs
+
+# simple pluralisation based on code by Robin Palmar:
+# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/82102
+def pluralise(text):
+ postfix = 's'
+ if len(text) > 2:
+ vowels = 'aeiou'
+ if text[-2:] in ('ch', 'sh'):
+ postfix = 'es'
+ elif text[-1:] == 'y':
+ if text[-2:-1] in vowels:
+ postfix = 's'
+ else:
+ postfix = 'ies'
+ text = text[:-1]
+ elif text[-2:] == 'is':
+ postfix = 'es'
+ text = text[:-2]
+ elif text[-1:] in ('s','z','x'):
+ postfix = 'es'
+ return '%s%s' % (text, postfix)
+
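The heuristic above covers the common English patterns; for example
(a sketch, not upstream code):

    # pluralise('row')      -> 'rows'
    # pluralise('batch')    -> 'batches'
    # pluralise('category') -> 'categories'
    # pluralise('day')      -> 'days'
    # pluralise('analysis') -> 'analyses'
    # pluralise('box')      -> 'boxes'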
+# combinations support functions
+# Based on Cookbook recipe by Ulrich Hoffmann:
+# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/190465
+# Modified by Dave Cole
+def xcombinations(items, n):
+ if n == 0:
+ yield []
+ else:
+ for i in xrange(len(items)):
+ for cc in xcombinations(items[i + 1:], n - 1):
+ yield [items[i]] + cc
+
+def cross(*args):
+ ans = [[]]
+ for arg in args:
+ ans = [x + [y] for x in ans for y in arg]
+ return ans
+
+def combinations(*seqs):
+ yield []
+ for i in range(len(seqs)):
+ for subseqs in xcombinations(seqs, i + 1):
+ if len(subseqs) == 1:
+ for item in subseqs[0]:
+ yield [item]
+ else:
+ for item in cross(*subseqs):
+ yield item
+
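A sketch of what these generators yield (not upstream code):

    list(xcombinations(['a', 'b', 'c'], 2))
    # -> [['a', 'b'], ['a', 'c'], ['b', 'c']]
    cross([1, 2], ['x', 'y'])
    # -> [[1, 'x'], [1, 'y'], [2, 'x'], [2, 'y']]
    # combinations(seq1, seq2, ...) first yields [], then the items
    # and cross-products over every non-empty subset of the sequences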
+# other stuff
+
+def isect(a,b): # redundant, I think.
+    return soomfunc.intersect(a,b)
+
+def standardise_cmp_op(op):
+ op = op.lower()
+ op = op.strip()
+ if op in ("starting with", "=:", "==:", "startingwith", "eq:", "startswith", "starts with"):
+ return "==:", None
+ elif op in ("less than", "lessthan", "lt", "<"):
+ return "<", less # Numeric.less
+ elif op in ("less than:", "lessthan:", "lt:", "<:"):
+ return "<:", None
+ elif op in ("greater than", "greaterthan", "gt", ">"):
+ return ">", greater # Numeric.greater
+ elif op in ("greater than:", "greaterthan:", "gt:", ">:"):
+ return ">:", None
+ elif op in ("greater than or equal to", "greaterthanorequalto", "greater or equal", "greaterequal", "ge", ">=", "=>"):
+ return ">=", greater_equal # Numeric.greater_equal
+ elif op in ("greater than or equal to:", "greaterthanorequalto:", "greater or equal:", "greaterequal:", "ge:", ">=:", "=>:"):
+ return ">=:", None
+ elif op in ("less than or equal to", "lessthanorequalto", "less or equal", "lessequal", "le", "<=", "=<"):
+ return "<=", less_equal # Numeric.less_equal
+ elif op in ("less than or equal to:", "lessthanorequalto:", "less or equal:", "lessequal:", "le:", "<=:", "=<:"):
+ return "<=:", None
+ elif op in ("not equal to", "notequalto", "not equal", "notequal", "does not equal", "doesnotequal", "ne", "!=", "!==", "#", "<>"):
+ return "!=", not_equal # Numeric.not_equal
+ elif op in ("not equal to:", "notequalto:", "not equal:", "notequal:", "does not equal:", "doesnotequal:", "ne:", "!=:", "!==:", "#:", "<>:", "not starting with", "notstartingwith", "notstartswith", "not startswith", "not starts with"):
+ return "!=:", None
+ elif op in ("equal to", "equals", "equalto", "eq", "=", "=="):
+ return "==", equal # Numeric.equal
+ else:
+ return op, None
+
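A sketch of the normalisation (not upstream code): spelling variants
collapse to one canonical operator plus, for the non-column forms, the
elementwise comparison function pulled in by 'from MA import *'.

    standardise_cmp_op('Greater Than')   # -> ('>', greater)
    standardise_cmp_op('ne:')            # -> ('!=:', None)
    standardise_cmp_op('starts with')    # -> ('==:', None)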
+
+def leftouterjoin(leftds,leftjoincols,leftdatacols,rightds,rightjoincols,rightdatacols,null_rrow):
+ ljkeys = []
+ for ljcol in leftjoincols:
+ origcol = ljcol.split("=")[0]
+ if len(ljcol.split("=")) > 1:
+ newcol = ljcol.split("=")[1]
+ else:
+ newcol = origcol
+ ljkeys.append(newcol)
+ ljkeys.sort()
+ rjkeys = []
+ for rjcol in rightjoincols:
+ origcol = rjcol.split("=")[0]
+ if len(rjcol.split("=")) > 1:
+ newcol = rjcol.split("=")[1]
+ else:
+ newcol = origcol
+ rjkeys.append(newcol)
+ rjkeys.sort()
+ if ljkeys != rjkeys:
+ raise ValueError, "Join keys don't match!"
+
+ leftdictlist,lcols = todictlist(leftds,leftjoincols,leftdatacols)
+ rightdictlist,rcols = todictlist(rightds,rightjoincols,rightdatacols)
+
+ joinedset = {}
+ for lkey in lcols + rcols:
+ joinedset[lkey] = []
+
+ # pprint.pprint(joinedset)
+
+ ldl_keys = leftdictlist.keys()
+ #print ldl_keys
+ ldl_keys.sort()
+ #print ldl_keys
+
+ for lkey in ldl_keys:
+ lrows = leftdictlist[lkey]
+ rrows = rightdictlist.get(lkey,None)
+ for lrow in lrows:
+ jrow = {}
+ jrow.update(lrow)
+ if rrows:
+ for rrow in rrows:
+ jrow.update(rrow)
+ for jkey in jrow.keys():
+ joinedset[jkey].append(jrow[jkey])
+            else:
+                # no matching right rows: pad this left row with the
+                # caller-supplied null right-hand columns
+                jrow.update(null_rrow)
+                for jkey in jrow.keys():
+                    joinedset[jkey].append(jrow[jkey])
+
+ return joinedset
+
+def innerjoin(leftds,leftjoincols,leftdatacols,rightds,rightjoincols,rightdatacols):
+ ljkeys = []
+ for ljcol in leftjoincols:
+ origcol = ljcol.split("=")[0]
+ if len(ljcol.split("=")) > 1:
+ newcol = ljcol.split("=")[1]
+ else:
+ newcol = origcol
+ ljkeys.append(newcol)
+ ljkeys.sort()
+ rjkeys = []
+ for rjcol in rightjoincols:
+ origcol = rjcol.split("=")[0]
+ if len(rjcol.split("=")) > 1:
+ newcol = rjcol.split("=")[1]
+ else:
+ newcol = origcol
+ rjkeys.append(newcol)
+ rjkeys.sort()
+ if ljkeys != rjkeys:
+ raise ValueError, "Join keys don't match!"
+
+ leftdictlist,lcols = todictlist(leftds,leftjoincols,leftdatacols)
+ rightdictlist,rcols = todictlist(rightds,rightjoincols,rightdatacols)
+
+ joinedset = {}
+ for lkey in lcols + rcols:
+ joinedset[lkey] = []
+
+ # pprint.pprint(joinedset)
+
+ ldl_keys = leftdictlist.keys()
+ #print ldl_keys
+ ldl_keys.sort()
+ #print ldl_keys
+
+ for lkey in ldl_keys:
+ lrows = leftdictlist[lkey]
+ rrows = rightdictlist.get(lkey,None)
+ for lrow in lrows:
+ jrow = {}
+ jrow.update(lrow)
+ if rrows:
+ for rrow in rrows:
+ jrow.update(rrow)
+ for jkey in jrow.keys():
+ joinedset[jkey].append(jrow[jkey])
+ return joinedset
+
+# functions for rates
+# this whole area of how datasets are joined needs reworking - principles are OK, details need revision.
+
+"""
+def todictlist(summaryset,joincols,datacols):
+ # print summaryset.keys()
+ dictlist = {}
+ allcols = []
+ for col in list(joincols) + list(datacols):
+ if col not in allcols:
+ allcols.append(col)
+ for i in xrange(len(summaryset[joincols[0]])):
+ joinkeylist = []
+ for jcol in joincols:
+ joinkeylist.append(summaryset[jcol.split("=")[0]][i])
+ joinkey = tuple(joinkeylist)
+ rowdict = {}
+ cols = []
+ for col in allcols:
+ origcol = col.split("=")[0]
+ if len(col.split("=")) > 1:
+ newcol = col.split("=")[1]
+ else:
+ newcol = origcol
+ rowdict[newcol] = summaryset[origcol][i]
+ cols.append(newcol)
+ if dictlist.has_key(joinkey):
+ dictlist[joinkey].append(rowdict)
+ else:
+ dictlist[joinkey] = [rowdict]
+ return dictlist, cols
+"""
+
+def todictlist(sumset,joincols,datacols):
+ # print summaryset.keys()
+ dictlist = {}
+ allcols = []
+ for col in list(joincols) + list(datacols):
+ if col not in allcols:
+ allcols.append(col)
+ for i in xrange(len(getattr(sumset,joincols[0]).data)):
+ joinkeylist = []
+ for jcol in joincols:
+ joinkeylist.append(getattr(sumset,jcol.split("=")[0]).data[i])
+ joinkey = tuple(joinkeylist)
+ rowdict = {}
+ cols = []
+ for col in allcols:
+ origcol = col.split("=")[0]
+ if len(col.split("=")) > 1:
+ newcol = col.split("=")[1]
+ else:
+ newcol = origcol
+ rowdict[newcol] = getattr(sumset,origcol).data[i]
+ cols.append(newcol)
+ if dictlist.has_key(joinkey):
+ dictlist[joinkey].append(rowdict)
+ else:
+ dictlist[joinkey] = [rowdict]
+ return dictlist, cols
+
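A sketch of the 'origname=newname' renaming convention handled above
(hypothetical datasets and columns; not upstream code): a join column may
be renamed so that both sides end up with matching keys.

    # join a summary set keyed by agegrp/sex to a population set
    # whose sex column is called 'gender'
    joined = leftouterjoin(sumset, ('agegrp', 'sex'), ('freq',),
                           popset, ('agegrp', 'gender=sex'), ('pop',),
                           {'pop': None})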
+# define some date output transformations
+# move to soomutilfuncs.py probably
+def ddmmyyyy(indatetime,sep="/"):
+ if indatetime is None:
+ return "All date/times"
+ else:
+ fmt = "%d" + sep + "%m" + sep + "%Y"
+ return indatetime.strftime(fmt)
+
+def fulldate(indatetime):
+ if indatetime is None:
+ return "All date/times"
+ else:
+ fmt = "%A, %d %B %Y"
+ return indatetime.strftime(fmt)
+
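A sketch of the date formatters (not upstream code; assumes an
mx.DateTime value, as used elsewhere in the package):

    d = mx.DateTime.Date(2005, 3, 9)
    ddmmyyyy(d)            # -> '09/03/2005'
    ddmmyyyy(d, sep='-')   # -> '09-03-2005'
    fulldate(d)            # -> 'Wednesday, 09 March 2005'
    ddmmyyyy(None)         # -> 'All date/times'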
+def helpful_mkdir(path):
+ try:
+ os.makedirs(path)
+ except OSError, (eno, estr):
+ if eno != errno.EEXIST:
+ raise OSError(eno, '%s: mkdir %s' % (estr, path))
+
+def quiet_unlink(fn):
+ try:
+ os.unlink(fn)
+ except OSError, (eno, estr):
+ if eno != errno.ENOENT:
+            raise OSError(eno, '%s: unlink %s' % (estr, fn))
+
+def assert_args_exhausted(args = None, kwargs = None):
+ if args is not None and args:
+ raise common.Error('Unknown argument(s): %s' %
+ ', '.join([repr(arg) for arg in args]))
+ if kwargs is not None and kwargs:
+ raise common.Error('Unknown keyword argument(s): %s' %
+ ', '.join(kwargs.keys()))
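A sketch of the argument-exhaustion idiom (hypothetical caller; not
upstream code): pop the options you understand, then reject leftovers.

    def summ_options(**kwargs):
        weightcol = kwargs.pop('weightcol', None)
        assert_args_exhausted(kwargs=kwargs)
        return weightcol

    summ_options(weightcol='popwgt')   # ok
    summ_options(wieghtcol='popwgt')   # raises common.Error (typo caught)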
diff --git a/SOOMv0/__init__.py b/SOOMv0/__init__.py
new file mode 100644
index 0000000..83cba8f
--- /dev/null
+++ b/SOOMv0/__init__.py
@@ -0,0 +1,57 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# SOOMv0.py
+# SOOM Proof-of-concept implementation Version 0
+# Written by Tim Churches April-November 2001
+# Substantially revised by Tim Churches and Ben Golding, April-May 2003
+# Extensive further work by Tim Churches and Andrew McNamara, May 2003-...
+
+# $Id: __init__.py 2859 2007-10-18 07:45:37Z andrewm $
+
+#import sys
+#interactive = hasattr(sys, 'ps1')
+#del sys
+
+# Module imports
+from SOOMv0.Soom import soom
+from SOOMv0.DataSourceColumn import DataSourceColumn
+from SOOMv0.Dataset import Dataset
+from SOOMv0.SummaryCond import *
+from SOOMv0.SummaryStats import *
+from SOOMv0.SummaryProp import propn_names_and_labels, extract_propn_cols
+from SOOMv0.Filter import filtered_ds, sampled_ds, sliced_ds
+from SOOMv0.Datasets import Datasets
+from SOOMv0.DataTypes import datatypes
+from SOOMv0.PlotRegistry import plot
+from SOOMv0.common import * # Exceptions, constants, etc
+import SOOMv0.interactive_hook # Interactive friendliness
+#from SOOMv0.TransformFN import *
+
+#try:
+# import psyco
+#except ImportError:
+# pass
+#else:
+# psyco.log('/tmp/soompsyco.log', 'a')
+# psyco.bind(Dataset)
+# psyco.bind(DatasetColumn)
+# psyco.bind(DatasetFilter)
+## psyco.full()
+
+datasets = Datasets()
+dsload = datasets.dsload
+dsunload = datasets.dsunload
+makedataset = datasets.makedataset
+subset = datasets.subset
diff --git a/SOOMv0/common.py b/SOOMv0/common.py
new file mode 100644
index 0000000..a921568
--- /dev/null
+++ b/SOOMv0/common.py
@@ -0,0 +1,58 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: common.py 2878 2007-11-09 02:59:40Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/common.py,v $
+
+version_info = 0, 9, 0
+version = '.'.join(map(str, version_info))
+expert = False
+
+class Error(Exception): pass
+
+class ColumnNotFound(Error): pass
+class DatasetNotFound(Error): pass
+class ExpressionError(Error): pass
+class PlotError(Error): pass
+class DatasetError(Error): pass # Malformed dataset
+
+SUB_DETAIL, NO_DETAIL, SOME_DETAIL, ALL_DETAIL = range(4)
+
+def yesno(var):
+ if var:
+ return 'Yes'
+ else:
+ return 'No'
+
+try:
+ # Introduced in py 2.4
+ set
+except NameError:
+ from sets import Set as set
+
+try:
+ # Introduced in py 2.4
+ sorted
+except NameError:
+    def sorted(iterable, cmp=None, key=None, reverse=False):
+        # 2.3's list.sort() only accepts a cmp function, so emulate
+        # the 2.4 key and reverse arguments (decorate-sort-undecorate)
+        new_list = list(iterable)
+        if key is not None:
+            new_list = [(key(v), i, v) for i, v in enumerate(new_list)]
+            new_list.sort()
+            new_list = [v for (k, i, v) in new_list]
+        else:
+            new_list.sort(cmp)
+        if reverse:
+            new_list.reverse()
+        return new_list
+
+try:
+ # Removed in py 3.0
+ callable
+except NameError:
+ def callable(f):
+ return hasattr(f, '__call__')
diff --git a/SOOMv0/interactive_hook.py b/SOOMv0/interactive_hook.py
new file mode 100644
index 0000000..9592c54
--- /dev/null
+++ b/SOOMv0/interactive_hook.py
@@ -0,0 +1,41 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: interactive_hook.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/interactive_hook.py,v $
+
+import sys
+import types
+from SOOMv0 import common
+
+def helpful_exceptions(exc_type, exc_value, exc_traceback):
+ if (not common.expert and hasattr(sys, 'ps1')
+ and isinstance(exc_value, common.Error)):
+ # Interactive
+ print >> sys.stderr, '%s error: %s' % (exc_type.__name__, exc_value)
+ else:
+ orig_exc_hook(exc_type, exc_value, exc_traceback)
+
+orig_exc_hook, sys.excepthook = sys.excepthook, helpful_exceptions
+
+def helpful_display(obj):
+ # If the object has a display hook, and the display hook is a function
+ # or a bound method...
+ if (not common.expert and hasattr(obj, '_display_hook')
+ and (not hasattr(obj._display_hook, 'im_self')
+ or obj._display_hook.im_self)):
+ return obj._display_hook()
+ orig_disp_hook(obj)
+
+orig_disp_hook, sys.displayhook = sys.displayhook, helpful_display
diff --git a/SOOMv0/soomparse.g b/SOOMv0/soomparse.g
new file mode 100644
index 0000000..3576770
--- /dev/null
+++ b/SOOMv0/soomparse.g
@@ -0,0 +1,390 @@
+# vim: set ts=4 sw=4 et:
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+
+# 3rd Party
+import Numeric
+import mx.DateTime
+# Application
+import soomfunc
+from SOOMv0.common import Error
+from SOOMv0 import Search
+%%
+
+# A Yapps2 scanner/parser for the SOOM expression language
+# (see http://theory.stanford.edu/~amitp/Yapps)
+
+parser soomparse:
+ ignore: '[ \t\n\r]+'
+ token END: '$'
+ token DATE: r'\d{4}-\d{1,2}-\d{1,2}'
+ token INT: '[-+]?[0-9]+'
+# token FLOAT:'[-+]?([0-9]*\.)?[0-9]+([eE][-+]?[0-9]+)?'
+ # there are three parts to the FLOAT pattern:
+ # [0-9]*\.[0-9]+ 0.1234
+ # [0-9]+\. 123.
+ # ([0-9]*\.)?[0-9]+[eE][-+]?[0-9]+ 0.1e-3
+    token FLOAT:'[-+]?(([0-9]*\.[0-9]+)|([0-9]+\.)|(([0-9]*\.)?[0-9]+[eE][-+]?[0-9]+))'
+ token ID: '[a-zA-Z0-9_]+'
+ token STR: '[rR]?\'([^\\n\'\\\\]|\\\\.)*\'|[rR]?"([^\\n"\\\\]|\\\\.)*"'
+ token WORD: r"[a-zA-Z0-9'*]?[a-zA-Z0-9][a-zA-Z0-9'*]*"
+ token NULL: '[Nn][Oo][Nn][Ee]|[Nn][Uu][Ll][Ll]'
+
+
+ rule starts_with: "starting" "with" {{ return 'op_equal_col' }}
+ | "=:" {{ return 'op_equal_col' }}
+ | "==:" {{ return 'op_equal_col' }}
+ | "startingwith" {{ return 'op_equal_col' }}
+ | "startswith" {{ return 'op_equal_col' }}
+ | "eq:" {{ return 'op_equal_col' }}
+
+ rule lt: "lessthan" {{ return 'op_less_than' }}
+ | "lt" {{ return 'op_less_than' }}
+ | "<" {{ return 'op_less_than' }}
+
+ rule lt_col: "lessthan:" {{ return 'op_less_than_col' }}
+ | "lt:" {{ return 'op_less_than_col' }}
+ | "<:" {{ return 'op_less_than_col' }}
+
+ rule le: "lessthanorequalto" {{ return 'op_less_equal' }}
+ | "lessequal" {{ return 'op_less_equal' }}
+ | "le" {{ return 'op_less_equal' }}
+ | "<=" {{ return 'op_less_equal' }}
+ | "=<" {{ return 'op_less_equal' }}
+
+ rule le_col: "lessthanorequalto:" {{ return 'op_less_equal_col' }}
+ | "lessequal:" {{ return 'op_less_equal_col' }}
+ | "le:" {{ return 'op_less_equal_col' }}
+ | "<=:" {{ return 'op_less_equal_col' }}
+ | "=<:" {{ return 'op_less_equal_col' }}
+
+ rule lt_clause: "less" ( "than" ( {{ return 'op_less_than' }}
+ | "or" "equal" ("to" {{ return 'op_less_equal' }}
+ | "to:" {{ return 'op_less_equal_col' }}
+ )
+ )
+ | "than:" {{ return 'op_less_than_col' }}
+ | "or" ("equal" {{ return 'op_less_equal' }}
+ | "equal:" {{ return 'op_less_equal_col' }}
+ )
+ )
+
+ rule gt: "greaterthan" {{ return 'op_greater_than' }}
+ | "gt" {{ return 'op_greater_than' }}
+ | ">" {{ return 'op_greater_than' }}
+
+ rule gt_col: "greaterthan:" {{ return 'op_greater_than_col' }}
+ | "gt:" {{ return 'op_greater_than_col' }}
+ | ">:" {{ return 'op_greater_than_col' }}
+
+ rule ge: "greaterthanorequalto" {{ return 'op_greater_equal' }}
+ | "greaterequal" {{ return 'op_greater_equal' }}
+ | "ge" {{ return 'op_greater_equal' }}
+ | ">=" {{ return 'op_greater_equal' }}
+ | "=>" {{ return 'op_greater_equal' }}
+
+ rule ge_col: "greaterthanorequalto:" {{ return 'op_greater_equal_col' }}
+ | "greaterequal:" {{ return 'op_greater_equal_col' }}
+ | "ge:" {{ return 'op_greater_equal_col' }}
+ | ">=:" {{ return 'op_greater_equal_col' }}
+ | "=>:" {{ return 'op_greater_equal_col' }}
+
+ rule gt_clause: "greater" ( "than" ( {{ return 'op_greater_than' }}
+ | "or" "equal" ("to" {{ return 'op_greater_equal' }}
+ | "to:" {{ return 'op_greater_equal_col' }}
+ )
+ )
+ | "than:" {{ return 'op_greater_than_col' }}
+ | "or" ("equal" {{ return 'op_greater_equal' }}
+ | "equal:" {{ return 'op_greater_equal_col' }}
+ )
+ )
+
+ rule ne: "notequalto" {{ return 'op_not_equal' }}
+ | "notequal" {{ return 'op_not_equal' }}
+ | "doesnotequal" {{ return 'op_not_equal' }}
+ | "ne" {{ return 'op_not_equal' }}
+ | "!=" {{ return 'op_not_equal' }}
+ | "!==" {{ return 'op_not_equal' }}
+ | "#" {{ return 'op_not_equal' }}
+ | "is not" {{ return 'op_not_equal' }}
+ | "<>" {{ return 'op_not_equal' }}
+
+ rule ne_col: "notequalto:" {{ return 'op_not_equal_col' }}
+ | "notequal:" {{ return 'op_not_equal_col' }}
+ | "doesnotequal:" {{ return 'op_not_equal_col' }}
+ | "ne:" {{ return 'op_not_equal_col' }}
+ | "!=:" {{ return 'op_not_equal_col' }}
+ | "!==:" {{ return 'op_not_equal_col' }}
+ | "#:" {{ return 'op_not_equal_col' }}
+ | "<>:" {{ return 'op_not_equal_col' }}
+ | "notstartingwith" {{ return 'op_not_equal_col' }}
+ | "notstartswith" {{ return 'op_not_equal_col' }}
+
+ rule ne_clause: "not" ( "equal" ( {{ return 'op_not_equal' }}
+ | "to" {{ return 'op_not_equal' }}
+ | "to:" {{ return 'op_not_equal_col' }}
+ )
+ | "equal:" {{ return 'op_not_equal_col' }}
+ | "starting" "with" {{ return 'op_not_equal_col' }}
+ | "startswith" {{ return 'op_not_equal_col' }}
+ | "starts" "with" {{ return 'op_not_equal_col' }}
+ )
+
+ rule does_not_clause: "does" "not" ( "equal" {{ return 'op_not_equal' }}
+ | "equal:" {{ return 'op_not_equal_col' }}
+ )
+
+ rule eq: "equal" "to" {{ return 'op_equal' }}
+ | "equals" {{ return 'op_equal' }}
+ | "equalto" {{ return 'op_equal' }}
+ | "eq" {{ return 'op_equal' }}
+ | "=" {{ return 'op_equal' }}
+ | "is" {{ return 'op_equal' }}
+ | "==" {{ return 'op_equal' }}
+
+ # (not called "in" because of conflict in parser with python res word)
+ rule in_op: "in" {{ return 'op_in' }}
+
+ rule in_col: "in:" {{ return 'op_in_col' }}
+
+ rule not_in: "notin" {{ return 'op_not_in' }}
+
+ rule not_in_col: "notin:" {{ return 'op_not_in_col' }}
+
+ rule between: "between" {{ return 'op_between' }}
+
+ rule contains: "contains" {{ return 'op_contains' }}
+
+ rule regexp: "~" {{ return 'op_regexp' }}
+
+ rule not_regexp: "!~" {{ return 'op_not_regexp' }}
+
+ rule op: starts_with {{ return starts_with }}
+ | lt {{ return lt }}
+ | lt_col {{ return lt_col }}
+ | lt_clause {{ return lt_clause }}
+ | gt {{ return gt }}
+ | gt_col {{ return gt_col }}
+ | gt_clause {{ return gt_clause }}
+ | ge {{ return ge }}
+ | ge_col {{ return ge_col }}
+ | le {{ return le }}
+ | le_col {{ return le_col }}
+ | ne {{ return ne }}
+ | ne_col {{ return ne_col }}
+ | ne_clause {{ return ne_clause }}
+ | does_not_clause {{ return does_not_clause }}
+ | eq {{ return eq }}
+ | in_op {{ return in_op }}
+ | in_col {{ return in_col }}
+ | not_in {{ return not_in }}
+ | not_in_col {{ return not_in_col }}
+ | between {{ return between }}
+ | contains {{ return contains }}
+ | regexp {{ return regexp }}
+ | not_regexp {{ return not_regexp }}
+
+ rule goal: expr END {{ return expr }}
+
+ # Used for testing the query engine
+ rule sgoal: sexpr END {{ return sexpr }}
+
+ # An expression is the logical "or" of factors
+ rule expr: factor {{ f = factor }}
+ ( "or" factor {{ f = soomfunc.union(f, factor) }}
+ )* {{ return f }}
+
+ # A factor is the logical "and" of comparisons, ("and" has higher precedence than "or")
+ rule factor: comparison {{ f = comparison }}
+ ( "and" comparison {{ f = soomfunc.intersect(f, comparison) }}
+ )* {{ return f }}
+
+ # A comparison is the comparison of terms
+ # the real work's done here
+ rule comparison: col op term {{ return col.filter_op(op, term) }}
+ | "\\(" expr "\\)" {{ return expr }}
+ | "not" comparison {{ return soomfunc.outersect(Numeric.arrayrange(len(self.dataset)), comparison) }}
+
+    # A term is a literal (null, number, string or date), a parenthesised
+    # list of terms, or a bracketed search expression
+ rule term: NULL {{ return None }}
+ | INT {{ return int(INT) }}
+ | FLOAT {{ return float(FLOAT) }}
+ | STR {{ return dequote(STR) }}
+ | "\\[\\[" sexpr "\\]\\]" {{ return sexpr }}
+ | "\\("
+ ( term {{ term_list = [term] }}
+ ( "," term {{ term_list.append(term) }}
+ ) *
+ )+ "\\)" {{ return term_list }}
+ | "date"
+ ( "\\(" INT {{ year = int(INT) }}
+ "," INT {{ month = int(INT) }}
+ "," INT {{ day = int(INT) }}
+ "\\)" {{ return mx.DateTime.Date(year, month, day) }}
+ | DATE {{ return mx.DateTime.ISO.ParseDate(DATE) }}
+ )
+
+ | "reldate" kwargs {{ return relativeDate(**kwargs) }}
+
+ rule col: ID {{ return self.dataset.get_column(ID) }}
+
+ # Pythonesque keyword arguments, eg... foo="abc", bah=9
+ rule kwargs: "\\(" {{ kwargs = {} }}
+ (
+ | ID "=" term {{ kwargs[ID] = term }}
+ ( "," ID "=" term {{ kwargs[ID] = term }}
+ ) *
+ )
+ "\\)" {{ return kwargs }}
+
+    # a search expression is a disjunction of search factors
+ rule sexpr: sfactor {{ f = Search.Disjunction(sfactor) }}
+ ( "\\|" sfactor {{ f.append(sfactor) }}
+ )* {{ return f }}
+
+ # a search factor is the conjunction of words, ("and" has higher precedence than "or")
+ rule sfactor: sphrase {{ f = sphrase}}
+ ( sconjop sphrase {{ f = Search.Conjunction(sconjop, f, sphrase) }}
+ | snearop {{ op = snearop; nearness = Search.Conjunction.DEFAULT_NEARNESS }}
+ ( "\\[" INT "\\]" {{ nearness = int(INT) }}
+ | )
+ sphrase {{ f = Search.Conjunction(op, f, sphrase, nearness) }}
+ | {{ op = '&' }}
+ ( "-" {{ op = '&!' }}
+ | )
+ sphrase {{ f = Search.Conjunction(op, f, sphrase) }}
+ )* {{ return f }}
+
+ # search conjunction operations: and, and not
+ rule sconjop: ( "&" {{ op = '&' }}
+ ( "-" {{ op = '&!' }}
+ | )
+ ) {{ return op }}
+
+ # search proximity operations: before, after, near
+ rule snearop: "<" {{ return '<' }}
+ | ">" {{ return '>' }}
+ | "~" {{ return '~' }}
+
+    # a search phrase is a single term or a quoted sequence of terms
+ rule sphrase: sterm {{ return sterm }}
+ | "\"" {{ words = [] }}
+ ( sterm {{ words.append(sterm) }}
+ )+
+ "\"" {{ return Search.Phrase(words) }}
+
+ # a search term is a single word or a sub-expression
+ rule sterm: WORD {{ return Search.Word(WORD) }}
+ | "\\(" sexpr "\\)" {{ return sexpr }}
+
+%%
+
+import re
+dequote_re = re.compile(r'\\(x[0-9a-fA-F]{2}|[0-7]{1,3}|.)')
+backslash_map = {
+ '\\': '\\',
+ "'": "'",
+ '"': '"',
+ 'a': '\a',
+ 'b': '\b',
+ 'f': '\f',
+ 'n': '\n',
+ 'r': '\r',
+ 't': '\t',
+ 'v': '\v',
+}
+def dequote(s):
+ """
+ Remove leading and trailing quotes, honour any backslash quoting
+ within the string.
+
+ Using the built-in eval() looks attractive at first glance, but
+ opens serious security issues.
+ """
+ def backslash_sub(match):
+ match = match.group(0)
+ if match.startswith(r'\x'):
+ try:
+ return chr(int(match[2:], 16))
+ except ValueError:
+ raise ValueError('invalid \\x escape')
+ elif match[1:].isdigit():
+ return chr(int(match[1:], 8))
+ else:
+ return backslash_map.get(match[1:], match)
+ if s[0] in ('r', 'R'):
+ return s[2:-1]
+ return dequote_re.sub(backslash_sub, s[1:-1])
+
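A sketch of dequote() behaviour (not upstream code):

    dequote("'don\\'t'")      # -> "don't"
    dequote('"tab\\there"')   # -> 'tab\there' (escape honoured)
    dequote("r'\\d+'")        # -> '\\d+' (raw string: backslashes kept)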
+def relativeDate(years=None, months=None, days=None, align=None):
+ def onlyone():
+ raise ValueError('Only specify one of years, months, or days')
+ weekdays = {
+ 'monday': mx.DateTime.Monday,
+ 'tuesday': mx.DateTime.Tuesday,
+ 'wednesday': mx.DateTime.Wednesday,
+ 'thursday': mx.DateTime.Thursday,
+ 'friday': mx.DateTime.Friday,
+ 'saturday': mx.DateTime.Saturday,
+ 'sunday': mx.DateTime.Sunday,
+ }
+ kwargs = {
+ 'hour': 0,
+ 'minute': 0,
+ 'second': 0,
+ }
+ if years:
+ if months or days:
+ onlyone()
+ kwargs['years'] = years
+ elif months:
+ if days:
+ onlyone()
+ kwargs['months'] = months
+ elif days:
+ kwargs['days'] = days
+ if align:
+ weekday = weekdays.get(align.lower())
+ if weekday is not None:
+ kwargs['weekday'] = weekday, 0
+ elif align.lower() in ('bom', '1st', 'som'):
+ kwargs['day'] = 1
+ elif align.lower() in ('boy', 'soy'):
+ kwargs['day'] = 1
+ kwargs['month'] = mx.DateTime.January
+ else:
+ raise ValueError('bad relative date alignment %r' % align)
+ return mx.DateTime.now() + mx.DateTime.RelativeDateTime(**kwargs)
+
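A sketch of the 'reldate' forms the grammar accepts (not upstream code;
results are relative to now, with the time-of-day zeroed):

    relativeDate(days=-7)                  # midnight a week ago
    relativeDate(months=-1, align='bom')   # first of the previous month
    relativeDate(align='monday')           # aligned to Monday
    relativeDate(years=1, months=2)        # raises ValueError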
+class SoomFilterParse(soomparse):
+ def __init__(self, dataset, expr):
+ self.dataset = dataset
+ self.__expr = expr
+ soomparse.__init__(self, soomparseScanner(expr))
+ try:
+ self.__filter = self.goal()
+        except SyntaxError, s:
+            raise Error('Syntax error in filter expression: %s' %
+                        (s.msg,))
+        except yappsrt.NoMoreTokens:
+            raise Error('Could not complete parsing filter expression; '
+                        'stopped around here: %s' % self._scanner)
+
+ def filter(self):
+ # XXX probably want to build the return type here
+# print 'SoomFilterParse.filter: f("%s") %s' % (self.__expr, self.__filter)
+ return self.__filter
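A sketch of filter expressions this parser accepts (hypothetical dataset
and columns; not upstream code):

    SoomFilterParse(ds, 'sex = 1 and agegrp in (1, 2, 3)').filter()
    SoomFilterParse(ds, "not (cause ==: '80' or cause ==: '81')").filter()
    SoomFilterParse(ds, 'dod >= date(2001, 1, 1) and '
                        'dod < reldate(years=-1)').filter()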
diff --git a/SOOMv0/soomparse.py b/SOOMv0/soomparse.py
new file mode 100644
index 0000000..80803e2
--- /dev/null
+++ b/SOOMv0/soomparse.py
@@ -0,0 +1,953 @@
+# vim: set ts=4 sw=4 et:
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+
+# 3rd Party
+import Numeric
+import mx.DateTime
+# Application
+import soomfunc
+from SOOMv0.common import Error
+from SOOMv0 import Search
+
+# Begin -- grammar generated by Yapps
+import sys, re
+import yappsrt
+
+class soomparseScanner(yappsrt.Scanner):
+ patterns = [
+ ('"\\""', re.compile('"')),
+ ('"&"', re.compile('&')),
+ ('"-"', re.compile('-')),
+ ('"\\\\]"', re.compile('\\]')),
+ ('"\\\\["', re.compile('\\[')),
+ ('"\\\\|"', re.compile('\\|')),
+ ('"reldate"', re.compile('reldate')),
+ ('"date"', re.compile('date')),
+ ('","', re.compile(',')),
+ ('"\\\\]\\\\]"', re.compile('\\]\\]')),
+ ('"\\\\[\\\\["', re.compile('\\[\\[')),
+ ('"\\\\)"', re.compile('\\)')),
+ ('"\\\\("', re.compile('\\(')),
+ ('"and"', re.compile('and')),
+ ('"!~"', re.compile('!~')),
+ ('"~"', re.compile('~')),
+ ('"contains"', re.compile('contains')),
+ ('"between"', re.compile('between')),
+ ('"notin:"', re.compile('notin:')),
+ ('"notin"', re.compile('notin')),
+ ('"in:"', re.compile('in:')),
+ ('"in"', re.compile('in')),
+ ('"=="', re.compile('==')),
+ ('"is"', re.compile('is')),
+ ('"="', re.compile('=')),
+ ('"eq"', re.compile('eq')),
+ ('"equalto"', re.compile('equalto')),
+ ('"equals"', re.compile('equals')),
+ ('"does"', re.compile('does')),
+ ('"starts"', re.compile('starts')),
+ ('"not"', re.compile('not')),
+ ('"notstartswith"', re.compile('notstartswith')),
+ ('"notstartingwith"', re.compile('notstartingwith')),
+ ('"<>:"', re.compile('<>:')),
+ ('"#:"', re.compile('#:')),
+ ('"!==:"', re.compile('!==:')),
+ ('"!=:"', re.compile('!=:')),
+ ('"ne:"', re.compile('ne:')),
+ ('"doesnotequal:"', re.compile('doesnotequal:')),
+ ('"notequal:"', re.compile('notequal:')),
+ ('"notequalto:"', re.compile('notequalto:')),
+ ('"<>"', re.compile('<>')),
+ ('"is not"', re.compile('is not')),
+ ('"#"', re.compile('#')),
+ ('"!=="', re.compile('!==')),
+ ('"!="', re.compile('!=')),
+ ('"ne"', re.compile('ne')),
+ ('"doesnotequal"', re.compile('doesnotequal')),
+ ('"notequal"', re.compile('notequal')),
+ ('"notequalto"', re.compile('notequalto')),
+ ('"greater"', re.compile('greater')),
+ ('"=>:"', re.compile('=>:')),
+ ('">=:"', re.compile('>=:')),
+ ('"ge:"', re.compile('ge:')),
+ ('"greaterequal:"', re.compile('greaterequal:')),
+ ('"greaterthanorequalto:"', re.compile('greaterthanorequalto:')),
+ ('"=>"', re.compile('=>')),
+ ('">="', re.compile('>=')),
+ ('"ge"', re.compile('ge')),
+ ('"greaterequal"', re.compile('greaterequal')),
+ ('"greaterthanorequalto"', re.compile('greaterthanorequalto')),
+ ('">:"', re.compile('>:')),
+ ('"gt:"', re.compile('gt:')),
+ ('"greaterthan:"', re.compile('greaterthan:')),
+ ('">"', re.compile('>')),
+ ('"gt"', re.compile('gt')),
+ ('"greaterthan"', re.compile('greaterthan')),
+ ('"equal:"', re.compile('equal:')),
+ ('"than:"', re.compile('than:')),
+ ('"to:"', re.compile('to:')),
+ ('"to"', re.compile('to')),
+ ('"equal"', re.compile('equal')),
+ ('"or"', re.compile('or')),
+ ('"than"', re.compile('than')),
+ ('"less"', re.compile('less')),
+ ('"=<:"', re.compile('=<:')),
+ ('"<=:"', re.compile('<=:')),
+ ('"le:"', re.compile('le:')),
+ ('"lessequal:"', re.compile('lessequal:')),
+ ('"lessthanorequalto:"', re.compile('lessthanorequalto:')),
+ ('"=<"', re.compile('=<')),
+ ('"<="', re.compile('<=')),
+ ('"le"', re.compile('le')),
+ ('"lessequal"', re.compile('lessequal')),
+ ('"lessthanorequalto"', re.compile('lessthanorequalto')),
+ ('"<:"', re.compile('<:')),
+ ('"lt:"', re.compile('lt:')),
+ ('"lessthan:"', re.compile('lessthan:')),
+ ('"<"', re.compile('<')),
+ ('"lt"', re.compile('lt')),
+ ('"lessthan"', re.compile('lessthan')),
+ ('"eq:"', re.compile('eq:')),
+ ('"startswith"', re.compile('startswith')),
+ ('"startingwith"', re.compile('startingwith')),
+ ('"==:"', re.compile('==:')),
+ ('"=:"', re.compile('=:')),
+ ('"with"', re.compile('with')),
+ ('"starting"', re.compile('starting')),
+ ('[ \t\n\r]+', re.compile('[ \t\n\r]+')),
+ ('END', re.compile('$')),
+ ('DATE', re.compile('\\d{4}-\\d{1,2}-\\d{1,2}')),
+ ('INT', re.compile('[-+]?[0-9]+')),
+        ('FLOAT', re.compile('[-+]?(([0-9]*\\.[0-9]+)|([0-9]+\\.)|(([0-9]*\\.)?[0-9]+[eE][-+]?[0-9]+))')),
+ ('ID', re.compile('[a-zA-Z0-9_]+')),
+ ('STR', re.compile('[rR]?\'([^\\n\'\\\\]|\\\\.)*\'|[rR]?"([^\\n"\\\\]|\\\\.)*"')),
+ ('WORD', re.compile("[a-zA-Z0-9'*]?[a-zA-Z0-9][a-zA-Z0-9'*]*")),
+ ('NULL', re.compile('[Nn][Oo][Nn][Ee]|[Nn][Uu][Ll][Ll]')),
+ ]
+ def __init__(self, str):
+ yappsrt.Scanner.__init__(self,None,['[ \t\n\r]+'],str)
+
+class soomparse(yappsrt.Parser):
+ Context = yappsrt.Context
+ def starts_with(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'starts_with', [])
+ _token = self._peek('"starting"', '"=:"', '"==:"', '"startingwith"', '"startswith"', '"eq:"')
+ if _token == '"starting"':
+ self._scan('"starting"')
+ self._scan('"with"')
+ return 'op_equal_col'
+ elif _token == '"=:"':
+ self._scan('"=:"')
+ return 'op_equal_col'
+ elif _token == '"==:"':
+ self._scan('"==:"')
+ return 'op_equal_col'
+ elif _token == '"startingwith"':
+ self._scan('"startingwith"')
+ return 'op_equal_col'
+ elif _token == '"startswith"':
+ self._scan('"startswith"')
+ return 'op_equal_col'
+ else: # == '"eq:"'
+ self._scan('"eq:"')
+ return 'op_equal_col'
+
+ def lt(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'lt', [])
+ _token = self._peek('"lessthan"', '"lt"', '"<"')
+ if _token == '"lessthan"':
+ self._scan('"lessthan"')
+ return 'op_less_than'
+ elif _token == '"lt"':
+ self._scan('"lt"')
+ return 'op_less_than'
+ else: # == '"<"'
+ self._scan('"<"')
+ return 'op_less_than'
+
+ def lt_col(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'lt_col', [])
+ _token = self._peek('"lessthan:"', '"lt:"', '"<:"')
+ if _token == '"lessthan:"':
+ self._scan('"lessthan:"')
+ return 'op_less_than_col'
+ elif _token == '"lt:"':
+ self._scan('"lt:"')
+ return 'op_less_than_col'
+ else: # == '"<:"'
+ self._scan('"<:"')
+ return 'op_less_than_col'
+
+ def le(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'le', [])
+ _token = self._peek('"lessthanorequalto"', '"lessequal"', '"le"', '"<="', '"=<"')
+ if _token == '"lessthanorequalto"':
+ self._scan('"lessthanorequalto"')
+ return 'op_less_equal'
+ elif _token == '"lessequal"':
+ self._scan('"lessequal"')
+ return 'op_less_equal'
+ elif _token == '"le"':
+ self._scan('"le"')
+ return 'op_less_equal'
+ elif _token == '"<="':
+ self._scan('"<="')
+ return 'op_less_equal'
+ else: # == '"=<"'
+ self._scan('"=<"')
+ return 'op_less_equal'
+
+ def le_col(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'le_col', [])
+ _token = self._peek('"lessthanorequalto:"', '"lessequal:"', '"le:"', '"<=:"', '"=<:"')
+ if _token == '"lessthanorequalto:"':
+ self._scan('"lessthanorequalto:"')
+ return 'op_less_equal_col'
+ elif _token == '"lessequal:"':
+ self._scan('"lessequal:"')
+ return 'op_less_equal_col'
+ elif _token == '"le:"':
+ self._scan('"le:"')
+ return 'op_less_equal_col'
+ elif _token == '"<=:"':
+ self._scan('"<=:"')
+ return 'op_less_equal_col'
+ else: # == '"=<:"'
+ self._scan('"=<:"')
+ return 'op_less_equal_col'
+
+ def lt_clause(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'lt_clause', [])
+ self._scan('"less"')
+ _token = self._peek('"than"', '"than:"', '"or"')
+ if _token == '"than"':
+ self._scan('"than"')
+ _token = self._peek('"or"', 'NULL', 'INT', 'FLOAT', 'STR', '"\\\\[\\\\["', '"\\\\("', '"date"', '"reldate"')
+ if _token != '"or"':
+ return 'op_less_than'
+ else: # == '"or"'
+ self._scan('"or"')
+ self._scan('"equal"')
+ _token = self._peek('"to"', '"to:"')
+ if _token == '"to"':
+ self._scan('"to"')
+ return 'op_less_equal'
+ else: # == '"to:"'
+ self._scan('"to:"')
+ return 'op_less_equal_col'
+ elif _token == '"than:"':
+ self._scan('"than:"')
+ return 'op_less_than_col'
+ else: # == '"or"'
+ self._scan('"or"')
+ _token = self._peek('"equal"', '"equal:"')
+ if _token == '"equal"':
+ self._scan('"equal"')
+ return 'op_less_equal'
+ else: # == '"equal:"'
+ self._scan('"equal:"')
+ return 'op_less_equal_col'
+
+ def gt(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'gt', [])
+ _token = self._peek('"greaterthan"', '"gt"', '">"')
+ if _token == '"greaterthan"':
+ self._scan('"greaterthan"')
+ return 'op_greater_than'
+ elif _token == '"gt"':
+ self._scan('"gt"')
+ return 'op_greater_than'
+ else: # == '">"'
+ self._scan('">"')
+ return 'op_greater_than'
+
+ def gt_col(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'gt_col', [])
+ _token = self._peek('"greaterthan:"', '"gt:"', '">:"')
+ if _token == '"greaterthan:"':
+ self._scan('"greaterthan:"')
+ return 'op_greater_than_col'
+ elif _token == '"gt:"':
+ self._scan('"gt:"')
+ return 'op_greater_than_col'
+ else: # == '">:"'
+ self._scan('">:"')
+ return 'op_greater_than_col'
+
+ def ge(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'ge', [])
+ _token = self._peek('"greaterthanorequalto"', '"greaterequal"', '"ge"', '">="', '"=>"')
+ if _token == '"greaterthanorequalto"':
+ self._scan('"greaterthanorequalto"')
+ return 'op_greater_equal'
+ elif _token == '"greaterequal"':
+ self._scan('"greaterequal"')
+ return 'op_greater_equal'
+ elif _token == '"ge"':
+ self._scan('"ge"')
+ return 'op_greater_equal'
+ elif _token == '">="':
+ self._scan('">="')
+ return 'op_greater_equal'
+ else: # == '"=>"'
+ self._scan('"=>"')
+ return 'op_greater_equal'
+
+ def ge_col(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'ge_col', [])
+ _token = self._peek('"greaterthanorequalto:"', '"greaterequal:"', '"ge:"', '">=:"', '"=>:"')
+ if _token == '"greaterthanorequalto:"':
+ self._scan('"greaterthanorequalto:"')
+ return 'op_greater_equal_col'
+ elif _token == '"greaterequal:"':
+ self._scan('"greaterequal:"')
+ return 'op_greater_equal_col'
+ elif _token == '"ge:"':
+ self._scan('"ge:"')
+ return 'op_greater_equal_col'
+ elif _token == '">=:"':
+ self._scan('">=:"')
+ return 'op_greater_equal_col'
+ else: # == '"=>:"'
+ self._scan('"=>:"')
+ return 'op_greater_equal_col'
+
+ def gt_clause(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'gt_clause', [])
+ self._scan('"greater"')
+ _token = self._peek('"than"', '"than:"', '"or"')
+ if _token == '"than"':
+ self._scan('"than"')
+ _token = self._peek('"or"', 'NULL', 'INT', 'FLOAT', 'STR', '"\\\\[\\\\["', '"\\\\("', '"date"', '"reldate"')
+ if _token != '"or"':
+ return 'op_greater_than'
+ else: # == '"or"'
+ self._scan('"or"')
+ self._scan('"equal"')
+ _token = self._peek('"to"', '"to:"')
+ if _token == '"to"':
+ self._scan('"to"')
+ return 'op_greater_equal'
+ else: # == '"to:"'
+ self._scan('"to:"')
+ return 'op_greater_equal_col'
+ elif _token == '"than:"':
+ self._scan('"than:"')
+ return 'op_greater_than_col'
+ else: # == '"or"'
+ self._scan('"or"')
+ _token = self._peek('"equal"', '"equal:"')
+ if _token == '"equal"':
+ self._scan('"equal"')
+ return 'op_greater_equal'
+ else: # == '"equal:"'
+ self._scan('"equal:"')
+ return 'op_greater_equal_col'
+
+ def ne(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'ne', [])
+ _token = self._peek('"notequalto"', '"notequal"', '"doesnotequal"', '"ne"', '"!="', '"!=="', '"#"', '"is not"', '"<>"')
+ if _token == '"notequalto"':
+ self._scan('"notequalto"')
+ return 'op_not_equal'
+ elif _token == '"notequal"':
+ self._scan('"notequal"')
+ return 'op_not_equal'
+ elif _token == '"doesnotequal"':
+ self._scan('"doesnotequal"')
+ return 'op_not_equal'
+ elif _token == '"ne"':
+ self._scan('"ne"')
+ return 'op_not_equal'
+ elif _token == '"!="':
+ self._scan('"!="')
+ return 'op_not_equal'
+ elif _token == '"!=="':
+ self._scan('"!=="')
+ return 'op_not_equal'
+ elif _token == '"#"':
+ self._scan('"#"')
+ return 'op_not_equal'
+ elif _token == '"is not"':
+ self._scan('"is not"')
+ return 'op_not_equal'
+ else: # == '"<>"'
+ self._scan('"<>"')
+ return 'op_not_equal'
+
+ def ne_col(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'ne_col', [])
+ _token = self._peek('"notequalto:"', '"notequal:"', '"doesnotequal:"', '"ne:"', '"!=:"', '"!==:"', '"#:"', '"<>:"', '"notstartingwith"', '"notstartswith"')
+ if _token == '"notequalto:"':
+ self._scan('"notequalto:"')
+ return 'op_not_equal_col'
+ elif _token == '"notequal:"':
+ self._scan('"notequal:"')
+ return 'op_not_equal_col'
+ elif _token == '"doesnotequal:"':
+ self._scan('"doesnotequal:"')
+ return 'op_not_equal_col'
+ elif _token == '"ne:"':
+ self._scan('"ne:"')
+ return 'op_not_equal_col'
+ elif _token == '"!=:"':
+ self._scan('"!=:"')
+ return 'op_not_equal_col'
+ elif _token == '"!==:"':
+ self._scan('"!==:"')
+ return 'op_not_equal_col'
+ elif _token == '"#:"':
+ self._scan('"#:"')
+ return 'op_not_equal_col'
+ elif _token == '"<>:"':
+ self._scan('"<>:"')
+ return 'op_not_equal_col'
+ elif _token == '"notstartingwith"':
+ self._scan('"notstartingwith"')
+ return 'op_not_equal_col'
+ else: # == '"notstartswith"'
+ self._scan('"notstartswith"')
+ return 'op_not_equal_col'
+
+ def ne_clause(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'ne_clause', [])
+ self._scan('"not"')
+ _token = self._peek('"equal"', '"equal:"', '"starting"', '"startswith"', '"starts"')
+ if _token == '"equal"':
+ self._scan('"equal"')
+ _token = self._peek('"to"', '"to:"', 'NULL', 'INT', 'FLOAT', 'STR', '"\\\\[\\\\["', '"\\\\("', '"date"', '"reldate"')
+ if _token not in ['"to"', '"to:"']:
+ return 'op_not_equal'
+ elif _token == '"to"':
+ self._scan('"to"')
+ return 'op_not_equal'
+ else: # == '"to:"'
+ self._scan('"to:"')
+ return 'op_not_equal_col'
+ elif _token == '"equal:"':
+ self._scan('"equal:"')
+ return 'op_not_equal_col'
+ elif _token == '"starting"':
+ self._scan('"starting"')
+ self._scan('"with"')
+ return 'op_not_equal_col'
+ elif _token == '"startswith"':
+ self._scan('"startswith"')
+ return 'op_not_equal_col'
+ else: # == '"starts"'
+ self._scan('"starts"')
+ self._scan('"with"')
+ return 'op_not_equal_col'
+
+ def does_not_clause(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'does_not_clause', [])
+ self._scan('"does"')
+ self._scan('"not"')
+ _token = self._peek('"equal"', '"equal:"')
+ if _token == '"equal"':
+ self._scan('"equal"')
+ return 'op_not_equal'
+ else: # == '"equal:"'
+ self._scan('"equal:"')
+ return 'op_not_equal_col'
+
+ def eq(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'eq', [])
+ _token = self._peek('"equal"', '"equals"', '"equalto"', '"eq"', '"="', '"is"', '"=="')
+ if _token == '"equal"':
+ self._scan('"equal"')
+ self._scan('"to"')
+ return 'op_equal'
+ elif _token == '"equals"':
+ self._scan('"equals"')
+ return 'op_equal'
+ elif _token == '"equalto"':
+ self._scan('"equalto"')
+ return 'op_equal'
+ elif _token == '"eq"':
+ self._scan('"eq"')
+ return 'op_equal'
+ elif _token == '"="':
+ self._scan('"="')
+ return 'op_equal'
+ elif _token == '"is"':
+ self._scan('"is"')
+ return 'op_equal'
+ else: # == '"=="'
+ self._scan('"=="')
+ return 'op_equal'
+
+ def in_op(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'in_op', [])
+ self._scan('"in"')
+ return 'op_in'
+
+ def in_col(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'in_col', [])
+ self._scan('"in:"')
+ return 'op_in_col'
+
+ def not_in(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'not_in', [])
+ self._scan('"notin"')
+ return 'op_not_in'
+
+ def not_in_col(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'not_in_col', [])
+ self._scan('"notin:"')
+ return 'op_not_in_col'
+
+ def between(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'between', [])
+ self._scan('"between"')
+ return 'op_between'
+
+ def contains(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'contains', [])
+ self._scan('"contains"')
+ return 'op_contains'
+
+ def regexp(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'regexp', [])
+ self._scan('"~"')
+ return 'op_regexp'
+
+ def not_regexp(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'not_regexp', [])
+ self._scan('"!~"')
+ return 'op_not_regexp'
+
+ def op(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'op', [])
+ _token = self._peek('"starting"', '"=:"', '"==:"', '"startingwith"', '"startswith"', '"eq:"', '"lessthan"', '"lt"', '"<"', '"lessthan:"', '"lt:"', '"<:"', '"less"', '"greaterthan"', '"gt"', '">"', '"greaterthan:"', '"gt:"', '">:"', '"greater"', '"greaterthanorequalto"', '"greaterequal"', '"ge"', '">="', '"=>"', '"greaterthanorequalto:"', '"greaterequal:"', '"ge:"', '">=:"', '"=>:"', '"lessthanorequalto"', '"lessequal"', '"le"', '"<="', '"=<"', '"lessthanorequalto:"', '"lessequal: [...]
+ if _token in ['"starting"', '"=:"', '"==:"', '"startingwith"', '"startswith"', '"eq:"']:
+ starts_with = self.starts_with(_context)
+ return starts_with
+ elif _token in ['"lessthan"', '"lt"', '"<"']:
+ lt = self.lt(_context)
+ return lt
+ elif _token in ['"lessthan:"', '"lt:"', '"<:"']:
+ lt_col = self.lt_col(_context)
+ return lt_col
+ elif _token == '"less"':
+ lt_clause = self.lt_clause(_context)
+ return lt_clause
+ elif _token in ['"greaterthan"', '"gt"', '">"']:
+ gt = self.gt(_context)
+ return gt
+ elif _token in ['"greaterthan:"', '"gt:"', '">:"']:
+ gt_col = self.gt_col(_context)
+ return gt_col
+ elif _token == '"greater"':
+ gt_clause = self.gt_clause(_context)
+ return gt_clause
+ elif _token in ['"greaterthanorequalto"', '"greaterequal"', '"ge"', '">="', '"=>"']:
+ ge = self.ge(_context)
+ return ge
+ elif _token in ['"greaterthanorequalto:"', '"greaterequal:"', '"ge:"', '">=:"', '"=>:"']:
+ ge_col = self.ge_col(_context)
+ return ge_col
+ elif _token in ['"lessthanorequalto"', '"lessequal"', '"le"', '"<="', '"=<"']:
+ le = self.le(_context)
+ return le
+ elif _token in ['"lessthanorequalto:"', '"lessequal:"', '"le:"', '"<=:"', '"=<:"']:
+ le_col = self.le_col(_context)
+ return le_col
+ elif _token in ['"notequalto"', '"notequal"', '"doesnotequal"', '"ne"', '"!="', '"!=="', '"#"', '"is not"', '"<>"']:
+ ne = self.ne(_context)
+ return ne
+ elif _token not in ['"not"', '"does"', '"equal"', '"equals"', '"equalto"', '"eq"', '"="', '"is"', '"=="', '"in"', '"in:"', '"notin"', '"notin:"', '"between"', '"contains"', '"~"', '"!~"']:
+ ne_col = self.ne_col(_context)
+ return ne_col
+ elif _token == '"not"':
+ ne_clause = self.ne_clause(_context)
+ return ne_clause
+ elif _token == '"does"':
+ does_not_clause = self.does_not_clause(_context)
+ return does_not_clause
+ elif _token not in ['"in"', '"in:"', '"notin"', '"notin:"', '"between"', '"contains"', '"~"', '"!~"']:
+ eq = self.eq(_context)
+ return eq
+ elif _token == '"in"':
+ in_op = self.in_op(_context)
+ return in_op
+ elif _token == '"in:"':
+ in_col = self.in_col(_context)
+ return in_col
+ elif _token == '"notin"':
+ not_in = self.not_in(_context)
+ return not_in
+ elif _token == '"notin:"':
+ not_in_col = self.not_in_col(_context)
+ return not_in_col
+ elif _token == '"between"':
+ between = self.between(_context)
+ return between
+ elif _token == '"contains"':
+ contains = self.contains(_context)
+ return contains
+ elif _token == '"~"':
+ regexp = self.regexp(_context)
+ return regexp
+ else: # == '"!~"'
+ not_regexp = self.not_regexp(_context)
+ return not_regexp
+
+ def goal(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'goal', [])
+ expr = self.expr(_context)
+ END = self._scan('END')
+ return expr
+
+ def sgoal(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'sgoal', [])
+ sexpr = self.sexpr(_context)
+ END = self._scan('END')
+ return sexpr
+
+ def expr(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'expr', [])
+ factor = self.factor(_context)
+ f = factor
+ while self._peek('"or"', 'END', '"\\\\)"') == '"or"':
+ self._scan('"or"')
+ factor = self.factor(_context)
+ f = soomfunc.union(f, factor)
+ if self._peek() not in ['"or"', 'END', '"\\\\)"']:
+ raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['"or"', 'END', '"\\\\)"']))
+ return f
+
+ def factor(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'factor', [])
+ comparison = self.comparison(_context)
+ f = comparison
+ while self._peek('"and"', '"or"', 'END', '"\\\\)"') == '"and"':
+ self._scan('"and"')
+ comparison = self.comparison(_context)
+ f = soomfunc.intersect(f, comparison)
+ if self._peek() not in ['"and"', '"or"', 'END', '"\\\\)"']:
+ raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['"and"', '"or"', 'END', '"\\\\)"']))
+ return f
+
+ def comparison(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'comparison', [])
+ _token = self._peek('"\\\\("', '"not"', 'ID')
+ if _token == 'ID':
+ col = self.col(_context)
+ op = self.op(_context)
+ term = self.term(_context)
+ return col.filter_op(op, term)
+ elif _token == '"\\\\("':
+ self._scan('"\\\\("')
+ expr = self.expr(_context)
+ self._scan('"\\\\)"')
+ return expr
+ else: # == '"not"'
+ self._scan('"not"')
+ comparison = self.comparison(_context)
+ return soomfunc.outersect(Numeric.arrayrange(len(self.dataset)), comparison)
+
+ def term(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'term', [])
+ _token = self._peek('NULL', 'INT', 'FLOAT', 'STR', '"\\\\[\\\\["', '"\\\\("', '"date"', '"reldate"')
+ if _token == 'NULL':
+ NULL = self._scan('NULL')
+ return None
+ elif _token == 'INT':
+ INT = self._scan('INT')
+ return int(INT)
+ elif _token == 'FLOAT':
+ FLOAT = self._scan('FLOAT')
+ return float(FLOAT)
+ elif _token == 'STR':
+ STR = self._scan('STR')
+ return dequote(STR)
+ elif _token == '"\\\\[\\\\["':
+ self._scan('"\\\\[\\\\["')
+ sexpr = self.sexpr(_context)
+ self._scan('"\\\\]\\\\]"')
+ return sexpr
+ elif _token == '"\\\\("':
+ self._scan('"\\\\("')
+ while 1:
+ term = self.term(_context)
+ term_list = [term]
+ while self._peek('","', '"\\\\)"', 'NULL', 'INT', 'FLOAT', 'STR', '"\\\\[\\\\["', '"\\\\("', '"date"', '"reldate"') == '","':
+ self._scan('","')
+ term = self.term(_context)
+ term_list.append(term)
+ if self._peek() not in ['","', '"\\\\)"', 'NULL', 'INT', 'FLOAT', 'STR', '"\\\\[\\\\["', '"\\\\("', '"date"', '"reldate"']:
+ raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['","', '"\\\\)"', 'NULL', 'INT', 'FLOAT', 'STR', '"\\\\[\\\\["', '"\\\\("', '"date"', '"reldate"']))
+ if self._peek('NULL', 'INT', 'FLOAT', 'STR', '"\\\\[\\\\["', '"\\\\("', '"date"', '"reldate"', '"\\\\)"', '","') not in ['NULL', 'INT', 'FLOAT', 'STR', '"\\\\[\\\\["', '"\\\\("', '"date"', '"reldate"']: break
+ self._scan('"\\\\)"')
+ return term_list
+ elif _token == '"date"':
+ self._scan('"date"')
+ _token = self._peek('"\\\\("', 'DATE')
+ if _token == '"\\\\("':
+ self._scan('"\\\\("')
+ INT = self._scan('INT')
+ year = int(INT)
+ self._scan('","')
+ INT = self._scan('INT')
+ month = int(INT)
+ self._scan('","')
+ INT = self._scan('INT')
+ day = int(INT)
+ self._scan('"\\\\)"')
+ return mx.DateTime.Date(year, month, day)
+ else: # == 'DATE'
+ DATE = self._scan('DATE')
+ return mx.DateTime.ISO.ParseDate(DATE)
+ else: # == '"reldate"'
+ self._scan('"reldate"')
+ kwargs = self.kwargs(_context)
+ return relativeDate(**kwargs)
+
+ def col(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'col', [])
+ ID = self._scan('ID')
+ return self.dataset.get_column(ID)
+
+ def kwargs(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'kwargs', [])
+ self._scan('"\\\\("')
+ kwargs = {}
+ _token = self._peek('ID', '"\\\\)"', '","')
+ if _token != 'ID':
+ pass
+ else: # == 'ID'
+ ID = self._scan('ID')
+ self._scan('"="')
+ term = self.term(_context)
+ kwargs[ID] = term
+ while self._peek('","', '"\\\\)"') == '","':
+ self._scan('","')
+ ID = self._scan('ID')
+ self._scan('"="')
+ term = self.term(_context)
+ kwargs[ID] = term
+ if self._peek() not in ['","', '"\\\\)"']:
+ raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['","', '"\\\\)"']))
+ self._scan('"\\\\)"')
+ return kwargs
+
+ def sexpr(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'sexpr', [])
+ sfactor = self.sfactor(_context)
+ f = Search.Disjunction(sfactor)
+ while self._peek('"\\\\|"', 'END', '"\\\\]\\\\]"', '"\\\\)"') == '"\\\\|"':
+ self._scan('"\\\\|"')
+ sfactor = self.sfactor(_context)
+ f.append(sfactor)
+ if self._peek() not in ['"\\\\|"', 'END', '"\\\\]\\\\]"', '"\\\\)"']:
+ raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['"\\\\|"', 'END', '"\\\\]\\\\]"', '"\\\\)"']))
+ return f
+
+ def sfactor(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'sfactor', [])
+ sphrase = self.sphrase(_context)
+ f = sphrase
+ while self._peek('"-"', '"&"', '"<"', '">"', '"~"', '"\\""', 'WORD', '"\\\\("', '"\\\\|"', 'END', '"\\\\]\\\\]"', '"\\\\)"') not in ['"\\\\|"', 'END', '"\\\\]\\\\]"', '"\\\\)"']:
+ _token = self._peek('"-"', '"&"', '"<"', '">"', '"~"', '"\\""', 'WORD', '"\\\\("')
+ if _token == '"&"':
+ sconjop = self.sconjop(_context)
+ sphrase = self.sphrase(_context)
+ f = Search.Conjunction(sconjop, f, sphrase)
+ elif _token not in ['"-"', '"\\""', 'WORD', '"\\\\("']:
+ snearop = self.snearop(_context)
+ op = snearop; nearness = Search.Conjunction.DEFAULT_NEARNESS
+ _token = self._peek('"\\\\["', '"\\""', 'WORD', '"\\\\("')
+ if _token == '"\\\\["':
+ self._scan('"\\\\["')
+ INT = self._scan('INT')
+ self._scan('"\\\\]"')
+ nearness = int(INT)
+ else: # in ['"\\""', 'WORD', '"\\\\("']
+ pass
+ sphrase = self.sphrase(_context)
+ f = Search.Conjunction(op, f, sphrase, nearness)
+ else: # in ['"-"', '"\\""', 'WORD', '"\\\\("']
+ op = '&'
+ _token = self._peek('"-"', '"\\""', 'WORD', '"\\\\("')
+ if _token == '"-"':
+ self._scan('"-"')
+ op = '&!'
+ else: # in ['"\\""', 'WORD', '"\\\\("']
+ pass
+ sphrase = self.sphrase(_context)
+ f = Search.Conjunction(op, f, sphrase)
+ if self._peek() not in ['"-"', '"&"', '"<"', '">"', '"~"', '"\\""', 'WORD', '"\\\\("', '"\\\\|"', 'END', '"\\\\]\\\\]"', '"\\\\)"']:
+ raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['"-"', '"&"', '"<"', '">"', '"~"', '"\\""', 'WORD', '"\\\\("', '"\\\\|"', 'END', '"\\\\]\\\\]"', '"\\\\)"']))
+ return f
+
+ def sconjop(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'sconjop', [])
+ self._scan('"&"')
+ op = '&'
+ _token = self._peek('"-"', '"\\""', 'WORD', '"\\\\("')
+ if _token == '"-"':
+ self._scan('"-"')
+ op = '&!'
+ else: # in ['"\\""', 'WORD', '"\\\\("']
+ pass
+ return op
+
+ def snearop(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'snearop', [])
+ _token = self._peek('"<"', '">"', '"~"')
+ if _token == '"<"':
+ self._scan('"<"')
+ return '<'
+ elif _token == '">"':
+ self._scan('">"')
+ return '>'
+ else: # == '"~"'
+ self._scan('"~"')
+ return '~'
+
+ def sphrase(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'sphrase', [])
+ _token = self._peek('"\\""', 'WORD', '"\\\\("')
+ if _token != '"\\""':
+ sterm = self.sterm(_context)
+ return sterm
+ else: # == '"\\""'
+ self._scan('"\\""')
+ words = []
+ while 1:
+ sterm = self.sterm(_context)
+ words.append(sterm)
+ if self._peek('WORD', '"\\\\("', '"\\""') not in ['WORD', '"\\\\("']: break
+ self._scan('"\\""')
+ return Search.Phrase(words)
+
+ def sterm(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'sterm', [])
+ _token = self._peek('WORD', '"\\\\("')
+ if _token == 'WORD':
+ WORD = self._scan('WORD')
+ return Search.Word(WORD)
+ else: # == '"\\\\("'
+ self._scan('"\\\\("')
+ sexpr = self.sexpr(_context)
+ self._scan('"\\\\)"')
+ return sexpr
+
+
+def parse(rule, text):
+ P = soomparse(soomparseScanner(text))
+ return yappsrt.wrap_error_reporter(P, rule)
+
+# End -- grammar generated by Yapps
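+
+# Note: the 'rule' argument to parse() names the start production: 'goal'
+# parses a row-filter expression, 'sgoal' a free-text search expression
+# (see the corresponding rules above).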
+
+
+
+import re
+dequote_re = re.compile(r'\\(x[0-9a-fA-F]{2}|[0-7]{1,3}|.)')
+backslash_map = {
+ '\\': '\\',
+ "'": "'",
+ '"': '"',
+ 'a': '\a',
+ 'b': '\b',
+ 'f': '\f',
+ 'n': '\n',
+ 'r': '\r',
+ 't': '\t',
+ 'v': '\v',
+}
+def dequote(s):
+ """
+ Remove leading and trailing quotes, honour any backslash quoting
+ within the string.
+
+ Using the built-in eval() looks attractive at first glance, but
+ opens serious security issues.
+ """
+ def backslash_sub(match):
+ match = match.group(0)
+ if match.startswith(r'\x'):
+ try:
+ return chr(int(match[2:], 16))
+ except ValueError:
+ raise ValueError('invalid \\x escape')
+        elif match[1:].isdigit():
+            try:
+                return chr(int(match[1:], 8))
+            except ValueError:
+                # e.g. \8, \9 or an octal value above \377: not a valid escape
+                return match
+ else:
+ return backslash_map.get(match[1:], match)
+ if s[0] in ('r', 'R'):
+ return s[2:-1]
+ return dequote_re.sub(backslash_sub, s[1:-1])
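+
+# Behaviour sketch (illustrative, not a doctest):
+#   dequote('"a\\tb"')   -> 'a<TAB>b'  (backslash escape expanded)
+#   dequote("r'a\\tb'")  -> 'a\\tb'    (raw-string prefix, escapes kept)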
+
+def relativeDate(years=None, months=None, days=None, align=None):
+ def onlyone():
+ raise ValueError('Only specify one of years, months, or days')
+ weekdays = {
+ 'monday': mx.DateTime.Monday,
+ 'tuesday': mx.DateTime.Tuesday,
+ 'wednesday': mx.DateTime.Wednesday,
+ 'thursday': mx.DateTime.Thursday,
+ 'friday': mx.DateTime.Friday,
+ 'saturday': mx.DateTime.Saturday,
+ 'sunday': mx.DateTime.Sunday,
+ }
+ kwargs = {
+ 'hour': 0,
+ 'minute': 0,
+ 'second': 0,
+ }
+ if years:
+ if months or days:
+ onlyone()
+ kwargs['years'] = years
+ elif months:
+ if days:
+ onlyone()
+ kwargs['months'] = months
+ elif days:
+ kwargs['days'] = days
+ if align:
+ weekday = weekdays.get(align.lower())
+ if weekday is not None:
+ kwargs['weekday'] = weekday, 0
+ elif align.lower() in ('bom', '1st', 'som'):
+ kwargs['day'] = 1
+ elif align.lower() in ('boy', 'soy'):
+ kwargs['day'] = 1
+ kwargs['month'] = mx.DateTime.January
+ else:
+ raise ValueError('bad relative date alignment %r' % align)
+ return mx.DateTime.now() + mx.DateTime.RelativeDateTime(**kwargs)
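+
+# Illustrative calls (reached from the filter grammar via the 'reldate'
+# keyword; the alignment names are those handled above):
+#   relativeDate(days=7)                 # midnight, seven days from now
+#   relativeDate(months=1, align='bom')  # first day of next month, 00:00
+#   relativeDate(align='monday')         # aligned to a Monday, 00:00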
+
+class SoomFilterParse(soomparse):
+ def __init__(self, dataset, expr):
+ self.dataset = dataset
+ self.__expr = expr
+ soomparse.__init__(self, soomparseScanner(expr))
+ try:
+ self.__filter = self.goal()
+        except SyntaxError, s:
+            raise Error('Syntax error in filter expression: %s' % s.msg)
+        except yappsrt.NoMoreTokens:
+            raise Error('Could not complete parsing filter expression; '
+                        'stopped around here: %s' % self._scanner)
+
+ def filter(self):
+ # XXX probably want to build the return type here
+# print 'SoomFilterParse.filter: f("%s") %s' % (self.__expr, self.__filter)
+ return self.__filter
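+
+# Minimal usage sketch (column names are illustrative; 'ds' is assumed to
+# be an already-loaded SOOM dataset exposing get_column()):
+#
+#   elderly = SoomFilterParse(ds, 'age >= 65 and sex = 1').filter()
+#   recent = SoomFilterParse(ds,
+#                'admitted between (date(2003,1,1), date(2004,1,1))').filter()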
diff --git a/SOOMv0/xvfb_spawn.py b/SOOMv0/xvfb_spawn.py
new file mode 100644
index 0000000..c9a3396
--- /dev/null
+++ b/SOOMv0/xvfb_spawn.py
@@ -0,0 +1,201 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+"""
+Start a virtual X server on the specified screen if not already
+running.
+
+$Id: xvfb_spawn.py 2626 2007-03-09 04:35:54Z andrewm $
+$Source: /usr/local/cvsroot/NSWDoH/SOOMv0/SOOMv0/xvfb_spawn.py,v $
+"""
+
+import sys
+import os
+import errno
+import md5
+
+xpath = '/usr/X11R6/bin/'
+xvfb = xpath + 'Xvfb'
+xauth = xpath + 'xauth'
+default_xauth_file = '.xvfb-auth'
+
+class VFBError(Exception): pass
+class PipeIOError(Exception): pass
+
+def is_running(display_num):
+ """
+ Check to see if an X server for the given screen is running on
+ the local machine.
+ """
+ lock_filename = '/tmp/.X%d-lock' % display_num
+ try:
+ lock_file = open(lock_filename)
+ except IOError, (eno, estr):
+ if eno == errno.ENOENT:
+ return False
+ raise
+ try:
+ try:
+ pid = int(lock_file.readline().strip())
+ except (ValueError, EOFError, IOError):
+ return True # Have to assume it is?!
+ try:
+ os.kill(pid, 0)
+        except OSError, (eno, estr):
+ if eno == errno.ESRCH:
+ return False
+ elif eno == errno.EPERM:
+ return True
+ raise
+ return True
+ finally:
+ lock_file.close()
+
+def fork_vfb(display_num, xauth_file, nullerrors=False):
+ """
+ Start virtual X server in a backgrounded process that is
+ disassociated from the controlling TTY (no HUP signals).
+ Second fork is required to prevent subsequently opened TTY's
+ becoming our controlling TTY [Stevens 93], and so init becomes
+ the parent of Xvfb (and therefore avoiding zombies).
+ """
+ pid = os.fork()
+ if not pid:
+ os.close(0)
+ os.open('/dev/null', os.O_RDONLY)
+ os.close(1)
+ os.open('/dev/null', os.O_WRONLY)
+ if nullerrors:
+ os.close(2)
+ os.open('/dev/null', os.O_WRONLY)
+ # Close any other fd's inherited from our parent (such as client
+ # sockets). A max fd of 31 isn't correct, but is a trade-off that
+ # resolves most problems without wasting too many cycles.
+ for fd in range(3, 32):
+ try:
+ os.close(fd)
+ except OSError:
+ pass
+ os.setsid()
+ pid = os.fork()
+ if pid:
+ os._exit(1)
+ try:
+ os.execl(xvfb, os.path.basename(xvfb),
+ ':%d' % display_num,
+ '-nolisten', 'tcp',
+ # '-terminate',
+ '-auth', xauth_file)
+ finally:
+ os._exit(1)
+
+class PipeIO:
+ def __init__(self, mode, *args):
+ read_fd, write_fd = os.pipe()
+ self.pid = os.fork()
+ self.cmd = args[0]
+ if self.pid:
+ # Parent
+ if mode == 'w':
+ os.close(read_fd)
+ self.file = os.fdopen(write_fd, 'w')
+ else:
+ os.close(write_fd)
+ self.file = os.fdopen(read_fd, 'r')
+ else:
+ # Child
+ if mode == 'w':
+ os.close(write_fd)
+ os.dup2(read_fd, 0)
+ os.close(read_fd)
+ else:
+ os.close(read_fd)
+ os.dup2(write_fd, 1)
+ os.close(write_fd)
+ try:
+ os.execl(self.cmd, os.path.basename(args[0]), *args[1:])
+ except OSError, (eno, estr):
+ print >> sys.stderr, 'exec %s: %s' % (self.cmd, estr)
+ os._exit(1)
+
+ def __getattr__(self, a):
+ return getattr(self.file, a)
+
+ def close(self):
+ self.file.close()
+ while 1:
+ try:
+ pid, status = os.waitpid(self.pid, 0)
+ except OSError, (eno, estr):
+                if eno == errno.ECHILD:
+                    raise PipeIOError('No child process for %s' % self.cmd)
+                elif eno != errno.EINTR:
+                    raise
+ raise
+ else:
+ break
+ if os.WIFEXITED(status) and os.WEXITSTATUS(status):
+ raise PipeIOError('%s exited with status %d' %\
+ (self.cmd, os.WEXITSTATUS(status)))
+
+def init_auth(display_num, xauth_file):
+ f = open('/dev/urandom', 'rb')
+ try:
+ cookie = md5.new(f.read(40)).hexdigest()
+ finally:
+ f.close()
+ f = PipeIO('w', xauth, '-q', '-f', xauth_file, '-')
+ try:
+ f.write('add :%d MIT-MAGIC-COOKIE-1 %s\n' % (display_num, cookie))
+ finally:
+ f.close()
+
+def auth_display(xauth_file):
+ f = PipeIO('r', xauth, '-f', xauth_file, 'list')
+ try:
+ for line in f:
+ try:
+ dpy, auth_type, auth_str = line.split(None, 2)
+ display_host, display_num = dpy.split(':')
+ display_num = int(display_num)
+ except ValueError:
+ continue
+ if is_running(display_num):
+ return display_num
+ finally:
+ f.close()
+
+def spawn_xvfb(display_num, xauth_file=default_xauth_file, nullerrors=False):
+    """
+    Start a virtual X server on the specified display if not already running
+    """
+    xauth_file = os.path.abspath(xauth_file)
+    os.environ['DISPLAY'] = ':%d' % display_num
+    os.environ['XAUTHORITY'] = xauth_file
+    if not is_running(display_num):
+        init_auth(display_num, xauth_file)
+        fork_vfb(display_num, xauth_file, nullerrors=nullerrors)
+
+def spawn_if_necessary(xauth_file=default_xauth_file):
+ if not os.environ.get('DISPLAY'):
+ xauth_file = os.path.abspath(xauth_file)
+ display_num = auth_display(xauth_file)
+ if display_num is None:
+ for display_num in xrange(10, 1000):
+ if not is_running(display_num):
+ break
+ else:
+ raise VFBError('no free displays!')
+ init_auth(display_num, xauth_file)
+ fork_vfb(display_num, xauth_file)
+ os.environ['DISPLAY'] = ':%d' % display_num
+ os.environ['XAUTHORITY'] = xauth_file
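+
+# Typical use (illustrative): call spawn_if_necessary() before starting any
+# X11-based plotting (e.g. R graphics via RPy) on a headless server. It is
+# a no-op when $DISPLAY is already set; otherwise it finds or starts an
+# Xvfb and exports DISPLAY/XAUTHORITY for this process:
+#
+#   from SOOMv0 import xvfb_spawn
+#   xvfb_spawn.spawn_if_necessary()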
diff --git a/SOOMv0/yappsrt.py b/SOOMv0/yappsrt.py
new file mode 100755
index 0000000..7485ca7
--- /dev/null
+++ b/SOOMv0/yappsrt.py
@@ -0,0 +1,304 @@
+#
+# Yapps 2 Runtime, part of Yapps 2 - yet another python parser system
+# Copyright 1999-2003 by Amit J. Patel <amitp at cs.stanford.edu>
+#
+# This version of the Yapps 2 Runtime can be distributed under the
+# terms of the MIT open source license, either found in the LICENSE file
+# included with the Yapps distribution
+# <http://theory.stanford.edu/~amitp/yapps/> or at
+# <http://www.opensource.org/licenses/mit-license.php>
+#
+
+"""Run time libraries needed to run parsers generated by Yapps.
+
+This module defines parse-time exception classes, a scanner class, a
+base class for parsers produced by Yapps, and a context class that
+keeps track of the parse stack.
+
+"""
+
+# TODO: it should be possible to embed yappsrt into the generated
+# grammar to make a standalone module.
+
+import sys, re
+
+class SyntaxError(Exception):
+ """When we run into an unexpected token, this is the exception to use"""
+ def __init__(self, charpos=-1, msg="Bad Token", context=None):
+ Exception.__init__(self)
+ self.charpos = charpos
+ self.msg = msg
+ self.context = context
+
+ def __str__(self):
+ if self.charpos < 0: return 'SyntaxError'
+ else: return 'SyntaxError at char%s(%s)' % (repr(self.charpos), self.msg)
+
+class NoMoreTokens(Exception):
+ """Another exception object, for when we run out of tokens"""
+ pass
+
+class Scanner:
+ """Yapps scanner.
+
+ The Yapps scanner can work in context sensitive or context
+ insensitive modes. The token(i) method is used to retrieve the
+ i-th token. It takes a restrict set that limits the set of tokens
+ it is allowed to return. In context sensitive mode, this restrict
+ set guides the scanner. In context insensitive mode, there is no
+ restriction (the set is always the full set of tokens).
+
+ """
+
+ def __init__(self, patterns, ignore, input):
+ """Initialize the scanner.
+
+ Parameters:
+ patterns : [(terminal, uncompiled regex), ...] or None
+ ignore : [terminal,...]
+ input : string
+
+ If patterns is None, we assume that the subclass has
+ defined self.patterns : [(terminal, compiled regex), ...].
+ Note that the patterns parameter expects uncompiled regexes,
+ whereas the self.patterns field expects compiled regexes.
+ """
+ self.tokens = [] # [(begin char pos, end char pos, token name, matched text), ...]
+ self.restrictions = []
+ self.input = input
+ self.pos = 0
+ self.ignore = ignore
+ self.first_line_number = 1
+
+ if patterns is not None:
+ # Compile the regex strings into regex objects
+ self.patterns = []
+ for terminal, regex in patterns:
+ self.patterns.append( (terminal, re.compile(regex)) )
+
+ def get_token_pos(self):
+ """Get the current token position in the input text."""
+ return len(self.tokens)
+
+ def get_char_pos(self):
+ """Get the current char position in the input text."""
+ return self.pos
+
+ def get_prev_char_pos(self, i=None):
+ """Get the previous position (one token back) in the input text."""
+ if self.pos == 0: return 0
+ if i is None: i = -1
+ return self.tokens[i][0]
+
+ def get_line_number(self):
+ """Get the line number of the current position in the input text."""
+ # TODO: make this work at any token/char position
+ return self.first_line_number + self.get_input_scanned().count('\n')
+
+ def get_column_number(self):
+ """Get the column number of the current position in the input text."""
+ s = self.get_input_scanned()
+ i = s.rfind('\n') # may be -1, but that's okay in this case
+ return len(s) - (i+1)
+
+ def get_input_scanned(self):
+ """Get the portion of the input that has been tokenized."""
+ return self.input[:self.pos]
+
+ def get_input_unscanned(self):
+ """Get the portion of the input that has not yet been tokenized."""
+ return self.input[self.pos:]
+
+ def token(self, i, restrict=None):
+ """Get the i'th token in the input.
+
+ If i is one past the end, then scan for another token.
+
+ Args:
+
+ restrict : [token, ...] or None; if restrict is None, then any
+ token is allowed. You may call token(i) more than once.
+ However, the restrict set may never be larger than what was
+ passed in on the first call to token(i).
+
+ """
+ if i == len(self.tokens):
+ self.scan(restrict)
+ if i < len(self.tokens):
+ # Make sure the restriction is more restricted. This
+ # invariant is needed to avoid ruining tokenization at
+ # position i+1 and higher.
+ if restrict and self.restrictions[i]:
+ for r in restrict:
+ if r not in self.restrictions[i]:
+ raise NotImplementedError("Unimplemented: restriction set changed")
+ return self.tokens[i]
+ raise NoMoreTokens()
+
+ def __repr__(self):
+ """Print the last 10 tokens that have been scanned in"""
+ output = ''
+ for t in self.tokens[-10:]:
+ output = '%s\n (@%s) %s = %s' % (output,t[0],t[2],repr(t[3]))
+ return output
+
+ def scan(self, restrict):
+ """Should scan another token and add it to the list, self.tokens,
+ and add the restriction to self.restrictions"""
+ # Keep looking for a token, ignoring any in self.ignore
+ while 1:
+ # Search the patterns for the longest match, with earlier
+ # tokens in the list having preference
+ best_match = -1
+ best_pat = '(error)'
+ for p, regexp in self.patterns:
+ # First check to see if we're ignoring this token
+ if restrict and p not in restrict and p not in self.ignore:
+ continue
+ m = regexp.match(self.input, self.pos)
+ if m and len(m.group(0)) > best_match:
+ # We got a match that's better than the previous one
+ best_pat = p
+ best_match = len(m.group(0))
+
+ # If we didn't find anything, raise an error
+ if best_pat == '(error)' and best_match < 0:
+ msg = 'Bad Token'
+ if restrict:
+ msg = 'Trying to find one of '+', '.join(restrict)
+ raise SyntaxError(self.pos, msg)
+
+ # If we found something that isn't to be ignored, return it
+ if best_pat not in self.ignore:
+ # Create a token with this data
+ token = (self.pos, self.pos+best_match, best_pat,
+ self.input[self.pos:self.pos+best_match])
+ self.pos = self.pos + best_match
+ # Only add this token if it's not in the list
+ # (to prevent looping)
+ if not self.tokens or token != self.tokens[-1]:
+ self.tokens.append(token)
+ self.restrictions.append(restrict)
+ return
+ else:
+ # This token should be ignored ..
+ self.pos = self.pos + best_match
+
+class Parser:
+ """Base class for Yapps-generated parsers.
+
+ """
+
+ def __init__(self, scanner):
+ self._scanner = scanner
+ self._pos = 0
+
+ def _peek(self, *types):
+ """Returns the token type for lookahead; if there are any args
+ then the list of args is the set of token types to allow"""
+ tok = self._scanner.token(self._pos, types)
+ return tok[2]
+
+ def _scan(self, type):
+ """Returns the matched text, and moves to the next token"""
+ tok = self._scanner.token(self._pos, [type])
+ if tok[2] != type:
+            raise SyntaxError(tok[0], 'Trying to find %s: %s' % (type, ', '.join(self._scanner.restrictions[self._pos])))
+ self._pos = 1 + self._pos
+ return tok[3]
+
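+# A generated rule drives the scanner through _peek/_scan; a hand-written
+# rule for a comma-separated list of IDs would look roughly like this
+# (illustrative sketch, not actual Yapps output):
+#
+#   def id_list(self):
+#       ids = [self._scan('ID')]
+#       while self._peek('","', 'END') == '","':
+#           self._scan('","')
+#           ids.append(self._scan('ID'))
+#       return ids
+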
+class Context:
+ """Class to represent the parser's call stack.
+
+ Every rule creates a Context that links to its parent rule. The
+ contexts can be used for debugging.
+
+ """
+
+ def __init__(self, parent, scanner, tokenpos, rule, args=()):
+ """Create a new context.
+
+ Args:
+ parent: Context object or None
+ scanner: Scanner object
+ pos: integer (scanner token position)
+ rule: string (name of the rule)
+ args: tuple listing parameters to the rule
+
+ """
+ self.parent = parent
+ self.scanner = scanner
+ self.tokenpos = tokenpos
+ self.rule = rule
+ self.args = args
+
+ def __str__(self):
+ output = ''
+ if self.parent: output = str(self.parent) + ' > '
+ output += self.rule
+ return output
+
+def print_line_with_pointer(text, p):
+ """Print the line of 'text' that includes position 'p',
+ along with a second line with a single caret (^) at position p"""
+
+ # TODO: separate out the logic for determining the line/character
+ # location from the logic for determining how to display an
+ # 80-column line to stderr.
+
+ # Now try printing part of the line
+ text = text[max(p-80, 0):p+80]
+ p = p - max(p-80, 0)
+
+ # Strip to the left
+ i = text[:p].rfind('\n')
+ j = text[:p].rfind('\r')
+ if i < 0 or (0 <= j < i): i = j
+ if 0 <= i < p:
+ p = p - i - 1
+ text = text[i+1:]
+
+ # Strip to the right
+ i = text.find('\n', p)
+ j = text.find('\r', p)
+ if i < 0 or (0 <= j < i): i = j
+ if i >= 0:
+ text = text[:i]
+
+ # Now shorten the text
+ while len(text) > 70 and p > 60:
+ # Cut off 10 chars
+ text = "..." + text[10:]
+ p = p - 7
+
+ # Now print the string, along with an indicator
+ print >>sys.stderr, '> ',text
+ print >>sys.stderr, '> ',' '*p + '^'
+
+def print_error(input, err, scanner):
+ """Print error messages, the parser stack, and the input text -- for human-readable error messages."""
+ # NOTE: this function assumes 80 columns :-(
+ # Figure out the line number
+ line_number = scanner.get_line_number()
+ column_number = scanner.get_column_number()
+ print >>sys.stderr, '%d:%d: %s' % (line_number, column_number, err.msg)
+
+ context = err.context
+ if not context:
+ print_line_with_pointer(input, err.charpos)
+
+ while context:
+ # TODO: add line number
+ print >>sys.stderr, 'while parsing %s%s:' % (context.rule, tuple(context.args))
+ print_line_with_pointer(input, context.scanner.get_prev_char_pos(context.tokenpos))
+ context = context.parent
+
+def wrap_error_reporter(parser, rule):
+ try:
+ return getattr(parser, rule)()
+ except SyntaxError, e:
+ input = parser._scanner.input
+ print_error(input, e, parser._scanner)
+ except NoMoreTokens:
+ print >>sys.stderr, 'Could not complete parsing; stopped around here:'
+ print >>sys.stderr, parser._scanner
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..184967a
--- /dev/null
+++ b/TODO
@@ -0,0 +1,288 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+
+This file contains a collection of ideas, in no particular order, for
+future enhancements to NetEpi Analysis. Some of these ideas have already
+been implemented.
+
+= Confidence Limits ===========================================================
+* Need to support display of mean and conf limits in crosstabs. Generally
+ the format is:
+
+ mean (lower, upper)
+
+ eg
+
+ 23.4 (17.9, 27.8)
+
+* need a mechanism to set the number of decimal places displayed
+ for means and confidence limits (in web crosstab?).
+
+* See the last example in the __main__ part of PopRate.py in 20050829-01.
+ Want to be able to use aus01stdpop_mf for persons-level standardisation,
+ not a special non-sex-specific version of the std pops.
+
+= SOOM engine =================================================================
+
+* Implement more demos/tests using the NIST Mathematical and
+ Computational Sciences Division Statistical Reference Datasets:
+
+ http://www.itl.nist.gov/div898/strd/index.html
+
+* review api_demo and plot_demo for things that don't work or need to be
+ reinstated.
+
+* quantiles (plural) to be provided as a stat method. Will require some
+ minor refactoring to support a method that results in multiple output
+ cols.
+
+* likewise correlation coefficients (will require passing multiple scalar
+ column names to stats function).
+
+* ability to specify resulting column name and label when calling stat methods.
+
+* encode kwargs to stat methods in resulting column name and label - this is
+ needed to make the column name unique if used more than once with different
+ parameters. Other possibilities could include requiring the user to specify
+ the column name in this case.
+
+* Allow code objects to be saved with data set, for things like outtrans
+ to use complex programmes for output value transformation (it can
+ already, but the code is not stored with the dataset, unlike the case
+ when dictionaries are used for outtrans etc).
+
+* comparisons between columns, eg:
+
+ "weight_after > weight_before"
+
+* further investigate storing date and time values as complex Numpy arrays.
+
+* consider this recipe for printing large numbers (will need some work for
+ floats):
+
+ http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/302035
+
+* Support for sort orders - both in the SOOM engine and in the UI - so
+ datasets can be processed in sorted order, and/or the values along a
+ given axis of a table or graph can be arranged in a specific order.
+
+* Transparent support for calculation of rates, including standardised
+ rates. That means building in knowledge about populations into a dataset
+ subclass. Complex but doable. If not, then a UI for manually going
+ through the steps needed to calculate datasets containing rates. The
+ prototype mortality analysis system which used SOOM did standardised
+ rate calculations, so it is not entirely new ground.
+
+* additional dataset and column metadata, eg:
+ copyright, date of production, source/producer, years covered,
+ geographical coverage
+
+* additional column metadata, indicating whether scalars are summable or not
+ (i.e. can the quantities be added up).
+
+* add ability to calculate proportions not just of frequencies (counts)
+ but also of other quantities which are summable (see above)
+
+* add binning facility so that scalar columns can be converted into
+ discrete values (bins) - including support for overlapping bins (by
+ using a tuple column data type).
+
+* add support for calculating correct variance from sampled surveys
+ with non-simple designs, via Taylor series linearisation in the Stats.py
+ module, and/or via Thomas Lumley's survey library in R.
+
+* further investigate changing the back-end storage from memory-mapped
+ Numeric arrays to HDF5 files via PyTables - this would allow
+ platform-independent storage, and avoid the current 2GB or 4GB limit on
+ 32 bit systems. However, would need a) addition of datetime data type
+ to PyTable b) conversion of SOOM from Numeric Python to numarray, c)
+ conversion of RPy from Numeric Python to numarray. None of these are
+ huge tasks, as numarray was designed to be highly backwardly compatible
+ with Numeric Python.
+
+* experiment with parallelising SOOM via PyPar and MPI - an initial
+ experiment with this yielded excellent results, although a high-speed
+ network substrate for the cluster is necessary in order to avoid
+ network bottlenecks.
+
+* ability to publish and import metadata in ISO metadata standard formats
+
+* further work on the way dataset paths are overlaid, where data gets
+  written, private workspaces, etc. The code currently should allow you to
+  point at a read-only dataset, and write local changes to an alternate
+ location - some directory creation bugs are preventing this. Another
+ option is to go the database route, and have an opaque store in which
+ dataset objects (private and public) are stored. More thought needed.
+
+* when printing or slicing a dataset, operate col by col, allowing cols
+ to be aggressively unmapped on platforms with restricted address space.
+
+* access control lists for datasets.
+
+* ability to select previous versions of a dataset (web also).
+
+* PopRate.calc_directly_std_rates() does not cope with marginal totals in
+ summarised data. This could be fixed by ignoring all rows with level < number
+ of condcols.
+
+= plotting ====================================================================
+
+* when paneling, value labels are line wrapped by R, but R doesn't allow enough
+ vertical space for more than one line. Determine feasibility of expanding
+  panel label, or abbreviating values or allowing short and long outtrans
+ (urgh).
+
+* pie chart support, as a template for support for many old-style R
+ graphics types
+
+* Need to be able to override axis labels via args.
+
+* Plot method argument checking.
+
+* Plot methods to accept already summarised dataset (where that makes sense)
+ and automatically use appropriate weighting
+
+* implement common (and perhaps custom) plot types in matplotlib, which
+ is a Python charting library which uses Antigrain and/or Cairo as the
+ raster back-end to produce beautiful quality charts (which put those
+ produced by R in the shade), as well as PS/PDF and SVG. If only R
+ used Cairo or Antigrain as its rendering engine...
+
+= web interface ===============================================================
+
+* make help button dependent on analysis type.
+
+* support for combining and coalescing categorical (and ordinal)
+ values, and engine and UI support for "binning" or "discretising"
+ continuous values.
+
+* support for suppressing certain values from appearing in graphs and
+  tables (but not from the underlying summary - that can already be done via
+ filters) - thus NOT excluding them from marginal totals etc. Strictly
+ speaking, bad practice, but in reality, it is a very common requirement
+ to want to present only some values as the detail on a graph or table.
+ This is related to binning above and probably best tackled together.
+ Note that a note should appear on tables and graphs to indicate that
+ values are being suppressed.
+
+* ability to override x and y axis labels.
+
+* NOT operator for AND/OR in filter builder (Tim prefers NOT .. AND
+ button - maybe better to open conjunction edit dialog with NOT checkbox
+ or pulldown).
+
+* add a button to save just the crosstab HTML table part of the page to
+ a file (in HTML or CSV format)
+
+* Row (or column) only crosstabs (add dummy column (row)) - could then
+ remove "summary tables" type and offer its output format as an option
+ in crosstabs
+
+* authentication (use .htaccess, PAM->ldap)
+
+* user groups with dataset and column group ACLs
+
+* One parameter which would be useful to expose in the UI is the layout
+ settings for panelled graphs. There is support for this in the graph
+ functions. In particular, it is very helpful to be able to stack all
+ the panels for time-series and other line graphs in a single vertical
+ column, so the x-axes line up. Also useful for comparing histograms,
+ for that matter.
+
+* options for "landscape" and "portrait" options for the PNG
+ graphs ought to be offered - the aspect ratios for these should be
+ about the same as for an A4 page.
+
+* line listing report facility (print dataset, grouping by values,
+ select columns)
+
+* ability to set colours, line and symbol types, fonts, font sizes etc
+ in graphs.
+
+* "Help" and "Copyright" links in the header bar
+
+* make the user interface less "modal" eg ability to change analysis
+ type while retaining as many parameter values as possible
+
+* categorical value aggregation and recoding facility
+
+* ability to specify sort orders in output (eg bar order in barcharts,
+ row or col order in tables)
+
+* ability to store sets of analysis parameters as "analysis objects",
+ and be able to recall them, or schedule them for automatic execution
+ at intervals, and share them with others. Note that there is already
+ support in the filters module for specifying relative datetime
+ comparisons.
+
+* 2x2xk: it would be useful to be able to specify a) several exposure
+  columns (each one "editable" with respect to +ve and -ve assignment as
+ at present); b) possibly several outcomes columns (similarly "editable")
+ and then c) run the 2x2xk analysis on the pairwise combinations of
+ the exposure and outcome columns. Rather than calculate (or display)
+ all the measures of association and related statistics, the user would
+ need to be able to select only some calculated measures/statistics of
+ interest (to avoid being overwhelmed with information), with the output
+ appearing perhaps in a crosstabulation. Such a facility would be very
+ useful in outbreak investigations (eg the Oswego data) where possible
+ causes for the illness in question are being sought, and there are many
+ exposures to screen for positive associations with the outcome. When
+ making such multiple comparisons, it is wise to employ techniques to
+ further adjust the p-values eg Bonferroni or Tukey adjustments (R has
+ functions to do these). Clearly a somewhat modified 2x2xk interface
+  would be needed to accommodate this, but the underlying mechanics of
+ calculation etc would all be the same.
+
+* 2x2xk cotabplots
+
+= epidemiological analysis ====================================================
+
+* replicate much of what OpenEpi does, but do the calculations on the
+ server, not the client. calculations to be done in Numeric Python,
+ or R via RPy where necessary.
+
+* add a 2x2 and r x c cross tab type - which are simplified crosstabs,
+ and calculate epidemiological quantities for them - this is contingent
+  on the value aggregation and recoding facility mentioned above being
+ in place.
+
+* add dataset metadata to capture sample design information, to enable
+ automatic selection of correct method for variance calculation
+
+* add association and mosaic plots, and implement colour coding of table
+ cells depending on variation from expectation as per Epi Info etc.
+
+* add support (via R) for more complex analysis eg logistic regression,
+ survival analysis
+
+* add data mining facilities (association rules in Python, also via
+ Orange and R)
+
+* add some specific epidemiologic plot types, including association and
+  mosaic plots (via R), various epidemic curves (time-series bar charts),
+ and things like population pyramid charts etc.
+
+* add ability to combine several plot types on a single chart
+
+* add ability to combine multiple analysis objects (plots, tables, line
+ listing reports) with text into a formatted report (HTML, possibly
+ re-rendering to RTF or MS-Word .doc formats via OpenOffice acting as
+ a headless server, driven via PyUNO). Ability to interpolate Python
+ variables into the text for automatically updated reports.
+
+* tighter integration with NetEpi Case Manager
+
+* ability to read .REC files produced by Epi Info for DOS and EpiData.
+
+* ability to consume SAS XML files.
diff --git a/debian/README.Debian b/debian/README.Debian
deleted file mode 100644
index 39d2e81..0000000
--- a/debian/README.Debian
+++ /dev/null
@@ -1,7 +0,0 @@
-netepi-analysis for Debian
---------------------------
-
-NetEpi Analysis is one part of the NetEpi suite. Have a look at
-netepi-casemgr and netepi-phredss as well.
-
- -- Andreas Tille <tille at debian.org>, Thu, 16 Jun 2005 12:01:09 +0200
diff --git a/debian/changelog b/debian/changelog
deleted file mode 100644
index e833292..0000000
--- a/debian/changelog
+++ /dev/null
@@ -1,5 +0,0 @@
-netepi-analysis (0.9.0-1) unstable; urgency=low
-
- * Initial release Closes: #nnnn (nnnn is the bug number of your ITP)
-
- -- Andreas Tille <tille at debian.org> Thu, 16 Jun 2005 12:01:09 +0200
diff --git a/debian/compat b/debian/compat
deleted file mode 100644
index ec63514..0000000
--- a/debian/compat
+++ /dev/null
@@ -1 +0,0 @@
-9
diff --git a/debian/control b/debian/control
deleted file mode 100644
index 3ab6a2f..0000000
--- a/debian/control
+++ /dev/null
@@ -1,78 +0,0 @@
-Source: netepi-analysis
-Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
-Uploaders: Andreas Tille <tille at debian.org>,
- Varun Hiremath <varun at debian.org>
-Section: science
-Priority: optional
-Build-Depends: debhelper (>= 9),
- cdbs,
- python-numeric-ext,
- python2.4-dev,
- python-central
-Build-Depends-Indep: python-egenix-mxdatetime
-Standards-Version: 3.9.8
-Vcs-Browser: https://anonscm.debian.org/viewvc/debian-med/trunk/packages/netepi-analysis/trunk/
-Vcs-Svn: svn://anonscm.debian.org/debian-med/trunk/packages/netepi-analysis/trunk/
-Homepage: https://github.com/timchurches/NetEpi-Analysis
-X-Python-Version: current
-
-Package: netepi-analysis
-Architecture: all
-Depends: ${shlibs:Depends},
- ${misc:Depends},
- python,
- python-numeric,
- python-egenix-mxdatetime,
- python-bsddb3,
- r-base,
- python-rpy,
- xvfb,
- yapps2
-Recommends: python-psyco
-Description: network-enabled tools for epidemiology and public health practice
- NetEpi, which is short for "Network-enabled Epidemiology", is a
- collaborative project to create a suite of free, open source software
- tools for epidemiology and public health practice. Anyone with an
- interest in population health epidemiology or public health
- informatics is encouraged to examine the prototype tools and to
- consider contributing to their further development. Contributions
- which involve formal and/or informal testing of the tools in a wide
- range of circumstances and environments are particularly welcome, as
- is assistance with design, programming and documentation tasks.
- .
- This is a tool for conducting epidemiological analysis of data sets,
- both large and small, either through a Web browser interface, or via
- a programmatic interface. In many respects it is similar to the
- analysis facilities included in the Epi Info suite, except that
- NetEpi Analysis is designed to be installed on servers and accessed
- remotely via Web browsers, although it can also be installed on
- individual desktop or laptop computers.
- .
- The software was developed by New South Wales Department of Health.
-
-Package: netepi-analysis-web
-Architecture: all
-Depends: netepi-analysis,
- python-albatross
-Description: network-enabled tools for epidemiology and public health practice
- NetEpi, which is short for "Network-enabled Epidemiology", is a
- collaborative project to create a suite of free, open source software
- tools for epidemiology and public health practice. Anyone with an
- interest in population health epidemiology or public health
- informatics is encouraged to examine the prototype tools and to
- consider contributing to their further development. Contributions
- which involve formal and/or informal testing of the tools in a wide
- range of circumstances and environments are particularly welcome, as
- is assistance with design, programming and documentation tasks.
- .
- This is a tool for conducting epidemiological analysis of data sets,
- both large and small, either through a Web browser interface, or via
- a programmatic interface. In many respects it is similar to the
- analysis facilities included in the Epi Info suite, except that
- NetEpi Analysis is designed to be installed on servers and accessed
- remotely via Web browsers, although it can also be installed on
- individual desktop or laptop computers.
- .
- The software was developed by New South Wales Department of Health.
- .
- This package contains the web interface to NetEpi Analysis
diff --git a/debian/copyright b/debian/copyright
deleted file mode 100644
index ce966a7..0000000
--- a/debian/copyright
+++ /dev/null
@@ -1,548 +0,0 @@
-Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
-Upstream-Name: NetEpi Analysis
-Upstream-Contact: http://code.google.com/p/netepi/downloads/list
-Source: http://code.google.com/p/netepi/downloads/list
-
-Files: *
-Copyright: © 2004-2010 Tim CHURCHES <TCHUR at doh.health.nsw.gov.au>
- Health Administration Corporation
-License: HACOS
-
-Files: debian/*
-Copyright: © 2005-2012 Andreas Tille <tille at debian.org>
-License: LGPL
-
-License: HACOS
- NetEpi Analysis is licensed under the terms of the Health
- Administration Corporation Open Source Licence V1.2 (HACOS License V1.2),
- the complete text of which appears below.
- .
- HEALTH ADMINISTRATION CORPORATION OPEN SOURCE LICENSE VERSION 1.2
- .
- 1. DEFINITIONS.
- .
- "Commercial Use" shall mean distribution or otherwise making the
- Covered Software available to a third party.
- .
- "Contributor" shall mean each entity that creates or contributes to
- the creation of Modifications.
- .
- "Contributor Version" shall mean in case of any Contributor the
- combination of the Original Software, prior Modifications used by a
- Contributor, and the Modifications made by that particular Contributor
- and in case of Health Administration Corporation in addition the
- Original Software in any form, including the form as Executable.
- .
- "Covered Software" shall mean the Original Software or Modifications
- or the combination of the Original Software and Modifications, in
- each case including portions thereof.
- .
- "Electronic Distribution Mechanism" shall mean a mechanism generally
- accepted in the software development community for the electronic
- transfer of data.
- .
- "Executable" shall mean Covered Software in any form other than
- Source Code.
- .
- "Initial Developer" shall mean the individual or entity identified as
- the Initial Developer in the Source Code notice required by Exhibit A.
- .
- "Health Administration Corporation" shall mean the Health
- Administration Corporation as established by the Health Administration
- Act 1982, as amended, of the State of New South Wales, Australia. The
- Health Administration Corporation has its offices at 73 Miller Street,
- North Sydney, New South Wales 2059, Australia.
- .
- "Larger Work" shall mean a work, which combines Covered Software or
- portions thereof with code not governed by the terms of this License.
- .
- "License" shall mean this document.
- .
- "Licensable" shall mean having the right to grant, to the maximum
- extent possible, whether at the time of the initial grant or
- subsequently acquired, any and all of the rights conveyed herein.
- .
- "Modifications" shall mean any addition to or deletion from the
- substance or structure of either the Original Software or any previous
- Modifications. When Covered Software is released as a series of files,
- a Modification is:
- .
- a) Any addition to or deletion from the contents of a file
- containing Original Software or previous Modifications.
- .
- b) Any new file that contains any part of the Original Software or
- previous Modifications.
- .
- "Original Software" shall mean the Source Code of computer software
- code which is described in the Source Code notice required by Exhibit
- A as Original Software, and which, at the time of its release under
- this License is not already Covered Software governed by this License.
- .
- "Patent Claims" shall mean any patent claim(s), now owned or hereafter
- acquired, including without limitation, method, process, and apparatus
- claims, in any patent Licensable by grantor.
- .
- "Source Code" shall mean the preferred form of the Covered Software
- for making modifications to it, including all modules it contains,
- plus any associated interface definition files, scripts used to
- control compilation and installation of an Executable, or source
- code differential comparisons against either the Original Software or
- another well known, available Covered Software of the Contributor's
- choice. The Source Code can be in a compressed or archival form,
- provided the appropriate decompression or de-archiving software is
- widely available for no charge.
- .
- "You" (or "Your") shall mean an individual or a legal entity exercising
- rights under, and complying with all of the terms of, this License or
- a future version of this License issued under Section 6.1. For legal
- entities, "You" includes an entity which controls, is controlled
- by, or is under common control with You. For the purposes of this
- definition, "control" means (a) the power, direct or indirect,
- to cause the direction or management of such entity, whether by
- contract or otherwise, or (b) ownership of more than fifty per cent
- (50%) of the outstanding shares or beneficial ownership of such entity.
- .
- 2. SOURCE CODE LICENSE.
- .
- 2.1 Health Administration Corporation Grant.
- .
- Subject to the terms of this License, Health Administration Corporation
- hereby grants You a world-wide, royalty-free, non-exclusive license,
- subject to third party intellectual property claims:
- .
- a) under copyrights Licensable by Health Administration Corporation
- to use, reproduce, modify, display, perform, sublicense and
- distribute the Original Software (or portions thereof) with or without
- Modifications, and/or as part of a Larger Work;
- .
- b) and under Patents Claims infringed by the making, using or selling
- of Original Software, to make, have made, use, practice, sell, and
- offer for sale, and/or otherwise dispose of the Original Software
- (or portions thereof).
- .
- c) The licenses granted in this Section 2.1(a) and (b) are effective
- on the date Health Administration Corporation first distributes
- Original Software under the terms of this License.
- .
- d) Notwithstanding Section 2.1(b) above, no patent license is granted:
- 1) for code that You delete from the Original Software; 2) separate
- from the Original Software; or 3) for infringements caused by: i)
- the modification of the Original Software or ii) the combination of
- the Original Software with other software or devices.
- .
- 2.2 Contributor Grant.
- .
- Subject to the terms of this License and subject to third party
- intellectual property claims, each Contributor hereby grants You a
- world-wide, royalty-free, non-exclusive license:
- .
- a) under copyrights Licensable by Contributor, to use, reproduce,
- modify, display, perform, sublicense and distribute the Modifications
- created by such Contributor (or portions thereof) either on an
- unmodified basis, with other Modifications, as Covered Software and/or
- as part of a Larger Work; and
- .
- b) under Patent Claims necessarily infringed by the making, using,
- or selling of Modifications made by that Contributor either alone
- and/or in combination with its Contributor Version (or portions of
- such combination), to make, use, sell, offer for sale, have made,
- and/or otherwise dispose of: 1) Modifications made by that Contributor
- (or portions thereof); and 2) the combination of Modifications made
- by that Contributor with its Contributor Version (or portions of
- such combination).
- .
- c) The licenses granted in Sections 2.2(a) and 2.2(b) are effective
- on the date Contributor first makes Commercial Use of the Covered
- Software.
- .
- d) Notwithstanding Section 2.2(b) above, no patent license is granted:
- 1) for any code that Contributor has deleted from the Contributor
- Version; 2) separate from the Contributor Version; 3) for infringements
- caused by: i) third party modifications of Contributor Version or ii)
- the combination of Modifications made by that Contributor with other
- software (except as part of the Contributor Version) or other devices;
- or 4) under Patent Claims infringed by Covered Software in the absence
- of Modifications made by that Contributor.
- .
- 3. DISTRIBUTION OBLIGATIONS.
- .
- 3.1 Application of License.
- .
- The Modifications which You create or to which You contribute are governed
- by the terms of this License, including without limitation Section
- 2.2. The Source Code version of Covered Software may be distributed
- only under the terms of this License or a future version of this License
- released under Section 6.1, and You must include a copy of this License
- with every copy of the Source Code You distribute. You may not offer or
- impose any terms on any Source Code version that alters or restricts the
- applicable version of this License or the recipients' rights hereunder.
- .
- 3.2 Availability of Source Code.
- .
- Any Modification which You create or to which You contribute must be made
- available in Source Code form under the terms of this License either on
- the same media as an Executable version or via an accepted Electronic
- Distribution Mechanism to anyone to whom you made an Executable version
- available; and if made available via Electronic Distribution Mechanism,
- must remain available for at least twelve (12) months after the date it
- initially became available, or at least six (6) months after a subsequent
- version of that particular Modification has been made available to
- such recipients. You are responsible for ensuring that the Source Code
- version remains available even if the Electronic Distribution Mechanism
- is maintained by a third party.
- .
- 3.3 Description of Modifications.
- .
- You must cause all Covered Software to which You contribute to contain
- a file documenting the changes You made to create that Covered Software
- and the date of any change. You must include a prominent statement that
- the Modification is derived, directly or indirectly, from Original
- Software provided by Health Administration Corporation and including
- the name of Health Administration Corporation in (a) the Source Code,
- and (b) in any notice in an Executable version or related documentation
- in which You describe the origin or ownership of the Covered Software.
- .
- 3.4 Intellectual Property Matters
- .
- a) Third Party Claims.
- .
- If Contributor has knowledge that a license under a third party's
- intellectual property rights is required to exercise the rights
- granted by such Contributor under Sections 2.1 or 2.2, Contributor
- must include a text file with the Source Code distribution titled
- "LEGAL'' which describes the claim and the party making the claim
- in sufficient detail that a recipient will know whom to contact. If
- Contributor obtains such knowledge after the Modification is made
- available as described in Section 3.2, Contributor shall promptly
- modify the LEGAL file in all copies Contributor makes available
- thereafter and shall take other steps (such as notifying appropriate
- mailing lists or newsgroups) reasonably calculated to inform those
- who received the Covered Software that new knowledge has been obtained.
- .
- b) Contributor APIs.
- .
- If Contributor's Modifications include an application programming
- interface (API) and Contributor has knowledge of patent licenses
- which are reasonably necessary to implement that API, Contributor
- must also include this information in the LEGAL file.
- .
- c) Representations.
- .
- Contributor represents that, except as disclosed pursuant to Section
- 3.4(a) above, Contributor believes that Contributor's Modifications are
- Contributor's original creation(s) and/or Contributor has sufficient
- rights to grant the rights conveyed by this License.
- .
- 3.5 Required Notices.
- .
- You must duplicate the notice in Exhibit A in each file of the Source
- Code. If it is not possible to put such notice in a particular Source
- Code file due to its structure, then You must include such notice in a
- location (such as a relevant directory) where a user would be likely to
- look for such a notice. If You created one or more Modification(s) You
- may add your name as a Contributor to the notice described in Exhibit
- A. You must also duplicate this License in any documentation for the
- Source Code where You describe recipients' rights or ownership rights
- relating to Covered Software. You may choose to offer, and to charge a
- fee for, warranty, support, indemnity or liability obligations to one or
- more recipients of Covered Software. However, You may do so only on Your
- own behalf, and not on behalf of Health Administration Corporation or any
- Contributor. You must make it absolutely clear that any such warranty,
- support, indemnity or liability obligation is offered by You alone,
- and You hereby agree to indemnify Health Administration Corporation and
- every Contributor for any liability incurred by Health Administration
- Corporation or such Contributor as a result of warranty, support,
- indemnity or liability terms You offer.
- .
- 3.6 Distribution of Executable Versions.
- .
- You may distribute Covered Software in Executable form only if the
- requirements of Sections 3.1-3.5 have been met for that Covered Software,
- and if You include a notice stating that the Source Code version of the
- Covered Software is available under the terms of this License, including
- a description of how and where You have fulfilled the obligations of
- Section 3.2. The notice must be conspicuously included in any notice in
- an Executable version, related documentation or collateral in which You
- describe recipients' rights relating to the Covered Software. You may
- distribute the Executable version of Covered Software or ownership rights
- under a license of Your choice, which may contain terms different from
- this License, provided that You are in compliance with the terms of this
- License and that the license for the Executable version does not attempt
- to limit or alter the recipient's rights in the Source Code version from
- the rights set forth in this License. If You distribute the Executable
- version under a different license You must make it absolutely clear
- that any terms which differ from this License are offered by You alone,
- not by Health Administration Corporation or any Contributor. You hereby
- agree to indemnify Health Administration Corporation and every Contributor
- for any liability incurred by Health Administration Corporation or such
- Contributor as a result of any such terms You offer.
- .
- 3.7 Larger Works.
- .
- You may create a Larger Work by combining Covered Software with other
- software not governed by the terms of this License and distribute the
- Larger Work as a single product. In such a case, You must make sure the
- requirements of this License are fulfilled for the Covered Software.
- .
- 4. INABILITY TO COMPLY DUE TO STATUTE OR REGULATION.
- .
- If it is impossible for You to comply with any of the terms of this
- License with respect to some or all of the Covered Software due to
- statute, judicial order, or regulation then You must: (a) comply with the
- terms of this License to the maximum extent possible; and (b) describe the
- limitations and the code they affect. Such description must be included
- in the LEGAL file described in Section 3.4 and must be included with all
- distributions of the Source Code. Except to the extent prohibited by
- statute or regulation, such description must be sufficiently detailed
- for a recipient of ordinary skill to be able to understand it.
- .
- 5. APPLICATION OF THIS LICENSE.
- .
- This License applies to code to which Health Administration Corporation
- has attached the notice in Exhibit A and to related Covered Software.
- .
- 6. VERSIONS OF THE LICENSE.
- .
- 6.1 New Versions.
- .
- Health Administration Corporation may publish revised and/or new
- versions of the License from time to time. Each version will be given
- a distinguishing version number.
- .
- 6.2 Effect of New Versions.
- .
- Once Covered Software has been published under a particular version
- of the License, You may always continue to use it under the terms of
- that version. You may also choose to use such Covered Software under
- the terms of any subsequent version of the License published by Health
- Administration Corporation. No one other than Health Administration
- Corporation has the right to modify the terms applicable to Covered
- Software created under this License.
- .
- 7. DISCLAIMER OF WARRANTY.
- .
- COVERED SOFTWARE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS" BASIS,
- WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
- WITHOUT LIMITATION, WARRANTIES THAT THE COVERED SOFTWARE IS FREE OF
- DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING. THE
- ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE COVERED SOFTWARE IS
- WITH YOU. SHOULD ANY COVERED SOFTWARE PROVE DEFECTIVE IN ANY RESPECT, YOU
- (NOT HEALTH ADMINISTRATION CORPORATION, ITS LICENSORS OR AFFILIATES OR
- ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY SERVICING, REPAIR
- OR CORRECTION. THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART
- OF THIS LICENSE. NO USE OF ANY COVERED SOFTWARE IS AUTHORIZED HEREUNDER
- EXCEPT UNDER THIS DISCLAIMER.
- .
- 8. TERMINATION.
- .
- 8.1 This License and the rights granted hereunder will terminate
- automatically if You fail to comply with terms herein and fail to
- cure such breach within 30 days of becoming aware of the breach. All
- sublicenses to the Covered Software which are properly granted shall
- survive any termination of this License. Provisions which, by their
- nature, must remain in effect beyond the termination of this License
- shall survive.
- .
- 8.2 If You initiate litigation by asserting a patent infringement claim
- (excluding declaratory judgment actions) against Health Administration
- Corporation or a Contributor (Health Administration Corporation
- or Contributor against whom You file such action is referred to as
- "Participant") alleging that:
- .
- a) such Participant's Contributor Version directly or indirectly
- infringes any patent, then any and all rights granted by such
- Participant to You under Sections 2.1 and/or 2.2 of this License
- shall, upon 60 days notice from Participant terminate prospectively,
- unless if within 60 days after receipt of notice You either: (i)
- agree in writing to pay Participant a mutually agreeable reasonable
- royalty for Your past and future use of Modifications made by such
- Participant, or (ii) withdraw Your litigation claim with respect to
- the Contributor Version against such Participant. If within 60 days
- of notice, a reasonable royalty and payment arrangement are not
- mutually agreed upon in writing by the parties or the litigation
- claim is not withdrawn, the rights granted by Participant to
- You under Sections 2.1 and/or 2.2 automatically terminate at the
- expiration of the 60 day notice period specified above.
- .
- b) any software, hardware, or device, other than such Participant's
- Contributor Version, directly or indirectly infringes any patent,
- then any rights granted to You by such Participant under Sections
- 2.1(b) and 2.2(b) are revoked effective as of the date You first
- made, used, sold, distributed, or had made, Modifications made by
- that Participant.
- .
- 8.3 If You assert a patent infringement claim against Participant
- alleging that such Participant's Contributor Version directly or
- indirectly infringes any patent where such claim is resolved (such as by
- license or settlement) prior to the initiation of patent infringement
- litigation, then the reasonable value of the licenses granted by such
- Participant under Sections 2.1 or 2.2 shall be taken into account in
- determining the amount or value of any payment or license.
- .
- 8.4 In the event of termination under Sections 8.1 or 8.2 above, all
- end user license agreements (excluding distributors and resellers) which
- have been validly granted by You or any distributor hereunder prior to
- termination shall survive termination.
- .
- 9. LIMITATION OF LIABILITY.
- .
- 9.1 UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT
- (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, HEALTH
- ADMINISTRATION CORPORATION, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR
- OF COVERED SOFTWARE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE
- TO ANY PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL
- DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS
- OF GOODWILL, WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND
- ALL OTHER COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE
- BEEN INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF
- LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL INJURY
- RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT APPLICABLE LAW
- PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION
- OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, BUT MAY ALLOW
- LIABILITY TO BE LIMITED; IN SUCH CASES, A PARTY'S, ITS EMPLOYEES',
- LICENSORS' OR AFFILIATES' LIABILITY SHALL BE LIMITED TO AUD$100. NOTHING
- CONTAINED IN THIS LICENSE SHALL PREJUDICE THE STATUTORY RIGHTS OF ANY
- PARTY DEALING AS A CONSUMER.
- .
- 9.2 Notwithstanding any other clause in the licence, and to the extent
- permitted by law:
- .
- (a) Health Administration Corporation ("the Corporation") excludes all
- conditions and warranties which would otherwise be implied into
- a supply of goods or services arising out of or in relation to
- the granting of this licence by the Corporation or any associated
- acquisition of software to which this licence relates;
- .
- (b) Where a condition or warranty is implied into such a supply and
- that condition or warranty cannot be excluded by law that warranty
- or condition is implied into that supply and the liability of the
- Health Administration Corporation for a breach of that condition or
- warranty is limited to the fullest extent permitted by law and, in
- respect of conditions and warranties implied by the Trade Practices
- Act (Commonwealth of Australia) 1974, is limited, to the extent
- permitted by law, to one or more of the following at the election
- of the Corporation:
- .
- (A) In the case of goods: (i) the replacement of the goods or the
- supply of equivalent goods; (ii) the repair of the goods; (iii)
- the payment of the cost of replacing the goods or of acquiring
- equivalent goods; (iv) the payment of the cost of having the
- goods repaired; and
- .
- (B) in the case of services: (i) the supplying of the services again;
- or (ii) the payment of the cost of having the services supplied
- again.
- .
- 10. MISCELLANEOUS.
- .
- This License represents the complete agreement concerning subject matter
- hereof. All rights in the Covered Software not expressly granted under
- this License are reserved. Nothing in this License shall grant You any
- rights to use any of the trademarks of Health Administration Corporation
- or any of its Affiliates, even if any of such trademarks are included
- in any part of Covered Software and/or documentation to it.
- .
- This License is governed by the laws of the State of New South Wales,
- Australia excluding its conflict-of-law provisions. All disputes or
- litigation arising from or relating to this Agreement shall be subject
- to the jurisdiction of the Supreme Court of New South Wales. If any part
- of this Agreement is found void and unenforceable, it will not affect
- the validity of the balance of the Agreement, which shall remain valid
- and enforceable according to its terms.
- .
- 11. RESPONSIBILITY FOR CLAIMS.
- .
- As between Health Administration Corporation and the Contributors,
- each party is responsible for claims and damages arising, directly or
- indirectly, out of its utilisation of rights under this License and You
- agree to work with Health Administration Corporation and Contributors
- to distribute such responsibility on an equitable basis. Nothing herein
- is intended or shall be deemed to constitute any admission of liability.
- .
- EXHIBIT A
- .
- The contents of this file are subject to the HACOS License Version 1.2
- (the "License"); you may not use this file except in compliance with
- the License.
- .
- Software distributed under the License is distributed on an "AS IS"
- basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
- License for the specific language governing rights and limitations under
- the License.
- .
- The Original Software is "NetEpi Analysis". The Initial Developer
- of the Original Software is the Health Administration Corporation,
- incorporated in the State of New South Wales, Australia.
- .
- APPENDIX 1. DIFFERENCES BETWEEN THE HACOS LICENSE VERSION 1.2, THE
- MOZILLA PUBLIC LICENSE VERSION 1.1 AND THE NOKIA OPEN SOURCE LICENSE
- (NOKOS LICENSE) VERSION 1.0A
- .
- The HACOS License Version 1.2 was derived from the Mozilla Public
- License Version 1.1 using some of the changes to the Mozilla Public
- License embodied in the Nokia Open Source License (NOKOS License)
- Version 1.0a. The differences between the HACOS License Version 1.2
- (this document), the Mozilla Public License and the NOKOS License are
- as follows:
- .
- i. The title of the license was changed to "Health Administration
- Corporation Open Source License Version 1.2".
- .
- ii. Globally, all references to "Netscape Communications Corporation",
- "Mozilla", "Nokia" and "Nokia Corporation" were changed to "Health
- Administration Corporation".
- .
- iii. Globally, the words "means" and "Covered Code" as used in the
- Mozilla Public License were changed to "shall mean" and "Covered
- Software" respectively, as used in the NOKOS License.
- .
- iv. In Section 1 (Definitions), a definition of "Health Administration
- Corporation" was added.
- .
- v. In Section 2, the term "intellectual property rights" used in the
- Mozilla Public License was replaced by the term "copyrights"
- as used in the NOKOS License.
- .
- vi. In Section 2.2 (Contributor Grant), the words "Subject to the
- terms of this License" which appear in the NOKOS License were
- added to the Mozilla Public License.
- .
- vii. The sentence "However, You may include an additional document
- offering the additional rights described in Section 3.5." which
- appears in the Mozilla Public License was omitted.
- .
- viii. Section 6.3 (Derivative Works) of the Mozilla Public License,
- which permits modifications to the Mozilla Public License,
- was omitted.
- .
- ix. The original Section 9 (Limitation of Liability) was renumbered
- as Section 9.1, a maximum liability of AUD$100 was specified
- for those jurisdictions which do not allow complete exclusion of
- liability but which do allow limitation of liability. The sentence
- "NOTHING CONTAINED IN THE LICENSE SHALL PREJUDICE THE STATUTORY
- RIGHTS OF ANY PARTY DEALING AS A CONSUMER.", which appears in the
- NOKOS License but not in the Mozilla Public License, was added.
- .
- x. Section 9.2 was added in order to further limit liability to the
- maximum extent permitted by the Commonwealth of Australia Trade
- Practices Act 1974.
- .
- xi. Section 10 of the Mozilla Public License, which provides additional
- conditions for United States Government End Users, was omitted.
- .
- xii. The governing law and jurisdiction for the settlement of disputes
- in Section 11 of the Mozilla Public License and Section 10 of the
- NOKOS License was changed to the laws of the State of New South
- Wales and the Supreme Court of New South Wales respectively. The
- exclusion of the application of the United Nations Convention on
- Contracts for the International Sale of Goods which appears in
- the Mozilla Public License was omitted.
- .
- xiii. Section 13 (Multiple-Licensed Code) of the Mozilla Public License
- was omitted.
- .
- xiv. The provisions for alternative licensing arrangement for contributed
- code which appear in Exhibit A of the Mozilla Public License
- were omitted.
-
diff --git a/debian/docs b/debian/docs
deleted file mode 100644
index 206cb2c..0000000
--- a/debian/docs
+++ /dev/null
@@ -1,2 +0,0 @@
-docs/README.searchabletext
-README
diff --git a/debian/netepi-analysis.examples b/debian/netepi-analysis.examples
deleted file mode 100644
index 1549b67..0000000
--- a/debian/netepi-analysis.examples
+++ /dev/null
@@ -1 +0,0 @@
-demo
diff --git a/debian/netepi-analysis.install b/debian/netepi-analysis.install
deleted file mode 100644
index dd95e40..0000000
--- a/debian/netepi-analysis.install
+++ /dev/null
@@ -1 +0,0 @@
-debian/tmp/*
diff --git a/debian/rules b/debian/rules
deleted file mode 100755
index 865600a..0000000
--- a/debian/rules
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/usr/bin/make -f
-
-DEB_PYTHON_SYSTEM:= pycentral
-
-include /usr/share/cdbs/1/rules/debhelper.mk
-include /usr/share/cdbs/1/class/python-distutils.mk
-
-# Add soomext build dir to PYTHONPATH; required for building other modules.
-export PYTHONPATH=$(CURDIR)/soomext/build/build-modules/
-
-clean::
- $(RM) -r soomext/build
-
-makebuilddir::
- (cd soomext; \
- python setup.py build --build-platlib build/build-modules; \
- python setup.py install --root=debian/netepi-analysis/)
-
-install/netepi-analysis-web::
- (cd web; python install.py install_prefix=$(CURDIR)/debian/netepi-analysis-web)
-
-get-orig-source:
- -uscan --upstream-version 0 --rename
diff --git a/debian/source/format b/debian/source/format
deleted file mode 100644
index 163aaf8..0000000
--- a/debian/source/format
+++ /dev/null
@@ -1 +0,0 @@
-3.0 (quilt)
diff --git a/debian/watch b/debian/watch
deleted file mode 100644
index 74adaa3..0000000
--- a/debian/watch
+++ /dev/null
@@ -1,8 +0,0 @@
-version=4
-
-https://github.com/timchurches/NetEpi-Analysis/releases .*/archive/v at ANY_VERSION@@ARCHIVE_EXT@
-
-## There is a later version (0.9.0) at Google Code than at GitHub (0.8.1)
-#opts="uversionmangle=s/-/\./g" \
-#http://code.google.com/p/netepi/downloads/list?can=1 \
-# .*/NetEpi-Analysis-([-.\d]+)\.(?:tgz|tbz|txz|(?:tar\.(?:gz|bz2|xz))|zip)
diff --git a/demo/SOOM_demo_data_load.py b/demo/SOOM_demo_data_load.py
new file mode 100644
index 0000000..ad8d6f7
--- /dev/null
+++ b/demo/SOOM_demo_data_load.py
@@ -0,0 +1,90 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: SOOM_demo_data_load.py 2757 2007-07-26 07:07:10Z tchur $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/demo/SOOM_demo_data_load.py,v $
+
+# Python standard modules
+import time
+import os
+import sys
+import random
+import optparse
+
+try:
+ import psyco
+except ImportError:
+ pass
+else:
+    # psyco.log('/tmp/psyco.log')
+ psyco.full()
+
+# SOOM modules
+from SOOMv0 import *
+
+# Seed the RNG so that the random values generated for a calculated
+# column (used for testing) are reproducible.
+random.seed(123)
+
+datasets = 'nhds', 'whopop', 'whotext', 'syndeath', 'epitools', 'nhmrc'
+
+def main():
+ optp = optparse.OptionParser()
+ optp.add_option('--datasets', dest='datasets',
+ default='all',
+ help='datasets to load - %s, default "all"' % ', '.join(datasets))
+ optp.add_option('-S', '--soompath', dest='soompath',
+ help='SOOM dataset write path')
+ optp.add_option('--nhds-years', dest='nhds_years',
+ help='NHDS years to load')
+ optp.add_option('-N', '--rowlimit', dest='rowlimit', type='int',
+ help='stop loading datasets after ROWLIMIT rows')
+ optp.add_option('--nhds-iterations', dest='nhds_iterations', type='int',
+ help='number of iterations of NHDS data to load (default=1)',
+ default=1)
+ optp.add_option('-C', '--chunkrows', dest='chunkrows', type='int',
+ default=500000,
+ help='read sources in CHUNKROWS blocks (default 500000)')
+ optp.add_option('-q', '--quiet', dest='verbose', action='store_false',
+ default=True,
+ help='quieter operation')
+ options, args = optp.parse_args()
+ options.datasets = options.datasets.split(',')
+ if 'all' in options.datasets:
+ options.datasets = datasets
+ else:
+ for dsname in options.datasets:
+ if dsname not in datasets:
+ optp.error('Unknown dataset %r' % dsname)
+ moduledir = os.path.dirname(__file__)
+ options.datadir = os.path.join(moduledir, 'rawdata')
+ options.scratchdir = os.path.join(moduledir, 'scratch')
+ if not options.soompath:
+ options.soompath = os.path.normpath(os.path.join(moduledir, '..', 'SOOM_objects'))
+ soom.messages = options.verbose
+ soom.setpath(options.soompath, options.soompath)
+
+ loadstart = time.time()
+
+ for dsname in options.datasets:
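+        # __import__ with a non-empty fromlist returns the leaf module
+        # (loaders.<dsname>) rather than the top-level 'loaders' package.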
+ mod = __import__('loaders.' + dsname, globals(), locals(), [dsname])
+ if options.verbose:
+ print 'Using %r loader from %r' % (dsname, mod.__file__)
+ mod.load(options)
+
+ elapsed = time.time() - loadstart
+ print "Loading of demo data took %.3f seconds" % elapsed
+
+if __name__ == '__main__':
+ main()
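+
+# A minimal usage sketch (hypothetical invocation; the paths are assumptions):
+#
+#   python SOOM_demo_data_load.py --datasets=syndeath,whopop -S ./SOOM_objects
+#
+# would load only the synthetic deaths and WHO population datasets, writing
+# the resulting SOOM objects to ./SOOM_objects.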
diff --git a/demo/api_demo.py b/demo/api_demo.py
new file mode 100644
index 0000000..8f94401
--- /dev/null
+++ b/demo/api_demo.py
@@ -0,0 +1,930 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: api_demo.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/demo/api_demo.py,v $
+
+import os
+import sys
+import time
+import itertools
+import inspect
+import traceback
+import struct
+import mx.DateTime
+import Numeric
+import MA
+import optparse
+from SOOMv0 import *
+
+slow_warning = '''
+ NOTE: this may take several minutes if you have the full 2 million
+ NHDS records loaded.
+'''
+
+optp = optparse.OptionParser()
+optp.add_option('--soompath', dest='soompath',
+ help='SOOM dataset path')
+optp.add_option('--writepath', dest='writepath',
+ help='Dataset write path (for temporary objects, defaults '
+ 'to SOOMPATH)')
+optp.add_option('--nopause', dest='pause', action='store_false',
+ default=True,
+ help='Don\'t pause for user after each step')
+optp.add_option('--nomessages', dest='messages', action='store_false',
+ default=True,
+ help='Don\'t generate SOOM messages')
+optp.add_option('--skipto', dest='skipto',
+ help='Skip to the named test')
+optp.add_option('--skipslow', dest='skipslow',
+ default=False, action='store_true',
+ help='Skip slow tests')
+optp.add_option('--use-psyco', dest='psyco',
+ default=False, action='store_true',
+ help='Enable Psyco JIT compiler')
+optp.add_option('--log', dest='log',
+ help='Log output to file LOG')
+options, args = optp.parse_args()
+
+if options.psyco:
+ try:
+ import psyco
+ except ImportError:
+ soom.warning('Psyco not available')
+ else:
+ psyco.full()
+
+class test:
+ skipto = None
+ tests = []
+ slow = False
+ broken = False
+
+ passed = 0
+ exceptions = []
+ skipped = 0
+    cumulative = 0.0
+
+ logfile = None
+ if options.log:
+ logfile = open(options.log, 'a')
+
+ def __init__(self, name, text, code):
+ self.name = name
+ self.text = text
+ self.code = code
+ test.tests.append(self)
+ self.run()
+
+ def write(cls, msg):
+ if cls.logfile:
+ print >> cls.logfile, msg
+ print msg
+ write = classmethod(write)
+
+ def prefix(self, lines, prefix):
+ lines = lines.splitlines()
+ for line in lines:
+ self.write(prefix + line.rstrip())
+
+ def deindent(self, lines):
+ lines = lines.splitlines()
+ while not lines[0]:
+ del lines[0]
+ first = lines[0]
+ offset = len(first) - len(first.lstrip())
+ lines = [line[offset:] for line in lines]
+ while not lines[-1]:
+ del lines[-1]
+ return '\n'.join(lines)
+
+ def run(self):
+ class skip(Exception): pass
+ try:
+ if test.skipto:
+ if test.skipto == self.name:
+ test.skipto = None
+ elif self.name not in ('0a', '1'):
+ raise skip
+ if self.broken:
+ raise skip
+ if self.slow and options.skipslow:
+ raise skip
+ except skip:
+ test.skipped += 1
+ return
+ self.write('\n%s\n# test %s' % ('#' * 78, self.name))
+ if self.text:
+ self.prefix(self.deindent(self.text), '# ')
+ if self.slow:
+ self.prefix(self.deindent(slow_warning), '# ')
+ if self.code:
+ code = self.deindent(self.code)
+ self.prefix(code, '>>> ')
+ if options.pause:
+ raw_input('Press Enter to execute this code\n')
+ st = time.time()
+ try:
+ exec code in globals(), globals()
+ et = time.time()
+                test.cumulative += et - st
+ except KeyboardInterrupt:
+ test.skipto = 'end'
+ except:
+ test.exceptions.append(self.name)
+ exc_type, exc_value, exc_tb = sys.exc_info()
+ try:
+ while exc_tb:
+ if exc_tb.tb_frame.f_code.co_filename == '<string>':
+ exc_tb = exc_tb.tb_next
+ break
+ exc_tb = exc_tb.tb_next
+ l = traceback.format_exception(exc_type, exc_value, exc_tb)
+ l.append('TEST FAILED')
+ self.prefix(''.join(l), '!!! ')
+ finally:
+ del exc_type, exc_value, exc_tb
+ else:
+ test.passed += 1
+ self.write('That took %.3f seconds' % (et - st))
+ else:
+ if options.pause:
+ raw_input('Press Enter to continue\n')
+
+ def report(cls):
+ if cls.exceptions:
+ cls.write('Tests that raised exceptions: %s' %
+ ', '.join(cls.exceptions))
+ cls.write('%d tests okay, %d tests skipped, %d tests raised exceptions'%
+ (cls.passed, cls.skipped, len(cls.exceptions)))
+        cls.write('total test run time %.3f seconds' % (cls.cumulative,))
+ report = classmethod(report)
+
+class slowtest(test):
+ def __init__(self, *args, **kw):
+ self.slow = True
+ test.__init__(self, *args, **kw)
+
+class brokentest(test):
+ def __init__(self, *args, **kw):
+ self.broken = True
+ test.__init__(self, *args, **kw)
+
+test.skipto = options.skipto
+
+test('0',
+ '''
+ SOOMv0 API demo
+
+ Make your terminal window as wide as possible and the font as small as
+ possible so that lines don't wrap.
+ ''', None)
+
+test('0a',
+ 'Import the SOOM module.',
+ 'from SOOMv0 import *')
+
+if not options.soompath:
+ options.soompath = os.path.normpath(os.path.join(os.path.dirname(__file__),
+ '..', 'SOOM_objects'))
+soom.setpath(options.soompath, options.writepath or options.soompath)
+soom.messages = options.messages
+
+
+test('1',
+ 'Load the nhds dataset object from disc where it is stored as a compressed pickle:',
+ 'nhds = dsload("nhds")')
+
+# Currently broken
+brokentest('1a',
+ 'Examine what datasets are currently loaded.',
+ 'print soom')
+
+test('1b',
+ 'Examine the metadata property for the nhds dataset.',
+ 'print nhds.describe()')
+
+test('1c',
+ 'Examine the metadata properties for all nhds columns.',
+ 'print nhds.describe_with_cols()')
+
+test('2a',
+ 'Examine the metadata for one column',
+ '''print nhds['marital_status'].describe()''')
+
+slowtest('2b',
+ '''
+ Now define a further (not very useful) derived age column
+
+ Note: you only need to derive columns once if they are permanent,
+ as here. Typically this would be done when the data is first loaded,
+ but new permanent columns can still be created during analysis
+ without having to re-write the entire dataset.
+ ''',
+ '''
+ def age_hours(raw_age,age_units):
+ units_multiplier = Numeric.choose(age_units - 1, (365.25, 30.5, 1.0))
+ returnarray = raw_age * 24 * units_multiplier
+ returnmask = Numeric.zeros(len(returnarray))
+ return returnarray, returnmask
+ nhds.lock()
+ nhds.derivedcolumn(dername='age_hours',dercols=('raw_age','age_units'),derfunc=age_hours,coltype='scalar',datatype=float,outtrans=None,label='Age (hours)')
+ nhds.save()
+ ''')
+
+test('2c',
+ 'Examine the metadata for the nhds dataset again.',
+ 'print nhds.describe_with_cols()')
+
+test('3a',
+ 'Pythonic item access on one column of the dataset',
+ '''print nhds['age'][3846]''')
+
+test('3b',
+ 'Pythonic item access on a date column of the dataset',
+ '''print nhds['randomdate'][3846]''')
+
+test('3c',
+ 'Pythonic item access on a string column of the dataset',
+ '''print nhds['diagnosis1'][3846]''')
+
+test('4a',
+ 'Pythonic slicing of one column of the dataset',
+ '''print nhds['geog_region'][2:5]''')
+
+test('4c',
+ 'Pythonic slicing of a string column of the dataset',
+ '''print nhds['diagnosis1'][2:9]''')
+
+test('5',
+ 'The len() function returns the number of rows in a dataset.',
+ '''print len(nhds)''')
+
+test('6a',
+ '''
+ Note that a slice on a column with missing values preserves them
+ (denoted by '--'). The randomvalue column contains a uniform random
+    variate on the interval 0 to 1, but with all values between 0.7 and
+    0.8 set to missing.
+ ''',
+ '''print nhds['randomvalue'][500:530]''')
+
+brokentest('6b',
+ 'Same slice of nhds.randomvalue using the getcolumndata() method',
+ '''print nhds.getcolumndata('randomvalue')[500:530]''')
+
+test('7a',
+ '''
+ Define a function to group age into broad age groups (note use of
+ Numpy array operations, not Python loops).
+ ''',
+ '''
+ def broadagegrp(age):
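+        # Each MA.choose step replaces ages at or above a threshold with a
+        # negative group code (so later steps leave it alone); the final
+        # negation turns these into positive codes 1..6 for outtrans below.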
+ agrp = MA.choose(MA.greater_equal(age,65),(age,-6.0))
+ agrp = MA.choose(MA.greater_equal(agrp,45),(agrp,-5.0))
+ agrp = MA.choose(MA.greater_equal(agrp,25),(agrp,-4.0))
+ agrp = MA.choose(MA.greater_equal(agrp,15),(agrp,-3.0))
+ agrp = MA.choose(MA.greater_equal(agrp,5),(agrp,-2.0))
+ agrp = MA.choose(MA.greater_equal(agrp,0),(agrp,-1.0))
+ returnarray = -agrp.astype(MA.Int)
+ returnmask = Numeric.zeros(len(returnarray))
+ return returnarray, returnmask
+ ''')
+
+test('7b',
+ 'Define an output transformation dictionary for these broad age groups',
+ '''
+ broadagegrp_outtrans = {
+ 1:'0 - 4 yrs',
+ 2:'5 - 14 yrs',
+ 3:'15 - 24 yrs',
+ 4:'25 - 44 yrs',
+ 5:'45 - 64 yrs',
+ 6:'65+ yrs'
+ }
+ ''')
+
+slowtest('7c',
+ 'Now define and create the derived broad age group column.',
+ '''
+ nhds.lock()
+ nhds.derivedcolumn(dername='broadagegrp',dercols=('age',),derfunc=broadagegrp,coltype='ordinal',datatype=int,outtrans=broadagegrp_outtrans,label='Broad Age Group',all_value=0,all_label='All ages')
+ nhds.save()
+ ''')
+
+test('7d',
+ 'Let\'s look at the first 30 values of the derived broadagegrp column.',
+    '''print nhds['broadagegrp'][0:30]''')
+
+slowtest('7e',
+ 'Now define a tuple column, which allows multiple values per record (row).',
+ '''
+ nhds.lock()
+ nhds.derivedcolumn(dername='test_tuples',dercols=('age','sex','geog_region'),derfunc=itertools.izip,coltype='categorical',datatype=tuple,label='Test tuples',all_value=(),all_label='All test tuples')
+ nhds.save()
+ ''')
+
+test('7f',
+ 'Now let us see the results.',
+ '''
+ print nhds['test_tuples'].describe()
+ print nhds['test_tuples'][0:29]
+ ''')
+
+test('8a',
+ 'Define a function to take the logs of a column with missing values',
+ '''
+ def log10random(randomvalue):
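+        # MA.log10 propagates the input's mask, so the returned mask flags
+        # the same entries as missing in the derived column.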
+ return MA.log10(randomvalue), MA.log10(randomvalue).mask()
+ ''')
+
+slowtest('8b',
+ 'Use that function to create a derived column',
+ '''
+ nhds.lock()
+ nhds.derivedcolumn(dername='lograndomvalue',dercols=('randomvalue',),derfunc=log10random,coltype='scalar',datatype=float)
+ nhds.save()
+ ''')
+
+test('8c',
+ '''
+ Look at the first 50 values of the new column - note that missing
+ values are propagated correctly.
+ ''',
+ '''
+    print nhds['lograndomvalue'][0:50]
+    print 'Length of slice is %i' % len(nhds['lograndomvalue'][0:50])
+ ''')
+
+test('9a',
+ '''
+ We can specify which columns we want in each row when performing
+ slicing on the whole dataset, rather than when slicing on a particular
+ column.
+ ''',
+ '''
+ nhds['age'].outtrans = None
+ nhds.printcols = ('sex','marital_status','age','agegrp','randomvalue')
+ ''')
+
+test('9b',
+ '''
+ Get a whole row - a bit slow the first time because each column has
+ to be memory-mapped.
+ ''',
+ 'print nhds[3]')
+
+test('9c',
+ '''
+ Much faster to get whole rows once the required columns are
+ memory-mapped/instantiated.
+ ''',
+ 'print nhds[-20:]')
+
+test('10a',
+ '''
+    Define a filter and create a filtered version of the nhds dataset
+    using it. A filter is like the resolved result of an SQL WHERE clause,
+    but it can be stored and re-used, at least while the underlying
+    dataset remains unchanged. One or more filters can be temporarily
+ assigned to a dataset in order to filter all subsequent operations.
+ ''',
+ '''
+ nhds.makefilter('testa',expr='sex = 2')
+ nhds.makefilter('testb',expr='agegrp = 15')
+ nhds_testa = nhds.filter(name='testa')
+ ''')
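+
+# A hedged sketch of filter re-use (the filter name and expression here are
+# illustrative only, not part of the demo):
+#
+#   nhds.makefilter('women_under_25', expr='sex = 2 and age le 24')
+#   print nhds.filter(name='women_under_25')[-5:]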
+
+test('10b1',
+ 'Note that slicing now honours the filtering.',
+ 'print nhds_testa[-20:]')
+
+test('10b2',
+ 'Make a different filtered view of the dataset using the other filter.',
+ '''
+ nhds_testb = nhds.filter(name='testb')
+ print nhds_testb[-20:]
+ ''')
+
+test('10c',
+ '''
+ Define another filter based on a scalar (continuous) column and make
+ a filtered view of the dataset using it.
+ ''',
+ '''
+ nhds.makefilter('testc',expr='age ge 23 and age le 24')
+ nhds_testc = nhds.filter(name='testc')
+ ''')
+
+test('10d',
+ '''
+    Note that slicing now honours the new filtering: only 23 and 24 yr
+    olds are displayed.
+ ''',
+ 'print nhds_testc[-20:]')
+
+test('11a',
+ 'Print the simplest possible summary of the nhds dataset.',
+ 'print nhds.summ()')
+
+test('11b',
+ '''
+ Add some statistical measures. This is similar to the SQL:
+ select count(*), avg(age), min(age), max(age) from nhds;
+ ''',
+ '''
+ print nhds.summ(mean('age'),minimum('age'),maximum('age'))
+ ''')
+
+test('11c',
+ '''
+ Note that missing data is handled correctly - the randomvalue column
+ contains random values in the interval 0 to 1.0, but all values 0.7
+ to 0.8 are set to missing (masked). In SQL you would need to use
+    a subquery to count nulls.
+ ''',
+ '''
+ print nhds.summ(nonmissing('randomvalue'),missing('randomvalue'),mean('randomvalue'),minimum('randomvalue'),maximum('randomvalue'))
+ ''')
+
+test('11d',
+ 'Compare to same query but using the age column (no missing values)',
+ '''
+ print nhds.summ(nonmissing('age'),missing('age'),mean('age'),minimum('age'),maximum('age'))
+ ''')
+
+brokentest('11e',
+ '''
+ Note that intelligence can be built into the objects: try to
+ calculate statistics on a categorical integer column and it gives
+ an error message since that makes no sense.
+ ''',
+ '''
+ print nhds.summ(nonmissing('geog_region'),missing('geog_region'),mean('geog_region'),minimum('geog_region'),maximum('geog_region'))
+ ''')
+
+test('12',
+ '''
+ Check the cardinality of a categorical column.
+ Similar to SQL: select distinct geog_region from nhds;
+ ''',
+ '''
+ print nhds['geog_region'].cardinality()
+ ''')
+
+test('13a',
+ '''
+ Now for a univariate contingency table.
+ Same as SQL: select count(*) from nhds group by geog_region;
+ ''',
+ '''
+ print nhds.summ('geog_region')
+ ''')
+
+test('13b',
+ '''
+ Same again, but adding a marginal total to the table.
+ Same as SQL: select count(*) from nhds group by geog_region union select count(*) from nhds;
+    Note that the SQL UNION clause used to calculate marginal totals is omitted henceforth for brevity.
+ ''',
+ '''
+ print nhds.summ('geog_region',allcalc=True)
+ ''')
+
+test('13c',
+ '''
+ Same again, but adding proportions to the table
+ Um, the SQL for this would be quite horrible...
+ ''',
+ '''
+ print nhds.summ('geog_region',proportions=True)
+ ''')
+
+test('14a',
+ '''
+ Add some more statistics based on the (scalar) days_of_care column.
+ Same as SQL: select count(*), avg(days_of_care), max(days_of_care), min(days_of_care) from nhds group by geog_region;
+ ''',
+ '''
+ print nhds.summ('geog_region',mean('days_of_care'),maximum('days_of_care'),minimum('days_of_care'),allcalc=True)
+ ''')
+
+test('14b',
+ '''
+ Now calculate these statistics on multiple columns at once.
+ Same as SQL: select count(*), avg(days_of_care), max(days_of_care), min(days_of_care), avg(age), max(age), min(age) from nhds group by geog_region;'
+ ''',
+ '''
+ print nhds.summ('geog_region',mean('days_of_care'),mean('age'),maximum('days_of_care'),maximum('age'),minimum('days_of_care'),minimum('age'),allcalc=True)
+ ''')
+
+test('15a',
+ '''
+ How about quantiles, based on the days_of_care column?
+    Relatively few SQL databases do quantiles (percentiles).
+ ''',
+ '''
+ print nhds.summ('geog_region',p25('days_of_care'),median('days_of_care'),p75('days_of_care'),allcalc=True)
+ ''')
+
+test('16a',
+ '''
+ Now a two-way contingency table.
+ Same as SQL: select count(*) from nhds group by geog_region, sex;
+ ''',
+ '''
+ print nhds.summ('geog_region','sex')
+ ''')
+
+test('16b',
+ '''Add some marginal totals and proportions.''',
+ '''
+ print nhds.summ('geog_region','sex',proportions=True)
+ ''')
+
+test('16c',
+ '''Show only intermediate summary levels.''',
+ '''
+ print nhds.summ('geog_region','sex',levels=[1])
+ ''')
+
+test('17a',
+ '''
+ Now a three-way contingency table
+ Same as SQL: select count(*) from nhds group by geog_region, sex, marital_status;
+ ''',
+ '''
+ print nhds.summ('geog_region','sex','marital_status',allcalc=True)
+ ''')
+
+test('17b',
+ '''
+    Add proportions for a greater-than-two-dimensional table (a unique
+    feature - even stats packages don't do this!) - note the helpful
+    column labelling (although it needs more work...).
+ ''',
+ '''
+ print nhds.summ('geog_region','sex','marital_status',proportions=True)
+ ''')
+
+test('18a',
+ '''
+ And now a four-way contingency table.
+ Same as SQL: select count(*) from nhds group by geog_region, sex, discharge_status, marital_status;'
+ ''',
+ '''
+ print nhds.summ('geog_region','sex','discharge_status','marital_status',allcalc=True)
+ ''')
+
+slowtest('18b',
+ '''Add mean and median age to 18a above.''',
+ '''
+ print nhds.summ('geog_region','sex','discharge_status','marital_status',mean('age'),median('age'),weightcol=None,allcalc=True)
+ ''')
+
+# TODO: the weighted median function throws an exception with this - must
+# investigate
+brokentest('18c',
+ '''Same again but with weighted mean and count.''',
+ '''
+ print nhds.summ('geog_region','sex','discharge_status','marital_status',mean('age'),median('age'),weightcol='analysis_wgt',allcalc=True)
+ ''')
+
+test('18d',
+ '''The acid test - can it handle 5 columns?''',
+ '''
+ print nhds.summ('geog_region','sex','discharge_status','marital_status','hosp_ownership',allcalc=True,weightcol=False)
+ ''')
+
+test('18e',
+ '''
+ The alkali test - can it handle 6 columns? Note that this takes
+ quite a while to run.
+ ''',
+ '''
+ print nhds.summ('geog_region','sex','discharge_status','marital_status','hosp_ownership','num_beds',weightcol=False)
+ ''')
+
+test('18f',
+ '''
+ The litmus test - can it handle 6 columns with all levels? Note
+ that this produces very lengthy output, so we will just calculate
+ the summary dataset, but not print it (which takes ages).
+ ''',
+ '''
+ sixways = nhds.summ('geog_region','sex','discharge_status','marital_status','hosp_ownership','num_beds',weightcol=False,allcalc=True)
+ ''')
+
+test('19a',
+ '''
+ Demonstration of coalescing values on-the-fly.
+ Here is the un-coalesced hospital ownership, both unformatted and
+ formatted.
+ ''',
+ '''
+ nhds['hosp_ownership'].use_outtrans=0
+ print nhds.summ('hosp_ownership')
+ nhds['hosp_ownership'].use_outtrans=1
+ print nhds.summ('hosp_ownership')
+ ''')
+
+test('19b',
+ 'Now coalesce values 1 and 3',
+ '''
+ print nhds.summ(condcol('hosp_ownership',coalesce(1,3,label='Private incl. non-profit')))
+ ''')
+
+test('19d',
+ '''
+ This on-the-fly aggregation also works for higher-order contingency tables.
+ ''',
+ '''
+ print nhds.summ('geog_region',condcol('hosp_ownership',coalesce(1,3,label='Private incl. non-profit')),allcalc=True)
+ ''')
+
+test('19e',
+ '''
+ Note the on-the-fly coalescing can also be done by a function -
+ this function truncates ICD9CM codes to 3 digits.
+ ''',
+ '''
+ def icd9cm_truncate(icd9cm_code):
+ return icd9cm_code[0:3]
+ ''')
+
+# AM - changing coltype is no longer supported (July '05)
+test('19f',
+ '''
+ Let's override the column type for diagnosis1 so it is presented in
+ sorted order.
+ ''',
+ '''
+ nhds['diagnosis1'].coltype = 'ordinal'
+ ''')
+
+test('19g',
+    'And voila again!',
+ '''
+ print nhds.summ(condcol('diagnosis1',coalesce(icd9cm_truncate)),filterexpr='diagnosis1 startingwith "2"')
+ ''')
+
+# TODO - try the following with weightcol='analysis_wgt' - seems to tickle errors in Stats.py
+test('20a',
+ '''
+ Let's explore 'lambda' (unnamed, on-the-fly) filters.
+ ''',
+ '''
+ print nhds.summ('geog_region','sex','discharge_status','marital_status',mean('age'),filterexpr='geog_region = 2 and discharge_status = 1',weightcol=None,allcalc=True)
+ ''')
+
+test('20b',
+ '''Use a second filter to filter the dataset.''',
+ '''
+ print nhds.summ('geog_region','sex','discharge_status','marital_status',mean('age'),filterexpr='sex = 2',weightcol=None,allcalc=True)
+ ''')
+
+test('20c',
+ '''Use the first filter again for an overall summary.''',
+ '''
+ print nhds.summ(mean('age'),filterexpr='geog_region = 2 and discharge_status = 1',weightcol=None,allcalc=True)
+ ''')
+
+test('21a',
+ '''
+ Yet another filter - note the 'startingwith' operator, like the SQL
+ 'like' operator.
+ ''',
+ '''
+ print nhds.summ('diagnosis1', filterexpr='diagnosis1 startingwith "250"')
+ ''')
+
+test('21b',
+ '''You can turn off the output formatting.''',
+ '''
+ nhds['diagnosis1'].use_outtrans=False
+ print nhds.summ('diagnosis1', filterexpr='diagnosis1 startingwith "250"')
+ nhds['diagnosis1'].use_outtrans=True
+ ''')
+
+test('21c',
+ '''
+    Define another lambda filter using the 'in' operator.
+ ''',
+ '''
+    print nhds.summ('diagnosis1',filterexpr="diagnosis1 in ('250.12','250.13','250.21')")
+ ''')
+
+test('21d',
+ '''
+    Define another lambda filter using the 'in:' operator ('in' plus 'startingwith').
+ ''',
+ '''
+ nhds['diagnosis1'].coltype = 'ordinal'
+ print nhds.summ('diagnosis1',filterexpr="diagnosis1 in: ('250*','01','410.1')")
+ ''')
+
+test('22a',
+ '''
+ Date/time values are supported.
+ ''',
+ '''
+ print nhds.summ('randomdate',filterexpr='year = 1996',allcalc=True)
+ ''')
+
+test('22b',
+ '''
+    You can change the date output format - fulldate is a function object defined in the SOOMv0.Utils module.
+ ''',
+ '''
+ from SOOMv0.Utils import fulldate
+ nhds['randomdate'].outtrans = fulldate
+ print nhds.summ('randomdate',filterexpr='year = 1996',allcalc=True)
+ ''')
+
+test('22c',
+ '''
+ You can filter on dates - this is currently rather slow, but it does work.
+ ''',
+ '''
+ print nhds.summ('sex','geog_region',filterexpr='randomdate = date(1996,3,12)')
+ ''')
+
+test('22d',
+ '''Date range filtering.''',
+ '''
+ print nhds.summ('randomdate', 'sex', filterexpr='randomdate between (date(1996,10,12),date(1996,11,12))')
+ ''')
+
+test('22e',
+ '''
+ One more test of date ranges, this time using 'not'.
+ ''',
+ '''
+ print nhds.summ('randomdate','sex',filterexpr='randomdate >= date(1996,10,1) and randomdate <= date(1996,11,1) and not randomdate = date(1996,10,15)')
+ ''')
+
+# AM - There appears to be a problem with summarising multivalue columns after
+# filtering.
+brokentest('23a',
+ '''
+ Demonstration of multivalue columns (no SQL equivalent - needs a join
+ between two tables - but perfect for association rule data mining).
+ Use a where clause because the cardinality of the diagnosis (ICD9CM)
+ columns is rather large.
+ ''',
+ '''
+ print nhds.summ('diagnosis1',filterexpr='diagnosis1 startingwith "250"',allcalc=True)
+ print nhds.summ('diagnosis_all',filterexpr='diagnosis_all startingwith "250"',allcalc=True)
+ ''')
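+
+# In a relational schema the seven diagnosis columns would normally be
+# normalised into a child table (one row per admission/diagnosis pair), so
+# the SQL equivalent of the summaries above would need a join back to the
+# admissions table before grouping - hence "no SQL equivalent" above.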
+
+# This feature no longer exists, but may be re-instated in future versions
+# print '#24a. Demonstration of partial result caching.'
+# print '# We can precalculate a particular summary or contingency table.'
+# print '>>> nhds.precalculate(var1='sex',var2='geog_region',var3='drg')'
+# t = ex()
+# nhds.precalculate(var1='sex',var2='geog_region',var3='drg')
+# pause(t)
+
+# print '#24b. Now turn on use of the cache and create a summary (but don't print it out - too big).'
+# print '>>> soom.use_cache=1'
+# print '>>> res = nhds.summ('sex','geog_region','drg',allcalc=True)'
+# t = ex()
+# soom.use_cache=1
+# soom.messages=0
+# res = nhds.summ('sex','geog_region','drg',allcalc=True)
+# soom.messages=1
+# soom.use_cache=1
+# res = nhds.summ('sex','geog_region','drg',allcalc=True)
+# pause(t)
+
+# print '#24c. Now turn off use of the cache and create the same summary - should be slower.'
+# print '>>> soom.use_cache=0'
+# print '>>> res = nhds.summ('sex','geog_region','drg',allcalc=True)'
+# t = ex()
+# soom.use_cache=0
+# res = nhds.summ('sex','geog_region','drg',allcalc=True)
+# pause(t)
+
+# AM - Subsetting is currently not functional. Filtered Datasets should
+# largely replace them. At some future point, the ability to deep copy
+# datasets will be added (but first we need per-user workspaces).
+brokentest('25',
+ '''
+ Physical dataset subsetting (as opposed to filtering) is also
+ possible. This is still a bit slow when rebuilding the indexes
+ on the subset and there needs to be work done on accelerating the
+ indexing loop, which could be parallelised with possible speed gains
+ (but not linear gains).
+ ''', None)
+
+brokentest('25a',
+ 'First create a filter containing a random sample of rows.',
+ '''
+ nhds.makefilter('randomsample', expr='randomvalue < 0.1')
+ ''')
+
+brokentest('25b',
+ '''
+ Then physically subset the nhds dataset, keeping only a few columns.
+ ''',
+ '''
+ subnhds = subset(nhds,subsetname='subnhds',label=None,filtername='randomsample',\
+ keepcols=('geog_region','sex','days_of_care','age','randomvalue'))
+ ''')
+
+brokentest('25c',
+    '''Demonstrate that the subsetting has occurred.''',
+ '''
+ print subnhds.summ('geog_region',allcalc=True)
+ ''')
+
+brokentest('25d',
+ '''And compare with parent...''',
+ '''
+ print nhds.summ('geog_region',allcalc=True)
+ ''')
+
+brokentest('25e',
+ None,
+ '''
+ print subnhds.summ('sex',minimum('randomvalue'),mean('randomvalue'),maximum('randomvalue'),allcalc=True)
+ print subnhds.describe_with_cols()
+ ''')
+
+# Causes a memory allocation error on large datasets (33 million rows)
+brokentest('26a',
+ '''
+    Note that all the summary methods and functions don't just print out
+ results, they can return the results as data objects (in this case
+ as a dataset instance).
+ ''',
+ '''
+ sex_by_geog_region = nhds.summ('sex','geog_region',mean('age'),allcalc=True)
+ print sex_by_geog_region.describe_with_cols()
+ print sex_by_geog_region[:]
+ ''')
+
+# TODO: re-instate this for next version
+brokentest('26b',
+ '''
+ Thus we can further manipulate the summary datasets. Let's summarise
+ by age group, sex and geographical region, for 1997 only, and join
+ it with appropriate populations and calculate age/sex-specific rates.
+ ''',
+ '''
+ soom.messages=0
+ popsgeog97 = dsload('popsgeog97')
+ pops = popsgeog97.sum('sex','agegrp','geog_region',cellcols='pop',options=[s.sum])
+ nhds.makefilter('only97',expr='year eq 97')
+ counts = nhds.sum('sex','agegrp','geog_region',filtername='only97',cellcols='days_of_care',options=[s.wgtfreq],wgtcol='analysis_wgt')
+ print counts.metadata
+ counts_and_pops = leftouterjoin(counts,('geog_region','sex','agegrp'),('_freq_wgtd_by_analysis_wgt',),pops,('geog_region','sex','agegrp'),('sum_of_pop=pop',),None)
+ counts = counts_and_pops['_freq_wgtd_by_analysis_wgt']
+ pops = counts_and_pops['pop']
+ rates = []
+ for i in range(len(counts_and_pops['sex'])):
+ rates.append((float(counts[i]) / pops[i])*100000)
+ counts_and_pops['rates'] = rates
+    rr = makedataset('regional_rates',label='1997 regional age/sex-specific hospitalisation rates')
+ rr.addcolumnfromseq(name='agegrp',data=counts_and_pops['agegrp'],mask=None,label='Age Group',datatype=int,coltype='ordinal',outtrans=nhds.agegrp.outtrans)
+ rr.addcolumnfromseq(name='sex',data=counts_and_pops['sex'],mask=None,label='Sex',coltype='categorical',datatype=int,outtrans=nhds.sex.outtrans)
+ rr.addcolumnfromseq(name='geog_region',data=counts_and_pops['geog_region'],mask=None,label='Geographical Region',coltype='categorical',datatype=int,outtrans=nhds.geog_region.outtrans)
+ rr.addcolumnfromseq(name='count',data=counts_and_pops['_freq_wgtd_by_analysis_wgt'],mask=None,label='Weighted count',coltype='scalar',datatype=int)
+ rr.addcolumnfromseq(name='pop',data=counts_and_pops['pop'],mask=None,label='Popn.',coltype='scalar',datatype=int)
+ rr.addcolumnfromseq(name='rate',data=counts_and_pops['rates'],mask=None,label='Rate per 100,000',coltype='scalar',datatype=float)
+ soom.messages=1
+ print rr
+ ''')
+
+brokentest('27',
+ '''
+ Support for association rules is under development. Here we form
+ all maximal frequent sets from a multivalued column (diagnosis_all)
+ which contains all diagnosis codes (1 to 7) for each admission
+ to hospital. Note that a minor extension to the SOOM data model is
+    needed to support formation of frequent sets from heterogeneous columns
+    (eg sex, age group and diagnoses). Work on formation of association
+    rules from the frequent sets is also proceeding. One advantage of
+ the SOOM approach is that the row ordinals (record IDs) of all rows
+ which participate in each frequent set are stored as the sets are
+ created, making subsequent manipulation of those records possible
+ and swift. The following derives all maximal frequent sets with
+    support of 0.1% or more, and prints those with 2 or more elements
+ in them. Easily parallelised!
+ ''',
+ '''
+ nhds.maximal_frequent_sets('diagnosis_all',minsup=0.001,min_set_size_to_print=2)
+ ''')
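+
+# Rough support arithmetic (illustrative): with minsup=0.001 over the full
+# ~2 million NHDS records, a candidate set must occur in at least about
+# 0.001 * 2,000,000 = 2,000 admissions to be reported as frequent.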
+
+test.report()
diff --git a/demo/demo.txt b/demo/demo.txt
new file mode 100644
index 0000000..922f6db
--- /dev/null
+++ b/demo/demo.txt
@@ -0,0 +1,8474 @@
+# SOOM V0.00 demo V0.00
+# Make your terminal window as wide as possible and the font as small as possible so that lines don't wrap.
+
+##############################################################################
+
+#0a. Import the SOOM module. It will soon be split into smaller modules!
+>>> from SOOMv0 import *
+
+That took 0.873 seconds.
+
+##############################################################################
+
+#0. Load the nhds dataset object from disc where it is stored as a compressed pickle:
+>>> nhds = dsload("nhds",path=os.path.expanduser("./SOOMdatasets"))
+dsload(): dataset nhds loaded.
+
+That took 0.291 seconds.
+
+##############################################################################
+
+#1a. Examine what datasets are currently loaded.
+>>> print soom
+
+SOOM datasets currently loaded:
+0: nhds (National Hospital Discharge Surveys 1996-1999)
+
+
+
+That took 0.000 seconds.
+
+##############################################################################
+
+#1b. Examine the metadata for the nhds dataset by invoking the __repr__() method on the
+ dataset's metadata instance.
+>>> print nhds.metadata
+
+==================================================================================
+Dataset definition: nhds
+Label: National Hospital Discharge Surveys 1996-1999
+Path: ./SOOMdatasets/
+
+Containing the following DatasetColumns:
+ Column Name Label Column Type Data Type
+------------------ ------------------------------------ ----------- ---------
+diagnosis7 Diagnosis Code 7 categorical str
+diagnosis6 Diagnosis Code 6 categorical str
+geog_region Geographic Region categorical int
+diagnosis4 Diagnosis Code 4 categorical str
+diagnosis3 Diagnosis Code 3 categorical str
+diagnosis2 Diagnosis Code 2 categorical str
+diagnosis1 Diagnosis Code 1 categorical str
+age_units Age Units categorical int
+diagnosis_all Diagnosis codes 1-7 categorical str
+randomdate Random date date date
+diagnosis5 Diagnosis Code 5 categorical str
+newborn_status Newborn status categorical int
+sex Sex categorical int
+days_of_care Days of Care scalar int
+year Survey Year categorical int
+month_of_admission Month of Admission/Discharge categorical str
+raw_age Raw age (years, months or days) scalar int
+procedure4 Procedure Code 4 categorical str
+procedure3 Procedure Code 3 categorical str
+procedure2 Procedure Code 2 categorical str
+procedure1 Procedure Code 1 categorical str
+randomvalue Random values scalar float
+procedure_all Procedure codes 1-4 categorical str
+prin_src_payment Principal Expected Source of Payment categorical str
+marital_status Marital Status categorical int
+analysis_wgt Analysis Weight scalar int
+sec_src_payment Secondary Expected Source of Payment categorical str
+hosp_ownership Hospital Ownership categorical int
+race Race categorical int
+los_flag Length of Stay Flag categorical int
+discharge_status Discharge Status categorical int
+drg DRG V13.0/V14.0/V15.0 categorical int
+num_beds Number of Beds categorical int
+==================================================================================
+
+
+That took 0.006 seconds.
+
+##############################################################################
+
+#2a. Examine the metadata for one column - each column has its own metadata class instance.
+>>> print nhds.metadata.race
+
+--------------------
+DatasetColumn metadata for race
+Label: Race
+Data array filename: ./SOOMdatasets/nhds_race_data.SOOMblobstore
+Index filename: ./SOOMdatasets/nhds_race_inverted.SOOMblobstore
+Column Type: categorical
+Data Type: int
+Label for <All>:All races
+Value for <All>:_all_
+Output translation: {1: 'White', 2: 'Black', 3: 'American Indian/Eskimo', 4: 'Asian/Pacific Islander', 5: 'Other', 9: 'Not stated'}
+--------------------
+
+
+That took 0.000 seconds.
+
+##############################################################################
+
+#2b. Now define some derived age columns
+# Note: you only need to derive columns once if they are permanent, as here.
+# Typically this would be done when the data is first loaded, but new permanent
+# columns can still be created during analysis without having to re-write the entire dataset.
+>>> def age_years(raw_age,age_units):
+... units_divisor = Numeric.choose(age_units - 1, (1.0, 12.0, 365.25))
+... returnarray = raw_age / units_divisor
+... returnmask = Numeric.zeros(len(returnarray))
+... return returnarray, returnmask
+>>> nhds.derivedcolumn(permanent=1,dername="age",dercols=("raw_age","age_units"),derfunc=age_years,coltype="scalar",datatype="float",outtrans=None,label="Age (years)")
+>>> def age_months(raw_age,age_units):
+... units_multiplier = Numeric.choose(age_units - 1, (12.0, 1.0, (1/30.5)))
+... returnarray = raw_age * units_multiplier
+... returnmask = Numeric.zeros(len(returnarray))
+... return returnarray, returnmask
+>>> nhds.derivedcolumn(permanent=1,dername="age_months",dercols=("raw_age","age_units"),derfunc=age_months,coltype="scalar",datatype="float",outtrans=None,label="Age (months)")
+>>> def age_days(raw_age,age_units):
+... units_multiplier = Numeric.choose(age_units - 1, (365.25, 30.5, 1.0))
+... returnarray = raw_age * units_multiplier
+... returnmask = Numeric.zeros(len(returnarray))
+... return returnarray, returnmask
+>>> nhds.derivedcolumn(permanent=1,dername="age_days",dercols=("raw_age","age_units"),derfunc=age_days,coltype="scalar",datatype="float",outtrans=None,label="Age (days)")
+
+Creating derived column age...
+Took 0.753 seconds.
+
+
+Creating derived column age_months...
+Took 0.619 seconds.
+
+
+Creating derived column age_days...
+Took 0.625 seconds.
+
+
+That took 2.005 seconds.
+
+##############################################################################
+
+#3a. Pythonic item access on one column of the dataset
+>>> print nhds.age[9846]
+None
+60.0
+
+That took 0.101 seconds.
+
+##############################################################################
+
+#3b. Pythonic item access on a date column of the dataset
+>>> print nhds.randomdate[9846]
+1996-12-20 00:00:00.00
+
+That took 0.003 seconds.
+
+##############################################################################
+
+#3c. Pythonic item access on a string column of the dataset
+>>> print nhds.diagnosis1[9846]
+530.2
+
+That took 0.003 seconds.
+
+##############################################################################
+
+#4a. Pythonic slicing of one column of the dataset
+>>> print nhds.race[2:5]
+[2 ,2 ,2 ,]
+
+That took 0.059 seconds.
+
+##############################################################################
+
+#4b. Pythonic slicing of a date column of the dataset
+>>> print nhds.randomdate[2:5]
+[<DateTime object for '1996-02-18 00:00:00.00' at 466300>, <DateTime object for '1996-09-15 00:00:00.00' at 4a8a20>, <DateTime object for '1996-09-09 00:00:00.00' at 3a0450>]
+
+That took 0.003 seconds.
+
+##############################################################################
+
+#4c. Pythonic slicing of a string column of the dataset
+>>> print nhds.diagnosis1[2:9]
+['V30.01', 'V30.00', 'V30.01', 'V30.00', 'V30.01', 'V30.00', 'V30.00']
+
+That took 0.003 seconds.
+
+##############################################################################
+
+#5. The len() function returns the number of rows in a dataset.
+>>> len(nhds)
+40000
+
+That took 0.000 seconds.
+
+##############################################################################
+
+#6a. Note that a slice on a column with missing values preserves them (denoted by '--').
+ . The randomvalue column contains uniform random variates in the interval 0 to 1,
+ . but with all values between 0.7 and 0.8 set to missing.
+>>> print nhds.randomvalue[1000:1030]
+[-- ,0.516317660183 ,-- ,0.0802018843994 ,
+ 0.309131737058 ,0.238144463425 ,0.143943843603 ,0.38512995206 ,
+ 0.372722812618 ,0.812914795332 ,0.200220069177 ,0.173219884709 ,
+ 0.650145387372 ,0.684544790818 ,0.470149313204 ,0.491133474349 ,
+ 0.849303563234 ,0.854084328145 ,0.555170396916 ,0.60529459086 ,
+ 0.662663200292 ,0.485516514109 ,0.0948007819846 ,0.661140328428 ,
+ 0.0419955051684 ,0.693758190112 ,0.0560010969061 ,0.0782717420461 ,
+ 0.326067043313 ,0.843211669759 ,]
+
+That took 0.103 seconds.
+
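+# Aside: column slices come back as masked arrays from the MA module (the
+# masked-array companion to Numeric; numpy.ma is the modern equivalent), and
+# '--' is simply how a masked element prints. A minimal sketch, assuming MA
+# is available as it is elsewhere in this demo:
+
+x = MA.masked_values([0.5, 0.75, 0.2], 0.75)  # mask the value 0.75
+print x                                       # the masked element prints as --
+print MA.average(x)                           # 0.35 - masked values are excluded
+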
+##############################################################################
+
+#6b. Same slice of nhds.randomvalue using the getcolumndata() method
+>>> nhds.getcolumndata("randomvalue")[1000:1030]
+[-- ,0.516317660183 ,-- ,0.0802018843994 ,
+ 0.309131737058 ,0.238144463425 ,0.143943843603 ,0.38512995206 ,
+ 0.372722812618 ,0.812914795332 ,0.200220069177 ,0.173219884709 ,
+ 0.650145387372 ,0.684544790818 ,0.470149313204 ,0.491133474349 ,
+ 0.849303563234 ,0.854084328145 ,0.555170396916 ,0.60529459086 ,
+ 0.662663200292 ,0.485516514109 ,0.0948007819846 ,0.661140328428 ,
+ 0.0419955051684 ,0.693758190112 ,0.0560010969061 ,0.0782717420461 ,
+ 0.326067043313 ,0.843211669759 ,]
+
+That took 0.003 seconds.
+
+##############################################################################
+
+#7a. Define a function to group age into age groups (note the use of Numeric/MA array operations, not Python loops)
+>>>
+def agegrp(age):
+ agrp = MA.choose(MA.greater_equal(age,85),(age,-18.0))
+ agrp = MA.choose(MA.greater_equal(agrp,80),(agrp,-17.0))
+ agrp = MA.choose(MA.greater_equal(agrp,75),(agrp,-16.0))
+ agrp = MA.choose(MA.greater_equal(agrp,70),(agrp,-15.0))
+ agrp = MA.choose(MA.greater_equal(agrp,65),(agrp,-14.0))
+ agrp = MA.choose(MA.greater_equal(agrp,60),(agrp,-13.0))
+ agrp = MA.choose(MA.greater_equal(agrp,55),(agrp,-12.0))
+ agrp = MA.choose(MA.greater_equal(agrp,50),(agrp,-11.0))
+ agrp = MA.choose(MA.greater_equal(agrp,45),(agrp,-10.0))
+ agrp = MA.choose(MA.greater_equal(agrp,40),(agrp,-9.0))
+ agrp = MA.choose(MA.greater_equal(agrp,35),(agrp,-8.0))
+ agrp = MA.choose(MA.greater_equal(agrp,30),(agrp,-7.0))
+ agrp = MA.choose(MA.greater_equal(agrp,25),(agrp,-6.0))
+ agrp = MA.choose(MA.greater_equal(agrp,20),(agrp,-5.0))
+ agrp = MA.choose(MA.greater_equal(agrp,15),(agrp,-4.0))
+ agrp = MA.choose(MA.greater_equal(agrp,10),(agrp,-3.0))
+ agrp = MA.choose(MA.greater_equal(agrp,5),(agrp,-2.0))
+ agrp = MA.choose(MA.greater_equal(agrp,0),(agrp,-1.0))
+ returnarray = -agrp.astype(MA.Int)
+ returnmask = Numeric.zeros(len(returnarray))
+ return returnarray, returnmask
+
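+# Aside: each MA.choose(MA.greater_equal(agrp, t), (agrp, -code)) step above
+# replaces values >= t with a negative group code; working from the highest
+# threshold down, already-coded (negative) values can never match a later,
+# non-negative threshold, and the final negation recovers positive codes 1..18.
+# The same binning, sketched per-element with the standard bisect module
+# (hypothetical helper, not part of SOOM):
+
+import bisect
+bounds = range(5, 90, 5)                      # 5, 10, ..., 85
+def agegrp_scalar(age):
+    return bisect.bisect_right(bounds, age) + 1   # 1-based group code
+
+print agegrp_scalar(0), agegrp_scalar(23), agegrp_scalar(87)   # 1 5 18
+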
+#7b. Define an output transformation dictionary for these age groups
+>>>
+agegrp_outtrans = { 1:"0 - 4 yrs",
+ 2:"5 - 9 yrs",
+ 3:"10 - 14 yrs",
+ 4:"15 - 19 yrs",
+ 5:"20 - 24 yrs",
+ 6:"25 - 29 yrs",
+ 7:"30 - 34 yrs",
+ 8:"35 - 39 yrs",
+ 9:"40 - 44 yrs",
+ 10:"45 - 49 yrs",
+ 11:"50 - 54 yrs",
+ 12:"55 - 59 yrs",
+ 13:"60 - 64 yrs",
+ 14:"65 - 69 yrs",
+ 15:"70 - 74 yrs",
+ 16:"75 - 79 yrs",
+ 17:"80 - 84 yrs",
+ 18:"85+ yrs"}
+
+#7c. Now define the derived age group column
+>>> nhds.derivedcolumn(permanent=1,dername="agegrp",dercols=("age",),derfunc=agegrp,coltype="ordinal",datatype="int",outtrans=agegrp_outtrans,label="Age Group",all_label="All ages")
+
+Creating derived column agegrp...
+Building inverted index...
+Took 2.741 seconds.
+
+
+That took 2.744 seconds.
+
+##############################################################################
+
+#7d. Let's look at the first 29 values of the derived agegrp column.
+>>> print nhds.agegrp[0:29]
+[5 ,6 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,]
+
+That took 0.059 seconds.
+
+##############################################################################
+
+#8a. Define a function to take the logs of a column with missing values
+>>> def log10random(randomvalue):
+... return MA.log10(randomvalue), MA.log10(randomvalue).mask()
+
+That took 0.000 seconds.
+
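+# Aside: the function above evaluates MA.log10 twice per call; a functionally
+# identical but slightly tidier version would compute it once (a sketch, not
+# what the demo actually ran):
+
+def log10random(randomvalue):
+    logs = MA.log10(randomvalue)              # masked wherever the input is masked
+    return logs, logs.mask()                  # the data array plus its mask
+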
+##############################################################################
+
+#8b. Use that function to create a derived column
+>>> nhds.derivedcolumn(dername="lograndomvalue",dercols=("randomvalue",),derfunc=log10random,coltype="scalar",datatype="float")
+
+Creating derived column lograndomvalue...
+Took 0.119 seconds.
+
+
+That took 0.120 seconds.
+
+##############################################################################
+
+#8c. Look at the first 49 values of the new column - note that missing values are propagated correctly.
+>>> nhds.lograndomvalue.data[0:49]
+[-0.0216640938655 ,-0.369823643067 ,-0.0194874928686 ,-0.248326530934 ,
+ -- ,-0.279305031044 ,-- ,-0.560116630261 ,
+ -0.896186532318 ,-0.285445180668 ,-0.286157268584 ,-0.662404965629 ,
+ -0.091125427861 ,-0.622901441913 ,-0.400773013529 ,-- ,
+ -- ,-0.0648149737032 ,-0.755095822934 ,-- ,
+ -0.363938439028 ,-- ,-0.21991839301 ,-0.00527891939251 ,
+ -0.863385721664 ,-0.521339350626 ,-1.20471228349 ,-0.0836927605023 ,
+ -0.542281516004 ,-0.250730066792 ,-1.93932425787 ,-0.169810743745 ,
+ -0.162034660604 ,-0.234007932972 ,-0.274205894267 ,-1.40938613534 ,
+ -1.08719262773 ,-0.0479443256762 ,-0.973848736594 ,-0.375116741603 ,
+ -0.674569842108 ,-0.440282307044 ,-0.302968667708 ,-0.521050946796 ,
+ -2.07923190669 ,-0.24214145108 ,-0.786535483203 ,-0.256115247549 ,
+ -1.20418769356 ,]
+
+That took 0.005 seconds.
+
+##############################################################################
+
+#9a. We can specify which columns we want in each row when slicing the whole dataset,
+ . rather than when slicing on a particular column.
+>>> nhds.metadata.slicecols = ("sex","race","age","agegrp")
+
+That took 0.000 seconds.
+
+##############################################################################
+
+#9b. Get a whole row - a bit slow the first time because each column has to be memory-mapped.
+>>> print nhds[3]
+3
+{'age': 0.0, 'race': 'Black', 'agegrp': '0 - 4 yrs', 'sex': 'Male'}
+
+That took 0.075 seconds.
+
+##############################################################################
+
+#9c. Much faster to get whole rows once the required columns are memory-mapped/instantiated.
+>>> print nhds[-20:]
+Sex Race Age (years) Age Group
+------ ---------- ----------- -----------
+Female Not stated 15.0 15 - 19 yrs
+Female Not stated 63.0 60 - 64 yrs
+Male Not stated 73.0 70 - 74 yrs
+Female Not stated 68.0 65 - 69 yrs
+Female Not stated 70.0 70 - 74 yrs
+Female Not stated 63.0 60 - 64 yrs
+Female Not stated 38.0 35 - 39 yrs
+Male Not stated 63.0 60 - 64 yrs
+Female Not stated 77.0 75 - 79 yrs
+Male Not stated 73.0 70 - 74 yrs
+Male Not stated 41.0 40 - 44 yrs
+Female Not stated 39.0 35 - 39 yrs
+Female Not stated 63.0 60 - 64 yrs
+Male Not stated 78.0 75 - 79 yrs
+Female Not stated 81.0 80 - 84 yrs
+Male Not stated 58.0 55 - 59 yrs
+Female Not stated 72.0 70 - 74 yrs
+Female Not stated 53.0 50 - 54 yrs
+Male Not stated 38.0 35 - 39 yrs
+Female Not stated 51.0 50 - 54 yrs
+
+
+That took 0.008 seconds.
+
+##############################################################################
+
+#10a. Define a whereset (filter) and assign it to the active wheresets property for the nhds dataset.
+ . A whereset is like the resolved result of an SQL WHERE clause, but it can be stored and
+ . re-used, at least while the underlying dataset remains unchanged. One or more wheresets
+ . can be temporarily assigned to a dataset in order to filter all subsequent operations.
+>>> nhds.makewhereset("testa",var1="sex",op1="=",val1=2)
+>>> nhds.makewhereset("testb",var1="agegrp",op1="=",val1=15)
+>>> nhds.metadata.activewheresets = ("testa","testb")
+Assembling whereset testa containing 22368 elements took 0.111 seconds.
+Assembling whereset testb containing 2932 elements took 0.040 seconds.
+
+That took 0.152 seconds.
+
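+# Aside: conceptually, a whereset resolves to the set of row indices that
+# satisfy its condition, and activating several wheresets filters by their
+# intersection. A toy plain-Python sketch (hypothetical data, not the real
+# nhds columns):
+
+sex    = [1, 2, 2, 1, 2]                      # toy coding: 1=male, 2=female
+agegrp = [15, 15, 3, 15, 15]                  # 15 = '70 - 74 yrs'
+testa = set(i for i, s in enumerate(sex) if s == 2)        # like sex = 2
+testb = set(i for i, g in enumerate(agegrp) if g == 15)    # like agegrp = 15
+print sorted(testa & testb)                   # [1, 4] - rows passing both filters
+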
+##############################################################################
+
+#10b. Note that slicing now honours the active whereset filtering.
+>>> print nhds[-20:]
+Note: the following wheresets are active:
+testa containing 22368 elements
+testb containing 2932 elements
+The intersection of the above wheresets contains 1495 elements.
+Note: the following wheresets are active:
+testa containing 22368 elements
+testb containing 2932 elements
+The intersection of the above wheresets contains 1495 elements.
+Sex Race Age (years) Age Group
+------ ---------- ----------- -----------
+Female Not stated 73.0 70 - 74 yrs
+Female Not stated 71.0 70 - 74 yrs
+Female Not stated 73.0 70 - 74 yrs
+Female Not stated 71.0 70 - 74 yrs
+Female Not stated 74.0 70 - 74 yrs
+Female Not stated 73.0 70 - 74 yrs
+Female Not stated 73.0 70 - 74 yrs
+Female Not stated 70.0 70 - 74 yrs
+Female Not stated 71.0 70 - 74 yrs
+Female Not stated 74.0 70 - 74 yrs
+Female Not stated 71.0 70 - 74 yrs
+Female Not stated 70.0 70 - 74 yrs
+Female Not stated 74.0 70 - 74 yrs
+Female Not stated 73.0 70 - 74 yrs
+Female Not stated 71.0 70 - 74 yrs
+Female Not stated 71.0 70 - 74 yrs
+Female Not stated 74.0 70 - 74 yrs
+Female Not stated 72.0 70 - 74 yrs
+Female Not stated 70.0 70 - 74 yrs
+Female Not stated 72.0 70 - 74 yrs
+
+
+That took 0.069 seconds.
+
+##############################################################################
+
+#10c. Define another whereset (filter) based on a scalar (continuous) column,
+# and assign it to the active wheresets property for the nhds dataset.
+>>> nhds.makewhereset("testc",var1="age",op1="ge",val1=23,con1="and",var2="age",op2="le",val2=24)
+>>> nhds.metadata.activewheresets = ("testc",)
+Assembling whereset testc containing 801 elements took 0.096 seconds.
+
+That took 0.097 seconds.
+
+##############################################################################
+
+#10d. Note that slicing now honours the new whereset filtering: only 23 and 24 yr olds are displayed.
+>>> print nhds[-20:]
+Note: the following wheresets are active:
+testc containing 801 elements
+The intersection of the above wheresets contains 801 elements.
+Note: the following wheresets are active:
+testc containing 801 elements
+The intersection of the above wheresets contains 801 elements.
+Sex Race Age (years) Age Group
+------ ---------- ----------- -----------
+Female Other 24.0 20 - 24 yrs
+Female White 24.0 20 - 24 yrs
+Female White 24.0 20 - 24 yrs
+Female White 23.0 20 - 24 yrs
+Female Other 24.0 20 - 24 yrs
+Female White 24.0 20 - 24 yrs
+Female Black 23.0 20 - 24 yrs
+Female White 23.0 20 - 24 yrs
+Female White 24.0 20 - 24 yrs
+Female White 24.0 20 - 24 yrs
+Female White 24.0 20 - 24 yrs
+Female Black 24.0 20 - 24 yrs
+Female White 24.0 20 - 24 yrs
+Female Not stated 23.0 20 - 24 yrs
+Male Not stated 24.0 20 - 24 yrs
+Male Not stated 23.0 20 - 24 yrs
+Female Not stated 23.0 20 - 24 yrs
+Male Not stated 24.0 20 - 24 yrs
+Female Not stated 24.0 20 - 24 yrs
+Female Not stated 23.0 20 - 24 yrs
+
+
+That took 0.032 seconds.
+
+##############################################################################
+
+#10e. Cancel the whereset filtering (note that the wheresets are still available for further use).
+>>> nhds.metadata.activewheresets = None
+
+That took 0.000 seconds.
+
+##############################################################################
+
+#11a. Print the simplest possible summary of the nhds dataset.
+>>> print nhds.summary(printit=1)
+
+Summary derived from nhds dataset
+summary(): Summary created in 0.000 seconds.
+
+ |
+ |
+ |
+ |
+Frequency|
+---------|
+40000 |
+
+
+That took 0.001 seconds.
+
+##############################################################################
+
+#11b. Add some statistical measures. This is similar to the SQL:
+# select count(*), avg(age), min(age), max(age) from nhds;
+>>> print nhds.summary(cellvar="age",mean=1,minimum=1,maximum=1,printit=1)
+
+Summary derived from nhds dataset
+summary(): Summary created in 0.170 seconds.
+
+ | | | |
+ | | | |
+ |Minimum |Mean |Maximum |
+ |of |of |of |
+Frequency|Age (years)|Age (years) |Age (years)|
+---------|-----------|-------------|-----------|
+40000 |0.0 |44.4418617984|99.0 |
+
+
+That took 0.172 seconds.
+
+##############################################################################
+
+#11c. Note that missing data is handled correctly - the randomvalue column
+# contains random values in the interval 0 to 1.0, but all values between 0.7 and 0.8
+# are set to missing (masked).
+# In SQL you would need something like count(*) - count(randomvalue) to count nulls.
+>>> print nhds.summary(cellvar="randomvalue",nonmissing=1,missing=1,mean=1,minimum=1,maximum=1,printit=1)
+
+Summary derived from nhds dataset
+summary(): Summary created in 0.231 seconds.
+
+ | | | | | |
+ |Number of |Number of | | | |
+ |non-missing |missing |Minimum |Mean |Maximum |
+ |values of |values of |of |of |of |
+Frequency|Random values|Random values|Random values |Random values |Random values |
+---------|-------------|-------------|-----------------|--------------|--------------|
+40000 |36013 |3987 |3.14874556133e-05|0.471697689763|0.999974713516|
+
+
+That took 0.234 seconds.
+
+##############################################################################
+
+#11d. Compare to same query but using the age column (no missing values)
+>>> print nhds.summary(cellvar="age",nonmissing=1,missing=1,mean=1,minimum=1,maximum=1,printit=1)
+
+Summary derived from nhds dataset
+summary(): Summary created in 0.383 seconds.
+
+ | | | | | |
+ |Number of |Number of | | | |
+ |non-missing|missing |Minimum |Mean |Maximum |
+ |values of |values of |of |of |of |
+Frequency|Age (years)|Age (years)|Age (years)|Age (years) |Age (years)|
+---------|-----------|-----------|-----------|-------------|-----------|
+40000 |40000 |0 |0.0 |44.4418617984|99.0 |
+
+
+That took 0.387 seconds.
+
+##############################################################################
+
+#11e. Note that intelligence can be built into the objects: try to calculate statistics
+# on a categorical integer column and you get an error message, since that makes no sense.
+>>> print nhds.summary(cellvar="race",nonmissing=1,missing=1,mean=1,minimum=1,maximum=1,printit=1)
+summary(): race is not a scalar column in dataset nhds
+
+That took 0.000 seconds.
+
+##############################################################################
+
+#12. Check the cardinality of a categorical column.
+# Similar to SQL: select count(distinct race) from nhds;
+>>> print nhds.race.cardinality(printit=1)
+Cardinality of race is: 6
+
+That took 0.124 seconds.
+
+##############################################################################
+
+#13a. Now for a univariate contingency table.
+# Same as SQL: select count(*) from nhds group by race;
+>>> print nhds.race.univar(printit=1)
+
+Univariate summary derived from race column in nhds dataset
+univar(): Univariate summary created in 0.002 seconds.
+
+ | |
+ | |
+Grouped | |
+by | |
+Race |Frequency|
+----------------------|---------|
+White |13709 |
+Black |8434 |
+American Indian/Eskimo|1049 |
+Asian/Pacific Islander|595 |
+Other |1875 |
+Not stated |14338 |
+
+
+That took 0.004 seconds.
+
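+# Aside: conceptually, univar() is a grouped count over the column's integer
+# codes, rendered through its output translation. A toy plain-Python sketch
+# (hypothetical data, using the race coding shown earlier):
+
+race = [1, 2, 2, 9, 1, 1]
+freq = {}
+for r in race:
+    freq[r] = freq.get(r, 0) + 1
+print freq                                    # e.g. {1: 3, 2: 2, 9: 1}
+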
+##############################################################################
+
+#13b. Same again, but using the alternate calling syntax, and adding a marginal total to the table.
+# Same as SQL: select count(*) from nhds group by race union select count(*) from nhds;
+# Note: the SQL union subquery used to calculate marginal totals is omitted henceforth for brevity.
+>>> print nhds.univar("race",printit=1,allcalc=1)
+
+Univariate summary derived from race column in nhds dataset
+univar(): Univariate summary created in 0.002 seconds.
+
+ | |
+ | |
+Grouped | |
+by | |
+Race |Frequency|
+----------------------|---------|
+White |13709 |
+Black |8434 |
+American Indian/Eskimo|1049 |
+Asian/Pacific Islander|595 |
+Other |1875 |
+Not stated |14338 |
+All races |40000 |
+
+
+That took 0.004 seconds.
+
+##############################################################################
+
+#13c. Same again, but adding proportions to the table.
+# Um, the SQL for this would be quite horrible...
+>>> print nhds.univar("race",printit=1,proportions=1)
+['all', 'race']
+
+Univariate summary derived from race column in nhds dataset
+univar(): Univariate summary created in 0.002 seconds.
+
+ | | |
+ | | |
+Grouped | |Proportion |
+by | |of |
+Race |Frequency|for all Race|
+----------------------|---------|------------|
+White |13709 |0.342725 |
+Black |8434 |0.21085 |
+American Indian/Eskimo|1049 |0.026225 |
+Asian/Pacific Islander|595 |0.014875 |
+Other |1875 |0.046875 |
+Not stated |14338 |0.35845 |
+All races |40000 |1.0 |
+
+
+That took 0.006 seconds.
+
+##############################################################################
+
+#14. Add some more statistics based on the (scalar) days_of_care column.
+# Same as SQL: select count(*), avg(days_of_care), max(days_of_care), min(days_of_care) from nhds group by race;
+>>> print nhds.race.univar(cellvar="days_of_care",mean=1,maximum=1,minimum=1,allcalc=1,printit=1)
+colload(): memory mapping of data vector for days_of_care took 0.061 seconds.
+
+
+Univariate summary derived from race column in nhds dataset
+univar(): Univariate summary created in 0.490 seconds.
+
+ | | | | |
+ | | | | |
+Grouped | |Minimum |Mean |Maximum |
+by | |of |of |of |
+Race |Frequency|Days of Care|Days of Care |Days of Care|
+----------------------|---------|------------|-------------|------------|
+White |13709 |1 |5.60420161937|246 |
+Black |8434 |1 |7.31100308276|559 |
+American Indian/Eskimo|1049 |1 |5.30409914204|102 |
+Asian/Pacific Islander|595 |1 |5.08235294118|98 |
+Other |1875 |1 |6.2064 |186 |
+Not stated |14338 |1 |5.61745013251|218 |
+All races |40000 |1 |5.981425 |559 |
+
+
+That took 0.498 seconds.
+
+##############################################################################
+
+#15. How about quantiles, based on the days_of_care column?
+# No SQL database that I know of does quantiles (percentiles).
+>>> print nhds.race.univar(cellvar="days_of_care",p25=1,median=1,p75=1,printit=1,allcalc=1)
+
+Univariate summary derived from race column in nhds dataset
+univar(): Univariate summary created in 0.269 seconds.
+
+ | | | | |
+ | |Third quartile |First quartile | |
+Grouped | |(75th percentile)|(25th percentile)|Median |
+by | |of |of |of |
+Race |Frequency|Days of Care |Days of Care |Days of Care|
+----------------------|---------|-----------------|-----------------|------------|
+White |13709 |6.0 |2.0 |3.0 |
+Black |8434 |7.0 |2.0 |4.0 |
+American Indian/Eskimo|1049 |6.0 |2.0 |3.0 |
+Asian/Pacific Islander|595 |5.0 |2.0 |3.0 |
+Other |1875 |7.0 |2.0 |3.0 |
+Not stated |14338 |6.0 |2.0 |3.0 |
+All races |40000 |6.0 |2.0 |3.0 |
+
+
+That took 0.277 seconds.
+
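+# Aside: one common quantile definition, sketched in plain Python with linear
+# interpolation between order statistics (SOOM's exact interpolation rule is
+# not shown in this demo, so this is an assumption):
+
+def quantile(values, p):
+    vs = sorted(values)
+    k = (len(vs) - 1) * p                     # fractional index into sorted data
+    f = int(k)
+    g = min(f + 1, len(vs) - 1)
+    return vs[f] + (vs[g] - vs[f]) * (k - f)
+
+print quantile([1, 2, 3, 4, 10], 0.5)         # 3 - the median
+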
+##############################################################################
+
+#16a. Now a two-way contingency table.
+# Same as SQL: select count(*) from nhds group by race, sex;
+>>> print nhds.bivar(var1="race",var2="sex",printit=1)
+bivar(): Bivariate setup time 0.001 seconds.
+
+Bivariate summary derived from race and sex columns in nhds dataset
+bivar(): Bivariate summary created in 0.022 seconds.
+Time in intersect() function was 0.015 seconds.
+
+ | | |
+ | | |
+Grouped |Grouped| |
+by |by | |
+Race |Sex |Frequency|
+----------------------|-------|---------|
+White |Male |6139 |
+White |Female |7570 |
+Black |Male |3719 |
+Black |Female |4715 |
+American Indian/Eskimo|Male |466 |
+American Indian/Eskimo|Female |583 |
+Asian/Pacific Islander|Male |240 |
+Asian/Pacific Islander|Female |355 |
+Other |Male |844 |
+Other |Female |1031 |
+Not stated |Male |6224 |
+Not stated |Female |8114 |
+
+
+That took 0.027 seconds.
+
+##############################################################################
+
+#16b. Add some marginal totals.
+>>> print nhds.bivar(var1="race",var2="sex",printit=1,allcalc=1)
+bivar(): Bivariate setup time 0.001 seconds.
+
+Bivariate summary derived from race and sex columns in nhds dataset
+bivar(): Bivariate summary created in 0.024 seconds.
+Time in intersect() function was 0.014 seconds.
+
+ | | |
+ | | |
+Grouped |Grouped| |
+by |by | |
+Race |Sex |Frequency|
+----------------------|-------|---------|
+White |Male |6139 |
+White |Female |7570 |
+Black |Male |3719 |
+Black |Female |4715 |
+American Indian/Eskimo|Male |466 |
+American Indian/Eskimo|Female |583 |
+Asian/Pacific Islander|Male |240 |
+Asian/Pacific Islander|Female |355 |
+Other |Male |844 |
+Other |Female |1031 |
+Not stated |Male |6224 |
+Not stated |Female |8114 |
+All races |Male |17632 |
+All races |Female |22368 |
+White |Persons|13709 |
+Black |Persons|8434 |
+American Indian/Eskimo|Persons|1049 |
+Asian/Pacific Islander|Persons|595 |
+Other |Persons|1875 |
+Not stated |Persons|14338 |
+All races |Persons|40000 |
+
+
+That took 0.032 seconds.
+
+##############################################################################
+
+#17a. Now a three-way contingency table
+# Same as SQL: select count(*) from nhds group by race, sex, geog_region;
+>>> print nhds.trivar(var1="race",var2="sex",var3="geog_region",allcalc=1,printit=1)
+colload(): memory mapping of geog_region took 0.004 seconds.
+
+trivar(): Trivariate setup time 0.005 seconds.
+
+Trivariate summary derived from geog_region, race and sex columns in nhds dataset
+trivar(): Trivariate summary created in 0.198 seconds.
+Time in intersect() function was 0.132 seconds.
+Time in ArrayDict was 0.051 seconds.
+Time in loops was 0.194 seconds.
+Time in empty loops was 0.000 seconds.
+Time in count was 0.001 seconds.
+Time in take() was 0.000 seconds.
+
+ | | | |
+ | | | |
+Grouped |Grouped |Grouped| |
+by |by |by | |
+Geographic Region|Race |Sex |Frequency|
+-----------------|----------------------|-------|---------|
+Northeast |White |Male |2654 |
+Northeast |White |Female |3153 |
+Northeast |Black |Male |2367 |
+Northeast |Black |Female |2994 |
+Northeast |American Indian/Eskimo|Male |28 |
+Northeast |American Indian/Eskimo|Female |26 |
+Northeast |Asian/Pacific Islander|Male |52 |
+Northeast |Asian/Pacific Islander|Female |91 |
+Northeast |Other |Male |724 |
+Northeast |Other |Female |853 |
+Northeast |Not stated |Male |572 |
+Northeast |Not stated |Female |799 |
+Midwest |White |Male |1711 |
+Midwest |White |Female |2273 |
+Midwest |Black |Male |374 |
+Midwest |Black |Female |461 |
+Midwest |American Indian/Eskimo|Male |0 |
+Midwest |American Indian/Eskimo|Female |1 |
+Midwest |Asian/Pacific Islander|Male |7 |
+Midwest |Asian/Pacific Islander|Female |15 |
+Midwest |Other |Male |21 |
+Midwest |Other |Female |17 |
+Midwest |Not stated |Male |813 |
+Midwest |Not stated |Female |1250 |
+South |White |Male |1774 |
+South |White |Female |2144 |
+South |Black |Male |978 |
+South |Black |Female |1260 |
+South |American Indian/Eskimo|Male |438 |
+South |American Indian/Eskimo|Female |556 |
+South |Asian/Pacific Islander|Male |181 |
+South |Asian/Pacific Islander|Female |249 |
+South |Other |Male |99 |
+South |Other |Female |161 |
+South |Not stated |Male |4839 |
+South |Not stated |Female |6065 |
+All regions |White |Male |6139 |
+All regions |White |Female |7570 |
+All regions |Black |Male |3719 |
+All regions |Black |Female |4715 |
+All regions |American Indian/Eskimo|Male |466 |
+All regions |American Indian/Eskimo|Female |583 |
+All regions |Asian/Pacific Islander|Male |240 |
+All regions |Asian/Pacific Islander|Female |355 |
+All regions |Other |Male |844 |
+All regions |Other |Female |1031 |
+All regions |Not stated |Male |6224 |
+All regions |Not stated |Female |8114 |
+Northeast |All races |Male |6397 |
+Northeast |All races |Female |7916 |
+Midwest |All races |Male |2926 |
+Midwest |All races |Female |4017 |
+South |All races |Male |8309 |
+South |All races |Female |10435 |
+Northeast |White |Persons|5807 |
+Northeast |Black |Persons|5361 |
+Northeast |American Indian/Eskimo|Persons|54 |
+Northeast |Asian/Pacific Islander|Persons|143 |
+Northeast |Other |Persons|1577 |
+Northeast |Not stated |Persons|1371 |
+Midwest |White |Persons|3984 |
+Midwest |Black |Persons|835 |
+Midwest |American Indian/Eskimo|Persons|1 |
+Midwest |Asian/Pacific Islander|Persons|22 |
+Midwest |Other |Persons|38 |
+Midwest |Not stated |Persons|2063 |
+South |White |Persons|3918 |
+South |Black |Persons|2238 |
+South |American Indian/Eskimo|Persons|994 |
+South |Asian/Pacific Islander|Persons|430 |
+South |Other |Persons|260 |
+South |Not stated |Persons|10904 |
+All regions |All races |Male |17632 |
+All regions |All races |Female |22368 |
+All regions |White |Persons|13709 |
+All regions |Black |Persons|8434 |
+All regions |American Indian/Eskimo|Persons|1049 |
+All regions |Asian/Pacific Islander|Persons|595 |
+All regions |Other |Persons|1875 |
+All regions |Not stated |Persons|14338 |
+Northeast |All races |Persons|14313 |
+Midwest |All races |Persons|6943 |
+South |All races |Persons|18744 |
+All regions |All races |Persons|40000 |
+
+
+That took 0.355 seconds.
+
+##############################################################################
+
+#17b. Add proportions for a greater-than-two-dimensional table (a unique feature: even stats
+# packages don't do this!). Note the helpful column labelling (although it needs more work...).
+>>> print nhds.trivar(var1="race",var2="sex",var3="geog_region",proportions=1,printit=1)
+trivar(): Trivariate setup time 0.001 seconds.
+
+Trivariate summary derived from geog_region, race and sex columns in nhds dataset
+trivar(): Trivariate summary created in 0.250 seconds.
+Time in intersect() function was 0.181 seconds.
+Time in ArrayDict was 0.045 seconds.
+Time in loops was 0.249 seconds.
+Time in empty loops was 0.000 seconds.
+Time in count was 0.001 seconds.
+Time in take() was 0.000 seconds.
+
+ | | | | | | | | | |
+ | | | |Proportion |Proportion |Proportion |Proportion |Proportion |Proportion |
+Grouped |Grouped |Grouped| |for same Geographic Region|for same Geographic Region|for same Geographic Region|of All regions |of All regions |of All regions|
+by |by |by | |for same Race |of All races |of All races |for same Race |of All races |of All races |
+Geographic Region|Race |Sex |Frequency|of Persons |for same Sex |of Persons |for same Sex |for same Sex |of Persons |
+-----------------|----------------------|-------|---------|--------------------------|--------------------------|--------------------------|----------------|-----------------|--------------|
+Northeast |White |Male |2654 |0.457034613398 |0.414881975926 |0.185425836652 |0.193595448246 |0.150521778584 |0.06635 |
+Northeast |White |Female |3153 |0.542965386602 |0.398307225872 |0.220289247537 |0.229994893865 |0.140960300429 |0.078825 |
+Northeast |Black |Male |2367 |0.441522104085 |0.37001719556 |0.165374135401 |0.280649751008 |0.134244555354 |0.059175 |
+Northeast |Black |Female |2994 |0.558477895915 |0.378221323901 |0.209180465311 |0.354991700261 |0.13385193133 |0.07485 |
+Northeast |American Indian/Eskimo|Male |28 |0.518518518519 |0.004377051743 |0.00195626353665 |0.0266920877026 |0.00158802177858 |0.0007 |
+Northeast |American Indian/Eskimo|Female |26 |0.481481481481 |0.0032844871147 |0.00181653042688 |0.0247855100095 |0.00116237482117 |0.00065 |
+Northeast |Asian/Pacific Islander|Male |52 |0.363636363636 |0.00812881037987 |0.00363306085377 |0.0873949579832 |0.00294918330309 |0.0013 |
+Northeast |Asian/Pacific Islander|Female |91 |0.636363636364 |0.0114957049015 |0.0063578564941 |0.152941176471 |0.00406831187411 |0.002275 |
+Northeast |Other |Male |724 |0.459099556119 |0.113178052212 |0.0505833857332 |0.386133333333 |0.0410617059891 |0.0181 |
+Northeast |Other |Female |853 |0.540900443881 |0.107756442648 |0.0595961713128 |0.454933333333 |0.0381348354793 |0.021325 |
+Northeast |Not stated |Male |572 |0.417213712619 |0.0894169141785 |0.0399636693915 |0.0398939880039 |0.0324410163339 |0.0143 |
+Northeast |Not stated |Female |799 |0.582786287381 |0.100934815563 |0.0558233773493 |0.0557260426838 |0.0357206723891 |0.019975 |
+Midwest |White |Male |1711 |0.429467871486 |0.584757347915 |0.246435258534 |0.12480851995 |0.0970394736842 |0.042775 |
+Midwest |White |Female |2273 |0.570532128514 |0.565845158078 |0.32738009506 |0.165803486761 |0.101618383405 |0.056825 |
+Midwest |Black |Male |374 |0.447904191617 |0.127819548872 |0.0538672043785 |0.0443443206071 |0.0212114337568 |0.00935 |
+Midwest |Black |Female |461 |0.552095808383 |0.114762260393 |0.0663978107446 |0.0546597106948 |0.0206097997139 |0.011525 |
+Midwest |American Indian/Eskimo|Male |0 |0.0 |0.0 |0.0 |0.0 |0.0 |0.0 |
+Midwest |American Indian/Eskimo|Female |1 |1.0 |0.000248941996515 |0.000144029958231 |0.00095328884652|4.47067238913e-05|2.5e-05 |
+Midwest |Asian/Pacific Islander|Male |7 |0.318181818182 |0.00239234449761 |0.00100820970762 |0.0117647058824 |0.000397005444646|0.000175 |
+Midwest |Asian/Pacific Islander|Female |15 |0.681818181818 |0.00373412994772 |0.00216044937347 |0.0252100840336 |0.000670600858369|0.000375 |
+Midwest |Other |Male |21 |0.552631578947 |0.00717703349282 |0.00302462912286 |0.0112 |0.00119101633394 |0.000525 |
+Midwest |Other |Female |17 |0.447368421053 |0.00423201394075 |0.00244850928993 |0.00906666666667|0.000760014306152|0.000425 |
+Midwest |Not stated |Male |813 |0.394086282113 |0.277853725222 |0.117096356042 |0.0567024689636 |0.0461093466425 |0.020325 |
+Midwest |Not stated |Female |1250 |0.605913717887 |0.311177495644 |0.180037447789 |0.0871809178407 |0.0558834048641 |0.03125 |
+South |White |Male |1774 |0.452782031649 |0.213503430016 |0.0946436192915 |0.129404041141 |0.100612522686 |0.04435 |
+South |White |Female |2144 |0.547217968351 |0.2054623862 |0.114383269313 |0.156393610037 |0.0958512160229 |0.0536 |
+South |Black |Male |978 |0.436997319035 |0.117703694789 |0.0521766965429 |0.11595921271 |0.0554673321234 |0.02445 |
+South |Black |Female |1260 |0.563002680965 |0.120747484427 |0.0672215108835 |0.149395304719 |0.056330472103 |0.0315 |
+South |American Indian/Eskimo|Male |438 |0.440643863179 |0.05271392466 |0.0233674775928 |0.417540514776 |0.0248411978221 |0.01095 |
+South |American Indian/Eskimo|Female |556 |0.559356136821 |0.053282223287 |0.0296628254375 |0.530028598665 |0.0248569384835 |0.0139 |
+South |Asian/Pacific Islander|Male |181 |0.420930232558 |0.0217836081358 |0.00965642338882 |0.304201680672 |0.0102654264973 |0.004525 |
+South |Asian/Pacific Islander|Female |249 |0.579069767442 |0.0238620028749 |0.0132842509603 |0.418487394958 |0.0111319742489 |0.006225 |
+South |Other |Male |99 |0.380769230769 |0.0119147911903 |0.00528169014085 |0.0528 |0.00561479128857 |0.002475 |
+South |Other |Female |161 |0.619230769231 |0.0154288452324 |0.00858941527956 |0.0858666666667 |0.00719778254649 |0.004025 |
+South |Not stated |Male |4839 |0.443782098313 |0.58238055121 |0.258162612036 |0.337494769145 |0.274444192377 |0.120975 |
+South |Not stated |Female |6065 |0.556217901687 |0.581217057978 |0.323570209134 |0.423001813363 |0.271146280401 |0.151625 |
+All regions |White |Male |6139 |0.447808009337 |0.348173774955 |0.153475 |0.447808009337 |0.348173774955 |0.153475 |
+All regions |White |Female |7570 |0.552191990663 |0.338429899857 |0.18925 |0.552191990663 |0.338429899857 |0.18925 |
+All regions |Black |Male |3719 |0.440953284325 |0.210923321234 |0.092975 |0.440953284325 |0.210923321234 |0.092975 |
+All regions |Black |Female |4715 |0.559046715675 |0.210792203147 |0.117875 |0.559046715675 |0.210792203147 |0.117875 |
+All regions |American Indian/Eskimo|Male |466 |0.444232602479 |0.0264292196007 |0.01165 |0.444232602479 |0.0264292196007 |0.01165 |
+All regions |American Indian/Eskimo|Female |583 |0.555767397521 |0.0260640200286 |0.014575 |0.555767397521 |0.0260640200286 |0.014575 |
+All regions |Asian/Pacific Islander|Male |240 |0.403361344538 |0.013611615245 |0.006 |0.403361344538 |0.013611615245 |0.006 |
+All regions |Asian/Pacific Islander|Female |355 |0.596638655462 |0.0158708869814 |0.008875 |0.596638655462 |0.0158708869814 |0.008875 |
+All regions |Other |Male |844 |0.450133333333 |0.0478675136116 |0.0211 |0.450133333333 |0.0478675136116 |0.0211 |
+All regions |Other |Female |1031 |0.549866666667 |0.0460926323319 |0.025775 |0.549866666667 |0.0460926323319 |0.025775 |
+All regions |Not stated |Male |6224 |0.434091226112 |0.352994555354 |0.1556 |0.434091226112 |0.352994555354 |0.1556 |
+All regions |Not stated |Female |8114 |0.565908773888 |0.362750357654 |0.20285 |0.565908773888 |0.362750357654 |0.20285 |
+Northeast |All races |Male |6397 |0.446936351569 |1.0 |0.446936351569 |0.159925 |0.362806261343 |0.159925 |
+Northeast |All races |Female |7916 |0.553063648431 |1.0 |0.553063648431 |0.1979 |0.353898426323 |0.1979 |
+Midwest |All races |Male |2926 |0.421431657785 |1.0 |0.421431657785 |0.07315 |0.165948275862 |0.07315 |
+Midwest |All races |Female |4017 |0.578568342215 |1.0 |0.578568342215 |0.100425 |0.179586909871 |0.100425 |
+South |All races |Male |8309 |0.443288518993 |1.0 |0.443288518993 |0.207725 |0.471245462795 |0.207725 |
+South |All races |Female |10435 |0.556711481007 |1.0 |0.556711481007 |0.260875 |0.466514663805 |0.260875 |
+Northeast |White |Persons|5807 |1.0 |0.405715084189 |0.405715084189 |0.423590342111 |0.145175 |0.145175 |
+Northeast |Black |Persons|5361 |1.0 |0.374554600713 |0.374554600713 |0.635641451269 |0.134025 |0.134025 |
+Northeast |American Indian/Eskimo|Persons|54 |1.0 |0.00377279396353 |0.00377279396353 |0.0514775977121 |0.00135 |0.00135 |
+Northeast |Asian/Pacific Islander|Persons|143 |1.0 |0.00999091734787 |0.00999091734787 |0.240336134454 |0.003575 |0.003575 |
+Northeast |Other |Persons|1577 |1.0 |0.110179557046 |0.110179557046 |0.841066666667 |0.039425 |0.039425 |
+Northeast |Not stated |Persons|1371 |1.0 |0.0957870467407 |0.0957870467407 |0.0956200306877 |0.034275 |0.034275 |
+Midwest |White |Persons|3984 |1.0 |0.573815353594 |0.573815353594 |0.290612006711 |0.0996 |0.0996 |
+Midwest |Black |Persons|835 |1.0 |0.120265015123 |0.120265015123 |0.0990040313019 |0.020875 |0.020875 |
+Midwest |American Indian/Eskimo|Persons|1 |1.0 |0.000144029958231 |0.000144029958231 |0.00095328884652|2.5e-05 |2.5e-05 |
+Midwest |Asian/Pacific Islander|Persons|22 |1.0 |0.00316865908109 |0.00316865908109 |0.036974789916 |0.00055 |0.00055 |
+Midwest |Other |Persons|38 |1.0 |0.00547313841279 |0.00547313841279 |0.0202666666667 |0.00095 |0.00095 |
+Midwest |Not stated |Persons|2063 |1.0 |0.297133803831 |0.297133803831 |0.143883386804 |0.051575 |0.051575 |
+South |White |Persons|3918 |1.0 |0.209026888604 |0.209026888604 |0.285797651178 |0.09795 |0.09795 |
+South |Black |Persons|2238 |1.0 |0.119398207426 |0.119398207426 |0.265354517429 |0.05595 |0.05595 |
+South |American Indian/Eskimo|Persons|994 |1.0 |0.0530303030303 |0.0530303030303 |0.947569113441 |0.02485 |0.02485 |
+South |Asian/Pacific Islander|Persons|430 |1.0 |0.0229406743491 |0.0229406743491 |0.72268907563 |0.01075 |0.01075 |
+South |Other |Persons|260 |1.0 |0.0138711054204 |0.0138711054204 |0.138666666667 |0.0065 |0.0065 |
+South |Not stated |Persons|10904 |1.0 |0.581732821169 |0.581732821169 |0.760496582508 |0.2726 |0.2726 |
+All regions |All races |Male |17632 |0.4408 |1.0 |0.4408 |0.4408 |1.0 |0.4408 |
+All regions |All races |Female |22368 |0.5592 |1.0 |0.5592 |0.5592 |1.0 |0.5592 |
+All regions |White |Persons|13709 |1.0 |0.342725 |0.342725 |1.0 |0.342725 |0.342725 |
+All regions |Black |Persons|8434 |1.0 |0.21085 |0.21085 |1.0 |0.21085 |0.21085 |
+All regions |American Indian/Eskimo|Persons|1049 |1.0 |0.026225 |0.026225 |1.0 |0.026225 |0.026225 |
+All regions |Asian/Pacific Islander|Persons|595 |1.0 |0.014875 |0.014875 |1.0 |0.014875 |0.014875 |
+All regions |Other |Persons|1875 |1.0 |0.046875 |0.046875 |1.0 |0.046875 |0.046875 |
+All regions |Not stated |Persons|14338 |1.0 |0.35845 |0.35845 |1.0 |0.35845 |0.35845 |
+Northeast |All races |Persons|14313 |1.0 |1.0 |1.0 |0.357825 |0.357825 |0.357825 |
+Midwest |All races |Persons|6943 |1.0 |1.0 |1.0 |0.173575 |0.173575 |0.173575 |
+South |All races |Persons|18744 |1.0 |1.0 |1.0 |0.4686 |0.4686 |0.4686 |
+All regions |All races |Persons|40000 |1.0 |1.0 |1.0 |1.0 |1.0 |1.0 |
+
+
+That took 0.586 seconds.
+
+##############################################################################
+
+#18a. And now a four-way contingency table.
+# Same as SQL: select count(*) from nhds group by race, sex, geog_region, marital_status;
+>>> print nhds.quadrivar(var1="race",var2="sex",var3="geog_region",var4="marital_status",printit=1,allcalc=1)
+colload(): memory mapping of marital_status took 0.014 seconds.
+
+quadrivar(): Quadrivariate setup time 0.016 seconds.
+
+Quadrivariate summary derived from geog_region, marital_status, race and sex columns in nhds dataset
+quadrivar(): Quadrivariate summary created in 1.520 seconds.
+Time in intersect() function was 0.706 seconds.
+
+ | | | | |
+ | | | | |
+Grouped |Grouped |Grouped |Grouped| |
+by |by |by |by | |
+Geographic Region|Marital Status|Race |Sex |Frequency|
+-----------------|--------------|----------------------|-------|---------|
+Northeast |Married |White |Male |910 |
+Northeast |Married |White |Female |886 |
+Northeast |Married |Black |Male |129 |
+Northeast |Married |Black |Female |123 |
+Northeast |Married |American Indian/Eskimo|Male |3 |
+Northeast |Married |American Indian/Eskimo|Female |2 |
+Northeast |Married |Asian/Pacific Islander|Male |13 |
+Northeast |Married |Asian/Pacific Islander|Female |40 |
+Northeast |Married |Other |Male |97 |
+Northeast |Married |Other |Female |111 |
+Northeast |Married |Not stated |Male |161 |
+Northeast |Married |Not stated |Female |194 |
+Northeast |Single |White |Male |546 |
+Northeast |Single |White |Female |601 |
+Northeast |Single |Black |Male |258 |
+Northeast |Single |Black |Female |418 |
+Northeast |Single |American Indian/Eskimo|Male |5 |
+Northeast |Single |American Indian/Eskimo|Female |4 |
+Northeast |Single |Asian/Pacific Islander|Male |30 |
+Northeast |Single |Asian/Pacific Islander|Female |36 |
+Northeast |Single |Other |Male |186 |
+Northeast |Single |Other |Female |265 |
+Northeast |Single |Not stated |Male |293 |
+Northeast |Single |Not stated |Female |420 |
+Northeast |Widowed |White |Male |98 |
+Northeast |Widowed |White |Female |380 |
+Northeast |Widowed |Black |Male |14 |
+Northeast |Widowed |Black |Female |78 |
+Northeast |Widowed |American Indian/Eskimo|Male |3 |
+Northeast |Widowed |American Indian/Eskimo|Female |0 |
+Northeast |Widowed |Asian/Pacific Islander|Male |1 |
+Northeast |Widowed |Asian/Pacific Islander|Female |1 |
+Northeast |Widowed |Other |Male |7 |
+Northeast |Widowed |Other |Female |43 |
+Northeast |Widowed |Not stated |Male |4 |
+Northeast |Widowed |Not stated |Female |44 |
+Northeast |Divorced |White |Male |57 |
+Northeast |Divorced |White |Female |132 |
+Northeast |Divorced |Black |Male |15 |
+Northeast |Divorced |Black |Female |39 |
+Northeast |Divorced |American Indian/Eskimo|Male |2 |
+Northeast |Divorced |American Indian/Eskimo|Female |0 |
+Northeast |Divorced |Asian/Pacific Islander|Male |1 |
+Northeast |Divorced |Asian/Pacific Islander|Female |1 |
+Northeast |Divorced |Other |Male |8 |
+Northeast |Divorced |Other |Female |37 |
+Northeast |Divorced |Not stated |Male |14 |
+Northeast |Divorced |Not stated |Female |29 |
+Northeast |Separated |White |Male |22 |
+Northeast |Separated |White |Female |33 |
+Northeast |Separated |Black |Male |7 |
+Northeast |Separated |Black |Female |28 |
+Northeast |Separated |American Indian/Eskimo|Male |1 |
+Northeast |Separated |American Indian/Eskimo|Female |7 |
+Northeast |Separated |Asian/Pacific Islander|Male |1 |
+Northeast |Separated |Asian/Pacific Islander|Female |2 |
+Northeast |Separated |Other |Male |5 |
+Northeast |Separated |Other |Female |10 |
+Northeast |Separated |Not stated |Male |12 |
+Northeast |Separated |Not stated |Female |16 |
+Northeast |Not stated |White |Male |1021 |
+Northeast |Not stated |White |Female |1121 |
+Northeast |Not stated |Black |Male |1944 |
+Northeast |Not stated |Black |Female |2308 |
+Northeast |Not stated |American Indian/Eskimo|Male |14 |
+Northeast |Not stated |American Indian/Eskimo|Female |13 |
+Northeast |Not stated |Asian/Pacific Islander|Male |6 |
+Northeast |Not stated |Asian/Pacific Islander|Female |11 |
+Northeast |Not stated |Other |Male |421 |
+Northeast |Not stated |Other |Female |387 |
+Northeast |Not stated |Not stated |Male |88 |
+Northeast |Not stated |Not stated |Female |96 |
+Midwest |Married |White |Male |733 |
+Midwest |Married |White |Female |788 |
+Midwest |Married |Black |Male |83 |
+Midwest |Married |Black |Female |89 |
+Midwest |Married |American Indian/Eskimo|Male |0 |
+Midwest |Married |American Indian/Eskimo|Female |1 |
+Midwest |Married |Asian/Pacific Islander|Male |3 |
+Midwest |Married |Asian/Pacific Islander|Female |7 |
+Midwest |Married |Other |Male |4 |
+Midwest |Married |Other |Female |5 |
+Midwest |Married |Not stated |Male |328 |
+Midwest |Married |Not stated |Female |539 |
+Midwest |Single |White |Male |399 |
+Midwest |Single |White |Female |481 |
+Midwest |Single |Black |Male |195 |
+Midwest |Single |Black |Female |213 |
+Midwest |Single |American Indian/Eskimo|Male |0 |
+Midwest |Single |American Indian/Eskimo|Female |0 |
+Midwest |Single |Asian/Pacific Islander|Male |3 |
+Midwest |Single |Asian/Pacific Islander|Female |4 |
+Midwest |Single |Other |Male |9 |
+Midwest |Single |Other |Female |2 |
+Midwest |Single |Not stated |Male |343 |
+Midwest |Single |Not stated |Female |407 |
+Midwest |Widowed |White |Male |59 |
+Midwest |Widowed |White |Female |335 |
+Midwest |Widowed |Black |Male |7 |
+Midwest |Widowed |Black |Female |77 |
+Midwest |Widowed |American Indian/Eskimo|Male |0 |
+Midwest |Widowed |American Indian/Eskimo|Female |0 |
+Midwest |Widowed |Asian/Pacific Islander|Male |0 |
+Midwest |Widowed |Asian/Pacific Islander|Female |1 |
+Midwest |Widowed |Other |Male |0 |
+Midwest |Widowed |Other |Female |3 |
+Midwest |Widowed |Not stated |Male |27 |
+Midwest |Widowed |Not stated |Female |158 |
+Midwest |Divorced |White |Male |70 |
+Midwest |Divorced |White |Female |115 |
+Midwest |Divorced |Black |Male |14 |
+Midwest |Divorced |Black |Female |37 |
+Midwest |Divorced |American Indian/Eskimo|Male |0 |
+Midwest |Divorced |American Indian/Eskimo|Female |0 |
+Midwest |Divorced |Asian/Pacific Islander|Male |0 |
+Midwest |Divorced |Asian/Pacific Islander|Female |1 |
+Midwest |Divorced |Other |Male |0 |
+Midwest |Divorced |Other |Female |2 |
+Midwest |Divorced |Not stated |Male |28 |
+Midwest |Divorced |Not stated |Female |66 |
+Midwest |Separated |White |Male |7 |
+Midwest |Separated |White |Female |4 |
+Midwest |Separated |Black |Male |1 |
+Midwest |Separated |Black |Female |2 |
+Midwest |Separated |American Indian/Eskimo|Male |0 |
+Midwest |Separated |American Indian/Eskimo|Female |0 |
+Midwest |Separated |Asian/Pacific Islander|Male |0 |
+Midwest |Separated |Asian/Pacific Islander|Female |0 |
+Midwest |Separated |Other |Male |0 |
+Midwest |Separated |Other |Female |0 |
+Midwest |Separated |Not stated |Male |1 |
+Midwest |Separated |Not stated |Female |6 |
+Midwest |Not stated |White |Male |443 |
+Midwest |Not stated |White |Female |550 |
+Midwest |Not stated |Black |Male |74 |
+Midwest |Not stated |Black |Female |43 |
+Midwest |Not stated |American Indian/Eskimo|Male |0 |
+Midwest |Not stated |American Indian/Eskimo|Female |0 |
+Midwest |Not stated |Asian/Pacific Islander|Male |1 |
+Midwest |Not stated |Asian/Pacific Islander|Female |2 |
+Midwest |Not stated |Other |Male |8 |
+Midwest |Not stated |Other |Female |5 |
+Midwest |Not stated |Not stated |Male |86 |
+Midwest |Not stated |Not stated |Female |74 |
+South |Married |White |Male |325 |
+South |Married |White |Female |377 |
+South |Married |Black |Male |78 |
+South |Married |Black |Female |136 |
+South |Married |American Indian/Eskimo|Male |0 |
+South |Married |American Indian/Eskimo|Female |1 |
+South |Married |Asian/Pacific Islander|Male |16 |
+South |Married |Asian/Pacific Islander|Female |45 |
+South |Married |Other |Male |0 |
+South |Married |Other |Female |4 |
+South |Married |Not stated |Male |61 |
+South |Married |Not stated |Female |187 |
+South |Single |White |Male |457 |
+South |Single |White |Female |410 |
+South |Single |Black |Male |625 |
+South |Single |Black |Female |701 |
+South |Single |American Indian/Eskimo|Male |0 |
+South |Single |American Indian/Eskimo|Female |0 |
+South |Single |Asian/Pacific Islander|Male |38 |
+South |Single |Asian/Pacific Islander|Female |32 |
+South |Single |Other |Male |0 |
+South |Single |Other |Female |4 |
+South |Single |Not stated |Male |173 |
+South |Single |Not stated |Female |206 |
+South |Widowed |White |Male |31 |
+South |Widowed |White |Female |91 |
+South |Widowed |Black |Male |10 |
+South |Widowed |Black |Female |61 |
+South |Widowed |American Indian/Eskimo|Male |0 |
+South |Widowed |American Indian/Eskimo|Female |0 |
+South |Widowed |Asian/Pacific Islander|Male |1 |
+South |Widowed |Asian/Pacific Islander|Female |5 |
+South |Widowed |Other |Male |0 |
+South |Widowed |Other |Female |0 |
+South |Widowed |Not stated |Male |1 |
+South |Widowed |Not stated |Female |16 |
+South |Divorced |White |Male |53 |
+South |Divorced |White |Female |48 |
+South |Divorced |Black |Male |44 |
+South |Divorced |Black |Female |65 |
+South |Divorced |American Indian/Eskimo|Male |0 |
+South |Divorced |American Indian/Eskimo|Female |0 |
+South |Divorced |Asian/Pacific Islander|Male |2 |
+South |Divorced |Asian/Pacific Islander|Female |0 |
+South |Divorced |Other |Male |0 |
+South |Divorced |Other |Female |0 |
+South |Divorced |Not stated |Male |1 |
+South |Divorced |Not stated |Female |6 |
+South |Separated |White |Male |2 |
+South |Separated |White |Female |3 |
+South |Separated |Black |Male |2 |
+South |Separated |Black |Female |15 |
+South |Separated |American Indian/Eskimo|Male |0 |
+South |Separated |American Indian/Eskimo|Female |0 |
+South |Separated |Asian/Pacific Islander|Male |0 |
+South |Separated |Asian/Pacific Islander|Female |0 |
+South |Separated |Other |Male |0 |
+South |Separated |Other |Female |0 |
+South |Separated |Not stated |Male |1 |
+South |Separated |Not stated |Female |0 |
+South |Not stated |White |Male |906 |
+South |Not stated |White |Female |1215 |
+South |Not stated |Black |Male |219 |
+South |Not stated |Black |Female |282 |
+South |Not stated |American Indian/Eskimo|Male |438 |
+South |Not stated |American Indian/Eskimo|Female |555 |
+South |Not stated |Asian/Pacific Islander|Male |124 |
+South |Not stated |Asian/Pacific Islander|Female |167 |
+South |Not stated |Other |Male |99 |
+South |Not stated |Other |Female |153 |
+South |Not stated |Not stated |Male |4602 |
+South |Not stated |Not stated |Female |5650 |
+All regions |Married |White |Male |1968 |
+All regions |Married |White |Female |2051 |
+All regions |Married |Black |Male |290 |
+All regions |Married |Black |Female |348 |
+All regions |Married |American Indian/Eskimo|Male |3 |
+All regions |Married |American Indian/Eskimo|Female |4 |
+All regions |Married |Asian/Pacific Islander|Male |32 |
+All regions |Married |Asian/Pacific Islander|Female |92 |
+All regions |Married |Other |Male |101 |
+All regions |Married |Other |Female |120 |
+All regions |Married |Not stated |Male |550 |
+All regions |Married |Not stated |Female |920 |
+All regions |Single |White |Male |1402 |
+All regions |Single |White |Female |1492 |
+All regions |Single |Black |Male |1078 |
+All regions |Single |Black |Female |1332 |
+All regions |Single |American Indian/Eskimo|Male |5 |
+All regions |Single |American Indian/Eskimo|Female |4 |
+All regions |Single |Asian/Pacific Islander|Male |71 |
+All regions |Single |Asian/Pacific Islander|Female |72 |
+All regions |Single |Other |Male |195 |
+All regions |Single |Other |Female |271 |
+All regions |Single |Not stated |Male |809 |
+All regions |Single |Not stated |Female |1033 |
+All regions |Widowed |White |Male |188 |
+All regions |Widowed |White |Female |806 |
+All regions |Widowed |Black |Male |31 |
+All regions |Widowed |Black |Female |216 |
+All regions |Widowed |American Indian/Eskimo|Male |3 |
+All regions |Widowed |American Indian/Eskimo|Female |0 |
+All regions |Widowed |Asian/Pacific Islander|Male |2 |
+All regions |Widowed |Asian/Pacific Islander|Female |7 |
+All regions |Widowed |Other |Male |7 |
+All regions |Widowed |Other |Female |46 |
+All regions |Widowed |Not stated |Male |32 |
+All regions |Widowed |Not stated |Female |218 |
+All regions |Divorced |White |Male |180 |
+All regions |Divorced |White |Female |295 |
+All regions |Divorced |Black |Male |73 |
+All regions |Divorced |Black |Female |141 |
+All regions |Divorced |American Indian/Eskimo|Male |2 |
+All regions |Divorced |American Indian/Eskimo|Female |0 |
+All regions |Divorced |Asian/Pacific Islander|Male |3 |
+All regions |Divorced |Asian/Pacific Islander|Female |2 |
+All regions |Divorced |Other |Male |8 |
+All regions |Divorced |Other |Female |39 |
+All regions |Divorced |Not stated |Male |43 |
+All regions |Divorced |Not stated |Female |101 |
+All regions |Separated |White |Male |31 |
+All regions |Separated |White |Female |40 |
+All regions |Separated |Black |Male |10 |
+All regions |Separated |Black |Female |45 |
+All regions |Separated |American Indian/Eskimo|Male |1 |
+All regions |Separated |American Indian/Eskimo|Female |7 |
+All regions |Separated |Asian/Pacific Islander|Male |1 |
+All regions |Separated |Asian/Pacific Islander|Female |2 |
+All regions |Separated |Other |Male |5 |
+All regions |Separated |Other |Female |10 |
+All regions |Separated |Not stated |Male |14 |
+All regions |Separated |Not stated |Female |22 |
+All regions |Not stated |White |Male |2370 |
+All regions |Not stated |White |Female |2886 |
+All regions |Not stated |Black |Male |2237 |
+All regions |Not stated |Black |Female |2633 |
+All regions |Not stated |American Indian/Eskimo|Male |452 |
+All regions |Not stated |American Indian/Eskimo|Female |568 |
+All regions |Not stated |Asian/Pacific Islander|Male |131 |
+All regions |Not stated |Asian/Pacific Islander|Female |180 |
+All regions |Not stated |Other |Male |528 |
+All regions |Not stated |Other |Female |545 |
+All regions |Not stated |Not stated |Male |4776 |
+All regions |Not stated |Not stated |Female |5820 |
+Northeast |All |White |Male |2654 |
+Northeast |All |White |Female |3153 |
+Northeast |All |Black |Male |2367 |
+Northeast |All |Black |Female |2994 |
+Northeast |All |American Indian/Eskimo|Male |28 |
+Northeast |All |American Indian/Eskimo|Female |26 |
+Northeast |All |Asian/Pacific Islander|Male |52 |
+Northeast |All |Asian/Pacific Islander|Female |91 |
+Northeast |All |Other |Male |724 |
+Northeast |All |Other |Female |853 |
+Northeast |All |Not stated |Male |572 |
+Northeast |All |Not stated |Female |799 |
+Midwest |All |White |Male |1711 |
+Midwest |All |White |Female |2273 |
+Midwest |All |Black |Male |374 |
+Midwest |All |Black |Female |461 |
+Midwest |All |American Indian/Eskimo|Male |0 |
+Midwest |All |American Indian/Eskimo|Female |1 |
+Midwest |All |Asian/Pacific Islander|Male |7 |
+Midwest |All |Asian/Pacific Islander|Female |15 |
+Midwest |All |Other |Male |21 |
+Midwest |All |Other |Female |17 |
+Midwest |All |Not stated |Male |813 |
+Midwest |All |Not stated |Female |1250 |
+South |All |White |Male |1774 |
+South |All |White |Female |2144 |
+South |All |Black |Male |978 |
+South |All |Black |Female |1260 |
+South |All |American Indian/Eskimo|Male |438 |
+South |All |American Indian/Eskimo|Female |556 |
+South |All |Asian/Pacific Islander|Male |181 |
+South |All |Asian/Pacific Islander|Female |249 |
+South |All |Other |Male |99 |
+South |All |Other |Female |161 |
+South |All |Not stated |Male |4839 |
+South |All |Not stated |Female |6065 |
+Northeast |Married |All races |Male |1313 |
+Northeast |Married |All races |Female |1356 |
+Northeast |Single |All races |Male |1318 |
+Northeast |Single |All races |Female |1744 |
+Northeast |Widowed |All races |Male |127 |
+Northeast |Widowed |All races |Female |546 |
+Northeast |Divorced |All races |Male |97 |
+Northeast |Divorced |All races |Female |238 |
+Northeast |Separated |All races |Male |48 |
+Northeast |Separated |All races |Female |96 |
+Northeast |Not stated |All races |Male |3494 |
+Northeast |Not stated |All races |Female |3936 |
+Midwest |Married |All races |Male |1151 |
+Midwest |Married |All races |Female |1429 |
+Midwest |Single |All races |Male |949 |
+Midwest |Single |All races |Female |1107 |
+Midwest |Widowed |All races |Male |93 |
+Midwest |Widowed |All races |Female |574 |
+Midwest |Divorced |All races |Male |112 |
+Midwest |Divorced |All races |Female |221 |
+Midwest |Separated |All races |Male |9 |
+Midwest |Separated |All races |Female |12 |
+Midwest |Not stated |All races |Male |612 |
+Midwest |Not stated |All races |Female |674 |
+South |Married |All races |Male |480 |
+South |Married |All races |Female |750 |
+South |Single |All races |Male |1293 |
+South |Single |All races |Female |1353 |
+South |Widowed |All races |Male |43 |
+South |Widowed |All races |Female |173 |
+South |Divorced |All races |Male |100 |
+South |Divorced |All races |Female |119 |
+South |Separated |All races |Male |5 |
+South |Separated |All races |Female |18 |
+South |Not stated |All races |Male |6388 |
+South |Not stated |All races |Female |8022 |
+Northeast |Married |White |Persons|1796 |
+Northeast |Married |Black |Persons|252 |
+Northeast |Married |American Indian/Eskimo|Persons|5 |
+Northeast |Married |Asian/Pacific Islander|Persons|53 |
+Northeast |Married |Other |Persons|208 |
+Northeast |Married |Not stated |Persons|355 |
+Northeast |Single |White |Persons|1147 |
+Northeast |Single |Black |Persons|676 |
+Northeast |Single |American Indian/Eskimo|Persons|9 |
+Northeast |Single |Asian/Pacific Islander|Persons|66 |
+Northeast |Single |Other |Persons|451 |
+Northeast |Single |Not stated |Persons|713 |
+Northeast |Widowed |White |Persons|478 |
+Northeast |Widowed |Black |Persons|92 |
+Northeast |Widowed |American Indian/Eskimo|Persons|3 |
+Northeast |Widowed |Asian/Pacific Islander|Persons|2 |
+Northeast |Widowed |Other |Persons|50 |
+Northeast |Widowed |Not stated |Persons|48 |
+Northeast |Divorced |White |Persons|189 |
+Northeast |Divorced |Black |Persons|54 |
+Northeast |Divorced |American Indian/Eskimo|Persons|2 |
+Northeast |Divorced |Asian/Pacific Islander|Persons|2 |
+Northeast |Divorced |Other |Persons|45 |
+Northeast |Divorced |Not stated |Persons|43 |
+Northeast |Separated |White |Persons|55 |
+Northeast |Separated |Black |Persons|35 |
+Northeast |Separated |American Indian/Eskimo|Persons|8 |
+Northeast |Separated |Asian/Pacific Islander|Persons|3 |
+Northeast |Separated |Other |Persons|15 |
+Northeast |Separated |Not stated |Persons|28 |
+Northeast |Not stated |White |Persons|2142 |
+Northeast |Not stated |Black |Persons|4252 |
+Northeast |Not stated |American Indian/Eskimo|Persons|27 |
+Northeast |Not stated |Asian/Pacific Islander|Persons|17 |
+Northeast |Not stated |Other |Persons|808 |
+Northeast |Not stated |Not stated |Persons|184 |
+Midwest |Married |White |Persons|1521 |
+Midwest |Married |Black |Persons|172 |
+Midwest |Married |American Indian/Eskimo|Persons|1 |
+Midwest |Married |Asian/Pacific Islander|Persons|10 |
+Midwest |Married |Other |Persons|9 |
+Midwest |Married |Not stated |Persons|867 |
+Midwest |Single |White |Persons|880 |
+Midwest |Single |Black |Persons|408 |
+Midwest |Single |American Indian/Eskimo|Persons|0 |
+Midwest |Single |Asian/Pacific Islander|Persons|7 |
+Midwest |Single |Other |Persons|11 |
+Midwest |Single |Not stated |Persons|750 |
+Midwest |Widowed |White |Persons|394 |
+Midwest |Widowed |Black |Persons|84 |
+Midwest |Widowed |American Indian/Eskimo|Persons|0 |
+Midwest |Widowed |Asian/Pacific Islander|Persons|1 |
+Midwest |Widowed |Other |Persons|3 |
+Midwest |Widowed |Not stated |Persons|185 |
+Midwest |Divorced |White |Persons|185 |
+Midwest |Divorced |Black |Persons|51 |
+Midwest |Divorced |American Indian/Eskimo|Persons|0 |
+Midwest |Divorced |Asian/Pacific Islander|Persons|1 |
+Midwest |Divorced |Other |Persons|2 |
+Midwest |Divorced |Not stated |Persons|94 |
+Midwest |Separated |White |Persons|11 |
+Midwest |Separated |Black |Persons|3 |
+Midwest |Separated |American Indian/Eskimo|Persons|0 |
+Midwest |Separated |Asian/Pacific Islander|Persons|0 |
+Midwest |Separated |Other |Persons|0 |
+Midwest |Separated |Not stated |Persons|7 |
+Midwest |Not stated |White |Persons|993 |
+Midwest |Not stated |Black |Persons|117 |
+Midwest |Not stated |American Indian/Eskimo|Persons|0 |
+Midwest |Not stated |Asian/Pacific Islander|Persons|3 |
+Midwest |Not stated |Other |Persons|13 |
+Midwest |Not stated |Not stated |Persons|160 |
+South |Married |White |Persons|702 |
+South |Married |Black |Persons|214 |
+South |Married |American Indian/Eskimo|Persons|1 |
+South |Married |Asian/Pacific Islander|Persons|61 |
+South |Married |Other |Persons|4 |
+South |Married |Not stated |Persons|248 |
+South |Single |White |Persons|867 |
+South |Single |Black |Persons|1326 |
+South |Single |American Indian/Eskimo|Persons|0 |
+South |Single |Asian/Pacific Islander|Persons|70 |
+South |Single |Other |Persons|4 |
+South |Single |Not stated |Persons|379 |
+South |Widowed |White |Persons|122 |
+South |Widowed |Black |Persons|71 |
+South |Widowed |American Indian/Eskimo|Persons|0 |
+South |Widowed |Asian/Pacific Islander|Persons|6 |
+South |Widowed |Other |Persons|0 |
+South |Widowed |Not stated |Persons|17 |
+South |Divorced |White |Persons|101 |
+South |Divorced |Black |Persons|109 |
+South |Divorced |American Indian/Eskimo|Persons|0 |
+South |Divorced |Asian/Pacific Islander|Persons|2 |
+South |Divorced |Other |Persons|0 |
+South |Divorced |Not stated |Persons|7 |
+South |Separated |White |Persons|5 |
+South |Separated |Black |Persons|17 |
+South |Separated |American Indian/Eskimo|Persons|0 |
+South |Separated |Asian/Pacific Islander|Persons|0 |
+South |Separated |Other |Persons|0 |
+South |Separated |Not stated |Persons|1 |
+South |Not stated |White |Persons|2121 |
+South |Not stated |Black |Persons|501 |
+South |Not stated |American Indian/Eskimo|Persons|993 |
+South |Not stated |Asian/Pacific Islander|Persons|291 |
+South |Not stated |Other |Persons|252 |
+South |Not stated |Not stated |Persons|10252 |
+All regions |All |White |Male |6139 |
+All regions |All |White |Female |7570 |
+All regions |All |Black |Male |3719 |
+All regions |All |Black |Female |4715 |
+All regions |All |American Indian/Eskimo|Male |466 |
+All regions |All |American Indian/Eskimo|Female |583 |
+All regions |All |Asian/Pacific Islander|Male |240 |
+All regions |All |Asian/Pacific Islander|Female |355 |
+All regions |All |Other |Male |844 |
+All regions |All |Other |Female |1031 |
+All regions |All |Not stated |Male |6224 |
+All regions |All |Not stated |Female |8114 |
+All regions |Married |All races |Male |2944 |
+All regions |Married |All races |Female |3535 |
+All regions |Single |All races |Male |3560 |
+All regions |Single |All races |Female |4204 |
+All regions |Widowed |All races |Male |263 |
+All regions |Widowed |All races |Female |1293 |
+All regions |Divorced |All races |Male |309 |
+All regions |Divorced |All races |Female |578 |
+All regions |Separated |All races |Male |62 |
+All regions |Separated |All races |Female |126 |
+All regions |Not stated |All races |Male |10494 |
+All regions |Not stated |All races |Female |12632 |
+All regions |Married |White |Persons|4019 |
+All regions |Married |Black |Persons|638 |
+All regions |Married |American Indian/Eskimo|Persons|7 |
+All regions |Married |Asian/Pacific Islander|Persons|124 |
+All regions |Married |Other |Persons|221 |
+All regions |Married |Not stated |Persons|1470 |
+All regions |Single |White |Persons|2894 |
+All regions |Single |Black |Persons|2410 |
+All regions |Single |American Indian/Eskimo|Persons|9 |
+All regions |Single |Asian/Pacific Islander|Persons|143 |
+All regions |Single |Other |Persons|466 |
+All regions |Single |Not stated |Persons|1842 |
+All regions |Widowed |White |Persons|994 |
+All regions |Widowed |Black |Persons|247 |
+All regions |Widowed |American Indian/Eskimo|Persons|3 |
+All regions |Widowed |Asian/Pacific Islander|Persons|9 |
+All regions |Widowed |Other |Persons|53 |
+All regions |Widowed |Not stated |Persons|250 |
+All regions |Divorced |White |Persons|475 |
+All regions |Divorced |Black |Persons|214 |
+All regions |Divorced |American Indian/Eskimo|Persons|2 |
+All regions |Divorced |Asian/Pacific Islander|Persons|5 |
+All regions |Divorced |Other |Persons|47 |
+All regions |Divorced |Not stated |Persons|144 |
+All regions |Separated |White |Persons|71 |
+All regions |Separated |Black |Persons|55 |
+All regions |Separated |American Indian/Eskimo|Persons|8 |
+All regions |Separated |Asian/Pacific Islander|Persons|3 |
+All regions |Separated |Other |Persons|15 |
+All regions |Separated |Not stated |Persons|36 |
+All regions |Not stated |White |Persons|5256 |
+All regions |Not stated |Black |Persons|4870 |
+All regions |Not stated |American Indian/Eskimo|Persons|1020 |
+All regions |Not stated |Asian/Pacific Islander|Persons|311 |
+All regions |Not stated |Other |Persons|1073 |
+All regions |Not stated |Not stated |Persons|10596 |
+Northeast |All |All races |Male |6397 |
+Northeast |All |All races |Female |7916 |
+Midwest |All |All races |Male |2926 |
+Midwest |All |All races |Female |4017 |
+South |All |All races |Male |8309 |
+South |All |All races |Female |10435 |
+Northeast |All |White |Persons|5807 |
+Northeast |All |Black |Persons|5361 |
+Northeast |All |American Indian/Eskimo|Persons|54 |
+Northeast |All |Asian/Pacific Islander|Persons|143 |
+Northeast |All |Other |Persons|1577 |
+Northeast |All |Not stated |Persons|1371 |
+Midwest |All |White |Persons|3984 |
+Midwest |All |Black |Persons|835 |
+Midwest |All |American Indian/Eskimo|Persons|1 |
+Midwest |All |Asian/Pacific Islander|Persons|22 |
+Midwest |All |Other |Persons|38 |
+Midwest |All |Not stated |Persons|2063 |
+South |All |White |Persons|3918 |
+South |All |Black |Persons|2238 |
+South |All |American Indian/Eskimo|Persons|994 |
+South |All |Asian/Pacific Islander|Persons|430 |
+South |All |Other |Persons|260 |
+South |All |Not stated |Persons|10904 |
+Northeast |Married |All races |Persons|2669 |
+Northeast |Single |All races |Persons|3062 |
+Northeast |Widowed |All races |Persons|673 |
+Northeast |Divorced |All races |Persons|335 |
+Northeast |Separated |All races |Persons|144 |
+Northeast |Not stated |All races |Persons|7430 |
+Midwest |Married |All races |Persons|2580 |
+Midwest |Single |All races |Persons|2056 |
+Midwest |Widowed |All races |Persons|667 |
+Midwest |Divorced |All races |Persons|333 |
+Midwest |Separated |All races |Persons|21 |
+Midwest |Not stated |All races |Persons|1286 |
+South |Married |All races |Persons|1230 |
+South |Single |All races |Persons|2646 |
+South |Widowed |All races |Persons|216 |
+South |Divorced |All races |Persons|219 |
+South |Separated |All races |Persons|23 |
+South |Not stated |All races |Persons|14410 |
+All regions |All |All races |Male |17632 |
+All regions |All |All races |Female |22368 |
+Northeast |All |All races |Persons|14313 |
+Midwest |All |All races |Persons|6943 |
+South |All |All races |Persons|18744 |
+All regions |Married |All races |Persons|6479 |
+All regions |Single |All races |Persons|7764 |
+All regions |Widowed |All races |Persons|1556 |
+All regions |Divorced |All races |Persons|887 |
+All regions |Separated |All races |Persons|188 |
+All regions |Not stated |All races |Persons|23126 |
+All regions |All |White |Persons|13709 |
+All regions |All |Black |Persons|8434 |
+All regions |All |American Indian/Eskimo|Persons|1049 |
+All regions |All |Asian/Pacific Islander|Persons|595 |
+All regions |All |Other |Persons|1875 |
+All regions |All |Not stated |Persons|14338 |
+All regions |All |All races |Persons|40000 |
+
+
+That took 2.932 seconds.
+
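+# Note (illustrative, added for clarity): the "All regions", "All",
+# "All races" and "Persons" rows above are the marginal totals that
+# allcalc=1 asks quadrivar() to emit alongside the detail cells -- each
+# detail cell contributes to every row in which one or more of its four
+# keys is collapsed to its "All ..." level, up to the grand total of
+# 40000 records. A minimal sketch of that accumulation (not SOOMv0's
+# actual implementation):
+#
+#     from itertools import product
+#
+#     ALL_LEVELS = ("All regions", "All", "All races", "Persons")
+#
+#     def marginals(cells):
+#         # cells maps (region, marital, race, sex) -> frequency for the
+#         # detail cells; the result also holds every marginal total.
+#         totals = {}
+#         for key, freq in cells.items():
+#             choices = zip(key, ALL_LEVELS)  # (detail level, "All" level)
+#             for combo in product(*choices):
+#                 totals[combo] = totals.get(combo, 0) + freq
+#         return totals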
+##############################################################################
+
+#18b. Add mean and median age to the table above.
+>>> print nhds.quadrivar(var1="race",var2="sex",var3="geog_region",var4="marital_status",cellvar="age",mean=1,median=1,printit=1,allcalc=1)
+quadrivar(): Quadrivariate setup time 0.003 seconds.
+
+Quadrivariate summary derived from geog_region, marital_status, race and sex columns in nhds dataset
+quadrivar(): Quadrivariate summary created in 4.657 seconds.
+Time in intersect() function was 0.539 seconds.
+
+ | | | | | | |
+ | | | | | | |
+Grouped |Grouped |Grouped |Grouped| |Median |Mean |
+by |by |by |by | |of |of |
+Geographic Region|Marital Status|Race |Sex |Frequency|Age (years) |Age (years) |
+-----------------|--------------|----------------------|-------|---------|---------------|-------------|
+Northeast |Married |White |Male |910 |65.0 |63.2252747253|
+Northeast |Married |White |Female |886 |49.0 |50.8972911964|
+Northeast |Married |Black |Male |129 |58.0 |58.5503875969|
+Northeast |Married |Black |Female |123 |43.0 |46.9837398374|
+Northeast |Married |American Indian/Eskimo|Male |3 |67.0 |63.3333333333|
+Northeast |Married |American Indian/Eskimo|Female |2 |52.5 |52.5 |
+Northeast |Married |Asian/Pacific Islander|Male |13 |66.0 |61.1538461538|
+Northeast |Married |Asian/Pacific Islander|Female |40 |38.0 |43.1 |
+Northeast |Married |Other |Male |97 |62.0 |58.9484536082|
+Northeast |Married |Other |Female |111 |38.0 |44.6666666667|
+Northeast |Married |Not stated |Male |161 |65.0 |61.0869565217|
+Northeast |Married |Not stated |Female |194 |40.5 |45.0360824742|
+Northeast |Single |White |Male |546 |25.0 |27.909374193 |
+Northeast |Single |White |Female |601 |32.0 |33.6883115182|
+Northeast |Single |Black |Male |258 |30.0 |28.5616560283|
+Northeast |Single |Black |Female |418 |32.0 |34.5784999296|
+Northeast |Single |American Indian/Eskimo|Male |5 |0.916666666667 |19.2166666667|
+Northeast |Single |American Indian/Eskimo|Female |4 |7.0 |9.25 |
+Northeast |Single |Asian/Pacific Islander|Male |30 |0.0 |4.82786903947|
+Northeast |Single |Asian/Pacific Islander|Female |36 |8.5 |17.6180555556|
+Northeast |Single |Other |Male |186 |12.0 |21.3201279872|
+Northeast |Single |Other |Female |265 |23.0 |25.1600480415|
+Northeast |Single |Not stated |Male |293 |15.0 |22.6107305296|
+Northeast |Single |Not stated |Female |420 |23.5 |27.3572536749|
+Northeast |Widowed |White |Male |98 |79.0 |78.0816326531|
+Northeast |Widowed |White |Female |380 |79.0 |77.7789473684|
+Northeast |Widowed |Black |Male |14 |74.0 |73.3571428571|
+Northeast |Widowed |Black |Female |78 |76.5 |74.5897435897|
+Northeast |Widowed |American Indian/Eskimo|Male |3 |69.0 |69.3333333333|
+Northeast |Widowed |American Indian/Eskimo|Female |0 |None |None |
+Northeast |Widowed |Asian/Pacific Islander|Male |1 |76.0 |76.0 |
+Northeast |Widowed |Asian/Pacific Islander|Female |1 |76.0 |76.0 |
+Northeast |Widowed |Other |Male |7 |76.0 |75.1428571429|
+Northeast |Widowed |Other |Female |43 |74.0 |72.0465116279|
+Northeast |Widowed |Not stated |Male |4 |79.0 |68.75 |
+Northeast |Widowed |Not stated |Female |44 |80.5 |77.2272727273|
+Northeast |Divorced |White |Male |57 |61.0 |62.0526315789|
+Northeast |Divorced |White |Female |132 |55.0 |55.0909090909|
+Northeast |Divorced |Black |Male |15 |57.0 |59.1333333333|
+Northeast |Divorced |Black |Female |39 |60.0 |57.3846153846|
+Northeast |Divorced |American Indian/Eskimo|Male |2 |42.0 |42.0 |
+Northeast |Divorced |American Indian/Eskimo|Female |0 |None |None |
+Northeast |Divorced |Asian/Pacific Islander|Male |1 |73.0 |73.0 |
+Northeast |Divorced |Asian/Pacific Islander|Female |1 |67.0 |67.0 |
+Northeast |Divorced |Other |Male |8 |60.0 |58.25 |
+Northeast |Divorced |Other |Female |37 |51.0 |51.1081081081|
+Northeast |Divorced |Not stated |Male |14 |51.5 |47.5739708614|
+Northeast |Divorced |Not stated |Female |29 |59.0 |58.1724137931|
+Northeast |Separated |White |Male |22 |70.0 |64.9090909091|
+Northeast |Separated |White |Female |33 |74.0 |65.9116161616|
+Northeast |Separated |Black |Male |7 |62.0 |63.2857142857|
+Northeast |Separated |Black |Female |28 |69.0 |67.1428571429|
+Northeast |Separated |American Indian/Eskimo|Male |1 |49.0 |49.0 |
+Northeast |Separated |American Indian/Eskimo|Female |7 |52.0 |48.8571428571|
+Northeast |Separated |Asian/Pacific Islander|Male |1 |53.0 |53.0 |
+Northeast |Separated |Asian/Pacific Islander|Female |2 |35.0 |35.0 |
+Northeast |Separated |Other |Male |5 |51.0 |49.8 |
+Northeast |Separated |Other |Female |10 |48.0 |46.1 |
+Northeast |Separated |Not stated |Male |12 |77.0 |77.3333333333|
+Northeast |Separated |Not stated |Female |16 |57.5 |60.375 |
+Northeast |Not stated |White |Male |1021 |53.0 |48.3659036349|
+Northeast |Not stated |White |Female |1121 |46.0 |46.1006593678|
+Northeast |Not stated |Black |Male |1944 |32.0 |31.3279618504|
+Northeast |Not stated |Black |Female |2308 |31.5 |34.1187466414|
+Northeast |Not stated |American Indian/Eskimo|Male |14 |35.5 |29.0892857143|
+Northeast |Not stated |American Indian/Eskimo|Female |13 |30.0 |33.9038461538|
+Northeast |Not stated |Asian/Pacific Islander|Male |6 |39.0 |34.5 |
+Northeast |Not stated |Asian/Pacific Islander|Female |11 |22.0 |24.1818181818|
+Northeast |Not stated |Other |Male |421 |34.0 |34.2104218794|
+Northeast |Not stated |Other |Female |387 |30.0 |34.0586519976|
+Northeast |Not stated |Not stated |Male |88 |55.0 |52.709280303 |
+Northeast |Not stated |Not stated |Female |96 |54.5 |53.5421514944|
+Midwest |Married |White |Male |733 |64.0 |63.0536607549|
+Midwest |Married |White |Female |788 |49.0 |49.8073181049|
+Midwest |Married |Black |Male |83 |63.0 |60.7590361446|
+Midwest |Married |Black |Female |89 |46.0 |44.3146067416|
+Midwest |Married |American Indian/Eskimo|Male |0 |None |None |
+Midwest |Married |American Indian/Eskimo|Female |1 |69.0 |69.0 |
+Midwest |Married |Asian/Pacific Islander|Male |3 |63.0 |55.6666666667|
+Midwest |Married |Asian/Pacific Islander|Female |7 |59.0 |54.1428571429|
+Midwest |Married |Other |Male |4 |71.5 |68.5 |
+Midwest |Married |Other |Female |5 |42.0 |42.0 |
+Midwest |Married |Not stated |Male |328 |60.0 |60.3963414634|
+Midwest |Married |Not stated |Female |539 |33.0 |41.5584415584|
+Midwest |Single |White |Male |399 |7.0 |19.2177517716|
+Midwest |Single |White |Female |481 |18.0 |22.3429905186|
+Midwest |Single |Black |Male |195 |33.0 |30.3978772881|
+Midwest |Single |Black |Female |213 |21.0 |24.9679314766|
+Midwest |Single |American Indian/Eskimo|Male |0 |None |None |
+Midwest |Single |American Indian/Eskimo|Female |0 |None |None |
+Midwest |Single |Asian/Pacific Islander|Male |3 |0.0 |13.0 |
+Midwest |Single |Asian/Pacific Islander|Female |4 |24.5 |32.75 |
+Midwest |Single |Other |Male |9 |0.0 |13.8888888889|
+Midwest |Single |Other |Female |2 |37.5 |37.5 |
+Midwest |Single |Not stated |Male |343 |0.0 |12.4288029087|
+Midwest |Single |Not stated |Female |407 |0.75 |14.2958648867|
+Midwest |Widowed |White |Male |59 |78.0 |76.8644067797|
+Midwest |Widowed |White |Female |335 |78.0 |77.0119402985|
+Midwest |Widowed |Black |Male |7 |90.0 |85.5714285714|
+Midwest |Widowed |Black |Female |77 |75.0 |72.0 |
+Midwest |Widowed |American Indian/Eskimo|Male |0 |None |None |
+Midwest |Widowed |American Indian/Eskimo|Female |0 |None |None |
+Midwest |Widowed |Asian/Pacific Islander|Male |0 |None |None |
+Midwest |Widowed |Asian/Pacific Islander|Female |1 |78.0 |78.0 |
+Midwest |Widowed |Other |Male |0 |None |None |
+Midwest |Widowed |Other |Female |3 |70.0 |78.6666666667|
+Midwest |Widowed |Not stated |Male |27 |79.0 |77.5185185185|
+Midwest |Widowed |Not stated |Female |158 |78.0 |76.8734177215|
+Midwest |Divorced |White |Male |70 |51.0 |54.4285714286|
+Midwest |Divorced |White |Female |115 |59.0 |57.1826086957|
+Midwest |Divorced |Black |Male |14 |60.0 |58.5 |
+Midwest |Divorced |Black |Female |37 |52.0 |54.4324324324|
+Midwest |Divorced |American Indian/Eskimo|Male |0 |None |None |
+Midwest |Divorced |American Indian/Eskimo|Female |0 |None |None |
+Midwest |Divorced |Asian/Pacific Islander|Male |0 |None |None |
+Midwest |Divorced |Asian/Pacific Islander|Female |1 |60.0 |60.0 |
+Midwest |Divorced |Other |Male |0 |None |None |
+Midwest |Divorced |Other |Female |2 |56.5 |56.5 |
+Midwest |Divorced |Not stated |Male |28 |47.0 |48.2857142857|
+Midwest |Divorced |Not stated |Female |66 |45.0 |47.2424242424|
+Midwest |Separated |White |Male |7 |46.0 |49.7142857143|
+Midwest |Separated |White |Female |4 |23.5 |25.0 |
+Midwest |Separated |Black |Male |1 |33.0 |33.0 |
+Midwest |Separated |Black |Female |2 |46.5 |46.5 |
+Midwest |Separated |American Indian/Eskimo|Male |0 |None |None |
+Midwest |Separated |American Indian/Eskimo|Female |0 |None |None |
+Midwest |Separated |Asian/Pacific Islander|Male |0 |None |None |
+Midwest |Separated |Asian/Pacific Islander|Female |0 |None |None |
+Midwest |Separated |Other |Male |0 |None |None |
+Midwest |Separated |Other |Female |0 |None |None |
+Midwest |Separated |Not stated |Male |1 |71.0 |71.0 |
+Midwest |Separated |Not stated |Female |6 |29.0 |39.1666666667|
+Midwest |Not stated |White |Male |443 |63.0 |56.9059566795|
+Midwest |Not stated |White |Female |550 |68.0 |59.8014532388|
+Midwest |Not stated |Black |Male |74 |44.5 |45.7162162162|
+Midwest |Not stated |Black |Female |43 |33.0 |36.1627906977|
+Midwest |Not stated |American Indian/Eskimo|Male |0 |None |None |
+Midwest |Not stated |American Indian/Eskimo|Female |0 |None |None |
+Midwest |Not stated |Asian/Pacific Islander|Male |1 |33.0 |33.0 |
+Midwest |Not stated |Asian/Pacific Islander|Female |2 |45.0 |45.0 |
+Midwest |Not stated |Other |Male |8 |51.5 |47.1354166667|
+Midwest |Not stated |Other |Female |5 |22.0 |18.2 |
+Midwest |Not stated |Not stated |Male |86 |47.0 |43.4831630135|
+Midwest |Not stated |Not stated |Female |74 |41.5 |45.1891891892|
+South |Married |White |Male |325 |60.0 |59.0092307692|
+South |Married |White |Female |377 |41.0 |45.2970822281|
+South |Married |Black |Male |78 |56.5 |55.2564102564|
+South |Married |Black |Female |136 |43.0 |44.5441176471|
+South |Married |American Indian/Eskimo|Male |0 |None |None |
+South |Married |American Indian/Eskimo|Female |1 |38.0 |38.0 |
+South |Married |Asian/Pacific Islander|Male |16 |59.5 |56.375 |
+South |Married |Asian/Pacific Islander|Female |45 |36.0 |39.4888888889|
+South |Married |Other |Male |0 |None |None |
+South |Married |Other |Female |4 |34.5 |34.5 |
+South |Married |Not stated |Male |61 |58.0 |57.4754098361|
+South |Married |Not stated |Female |187 |31.0 |33.8502673797|
+South |Single |White |Male |457 |25.0 |25.133028021 |
+South |Single |White |Female |410 |17.0 |18.9354831305|
+South |Single |Black |Male |625 |24.0 |24.902796167 |
+South |Single |Black |Female |701 |21.0 |22.9353002116|
+South |Single |American Indian/Eskimo|Male |0 |None |None |
+South |Single |American Indian/Eskimo|Female |0 |None |None |
+South |Single |Asian/Pacific Islander|Male |38 |3.0 |4.28289473684|
+South |Single |Asian/Pacific Islander|Female |32 |0.0 |7.9375 |
+South |Single |Other |Male |0 |None |None |
+South |Single |Other |Female |4 |17.5 |19.0 |
+South |Single |Not stated |Male |173 |0.0 |4.8530531784 |
+South |Single |Not stated |Female |206 |0.0602327173169|9.66980572556|
+South |Widowed |White |Male |31 |79.0 |73.9032258065|
+South |Widowed |White |Female |91 |77.0 |75.5824175824|
+South |Widowed |Black |Male |10 |73.0 |74.0 |
+South |Widowed |Black |Female |61 |71.0 |70.6393442623|
+South |Widowed |American Indian/Eskimo|Male |0 |None |None |
+South |Widowed |American Indian/Eskimo|Female |0 |None |None |
+South |Widowed |Asian/Pacific Islander|Male |1 |76.0 |76.0 |
+South |Widowed |Asian/Pacific Islander|Female |5 |74.0 |74.4 |
+South |Widowed |Other |Male |0 |None |None |
+South |Widowed |Other |Female |0 |None |None |
+South |Widowed |Not stated |Male |1 |72.0 |72.0 |
+South |Widowed |Not stated |Female |16 |75.0 |73.8125 |
+South |Divorced |White |Male |53 |53.0 |53.4716981132|
+South |Divorced |White |Female |48 |50.0 |53.6666666667|
+South |Divorced |Black |Male |44 |50.0 |54.8409090909|
+South |Divorced |Black |Female |65 |50.0 |48.7846153846|
+South |Divorced |American Indian/Eskimo|Male |0 |None |None |
+South |Divorced |American Indian/Eskimo|Female |0 |None |None |
+South |Divorced |Asian/Pacific Islander|Male |2 |52.0 |52.0 |
+South |Divorced |Asian/Pacific Islander|Female |0 |None |None |
+South |Divorced |Other |Male |0 |None |None |
+South |Divorced |Other |Female |0 |None |None |
+South |Divorced |Not stated |Male |1 |48.0 |48.0 |
+South |Divorced |Not stated |Female |6 |72.0 |60.3333333333|
+South |Separated |White |Male |2 |55.0 |55.0 |
+South |Separated |White |Female |3 |30.0 |32.0 |
+South |Separated |Black |Male |2 |47.0 |47.0 |
+South |Separated |Black |Female |15 |60.0 |49.3333333333|
+South |Separated |American Indian/Eskimo|Male |0 |None |None |
+South |Separated |American Indian/Eskimo|Female |0 |None |None |
+South |Separated |Asian/Pacific Islander|Male |0 |None |None |
+South |Separated |Asian/Pacific Islander|Female |0 |None |None |
+South |Separated |Other |Male |0 |None |None |
+South |Separated |Other |Female |0 |None |None |
+South |Separated |Not stated |Male |1 |55.0 |55.0 |
+South |Separated |Not stated |Female |0 |None |None |
+South |Not stated |White |Male |906 |56.0 |50.5140171312|
+South |Not stated |White |Female |1215 |51.0 |49.2227754258|
+South |Not stated |Black |Male |219 |51.0 |48.6100445682|
+South |Not stated |Black |Female |282 |46.0 |45.9751967223|
+South |Not stated |American Indian/Eskimo|Male |438 |60.0 |54.2831862832|
+South |Not stated |American Indian/Eskimo|Female |555 |50.0 |49.9711761042|
+South |Not stated |Asian/Pacific Islander|Male |124 |50.5 |47.4838930472|
+South |Not stated |Asian/Pacific Islander|Female |167 |41.0 |43.7190618762|
+South |Not stated |Other |Male |99 |34.0 |33.3055832106|
+South |Not stated |Other |Female |153 |29.0 |32.4967320261|
+South |Not stated |Not stated |Male |4602 |59.0 |52.6499063289|
+South |Not stated |Not stated |Female |5650 |51.0 |49.3474989248|
+All regions |Married |White |Male |1968 |64.0 |62.4651084011|
+All regions |Married |White |Female |2051 |47.0 |49.4491305054|
+All regions |Married |Black |Male |290 |59.0 |58.2965517241|
+All regions |Married |Black |Female |348 |43.0 |45.3477011494|
+All regions |Married |American Indian/Eskimo|Male |3 |67.0 |63.3333333333|
+All regions |Married |American Indian/Eskimo|Female |4 |52.5 |53.0 |
+All regions |Married |Asian/Pacific Islander|Male |32 |63.5 |58.25 |
+All regions |Married |Asian/Pacific Islander|Female |92 |37.0 |42.1739130435|
+All regions |Married |Other |Male |101 |62.0 |59.3267326733|
+All regions |Married |Other |Female |120 |37.5 |44.2166666667|
+All regions |Married |Not stated |Male |550 |62.0 |60.2745454545|
+All regions |Married |Not stated |Female |920 |34.0 |40.725 |
+All regions |Single |White |Male |1402 |19.0 |24.530809609 |
+All regions |Single |White |Female |1492 |20.0 |25.976676773 |
+All regions |Single |Black |Male |1078 |27.0 |26.7724869489|
+All regions |Single |Black |Female |1332 |24.0 |26.9141350026|
+All regions |Single |American Indian/Eskimo|Male |5 |0.916666666667 |19.2166666667|
+All regions |Single |American Indian/Eskimo|Female |4 |7.0 |9.25 |
+All regions |Single |Asian/Pacific Islander|Male |71 |2.0 |4.88149396034|
+All regions |Single |Asian/Pacific Islander|Female |72 |0.666666666667 |14.15625 |
+All regions |Single |Other |Male |195 |11.0 |20.9771477211|
+All regions |Single |Other |Female |271 |23.0 |25.1601945794|
+All regions |Single |Not stated |Male |809 |0.0833333333333|14.4964173581|
+All regions |Single |Not stated |Female |1033 |16.0 |18.6838756358|
+All regions |Widowed |White |Male |188 |79.0 |77.0106382979|
+All regions |Widowed |White |Female |806 |78.0 |77.2121588089|
+All regions |Widowed |Black |Male |31 |77.0 |76.3225806452|
+All regions |Widowed |Black |Female |216 |75.0 |72.5509259259|
+All regions |Widowed |American Indian/Eskimo|Male |3 |69.0 |69.3333333333|
+All regions |Widowed |American Indian/Eskimo|Female |0 |None |None |
+All regions |Widowed |Asian/Pacific Islander|Male |2 |76.0 |76.0 |
+All regions |Widowed |Asian/Pacific Islander|Female |7 |76.0 |75.1428571429|
+All regions |Widowed |Other |Male |7 |76.0 |75.1428571429|
+All regions |Widowed |Other |Female |46 |74.0 |72.4782608696|
+All regions |Widowed |Not stated |Male |32 |79.0 |76.25 |
+All regions |Widowed |Not stated |Female |218 |78.0 |76.7201834862|
+All regions |Divorced |White |Male |180 |56.0 |56.5611111111|
+All regions |Divorced |White |Female |295 |56.0 |55.6745762712|
+All regions |Divorced |Black |Male |73 |53.0 |56.4246575342|
+All regions |Divorced |Black |Female |141 |53.0 |52.6453900709|
+All regions |Divorced |American Indian/Eskimo|Male |2 |42.0 |42.0 |
+All regions |Divorced |American Indian/Eskimo|Female |0 |None |None |
+All regions |Divorced |Asian/Pacific Islander|Male |3 |58.0 |59.0 |
+All regions |Divorced |Asian/Pacific Islander|Female |2 |63.5 |63.5 |
+All regions |Divorced |Other |Male |8 |60.0 |58.25 |
+All regions |Divorced |Other |Female |39 |51.0 |51.3846153846|
+All regions |Divorced |Not stated |Male |43 |50.0 |48.0473393502|
+All regions |Divorced |Not stated |Female |101 |50.0 |51.1584158416|
+All regions |Separated |White |Male |31 |61.0 |60.8387096774|
+All regions |Separated |White |Female |40 |66.0 |59.2770833333|
+All regions |Separated |Black |Male |10 |53.0 |57.0 |
+All regions |Separated |Black |Female |45 |60.0 |60.2888888889|
+All regions |Separated |American Indian/Eskimo|Male |1 |49.0 |49.0 |
+All regions |Separated |American Indian/Eskimo|Female |7 |52.0 |48.8571428571|
+All regions |Separated |Asian/Pacific Islander|Male |1 |53.0 |53.0 |
+All regions |Separated |Asian/Pacific Islander|Female |2 |35.0 |35.0 |
+All regions |Separated |Other |Male |5 |51.0 |49.8 |
+All regions |Separated |Other |Female |10 |48.0 |46.1 |
+All regions |Separated |Not stated |Male |14 |77.0 |75.2857142857|
+All regions |Separated |Not stated |Female |22 |56.0 |54.5909090909|
+All regions |Not stated |White |Male |2370 |56.0 |50.7833864731|
+All regions |Not stated |White |Female |2886 |52.0 |50.0260951403|
+All regions |Not stated |Black |Male |2237 |35.0 |33.4958236913|
+All regions |Not stated |Black |Female |2633 |33.0 |35.4219797661|
+All regions |Not stated |American Indian/Eskimo|Male |452 |59.5 |53.5028442302|
+All regions |Not stated |American Indian/Eskimo|Female |568 |50.0 |49.6034379188|
+All regions |Not stated |Asian/Pacific Islander|Male |131 |49.0 |46.7786468538|
+All regions |Not stated |Asian/Pacific Islander|Female |180 |39.5 |42.5393518519|
+All regions |Not stated |Other |Male |528 |34.0 |34.2365978834|
+All regions |Not stated |Other |Female |545 |29.0 |33.4746758221|
+All regions |Not stated |Not stated |Male |4776 |59.0 |52.4859375233|
+All regions |Not stated |Not stated |Female |5820 |51.0 |49.3638170909|
+Northeast |All |White |Male |2654 |58.0 |50.7807482745|
+Northeast |All |White |Female |3153 |51.0 |49.4841730755|
+Northeast |All |Black |Male |2367 |34.0 |33.0293473141|
+Northeast |All |Black |Female |2994 |33.0 |36.377715504 |
+Northeast |All |American Indian/Eskimo|Male |28 |42.5 |36.9404761905|
+Northeast |All |American Indian/Eskimo|Female |26 |33.5 |35.5673076923|
+Northeast |All |Asian/Pacific Islander|Male |52 |9.0 |25.9391552151|
+Northeast |All |Asian/Pacific Islander|Female |91 |33.0 |31.1785714286|
+Northeast |All |Other |Male |724 |35.0 |34.9822257139|
+Northeast |All |Other |Female |853 |32.0 |35.4702357023|
+Northeast |All |Not stated |Male |572 |44.0 |40.1527907411|
+Northeast |All |Not stated |Female |799 |35.0 |39.3217685694|
+Midwest |All |White |Male |1711 |58.0 |51.3083314432|
+Midwest |All |White |Female |2273 |54.0 |50.7527252034|
+Midwest |All |Black |Male |374 |43.0 |42.2582515272|
+Midwest |All |Black |Female |461 |40.0 |40.061104999 |
+Midwest |All |American Indian/Eskimo|Male |0 |None |None |
+Midwest |All |American Indian/Eskimo|Female |1 |69.0 |69.0 |
+Midwest |All |Asian/Pacific Islander|Male |7 |33.0 |34.1428571429|
+Midwest |All |Asian/Pacific Islander|Female |15 |59.0 |49.2 |
+Midwest |All |Other |Male |21 |30.0 |36.9563492063|
+Midwest |All |Other |Female |17 |41.0 |42.6470588235|
+Midwest |All |Not stated |Male |813 |45.0 |38.5346019887|
+Midwest |All |Not stated |Female |1250 |31.0 |37.6491336071|
+South |All |White |Male |1774 |52.0 |46.0340999586|
+South |All |White |Female |2144 |42.0 |43.9348042098|
+South |All |Black |Male |978 |36.0 |34.5264287984|
+South |All |Black |Female |1260 |32.0 |34.3814689873|
+South |All |American Indian/Eskimo|Male |438 |60.0 |54.2831862832|
+South |All |American Indian/Eskimo|Female |556 |50.0 |49.949645212 |
+South |All |Asian/Pacific Islander|Male |181 |41.0 |39.4074736898|
+South |All |Asian/Pacific Islander|Female |249 |36.0 |38.9722222222|
+South |All |Other |Male |99 |34.0 |33.3055832106|
+South |All |Other |Female |161 |29.0 |32.2111801242|
+South |All |Not stated |Male |4839 |58.0 |51.0054654113|
+South |All |Not stated |Female |6065 |49.0 |47.5974194402|
+Northeast |Married |All races |Male |1313 |64.0 |62.1675552171|
+Northeast |Married |All races |Female |1356 |45.5 |48.9660766962|
+Northeast |Single |All races |Male |1318 |21.0 |25.3708898482|
+Northeast |Single |All races |Female |1744 |28.0 |30.6933471717|
+Northeast |Widowed |All races |Male |127 |78.0 |76.8818897638|
+Northeast |Widowed |All races |Female |546 |78.0 |76.8241758242|
+Northeast |Divorced |All races |Male |97 |58.0 |58.897274145 |
+Northeast |Divorced |All races |Female |238 |56.0 |55.2731092437|
+Northeast |Separated |All races |Male |48 |73.5 |65.625 |
+Northeast |Separated |All races |Female |96 |58.0 |61.3967013889|
+Northeast |Not stated |All races |Male |3494 |38.0 |37.1890096526|
+Northeast |Not stated |All races |Female |3936 |34.0 |37.970630403 |
+Midwest |Married |All races |Male |1151 |63.0 |62.1306110628|
+Midwest |Married |All races |Female |1429 |41.0 |46.3612083042|
+Midwest |Single |All races |Male |949 |4.0 |18.9910942315|
+Midwest |Single |All races |Female |1107 |17.0 |19.9544397948|
+Midwest |Widowed |All races |Male |93 |79.0 |77.7096774194|
+Midwest |Widowed |All races |Female |574 |77.0 |76.3118466899|
+Midwest |Divorced |All races |Male |112 |51.0 |53.4017857143|
+Midwest |Divorced |All races |Female |221 |53.0 |53.7601809955|
+Midwest |Separated |All races |Male |9 |46.0 |50.2222222222|
+Midwest |Separated |All races |Female |12 |29.0 |35.6666666667|
+Midwest |Not stated |All races |Male |612 |59.0 |53.4999577803|
+Midwest |Not stated |All races |Female |674 |62.5 |56.33649745 |
+South |Married |All races |Male |480 |59.0 |58.1166666667|
+South |Married |All races |Female |750 |37.0 |41.8906666667|
+South |Single |All races |Male |1293 |17.0 |21.6955681437|
+South |Single |All races |Female |1353 |18.0 |19.3371570667|
+South |Widowed |All races |Male |43 |77.0 |73.9302325581|
+South |Widowed |All races |Female |173 |76.0 |73.6416184971|
+South |Divorced |All races |Male |100 |50.5 |53.99 |
+South |Divorced |All races |Female |119 |50.0 |51.3361344538|
+South |Separated |All races |Male |5 |55.0 |51.8 |
+South |Separated |All races |Female |18 |43.5 |46.4444444444|
+South |Not stated |All races |Male |6388 |58.0 |51.9203912452|
+South |Not stated |All races |Female |8022 |50.0 |48.8146512858|
+Northeast |Married |White |Persons|1796 |59.0 |57.1436525612|
+Northeast |Married |Black |Persons|252 |52.0 |52.9047619048|
+Northeast |Married |American Indian/Eskimo|Persons|5 |61.0 |59.0 |
+Northeast |Married |Asian/Pacific Islander|Persons|53 |40.0 |47.5283018868|
+Northeast |Married |Other |Persons|208 |53.0 |51.3269230769|
+Northeast |Married |Not stated |Persons|355 |54.0 |52.3154929577|
+Northeast |Single |White |Persons|1147 |29.0 |30.9373962788|
+Northeast |Single |Black |Persons|676 |31.0 |32.2821305116|
+Northeast |Single |American Indian/Eskimo|Persons|9 |0.916666666667 |14.787037037 |
+Northeast |Single |Asian/Pacific Islander|Persons|66 |0.541666666667 |11.8043344119|
+Northeast |Single |Other |Persons|451 |19.0 |23.5764003029|
+Northeast |Single |Not stated |Persons|713 |22.0 |25.4067189181|
+Northeast |Widowed |White |Persons|478 |79.0 |77.8410041841|
+Northeast |Widowed |Black |Persons|92 |76.0 |74.402173913 |
+Northeast |Widowed |American Indian/Eskimo|Persons|3 |69.0 |69.3333333333|
+Northeast |Widowed |Asian/Pacific Islander|Persons|2 |76.0 |76.0 |
+Northeast |Widowed |Other |Persons|50 |75.0 |72.48 |
+Northeast |Widowed |Not stated |Persons|48 |80.5 |76.5208333333|
+Northeast |Divorced |White |Persons|189 |58.0 |57.1904761905|
+Northeast |Divorced |Black |Persons|54 |58.0 |57.8703703704|
+Northeast |Divorced |American Indian/Eskimo|Persons|2 |42.0 |42.0 |
+Northeast |Divorced |Asian/Pacific Islander|Persons|2 |70.0 |70.0 |
+Northeast |Divorced |Other |Persons|45 |53.0 |52.3777777778|
+Northeast |Divorced |Not stated |Persons|43 |54.0 |54.7217579549|
+Northeast |Separated |White |Persons|55 |71.0 |65.5106060606|
+Northeast |Separated |Black |Persons|35 |66.0 |66.3714285714|
+Northeast |Separated |American Indian/Eskimo|Persons|8 |52.0 |48.875 |
+Northeast |Separated |Asian/Pacific Islander|Persons|3 |35.0 |41.0 |
+Northeast |Separated |Other |Persons|15 |48.0 |47.3333333333|
+Northeast |Separated |Not stated |Persons|28 |77.0 |67.6428571429|
+Northeast |Not stated |White |Persons|2142 |50.0 |47.180404651 |
+Northeast |Not stated |Black |Persons|4252 |32.0 |32.8428092863|
+Northeast |Not stated |American Indian/Eskimo|Persons|27 |32.0 |31.4074074074|
+Northeast |Not stated |Asian/Pacific Islander|Persons|17 |27.0 |27.8235294118|
+Northeast |Not stated |Other |Persons|808 |32.0 |34.1377301167|
+Northeast |Not stated |Not stated |Persons|184 |55.0 |53.1438217942|
+Midwest |Married |White |Persons|1521 |58.0 |56.1909927679|
+Midwest |Married |Black |Persons|172 |55.0 |52.25 |
+Midwest |Married |American Indian/Eskimo|Persons|1 |69.0 |69.0 |
+Midwest |Married |Asian/Pacific Islander|Persons|10 |60.5 |54.6 |
+Midwest |Married |Other |Persons|9 |56.0 |53.7777777778|
+Midwest |Married |Not stated |Persons|867 |46.0 |48.6851211073|
+Midwest |Single |White |Persons|880 |15.0 |20.9259788594|
+Midwest |Single |Black |Persons|408 |26.5 |27.5631261659|
+Midwest |Single |American Indian/Eskimo|Persons|0 |None |None |
+Midwest |Single |Asian/Pacific Islander|Persons|7 |24.0 |24.2857142857|
+Midwest |Single |Other |Persons|11 |0.0 |18.1818181818|
+Midwest |Single |Not stated |Persons|750 |0.0 |13.4419952088|
+Midwest |Widowed |White |Persons|394 |78.0 |76.9898477157|
+Midwest |Widowed |Black |Persons|84 |75.5 |73.130952381 |
+Midwest |Widowed |American Indian/Eskimo|Persons|0 |None |None |
+Midwest |Widowed |Asian/Pacific Islander|Persons|1 |78.0 |78.0 |
+Midwest |Widowed |Other |Persons|3 |70.0 |78.6666666667|
+Midwest |Widowed |Not stated |Persons|185 |78.0 |76.9675675676|
+Midwest |Divorced |White |Persons|185 |56.0 |56.1405405405|
+Midwest |Divorced |Black |Persons|51 |57.0 |55.5490196078|
+Midwest |Divorced |American Indian/Eskimo|Persons|0 |None |None |
+Midwest |Divorced |Asian/Pacific Islander|Persons|1 |60.0 |60.0 |
+Midwest |Divorced |Other |Persons|2 |56.5 |56.5 |
+Midwest |Divorced |Not stated |Persons|94 |47.0 |47.5531914894|
+Midwest |Separated |White |Persons|11 |41.0 |40.7272727273|
+Midwest |Separated |Black |Persons|3 |42.0 |42.0 |
+Midwest |Separated |American Indian/Eskimo|Persons|0 |None |None |
+Midwest |Separated |Asian/Pacific Islander|Persons|0 |None |None |
+Midwest |Separated |Other |Persons|0 |None |None |
+Midwest |Separated |Not stated |Persons|7 |30.0 |43.7142857143|
+Midwest |Not stated |White |Persons|993 |66.0 |58.5097060326|
+Midwest |Not stated |Black |Persons|117 |43.0 |42.2051282051|
+Midwest |Not stated |American Indian/Eskimo|Persons|0 |None |None |
+Midwest |Not stated |Asian/Pacific Islander|Persons|3 |33.0 |41.0 |
+Midwest |Not stated |Other |Persons|13 |30.0 |36.0064102564|
+Midwest |Not stated |Not stated |Persons|160 |44.5 |44.2722001198|
+South |Married |White |Persons|702 |53.0 |51.6452991453|
+South |Married |Black |Persons|214 |49.0 |48.4485981308|
+South |Married |American Indian/Eskimo|Persons|1 |38.0 |38.0 |
+South |Married |Asian/Pacific Islander|Persons|61 |37.0 |43.9180327869|
+South |Married |Other |Persons|4 |34.5 |34.5 |
+South |Married |Not stated |Persons|248 |34.0 |39.6612903226|
+South |Single |White |Persons|867 |19.0 |22.2022397798|
+South |Single |Black |Persons|1326 |22.0 |23.862664444 |
+South |Single |American Indian/Eskimo|Persons|0 |None |None |
+South |Single |Asian/Pacific Islander|Persons|70 |1.5 |5.95357142857|
+South |Single |Other |Persons|4 |17.5 |19.0 |
+South |Single |Not stated |Persons|379 |0.0 |7.47112976076|
+South |Widowed |White |Persons|122 |77.0 |75.1557377049|
+South |Widowed |Black |Persons|71 |71.0 |71.1126760563|
+South |Widowed |American Indian/Eskimo|Persons|0 |None |None |
+South |Widowed |Asian/Pacific Islander|Persons|6 |75.0 |74.6666666667|
+South |Widowed |Other |Persons|0 |None |None |
+South |Widowed |Not stated |Persons|17 |74.0 |73.7058823529|
+South |Divorced |White |Persons|101 |52.0 |53.5643564356|
+South |Divorced |Black |Persons|109 |50.0 |51.2293577982|
+South |Divorced |American Indian/Eskimo|Persons|0 |None |None |
+South |Divorced |Asian/Pacific Islander|Persons|2 |52.0 |52.0 |
+South |Divorced |Other |Persons|0 |None |None |
+South |Divorced |Not stated |Persons|7 |63.0 |58.5714285714|
+South |Separated |White |Persons|5 |38.0 |41.2 |
+South |Separated |Black |Persons|17 |48.0 |49.0588235294|
+South |Separated |American Indian/Eskimo|Persons|0 |None |None |
+South |Separated |Asian/Pacific Islander|Persons|0 |None |None |
+South |Separated |Other |Persons|0 |None |None |
+South |Separated |Not stated |Persons|1 |55.0 |55.0 |
+South |Not stated |White |Persons|2121 |54.0 |49.7743383608|
+South |Not stated |Black |Persons|501 |49.0 |47.1269565592|
+South |Not stated |American Indian/Eskimo|Persons|993 |55.0 |51.8731503826|
+South |Not stated |Asian/Pacific Islander|Persons|291 |44.0 |45.3233198322|
+South |Not stated |Other |Persons|252 |29.0 |32.8144949915|
+South |Not stated |Not stated |Persons|10252 |55.0 |50.8299100518|
+All regions |All |White |Male |6139 |56.0 |49.5561417733|
+All regions |All |White |Female |7570 |50.0 |48.2933635826|
+All regions |All |Black |Male |3719 |36.0 |34.3511423846|
+All regions |All |Black |Female |4715 |33.0 |36.20439036 |
+All regions |All |American Indian/Eskimo|Male |466 |59.5 |53.241135033 |
+All regions |All |American Indian/Eskimo|Female |583 |50.0 |49.3409137871|
+All regions |All |Asian/Pacific Islander|Male |240 |37.5 |36.3357867043|
+All regions |All |Asian/Pacific Islander|Female |355 |34.0 |37.40657277 |
+All regions |All |Other |Male |844 |35.0 |34.8346771185|
+All regions |All |Other |Female |1031 |31.0 |35.0796421475|
+All regions |All |Not stated |Male |6224 |55.0 |48.3790930022|
+All regions |All |Not stated |Female |8114 |45.0 |45.2499210008|
+All regions |Married |All races |Male |2944 |63.0 |61.4926403986|
+All regions |Married |All races |Female |3535 |41.0 |46.4119283357|
+All regions |Single |All races |Male |3560 |16.0 |22.335323274 |
+All regions |Single |All races |Female |4204 |20.0 |24.2107364014|
+All regions |Widowed |All races |Male |263 |78.0 |76.6920152091|
+All regions |Widowed |All races |Female |1293 |77.0 |76.1709203403|
+All regions |Divorced |All races |Male |309 |54.0 |55.3172672882|
+All regions |Divorced |All races |Female |578 |53.0 |53.884083045 |
+All regions |Separated |All races |Male |62 |64.5 |62.2741935484|
+All regions |Separated |All races |Female |126 |57.0 |56.8101851852|
+All regions |Not stated |All races |Male |10494 |52.0 |47.107664681 |
+All regions |Not stated |All races |Female |12632 |45.0 |45.8371068051|
+All regions |Married |White |Persons|4019 |58.0 |55.8227170938|
+All regions |Married |Black |Persons|638 |51.0 |51.2335423197|
+All regions |Married |American Indian/Eskimo|Persons|7 |61.0 |57.4285714286|
+All regions |Married |Asian/Pacific Islander|Persons|124 |40.0 |46.3225806452|
+All regions |Married |Other |Persons|221 |53.0 |51.1221719457|
+All regions |Married |Not stated |Persons|1470 |45.0 |48.0394557823|
+All regions |Single |White |Persons|2894 |20.0 |25.2762255761|
+All regions |Single |Black |Persons|2410 |25.0 |26.8507754167|
+All regions |Single |American Indian/Eskimo|Persons|9 |0.916666666667 |14.787037037 |
+All regions |Single |Asian/Pacific Islander|Persons|143 |1.0 |9.55130119709|
+All regions |Single |Other |Persons|466 |19.0 |23.4097779756|
+All regions |Single |Not stated |Persons|1842 |2.0 |16.8447585095|
+All regions |Widowed |White |Persons|994 |78.0 |77.1740442656|
+All regions |Widowed |Black |Persons|247 |75.0 |73.024291498 |
+All regions |Widowed |American Indian/Eskimo|Persons|3 |69.0 |69.3333333333|
+All regions |Widowed |Asian/Pacific Islander|Persons|9 |76.0 |75.3333333333|
+All regions |Widowed |Other |Persons|53 |75.0 |72.8301886792|
+All regions |Widowed |Not stated |Persons|250 |78.0 |76.66 |
+All regions |Divorced |White |Persons|475 |56.0 |56.0105263158|
+All regions |Divorced |Black |Persons|214 |53.0 |53.9345794393|
+All regions |Divorced |American Indian/Eskimo|Persons|2 |42.0 |42.0 |
+All regions |Divorced |Asian/Pacific Islander|Persons|5 |60.0 |60.8 |
+All regions |Divorced |Other |Persons|47 |53.0 |52.5531914894|
+All regions |Divorced |Not stated |Persons|144 |50.0 |50.2294138338|
+All regions |Separated |White |Persons|71 |63.0 |59.9589201878|
+All regions |Separated |Black |Persons|55 |58.0 |59.6909090909|
+All regions |Separated |American Indian/Eskimo|Persons|8 |52.0 |48.875 |
+All regions |Separated |Asian/Pacific Islander|Persons|3 |35.0 |41.0 |
+All regions |Separated |Other |Persons|15 |48.0 |47.3333333333|
+All regions |Separated |Not stated |Persons|36 |72.5 |62.6388888889|
+All regions |Not stated |White |Persons|5256 |54.0 |50.3675678303|
+All regions |Not stated |Black |Persons|4870 |34.0 |34.5372136184|
+All regions |Not stated |American Indian/Eskimo|Persons|1020 |55.0 |51.3314101274|
+All regions |Not stated |Asian/Pacific Islander|Persons|311 |44.0 |44.3250355987|
+All regions |Not stated |Other |Persons|1073 |31.0 |33.8496011235|
+All regions |Not stated |Not stated |Persons|10596 |55.0 |50.7710695621|
+Northeast |All |All races |Male |6397 |43.0 |41.2115473094|
+Northeast |All |All races |Female |7916 |38.0 |41.7350533182|
+Midwest |All |All races |Male |2926 |52.0 |46.458255612 |
+Midwest |All |All races |Female |4017 |45.0 |45.4126290268|
+South |All |All races |Male |8309 |54.0 |47.7136633631|
+South |All |All races |Female |10435 |45.0 |44.9312224366|
+Northeast |All |White |Persons|5807 |54.0 |50.0767528203|
+Northeast |All |Black |Persons|5361 |34.0 |34.8993369355|
+Northeast |All |American Indian/Eskimo|Persons|54 |38.5 |36.2793209877|
+Northeast |All |Asian/Pacific Islander|Persons|143 |30.0 |29.2733291691|
+Northeast |All |Other |Persons|1577 |33.0 |35.2461905332|
+Northeast |All |Not stated |Persons|1371 |38.0 |39.6684824149|
+Midwest |All |White |Persons|3984 |56.0 |50.9913402326|
+Midwest |All |Black |Persons|835 |42.0 |41.0452161386|
+Midwest |All |American Indian/Eskimo|Persons|1 |69.0 |69.0 |
+Midwest |All |Asian/Pacific Islander|Persons|22 |48.5 |44.4090909091|
+Midwest |All |Other |Persons|38 |40.5 |39.5021929825|
+Midwest |All |Not stated |Persons|2063 |34.0 |37.9980845496|
+South |All |White |Persons|3918 |48.0 |44.885327604 |
+South |All |Black |Persons|2238 |34.0 |34.4448160361|
+South |All |American Indian/Eskimo|Persons|994 |55.0 |51.8591934909|
+South |All |Asian/Pacific Islander|Persons|430 |38.0 |39.1554327237|
+South |All |Other |Persons|260 |29.0 |32.6278951456|
+South |All |Not stated |Persons|10904 |53.0 |49.1098492324|
+Northeast |Married |All races |Persons|2669 |57.0 |55.4604720869|
+Northeast |Single |All races |Persons|3062 |26.0 |28.4023612957|
+Northeast |Widowed |All races |Persons|673 |78.0 |76.8350668648|
+Northeast |Divorced |All races |Persons|335 |57.0 |56.3224943047|
+Northeast |Separated |All races |Persons|144 |63.5 |62.8061342593|
+Northeast |Not stated |All races |Persons|7430 |36.0 |37.6030687742|
+Midwest |Married |All races |Persons|2580 |55.0 |53.3963178295|
+Midwest |Single |All races |Persons|2056 |13.0 |19.509782723 |
+Midwest |Widowed |All races |Persons|667 |78.0 |76.5067466267|
+Midwest |Divorced |All races |Persons|333 |52.0 |53.6396396396|
+Midwest |Separated |All races |Persons|21 |41.0 |41.9047619048|
+Midwest |Not stated |All races |Persons|1286 |60.0 |54.9866045434|
+South |Married |All races |Persons|1230 |46.5 |48.2227642276|
+South |Single |All races |Persons|2646 |18.0 |20.4896232506|
+South |Widowed |All races |Persons|216 |76.0 |73.6990740741|
+South |Divorced |All races |Persons|219 |50.0 |52.5479452055|
+South |Separated |All races |Persons|23 |48.0 |47.6086956522|
+South |Not stated |All races |Persons|14410 |54.0 |50.1914359396|
+All regions |All |All races |Male |17632 |50.0 |45.1463221951|
+All regions |All |All races |Female |22368 |42.0 |43.8865575373|
+Northeast |All |All races |Persons|14313 |41.0 |41.5010794526|
+Midwest |All |All races |Persons|6943 |49.0 |45.853289172 |
+South |All |All races |Persons|18744 |49.0 |46.1646465541|
+All regions |Married |All races |Persons|6479 |54.0 |53.2644698256|
+All regions |Single |All races |Persons|7764 |19.0 |23.3508097227|
+All regions |Widowed |All races |Persons|1556 |78.0 |76.2589974293|
+All regions |Divorced |All races |Persons|887 |54.0 |54.3833546697|
+All regions |Separated |All races |Persons|188 |58.0 |58.6121453901|
+All regions |Not stated |All races |Persons|23126 |49.0 |46.4136541695|
+All regions |All |White |Persons|13709 |53.0 |48.8588457704|
+All regions |All |Black |Persons|8434 |34.0 |35.3871945786|
+All regions |All |American Indian/Eskimo|Persons|1049 |54.0 |51.0735192214|
+All regions |All |Asian/Pacific Islander|Persons|595 |36.0 |36.9746590628|
+All regions |All |Other |Persons|1875 |33.0 |34.9693752225|
+All regions |All |Not stated |Persons|14338 |50.0 |46.6082671116|
+All regions |All |All races |Persons|40000 |46.0 |44.4418617984|
+
+
+That took 7.940 seconds.
+
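+# Note (illustrative, added for clarity): empty cells (Frequency 0) print
+# None for both statistics rather than 0, and infant ages appear as
+# fractional years (e.g. 0.916666666667 = 11/12), presumably because ages
+# recorded in months or days have been converted to years. A minimal
+# sketch of the per-cell mean and median (not SOOMv0's Stats.py code):
+#
+#     def cell_mean_median(ages):
+#         if not ages:                       # empty cell -> None, None
+#             return None, None
+#         ages = sorted(ages)
+#         n = len(ages)
+#         mean = sum(ages) / float(n)
+#         if n % 2:                          # odd n: middle value
+#             median = float(ages[n // 2])
+#         else:                              # even n: mean of middle pair
+#             median = (ages[n // 2 - 1] + ages[n // 2]) / 2.0
+#         return mean, median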
+##############################################################################
+
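+# Note (illustrative, added for clarity): in the step below each record
+# contributes with its analysis weight w, so wgtn=1 reports the per-cell
+# sum of the analysis weights and wgtmean=1 the weighted mean
+# sum(w*x)/sum(w). A minimal sketch (not the SOOMv0 implementation):
+#
+#     def weighted_stats(ages, weights):
+#         wsum = float(sum(weights))         # weighted frequency (wgtn)
+#         if wsum == 0:                      # empty cell -> 0, None
+#             return 0, None
+#         wmean = sum(a * w for a, w in zip(ages, weights)) / wsum
+#         return wsum, wmean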
+#18c. Same again, but with weighted mean and weighted count.
+>>> print nhds.quadrivar(var1="race",var2="sex",var3="geog_region",var4="marital_status",wgtvar="analysis_wgt",cellvar="age",wgtmean=1,wgtn=1,printit=1,allcalc=1)
+colload(): memory mapping of data vector for analysis_wgt took 0.078 seconds.
+
+quadrivar(): Quadrivariate setup time 0.080 seconds.
+
+Quadrivariate summary derived from geog_region, marital_status, race and sex columns in nhds dataset
+quadrivar(): Quadrivariate summary created in 8.002 seconds.
+Time in intersect() function was 0.699 seconds.
+
+ | | | | | | |
+ | | | | |Frequency |Mean of |
+Grouped |Grouped |Grouped |Grouped| |weighted |Age (years) |
+by |by |by |by | |by |weighted by |
+Geographic Region|Marital Status|Race |Sex |Frequency|Analysis Weight|Analysis Weight|
+-----------------|--------------|----------------------|-------|---------|---------------|---------------|
+Northeast |Married |White |Male |910 |116167 |63.2015202252 |
+Northeast |Married |White |Female |886 |113048 |50.7878688699 |
+Northeast |Married |Black |Male |129 |16357 |58.3854618818 |
+Northeast |Married |Black |Female |123 |15688 |46.8068587455 |
+Northeast |Married |American Indian/Eskimo|Male |3 |394 |62.7005076142 |
+Northeast |Married |American Indian/Eskimo|Female |2 |271 |52.8450184502 |
+Northeast |Married |Asian/Pacific Islander|Male |13 |1525 |60.8662295082 |
+Northeast |Married |Asian/Pacific Islander|Female |40 |4886 |42.3145722472 |
+Northeast |Married |Other |Male |97 |12441 |59.1970902661 |
+Northeast |Married |Other |Female |111 |14300 |44.9609090909 |
+Northeast |Married |Not stated |Male |161 |20969 |61.0274691211 |
+Northeast |Married |Not stated |Female |194 |24787 |45.1552023238 |
+Northeast |Single |White |Male |546 |74357 |25.8484707915 |
+Northeast |Single |White |Female |601 |80771 |31.9646920239 |
+Northeast |Single |Black |Male |258 |35232 |27.3834692545 |
+Northeast |Single |Black |Female |418 |55191 |34.0363607491 |
+Northeast |Single |American Indian/Eskimo|Male |5 |639 |28.2488262911 |
+Northeast |Single |American Indian/Eskimo|Female |4 |663 |6.75263951735 |
+Northeast |Single |Asian/Pacific Islander|Male |30 |4001 |3.83363459272 |
+Northeast |Single |Asian/Pacific Islander|Female |36 |4509 |17.9306756857 |
+Northeast |Single |Other |Male |186 |25605 |20.4481258273 |
+Northeast |Single |Other |Female |265 |36150 |24.0950326942 |
+Northeast |Single |Not stated |Male |293 |40769 |21.0625957539 |
+Northeast |Single |Not stated |Female |420 |57816 |26.5161432615 |
+Northeast |Widowed |White |Male |98 |12214 |77.5753233994 |
+Northeast |Widowed |White |Female |380 |46594 |77.9027986436 |
+Northeast |Widowed |Black |Male |14 |1792 |74.0145089286 |
+Northeast |Widowed |Black |Female |78 |10134 |74.7357410697 |
+Northeast |Widowed |American Indian/Eskimo|Male |3 |231 |69.3506493506 |
+Northeast |Widowed |American Indian/Eskimo|Female |0 |0 |None |
+Northeast |Widowed |Asian/Pacific Islander|Male |1 |118 |76.0 |
+Northeast |Widowed |Asian/Pacific Islander|Female |1 |124 |76.0 |
+Northeast |Widowed |Other |Male |7 |935 |75.222459893 |
+Northeast |Widowed |Other |Female |43 |5676 |72.025017618 |
+Northeast |Widowed |Not stated |Male |4 |533 |64.1407129456 |
+Northeast |Widowed |Not stated |Female |44 |5916 |76.8032454361 |
+Northeast |Divorced |White |Male |57 |7353 |61.9827281382 |
+Northeast |Divorced |White |Female |132 |16369 |54.8385362576 |
+Northeast |Divorced |Black |Male |15 |1716 |59.0641025641 |
+Northeast |Divorced |Black |Female |39 |5089 |58.4539202201 |
+Northeast |Divorced |American Indian/Eskimo|Male |2 |244 |42.2950819672 |
+Northeast |Divorced |American Indian/Eskimo|Female |0 |0 |None |
+Northeast |Divorced |Asian/Pacific Islander|Male |1 |154 |73.0 |
+Northeast |Divorced |Asian/Pacific Islander|Female |1 |118 |67.0 |
+Northeast |Divorced |Other |Male |8 |1036 |57.2277992278 |
+Northeast |Divorced |Other |Female |37 |4735 |51.1596620908 |
+Northeast |Divorced |Not stated |Male |14 |1773 |46.7885647246 |
+Northeast |Divorced |Not stated |Female |29 |3614 |58.4584947427 |
+Northeast |Separated |White |Male |22 |2778 |66.0097192225 |
+Northeast |Separated |White |Female |33 |4317 |66.1976295267 |
+Northeast |Separated |Black |Male |7 |933 |62.6602357985 |
+Northeast |Separated |Black |Female |28 |3658 |66.8015308912 |
+Northeast |Separated |American Indian/Eskimo|Male |1 |114 |49.0 |
+Northeast |Separated |American Indian/Eskimo|Female |7 |841 |48.4946492271 |
+Northeast |Separated |Asian/Pacific Islander|Male |1 |133 |53.0 |
+Northeast |Separated |Asian/Pacific Islander|Female |2 |265 |35.0 |
+Northeast |Separated |Other |Male |5 |643 |51.3110419907 |
+Northeast |Separated |Other |Female |10 |1302 |45.7503840246 |
+Northeast |Separated |Not stated |Male |12 |1407 |77.4648187633 |
+Northeast |Separated |Not stated |Female |16 |2020 |61.2440594059 |
+Northeast |Not stated |White |Male |1021 |151442 |48.8105915821 |
+Northeast |Not stated |White |Female |1121 |175273 |47.0690279827 |
+Northeast |Not stated |Black |Male |1944 |65683 |32.3733644884 |
+Northeast |Not stated |Black |Female |2308 |82648 |35.3079828551 |
+Northeast |Not stated |American Indian/Eskimo|Male |14 |1543 |35.0892201339 |
+Northeast |Not stated |American Indian/Eskimo|Female |13 |1200 |38.174375 |
+Northeast |Not stated |Asian/Pacific Islander|Male |6 |443 |12.7990970655 |
+Northeast |Not stated |Asian/Pacific Islander|Female |11 |1932 |17.6537267081 |
+Northeast |Not stated |Other |Male |421 |23001 |39.0177023 |
+Northeast |Not stated |Other |Female |387 |25184 |38.640623281 |
+Northeast |Not stated |Not stated |Male |88 |11244 |53.5444459267 |
+Northeast |Not stated |Not stated |Female |96 |12086 |53.6532935364 |
+Midwest |Married |White |Male |733 |153530 |62.9876006861 |
+Midwest |Married |White |Female |788 |165666 |50.181092278 |
+Midwest |Married |Black |Male |83 |17822 |60.4670070699 |
+Midwest |Married |Black |Female |89 |19544 |44.7604891527 |
+Midwest |Married |American Indian/Eskimo|Male |0 |0 |None |
+Midwest |Married |American Indian/Eskimo|Female |1 |179 |69.0 |
+Midwest |Married |Asian/Pacific Islander|Male |3 |704 |58.96875 |
+Midwest |Married |Asian/Pacific Islander|Female |7 |1235 |55.0097165992 |
+Midwest |Married |Other |Male |4 |848 |69.0448113208 |
+Midwest |Married |Other |Female |5 |901 |41.3274139845 |
+Midwest |Married |Not stated |Male |328 |58496 |60.7014325766 |
+Midwest |Married |Not stated |Female |539 |94519 |41.8136882532 |
+Midwest |Single |White |Male |399 |86820 |20.0541065397 |
+Midwest |Single |White |Female |481 |105392 |22.8603358956 |
+Midwest |Single |Black |Male |195 |42431 |29.9901102674 |
+Midwest |Single |Black |Female |213 |47679 |24.2253332515 |
+Midwest |Single |American Indian/Eskimo|Male |0 |0 |None |
+Midwest |Single |American Indian/Eskimo|Female |0 |0 |None |
+Midwest |Single |Asian/Pacific Islander|Male |3 |732 |9.27049180328 |
+Midwest |Single |Asian/Pacific Islander|Female |4 |720 |29.4138888889 |
+Midwest |Single |Other |Male |9 |1697 |15.5474366529 |
+Midwest |Single |Other |Female |2 |314 |33.9171974522 |
+Midwest |Single |Not stated |Male |343 |60781 |12.5881926288 |
+Midwest |Single |Not stated |Female |407 |71907 |14.5453358613 |
+Midwest |Widowed |White |Male |59 |12877 |76.9771685952 |
+Midwest |Widowed |White |Female |335 |70266 |76.915876811 |
+Midwest |Widowed |Black |Male |7 |1478 |86.9627875507 |
+Midwest |Widowed |Black |Female |77 |16899 |72.6628794603 |
+Midwest |Widowed |American Indian/Eskimo|Male |0 |0 |None |
+Midwest |Widowed |American Indian/Eskimo|Female |0 |0 |None |
+Midwest |Widowed |Asian/Pacific Islander|Male |0 |0 |None |
+Midwest |Widowed |Asian/Pacific Islander|Female |1 |174 |78.0 |
+Midwest |Widowed |Other |Male |0 |0 |None |
+Midwest |Widowed |Other |Female |3 |543 |78.379373849 |
+Midwest |Widowed |Not stated |Male |27 |4850 |77.7092783505 |
+Midwest |Widowed |Not stated |Female |158 |28201 |76.9008191199 |
+Midwest |Divorced |White |Male |70 |13971 |53.9884045523 |
+Midwest |Divorced |White |Female |115 |23950 |57.1633820459 |
+Midwest |Divorced |Black |Male |14 |3631 |60.8176810796 |
+Midwest |Divorced |Black |Female |37 |7332 |55.1419803601 |
+Midwest |Divorced |American Indian/Eskimo|Male |0 |0 |None |
+Midwest |Divorced |American Indian/Eskimo|Female |0 |0 |None |
+Midwest |Divorced |Asian/Pacific Islander|Male |0 |0 |None |
+Midwest |Divorced |Asian/Pacific Islander|Female |1 |242 |60.0 |
+Midwest |Divorced |Other |Male |0 |0 |None |
+Midwest |Divorced |Other |Female |2 |361 |59.8060941828 |
+Midwest |Divorced |Not stated |Male |28 |5526 |48.7723488961 |
+Midwest |Divorced |Not stated |Female |66 |11948 |48.1949280214 |
+Midwest |Separated |White |Male |7 |1950 |46.9538461538 |
+Midwest |Separated |White |Female |4 |713 |24.6802244039 |
+Midwest |Separated |Black |Male |1 |239 |33.0 |
+Midwest |Separated |Black |Female |2 |602 |47.6212624585 |
+Midwest |Separated |American Indian/Eskimo|Male |0 |0 |None |
+Midwest |Separated |American Indian/Eskimo|Female |0 |0 |None |
+Midwest |Separated |Asian/Pacific Islander|Male |0 |0 |None |
+Midwest |Separated |Asian/Pacific Islander|Female |0 |0 |None |
+Midwest |Separated |Other |Male |0 |0 |None |
+Midwest |Separated |Other |Female |0 |0 |None |
+Midwest |Separated |Not stated |Male |1 |170 |71.0 |
+Midwest |Separated |Not stated |Female |6 |1002 |38.5798403194 |
+Midwest |Not stated |White |Male |443 |101847 |59.8798690211 |
+Midwest |Not stated |White |Female |550 |125807 |64.9911319462 |
+Midwest |Not stated |Black |Male |74 |15028 |45.3792919883 |
+Midwest |Not stated |Black |Female |43 |8030 |35.2581569116 |
+Midwest |Not stated |American Indian/Eskimo|Male |0 |0 |None |
+Midwest |Not stated |American Indian/Eskimo|Female |0 |0 |None |
+Midwest |Not stated |Asian/Pacific Islander|Male |1 |237 |33.0 |
+Midwest |Not stated |Asian/Pacific Islander|Female |2 |411 |45.5060827251 |
+Midwest |Not stated |Other |Male |8 |834 |50.179656275 |
+Midwest |Not stated |Other |Female |5 |455 |18.0813186813 |
+Midwest |Not stated |Not stated |Male |86 |16265 |42.0954993053 |
+Midwest |Not stated |Not stated |Female |74 |14309 |45.3128101195 |
+South |Married |White |Male |325 |43460 |58.1412793373 |
+South |Married |White |Female |377 |49393 |44.8787884923 |
+South |Married |Black |Male |78 |12316 |54.9192107827 |
+South |Married |Black |Female |136 |20964 |45.276044648 |
+South |Married |American Indian/Eskimo|Male |0 |0 |None |
+South |Married |American Indian/Eskimo|Female |1 |204 |38.0 |
+South |Married |Asian/Pacific Islander|Male |16 |1786 |57.3169092945 |
+South |Married |Asian/Pacific Islander|Female |45 |5488 |39.2458090379 |
+South |Married |Other |Male |0 |0 |None |
+South |Married |Other |Female |4 |864 |34.6782407407 |
+South |Married |Not stated |Male |61 |7426 |56.4290331269 |
+South |Married |Not stated |Female |187 |23277 |34.0549469433 |
+South |Single |White |Male |457 |68129 |24.9324569144 |
+South |Single |White |Female |410 |58291 |18.9273152129 |
+South |Single |Black |Male |625 |103103 |24.6619518799 |
+South |Single |Black |Female |701 |119071 |22.616342632 |
+South |Single |American Indian/Eskimo|Male |0 |0 |None |
+South |Single |American Indian/Eskimo|Female |0 |0 |None |
+South |Single |Asian/Pacific Islander|Male |38 |4944 |3.80258899676 |
+South |Single |Asian/Pacific Islander|Female |32 |4647 |7.32106735528 |
+South |Single |Other |Male |0 |0 |None |
+South |Single |Other |Female |4 |1002 |17.3293413174 |
+South |Single |Not stated |Male |173 |23718 |4.47211595129 |
+South |Single |Not stated |Female |206 |28119 |9.48045802285 |
+South |Widowed |White |Male |31 |3950 |73.4650632911 |
+South |Widowed |White |Female |91 |11476 |75.2161031718 |
+South |Widowed |Black |Male |10 |1481 |76.5638082377 |
+South |Widowed |Black |Female |61 |9375 |71.0226133333 |
+South |Widowed |American Indian/Eskimo|Male |0 |0 |None |
+South |Widowed |American Indian/Eskimo|Female |0 |0 |None |
+South |Widowed |Asian/Pacific Islander|Male |1 |97 |76.0 |
+South |Widowed |Asian/Pacific Islander|Female |5 |644 |73.5403726708 |
+South |Widowed |Other |Male |0 |0 |None |
+South |Widowed |Other |Female |0 |0 |None |
+South |Widowed |Not stated |Male |1 |86 |72.0 |
+South |Widowed |Not stated |Female |16 |2051 |73.7640175524 |
+South |Divorced |White |Male |53 |7148 |53.2148852826 |
+South |Divorced |White |Female |48 |6356 |53.3786972939 |
+South |Divorced |Black |Male |44 |6494 |55.5395749923 |
+South |Divorced |Black |Female |65 |9884 |48.2100364225 |
+South |Divorced |American Indian/Eskimo|Male |0 |0 |None |
+South |Divorced |American Indian/Eskimo|Female |0 |0 |None |
+South |Divorced |Asian/Pacific Islander|Male |2 |265 |49.8943396226 |
+South |Divorced |Asian/Pacific Islander|Female |0 |0 |None |
+South |Divorced |Other |Male |0 |0 |None |
+South |Divorced |Other |Female |0 |0 |None |
+South |Divorced |Not stated |Male |1 |165 |48.0 |
+South |Divorced |Not stated |Female |6 |729 |61.9958847737 |
+South |Separated |White |Male |2 |268 |55.0 |
+South |Separated |White |Female |3 |435 |31.7471264368 |
+South |Separated |Black |Male |2 |269 |46.9591078067 |
+South |Separated |Black |Female |15 |2257 |48.9069561365 |
+South |Separated |American Indian/Eskimo|Male |0 |0 |None |
+South |Separated |American Indian/Eskimo|Female |0 |0 |None |
+South |Separated |Asian/Pacific Islander|Male |0 |0 |None |
+South |Separated |Asian/Pacific Islander|Female |0 |0 |None |
+South |Separated |Other |Male |0 |0 |None |
+South |Separated |Other |Female |0 |0 |None |
+South |Separated |Not stated |Male |1 |113 |55.0 |
+South |Separated |Not stated |Female |0 |0 |None |
+South |Not stated |White |Male |906 |45778 |52.2465522386 |
+South |Not stated |White |Female |1215 |67160 |53.7483788038 |
+South |Not stated |Black |Male |219 |15925 |53.9928086395 |
+South |Not stated |Black |Female |282 |13682 |47.2678057102 |
+South |Not stated |American Indian/Eskimo|Male |438 |12239 |53.373389305 |
+South |Not stated |American Indian/Eskimo|Female |555 |15537 |49.1005380854 |
+South |Not stated |Asian/Pacific Islander|Male |124 |3812 |46.0802879053 |
+South |Not stated |Asian/Pacific Islander|Female |167 |4817 |43.0552729915 |
+South |Not stated |Other |Male |99 |2441 |31.6989208146 |
+South |Not stated |Other |Female |153 |3798 |31.6147972617 |
+South |Not stated |Not stated |Male |4602 |117897 |52.147362982 |
+South |Not stated |Not stated |Female |5650 |144700 |48.9992103644 |
+All regions |Married |White |Male |1968 |313157 |62.394381519 |
+All regions |Married |White |Female |2051 |328107 |49.591949679 |
+All regions |Married |Black |Male |290 |46495 |58.2651682977 |
+All regions |Married |Black |Female |348 |56196 |45.5240942416 |
+All regions |Married |American Indian/Eskimo|Male |3 |394 |62.7005076142 |
+All regions |Married |American Indian/Eskimo|Female |4 |654 |52.6360856269 |
+All regions |Married |Asian/Pacific Islander|Male |32 |4015 |58.9546699875 |
+All regions |Married |Asian/Pacific Islander|Female |92 |11609 |42.2144026187 |
+All regions |Married |Other |Male |101 |13289 |59.8254947701 |
+All regions |Married |Other |Female |120 |16065 |44.20410831 |
+All regions |Married |Not stated |Male |550 |86891 |60.4149796872 |
+All regions |Married |Not stated |Female |920 |142583 |41.1279535428 |
+All regions |Single |White |Male |1402 |229306 |23.3824480368 |
+All regions |Single |White |Female |1492 |244454 |24.9306977642 |
+All regions |Single |Black |Male |1078 |180766 |26.4430588839 |
+All regions |Single |Black |Female |1332 |221941 |25.8018616828 |
+All regions |Single |American Indian/Eskimo|Male |5 |639 |28.2488262911 |
+All regions |Single |American Indian/Eskimo|Female |4 |663 |6.75263951735 |
+All regions |Single |Asian/Pacific Islander|Male |71 |9677 |4.22903503208 |
+All regions |Single |Asian/Pacific Islander|Female |72 |9876 |13.7756598488 |
+All regions |Single |Other |Male |195 |27302 |20.1435155596 |
+All regions |Single |Other |Female |271 |37466 |23.9964082607 |
+All regions |Single |Not stated |Male |809 |125268 |13.8095407334 |
+All regions |Single |Not stated |Female |1033 |157842 |18.0278367211 |
+All regions |Widowed |White |Male |188 |29041 |76.7510416308 |
+All regions |Widowed |White |Female |806 |128336 |77.1221948635 |
+All regions |Widowed |Black |Male |31 |4751 |78.8372974111 |
+All regions |Widowed |Black |Female |216 |36408 |72.8174851681 |
+All regions |Widowed |American Indian/Eskimo|Male |3 |231 |69.3506493506 |
+All regions |Widowed |American Indian/Eskimo|Female |0 |0 |None |
+All regions |Widowed |Asian/Pacific Islander|Male |2 |215 |76.0 |
+All regions |Widowed |Asian/Pacific Islander|Female |7 |942 |74.6878980892 |
+All regions |Widowed |Other |Male |7 |935 |75.222459893 |
+All regions |Widowed |Other |Female |46 |6219 |72.5798359865 |
+All regions |Widowed |Not stated |Male |32 |5469 |76.2971292741 |
+All regions |Widowed |Not stated |Female |218 |36168 |76.7069785446 |
+All regions |Divorced |White |Male |180 |28472 |55.8587735319 |
+All regions |Divorced |White |Female |295 |46675 |55.832672737 |
+All regions |Divorced |Black |Male |73 |11841 |57.6688624272 |
+All regions |Divorced |Black |Female |141 |22305 |52.8258686393 |
+All regions |Divorced |American Indian/Eskimo|Male |2 |244 |42.2950819672 |
+All regions |Divorced |American Indian/Eskimo|Female |0 |0 |None |
+All regions |Divorced |Asian/Pacific Islander|Male |3 |419 |58.3866348449 |
+All regions |Divorced |Asian/Pacific Islander|Female |2 |360 |62.2944444444 |
+All regions |Divorced |Other |Male |8 |1036 |57.2277992278 |
+All regions |Divorced |Other |Female |39 |5096 |51.7721742543 |
+All regions |Divorced |Not stated |Male |43 |7464 |48.2840467922 |
+All regions |Divorced |Not stated |Female |101 |16291 |51.0893745013 |
+All regions |Separated |White |Male |31 |4996 |57.9813851081 |
+All regions |Separated |White |Female |40 |5465 |58.0388228118 |
+All regions |Separated |Black |Male |10 |1441 |54.8098542679 |
+All regions |Separated |Black |Female |45 |6517 |58.8324382385 |
+All regions |Separated |American Indian/Eskimo|Male |1 |114 |49.0 |
+All regions |Separated |American Indian/Eskimo|Female |7 |841 |48.4946492271 |
+All regions |Separated |Asian/Pacific Islander|Male |1 |133 |53.0 |
+All regions |Separated |Asian/Pacific Islander|Female |2 |265 |35.0 |
+All regions |Separated |Other |Male |5 |643 |51.3110419907 |
+All regions |Separated |Other |Female |10 |1302 |45.7503840246 |
+All regions |Separated |Not stated |Male |14 |1690 |75.3124260355 |
+All regions |Separated |Not stated |Female |22 |3022 |53.7293183322 |
+All regions |Not stated |White |Male |2370 |299067 |53.1061645014 |
+All regions |Not stated |White |Female |2886 |368240 |54.4101949784 |
+All regions |Not stated |Black |Male |2237 |96636 |37.9586818295 |
+All regions |Not stated |Black |Female |2633 |104360 |36.8721280638 |
+All regions |Not stated |American Indian/Eskimo|Male |452 |13782 |51.3263371333 |
+All regions |Not stated |American Indian/Eskimo|Female |568 |16737 |48.3171601979 |
+All regions |Not stated |Asian/Pacific Islander|Male |131 |4492 |42.1079825233 |
+All regions |Not stated |Asian/Pacific Islander|Female |180 |7160 |36.3417946927 |
+All regions |Not stated |Other |Male |528 |26276 |38.69207907 |
+All regions |Not stated |Other |Female |545 |29437 |37.4163622892 |
+All regions |Not stated |Not stated |Male |4776 |145406 |51.1310035328 |
+All regions |Not stated |Not stated |Female |5820 |171095 |49.0196700395 |
+Northeast |All |White |Male |2654 |364311 |50.074135431 |
+Northeast |All |White |Female |3153 |436372 |49.0096684658 |
+Northeast |All |Black |Male |2367 |121713 |35.6462751593 |
+Northeast |All |Black |Female |2994 |172408 |39.6161718314 |
+Northeast |All |American Indian/Eskimo|Male |28 |3165 |40.7025803054 |
+Northeast |All |American Indian/Eskimo|Female |26 |2975 |35.4256302521 |
+Northeast |All |Asian/Pacific Islander|Male |52 |6374 |22.1349814882 |
+Northeast |All |Asian/Pacific Islander|Female |91 |11834 |29.4330249 |
+Northeast |All |Other |Male |724 |63661 |36.4446903506 |
+Northeast |All |Other |Female |853 |87347 |36.6094300732 |
+Northeast |All |Not stated |Male |572 |76695 |38.680172652 |
+Northeast |All |Not stated |Female |799 |106239 |38.4992615187 |
+Midwest |All |White |Male |1711 |370995 |52.1495866071 |
+Midwest |All |White |Female |2273 |491794 |52.2376598551 |
+Midwest |All |Black |Male |374 |80629 |42.0364926857 |
+Midwest |All |Black |Female |461 |100086 |39.7044608047 |
+Midwest |All |American Indian/Eskimo|Male |0 |0 |None |
+Midwest |All |American Indian/Eskimo|Female |1 |179 |69.0 |
+Midwest |All |Asian/Pacific Islander|Male |7 |1673 |33.5451285117 |
+Midwest |All |Asian/Pacific Islander|Female |15 |2782 |48.8533429188 |
+Midwest |All |Other |Male |21 |3379 |37.5211107823 |
+Midwest |All |Other |Female |17 |2574 |46.7222222222 |
+Midwest |All |Not stated |Male |813 |146088 |38.7374269781 |
+Midwest |All |Not stated |Female |1250 |221886 |37.9909073388 |
+South |All |White |Male |1774 |168733 |43.278392641 |
+South |All |White |Female |2144 |193111 |42.1829737899 |
+South |All |Black |Male |978 |139588 |32.7079598695 |
+South |All |Black |Female |1260 |175233 |31.6239729461 |
+South |All |American Indian/Eskimo|Male |438 |12239 |53.373389305 |
+South |All |American Indian/Eskimo|Female |556 |15741 |48.9566774813 |
+South |All |Asian/Pacific Islander|Male |181 |10904 |29.110423468 |
+South |All |Asian/Pacific Islander|Female |249 |15596 |32.3261894075 |
+South |All |Other |Male |99 |2441 |31.6989208146 |
+South |All |Other |Female |161 |5664 |29.5549081921 |
+South |All |Not stated |Male |4839 |149405 |44.8007516457 |
+South |All |Not stated |Female |6065 |198876 |41.9655903119 |
+Northeast |Married |All races |Male |1313 |167853 |62.1414154051 |
+Northeast |Married |All races |Female |1356 |172980 |48.9018730489 |
+Northeast |Single |All races |Male |1318 |180603 |23.8227146366 |
+Northeast |Single |All races |Female |1744 |235100 |29.5607831261 |
+Northeast |Widowed |All races |Male |127 |15823 |76.4486506984 |
+Northeast |Widowed |All races |Female |546 |68444 |76.847948688 |
+Northeast |Divorced |All races |Male |97 |12276 |58.7258981148 |
+Northeast |Divorced |All races |Female |238 |29925 |55.3563909774 |
+Northeast |Separated |All races |Male |48 |6008 |65.9883488682 |
+Northeast |Separated |All races |Female |96 |12403 |61.5556048268 |
+Northeast |Not stated |All races |Male |3494 |253356 |43.7237164201 |
+Northeast |Not stated |All races |Female |3936 |298323 |43.1396818918 |
+Midwest |Married |All races |Male |1151 |231400 |62.2255157015 |
+Midwest |Married |All races |Female |1429 |282044 |47.0061828414 |
+Midwest |Single |All races |Male |949 |192461 |19.8060949216 |
+Midwest |Single |All races |Female |1107 |226012 |20.539067176 |
+Midwest |Widowed |All races |Male |93 |19205 |77.9305389222 |
+Midwest |Widowed |All races |Female |574 |116083 |76.3015514761 |
+Midwest |Divorced |All races |Male |112 |23128 |53.8142943618 |
+Midwest |Divorced |All races |Female |221 |43833 |54.4180640157 |
+Midwest |Separated |All races |Male |9 |2359 |47.2729970326 |
+Midwest |Separated |All races |Female |12 |2317 |36.6517047907 |
+Midwest |Not stated |All races |Male |612 |134211 |55.9931685907 |
+Midwest |Not stated |All races |Female |674 |149012 |61.3022665071 |
+South |Married |All races |Male |480 |64988 |57.3123499723 |
+South |Married |All races |Female |750 |100190 |42.0367002695 |
+South |Single |All races |Male |1293 |199894 |21.8426477429 |
+South |Single |All races |Female |1353 |211130 |19.4866132892 |
+South |Widowed |All races |Male |43 |5614 |74.3038831493 |
+South |Widowed |All races |Female |173 |23546 |73.3741187463 |
+South |Divorced |All races |Male |100 |14072 |54.1640136441 |
+South |Divorced |All races |Female |119 |16969 |50.7382874654 |
+South |Separated |All races |Male |5 |650 |51.6723076923 |
+South |Separated |All races |Female |18 |2692 |46.1341010401 |
+South |Not stated |All races |Male |6388 |198092 |52.0256640064 |
+South |Not stated |All races |Female |8022 |249694 |49.8089272796 |
+Northeast |Married |White |Persons|1796 |229215 |57.0791527605 |
+Northeast |Married |Black |Persons|252 |32045 |52.7170229365 |
+Northeast |Married |American Indian/Eskimo|Persons|5 |665 |58.6842105263 |
+Northeast |Married |Asian/Pacific Islander|Persons|53 |6411 |46.72749961 |
+Northeast |Married |Other |Persons|208 |26741 |51.5841591564 |
+Northeast |Married |Not stated |Persons|355 |45756 |52.4291240493 |
+Northeast |Single |White |Persons|1147 |155128 |29.0330235812 |
+Northeast |Single |Black |Persons|676 |90423 |31.444158841 |
+Northeast |Single |American Indian/Eskimo|Persons|9 |1302 |17.3026113671 |
+Northeast |Single |Asian/Pacific Islander|Persons|66 |8510 |11.3029128874 |
+Northeast |Single |Other |Persons|451 |61755 |22.5829437892 |
+Northeast |Single |Not stated |Persons|713 |98585 |24.2608744241 |
+Northeast |Widowed |White |Persons|478 |58808 |77.8347843831 |
+Northeast |Widowed |Black |Persons|92 |11926 |74.6273687741 |
+Northeast |Widowed |American Indian/Eskimo|Persons|3 |231 |69.3506493506 |
+Northeast |Widowed |Asian/Pacific Islander|Persons|2 |242 |76.0 |
+Northeast |Widowed |Other |Persons|50 |6611 |72.4772349115 |
+Northeast |Widowed |Not stated |Persons|48 |6449 |75.7567064661 |
+Northeast |Divorced |White |Persons|189 |23722 |57.0529887868 |
+Northeast |Divorced |Black |Persons|54 |6805 |58.6077883909 |
+Northeast |Divorced |American Indian/Eskimo|Persons|2 |244 |42.2950819672 |
+Northeast |Divorced |Asian/Pacific Islander|Persons|2 |272 |70.3970588235 |
+Northeast |Divorced |Other |Persons|45 |5771 |52.2490036389 |
+Northeast |Divorced |Not stated |Persons|43 |5387 |54.6176211726 |
+Northeast |Separated |White |Persons|55 |7095 |66.1240544985 |
+Northeast |Separated |Black |Persons|35 |4591 |65.9599215857 |
+Northeast |Separated |American Indian/Eskimo|Persons|8 |955 |48.554973822 |
+Northeast |Separated |Asian/Pacific Islander|Persons|3 |398 |41.0150753769 |
+Northeast |Separated |Other |Persons|15 |1945 |47.588688946 |
+Northeast |Separated |Not stated |Persons|28 |3427 |67.9037058652 |
+Northeast |Not stated |White |Persons|2142 |326715 |47.8762938708 |
+Northeast |Not stated |Black |Persons|4252 |148331 |34.008493617 |
+Northeast |Not stated |American Indian/Eskimo|Persons|27 |2743 |36.4389050917 |
+Northeast |Not stated |Asian/Pacific Islander|Persons|17 |2375 |16.7482105263 |
+Northeast |Not stated |Other |Persons|808 |48185 |38.8206210918 |
+Northeast |Not stated |Not stated |Persons|184 |23330 |53.600833934 |
+Midwest |Married |White |Persons|1521 |319196 |56.3408913854 |
+Midwest |Married |Black |Persons|172 |37366 |52.2518332174 |
+Midwest |Married |American Indian/Eskimo|Persons|1 |179 |69.0 |
+Midwest |Married |Asian/Pacific Islander|Persons|10 |1939 |56.4471376998 |
+Midwest |Married |Other |Persons|9 |1749 |54.7661520869 |
+Midwest |Married |Not stated |Persons|867 |153015 |49.0342711499 |
+Midwest |Single |White |Persons|880 |192212 |21.5927936366 |
+Midwest |Single |Black |Persons|408 |90110 |26.9398516575 |
+Midwest |Single |American Indian/Eskimo|Persons|0 |0 |None |
+Midwest |Single |Asian/Pacific Islander|Persons|7 |1452 |19.258953168 |
+Midwest |Single |Other |Persons|11 |2011 |18.4157135753 |
+Midwest |Single |Not stated |Persons|750 |132688 |13.6488182952 |
+Midwest |Widowed |White |Persons|394 |83143 |76.925369544 |
+Midwest |Widowed |Black |Persons|84 |18377 |73.8129727377 |
+Midwest |Widowed |American Indian/Eskimo|Persons|0 |0 |None |
+Midwest |Widowed |Asian/Pacific Islander|Persons|1 |174 |78.0 |
+Midwest |Widowed |Other |Persons|3 |543 |78.379373849 |
+Midwest |Widowed |Not stated |Persons|185 |33051 |77.019454782 |
+Midwest |Divorced |White |Persons|185 |37921 |55.9936446824 |
+Midwest |Divorced |Black |Persons|51 |10963 |57.021800602 |
+Midwest |Divorced |American Indian/Eskimo|Persons|0 |0 |None |
+Midwest |Divorced |Asian/Pacific Islander|Persons|1 |242 |60.0 |
+Midwest |Divorced |Other |Persons|2 |361 |59.8060941828 |
+Midwest |Divorced |Not stated |Persons|94 |17474 |48.3775323338 |
+Midwest |Separated |White |Persons|11 |2663 |40.9902365753 |
+Midwest |Separated |Black |Persons|3 |841 |43.4661117717 |
+Midwest |Separated |American Indian/Eskimo|Persons|0 |0 |None |
+Midwest |Separated |Asian/Pacific Islander|Persons|0 |0 |None |
+Midwest |Separated |Other |Persons|0 |0 |None |
+Midwest |Separated |Not stated |Persons|7 |1172 |43.2824232082 |
+Midwest |Not stated |White |Persons|993 |227654 |62.7044741447 |
+Midwest |Not stated |Black |Persons|117 |23058 |41.8545840923 |
+Midwest |Not stated |American Indian/Eskimo|Persons|0 |0 |None |
+Midwest |Not stated |Asian/Pacific Islander|Persons|3 |648 |40.9320987654 |
+Midwest |Not stated |Other |Persons|13 |1289 |38.8493664339 |
+Midwest |Not stated |Not stated |Persons|160 |30574 |43.6012394911 |
+South |Married |White |Persons|702 |92853 |51.0863192358 |
+South |Married |Black |Persons|214 |33280 |48.8447115385 |
+South |Married |American Indian/Eskimo|Persons|1 |204 |38.0 |
+South |Married |Asian/Pacific Islander|Persons|61 |7274 |43.6828430025 |
+South |Married |Other |Persons|4 |864 |34.6782407407 |
+South |Married |Not stated |Persons|248 |30703 |39.4664690747 |
+South |Single |White |Persons|867 |126420 |22.1635460227 |
+South |Single |Black |Persons|1326 |222174 |23.5656366551 |
+South |Single |American Indian/Eskimo|Persons|0 |0 |None |
+South |Single |Asian/Pacific Islander|Persons|70 |9591 |5.50735064123 |
+South |Single |Other |Persons|4 |1002 |17.3293413174 |
+South |Single |Not stated |Persons|379 |51837 |7.18889297755 |
+South |Widowed |White |Persons|122 |15426 |74.7677298068 |
+South |Widowed |Black |Persons|71 |10856 |71.7785556374 |
+South |Widowed |American Indian/Eskimo|Persons|0 |0 |None |
+South |Widowed |Asian/Pacific Islander|Persons|6 |741 |73.8623481781 |
+South |Widowed |Other |Persons|0 |0 |None |
+South |Widowed |Not stated |Persons|17 |2137 |73.6930276088 |
+South |Divorced |White |Persons|101 |13504 |53.2919875592 |
+South |Divorced |Black |Persons|109 |16378 |51.1162535108 |
+South |Divorced |American Indian/Eskimo|Persons|0 |0 |None |
+South |Divorced |Asian/Pacific Islander|Persons|2 |265 |49.8943396226 |
+South |Divorced |Other |Persons|0 |0 |None |
+South |Divorced |Not stated |Persons|7 |894 |59.4127516779 |
+South |Separated |White |Persons|5 |703 |40.6116642959 |
+South |Separated |Black |Persons|17 |2526 |48.6995249406 |
+South |Separated |American Indian/Eskimo|Persons|0 |0 |None |
+South |Separated |Asian/Pacific Islander|Persons|0 |0 |None |
+South |Separated |Other |Persons|0 |0 |None |
+South |Separated |Not stated |Persons|1 |113 |55.0 |
+South |Not stated |White |Persons|2121 |112938 |53.1396322659 |
+South |Not stated |Black |Persons|501 |29607 |50.8850472966 |
+South |Not stated |American Indian/Eskimo|Persons|993 |27776 |50.9832939205 |
+South |Not stated |Asian/Pacific Islander|Persons|291 |8629 |44.3916221457 |
+South |Not stated |Other |Persons|252 |6239 |31.6477104838 |
+South |Not stated |Not stated |Persons|10252 |262597 |50.4126223575 |
+All regions |All |White |Male |6139 |904039 |49.6574675006 |
+All regions |All |White |Female |7570 |1121277 |49.2497491611 |
+All regions |All |Black |Male |3719 |341930 |35.9535991562 |
+All regions |All |Black |Female |4715 |447727 |36.5078926857 |
+All regions |All |American Indian/Eskimo|Male |466 |15404 |50.7699674351 |
+All regions |All |American Indian/Eskimo|Female |583 |18895 |47.0161053312 |
+All regions |All |Asian/Pacific Islander|Male |240 |18951 |27.1557928078 |
+All regions |All |Asian/Pacific Islander|Female |355 |30212 |32.714804272 |
+All regions |All |Other |Male |844 |69481 |36.3303108972 |
+All regions |All |Other |Female |1031 |95585 |36.463732684 |
+All regions |All |Not stated |Male |6224 |372188 |41.1595897061 |
+All regions |All |Not stated |Female |8114 |527001 |39.5933257226 |
+All regions |Married |All races |Male |2944 |464241 |61.5073255773 |
+All regions |Married |All races |Female |3535 |555214 |46.7000378833 |
+All regions |Single |All races |Male |3560 |572958 |21.7826940092 |
+All regions |Single |All races |Female |4204 |672242 |23.3636464655 |
+All regions |Widowed |All races |Male |263 |40642 |76.852640126 |
+All regions |Widowed |All races |Female |1293 |208073 |76.1500098523 |
+All regions |Divorced |All races |Male |309 |49476 |55.1324303755 |
+All regions |Divorced |All races |Female |578 |90727 |54.0393157494 |
+All regions |Separated |All races |Male |62 |9017 |60.0601086836 |
+All regions |Separated |All races |Female |126 |17412 |55.8574067693 |
+All regions |Not stated |All races |Male |10494 |585659 |49.3434453862 |
+All regions |Not stated |All races |Female |12632 |697029 |49.4116069 |
+All regions |Married |White |Persons|4019 |641264 |55.8439319324 |
+All regions |Married |Black |Persons|638 |102691 |51.2928202082 |
+All regions |Married |American Indian/Eskimo|Persons|7 |1048 |56.4198473282 |
+All regions |Married |Asian/Pacific Islander|Persons|124 |15624 |46.5162570405 |
+All regions |Married |Other |Persons|221 |29354 |51.2761463514 |
+All regions |Married |Not stated |Persons|1470 |229474 |48.4310422967 |
+All regions |Single |White |Persons|2894 |473760 |24.1813247652 |
+All regions |Single |Black |Persons|2410 |402707 |26.0896805021 |
+All regions |Single |American Indian/Eskimo|Persons|9 |1302 |17.3026113671 |
+All regions |Single |Asian/Pacific Islander|Persons|143 |19553 |9.05092766696 |
+All regions |Single |Other |Persons|466 |64768 |22.3722778796 |
+All regions |Single |Not stated |Persons|1842 |283110 |16.1613625528 |
+All regions |Widowed |White |Persons|994 |157377 |77.0537054334 |
+All regions |Widowed |Black |Persons|247 |41159 |73.5123545276 |
+All regions |Widowed |American Indian/Eskimo|Persons|3 |231 |69.3506493506 |
+All regions |Widowed |Asian/Pacific Islander|Persons|9 |1157 |74.9317199654 |
+All regions |Widowed |Other |Persons|53 |7154 |72.925216662 |
+All regions |Widowed |Not stated |Persons|250 |41637 |76.6531450393 |
+All regions |Divorced |White |Persons|475 |75147 |55.842561912 |
+All regions |Divorced |Black |Persons|214 |34146 |54.5053007673 |
+All regions |Divorced |American Indian/Eskimo|Persons|2 |244 |42.2950819672 |
+All regions |Divorced |Asian/Pacific Islander|Persons|5 |779 |60.1925545571 |
+All regions |Divorced |Other |Persons|47 |6132 |52.693900848 |
+All regions |Divorced |Not stated |Persons|144 |23755 |50.2079193962 |
+All regions |Separated |White |Persons|71 |10461 |58.0113915177 |
+All regions |Separated |Black |Persons|55 |7958 |58.1040462428 |
+All regions |Separated |American Indian/Eskimo|Persons|8 |955 |48.554973822 |
+All regions |Separated |Asian/Pacific Islander|Persons|3 |398 |41.0150753769 |
+All regions |Separated |Other |Persons|15 |1945 |47.588688946 |
+All regions |Separated |Not stated |Persons|36 |4712 |61.4702886248 |
+All regions |Not stated |White |Persons|5256 |667307 |53.8257675969 |
+All regions |Not stated |Black |Persons|4870 |200996 |37.3945275628 |
+All regions |Not stated |American Indian/Eskimo|Persons|1020 |30519 |49.6760669945 |
+All regions |Not stated |Asian/Pacific Islander|Persons|311 |11652 |38.5647363109 |
+All regions |Not stated |Other |Persons|1073 |55713 |38.0180303762 |
+All regions |Not stated |Not stated |Persons|10596 |316501 |49.9896529398 |
+Northeast |All |All races |Male |6397 |635919 |44.2474069089 |
+Northeast |All |All races |Female |7916 |817175 |44.0029866315 |
+Midwest |All |All races |Male |2926 |602764 |47.4125434129 |
+Midwest |All |All races |Female |4017 |819301 |46.823087999 |
+South |All |All races |Male |8309 |483310 |40.5735905781 |
+South |All |All races |Female |10435 |604221 |38.8528269489 |
+Northeast |All |White |Persons|5807 |800683 |49.4940012474 |
+Northeast |All |Black |Persons|5361 |294121 |37.9733512452 |
+Northeast |All |American Indian/Eskimo|Persons|54 |6140 |38.1457519001 |
+Northeast |All |Asian/Pacific Islander|Persons|143 |18208 |26.8782287276 |
+Northeast |All |Other |Persons|1577 |151008 |36.5399801402 |
+Northeast |All |Not stated |Persons|1371 |182934 |38.5751084328 |
+Midwest |All |White |Persons|3984 |862789 |52.1997887944 |
+Midwest |All |Black |Persons|835 |180715 |40.7449355773 |
+Midwest |All |American Indian/Eskimo|Persons|1 |179 |69.0 |
+Midwest |All |Asian/Pacific Islander|Persons|22 |4455 |43.1046015713 |
+Midwest |All |Other |Persons|38 |5953 |41.4995520466 |
+Midwest |All |Not stated |Persons|2063 |367974 |38.2872803463 |
+South |All |White |Persons|3918 |361844 |42.6937831691 |
+South |All |Black |Persons|2238 |314821 |32.1046002443 |
+South |All |American Indian/Eskimo|Persons|994 |27980 |50.8886337361 |
+South |All |Asian/Pacific Islander|Persons|430 |26500 |31.0029927357 |
+South |All |Other |Persons|260 |8105 |30.2006250103 |
+South |All |Not stated |Persons|10904 |348281 |43.1818130719 |
+Northeast |Married |All races |Persons|2669 |340833 |55.4220659385 |
+Northeast |Single |All races |Persons|3062 |415703 |27.0678677913 |
+Northeast |Widowed |All races |Persons|673 |84267 |76.7729716259 |
+Northeast |Divorced |All races |Persons|335 |42201 |56.3365589739 |
+Northeast |Separated |All races |Persons|144 |18411 |63.0021273514 |
+Northeast |Not stated |All races |Persons|7430 |551679 |43.4078970168 |
+Midwest |Married |All races |Persons|2580 |513444 |53.8652631381 |
+Midwest |Single |All races |Persons|2056 |418473 |20.2019640103 |
+Midwest |Widowed |All races |Persons|667 |135288 |76.5327967004 |
+Midwest |Divorced |All races |Persons|333 |66961 |54.2095249474 |
+Midwest |Separated |All races |Persons|21 |4676 |42.0100513259 |
+Midwest |Not stated |All races |Persons|1286 |283223 |58.7864420844 |
+South |Married |All races |Persons|1230 |165178 |48.0467858916 |
+South |Single |All races |Persons|2646 |411024 |20.6324275266 |
+South |Widowed |All races |Persons|216 |29160 |73.5531207133 |
+South |Divorced |All races |Persons|219 |31041 |52.291292162 |
+South |Separated |All races |Persons|23 |3342 |47.2112507481 |
+South |Not stated |All races |Persons|14410 |447786 |50.7895693981 |
+All regions |All |All races |Male |17632 |1721993 |44.3242005828 |
+All regions |All |All races |Female |22368 |2240697 |43.6453645331 |
+Northeast |All |All races |Persons|14313 |1453094 |44.1099525252 |
+Midwest |All |All races |Persons|6943 |1422065 |47.0729376916 |
+South |All |All races |Persons|18744 |1087531 |39.6175520644 |
+All regions |Married |All races |Persons|6479 |1019455 |53.4430035329 |
+All regions |Single |All races |Persons|7764 |1245200 |22.6361975758 |
+All regions |Widowed |All races |Persons|1556 |248715 |76.2648252015 |
+All regions |Divorced |All races |Persons|887 |140203 |54.4250631246 |
+All regions |Separated |All races |Persons|188 |26429 |57.2912772586 |
+All regions |Not stated |All races |Persons|23126 |1282688 |49.3804852211 |
+All regions |All |White |Persons|13709 |2025316 |49.431742134 |
+All regions |All |Black |Persons|8434 |789657 |36.2678776076 |
+All regions |All |American Indian/Eskimo|Persons|1049 |34299 |48.701999726 |
+All regions |All |Asian/Pacific Islander|Persons|595 |49163 |30.5719564747 |
+All regions |All |Other |Persons|1875 |165066 |36.4075716383 |
+All regions |All |Not stated |Persons|14338 |899189 |40.2416262017 |
+All regions |All |All races |Persons|40000 |3962690 |43.9403537767 |
+
+
+That took 10.260 seconds.
+
+##############################################################################
+
+#19a. Demonstration of coalescing values on the fly.
+# Here is the uncoalesced hospital ownership column, both unformatted and formatted.
+>>> nhds.metadata.hosp_ownership.use_outtrans=0
+>>> print nhds.univar(var1="hosp_ownership",printit=1)
+>>> nhds.metadata.hosp_ownership.use_outtrans=1
+>>> print nhds.univar(var1="hosp_ownership",printit=1)
+colload(): memory mapping of hosp_ownership took 0.018 seconds.
+
+
+Univariate summary derived from hosp_ownership column in nhds dataset
+univar(): Univariate summary created in 0.020 seconds.
+
+ | |
+ | |
+Grouped | |
+by | |
+Hospital Ownership|Frequency|
+------------------|---------|
+2 |8685 |
+3 |31315 |
+
+
+Univariate summary derived from hosp_ownership column in nhds dataset
+univar(): Univariate summary created in 0.001 seconds.
+
+ | |
+ | |
+Grouped | |
+by | |
+Hospital Ownership |Frequency|
+---------------------------|---------|
+Government |8685 |
+Nonprofit, including church|31315 |
+
+
+That took 0.023 seconds.
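+
+# (Aside, not part of the recorded demo run: use_outtrans simply switches
+# the display between the stored codes and their labels. A hypothetical
+# plain-Python sketch of that lookup, with the label values inferred from
+# the formatted output above:)
+>>> labels = {2: 'Government', 3: 'Nonprofit, including church'}
+>>> for code in (2, 3):
+...     print code, '->', labels.get(code, code)
+...
+2 -> Government
+3 -> Nonprofit, including church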
+
+##############################################################################
+
+#19b. Now create a dictionary which defines how to coalesce values,
+# and override the output translation to suit the new categories.
+>>> aggregated_ownership = {1:1,2:2,3:1}
+>>> nhds.metadata.hosp_ownership.outtrans = {1:'Private sector (incl. non-profit',2:'Public sector'}
+
+That took 0.000 seconds.
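+
+# (Aside, hypothetical illustration, not a SOOM call: the dictionary is
+# applied to each raw ownership code before frequencies are tallied, so
+# codes 1 and 3 are coalesced into a single category 1:)
+>>> [aggregated_ownership[code] for code in [1, 2, 3, 3, 2]]
+[1, 2, 1, 1, 2]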
+
+##############################################################################
+
+#19c. Et voila!
+>>> print nhds.univar(var1="hosp_ownership",var1agg=aggregated_ownership,printit=1)
+2 2
+3 1
+
+Univariate summary derived from hosp_ownership column in nhds dataset
+univar(): Univariate summary created in 0.001 seconds.
+
+ | |
+ | |
+Grouped | |
+by | |
+Hospital Ownership |Frequency|
+--------------------------------|---------|
+Public sector |8685 |
+Private sector (incl. non-profit|31315 |
+
+
+That took 0.002 seconds.
+
+##############################################################################
+
+#19d. This on-the-fly aggregation also works for higher-order contingency tables.
+>>> print nhds.bivar(var1="race",var2="hosp_ownership",var2agg=aggregated_ownership,printit=1,allcalc=1)
+>>> print nhds.trivar(var1="race",var2="sex",var3="hosp_ownership",var3agg=aggregated_ownership,printit=1,allcalc=1)
+>>> print nhds.quadrivar(var1="race",var2="sex",var3="geog_region",var4="hosp_ownership",var4agg=aggregated_ownership,printit=1,allcalc=1)
+2 2
+3 1
+bivar(): Bivariate setup time 0.001 seconds.
+
+Bivariate summary derived from hosp_ownership and race columns in nhds dataset
+bivar(): Bivariate summary created in 0.048 seconds.
+Time in intersect() function was 0.040 seconds.
+
+ | | |
+ | | |
+Grouped |Grouped | |
+by |by | |
+Hospital Ownership |Race |Frequency|
+--------------------------------|----------------------|---------|
+Public sector |White |1598 |
+Public sector |Black |6248 |
+Public sector |American Indian/Eskimo|9 |
+Public sector |Asian/Pacific Islander|33 |
+Public sector |Other |599 |
+Public sector |Not stated |198 |
+Private sector (incl. non-profit|White |12111 |
+Private sector (incl. non-profit|Black |2186 |
+Private sector (incl. non-profit|American Indian/Eskimo|1040 |
+Private sector (incl. non-profit|Asian/Pacific Islander|562 |
+Private sector (incl. non-profit|Other |1276 |
+Private sector (incl. non-profit|Not stated |14140 |
+<All> |White |13709 |
+<All> |Black |8434 |
+<All> |American Indian/Eskimo|1049 |
+<All> |Asian/Pacific Islander|595 |
+<All> |Other |1875 |
+<All> |Not stated |14338 |
+Public sector |All races |8685 |
+Private sector (incl. non-profit|All races |31315 |
+<All> |All races |40000 |
+
+
+That took 0.056 seconds.
+
+##############################################################################
+
+2 2
+3 1
+trivar(): Trivariate setup time 0.001 seconds.
+
+Trivariate summary derived from hosp_ownership, race and sex columns in nhds dataset
+trivar(): Trivariate summary created in 0.106 seconds.
+Time in intersect() function was 0.073 seconds.
+Time in ArrayDict was 0.024 seconds.
+Time in loops was 0.105 seconds.
+Time in empty loops was 0.000 seconds.
+Time in count was 0.001 seconds.
+Time in take() was 0.000 seconds.
+
+ | | | |
+ | | | |
+Grouped |Grouped |Grouped| |
+by |by |by | |
+Hospital Ownership |Race |Sex |Frequency|
+--------------------------------|----------------------|-------|---------|
+Public sector |White |Male |800 |
+Public sector |White |Female |798 |
+Public sector |Black |Male |2889 |
+Public sector |Black |Female |3359 |
+Public sector |American Indian/Eskimo|Male |4 |
+Public sector |American Indian/Eskimo|Female |5 |
+Public sector |Asian/Pacific Islander|Male |13 |
+Public sector |Asian/Pacific Islander|Female |20 |
+Public sector |Other |Male |327 |
+Public sector |Other |Female |272 |
+Public sector |Not stated |Male |89 |
+Public sector |Not stated |Female |109 |
+Private sector (incl. non-profit|White |Male |5339 |
+Private sector (incl. non-profit|White |Female |6772 |
+Private sector (incl. non-profit|Black |Male |830 |
+Private sector (incl. non-profit|Black |Female |1356 |
+Private sector (incl. non-profit|American Indian/Eskimo|Male |462 |
+Private sector (incl. non-profit|American Indian/Eskimo|Female |578 |
+Private sector (incl. non-profit|Asian/Pacific Islander|Male |227 |
+Private sector (incl. non-profit|Asian/Pacific Islander|Female |335 |
+Private sector (incl. non-profit|Other |Male |517 |
+Private sector (incl. non-profit|Other |Female |759 |
+Private sector (incl. non-profit|Not stated |Male |6135 |
+Private sector (incl. non-profit|Not stated |Female |8005 |
+<All> |White |Male |6139 |
+<All> |White |Female |7570 |
+<All> |Black |Male |3719 |
+<All> |Black |Female |4715 |
+<All> |American Indian/Eskimo|Male |466 |
+<All> |American Indian/Eskimo|Female |583 |
+<All> |Asian/Pacific Islander|Male |240 |
+<All> |Asian/Pacific Islander|Female |355 |
+<All> |Other |Male |844 |
+<All> |Other |Female |1031 |
+<All> |Not stated |Male |6224 |
+<All> |Not stated |Female |8114 |
+Public sector |All races |Male |4122 |
+Public sector |All races |Female |4563 |
+Private sector (incl. non-profit|All races |Male |13510 |
+Private sector (incl. non-profit|All races |Female |17805 |
+Public sector |White |Persons|1598 |
+Public sector |Black |Persons|6248 |
+Public sector |American Indian/Eskimo|Persons|9 |
+Public sector |Asian/Pacific Islander|Persons|33 |
+Public sector |Other |Persons|599 |
+Public sector |Not stated |Persons|198 |
+Private sector (incl. non-profit|White |Persons|12111 |
+Private sector (incl. non-profit|Black |Persons|2186 |
+Private sector (incl. non-profit|American Indian/Eskimo|Persons|1040 |
+Private sector (incl. non-profit|Asian/Pacific Islander|Persons|562 |
+Private sector (incl. non-profit|Other |Persons|1276 |
+Private sector (incl. non-profit|Not stated |Persons|14140 |
+<All> |All races |Male |17632 |
+<All> |All races |Female |22368 |
+<All> |White |Persons|13709 |
+<All> |Black |Persons|8434 |
+<All> |American Indian/Eskimo|Persons|1049 |
+<All> |Asian/Pacific Islander|Persons|595 |
+<All> |Other |Persons|1875 |
+<All> |Not stated |Persons|14338 |
+Public sector |All races |Persons|8685 |
+Private sector (incl. non-profit|All races |Persons|31315 |
+<All> |All races |Persons|40000 |
+
+
+That took 0.135 seconds.
+
+##############################################################################
+
+1 1
+2 2
+3 1
+quadrivar(): Quadrivariate setup time 0.008 seconds.
+
+Quadrivariate summary derived from geog_region, hosp_ownership, race and sex columns in nhds dataset
+quadrivar(): Quadrivariate summary created in 0.375 seconds.
+Time in intersect() function was 0.254 seconds.
+
+ | | | | |
+ | | | | |
+Grouped |Grouped |Grouped |Grouped| |
+by |by |by |by | |
+Geographic Region|Hospital Ownership|Race |Sex |Frequency|
+-----------------|------------------|----------------------|-------|---------|
+Northeast |Public sector |White |Male |628 |
+Northeast |Public sector |White |Female |573 |
+Northeast |Public sector |Black |Male |2649 |
+Northeast |Public sector |Black |Female |3146 |
+Northeast |Public sector |American Indian/Eskimo|Male |4 |
+Northeast |Public sector |American Indian/Eskimo|Female |5 |
+Northeast |Public sector |Asian/Pacific Islander|Male |8 |
+Northeast |Public sector |Asian/Pacific Islander|Female |11 |
+Northeast |Public sector |Other |Male |326 |
+Northeast |Public sector |Other |Female |272 |
+Northeast |Public sector |Not stated |Male |33 |
+Northeast |Public sector |Not stated |Female |61 |
+Northeast |3 |White |Male |3800 |
+Northeast |3 |White |Female |4724 |
+Northeast |3 |Black |Male |696 |
+Northeast |3 |Black |Female |1108 |
+Northeast |3 |American Indian/Eskimo|Male |462 |
+Northeast |3 |American Indian/Eskimo|Female |577 |
+Northeast |3 |Asian/Pacific Islander|Male |225 |
+Northeast |3 |Asian/Pacific Islander|Female |329 |
+Northeast |3 |Other |Male |497 |
+Northeast |3 |Other |Female |742 |
+Northeast |3 |Not stated |Male |5378 |
+Northeast |3 |Not stated |Female |6803 |
+Midwest |Public sector |White |Male |172 |
+Midwest |Public sector |White |Female |225 |
+Midwest |Public sector |Black |Male |240 |
+Midwest |Public sector |Black |Female |213 |
+Midwest |Public sector |American Indian/Eskimo|Male |0 |
+Midwest |Public sector |American Indian/Eskimo|Female |0 |
+Midwest |Public sector |Asian/Pacific Islander|Male |5 |
+Midwest |Public sector |Asian/Pacific Islander|Female |9 |
+Midwest |Public sector |Other |Male |1 |
+Midwest |Public sector |Other |Female |0 |
+Midwest |Public sector |Not stated |Male |56 |
+Midwest |Public sector |Not stated |Female |48 |
+Midwest |3 |White |Male |1539 |
+Midwest |3 |White |Female |2048 |
+Midwest |3 |Black |Male |134 |
+Midwest |3 |Black |Female |248 |
+Midwest |3 |American Indian/Eskimo|Male |0 |
+Midwest |3 |American Indian/Eskimo|Female |1 |
+Midwest |3 |Asian/Pacific Islander|Male |2 |
+Midwest |3 |Asian/Pacific Islander|Female |6 |
+Midwest |3 |Other |Male |20 |
+Midwest |3 |Other |Female |17 |
+Midwest |3 |Not stated |Male |757 |
+Midwest |3 |Not stated |Female |1202 |
+All regions |Public sector |White |Male |800 |
+All regions |Public sector |White |Female |798 |
+All regions |Public sector |Black |Male |2889 |
+All regions |Public sector |Black |Female |3359 |
+All regions |Public sector |American Indian/Eskimo|Male |4 |
+All regions |Public sector |American Indian/Eskimo|Female |5 |
+All regions |Public sector |Asian/Pacific Islander|Male |13 |
+All regions |Public sector |Asian/Pacific Islander|Female |20 |
+All regions |Public sector |Other |Male |327 |
+All regions |Public sector |Other |Female |272 |
+All regions |Public sector |Not stated |Male |89 |
+All regions |Public sector |Not stated |Female |109 |
+All regions |3 |White |Male |5339 |
+All regions |3 |White |Female |6772 |
+All regions |3 |Black |Male |830 |
+All regions |3 |Black |Female |1356 |
+All regions |3 |American Indian/Eskimo|Male |462 |
+All regions |3 |American Indian/Eskimo|Female |578 |
+All regions |3 |Asian/Pacific Islander|Male |227 |
+All regions |3 |Asian/Pacific Islander|Female |335 |
+All regions |3 |Other |Male |517 |
+All regions |3 |Other |Female |759 |
+All regions |3 |Not stated |Male |6135 |
+All regions |3 |Not stated |Female |8005 |
+Northeast |<All> |White |Male |4428 |
+Northeast |<All> |White |Female |5297 |
+Northeast |<All> |Black |Male |3345 |
+Northeast |<All> |Black |Female |4254 |
+Northeast |<All> |American Indian/Eskimo|Male |466 |
+Northeast |<All> |American Indian/Eskimo|Female |582 |
+Northeast |<All> |Asian/Pacific Islander|Male |233 |
+Northeast |<All> |Asian/Pacific Islander|Female |340 |
+Northeast |<All> |Other |Male |823 |
+Northeast |<All> |Other |Female |1014 |
+Northeast |<All> |Not stated |Male |5411 |
+Northeast |<All> |Not stated |Female |6864 |
+Midwest |<All> |White |Male |1711 |
+Midwest |<All> |White |Female |2273 |
+Midwest |<All> |Black |Male |374 |
+Midwest |<All> |Black |Female |461 |
+Midwest |<All> |American Indian/Eskimo|Male |0 |
+Midwest |<All> |American Indian/Eskimo|Female |1 |
+Midwest |<All> |Asian/Pacific Islander|Male |7 |
+Midwest |<All> |Asian/Pacific Islander|Female |15 |
+Midwest |<All> |Other |Male |21 |
+Midwest |<All> |Other |Female |17 |
+Midwest |<All> |Not stated |Male |813 |
+Midwest |<All> |Not stated |Female |1250 |
+Northeast |Public sector |All races |Male |3648 |
+Northeast |Public sector |All races |Female |4068 |
+Northeast |3 |All races |Male |11058 |
+Northeast |3 |All races |Female |14283 |
+Midwest |Public sector |All races |Male |474 |
+Midwest |Public sector |All races |Female |495 |
+Midwest |3 |All races |Male |2452 |
+Midwest |3 |All races |Female |3522 |
+Northeast |Public sector |White |Persons|1201 |
+Northeast |Public sector |Black |Persons|5795 |
+Northeast |Public sector |American Indian/Eskimo|Persons|9 |
+Northeast |Public sector |Asian/Pacific Islander|Persons|19 |
+Northeast |Public sector |Other |Persons|598 |
+Northeast |Public sector |Not stated |Persons|94 |
+Northeast |3 |White |Persons|8524 |
+Northeast |3 |Black |Persons|1804 |
+Northeast |3 |American Indian/Eskimo|Persons|1039 |
+Northeast |3 |Asian/Pacific Islander|Persons|554 |
+Northeast |3 |Other |Persons|1239 |
+Northeast |3 |Not stated |Persons|12181 |
+Midwest |Public sector |White |Persons|397 |
+Midwest |Public sector |Black |Persons|453 |
+Midwest |Public sector |American Indian/Eskimo|Persons|0 |
+Midwest |Public sector |Asian/Pacific Islander|Persons|14 |
+Midwest |Public sector |Other |Persons|1 |
+Midwest |Public sector |Not stated |Persons|104 |
+Midwest |3 |White |Persons|3587 |
+Midwest |3 |Black |Persons|382 |
+Midwest |3 |American Indian/Eskimo|Persons|1 |
+Midwest |3 |Asian/Pacific Islander|Persons|8 |
+Midwest |3 |Other |Persons|37 |
+Midwest |3 |Not stated |Persons|1959 |
+All regions |<All> |White |Male |6139 |
+All regions |<All> |White |Female |7570 |
+All regions |<All> |Black |Male |3719 |
+All regions |<All> |Black |Female |4715 |
+All regions |<All> |American Indian/Eskimo|Male |466 |
+All regions |<All> |American Indian/Eskimo|Female |583 |
+All regions |<All> |Asian/Pacific Islander|Male |240 |
+All regions |<All> |Asian/Pacific Islander|Female |355 |
+All regions |<All> |Other |Male |844 |
+All regions |<All> |Other |Female |1031 |
+All regions |<All> |Not stated |Male |6224 |
+All regions |<All> |Not stated |Female |8114 |
+All regions |Public sector |All races |Male |4122 |
+All regions |Public sector |All races |Female |4563 |
+All regions |3 |All races |Male |13510 |
+All regions |3 |All races |Female |17805 |
+All regions |Public sector |White |Persons|1598 |
+All regions |Public sector |Black |Persons|6248 |
+All regions |Public sector |American Indian/Eskimo|Persons|9 |
+All regions |Public sector |Asian/Pacific Islander|Persons|33 |
+All regions |Public sector |Other |Persons|599 |
+All regions |Public sector |Not stated |Persons|198 |
+All regions |3 |White |Persons|12111 |
+All regions |3 |Black |Persons|2186 |
+All regions |3 |American Indian/Eskimo|Persons|1040 |
+All regions |3 |Asian/Pacific Islander|Persons|562 |
+All regions |3 |Other |Persons|1276 |
+All regions |3 |Not stated |Persons|14140 |
+Northeast |<All> |All races |Male |14706 |
+Northeast |<All> |All races |Female |18351 |
+Midwest |<All> |All races |Male |2926 |
+Midwest |<All> |All races |Female |4017 |
+Northeast |<All> |White |Persons|9725 |
+Northeast |<All> |Black |Persons|7599 |
+Northeast |<All> |American Indian/Eskimo|Persons|1048 |
+Northeast |<All> |Asian/Pacific Islander|Persons|573 |
+Northeast |<All> |Other |Persons|1837 |
+Northeast |<All> |Not stated |Persons|12275 |
+Midwest |<All> |White |Persons|3984 |
+Midwest |<All> |Black |Persons|835 |
+Midwest |<All> |American Indian/Eskimo|Persons|1 |
+Midwest |<All> |Asian/Pacific Islander|Persons|22 |
+Midwest |<All> |Other |Persons|38 |
+Midwest |<All> |Not stated |Persons|2063 |
+Northeast |Public sector |All races |Persons|7716 |
+Northeast |3 |All races |Persons|25341 |
+Midwest |Public sector |All races |Persons|969 |
+Midwest |3 |All races |Persons|5974 |
+All regions |<All> |All races |Male |17632 |
+All regions |<All> |All races |Female |22368 |
+Northeast |<All> |All races |Persons|33057 |
+Midwest |<All> |All races |Persons|6943 |
+All regions |Public sector |All races |Persons|8685 |
+All regions |3 |All races |Persons|31315 |
+All regions |<All> |White |Persons|13709 |
+All regions |<All> |Black |Persons|8434 |
+All regions |<All> |American Indian/Eskimo|Persons|1049 |
+All regions |<All> |Asian/Pacific Islander|Persons|595 |
+All regions |<All> |Other |Persons|1875 |
+All regions |<All> |Not stated |Persons|14338 |
+All regions |<All> |All races |Persons|40000 |
+
+
+That took 0.537 seconds.
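+
+# (Aside: the coalesced marginals remain consistent with the grand total -
+# a quick arithmetic check against the frequencies printed above:)
+>>> 8685 + 31315
+40000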
+
+##############################################################################
+
+#19e. Note that on-the-fly coalescing can also be done by a function - this
+# function truncates ICD9CM codes to their 3-character category.
+>>> def icd9cm_truncate(icd9cm_code):
+... return icd9cm_code[0:3]
+
+That took 0.000 seconds.
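+
+# (Aside, hypothetical quick check, not part of the recorded demo run: the
+# truncation is plain string slicing, so it works for V- and E-codes too.)
+>>> icd9cm_truncate('4280')
+'428'
+>>> icd9cm_truncate('V10.72')
+'V10'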
+
+##############################################################################
+
+# Let's override the column type for diagnosis1 while we're at it so it is presented in sorted order.
+>>> nhds.metadata.diagnosis1.coltype = "ordinal"
+
+That took 0.001 seconds.
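+
+# (Aside: with coltype "ordinal" the summary rows come out in sorted key
+# order, as seen below. A hypothetical plain-Python equivalent of that
+# ordering over a few made-up codes:)
+>>> codes = ['4280', '410.1', '0031']
+>>> truncated = [icd9cm_truncate(c) for c in codes]
+>>> truncated.sort()
+>>> print truncated
+['003', '410', '428']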
+
+##############################################################################
+
+# Et voila again!
+>>> print nhds.univar(var1="diagnosis1",var1agg=icd9cm_truncate,printit=1)
+colload(): memory mapping of diagnosis1 took 0.521 seconds.
+
+
+Univariate summary derived from diagnosis1 column in nhds dataset
+univar(): Univariate summary created in 1.701 seconds.
+
+ | |
+ | |
+Grouped | |
+by | |
+Diagnosis Code 1 |Frequency|
+----------------------------------------------------------------------------------------------------------------------------------------|---------|
+002 Typhoid and paratyphoid fevers |1 |
+003 Other salmonella infections |12 |
+004 Shigellosis |3 |
+005 Other food poisoning (bacterial) |2 |
+006 Amoebiasis |1 |
+007 Other protozoal intestinal diseases |4 |
+008 Intestinal infections due to other organisms |74 |
+009 Ill-defined intestinal infections |12 |
+010 Primary tuberculous infection |1 |
+011 Pulmonary tuberculosis |29 |
+014 Tuberculosis of intestines, peritoneum, and mesenteric glands |1 |
+015 Tuberculosis of bones and joints |2 |
+017 Tuberculosis of other organs |1 |
+018 Miliary tuberculosis |6 |
+027 Other zoonotic bacterial diseases |2 |
+031 Diseases due to other mycobacteria |5 |
+033 Whooping cough |2 |
+034 Streptococcal sore throat and scarlet fever |7 |
+035 Erysipelas |1 |
+036 Meningococcal infection |4 |
+038 Septicaemia |293 |
+039 Actinomycotic infections |1 |
+040 Other bacterial diseases |1 |
+041 Bacterial infection in conditions classified elsewhere and of unspecified site |15 |
+042 Human immunodeficiency virus [HIV] infection with specified conditions |321 |
+046 Slow virus infection of central nervous system |1 |
+047 Meningitis due to enterovirus |27 |
+048 Other enterovirus diseases of central nervous system |2 |
+049 Other non-arthropod-borne viral diseases of central nervous system |7 |
+052 Chickenpox |6 |
+053 Herpes zoster |17 |
+054 Herpes simplex |9 |
+057 Other viral exanthemata |1 |
+070 Viral hepatitis |37 |
+073 Ornithosis |1 |
+075 Infectious mononucleosis |7 |
+077 Other diseases of conjunctiva due to viruses and Chlamydiae |1 |
+078 Other diseases due to viruses and Chlamydiae |11 |
+079 Viral and chlamydial infection in conditions classified elsewhere and of unspecified site |87 |
+084 Malaria |7 |
+088 Other arthropod-borne diseases |3 |
+091 Early syphilis, symptomatic |2 |
+094 Neurosyphilis |4 |
+097 Other and unspecified syphilis |1 |
+098 Gonococcal infections |2 |
+099 Other venereal diseases |4 |
+110 Dermatophytosis |2 |
+111 Dermatomycosis, other and unspecified |1 |
+112 Candidiasis |20 |
+115 Histoplasmosis |3 |
+117 Other mycoses |16 |
+123 Other cestode infection |1 |
+127 Other intestinal helminthiases |1 |
+135 Sarcoidosis |15 |
+136 Other and unspecified infectious and parasitic diseases |17 |
+140 Malignant neoplasm of lip |2 |
+141 Malignant neoplasm of tongue |11 |
+142 Malignant neoplasm of major salivary glands |2 |
+143 Malignant neoplasm of gum |1 |
+144 Malignant neoplasm of floor of mouth |3 |
+145 Malignant neoplasm of other and unspecified parts of mouth |9 |
+146 Malignant neoplasm of oropharynx |6 |
+147 Malignant neoplasm of nasopharynx |5 |
+148 Malignant neoplasm of hypopharynx |2 |
+149 Malignant neoplasm of other and ill-defined sites within the lip, oral cavity, and pharynx |1 |
+150 Malignant neoplasm of oesophagus |21 |
+151 Malignant neoplasm of stomach |45 |
+152 Malignant neoplasm of small intestine, including duodenum |4 |
+153 Malignant neoplasm of colon |123 |
+154 Malignant neoplasm of rectum, rectosigmoid junction, and anus |54 |
+155 Malignant neoplasm of liver and intrahepatic bile ducts |27 |
+156 Malignant neoplasm of gall bladder and extrahepatic bile ducts |18 |
+157 Malignant neoplasm of pancreas |53 |
+158 Malignant neoplasm of retroperitoneum and peritoneum |18 |
+159 Malignant neoplasm of other and ill-defined sites within the digestive organs and peritoneum |2 |
+160 Malignant neoplasm of nasal cavities, middle ear, and accessory sinuses |4 |
+161 Malignant neoplasm of larynx |14 |
+162 Malignant neoplasm of trachea, bronchus, and lung |205 |
+163 Malignant neoplasm of pleura |4 |
+164 Malignant neoplasm of thymus, heart, and mediastinum |9 |
+170 Malignant neoplasm of bone and articular cartilage |13 |
+171 Malignant neoplasm of connective and other soft tissue |18 |
+172 Malignant melanoma of skin |3 |
+173 Other malignant neoplasm of skin |13 |
+174 Malignant neoplasm of female breast |210 |
+175 Malignant neoplasm of male breast |1 |
+176 Kaposi's sarcoma |1 |
+179 Malignant neoplasm of uterus, part |1 |
+180 Malignant neoplasm of cervix uteri |52 |
+182 Malignant neoplasm of body of uterus |67 |
+183 Malignant neoplasm of ovary and other uterine adnexa |54 |
+184 Malignant neoplasm of other and unspecified female genital organs |11 |
+185 Malignant neoplasm of prostate |207 |
+186 Malignant neoplasm of testis |6 |
+188 Malignant neoplasm of bladder |49 |
+189 Malignant neoplasm of kidney and other and unspecified urinary organs |54 |
+190 Malignant neoplasm of eye |9 |
+191 Malignant neoplasm of brain |64 |
+192 Malignant neoplasm of other and unspecified parts of nervous system |7 |
+193 Malignant neoplasm of thyroid gland |54 |
+194 Malignant neoplasm of other endocrine glands and related structures |4 |
+195 Malignant neoplasm of other and ill-defined sites |5 |
+196 Secondary and unspecified malignant neoplasm of lymph nodes |45 |
+197 Secondary malignant neoplasm of respiratory and digestive systems |203 |
+198 Secondary malignant neoplasm of other specified sites |192 |
+199 Malignant neoplasm without specification of site |24 |
+200 Lymphosarcoma and reticulosarcoma |32 |
+201 Hodgkin's disease |10 |
+202 Other malignant neoplasms of lymphoid and histiocytic tissue |70 |
+203 Multiple myeloma and immunoproliferative neoplasms |29 |
+204 Lymphoid leukaemia |32 |
+205 Myeloid leukaemia |43 |
+210 Benign neoplasm of lip, oral cavity, and pharynx |13 |
+211 Benign neoplasm of other parts of digestive system |40 |
+212 Benign neoplasm of respiratory and intrathoracic organs |19 |
+213 Benign neoplasm of bone and articular cartilage |11 |
+214 Lipoma |6 |
+215 Other benign neoplasm of connective and other soft tissue |6 |
+216 Benign neoplasm of skin |3 |
+217 Benign neoplasm of breast |2 |
+218 Uterine leiomyoma |322 |
+220 Benign neoplasm of ovary |52 |
+221 Benign neoplasm of other female genital organs |2 |
+222 Benign neoplasm of male genital organs |1 |
+223 Benign neoplasm of kidney and other urinary organs |12 |
+225 Benign neoplasm of brain and other parts of nervous system |67 |
+226 Benign neoplasm of thyroid glands |24 |
+227 Benign neoplasm of other endocrine glands and related structures |50 |
+228 Haemangioma and lymphangioma, any site |9 |
+229 Benign neoplasm of other and unspecified sites |2 |
+230 Carcinoma in situ of digestive organs |5 |
+233 Carcinoma in situ of breast and genitourinary system |48 |
+235 Neoplasm of uncertain behaviour of digestive and respiratory systems |18 |
+236 Neoplasm of uncertain behaviour of genitourinary organs |5 |
+237 Neoplasm of uncertain behaviour of endocrine glands and nervous system |16 |
+238 Neoplasm of uncertain behaviour of other and unspecified sites and tissues |29 |
+239 Neoplasms of unspecified nature |9 |
+240 Simple and unspecified goitre |2 |
+241 Nontoxic nodular goitre |35 |
+242 Thyrotoxicosis with or without goitre |16 |
+243 Congenital hypothyroidism |1 |
+244 Acquired hypothyroidism |15 |
+245 Thyroiditis |3 |
+250 Diabetes mellitus |564 |
+251 Other disorders of pancreatic internal secretion |18 |
+252 Disorders of parathyroid gland |16 |
+253 Disorders of the pituitary gland and its hypothalamic control |21 |
+255 Disorders of adrenal glands |10 |
+256 Ovarian dysfunction |2 |
+258 Polyglandular dysfunction and related disorders |1 |
+259 Other endocrine disorders |1 |
+261 Nutritional marasmus |3 |
+262 Other severe protein-calorie malnutrition |2 |
+263 Other and unspecified protein-calorie malnutrition |10 |
+268 Vitamin D deficiency |1 |
+270 Disorders of amino-acid transport and metabolism |2 |
+271 Disorders of carbohydrate transport and metabolism |3 |
+272 Disorders of lipoid metabolism |50 |
+273 Disorders of plasma protein metabolism |1 |
+274 Gout |12 |
+275 Disorders of mineral metabolism |20 |
+276 Disorders of fluid, electrolyte, and acid-base balance |489 |
+277 Other and unspecified disorders of metabolism |23 |
+278 Obesity and other hyperalimentation |25 |
+279 Disorders involving the immune mechanism |5 |
+280 Iron deficiency anaemias |47 |
+281 Other deficiency anaemias |10 |
+282 Hereditary haemolytic anaemias |252 |
+283 Acquired haemolytic anaemias |13 |
+284 Aplastic anaemia |27 |
+285 Other and unspecified anaemias |61 |
+286 Coagulation defects |10 |
+287 Purpura and other haemorrhagic conditions |42 |
+288 Diseases of white blood cells |88 |
+289 Other diseases of blood and blood-forming organs |17 |
+290 Senile and presenile organic psychotic conditions |36 |
+291 Alcoholic psychoses |73 |
+292 Drug psychoses |52 |
+293 Transient organic psychotic conditions |32 |
+294 Other organic psychotic conditions (chronic) |29 |
+295 Schizophrenic disorders |381 |
+296 Affective psychoses |501 |
+297 Paranoid states |6 |
+298 Other nonorganic psychoses |85 |
+299 Psychoses with origin specific to childhood |7 |
+300 Neurotic disorders |47 |
+301 Personality disorders |11 |
+302 Sexual deviations and disorders |2 |
+303 Alcohol dependence syndrome |132 |
+304 Drug dependence |283 |
+305 Nondependent drug use disorder |64 |
+306 Physiological malfunction arising from mental factors |3 |
+307 Special symptoms or syndromes, not elsewhere classified |22 |
+308 Acute reaction to stress |3 |
+309 Adjustment reaction |105 |
+310 Specific nonpsychotic mental disorders due to organic brain damage |9 |
+311 Depressive disorder, not elsewhere classified |34 |
+312 Disturbance of conduct, not elsewhere classified |29 |
+313 Disturbance of emotions specific to childhood and adolescence |8 |
+314 Hyperkinetic syndrome of childhood |27 |
+315 Specific delays in development |3 |
+319 Unspecified mental retardation |2 |
+320 Bacterial meningitis |17 |
+322 Meningitis of unspecified cause |11 |
+323 Encephalitis, myelitis, and encephalomyelitis |10 |
+324 Intracranial and intraspinal abscess |10 |
+325 Phlebitis and thrombophlebitis of intracranial venous sinuses |1 |
+330 Cerebral degenerations usually manifest in childhood |1 |
+331 Other cerebral degenerations |53 |
+332 Parkinson's disease |30 |
+333 Other extrapyramidal disease and abnormal movement disorders |15 |
+334 Spinocerebellar disease |2 |
+335 Anterior horn cell disease |10 |
+336 Other diseases of spinal cord |10 |
+337 Disorders of the autonomic nervous system |21 |
+340 Multiple sclerosis |96 |
+341 Other demyelinating diseases of central nervous system |3 |
+342 Hemiplegia and hemiparesis |9 |
+343 Infantile cerebral palsy |8 |
+344 Other paralytic syndromes |14 |
+345 Epilepsy |68 |
+346 Migraine |46 |
+347 Cataplexy and narcolepsy |1 |
+348 Other conditions of brain |40 |
+349 Other and unspecified disorders of the nervous system |19 |
+350 Trigeminal nerve disorders |6 |
+351 Facial nerve disorders |11 |
+352 Disorders of other cranial nerves |3 |
+353 Nerve root and plexus disorders |29 |
+354 Mononeuritis of upper limb and mononeuritis multiplex |11 |
+355 Mononeuritis of lower limb |12 |
+356 Hereditary and idiopathic peripheral neuropathy |9 |
+357 Inflammatory and toxic neuropathy |23 |
+358 Myoneural disorders |17 |
+359 Muscular dystrophies and other myopathies |6 |
+360 Disorders of the globe |3 |
+361 Retinal detachments and defects |25 |
+362 Other retinal disorders |28 |
+363 Chorioretinal inflammations, scars, and other disorders of choroid |1 |
+365 Glaucoma |8 |
+366 Cataract |2 |
+368 Visual disturbances |4 |
+369 Blindness and low vision |3 |
+370 Keratitis |4 |
+371 Corneal opacity and other disorders of cornea |4 |
+372 Disorders of conjunctiva |2 |
+373 Inflammation of eyelids |5 |
+374 Other disorders of eyelids |4 |
+375 Disorders of lacrimal system |1 |
+376 Disorders of the orbit |8 |
+377 Disorders of optic nerve and visual pathways |6 |
+378 Strabismus and other disorders of binocular eye movements |4 |
+379 Other disorders of eye |4 |
+380 Disorders of external ear |8 |
+381 Nonsuppurative otitis media and Eustachian tube disorders |7 |
+382 Suppurative and unspecified otitis media |25 |
+383 Mastoiditis and related conditions |6 |
+384 Other disorders of tympanic membrane |1 |
+385 Other disorders of middle ear and mastoid |3 |
+386 Vertiginous syndromes and other disorders of vestibular system |25 |
+387 Otosclerosis |1 |
+388 Other disorders of ear |1 |
+389 Hearing loss |5 |
+390 Rheumatic fever without mention of heart involvement |1 |
+391 Rheumatic fever with heart involvement |1 |
+392 Rheumatic chorea |1 |
+393 Chronic rheumatic pericarditis |1 |
+394 Diseases of mitral valve |15 |
+395 Diseases of aortic valve |2 |
+396 Diseases of mitral and aortic valves |31 |
+397 Diseases of other endocardial structures |3 |
+398 Other rheumatic heart disease |9 |
+401 Essential hypertension |205 |
+402 Hypertensive heart disease |90 |
+403 Hypertensive renal disease |147 |
+404 Hypertensive heart and renal disease |24 |
+405 Secondary hypertension |5 |
+410 Acute myocardial infarction |889 |
+411 Other acute and subacute forms of ischaemic heart disease |175 |
+412 Old myocardial infarction |5 |
+413 Angina pectoris |53 |
+414 Other forms of chronic ischaemic heart disease |2037 |
+415 Acute pulmonary heart disease |63 |
+416 Chronic pulmonary heart disease |27 |
+417 Other diseases of pulmonary circulation |1 |
+420 Acute pericarditis |8 |
+421 Acute and subacute endocarditis |24 |
+422 Acute myocarditis |3 |
+423 Other diseases of pericardium |23 |
+424 Other diseases of endocardium |167 |
+425 Cardiomyopathy |71 |
+426 Conduction disorders |95 |
+427 Cardiac dysrhythmias |730 |
+428 Heart failure |945 |
+429 Ill-defined descriptions and complications of heart disease |21 |
+430 Subarachnoid haemorrhage |75 |
+431 Intracerebral haemorrhage |83 |
+432 Other and unspecified intracranial haemorrhage |29 |
+433 Occlusion and stenosis of precerebral arteries |283 |
+434 Occlusion of cerebral arteries |303 |
+435 Transient cerebral ischaemia |130 |
+436 Acute, but ill-defined, cerebrovascular disease |73 |
+437 Other and ill-defined cerebrovascular disease |81 |
+438 Late effects of cerebrovascular disease |6 |
+440 Atherosclerosis |240 |
+441 Aortic aneurysm and dissection |240 |
+442 Other aneurysm |29 |
+443 Other peripheral vascular disease |25 |
+444 Arterial embolism and thrombosis |52 |
+446 Polyarteritis nodosa and allied conditions |15 |
+447 Other disorders of arteries and arterioles |28 |
+448 Disease of capillaries |2 |
+451 Phlebitis and thrombophlebitis |33 |
+452 Portal vein thrombosis |3 |
+453 Other venous embolism and thrombosis |141 |
+454 Varicose veins of lower extremities |15 |
+455 Haemorrhoids |26 |
+456 Varicose veins of other sites |7 |
+457 Noninfectious disorders of lymphatic channels |4 |
+458 Hypotension |39 |
+459 Other disorders of circulatory system |20 |
+461 Acute sinusitis |10 |
+462 Acute pharyngitis |7 |
+463 Acute tonsillitis |10 |
+464 Acute laryngitis and tracheitis |19 |
+465 Acute upper respiratory infections of multiple or unspecified sites |47 |
+466 Acute bronchitis and bronchiolitis |157 |
+470 Deviated nasal septum |10 |
+471 Nasal polyps |2 |
+472 Chronic pharyngitis and nasopharyngitis |1 |
+473 Chronic sinusitis |28 |
+474 Chronic disease of tonsils and adenoids |24 |
+475 Peritonsillar abscess |8 |
+477 Allergic rhinitis |1 |
+478 Other diseases of upper respiratory tract |36 |
+480 Viral pneumonia |27 |
+481 Pneumococcal pneumonia [Streptococcus pneumoniae pneumonia] |40 |
+482 Other bacterial pneumonia |117 |
+483 Pneumonia due to other specified organism |8 |
+485 Bronchopneumonia, organism unspecified |8 |
+486 Pneumonia, organism unspecified |728 |
+487 Influenza |20 |
+490 Bronchitis, not specified as acute or chronic |22 |
+491 Chronic bronchitis |265 |
+492 Emphysema |40 |
+493 Asthma |619 |
+494 Bronchiectasis |14 |
+496 Chronic airway obstruction, not elsewhere classified |68 |
+502 Pneumoconiosis due to other silica or silicates |3 |
+506 Respiratory conditions due to chemical fumes and vapours |1 |
+507 Pneumonitis due to solids and liquids |118 |
+508 Respiratory conditions due to other unspecified external agents |2 |
+510 Empyema |9 |
+511 Pleurisy |67 |
+512 Pneumothorax |38 |
+513 Abscess of lung and mediastinum |5 |
+514 Pulmonary congestion and hypostasis |13 |
+515 Postinflammatory pulmonary fibrosis |32 |
+516 Other alveolar and parietoalveolar pneumonopathy |22 |
+518 Other diseases of lung |164 |
+519 Other diseases of respiratory system |21 |
+520 Disorders of tooth development and eruption |1 |
+521 Diseases of hard tissues of teeth |4 |
+522 Diseases of pulp and periapical tissues |3 |
+523 Gingival and periodontal diseases |2 |
+524 Dentofacial anomalies, including malocclusion |70 |
+525 Other diseases and conditions of teeth and supporting structures |2 |
+526 Diseases of the jaws |8 |
+527 Diseases of the salivary glands |12 |
+528 Diseases of the oral soft tissues, excluding lesions specific for gingiva and tongue |15 |
+530 Diseases of oesophagus |172 |
+531 Gastric ulcer |79 |
+532 Duodenal ulcer |79 |
+533 Peptic ulcer, site unspecified |16 |
+534 Gastrojejunal ulcer |2 |
+535 Gastritis and duodenitis |89 |
+536 Disorders of function of stomach |32 |
+537 Other disorders of stomach and duodenum |30 |
+540 Acute appendicitis |181 |
+541 Appendicitis, unqualified |2 |
+542 Other appendicitis |3 |
+543 Other diseases of appendix |4 |
+550 Inguinal hernia |69 |
+551 Other hernia of abdominal cavity, with gangrene |2 |
+552 Other hernia of abdominal cavity, with obstruction, but without mention of gangrene |45 |
+553 Other hernia of abdominal cavity without mention of obstruction or gangrene |102 |
+555 Regional enteritis |65 |
+556 Ulcerative colitis |32 |
+557 Vascular insufficiency of intestine |35 |
+558 Other noninfectious gastroenteritis and colitis |129 |
+560 Intestinal obstruction without mention of hernia |282 |
+562 Diverticula of intestine |203 |
+564 Functional digestive disorders, not elsewhere classified |55 |
+565 Anal fissure and fistula |9 |
+566 Abscess of anal and rectal regions |24 |
+567 Peritonitis |21 |
+568 Other disorders of peritoneum |12 |
+569 Other disorders of intestine |119 |
+570 Acute and subacute necrosis of liver |14 |
+571 Chronic liver disease and cirrhosis |168 |
+572 Liver abscess and sequelae of chronic liver disease |31 |
+573 Other disorders of liver |13 |
+574 Cholelithiasis |360 |
+575 Other disorders of gall bladder |47 |
+576 Other disorders of biliary tract |37 |
+577 Diseases of pancreas |211 |
+578 Gastrointestinal haemorrhage |118 |
+579 Intestinal malabsorption |6 |
+580 Acute glomerulonephritis |2 |
+581 Nephrotic syndrome |8 |
+582 Chronic glomerulonephritis |1 |
+583 Nephritis and nephropathy, not specified as acute or chronic |3 |
+584 Acute renal failure |88 |
+585 Chronic renal failure and impairment |51 |
+586 Renal failure, unspecified |4 |
+587 Renal sclerosis, unspecified |1 |
+588 Disorders resulting from impaired renal function |7 |
+590 Infections of kidney |123 |
+591 Hydronephrosis |7 |
+592 Calculus of kidney and ureter |166 |
+593 Other disorders of kidney and ureter |42 |
+594 Calculus of lower urinary tract |11 |
+595 Cystitis |10 |
+596 Other disorders of bladder |34 |
+597 Urethritis, not sexually transmitted, and urethral syndrome |1 |
+598 Urethral stricture |9 |
+599 Other disorders of urethra and urinary tract |287 |
+600 Hyperplasia of prostate |121 |
+601 Inflammatory diseases of prostate |8 |
+602 Other disorders of prostate |1 |
+603 Hydrocele |2 |
+604 Orchitis and epididymitis |11 |
+605 Redundant prepuce and phimosis |2 |
+607 Disorders of penis |10 |
+608 Other disorders of male genital organs |13 |
+610 Benign mammary dysplasias |3 |
+611 Other disorders of breast |47 |
+614 Inflammatory disease of ovary, fallopian tube, pelvic cellular tissue, and peritoneum |97 |
+616 Inflammatory disease of cervix, vagina, and vulva  |8 |
+617 Endometriosis |65 |
+618 Genital prolapse |177 |
+619 Fistula involving female genital tract |7 |
+620 Noninflammatory disorders of ovary, fallopian tube, and broad ligament |65 |
+621 Disorders of uterus, not elsewhere classified |10 |
+622 Noninflammatory disorders of cervix |3 |
+623 Noninflammatory disorders of vagina |6 |
+624 Noninflammatory disorders of vulva and perineum |3 |
+625 Pain and other symptoms associated with female genital organs |68 |
+626 Disorders of menstruation and other abnormal bleeding from female genital tract |57 |
+627 Menopausal and postmenopausal disorders |8 |
+628 Infertility, female |3 |
+630 Hydatidiform mole |1 |
+631 Other abnormal product of conception |2 |
+632 Missed abortion |31 |
+633 Ectopic pregnancy |53 |
+634 Spontaneous abortion |58 |
+635 Legally induced abortion |126 |
+637 Unspecified abortion |6 |
+639 Complications following abortion and ectopic and molar pregnancies |10 |
+640 Haemorrhage in early pregnancy |3 |
+641 Antepartum haemorrhage, abruptio placentae, and placenta praevia |20 |
+642 Hypertension complicating pregnancy, childbirth, and the puerperium |22 |
+643 Excessive vomiting in pregnancy |33 |
+644 Early or threatened labour |144 |
+645 Prolonged pregnancy |1 |
+646 Other complications of pregnancy, not elsewhere classified |60 |
+647 Infectious and parasitic conditions in the mother classifiable elsewhere, but complicating pregnancy, childbirth, or the puerperi|13 |
+648 Other current conditions in the mother classifiable elsewhere, but complicating pregnancy, childbirth or the puerperium |95 |
+651 Multiple gestation |2 |
+652 Malposition and malpresentation of foetus |1 |
+654 Abnormality of organs and soft tissues of pelvis |16 |
+655 Known or suspected foetal abnormality affecting management of mother |5 |
+656 Other foetal and placental problems affecting management of mother |10 |
+657 Polyhydramnios |3 |
+658 Other problems associated with amniotic cavity and membranes |10 |
+659 Other indications for care or intervention related to labour and delivery, not elsewhere classified |2 |
+661 Abnormality of forces of labour |2 |
+664 Trauma to perineum and vulva during delivery |6 |
+665 Other obstetrical trauma |1 |
+666 Postpartum haemorrhage |3 |
+667 Retained placenta or membranes, without haemorrhage |1 |
+668 Complications of the administration of anaesthetic or other sedation in labour and delivery |1 |
+669 Other complications of labour and delivery, not elsewhere classified |1 |
+670 Major puerperal infection |12 |
+671 Venous complications in pregnancy and the puerperium |3 |
+674 Other and unspecified complications of the puerperium, not elsewhere classified |5 |
+675 Infections of the breast and nipple associated with childbirth |1 |
+680 Carbuncle and furuncle |1 |
+681 Cellulitis and abscess of finger and toe |25 |
+682 Other cellulitis and abscess |314 |
+683 Acute lymphadenitis |4 |
+684 Impetigo |5 |
+685 Pilonidal cyst |3 |
+686 Other local infections of skin and subcutaneous tissue |7 |
+690 Erythematosquamous dermatosis |2 |
+691 Atopic dermatitis and related conditions |2 |
+692 Contact dermatitis and other eczema |3 |
+693 Dermatitis due to substances taken internally |9 |
+694 Bullous dermatoses |2 |
+695 Erythematous conditions |7 |
+696 Psoriasis and similar disorders |4 |
+701 Other hypertrophic and atrophic conditions of skin |9 |
+703 Diseases of nail |1 |
+705 Disorders of sweat glands |5 |
+706 Diseases of sebaceous glands |3 |
+707 Chronic ulcer of skin |68 |
+708 Urticaria |4 |
+709 Other disorders of skin and subcutaneous tissue |5 |
+710 Diffuse diseases of connective tissue |32 |
+711 Arthropathy associated with infections |25 |
+714 Rheumatoid arthritis and other inflammatory polyarthropathies |30 |
+715 Osteoarthrosis and allied disorders |425 |
+716 Other and unspecified arthropathies |22 |
+717 Internal derangement of knee |17 |
+718 Other derangement of joint |45 |
+719 Other and unspecified disorders of joints |16 |
+720 Ankylosing spondylitis and other inflammatory spondylopathies |2 |
+721 Spondylosis and allied disorders |96 |
+722 Intervertebral disc disorders |394 |
+723 Other disorders of cervical region |33 |
+724 Other and unspecified disorders of back |173 |
+725 Polymyalgia rheumatica |1 |
+726 Peripheral enthesopathies and allied syndromes |51 |
+727 Other disorders of synovium, tendon, and bursa |34 |
+728 Disorders of muscle, ligament, and fascia |22 |
+729 Other disorders of soft tissues |37 |
+730 Osteomyelitis, periostitis, and other infections involving bone |47 |
+732 Osteochondropathies |8 |
+733 Other disorders of bone and cartilage |176 |
+734 Flat foot |1 |
+735 Acquired deformities of toe |4 |
+736 Other acquired deformities of limbs |14 |
+737 Curvature of spine |15 |
+738 Other acquired deformity |43 |
+741 Spina bifida |6 |
+742 Other congenital anomalies of nervous system |5 |
+744 Congenital anomalies of ear, face, and neck |2 |
+745 Bulbus cordis anomalies and anomalies of cardiac septal closure |36 |
+746 Other congenital anomalies of heart |12 |
+747 Other congenital anomalies of circulatory system |40 |
+748 Congenital anomalies of respiratory system |7 |
+749 Cleft palate and cleft lip |11 |
+750 Other congenital anomalies of upper alimentary tract |12 |
+751 Other congenital anomalies of digestive system |19 |
+752 Congenital anomalies of genital organs |4 |
+753 Congenital anomalies of urinary system |10 |
+754 Certain congenital musculoskeletal deformities |19 |
+755 Other congenital anomalies of limbs |6 |
+756 Other congenital musculoskeletal anomalies |23 |
+757 Congenital anomalies of the integument |4 |
+759 Other and unspecified congenital anomalies |6 |
+760 Foetus or newborn affected by maternal conditions which may be unrelated to present pregnancy |2 |
+764 Slow foetal growth and foetal malnutrition |1 |
+765 Disorders relating to short gestation and unspecified low birthweight |15 |
+766 Disorders relating to long gestation and high birthweight |2 |
+767 Birth trauma |1 |
+768 Intrauterine hypoxia and birth asphyxia |1 |
+769 Respiratory distress syndrome |7 |
+770 Other respiratory conditions of foetus and newborn |25 |
+771 Infections specific to the perinatal period |11 |
+772 Foetal and neonatal haemorrhage |3 |
+773 Haemolytic diseases of foetus or newborn, due to isoimmunisation |4 |
+774 Other perinatal jaundice |14 |
+775 Endocrine and metabolic disturbances specific to the foetus and newborn |2 |
+777 Perinatal disorders of digestive system |4 |
+778 Conditions involving the integument and temperature regulation of foetus and newborn |3 |
+779 Other and ill-defined conditions originating in the perinatal period |4 |
+780 General symptoms |67 |
+782 Symptoms involving skin and other integumentary tissue |2 |
+784 Symptoms involving head and neck |6 |
+785 Symptoms involving cardiovascular system |8 |
+786 Symptoms involving respiratory system and other chest symptoms |57 |
+787 Symptoms involving digestive system |5 |
+788 Symptoms involving urinary system |2 |
+789 Other symptoms involving abdomen and pelvis |23 |
+793 Nonspecific abnormal findings on radiological and other examination of body structure  |1 |
+795 Nonspecific abnormal histological and immunological findings |2 |
+796 Other nonspecific abnormal findings |1 |
+799 Other ill-defined and unknown causes of morbidity and mortality |68 |
+800 Fracture of vault of skull |26 |
+801 Fracture of base of skull |51 |
+802 Fracture of face bones |81 |
+803 Other and unqualified skull fractures |6 |
+804 Multiple fractures involving skull or face with other bones |1 |
+805 Fracture of vertebral column without mention of spinal cord injury |61 |
+806 Fracture of vertebral column with spinal cord injury |17 |
+807 Fracture of rib(s), sternum, larynx, and trachea |35 |
+808 Fracture of pelvis |54 |
+810 Fracture of clavicle |7 |
+811 Fracture of scapula |1 |
+812 Fracture of humerus |58 |
+813 Fracture of radius and ulna |77 |
+815 Fracture of metacarpal bone(s) |11 |
+816 Fracture of one or more phalanges of hand |13 |
+817 Multiple fractures of hand bones |4 |
+820 Fracture of neck of femur |219 |
+821 Fracture of other and unspecified parts of femur |52 |
+822 Fracture of patella |25 |
+823 Fracture of tibia and fibula |78 |
+824 Fracture of ankle |116 |
+825 Fracture of one or more tarsal and metatarsal bones |38 |
+826 Fracture of one or more phalanges of foot |7 |
+831 Dislocation of shoulder |6 |
+832 Dislocation of elbow |1 |
+834 Dislocation of finger |3 |
+835 Dislocation of hip |1 |
+836 Dislocation of knee |5 |
+838 Dislocation of foot |2 |
+839 Other, multiple, and ill-defined dislocations |3 |
+840 Sprains and strains of shoulder and upper arm |26 |
+843 Sprains and strains of hip and thigh |2 |
+844 Sprains and strains of knee and leg |25 |
+845 Sprains and strains of ankle and foot |3 |
+846 Sprains and strains of sacroiliac region |4 |
+847 Sprains and strains of other and unspecified parts of back |6 |
+850 Concussion |49 |
+851 Cerebral laceration and contusion |33 |
+852 Subarachnoid, subdural, and extradural haemorrhage, following injury |54 |
+853 Other and unspecified intracranial haemorrhage following injury |15 |
+854 Intracranial injury of other and unspecified nature |16 |
+860 Traumatic pneumothorax and haemothorax |42 |
+861 Injury to heart and lung |12 |
+862 Injury to other and unspecified intrathoracic organs |7 |
+863 Injury to gastrointestinal tract |21 |
+864 Injury to liver |19 |
+865 Injury to spleen |12 |
+866 Injury to kidney |9 |
+867 Injury to pelvic organs |1 |
+868 Injury to other intra-abdominal organs |6 |
+870 Open wound of ocular adnexa |13 |
+871 Open wound of eyeball |12 |
+872 Open wound of ear |1 |
+873 Other open wound of head |29 |
+874 Open wound of neck |9 |
+875 Open wound of chest wall |9 |
+876 Open wound of back |5 |
+877 Open wound of buttock |4 |
+878 Open wound of genital organs (external), including traumatic amputation |1 |
+879 Open wound of other and unspecified sites, except limbs |19 |
+880 Open wound of shoulder and upper arm |3 |
+881 Open wound of elbow, forearm, and wrist |6 |
+882 Open wound of hand except finger(s) alone |14 |
+883 Open wound of finger(s) |24 |
+884 Multiple and unspecified open wound of upper limb |1 |
+885 Traumatic amputation of thumb (complete) |5 |
+886 Traumatic amputation of other finger(s) (complete) |18 |
+890 Open wound of hip and thigh |8 |
+891 Open wound of knee, leg [except thigh], and ankle |21 |
+892 Open wound of foot except toe(s) alone |5 |
+895 Traumatic amputation of toe(s) (complete) |2 |
+900 Injury to blood vessels of head and neck |5 |
+901 Injury to blood vessels of thorax |3 |
+902 Injury to blood vessels of abdomen and pelvis |5 |
+903 Injury to blood vessels of upper extremity |4 |
+904 Injury to blood vessels of lower extremity and unspecified sites |7 |
+905 Late effects of musculoskeletal and connective tissue injuries |1 |
+906 Late effects of injuries to skin and subcutaneous tissues |1 |
+907 Late effects of injuries to the nervous system |4 |
+908 Late effects of other and unspecified injuries |1 |
+910 Superficial injury of face, neck, and scalp except eye |6 |
+911 Superficial injury of trunk |1 |
+913 Superficial injury of elbow, forearm, and wrist |1 |
+914 Superficial injury of hand(s) except finger(s) alone |1 |
+916 Superficial injury of hip, thigh, leg, and ankle |2 |
+917 Superficial injury of foot and toe(s) |1 |
+920 Contusion of face, scalp, and neck except eye(s) |10 |
+921 Contusion of eye and adnexa |7 |
+922 Contusion of trunk |10 |
+923 Contusion of upper limb |3 |
+924 Contusion of lower limb and of other and unspecified sites |6 |
+926 Crushing injury of trunk |1 |
+927 Crushing injury of upper limb |4 |
+928 Crushing injury of lower limb |1 |
+933 Foreign body in pharynx and larynx |3 |
+934 Foreign body in trachea, bronchus, and lung |2 |
+935 Foreign body in mouth, oesophagus, and stomach |8 |
+936 Foreign body in intestine and colon |1 |
+941 Burn of face, head, and neck |3 |
+942 Burn of trunk |7 |
+943 Burn of upper limb, except wrist and hand |6 |
+944 Burn of wrist(s) and hand(s) |7 |
+945 Burn of lower limb(s) |13 |
+946 Burns of multiple specified sites |4 |
+948 Burns classified according to extent of body surface involved |21 |
+952 Spinal cord injury without evidence of spinal bone injury |8 |
+953 Injury to nerve roots and spinal plexus |2 |
+955 Injury to peripheral nerve(s) of shoulder girdle and upper limb |5 |
+957 Injury to other and unspecified nerves |1 |
+958 Certain early complications of trauma |9 |
+959 Injury, other and unspecified |19 |
+960 Poisoning by antibiotics |1 |
+961 Poisoning by other anti-infectives |1 |
+962 Poisoning by hormones and synthetic substitutes |6 |
+963 Poisoning by primarily systemic agents |5 |
+964 Poisoning by agents primarily affecting blood constituents |5 |
+965 Poisoning by analgesics, antipyretics, and antirheumatics |31 |
+966 Poisoning by anticonvulsants and anti-Parkinsonism drugs |4 |
+967 Poisoning by sedatives and hypnotics |4 |
+968 Poisoning by other central nervous system depressants and anaesthetics |13 |
+969 Poisoning by psychotropic agents |21 |
+971 Poisoning by drugs primarily affecting the autonomic nervous system |2 |
+972 Poisoning by agents primarily affecting the cardiovascular system |3 |
+975 Poisoning by agents primarily acting on the smooth and skeletal muscles and respiratory system |3 |
+977 Poisoning by other and unspecified drugs and medicinal substances |7 |
+980 Toxic effect of alcohol |2 |
+982 Toxic effect of solvents other than petroleum-based |3 |
+983 Toxic effect of corrosive aromatics, acids, and caustic alkalis |4 |
+984 Toxic effect of lead and its compounds (including fumes) |5 |
+985 Toxic effect of other metals |1 |
+986 Toxic effect of carbon monoxide |1 |
+987 Toxic effect of other gases, fumes, or vapours |5 |
+988 Toxic effect of noxious substances eaten as food |1 |
+989 Toxic effect of other substances, chiefly nonmedicinal as to source |6 |
+991 Effects of reduced temperature |4 |
+992 Effects of heat and light |3 |
+994 Effects of other external causes |4 |
+995 Certain adverse effects not elsewhere classified |17 |
+996 Complications peculiar to certain specified procedures |609 |
+997 Complications affecting specified body systems, not elsewhere classified |113 |
+998 Other complications of procedures, not elsewhere classified |247 |
+999 Complications of medical care, not elsewhere classified |12 |
+V07 Need for isolation and other prophylactic measures |11 |
+V08 Asymptomatic human immunodeficiency virus [HIV] infection status |7 |
+V10 Personal history of malignant neoplasm |7 |
+V12 Personal history of certain other diseases |9 |
+V14 Personal history of allergy to medicinal agents |1 |
+V15 Other personal history presenting hazards to health |5 |
+V17 Family history of certain chronic disabling diseases |2 |
+V22 Normal pregnancy |4 |
+V24 Postpartum care and examination |13 |
+V25 Encounter for contraceptive management |2 |
+V26 Procreative management |5 |
+V27 Outcome of delivery |3950 |
+V30 Single liveborn |4043 |
+V31 Twin, mate liveborn |132 |
+V32 Twin, mate stillborn |1 |
+V33 Twin, unspecified |6 |
+V34 Other multiple, mates all liveborn |9 |
+V36 Other multiple, mates live- and stillborn |2 |
+V39 Unspecified |3 |
+V42 Organ or tissue replaced by transplant |9 |
+V43 Organ or tissue replaced by other means |6 |
+V44 Artificial opening status |2 |
+V45 Other postsurgical states |19 |
+V49 Problems with limbs and other problems |1 |
+V50 Elective surgery for purposes other than remedying health states |5 |
+V51 Aftercare involving the use of plastic surgery |11 |
+V52 Fitting and adjustment of prosthetic device and implant |1 |
+V53 Fitting and adjustment of other device |6 |
+V54 Other orthopaedic aftercare |4 |
+V55 Attention to artificial openings |51 |
+V56 Encounter for dialysis and dialysis catheter care |12 |
+V57 Care involving use of rehabilitation procedures |334 |
+V58 Encounter for other and unspecified procedures and aftercare |490 |
+V59 Donors |27 |
+V64 Persons encountering health services for specific procedures, not carried out |2 |
+V65 Other persons seeking consultation without complaint or sickness |1 |
+V67 Follow-up examination |8 |
+V70 General medical examination |37 |
+V71 Observation and evaluation for suspected conditions not found |28 |
+
+
+That took 3.236 seconds.
+
+##############################################################################
+
+#20a. Now let's explore some wheresets (where clauses).
+>>> nhds.makewhereset("test1",var1="race",op1="=",val1=2,var2="discharge_status",op2="=",val2=1)
+Assembling whereset test1 containing 7418 elements took 0.087 seconds.
+
+That took 0.088 seconds.
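+
+# An aside on the calling convention (a sketch, not part of the recorded
+# session): makewhereset takes numbered triples varN/opN/valN, one triple
+# per condition; the element counts in this transcript are consistent
+# with the conditions being combined by AND. The whereset name
+# "example1" below is hypothetical.
+>>> nhds.makewhereset("example1",
+...                   var1="race", op1="=", val1=2,
+...                   var2="discharge_status", op2="=", val2=1)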
+
+##############################################################################
+
+#20b. Now make another where clause.
+>>> nhds.makewhereset("test2",var1="sex",op1="=",val1=2)
+# Note that currently you need to know that 2 means female; the next
+# version will allow the user to specify either "female" or 2, and it
+# will translate that to the stored value (which is 2). It is obviously
+# desirable to store categories as integer or other codes, but why should
+# the user need to consult a code book to formulate a where clause?
+Assembling whereset test2 containing 22368 elements took 0.056 seconds.
+
+That took 0.057 seconds.
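+
+# Until that translation arrives, the code-book lookup can live in an
+# ordinary dict (a sketch only; SEX_CODES and the whereset name
+# "females" are hypothetical, not part of SOOMv0):
+>>> SEX_CODES = {"male": 1, "female": 2}
+>>> nhds.makewhereset("females", var1="sex", op1="=",
+...                   val1=SEX_CODES["female"])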
+
+##############################################################################
+
+#20c. And yet another where clause, this time using a date value.
+>>> nhds.makewhereset("test3",var1="randomdate",op1="=",val1=mx.DateTime.Date(1996,10,12))
+Assembling whereset test3 containing 29 elements took 0.106 seconds.
+
+That took 0.106 seconds.
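+
+# Date-valued conditions take mx.DateTime objects rather than strings
+# (a sketch; mx.DateTime is the module used in the call above, and the
+# whereset name "test3a" is hypothetical):
+>>> import mx.DateTime
+>>> nhds.makewhereset("test3a", var1="randomdate", op1="=",
+...                   val1=mx.DateTime.Date(1996, 10, 12))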
+
+##############################################################################
+
+#20d. Use the first whereset to filter the dataset. Note how fast the filtering is.
+>>> print nhds.quadrivar(wheresetname="test1",var1="race",var2="sex",var3="geog_region",var4="marital_status",wgtvar="analysis_wgt",cellvar="age",wgtmean=1,wgtn=1,printit=1,allcalc=1)
+wheresetload(): memory mapping of test1 containing 7418 elements took 0.005 seconds.
+
+quadrivar(): Quadrivariate setup time 0.014 seconds.
+
+Quadrivariate summary derived from geog_region, marital_status, race and sex columns in nhds dataset
+Using whereset test1 containing 7418 elements
+quadrivar(): Quadrivariate summary created in 0.772 seconds.
+Time in intersect() function was 0.107 seconds.
+
+ | | | | | | |
+ | | | | |Frequency |Mean of |
+Grouped |Grouped |Grouped |Grouped| |weighted |Age (years) |
+by |by |by |by | |by |weighted by |
+Geographic Region|Marital Status|Race |Sex |Frequency|Analysis Weight|Analysis Weight|
+-----------------|--------------|---------|-------|---------|---------------|---------------|
+Northeast |Married |All races|Male |111 |14204 |57.6603069558 |
+Northeast |Married |All races|Female |111 |14133 |44.8955635746 |
+Northeast |Single |All races|Male |230 |31644 |25.7389092342 |
+Northeast |Single |All races|Female |376 |49740 |32.3833608653 |
+Northeast |Widowed |All races|Male |9 |1180 |75.5881355932 |
+Northeast |Widowed |All races|Female |56 |7519 |74.1248836281 |
+Northeast |Divorced |All races|Male |11 |1246 |61.2993579454 |
+Northeast |Divorced |All races|Female |30 |3780 |56.0423280423 |
+Northeast |Separated |All races|Male |5 |646 |59.2074303406 |
+Northeast |Separated |All races|Female |25 |3233 |66.9489638107 |
+Northeast |Not stated |All races|Male |1627 |54075 |29.8606701058 |
+Northeast |Not stated |All races|Female |2120 |75018 |33.330652788 |
+Midwest |Married |All races|Male |69 |14969 |58.8707996526 |
+Midwest |Married |All races|Female |83 |18219 |43.5158351172 |
+Midwest |Single |All races|Male |175 |37330 |27.4799187987 |
+Midwest |Single |All races|Female |202 |44872 |23.4954908206 |
+Midwest |Widowed |All races|Male |6 |1310 |86.5732824427 |
+Midwest |Widowed |All races|Female |60 |13216 |71.8641797821 |
+Midwest |Divorced |All races|Male |9 |2068 |55.6233075435 |
+Midwest |Divorced |All races|Female |30 |6106 |53.6994759253 |
+Midwest |Separated |All races|Male |1 |239 |33.0 |
+Midwest |Separated |All races|Female |2 |602 |47.6212624585 |
+Midwest |Not stated |All races|Male |64 |12749 |45.0568672053 |
+Midwest |Not stated |All races|Female |39 |7177 |35.7231433747 |
+South |Married |All races|Male |64 |10117 |55.3967579322 |
+South |Married |All races|Female |123 |18646 |44.2667059959 |
+South |Single |All races|Male |550 |90445 |23.6421925449 |
+South |Single |All races|Female |650 |108912 |21.7226766422 |
+South |Widowed |All races|Male |8 |1192 |75.7953020134 |
+South |Widowed |All races|Female |53 |8300 |70.4568674699 |
+South |Divorced |All races|Male |40 |5757 |55.9874934862 |
+South |Divorced |All races|Female |62 |9437 |47.8352230582 |
+South |Separated |All races|Male |2 |269 |46.9591078067 |
+South |Separated |All races|Female |14 |2117 |48.1733585262 |
+South |Not stated |All races|Male |166 |13567 |53.0557586485 |
+South |Not stated |All races|Female |235 |11621 |44.2358762351 |
+All regions |Married |All races|Male |244 |39290 |57.5386357852 |
+All regions |Married |All races|Female |317 |50998 |44.172732264 |
+All regions |Single |All races|Male |955 |159419 |24.9570347153 |
+All regions |Single |All races|Female |1228 |203524 |24.7189431811 |
+All regions |Widowed |All races|Male |23 |3682 |79.5635524172 |
+All regions |Widowed |All races|Female |169 |29035 |72.0473221973 |
+All regions |Divorced |All races|Male |60 |9071 |56.634108698 |
+All regions |Divorced |All races|Female |122 |19323 |51.2937949594 |
+All regions |Separated |All races|Male |8 |1154 |50.924610052 |
+All regions |Separated |All races|Female |41 |5952 |58.3160282258 |
+All regions |Not stated |All races|Male |1857 |80391 |36.1850606853 |
+All regions |Not stated |All races|Female |2394 |93816 |34.8645116886 |
+Northeast |All |All races|Male |1993 |102995 |33.5164404075 |
+Northeast |All |All races|Female |2718 |153423 |37.356115317 |
+Midwest |All |All races|Male |324 |68665 |39.5808544201 |
+Midwest |All |All races|Female |416 |90192 |37.8060544627 |
+South |All |All races|Male |830 |121347 |31.6767170371 |
+South |All |All races|Female |1137 |159033 |30.4560580268 |
+Northeast |Married |All races|Persons|222 |28337 |51.2939266683 |
+Northeast |Single |All races|Persons|606 |81384 |29.7998428837 |
+Northeast |Widowed |All races|Persons|65 |8699 |74.3233705024 |
+Northeast |Divorced |All races|Persons|41 |5026 |57.3456028651 |
+Northeast |Separated |All races|Persons|30 |3879 |65.6597061098 |
+Northeast |Not stated |All races|Persons|3747 |129093 |31.8771323528 |
+Midwest |Married |All races|Persons|152 |33188 |50.4414848741 |
+Midwest |Single |All races|Persons|377 |82202 |25.304919988 |
+Midwest |Widowed |All races|Persons|66 |14526 |73.1906925513 |
+Midwest |Divorced |All races|Persons|39 |8174 |54.1862001468 |
+Midwest |Separated |All races|Persons|3 |841 |43.4661117717 |
+Midwest |Not stated |All races|Persons|103 |19926 |41.6950215798 |
+South |Married |All races|Persons|187 |28763 |48.1815526892 |
+South |Single |All races|Persons|1200 |199357 |22.5935295133 |
+South |Widowed |All races|Persons|61 |9492 |71.1272650653 |
+South |Divorced |All races|Persons|102 |15194 |50.9241147822 |
+South |Separated |All races|Persons|16 |2386 |48.0364626991 |
+South |Not stated |All races|Persons|401 |25188 |48.9865251434 |
+All regions |All |All races|Male |3147 |293007 |34.1757013683 |
+All regions |All |All races|Female |4271 |402648 |34.7315998603 |
+Northeast |All |All races|Persons|4711 |256418 |35.8138393563 |
+Midwest |All |All races|Persons|740 |158857 |38.5732012619 |
+South |All |All races|Persons|1967 |280380 |30.9843528728 |
+All regions |Married |All races|Persons|561 |90288 |49.9890793904 |
+All regions |Single |All races|Persons|2183 |362943 |24.8235224519 |
+All regions |Widowed |All races|Persons|192 |32717 |72.8932053672 |
+All regions |Divorced |All races|Persons|182 |28394 |52.9998591252 |
+All regions |Separated |All races|Persons|49 |7106 |57.1156768928 |
+All regions |Not stated |All races|Persons|4251 |174207 |35.4739031275 |
+All regions |All |All races|Persons|7418 |695655 |34.4974577217 |
+
+
+That took 0.854 seconds.
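+
+# For reference, a gloss of the keyword arguments used in the quadrivar()
+# call above (inferred from the call and its output, not from separate
+# documentation):
+#   wheresetname  previously assembled whereset used to filter the rows
+#   var1..var4    the four grouping columns
+#   wgtvar        sampling-weight column ("analysis_wgt")
+#   cellvar       column summarised within each cell ("age")
+#   wgtn=1        report the weighted frequency
+#   wgtmean=1     report the weighted mean of cellvar
+#   allcalc=1     also compute all marginal ("All ...") combinations
+#   printit=1     print the summary as well as returning it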
+
+##############################################################################
+
+#20e. Use the second whereset to filter the dataset.
+>>> print nhds.quadrivar(wheresetname="test2",var1="race",var2="sex",var3="geog_region",var4="marital_status",wgtvar="analysis_wgt",cellvar="age",wgtmean=1,wgtn=1,printit=1,allcalc=1)
+wheresetload(): memory mapping of test2 containing 22368 elements took 0.004 seconds.
+
+quadrivar(): Quadrivariate setup time 0.030 seconds.
+
+Quadrivariate summary derived from geog_region, marital_status, race and sex columns in nhds dataset
+Using whereset test2 containing 22368 elements
+quadrivar(): Quadrivariate summary created in 1.940 seconds.
+Time in intersect() function was 0.287 seconds.
+
+ | | | | | | |
+ | | | | |Frequency |Mean of |
+Grouped |Grouped |Grouped |Grouped| |weighted |Age (years) |
+by |by |by |by | |by |weighted by |
+Geographic Region|Marital Status|Race |Sex |Frequency|Analysis Weight|Analysis Weight|
+-----------------|--------------|----------------------|-------|---------|---------------|---------------|
+Northeast |Married |White |Persons|886 |113048 |50.7878688699 |
+Northeast |Married |Black |Persons|123 |15688 |46.8068587455 |
+Northeast |Married |American Indian/Eskimo|Persons|2 |271 |52.8450184502 |
+Northeast |Married |Asian/Pacific Islander|Persons|40 |4886 |42.3145722472 |
+Northeast |Married |Other |Persons|111 |14300 |44.9609090909 |
+Northeast |Married |Not stated |Persons|194 |24787 |45.1552023238 |
+Northeast |Single |White |Persons|601 |80771 |31.9646920239 |
+Northeast |Single |Black |Persons|418 |55191 |34.0363607491 |
+Northeast |Single |American Indian/Eskimo|Persons|4 |663 |6.75263951735 |
+Northeast |Single |Asian/Pacific Islander|Persons|36 |4509 |17.9306756857 |
+Northeast |Single |Other |Persons|265 |36150 |24.0950326942 |
+Northeast |Single |Not stated |Persons|420 |57816 |26.5161432615 |
+Northeast |Widowed |White |Persons|380 |46594 |77.9027986436 |
+Northeast |Widowed |Black |Persons|78 |10134 |74.7357410697 |
+Northeast |Widowed |American Indian/Eskimo|Persons|0 |0 |None |
+Northeast |Widowed |Asian/Pacific Islander|Persons|1 |124 |76.0 |
+Northeast |Widowed |Other |Persons|43 |5676 |72.025017618 |
+Northeast |Widowed |Not stated |Persons|44 |5916 |76.8032454361 |
+Northeast |Divorced |White |Persons|132 |16369 |54.8385362576 |
+Northeast |Divorced |Black |Persons|39 |5089 |58.4539202201 |
+Northeast |Divorced |American Indian/Eskimo|Persons|0 |0 |None |
+Northeast |Divorced |Asian/Pacific Islander|Persons|1 |118 |67.0 |
+Northeast |Divorced |Other |Persons|37 |4735 |51.1596620908 |
+Northeast |Divorced |Not stated |Persons|29 |3614 |58.4584947427 |
+Northeast |Separated |White |Persons|33 |4317 |66.1976295267 |
+Northeast |Separated |Black |Persons|28 |3658 |66.8015308912 |
+Northeast |Separated |American Indian/Eskimo|Persons|7 |841 |48.4946492271 |
+Northeast |Separated |Asian/Pacific Islander|Persons|2 |265 |35.0 |
+Northeast |Separated |Other |Persons|10 |1302 |45.7503840246 |
+Northeast |Separated |Not stated |Persons|16 |2020 |61.2440594059 |
+Northeast |Not stated |White |Persons|1121 |175273 |47.0690279827 |
+Northeast |Not stated |Black |Persons|2308 |82648 |35.3079828551 |
+Northeast |Not stated |American Indian/Eskimo|Persons|13 |1200 |38.174375 |
+Northeast |Not stated |Asian/Pacific Islander|Persons|11 |1932 |17.6537267081 |
+Northeast |Not stated |Other |Persons|387 |25184 |38.640623281 |
+Northeast |Not stated |Not stated |Persons|96 |12086 |53.6532935364 |
+Midwest |Married |White |Persons|788 |165666 |50.181092278 |
+Midwest |Married |Black |Persons|89 |19544 |44.7604891527 |
+Midwest |Married |American Indian/Eskimo|Persons|1 |179 |69.0 |
+Midwest |Married |Asian/Pacific Islander|Persons|7 |1235 |55.0097165992 |
+Midwest |Married |Other |Persons|5 |901 |41.3274139845 |
+Midwest |Married |Not stated |Persons|539 |94519 |41.8136882532 |
+Midwest |Single |White |Persons|481 |105392 |22.8603358956 |
+Midwest |Single |Black |Persons|213 |47679 |24.2253332515 |
+Midwest |Single |American Indian/Eskimo|Persons|0 |0 |None |
+Midwest |Single |Asian/Pacific Islander|Persons|4 |720 |29.4138888889 |
+Midwest |Single |Other |Persons|2 |314 |33.9171974522 |
+Midwest |Single |Not stated |Persons|407 |71907 |14.5453358613 |
+Midwest |Widowed |White |Persons|335 |70266 |76.915876811 |
+Midwest |Widowed |Black |Persons|77 |16899 |72.6628794603 |
+Midwest |Widowed |American Indian/Eskimo|Persons|0 |0 |None |
+Midwest |Widowed |Asian/Pacific Islander|Persons|1 |174 |78.0 |
+Midwest |Widowed |Other |Persons|3 |543 |78.379373849 |
+Midwest |Widowed |Not stated |Persons|158 |28201 |76.9008191199 |
+Midwest |Divorced |White |Persons|115 |23950 |57.1633820459 |
+Midwest |Divorced |Black |Persons|37 |7332 |55.1419803601 |
+Midwest |Divorced |American Indian/Eskimo|Persons|0 |0 |None |
+Midwest |Divorced |Asian/Pacific Islander|Persons|1 |242 |60.0 |
+Midwest |Divorced |Other |Persons|2 |361 |59.8060941828 |
+Midwest |Divorced |Not stated |Persons|66 |11948 |48.1949280214 |
+Midwest |Separated |White |Persons|4 |713 |24.6802244039 |
+Midwest |Separated |Black |Persons|2 |602 |47.6212624585 |
+Midwest |Separated |American Indian/Eskimo|Persons|0 |0 |None |
+Midwest |Separated |Asian/Pacific Islander|Persons|0 |0 |None |
+Midwest |Separated |Other |Persons|0 |0 |None |
+Midwest |Separated |Not stated |Persons|6 |1002 |38.5798403194 |
+Midwest |Not stated |White |Persons|550 |125807 |64.9911319462 |
+Midwest |Not stated |Black |Persons|43 |8030 |35.2581569116 |
+Midwest |Not stated |American Indian/Eskimo|Persons|0 |0 |None |
+Midwest |Not stated |Asian/Pacific Islander|Persons|2 |411 |45.5060827251 |
+Midwest |Not stated |Other |Persons|5 |455 |18.0813186813 |
+Midwest |Not stated |Not stated |Persons|74 |14309 |45.3128101195 |
+South |Married |White |Persons|377 |49393 |44.8787884923 |
+South |Married |Black |Persons|136 |20964 |45.276044648 |
+South |Married |American Indian/Eskimo|Persons|1 |204 |38.0 |
+South |Married |Asian/Pacific Islander|Persons|45 |5488 |39.2458090379 |
+South |Married |Other |Persons|4 |864 |34.6782407407 |
+South |Married |Not stated |Persons|187 |23277 |34.0549469433 |
+South |Single |White |Persons|410 |58291 |18.9273152129 |
+South |Single |Black |Persons|701 |119071 |22.616342632 |
+South |Single |American Indian/Eskimo|Persons|0 |0 |None |
+South |Single |Asian/Pacific Islander|Persons|32 |4647 |7.32106735528 |
+South |Single |Other |Persons|4 |1002 |17.3293413174 |
+South |Single |Not stated |Persons|206 |28119 |9.48045802285 |
+South |Widowed |White |Persons|91 |11476 |75.2161031718 |
+South |Widowed |Black |Persons|61 |9375 |71.0226133333 |
+South |Widowed |American Indian/Eskimo|Persons|0 |0 |None |
+South |Widowed |Asian/Pacific Islander|Persons|5 |644 |73.5403726708 |
+South |Widowed |Other |Persons|0 |0 |None |
+South |Widowed |Not stated |Persons|16 |2051 |73.7640175524 |
+South |Divorced |White |Persons|48 |6356 |53.3786972939 |
+South |Divorced |Black |Persons|65 |9884 |48.2100364225 |
+South |Divorced |American Indian/Eskimo|Persons|0 |0 |None |
+South |Divorced |Asian/Pacific Islander|Persons|0 |0 |None |
+South |Divorced |Other |Persons|0 |0 |None |
+South |Divorced |Not stated |Persons|6 |729 |61.9958847737 |
+South |Separated |White |Persons|3 |435 |31.7471264368 |
+South |Separated |Black |Persons|15 |2257 |48.9069561365 |
+South |Separated |American Indian/Eskimo|Persons|0 |0 |None |
+South |Separated |Asian/Pacific Islander|Persons|0 |0 |None |
+South |Separated |Other |Persons|0 |0 |None |
+South |Separated |Not stated |Persons|0 |0 |None |
+South |Not stated |White |Persons|1215 |67160 |53.7483788038 |
+South |Not stated |Black |Persons|282 |13682 |47.2678057102 |
+South |Not stated |American Indian/Eskimo|Persons|555 |15537 |49.1005380854 |
+South |Not stated |Asian/Pacific Islander|Persons|167 |4817 |43.0552729915 |
+South |Not stated |Other |Persons|153 |3798 |31.6147972617 |
+South |Not stated |Not stated |Persons|5650 |144700 |48.9992103644 |
+All regions |Married |White |Persons|2051 |328107 |49.591949679 |
+All regions |Married |Black |Persons|348 |56196 |45.5240942416 |
+All regions |Married |American Indian/Eskimo|Persons|4 |654 |52.6360856269 |
+All regions |Married |Asian/Pacific Islander|Persons|92 |11609 |42.2144026187 |
+All regions |Married |Other |Persons|120 |16065 |44.20410831 |
+All regions |Married |Not stated |Persons|920 |142583 |41.1279535428 |
+All regions |Single |White |Persons|1492 |244454 |24.9306977642 |
+All regions |Single |Black |Persons|1332 |221941 |25.8018616828 |
+All regions |Single |American Indian/Eskimo|Persons|4 |663 |6.75263951735 |
+All regions |Single |Asian/Pacific Islander|Persons|72 |9876 |13.7756598488 |
+All regions |Single |Other |Persons|271 |37466 |23.9964082607 |
+All regions |Single |Not stated |Persons|1033 |157842 |18.0278367211 |
+All regions |Widowed |White |Persons|806 |128336 |77.1221948635 |
+All regions |Widowed |Black |Persons|216 |36408 |72.8174851681 |
+All regions |Widowed |American Indian/Eskimo|Persons|0 |0 |None |
+All regions |Widowed |Asian/Pacific Islander|Persons|7 |942 |74.6878980892 |
+All regions |Widowed |Other |Persons|46 |6219 |72.5798359865 |
+All regions |Widowed |Not stated |Persons|218 |36168 |76.7069785446 |
+All regions |Divorced |White |Persons|295 |46675 |55.832672737 |
+All regions |Divorced |Black |Persons|141 |22305 |52.8258686393 |
+All regions |Divorced |American Indian/Eskimo|Persons|0 |0 |None |
+All regions |Divorced |Asian/Pacific Islander|Persons|2 |360 |62.2944444444 |
+All regions |Divorced |Other |Persons|39 |5096 |51.7721742543 |
+All regions |Divorced |Not stated |Persons|101 |16291 |51.0893745013 |
+All regions |Separated |White |Persons|40 |5465 |58.0388228118 |
+All regions |Separated |Black |Persons|45 |6517 |58.8324382385 |
+All regions |Separated |American Indian/Eskimo|Persons|7 |841 |48.4946492271 |
+All regions |Separated |Asian/Pacific Islander|Persons|2 |265 |35.0 |
+All regions |Separated |Other |Persons|10 |1302 |45.7503840246 |
+All regions |Separated |Not stated |Persons|22 |3022 |53.7293183322 |
+All regions |Not stated |White |Persons|2886 |368240 |54.4101949784 |
+All regions |Not stated |Black |Persons|2633 |104360 |36.8721280638 |
+All regions |Not stated |American Indian/Eskimo|Persons|568 |16737 |48.3171601979 |
+All regions |Not stated |Asian/Pacific Islander|Persons|180 |7160 |36.3417946927 |
+All regions |Not stated |Other |Persons|545 |29437 |37.4163622892 |
+All regions |Not stated |Not stated |Persons|5820 |171095 |49.0196700395 |
+Northeast |All |White |Persons|3153 |436372 |49.0096684658 |
+Northeast |All |Black |Persons|2994 |172408 |39.6161718314 |
+Northeast |All |American Indian/Eskimo|Persons|26 |2975 |35.4256302521 |
+Northeast |All |Asian/Pacific Islander|Persons|91 |11834 |29.4330249 |
+Northeast |All |Other |Persons|853 |87347 |36.6094300732 |
+Northeast |All |Not stated |Persons|799 |106239 |38.4992615187 |
+Midwest |All |White |Persons|2273 |491794 |52.2376598551 |
+Midwest |All |Black |Persons|461 |100086 |39.7044608047 |
+Midwest |All |American Indian/Eskimo|Persons|1 |179 |69.0 |
+Midwest |All |Asian/Pacific Islander|Persons|15 |2782 |48.8533429188 |
+Midwest |All |Other |Persons|17 |2574 |46.7222222222 |
+Midwest |All |Not stated |Persons|1250 |221886 |37.9909073388 |
+South |All |White |Persons|2144 |193111 |42.1829737899 |
+South |All |Black |Persons|1260 |175233 |31.6239729461 |
+South |All |American Indian/Eskimo|Persons|556 |15741 |48.9566774813 |
+South |All |Asian/Pacific Islander|Persons|249 |15596 |32.3261894075 |
+South |All |Other |Persons|161 |5664 |29.5549081921 |
+South |All |Not stated |Persons|6065 |198876 |41.9655903119 |
+Northeast |Married |All races |Persons|1356 |172980 |48.9018730489 |
+Northeast |Single |All races |Persons|1744 |235100 |29.5607831261 |
+Northeast |Widowed |All races |Persons|546 |68444 |76.847948688 |
+Northeast |Divorced |All races |Persons|238 |29925 |55.3563909774 |
+Northeast |Separated |All races |Persons|96 |12403 |61.5556048268 |
+Northeast |Not stated |All races |Persons|3936 |298323 |43.1396818918 |
+Midwest |Married |All races |Persons|1429 |282044 |47.0061828414 |
+Midwest |Single |All races |Persons|1107 |226012 |20.539067176 |
+Midwest |Widowed |All races |Persons|574 |116083 |76.3015514761 |
+Midwest |Divorced |All races |Persons|221 |43833 |54.4180640157 |
+Midwest |Separated |All races |Persons|12 |2317 |36.6517047907 |
+Midwest |Not stated |All races |Persons|674 |149012 |61.3022665071 |
+South |Married |All races |Persons|750 |100190 |42.0367002695 |
+South |Single |All races |Persons|1353 |211130 |19.4866132892 |
+South |Widowed |All races |Persons|173 |23546 |73.3741187463 |
+South |Divorced |All races |Persons|119 |16969 |50.7382874654 |
+South |Separated |All races |Persons|18 |2692 |46.1341010401 |
+South |Not stated |All races |Persons|8022 |249694 |49.8089272796 |
+Northeast |All |All races |Persons|7916 |817175 |44.0029866315 |
+Midwest |All |All races |Persons|4017 |819301 |46.823087999 |
+South |All |All races |Persons|10435 |604221 |38.8528269489 |
+All regions |Married |All races |Persons|3535 |555214 |46.7000378833 |
+All regions |Single |All races |Persons|4204 |672242 |23.3636464655 |
+All regions |Widowed |All races |Persons|1293 |208073 |76.1500098523 |
+All regions |Divorced |All races |Persons|578 |90727 |54.0393157494 |
+All regions |Separated |All races |Persons|126 |17412 |55.8574067693 |
+All regions |Not stated |All races |Persons|12632 |697029 |49.4116069 |
+All regions |All |White |Persons|7570 |1121277 |49.2497491611 |
+All regions |All |Black |Persons|4715 |447727 |36.5078926857 |
+All regions |All |American Indian/Eskimo|Persons|583 |18895 |47.0161053312 |
+All regions |All |Asian/Pacific Islander|Persons|355 |30212 |32.714804272 |
+All regions |All |Other |Persons|1031 |95585 |36.463732684 |
+All regions |All |Not stated |Persons|8114 |527001 |39.5933257226 |
+All regions |All |All races |Persons|22368 |2240697 |43.6453645331 |
+
+
+That took 2.294 seconds.
+
+##############################################################################
+
+#20f. Use the first whereset again to filter an overall summary.
+wheresetload(): memory mapping of test1 containing 7418 elements took 0.004 seconds.
+
+
+Summary derived from nhds dataset
+Using whereset test1 containing 7418 elements
+summary(): Summary created in 0.038 seconds.
+
+ | | |
+ |Frequency |Mean of |
+ |weighted |Age (years) |
+ |by |weighted by |
+Frequency|Analysis Weight|Analysis Weight|
+---------|---------------|---------------|
+7418 |695655 |34.4974577217 |
+
+#21a. Yet another whereset - note the "startingwith" operator, like the SQL "like" operator.
+>>> nhds.makewhereset("test4",var1="diagnosis1",op1="startingwith",val1="250")
+Assembling whereset test4 containing 564 elements took 0.112 seconds.
+
+That took 0.113 seconds.
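+
+# "startingwith" is a prefix match, i.e. the equivalent of the SQL
+#   WHERE diagnosis1 LIKE '250%'
+# (a sketch; the whereset name "test4a" is hypothetical):
+>>> nhds.makewhereset("test4a", var1="diagnosis1",
+...                   op1="startingwith", val1="410")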
+
+##############################################################################
+
+#21b. You can turn off the output translation (code-to-label formatting)
+#     via the column's use_outtrans metadata attribute.
+>>> print nhds.univar(var1="diagnosis1",wheresetname="test4",printit=1,allcalc=1)
+>>> nhds.metadata.diagnosis1.use_outtrans=0
+>>> print nhds.univar(var1="diagnosis1",wheresetname="test4",printit=1,allcalc=1)
+>>> nhds.metadata.diagnosis1.use_outtrans=1
+wheresetload(): memory mapping of test4 containing 564 elements took 0.003 seconds.
+
+
+Univariate summary derived from diagnosis1 column in nhds dataset
+Using whereset test4 containing 564 elements
+univar(): Univariate summary created in 0.068 seconds.
+
+ | |
+ | |
+Grouped | |
+by | |
+Diagnosis Code 1 |Frequency|
+-------------------------------------------------------------------------------------------------------------------------------------|---------|
+250.00 Diabetes mellitus - Diabetes mellitus without mention of complication, type II [non-insulin dependent type] [NIDDM type] [adul|42 |
+250.01 Diabetes mellitus - Diabetes mellitus without mention of complication, type I [insulin dependent type][IDDM type] [juvenile ty|32 |
+250.02 Diabetes mellitus - Diabetes mellitus without mention of complication, type II [non-insulin dependent type] [NIDDM type] [adul|51 |
+250.03 Diabetes mellitus - Diabetes mellitus without mention of complication, type I [insulin dependent type][IDDM type] [juvenile ty|16 |
+250.10 Diabetes mellitus - Diabetes with ketoacidosis, type II [non-insulin dependent type] [NIDDM type] [adult-onset type] or unspec|14 |
+250.11 Diabetes mellitus - Diabetes with ketoacidosis, type I [insulin dependent type][IDDM type] [juvenile type], not stated as unco|37 |
+250.12 Diabetes mellitus - Diabetes with ketoacidosis, type II [non-insulin dependent type] [NIDDM type] [adult-onset type] or unspe|10 |
+250.13 Diabetes mellitus - Diabetes with ketoacidosis, type I [insulin dependent type][IDDM type] [juvenile type], uncontrolled |34 |
+250.20 Diabetes mellitus - Diabetes with hyperosmolarity, type II [non-insulin dependent type] [NIDDM type] [adult-onset type] or un|5 |
+250.22 Diabetes mellitus - Diabetes with hyperosmolarity, type II [non-insulin dependent type] [NIDDM type] [adult-onset type] or uns|7 |
+250.23 Diabetes mellitus - Diabetes with hyperosmolarity, type I [insulin dependent type][IDDM type] [juvenile type], uncontrolled |2 |
+250.30 Diabetes mellitus - Diabetes with other coma, type II [non-insulin dependent type] [NIDDM type] [adult-onset type] or unspecif|6 |
+250.31 Diabetes mellitus - Diabetes with other coma, type I [insulin dependent type][IDDM type] [juvenile type], not stated as uncont|1 |
+250.33 Diabetes mellitus - Diabetes with other coma, type I [insulin dependent type][IDDM type] [juvenile type], uncontrolled |3 |
+250.40 Diabetes mellitus - Diabetes with renal complications, type II [non-insulin dependent type] [NIDDM type] [adult-onset type] or|11 |
+250.41 Diabetes mellitus - Diabetes with renal complications, type I [insulin dependent type] [IDDM type] [juvenile type], not stated|16 |
+250.42 Diabetes mellitus - Diabetes with renal complications, type II [non-insulin dependent type] [NIDDM type] [adult-onset type] or|3 |
+250.43 Diabetes mellitus - Diabetes with renal complications, type I [insulin dependent type] [IDDM type] [juvenile type], uncontroll|4 |
+250.50 Diabetes mellitus - Diabetes with ophthalmic complications, type II [non-insulin dependent type] [NIDDM type] [adult-onset typ|4 |
+250.51 Diabetes mellitus - Diabetes with ophthalmic complications, type I [insulin dependent type][IDDM type] [juvenile type], not st|2 |
+250.52 Diabetes mellitus - Diabetes with ophthalmic complications, type II [non-insulin dependent type] [NIDDM type] [adult-onset typ|1 |
+250.60 Diabetes mellitus - Diabetes with neurological complications, type II [non-insulin dependent type] [NIDDM type] [adult-onset |18 |
+250.61 Diabetes mellitus - Diabetes with neurological complications, type I [insulin dependent type][IDDM type] [juvenile type], not |23 |
+250.62 Diabetes mellitus - Diabetes with neurological complications, type II [non-insulin dependent type] [NIDDM type] [adult-onset t|6 |
+250.63 Diabetes mellitus - Diabetes with neurological complications, type I [insulin dependent type] [IDDM type] [juvenile type], unc|8 |
+250.70 Diabetes mellitus - Diabetes with peripheral circulatory disorders, type II [non-insulin dependent type] [NIDDM type] [adult-o|33 |
+250.71 Diabetes mellitus - Diabetes with peripheral circulatory disorders, type I [insulin dependent type][IDDM type] [juvenile type]|28 |
+250.72 Diabetes mellitus - Diabetes with peripheral circulatory disorders, type II [non-insulin dependent type] [NIDDM type] [adult-|10 |
+250.73 Diabetes mellitus - Diabetes with peripheral circulatory disorders, type I [insulin dependent type][IDDM type] [juvenile type]|4 |
+250.80 Diabetes mellitus - Diabetes with other specified complications, type II [non-insulin dependent type] [NIDDM type] [adult-onse|44 |
+250.81 Diabetes mellitus - Diabetes with other specified complications, type I [insulin dependent type][IDDM type] [juvenile type], n|25 |
+250.82 Diabetes mellitus - Diabetes with other specified complications, type II [non-insulin dependent type] [NIDDM type] [adult-onse|17 |
+250.83 Diabetes mellitus - Diabetes with other specified complications, type I [insulin dependent type][IDDM type] [juvenile type], u|17 |
+250.90 Diabetes mellitus - Diabetes with unspecified complication, type II [non-insulin dependent type] [NIDDM type] [adult-onset ty|6 |
+250.91 Diabetes mellitus - Diabetes with unspecified complication, type I [insulin dependent type][IDDM type] [juvenile type], not st|2 |
+250.92 Diabetes mellitus - Diabetes with unspecified complication, type II [non-insulin dependent type] [NIDDM type] [adult-onset typ|18 |
+250.93 Diabetes mellitus - Diabetes with unspecified complication, type I [insulin dependent type][IDDM type] [juvenile type], uncont|4 |
+<All> |564 |
+
+
+That took 0.081 seconds.
+
+##############################################################################
+
+wheresetload(): memory mapping of test4 containing 564 elements took 0.003 seconds.
+
+
+Univariate summary derived from diagnosis1 column in nhds dataset
+Using whereset test4 containing 564 elements
+univar(): Univariate summary created in 0.060 seconds.
+
+ | |
+ | |
+Grouped | |
+by | |
+Diagnosis Code 1|Frequency|
+----------------|---------|
+250.00 |42 |
+250.01 |32 |
+250.02 |51 |
+250.03 |16 |
+250.10 |14 |
+250.11 |37 |
+250.12 |10 |
+250.13 |34 |
+250.20 |5 |
+250.22 |7 |
+250.23 |2 |
+250.30 |6 |
+250.31 |1 |
+250.33 |3 |
+250.40 |11 |
+250.41 |16 |
+250.42 |3 |
+250.43 |4 |
+250.50 |4 |
+250.51 |2 |
+250.52 |1 |
+250.60 |18 |
+250.61 |23 |
+250.62 |6 |
+250.63 |8 |
+250.70 |33 |
+250.71 |28 |
+250.72 |10 |
+250.73 |4 |
+250.80 |44 |
+250.81 |25 |
+250.82 |17 |
+250.83 |17 |
+250.90 |6 |
+250.91 |2 |
+250.92 |18 |
+250.93 |4 |
+<All> |564 |
+
+
+That took 0.070 seconds.
+
+##############################################################################
+
+
+That took 0.000 seconds.
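+
+# When toggling use_outtrans interactively it is easy to forget to turn
+# it back on; a try/finally guard avoids that (a sketch, not recorded
+# session output):
+>>> nhds.metadata.diagnosis1.use_outtrans = 0
+>>> try:
+...     print nhds.univar(var1="diagnosis1", wheresetname="test4",
+...                       printit=1, allcalc=1)
+... finally:
+...     nhds.metadata.diagnosis1.use_outtrans = 1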
+
+##############################################################################
+
+#21c. Define another whereset (filter) using the "in" operator.
+>>> nhds.makewhereset("testi",var1="diagnosis1",op1="in",val1=("250.12","250.13","250.21"))
+>>> nhds.univar(var1="diagnosis1",wheresetname="testi",printit=1)
+Assembling whereset testi containing 44 elements took 0.073 seconds.
+wheresetload(): memory mapping of testi containing 44 elements took 0.004 seconds.
+
+
+Univariate summary derived from diagnosis1 column in nhds dataset
+Using whereset testi containing 44 elements
+univar(): Univariate summary created in 0.034 seconds.
+
+ | |
+ | |
+Grouped | |
+by | |
+Diagnosis Code 1 |Frequency|
+-------------------------------------------------------------------------------------------------------------------------------------|---------|
+250.12 Diabetes mellitus - Diabetes with ketoacidosis, type II [non-insulin dependent type] [NIDDM type] [adult-onset type] or unspe|10 |
+250.13 Diabetes mellitus - Diabetes with ketoacidosis, type I [insulin dependent type][IDDM type] [juvenile type], uncontrolled |34 |
+
+
+That took 0.108 seconds.
+
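+# Note that "250.21" contributed no records above: values absent from the
+# data simply match nothing (10 + 34 = 44, the size of the whereset). As a
+# rough sketch, the "in" operator keeps rows whose value exactly equals one
+# of the supplied values - illustrated here in plain Python with hypothetical
+# column values (this is not the SOOMv0 implementation):
+>>> wanted = ("250.12", "250.13", "250.21")
+>>> codes = ["250.12", "410.11", "250.13", "250.13"]
+>>> [c for c in codes if c in wanted]
+['250.12', '250.13', '250.13']
+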
+##############################################################################
+
+#21d. Define another whereset (filter) using the "in:" operator ("in" plus "startingwith") -
+#     this syntax is borrowed from the SAS package.
+>>> nhds.makewhereset("testj",var1="diagnosis1",op1="in:",val1=("250*","01","410.1"))
+>>> nhds.univar(var1="diagnosis1",wheresetname="testj",printit=1)
+Assembling whereset testj containing 755 elements took 0.254 seconds.
+wheresetload(): memory mapping of testj containing 755 elements took 0.004 seconds.
+
+
+Univariate summary derived from diagnosis1 column in nhds dataset
+Using whereset testj containing 755 elements
+univar(): Univariate summary created in 0.251 seconds.
+
+ | |
+ | |
+Grouped | |
+by | |
+Diagnosis Code 1 |Frequency|
+-------------------------------------------------------------------------------------------------------------------------------------|---------|
+010.83 Primary tuberculous infection - Other primary progressive tuberculosis, tubercle bacilli found (in sputum) by microscopy |1 |
+011.23 Pulmonary tuberculosis - Tuberculosis of lung with cavitation, tubercle bacilli found (in sputum) by microscopy |2 |
+011.40 Pulmonary tuberculosis - Tuberculous fibrosis of lung, unspecified |1 |
+011.86 Pulmonary tuberculosis - Other specified pulmonary tuberculosis, tubercle bacilli not found by bacteriological or histological|1 |
+011.90 Pulmonary tuberculosis - Pulmonary tuberculosis, unspecified - Pulmonary tuberculosis, unspecified, unspecified |13 |
+011.91 Pulmonary tuberculosis - Pulmonary tuberculosis, unspecified - Pulmonary tuberculosis, unspecified, bacteriological or histolo|1 |
+011.92 Pulmonary tuberculosis - Pulmonary tuberculosis, unspecified - Pulmonary tuberculosis, unspecified, bacteriological or histolo|2 |
+011.93 Pulmonary tuberculosis - Pulmonary tuberculosis, unspecified - Pulmonary tuberculosis, unspecified, tubercle bacilli found (in|7 |
+011.94 Pulmonary tuberculosis - Pulmonary tuberculosis, unspecified - Pulmonary tuberculosis, unspecified, tubercle bacilli not found|1 |
+011.96 Pulmonary tuberculosis - Pulmonary tuberculosis, unspecified - Pulmonary tuberculosis, unspecified, tubercle bacilli not found|1 |
+014.80 Tuberculosis of intestines, peritoneum, and mesenteric glands - Other, unspecified |1 |
+015.00 Tuberculosis of bones and joints - Vertebral column, unspecified |2 |
+017.26 Tuberculosis of other organs - Peripheral lymph nodes, tubercle bacilli not found by bacteriological or histological examinati|1 |
+018.03 Miliary tuberculosis - Acute miliary tuberculosis, tubercle bacilli found (in sputum) by microscopy |2 |
+018.83 Miliary tuberculosis - Other specified miliary tuberculosis, tubercle bacilli found (in sputum) by microscopy |2 |
+018.90 Miliary tuberculosis - Miliary tuberculosis, unspecified - Miliary tuberculosis, unspecified, unspecified |1 |
+018.95 Miliary tuberculosis - Miliary tuberculosis, unspecified - Miliary tuberculosis, unspecified, tubercle bacilli not found by ba|1 |
+250.00 Diabetes mellitus - Diabetes mellitus without mention of complication, type II [non-insulin dependent type] [NIDDM type] [adul|42 |
+250.01 Diabetes mellitus - Diabetes mellitus without mention of complication, type I [insulin dependent type][IDDM type] [juvenile ty|32 |
+250.02 Diabetes mellitus - Diabetes mellitus without mention of complication, type II [non-insulin dependent type] [NIDDM type] [adul|51 |
+250.03 Diabetes mellitus - Diabetes mellitus without mention of complication, type I [insulin dependent type][IDDM type] [juvenile ty|16 |
+250.10 Diabetes mellitus - Diabetes with ketoacidosis, type II [non-insulin dependent type] [NIDDM type] [adult-onset type] or unspec|14 |
+250.11 Diabetes mellitus - Diabetes with ketoacidosis, type I [insulin dependent type][IDDM type] [juvenile type], not stated as unco|37 |
+250.12 Diabetes mellitus - Diabetes with ketoacidosis, type II [non-insulin dependent type] [NIDDM type] [adult-onset type] or unspe|10 |
+250.13 Diabetes mellitus - Diabetes with ketoacidosis, type I [insulin dependent type][IDDM type] [juvenile type], uncontrolled |34 |
+250.20 Diabetes mellitus - Diabetes with hyperosmolarity, type II [non-insulin dependent type] [NIDDM type] [adult-onset type] or un|5 |
+250.22 Diabetes mellitus - Diabetes with hyperosmolarity, type II [non-insulin dependent type] [NIDDM type] [adult-onset type] or uns|7 |
+250.23 Diabetes mellitus - Diabetes with hyperosmolarity, type I [insulin dependent type][IDDM type] [juvenile type], uncontrolled |2 |
+250.30 Diabetes mellitus - Diabetes with other coma, type II [non-insulin dependent type] [NIDDM type] [adult-onset type] or unspecif|6 |
+250.31 Diabetes mellitus - Diabetes with other coma, type I [insulin dependent type][IDDM type] [juvenile type], not stated as uncont|1 |
+250.33 Diabetes mellitus - Diabetes with other coma, type I [insulin dependent type][IDDM type] [juvenile type], uncontrolled |3 |
+250.40 Diabetes mellitus - Diabetes with renal complications, type II [non-insulin dependent type] [NIDDM type] [adult-onset type] or|11 |
+250.41 Diabetes mellitus - Diabetes with renal complications, type I [insulin dependent type] [IDDM type] [juvenile type], not stated|16 |
+250.42 Diabetes mellitus - Diabetes with renal complications, type II [non-insulin dependent type] [NIDDM type] [adult-onset type] or|3 |
+250.43 Diabetes mellitus - Diabetes with renal complications, type I [insulin dependent type] [IDDM type] [juvenile type], uncontroll|4 |
+250.50 Diabetes mellitus - Diabetes with ophthalmic complications, type II [non-insulin dependent type] [NIDDM type] [adult-onset typ|4 |
+250.51 Diabetes mellitus - Diabetes with ophthalmic complications, type I [insulin dependent type][IDDM type] [juvenile type], not st|2 |
+250.52 Diabetes mellitus - Diabetes with ophthalmic complications, type II [non-insulin dependent type] [NIDDM type] [adult-onset typ|1 |
+250.60 Diabetes mellitus - Diabetes with neurological complications, type II [non-insulin dependent type] [NIDDM type] [adult-onset |18 |
+250.61 Diabetes mellitus - Diabetes with neurological complications, type I [insulin dependent type][IDDM type] [juvenile type], not |23 |
+250.62 Diabetes mellitus - Diabetes with neurological complications, type II [non-insulin dependent type] [NIDDM type] [adult-onset t|6 |
+250.63 Diabetes mellitus - Diabetes with neurological complications, type I [insulin dependent type] [IDDM type] [juvenile type], unc|8 |
+250.70 Diabetes mellitus - Diabetes with peripheral circulatory disorders, type II [non-insulin dependent type] [NIDDM type] [adult-o|33 |
+250.71 Diabetes mellitus - Diabetes with peripheral circulatory disorders, type I [insulin dependent type][IDDM type] [juvenile type]|28 |
+250.72 Diabetes mellitus - Diabetes with peripheral circulatory disorders, type II [non-insulin dependent type] [NIDDM type] [adult-|10 |
+250.73 Diabetes mellitus - Diabetes with peripheral circulatory disorders, type I [insulin dependent type][IDDM type] [juvenile type]|4 |
+250.80 Diabetes mellitus - Diabetes with other specified complications, type II [non-insulin dependent type] [NIDDM type] [adult-onse|44 |
+250.81 Diabetes mellitus - Diabetes with other specified complications, type I [insulin dependent type][IDDM type] [juvenile type], n|25 |
+250.82 Diabetes mellitus - Diabetes with other specified complications, type II [non-insulin dependent type] [NIDDM type] [adult-onse|17 |
+250.83 Diabetes mellitus - Diabetes with other specified complications, type I [insulin dependent type][IDDM type] [juvenile type], u|17 |
+250.90 Diabetes mellitus - Diabetes with unspecified complication, type II [non-insulin dependent type] [NIDDM type] [adult-onset ty|6 |
+250.91 Diabetes mellitus - Diabetes with unspecified complication, type I [insulin dependent type][IDDM type] [juvenile type], not st|2 |
+250.92 Diabetes mellitus - Diabetes with unspecified complication, type II [non-insulin dependent type] [NIDDM type] [adult-onset typ|18 |
+250.93 Diabetes mellitus - Diabetes with unspecified complication, type I [insulin dependent type][IDDM type] [juvenile type], uncont|4 |
+410.10 Acute myocardial infarction - Of other anterior wall, episode of care unspecified |2 |
+410.11 Acute myocardial infarction - Of other anterior wall, initial episode of care |145 |
+410.12 Acute myocardial infarction - Of other anterior wall, subsequent episode of care |4 |
+
+
+That took 0.522 seconds.
+
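+# As a rough sketch, "in:" treats each supplied value as a leading substring
+# (prefix): "01" matches 010.83 through 018.95 above, and "410.1" matches
+# 410.10-410.12. Illustrated here in plain Python with hypothetical column
+# values (not the SOOMv0 implementation; whether the trailing "*" on "250*"
+# is required or merely tolerated is not shown by this demo):
+>>> prefixes = ("250", "01", "410.1")
+>>> codes = ["010.83", "250.00", "410.11", "414.00"]
+>>> [c for c in codes if c.startswith(prefixes)]
+['010.83', '250.00', '410.11']
+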
+##############################################################################
+
+#22a. Date/time values are supported.
+>>> print nhds.univar(var1="randomdate",printit=1,allcalc=1)
+
+Univariate summary derived from randomdate column in nhds dataset
+univar(): Univariate summary created in 0.508 seconds.
+
+ | |
+ | |
+Grouped | |
+by | |
+Random date|Frequency|
+-----------|---------|
+01/01/1996 |26 |
+02/01/1996 |24 |
+03/01/1996 |35 |
+04/01/1996 |21 |
+05/01/1996 |20 |
+06/01/1996 |20 |
+07/01/1996 |32 |
+08/01/1996 |24 |
+09/01/1996 |39 |
+10/01/1996 |21 |
+11/01/1996 |24 |
+12/01/1996 |21 |
+13/01/1996 |25 |
+14/01/1996 |29 |
+15/01/1996 |30 |
+16/01/1996 |21 |
+17/01/1996 |23 |
+18/01/1996 |35 |
+19/01/1996 |17 |
+20/01/1996 |19 |
+21/01/1996 |33 |
+22/01/1996 |31 |
+23/01/1996 |36 |
+24/01/1996 |32 |
+25/01/1996 |22 |
+26/01/1996 |29 |
+27/01/1996 |17 |
+28/01/1996 |24 |
+29/01/1996 |29 |
+30/01/1996 |32 |
+31/01/1996 |28 |
+01/02/1996 |23 |
+02/02/1996 |35 |
+03/02/1996 |27 |
+04/02/1996 |40 |
+05/02/1996 |21 |
+06/02/1996 |27 |
+07/02/1996 |32 |
+08/02/1996 |20 |
+09/02/1996 |27 |
+10/02/1996 |20 |
+11/02/1996 |34 |
+12/02/1996 |20 |
+13/02/1996 |21 |
+14/02/1996 |32 |
+15/02/1996 |27 |
+16/02/1996 |31 |
+17/02/1996 |28 |
+18/02/1996 |29 |
+19/02/1996 |25 |
+20/02/1996 |24 |
+21/02/1996 |24 |
+22/02/1996 |28 |
+23/02/1996 |24 |
+24/02/1996 |25 |
+25/02/1996 |25 |
+26/02/1996 |30 |
+27/02/1996 |25 |
+28/02/1996 |38 |
+29/02/1996 |25 |
+01/03/1996 |26 |
+02/03/1996 |32 |
+03/03/1996 |21 |
+04/03/1996 |26 |
+05/03/1996 |30 |
+06/03/1996 |27 |
+07/03/1996 |25 |
+08/03/1996 |33 |
+09/03/1996 |16 |
+10/03/1996 |32 |
+11/03/1996 |30 |
+12/03/1996 |26 |
+13/03/1996 |32 |
+14/03/1996 |21 |
+15/03/1996 |29 |
+16/03/1996 |25 |
+17/03/1996 |24 |
+18/03/1996 |19 |
+19/03/1996 |22 |
+20/03/1996 |30 |
+21/03/1996 |22 |
+22/03/1996 |39 |
+23/03/1996 |19 |
+24/03/1996 |35 |
+25/03/1996 |28 |
+26/03/1996 |29 |
+27/03/1996 |31 |
+28/03/1996 |20 |
+29/03/1996 |30 |
+30/03/1996 |39 |
+31/03/1996 |21 |
+01/04/1996 |20 |
+02/04/1996 |22 |
+03/04/1996 |32 |
+04/04/1996 |43 |
+05/04/1996 |29 |
+06/04/1996 |33 |
+07/04/1996 |30 |
+08/04/1996 |34 |
+09/04/1996 |31 |
+10/04/1996 |33 |
+11/04/1996 |33 |
+12/04/1996 |32 |
+13/04/1996 |22 |
+14/04/1996 |25 |
+15/04/1996 |25 |
+16/04/1996 |30 |
+17/04/1996 |29 |
+18/04/1996 |27 |
+19/04/1996 |28 |
+20/04/1996 |30 |
+21/04/1996 |30 |
+22/04/1996 |33 |
+23/04/1996 |29 |
+24/04/1996 |33 |
+25/04/1996 |18 |
+26/04/1996 |28 |
+27/04/1996 |21 |
+28/04/1996 |25 |
+29/04/1996 |30 |
+30/04/1996 |29 |
+01/05/1996 |32 |
+02/05/1996 |39 |
+03/05/1996 |29 |
+04/05/1996 |25 |
+05/05/1996 |27 |
+06/05/1996 |23 |
+07/05/1996 |33 |
+08/05/1996 |22 |
+09/05/1996 |34 |
+10/05/1996 |31 |
+11/05/1996 |38 |
+12/05/1996 |26 |
+13/05/1996 |29 |
+14/05/1996 |18 |
+15/05/1996 |28 |
+16/05/1996 |29 |
+17/05/1996 |27 |
+18/05/1996 |33 |
+19/05/1996 |31 |
+20/05/1996 |27 |
+21/05/1996 |23 |
+22/05/1996 |30 |
+23/05/1996 |30 |
+24/05/1996 |23 |
+25/05/1996 |30 |
+26/05/1996 |31 |
+27/05/1996 |39 |
+28/05/1996 |25 |
+29/05/1996 |20 |
+30/05/1996 |23 |
+31/05/1996 |34 |
+01/06/1996 |27 |
+02/06/1996 |23 |
+03/06/1996 |31 |
+04/06/1996 |28 |
+05/06/1996 |27 |
+06/06/1996 |28 |
+07/06/1996 |33 |
+08/06/1996 |30 |
+09/06/1996 |24 |
+10/06/1996 |31 |
+11/06/1996 |30 |
+12/06/1996 |18 |
+13/06/1996 |16 |
+14/06/1996 |37 |
+15/06/1996 |31 |
+16/06/1996 |24 |
+17/06/1996 |18 |
+18/06/1996 |21 |
+19/06/1996 |33 |
+20/06/1996 |21 |
+21/06/1996 |21 |
+22/06/1996 |26 |
+23/06/1996 |29 |
+24/06/1996 |24 |
+25/06/1996 |31 |
+26/06/1996 |35 |
+27/06/1996 |27 |
+28/06/1996 |25 |
+29/06/1996 |35 |
+30/06/1996 |33 |
+01/07/1996 |24 |
+02/07/1996 |30 |
+03/07/1996 |32 |
+04/07/1996 |20 |
+05/07/1996 |30 |
+06/07/1996 |33 |
+07/07/1996 |30 |
+08/07/1996 |34 |
+09/07/1996 |34 |
+10/07/1996 |29 |
+11/07/1996 |21 |
+12/07/1996 |27 |
+13/07/1996 |34 |
+14/07/1996 |27 |
+15/07/1996 |26 |
+16/07/1996 |23 |
+17/07/1996 |35 |
+18/07/1996 |22 |
+19/07/1996 |32 |
+20/07/1996 |26 |
+21/07/1996 |32 |
+22/07/1996 |25 |
+23/07/1996 |26 |
+24/07/1996 |29 |
+25/07/1996 |37 |
+26/07/1996 |16 |
+27/07/1996 |30 |
+28/07/1996 |31 |
+29/07/1996 |28 |
+30/07/1996 |33 |
+31/07/1996 |41 |
+01/08/1996 |33 |
+02/08/1996 |29 |
+03/08/1996 |25 |
+04/08/1996 |29 |
+05/08/1996 |34 |
+06/08/1996 |26 |
+07/08/1996 |24 |
+08/08/1996 |28 |
+09/08/1996 |26 |
+10/08/1996 |23 |
+11/08/1996 |29 |
+12/08/1996 |19 |
+13/08/1996 |27 |
+14/08/1996 |32 |
+15/08/1996 |24 |
+16/08/1996 |24 |
+17/08/1996 |27 |
+18/08/1996 |25 |
+19/08/1996 |22 |
+20/08/1996 |32 |
+21/08/1996 |29 |
+22/08/1996 |24 |
+23/08/1996 |28 |
+24/08/1996 |29 |
+25/08/1996 |23 |
+26/08/1996 |29 |
+27/08/1996 |24 |
+28/08/1996 |21 |
+29/08/1996 |24 |
+30/08/1996 |23 |
+31/08/1996 |24 |
+01/09/1996 |29 |
+02/09/1996 |18 |
+03/09/1996 |20 |
+04/09/1996 |24 |
+05/09/1996 |20 |
+06/09/1996 |34 |
+07/09/1996 |24 |
+08/09/1996 |26 |
+09/09/1996 |33 |
+10/09/1996 |40 |
+11/09/1996 |27 |
+12/09/1996 |29 |
+13/09/1996 |22 |
+14/09/1996 |28 |
+15/09/1996 |23 |
+16/09/1996 |29 |
+17/09/1996 |26 |
+18/09/1996 |23 |
+19/09/1996 |26 |
+20/09/1996 |29 |
+21/09/1996 |28 |
+22/09/1996 |29 |
+23/09/1996 |24 |
+24/09/1996 |28 |
+25/09/1996 |25 |
+26/09/1996 |30 |
+27/09/1996 |29 |
+28/09/1996 |24 |
+29/09/1996 |34 |
+30/09/1996 |28 |
+01/10/1996 |31 |
+02/10/1996 |28 |
+03/10/1996 |32 |
+04/10/1996 |23 |
+05/10/1996 |30 |
+06/10/1996 |17 |
+07/10/1996 |30 |
+08/10/1996 |30 |
+09/10/1996 |26 |
+10/10/1996 |30 |
+11/10/1996 |22 |
+12/10/1996 |29 |
+13/10/1996 |29 |
+14/10/1996 |32 |
+15/10/1996 |20 |
+16/10/1996 |18 |
+17/10/1996 |35 |
+18/10/1996 |18 |
+19/10/1996 |31 |
+20/10/1996 |20 |
+21/10/1996 |21 |
+22/10/1996 |28 |
+23/10/1996 |18 |
+24/10/1996 |29 |
+25/10/1996 |30 |
+26/10/1996 |20 |
+27/10/1996 |25 |
+28/10/1996 |28 |
+29/10/1996 |24 |
+30/10/1996 |22 |
+31/10/1996 |23 |
+01/11/1996 |27 |
+02/11/1996 |26 |
+03/11/1996 |22 |
+04/11/1996 |19 |
+05/11/1996 |26 |
+06/11/1996 |31 |
+07/11/1996 |33 |
+08/11/1996 |32 |
+09/11/1996 |32 |
+10/11/1996 |33 |
+11/11/1996 |21 |
+12/11/1996 |30 |
+13/11/1996 |27 |
+14/11/1996 |28 |
+15/11/1996 |26 |
+16/11/1996 |30 |
+17/11/1996 |22 |
+18/11/1996 |30 |
+19/11/1996 |23 |
+20/11/1996 |34 |
+21/11/1996 |29 |
+22/11/1996 |20 |
+23/11/1996 |20 |
+24/11/1996 |24 |
+25/11/1996 |21 |
+26/11/1996 |31 |
+27/11/1996 |29 |
+28/11/1996 |25 |
+29/11/1996 |33 |
+30/11/1996 |24 |
+01/12/1996 |27 |
+02/12/1996 |34 |
+03/12/1996 |33 |
+04/12/1996 |27 |
+05/12/1996 |22 |
+06/12/1996 |27 |
+07/12/1996 |32 |
+08/12/1996 |30 |
+09/12/1996 |31 |
+10/12/1996 |29 |
+11/12/1996 |32 |
+12/12/1996 |32 |
+13/12/1996 |19 |
+14/12/1996 |18 |
+15/12/1996 |28 |
+16/12/1996 |21 |
+17/12/1996 |29 |
+18/12/1996 |27 |
+19/12/1996 |28 |
+20/12/1996 |29 |
+21/12/1996 |33 |
+22/12/1996 |27 |
+23/12/1996 |27 |
+24/12/1996 |32 |
+25/12/1996 |23 |
+26/12/1996 |21 |
+27/12/1996 |34 |
+28/12/1996 |25 |
+29/12/1996 |30 |
+30/12/1996 |26 |
+31/12/1996 |23 |
+01/01/1997 |33 |
+02/01/1997 |25 |
+03/01/1997 |31 |
+04/01/1997 |31 |
+05/01/1997 |34 |
+06/01/1997 |28 |
+07/01/1997 |29 |
+08/01/1997 |25 |
+09/01/1997 |30 |
+10/01/1997 |35 |
+11/01/1997 |36 |
+12/01/1997 |27 |
+13/01/1997 |27 |
+14/01/1997 |22 |
+15/01/1997 |19 |
+16/01/1997 |21 |
+17/01/1997 |26 |
+18/01/1997 |35 |
+19/01/1997 |20 |
+20/01/1997 |27 |
+21/01/1997 |33 |
+22/01/1997 |23 |
+23/01/1997 |26 |
+24/01/1997 |21 |
+25/01/1997 |30 |
+26/01/1997 |28 |
+27/01/1997 |27 |
+28/01/1997 |40 |
+29/01/1997 |34 |
+30/01/1997 |35 |
+31/01/1997 |24 |
+01/02/1997 |31 |
+02/02/1997 |31 |
+03/02/1997 |18 |
+04/02/1997 |23 |
+05/02/1997 |22 |
+06/02/1997 |27 |
+07/02/1997 |25 |
+08/02/1997 |29 |
+09/02/1997 |26 |
+10/02/1997 |20 |
+11/02/1997 |31 |
+12/02/1997 |18 |
+13/02/1997 |32 |
+14/02/1997 |33 |
+15/02/1997 |24 |
+16/02/1997 |25 |
+17/02/1997 |29 |
+18/02/1997 |19 |
+19/02/1997 |28 |
+20/02/1997 |26 |
+21/02/1997 |34 |
+22/02/1997 |28 |
+23/02/1997 |15 |
+24/02/1997 |31 |
+25/02/1997 |31 |
+26/02/1997 |26 |
+27/02/1997 |36 |
+28/02/1997 |33 |
+01/03/1997 |29 |
+02/03/1997 |33 |
+03/03/1997 |22 |
+04/03/1997 |28 |
+05/03/1997 |30 |
+06/03/1997 |23 |
+07/03/1997 |16 |
+08/03/1997 |22 |
+09/03/1997 |26 |
+10/03/1997 |21 |
+11/03/1997 |32 |
+12/03/1997 |21 |
+13/03/1997 |32 |
+14/03/1997 |27 |
+15/03/1997 |25 |
+16/03/1997 |25 |
+17/03/1997 |33 |
+18/03/1997 |29 |
+19/03/1997 |24 |
+20/03/1997 |30 |
+21/03/1997 |19 |
+22/03/1997 |29 |
+23/03/1997 |33 |
+24/03/1997 |41 |
+25/03/1997 |27 |
+26/03/1997 |37 |
+27/03/1997 |14 |
+28/03/1997 |35 |
+29/03/1997 |29 |
+30/03/1997 |24 |
+31/03/1997 |21 |
+01/04/1997 |14 |
+02/04/1997 |26 |
+03/04/1997 |25 |
+04/04/1997 |36 |
+05/04/1997 |26 |
+06/04/1997 |27 |
+07/04/1997 |27 |
+08/04/1997 |19 |
+09/04/1997 |29 |
+10/04/1997 |26 |
+11/04/1997 |32 |
+12/04/1997 |27 |
+13/04/1997 |35 |
+14/04/1997 |35 |
+15/04/1997 |30 |
+16/04/1997 |25 |
+17/04/1997 |27 |
+18/04/1997 |35 |
+19/04/1997 |30 |
+20/04/1997 |27 |
+21/04/1997 |27 |
+22/04/1997 |32 |
+23/04/1997 |30 |
+24/04/1997 |29 |
+25/04/1997 |33 |
+26/04/1997 |35 |
+27/04/1997 |30 |
+28/04/1997 |25 |
+29/04/1997 |27 |
+30/04/1997 |23 |
+01/05/1997 |18 |
+02/05/1997 |20 |
+03/05/1997 |26 |
+04/05/1997 |25 |
+05/05/1997 |36 |
+06/05/1997 |24 |
+07/05/1997 |29 |
+08/05/1997 |21 |
+09/05/1997 |26 |
+10/05/1997 |32 |
+11/05/1997 |32 |
+12/05/1997 |25 |
+13/05/1997 |33 |
+14/05/1997 |20 |
+15/05/1997 |29 |
+16/05/1997 |25 |
+17/05/1997 |21 |
+18/05/1997 |22 |
+19/05/1997 |34 |
+20/05/1997 |32 |
+21/05/1997 |22 |
+22/05/1997 |25 |
+23/05/1997 |37 |
+24/05/1997 |26 |
+25/05/1997 |25 |
+26/05/1997 |27 |
+27/05/1997 |25 |
+28/05/1997 |41 |
+29/05/1997 |25 |
+30/05/1997 |36 |
+31/05/1997 |26 |
+01/06/1997 |37 |
+02/06/1997 |26 |
+03/06/1997 |26 |
+04/06/1997 |21 |
+05/06/1997 |13 |
+06/06/1997 |29 |
+07/06/1997 |27 |
+08/06/1997 |37 |
+09/06/1997 |22 |
+10/06/1997 |29 |
+11/06/1997 |33 |
+12/06/1997 |29 |
+13/06/1997 |23 |
+14/06/1997 |21 |
+15/06/1997 |37 |
+16/06/1997 |22 |
+17/06/1997 |30 |
+18/06/1997 |29 |
+19/06/1997 |26 |
+20/06/1997 |19 |
+21/06/1997 |21 |
+22/06/1997 |37 |
+23/06/1997 |33 |
+24/06/1997 |20 |
+25/06/1997 |41 |
+26/06/1997 |40 |
+27/06/1997 |28 |
+28/06/1997 |34 |
+29/06/1997 |29 |
+30/06/1997 |28 |
+01/07/1997 |19 |
+02/07/1997 |37 |
+03/07/1997 |25 |
+04/07/1997 |27 |
+05/07/1997 |25 |
+06/07/1997 |28 |
+07/07/1997 |27 |
+08/07/1997 |26 |
+09/07/1997 |24 |
+10/07/1997 |24 |
+11/07/1997 |22 |
+12/07/1997 |23 |
+13/07/1997 |35 |
+14/07/1997 |22 |
+15/07/1997 |23 |
+16/07/1997 |25 |
+17/07/1997 |29 |
+18/07/1997 |21 |
+19/07/1997 |20 |
+20/07/1997 |27 |
+21/07/1997 |36 |
+22/07/1997 |28 |
+23/07/1997 |27 |
+24/07/1997 |35 |
+25/07/1997 |33 |
+26/07/1997 |18 |
+27/07/1997 |25 |
+28/07/1997 |19 |
+29/07/1997 |16 |
+30/07/1997 |28 |
+31/07/1997 |22 |
+01/08/1997 |31 |
+02/08/1997 |28 |
+03/08/1997 |28 |
+04/08/1997 |26 |
+05/08/1997 |28 |
+06/08/1997 |29 |
+07/08/1997 |30 |
+08/08/1997 |32 |
+09/08/1997 |31 |
+10/08/1997 |25 |
+11/08/1997 |24 |
+12/08/1997 |29 |
+13/08/1997 |21 |
+14/08/1997 |27 |
+15/08/1997 |21 |
+16/08/1997 |28 |
+17/08/1997 |27 |
+18/08/1997 |38 |
+19/08/1997 |33 |
+20/08/1997 |26 |
+21/08/1997 |27 |
+22/08/1997 |24 |
+23/08/1997 |22 |
+24/08/1997 |22 |
+25/08/1997 |22 |
+26/08/1997 |30 |
+27/08/1997 |31 |
+28/08/1997 |15 |
+29/08/1997 |23 |
+30/08/1997 |23 |
+31/08/1997 |31 |
+01/09/1997 |24 |
+02/09/1997 |31 |
+03/09/1997 |23 |
+04/09/1997 |32 |
+05/09/1997 |30 |
+06/09/1997 |35 |
+07/09/1997 |33 |
+08/09/1997 |30 |
+09/09/1997 |38 |
+10/09/1997 |31 |
+11/09/1997 |24 |
+12/09/1997 |25 |
+13/09/1997 |24 |
+14/09/1997 |27 |
+15/09/1997 |31 |
+16/09/1997 |21 |
+17/09/1997 |19 |
+18/09/1997 |17 |
+19/09/1997 |26 |
+20/09/1997 |35 |
+21/09/1997 |23 |
+22/09/1997 |21 |
+23/09/1997 |30 |
+24/09/1997 |28 |
+25/09/1997 |24 |
+26/09/1997 |26 |
+27/09/1997 |25 |
+28/09/1997 |28 |
+29/09/1997 |38 |
+30/09/1997 |38 |
+01/10/1997 |32 |
+02/10/1997 |36 |
+03/10/1997 |27 |
+04/10/1997 |28 |
+05/10/1997 |34 |
+06/10/1997 |32 |
+07/10/1997 |33 |
+08/10/1997 |17 |
+09/10/1997 |30 |
+10/10/1997 |32 |
+11/10/1997 |25 |
+12/10/1997 |27 |
+13/10/1997 |27 |
+14/10/1997 |27 |
+15/10/1997 |26 |
+16/10/1997 |28 |
+17/10/1997 |34 |
+18/10/1997 |32 |
+19/10/1997 |22 |
+20/10/1997 |27 |
+21/10/1997 |36 |
+22/10/1997 |22 |
+23/10/1997 |36 |
+24/10/1997 |31 |
+25/10/1997 |42 |
+26/10/1997 |25 |
+27/10/1997 |23 |
+28/10/1997 |27 |
+29/10/1997 |30 |
+30/10/1997 |28 |
+31/10/1997 |30 |
+01/11/1997 |26 |
+02/11/1997 |29 |
+03/11/1997 |20 |
+04/11/1997 |31 |
+05/11/1997 |30 |
+06/11/1997 |23 |
+07/11/1997 |31 |
+08/11/1997 |31 |
+09/11/1997 |30 |
+10/11/1997 |23 |
+11/11/1997 |38 |
+12/11/1997 |24 |
+13/11/1997 |24 |
+14/11/1997 |21 |
+15/11/1997 |27 |
+16/11/1997 |27 |
+17/11/1997 |23 |
+18/11/1997 |21 |
+19/11/1997 |15 |
+20/11/1997 |31 |
+21/11/1997 |31 |
+22/11/1997 |26 |
+23/11/1997 |28 |
+24/11/1997 |35 |
+25/11/1997 |23 |
+26/11/1997 |36 |
+27/11/1997 |28 |
+28/11/1997 |21 |
+29/11/1997 |29 |
+30/11/1997 |18 |
+01/12/1997 |30 |
+02/12/1997 |16 |
+03/12/1997 |30 |
+04/12/1997 |33 |
+05/12/1997 |33 |
+06/12/1997 |32 |
+07/12/1997 |16 |
+08/12/1997 |30 |
+09/12/1997 |23 |
+10/12/1997 |29 |
+11/12/1997 |21 |
+12/12/1997 |24 |
+13/12/1997 |25 |
+14/12/1997 |23 |
+15/12/1997 |29 |
+16/12/1997 |28 |
+17/12/1997 |26 |
+18/12/1997 |23 |
+19/12/1997 |20 |
+20/12/1997 |19 |
+21/12/1997 |33 |
+22/12/1997 |16 |
+23/12/1997 |28 |
+24/12/1997 |30 |
+25/12/1997 |26 |
+26/12/1997 |35 |
+27/12/1997 |30 |
+28/12/1997 |28 |
+29/12/1997 |23 |
+30/12/1997 |35 |
+31/12/1997 |24 |
+01/01/1998 |26 |
+02/01/1998 |20 |
+03/01/1998 |28 |
+04/01/1998 |36 |
+05/01/1998 |27 |
+06/01/1998 |21 |
+07/01/1998 |31 |
+08/01/1998 |36 |
+09/01/1998 |28 |
+10/01/1998 |26 |
+11/01/1998 |26 |
+12/01/1998 |21 |
+13/01/1998 |30 |
+14/01/1998 |30 |
+15/01/1998 |20 |
+16/01/1998 |26 |
+17/01/1998 |27 |
+18/01/1998 |33 |
+19/01/1998 |32 |
+20/01/1998 |23 |
+21/01/1998 |36 |
+22/01/1998 |29 |
+23/01/1998 |16 |
+24/01/1998 |26 |
+25/01/1998 |27 |
+26/01/1998 |27 |
+27/01/1998 |24 |
+28/01/1998 |23 |
+29/01/1998 |23 |
+30/01/1998 |25 |
+31/01/1998 |30 |
+01/02/1998 |25 |
+02/02/1998 |25 |
+03/02/1998 |20 |
+04/02/1998 |30 |
+05/02/1998 |28 |
+06/02/1998 |21 |
+07/02/1998 |31 |
+08/02/1998 |23 |
+09/02/1998 |23 |
+10/02/1998 |28 |
+11/02/1998 |27 |
+12/02/1998 |35 |
+13/02/1998 |20 |
+14/02/1998 |23 |
+15/02/1998 |34 |
+16/02/1998 |32 |
+17/02/1998 |25 |
+18/02/1998 |28 |
+19/02/1998 |25 |
+20/02/1998 |17 |
+21/02/1998 |35 |
+22/02/1998 |18 |
+23/02/1998 |28 |
+24/02/1998 |25 |
+25/02/1998 |27 |
+26/02/1998 |38 |
+27/02/1998 |33 |
+28/02/1998 |28 |
+01/03/1998 |33 |
+02/03/1998 |30 |
+03/03/1998 |25 |
+04/03/1998 |27 |
+05/03/1998 |17 |
+06/03/1998 |19 |
+07/03/1998 |25 |
+08/03/1998 |26 |
+09/03/1998 |26 |
+10/03/1998 |31 |
+11/03/1998 |23 |
+12/03/1998 |25 |
+13/03/1998 |28 |
+14/03/1998 |34 |
+15/03/1998 |22 |
+16/03/1998 |28 |
+17/03/1998 |30 |
+18/03/1998 |32 |
+19/03/1998 |21 |
+20/03/1998 |23 |
+21/03/1998 |31 |
+22/03/1998 |36 |
+23/03/1998 |37 |
+24/03/1998 |27 |
+25/03/1998 |33 |
+26/03/1998 |29 |
+27/03/1998 |26 |
+28/03/1998 |38 |
+29/03/1998 |34 |
+30/03/1998 |21 |
+31/03/1998 |30 |
+01/04/1998 |30 |
+02/04/1998 |22 |
+03/04/1998 |28 |
+04/04/1998 |23 |
+05/04/1998 |29 |
+06/04/1998 |23 |
+07/04/1998 |33 |
+08/04/1998 |26 |
+09/04/1998 |28 |
+10/04/1998 |28 |
+11/04/1998 |21 |
+12/04/1998 |37 |
+13/04/1998 |22 |
+14/04/1998 |31 |
+15/04/1998 |34 |
+16/04/1998 |27 |
+17/04/1998 |26 |
+18/04/1998 |30 |
+19/04/1998 |27 |
+20/04/1998 |23 |
+21/04/1998 |29 |
+22/04/1998 |26 |
+23/04/1998 |32 |
+24/04/1998 |27 |
+25/04/1998 |24 |
+26/04/1998 |27 |
+27/04/1998 |31 |
+28/04/1998 |32 |
+29/04/1998 |35 |
+30/04/1998 |26 |
+01/05/1998 |27 |
+02/05/1998 |26 |
+03/05/1998 |38 |
+04/05/1998 |22 |
+05/05/1998 |26 |
+06/05/1998 |31 |
+07/05/1998 |38 |
+08/05/1998 |33 |
+09/05/1998 |31 |
+10/05/1998 |26 |
+11/05/1998 |34 |
+12/05/1998 |16 |
+13/05/1998 |29 |
+14/05/1998 |24 |
+15/05/1998 |23 |
+16/05/1998 |32 |
+17/05/1998 |21 |
+18/05/1998 |22 |
+19/05/1998 |33 |
+20/05/1998 |27 |
+21/05/1998 |23 |
+22/05/1998 |37 |
+23/05/1998 |36 |
+24/05/1998 |19 |
+25/05/1998 |27 |
+26/05/1998 |29 |
+27/05/1998 |23 |
+28/05/1998 |26 |
+29/05/1998 |28 |
+30/05/1998 |23 |
+31/05/1998 |40 |
+01/06/1998 |31 |
+02/06/1998 |24 |
+03/06/1998 |24 |
+04/06/1998 |27 |
+05/06/1998 |24 |
+06/06/1998 |27 |
+07/06/1998 |29 |
+08/06/1998 |29 |
+09/06/1998 |24 |
+10/06/1998 |26 |
+11/06/1998 |32 |
+12/06/1998 |19 |
+13/06/1998 |37 |
+14/06/1998 |27 |
+15/06/1998 |27 |
+16/06/1998 |32 |
+17/06/1998 |20 |
+18/06/1998 |27 |
+19/06/1998 |26 |
+20/06/1998 |23 |
+21/06/1998 |34 |
+22/06/1998 |33 |
+23/06/1998 |31 |
+24/06/1998 |34 |
+25/06/1998 |22 |
+26/06/1998 |30 |
+27/06/1998 |32 |
+28/06/1998 |17 |
+29/06/1998 |29 |
+30/06/1998 |27 |
+01/07/1998 |23 |
+02/07/1998 |38 |
+03/07/1998 |26 |
+04/07/1998 |29 |
+05/07/1998 |22 |
+06/07/1998 |35 |
+07/07/1998 |27 |
+08/07/1998 |20 |
+09/07/1998 |29 |
+10/07/1998 |29 |
+11/07/1998 |28 |
+12/07/1998 |38 |
+13/07/1998 |20 |
+14/07/1998 |27 |
+15/07/1998 |26 |
+16/07/1998 |30 |
+17/07/1998 |25 |
+18/07/1998 |31 |
+19/07/1998 |32 |
+20/07/1998 |31 |
+21/07/1998 |26 |
+22/07/1998 |36 |
+23/07/1998 |29 |
+24/07/1998 |21 |
+25/07/1998 |21 |
+26/07/1998 |23 |
+27/07/1998 |26 |
+28/07/1998 |43 |
+29/07/1998 |28 |
+30/07/1998 |34 |
+31/07/1998 |25 |
+01/08/1998 |27 |
+02/08/1998 |24 |
+03/08/1998 |23 |
+04/08/1998 |22 |
+05/08/1998 |18 |
+06/08/1998 |28 |
+07/08/1998 |24 |
+08/08/1998 |17 |
+09/08/1998 |28 |
+10/08/1998 |24 |
+11/08/1998 |25 |
+12/08/1998 |20 |
+13/08/1998 |22 |
+14/08/1998 |32 |
+15/08/1998 |25 |
+16/08/1998 |33 |
+17/08/1998 |33 |
+18/08/1998 |25 |
+19/08/1998 |22 |
+20/08/1998 |29 |
+21/08/1998 |30 |
+22/08/1998 |38 |
+23/08/1998 |25 |
+24/08/1998 |34 |
+25/08/1998 |30 |
+26/08/1998 |27 |
+27/08/1998 |21 |
+28/08/1998 |27 |
+29/08/1998 |26 |
+30/08/1998 |32 |
+31/08/1998 |28 |
+01/09/1998 |29 |
+02/09/1998 |36 |
+03/09/1998 |27 |
+04/09/1998 |28 |
+05/09/1998 |26 |
+06/09/1998 |22 |
+07/09/1998 |22 |
+08/09/1998 |16 |
+09/09/1998 |36 |
+10/09/1998 |31 |
+11/09/1998 |23 |
+12/09/1998 |22 |
+13/09/1998 |30 |
+14/09/1998 |19 |
+15/09/1998 |33 |
+16/09/1998 |32 |
+17/09/1998 |26 |
+18/09/1998 |28 |
+19/09/1998 |26 |
+20/09/1998 |25 |
+21/09/1998 |32 |
+22/09/1998 |30 |
+23/09/1998 |33 |
+24/09/1998 |24 |
+25/09/1998 |36 |
+26/09/1998 |28 |
+27/09/1998 |25 |
+28/09/1998 |18 |
+29/09/1998 |25 |
+30/09/1998 |26 |
+01/10/1998 |29 |
+02/10/1998 |24 |
+03/10/1998 |29 |
+04/10/1998 |31 |
+05/10/1998 |25 |
+06/10/1998 |35 |
+07/10/1998 |24 |
+08/10/1998 |29 |
+09/10/1998 |24 |
+10/10/1998 |23 |
+11/10/1998 |22 |
+12/10/1998 |23 |
+13/10/1998 |23 |
+14/10/1998 |34 |
+15/10/1998 |24 |
+16/10/1998 |34 |
+17/10/1998 |22 |
+18/10/1998 |31 |
+19/10/1998 |29 |
+20/10/1998 |37 |
+21/10/1998 |27 |
+22/10/1998 |26 |
+23/10/1998 |30 |
+24/10/1998 |31 |
+25/10/1998 |32 |
+26/10/1998 |24 |
+27/10/1998 |31 |
+28/10/1998 |34 |
+29/10/1998 |31 |
+30/10/1998 |31 |
+31/10/1998 |25 |
+01/11/1998 |22 |
+02/11/1998 |23 |
+03/11/1998 |31 |
+04/11/1998 |38 |
+05/11/1998 |30 |
+06/11/1998 |22 |
+07/11/1998 |15 |
+08/11/1998 |34 |
+09/11/1998 |33 |
+10/11/1998 |24 |
+11/11/1998 |34 |
+12/11/1998 |25 |
+13/11/1998 |14 |
+14/11/1998 |26 |
+15/11/1998 |20 |
+16/11/1998 |25 |
+17/11/1998 |32 |
+18/11/1998 |32 |
+19/11/1998 |24 |
+20/11/1998 |31 |
+21/11/1998 |18 |
+22/11/1998 |38 |
+23/11/1998 |25 |
+24/11/1998 |38 |
+25/11/1998 |32 |
+26/11/1998 |31 |
+27/11/1998 |27 |
+28/11/1998 |26 |
+29/11/1998 |21 |
+30/11/1998 |21 |
+01/12/1998 |26 |
+02/12/1998 |26 |
+03/12/1998 |24 |
+04/12/1998 |27 |
+05/12/1998 |33 |
+06/12/1998 |27 |
+07/12/1998 |30 |
+08/12/1998 |33 |
+09/12/1998 |28 |
+10/12/1998 |15 |
+11/12/1998 |35 |
+12/12/1998 |32 |
+13/12/1998 |28 |
+14/12/1998 |34 |
+15/12/1998 |25 |
+16/12/1998 |20 |
+17/12/1998 |31 |
+18/12/1998 |18 |
+19/12/1998 |25 |
+20/12/1998 |24 |
+21/12/1998 |26 |
+22/12/1998 |31 |
+23/12/1998 |19 |
+24/12/1998 |22 |
+25/12/1998 |27 |
+26/12/1998 |27 |
+27/12/1998 |21 |
+28/12/1998 |28 |
+29/12/1998 |29 |
+30/12/1998 |26 |
+31/12/1998 |23 |
+01/01/1999 |24 |
+02/01/1999 |36 |
+03/01/1999 |32 |
+04/01/1999 |20 |
+05/01/1999 |27 |
+06/01/1999 |25 |
+07/01/1999 |28 |
+08/01/1999 |26 |
+09/01/1999 |20 |
+10/01/1999 |26 |
+11/01/1999 |42 |
+12/01/1999 |23 |
+13/01/1999 |20 |
+14/01/1999 |30 |
+15/01/1999 |32 |
+16/01/1999 |22 |
+17/01/1999 |21 |
+18/01/1999 |21 |
+19/01/1999 |26 |
+20/01/1999 |24 |
+21/01/1999 |20 |
+22/01/1999 |29 |
+23/01/1999 |29 |
+24/01/1999 |39 |
+25/01/1999 |25 |
+26/01/1999 |33 |
+27/01/1999 |31 |
+28/01/1999 |24 |
+29/01/1999 |20 |
+30/01/1999 |33 |
+31/01/1999 |35 |
+01/02/1999 |32 |
+02/02/1999 |23 |
+03/02/1999 |28 |
+04/02/1999 |21 |
+05/02/1999 |24 |
+06/02/1999 |30 |
+07/02/1999 |28 |
+08/02/1999 |20 |
+09/02/1999 |39 |
+10/02/1999 |25 |
+11/02/1999 |25 |
+12/02/1999 |21 |
+13/02/1999 |32 |
+14/02/1999 |35 |
+15/02/1999 |23 |
+16/02/1999 |28 |
+17/02/1999 |25 |
+18/02/1999 |29 |
+19/02/1999 |29 |
+20/02/1999 |33 |
+21/02/1999 |29 |
+22/02/1999 |20 |
+23/02/1999 |24 |
+24/02/1999 |24 |
+25/02/1999 |24 |
+26/02/1999 |32 |
+27/02/1999 |26 |
+28/02/1999 |28 |
+01/03/1999 |25 |
+02/03/1999 |25 |
+03/03/1999 |27 |
+04/03/1999 |28 |
+05/03/1999 |26 |
+06/03/1999 |29 |
+07/03/1999 |31 |
+08/03/1999 |30 |
+09/03/1999 |27 |
+10/03/1999 |31 |
+11/03/1999 |26 |
+12/03/1999 |13 |
+13/03/1999 |22 |
+14/03/1999 |30 |
+15/03/1999 |18 |
+16/03/1999 |42 |
+17/03/1999 |22 |
+18/03/1999 |35 |
+19/03/1999 |24 |
+20/03/1999 |22 |
+21/03/1999 |34 |
+22/03/1999 |21 |
+23/03/1999 |24 |
+24/03/1999 |36 |
+25/03/1999 |20 |
+26/03/1999 |37 |
+27/03/1999 |36 |
+28/03/1999 |31 |
+29/03/1999 |24 |
+30/03/1999 |18 |
+31/03/1999 |28 |
+01/04/1999 |20 |
+02/04/1999 |19 |
+03/04/1999 |22 |
+04/04/1999 |20 |
+05/04/1999 |25 |
+06/04/1999 |24 |
+07/04/1999 |32 |
+08/04/1999 |18 |
+09/04/1999 |30 |
+10/04/1999 |32 |
+11/04/1999 |29 |
+12/04/1999 |22 |
+13/04/1999 |25 |
+14/04/1999 |30 |
+15/04/1999 |24 |
+16/04/1999 |16 |
+17/04/1999 |32 |
+18/04/1999 |30 |
+19/04/1999 |31 |
+20/04/1999 |17 |
+21/04/1999 |24 |
+22/04/1999 |24 |
+23/04/1999 |35 |
+24/04/1999 |34 |
+25/04/1999 |25 |
+26/04/1999 |28 |
+27/04/1999 |23 |
+28/04/1999 |32 |
+29/04/1999 |19 |
+30/04/1999 |22 |
+01/05/1999 |26 |
+02/05/1999 |36 |
+03/05/1999 |34 |
+04/05/1999 |23 |
+05/05/1999 |26 |
+06/05/1999 |29 |
+07/05/1999 |25 |
+08/05/1999 |21 |
+09/05/1999 |42 |
+10/05/1999 |18 |
+11/05/1999 |31 |
+12/05/1999 |26 |
+13/05/1999 |20 |
+14/05/1999 |24 |
+15/05/1999 |28 |
+16/05/1999 |27 |
+17/05/1999 |35 |
+18/05/1999 |24 |
+19/05/1999 |33 |
+20/05/1999 |28 |
+21/05/1999 |32 |
+22/05/1999 |32 |
+23/05/1999 |34 |
+24/05/1999 |26 |
+25/05/1999 |26 |
+26/05/1999 |36 |
+27/05/1999 |32 |
+28/05/1999 |28 |
+29/05/1999 |26 |
+30/05/1999 |27 |
+31/05/1999 |33 |
+01/06/1999 |29 |
+02/06/1999 |27 |
+03/06/1999 |25 |
+04/06/1999 |21 |
+05/06/1999 |35 |
+06/06/1999 |22 |
+07/06/1999 |22 |
+08/06/1999 |34 |
+09/06/1999 |25 |
+10/06/1999 |26 |
+11/06/1999 |29 |
+12/06/1999 |28 |
+13/06/1999 |37 |
+14/06/1999 |31 |
+15/06/1999 |28 |
+16/06/1999 |26 |
+17/06/1999 |33 |
+18/06/1999 |27 |
+19/06/1999 |32 |
+20/06/1999 |31 |
+21/06/1999 |29 |
+22/06/1999 |26 |
+23/06/1999 |29 |
+24/06/1999 |28 |
+25/06/1999 |28 |
+26/06/1999 |21 |
+27/06/1999 |28 |
+28/06/1999 |35 |
+29/06/1999 |26 |
+30/06/1999 |32 |
+01/07/1999 |28 |
+02/07/1999 |29 |
+03/07/1999 |25 |
+04/07/1999 |27 |
+05/07/1999 |35 |
+06/07/1999 |23 |
+07/07/1999 |25 |
+08/07/1999 |23 |
+09/07/1999 |29 |
+10/07/1999 |25 |
+11/07/1999 |19 |
+12/07/1999 |26 |
+13/07/1999 |17 |
+14/07/1999 |28 |
+15/07/1999 |27 |
+16/07/1999 |30 |
+17/07/1999 |23 |
+18/07/1999 |21 |
+19/07/1999 |25 |
+20/07/1999 |33 |
+21/07/1999 |26 |
+22/07/1999 |23 |
+23/07/1999 |24 |
+24/07/1999 |19 |
+25/07/1999 |29 |
+26/07/1999 |30 |
+27/07/1999 |19 |
+28/07/1999 |22 |
+29/07/1999 |26 |
+30/07/1999 |31 |
+31/07/1999 |23 |
+01/08/1999 |27 |
+02/08/1999 |28 |
+03/08/1999 |30 |
+04/08/1999 |37 |
+05/08/1999 |17 |
+06/08/1999 |25 |
+07/08/1999 |21 |
+08/08/1999 |36 |
+09/08/1999 |34 |
+10/08/1999 |25 |
+11/08/1999 |35 |
+12/08/1999 |27 |
+13/08/1999 |21 |
+14/08/1999 |24 |
+15/08/1999 |21 |
+16/08/1999 |42 |
+17/08/1999 |30 |
+18/08/1999 |28 |
+19/08/1999 |29 |
+20/08/1999 |25 |
+21/08/1999 |27 |
+22/08/1999 |22 |
+23/08/1999 |34 |
+24/08/1999 |26 |
+25/08/1999 |31 |
+26/08/1999 |29 |
+27/08/1999 |30 |
+28/08/1999 |28 |
+29/08/1999 |34 |
+30/08/1999 |37 |
+31/08/1999 |22 |
+01/09/1999 |40 |
+02/09/1999 |38 |
+03/09/1999 |27 |
+04/09/1999 |28 |
+05/09/1999 |38 |
+06/09/1999 |30 |
+07/09/1999 |32 |
+08/09/1999 |19 |
+09/09/1999 |26 |
+10/09/1999 |24 |
+11/09/1999 |25 |
+12/09/1999 |27 |
+13/09/1999 |20 |
+14/09/1999 |26 |
+15/09/1999 |29 |
+16/09/1999 |31 |
+17/09/1999 |23 |
+18/09/1999 |29 |
+19/09/1999 |34 |
+20/09/1999 |32 |
+21/09/1999 |30 |
+22/09/1999 |28 |
+23/09/1999 |25 |
+24/09/1999 |26 |
+25/09/1999 |20 |
+26/09/1999 |23 |
+27/09/1999 |19 |
+28/09/1999 |33 |
+29/09/1999 |33 |
+30/09/1999 |25 |
+01/10/1999 |26 |
+02/10/1999 |38 |
+03/10/1999 |24 |
+04/10/1999 |25 |
+05/10/1999 |27 |
+06/10/1999 |26 |
+07/10/1999 |24 |
+08/10/1999 |30 |
+09/10/1999 |30 |
+10/10/1999 |35 |
+11/10/1999 |17 |
+12/10/1999 |25 |
+13/10/1999 |17 |
+14/10/1999 |32 |
+15/10/1999 |31 |
+16/10/1999 |21 |
+17/10/1999 |30 |
+18/10/1999 |32 |
+19/10/1999 |40 |
+20/10/1999 |18 |
+21/10/1999 |31 |
+22/10/1999 |23 |
+23/10/1999 |32 |
+24/10/1999 |21 |
+25/10/1999 |29 |
+26/10/1999 |22 |
+27/10/1999 |30 |
+28/10/1999 |26 |
+29/10/1999 |24 |
+30/10/1999 |20 |
+31/10/1999 |32 |
+01/11/1999 |28 |
+02/11/1999 |32 |
+03/11/1999 |24 |
+04/11/1999 |24 |
+05/11/1999 |26 |
+06/11/1999 |33 |
+07/11/1999 |32 |
+08/11/1999 |30 |
+09/11/1999 |22 |
+10/11/1999 |29 |
+11/11/1999 |28 |
+12/11/1999 |28 |
+13/11/1999 |38 |
+14/11/1999 |19 |
+15/11/1999 |32 |
+16/11/1999 |24 |
+17/11/1999 |29 |
+18/11/1999 |32 |
+19/11/1999 |21 |
+20/11/1999 |26 |
+21/11/1999 |34 |
+22/11/1999 |23 |
+23/11/1999 |26 |
+24/11/1999 |24 |
+25/11/1999 |33 |
+26/11/1999 |22 |
+27/11/1999 |21 |
+28/11/1999 |37 |
+29/11/1999 |27 |
+30/11/1999 |26 |
+01/12/1999 |30 |
+02/12/1999 |30 |
+03/12/1999 |22 |
+04/12/1999 |26 |
+05/12/1999 |47 |
+06/12/1999 |26 |
+07/12/1999 |26 |
+08/12/1999 |39 |
+09/12/1999 |34 |
+10/12/1999 |22 |
+11/12/1999 |26 |
+12/12/1999 |29 |
+13/12/1999 |30 |
+14/12/1999 |27 |
+15/12/1999 |28 |
+16/12/1999 |26 |
+17/12/1999 |33 |
+18/12/1999 |27 |
+19/12/1999 |29 |
+20/12/1999 |29 |
+21/12/1999 |22 |
+22/12/1999 |29 |
+23/12/1999 |32 |
+24/12/1999 |31 |
+25/12/1999 |17 |
+26/12/1999 |20 |
+27/12/1999 |30 |
+28/12/1999 |19 |
+29/12/1999 |41 |
+30/12/1999 |20 |
+31/12/1999 |29 |
+<All> |40000 |
+
+
+That took 1.994 seconds.
+
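+# Conceptually, the grouped output above is just a count per distinct date
+# value in the column. A rough equivalent in plain Python, with hypothetical
+# data (not the SOOMv0 implementation):
+>>> import datetime
+>>> dates = [datetime.date(1996, 1, 1), datetime.date(1996, 1, 2),
+...          datetime.date(1996, 1, 1)]
+>>> freq = {}
+>>> for d in dates:
+...     freq[d] = freq.get(d, 0) + 1
+...
+>>> sorted(freq.items())
+[(datetime.date(1996, 1, 1), 2), (datetime.date(1996, 1, 2), 1)]
+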
+##############################################################################
+
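+# Before the next step: outtrans is a per-column output translation hook, so
+# assigning a formatting function changes only how values are displayed. A
+# minimal sketch of such a function in plain Python (hypothetical - the real
+# fulldate is defined in the SOOMv0 module):
+>>> import datetime
+>>> def my_fulldate(d):
+...     return d.strftime("%A, %d %B %Y")
+...
+>>> my_fulldate(datetime.date(1996, 1, 1))
+'Monday, 01 January 1996'
+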
+#22b. You can change the date output format: fulldate is a function object defined in the SOOMv0 module.
+>>> nhds.metadata.randomdate.outtrans = fulldate
+>>> print nhds.univar(var1="randomdate",printit=1,allcalc=1)
+
+Univariate summary derived from randomdate column in nhds dataset
+univar(): Univariate summary created in 0.383 seconds.
+
+ | |
+ | |
+Grouped | |
+by | |
+Random date |Frequency|
+----------------------------|---------|
+Monday, 01 January 1996 |26 |
+Tuesday, 02 January 1996 |24 |
+Wednesday, 03 January 1996 |35 |
+Thursday, 04 January 1996 |21 |
+Friday, 05 January 1996 |20 |
+Saturday, 06 January 1996 |20 |
+Sunday, 07 January 1996 |32 |
+Monday, 08 January 1996 |24 |
+Tuesday, 09 January 1996 |39 |
+Wednesday, 10 January 1996 |21 |
+Thursday, 11 January 1996 |24 |
+Friday, 12 January 1996 |21 |
+Saturday, 13 January 1996 |25 |
+Sunday, 14 January 1996 |29 |
+Monday, 15 January 1996 |30 |
+Tuesday, 16 January 1996 |21 |
+Wednesday, 17 January 1996 |23 |
+Thursday, 18 January 1996 |35 |
+Friday, 19 January 1996 |17 |
+Saturday, 20 January 1996 |19 |
+Sunday, 21 January 1996 |33 |
+Monday, 22 January 1996 |31 |
+Tuesday, 23 January 1996 |36 |
+Wednesday, 24 January 1996 |32 |
+Thursday, 25 January 1996 |22 |
+Friday, 26 January 1996 |29 |
+Saturday, 27 January 1996 |17 |
+Sunday, 28 January 1996 |24 |
+Monday, 29 January 1996 |29 |
+Tuesday, 30 January 1996 |32 |
+Wednesday, 31 January 1996 |28 |
+Thursday, 01 February 1996 |23 |
+Friday, 02 February 1996 |35 |
+Saturday, 03 February 1996 |27 |
+Sunday, 04 February 1996 |40 |
+Monday, 05 February 1996 |21 |
+Tuesday, 06 February 1996 |27 |
+Wednesday, 07 February 1996 |32 |
+Thursday, 08 February 1996 |20 |
+Friday, 09 February 1996 |27 |
+Saturday, 10 February 1996 |20 |
+Sunday, 11 February 1996 |34 |
+Monday, 12 February 1996 |20 |
+Tuesday, 13 February 1996 |21 |
+Wednesday, 14 February 1996 |32 |
+Thursday, 15 February 1996 |27 |
+Friday, 16 February 1996 |31 |
+Saturday, 17 February 1996 |28 |
+Sunday, 18 February 1996 |29 |
+Monday, 19 February 1996 |25 |
+Tuesday, 20 February 1996 |24 |
+Wednesday, 21 February 1996 |24 |
+Thursday, 22 February 1996 |28 |
+Friday, 23 February 1996 |24 |
+Saturday, 24 February 1996 |25 |
+Sunday, 25 February 1996 |25 |
+Monday, 26 February 1996 |30 |
+Tuesday, 27 February 1996 |25 |
+Wednesday, 28 February 1996 |38 |
+Thursday, 29 February 1996 |25 |
+Friday, 01 March 1996 |26 |
+Saturday, 02 March 1996 |32 |
+Sunday, 03 March 1996 |21 |
+Monday, 04 March 1996 |26 |
+Tuesday, 05 March 1996 |30 |
+Wednesday, 06 March 1996 |27 |
+Thursday, 07 March 1996 |25 |
+Friday, 08 March 1996 |33 |
+Saturday, 09 March 1996 |16 |
+Sunday, 10 March 1996 |32 |
+Monday, 11 March 1996 |30 |
+Tuesday, 12 March 1996 |26 |
+Wednesday, 13 March 1996 |32 |
+Thursday, 14 March 1996 |21 |
+Friday, 15 March 1996 |29 |
+Saturday, 16 March 1996 |25 |
+Sunday, 17 March 1996 |24 |
+Monday, 18 March 1996 |19 |
+Tuesday, 19 March 1996 |22 |
+Wednesday, 20 March 1996 |30 |
+Thursday, 21 March 1996 |22 |
+Friday, 22 March 1996 |39 |
+Saturday, 23 March 1996 |19 |
+Sunday, 24 March 1996 |35 |
+Monday, 25 March 1996 |28 |
+Tuesday, 26 March 1996 |29 |
+Wednesday, 27 March 1996 |31 |
+Thursday, 28 March 1996 |20 |
+Friday, 29 March 1996 |30 |
+Saturday, 30 March 1996 |39 |
+Sunday, 31 March 1996 |21 |
+Monday, 01 April 1996 |20 |
+Tuesday, 02 April 1996 |22 |
+Wednesday, 03 April 1996 |32 |
+Thursday, 04 April 1996 |43 |
+Friday, 05 April 1996 |29 |
+Saturday, 06 April 1996 |33 |
+Sunday, 07 April 1996 |30 |
+Monday, 08 April 1996 |34 |
+Tuesday, 09 April 1996 |31 |
+Wednesday, 10 April 1996 |33 |
+Thursday, 11 April 1996 |33 |
+Friday, 12 April 1996 |32 |
+Saturday, 13 April 1996 |22 |
+Sunday, 14 April 1996 |25 |
+Monday, 15 April 1996 |25 |
+Tuesday, 16 April 1996 |30 |
+Wednesday, 17 April 1996 |29 |
+Thursday, 18 April 1996 |27 |
+Friday, 19 April 1996 |28 |
+Saturday, 20 April 1996 |30 |
+Sunday, 21 April 1996 |30 |
+Monday, 22 April 1996 |33 |
+Tuesday, 23 April 1996 |29 |
+Wednesday, 24 April 1996 |33 |
+Thursday, 25 April 1996 |18 |
+Friday, 26 April 1996 |28 |
+Saturday, 27 April 1996 |21 |
+Sunday, 28 April 1996 |25 |
+Monday, 29 April 1996 |30 |
+Tuesday, 30 April 1996 |29 |
+Wednesday, 01 May 1996 |32 |
+Thursday, 02 May 1996 |39 |
+Friday, 03 May 1996 |29 |
+Saturday, 04 May 1996 |25 |
+Sunday, 05 May 1996 |27 |
+Monday, 06 May 1996 |23 |
+Tuesday, 07 May 1996 |33 |
+Wednesday, 08 May 1996 |22 |
+Thursday, 09 May 1996 |34 |
+Friday, 10 May 1996 |31 |
+Saturday, 11 May 1996 |38 |
+Sunday, 12 May 1996 |26 |
+Monday, 13 May 1996 |29 |
+Tuesday, 14 May 1996 |18 |
+Wednesday, 15 May 1996 |28 |
+Thursday, 16 May 1996 |29 |
+Friday, 17 May 1996 |27 |
+Saturday, 18 May 1996 |33 |
+Sunday, 19 May 1996 |31 |
+Monday, 20 May 1996 |27 |
+Tuesday, 21 May 1996 |23 |
+Wednesday, 22 May 1996 |30 |
+Thursday, 23 May 1996 |30 |
+Friday, 24 May 1996 |23 |
+Saturday, 25 May 1996 |30 |
+Sunday, 26 May 1996 |31 |
+Monday, 27 May 1996 |39 |
+Tuesday, 28 May 1996 |25 |
+Wednesday, 29 May 1996 |20 |
+Thursday, 30 May 1996 |23 |
+Friday, 31 May 1996 |34 |
+Saturday, 01 June 1996 |27 |
+Sunday, 02 June 1996 |23 |
+Monday, 03 June 1996 |31 |
+Tuesday, 04 June 1996 |28 |
+Wednesday, 05 June 1996 |27 |
+Thursday, 06 June 1996 |28 |
+Friday, 07 June 1996 |33 |
+Saturday, 08 June 1996 |30 |
+Sunday, 09 June 1996 |24 |
+Monday, 10 June 1996 |31 |
+Tuesday, 11 June 1996 |30 |
+Wednesday, 12 June 1996 |18 |
+Thursday, 13 June 1996 |16 |
+Friday, 14 June 1996 |37 |
+Saturday, 15 June 1996 |31 |
+Sunday, 16 June 1996 |24 |
+Monday, 17 June 1996 |18 |
+Tuesday, 18 June 1996 |21 |
+Wednesday, 19 June 1996 |33 |
+Thursday, 20 June 1996 |21 |
+Friday, 21 June 1996 |21 |
+Saturday, 22 June 1996 |26 |
+Sunday, 23 June 1996 |29 |
+Monday, 24 June 1996 |24 |
+Tuesday, 25 June 1996 |31 |
+Wednesday, 26 June 1996 |35 |
+Thursday, 27 June 1996 |27 |
+Friday, 28 June 1996 |25 |
+Saturday, 29 June 1996 |35 |
+Sunday, 30 June 1996 |33 |
+Monday, 01 July 1996 |24 |
+Tuesday, 02 July 1996 |30 |
+Wednesday, 03 July 1996 |32 |
+Thursday, 04 July 1996 |20 |
+Friday, 05 July 1996 |30 |
+Saturday, 06 July 1996 |33 |
+Sunday, 07 July 1996 |30 |
+Monday, 08 July 1996 |34 |
+Tuesday, 09 July 1996 |34 |
+Wednesday, 10 July 1996 |29 |
+Thursday, 11 July 1996 |21 |
+Friday, 12 July 1996 |27 |
+Saturday, 13 July 1996 |34 |
+Sunday, 14 July 1996 |27 |
+Monday, 15 July 1996 |26 |
+Tuesday, 16 July 1996 |23 |
+Wednesday, 17 July 1996 |35 |
+Thursday, 18 July 1996 |22 |
+Friday, 19 July 1996 |32 |
+Saturday, 20 July 1996 |26 |
+Sunday, 21 July 1996 |32 |
+Monday, 22 July 1996 |25 |
+Tuesday, 23 July 1996 |26 |
+Wednesday, 24 July 1996 |29 |
+Thursday, 25 July 1996 |37 |
+Friday, 26 July 1996 |16 |
+Saturday, 27 July 1996 |30 |
+Sunday, 28 July 1996 |31 |
+Monday, 29 July 1996 |28 |
+Tuesday, 30 July 1996 |33 |
+Wednesday, 31 July 1996 |41 |
+Thursday, 01 August 1996 |33 |
+Friday, 02 August 1996 |29 |
+Saturday, 03 August 1996 |25 |
+Sunday, 04 August 1996 |29 |
+Monday, 05 August 1996 |34 |
+Tuesday, 06 August 1996 |26 |
+Wednesday, 07 August 1996 |24 |
+Thursday, 08 August 1996 |28 |
+Friday, 09 August 1996 |26 |
+Saturday, 10 August 1996 |23 |
+Sunday, 11 August 1996 |29 |
+Monday, 12 August 1996 |19 |
+Tuesday, 13 August 1996 |27 |
+Wednesday, 14 August 1996 |32 |
+Thursday, 15 August 1996 |24 |
+Friday, 16 August 1996 |24 |
+Saturday, 17 August 1996 |27 |
+Sunday, 18 August 1996 |25 |
+Monday, 19 August 1996 |22 |
+Tuesday, 20 August 1996 |32 |
+Wednesday, 21 August 1996 |29 |
+Thursday, 22 August 1996 |24 |
+Friday, 23 August 1996 |28 |
+Saturday, 24 August 1996 |29 |
+Sunday, 25 August 1996 |23 |
+Monday, 26 August 1996 |29 |
+Tuesday, 27 August 1996 |24 |
+Wednesday, 28 August 1996 |21 |
+Thursday, 29 August 1996 |24 |
+Friday, 30 August 1996 |23 |
+Saturday, 31 August 1996 |24 |
+Sunday, 01 September 1996 |29 |
+Monday, 02 September 1996 |18 |
+Tuesday, 03 September 1996 |20 |
+Wednesday, 04 September 1996|24 |
+Thursday, 05 September 1996 |20 |
+Friday, 06 September 1996 |34 |
+Saturday, 07 September 1996 |24 |
+Sunday, 08 September 1996 |26 |
+Monday, 09 September 1996 |33 |
+Tuesday, 10 September 1996 |40 |
+Wednesday, 11 September 1996|27 |
+Thursday, 12 September 1996 |29 |
+Friday, 13 September 1996 |22 |
+Saturday, 14 September 1996 |28 |
+Sunday, 15 September 1996 |23 |
+Monday, 16 September 1996 |29 |
+Tuesday, 17 September 1996 |26 |
+Wednesday, 18 September 1996|23 |
+Thursday, 19 September 1996 |26 |
+Friday, 20 September 1996 |29 |
+Saturday, 21 September 1996 |28 |
+Sunday, 22 September 1996 |29 |
+Monday, 23 September 1996 |24 |
+Tuesday, 24 September 1996 |28 |
+Wednesday, 25 September 1996|25 |
+Thursday, 26 September 1996 |30 |
+Friday, 27 September 1996 |29 |
+Saturday, 28 September 1996 |24 |
+Sunday, 29 September 1996 |34 |
+Monday, 30 September 1996 |28 |
+Tuesday, 01 October 1996 |31 |
+Wednesday, 02 October 1996 |28 |
+Thursday, 03 October 1996 |32 |
+Friday, 04 October 1996 |23 |
+Saturday, 05 October 1996 |30 |
+Sunday, 06 October 1996 |17 |
+Monday, 07 October 1996 |30 |
+Tuesday, 08 October 1996 |30 |
+Wednesday, 09 October 1996 |26 |
+Thursday, 10 October 1996 |30 |
+Friday, 11 October 1996 |22 |
+Saturday, 12 October 1996 |29 |
+Sunday, 13 October 1996 |29 |
+Monday, 14 October 1996 |32 |
+Tuesday, 15 October 1996 |20 |
+Wednesday, 16 October 1996 |18 |
+Thursday, 17 October 1996 |35 |
+Friday, 18 October 1996 |18 |
+Saturday, 19 October 1996 |31 |
+Sunday, 20 October 1996 |20 |
+Monday, 21 October 1996 |21 |
+Tuesday, 22 October 1996 |28 |
+Wednesday, 23 October 1996 |18 |
+Thursday, 24 October 1996 |29 |
+Friday, 25 October 1996 |30 |
+Saturday, 26 October 1996 |20 |
+Sunday, 27 October 1996 |25 |
+Monday, 28 October 1996 |28 |
+Tuesday, 29 October 1996 |24 |
+Wednesday, 30 October 1996 |22 |
+Thursday, 31 October 1996 |23 |
+Friday, 01 November 1996 |27 |
+Saturday, 02 November 1996 |26 |
+Sunday, 03 November 1996 |22 |
+Monday, 04 November 1996 |19 |
+Tuesday, 05 November 1996 |26 |
+Wednesday, 06 November 1996 |31 |
+Thursday, 07 November 1996 |33 |
+Friday, 08 November 1996 |32 |
+Saturday, 09 November 1996 |32 |
+Sunday, 10 November 1996 |33 |
+Monday, 11 November 1996 |21 |
+Tuesday, 12 November 1996 |30 |
+Wednesday, 13 November 1996 |27 |
+Thursday, 14 November 1996 |28 |
+Friday, 15 November 1996 |26 |
+Saturday, 16 November 1996 |30 |
+Sunday, 17 November 1996 |22 |
+Monday, 18 November 1996 |30 |
+Tuesday, 19 November 1996 |23 |
+Wednesday, 20 November 1996 |34 |
+Thursday, 21 November 1996 |29 |
+Friday, 22 November 1996 |20 |
+Saturday, 23 November 1996 |20 |
+Sunday, 24 November 1996 |24 |
+Monday, 25 November 1996 |21 |
+Tuesday, 26 November 1996 |31 |
+Wednesday, 27 November 1996 |29 |
+Thursday, 28 November 1996 |25 |
+Friday, 29 November 1996 |33 |
+Saturday, 30 November 1996 |24 |
+Sunday, 01 December 1996 |27 |
+Monday, 02 December 1996 |34 |
+Tuesday, 03 December 1996 |33 |
+Wednesday, 04 December 1996 |27 |
+Thursday, 05 December 1996 |22 |
+Friday, 06 December 1996 |27 |
+Saturday, 07 December 1996 |32 |
+Sunday, 08 December 1996 |30 |
+Monday, 09 December 1996 |31 |
+Tuesday, 10 December 1996 |29 |
+Wednesday, 11 December 1996 |32 |
+Thursday, 12 December 1996 |32 |
+Friday, 13 December 1996 |19 |
+Saturday, 14 December 1996 |18 |
+Sunday, 15 December 1996 |28 |
+Monday, 16 December 1996 |21 |
+Tuesday, 17 December 1996 |29 |
+Wednesday, 18 December 1996 |27 |
+Thursday, 19 December 1996 |28 |
+Friday, 20 December 1996 |29 |
+Saturday, 21 December 1996 |33 |
+Sunday, 22 December 1996 |27 |
+Monday, 23 December 1996 |27 |
+Tuesday, 24 December 1996 |32 |
+Wednesday, 25 December 1996 |23 |
+Thursday, 26 December 1996 |21 |
+Friday, 27 December 1996 |34 |
+Saturday, 28 December 1996 |25 |
+Sunday, 29 December 1996 |30 |
+Monday, 30 December 1996 |26 |
+Tuesday, 31 December 1996 |23 |
+Wednesday, 01 January 1997 |33 |
+Thursday, 02 January 1997 |25 |
+Friday, 03 January 1997 |31 |
+Saturday, 04 January 1997 |31 |
+Sunday, 05 January 1997 |34 |
+Monday, 06 January 1997 |28 |
+Tuesday, 07 January 1997 |29 |
+Wednesday, 08 January 1997 |25 |
+Thursday, 09 January 1997 |30 |
+Friday, 10 January 1997 |35 |
+Saturday, 11 January 1997 |36 |
+Sunday, 12 January 1997 |27 |
+Monday, 13 January 1997 |27 |
+Tuesday, 14 January 1997 |22 |
+Wednesday, 15 January 1997 |19 |
+Thursday, 16 January 1997 |21 |
+Friday, 17 January 1997 |26 |
+Saturday, 18 January 1997 |35 |
+Sunday, 19 January 1997 |20 |
+Monday, 20 January 1997 |27 |
+Tuesday, 21 January 1997 |33 |
+Wednesday, 22 January 1997 |23 |
+Thursday, 23 January 1997 |26 |
+Friday, 24 January 1997 |21 |
+Saturday, 25 January 1997 |30 |
+Sunday, 26 January 1997 |28 |
+Monday, 27 January 1997 |27 |
+Tuesday, 28 January 1997 |40 |
+Wednesday, 29 January 1997 |34 |
+Thursday, 30 January 1997 |35 |
+Friday, 31 January 1997 |24 |
+Saturday, 01 February 1997 |31 |
+Sunday, 02 February 1997 |31 |
+Monday, 03 February 1997 |18 |
+Tuesday, 04 February 1997 |23 |
+Wednesday, 05 February 1997 |22 |
+Thursday, 06 February 1997 |27 |
+Friday, 07 February 1997 |25 |
+Saturday, 08 February 1997 |29 |
+Sunday, 09 February 1997 |26 |
+Monday, 10 February 1997 |20 |
+Tuesday, 11 February 1997 |31 |
+Wednesday, 12 February 1997 |18 |
+Thursday, 13 February 1997 |32 |
+Friday, 14 February 1997 |33 |
+Saturday, 15 February 1997 |24 |
+Sunday, 16 February 1997 |25 |
+Monday, 17 February 1997 |29 |
+Tuesday, 18 February 1997 |19 |
+Wednesday, 19 February 1997 |28 |
+Thursday, 20 February 1997 |26 |
+Friday, 21 February 1997 |34 |
+Saturday, 22 February 1997 |28 |
+Sunday, 23 February 1997 |15 |
+Monday, 24 February 1997 |31 |
+Tuesday, 25 February 1997 |31 |
+Wednesday, 26 February 1997 |26 |
+Thursday, 27 February 1997 |36 |
+Friday, 28 February 1997 |33 |
+Saturday, 01 March 1997 |29 |
+Sunday, 02 March 1997 |33 |
+Monday, 03 March 1997 |22 |
+Tuesday, 04 March 1997 |28 |
+Wednesday, 05 March 1997 |30 |
+Thursday, 06 March 1997 |23 |
+Friday, 07 March 1997 |16 |
+Saturday, 08 March 1997 |22 |
+Sunday, 09 March 1997 |26 |
+Monday, 10 March 1997 |21 |
+Tuesday, 11 March 1997 |32 |
+Wednesday, 12 March 1997 |21 |
+Thursday, 13 March 1997 |32 |
+Friday, 14 March 1997 |27 |
+Saturday, 15 March 1997 |25 |
+Sunday, 16 March 1997 |25 |
+Monday, 17 March 1997 |33 |
+Tuesday, 18 March 1997 |29 |
+Wednesday, 19 March 1997 |24 |
+Thursday, 20 March 1997 |30 |
+Friday, 21 March 1997 |19 |
+Saturday, 22 March 1997 |29 |
+Sunday, 23 March 1997 |33 |
+Monday, 24 March 1997 |41 |
+Tuesday, 25 March 1997 |27 |
+Wednesday, 26 March 1997 |37 |
+Thursday, 27 March 1997 |14 |
+Friday, 28 March 1997 |35 |
+Saturday, 29 March 1997 |29 |
+Sunday, 30 March 1997 |24 |
+Monday, 31 March 1997 |21 |
+Tuesday, 01 April 1997 |14 |
+Wednesday, 02 April 1997 |26 |
+Thursday, 03 April 1997 |25 |
+Friday, 04 April 1997 |36 |
+Saturday, 05 April 1997 |26 |
+Sunday, 06 April 1997 |27 |
+Monday, 07 April 1997 |27 |
+Tuesday, 08 April 1997 |19 |
+Wednesday, 09 April 1997 |29 |
+Thursday, 10 April 1997 |26 |
+Friday, 11 April 1997 |32 |
+Saturday, 12 April 1997 |27 |
+Sunday, 13 April 1997 |35 |
+Monday, 14 April 1997 |35 |
+Tuesday, 15 April 1997 |30 |
+Wednesday, 16 April 1997 |25 |
+Thursday, 17 April 1997 |27 |
+Friday, 18 April 1997 |35 |
+Saturday, 19 April 1997 |30 |
+Sunday, 20 April 1997 |27 |
+Monday, 21 April 1997 |27 |
+Tuesday, 22 April 1997 |32 |
+Wednesday, 23 April 1997 |30 |
+Thursday, 24 April 1997 |29 |
+Friday, 25 April 1997 |33 |
+Saturday, 26 April 1997 |35 |
+Sunday, 27 April 1997 |30 |
+Monday, 28 April 1997 |25 |
+Tuesday, 29 April 1997 |27 |
+Wednesday, 30 April 1997 |23 |
+Thursday, 01 May 1997 |18 |
+Friday, 02 May 1997 |20 |
+Saturday, 03 May 1997 |26 |
+Sunday, 04 May 1997 |25 |
+Monday, 05 May 1997 |36 |
+Tuesday, 06 May 1997 |24 |
+Wednesday, 07 May 1997 |29 |
+Thursday, 08 May 1997 |21 |
+Friday, 09 May 1997 |26 |
+Saturday, 10 May 1997 |32 |
+Sunday, 11 May 1997 |32 |
+Monday, 12 May 1997 |25 |
+Tuesday, 13 May 1997 |33 |
+Wednesday, 14 May 1997 |20 |
+Thursday, 15 May 1997 |29 |
+Friday, 16 May 1997 |25 |
+Saturday, 17 May 1997 |21 |
+Sunday, 18 May 1997 |22 |
+Monday, 19 May 1997 |34 |
+Tuesday, 20 May 1997 |32 |
+Wednesday, 21 May 1997 |22 |
+Thursday, 22 May 1997 |25 |
+Friday, 23 May 1997 |37 |
+Saturday, 24 May 1997 |26 |
+Sunday, 25 May 1997 |25 |
+Monday, 26 May 1997 |27 |
+Tuesday, 27 May 1997 |25 |
+Wednesday, 28 May 1997 |41 |
+Thursday, 29 May 1997 |25 |
+Friday, 30 May 1997 |36 |
+Saturday, 31 May 1997 |26 |
+Sunday, 01 June 1997 |37 |
+Monday, 02 June 1997 |26 |
+Tuesday, 03 June 1997 |26 |
+Wednesday, 04 June 1997 |21 |
+Thursday, 05 June 1997 |13 |
+Friday, 06 June 1997 |29 |
+Saturday, 07 June 1997 |27 |
+Sunday, 08 June 1997 |37 |
+Monday, 09 June 1997 |22 |
+Tuesday, 10 June 1997 |29 |
+Wednesday, 11 June 1997 |33 |
+Thursday, 12 June 1997 |29 |
+Friday, 13 June 1997 |23 |
+Saturday, 14 June 1997 |21 |
+Sunday, 15 June 1997 |37 |
+Monday, 16 June 1997 |22 |
+Tuesday, 17 June 1997 |30 |
+Wednesday, 18 June 1997 |29 |
+Thursday, 19 June 1997 |26 |
+Friday, 20 June 1997 |19 |
+Saturday, 21 June 1997 |21 |
+Sunday, 22 June 1997 |37 |
+Monday, 23 June 1997 |33 |
+Tuesday, 24 June 1997 |20 |
+Wednesday, 25 June 1997 |41 |
+Thursday, 26 June 1997 |40 |
+Friday, 27 June 1997 |28 |
+Saturday, 28 June 1997 |34 |
+Sunday, 29 June 1997 |29 |
+Monday, 30 June 1997 |28 |
+Tuesday, 01 July 1997 |19 |
+Wednesday, 02 July 1997 |37 |
+Thursday, 03 July 1997 |25 |
+Friday, 04 July 1997 |27 |
+Saturday, 05 July 1997 |25 |
+Sunday, 06 July 1997 |28 |
+Monday, 07 July 1997 |27 |
+Tuesday, 08 July 1997 |26 |
+Wednesday, 09 July 1997 |24 |
+Thursday, 10 July 1997 |24 |
+Friday, 11 July 1997 |22 |
+Saturday, 12 July 1997 |23 |
+Sunday, 13 July 1997 |35 |
+Monday, 14 July 1997 |22 |
+Tuesday, 15 July 1997 |23 |
+Wednesday, 16 July 1997 |25 |
+Thursday, 17 July 1997 |29 |
+Friday, 18 July 1997 |21 |
+Saturday, 19 July 1997 |20 |
+Sunday, 20 July 1997 |27 |
+Monday, 21 July 1997 |36 |
+Tuesday, 22 July 1997 |28 |
+Wednesday, 23 July 1997 |27 |
+Thursday, 24 July 1997 |35 |
+Friday, 25 July 1997 |33 |
+Saturday, 26 July 1997 |18 |
+Sunday, 27 July 1997 |25 |
+Monday, 28 July 1997 |19 |
+Tuesday, 29 July 1997 |16 |
+Wednesday, 30 July 1997 |28 |
+Thursday, 31 July 1997 |22 |
+Friday, 01 August 1997 |31 |
+Saturday, 02 August 1997 |28 |
+Sunday, 03 August 1997 |28 |
+Monday, 04 August 1997 |26 |
+Tuesday, 05 August 1997 |28 |
+Wednesday, 06 August 1997 |29 |
+Thursday, 07 August 1997 |30 |
+Friday, 08 August 1997 |32 |
+Saturday, 09 August 1997 |31 |
+Sunday, 10 August 1997 |25 |
+Monday, 11 August 1997 |24 |
+Tuesday, 12 August 1997 |29 |
+Wednesday, 13 August 1997 |21 |
+Thursday, 14 August 1997 |27 |
+Friday, 15 August 1997 |21 |
+Saturday, 16 August 1997 |28 |
+Sunday, 17 August 1997 |27 |
+Monday, 18 August 1997 |38 |
+Tuesday, 19 August 1997 |33 |
+Wednesday, 20 August 1997 |26 |
+Thursday, 21 August 1997 |27 |
+Friday, 22 August 1997 |24 |
+Saturday, 23 August 1997 |22 |
+Sunday, 24 August 1997 |22 |
+Monday, 25 August 1997 |22 |
+Tuesday, 26 August 1997 |30 |
+Wednesday, 27 August 1997 |31 |
+Thursday, 28 August 1997 |15 |
+Friday, 29 August 1997 |23 |
+Saturday, 30 August 1997 |23 |
+Sunday, 31 August 1997 |31 |
+Monday, 01 September 1997 |24 |
+Tuesday, 02 September 1997 |31 |
+Wednesday, 03 September 1997|23 |
+Thursday, 04 September 1997 |32 |
+Friday, 05 September 1997 |30 |
+Saturday, 06 September 1997 |35 |
+Sunday, 07 September 1997 |33 |
+Monday, 08 September 1997 |30 |
+Tuesday, 09 September 1997 |38 |
+Wednesday, 10 September 1997|31 |
+Thursday, 11 September 1997 |24 |
+Friday, 12 September 1997 |25 |
+Saturday, 13 September 1997 |24 |
+Sunday, 14 September 1997 |27 |
+Monday, 15 September 1997 |31 |
+Tuesday, 16 September 1997 |21 |
+Wednesday, 17 September 1997|19 |
+Thursday, 18 September 1997 |17 |
+Friday, 19 September 1997 |26 |
+Saturday, 20 September 1997 |35 |
+Sunday, 21 September 1997 |23 |
+Monday, 22 September 1997 |21 |
+Tuesday, 23 September 1997 |30 |
+Wednesday, 24 September 1997|28 |
+Thursday, 25 September 1997 |24 |
+Friday, 26 September 1997 |26 |
+Saturday, 27 September 1997 |25 |
+Sunday, 28 September 1997 |28 |
+Monday, 29 September 1997 |38 |
+Tuesday, 30 September 1997 |38 |
+Wednesday, 01 October 1997 |32 |
+Thursday, 02 October 1997 |36 |
+Friday, 03 October 1997 |27 |
+Saturday, 04 October 1997 |28 |
+Sunday, 05 October 1997 |34 |
+Monday, 06 October 1997 |32 |
+Tuesday, 07 October 1997 |33 |
+Wednesday, 08 October 1997 |17 |
+Thursday, 09 October 1997 |30 |
+Friday, 10 October 1997 |32 |
+Saturday, 11 October 1997 |25 |
+Sunday, 12 October 1997 |27 |
+Monday, 13 October 1997 |27 |
+Tuesday, 14 October 1997 |27 |
+Wednesday, 15 October 1997 |26 |
+Thursday, 16 October 1997 |28 |
+Friday, 17 October 1997 |34 |
+Saturday, 18 October 1997 |32 |
+Sunday, 19 October 1997 |22 |
+Monday, 20 October 1997 |27 |
+Tuesday, 21 October 1997 |36 |
+Wednesday, 22 October 1997 |22 |
+Thursday, 23 October 1997 |36 |
+Friday, 24 October 1997 |31 |
+Saturday, 25 October 1997 |42 |
+Sunday, 26 October 1997 |25 |
+Monday, 27 October 1997 |23 |
+Tuesday, 28 October 1997 |27 |
+Wednesday, 29 October 1997 |30 |
+Thursday, 30 October 1997 |28 |
+Friday, 31 October 1997 |30 |
+Saturday, 01 November 1997 |26 |
+Sunday, 02 November 1997 |29 |
+Monday, 03 November 1997 |20 |
+Tuesday, 04 November 1997 |31 |
+Wednesday, 05 November 1997 |30 |
+Thursday, 06 November 1997 |23 |
+Friday, 07 November 1997 |31 |
+Saturday, 08 November 1997 |31 |
+Sunday, 09 November 1997 |30 |
+Monday, 10 November 1997 |23 |
+Tuesday, 11 November 1997 |38 |
+Wednesday, 12 November 1997 |24 |
+Thursday, 13 November 1997 |24 |
+Friday, 14 November 1997 |21 |
+Saturday, 15 November 1997 |27 |
+Sunday, 16 November 1997 |27 |
+Monday, 17 November 1997 |23 |
+Tuesday, 18 November 1997 |21 |
+Wednesday, 19 November 1997 |15 |
+Thursday, 20 November 1997 |31 |
+Friday, 21 November 1997 |31 |
+Saturday, 22 November 1997 |26 |
+Sunday, 23 November 1997 |28 |
+Monday, 24 November 1997 |35 |
+Tuesday, 25 November 1997 |23 |
+Wednesday, 26 November 1997 |36 |
+Thursday, 27 November 1997 |28 |
+Friday, 28 November 1997 |21 |
+Saturday, 29 November 1997 |29 |
+Sunday, 30 November 1997 |18 |
+Monday, 01 December 1997 |30 |
+Tuesday, 02 December 1997 |16 |
+Wednesday, 03 December 1997 |30 |
+Thursday, 04 December 1997 |33 |
+Friday, 05 December 1997 |33 |
+Saturday, 06 December 1997 |32 |
+Sunday, 07 December 1997 |16 |
+Monday, 08 December 1997 |30 |
+Tuesday, 09 December 1997 |23 |
+Wednesday, 10 December 1997 |29 |
+Thursday, 11 December 1997 |21 |
+Friday, 12 December 1997 |24 |
+Saturday, 13 December 1997 |25 |
+Sunday, 14 December 1997 |23 |
+Monday, 15 December 1997 |29 |
+Tuesday, 16 December 1997 |28 |
+Wednesday, 17 December 1997 |26 |
+Thursday, 18 December 1997 |23 |
+Friday, 19 December 1997 |20 |
+Saturday, 20 December 1997 |19 |
+Sunday, 21 December 1997 |33 |
+Monday, 22 December 1997 |16 |
+Tuesday, 23 December 1997 |28 |
+Wednesday, 24 December 1997 |30 |
+Thursday, 25 December 1997 |26 |
+Friday, 26 December 1997 |35 |
+Saturday, 27 December 1997 |30 |
+Sunday, 28 December 1997 |28 |
+Monday, 29 December 1997 |23 |
+Tuesday, 30 December 1997 |35 |
+Wednesday, 31 December 1997 |24 |
+Thursday, 01 January 1998 |26 |
+Friday, 02 January 1998 |20 |
+Saturday, 03 January 1998 |28 |
+Sunday, 04 January 1998 |36 |
+Monday, 05 January 1998 |27 |
+Tuesday, 06 January 1998 |21 |
+Wednesday, 07 January 1998 |31 |
+Thursday, 08 January 1998 |36 |
+Friday, 09 January 1998 |28 |
+Saturday, 10 January 1998 |26 |
+Sunday, 11 January 1998 |26 |
+Monday, 12 January 1998 |21 |
+Tuesday, 13 January 1998 |30 |
+Wednesday, 14 January 1998 |30 |
+Thursday, 15 January 1998 |20 |
+Friday, 16 January 1998 |26 |
+Saturday, 17 January 1998 |27 |
+Sunday, 18 January 1998 |33 |
+Monday, 19 January 1998 |32 |
+Tuesday, 20 January 1998 |23 |
+Wednesday, 21 January 1998 |36 |
+Thursday, 22 January 1998 |29 |
+Friday, 23 January 1998 |16 |
+Saturday, 24 January 1998 |26 |
+Sunday, 25 January 1998 |27 |
+Monday, 26 January 1998 |27 |
+Tuesday, 27 January 1998 |24 |
+Wednesday, 28 January 1998 |23 |
+Thursday, 29 January 1998 |23 |
+Friday, 30 January 1998 |25 |
+Saturday, 31 January 1998 |30 |
+Sunday, 01 February 1998 |25 |
+Monday, 02 February 1998 |25 |
+Tuesday, 03 February 1998 |20 |
+Wednesday, 04 February 1998 |30 |
+Thursday, 05 February 1998 |28 |
+Friday, 06 February 1998 |21 |
+Saturday, 07 February 1998 |31 |
+Sunday, 08 February 1998 |23 |
+Monday, 09 February 1998 |23 |
+Tuesday, 10 February 1998 |28 |
+Wednesday, 11 February 1998 |27 |
+Thursday, 12 February 1998 |35 |
+Friday, 13 February 1998 |20 |
+Saturday, 14 February 1998 |23 |
+Sunday, 15 February 1998 |34 |
+Monday, 16 February 1998 |32 |
+Tuesday, 17 February 1998 |25 |
+Wednesday, 18 February 1998 |28 |
+Thursday, 19 February 1998 |25 |
+Friday, 20 February 1998 |17 |
+Saturday, 21 February 1998 |35 |
+Sunday, 22 February 1998 |18 |
+Monday, 23 February 1998 |28 |
+Tuesday, 24 February 1998 |25 |
+Wednesday, 25 February 1998 |27 |
+Thursday, 26 February 1998 |38 |
+Friday, 27 February 1998 |33 |
+Saturday, 28 February 1998 |28 |
+Sunday, 01 March 1998 |33 |
+Monday, 02 March 1998 |30 |
+Tuesday, 03 March 1998 |25 |
+Wednesday, 04 March 1998 |27 |
+Thursday, 05 March 1998 |17 |
+Friday, 06 March 1998 |19 |
+Saturday, 07 March 1998 |25 |
+Sunday, 08 March 1998 |26 |
+Monday, 09 March 1998 |26 |
+Tuesday, 10 March 1998 |31 |
+Wednesday, 11 March 1998 |23 |
+Thursday, 12 March 1998 |25 |
+Friday, 13 March 1998 |28 |
+Saturday, 14 March 1998 |34 |
+Sunday, 15 March 1998 |22 |
+Monday, 16 March 1998 |28 |
+Tuesday, 17 March 1998 |30 |
+Wednesday, 18 March 1998 |32 |
+Thursday, 19 March 1998 |21 |
+Friday, 20 March 1998 |23 |
+Saturday, 21 March 1998 |31 |
+Sunday, 22 March 1998 |36 |
+Monday, 23 March 1998 |37 |
+Tuesday, 24 March 1998 |27 |
+Wednesday, 25 March 1998 |33 |
+Thursday, 26 March 1998 |29 |
+Friday, 27 March 1998 |26 |
+Saturday, 28 March 1998 |38 |
+Sunday, 29 March 1998 |34 |
+Monday, 30 March 1998 |21 |
+Tuesday, 31 March 1998 |30 |
+Wednesday, 01 April 1998 |30 |
+Thursday, 02 April 1998 |22 |
+Friday, 03 April 1998 |28 |
+Saturday, 04 April 1998 |23 |
+Sunday, 05 April 1998 |29 |
+Monday, 06 April 1998 |23 |
+Tuesday, 07 April 1998 |33 |
+Wednesday, 08 April 1998 |26 |
+Thursday, 09 April 1998 |28 |
+Friday, 10 April 1998 |28 |
+Saturday, 11 April 1998 |21 |
+Sunday, 12 April 1998 |37 |
+Monday, 13 April 1998 |22 |
+Tuesday, 14 April 1998 |31 |
+Wednesday, 15 April 1998 |34 |
+Thursday, 16 April 1998 |27 |
+Friday, 17 April 1998 |26 |
+Saturday, 18 April 1998 |30 |
+Sunday, 19 April 1998 |27 |
+Monday, 20 April 1998 |23 |
+Tuesday, 21 April 1998 |29 |
+Wednesday, 22 April 1998 |26 |
+Thursday, 23 April 1998 |32 |
+Friday, 24 April 1998 |27 |
+Saturday, 25 April 1998 |24 |
+Sunday, 26 April 1998 |27 |
+Monday, 27 April 1998 |31 |
+Tuesday, 28 April 1998 |32 |
+Wednesday, 29 April 1998 |35 |
+Thursday, 30 April 1998 |26 |
+Friday, 01 May 1998 |27 |
+Saturday, 02 May 1998 |26 |
+Sunday, 03 May 1998 |38 |
+Monday, 04 May 1998 |22 |
+Tuesday, 05 May 1998 |26 |
+Wednesday, 06 May 1998 |31 |
+Thursday, 07 May 1998 |38 |
+Friday, 08 May 1998 |33 |
+Saturday, 09 May 1998 |31 |
+Sunday, 10 May 1998 |26 |
+Monday, 11 May 1998 |34 |
+Tuesday, 12 May 1998 |16 |
+Wednesday, 13 May 1998 |29 |
+Thursday, 14 May 1998 |24 |
+Friday, 15 May 1998 |23 |
+Saturday, 16 May 1998 |32 |
+Sunday, 17 May 1998 |21 |
+Monday, 18 May 1998 |22 |
+Tuesday, 19 May 1998 |33 |
+Wednesday, 20 May 1998 |27 |
+Thursday, 21 May 1998 |23 |
+Friday, 22 May 1998 |37 |
+Saturday, 23 May 1998 |36 |
+Sunday, 24 May 1998 |19 |
+Monday, 25 May 1998 |27 |
+Tuesday, 26 May 1998 |29 |
+Wednesday, 27 May 1998 |23 |
+Thursday, 28 May 1998 |26 |
+Friday, 29 May 1998 |28 |
+Saturday, 30 May 1998 |23 |
+Sunday, 31 May 1998 |40 |
+Monday, 01 June 1998 |31 |
+Tuesday, 02 June 1998 |24 |
+Wednesday, 03 June 1998 |24 |
+Thursday, 04 June 1998 |27 |
+Friday, 05 June 1998 |24 |
+Saturday, 06 June 1998 |27 |
+Sunday, 07 June 1998 |29 |
+Monday, 08 June 1998 |29 |
+Tuesday, 09 June 1998 |24 |
+Wednesday, 10 June 1998 |26 |
+Thursday, 11 June 1998 |32 |
+Friday, 12 June 1998 |19 |
+Saturday, 13 June 1998 |37 |
+Sunday, 14 June 1998 |27 |
+Monday, 15 June 1998 |27 |
+Tuesday, 16 June 1998 |32 |
+Wednesday, 17 June 1998 |20 |
+Thursday, 18 June 1998 |27 |
+Friday, 19 June 1998 |26 |
+Saturday, 20 June 1998 |23 |
+Sunday, 21 June 1998 |34 |
+Monday, 22 June 1998 |33 |
+Tuesday, 23 June 1998 |31 |
+Wednesday, 24 June 1998 |34 |
+Thursday, 25 June 1998 |22 |
+Friday, 26 June 1998 |30 |
+Saturday, 27 June 1998 |32 |
+Sunday, 28 June 1998 |17 |
+Monday, 29 June 1998 |29 |
+Tuesday, 30 June 1998 |27 |
+Wednesday, 01 July 1998 |23 |
+Thursday, 02 July 1998 |38 |
+Friday, 03 July 1998 |26 |
+Saturday, 04 July 1998 |29 |
+Sunday, 05 July 1998 |22 |
+Monday, 06 July 1998 |35 |
+Tuesday, 07 July 1998 |27 |
+Wednesday, 08 July 1998 |20 |
+Thursday, 09 July 1998 |29 |
+Friday, 10 July 1998 |29 |
+Saturday, 11 July 1998 |28 |
+Sunday, 12 July 1998 |38 |
+Monday, 13 July 1998 |20 |
+Tuesday, 14 July 1998 |27 |
+Wednesday, 15 July 1998 |26 |
+Thursday, 16 July 1998 |30 |
+Friday, 17 July 1998 |25 |
+Saturday, 18 July 1998 |31 |
+Sunday, 19 July 1998 |32 |
+Monday, 20 July 1998 |31 |
+Tuesday, 21 July 1998 |26 |
+Wednesday, 22 July 1998 |36 |
+Thursday, 23 July 1998 |29 |
+Friday, 24 July 1998 |21 |
+Saturday, 25 July 1998 |21 |
+Sunday, 26 July 1998 |23 |
+Monday, 27 July 1998 |26 |
+Tuesday, 28 July 1998 |43 |
+Wednesday, 29 July 1998 |28 |
+Thursday, 30 July 1998 |34 |
+Friday, 31 July 1998 |25 |
+Saturday, 01 August 1998 |27 |
+Sunday, 02 August 1998 |24 |
+Monday, 03 August 1998 |23 |
+Tuesday, 04 August 1998 |22 |
+Wednesday, 05 August 1998 |18 |
+Thursday, 06 August 1998 |28 |
+Friday, 07 August 1998 |24 |
+Saturday, 08 August 1998 |17 |
+Sunday, 09 August 1998 |28 |
+Monday, 10 August 1998 |24 |
+Tuesday, 11 August 1998 |25 |
+Wednesday, 12 August 1998 |20 |
+Thursday, 13 August 1998 |22 |
+Friday, 14 August 1998 |32 |
+Saturday, 15 August 1998 |25 |
+Sunday, 16 August 1998 |33 |
+Monday, 17 August 1998 |33 |
+Tuesday, 18 August 1998 |25 |
+Wednesday, 19 August 1998 |22 |
+Thursday, 20 August 1998 |29 |
+Friday, 21 August 1998 |30 |
+Saturday, 22 August 1998 |38 |
+Sunday, 23 August 1998 |25 |
+Monday, 24 August 1998 |34 |
+Tuesday, 25 August 1998 |30 |
+Wednesday, 26 August 1998 |27 |
+Thursday, 27 August 1998 |21 |
+Friday, 28 August 1998 |27 |
+Saturday, 29 August 1998 |26 |
+Sunday, 30 August 1998 |32 |
+Monday, 31 August 1998 |28 |
+Tuesday, 01 September 1998 |29 |
+Wednesday, 02 September 1998|36 |
+Thursday, 03 September 1998 |27 |
+Friday, 04 September 1998 |28 |
+Saturday, 05 September 1998 |26 |
+Sunday, 06 September 1998 |22 |
+Monday, 07 September 1998 |22 |
+Tuesday, 08 September 1998 |16 |
+Wednesday, 09 September 1998|36 |
+Thursday, 10 September 1998 |31 |
+Friday, 11 September 1998 |23 |
+Saturday, 12 September 1998 |22 |
+Sunday, 13 September 1998 |30 |
+Monday, 14 September 1998 |19 |
+Tuesday, 15 September 1998 |33 |
+Wednesday, 16 September 1998|32 |
+Thursday, 17 September 1998 |26 |
+Friday, 18 September 1998 |28 |
+Saturday, 19 September 1998 |26 |
+Sunday, 20 September 1998 |25 |
+Monday, 21 September 1998 |32 |
+Tuesday, 22 September 1998 |30 |
+Wednesday, 23 September 1998|33 |
+Thursday, 24 September 1998 |24 |
+Friday, 25 September 1998 |36 |
+Saturday, 26 September 1998 |28 |
+Sunday, 27 September 1998 |25 |
+Monday, 28 September 1998 |18 |
+Tuesday, 29 September 1998 |25 |
+Wednesday, 30 September 1998|26 |
+Thursday, 01 October 1998 |29 |
+Friday, 02 October 1998 |24 |
+Saturday, 03 October 1998 |29 |
+Sunday, 04 October 1998 |31 |
+Monday, 05 October 1998 |25 |
+Tuesday, 06 October 1998 |35 |
+Wednesday, 07 October 1998 |24 |
+Thursday, 08 October 1998 |29 |
+Friday, 09 October 1998 |24 |
+Saturday, 10 October 1998 |23 |
+Sunday, 11 October 1998 |22 |
+Monday, 12 October 1998 |23 |
+Tuesday, 13 October 1998 |23 |
+Wednesday, 14 October 1998 |34 |
+Thursday, 15 October 1998 |24 |
+Friday, 16 October 1998 |34 |
+Saturday, 17 October 1998 |22 |
+Sunday, 18 October 1998 |31 |
+Monday, 19 October 1998 |29 |
+Tuesday, 20 October 1998 |37 |
+Wednesday, 21 October 1998 |27 |
+Thursday, 22 October 1998 |26 |
+Friday, 23 October 1998 |30 |
+Saturday, 24 October 1998 |31 |
+Sunday, 25 October 1998 |32 |
+Monday, 26 October 1998 |24 |
+Tuesday, 27 October 1998 |31 |
+Wednesday, 28 October 1998 |34 |
+Thursday, 29 October 1998 |31 |
+Friday, 30 October 1998 |31 |
+Saturday, 31 October 1998 |25 |
+Sunday, 01 November 1998 |22 |
+Monday, 02 November 1998 |23 |
+Tuesday, 03 November 1998 |31 |
+Wednesday, 04 November 1998 |38 |
+Thursday, 05 November 1998 |30 |
+Friday, 06 November 1998 |22 |
+Saturday, 07 November 1998 |15 |
+Sunday, 08 November 1998 |34 |
+Monday, 09 November 1998 |33 |
+Tuesday, 10 November 1998 |24 |
+Wednesday, 11 November 1998 |34 |
+Thursday, 12 November 1998 |25 |
+Friday, 13 November 1998 |14 |
+Saturday, 14 November 1998 |26 |
+Sunday, 15 November 1998 |20 |
+Monday, 16 November 1998 |25 |
+Tuesday, 17 November 1998 |32 |
+Wednesday, 18 November 1998 |32 |
+Thursday, 19 November 1998 |24 |
+Friday, 20 November 1998 |31 |
+Saturday, 21 November 1998 |18 |
+Sunday, 22 November 1998 |38 |
+Monday, 23 November 1998 |25 |
+Tuesday, 24 November 1998 |38 |
+Wednesday, 25 November 1998 |32 |
+Thursday, 26 November 1998 |31 |
+Friday, 27 November 1998 |27 |
+Saturday, 28 November 1998 |26 |
+Sunday, 29 November 1998 |21 |
+Monday, 30 November 1998 |21 |
+Tuesday, 01 December 1998 |26 |
+Wednesday, 02 December 1998 |26 |
+Thursday, 03 December 1998 |24 |
+Friday, 04 December 1998 |27 |
+Saturday, 05 December 1998 |33 |
+Sunday, 06 December 1998 |27 |
+Monday, 07 December 1998 |30 |
+Tuesday, 08 December 1998 |33 |
+Wednesday, 09 December 1998 |28 |
+Thursday, 10 December 1998 |15 |
+Friday, 11 December 1998 |35 |
+Saturday, 12 December 1998 |32 |
+Sunday, 13 December 1998 |28 |
+Monday, 14 December 1998 |34 |
+Tuesday, 15 December 1998 |25 |
+Wednesday, 16 December 1998 |20 |
+Thursday, 17 December 1998 |31 |
+Friday, 18 December 1998 |18 |
+Saturday, 19 December 1998 |25 |
+Sunday, 20 December 1998 |24 |
+Monday, 21 December 1998 |26 |
+Tuesday, 22 December 1998 |31 |
+Wednesday, 23 December 1998 |19 |
+Thursday, 24 December 1998 |22 |
+Friday, 25 December 1998 |27 |
+Saturday, 26 December 1998 |27 |
+Sunday, 27 December 1998 |21 |
+Monday, 28 December 1998 |28 |
+Tuesday, 29 December 1998 |29 |
+Wednesday, 30 December 1998 |26 |
+Thursday, 31 December 1998 |23 |
+Friday, 01 January 1999 |24 |
+Saturday, 02 January 1999 |36 |
+Sunday, 03 January 1999 |32 |
+Monday, 04 January 1999 |20 |
+Tuesday, 05 January 1999 |27 |
+Wednesday, 06 January 1999 |25 |
+Thursday, 07 January 1999 |28 |
+Friday, 08 January 1999 |26 |
+Saturday, 09 January 1999 |20 |
+Sunday, 10 January 1999 |26 |
+Monday, 11 January 1999 |42 |
+Tuesday, 12 January 1999 |23 |
+Wednesday, 13 January 1999 |20 |
+Thursday, 14 January 1999 |30 |
+Friday, 15 January 1999 |32 |
+Saturday, 16 January 1999 |22 |
+Sunday, 17 January 1999 |21 |
+Monday, 18 January 1999 |21 |
+Tuesday, 19 January 1999 |26 |
+Wednesday, 20 January 1999 |24 |
+Thursday, 21 January 1999 |20 |
+Friday, 22 January 1999 |29 |
+Saturday, 23 January 1999 |29 |
+Sunday, 24 January 1999 |39 |
+Monday, 25 January 1999 |25 |
+Tuesday, 26 January 1999 |33 |
+Wednesday, 27 January 1999 |31 |
+Thursday, 28 January 1999 |24 |
+Friday, 29 January 1999 |20 |
+Saturday, 30 January 1999 |33 |
+Sunday, 31 January 1999 |35 |
+Monday, 01 February 1999 |32 |
+Tuesday, 02 February 1999 |23 |
+Wednesday, 03 February 1999 |28 |
+Thursday, 04 February 1999 |21 |
+Friday, 05 February 1999 |24 |
+Saturday, 06 February 1999 |30 |
+Sunday, 07 February 1999 |28 |
+Monday, 08 February 1999 |20 |
+Tuesday, 09 February 1999 |39 |
+Wednesday, 10 February 1999 |25 |
+Thursday, 11 February 1999 |25 |
+Friday, 12 February 1999 |21 |
+Saturday, 13 February 1999 |32 |
+Sunday, 14 February 1999 |35 |
+Monday, 15 February 1999 |23 |
+Tuesday, 16 February 1999 |28 |
+Wednesday, 17 February 1999 |25 |
+Thursday, 18 February 1999 |29 |
+Friday, 19 February 1999 |29 |
+Saturday, 20 February 1999 |33 |
+Sunday, 21 February 1999 |29 |
+Monday, 22 February 1999 |20 |
+Tuesday, 23 February 1999 |24 |
+Wednesday, 24 February 1999 |24 |
+Thursday, 25 February 1999 |24 |
+Friday, 26 February 1999 |32 |
+Saturday, 27 February 1999 |26 |
+Sunday, 28 February 1999 |28 |
+Monday, 01 March 1999 |25 |
+Tuesday, 02 March 1999 |25 |
+Wednesday, 03 March 1999 |27 |
+Thursday, 04 March 1999 |28 |
+Friday, 05 March 1999 |26 |
+Saturday, 06 March 1999 |29 |
+Sunday, 07 March 1999 |31 |
+Monday, 08 March 1999 |30 |
+Tuesday, 09 March 1999 |27 |
+Wednesday, 10 March 1999 |31 |
+Thursday, 11 March 1999 |26 |
+Friday, 12 March 1999 |13 |
+Saturday, 13 March 1999 |22 |
+Sunday, 14 March 1999 |30 |
+Monday, 15 March 1999 |18 |
+Tuesday, 16 March 1999 |42 |
+Wednesday, 17 March 1999 |22 |
+Thursday, 18 March 1999 |35 |
+Friday, 19 March 1999 |24 |
+Saturday, 20 March 1999 |22 |
+Sunday, 21 March 1999 |34 |
+Monday, 22 March 1999 |21 |
+Tuesday, 23 March 1999 |24 |
+Wednesday, 24 March 1999 |36 |
+Thursday, 25 March 1999 |20 |
+Friday, 26 March 1999 |37 |
+Saturday, 27 March 1999 |36 |
+Sunday, 28 March 1999 |31 |
+Monday, 29 March 1999 |24 |
+Tuesday, 30 March 1999 |18 |
+Wednesday, 31 March 1999 |28 |
+Thursday, 01 April 1999 |20 |
+Friday, 02 April 1999 |19 |
+Saturday, 03 April 1999 |22 |
+Sunday, 04 April 1999 |20 |
+Monday, 05 April 1999 |25 |
+Tuesday, 06 April 1999 |24 |
+Wednesday, 07 April 1999 |32 |
+Thursday, 08 April 1999 |18 |
+Friday, 09 April 1999 |30 |
+Saturday, 10 April 1999 |32 |
+Sunday, 11 April 1999 |29 |
+Monday, 12 April 1999 |22 |
+Tuesday, 13 April 1999 |25 |
+Wednesday, 14 April 1999 |30 |
+Thursday, 15 April 1999 |24 |
+Friday, 16 April 1999 |16 |
+Saturday, 17 April 1999 |32 |
+Sunday, 18 April 1999 |30 |
+Monday, 19 April 1999 |31 |
+Tuesday, 20 April 1999 |17 |
+Wednesday, 21 April 1999 |24 |
+Thursday, 22 April 1999 |24 |
+Friday, 23 April 1999 |35 |
+Saturday, 24 April 1999 |34 |
+Sunday, 25 April 1999 |25 |
+Monday, 26 April 1999 |28 |
+Tuesday, 27 April 1999 |23 |
+Wednesday, 28 April 1999 |32 |
+Thursday, 29 April 1999 |19 |
+Friday, 30 April 1999 |22 |
+Saturday, 01 May 1999 |26 |
+Sunday, 02 May 1999 |36 |
+Monday, 03 May 1999 |34 |
+Tuesday, 04 May 1999 |23 |
+Wednesday, 05 May 1999 |26 |
+Thursday, 06 May 1999 |29 |
+Friday, 07 May 1999 |25 |
+Saturday, 08 May 1999 |21 |
+Sunday, 09 May 1999 |42 |
+Monday, 10 May 1999 |18 |
+Tuesday, 11 May 1999 |31 |
+Wednesday, 12 May 1999 |26 |
+Thursday, 13 May 1999 |20 |
+Friday, 14 May 1999 |24 |
+Saturday, 15 May 1999 |28 |
+Sunday, 16 May 1999 |27 |
+Monday, 17 May 1999 |35 |
+Tuesday, 18 May 1999 |24 |
+Wednesday, 19 May 1999 |33 |
+Thursday, 20 May 1999 |28 |
+Friday, 21 May 1999 |32 |
+Saturday, 22 May 1999 |32 |
+Sunday, 23 May 1999 |34 |
+Monday, 24 May 1999 |26 |
+Tuesday, 25 May 1999 |26 |
+Wednesday, 26 May 1999 |36 |
+Thursday, 27 May 1999 |32 |
+Friday, 28 May 1999 |28 |
+Saturday, 29 May 1999 |26 |
+Sunday, 30 May 1999 |27 |
+Monday, 31 May 1999 |33 |
+Tuesday, 01 June 1999 |29 |
+Wednesday, 02 June 1999 |27 |
+Thursday, 03 June 1999 |25 |
+Friday, 04 June 1999 |21 |
+Saturday, 05 June 1999 |35 |
+Sunday, 06 June 1999 |22 |
+Monday, 07 June 1999 |22 |
+Tuesday, 08 June 1999 |34 |
+Wednesday, 09 June 1999 |25 |
+Thursday, 10 June 1999 |26 |
+Friday, 11 June 1999 |29 |
+Saturday, 12 June 1999 |28 |
+Sunday, 13 June 1999 |37 |
+Monday, 14 June 1999 |31 |
+Tuesday, 15 June 1999 |28 |
+Wednesday, 16 June 1999 |26 |
+Thursday, 17 June 1999 |33 |
+Friday, 18 June 1999 |27 |
+Saturday, 19 June 1999 |32 |
+Sunday, 20 June 1999 |31 |
+Monday, 21 June 1999 |29 |
+Tuesday, 22 June 1999 |26 |
+Wednesday, 23 June 1999 |29 |
+Thursday, 24 June 1999 |28 |
+Friday, 25 June 1999 |28 |
+Saturday, 26 June 1999 |21 |
+Sunday, 27 June 1999 |28 |
+Monday, 28 June 1999 |35 |
+Tuesday, 29 June 1999 |26 |
+Wednesday, 30 June 1999 |32 |
+Thursday, 01 July 1999 |28 |
+Friday, 02 July 1999 |29 |
+Saturday, 03 July 1999 |25 |
+Sunday, 04 July 1999 |27 |
+Monday, 05 July 1999 |35 |
+Tuesday, 06 July 1999 |23 |
+Wednesday, 07 July 1999 |25 |
+Thursday, 08 July 1999 |23 |
+Friday, 09 July 1999 |29 |
+Saturday, 10 July 1999 |25 |
+Sunday, 11 July 1999 |19 |
+Monday, 12 July 1999 |26 |
+Tuesday, 13 July 1999 |17 |
+Wednesday, 14 July 1999 |28 |
+Thursday, 15 July 1999 |27 |
+Friday, 16 July 1999 |30 |
+Saturday, 17 July 1999 |23 |
+Sunday, 18 July 1999 |21 |
+Monday, 19 July 1999 |25 |
+Tuesday, 20 July 1999 |33 |
+Wednesday, 21 July 1999 |26 |
+Thursday, 22 July 1999 |23 |
+Friday, 23 July 1999 |24 |
+Saturday, 24 July 1999 |19 |
+Sunday, 25 July 1999 |29 |
+Monday, 26 July 1999 |30 |
+Tuesday, 27 July 1999 |19 |
+Wednesday, 28 July 1999 |22 |
+Thursday, 29 July 1999 |26 |
+Friday, 30 July 1999 |31 |
+Saturday, 31 July 1999 |23 |
+Sunday, 01 August 1999 |27 |
+Monday, 02 August 1999 |28 |
+Tuesday, 03 August 1999 |30 |
+Wednesday, 04 August 1999 |37 |
+Thursday, 05 August 1999 |17 |
+Friday, 06 August 1999 |25 |
+Saturday, 07 August 1999 |21 |
+Sunday, 08 August 1999 |36 |
+Monday, 09 August 1999 |34 |
+Tuesday, 10 August 1999 |25 |
+Wednesday, 11 August 1999 |35 |
+Thursday, 12 August 1999 |27 |
+Friday, 13 August 1999 |21 |
+Saturday, 14 August 1999 |24 |
+Sunday, 15 August 1999 |21 |
+Monday, 16 August 1999 |42 |
+Tuesday, 17 August 1999 |30 |
+Wednesday, 18 August 1999 |28 |
+Thursday, 19 August 1999 |29 |
+Friday, 20 August 1999 |25 |
+Saturday, 21 August 1999 |27 |
+Sunday, 22 August 1999 |22 |
+Monday, 23 August 1999 |34 |
+Tuesday, 24 August 1999 |26 |
+Wednesday, 25 August 1999 |31 |
+Thursday, 26 August 1999 |29 |
+Friday, 27 August 1999 |30 |
+Saturday, 28 August 1999 |28 |
+Sunday, 29 August 1999 |34 |
+Monday, 30 August 1999 |37 |
+Tuesday, 31 August 1999 |22 |
+Wednesday, 01 September 1999|40 |
+Thursday, 02 September 1999 |38 |
+Friday, 03 September 1999 |27 |
+Saturday, 04 September 1999 |28 |
+Sunday, 05 September 1999 |38 |
+Monday, 06 September 1999 |30 |
+Tuesday, 07 September 1999 |32 |
+Wednesday, 08 September 1999|19 |
+Thursday, 09 September 1999 |26 |
+Friday, 10 September 1999 |24 |
+Saturday, 11 September 1999 |25 |
+Sunday, 12 September 1999 |27 |
+Monday, 13 September 1999 |20 |
+Tuesday, 14 September 1999 |26 |
+Wednesday, 15 September 1999|29 |
+Thursday, 16 September 1999 |31 |
+Friday, 17 September 1999 |23 |
+Saturday, 18 September 1999 |29 |
+Sunday, 19 September 1999 |34 |
+Monday, 20 September 1999 |32 |
+Tuesday, 21 September 1999 |30 |
+Wednesday, 22 September 1999|28 |
+Thursday, 23 September 1999 |25 |
+Friday, 24 September 1999 |26 |
+Saturday, 25 September 1999 |20 |
+Sunday, 26 September 1999 |23 |
+Monday, 27 September 1999 |19 |
+Tuesday, 28 September 1999 |33 |
+Wednesday, 29 September 1999|33 |
+Thursday, 30 September 1999 |25 |
+Friday, 01 October 1999 |26 |
+Saturday, 02 October 1999 |38 |
+Sunday, 03 October 1999 |24 |
+Monday, 04 October 1999 |25 |
+Tuesday, 05 October 1999 |27 |
+Wednesday, 06 October 1999 |26 |
+Thursday, 07 October 1999 |24 |
+Friday, 08 October 1999 |30 |
+Saturday, 09 October 1999 |30 |
+Sunday, 10 October 1999 |35 |
+Monday, 11 October 1999 |17 |
+Tuesday, 12 October 1999 |25 |
+Wednesday, 13 October 1999 |17 |
+Thursday, 14 October 1999 |32 |
+Friday, 15 October 1999 |31 |
+Saturday, 16 October 1999 |21 |
+Sunday, 17 October 1999 |30 |
+Monday, 18 October 1999 |32 |
+Tuesday, 19 October 1999 |40 |
+Wednesday, 20 October 1999 |18 |
+Thursday, 21 October 1999 |31 |
+Friday, 22 October 1999 |23 |
+Saturday, 23 October 1999 |32 |
+Sunday, 24 October 1999 |21 |
+Monday, 25 October 1999 |29 |
+Tuesday, 26 October 1999 |22 |
+Wednesday, 27 October 1999 |30 |
+Thursday, 28 October 1999 |26 |
+Friday, 29 October 1999 |24 |
+Saturday, 30 October 1999 |20 |
+Sunday, 31 October 1999 |32 |
+Monday, 01 November 1999 |28 |
+Tuesday, 02 November 1999 |32 |
+Wednesday, 03 November 1999 |24 |
+Thursday, 04 November 1999 |24 |
+Friday, 05 November 1999 |26 |
+Saturday, 06 November 1999 |33 |
+Sunday, 07 November 1999 |32 |
+Monday, 08 November 1999 |30 |
+Tuesday, 09 November 1999 |22 |
+Wednesday, 10 November 1999 |29 |
+Thursday, 11 November 1999 |28 |
+Friday, 12 November 1999 |28 |
+Saturday, 13 November 1999 |38 |
+Sunday, 14 November 1999 |19 |
+Monday, 15 November 1999 |32 |
+Tuesday, 16 November 1999 |24 |
+Wednesday, 17 November 1999 |29 |
+Thursday, 18 November 1999 |32 |
+Friday, 19 November 1999 |21 |
+Saturday, 20 November 1999 |26 |
+Sunday, 21 November 1999 |34 |
+Monday, 22 November 1999 |23 |
+Tuesday, 23 November 1999 |26 |
+Wednesday, 24 November 1999 |24 |
+Thursday, 25 November 1999 |33 |
+Friday, 26 November 1999 |22 |
+Saturday, 27 November 1999 |21 |
+Sunday, 28 November 1999 |37 |
+Monday, 29 November 1999 |27 |
+Tuesday, 30 November 1999 |26 |
+Wednesday, 01 December 1999 |30 |
+Thursday, 02 December 1999 |30 |
+Friday, 03 December 1999 |22 |
+Saturday, 04 December 1999 |26 |
+Sunday, 05 December 1999 |47 |
+Monday, 06 December 1999 |26 |
+Tuesday, 07 December 1999 |26 |
+Wednesday, 08 December 1999 |39 |
+Thursday, 09 December 1999 |34 |
+Friday, 10 December 1999 |22 |
+Saturday, 11 December 1999 |26 |
+Sunday, 12 December 1999 |29 |
+Monday, 13 December 1999 |30 |
+Tuesday, 14 December 1999 |27 |
+Wednesday, 15 December 1999 |28 |
+Thursday, 16 December 1999 |26 |
+Friday, 17 December 1999 |33 |
+Saturday, 18 December 1999 |27 |
+Sunday, 19 December 1999 |29 |
+Monday, 20 December 1999 |29 |
+Tuesday, 21 December 1999 |22 |
+Wednesday, 22 December 1999 |29 |
+Thursday, 23 December 1999 |32 |
+Friday, 24 December 1999 |31 |
+Saturday, 25 December 1999 |17 |
+Sunday, 26 December 1999 |20 |
+Monday, 27 December 1999 |30 |
+Tuesday, 28 December 1999 |19 |
+Wednesday, 29 December 1999 |41 |
+Thursday, 30 December 1999 |20 |
+Friday, 31 December 1999 |29 |
+<All> |40000 |
+
+
+That took 2.442 seconds.
+
+##############################################################################
+
+#22c. You can filter on dates - this is currently rather slow, but it does work.
+>>> print nhds.bivar(wheresetname="test3",var1="sex",var2="race",printit=1,allcalc=1)
+wheresetload(): memory mapping of test3 containing 29 elements took 0.003 seconds.
+
+bivar(): Bivariate setup time 0.012 seconds.
+
+Bivariate summary derived from race and sex columns in nhds dataset
+Using whereset test3 containing 29 elements
+bivar(): Bivariate summary created in 0.024 seconds.
+Time in intersect() function was 0.004 seconds.
+
+ | | |
+ | | |
+Grouped |Grouped| |
+by |by | |
+Race |Sex |Frequency|
+----------------------|-------|---------|
+White |Male |6 |
+White |Female |7 |
+Black |Male |2 |
+Black |Female |3 |
+American Indian/Eskimo|Male |0 |
+American Indian/Eskimo|Female |0 |
+Asian/Pacific Islander|Male |0 |
+Asian/Pacific Islander|Female |1 |
+Other |Male |0 |
+Other |Female |1 |
+Not stated |Male |4 |
+Not stated |Female |5 |
+All races |Male |12 |
+All races |Female |17 |
+White |Persons|13 |
+Black |Persons|5 |
+American Indian/Eskimo|Persons|0 |
+Asian/Pacific Islander|Persons|1 |
+Other |Persons|1 |
+Not stated |Persons|9 |
+All races |Persons|29 |
+
+
+That took 0.032 seconds.
+
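+# The whereset "test3" used above was created earlier in the demo. As an
+# illustrative sketch (not part of the captured session), a single-date
+# filter could be assembled the same way - the whereset name here is assumed:
+import mx.DateTime
+nhds.makewhereset("one_day", var1="randomdate", op1="=",
+                  val1=mx.DateTime.Date(1996, 10, 15))
+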
+##############################################################################
+
+#22d. Date range filtering. The mx.DateTime.Date() function could be aliased to just date() or even d().
+>>> nhds.makewhereset("test5",var1="randomdate",op1=">=",val1=mx.DateTime.Date(1996,10,12),var2="randomdate",op2="<=",val2=mx.DateTime.Date(1996,11,12))
+>>> print nhds.bivar(var1="randomdate",var2="sex",wheresetname="test5",printit=1,allcalc=1)
+Assembling whereset test5 containing 832 elements took 4.027 seconds.
+
+That took 4.028 seconds.
+
+##############################################################################
+
+wheresetload(): memory mapping of test5 containing 832 elements took 0.003 seconds.
+
+bivar(): Bivariate setup time 0.059 seconds.
+
+Bivariate summary derived from randomdate and sex columns in nhds dataset
+Using whereset test5 containing 832 elements
+bivar(): Bivariate summary created in 0.122 seconds.
+Time in intersect() function was 0.021 seconds.
+
+ | | |
+ | | |
+Grouped |Grouped| |
+by |by | |
+Random date |Sex |Frequency|
+---------------------------|-------|---------|
+Saturday, 12 October 1996 |Male |12 |
+Saturday, 12 October 1996 |Female |17 |
+Sunday, 13 October 1996 |Male |12 |
+Sunday, 13 October 1996 |Female |17 |
+Monday, 14 October 1996 |Male |16 |
+Monday, 14 October 1996 |Female |16 |
+Tuesday, 15 October 1996 |Male |8 |
+Tuesday, 15 October 1996 |Female |12 |
+Wednesday, 16 October 1996 |Male |8 |
+Wednesday, 16 October 1996 |Female |10 |
+Thursday, 17 October 1996 |Male |11 |
+Thursday, 17 October 1996 |Female |24 |
+Friday, 18 October 1996 |Male |6 |
+Friday, 18 October 1996 |Female |12 |
+Saturday, 19 October 1996 |Male |14 |
+Saturday, 19 October 1996 |Female |17 |
+Sunday, 20 October 1996 |Male |8 |
+Sunday, 20 October 1996 |Female |12 |
+Monday, 21 October 1996 |Male |13 |
+Monday, 21 October 1996 |Female |8 |
+Tuesday, 22 October 1996 |Male |12 |
+Tuesday, 22 October 1996 |Female |16 |
+Wednesday, 23 October 1996 |Male |9 |
+Wednesday, 23 October 1996 |Female |9 |
+Thursday, 24 October 1996 |Male |8 |
+Thursday, 24 October 1996 |Female |21 |
+Friday, 25 October 1996 |Male |12 |
+Friday, 25 October 1996 |Female |18 |
+Saturday, 26 October 1996 |Male |11 |
+Saturday, 26 October 1996 |Female |9 |
+Sunday, 27 October 1996 |Male |7 |
+Sunday, 27 October 1996 |Female |18 |
+Monday, 28 October 1996 |Male |14 |
+Monday, 28 October 1996 |Female |14 |
+Tuesday, 29 October 1996 |Male |13 |
+Tuesday, 29 October 1996 |Female |11 |
+Wednesday, 30 October 1996 |Male |9 |
+Wednesday, 30 October 1996 |Female |13 |
+Thursday, 31 October 1996 |Male |10 |
+Thursday, 31 October 1996 |Female |13 |
+Friday, 01 November 1996 |Male |12 |
+Friday, 01 November 1996 |Female |15 |
+Saturday, 02 November 1996 |Male |12 |
+Saturday, 02 November 1996 |Female |14 |
+Sunday, 03 November 1996 |Male |8 |
+Sunday, 03 November 1996 |Female |14 |
+Monday, 04 November 1996 |Male |13 |
+Monday, 04 November 1996 |Female |6 |
+Tuesday, 05 November 1996 |Male |14 |
+Tuesday, 05 November 1996 |Female |12 |
+Wednesday, 06 November 1996|Male |13 |
+Wednesday, 06 November 1996|Female |18 |
+Thursday, 07 November 1996 |Male |18 |
+Thursday, 07 November 1996 |Female |15 |
+Friday, 08 November 1996 |Male |14 |
+Friday, 08 November 1996 |Female |18 |
+Saturday, 09 November 1996 |Male |13 |
+Saturday, 09 November 1996 |Female |19 |
+Sunday, 10 November 1996 |Male |14 |
+Sunday, 10 November 1996 |Female |19 |
+Monday, 11 November 1996 |Male |6 |
+Monday, 11 November 1996 |Female |15 |
+Tuesday, 12 November 1996 |Male |12 |
+Tuesday, 12 November 1996 |Female |18 |
+<All> |Male |362 |
+<All> |Female |470 |
+Saturday, 12 October 1996 |Persons|29 |
+Sunday, 13 October 1996 |Persons|29 |
+Monday, 14 October 1996 |Persons|32 |
+Tuesday, 15 October 1996 |Persons|20 |
+Wednesday, 16 October 1996 |Persons|18 |
+Thursday, 17 October 1996 |Persons|35 |
+Friday, 18 October 1996 |Persons|18 |
+Saturday, 19 October 1996 |Persons|31 |
+Sunday, 20 October 1996 |Persons|20 |
+Monday, 21 October 1996 |Persons|21 |
+Tuesday, 22 October 1996 |Persons|28 |
+Wednesday, 23 October 1996 |Persons|18 |
+Thursday, 24 October 1996 |Persons|29 |
+Friday, 25 October 1996 |Persons|30 |
+Saturday, 26 October 1996 |Persons|20 |
+Sunday, 27 October 1996 |Persons|25 |
+Monday, 28 October 1996 |Persons|28 |
+Tuesday, 29 October 1996 |Persons|24 |
+Wednesday, 30 October 1996 |Persons|22 |
+Thursday, 31 October 1996 |Persons|23 |
+Friday, 01 November 1996 |Persons|27 |
+Saturday, 02 November 1996 |Persons|26 |
+Sunday, 03 November 1996 |Persons|22 |
+Monday, 04 November 1996 |Persons|19 |
+Tuesday, 05 November 1996 |Persons|26 |
+Wednesday, 06 November 1996|Persons|31 |
+Thursday, 07 November 1996 |Persons|33 |
+Friday, 08 November 1996 |Persons|32 |
+Saturday, 09 November 1996 |Persons|32 |
+Sunday, 10 November 1996 |Persons|33 |
+Monday, 11 November 1996 |Persons|21 |
+Tuesday, 12 November 1996 |Persons|30 |
+<All> |Persons|832 |
+
+
+That took 0.175 seconds.
+
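+# The aliasing suggested in the comment for #22d is a one-liner. This is an
+# illustrative sketch, not part of the captured session; the whereset name
+# "test5b" is assumed so as not to overwrite test5:
+from mx.DateTime import Date as d
+nhds.makewhereset("test5b", var1="randomdate", op1=">=", val1=d(1996, 10, 12),
+                  var2="randomdate", op2="<=", val2=d(1996, 11, 12))
+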
+##############################################################################
+
+#22e. One more test of date ranges, this time using 'not'.
+# Note the short-circuiting of later conditions based on the intermediate whereset values.
+>>> nhds.makewhereset("test6",var1="randomdate",op1=">=",val1=mx.DateTime.Date(1996,10,1),con1="and",var2="randomdate",op2="<=",val2=mx.DateTime.Date(1996,11,1),con2="not",var3="randomdate",op3="=",val3=mx.DateTime.Date(1996,10,15))
+>>> print nhds.bivar(var1="randomdate",var2="sex",wheresetname="test6",printit=1,allcalc=1)
+>>> print nhds.trivar(var1="randomdate",var2="sex",var3="geog_region",wheresetname="test6",printit=1,allcalc=1)
+Assembling whereset test6 containing 806 elements took 4.077 seconds.
+
+That took 4.078 seconds.
+
+##############################################################################
+
+wheresetload(): memory mapping of test6 containing 806 elements took 0.003 seconds.
+
+bivar(): Bivariate setup time 0.046 seconds.
+
+Bivariate summary derived from randomdate and sex columns in nhds dataset
+Using whereset test6 containing 806 elements
+bivar(): Bivariate summary created in 0.108 seconds.
+Time in intersect() function was 0.021 seconds.
+
+ | | |
+ | | |
+Grouped |Grouped| |
+by |by | |
+Random date |Sex |Frequency|
+--------------------------|-------|---------|
+Tuesday, 01 October 1996 |Male |13 |
+Tuesday, 01 October 1996 |Female |18 |
+Wednesday, 02 October 1996|Male |11 |
+Wednesday, 02 October 1996|Female |17 |
+Thursday, 03 October 1996 |Male |17 |
+Thursday, 03 October 1996 |Female |15 |
+Friday, 04 October 1996 |Male |8 |
+Friday, 04 October 1996 |Female |15 |
+Saturday, 05 October 1996 |Male |13 |
+Saturday, 05 October 1996 |Female |17 |
+Sunday, 06 October 1996 |Male |6 |
+Sunday, 06 October 1996 |Female |11 |
+Monday, 07 October 1996 |Male |9 |
+Monday, 07 October 1996 |Female |21 |
+Tuesday, 08 October 1996 |Male |13 |
+Tuesday, 08 October 1996 |Female |17 |
+Wednesday, 09 October 1996|Male |10 |
+Wednesday, 09 October 1996|Female |16 |
+Thursday, 10 October 1996 |Male |15 |
+Thursday, 10 October 1996 |Female |15 |
+Friday, 11 October 1996 |Male |10 |
+Friday, 11 October 1996 |Female |12 |
+Saturday, 12 October 1996 |Male |12 |
+Saturday, 12 October 1996 |Female |17 |
+Sunday, 13 October 1996 |Male |12 |
+Sunday, 13 October 1996 |Female |17 |
+Monday, 14 October 1996 |Male |16 |
+Monday, 14 October 1996 |Female |16 |
+Wednesday, 16 October 1996|Male |8 |
+Wednesday, 16 October 1996|Female |10 |
+Thursday, 17 October 1996 |Male |11 |
+Thursday, 17 October 1996 |Female |24 |
+Friday, 18 October 1996 |Male |6 |
+Friday, 18 October 1996 |Female |12 |
+Saturday, 19 October 1996 |Male |14 |
+Saturday, 19 October 1996 |Female |17 |
+Sunday, 20 October 1996 |Male |8 |
+Sunday, 20 October 1996 |Female |12 |
+Monday, 21 October 1996 |Male |13 |
+Monday, 21 October 1996 |Female |8 |
+Tuesday, 22 October 1996 |Male |12 |
+Tuesday, 22 October 1996 |Female |16 |
+Wednesday, 23 October 1996|Male |9 |
+Wednesday, 23 October 1996|Female |9 |
+Thursday, 24 October 1996 |Male |8 |
+Thursday, 24 October 1996 |Female |21 |
+Friday, 25 October 1996 |Male |12 |
+Friday, 25 October 1996 |Female |18 |
+Saturday, 26 October 1996 |Male |11 |
+Saturday, 26 October 1996 |Female |9 |
+Sunday, 27 October 1996 |Male |7 |
+Sunday, 27 October 1996 |Female |18 |
+Monday, 28 October 1996 |Male |14 |
+Monday, 28 October 1996 |Female |14 |
+Tuesday, 29 October 1996 |Male |13 |
+Tuesday, 29 October 1996 |Female |11 |
+Wednesday, 30 October 1996|Male |9 |
+Wednesday, 30 October 1996|Female |13 |
+Thursday, 31 October 1996 |Male |10 |
+Thursday, 31 October 1996 |Female |13 |
+Friday, 01 November 1996 |Male |12 |
+Friday, 01 November 1996 |Female |15 |
+<All> |Male |342 |
+<All> |Female |464 |
+Tuesday, 01 October 1996 |Persons|31 |
+Wednesday, 02 October 1996|Persons|28 |
+Thursday, 03 October 1996 |Persons|32 |
+Friday, 04 October 1996 |Persons|23 |
+Saturday, 05 October 1996 |Persons|30 |
+Sunday, 06 October 1996 |Persons|17 |
+Monday, 07 October 1996 |Persons|30 |
+Tuesday, 08 October 1996 |Persons|30 |
+Wednesday, 09 October 1996|Persons|26 |
+Thursday, 10 October 1996 |Persons|30 |
+Friday, 11 October 1996 |Persons|22 |
+Saturday, 12 October 1996 |Persons|29 |
+Sunday, 13 October 1996 |Persons|29 |
+Monday, 14 October 1996 |Persons|32 |
+Wednesday, 16 October 1996|Persons|18 |
+Thursday, 17 October 1996 |Persons|35 |
+Friday, 18 October 1996 |Persons|18 |
+Saturday, 19 October 1996 |Persons|31 |
+Sunday, 20 October 1996 |Persons|20 |
+Monday, 21 October 1996 |Persons|21 |
+Tuesday, 22 October 1996 |Persons|28 |
+Wednesday, 23 October 1996|Persons|18 |
+Thursday, 24 October 1996 |Persons|29 |
+Friday, 25 October 1996 |Persons|30 |
+Saturday, 26 October 1996 |Persons|20 |
+Sunday, 27 October 1996 |Persons|25 |
+Monday, 28 October 1996 |Persons|28 |
+Tuesday, 29 October 1996 |Persons|24 |
+Wednesday, 30 October 1996|Persons|22 |
+Thursday, 31 October 1996 |Persons|23 |
+Friday, 01 November 1996 |Persons|27 |
+<All> |Persons|806 |
+
+
+That took 0.158 seconds.
+
+##############################################################################
+
+wheresetload(): memory mapping of test6 containing 806 elements took 0.003 seconds.
+
+trivar(): Trivariate setup time 0.039 seconds.
+
+Trivariate summary derived from geog_region, randomdate and sex columns in nhds dataset
+Using whereset test6 containing 806 elements
+trivar(): Trivariate summary created in 0.378 seconds.
+Time in intersect() function was 0.094 seconds.
+Time in ArrayDict was 0.206 seconds.
+Time in loops was 0.339 seconds.
+Time in empty loops was 0.000 seconds.
+Time in count was 0.004 seconds.
+Time in take() was 0.000 seconds.
+
+ | | | |
+ | | | |
+Grouped |Grouped |Grouped| |
+by |by |by | |
+Geographic Region|Random date |Sex |Frequency|
+-----------------|--------------------------|-------|---------|
+Northeast |Tuesday, 01 October 1996 |Male |5 |
+Northeast |Tuesday, 01 October 1996 |Female |8 |
+Northeast |Wednesday, 02 October 1996|Male |3 |
+Northeast |Wednesday, 02 October 1996|Female |6 |
+Northeast |Thursday, 03 October 1996 |Male |8 |
+Northeast |Thursday, 03 October 1996 |Female |7 |
+Northeast |Friday, 04 October 1996 |Male |2 |
+Northeast |Friday, 04 October 1996 |Female |4 |
+Northeast |Saturday, 05 October 1996 |Male |6 |
+Northeast |Saturday, 05 October 1996 |Female |6 |
+Northeast |Sunday, 06 October 1996 |Male |2 |
+Northeast |Sunday, 06 October 1996 |Female |3 |
+Northeast |Monday, 07 October 1996 |Male |4 |
+Northeast |Monday, 07 October 1996 |Female |10 |
+Northeast |Tuesday, 08 October 1996 |Male |6 |
+Northeast |Tuesday, 08 October 1996 |Female |5 |
+Northeast |Wednesday, 09 October 1996|Male |5 |
+Northeast |Wednesday, 09 October 1996|Female |8 |
+Northeast |Thursday, 10 October 1996 |Male |7 |
+Northeast |Thursday, 10 October 1996 |Female |4 |
+Northeast |Friday, 11 October 1996 |Male |1 |
+Northeast |Friday, 11 October 1996 |Female |6 |
+Northeast |Saturday, 12 October 1996 |Male |3 |
+Northeast |Saturday, 12 October 1996 |Female |8 |
+Northeast |Sunday, 13 October 1996 |Male |7 |
+Northeast |Sunday, 13 October 1996 |Female |7 |
+Northeast |Monday, 14 October 1996 |Male |6 |
+Northeast |Monday, 14 October 1996 |Female |6 |
+Northeast |Wednesday, 16 October 1996|Male |0 |
+Northeast |Wednesday, 16 October 1996|Female |2 |
+Northeast |Thursday, 17 October 1996 |Male |6 |
+Northeast |Thursday, 17 October 1996 |Female |7 |
+Northeast |Friday, 18 October 1996 |Male |2 |
+Northeast |Friday, 18 October 1996 |Female |6 |
+Northeast |Saturday, 19 October 1996 |Male |5 |
+Northeast |Saturday, 19 October 1996 |Female |5 |
+Northeast |Sunday, 20 October 1996 |Male |2 |
+Northeast |Sunday, 20 October 1996 |Female |2 |
+Northeast |Monday, 21 October 1996 |Male |7 |
+Northeast |Monday, 21 October 1996 |Female |1 |
+Northeast |Tuesday, 22 October 1996 |Male |6 |
+Northeast |Tuesday, 22 October 1996 |Female |7 |
+Northeast |Wednesday, 23 October 1996|Male |3 |
+Northeast |Wednesday, 23 October 1996|Female |4 |
+Northeast |Thursday, 24 October 1996 |Male |3 |
+Northeast |Thursday, 24 October 1996 |Female |6 |
+Northeast |Friday, 25 October 1996 |Male |0 |
+Northeast |Friday, 25 October 1996 |Female |5 |
+Northeast |Saturday, 26 October 1996 |Male |4 |
+Northeast |Saturday, 26 October 1996 |Female |2 |
+Northeast |Sunday, 27 October 1996 |Male |3 |
+Northeast |Sunday, 27 October 1996 |Female |5 |
+Northeast |Monday, 28 October 1996 |Male |3 |
+Northeast |Monday, 28 October 1996 |Female |4 |
+Northeast |Tuesday, 29 October 1996 |Male |4 |
+Northeast |Tuesday, 29 October 1996 |Female |5 |
+Northeast |Wednesday, 30 October 1996|Male |3 |
+Northeast |Wednesday, 30 October 1996|Female |4 |
+Northeast |Thursday, 31 October 1996 |Male |2 |
+Northeast |Thursday, 31 October 1996 |Female |4 |
+Northeast |Friday, 01 November 1996 |Male |6 |
+Northeast |Friday, 01 November 1996 |Female |3 |
+Midwest |Tuesday, 01 October 1996 |Male |0 |
+Midwest |Tuesday, 01 October 1996 |Female |2 |
+Midwest |Wednesday, 02 October 1996|Male |2 |
+Midwest |Wednesday, 02 October 1996|Female |1 |
+Midwest |Thursday, 03 October 1996 |Male |0 |
+Midwest |Thursday, 03 October 1996 |Female |3 |
+Midwest |Friday, 04 October 1996 |Male |1 |
+Midwest |Friday, 04 October 1996 |Female |3 |
+Midwest |Saturday, 05 October 1996 |Male |2 |
+Midwest |Saturday, 05 October 1996 |Female |4 |
+Midwest |Sunday, 06 October 1996 |Male |1 |
+Midwest |Sunday, 06 October 1996 |Female |1 |
+Midwest |Monday, 07 October 1996 |Male |3 |
+Midwest |Monday, 07 October 1996 |Female |0 |
+Midwest |Tuesday, 08 October 1996 |Male |3 |
+Midwest |Tuesday, 08 October 1996 |Female |1 |
+Midwest |Wednesday, 09 October 1996|Male |1 |
+Midwest |Wednesday, 09 October 1996|Female |4 |
+Midwest |Thursday, 10 October 1996 |Male |1 |
+Midwest |Thursday, 10 October 1996 |Female |1 |
+Midwest |Friday, 11 October 1996 |Male |4 |
+Midwest |Friday, 11 October 1996 |Female |0 |
+Midwest |Saturday, 12 October 1996 |Male |1 |
+Midwest |Saturday, 12 October 1996 |Female |5 |
+Midwest |Sunday, 13 October 1996 |Male |2 |
+Midwest |Sunday, 13 October 1996 |Female |3 |
+Midwest |Monday, 14 October 1996 |Male |2 |
+Midwest |Monday, 14 October 1996 |Female |2 |
+Midwest |Wednesday, 16 October 1996|Male |2 |
+Midwest |Wednesday, 16 October 1996|Female |1 |
+Midwest |Thursday, 17 October 1996 |Male |3 |
+Midwest |Thursday, 17 October 1996 |Female |3 |
+Midwest |Friday, 18 October 1996 |Male |1 |
+Midwest |Friday, 18 October 1996 |Female |3 |
+Midwest |Saturday, 19 October 1996 |Male |2 |
+Midwest |Saturday, 19 October 1996 |Female |3 |
+Midwest |Sunday, 20 October 1996 |Male |2 |
+Midwest |Sunday, 20 October 1996 |Female |3 |
+Midwest |Monday, 21 October 1996 |Male |2 |
+Midwest |Monday, 21 October 1996 |Female |2 |
+Midwest |Tuesday, 22 October 1996 |Male |4 |
+Midwest |Tuesday, 22 October 1996 |Female |2 |
+Midwest |Wednesday, 23 October 1996|Male |2 |
+Midwest |Wednesday, 23 October 1996|Female |1 |
+Midwest |Thursday, 24 October 1996 |Male |1 |
+Midwest |Thursday, 24 October 1996 |Female |6 |
+Midwest |Friday, 25 October 1996 |Male |2 |
+Midwest |Friday, 25 October 1996 |Female |2 |
+Midwest |Saturday, 26 October 1996 |Male |2 |
+Midwest |Saturday, 26 October 1996 |Female |5 |
+Midwest |Sunday, 27 October 1996 |Male |3 |
+Midwest |Sunday, 27 October 1996 |Female |1 |
+Midwest |Monday, 28 October 1996 |Male |2 |
+Midwest |Monday, 28 October 1996 |Female |1 |
+Midwest |Tuesday, 29 October 1996 |Male |2 |
+Midwest |Tuesday, 29 October 1996 |Female |1 |
+Midwest |Wednesday, 30 October 1996|Male |2 |
+Midwest |Wednesday, 30 October 1996|Female |1 |
+Midwest |Thursday, 31 October 1996 |Male |1 |
+Midwest |Thursday, 31 October 1996 |Female |3 |
+Midwest |Friday, 01 November 1996 |Male |1 |
+Midwest |Friday, 01 November 1996 |Female |3 |
+South |Tuesday, 01 October 1996 |Male |8 |
+South |Tuesday, 01 October 1996 |Female |8 |
+South |Wednesday, 02 October 1996|Male |6 |
+South |Wednesday, 02 October 1996|Female |10 |
+South |Thursday, 03 October 1996 |Male |9 |
+South |Thursday, 03 October 1996 |Female |5 |
+South |Friday, 04 October 1996 |Male |5 |
+South |Friday, 04 October 1996 |Female |8 |
+South |Saturday, 05 October 1996 |Male |5 |
+South |Saturday, 05 October 1996 |Female |7 |
+South |Sunday, 06 October 1996 |Male |3 |
+South |Sunday, 06 October 1996 |Female |7 |
+South |Monday, 07 October 1996 |Male |2 |
+South |Monday, 07 October 1996 |Female |11 |
+South |Tuesday, 08 October 1996 |Male |4 |
+South |Tuesday, 08 October 1996 |Female |11 |
+South |Wednesday, 09 October 1996|Male |4 |
+South |Wednesday, 09 October 1996|Female |4 |
+South |Thursday, 10 October 1996 |Male |7 |
+South |Thursday, 10 October 1996 |Female |10 |
+South |Friday, 11 October 1996 |Male |5 |
+South |Friday, 11 October 1996 |Female |6 |
+South |Saturday, 12 October 1996 |Male |8 |
+South |Saturday, 12 October 1996 |Female |4 |
+South |Sunday, 13 October 1996 |Male |3 |
+South |Sunday, 13 October 1996 |Female |7 |
+South |Monday, 14 October 1996 |Male |8 |
+South |Monday, 14 October 1996 |Female |8 |
+South |Wednesday, 16 October 1996|Male |6 |
+South |Wednesday, 16 October 1996|Female |7 |
+South |Thursday, 17 October 1996 |Male |2 |
+South |Thursday, 17 October 1996 |Female |14 |
+South |Friday, 18 October 1996 |Male |3 |
+South |Friday, 18 October 1996 |Female |3 |
+South |Saturday, 19 October 1996 |Male |7 |
+South |Saturday, 19 October 1996 |Female |9 |
+South |Sunday, 20 October 1996 |Male |4 |
+South |Sunday, 20 October 1996 |Female |7 |
+South |Monday, 21 October 1996 |Male |4 |
+South |Monday, 21 October 1996 |Female |5 |
+South |Tuesday, 22 October 1996 |Male |2 |
+South |Tuesday, 22 October 1996 |Female |7 |
+South |Wednesday, 23 October 1996|Male |4 |
+South |Wednesday, 23 October 1996|Female |4 |
+South |Thursday, 24 October 1996 |Male |4 |
+South |Thursday, 24 October 1996 |Female |9 |
+South |Friday, 25 October 1996 |Male |10 |
+South |Friday, 25 October 1996 |Female |11 |
+South |Saturday, 26 October 1996 |Male |5 |
+South |Saturday, 26 October 1996 |Female |2 |
+South |Sunday, 27 October 1996 |Male |1 |
+South |Sunday, 27 October 1996 |Female |12 |
+South |Monday, 28 October 1996 |Male |9 |
+South |Monday, 28 October 1996 |Female |9 |
+South |Tuesday, 29 October 1996 |Male |7 |
+South |Tuesday, 29 October 1996 |Female |5 |
+South |Wednesday, 30 October 1996|Male |4 |
+South |Wednesday, 30 October 1996|Female |8 |
+South |Thursday, 31 October 1996 |Male |7 |
+South |Thursday, 31 October 1996 |Female |6 |
+South |Friday, 01 November 1996 |Male |5 |
+South |Friday, 01 November 1996 |Female |9 |
+All regions |Tuesday, 01 October 1996 |Male |13 |
+All regions |Tuesday, 01 October 1996 |Female |18 |
+All regions |Wednesday, 02 October 1996|Male |11 |
+All regions |Wednesday, 02 October 1996|Female |17 |
+All regions |Thursday, 03 October 1996 |Male |17 |
+All regions |Thursday, 03 October 1996 |Female |15 |
+All regions |Friday, 04 October 1996 |Male |8 |
+All regions |Friday, 04 October 1996 |Female |15 |
+All regions |Saturday, 05 October 1996 |Male |13 |
+All regions |Saturday, 05 October 1996 |Female |17 |
+All regions |Sunday, 06 October 1996 |Male |6 |
+All regions |Sunday, 06 October 1996 |Female |11 |
+All regions |Monday, 07 October 1996 |Male |9 |
+All regions |Monday, 07 October 1996 |Female |21 |
+All regions |Tuesday, 08 October 1996 |Male |13 |
+All regions |Tuesday, 08 October 1996 |Female |17 |
+All regions |Wednesday, 09 October 1996|Male |10 |
+All regions |Wednesday, 09 October 1996|Female |16 |
+All regions |Thursday, 10 October 1996 |Male |15 |
+All regions |Thursday, 10 October 1996 |Female |15 |
+All regions |Friday, 11 October 1996 |Male |10 |
+All regions |Friday, 11 October 1996 |Female |12 |
+All regions |Saturday, 12 October 1996 |Male |12 |
+All regions |Saturday, 12 October 1996 |Female |17 |
+All regions |Sunday, 13 October 1996 |Male |12 |
+All regions |Sunday, 13 October 1996 |Female |17 |
+All regions |Monday, 14 October 1996 |Male |16 |
+All regions |Monday, 14 October 1996 |Female |16 |
+All regions |Wednesday, 16 October 1996|Male |8 |
+All regions |Wednesday, 16 October 1996|Female |10 |
+All regions |Thursday, 17 October 1996 |Male |11 |
+All regions |Thursday, 17 October 1996 |Female |24 |
+All regions |Friday, 18 October 1996 |Male |6 |
+All regions |Friday, 18 October 1996 |Female |12 |
+All regions |Saturday, 19 October 1996 |Male |14 |
+All regions |Saturday, 19 October 1996 |Female |17 |
+All regions |Sunday, 20 October 1996 |Male |8 |
+All regions |Sunday, 20 October 1996 |Female |12 |
+All regions |Monday, 21 October 1996 |Male |13 |
+All regions |Monday, 21 October 1996 |Female |8 |
+All regions |Tuesday, 22 October 1996 |Male |12 |
+All regions |Tuesday, 22 October 1996 |Female |16 |
+All regions |Wednesday, 23 October 1996|Male |9 |
+All regions |Wednesday, 23 October 1996|Female |9 |
+All regions |Thursday, 24 October 1996 |Male |8 |
+All regions |Thursday, 24 October 1996 |Female |21 |
+All regions |Friday, 25 October 1996 |Male |12 |
+All regions |Friday, 25 October 1996 |Female |18 |
+All regions |Saturday, 26 October 1996 |Male |11 |
+All regions |Saturday, 26 October 1996 |Female |9 |
+All regions |Sunday, 27 October 1996 |Male |7 |
+All regions |Sunday, 27 October 1996 |Female |18 |
+All regions |Monday, 28 October 1996 |Male |14 |
+All regions |Monday, 28 October 1996 |Female |14 |
+All regions |Tuesday, 29 October 1996 |Male |13 |
+All regions |Tuesday, 29 October 1996 |Female |11 |
+All regions |Wednesday, 30 October 1996|Male |9 |
+All regions |Wednesday, 30 October 1996|Female |13 |
+All regions |Thursday, 31 October 1996 |Male |10 |
+All regions |Thursday, 31 October 1996 |Female |13 |
+All regions |Friday, 01 November 1996 |Male |12 |
+All regions |Friday, 01 November 1996 |Female |15 |
+Northeast |<All> |Male |124 |
+Northeast |<All> |Female |160 |
+Midwest |<All> |Male |57 |
+Midwest |<All> |Female |71 |
+South |<All> |Male |161 |
+South |<All> |Female |233 |
+Northeast |Tuesday, 01 October 1996 |Persons|13 |
+Northeast |Wednesday, 02 October 1996|Persons|9 |
+Northeast |Thursday, 03 October 1996 |Persons|15 |
+Northeast |Friday, 04 October 1996 |Persons|6 |
+Northeast |Saturday, 05 October 1996 |Persons|12 |
+Northeast |Sunday, 06 October 1996 |Persons|5 |
+Northeast |Monday, 07 October 1996 |Persons|14 |
+Northeast |Tuesday, 08 October 1996 |Persons|11 |
+Northeast |Wednesday, 09 October 1996|Persons|13 |
+Northeast |Thursday, 10 October 1996 |Persons|11 |
+Northeast |Friday, 11 October 1996 |Persons|7 |
+Northeast |Saturday, 12 October 1996 |Persons|11 |
+Northeast |Sunday, 13 October 1996 |Persons|14 |
+Northeast |Monday, 14 October 1996 |Persons|12 |
+Northeast |Wednesday, 16 October 1996|Persons|2 |
+Northeast |Thursday, 17 October 1996 |Persons|13 |
+Northeast |Friday, 18 October 1996 |Persons|8 |
+Northeast |Saturday, 19 October 1996 |Persons|10 |
+Northeast |Sunday, 20 October 1996 |Persons|4 |
+Northeast |Monday, 21 October 1996 |Persons|8 |
+Northeast |Tuesday, 22 October 1996 |Persons|13 |
+Northeast |Wednesday, 23 October 1996|Persons|7 |
+Northeast |Thursday, 24 October 1996 |Persons|9 |
+Northeast |Friday, 25 October 1996 |Persons|5 |
+Northeast |Saturday, 26 October 1996 |Persons|6 |
+Northeast |Sunday, 27 October 1996 |Persons|8 |
+Northeast |Monday, 28 October 1996 |Persons|7 |
+Northeast |Tuesday, 29 October 1996 |Persons|9 |
+Northeast |Wednesday, 30 October 1996|Persons|7 |
+Northeast |Thursday, 31 October 1996 |Persons|6 |
+Northeast |Friday, 01 November 1996 |Persons|9 |
+Midwest |Tuesday, 01 October 1996 |Persons|2 |
+Midwest |Wednesday, 02 October 1996|Persons|3 |
+Midwest |Thursday, 03 October 1996 |Persons|3 |
+Midwest |Friday, 04 October 1996 |Persons|4 |
+Midwest |Saturday, 05 October 1996 |Persons|6 |
+Midwest |Sunday, 06 October 1996 |Persons|2 |
+Midwest |Monday, 07 October 1996 |Persons|3 |
+Midwest |Tuesday, 08 October 1996 |Persons|4 |
+Midwest |Wednesday, 09 October 1996|Persons|5 |
+Midwest |Thursday, 10 October 1996 |Persons|2 |
+Midwest |Friday, 11 October 1996 |Persons|4 |
+Midwest |Saturday, 12 October 1996 |Persons|6 |
+Midwest |Sunday, 13 October 1996 |Persons|5 |
+Midwest |Monday, 14 October 1996 |Persons|4 |
+Midwest |Wednesday, 16 October 1996|Persons|3 |
+Midwest |Thursday, 17 October 1996 |Persons|6 |
+Midwest |Friday, 18 October 1996 |Persons|4 |
+Midwest |Saturday, 19 October 1996 |Persons|5 |
+Midwest |Sunday, 20 October 1996 |Persons|5 |
+Midwest |Monday, 21 October 1996 |Persons|4 |
+Midwest |Tuesday, 22 October 1996 |Persons|6 |
+Midwest |Wednesday, 23 October 1996|Persons|3 |
+Midwest |Thursday, 24 October 1996 |Persons|7 |
+Midwest |Friday, 25 October 1996 |Persons|4 |
+Midwest |Saturday, 26 October 1996 |Persons|7 |
+Midwest |Sunday, 27 October 1996 |Persons|4 |
+Midwest |Monday, 28 October 1996 |Persons|3 |
+Midwest |Tuesday, 29 October 1996 |Persons|3 |
+Midwest |Wednesday, 30 October 1996|Persons|3 |
+Midwest |Thursday, 31 October 1996 |Persons|4 |
+Midwest |Friday, 01 November 1996 |Persons|4 |
+South |Tuesday, 01 October 1996 |Persons|16 |
+South |Wednesday, 02 October 1996|Persons|16 |
+South |Thursday, 03 October 1996 |Persons|14 |
+South |Friday, 04 October 1996 |Persons|13 |
+South |Saturday, 05 October 1996 |Persons|12 |
+South |Sunday, 06 October 1996 |Persons|10 |
+South |Monday, 07 October 1996 |Persons|13 |
+South |Tuesday, 08 October 1996 |Persons|15 |
+South |Wednesday, 09 October 1996|Persons|8 |
+South |Thursday, 10 October 1996 |Persons|17 |
+South |Friday, 11 October 1996 |Persons|11 |
+South |Saturday, 12 October 1996 |Persons|12 |
+South |Sunday, 13 October 1996 |Persons|10 |
+South |Monday, 14 October 1996 |Persons|16 |
+South |Wednesday, 16 October 1996|Persons|13 |
+South |Thursday, 17 October 1996 |Persons|16 |
+South |Friday, 18 October 1996 |Persons|6 |
+South |Saturday, 19 October 1996 |Persons|16 |
+South |Sunday, 20 October 1996 |Persons|11 |
+South |Monday, 21 October 1996 |Persons|9 |
+South |Tuesday, 22 October 1996 |Persons|9 |
+South |Wednesday, 23 October 1996|Persons|8 |
+South |Thursday, 24 October 1996 |Persons|13 |
+South |Friday, 25 October 1996 |Persons|21 |
+South |Saturday, 26 October 1996 |Persons|7 |
+South |Sunday, 27 October 1996 |Persons|13 |
+South |Monday, 28 October 1996 |Persons|18 |
+South |Tuesday, 29 October 1996 |Persons|12 |
+South |Wednesday, 30 October 1996|Persons|12 |
+South |Thursday, 31 October 1996 |Persons|13 |
+South |Friday, 01 November 1996 |Persons|14 |
+All regions |<All> |Male |342 |
+All regions |<All> |Female |464 |
+All regions |Tuesday, 01 October 1996 |Persons|31 |
+All regions |Wednesday, 02 October 1996|Persons|28 |
+All regions |Thursday, 03 October 1996 |Persons|32 |
+All regions |Friday, 04 October 1996 |Persons|23 |
+All regions |Saturday, 05 October 1996 |Persons|30 |
+All regions |Sunday, 06 October 1996 |Persons|17 |
+All regions |Monday, 07 October 1996 |Persons|30 |
+All regions |Tuesday, 08 October 1996 |Persons|30 |
+All regions |Wednesday, 09 October 1996|Persons|26 |
+All regions |Thursday, 10 October 1996 |Persons|30 |
+All regions |Friday, 11 October 1996 |Persons|22 |
+All regions |Saturday, 12 October 1996 |Persons|29 |
+All regions |Sunday, 13 October 1996 |Persons|29 |
+All regions |Monday, 14 October 1996 |Persons|32 |
+All regions |Wednesday, 16 October 1996|Persons|18 |
+All regions |Thursday, 17 October 1996 |Persons|35 |
+All regions |Friday, 18 October 1996 |Persons|18 |
+All regions |Saturday, 19 October 1996 |Persons|31 |
+All regions |Sunday, 20 October 1996 |Persons|20 |
+All regions |Monday, 21 October 1996 |Persons|21 |
+All regions |Tuesday, 22 October 1996 |Persons|28 |
+All regions |Wednesday, 23 October 1996|Persons|18 |
+All regions |Thursday, 24 October 1996 |Persons|29 |
+All regions |Friday, 25 October 1996 |Persons|30 |
+All regions |Saturday, 26 October 1996 |Persons|20 |
+All regions |Sunday, 27 October 1996 |Persons|25 |
+All regions |Monday, 28 October 1996 |Persons|28 |
+All regions |Tuesday, 29 October 1996 |Persons|24 |
+All regions |Wednesday, 30 October 1996|Persons|22 |
+All regions |Thursday, 31 October 1996 |Persons|23 |
+All regions |Friday, 01 November 1996 |Persons|27 |
+Northeast |<All> |Persons|284 |
+Midwest |<All> |Persons|128 |
+South |<All> |Persons|394 |
+All regions |<All> |Persons|806 |
+
+
+That took 0.871 seconds.
+
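+# A minimal sketch of the short-circuiting noted in the comment for #22e
+# (assumed logic, not SOOM's actual internals): each condition is evaluated
+# only against rows that survived the preceding conditions, so the 'not'
+# clause above only had to be tested against rows already inside the date range.
+def apply_conditions(nrows, conditions):
+    rows = set(range(nrows))
+    for cond in conditions:
+        rows = set(r for r in rows if cond(r))  # shrinking candidate set
+    return rows
+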
+##############################################################################
+
+#23a. Demonstration of multivalue columns (these have no direct SQL equivalent - SQL would need
+# a join between two tables - but they are perfect for association rule data mining).
+# A where clause is used because the cardinality of the diagnosis (ICD9CM) columns is rather large.
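+# An illustrative sketch of the multivalue idea (assumed, not SOOM's actual
+# internals): each row of a multivalue column such as diagnosis_all holds a
+# tuple of ICD9CM codes, and "startingwith" matches a row if any element of
+# the tuple starts with the given prefix.
+def starting_with_any(codes, prefix):
+    return any(code.startswith(prefix) for code in codes)
+
+starting_with_any(("250.00", "401.9"), "250")  # -> True: this row matches
+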
+>>> nhds.makewhereset("diabetes",var1="diagnosis1",op1="startingwith",val1="250")
+>>> nhds.makewhereset("diabetes_all",var1="diagnosis_all",op1="startingwith",val1="250")
+>>> print nhds.univar(var1="diagnosis1",wheresetname="diabetes",printit=1,allcalc=1)
+>>> print nhds.univar(var1="diagnosis_all",wheresetname="diabetes_all",printit=1,allcalc=1)
+Assembling whereset diabetes containing 564 elements took 0.113 seconds.
+
+That took 0.113 seconds.
+
+##############################################################################
+
+Assembling whereset diabetes_all containing 4411 elements took 4.796 seconds.
+
+That took 4.796 seconds.
+
+##############################################################################
+
+wheresetload(): memory mapping of diabetes containing 564 elements took 0.004 seconds.
+
+
+Univariate summary derived from diagnosis1 column in nhds dataset
+Using whereset diabetes containing 564 elements
+univar(): Univariate summary created in 0.070 seconds.
+
+ | |
+ | |
+Grouped | |
+by | |
+Diagnosis Code 1 |Frequency|
+-------------------------------------------------------------------------------------------------------------------------------------|---------|
+250.00 Diabetes mellitus - Diabetes mellitus without mention of complication, type II [non-insulin dependent type] [NIDDM type] [adul|42 |
+250.01 Diabetes mellitus - Diabetes mellitus without mention of complication, type I [insulin dependent type][IDDM type] [juvenile ty|32 |
+250.02 Diabetes mellitus - Diabetes mellitus without mention of complication, type II [non-insulin dependent type] [NIDDM type] [adul|51 |
+250.03 Diabetes mellitus - Diabetes mellitus without mention of complication, type I [insulin dependent type][IDDM type] [juvenile ty|16 |
+250.10 Diabetes mellitus - Diabetes with ketoacidosis, type II [non-insulin dependent type] [NIDDM type] [adult-onset type] or unspec|14 |
+250.11 Diabetes mellitus - Diabetes with ketoacidosis, type I [insulin dependent type][IDDM type] [juvenile type], not stated as unco|37 |
+250.12 Diabetes mellitus - Diabetes with ketoacidosis, type II [non-insulin dependent type] [NIDDM type] [adult-onset type] or unspe|10 |
+250.13 Diabetes mellitus - Diabetes with ketoacidosis, type I [insulin dependent type][IDDM type] [juvenile type], uncontrolled |34 |
+250.20 Diabetes mellitus - Diabetes with hyperosmolarity, type II [non-insulin dependent type] [NIDDM type] [adult-onset type] or un|5 |
+250.22 Diabetes mellitus - Diabetes with hyperosmolarity, type II [non-insulin dependent type] [NIDDM type] [adult-onset type] or uns|7 |
+250.23 Diabetes mellitus - Diabetes with hyperosmolarity, type I [insulin dependent type][IDDM type] [juvenile type], uncontrolled |2 |
+250.30 Diabetes mellitus - Diabetes with other coma, type II [non-insulin dependent type] [NIDDM type] [adult-onset type] or unspecif|6 |
+250.31 Diabetes mellitus - Diabetes with other coma, type I [insulin dependent type][IDDM type] [juvenile type], not stated as uncont|1 |
+250.33 Diabetes mellitus - Diabetes with other coma, type I [insulin dependent type][IDDM type] [juvenile type], uncontrolled |3 |
+250.40 Diabetes mellitus - Diabetes with renal complications, type II [non-insulin dependent type] [NIDDM type] [adult-onset type] or|11 |
+250.41 Diabetes mellitus - Diabetes with renal complications, type I [insulin dependent type] [IDDM type] [juvenile type], not stated|16 |
+250.42 Diabetes mellitus - Diabetes with renal complications, type II [non-insulin dependent type] [NIDDM type] [adult-onset type] or|3 |
+250.43 Diabetes mellitus - Diabetes with renal complications, type I [insulin dependent type] [IDDM type] [juvenile type], uncontroll|4 |
+250.50 Diabetes mellitus - Diabetes with ophthalmic complications, type II [non-insulin dependent type] [NIDDM type] [adult-onset typ|4 |
+250.51 Diabetes mellitus - Diabetes with ophthalmic complications, type I [insulin dependent type][IDDM type] [juvenile type], not st|2 |
+250.52 Diabetes mellitus - Diabetes with ophthalmic complications, type II [non-insulin dependent type] [NIDDM type] [adult-onset typ|1 |
+250.60 Diabetes mellitus - Diabetes with neurological complications, type II [non-insulin dependent type] [NIDDM type] [adult-onset |18 |
+250.61 Diabetes mellitus - Diabetes with neurological complications, type I [insulin dependent type][IDDM type] [juvenile type], not |23 |
+250.62 Diabetes mellitus - Diabetes with neurological complications, type II [non-insulin dependent type] [NIDDM type] [adult-onset t|6 |
+250.63 Diabetes mellitus - Diabetes with neurological complications, type I [insulin dependent type] [IDDM type] [juvenile type], unc|8 |
+250.70 Diabetes mellitus - Diabetes with peripheral circulatory disorders, type II [non-insulin dependent type] [NIDDM type] [adult-o|33 |
+250.71 Diabetes mellitus - Diabetes with peripheral circulatory disorders, type I [insulin dependent type][IDDM type] [juvenile type]|28 |
+250.72 Diabetes mellitus - Diabetes with peripheral circulatory disorders, type II [non-insulin dependent type] [NIDDM type] [adult-|10 |
+250.73 Diabetes mellitus - Diabetes with peripheral circulatory disorders, type I [insulin dependent type][IDDM type] [juvenile type]|4 |
+250.80 Diabetes mellitus - Diabetes with other specified complications, type II [non-insulin dependent type] [NIDDM type] [adult-onse|44 |
+250.81 Diabetes mellitus - Diabetes with other specified complications, type I [insulin dependent type][IDDM type] [juvenile type], n|25 |
+250.82 Diabetes mellitus - Diabetes with other specified complications, type II [non-insulin dependent type] [NIDDM type] [adult-onse|17 |
+250.83 Diabetes mellitus - Diabetes with other specified complications, type I [insulin dependent type][IDDM type] [juvenile type], u|17 |
+250.90 Diabetes mellitus - Diabetes with unspecified complication, type II [non-insulin dependent type] [NIDDM type] [adult-onset ty|6 |
+250.91 Diabetes mellitus - Diabetes with unspecified complication, type I [insulin dependent type][IDDM type] [juvenile type], not st|2 |
+250.92 Diabetes mellitus - Diabetes with unspecified complication, type II [non-insulin dependent type] [NIDDM type] [adult-onset typ|18 |
+250.93 Diabetes mellitus - Diabetes with unspecified complication, type I [insulin dependent type][IDDM type] [juvenile type], uncont|4 |
+<All> |564 |
+
+
+That took 0.082 seconds.
+
+##############################################################################
+
+wheresetload(): memory mapping of diabetes_all containing 4411 elements took 0.004 seconds.
+
+
+Univariate summary derived from diagnosis_all column in nhds dataset
+Using whereset diabetes_all containing 4411 elements
+univar(): Univariate summary created in 0.132 seconds.
+
+ | |
+ | |
+Grouped | |
+by | |
+Diagnosis codes 1-7 |Frequency|
+-------------------------------------------------------------------------------------------------------------------------------------|---------|
+250.41 Diabetes mellitus - Diabetes with renal complications, type I [insulin dependent type] [IDDM type] [juvenile type], not stated|160 |
+250.90 Diabetes mellitus - Diabetes with unspecified complication, type II [non-insulin dependent type] [NIDDM type] [adult-onset ty|30 |
+250.91 Diabetes mellitus - Diabetes with unspecified complication, type I [insulin dependent type][IDDM type] [juvenile type], not st|7 |
+250.92 Diabetes mellitus - Diabetes with unspecified complication, type II [non-insulin dependent type] [NIDDM type] [adult-onset typ|38 |
+250.93 Diabetes mellitus - Diabetes with unspecified complication, type I [insulin dependent type][IDDM type] [juvenile type], uncont|19 |
+250.83 Diabetes mellitus - Diabetes with other specified complications, type I [insulin dependent type][IDDM type] [juvenile type], u|25 |
+250.82 Diabetes mellitus - Diabetes with other specified complications, type II [non-insulin dependent type] [NIDDM type] [adult-onse|36 |
+250.81 Diabetes mellitus - Diabetes with other specified complications, type I [insulin dependent type][IDDM type] [juvenile type], n|57 |
+250.80 Diabetes mellitus - Diabetes with other specified complications, type II [non-insulin dependent type] [NIDDM type] [adult-onse|107 |
+250.72 Diabetes mellitus - Diabetes with peripheral circulatory disorders, type II [non-insulin dependent type] [NIDDM type] [adult-|20 |
+250.73 Diabetes mellitus - Diabetes with peripheral circulatory disorders, type I [insulin dependent type][IDDM type] [juvenile type]|8 |
+250.51 Diabetes mellitus - Diabetes with ophthalmic complications, type I [insulin dependent type][IDDM type] [juvenile type], not st|66 |
+250.61 Diabetes mellitus - Diabetes with neurological complications, type I [insulin dependent type][IDDM type] [juvenile type], not |122 |
+250.60 Diabetes mellitus - Diabetes with neurological complications, type II [non-insulin dependent type] [NIDDM type] [adult-onset |112 |
+250.63 Diabetes mellitus - Diabetes with neurological complications, type I [insulin dependent type] [IDDM type] [juvenile type], unc|32 |
+250.62 Diabetes mellitus - Diabetes with neurological complications, type II [non-insulin dependent type] [NIDDM type] [adult-onset t|19 |
+250.50 Diabetes mellitus - Diabetes with ophthalmic complications, type II [non-insulin dependent type] [NIDDM type] [adult-onset typ|51 |
+250.52 Diabetes mellitus - Diabetes with ophthalmic complications, type II [non-insulin dependent type] [NIDDM type] [adult-onset typ|8 |
+250.53 Diabetes mellitus - Diabetes with ophthalmic complications, type I [insulin dependent type][IDDM type] [juvenile type], uncont|15 |
+250.43 Diabetes mellitus - Diabetes with renal complications, type I [insulin dependent type] [IDDM type] [juvenile type], uncontroll|23 |
+250.42 Diabetes mellitus - Diabetes with renal complications, type II [non-insulin dependent type] [NIDDM type] [adult-onset type] or|15 |
+250.40 Diabetes mellitus - Diabetes with renal complications, type II [non-insulin dependent type] [NIDDM type] [adult-onset type] or|129 |
+250.70 Diabetes mellitus - Diabetes with peripheral circulatory disorders, type II [non-insulin dependent type] [NIDDM type] [adult-o|78 |
+250.33 Diabetes mellitus - Diabetes with other coma, type I [insulin dependent type][IDDM type] [juvenile type], uncontrolled |3 |
+250.30 Diabetes mellitus - Diabetes with other coma, type II [non-insulin dependent type] [NIDDM type] [adult-onset type] or unspecif|6 |
+250.31 Diabetes mellitus - Diabetes with other coma, type I [insulin dependent type][IDDM type] [juvenile type], not stated as uncont|1 |
+250.20 Diabetes mellitus - Diabetes with hyperosmolarity, type II [non-insulin dependent type] [NIDDM type] [adult-onset type] or un|6 |
+250.23 Diabetes mellitus - Diabetes with hyperosmolarity, type I [insulin dependent type][IDDM type] [juvenile type], uncontrolled |3 |
+250.22 Diabetes mellitus - Diabetes with hyperosmolarity, type II [non-insulin dependent type] [NIDDM type] [adult-onset type] or uns|14 |
+250.10 Diabetes mellitus - Diabetes with ketoacidosis, type II [non-insulin dependent type] [NIDDM type] [adult-onset type] or unspec|20 |
+250.11 Diabetes mellitus - Diabetes with ketoacidosis, type I [insulin dependent type][IDDM type] [juvenile type], not stated as unco|45 |
+250.12 Diabetes mellitus - Diabetes with ketoacidosis, type II [non-insulin dependent type] [NIDDM type] [adult-onset type] or unspe|17 |
+250.03 Diabetes mellitus - Diabetes mellitus without mention of complication, type I [insulin dependent type][IDDM type] [juvenile ty|95 |
+250.02 Diabetes mellitus - Diabetes mellitus without mention of complication, type II [non-insulin dependent type] [NIDDM type] [adul|199 |
+250.01 Diabetes mellitus - Diabetes mellitus without mention of complication, type I [insulin dependent type][IDDM type] [juvenile ty|779 |
+250.00 Diabetes mellitus - Diabetes mellitus without mention of complication, type II [non-insulin dependent type] [NIDDM type] [adul|2203 |
+250.13 Diabetes mellitus - Diabetes with ketoacidosis, type I [insulin dependent type][IDDM type] [juvenile type], uncontrolled |39 |
+250.71 Diabetes mellitus - Diabetes with peripheral circulatory disorders, type I [insulin dependent type][IDDM type] [juvenile type]|65 |
+<All> |4411 |
+
+
+That took 0.149 seconds.
+
+##############################################################################
+
+#24a. Demonstration of partial result caching.
+# We can precalculate a particular summary or contingency table.
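+# Later summaries over these columns (and presumably over subsets of them) can
+# then be served from the cached result rather than recomputed from raw data.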
+>>> nhds.precalculate(var1="sex",var2="race",var3="drg")
+colload(): memory mapping of drg took 0.010 seconds.
+
diff --git a/demo/loaders/__init__.py b/demo/loaders/__init__.py
new file mode 100644
index 0000000..2f83bc0
--- /dev/null
+++ b/demo/loaders/__init__.py
@@ -0,0 +1,16 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: __init__.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/demo/loaders/__init__.py,v $
diff --git a/demo/loaders/epitools.py b/demo/loaders/epitools.py
new file mode 100644
index 0000000..644f227
--- /dev/null
+++ b/demo/loaders/epitools.py
@@ -0,0 +1,283 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# Define and load example data from the EpiTools web site
+# http://www.medepi.net/epitools/examples.html
+#
+# Python standard modules
+import os
+
+# 3rd Party Modules
+# http://www.egenix.com/files/python/eGenix-mx-Extensions.html
+from mx.DateTime import DateTime
+# http://www.pfdubois.com/numpy/
+import Numeric, MA
+
+# SOOM modules
+from SOOMv0.Sources.CSV import *
+from SOOMv0 import *
+
+# Project modules
+import urlfetch
+
+# http://www.medepi.net/epitools/examples.html
+oswego_url = 'http://www.medepi.net/data/oswego/oswego.txt'
+wnv_url = 'http://www.medepi.net/data/wnv/wnv2004-12-14.txt'
+
+
+def fetch(scratchdir, dataset, url):
+ """
+ Caching URL fetcher, returns filename of cached file.
+ """
+ filename = os.path.join(scratchdir, os.path.basename(url))
+ print 'Fetching dataset %r from %s' % (dataset, url)
+ urlfetch.fetch(url, filename)
+ return filename
+
+# Add 5 year age groups (1 = '0 - 4 yrs' through 18 = '85+ yrs')
+def agegrp(age):
+    # Recode ages into 5-year group numbers, working down from the highest
+    # threshold; recoded cells are held as negative values so that they fail
+    # all later greater_equal tests, and the sign is flipped at the end.
+    agrp = age
+    group = 18
+    for threshold in range(85, -1, -5):
+        agrp = MA.choose(MA.greater_equal(agrp, threshold), (agrp, -float(group)))
+        group -= 1
+    return -agrp.astype(MA.Int)
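+# For example, an age of 37 first satisfies the 35 threshold, giving group 8,
+# which agegrp_outtrans below renders as '35 - 39 yrs'.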
+
+agegrp_outtrans = {
+ None: 'Unknown',
+ 0: 'All ages',
+ 1: '0 - 4 yrs',
+ 2: '5 - 9 yrs',
+ 3: '10 - 14 yrs',
+ 4: '15 - 19 yrs',
+ 5: '20 - 24 yrs',
+ 6: '25 - 29 yrs',
+ 7: '30 - 34 yrs',
+ 8: '35 - 39 yrs',
+ 9: '40 - 44 yrs',
+ 10: '45 - 49 yrs',
+ 11: '50 - 54 yrs',
+ 12: '55 - 59 yrs',
+ 13: '60 - 64 yrs',
+ 14: '65 - 69 yrs',
+ 15: '70 - 74 yrs',
+ 16: '75 - 79 yrs',
+ 17: '80 - 84 yrs',
+ 18: '85+ yrs',
+}
+
+yn_outtrans = {
+ None: 'Unknown',
+ 'Y': 'Yes',
+ 'N': 'No'
+}
+
+def oswego_xform_post(row_dict):
+ # create synthetic meal date/time
+ meal_time = row_dict['meal_time']
+    if meal_time is not None:
+        mt_hrmin, mt_ampm = meal_time.split()
+        mt_hr, mt_min = map(int, mt_hrmin.split(':'))
+        # convert the 12-hour clock to 24-hour time (12 AM -> 0, 12 PM -> 12)
+        if mt_ampm == 'PM' and mt_hr != 12:
+            mt_hr += 12
+        elif mt_ampm == 'AM' and mt_hr == 12:
+            mt_hr = 0
+        row_dict['meal_datetime'] = DateTime(1980, 4, 18, mt_hr, mt_min)
+ else:
+ row_dict['meal_datetime'] = None
+ # create synthetic onset date/time
+ onset_time = row_dict['onset_time']
+ onset_date = row_dict['onset_date']
+    if onset_time is not None and onset_date is not None:
+        on_hrmin, on_ampm = onset_time.split()
+        on_hr, on_min = map(int, on_hrmin.split(':'))
+        # convert the 12-hour clock to 24-hour time (12 AM -> 0, 12 PM -> 12)
+        if on_ampm == 'PM' and on_hr != 12:
+            on_hr += 12
+        elif on_ampm == 'AM' and on_hr == 12:
+            on_hr = 0
+        on_mth, on_day = map(int, onset_date.split('/'))
+        row_dict['onset_datetime'] = DateTime(1980, on_mth, on_day, on_hr, on_min)
+ else:
+ row_dict['onset_datetime'] = None
+ return row_dict
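+# For example, a meal_time of '7:30 PM' becomes DateTime(1980, 4, 18, 19, 30);
+# the date component is fixed because the loader pins all meals to 18 April 1980.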
+
+
+def oswego_load(options):
+ oswego_columns = (
+ DataSourceColumn('id', ordinalpos=0),
+ DataSourceColumn('age', ordinalpos=1),
+ DataSourceColumn('sex', ordinalpos=2),
+ DataSourceColumn('meal_time', ordinalpos=3),
+ DataSourceColumn('ill', ordinalpos=4),
+ DataSourceColumn('onset_date', ordinalpos=5),
+ DataSourceColumn('onset_time', ordinalpos=6),
+ DataSourceColumn('baked_ham', ordinalpos=7),
+ DataSourceColumn('spinach', ordinalpos=8),
+ DataSourceColumn('mashed_potato', ordinalpos=9),
+ DataSourceColumn('cabbage_salad', ordinalpos=10),
+ DataSourceColumn('jello', ordinalpos=11),
+ DataSourceColumn('rolls', ordinalpos=12),
+ DataSourceColumn('brown_bread', ordinalpos=13),
+ DataSourceColumn('milk', ordinalpos=14),
+ DataSourceColumn('coffee', ordinalpos=15),
+ DataSourceColumn('water', ordinalpos=16),
+ DataSourceColumn('cakes', ordinalpos=17),
+ DataSourceColumn('vanilla_ice_cream', ordinalpos=18),
+ DataSourceColumn('chocolate_ice_cream', ordinalpos=19),
+ DataSourceColumn('fruit_salad', ordinalpos=20),
+ )
+
+ filename = fetch(options.scratchdir, 'oswego', oswego_url)
+ oswego_source = CSVDataSource('oswego', oswego_columns, delimiter=' ',
+ filename=filename, header_rows=1,
+ missing='NA',
+ xformpost=oswego_xform_post)
+
+ oswego = makedataset('oswego',
+ label='Oswego County gastrointestinal illness investigation data',
+ desc='Dataset derived from: ' + oswego_url)
+
+ oswego.addcolumn('id', label='ID Number', datatype=int, coltype='identity')
+ oswego.addcolumn('age', label='Age (years)', datatype=int, coltype='scalar')
+ oswego.addcolumn('sex', label='Sex',
+ datatype='recode', coltype='categorical',
+ outtrans={'M': 'Male', 'F': 'Female'})
+ oswego.addcolumn('meal_time', label='Meal time', datatype=str)
+ oswego.addcolumn('meal_datetime', label='Meal date/time',
+ datatype='datetime', coltype='ordinal')
+ oswego.addcolumn('ill', label='Became ill?',
+ datatype='recode', coltype='categorical',
+ outtrans=yn_outtrans)
+ oswego.addcolumn('onset_date', label='Onset date', datatype=str)
+ oswego.addcolumn('onset_time', label='Onset time', datatype=str)
+ oswego.addcolumn('onset_datetime', label='Illness onset date/time',
+ datatype='datetime', coltype='ordinal')
+ oswego.addcolumn('baked_ham', label='Ate baked ham?',
+ datatype='recode', coltype='categorical',
+ outtrans=yn_outtrans)
+ oswego.addcolumn('spinach', label='Ate spinach?',
+ datatype='recode', coltype='categorical',
+ outtrans=yn_outtrans)
+ oswego.addcolumn('mashed_potato', label='Ate mashed potato?',
+ datatype='recode', coltype='categorical',
+ outtrans=yn_outtrans)
+ oswego.addcolumn('cabbage_salad', label='Ate cabbage salad?',
+ datatype='recode', coltype='categorical',
+ outtrans=yn_outtrans)
+ oswego.addcolumn('jello', label='Ate jello?',
+ datatype='recode', coltype='categorical',
+ outtrans=yn_outtrans)
+ oswego.addcolumn('rolls', label='Ate bread rolls?',
+ datatype='recode', coltype='categorical',
+ outtrans=yn_outtrans)
+ oswego.addcolumn('brown_bread', label='Ate brown bread?',
+ datatype='recode', coltype='categorical',
+ outtrans=yn_outtrans)
+ oswego.addcolumn('milk', label='Drank milk?',
+ datatype='recode', coltype='categorical',
+ outtrans=yn_outtrans)
+ oswego.addcolumn('coffee', label='Drank coffee?',
+ datatype='recode', coltype='categorical',
+ outtrans=yn_outtrans)
+ oswego.addcolumn('water', label='Drank water?',
+ datatype='recode', coltype='categorical',
+ outtrans=yn_outtrans)
+ oswego.addcolumn('cakes', label='Ate cakes?',
+ datatype='recode', coltype='categorical',
+ outtrans=yn_outtrans)
+ oswego.addcolumn('vanilla_ice_cream', label='Ate vanilla ice cream?',
+ datatype='recode', coltype='categorical',
+ outtrans=yn_outtrans)
+ oswego.addcolumn('chocolate_ice_cream', label='Ate chocolate ice cream?',
+ datatype='recode', coltype='categorical',
+ outtrans=yn_outtrans)
+ oswego.addcolumn('fruit_salad', label='Ate fruit salad?',
+ datatype='recode', coltype='categorical',
+ outtrans=yn_outtrans)
+
+ oswego.initialise()
+ oswego.loaddata(oswego_source)
+ oswego.finalise()
+
+ oswego.derivedcolumn(dername='agegrp', dercols=('age', ), derfunc=agegrp,
+ coltype='ordinal', datatype=int,
+ outtrans=agegrp_outtrans, label='Age Group',
+ all_value=0, all_label='All ages')
+ oswego.save()
+
+
+def wnv_load(options):
+ wnv_columns = (
+ DataSourceColumn('id', ordinalpos=0),
+ DataSourceColumn('county', ordinalpos=1),
+ DataSourceColumn('age', ordinalpos=2),
+ DataSourceColumn('sex', ordinalpos=3),
+ DataSourceColumn('syndrome', ordinalpos=4),
+ DataSourceColumn('onset_date', format='iso-date', ordinalpos=5),
+ DataSourceColumn('test_date', format='iso-date', ordinalpos=6),
+ DataSourceColumn('death', ordinalpos=7),
+ )
+
+ filename = fetch(options.scratchdir, 'wnv', wnv_url)
+ wnv_source = CSVDataSource('wnv', wnv_columns,
+ filename=filename, header_rows=1,
+ missing='NA')
+
+ wnv = makedataset('wnv',
+ label='Human cases of West Nile virus, California 2004',
+ desc='Dataset derived from: ' + wnv_url)
+
+ wnv.addcolumn('id', label='ID Number', datatype=int, coltype='identity')
+ wnv.addcolumn('county', label='County', datatype='recode',
+ coltype='categorical', missingvalues={None: 'Unknown'})
+ wnv.addcolumn('age', label='Age (years)', datatype=int, coltype='scalar')
+ wnv.addcolumn('sex', label='Sex', datatype='recode', coltype='categorical',
+ outtrans={'M': 'Male', 'F': 'Female'},
+ missingvalues={None: 'Unknown'},
+ all_value='P', all_label='Persons')
+ wnv.addcolumn('syndrome', label='Syndrome', datatype='recode',
+ coltype='categorical',
+ missingvalues={None: 'Unknown'},
+ outtrans={'WNF': 'West Nile fever', 'WNND': 'West Nile neuroinvasive disease'})
+ wnv.addcolumn('onset_date', label='Onset date', datatype='date',
+ coltype='date')
+ wnv.addcolumn('test_date', label='Test date', datatype='date',
+ coltype='date')
+ wnv.addcolumn('death', label='Died?',
+ datatype='recode', coltype='categorical',
+ outtrans=yn_outtrans)
+
+ wnv.initialise()
+ wnv.loaddata(wnv_source)
+ wnv.finalise()
+
+ wnv.derivedcolumn(dername='agegrp', dercols=('age', ), derfunc=agegrp,
+ coltype='ordinal', datatype=int,
+ outtrans=agegrp_outtrans, label='Age Group',
+ all_value=0, all_label='All ages')
+ wnv.save()
+
+def load(options):
+ oswego_load(options)
+ wnv_load(options)
diff --git a/demo/loaders/make_icd9cm_fmt.py b/demo/loaders/make_icd9cm_fmt.py
new file mode 100644
index 0000000..01dcaa1
--- /dev/null
+++ b/demo/loaders/make_icd9cm_fmt.py
@@ -0,0 +1,114 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: make_icd9cm_fmt.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/demo/loaders/make_icd9cm_fmt.py,v $
+
+# Standard Python Libraries
+import zipfile
+import os
+
+# Project modules
+import urlfetch
+import rtfparse
+
+srcfiles = [
+ 'ftp://ftp.cdc.gov/pub/Health_Statistics/NCHS/Publications/ICD9-CM/2002/Ptab03.ZIP',
+ 'ftp://ftp.cdc.gov/pub/Health_Statistics/NCHS/Publications/ICD9-CM/2002/Dtab03.ZIP',
+]
+
+
+def fetch(datadir, url):
+ dstfile = os.path.join(datadir, os.path.basename(url))
+ print 'Fetching %s from %s' % (dstfile, url)
+ urlfetch.fetch(url, dstfile)
+ return dstfile
+
+
+def extract(icd9_map, node):
+ """
+ Find groups containing a bookmark, and extract first two text
+ nodes. This relies on the structure of the source RTF files
+ being stable - good enough for now.
+ """
+
+ def child_has_bookmark(node):
+ if node.token == rtfparse.GROUP:
+ for child in node.children:
+ if (child.token == rtfparse.CWORD
+ and child.args[0] == 'bkmkstart'):
+ return True
+ return False
+
+ # States
+ SEEKING, FOUND, NEXT = range(3)
+ state = SEEKING
+ for child in node.children:
+ if child.token == rtfparse.TEXT:
+ text = child.args.strip()
+ if state == SEEKING and child_has_bookmark(child):
+ state = FOUND
+ elif state == FOUND and child.token == rtfparse.TEXT and text:
+ code = text
+ state = NEXT
+ elif state == NEXT and child.token == rtfparse.TEXT and text:
+ icd9_map[code] = '%s - %s' % (code, text)
+ state = SEEKING
+ if child.token == rtfparse.GROUP:
+ extract(icd9_map, child)
+
+
+def make_icd9cm_fmt(datadir, verbose=False):
+ icd9cm_fmt = {}
+ for url in srcfiles:
+ fn = fetch(datadir, url)
+
+ if verbose: print 'Decompressing %s' % fn
+ zf = zipfile.ZipFile(fn)
+ try:
+ first_member = zf.namelist()[0]
+ data = zf.read(first_member)
+ finally:
+ zf.close()
+
+ if verbose: print 'Parsing %s' % fn
+ root = rtfparse.parse(data)
+ del data
+
+ if verbose: print 'Extracting %s' % fn
+ extract(icd9cm_fmt, root)
+ return icd9cm_fmt
+
+if __name__ == '__main__':
+ import optparse
+ import cPickle
+
+ p = optparse.OptionParser()
+ p.add_option('-d', '--dl-dir', dest='datadir',
+ help='download directory', default='.')
+ p.add_option('-v', '--verbose', dest='verbose', action='store_true',
+ help='verbose', default=False)
+ options, args = p.parse_args()
+ try:
+ outfile, = args
+ except ValueError:
+ p.error('specify output filename')
+ icd9cm = make_icd9cm_fmt(options.datadir, options.verbose)
+
+ if options.verbose: print 'Writing %s' % outfile
+ f = open(outfile, 'wb')
+ try:
+ cPickle.dump(icd9cm, f, -1)
+ finally:
+ f.close()
diff --git a/demo/loaders/nhds.py b/demo/loaders/nhds.py
new file mode 100644
index 0000000..cc41878
--- /dev/null
+++ b/demo/loaders/nhds.py
@@ -0,0 +1,699 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# Define and load NHDS sample data
+#
+# $Id: nhds.py 2692 2007-06-08 03:03:15Z tchur $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/demo/loaders/nhds.py,v $
+
+# Python standard modules
+import os
+import sys
+import random
+import cPickle
+
+# 3rd Party Modules
+# http://www.egenix.com/files/python/eGenix-mx-Extensions.html
+from mx.DateTime import DateTime
+# http://www.pfdubois.com/numpy/
+import Numeric, MA
+
+
+# SOOM modules
+from SOOMv0.Sources.Columns import *
+from SOOMv0 import *
+
+
+# Project modules
+import urlfetch
+
+icd9cm_fmt_file = 'icd9cm_fmt.pkl'
+
+# http://www.cdc.gov/nchs/about/major/hdasd/nhds.htm
+nhds_data = {
+ 'nhds96': 'ftp://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NHDS/nhds96/nhds96.zip',
+ 'nhds97': 'ftp://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NHDS/nhds97/nhds97.zip',
+ 'nhds98': 'ftp://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NHDS/nhds98/nhds98.zip',
+ 'nhds99': 'ftp://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NHDS/nhds99/nhds99.zip',
+ 'nhds00': 'ftp://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NHDS/nhds00/nhds00.zip',
+ 'nhds01': 'ftp://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NHDS/nhds01/nhds01.zip',
+ 'nhds02': 'ftp://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NHDS/nhds02/NHDS02PU.zip',
+ 'nhds03': 'ftp://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NHDS/nhds03/NHDS03PU.zip',
+ 'nhds04': 'ftp://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NHDS/nhds04/NHDS04PU.zip',
+ 'nhds05': 'ftp://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NHDS/nhds05/NHDS05.PU.TXT',
+}
+
+def fetch(scratchdir, dataset):
+ """
+ Caching URL fetcher, returns filename of cached file.
+ """
+ url = nhds_data[dataset]
+ filename = os.path.join(scratchdir, os.path.basename(url))
+ print "Fetching dataset %r from %s" % (dataset, url)
+ urlfetch.fetch(url, filename)
+ return filename
+
+diag_cols = 'diagnosis1', 'diagnosis2', 'diagnosis3', 'diagnosis4', \
+ 'diagnosis5', 'diagnosis6', 'diagnosis7'
+proc_cols = 'procedure1', 'procedure2', 'procedure3', 'procedure4'
+
+day_cnt = [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
+
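+# The NHDS fixed-width fields pad short ICD-9-CM codes with trailing dashes
+# (as the tests below assume); the reformatters strip that padding and insert
+# the decimal point: for diagnoses, "250--" -> "250", "2500-" -> "250.0",
+# "25000" -> "250.00" and "E8129" -> "E812.9"; for procedures, "451-" ->
+# "45.1" and "4516" -> "45.16".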
+def reformat_icd9cm_diag_codes(data):
+    if data and len(data) == 5:
+        if data[3] == "-":
+            return data[0:3]
+        elif data[4] == "-":
+            if data[0] == "E":
+                return data[0:4]
+            else:
+                return ".".join((data[0:3], data[3]))
+        else:
+            if data[0] == "E":
+                return ".".join((data[0:4], data[4]))
+            else:
+                # keep both trailing digits, e.g. "25000" -> "250.00", matching
+                # the five-digit codes shown in the demo output above
+                return ".".join((data[0:3], data[3:5]))
+    else:
+        return data
+
+def reformat_icd9cm_proc_codes(data):
+ if data and len(data) == 4:
+ if data[2] == "-":
+ return data[0:2]
+ elif data[3] == "-":
+ return ".".join((data[0:2],data[2]))
+ else:
+ return ".".join((data[0:2],data[2:4]))
+ else:
+ return data
+
+def nhds_xform_pre(row_dict):
+ diags = []
+ for col in diag_cols:
+ diag = reformat_icd9cm_diag_codes(row_dict[col])
+ row_dict[col] = diag
+ if diag:
+ diags.append(diag)
+ row_dict['diagnosis_all'] = tuple(diags)
+ procs = []
+ for col in proc_cols:
+ proc = reformat_icd9cm_proc_codes(row_dict[col])
+ row_dict[col] = proc
+ if proc:
+ procs.append(proc)
+ row_dict['procedure_all'] = tuple(procs)
+ return row_dict
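+# For example, a record with diagnosis1 "25000" and diagnosis2 "4019-" (all
+# other diagnosis fields blank) comes out with diagnosis1 "250.00",
+# diagnosis2 "401.9" and diagnosis_all ("250.00", "401.9").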
+
+def nhds_xform_post(row_dict):
+ yr = int(row_dict['year'])
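+    # two-digit survey year: values below 10 pivot to the 2000s (nhds00-05),
+    # the rest to the 1900s (nhds96-99)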
+ if yr < 10:
+ yr += 2000
+ else:
+ yr += 1900
+ row_dict['year'] = yr
+ mth = row_dict['month_of_admission']
+ if mth:
+ month = int(mth)
+ if month != 99:
+ row_dict['month_year'] = DateTime(yr, month, 1)
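+            # April (month 4) is deliberately left with missing randomdate and
+            # month_year values, presumably to exercise missing-data handling
+            # in the demo dataset.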
+ if month != 4:
+ day = random.randint(1, day_cnt[month])
+ row_dict['randomdate'] = DateTime(yr, month, day)
+ else:
+ row_dict['randomdate'] = None
+ row_dict['month_year'] = None
+ else:
+ row_dict['month_year'] = None
+ row_dict['randomdate'] = None
+ return row_dict
+
+
+def random_missing():
+    # Return a uniform random value, but substitute the sentinel 999.0
+    # roughly 10% of the time (presumably to synthesise missing data).
+    n = random.random()
+    if 0.7 <= n < 0.8:
+        return 999.0
+    else:
+        return n
+
+
+def nhds96_source(options):
+ nhds96_columns = (
+ DataSourceColumn("year",label="Survey Year",startpos=1,length=2,posbase=1),
+ DataSourceColumn("newborn_status",label="Newborn status",startpos=3,length=1,posbase=1),
+ DataSourceColumn("age_units",label="Age Units",startpos=4,length=1,posbase=1),
+ DataSourceColumn("raw_age",label="Raw age",startpos=5,length=2,posbase=1),
+ DataSourceColumn("sex",label="Sex",startpos=7,length=1,posbase=1),
+ DataSourceColumn("race1",label="Race",startpos=8,length=1,posbase=1),
+ DataSourceColumn("marital_status",label="Marital Status",startpos=9,length=1,posbase=1),
+ DataSourceColumn("month_of_admission",label="Month of Admission",startpos=10,length=2,posbase=1),
+ DataSourceColumn("discharge_status",label="Discharge Status",startpos=12,length=1,posbase=1),
+ DataSourceColumn("days_of_care",label="Days of Care",startpos=13,length=4,posbase=1),
+ DataSourceColumn("los_flag",label="Length of Stay Flag",startpos=17,length=1,posbase=1),
+ DataSourceColumn("geog_region",label="Geographic Region",startpos=18,length=1,posbase=1),
+ DataSourceColumn("num_beds",label="Number of Beds",startpos=19,length=1,posbase=1),
+ DataSourceColumn("hosp_ownership",label="Hospital Ownership",startpos=20,length=1,posbase=1),
+ DataSourceColumn("analysis_wgt",label="Analysis Weight",startpos=21,length=5,posbase=1),
+ DataSourceColumn("prin_src_payment1",label="Principal Expected Source of Payment",startpos=26,length=1,posbase=1),
+ DataSourceColumn("sec_src_payment1",label="Secondary Expected Source of Payment",startpos=27,length=1,posbase=1),
+ DataSourceColumn("diagnosis1",label="Diagnosis Code 1",startpos=28,length=5,posbase=1),
+ DataSourceColumn("diagnosis2",label="Diagnosis Code 2",startpos=33,length=5,posbase=1),
+ DataSourceColumn("diagnosis3",label="Diagnosis Code 3",startpos=38,length=5,posbase=1),
+ DataSourceColumn("diagnosis4",label="Diagnosis Code 4",startpos=43,length=5,posbase=1),
+ DataSourceColumn("diagnosis5",label="Diagnosis Code 5",startpos=48,length=5,posbase=1),
+ DataSourceColumn("diagnosis6",label="Diagnosis Code 6",startpos=53,length=5,posbase=1),
+ DataSourceColumn("diagnosis7",label="Diagnosis Code 7",startpos=58,length=5,posbase=1),
+ DataSourceColumn("procedure1",label="Procedure Code 1",startpos=63,length=4,posbase=1),
+ DataSourceColumn("procedure2",label="Procedure Code 2",startpos=67,length=4,posbase=1),
+ DataSourceColumn("procedure3",label="Procedure Code 3",startpos=71,length=4,posbase=1),
+ DataSourceColumn("procedure4",label="Procedure Code 4",startpos=75,length=4,posbase=1),
+ DataSourceColumn("drg",label="DRG V13.0",startpos=79,length=3,posbase=1)
+ )
+ return ColumnDataSource("nhds96", nhds96_columns, filename=fetch(options.scratchdir, "nhds96"), header_rows=0, label="National Hospital Discharge Survey 1996", xformpre=nhds_xform_pre, xformpost=nhds_xform_post)
+
+
+def nhds97_source(options):
+ nhds97_columns = (
+ DataSourceColumn("year",label="Survey Year",startpos=1,length=2,posbase=1),
+ DataSourceColumn("newborn_status",label="Newborn status",startpos=3,length=1,posbase=1),
+ DataSourceColumn("age_units",label="Age Units",startpos=4,length=1,posbase=1),
+ DataSourceColumn("raw_age",label="Raw age",startpos=5,length=2,posbase=1),
+ DataSourceColumn("sex",label="Sex",startpos=7,length=1,posbase=1),
+ DataSourceColumn("race1",label="Race",startpos=8,length=1,posbase=1),
+ DataSourceColumn("marital_status",label="Marital Status",startpos=9,length=1,posbase=1),
+ DataSourceColumn("month_of_admission",label="Month of Admission",startpos=10,length=2,posbase=1),
+ DataSourceColumn("discharge_status",label="Discharge Status",startpos=12,length=1,posbase=1),
+ DataSourceColumn("days_of_care",label="Days of Care",startpos=13,length=4,posbase=1),
+ DataSourceColumn("los_flag",label="Length of Stay Flag",startpos=17,length=1,posbase=1),
+ DataSourceColumn("geog_region",label="Geographic Region",startpos=18,length=1,posbase=1),
+ DataSourceColumn("num_beds",label="Number of Beds",startpos=19,length=1,posbase=1),
+ DataSourceColumn("hosp_ownership",label="Hospital Ownership",startpos=20,length=1,posbase=1),
+ DataSourceColumn("analysis_wgt",label="Analysis Weight",startpos=21,length=5,posbase=1),
+ DataSourceColumn("prin_src_payment1",label="Principal Expected Source of Payment",startpos=26,length=1,posbase=1),
+ DataSourceColumn("sec_src_payment1",label="Secondary Expected Source of Payment",startpos=27,length=1,posbase=1),
+ DataSourceColumn("diagnosis1",label="Diagnosis Code 1",startpos=28,length=5,posbase=1),
+ DataSourceColumn("diagnosis2",label="Diagnosis Code 2",startpos=33,length=5,posbase=1),
+ DataSourceColumn("diagnosis3",label="Diagnosis Code 3",startpos=38,length=5,posbase=1),
+ DataSourceColumn("diagnosis4",label="Diagnosis Code 4",startpos=43,length=5,posbase=1),
+ DataSourceColumn("diagnosis5",label="Diagnosis Code 5",startpos=48,length=5,posbase=1),
+ DataSourceColumn("diagnosis6",label="Diagnosis Code 6",startpos=53,length=5,posbase=1),
+ DataSourceColumn("diagnosis7",label="Diagnosis Code 7",startpos=58,length=5,posbase=1),
+ DataSourceColumn("procedure1",label="Procedure Code 1",startpos=63,length=4,posbase=1),
+ DataSourceColumn("procedure2",label="Procedure Code 2",startpos=67,length=4,posbase=1),
+ DataSourceColumn("procedure3",label="Procedure Code 3",startpos=71,length=4,posbase=1),
+ DataSourceColumn("procedure4",label="Procedure Code 4",startpos=75,length=4,posbase=1),
+ DataSourceColumn("drg",label="DRG V14.0",startpos=79,length=3,posbase=1)
+ )
+ return ColumnDataSource("nhds97", nhds97_columns, filename=fetch(options.scratchdir, "nhds97"), header_rows=0,label="National Hospital Discharge Survey 1997", xformpre=nhds_xform_pre, xformpost=nhds_xform_post)
+
+
+def nhds98_source(options):
+ nhds98_columns = (
+ DataSourceColumn("year",label="Survey Year",startpos=1,length=2,posbase=1),
+ DataSourceColumn("newborn_status",label="Newborn status",startpos=3,length=1,posbase=1),
+ DataSourceColumn("age_units",label="Age Units",startpos=4,length=1,posbase=1),
+ DataSourceColumn("raw_age",label="Raw age",startpos=5,length=2,posbase=1),
+ DataSourceColumn("sex",label="Sex",startpos=7,length=1,posbase=1),
+ DataSourceColumn("race1",label="Race",startpos=8,length=1,posbase=1),
+ DataSourceColumn("marital_status",label="Marital Status",startpos=9,length=1,posbase=1),
+ DataSourceColumn("month_of_admission",label="Month of Admission",startpos=10,length=2,posbase=1),
+ DataSourceColumn("discharge_status",label="Discharge Status",startpos=12,length=1,posbase=1),
+ DataSourceColumn("days_of_care",label="Days of Care",startpos=13,length=4,posbase=1),
+ DataSourceColumn("los_flag",label="Length of Stay Flag",startpos=17,length=1,posbase=1),
+ DataSourceColumn("geog_region",label="Geographic Region",startpos=18,length=1,posbase=1),
+ DataSourceColumn("num_beds",label="Number of Beds",startpos=19,length=1,posbase=1),
+ DataSourceColumn("hosp_ownership",label="Hospital Ownership",startpos=20,length=1,posbase=1),
+ DataSourceColumn("analysis_wgt",label="Analysis Weight",startpos=21,length=5,posbase=1),
+ DataSourceColumn("diagnosis1",label="Diagnosis Code 1",startpos=28,length=5,posbase=1),
+ DataSourceColumn("diagnosis2",label="Diagnosis Code 2",startpos=33,length=5,posbase=1),
+ DataSourceColumn("diagnosis3",label="Diagnosis Code 3",startpos=38,length=5,posbase=1),
+ DataSourceColumn("diagnosis4",label="Diagnosis Code 4",startpos=43,length=5,posbase=1),
+ DataSourceColumn("diagnosis5",label="Diagnosis Code 5",startpos=48,length=5,posbase=1),
+ DataSourceColumn("diagnosis6",label="Diagnosis Code 6",startpos=53,length=5,posbase=1),
+ DataSourceColumn("diagnosis7",label="Diagnosis Code 7",startpos=58,length=5,posbase=1),
+ DataSourceColumn("procedure1",label="Procedure Code 1",startpos=63,length=4,posbase=1),
+ DataSourceColumn("procedure2",label="Procedure Code 2",startpos=67,length=4,posbase=1),
+ DataSourceColumn("procedure3",label="Procedure Code 3",startpos=71,length=4,posbase=1),
+ DataSourceColumn("procedure4",label="Procedure Code 4",startpos=75,length=4,posbase=1),
+ DataSourceColumn("prin_src_payment2",label="Principal Expected Source of Payment",startpos=79,length=2,posbase=1),
+ DataSourceColumn("sec_src_payment2",label="Secondary Expected Source of Payment",startpos=81,length=2,posbase=1),
+ DataSourceColumn("drg",label="DRG V15.0",startpos=83,length=3,posbase=1)
+ )
+ return ColumnDataSource("nhds98", nhds98_columns, filename=fetch(options.scratchdir, "nhds98"), header_rows=0,label="National Hospital Discharge Survey 1998", xformpre=nhds_xform_pre, xformpost=nhds_xform_post)
+
+
+def nhds99_source(options):
+ nhds99_columns = (
+ DataSourceColumn("year",label="Survey Year",startpos=1,length=2,posbase=1),
+ DataSourceColumn("newborn_status",label="Newborn status",startpos=3,length=1,posbase=1),
+ DataSourceColumn("age_units",label="Age Units",startpos=4,length=1,posbase=1),
+ DataSourceColumn("raw_age",label="Raw age",startpos=5,length=2,posbase=1),
+ DataSourceColumn("sex",label="Sex",startpos=7,length=1,posbase=1),
+ DataSourceColumn("race1",label="Race",startpos=8,length=1,posbase=1),
+ DataSourceColumn("marital_status",label="Marital Status",startpos=9,length=1,posbase=1),
+ DataSourceColumn("month_of_admission",label="Month of Admission",startpos=10,length=2,posbase=1),
+ DataSourceColumn("discharge_status",label="Discharge Status",startpos=12,length=1,posbase=1),
+ DataSourceColumn("days_of_care",label="Days of Care",startpos=13,length=4,posbase=1),
+ DataSourceColumn("los_flag",label="Length of Stay Flag",startpos=17,length=1,posbase=1),
+ DataSourceColumn("geog_region",label="Geographic Region",startpos=18,length=1,posbase=1),
+ DataSourceColumn("num_beds",label="Number of Beds",startpos=19,length=1,posbase=1),
+ DataSourceColumn("hosp_ownership",label="Hospital Ownership",startpos=20,length=1,posbase=1),
+ DataSourceColumn("analysis_wgt",label="Analysis Weight",startpos=21,length=5,posbase=1),
+ DataSourceColumn("diagnosis1",label="Diagnosis Code 1",startpos=28,length=5,posbase=1),
+ DataSourceColumn("diagnosis2",label="Diagnosis Code 2",startpos=33,length=5,posbase=1),
+ DataSourceColumn("diagnosis3",label="Diagnosis Code 3",startpos=38,length=5,posbase=1),
+ DataSourceColumn("diagnosis4",label="Diagnosis Code 4",startpos=43,length=5,posbase=1),
+ DataSourceColumn("diagnosis5",label="Diagnosis Code 5",startpos=48,length=5,posbase=1),
+ DataSourceColumn("diagnosis6",label="Diagnosis Code 6",startpos=53,length=5,posbase=1),
+ DataSourceColumn("diagnosis7",label="Diagnosis Code 7",startpos=58,length=5,posbase=1),
+ DataSourceColumn("procedure1",label="Procedure Code 1",startpos=63,length=4,posbase=1),
+ DataSourceColumn("procedure2",label="Procedure Code 2",startpos=67,length=4,posbase=1),
+ DataSourceColumn("procedure3",label="Procedure Code 3",startpos=71,length=4,posbase=1),
+ DataSourceColumn("procedure4",label="Procedure Code 4",startpos=75,length=4,posbase=1),
+ DataSourceColumn("prin_src_payment2",label="Principal Expected Source of Payment",startpos=79,length=2,posbase=1),
+ DataSourceColumn("sec_src_payment2",label="Secondary Expected Source of Payment",startpos=81,length=2,posbase=1),
+ DataSourceColumn("drg",label="DRG V16.0",startpos=83,length=3,posbase=1)
+ )
+ return ColumnDataSource("nhds99", nhds99_columns, filename=fetch(options.scratchdir, "nhds99"), header_rows=0,label="National Hospital Discharge Survey 1999", xformpre=nhds_xform_pre, xformpost=nhds_xform_post)
+
+
+def nhds00_source(options):
+ nhds00_columns = (
+ DataSourceColumn("year",label="Survey Year",startpos=1,length=2,posbase=1),
+ DataSourceColumn("newborn_status",label="Newborn status",startpos=3,length=1,posbase=1),
+ DataSourceColumn("age_units",label="Age Units",startpos=4,length=1,posbase=1),
+ DataSourceColumn("raw_age",label="Raw age",startpos=5,length=2,posbase=1),
+ DataSourceColumn("sex",label="Sex",startpos=7,length=1,posbase=1),
+ DataSourceColumn("race2",label="Race",startpos=8,length=1,posbase=1),
+ DataSourceColumn("marital_status",label="Marital Status",startpos=9,length=1,posbase=1),
+ DataSourceColumn("month_of_admission",label="Month of Admission",startpos=10,length=2,posbase=1),
+ DataSourceColumn("discharge_status",label="Discharge Status",startpos=12,length=1,posbase=1),
+ DataSourceColumn("days_of_care",label="Days of Care",startpos=13,length=4,posbase=1),
+ DataSourceColumn("los_flag",label="Length of Stay Flag",startpos=17,length=1,posbase=1),
+ DataSourceColumn("geog_region",label="Geographic Region",startpos=18,length=1,posbase=1),
+ DataSourceColumn("num_beds",label="Number of Beds",startpos=19,length=1,posbase=1),
+ DataSourceColumn("hosp_ownership",label="Hospital Ownership",startpos=20,length=1,posbase=1),
+ DataSourceColumn("analysis_wgt",label="Analysis Weight",startpos=21,length=5,posbase=1),
+ DataSourceColumn("diagnosis1",label="Diagnosis Code 1",startpos=28,length=5,posbase=1),
+ DataSourceColumn("diagnosis2",label="Diagnosis Code 2",startpos=33,length=5,posbase=1),
+ DataSourceColumn("diagnosis3",label="Diagnosis Code 3",startpos=38,length=5,posbase=1),
+ DataSourceColumn("diagnosis4",label="Diagnosis Code 4",startpos=43,length=5,posbase=1),
+ DataSourceColumn("diagnosis5",label="Diagnosis Code 5",startpos=48,length=5,posbase=1),
+ DataSourceColumn("diagnosis6",label="Diagnosis Code 6",startpos=53,length=5,posbase=1),
+ DataSourceColumn("diagnosis7",label="Diagnosis Code 7",startpos=58,length=5,posbase=1),
+ DataSourceColumn("procedure1",label="Procedure Code 1",startpos=63,length=4,posbase=1),
+ DataSourceColumn("procedure2",label="Procedure Code 2",startpos=67,length=4,posbase=1),
+ DataSourceColumn("procedure3",label="Procedure Code 3",startpos=71,length=4,posbase=1),
+ DataSourceColumn("procedure4",label="Procedure Code 4",startpos=75,length=4,posbase=1),
+ DataSourceColumn("prin_src_payment2",label="Principal Expected Source of Payment",startpos=79,length=2,posbase=1),
+ DataSourceColumn("sec_src_payment2",label="Secondary Expected Source of Payment",startpos=81,length=2,posbase=1),
+ DataSourceColumn("drg",label="DRG V16.0",startpos=83,length=3,posbase=1)
+ )
+ return ColumnDataSource("nhds00", nhds00_columns, filename=fetch(options.scratchdir, "nhds00"), header_rows=0,label="National Hospital Discharge Survey 2000", xformpre=nhds_xform_pre, xformpost=nhds_xform_post)
+
+
+def nhds01_source(options):
+ nhds01_columns = (
+ DataSourceColumn("year",label="Survey Year",startpos=1,length=2,posbase=1),
+ DataSourceColumn("newborn_status",label="Newborn status",startpos=3,length=1,posbase=1),
+ DataSourceColumn("age_units",label="Age Units",startpos=4,length=1,posbase=1),
+ DataSourceColumn("raw_age",label="Raw age",startpos=5,length=2,posbase=1),
+ DataSourceColumn("sex",label="Sex",startpos=7,length=1,posbase=1),
+ DataSourceColumn("race2",label="Race",startpos=8,length=1,posbase=1),
+ DataSourceColumn("marital_status",label="Marital Status",startpos=9,length=1,posbase=1),
+ DataSourceColumn("month_of_admission",label="Month of Admission",startpos=10,length=2,posbase=1),
+ DataSourceColumn("discharge_status",label="Discharge Status",startpos=12,length=1,posbase=1),
+ DataSourceColumn("days_of_care",label="Days of Care",startpos=13,length=4,posbase=1),
+ DataSourceColumn("los_flag",label="Length of Stay Flag",startpos=17,length=1,posbase=1),
+ DataSourceColumn("geog_region",label="Geographic Region",startpos=18,length=1,posbase=1),
+ DataSourceColumn("num_beds",label="Number of Beds",startpos=19,length=1,posbase=1),
+ DataSourceColumn("hosp_ownership",label="Hospital Ownership",startpos=20,length=1,posbase=1),
+ DataSourceColumn("analysis_wgt",label="Analysis Weight",startpos=21,length=5,posbase=1),
+ DataSourceColumn("diagnosis1",label="Diagnosis Code 1",startpos=28,length=5,posbase=1),
+ DataSourceColumn("diagnosis2",label="Diagnosis Code 2",startpos=33,length=5,posbase=1),
+ DataSourceColumn("diagnosis3",label="Diagnosis Code 3",startpos=38,length=5,posbase=1),
+ DataSourceColumn("diagnosis4",label="Diagnosis Code 4",startpos=43,length=5,posbase=1),
+ DataSourceColumn("diagnosis5",label="Diagnosis Code 5",startpos=48,length=5,posbase=1),
+ DataSourceColumn("diagnosis6",label="Diagnosis Code 6",startpos=53,length=5,posbase=1),
+ DataSourceColumn("diagnosis7",label="Diagnosis Code 7",startpos=58,length=5,posbase=1),
+ DataSourceColumn("procedure1",label="Procedure Code 1",startpos=63,length=4,posbase=1),
+ DataSourceColumn("procedure2",label="Procedure Code 2",startpos=67,length=4,posbase=1),
+ DataSourceColumn("procedure3",label="Procedure Code 3",startpos=71,length=4,posbase=1),
+ DataSourceColumn("procedure4",label="Procedure Code 4",startpos=75,length=4,posbase=1),
+ DataSourceColumn("prin_src_payment2",label="Principal Expected Source of Payment",startpos=79,length=2,posbase=1),
+ DataSourceColumn("sec_src_payment2",label="Secondary Expected Source of Payment",startpos=81,length=2,posbase=1),
+ DataSourceColumn("drg",label="DRG V16.0",startpos=83,length=3,posbase=1),
+ DataSourceColumn("admission_type",label="Type of admission",startpos=86,length=1,posbase=1),
+ DataSourceColumn("admission_source",label="Source of admission",startpos=87,length=2,posbase=1)
+ )
+ return ColumnDataSource("nhds01", nhds01_columns, filename=fetch(options.scratchdir, "nhds01"), header_rows=0,label="National Hospital Discharge Survey 2001", xformpre=nhds_xform_pre, xformpost=nhds_xform_post)
+
+
+def nhds02_source(options):
+ nhds02_columns = (
+ DataSourceColumn("year",label="Survey Year",startpos=1,length=2,posbase=1),
+ DataSourceColumn("newborn_status",label="Newborn status",startpos=3,length=1,posbase=1),
+ DataSourceColumn("age_units",label="Age Units",startpos=4,length=1,posbase=1),
+ DataSourceColumn("raw_age",label="Raw age",startpos=5,length=2,posbase=1),
+ DataSourceColumn("sex",label="Sex",startpos=7,length=1,posbase=1),
+ DataSourceColumn("race2",label="Race",startpos=8,length=1,posbase=1),
+ DataSourceColumn("marital_status",label="Marital Status",startpos=9,length=1,posbase=1),
+ DataSourceColumn("month_of_admission",label="Month of Admission",startpos=10,length=2,posbase=1),
+ DataSourceColumn("discharge_status",label="Discharge Status",startpos=12,length=1,posbase=1),
+ DataSourceColumn("days_of_care",label="Days of Care",startpos=13,length=4,posbase=1),
+ DataSourceColumn("los_flag",label="Length of Stay Flag",startpos=17,length=1,posbase=1),
+ DataSourceColumn("geog_region",label="Geographic Region",startpos=18,length=1,posbase=1),
+ DataSourceColumn("num_beds",label="Number of Beds",startpos=19,length=1,posbase=1),
+ DataSourceColumn("hosp_ownership",label="Hospital Ownership",startpos=20,length=1,posbase=1),
+ DataSourceColumn("analysis_wgt",label="Analysis Weight",startpos=21,length=5,posbase=1),
+ DataSourceColumn("diagnosis1",label="Diagnosis Code 1",startpos=28,length=5,posbase=1),
+ DataSourceColumn("diagnosis2",label="Diagnosis Code 2",startpos=33,length=5,posbase=1),
+ DataSourceColumn("diagnosis3",label="Diagnosis Code 3",startpos=38,length=5,posbase=1),
+ DataSourceColumn("diagnosis4",label="Diagnosis Code 4",startpos=43,length=5,posbase=1),
+ DataSourceColumn("diagnosis5",label="Diagnosis Code 5",startpos=48,length=5,posbase=1),
+ DataSourceColumn("diagnosis6",label="Diagnosis Code 6",startpos=53,length=5,posbase=1),
+ DataSourceColumn("diagnosis7",label="Diagnosis Code 7",startpos=58,length=5,posbase=1),
+ DataSourceColumn("procedure1",label="Procedure Code 1",startpos=63,length=4,posbase=1),
+ DataSourceColumn("procedure2",label="Procedure Code 2",startpos=67,length=4,posbase=1),
+ DataSourceColumn("procedure3",label="Procedure Code 3",startpos=71,length=4,posbase=1),
+ DataSourceColumn("procedure4",label="Procedure Code 4",startpos=75,length=4,posbase=1),
+ DataSourceColumn("prin_src_payment2",label="Principal Expected Source of Payment",startpos=79,length=2,posbase=1),
+ DataSourceColumn("sec_src_payment2",label="Secondary Expected Source of Payment",startpos=81,length=2,posbase=1),
+ DataSourceColumn("drg",label="DRG V16.0",startpos=83,length=3,posbase=1),
+ DataSourceColumn("admission_type",label="Type of admission",startpos=86,length=1,posbase=1),
+ DataSourceColumn("admission_source",label="Source of admission",startpos=87,length=2,posbase=1)
+ )
+ return ColumnDataSource("nhds02", nhds02_columns, filename=fetch(options.scratchdir, "nhds02"), header_rows=0,label="National Hospital Discharge Survey 2002", xformpre=nhds_xform_pre, xformpost=nhds_xform_post)
+
+def nhds03_source(options):
+ nhds03_columns = (
+ DataSourceColumn("year",label="Survey Year",startpos=1,length=2,posbase=1),
+ DataSourceColumn("newborn_status",label="Newborn status",startpos=3,length=1,posbase=1),
+ DataSourceColumn("age_units",label="Age Units",startpos=4,length=1,posbase=1),
+ DataSourceColumn("raw_age",label="Raw age",startpos=5,length=2,posbase=1),
+ DataSourceColumn("sex",label="Sex",startpos=7,length=1,posbase=1),
+ DataSourceColumn("race2",label="Race",startpos=8,length=1,posbase=1),
+ DataSourceColumn("marital_status",label="Marital Status",startpos=9,length=1,posbase=1),
+ DataSourceColumn("month_of_admission",label="Month of Admission",startpos=10,length=2,posbase=1),
+ DataSourceColumn("discharge_status",label="Discharge Status",startpos=12,length=1,posbase=1),
+ DataSourceColumn("days_of_care",label="Days of Care",startpos=13,length=4,posbase=1),
+ DataSourceColumn("los_flag",label="Length of Stay Flag",startpos=17,length=1,posbase=1),
+ DataSourceColumn("geog_region",label="Geographic Region",startpos=18,length=1,posbase=1),
+ DataSourceColumn("num_beds",label="Number of Beds",startpos=19,length=1,posbase=1),
+ DataSourceColumn("hosp_ownership",label="Hospital Ownership",startpos=20,length=1,posbase=1),
+ DataSourceColumn("analysis_wgt",label="Analysis Weight",startpos=21,length=5,posbase=1),
+ DataSourceColumn("diagnosis1",label="Diagnosis Code 1",startpos=28,length=5,posbase=1),
+ DataSourceColumn("diagnosis2",label="Diagnosis Code 2",startpos=33,length=5,posbase=1),
+ DataSourceColumn("diagnosis3",label="Diagnosis Code 3",startpos=38,length=5,posbase=1),
+ DataSourceColumn("diagnosis4",label="Diagnosis Code 4",startpos=43,length=5,posbase=1),
+ DataSourceColumn("diagnosis5",label="Diagnosis Code 5",startpos=48,length=5,posbase=1),
+ DataSourceColumn("diagnosis6",label="Diagnosis Code 6",startpos=53,length=5,posbase=1),
+ DataSourceColumn("diagnosis7",label="Diagnosis Code 7",startpos=58,length=5,posbase=1),
+ DataSourceColumn("procedure1",label="Procedure Code 1",startpos=63,length=4,posbase=1),
+ DataSourceColumn("procedure2",label="Procedure Code 2",startpos=67,length=4,posbase=1),
+ DataSourceColumn("procedure3",label="Procedure Code 3",startpos=71,length=4,posbase=1),
+ DataSourceColumn("procedure4",label="Procedure Code 4",startpos=75,length=4,posbase=1),
+ DataSourceColumn("prin_src_payment2",label="Principal Expected Source of Payment",startpos=79,length=2,posbase=1),
+ DataSourceColumn("sec_src_payment2",label="Secondary Expected Source of Payment",startpos=81,length=2,posbase=1),
+ DataSourceColumn("drg",label="DRG V16.0",startpos=83,length=3,posbase=1),
+ DataSourceColumn("admission_type",label="Type of admission",startpos=86,length=1,posbase=1),
+ DataSourceColumn("admission_source",label="Source of admission",startpos=87,length=2,posbase=1)
+ )
+ return ColumnDataSource("nhds03", nhds03_columns, filename=fetch(options.scratchdir, "nhds03"), header_rows=0,label="National Hospital Discharge Survey 2003", xformpre=nhds_xform_pre, xformpost=nhds_xform_post)
+
+def nhds04_source(options):
+ nhds04_columns = (
+ DataSourceColumn("year",label="Survey Year",startpos=1,length=2,posbase=1),
+ DataSourceColumn("newborn_status",label="Newborn status",startpos=3,length=1,posbase=1),
+ DataSourceColumn("age_units",label="Age Units",startpos=4,length=1,posbase=1),
+ DataSourceColumn("raw_age",label="Raw age",startpos=5,length=2,posbase=1),
+ DataSourceColumn("sex",label="Sex",startpos=7,length=1,posbase=1),
+ DataSourceColumn("race2",label="Race",startpos=8,length=1,posbase=1),
+ DataSourceColumn("marital_status",label="Marital Status",startpos=9,length=1,posbase=1),
+ DataSourceColumn("month_of_admission",label="Month of Admission",startpos=10,length=2,posbase=1),
+ DataSourceColumn("discharge_status",label="Discharge Status",startpos=12,length=1,posbase=1),
+ DataSourceColumn("days_of_care",label="Days of Care",startpos=13,length=4,posbase=1),
+ DataSourceColumn("los_flag",label="Length of Stay Flag",startpos=17,length=1,posbase=1),
+ DataSourceColumn("geog_region",label="Geographic Region",startpos=18,length=1,posbase=1),
+ DataSourceColumn("num_beds",label="Number of Beds",startpos=19,length=1,posbase=1),
+ DataSourceColumn("hosp_ownership",label="Hospital Ownership",startpos=20,length=1,posbase=1),
+ DataSourceColumn("analysis_wgt",label="Analysis Weight",startpos=21,length=5,posbase=1),
+ DataSourceColumn("diagnosis1",label="Diagnosis Code 1",startpos=28,length=5,posbase=1),
+ DataSourceColumn("diagnosis2",label="Diagnosis Code 2",startpos=33,length=5,posbase=1),
+ DataSourceColumn("diagnosis3",label="Diagnosis Code 3",startpos=38,length=5,posbase=1),
+ DataSourceColumn("diagnosis4",label="Diagnosis Code 4",startpos=43,length=5,posbase=1),
+ DataSourceColumn("diagnosis5",label="Diagnosis Code 5",startpos=48,length=5,posbase=1),
+ DataSourceColumn("diagnosis6",label="Diagnosis Code 6",startpos=53,length=5,posbase=1),
+ DataSourceColumn("diagnosis7",label="Diagnosis Code 7",startpos=58,length=5,posbase=1),
+ DataSourceColumn("procedure1",label="Procedure Code 1",startpos=63,length=4,posbase=1),
+ DataSourceColumn("procedure2",label="Procedure Code 2",startpos=67,length=4,posbase=1),
+ DataSourceColumn("procedure3",label="Procedure Code 3",startpos=71,length=4,posbase=1),
+ DataSourceColumn("procedure4",label="Procedure Code 4",startpos=75,length=4,posbase=1),
+ DataSourceColumn("prin_src_payment2",label="Principal Expected Source of Payment",startpos=79,length=2,posbase=1),
+ DataSourceColumn("sec_src_payment2",label="Secondary Expected Source of Payment",startpos=81,length=2,posbase=1),
+ DataSourceColumn("drg",label="DRG V16.0",startpos=83,length=3,posbase=1),
+ DataSourceColumn("admission_type",label="Type of admission",startpos=86,length=1,posbase=1),
+ DataSourceColumn("admission_source",label="Source of admission",startpos=87,length=2,posbase=1)
+ )
+ return ColumnDataSource("nhds04", nhds04_columns, filename=fetch(options.scratchdir, "nhds04"), header_rows=0,label="National Hospital Discharge Survey 2004", xformpre=nhds_xform_pre, xformpost=nhds_xform_post)
+
+def nhds05_source(options):
+ nhds05_columns = (
+ DataSourceColumn("year",label="Survey Year",startpos=1,length=2,posbase=1),
+ DataSourceColumn("newborn_status",label="Newborn status",startpos=3,length=1,posbase=1),
+ DataSourceColumn("age_units",label="Age Units",startpos=4,length=1,posbase=1),
+ DataSourceColumn("raw_age",label="Raw age",startpos=5,length=2,posbase=1),
+ DataSourceColumn("sex",label="Sex",startpos=7,length=1,posbase=1),
+ DataSourceColumn("race2",label="Race",startpos=8,length=1,posbase=1),
+ DataSourceColumn("marital_status",label="Marital Status",startpos=9,length=1,posbase=1),
+ DataSourceColumn("month_of_admission",label="Month of Admission",startpos=10,length=2,posbase=1),
+ DataSourceColumn("discharge_status",label="Discharge Status",startpos=12,length=1,posbase=1),
+ DataSourceColumn("days_of_care",label="Days of Care",startpos=13,length=4,posbase=1),
+ DataSourceColumn("los_flag",label="Length of Stay Flag",startpos=17,length=1,posbase=1),
+ DataSourceColumn("geog_region",label="Geographic Region",startpos=18,length=1,posbase=1),
+ DataSourceColumn("num_beds",label="Number of Beds",startpos=19,length=1,posbase=1),
+ DataSourceColumn("hosp_ownership",label="Hospital Ownership",startpos=20,length=1,posbase=1),
+ DataSourceColumn("analysis_wgt",label="Analysis Weight",startpos=21,length=5,posbase=1),
+ DataSourceColumn("diagnosis1",label="Diagnosis Code 1",startpos=28,length=5,posbase=1),
+ DataSourceColumn("diagnosis2",label="Diagnosis Code 2",startpos=33,length=5,posbase=1),
+ DataSourceColumn("diagnosis3",label="Diagnosis Code 3",startpos=38,length=5,posbase=1),
+ DataSourceColumn("diagnosis4",label="Diagnosis Code 4",startpos=43,length=5,posbase=1),
+ DataSourceColumn("diagnosis5",label="Diagnosis Code 5",startpos=48,length=5,posbase=1),
+ DataSourceColumn("diagnosis6",label="Diagnosis Code 6",startpos=53,length=5,posbase=1),
+ DataSourceColumn("diagnosis7",label="Diagnosis Code 7",startpos=58,length=5,posbase=1),
+ DataSourceColumn("procedure1",label="Procedure Code 1",startpos=63,length=4,posbase=1),
+ DataSourceColumn("procedure2",label="Procedure Code 2",startpos=67,length=4,posbase=1),
+ DataSourceColumn("procedure3",label="Procedure Code 3",startpos=71,length=4,posbase=1),
+ DataSourceColumn("procedure4",label="Procedure Code 4",startpos=75,length=4,posbase=1),
+ DataSourceColumn("prin_src_payment2",label="Principal Expected Source of Payment",startpos=79,length=2,posbase=1),
+ DataSourceColumn("sec_src_payment2",label="Secondary Expected Source of Payment",startpos=81,length=2,posbase=1),
+ DataSourceColumn("drg",label="DRG V22.0",startpos=83,length=3,posbase=1),
+ DataSourceColumn("admission_type",label="Type of admission",startpos=86,length=1,posbase=1),
+ DataSourceColumn("admission_source",label="Source of admission",startpos=87,length=2,posbase=1)
+ )
+ return ColumnDataSource("nhds05", nhds05_columns, filename=fetch(options.scratchdir, "nhds05"), header_rows=0,label="National Hospital Discharge Survey 2005", xformpre=nhds_xform_pre, xformpost=nhds_xform_post)
+
+def get_icd9cm_fmt(datadir):
+ fmt_file = os.path.join(datadir, icd9cm_fmt_file)
+ if not os.path.exists(fmt_file):
+ # We run the rtf parsing in a separate process as it appears the
+ # regexp module is leaking serious amounts of memory.
+ pid = os.fork()
+ if pid:
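+        # parent: block until the child finishes and abort if it failed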
+ pid, status = os.waitpid(pid, 0)
+ if status:
+ sys.exit(1)
+ else:
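+        # child: parse the RTF codes, pickle the result, then exit
+        # immediately via os._exit() so no cleanup handlers run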
+ import make_icd9cm_fmt
+ icd9cm_fmt = make_icd9cm_fmt.make_icd9cm_fmt(datadir, verbose=1)
+ f = open(fmt_file, 'wb')
+ try:
+ cPickle.dump(icd9cm_fmt, f, -1)
+ finally:
+ f.close()
+ os._exit(0)
+ f = open(fmt_file, 'rb')
+ try:
+ return cPickle.load(f)
+ finally:
+ f.close()
+
+def make_dataset(options):
+ icd9cm_fmt = get_icd9cm_fmt(options.scratchdir)
+ nhds = makedataset("nhds",
+ label="National Hospital Discharge Surveys 1996-2005",
+ weightcol='analysis_wgt')
+
+ nhds.addcolumn("year",label="Survey Year",datatype=int,coltype="ordinal",all_value=0,all_label="All years")
+ nhds.addcolumn("newborn_status",label="Newborn status",outtrans={1:"Newborn",2:"Not newborn"},datatype=int,coltype="categorical")
+ nhds.addcolumn("age_units",label="Age Units",datatype=int,coltype="categorical",outtrans={1:"Years",2:"Months",3:"Days"})
+ nhds.addcolumn("raw_age",label="Raw age (years, months or days)",datatype=int,coltype="scalar")
+ nhds.addcolumn("sex",label="Sex",outtrans={1:"Male",2:"Female"},datatype=int,coltype="categorical",all_value=0,all_label="Persons")
+ nhds.addcolumn("race1",label="Race (1996-99)",outtrans={1:"White",2:"Black",3:"American Indian/Eskimo",4:"Asian/Pacific Islander",5:"Other",9:"Not stated"},datatype=int,coltype="categorical",all_value=0,all_label="All races")
+ nhds.addcolumn("race2",label="Race (2000-05)",outtrans={1:"White",2:"Black",3:"American Indian/Eskimo",4:"Asian",5:"Native Hawaiian or other Pacific Islander",6:"Other",8:"Multiple race indicated",9:"Not stated"},datatype=int,coltype="categorical",all_value=0,all_label="All races")
+ nhds.addcolumn("marital_status",label="Marital Status",outtrans={1:"Married",2:"Single",3:"Widowed",4:"Divorced",5:"Separated",9:"Not stated"},datatype=int,coltype="categorical",all_value=0,all_label="All marital states")
+ nhds.addcolumn("month_of_admission",label="Month of Admission/Discharge",outtrans={1:'January',2:'February',3:'March',4:'April',5:'May',6:'June',7:'July',8:'August',9:'September',10:'October',11:'November',12:'December',99:'Missing'},datatype=int,coltype="ordinal",all_value='0',all_label="All months")
+ nhds.addcolumn("discharge_status",label="Discharge Status",outtrans={1:'Routine/discharged home',2:'Left against medical advice',3:'Discharged/transferred to short-term facility',4:'Discharged/transferred to long-term care institution',5:'Alive, disposition not stated',6:'Dead',9:'Not stated or not reported'},all_label="All dispositions",all_value=0,datatype=int,coltype="categorical")
+ nhds.addcolumn("days_of_care",label="Days of Care",datatype=int,coltype="scalar")
+ nhds.addcolumn("los_flag",label="Length of Stay Flag",outtrans={0:'Less than 1 day',1:'One day or more'},datatype=int,coltype="categorical")
+ nhds.addcolumn("geog_region",label="Geographic Region",outtrans={0:'United States',1:'Northeast',2:'Midwest',3:'South',4:'West'},datatype=int,coltype="categorical",all_value=0,all_label="All regions")
+ nhds.addcolumn("num_beds",label="Number of Beds",outtrans={1:'6-99',2:'100-199',3:'200-299',4:'300-499',5:'500 and over'},datatype=int,coltype="categorical",all_value=0,all_label="All sizes")
+ nhds.addcolumn("hosp_ownership",label="Hospital Ownership",outtrans={1:'Proprietary',2:'Government',3:'Nonprofit, including church'},datatype=int,coltype="categorical",all_value=0,all_label="All types")
+ nhds.addcolumn("analysis_wgt",label="Analysis Weight",datatype=int,coltype="weighting")
+ nhds.addcolumn("prin_src_payment1",label="Principal Expected Source of Payment (1996-97)",outtrans={1:"Worker's compensation",2:'Medicare',3:'Medicaid',4:'Other government payments',5:'Blue Cross',6:'Other private/commercial insurance',7:'Self-pay',8:'Other',9:'Not stated',0:'No charge'},datatype=int,coltype="categorical")
+ nhds.addcolumn("sec_src_payment1",label="Secondary Expected Source of Payment (1996-97)",outtrans={1:"Worker's compensation",2:'Medicare',3:'Medicaid',4:'Other government payments',5:'Blue Cross',6:'Other private/commercial insurance',7:'Self-pay',8:'Other',9:'Not stated',0:'No charge'},datatype=int,coltype="categorical")
+ nhds.addcolumn("prin_src_payment2",label="Principal Expected Source of Payment (1998-2005)",outtrans={'01':"Worker's compensation",'02':'Medicare','03':'Medicaid','04':'Other government','05':'Blue Cross/Blue Shield','06':'HMO/PPO','07':'Other private','08':'Self-pay','09':'No charge','10':'Other','99':'Not stated'},datatype='recode',coltype="categorical")
+ nhds.addcolumn("sec_src_payment2",label="Secondary Expected Source of Payment (1998-2005)",outtrans={'01':"Worker's compensation",'02':'Medicare','03':'Medicaid','04':'Other government','05':'Blue Cross/Blue Shield','06':'HMO/PPO','07':'Other private','08':'Self-pay','09':'No charge','10':'Other','99':'Not stated'},datatype='recode',coltype="categorical")
+ nhds.addcolumn("diagnosis1",label="Diagnosis Code 1",datatype='recode',coltype="ordinal",outtrans=icd9cm_fmt)
+ nhds.addcolumn("diagnosis2",label="Diagnosis Code 2",datatype='recode',coltype="ordinal",outtrans=icd9cm_fmt)
+ nhds.addcolumn("diagnosis3",label="Diagnosis Code 3",datatype='recode',coltype="ordinal",outtrans=icd9cm_fmt)
+ nhds.addcolumn("diagnosis4",label="Diagnosis Code 4",datatype='recode',coltype="ordinal",outtrans=icd9cm_fmt)
+ nhds.addcolumn("diagnosis5",label="Diagnosis Code 5",datatype='recode',coltype="ordinal",outtrans=icd9cm_fmt)
+ nhds.addcolumn("diagnosis6",label="Diagnosis Code 6",datatype='recode',coltype="ordinal",outtrans=icd9cm_fmt)
+ nhds.addcolumn("diagnosis7",label="Diagnosis Code 7",datatype='recode',coltype="ordinal",outtrans=icd9cm_fmt)
+ nhds.addcolumn("procedure1",label="Procedure Code 1",datatype='recode',coltype="ordinal",outtrans=icd9cm_fmt)
+ nhds.addcolumn("procedure2",label="Procedure Code 2",datatype='recode',coltype="ordinal",outtrans=icd9cm_fmt)
+ nhds.addcolumn("procedure3",label="Procedure Code 3",datatype='recode',coltype="ordinal",outtrans=icd9cm_fmt)
+ nhds.addcolumn("procedure4",label="Procedure Code 4",datatype='recode',coltype="ordinal",outtrans=icd9cm_fmt)
+ nhds.addcolumn("drg",label="DRG V13.0-V22.0",datatype=int,coltype="categorical")
+ nhds.addcolumn("diagnosis_all",label="Diagnosis codes 1-7",datatype="tuple",coltype="ordinal",outtrans=icd9cm_fmt)
+# nhds.addcolumn("diagnosis_all",label="Diagnosis codes 1-7",datatype="tuple",coltype="ordinal",multisourcecols=["diagnosis1","diagnosis2","diagnosis3","diagnosis4","diagnosis5","diagnosis6","diagnosis7"],ignorenone=1,outtrans=icd9cm_fmt)
+ nhds.addcolumn("procedure_all",label="Procedure codes 1-4",datatype="tuple",coltype="ordinal",outtrans=icd9cm_fmt)
+# nhds.addcolumn("procedure_all",label="Procedure codes 1-4",datatype="tuple",coltype="ordinal",multisourcecols=["procedure1","procedure2","procedure3","procedure4"],ignorenone=1,outtrans=icd9cm_fmt)
+ nhds.addcolumn("randomvalue",label="Random values",datatype=float,coltype="scalar",calculatedby=random_missing,missingvalues={999.0:None})
+ nhds.addcolumn("randomdate",label="Random date",datatype="recodedate",coltype="date")
+ nhds.addcolumn("month_year",label="Month and year of admission",datatype="recodedate",coltype="date")
+ nhds.addcolumn("admission_type",label="Type of admission (2001-05)",outtrans={1:"Emergency",2:'Urgent',3:'Elective',4:'Newborn',9:'Not available'},datatype=int,coltype="categorical")
+ nhds.addcolumn("admission_source",label="Source of admission (2001-05)",outtrans={1:"Physician referral",2:'Clinical referral',3:'HMO referral',4:'Transfer from a hospital',5:'Transfer from a skilled nursing facility',6:'Transfer from other health facility',7:'Emergency room',8:'Court/law enforcement',9:'Other',99:'Not available'},datatype=int,coltype="categorical")
+ if options.verbose:
+ print nhds
+ return nhds
+
+def get_source(year, options):
+ return globals()['nhds%02d_source' % (int(year) % 100)](options)
+
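+# all_sources() probes nhds96_source, nhds97_source, ... in turn; the
+# globals() lookup raises KeyError (a LookupError subclass) for the first
+# year without a loader function, which ends the iteration.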
+def all_sources(options):
+ year = 1996
+ while 1:
+ try:
+ yield get_source(year, options)
+ except LookupError:
+ break
+ year += 1
+
+
+def load_sources(nhds, options):
+ """
+    Load NHDS from the per-year sources defined earlier in this module.
+ """
+ nhds.initialise()
+ for x in range(options.nhds_iterations):
+ if options.nhds_years is None:
+ sources = all_sources(options)
+ else:
+ years = options.nhds_years.split(',')
+ sources = [get_source(year, options) for year in years]
+ for source in sources:
+ if options.verbose:
+ print source
+ nhds.loaddata(source,
+ rowlimit=options.rowlimit,
+ chunkrows=options.chunkrows)
+ nhds.finalise()
+ for col in nhds.get_columns():
+ if getattr(col, 'calculatedby', None):
+ col.calculatedby = None
+
+
+def derived_cols(nhds):
+ """
+ Add some derived columns to the NHDS dataset.
+ """
+
+ # There should be no missing values for age in the NHDS data,
+ # so don't bother checking - but this could be done and the mask
+ # value set appropriately if required
+ def age_years(raw_age,age_units):
+ units_divisor = Numeric.choose(age_units - 1, (1.0, 12.0, 365.25))
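+        # age_units codes: 1=years, 2=months, 3=days; choose() maps each to
+        # the divisor that converts raw_age into years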
+ returnarray = raw_age / units_divisor
+ return returnarray
+ print "Adding age column..."
+ nhds.derivedcolumn(dername="age",dercols=("raw_age","age_units"),derfunc=age_years,coltype="scalar",datatype=float,outtrans=None,label="Age (years)")
+
+ def age_months(raw_age,age_units):
+ units_multiplier = Numeric.choose(age_units - 1, (12.0, 1.0, (1/30.5)))
+ returnarray = raw_age * units_multiplier
+ return returnarray
+ print "Adding age_months column..."
+ nhds.derivedcolumn(dername="age_months",dercols=("raw_age","age_units"),derfunc=age_months,coltype="scalar",datatype=float,outtrans=None,label="Age (months)")
+
+ def age_days(raw_age,age_units):
+ units_multiplier = Numeric.choose(age_units - 1, (365.25, 30.5, 1.0))
+ returnarray = raw_age * units_multiplier
+ return returnarray
+ print "Adding age_days column..."
+ nhds.derivedcolumn(dername="age_days",dercols=("raw_age","age_units"),derfunc=age_days,coltype="scalar",datatype=float,outtrans=None,label="Age (days)")
+
+ # Add 5 year age groups
+ def agegrp(age):
+ agrp = MA.choose(MA.greater_equal(age,85),(age,-18.0))
+ agrp = MA.choose(MA.greater_equal(agrp,80),(agrp,-17.0))
+ agrp = MA.choose(MA.greater_equal(agrp,75),(agrp,-16.0))
+ agrp = MA.choose(MA.greater_equal(agrp,70),(agrp,-15.0))
+ agrp = MA.choose(MA.greater_equal(agrp,65),(agrp,-14.0))
+ agrp = MA.choose(MA.greater_equal(agrp,60),(agrp,-13.0))
+ agrp = MA.choose(MA.greater_equal(agrp,55),(agrp,-12.0))
+ agrp = MA.choose(MA.greater_equal(agrp,50),(agrp,-11.0))
+ agrp = MA.choose(MA.greater_equal(agrp,45),(agrp,-10.0))
+ agrp = MA.choose(MA.greater_equal(agrp,40),(agrp,-9.0))
+ agrp = MA.choose(MA.greater_equal(agrp,35),(agrp,-8.0))
+ agrp = MA.choose(MA.greater_equal(agrp,30),(agrp,-7.0))
+ agrp = MA.choose(MA.greater_equal(agrp,25),(agrp,-6.0))
+ agrp = MA.choose(MA.greater_equal(agrp,20),(agrp,-5.0))
+ agrp = MA.choose(MA.greater_equal(agrp,15),(agrp,-4.0))
+ agrp = MA.choose(MA.greater_equal(agrp,10),(agrp,-3.0))
+ agrp = MA.choose(MA.greater_equal(agrp,5),(agrp,-2.0))
+ agrp = MA.choose(MA.greater_equal(agrp,0),(agrp,-1.0))
+ returnarray = -agrp.astype(MA.Int)
+ return returnarray
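+    # The choose() cascade above is equivalent to this arithmetic sketch
+    # (not part of the original loader; assumes non-negative ages):
+    #   agegrp = (MA.minimum(MA.floor(age / 5.0), 17) + 1).astype(MA.Int)
+    # i.e. 5-year bands map to groups 1-17 and ages 85+ collapse to group 18.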
+
+ agegrp_outtrans = {
+ 0:"All ages",
+ 1:"0 - 4 yrs",
+ 2:"5 - 9 yrs",
+ 3:"10 - 14 yrs",
+ 4:"15 - 19 yrs",
+ 5:"20 - 24 yrs",
+ 6:"25 - 29 yrs",
+ 7:"30 - 34 yrs",
+ 8:"35 - 39 yrs",
+ 9:"40 - 44 yrs",
+ 10:"45 - 49 yrs",
+ 11:"50 - 54 yrs",
+ 12:"55 - 59 yrs",
+ 13:"60 - 64 yrs",
+ 14:"65 - 69 yrs",
+ 15:"70 - 74 yrs",
+ 16:"75 - 79 yrs",
+ 17:"80 - 84 yrs",
+ 18:"85+ yrs"}
+
+ nhds.derivedcolumn(dername="agegrp",dercols=("age",),derfunc=agegrp,coltype="ordinal",datatype=int,outtrans=agegrp_outtrans,label="Age Group",all_value=0,all_label="All ages")
+
+
+def load(options):
+ ds = make_dataset(options)
+ load_sources(ds, options)
+ derived_cols(ds)
+ ds.save()
diff --git a/demo/loaders/nhds_population.py b/demo/loaders/nhds_population.py
new file mode 100644
index 0000000..be0560e
--- /dev/null
+++ b/demo/loaders/nhds_population.py
@@ -0,0 +1,201 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: nhds_population.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/demo/loaders/nhds_population.py,v $
+
+# Python standard modules
+import csv
+import time
+import os
+import sys
+import random
+import optparse
+
+# 3rd Party Modules
+# http://www.egenix.com/files/python/eGenix-mx-Extensions.html
+from mx.DateTime import DateTime
+# http://www.pfdubois.com/numpy/
+import Numeric, MA
+
+# SOOM modules
+from SOOMv0 import *
+
+#########################################################################
+# The following code loads some population datasets for the 1997 NHDS data.
+# Unfortunately, these data files are only available in Lotus 123 spreadsheet
+# format, so you need to open them in a spreadsheet package and save them as
+# CSV files for the following code to work. With some more work we may be
+# able to automatically download and convert these files - or we could ask
+# NCHS to make CSV files available instead.
+#########################################################################
+# Now load some population datasets for 1997
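+# Note: rawdatapath is not defined in this file as shipped; the demo driver
+# presumably provides it before this module runs. For standalone use,
+# something like this would work:
+#   rawdatapath = os.path.join(os.path.dirname(__file__), '..', 'rawdata')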
+csv_reader = csv.reader(file(os.path.join(rawdatapath, "appc_b97.csv")))
+fo = open(os.path.join(rawdatapath, "popsgeog97.dat"), "w")
+
+_AGEGRP_MAP = {
+    'All ages': '0', 'All Ages': '0',
+    '0-4': '1', '5-9': '2', '10-14': '3', '15-19': '4', '20-24': '5',
+    '25-29': '6', '30-34': '7', '35-39': '8', '40-44': '9', '45-49': '10',
+    '50-54': '11', '55-59': '12', '60-64': '13', '65-69': '14',
+    '70-74': '15', '75-79': '16', '80-84': '17', '85+': '18',
+}
+
+def agegrp_transform(text):
+    # Map an age-group heading to its ordinal code; anything unrecognised
+    # is returned unchanged (stripped).
+    text = text.strip()
+    return _AGEGRP_MAP.get(text, text)
+
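+# writeline() reads `cleanline` from module scope, so it must be called from
+# within the parsing loops below, after cleanline has been populated.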
+def writeline(filehandle,sex,otherattrib,popindex):
+ outline = agegrp_transform(cleanline[0])
+ outline += ',' + str(sex)
+ outline += ',' + str(otherattrib)
+ outline += ',' + str(int(cleanline[popindex]) * 1000)
+ outline += '\n'
+ filehandle.write(outline)
+
+for line_no, parsed_line in enumerate(csv_reader):
+    if line_no in [11] + range(13,31):
+ cleanline = []
+ for item in parsed_line:
+ item = item.strip().replace(",","")
+ try:
+ cleanline.append(int(item))
+            except ValueError:
+ cleanline.append(item)
+ writeline(fo,0,0,1)
+ writeline(fo,1,0,2)
+ writeline(fo,2,0,3)
+
+ writeline(fo,0,1,4)
+ writeline(fo,1,1,5)
+ writeline(fo,2,1,6)
+
+ writeline(fo,0,2,7)
+ writeline(fo,1,2,8)
+ writeline(fo,2,2,9)
+
+ writeline(fo,0,3,10)
+ writeline(fo,1,3,11)
+ writeline(fo,2,3,12)
+
+ writeline(fo,0,4,13)
+ writeline(fo,1,4,14)
+ writeline(fo,2,4,15)
+
+fo.close()
+
+popsgeog97data_columns = (
+ DataSourceColumn("agegrp",label="5 yr age group",coltype="ordinal",ordinalpos=0,posbase=0),
+ DataSourceColumn("sex",label="Sex",coltype="categorical",ordinalpos=1,posbase=0),
+ DataSourceColumn("geog_region",label="Geographical Region",coltype="categorical",ordinalpos=2,posbase=0),
+ DataSourceColumn("pop",label="Population estimate",coltype="scalar",ordinalpos=3,posbase=0)
+)
+popsgeog97data = CSVDataSource("popsgeog97data", popsgeog97data_columns, filename=os.path.join(rawdatapath, "popsgeog97.dat"), header_rows=0, label="Transformed appc_b97.wk1")
+
+
+popsgeog97 = makedataset("popsgeog97",label="Populations by 5 yr age group, sex and geographical region, 1997")
+
+popsgeog97.addcolumn("sex",label="Sex",outtrans={0:"Persons",1:"Male",2:"Female"},datatype=int,coltype="categorical")
+popsgeog97.addcolumn("geog_region",label="Geographic Region",outtrans={0:'United States',1:'Northeast',2:'Midwest',3:'South',4:'West'},datatype=int,coltype="categorical")
+popsgeog97.addcolumn("pop",label="Population estimate",datatype=int,coltype="scalar")
+agegrp_outtrans = { 0:"All ages",
+ 1:"0 - 4 yrs",
+ 2:"5 - 9 yrs",
+ 3:"10 - 14 yrs",
+ 4:"15 - 19 yrs",
+ 5:"20 - 24 yrs",
+ 6:"25 - 29 yrs",
+ 7:"30 - 34 yrs",
+ 8:"35 - 39 yrs",
+ 9:"40 - 44 yrs",
+ 10:"45 - 49 yrs",
+ 11:"50 - 54 yrs",
+ 12:"55 - 59 yrs",
+ 13:"60 - 64 yrs",
+ 14:"65 - 69 yrs",
+ 15:"70 - 74 yrs",
+ 16:"75 - 79 yrs",
+ 17:"80 - 84 yrs",
+ 18:"85+ yrs"}
+popsgeog97.addcolumn("agegrp",label="Age group",coltype="ordinal",datatype=int,outtrans=agegrp_outtrans)
+
+popsgeog97.initialise()
+popsgeog97.loaddata(popsgeog97data,rowlimit=None)
+popsgeog97.finalise()
+popsgeog97.save()
+
+###################################################
+# Repeat for racial groups populations
+
+csv_reader = csv.reader(file(os.path.join(rawdatapath, "appc_c97.csv")))
+fo = open(os.path.join(rawdatapath, "popsrace97.dat"), "w")
+
+for line_no, parsed_line in enumerate(csv_reader):
+    if line_no in [11,13,20,27,34,41,48,55,62,69,76,83,90,97,104,111,118,125,166]:
+ cleanline = []
+ for item in parsed_line:
+ item = item.strip().replace(",","")
+ try:
+ cleanline.append(int(item))
+            except ValueError:
+ cleanline.append(item)
+ print "line_no %s, parsed_line %s" % (line_no, parsed_line)
+ writeline(fo,0,0,1)
+ writeline(fo,1,0,2)
+ writeline(fo,2,0,3)
+
+ writeline(fo,0,1,4)
+ writeline(fo,1,1,5)
+ writeline(fo,2,1,6)
+
+ writeline(fo,0,2,7)
+ writeline(fo,1,2,8)
+ writeline(fo,2,2,9)
+
+ writeline(fo,0,3,10)
+ writeline(fo,1,3,11)
+ writeline(fo,2,3,12)
+
+fo.close()
+
+popsrace97_columns = (
+ DataSourceColumn("agegrp",label="5 yr age group",coltype="ordinal",ordinalpos=0,posbase=0),
+ DataSourceColumn("sex",label="Sex",coltype="categorical",ordinalpos=1,posbase=0),
+ DataSourceColumn("racialgroup",label="Racial group",coltype="categorical",ordinalpos=2,posbase=0),
+ DataSourceColumn("pop",label="Population estimate",coltype="scalar",ordinalpos=3,posbase=0)
+)
+popsrace97data = CSVDataSource("popsrace97data", popsrace97_columns, filename=os.path.join(rawdatapath, "popsrace97.dat"),header_rows=0,label="Transformed appc_c97.wk1")
+
+popsrace97 = makedataset("popsrace97",label="Populations by 5 yr age group, sex and grouped race, 1997")
+
+popsrace97.addcolumn("sex",label="Sex",outtrans={0:"Persons",1:"Male",2:"Female"},datatype=int,coltype="categorical")
+popsrace97.addcolumn("racialgroup",label="Racial group",outtrans={0:'All races',1:'White',2:'Black',3:'Others'},datatype=int,coltype="categorical")
+popsrace97.addcolumn("pop",label="Population estimate",datatype=int,coltype="scalar")
+popsrace97.addcolumn("agegrp",label="Age group",coltype="ordinal",datatype=int,outtrans=agegrp_outtrans)
+
+popsrace97.initialise()
+popsrace97.loaddata(popsrace97data,rowlimit=None)
+popsrace97.finalise()
+popsrace97.save()
diff --git a/demo/loaders/nhmrc.py b/demo/loaders/nhmrc.py
new file mode 100644
index 0000000..95e92b7
--- /dev/null
+++ b/demo/loaders/nhmrc.py
@@ -0,0 +1,137 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# Define and load Australian NH&MRC grant funding data
+# See http://www.nhmrc.gov.au/funding/dataset/rmis/index.htm
+
+# Python standard modules
+import os
+import csv
+import sys
+
+# 3rd Party Modules
+# http://www.pfdubois.com/numpy/
+import Numeric, MA
+from mx.DateTime import DateTime
+
+# SOOM modules
+from SOOMv0 import *
+from SOOMv0.Sources.CSV import *
+
+def make_nhmrc(options):
+ label="Australian NH&MRC grant funding 2000-2007"
+ nhmrc = makedataset("nhmrc", label=label)
+
+ nhmrc.addcolumn("grant_id",label="Grant ID",coltype="identity",datatype=int)
+ nhmrc.addcolumn("chief_investigator_a",label="Chief Investigator A",coltype="categorical",datatype=str)
+ nhmrc.addcolumn("funding_group",label="Main Funding Group",coltype="categorical",datatype=str)
+ nhmrc.addcolumn("grant_type",label="Grant Type",coltype="categorical",datatype=str)
+ nhmrc.addcolumn("grant_subtype",label="Grant Subtype",coltype="categorical",datatype=str)
+ nhmrc.addcolumn("scientific_title",label="Scientific Title",coltype="searchabletext",datatype=str)
+ nhmrc.addcolumn("simplified_title",label="Simplified Title",coltype="searchabletext",datatype=str)
+ nhmrc.addcolumn("admin_inst",label="Administering Institution",coltype="categorical",datatype=str)
+ nhmrc.addcolumn("state",label="State/Territory",coltype="categorical",datatype=str)
+ nhmrc.addcolumn("sector",label="Sector",coltype="categorical",datatype=str)
+ nhmrc.addcolumn("research_area",label="Broad Research Area",coltype="categorical",datatype=str)
+ nhmrc.addcolumn("rfcd_main",label="Field of Research - main category",coltype="categorical",datatype=str)
+ nhmrc.addcolumn("rfcd",label="Field of Research",coltype="categorical",datatype=str)
+ nhmrc.addcolumn("app_yr",label="Application year",coltype="ordinal",datatype=int)
+ nhmrc.addcolumn("start_yr",label="Commencement year",coltype="ordinal",datatype=int)
+ nhmrc.addcolumn("funding_total",label="Total funding (AUD)",coltype="scalar",datatype=float)
+ nhmrc.addcolumn("funding_1994",label="1994 funding (AUD)",coltype="scalar",datatype=float)
+ nhmrc.addcolumn("funding_1995",label="1995 funding (AUD)",coltype="scalar",datatype=float)
+ nhmrc.addcolumn("funding_1996",label="1996 funding (AUD)",coltype="scalar",datatype=float)
+ nhmrc.addcolumn("funding_1997",label="1997 funding (AUD)",coltype="scalar",datatype=float)
+ nhmrc.addcolumn("funding_1998",label="1998 funding (AUD)",coltype="scalar",datatype=float)
+ nhmrc.addcolumn("funding_1999",label="1999 funding (AUD)",coltype="scalar",datatype=float)
+ nhmrc.addcolumn("funding_2000",label="2000 funding (AUD)",coltype="scalar",datatype=float)
+ nhmrc.addcolumn("funding_2001",label="2001 funding (AUD)",coltype="scalar",datatype=float)
+ nhmrc.addcolumn("funding_2002",label="2002 funding (AUD)",coltype="scalar",datatype=float)
+ nhmrc.addcolumn("funding_2003",label="2003 funding (AUD)",coltype="scalar",datatype=float)
+ nhmrc.addcolumn("funding_2004",label="2004 funding (AUD)",coltype="scalar",datatype=float)
+ nhmrc.addcolumn("funding_2005",label="2005 funding (AUD)",coltype="scalar",datatype=float)
+ nhmrc.addcolumn("funding_2006",label="2006 funding (AUD)",coltype="scalar",datatype=float)
+ nhmrc.addcolumn("funding_2007",label="2007 funding (AUD)",coltype="scalar",datatype=float)
+ nhmrc.addcolumn("funding_2008",label="2008 funding (AUD)",coltype="scalar",datatype=float)
+ nhmrc.addcolumn("funding_2009",label="2009 funding (AUD)",coltype="scalar",datatype=float)
+ nhmrc.addcolumn("funding_2010",label="2010 funding (AUD)",coltype="scalar",datatype=float)
+ nhmrc.addcolumn("funding_2011",label="2011 funding (AUD)",coltype="scalar",datatype=float)
+ nhmrc.addcolumn("funding_2012",label="2012 funding (AUD)",coltype="scalar",datatype=float)
+ nhmrc.addcolumn("funding_2013",label="2013 funding (AUD)",coltype="scalar",datatype=float)
+
+ if options.verbose:
+ print nhmrc
+ return nhmrc
+
+def nhmrc_source(filename):
+ nhmrc_columns = [
+ DataSourceColumn("grant_id",label="Grant ID",coltype="identity",ordinalpos=0),
+ DataSourceColumn("chief_investigator_a",label="Chief Investigator A",coltype="categorical",ordinalpos=1),
+ DataSourceColumn("funding_group",label="Main Funding Group",coltype="categorical",ordinalpos=2),
+ DataSourceColumn("grant_type",label="Grant Type",coltype="categorical",ordinalpos=3),
+ DataSourceColumn("grant_subtype",label="Grant Subtype",coltype="categorical",ordinalpos=4),
+ DataSourceColumn("scientific_title",label="Scientific Title",coltype="searchabletext",ordinalpos=5),
+ DataSourceColumn("simplified_title",label="Simplified Title",coltype="searchabletext",ordinalpos=6),
+ DataSourceColumn("admin_inst",label="Administering Institution",coltype="categorical",ordinalpos=7),
+ DataSourceColumn("state",label="State/Territory",coltype="categorical",ordinalpos=8),
+ DataSourceColumn("sector",label="Sector",coltype="categorical",ordinalpos=9),
+ DataSourceColumn("research_area",label="Broad Research Area",coltype="categorical",ordinalpos=10),
+ DataSourceColumn("rfcd_main",label="Field of Research - main category",coltype="categorical",ordinalpos=11),
+ DataSourceColumn("rfcd",label="Field of Research",coltype="categorical",ordinalpos=12),
+ DataSourceColumn("app_yr",label="Application year",coltype="ordinal",ordinalpos=13),
+ DataSourceColumn("start_yr",label="Commencement year",coltype="ordinal",ordinalpos=14),
+ DataSourceColumn("funding_total",label="Total funding (AUD)",coltype="scalar",ordinalpos=15),
+ DataSourceColumn("funding_1994",label="1994 funding (AUD)",coltype="scalar",ordinalpos=16),
+ DataSourceColumn("funding_1995",label="1995 funding (AUD)",coltype="scalar",ordinalpos=17),
+ DataSourceColumn("funding_1996",label="1996 funding (AUD)",coltype="scalar",ordinalpos=18),
+ DataSourceColumn("funding_1997",label="1997 funding (AUD)",coltype="scalar",ordinalpos=19),
+ DataSourceColumn("funding_1998",label="1998 funding (AUD)",coltype="scalar",ordinalpos=21),
+ DataSourceColumn("funding_1999",label="1999 funding (AUD)",coltype="scalar",ordinalpos=22),
+ DataSourceColumn("funding_2000",label="2000 funding (AUD)",coltype="scalar",ordinalpos=23),
+ DataSourceColumn("funding_2001",label="2001 funding (AUD)",coltype="scalar",ordinalpos=24),
+ DataSourceColumn("funding_2002",label="2002 funding (AUD)",coltype="scalar",ordinalpos=25),
+ DataSourceColumn("funding_2003",label="2003 funding (AUD)",coltype="scalar",ordinalpos=26),
+ DataSourceColumn("funding_2004",label="2004 funding (AUD)",coltype="scalar",ordinalpos=27),
+ DataSourceColumn("funding_2005",label="2005 funding (AUD)",coltype="scalar",ordinalpos=28),
+ DataSourceColumn("funding_2006",label="2006 funding (AUD)",coltype="scalar",ordinalpos=29),
+ DataSourceColumn("funding_2007",label="2007 funding (AUD)",coltype="scalar",ordinalpos=30),
+ DataSourceColumn("funding_2008",label="2008 funding (AUD)",coltype="scalar",ordinalpos=31),
+ DataSourceColumn("funding_2009",label="2009 funding (AUD)",coltype="scalar",ordinalpos=32),
+ DataSourceColumn("funding_2010",label="2010 funding (AUD)",coltype="scalar",ordinalpos=33),
+ DataSourceColumn("funding_2011",label="2011 funding (AUD)",coltype="scalar",ordinalpos=34),
+ DataSourceColumn("funding_2012",label="2012 funding (AUD)",coltype="scalar",ordinalpos=35),
+ DataSourceColumn("funding_2013",label="2013 funding (AUD)",coltype="scalar",ordinalpos=36),
+ DataSourceColumn("total",label="Total funding (AUD)",coltype="scalar",ordinalpos=37),
+ ]
+
+ return CSVDataSource("nhmrc_data", nhmrc_columns, filename=filename, header_rows=1,
+ label="NH&MRC funding 2000-2007")
+
+
+def load_nhmrc(nhmrc, filename, options):
+ filename = os.path.join(options.datadir, filename)
+ nhmrc_src = nhmrc_source(filename)
+ if options.verbose:
+        print nhmrc_src
+ nhmrc.initialise()
+ nhmrc.loaddata(nhmrc_src,
+ chunkrows=options.chunkrows,
+ rowlimit=options.rowlimit)
+ nhmrc.finalise()
+
+def load(options):
+ ds = make_nhmrc(options)
+ load_nhmrc(ds, 'nhmrc_grantdata.csv.gz', options)
+ ds.save()
+
diff --git a/demo/loaders/rtfparse.py b/demo/loaders/rtfparse.py
new file mode 100644
index 0000000..ea57ac1
--- /dev/null
+++ b/demo/loaders/rtfparse.py
@@ -0,0 +1,112 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: rtfparse.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/demo/loaders/rtfparse.py,v $
+
+import re
+
+__all__ = ['parse']
+# Tokens
+for tok in 'TEXT,CWORD,CSYM,LBRACE,RBRACE,GROUP'.split(','):
+ globals()[tok] = tok # XXX shouldn't modify globals like this?
+ __all__.append(tok)
+
+cword_re = re.compile(r'''
+ (?:\\([a-z]+)(-?\d+)?\ ?) # control word
+ |
+ (\\[^a-zA-Z]) # control symbol
+ |
+ ([{}]) # group
+''', re.VERBOSE | re.MULTILINE)
+
+def tokenise(data):
+ last = 0
+ for match in cword_re.finditer(data):
+ start, end = match.span()
+ content = data[last:start]
+ last = end
+ cword, cwordarg, csym, group = match.groups()
+ if content:
+ yield TEXT, content
+ if cword:
+ yield CWORD, (cword, cwordarg)
+ elif csym:
+ yield CSYM, csym[1]
+ elif group == '{':
+ yield LBRACE, None
+ elif group == '}':
+ yield RBRACE, None
+ if data[last:]:
+ yield TEXT, data[last:]
+
+
+class Node(object):
+ __slots__ = 'token', 'args'
+
+ def __init__(self, token, args):
+ self.token = token
+ self.args = args
+
+ def dump(self, level):
+ indent = ' ' * level
+ if self.token == TEXT:
+ print '%s%r' % (indent, self.args)
+ elif self.token == CWORD:
+ print '%s\\%s %r' % (indent, self.args[0], self.args[1])
+ elif self.token == CSYM:
+ print '%s\\%s' % (indent, self.args[0])
+ else:
+ print '%s%s %r' % (indent, self.token, self.args)
+
+
+class Group(Node):
+    __slots__ = ('children',)
+
+ def __init__(self):
+ self.token = 'GROUP'
+ self.children = []
+
+ def add(self, node):
+ self.children.append(node)
+
+ def dump(self, level):
+ indent = ' ' * level
+ print indent + '['
+ for child in self.children:
+ child.dump(level + 1)
+ print indent + ']'
+
+
+def parse(data):
+ stack = []
+ for token, arg in tokenise(data):
+ # print '%6s %r' % (token, arg)
+ if token == LBRACE:
+ stack.append(Group())
+ elif token == RBRACE:
+ group = stack.pop()
+ if stack:
+ stack[-1].add(group)
+ else:
+ return group
+ else:
+ stack[-1].add(Node(token, arg))
+
+
+if __name__ == '__main__':
+ import sys
+ aa = open(sys.argv[1], 'U').read()
+ root = parse(aa)
+ root.dump(0)
diff --git a/demo/loaders/syndeath.py b/demo/loaders/syndeath.py
new file mode 100644
index 0000000..61112a0
--- /dev/null
+++ b/demo/loaders/syndeath.py
@@ -0,0 +1,390 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# Define and load the synthetic deaths, populations and standard population demo datasets
+#
+# $Id: syndeath.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/demo/loaders/syndeath.py,v $
+
+# Python standard modules
+import os
+import csv
+
+# 3rd Party Modules
+# http://www.pfdubois.com/numpy/
+import Numeric, MA
+
+# SOOM modules
+from SOOMv0 import *
+from SOOMv0.Sources.CSV import *
+
+agegrp_outtrans = {
+ 1: '0-4 yrs',
+ 2: '5-9 yrs',
+ 3: '10-14 yrs',
+ 4: '15-19 yrs',
+ 5: '20-24 yrs',
+ 6: '25-29 yrs',
+ 7: '30-34 yrs',
+ 8: '35-39 yrs',
+ 9: '40-44 yrs',
+ 10: '45-49 yrs',
+ 11: '50-54 yrs',
+ 12: '55-59 yrs',
+ 13: '60-64 yrs',
+ 14: '65-69 yrs',
+ 15: '70-74 yrs',
+ 16: '75-79 yrs',
+ 17: '80-84 yrs',
+ 18: '85+ yrs',
+ 19: 'Invalid Data',
+ 20: 'Unknown',
+}
+
+causeofdeath_outtrans = {
+ 1: 'Malignant neoplasm of pancreas (ICD9 157)',
+ 2: 'Railway accidents (ICD9 E800-E807)',
+ 3: 'Benign neoplasm of nervous system (ICD9 225)',
+ 4: 'Meningitis (ICD9 320-322)',
+ 5: 'Retention of urine (ICD9 788.2)',
+ 6: 'Sudden infant death syndrome (ICD9 798.0)',
+ 7: 'Tuberculosis of genitourinary system (ICD9 016)',
+ 8: 'Other congenital anomalies of musculoskeletal system (ICD9 754.0-754.2,754.4-756)',
+ 9: 'Other chronic obstructive pulmonary disease (ICD9 495,496)',
+ 10: 'Affective psychoses (ICD9 296)',
+ 11: 'Spontaneous abortion (ICD9 634)',
+ 12: 'Malignant neoplasm of larynx (ICD9 161)',
+ 13: 'Late effects of tuberculosis (ICD9 137)',
+ 14: 'Epilepsy (ICD9 345)',
+ 15: 'Abdominal pain (ICD9 789.0)',
+ 16: 'Subarachnoid haemorrhage (ICD9 430)',
+ 17: 'Senility without mention of psychosis (ICD9 797)',
+ 18: 'Alcohol dependence syndrome (ICD9 303)',
+ 19: 'Benign neoplasm of ovary (ICD9 220)',
+ 20: 'Arthropod-borne encephalitis (ICD9 062-064)',
+ 21: 'Accidents caused by firearm missile (ICD9 E922)',
+ 22: 'Acute but ill-defined cerebrovascular disease (ICD9 436)',
+ 23: 'Injury undetermined whether accidentally or purposely inflicted (ICD9 E980-E989)',
+ 24: 'Malignant neoplasm of cervix uteri (ICD9 180)',
+ 25: 'Syphilis (ICD9 090-097)',
+ 26: 'Ill-defined intestinal infections (ICD9 009)',
+ 27: 'Rheumatoid arthritis, except spine (ICD9 714)',
+ 28: 'Senile and presenile organic psychotic conditions (ICD9 290)',
+ 29: 'Spina bifida and hydrocephalus (ICD9 741,742.3)',
+ 30: 'Malignant neoplasm of colon (ICD9 153)',
+ 31: 'Intracerebral and other intracranial haemorrhage (ICD9 431,432)',
+ 32: 'Other acute upper respiratory infections (ICD9 460-462,465)',
+ 33: 'Accidents due to natural and environmental factors (ICD9 E900-E909)',
+ 34: 'Hyperplasia of prostate (ICD9 600)',
+ 35: 'Ankylosing spondylitis (ICD9 720.0)',
+ 36: 'Pneumoconiosis and other lung disease due to external agents (ICD9 500-508)',
+ 37: 'Accidental poisoning by gases and vapours (ICD9 E867-E869)',
+ 38: 'Chronic liver disease and cirrhosis (ICD9 571)',
+ 39: 'Acute myocardial infarction (ICD9 410)',
+ 40: 'Cardiac dysrhythmias (ICD9 427)',
+ 41: 'Meningococcal infection (ICD9 036)',
+ 42: 'Other psychoses (ICD9 291-294,297-299)',
+ 43: 'Malignant neoplasm of small intestine, including duodenum (ICD9 152)',
+ 44: 'Other degenerative and hereditary disorders of the CNS (ICD9 330,331,333-336)',
+ 45: 'Measles (ICD9 055)',
+ 46: 'Influenza (ICD9 487)',
+ 47: 'Hyperlipoproteinaemia (ICD9 272.0,272.1)',
+ 48: 'Diphtheria (ICD9 032)',
+ 49: 'Inflammatory diseases of pelvic cellular tissue and peritoneum (ICD9 614.3-614.9)',
+ 50: 'Cerebral atherosclerosis (ICD9 437.0)',
+ 51: 'Diseases of oesophagus (ICD9 530)',
+ 52: 'Other arthropathies (ICD9 710-713,715,716)',
+ 53: 'Nutritional marasmus (ICD9 261)',
+ 54: 'Diseases of breast (ICD9 610,611)',
+ 55: 'Malignant neoplasm of stomach (ICD9 151)',
+ 56: 'Benign neoplasm of thyroid (ICD9 226)',
+ 57: 'Late effects of acute poliomyelitis (ICD9 138)',
+ 58: 'Other dorsopathies (ICD9 720.1-724)',
+ 59: 'Hypertensive heart disease (ICD9 402,404)',
+ 60: 'Strabismus and other disorders of binocular eye movements (ICD9 378)',
+ 61: 'Toxaemia of pregnancy (ICD9 642.4-642.9,643)',
+ 62: 'Mental retardation (ICD9 317-319)',
+ 63: 'Malignant neoplasm of testis (ICD9 186)',
+ 64: 'Arterial embolism and thrombosis (ICD9 444)',
+ 65: 'Septicaemia (ICD9 038)',
+ 66: 'Pulmonary embolism (ICD9 415.1)',
+ 67: 'Other rickettsiosis (ICD9 081-083)',
+ 68: 'Pyrexia of unknown origin (ICD9 780.6)',
+ 69: 'Intestinal obstruction without mention of hernia (ICD9 560)',
+ 70: 'Pleurisy (ICD9 511)',
+ 71: 'Haemolytic disease of fetus or newborn (ICD9 773)',
+ 72: "Parkinson's disease (ICD9 332)",
+ 73: 'Ulcer of stomach and duodenum (ICD9 531-533)',
+ 74: 'Infections of kidney (ICD9 590)',
+ 75: 'Viral hepatitis (ICD9 070)',
+ 76: 'Blindness and low vision (ICD9 369)',
+ 77: 'Benign neoplasm of skin (ICD9 216)',
+ 78: "Hodgkin's disease (ICD9 201)",
+ 79: 'Intestinal infections due to other specified organism (ICD9 007,008)',
+ 80: 'Whooping cough (ICD9 033)',
+ 81: 'Benign neoplasm of kidney and other urinary organs (ICD9 223)',
+ 82: 'Birth trauma (ICD9 767)',
+ 83: 'Cholelithiasis and cholecystitis (ICD9 574-575.1)',
+ 84: 'Diseases of teeth and supporting structures (ICD9 520-525)',
+ 85: 'Malignant neoplasm of ovary and other uterine adnexa (ICD9 183)',
+ 86: 'Other respiratory tuberculosis (ICD9 010,012)',
+ 87: 'Malignant neoplasm of rectum, rectosigmoid junction and anus (ICD9 154)',
+ 88: 'Malignant neoplasm of liver, specified as primary (ICD9 155.0)',
+ 89: 'Pneumonia (ICD9 480-486)',
+ 90: 'Nephritis, nephrotic syndrome and nephrosis (ICD9 580-589)',
+ 91: 'Injury resulting from operation of war (ICD9 E990-E999)',
+ 92: 'Amoebiasis (ICD9 006)',
+ 93: 'Cystitis (ICD9 595)',
+ 94: 'Malignant neoplasm of uterus, other and unspecified (ICD9 179,182)',
+ 95: 'Malignant neoplasm of trachea, bronchus and lung (ICD9 162)',
+ 96: 'Other protein-calorie malnutrition (ICD9 262,263)',
+ 97: 'Food poisoning (ICD9 003,005)',
+ 98: 'Uterovaginal prolapse (ICD9 618)',
+ 99: 'Urinary calculus (ICD9 592,594)',
+ 100: 'Appendicitis (ICD9 540-543)',
+ 101: 'Air and space transport accidents (ICD9 E840-E845)',
+ 102: 'Kwashiorkor (ICD9 260)',
+ 103: 'Malignant neoplasm of prostate (ICD9 185)',
+    104: 'Accidental poisoning by drugs, medicaments and biologicals (ICD9 E850-E858)',
+ 105: 'Hernia of abdominal cavity (ICD9 550-553)',
+ 106: 'Leukaemia (ICD9 204-208)',
+ 107: 'Respiratory failure (ICD9 799.1)',
+ 108: 'Mycosis (ICD9 110-118)',
+ 109: 'Congenital anomalies of heart and circulatory system (ICD9 745-747)',
+ 110: 'Inflammatory diseases of uterus, vagina and vulva (ICD9 615,616)',
+ 111: 'Bronchitis, chronic and unspecified, emphysema and asthma (ICD9 490-493)',
+ 112: 'Motor vehicle traffic accidents (ICD9 E810-E819)',
+ 113: 'Malignant neoplasm of placenta (ICD9 181)',
+ 114: 'Rabies (ICD9 071)',
+ 115: 'Hypoxia, birth asphyxia and other respiratory conditions (ICD9 768-770)',
+ 116: 'Shigellosis (ICD9 004)',
+ 117: 'Diverticula of intestine (ICD9 562)',
+ 118: 'Rubella (ICD9 056)',
+ 119: 'Atherosclerosis (ICD9 440)',
+ 120: 'Anaemias (ICD9 280-285)',
+ 121: 'Streptococcal sore throat, scarlatina and erysipelas (ICD9 034,035)',
+ 122: 'Accidents caused by machinery and by cutting/piercing instruments (ICD9 E919,E920)',
+ 123: 'Otitis media and mastoiditis (ICD9 381-383)',
+ 124: 'Other malignant neoplasm of skin (ICD9 173)',
+ 125: 'Infantile cerebral palsy and other paralytic syndromes (ICD9 343,344)',
+ 126: 'Bronchiectasis (ICD9 494)',
+ 127: 'Neurotic and personality disorders (ICD9 300,301)',
+ 128: 'Osteomyelitis, periostitis and other infections involving bone (ICD9 730)',
+ 129: 'Rheumatism, excluding the back (ICD9 725-729)',
+ 130: 'Other road vehicle accidents (ICD9 E826-E829)',
+ 131: 'Disorders of thyroid gland (ICD9 240-246)',
+ 132: 'Other diseases of arteries, arterioles and capillaries (ICD9 441-443,446-448)',
+ 133: 'Other functional digestive disorders (ICD9 564)',
+ 134: 'Other deformities of central nervous system (ICD9 740,742.0-742.2,742.4-742.9)',
+ 135: 'Tuberculosis of meninges and central nervous system (ICD9 013)',
+ 136: 'Malignant neoplasm of brain (ICD9 191)',
+ 137: 'Accidental poisoning by other solid and liquid substances (ICD9 E860-E866)',
+ 138: 'Benign neoplasm of uterus (ICD9 218,219)',
+ 139: 'Acute laryngitis and tracheitis (ICD9 464)',
+ 140: 'Malignant neoplasm of bladder (ICD9 188)',
+ 141: 'Multiple sclerosis (ICD9 340)',
+ 142: 'Schistosomiasis (ICD9 120)',
+ 143: 'Varicose veins of lower extremities (ICD9 454)',
+ 144: 'Acute bronchitis and bronchiolitis (ICD9 466)',
+ 145: 'Phlebitis, thrombophlebitis, venous embolism and thrombosis (ICD9 451-453)',
+ 146: 'Salpingitis and oophoritis (ICD9 614.0-614.2)',
+ 147: 'Leishmaniasis (ICD9 085)',
+ 148: 'Obesity of non-endocrine origin (ICD9 278.0)',
+ 149: 'Obstructed labour (ICD9 660)',
+ 150: 'Water transport accidents (ICD9 E830-E838)',
+ 151: 'Redundant prepuce and phimosis (ICD9 605)',
+ 152: 'Acute tonsillitis (ICD9 463)',
+ 153: 'Slow fetal growth, fetal malnutrition and immaturity (ICD9 764,765)',
+ 154: 'Drug dependence (ICD9 304)',
+ 155: 'Schizophrenic psychoses (ICD9 295)',
+ 156: 'Other deformities of digestive system (ICD9 750,751)',
+ 157: 'Echinococcosis (ICD9 122)',
+ 158: 'Non-syphilitic spirochaetal diseases (ICD9 100-104)',
+ 159: 'Infections of skin and subcutaneous tissue (ICD9 680-686)',
+ 160: 'Avitaminosis (ICD9 264-269)',
+ 161: 'Filarial infection and dracontiasis (ICD9 125)',
+ 162: 'Other helminthiasis (ICD9 121,123,124,127-129)',
+ 163: 'Acute rheumatic fever (ICD9 390-392)',
+ 164: 'Tuberculosis of intestines, peritoneum and mesenteric glands (ICD9 014)',
+ 165: 'Obstetric complications affecting fetus or newborn (ICD9 761-763)',
+ 166: 'Cerebral infarction (ICD9 433,434)',
+ 167: 'Malignant neoplasm of bone and articular cartilage (ICD9 170)',
+ 168: 'Other disorders of joints (ICD9 717-719)',
+ 169: 'Chronic rheumatic heart disease (ICD9 393-398)',
+ 170: 'Malaria (ICD9 084)',
+ 171: 'Tetanus (ICD9 037)',
+ 172: 'Haemorrhoids (ICD9 455)',
+ 173: 'Legally induced abortion (ICD9 635)',
+    174: 'Accidental drowning and submersion (ICD9 E910)',
+ 175: 'Malignant neoplasm of oesophagus (ICD9 150)',
+ 176: 'Foreign body accidentally entering orifice (ICD9 E914,E915)',
+ 177: 'Pulmonary tuberculosis (ICD9 011)',
+ 178: 'Chronic pharyngitis, nasopharyngitis and sinusitis (ICD9 472, 473)',
+ 179: 'Diabetes mellitus (ICD9 250)',
+ 180: 'Malignant neoplasm of female breast (ICD9 174)',
+ 181: 'Malignant melanoma of skin (ICD9 172)',
+ 182: 'Tuberculosis of bones and joints (ICD9 015)',
+ 183: 'Trypanosomiasis (ICD9 086)',
+ 184: 'Haemorrhage of pregnancy and childbirth (ICD9 640,641,666)',
+}
+
+sex_outtrans = {
+ 1: 'Male',
+ 2: 'Female',
+ 9: 'Unknown',
+}
+
+region_outtrans = {
+ 1: 'Region A',
+ 2: 'Region B',
+ 3: 'Region C',
+ 4: 'Region D',
+ 5: 'Region E',
+ 6: 'Region F',
+ 7: 'Region G',
+ 8: 'Region H',
+ 9: 'Region I',
+ 10: 'Region J',
+ 11: 'Region K',
+ 12: 'Region L',
+ 13: 'Region M',
+ 14: 'Region N',
+ 15: 'Region O',
+ 16: 'Region P',
+ 17: 'Region Q',
+ 18: 'Unknown'
+}
+
+def make_death(options):
+ ds = makedataset('syndeath',label='Synthetic Death Dataset')
+ ds.addcolumn('agegrp', label='Age group',
+ datatype='int', coltype='ordinal',
+ outtrans=agegrp_outtrans)
+ ds.addcolumn('sex', label='Sex',
+ datatype='int', coltype='categorical',
+ outtrans=sex_outtrans)
+ ds.addcolumn('region', label='Region',
+ datatype='int', coltype='categorical',
+ outtrans=region_outtrans)
+ ds.addcolumn('year', label='Year of death',
+ datatype='int', coltype='ordinal')
+ ds.addcolumn('causeofdeath', label='Cause of death',
+ datatype='int', coltype='categorical',
+ outtrans=causeofdeath_outtrans)
+ if options.verbose:
+ print ds
+ return ds
+
+def load_death_source(ds, options):
+ from syndeath_expand import syndeath_expand
+ syndeath_expand(options.datadir, options.scratchdir, options.verbose)
+ filename = os.path.join(options.scratchdir, 'synthetic_deaths.csv.gz')
+ source = HeaderCSVDataSource('syndeath', [], filename=filename)
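+    # The empty column list means HeaderCSVDataSource presumably derives the
+    # columns from the CSV file's header row.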
+ ds.initialise()
+ ds.loaddata(source,
+ chunkrows=options.chunkrows,
+ rowlimit=options.rowlimit)
+ ds.finalise()
+
+def make_pop(options):
+ ds = makedataset('synpop',label='Synthetic Population Dataset',summary=True)
+ ds.addcolumn('agegrp', label='Age group',
+ datatype='int', coltype='ordinal',
+ outtrans=agegrp_outtrans)
+ ds.addcolumn('sex', label='Sex',
+ datatype='int', coltype='categorical',
+ outtrans=sex_outtrans)
+ ds.addcolumn('region', label='Region',
+ datatype='int', coltype='categorical',
+ outtrans=region_outtrans)
+ ds.addcolumn('year', label='Year',
+ datatype='int', coltype='ordinal')
+ ds.addcolumn('pop', label='Population',
+ datatype='int', coltype='scalar')
+ if options.verbose:
+ print ds
+ return ds
+
+
+def load_pop_source(ds, options):
+ filename = os.path.join(options.datadir, 'synthetic_pops.csv.gz')
+ source = HeaderCSVDataSource('synpop', [], filename=filename)
+ ds.initialise()
+ ds.loaddata(source,
+ chunkrows=options.chunkrows,
+ rowlimit=options.rowlimit)
+ ds.finalise()
+
+def make_stdpop_mf(options):
+ ds = makedataset('aus01stdpop_mf',label='Australian 2001 Standard Population (males and females)',
+ summary=True)
+ ds.addcolumn('agegrp', label='Age group',
+ datatype='int', coltype='ordinal',
+ outtrans=agegrp_outtrans)
+ ds.addcolumn('sex', label='Sex',
+ datatype='int', coltype='categorical',
+ outtrans=sex_outtrans)
+ ds.addcolumn('pop', label='Population',
+ datatype='int', coltype='scalar')
+ if options.verbose:
+ print ds
+ return ds
+
+def load_stdpop_mf_source(ds, options):
+ filename = os.path.join(options.datadir, 'aus01stdpop_mf.csv')
+ source = HeaderCSVDataSource('synstdpop', [], filename=filename)
+ ds.initialise()
+ ds.loaddata(source,
+ chunkrows=options.chunkrows,
+ rowlimit=options.rowlimit)
+ ds.finalise()
+
+def make_stdpop(options):
+ ds = makedataset('aus01stdpop',label='Australian 2001 Standard Population',
+ summary=True)
+ ds.addcolumn('agegrp', label='Age group',
+ datatype='int', coltype='ordinal',
+ outtrans=agegrp_outtrans)
+ ds.addcolumn('pop', label='Population',
+ datatype='int', coltype='scalar')
+ if options.verbose:
+ print ds
+ return ds
+
+def load_stdpop_source(ds, options):
+ filename = os.path.join(options.datadir, 'aus01stdpop.csv')
+ source = HeaderCSVDataSource('synstdpop', [], filename=filename)
+ ds.initialise()
+ ds.loaddata(source,
+ chunkrows=options.chunkrows,
+ rowlimit=options.rowlimit)
+ ds.finalise()
+
+
+def load(options):
+ ds = make_death(options)
+ load_death_source(ds, options)
+ ds.save()
+
+ ds = make_pop(options)
+ load_pop_source(ds, options)
+ ds.save()
+
+ ds = make_stdpop_mf(options)
+ load_stdpop_mf_source(ds, options)
+ ds.save()
+
+ ds = make_stdpop(options)
+ load_stdpop_source(ds, options)
+ ds.save()
diff --git a/demo/loaders/syndeath_expand.py b/demo/loaders/syndeath_expand.py
new file mode 100644
index 0000000..95677aa
--- /dev/null
+++ b/demo/loaders/syndeath_expand.py
@@ -0,0 +1,94 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# Expand the compressed synthetic deaths data into a plain CSV file
+#
+# $Id: syndeath_expand.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/demo/loaders/syndeath_expand.py,v $
+
+# Python standard modules
+import os
+import sys
+import csv
+import gzip
+import random
+import itertools
+
+def intreader(f):
+    for row in csv.reader(f):
+        yield tuple([int(field) for field in row])
+
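+# Cartesian product over inclusive (min, max) ranges, e.g.
+# minmax_iter(((1, 2), (5, 6))) yields (1,5), (1,6), (2,5), (2,6).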
+def minmax_iter(minmax):
+    this, rest = minmax[0], minmax[1:]
+    for n in xrange(this[0], this[1] + 1):
+        if rest:
+            for v in minmax_iter(rest):
+                yield (n,) + v
+        else:
+            yield (n,)
+
+def pairwise(i, n=2):
+ while i:
+ yield i[:n]
+ i = i[n:]
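+# e.g. pairwise((1, 2, 3, 4)) yields (1, 2) then (3, 4)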
+
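+# Input format of synthetic_deaths_comp.gz, as consumed below: the first two
+# CSV rows give the minima and maxima of (agegrp, sex, region, year); each
+# subsequent row holds (causeofdeath, frequency) pairs for the corresponding
+# combination, in minmax_iter() order.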
+def syndeath_expand(datadir, scratchdir, verbose=False):
+ srcfile = os.path.join(datadir, 'synthetic_deaths_comp.gz')
+ dstfile = os.path.join(scratchdir, 'synthetic_deaths.csv.gz')
+ if (os.path.exists(dstfile) and
+ os.path.getmtime(srcfile) <= os.path.getmtime(dstfile)):
+ return
+ if verbose:
+ print 'Expanding %r' % srcfile
+ src = gzip.open(srcfile, 'rb')
+ try:
+ reader = intreader(src)
+ minf = reader.next()
+ maxf = reader.next()
+ count = 0
+ minmax = zip(minf, maxf)
+ lines = []
+ for v, row in itertools.izip(minmax_iter(minmax), reader):
+ for cod, freq in pairwise(row):
+ for n in xrange(freq):
+ lines.append((random.random(),) + v + (cod,))
+ if verbose:
+ c = len(lines) / 10000
+ if c != count:
+ count = c
+ print '\r%s' % len(lines),
+ sys.stdout.flush()
+ finally:
+ if verbose:
+ print
+ src.close()
+ lines.sort()
+ dst = gzip.open(dstfile, 'wb', 9)
+ okay = False
+ if verbose:
+ print 'Writing %r' % dstfile
+ try:
+ writer = csv.writer(dst)
+ writer.writerow('agegrp,sex,region,year,causeofdeath'.split(','))
+ for row in lines:
+ writer.writerow(row[1:])
+ okay = True
+ finally:
+ dst.close()
+ if not okay:
+ os.unlink(dstfile)
+
+if __name__ == '__main__':
+    rawdata = os.path.join(os.path.dirname(__file__), '..', 'rawdata')
+    # syndeath_expand() requires both a data and a scratch directory; reusing
+    # the rawdata directory for both is an assumption for standalone runs.
+    syndeath_expand(rawdata, rawdata, verbose=True)
diff --git a/demo/loaders/urlfetch.py b/demo/loaders/urlfetch.py
new file mode 100644
index 0000000..6f4ecdf
--- /dev/null
+++ b/demo/loaders/urlfetch.py
@@ -0,0 +1,76 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+import os
+import sys
+import errno
+import tempfile
+import urllib2
+
+def fetch(url, filename):
+ if os.path.exists(filename):
+ print ' %s: using existing file' % filename
+ return
+
+ dirname = os.path.dirname(filename)
+ if dirname:
+ try:
+ os.makedirs(dirname, 0755)
+ except OSError, (eno, estr):
+ if eno != errno.EEXIST:
+ raise
+ try:
+ u = urllib2.urlopen(url)
+ try:
+ url_len = None
+ info = u.info()
+ if info.has_key('content-length'):
+ url_len = long(info['content-length'])
+ f, fn = tempfile.mkstemp('.tmp', '.', os.path.dirname(filename))
+ try:
+ cnt = 0
+ while 1:
+ buf = u.read(16384)
+ if not buf:
+ break
+ os.write(f, buf)
+ cnt += len(buf)
+ if url_len:
+ sys.stderr.write(' %s %.2fMB %d%%\r' %\
+ (filename, float(cnt) / 1024 / 1024,
+ cnt * 100 / url_len))
+ else:
+ sys.stderr.write(' %s %.2fMB\r' %\
+ (filename, float(cnt) / 1024 / 1024))
+                os.close(f)  # release the mkstemp descriptor before renaming into place
+                os.rename(fn, filename)
+ sys.stderr.write(' %s %.2fMB \n' %\
+ (filename, float(cnt) / 1024 / 1024))
+ return cnt
+ finally:
+ try:
+ os.unlink(fn)
+ except OSError:
+ pass
+ finally:
+ try:
+ u.close()
+ except:
+ pass
+
+ except urllib2.URLError, e:
+ print '\n %s: %s' % (url, e)
+ sys.exit(1)
+ except (IOError, OSError), e:
+ print '\n %s: %s' % (filename, e)
+ sys.exit(1)
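+
+# Typical use (hypothetical URL and destination path):
+#   fetch('http://example.com/nhds/nhds03.exe', 'scratch/nhds03.exe')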
diff --git a/demo/loaders/whopop.py b/demo/loaders/whopop.py
new file mode 100644
index 0000000..7e04b66
--- /dev/null
+++ b/demo/loaders/whopop.py
@@ -0,0 +1,194 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# Define and load new WHO World Standard Population data
+#
+# $Id: whopop.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/demo/loaders/whopop.py,v $
+
+# Python standard modules
+import os
+import csv
+
+# 3rd Party Modules
+# http://www.pfdubois.com/numpy/
+import Numeric, MA
+
+# SOOM modules
+from SOOMv0 import *
+from SOOMv0.Sources.CSV import *
+
+def make_worldpop(options):
+ world_agegrp = Numeric.array(range(1,19) + range(1,19))
+ sex_outtrans = {1:'Male',2:'Female'}
+ agegrp_outtrans = {
+ 0:"All ages",
+ 1:"0 - 4 yrs",
+ 2:"5 - 9 yrs",
+ 3:"10 - 14 yrs",
+ 4:"15 - 19 yrs",
+ 5:"20 - 24 yrs",
+ 6:"25 - 29 yrs",
+ 7:"30 - 34 yrs",
+ 8:"35 - 39 yrs",
+ 9:"40 - 44 yrs",
+ 10:"45 - 49 yrs",
+ 11:"50 - 54 yrs",
+ 12:"55 - 59 yrs",
+ 13:"60 - 64 yrs",
+ 14:"65 - 69 yrs",
+ 15:"70 - 74 yrs",
+ 16:"75 - 79 yrs",
+ 17:"80 - 84 yrs",
+ 18:"85+ yrs"}
+
+ world_pop = Numeric.array([ 0.0886,
+ 0.0869,
+ 0.0860,
+ 0.0847,
+ 0.0822,
+ 0.0793,
+ 0.0761,
+ 0.0715,
+ 0.0659,
+ 0.0604,
+ 0.0537,
+ 0.0455,
+ 0.0372,
+ 0.0296,
+ 0.0221,
+ 0.0152,
+ 0.0091,
+ 0.0060]*2,typecode=Numeric.Float)
+ world_pop = (world_pop * 1000000).astype(Numeric.Int32)
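+    # These 18 age-group proportions sum to 1.0, so the scaling above
+    # yields a notional standard population of one million per sex.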
+ world_sex = Numeric.array([1]*18 + [2]*18,typecode=Numeric.Int)
+ # Dataset with both sexes
+    worldpop_mf = makedataset("worldpop_mf",label="WHO World Standard (Theoretical) Population - male and female (identical proportions)")
+ worldpop_mf.addcolumnfromseq(name="_stdpop_",data=world_pop,mask=None,label="Standard population",datatype=int,coltype="scalar")
+ worldpop_mf.addcolumnfromseq(name="agegrp",data=world_agegrp,mask=None,label="Age group",coltype="ordinal",datatype=int,outtrans=agegrp_outtrans)
+ worldpop_mf.addcolumnfromseq(name="sex",data=world_sex,mask=None,label="Sex",coltype="categorical",datatype=int,outtrans=sex_outtrans)
+ # Dataset with just persons
+ worldpop_p = makedataset("worldpop_p",label="WHO World Standard (Theoretical) Population - persons only")
+ worldpop_p.addcolumnfromseq(name="_stdpop_",data=world_pop[0:18],mask=None,label="Standard population",datatype=int,coltype="scalar")
+ worldpop_p.addcolumnfromseq(name="agegrp",data=world_agegrp[0:18],mask=None,label="Age group",coltype="ordinal",datatype=int,outtrans=agegrp_outtrans)
+ if options.verbose:
+ print worldpop_mf
+ print worldpop_p
+ return worldpop_mf, worldpop_p
+
+years = [2000,2001,2002]
+
+def merge_who_indicator_data(datadir, scratchdir):
+ """
+    Load data extracted from the WHO WHOSIS web site - the data
+    files are in the ./rawdata directory.
+
+    This transposes the data, converting rows into columns and vice versa.
+ """
+ data = {}
+ for year in years:
+ src_fn = os.path.join(datadir, 'who%s.csv' % year)
+ csv_reader = csv.reader(open(src_fn, 'U'))
+ colmap = {}
+ for line_no, fields in enumerate(csv_reader):
+ # print line_no, fields
+ if line_no == 0:
+ pass
+ elif line_no == 1:
+ for i, colname in enumerate(fields):
+ data[(colname,year)] = []
+ colmap[i] = (colname,year)
+ else:
+ for i, value in enumerate(fields):
+ value = value.strip()
+ data[colmap[i]].append(value)
+
+    # Check that the indicators are the same in all years
+    indicators = None
+    indicators_equal = True
+    for year in years:
+        if indicators is None:
+            indicators = data[('Indicators', year)]
+        elif indicators != data[('Indicators', year)]:
+            indicators_equal = False
+            print "Indicators changed"
+    if indicators_equal:
+        print "Indicators equal"
+
+ # Write CSV file
+ csv_fn = os.path.join(scratchdir, 'whodata.csv')
+ fo = open(csv_fn, 'w')
+ try:
+ for country, year in data.keys():
+ if country != "Indicators":
+ dataseq = [country, str(year)] + data[(country,year)]
+ line = ','.join(dataseq)
+ print >> fo, line
+ finally:
+ fo.close()
+
+    return csv_fn, indicators
+
+
+def make_whoindic(options):
+ label="WHO indicators %d-%d" % (years[0], years[-1])
+ whoindic = makedataset("who_indicators", label=label)
+
+ whoindic.addcolumn("country",label="Country",coltype="categorical",datatype=str)
+ whoindic.addcolumn("year",label="Year",coltype="ordinal",datatype=int)
+ if options.verbose:
+ print whoindic
+ return whoindic
+
+
+def whoindic_source(whoindic, filename, indicators):
+ whodata_columns = [
+ DataSourceColumn("country",label="Country",coltype="categorical",ordinalpos=0,posbase=0),
+ DataSourceColumn("year",label="Year",coltype="ordinal",ordinalpos=1,posbase=0),
+ ]
+
+ i = 2
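+    # Each indicator label is flattened into a valid column name; for
+    # example, "Adult mortality (per 1000) females" becomes
+    # "adult_mortality_per_1000_females".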
+ for indic in indicators:
+ varname = '_'.join(indic.lower().split())
+ varname = varname.replace('(', '').replace(')', '').replace(';', '')
+ varname = varname.replace('%', 'percent')
+ varname = varname.replace('+', '_plus')
+ varname = varname.replace('-', '_')
+ varname = varname.replace('$', '_dollars')
+ coldef = DataSourceColumn(varname,label=indic,coltype="scalar",ordinalpos=i,posbase=0)
+ whodata_columns.append(coldef)
+ i += 1
+ whoindic.addcolumn(varname,label=indic,datatype=float,coltype="scalar")
+
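+    # header_rows=0 because the merged file written by
+    # merge_who_indicator_data has no header line; columns are matched
+    # purely by position (ordinalpos, with posbase=0).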
+ return CSVDataSource("who_indicators_data", whodata_columns, filename=filename, header_rows=0, label="WHO indicators 2000-2002")
+
+
+def load_whoindic(whoindic, options):
+ filename, indicators = merge_who_indicator_data(options.datadir,
+ options.scratchdir)
+ whodata = whoindic_source(whoindic, filename, indicators)
+ if options.verbose:
+ print whodata
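+    # Load sequence: initialise the dataset, stream the source in
+    # chunks of chunkrows rows (stopping at rowlimit, if set), then
+    # finalise.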
+ whoindic.initialise()
+ whoindic.loaddata(whodata,
+ chunkrows=options.chunkrows,
+ rowlimit=options.rowlimit)
+ whoindic.finalise()
+
+def load(options):
+ mf_ds, p_ds = make_worldpop(options)
+ mf_ds.save()
+ p_ds.save()
+ ind_ds = make_whoindic(options)
+ load_whoindic(ind_ds, options)
+ ind_ds.save()
diff --git a/demo/loaders/whotext.py b/demo/loaders/whotext.py
new file mode 100644
index 0000000..cef8498
--- /dev/null
+++ b/demo/loaders/whotext.py
@@ -0,0 +1,96 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# Define and load new WHO World Standard Population data
+#
+# $Id: whotext.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/demo/loaders/whotext.py,v $
+
+# Python standard modules
+import os
+import csv
+import sys
+
+# 3rd Party Modules
+# http://www.pfdubois.com/numpy/
+import Numeric, MA
+from mx.DateTime import DateTime
+
+# SOOM modules
+from SOOMv0 import *
+from SOOMv0.Sources.CSV import *
+
+def make_whotext(options):
+ label="WHO outbreak reports 2003-2004"
+ whotext = makedataset("who_text", label=label)
+
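+    # repdate is declared as a date column; the raw free-text report
+    # dates are normalised to d/m/yyyy form by whotext_xform_pre
+    # (defined below) before the column parser sees them.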
+ whotext.addcolumn("title",label="Report Title",coltype="searchabletext",datatype=str)
+ whotext.addcolumn("repdate",label="Report Date",coltype="date",datatype='recodedate')
+ whotext.addcolumn("report",label="Report Text",coltype="searchabletext",datatype=str)
+ if options.verbose:
+ print whotext
+ return whotext
+
+def whotext_xform_pre(row_dict):
+ months = {'January':1,
+ 'February':2,
+ 'March':3,
+ 'April':4,
+ 'May':5,
+ 'June':6,
+ 'July':7,
+ 'August':8,
+ 'September':9,
+ 'October':10,
+ 'November':11,
+ 'December':12}
+
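+    # For example, a raw repdate of "24 January 2003" is re-emitted as
+    # "24/1/2003", and embedded "<p>" markers in the report text become
+    # newlines.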
+ rawdate = row_dict['repdate']
+    try:
+        day, wordmonth, year = rawdate.split()
+    except ValueError:
+        sys.exit('whotext: unparseable report date %r' % rawdate)
+ month = months[wordmonth]
+ newdate = DateTime(int(year), month, int(day))
+ row_dict['repdate'] = str(newdate.day) + '/' + str(newdate.month) + '/' + str(newdate.year)
+ row_dict['report'] = row_dict['report'].replace('<p>','\n')
+ return row_dict
+
+def whotext_source(filename):
+ whotext_columns = [
+ DataSourceColumn("title",label="Title",coltype="searchabletext",ordinalpos=0),
+ DataSourceColumn("repdate",label="Report Date",coltype="date",ordinalpos=1),
+ DataSourceColumn("report",label="Report Text",coltype="searchabletext",ordinalpos=2),
+ ]
+
+ return CSVDataSource("whotext_data", whotext_columns, filename=filename, header_rows=0,
+ label="WHO outbreak reports 2003-05",xformpre=whotext_xform_pre)
+
+
+def load_whotext(whotext, filename, options):
+ filename = os.path.join(options.datadir, filename)
+ whotext_src = whotext_source(filename)
+ if options.verbose:
+ print whotext
+ whotext.initialise()
+ whotext.loaddata(whotext_src,
+ chunkrows=options.chunkrows,
+ rowlimit=options.rowlimit)
+ whotext.finalise()
+
+def load(options):
+ ds = make_whotext(options)
+ load_whotext(ds, 'whoreps.csv.gz', options)
+ ds.save()
diff --git a/demo/plot_demo.py b/demo/plot_demo.py
new file mode 100644
index 0000000..92fe863
--- /dev/null
+++ b/demo/plot_demo.py
@@ -0,0 +1,368 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: plot_demo.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/demo/plot_demo.py,v $
+
+# Standard python modules
+import os, sys, time
+import optparse
+
+#
+from SOOMv0 import *
+from testrunner import Test, TestRunner
+
+optp = optparse.OptionParser()
+optp.add_option('--soompath', dest='soompath',
+ help='SOOM dataset path')
+options, args = optp.parse_args()
+if not options.soompath:
+ options.soompath = os.path.normpath(os.path.join(os.path.dirname(__file__),
+ '..', 'SOOM_objects'))
+soom.setpath(options.soompath)
+
+
+# Load dataset
+nhds = dsload("nhds")
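+
+# Each Test below bundles a description, the plotting function to call,
+# the dataset, and the positional/keyword arguments for the call; the
+# TestRunner executes them in order (see testrunner.py for details).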
+
+
+tests = [
+
+ Test('Simple bar chart, check that origin correctly defaults to zero.',
+ plot.barchart, nhds, 'sex'),
+
+ Test('Test of titles and footers.',
+ plot.barchart, nhds, 'sex',title="A funny thing happened on the way to the forum",
+ footer="E tu, Brute!"),
+
+ Test('Test of titles and footers with line breaks and tabs.',
+ plot.barchart, nhds, 'sex',title="A funny\nthing\nhappened\non the\nway to the\nforum",
+ footer="E tu,\t\t\t\t\Brute!"),
+
+    Test('Simple bar chart, using pre-summarised data, check that it is automatically recognised as a summary dataset - currently this does not happen.',
+ plot.barchart, nhds.summ("sex"), 'sex',),
+
+ Test('Simple bar chart, using pre-summarised data, using manually specified weighting.',
+ plot.barchart, nhds.summ("sex"), 'sex', measure=freq(), weightcol='_freq_'),
+
+ Test('Stacked bar chart, conditioning column same as stackby column.',
+ plot.barchart, nhds, 'sex', stackby='sex'),
+
+ Test('Grouped bar chart, conditioning column same as groupby column.',
+ plot.barchart, nhds, 'sex', groupby='sex'),
+
+ Test('Stacked bar chart, conditioning column different to stackby column.',
+ plot.barchart, nhds, 'geog_region', stackby='sex'),
+
+ Test('Stacked bar chart, packed bars (pack=True), conditioning column different to stackby column.',
+ plot.barchart, nhds, 'geog_region', stackby='sex',pack=True),
+
+ Test('Grouped bar chart, conditioning column different to groupby column.',
+ plot.barchart, nhds, 'geog_region', groupby='sex'),
+
+ Test('Barchart with a non-zero origin.',
+ plot.barchart, nhds,"sex",origin=100000),
+
+ Test('Barchart with a non-zero origin and some values below the origin',
+ plot.barchart, nhds,"sex",origin=1000000),
+
+ Test('Barchart with explicit y scale 0 to 2,000,000',
+ plot.barchart, nhds,"sex",ylim=(0,2000000)),
+
+ Test('Barchart with explicit y scale 0 to 2,000,000 and increased number of tick marks',
+ plot.barchart, nhds,"sex",ylim=(0,2000000),yticks=10),
+
+ Test('Barchart with log scale',
+ plot.barchart, nhds,"sex",logyscale=10),
+
+ Test('Barchart with unusual bases for log scale',
+ plot.barchart, nhds,"sex",logyscale=7),
+
+    # Test('Charts can\'t use explicit scales and log scales together in the current version.',
+ # plot.barchart, nhds,"sex",ylim=(0,2000000),logyscale=7),
+
+ Test('Horizontal barchart',
+ plot.barchart, nhds,"sex",horizontal=True),
+
+ Test('Horizontal barchart with more categories',
+ plot.barchart, nhds,"prin_src_payment2",horizontal=True),
+
+ Test('Vertical barchart with more categories',
+ plot.barchart, nhds,"prin_src_payment2",horizontal=False),
+
+ Test('Vertical barchart with rotated labels',
+ plot.barchart, nhds,"prin_src_payment2",xlabelrotate=45),
+
+ Test('Barchart panel plot by specifying more than one conditioning column',
+ plot.barchart, nhds,"prin_src_payment2","sex"),
+
+ Test('Barchart panel plot by specifying more than one conditioning column - reversed order of conditioning columns',
+ plot.barchart, nhds,"sex","prin_src_payment2"),
+
+ Test('Panel plots - can specify layout of panels using a tuple (cols, rows, pages)',
+ plot.barchart, nhds,"prin_src_payment2","sex",layout=(1,2,1)),
+
+ # This aspect of layout not implemented yet.
+ #Test('Panel plots - can specify layout of panels using a string "h" or "v" ("h" used here)',
+ # plot.barchart, nhds,"geog_region","sex",layout='h'),
+
+ #Test('Panel plots - can specify layout of panels using a string "h" or "v" ("v" used here)',
+ # plot.barchart, nhds,"geog_region","sex",layout='v'),
+
+ #Test('Panel plots - this should not break...',
+ # plot.barchart, nhds,"sex","geog_region",filterexpr="geog_region in (1,2)",layout='v'),
+
+ Test('Panel plots - horizontal panel plots also OK',
+ plot.barchart, nhds,"hosp_ownership","sex",horizontal=True),
+
+ # Test('Panel plots - panelling by a scalar column (filtered for speed)',
+ # plot.barchart, nhds,"sex","age",filterexpr="diagnosis1 =: '410'"),
+
+ # Test('Stacking by a scalar column...',
+ # plot.barchart, nhds,"sex",stackby="age",filterexpr="diagnosis1 =: '410'"),
+
+ Test('Barcharts - dates on x-axis',
+ plot.barchart, nhds,"randomdate",filterexpr="randomdate between (date(2002,1,1),date(2002,1,31))"),
+
+ Test('Barcharts - dates on y-axis',
+ plot.barchart, nhds,"randomdate",filterexpr="randomdate between (date(2002,1,1),date(2002,1,31))",horizontal=True),
+
+ Test('Barcharts - horizontal layout with coloured bars due to pseudo-stacking',
+ plot.barchart, nhds,"hosp_ownership","geog_region",stackby="hosp_ownership"),
+
+ Test('Barcharts - measure column is mean(\'age\') rather than frequency',
+ plot.barchart, nhds,"marital_status",measure=mean('age')),
+
+ Test('Barcharts - measure column is mean(\'age\') weighted by \'analysis_wgt\', rather than frequency',
+ plot.barchart, nhds,"marital_status",measure=mean('age',weightcol='analysis_wgt')),
+
+ Test('Plotting proportions - measure=(\'marital_status\',)',
+ plot.barchart, nhds,"marital_status",measure=('marital_status',)),
+
+ Test('Plotting proportions - measure=(\'marital_status\',) weighted by \'analysis_wgt\'',
+ plot.barchart, nhds,"marital_status",measure=('marital_status',),weightcol='analysis_wgt'),
+
+ Test('Panelled proportions plot - measure=(\'marital_status\',\'sex\') weighted by \'analysis_wgt\'',
+ plot.barchart, nhds,"marital_status","sex",measure=('marital_status','sex'),weightcol='analysis_wgt'),
+
+ Test('Panelled proportions plot - measure=(\'sex\',) weighted by \'analysis_wgt\'',
+ plot.barchart, nhds,"marital_status","sex",measure=('sex',),weightcol='analysis_wgt'),
+
+ Test('Panelled proportions plot - measure=(\'marital_status\',) weighted by \'analysis_wgt\'',
+ plot.barchart, nhds,"marital_status","sex",measure=('marital_status',),weightcol='analysis_wgt'),
+
+ Test('Stacked proportions plot - measure=(\'marital_status\',\'sex\') weighted by \'analysis_wgt\'',
+ plot.barchart, nhds,"marital_status",stackby="sex",measure=('marital_status','sex'),weightcol='analysis_wgt'),
+
+ Test('Stacked proportions plot - measure=(\'marital_status\',) weighted by \'analysis_wgt\'',
+ plot.barchart, nhds,"marital_status",stackby="sex",measure=('marital_status',),weightcol='analysis_wgt'),
+
+ Test('Stacked proportions plot - measure=(\'sex\',) weighted by \'analysis_wgt\'',
+ plot.barchart, nhds,"marital_status",stackby="sex",measure=('sex',),weightcol='analysis_wgt'),
+
+ Test('Simple dotchart.',
+ plot.dotchart, nhds,"sex"),
+
+ Test('Panelled dotchart',
+ plot.dotchart, nhds,"marital_status","sex"),
+
+ Test('Horizontal panelled dotchart using median(\'days_of_care\') weighted by \'analysis_wgt\'',
+ plot.dotchart, nhds,"marital_status","sex",measure=median('days_of_care'),weightcol='analysis_wgt',horizontal=True),
+
+ Test('Grouped dotchart',
+ plot.dotchart, nhds,"admission_type",groupby="hosp_ownership",measure=freq(),weightcol='analysis_wgt'),
+
+    # Test('Stacked dotchart - seems to be identical to grouped dotchart (or vice versa) - no weighting',
+ # plot.dotchart, nhds,"admission_type",stackby="hosp_ownership",measure=freq()),
+
+    # Test('Stacked dotchart - seems to be identical to grouped dotchart (or vice versa) - with weighting',
+ # plot.dotchart, nhds,"admission_type",stackby="hosp_ownership",measure=freq(),weightcol='analysis_wgt'),
+
+    # Test('Stacked dotchart with bigger symbols...currently doesn\'t have any effect...',
+ # plot.dotchart, nhds,"admission_type",stackby="hosp_ownership",measure=freq(),dotsize=2),
+
+ Test('Histogram - filtered to reduce data volume',
+ plot.histogram, nhds,"age",filterexpr="diagnosis1 =: '410'",filterlabel="Admissions for acute myocardial infarction"),
+
+ Test('Count histograms...',
+ plot.histogram, nhds,"age",hist_type='count',filterexpr="diagnosis1 =: '410'",
+ filterlabel="Admissions for acute myocardial infarction"),
+
+ Test('Density histograms...',
+ plot.histogram, nhds,"age",hist_type='density',filterexpr="diagnosis1 =: '410'",
+ filterlabel="Admissions for acute myocardial infarction"),
+
+ Test('Density plot...',
+ plot.densityplot, nhds,"age",filterexpr="diagnosis1 =: '410'",filterlabel="Admissions for acute myocardial infarction"),
+
+ Test('Percentage histograms with more bins...',
+ plot.histogram, nhds,"age",bins=55,filterexpr="diagnosis1 =: '410'",filterlabel="Admissions for acute myocardial infarction"),
+
+ Test('Percentage histograms with panelling...works OK',
+ plot.histogram, nhds,"age","sex",filterexpr="diagnosis1 =: '410'",bins=55,
+ filterlabel="Admissions for acute myocardial infarction"),
+
+ Test('Vertically stacked histograms of age distribution using layout parameter',
+ plot.histogram, nhds,"age","sex",bins=50,layout=(1,2,1),filterexpr="diagnosis1 =: '410'",
+ filterlabel="Admissions for acute myocardial infarction"),
+
+ Test('Panelled count histograms - is the y scale labelling correct?',
+ plot.histogram, nhds,"age","sex",hist_type='count',filterexpr="diagnosis1 =: '410'",bins=55,
+ filterlabel="Admissions for acute myocardial infarction"),
+
+ Test('Panelled count histograms with 18 age bins...',
+ plot.histogram, nhds,"age","sex",hist_type='count',filterexpr="diagnosis1 =: '410'",bins=18,
+ filterlabel="Admissions for acute myocardial infarction"),
+
+ Test('Panelled barchart - compare counts to those from panelled histogram by 5 yr age groups.',
+ plot.barchart, nhds,"agegrp","sex",filterexpr="diagnosis1 =: '410'",filterlabel="Admissions for acute myocardial infarction"),
+
+ Test('Panelled density plot',
+ plot.densityplot, nhds,"age","sex",filterexpr="diagnosis1 =: '410'",filterlabel="Admissions for acute myocardial infarction"),
+
+ Test('Box plot',
+ plot.boxplot, nhds,"age","sex",filterexpr="diagnosis1 =: '410'",filterlabel="Admissions for acute myocardial infarction"),
+
+ Test('Box plot with variable width disabled',
+ plot.boxplot, nhds,"age","sex",variable_width=False,filterexpr="diagnosis1 =: '410'",filterlabel="Admissions for acute myocardial infarction"),
+
+ Test('Box plot with outliers display disabled - Lattice bwplot doesn\'t seem to support this?',
+ plot.boxplot, nhds,"age","sex",outliers=False,filterexpr="diagnosis1 =: '410'",filterlabel="Admissions for acute myocardial infarction"),
+
+ Test('Box plot with notches display disabled - Lattice bwplot doesn\'t seem to support notches?',
+ plot.boxplot, nhds,"age","sex",notches=False,filterexpr="diagnosis1 =: '410'",filterlabel="Admissions for acute myocardial infarction"),
+
+ Test('Panelled box plot',
+ plot.boxplot, nhds,"age","marital_status",
+ filterexpr="diagnosis1 =: '410'",filterlabel="Admissions for acute myocardial infarction"),
+
+ Test('Horizontal panelled box plot',
+ plot.boxplot, nhds,"age","num_beds",horizontal=True,
+ filterexpr="diagnosis1 =: '410'",filterlabel="Admissions for acute myocardial infarction"),
+
+ Test('Box plot with vertical=True',
+ plot.boxplot, nhds,"age","num_beds",vertical=True,
+ filterexpr="diagnosis1 =: '410'",filterlabel="Admissions for acute myocardial infarction"),
+
+ Test('Box plot with vertical=False',
+ plot.boxplot, nhds,"age","num_beds",vertical=False,
+ filterexpr="diagnosis1 =: '410'",filterlabel="Admissions for acute myocardial infarction"),
+
+ Test('Panelled box plot',
+ plot.boxplot, nhds,"age","num_beds","sex",
+ filterexpr="diagnosis1 =: '410'",filterlabel="Admissions for acute myocardial infarction"),
+
+ Test('Panelled horizontal box plot',
+ plot.boxplot, nhds,"age","num_beds","sex",horizontal=True,
+ filterexpr="diagnosis1 =: '410'",filterlabel="Admissions for acute myocardial infarction"),
+
+ Test('Two-way panelled box plot',
+ plot.boxplot, nhds,"age","num_beds","sex","geog_region",
+ filterexpr="diagnosis1 =: '410'",filterlabel="Admissions for acute myocardial infarction"),
+
+ # Test('Three-way panelled box plot',
+ # plot.boxplot, nhds,"age","num_beds","sex","geog_region","marital_status",
+ # filterexpr="diagnosis1 =: '410'",filterlabel="Admissions for acute myocardial infarction"),
+
+ Test('Scatter plot',
+ plot.scatterplot, nhds,"age","days_of_care",
+ filterexpr="diagnosis1 =: '410'",filterlabel="Admissions for acute myocardial infarction"),
+
+ Test('Scatter plot showing support for missing values',
+ plot.scatterplot, nhds,"age","randomvalue",
+ filterexpr="diagnosis1 =: '410'",filterlabel="Admissions for acute myocardial infarction"),
+
+ Test('Scatter plot with log scales',
+ plot.scatterplot, nhds,"age","randomvalue",logxscale=10,logyscale=2,
+ filterexpr="diagnosis1 =: '410'",filterlabel="Admissions for acute myocardial infarction"),
+
+ Test('Scatter plot with date/time values',
+ plot.scatterplot, nhds,"age","randomdate",
+ filterexpr="diagnosis1 =: '410'",filterlabel="Admissions for acute myocardial infarction"),
+
+ Test('Panelled scatter plot',
+ plot.scatterplot, nhds,"age","days_of_care","agegrp",
+ filterexpr="diagnosis1 =: '410'",filterlabel="Admissions for acute myocardial infarction"),
+
+ Test('Panelled scatter plot using continuous panelling column',
+ plot.scatterplot, nhds,"age","days_of_care","randomvalue",
+ filterexpr="diagnosis1 =: '410'",filterlabel="Admissions for acute myocardial infarction"),
+
+ Test('Scatter plot matrix',
+ plot.scattermatrix, nhds,"age","days_of_care","randomvalue",
+ filterexpr="diagnosis1 =: '410'",filterlabel="Admissions for acute myocardial infarction"),
+
+ Test('Simple time-series line plot',
+ plot.lineplot, nhds, "randomdate",
+ filterexpr="diagnosis1 >=: '480' and diagnosis1 <=: '487'",filterlabel="Admissions for pneumonia and influenza"),
+
+ Test('Simple time-series line plot using weighted frequencies',
+ plot.lineplot, nhds, "randomdate",measure=freq(),weightcol="analysis_wgt",
+ filterexpr="diagnosis1 >=: '480' and diagnosis1 <=: '487' and randomdate >= date(2002,6,1)",
+ filterlabel="Admissions for pneumonia and influenza, June-December 2002"),
+
+    # Test('Simple time-series line plot using weighted frequencies and different date formatting - not currently supported',
+ # plot.lineplot, nhds, "randomdate",measure=freq(),weightcol="analysis_wgt",dateformat='%d-%B',
+ # filterexpr="diagnosis1 >=: '480' and diagnosis1 <=: '487' and randomdate >= date(2002,6,1)",
+ # filterlabel="Admissions for pneumonia and influenza, June-December 2002"),
+
+ # Test('Simple time-series line plot - note automatic date axis labelling',
+ # plot.lineplot, nhds, "randomdate",measure=freq(),weightcol="analysis_wgt",dateformat='%d-%B',
+ # filterexpr="diagnosis1 >=: '480' and diagnosis1 <=: '487' and randomdate between (date(2002,6,1),date(2002,6,15))",
+ # filterlabel="Admissions for pneumonia and influenza, 1-15 June 2002"),
+
+ Test('Simple time-series line plot - with a statistic as the measure',
+ plot.lineplot, nhds, "randomdate",measure=mean('age'),weightcol="analysis_wgt",
+ filterexpr="diagnosis1 >=: '480' and diagnosis1 <=: '487' and randomdate between (date(2002,6,1),date(2002,6,15))",
+ filterlabel="Admissions for pneumonia and influenza, 1-15 June 2002"),
+
+ Test('Time-series line plot with groupby',
+ plot.lineplot, nhds,"randomdate",groupby="sex",
+ filterexpr="diagnosis1 >=: '480' and diagnosis1 <=: '487'",filterlabel="Admissions for pneumonia and influenza"),
+
+ Test('Time-series line plot with groupby and thicker lines',
+ plot.lineplot, nhds,"randomdate",groupby="sex",line_width=6,
+ filterexpr="diagnosis1 >=: '480' and diagnosis1 <=: '487'",filterlabel="Admissions for pneumonia and influenza"),
+
+ Test('Time-series line plot with groupby and different line style',
+ plot.lineplot, nhds,"randomdate",groupby="geog_region",line_style=6,
+ filterexpr="diagnosis1 >=: '480' and diagnosis1 <=: '487'",filterlabel="Admissions for pneumonia and influenza"),
+
+ Test('Panelled time-series line plot',
+ plot.lineplot, nhds,"randomdate","geog_region",
+ filterexpr="diagnosis1 >=: '480' and diagnosis1 <=: '487'",filterlabel="Admissions for pneumonia and influenza"),
+
+ Test('Panelled time-series line plot with groupby',
+ plot.lineplot, nhds,"randomdate","geog_region",groupby='sex',
+ filterexpr="diagnosis1 >=: '480' and diagnosis1 <=: '487'",filterlabel="Admissions for pneumonia and influenza"),
+
+ Test('Line plot of a categorical column',
+ plot.lineplot, nhds,"marital_status",
+ filterexpr="diagnosis1 >=: '480' and diagnosis1 <=: '487'",filterlabel="Admissions for pneumonia and influenza"),
+
+ Test('Line plot of an ordinal column',
+ plot.lineplot, nhds,"agegrp",
+ filterexpr="diagnosis1 >=: '480' and diagnosis1 <=: '487'",filterlabel="Admissions for pneumonia and influenza"),
+
+]
+
+runner = TestRunner()
+
+
+# Run tests
+try:
+ runner.run(tests)
+finally:
+ runner.close()
+
diff --git a/demo/rawdata/README_NHMRC.txt b/demo/rawdata/README_NHMRC.txt
new file mode 100644
index 0000000..afdc2bf
--- /dev/null
+++ b/demo/rawdata/README_NHMRC.txt
@@ -0,0 +1,2 @@
+The file nhmrc_grantdata.csv.gz contains a compressed version of grant funding data for the years 2000 to 2007 released for public scrutiny by the Australian National Health and Medical Research Council (NH&MRC). These data are available as a spreadsheet file at http://www.nhmrc.gov.au/funding/dataset/rmis/index.htm - the data included here were extracted from that spreadsheet as downloaded on 25 July, 2007.
+
diff --git a/demo/rawdata/README_WHO.txt b/demo/rawdata/README_WHO.txt
new file mode 100644
index 0000000..42f659b
--- /dev/null
+++ b/demo/rawdata/README_WHO.txt
@@ -0,0 +1,18 @@
+The files in this directory called who2000.csv, who2001.csv, who2002.csv and
+whodata.csv were downloaded from the World
+Health Organisation Statistical Information Service (WHOSIS) website at
+http://www3.who.int/whosis/core/core2.cfm?option=3
+
+To obtain an updated version of these data, go to the above URL, select
+"All countries", one of the years 2000, 2001 or 2002, and 'All indicators",
+and then click "GO!". When the table has been displayed, click on
+the "Download the .csv file" at the upper left of the table, and save the CSV
+file in the demo/rawdata directory as whoYYYY.csv where YYYY is the year for
+that file.
+
+The file in this directory called whoreps.csv.gz contains reports of disease
+outbreaks for the period January 2003 to June 2005 taken from the WHO web site.
+At the time of writing, permission to distribute these data with NetEpi for
+software demonstration purposes has been sought from WHO, although such use
+appears to be consistent with the conditions of use statement provided by WHO
+covering all information contained on its web site.
diff --git a/demo/rawdata/aus01stdpop.csv b/demo/rawdata/aus01stdpop.csv
new file mode 100644
index 0000000..896ce0b
--- /dev/null
+++ b/demo/rawdata/aus01stdpop.csv
@@ -0,0 +1,19 @@
+agegrp,pop
+1,1282357
+2,1351664
+3,1353177
+4,1352745
+5,1302412
+6,1407081
+7,1466615
+8,1492204
+9,1479257
+10,1358594
+11,1300777
+12,1008799
+13,822024
+14,682513
+15,638380
+16,519356
+17,330050
+18,265235
diff --git a/demo/rawdata/aus01stdpop_mf.csv b/demo/rawdata/aus01stdpop_mf.csv
new file mode 100644
index 0000000..2730a55
--- /dev/null
+++ b/demo/rawdata/aus01stdpop_mf.csv
@@ -0,0 +1,37 @@
+agegrp,sex,pop
+1,1,657499
+2,1,693790
+3,1,693083
+4,1,690668
+5,1,660776
+6,1,700910
+7,1,726919
+8,1,741434
+9,1,734436
+10,1,675055
+11,1,652540
+12,1,512888
+13,1,413982
+14,1,335590
+15,1,303554
+16,1,227356
+17,1,128250
+18,1,81922
+1,2,624858
+2,2,657874
+3,2,660094
+4,2,662077
+5,2,641636
+6,2,706171
+7,2,739696
+8,2,750770
+9,2,744821
+10,2,683539
+11,2,648237
+12,2,495911
+13,2,408042
+14,2,346923
+15,2,334826
+16,2,292000
+17,2,201800
+18,2,183313
diff --git a/demo/rawdata/nhmrc_grantdata.csv.gz b/demo/rawdata/nhmrc_grantdata.csv.gz
new file mode 100644
index 0000000..c6af597
Binary files /dev/null and b/demo/rawdata/nhmrc_grantdata.csv.gz differ
diff --git a/demo/rawdata/synthetic_deaths_comp.gz b/demo/rawdata/synthetic_deaths_comp.gz
new file mode 100644
index 0000000..092b4ff
Binary files /dev/null and b/demo/rawdata/synthetic_deaths_comp.gz differ
diff --git a/demo/rawdata/synthetic_pops.csv.gz b/demo/rawdata/synthetic_pops.csv.gz
new file mode 100644
index 0000000..66bf00d
Binary files /dev/null and b/demo/rawdata/synthetic_pops.csv.gz differ
diff --git a/demo/rawdata/who2000.csv b/demo/rawdata/who2000.csv
new file mode 100644
index 0000000..12416c0
--- /dev/null
+++ b/demo/rawdata/who2000.csv
@@ -0,0 +1,38 @@
+Data for 2000
+Indicators,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,Austria,Azerbaijan,Bahamas,Bahrain,Bangladesh,Barbados,Belarus,Belgium,Belize,Benin,Bhutan,Bolivia,Bosnia and Herzegovina,Botswana,Brazil,Brunei Darussalam,Bulgaria,Burkina Faso,Burundi,Cambodia,Cameroon,Canada,Cape Verde,Central African Republic,Chad,Chile,China,Colombia,Comoros,Congo,Cook Islands,Costa Rica,Côte d'Ivoire,Croatia,Cuba,Cyprus,Czech Republic,Democratic People's Republic o [...]
+Adult mortality (per 1000) females,376, 95,119, 41,386,133, 92,106, 54, 60,153,161, 93,252,122,133, 67,124,328,222,219, 93,669,136, 97,103,507,603,264,440, 57,121,573,361, 67,110,115,325,406,152, 78,494, 74, 94, 59, 75,192,493, 82,541,105,146,120,147,148,280,441,114,535,180, 61, 61,330,320,133, 60,326, 47,159,182,366,427,209,373,157,123, 51,213,191,139,208, 62, 56, 53,127, 44,144,201,529,208, 68,175,299,122,136,630,385,157,106, 64,322,653,113,226,446, 46,230,302,109,101,188, 54,199,113,6 [...]
+Adult mortality (per 1000) males,437,209,155,105,492,183,184,223,100,125,261,267,120,262,180,381,128,200,384,268,264,200,703,259,144,239,559,648,373,488,101,210,620,449,151,161,238,381,475,175,131,553,178,143,116,174,238,571,129,590,183,234,199,210,250,339,493,316,594,240,144,144,380,373,250,127,379,114,202,286,432,495,299,524,221,295, 85,287,250,170,258,108, 99,110,169, 98,199,366,578,269,100,335,355,328,192,667,448,210,286,135,385,701,202,228,518,111,302,357,228,180,243,123,280,174,674 [...]
+Child mortality (per 1000) females,249, 40, 47, 4,198, 22, 20, 38, 5, 5, 88, 12, 8, 93, 13, 12, 6, 32,151, 92, 80, 16, 83, 42, 7, 17,206,183,120,140, 5, 40,185,171, 10, 44, 21, 95,122, 20, 15,138, 7, 8, 7, 6, 52,205, 5,168, 13, 45, 33, 49, 33,143,130, 9,171, 24, 5, 6,109, 90, 19, 5, 98, 6, 22, 52,156,197, 66, 96, 42, 8, 5, 99, 49, 47, 73, 5, 6, 6, 14, 4, 22, 60, 98, 74, 12, 58,134, 14, 18,150,187, 29, 12, 5,142,211, 12, 66,220, 6, 39,168, 16, 25, 48, 5, 76, 59,2 [...]
+Child mortality (per 1000) males,252, 47, 54, 5,217, 25, 24, 56, 7, 6,101, 14, 11, 91, 13, 17, 8, 37,162, 93, 88, 21, 85, 49, 12, 22,217,196,136,149, 6, 56,199,192, 12, 38, 29,107,134, 23, 18,152, 10, 9, 9, 6, 54,218, 7,184, 14, 55, 41, 51, 40,156,142, 14,187, 27, 6, 8,118,101, 30, 6,112, 7, 25, 54,174,215, 77,111, 45, 11, 5, 90, 61, 44, 80, 6, 8, 7, 17, 5, 25, 80,107, 93, 13, 70,152, 20, 22,159,205, 31, 15, 5,156,229, 15, 62,231, 10, 48,175, 21, 31, 61, 7, 92, 68,227 [...]
+Infant mortality rate; both sexes (per 1000 live births),188.8, 23.1, 42.0, 3.8,153.0, 16.4, 16.8, 31.1, 5.2, 4.8, 60.7, 14.6, 9.7, 63.0, 17.3, 10.3, 4.9, 33.2, 95.9, 75.0, 62.9, 15.1, 60.2, 37.1, 11.0, 13.6,117.0,116.4,106.5, 90.6, 5.1, 31.6,120.4,112.2, 13.5, 30.9, 21.9, 64.6, 67.9, 17.8, 9.6,112.5, 7.3, 7.1, 7.1, 4.1, 42.9,127.0, 5.4, 93.9, 11.2, 30.5, 26.1, 37.7, 27.7, 96.6, 65.2, 8.8,113.6, 21.7, 3.6, 4.5, 64.2, 80.9, 20.7, 4.4, 61.6, 6.1, 17.4, 45.0, 94.3,128.3, 44 [...]
+Annual population growth rate (%), 4.8,-0.5, 2.0, 5.0, 3.2, 0.3, 1.3, 0.7, 1.3, 0.4, 1.1, 1.8, 2.7, 2.2, 0.4,-0.1, 0.3, 2.0, 3.0, 2.1, 2.4,-0.8, 2.2, 1.4, 2.5,-0.9, 2.5, 1.2, 3.1, 2.5, 1.1, 2.3, 2.4, 3.1, 1.5, 1.0, 1.9, 3.0, 3.1, 0.7, 2.8, 2.4, 0.3, 0.5, 1.4, 0.0, 1.1, 3.3, 0.3, 2.3,-0.1, 1.7, 2.1, 1.9, 2.1, 2.6, 1.7,-1.2, 2.8, 1.2, 0.4, 0.4, 2.8, 3.4,-0.4, 0.3, 2.5, 0.4, 0.3, 2.7, 2.9, 2.4, 0.4, 1.7, 2.8,-0.4, 0.9, 1.8, 1.5, 1.9, 2.9, 0.8, 3.0, 0.1, 0.8, 0.3, 4.2,-0.3, 2.7, 1.5,-1.1, 1. [...]
+Dependency ratio (per 100), 86, 56, 64, 37,104, 57, 60, 48, 49, 47, 56, 54, 45, 72, 45, 47, 52, 74, 96, 89, 77, 41, 82, 51, 54, 47,108,102, 88, 88, 46, 78, 89, 98, 55, 46, 60, 84, 98, 65, 60, 83, 47, 44, 53, 43, 48,107, 50, 87, 57, 61, 63, 65, 68, 91, 88, 47, 93, 58, 49, 53, 85, 77, 50, 47, 79, 48, 57, 89, 88, 89, 55, 80, 82, 46, 54, 62, 55, 69, 80, 49, 62, 48, 63, 47, 75, 51, 86, 76, 50, 67, 86, 47, 59, 77, 84, 60, 49, 49, 91, 97, 62, 89,100, 48, 76, 90, 47, 61, 76, 53, 64, 63, 89, 61, [...]
+Percentage of population aged 60+ years, 4.7, 9.0, 6.0,15.6, 4.5, 9.9,13.3,13.2,16.3,20.7,10.5, 8.0, 4.7, 4.9,13.4,18.9,22.1, 6.0, 4.2, 6.5, 6.2,14.9, 4.5, 7.8, 5.1,21.7, 4.8, 4.3, 4.4, 5.6,16.7, 6.5, 6.1, 4.9,10.2,10.0, 6.9, 4.2, 5.1, 6.8, 7.5, 5.0,20.2,13.7,15.7,18.4,10.0, 4.5,20.0, 5.5, 9.9, 6.6, 6.9, 6.3, 7.2, 6.0, 4.7,20.2, 4.7, 5.7,19.9,20.5, 8.7, 5.2,18.7,23.2, 5.1,23.4, 9.9, 5.3, 4.4, 5.6, 6.9, 5.6, 5.1,19.7,15.1, 7.6, 7.6, 5.2, 4.6,15.2,13.2,24.1, 9.6,23.2, 4.5,11.2, 4.2, 6.8, 4 [...]
+Total fertility rate, 6.9, 2.4, 3.0, 1.2, 7.2, 1.6, 2.5, 1.2, 1.8, 1.3, 1.7, 2.4, 2.5, 3.7, 1.5, 1.2, 1.5, 3.2, 5.9, 5.3, 4.1, 1.3, 4.1, 2.2, 2.7, 1.1, 6.8, 6.8, 5.0, 4.9, 1.6, 3.4, 5.1, 6.7, 2.4, 1.8, 2.7, 5.2, 6.3, 3.3, 2.7, 4.9, 1.7, 1.6, 2.0, 1.2, 2.1, 6.7, 1.7, 5.9, 1.8, 2.8, 2.9, 3.1, 3.0, 5.9, 5.5, 1.2, 6.8, 3.1, 1.6, 1.8, 5.4, 5.0, 1.5, 1.3, 4.4, 1.3, 3.5, 4.7, 6.1, 6.0, 2.4, 4.2, 4.0, 1.3, 2.0, 3.1, 2.4, 3.0, 5.0, 2.0, 2.8, 1.2, 2.4, 1.4, 4.5, 2.0, 4.4, 4.6, 2.8, 2.6, 5.1, 1.1, [...]
+Total population (000), 21765, 3134, 30291, 86, 13134, 65, 37032, 3787, 19138, 8080, 8041, 304, 640, 137439, 267, 10187, 10249, 226, 6272, 2085, 8329, 3977, 1541, 170406, 328, 7949, 11535, 6356, 13104, 14876, 30757, 427, 3717, 7885, 15211, 1282437, 42105, 706, 3018, 20, 4024, 1601 [...]
+Expectation of lost healthy years at birth females,12.5,10.6,12.9,10.1,10.8,14.5,11.9,10.1, 8.8, 8.9,11.4,15.7,12.4,12.9,13.4, 9.2, 9.9,14.3,11.9,14.3,12.1, 9.4, 7.9,12.7,12.7, 9.2, 9.5, 8.5, 9.8,10.5, 9.8,12.3, 8.9,11.2,12.1, 9.7,11.8,12.3,10.1,11.0,12.4, 9.5,10.6,10.9,12.7, 9.9,11.2, 9.6, 8.4,10.1,12.2,14.0,12.0,12.0,13.9,11.4,10.4,11.0, 9.6,10.7, 9.5,10.2,10.4,12.1,11.6, 9.2,11.0, 8.5,11.5,12.6,11.9,10.5,14.2,11.2,13.2,10.7, 9.3,11.0, 9.1,11.4,12.1, 8.8,10.0, 9.6,11.5, 8.4,13.6,10.3, [...]
+Expectation of lost healthy years at birth males, 9.1, 7.9, 9.7, 7.3, 8.1,10.1, 8.4, 7.5, 6.9, 6.8, 8.4,10.8, 9.7, 9.8, 9.3, 6.6, 6.9,11.1, 8.5,10.3, 9.5, 6.6, 6.5, 9.5, 9.6, 6.3, 7.2, 6.7, 7.8, 8.1, 7.7, 9.6, 6.9, 8.7, 9.0, 8.0, 8.6, 9.1, 7.7, 8.3, 9.2, 7.2, 9.0, 8.6, 8.4, 8.6, 9.6, 7.2, 5.3, 7.8, 9.4,10.8, 9.9, 8.3,11.0, 8.7, 7.7, 9.3, 7.1, 8.3, 7.6, 6.7, 7.8, 8.6, 9.6, 6.9, 8.5, 5.7, 8.8,10.1, 8.6, 7.7,10.1, 8.4,10.6,11.0, 7.3, 7.6, 6.9, 9.1, 9.2, 6.3, 7.3, 6.4,10.0, 6.3,10.3, 7.5, 7. [...]
+Healthy life expectancy at age 60 (years) females, 5.8,14.4,11.0,19.4, 7.3,15.4,16.0,12.0,19.5,18.4,14.6,12.6,11.4, 8.0,16.1,14.4,18.0,13.6, 7.4, 8.8,10.0,14.3, 8.9,12.6,15.1,15.2, 7.4, 7.7,10.1, 8.0,17.8,12.0, 7.9, 7.5,15.7,14.3,14.0, 7.7, 8.9,13.0,15.6, 8.5,15.2,15.5,14.1,15.8,12.1, 7.4,16.5, 7.0,16.4,13.0,14.4,10.0,13.3, 8.3, 8.1,14.8, 7.5,12.7,17.9,19.4, 9.3, 8.1,11.1,17.6, 9.0,17.6,14.1,11.7, 7.0, 7.1,11.1, 8.5,12.7,13.8,18.6,10.9,12.5,11.4, 9.5,16.9,17.1,18.8,15.7,21.4,11.3,14.6, 9 [...]
+Healthy life expectancy at age 60 (years) males, 7.1,11.4,11.1,17.0, 7.4,14.8,13.2, 9.7,17.0,15.2,12.2,12.4,11.3, 8.8,13.4, 9.9,15.3,12.7, 8.4, 9.3, 9.8,12.4, 8.3,10.7,13.3,12.4, 8.0, 7.6, 9.0, 8.4,15.4,11.3, 8.2, 7.4,13.1,11.8,12.9, 8.0, 8.7,11.4,14.0, 8.6,11.4,14.5,14.5,13.0,11.1, 7.2,15.7, 7.4,14.4,12.3,12.7, 9.9,11.9, 8.7, 8.3,10.0, 7.7,11.2,14.8,16.6, 9.2, 8.5, 9.5,14.8, 8.9,16.0,14.0,11.3, 7.3, 7.2,10.3, 7.8,11.7, 9.4,16.2, 9.9,11.6,11.3, 9.3,14.3,16.2,16.3,14.6,17.6,10.3,10.9, 9.3 [...]
+Healthy life expectancy at birth (years) females,32.5,62.3,58.3,73.7,37.6,62.1,65.9,61.1,73.3,72.5,57.5,59.1,62.3,47.9,64.3,64.8,71.0,60.4,41.9,48.2,51.4,65.3,36.5,59.2,65.9,65.8,34.1,32.9,48.7,39.9,71.7,60.0,33.6,39.9,67.4,63.3,63.3,45.8,42.8,61.1,66.4,38.9,67.1,66.7,66.2,68.3,56.0,34.4,70.1,34.6,66.1,57.7,62.2,57.0,59.4,44.8,40.5,65.4,35.1,60.5,71.5,72.9,46.5,46.6,60.2,71.5,46.9,72.3,61.8,56.0,40.1,36.4,52.8,44.9,57.8,64.5,72.6,51.7,58.4,58.6,52.5,70.9,70.6,72.8,65.0,76.3,58.8,58.1,40. [...]
+Healthy life expectancy at birth (years) males,35.1,56.5,58.4,69.8,36.2,61.7,61.8,56.9,69.6,68.1,53.3,57.2,63.0,50.6,62.3,55.4,67.7,58.0,43.1,50.1,51.4,62.1,38.1,54.9,63.8,61.0,35.4,33.9,45.6,40.9,68.3,56.9,34.7,38.6,63.5,60.9,58.6,46.2,42.5,60.4,64.2,39.1,60.8,65.1,66.4,62.9,54.9,34.4,68.9,35.6,63.2,54.7,58.4,57.1,55.3,44.9,41.4,56.2,35.7,58.7,66.1,68.5,46.8,47.3,56.1,67.4,46.5,69.7,62.1,53.5,40.4,36.7,51.4,41.3,55.8,55.3,69.8,52.2,56.5,59.0,52.6,67.8,69.3,69.5,62.9,71.2,58.2,50.5,41.2, [...]
+Healthy life expectancy at birth (years) total population,33.8,59.4,58.4,71.8,36.9,61.9,63.9,59.0,71.5,70.3,55.4,58.1,62.7,49.3,63.3,60.1,69.4,59.2,42.5,49.2,51.4,63.7,37.3,57.1,64.9,63.4,34.8,33.4,47.1,40.4,70.0,58.4,34.1,39.3,65.5,62.1,60.9,46.0,42.6,60.7,65.3,39.0,64.0,65.9,66.3,65.6,55.4,34.4,69.5,35.1,64.6,56.2,60.3,57.1,57.3,44.8,41.0,60.8,35.4,59.6,68.8,70.7,46.6,46.9,58.2,69.4,46.7,71.0,61.9,54.7,40.3,36.6,52.1,43.1,56.8,59.9,71.2,52.0,57.4,58.8,52.6,69.3,69.9,71.2,64.0,73.8,58.5 [...]
+Life expectancy at birth (years) females,45.1,72.9,71.2,83.8,48.3,76.6,77.8,71.2,82.1,81.4,68.9,74.8,74.7,60.8,77.7,74.0,80.9,74.7,53.8,62.5,63.6,74.7,44.4,71.9,78.7,74.9,43.6,41.3,58.5,50.4,81.5,72.3,42.5,51.1,79.5,73.0,75.1,58.1,52.9,72.1,78.8,48.4,77.7,77.5,79.0,78.2,67.2,44.0,78.5,44.7,78.3,71.6,74.2,69.1,73.3,56.2,51.0,76.5,44.7,71.2,80.9,83.1,56.9,58.7,71.8,80.6,57.9,80.8,73.2,68.6,52.0,46.9,67.0,56.1,71.0,75.2,81.8,62.7,67.4,69.9,64.7,79.7,80.6,82.4,76.6,84.7,72.5,68.4,49.6,64.5,7 [...]
+Life expectancy at birth (years) males,44.2,64.3,68.1,77.2,44.3,71.8,70.2,64.4,76.6,74.9,61.7,68.0,72.7,60.4,71.6,62.0,74.6,69.1,51.7,60.4,60.9,68.7,44.6,64.5,73.4,67.4,42.6,40.6,53.4,49.0,76.0,66.5,41.6,47.4,72.5,68.9,67.2,55.3,50.1,68.7,73.4,46.4,69.8,73.7,74.8,71.5,64.5,41.6,74.2,43.5,72.6,65.5,68.2,65.4,66.3,53.5,49.1,65.4,42.8,66.9,73.7,75.2,54.6,55.9,65.7,74.3,55.0,75.4,70.9,63.5,49.0,44.5,61.5,49.7,66.3,66.3,77.1,59.8,63.4,68.1,61.7,74.1,76.6,76.0,72.8,77.5,68.5,58.0,48.2,60.4,74. [...]
+Life expectancy at birth (years) total population,42.8,69.4,68.9,79.5,36.4,70.9,73.7,69.2,79.8,78.6,62.3,71.7,72.9,61.6,74.4,69.0,77.8,69.9,52.1,61.3,62.2,72.6,41.1,68.4,74.1,71.6,42.8,40.8,55.7,50.5,79.1,69.2,42.7,48.2,76.1,70.8,70.7,61.6,52.7,71.7,76.4,46.2,72.9,76.8,76.8,75.1,66.1,44.0,76.9,49.0,73.7,66.8,69.7,66.3,69.0,53.5,44.7,71.0,47.5,69.6,77.7,79.1,59.1,58.3,68.7,78.0,57.3,78.0,67.1,65.9,51.4,47.3,63.8,53.0,67.1,71.5,79.6,60.6,65.4,68.3,60.4,76.3,78.5,79.1,72.6,81.3,70.7,62.5,49 [...]
+Percentage of total life expectancy lost males,20.5,12.2,14.3, 9.5,18.2,14.1,12.0,11.7, 9.1, 9.0,13.6,15.9,13.3,16.2,13.0,10.7, 9.2,16.1,16.5,17.0,15.6, 9.5,14.6,14.8,13.1, 9.4,16.8,16.5,14.7,16.5,10.2,14.4,16.7,18.4,12.4,11.6,12.8,16.4,15.3,12.0,12.6,15.6,12.9,11.6,11.2,12.0,14.8,17.4, 7.2,18.0,13.0,16.4,14.5,12.6,16.6,16.2,15.7,14.2,16.6,12.3,10.3, 8.9,14.2,15.4,14.6, 9.3,15.5, 7.6,12.4,15.8,17.5,17.4,16.4,16.9,16.0,16.5, 9.5,12.7,10.9,13.3,14.8, 8.5, 9.6, 8.5,13.7, 8.1,15.0,13.0,14.5 [...]
+External resources for health as % of total expenditure on health,14.8, 7.0, .1, .0,14.3, 3.1, .3, 2.0, .0, .0, 4.7, .0, .0,13.8, 4.0, .0, .0, 2.9,21.3,42.0,12.1, 7.9, 1.3, .5, .0, 2.5,23.9,40.4,18.8, 5.7, .0, 9.7,30.8,59.7, .1, .2, .4,42.9, 4.0,25.4, 1.3, 2.1, .3, .2, .0, .0, .3, 6.6, .0,29.5, 1.3, 2.5, 3.2, 1.8, .9,17.2,51.1, .9,29.6,10.9, .0, .0, 1.3,26.1, 3.1, .0,13.2, .0, .0, 4.6,19.1,39.8, 3.1,39.2, 8.7, .0, .0, 2.2, 8.3, .1, .0, .0, .4, .0, 2.0, [...]
+General Government expenditure on health as % of total expenditure on health,53.8,63.9,70.6,70.1,55.8,59.9,55.2,40.2,68.9,69.7,67.9,56.8,69.1,45.3,65.9,85.4,72.1,48.0,43.3,90.6,67.0,39.7,62.0,40.8,80.0,82.5,63.5,55.6,14.2,33.7,70.9,70.0,49.2,78.7,42.6,36.6,67.3,59.7,73.3,64.4,68.5,15.4,83.2,85.8,48.7,91.4,73.5,44.7,82.5,58.4,71.5,35.4,55.2,46.1,45.1,62.5,67.2,76.8,34.5,65.2,75.1,75.8,50.2,46.8,33.5,75.0,55.9,56.1,70.1,47.6,53.4,55.3,82.7,50.7,53.5,75.5,83.7,17.6,23.7,41.5,31.2,73.3,69.8 [...]
+General Government expenditure on health as % of total general government expenditure,13.5, 7.7, 9.3,32.0, 3.3,14.1,22.0,10.6,16.5,10.6, 7.3,15.9,10.8, 8.6,12.0,13.1,12.6, 5.3, 8.9, 9.2,10.3, 9.1, 8.4, 8.4, 5.2, 9.3, 8.1, 7.1,15.7, 7.9,15.5, 6.1,13.9,12.2,11.9,10.8,12.7, 5.7, 5.6, 8.6,20.4, 5.3,14.4,10.8,10.7,13.9, 2.9, 9.7,12.6,12.5,12.8,14.6, 9.3, 6.5,25.0,10.0, 4.5,12.4, 3.2, 7.2,10.3,13.4, 7.2,13.3, 5.9,17.3, 8.1,10.8,12.3,16.3,10.9, 7.7, 9.3,12.3,13.6,11.7,18.0, 3.1, 3.2,11.8, 4.5, [...]
+Out-of-pocket expenditure on health as % of private expenditure on health,100.00, 65.00, 91.10, 96.10,100.00,100.00, 63.30,100.00, 60.40, 61.30, 90.50,100.00, 70.00, 93.20, 77.30, 99.60, 58.40,100.00, 99.90,100.00, 85.40,100.00, 30.80, 64.90,100.00,100.00, 97.40,100.00, 85.40, 81.90, 54.30, 99.90, 95.50, 81.60, 59.70, 95.20, 65.10,100.00,100.00,100.00, 91.80, 89.20,100.00, 75.60, 97.90,100.00,100.00,100.00, 90.90, 53.90,100.00, 88.40, 72.70, 92.00, 95.60, 70.50,100.00, 84.90, 84.60,100.0 [...]
+Per capita total expenditure on health at average exchange rate (US$), 9, 46, 68, 1200, 26, 509, 683, 49, 1808, 1873, 11, 863, 484, 13, 603, 56, 1916, 156, 15, 9, 52, 87, 173, 265, 479, 75, 6, 4, 30, 19, 2102, 38, 12, 5, 336, 45, 108, 9, 18, 196, 280, 42, 388, 175, 909, 358, 21, 10, 2474, 58, 200, 145, 52, [...]
+Per capita total expenditure on health in international dollars, 40, 144, 151, 1723, 55, 581, 1099, 246, 2363, 2236, 46, 1117, 568, 56, 881, 424, 2272, 257, 35, 61, 122, 259, 309, 556, 617, 283, 29, 18, 174, 39, 2580, 109, 55, 19, 681, 200, 350, 26, 20, 444, 533, 123, 715, 193, 905, 1031, 41, 11, 2398, 89, 291, 345, 149, 139, [...]
+Per capita GDP in international dollars, 800, 3772, 3966, 31212, 1569, 10486, 12339, 3096, 26474, 28077, 2641, 19776, 13820, 1580, 14187, 7818, 26476, 5183, 848, 1561, 2332, 3346, 5165, 7341, 19734, 5877, 833, 514, 1471, 1239, 28472, 3525, 1265, 608, 10064, 3760, 6385, 851, 1010, 9889, 7737, 1970, 7637, 2723, 11354, 14420, 1661, 343, 29103, 1263, 5013, 5587, 3668, 3653, 4540, 3094, 580, 9339, 350, 5440, 25558, 25321, 5376, [...]
+Per capita government expenditure on health at average exchange rate (US$), 5, 30, 48, 841, 14, 305, 377, 20, 1246, 1305, 8, 490, 334, 6, 397, 48, 1382, 75, 7, 8, 35, 34, 107, 108, 383, 62, 4, 2, 4, 7, 1490, 27, 6, 4, 143, 17, 73, 5, 13, 126, 192, 6, 323, 150, 443, 327, 15, 5, 2042, 34, 143, 51, 2 [...]
+Per capita government expenditure on health in international dollars, 22, 92, 106, 1208, 30, 348, 607, 99, 1629, 1559, 31, 635, 393, 26, 581, 362, 1639, 123, 15, 55, 81, 103, 192, 227, 493, 233, 18, 10, 25, 13, 1828, 76, 27, 15, 290, 73, 236, 16, 15, 286, 365, 19, 595, 166, 441, 942, 30, 5, 1979, 52, 208, 122, 82, [...]
+Percentage of total life expectancy lost females,27.8,14.5,18.1,12.1,22.3,18.9,15.2,14.2,10.7,10.9,16.5,21.0,16.6,21.2,17.2,12.4,12.2,19.2,22.0,22.9,19.1,12.5,17.7,17.6,16.2,12.2,21.7,20.5,16.8,20.8,12.0,17.0,20.9,22.0,15.2,13.2,15.7,21.1,19.1,15.3,15.7,19.7,13.6,14.0,16.1,12.6,16.7,21.9,10.7,22.5,15.6,19.5,16.2,17.4,19.0,20.2,20.4,14.4,21.4,15.1,11.7,12.2,18.4,20.6,16.1,11.4,18.9,10.5,15.7,18.3,22.8,22.3,21.2,20.0,18.6,14.2,11.3,17.5,13.5,16.2,18.7,11.0,12.4,11.6,15.1, 9.9,18.8,15.0,19 [...]
+Prepaid plans as % of private expenditure on health, .0,34.3, 4.2, .0, .0, .0,32.6, .0,23.4,23.2, .0, .0,26.3, .0,22.7, .4, 7.1, .0, .0, .0, 7.9, .0,20.8,35.1, .0, .0, .0, .0, .0, .0,39.1, .1, .0, .0,40.2, .4,34.8, .0, .0, .0, 1.6,10.8, .0, .0, 2.1, .0, .0, .0, 9.1, .0, .0, .4, 8.3, .5, 4.2, .0, .0, 4.1, .0, .0,10.5,52.6, .0, .0, .3,33.3, .0, 4.4, .0, 5.2, .0, .0, .0, .0, 7.6, .8, .0, .0, 8.2, 2.3, .0,28.4, .0, 3.3,29.5, 2.8, 5.6, .0, [...]
+Private expenditure on health as % of total expenditure on health,46.2,36.1,29.4,29.9,44.2,40.1,44.8,59.8,31.1,30.3,32.1,43.2,30.9,54.7,34.1,14.6,27.9,52.0,56.7, 9.4,33.0,60.3,38.0,59.2,20.0,17.5,36.5,44.4,85.8,66.3,29.1,30.0,50.8,21.3,57.4,63.4,32.7,40.3,26.7,35.6,31.5,84.6,16.8,14.2,51.3, 8.6,26.5,55.3,17.5,41.6,28.5,64.6,44.8,53.9,54.9,37.5,32.8,23.2,65.5,34.8,24.9,24.2,49.8,53.2,66.5,25.0,44.1,43.9,29.9,52.4,46.6,44.7,17.3,49.3,46.5,24.5,16.3,82.4,76.3,58.5,68.8,26.7,30.2,26.6,53.0, [...]
+Social security expenditure on health as % of general government expenditure on health, .0,18.5,39.9,88.1, .0, .0,59.5, .0, .0,61.0, .0, .0, .4, .0, .0,22.8,82.2, .0, .0, .0,66.7, .0, .0, .0, .0, 9.7, .0, .0, .0, .0, 2.0, .0, .0, .0,71.8,50.7,19.6, .0, .0, .0,90.2, .0,97.8, .0,48.1,89.4, .0, .0, .0, .0, .0,22.4,38.7,29.5,44.2, .0, .0,85.9, 1.0, .0,20.5,96.7, .0, .0,47.9,91.6, .0,31.9, .0,57.1, .0, .1, .0, .0,16.5,83.9,29.6, .0, 7.5,40.6, .0, [...]
+Total expenditure on health as % of GDP, 5.0, 3.8, 3.8, 5.5, 3.5, 5.5, 8.9, 8.0, 8.9, 8.0, 1.7, 5.7, 4.1, 3.6, 6.2, 5.4, 8.6, 5.0, 4.2, 3.9, 5.2, 7.7, 6.0, 7.6, 3.1, 4.8, 3.5, 3.5,11.8, 3.1, 9.1, 3.1, 4.3, 3.1, 6.8, 5.3, 5.5, 3.1, 2.0, 4.6, 6.9, 6.2, 9.4, 7.1, 8.0, 7.1, 2.4, 3.2, 8.2, 7.1, 5.8, 6.2, 4.1, 3.8, 8.0, 2.1, 5.6, 5.9, 3.2, 3.9, 6.6, 9.4, 3.3, 6.3, 3.4,10.6, 4.3, 9.4, 4.9, 4.7, 3.4, 6.1, 5.1, 4.8, 6.0, 6.7, 9.3, 5.1, 2.7, 6.4, 3.3, 6.4, 8.2, 8.2, 6.2, 7.7, 9.2, 3.3, 8.7, 8.3, 3 [...]
+Under-five mortality rate; both sexes (per 1000 live births),257.0, 26.7, 50.8, 4.9,261.6, 20.5, 19.4, 36.9, 6.2, 5.7, 75.2, 18.5, 12.7, 82.0, 19.5, 13.5, 6.0, 41.0,160.6, 97.5, 80.0, 18.4, 92.9, 41.2, 14.3, 16.3,224.8,189.7,134.1,155.4, 6.0, 40.0,178.6,193.4, 16.0, 37.2, 23.7, 82.0,105.5, 22.5, 11.3,167.4, 8.4, 8.9, 8.4, 5.2, 55.0,211.8, 6.2,150.2, 13.8, 36.6, 35.8, 45.0, 36.8,156.1,112.0, 11.3,178.7, 28.3, 4.3, 5.5, 91.2,128.2, 22.8, 5.3,104.6, 6.8, 23.1, 55.7,162.6,214.5 [...]
diff --git a/demo/rawdata/who2001.csv b/demo/rawdata/who2001.csv
new file mode 100644
index 0000000..a332813
--- /dev/null
+++ b/demo/rawdata/who2001.csv
@@ -0,0 +1,38 @@
+Data for 2001
+Indicators,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,Austria,Azerbaijan,Bahamas,Bahrain,Bangladesh,Barbados,Belarus,Belgium,Belize,Benin,Bhutan,Bolivia,Bosnia and Herzegovina,Botswana,Brazil,Brunei Darussalam,Bulgaria,Burkina Faso,Burundi,Cambodia,Cameroon,Canada,Cape Verde,Central African Republic,Chad,Chile,China,Colombia,Comoros,Congo,Cook Islands,Costa Rica,Côte d'Ivoire,Croatia,Cuba,Cyprus,Czech Republic,Democratic People's Republic o [...]
+Adult mortality (per 1000) females,418, 98,129, 46,527,128, 92, 99, 54, 61,132,152, 89,258,103,134, 67,124,348,224,212, 91,767,134, 95, 99,513,586,268,438, 59,127,556,391, 70,106,115,217,415,122, 78,493, 74, 90, 58, 73,169,459, 77,394,121,152,134,160,146,315,383,111,420,152, 61, 60,284,231,108, 61,303, 50,225,168,327,382,206,397,151,118, 57,222,213,137,180, 64, 55, 51,123, 47,122,209,513,196, 66,168,309,116,140,692,425,118, 96, 61,279,636,108,213,410, 48,292,317,116,101,180, 56,160,105,4 [...]
+Adult mortality (per 1000) males,527,177,164,113,660,197,183,216, 94,119,253,249,123,251,192,368,128,191,407,270,266,194,797,247,127,221,571,704,385,489, 98,218,608,466,139,157,247,288,466,174,134,551,186,142,110,168,250,566,122,459,202,259,217,230,268,378,440,312,484,212,139,134,335,329,250,121,360,119,263,277,407,457,302,615,261,275, 84,291,246,209,258,118,115,100,164, 97,193,375,560,255, 87,334,342,312,204,724,517,194,270,122,345,695,194,276,480, 89,347,378,229,179,215,124,278,162,537 [...]
+Child mortality (per 1000) females,249, 30, 44, 5,279, 21, 17, 32, 5, 5, 88, 11, 7, 84, 11, 11, 5, 30,149, 90, 76, 15,107, 40, 14, 16,205,180,114,141, 5, 35,183,170, 8, 40, 21, 80,106, 18, 10,161, 7, 8, 7, 4, 52,203, 5,165, 14, 44, 31, 44, 32,141,107, 10,170, 24, 4, 4, 86,108, 26, 4, 97, 6, 21, 45,153,195, 51,103, 41, 9, 3, 98, 40, 39,111, 6, 6, 5, 15, 4, 24, 45,109, 66, 10, 55,137, 12, 28,147,185, 29, 10, 4,142,240, 11, 48,218, 6, 37,167, 16, 27, 52, 5, 63, 55,1 [...]
+Child mortality (per 1000) males,252, 35, 55, 5,306, 25, 22, 44, 7, 5,100, 14, 9, 82, 15, 14, 6, 39,160, 90, 84, 19,110, 47, 14, 18,216,193,130,150, 6, 49,197,191, 10, 34, 26, 90,115, 23, 13,178, 8, 11, 7, 5, 54,216, 6,181, 13, 48, 36, 46, 35,154,123, 12,185, 27, 5, 5, 93,121, 33, 5,111, 7, 25, 55,172,213, 62,118, 44, 11, 4, 89, 50, 45,122, 7, 7, 6, 16, 5, 27, 59,119, 82, 12, 72,153, 14, 34,155,203, 31, 10, 4,155,261, 13, 42,229, 8, 48,174, 23, 33, 65, 5, 79, 58,208 [...]
+Infant mortality rate; both sexes (per 1000 live births), , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,
+Annual population growth rate (%), 4.5,-0.5, 1.9, 5.0, 3.2, 0.3, 1.3, 0.5, 1.2, 0.4, 1.1, 1.7, 2.6, 2.2, 0.4,-0.1, 0.3, 2.0, 3.0, 2.2, 2.4,-0.2, 2.0, 1.4, 2.4,-1.0, 2.5, 1.2, 3.0, 2.4, 1.0, 2.3, 2.3, 3.1, 1.5, 0.9, 1.8, 3.0, 3.1, 0.7, 2.7, 2.3, 0.3, 0.5, 1.3, 0.0, 1.0, 3.2, 0.3, 2.2,-0.1, 1.7, 2.1, 1.9, 2.1, 2.7, 2.0,-1.3, 2.8, 1.2, 0.3, 0.4, 2.7, 3.3,-0.4, 0.3, 2.4, 0.4, 0.3, 2.7, 2.7, 2.4, 0.4, 1.6, 2.7,-0.4, 0.9, 1.8, 1.5, 1.8, 2.9, 0.9, 2.9, 0.1, 0.9, 0.3, 3.9,-0.4, 2.5, 1.4,-0.6, 1. [...]
+Dependency ratio (per 100), 86, 55, 62, 50,104, 56, 59, 46, 48, 47, 54, 53, 44, 71, 44, 46, 52, 72, 95, 87, 77, 40, 81, 50, 53, 46,108,100, 85, 87, 46, 77, 89, 99, 55, 45, 59, 83, 99, 64, 59, 81, 48, 44, 52, 43, 48,108, 50, 87, 56, 60, 62, 64, 68, 91, 88, 46, 93, 58, 49, 53, 86, 76, 50, 47, 78, 49, 56, 88, 88, 89, 54, 78, 81, 46, 53, 62, 54, 66, 79, 48, 61, 48, 62, 48, 75, 50, 84, 76, 47, 65, 85, 46, 58, 77, 83, 58, 48, 49, 91, 97, 61, 88,101, 48, 76, 90, 46, 60, 76, 53, 61, 62, 89, 60, [...]
+Percentage of population aged 60+ years, 4.7, 9.2, 6.0,21.1, 4.5,10.0,13.4,13.2,16.5,21.1,10.5, 8.1, 4.8, 5.0,13.2,18.8,22.2, 5.9, 4.2, 6.5, 6.2,15.1, 4.6, 8.0, 5.2,21.7, 4.8, 4.3, 4.4, 5.6,16.9, 6.3, 6.1, 4.9,10.4,10.0, 7.0, 4.2, 5.0, 6.9, 7.6, 5.0,20.5,14.0,15.9,18.6,10.4, 4.5,20.2, 5.7,10.0, 6.7, 7.0, 6.3, 7.2, 5.9, 4.7,20.3, 4.7, 5.8,20.2,20.5, 8.6, 5.2,18.9,23.7, 5.1,23.7,10.0, 5.3, 4.5, 5.6, 6.9, 5.6, 5.1,19.9,15.1, 7.7, 7.8, 5.3, 4.6,15.3,13.1,24.3, 9.6,23.8, 4.6,11.2, 4.2, 6.9, 4 [...]
+Total fertility rate, 6.8, 2.4, 2.9, 1.3, 7.2, 1.6, 2.5, 1.2, 1.8, 1.3, 1.6, 2.3, 2.4, 3.6, 1.5, 1.2, 1.5, 3.0, 5.8, 5.2, 4.1, 1.3, 4.1, 2.2, 2.6, 1.1, 6.8, 6.8, 4.9, 4.8, 1.6, 3.3, 5.0, 6.7, 2.4, 1.8, 2.7, 5.1, 6.3, 3.3, 2.7, 4.8, 1.7, 1.6, 1.9, 1.2, 2.1, 6.7, 1.7, 5.9, 1.8, 2.8, 2.9, 3.0, 3.0, 5.9, 5.4, 1.2, 6.8, 3.0, 1.6, 1.8, 5.4, 4.9, 1.4, 1.3, 4.3, 1.3, 3.5, 4.6, 6.0, 6.0, 2.4, 4.1, 3.9, 1.3, 1.9, 3.1, 2.4, 2.9, 4.9, 2.0, 2.8, 1.2, 2.4, 1.4, 4.4, 2.0, 4.3, 4.6, 2.7, 2.5, 5.0, 1.1, [...]
+Total population (000), 22473, 3144, 30841, 90, 13527, 65, 37487, 3787, 19338, 8074, 8095, 307, 651, 140368, 268, 10146, 10263, 230, 6445, 2141, 8516, 4066, 1553, 172558, 334, 7866, 11855, 6501, 13440, 15202, 31014, 436, 3781, 8134, 15401, 1292378, 42802, 726, 3109, 20, 4112, 1634 [...]
+Expectation of lost healthy years at birth females, 8.1,11.7,11.2,10.0, 6.5,10.9,12.0,11.9, 9.5, 8.8,11.2,12.5,12.2, 9.2,10.6,11.4, 9.4,11.3, 9.2, 9.9,10.7,11.5, 5.9,11.0,12.2, 9.6, 7.2, 6.6, 9.1, 8.4,10.4,11.3, 7.7, 8.7,11.7, 8.4,12.7,11.0, 8.7,11.4,11.6, 7.7,10.2,10.8,12.0, 9.3,10.3, 8.2, 8.7, 8.1,11.2,10.7,10.8,10.8,11.5, 9.2, 9.1,10.4, 8.5,11.0, 8.8, 9.5, 9.0,10.1,10.2, 8.9, 9.2, 8.9, 9.7,11.9, 9.1, 8.2,10.0, 7.4,10.7,10.5, 9.4,10.4,10.1,13.2, 9.6, 8.9,10.0, 9.3,10.0, 8.9,13.6,11.3, [...]
+Expectation of lost healthy years at birth males,10.0,10.4,11.9, 7.4, 8.4,11.8, 9.5,10.8, 7.3, 7.0,10.4,14.1, 9.9,10.2, 9.5, 9.0, 7.1,11.4,10.9,10.5,13.1, 9.3, 6.4,13.3,12.8, 7.5, 8.3, 6.8,10.3,10.1, 8.4,13.6, 9.7,11.1, 8.7, 7.7,11.4,12.8,10.9,11.6,11.1, 8.7, 9.2,10.0, 9.4, 8.1,10.5, 9.8, 5.5,10.0,12.2,11.1,11.1, 8.9,12.7,10.6, 9.9, 7.7,10.0,11.0, 6.8, 6.6, 9.8,11.1, 7.9, 6.8,10.0, 6.5, 9.7,12.2,10.1, 9.8, 9.7, 7.1,12.3, 9.3, 7.6, 8.4, 8.3,10.9,11.0, 6.1, 8.1, 7.0, 9.9, 6.5,11.4, 9.8, 8. [...]
+Healthy life expectancy at age 60 (years) females, 8.7,12.7,12.2,18.5, 9.2,13.4,15.1,12.2,18.8,18.5,11.0,14.7, 9.4,10.9,16.4,13.0,17.8,12.9, 9.5,11.1,11.0,14.3,12.2,13.0,12.8,13.9, 9.4, 9.6,10.5, 9.9,17.9,12.3, 9.2, 9.3,15.5,14.2,12.9,10.2,10.6,12.7,15.3, 9.7,14.4,16.6,14.5,16.0,13.2, 9.2,16.7, 9.6,15.0,13.1,14.2, 9.2,13.5,10.0,10.3,15.0, 9.4,12.3,18.1,19.1,11.0,10.3,12.1,17.7,10.6,17.1,12.1,11.6, 9.6, 9.4,12.1,11.2,12.6,14.4,17.6,10.2,11.1,11.4,11.0,16.1,16.9,18.2,13.9,20.7,11.5,10.8,10 [...]
+Healthy life expectancy at age 60 (years) males, 4.9, 8.8,10.3,15.8, 5.8,10.3,11.9, 9.2,16.4,15.7, 8.5,11.1,10.5, 9.4,12.3, 9.5,14.8,10.4, 7.1, 9.2, 8.4,11.3, 9.1, 9.4,10.5,11.5, 7.0, 6.8, 7.6, 7.3,15.3, 9.2, 6.0, 6.3,13.3,12.7,10.7, 7.6, 7.7,10.2,12.9, 7.3,10.1,14.4,13.2,12.8,10.7, 6.3,15.5, 6.9,13.0, 9.7,11.6, 9.4,11.2, 7.7, 8.0,11.1, 7.0,10.0,15.2,16.1, 9.1, 7.8,10.3,15.0, 8.4,15.7,10.1,10.4, 7.3, 6.9, 9.4, 8.4, 9.3,10.4,16.8, 9.7,10.6, 9.8, 8.1,13.9,15.8,15.5,11.8,17.1, 9.9, 8.7, 8.1 [...]
+Healthy life expectancy at birth (years) females,35.7,61.5,59.9,73.0,31.7,62.6,65.7,61.1,73.2,73.0,55.4,62.5,61.3,52.6,67.6,62.8,71.8,61.5,44.1,52.9,53.6,64.9,32.7,61.1,63.7,65.2,36.3,35.7,49.9,42.0,71.6,60.8,35.6,41.5,67.8,64.3,62.1,52.8,45.2,62.6,67.0,39.3,66.9,68.5,67.2,69.5,58.1,37.3,70.8,42.3,64.8,59.8,62.4,57.0,61.2,45.9,45.9,66.1,40.7,60.8,72.5,73.5,51.5,51.0,62.2,72.2,49.7,71.9,59.0,57.2,44.7,40.6,56.7,47.3,59.6,65.5,71.9,51.3,57.2,57.9,53.3,70.4,70.8,72.9,64.5,75.8,59.9,55.8,42. [...]
+Healthy life expectancy at birth (years) males,31.1,55.9,55.8,68.8,25.7,56.9,60.6,55.4,70.1,68.9,50.3,54.7,62.3,51.7,61.0,53.9,67.7,56.3,40.1,50.0,48.0,60.0,33.0,52.2,60.4,60.8,33.9,31.7,43.0,38.8,68.2,52.2,32.3,35.9,64.4,62.0,55.3,47.0,40.9,58.3,62.6,36.3,59.7,64.7,65.3,63.8,53.5,32.3,69.3,37.9,59.4,53.0,56.6,56.4,53.7,41.7,42.3,58.0,36.9,56.8,67.7,69.0,48.2,45.1,57.5,68.3,45.8,69.0,56.0,51.4,40.0,36.1,51.6,38.5,52.1,58.0,70.5,51.5,56.1,55.5,47.7,67.6,68.0,69.2,61.1,71.4,57.2,49.0,39.5, [...]
+Healthy life expectancy at birth (years) total population,33.4,58.7,57.8,70.9,28.7,59.7,63.1,58.3,71.6,71.0,52.8,58.6,61.8,52.1,64.3,58.4,69.7,58.9,42.1,51.4,50.8,62.5,32.9,56.7,62.0,63.0,35.1,33.7,46.4,40.4,69.9,56.5,34.0,38.7,66.1,63.2,58.7,49.9,43.0,60.5,64.8,37.8,63.3,66.6,66.2,66.6,55.8,34.8,70.1,40.1,62.1,56.4,59.5,56.7,57.4,43.8,44.1,62.0,38.8,58.8,70.1,71.3,49.9,48.0,59.8,70.2,47.8,70.4,57.5,54.3,42.4,38.3,54.1,42.9,55.9,61.8,71.2,51.4,56.7,56.7,50.5,69.0,69.4,71.0,62.8,73.6,58.5 [...]
+Life expectancy at birth (years) females,43.7,73.2,71.1,82.9,38.3,73.5,77.7,73.0,82.6,81.8,66.6,75.0,73.4,61.7,78.2,74.2,81.2,72.7,53.3,62.7,64.3,76.4,38.6,72.0,75.9,74.8,43.5,42.3,59.0,50.5,81.9,72.0,43.3,50.2,79.5,72.7,74.8,63.8,53.8,74.0,78.6,47.0,77.1,79.2,79.2,78.8,68.3,45.5,79.5,50.4,76.0,70.5,73.2,67.8,72.7,55.1,55.0,76.5,49.2,71.8,81.2,82.9,60.5,61.0,72.4,81.1,58.9,80.8,68.7,69.0,53.8,48.7,66.7,54.7,70.3,76.1,81.3,61.7,67.4,71.1,62.9,79.2,80.9,82.2,74.5,84.7,73.5,67.2,49.6,65.8,7 [...]
+Life expectancy at birth (years) males,41.1,66.3,67.7,76.2,34.1,68.7,70.1,66.2,77.4,75.9,60.7,68.8,72.2,61.9,70.5,62.9,74.8,67.7,51.0,60.5,61.1,69.3,39.3,65.5,73.2,68.4,42.2,38.4,53.3,48.9,76.6,65.7,42.0,47.0,73.2,69.8,66.7,59.8,51.8,69.9,73.8,45.0,68.9,74.7,74.6,71.9,64.0,42.1,74.8,47.9,71.6,64.1,67.6,65.3,66.3,52.3,52.3,65.7,46.8,67.8,74.5,75.6,58.0,56.2,65.4,75.1,55.8,75.5,65.8,63.6,50.1,45.9,61.3,45.6,64.4,67.3,78.2,60.0,64.4,66.4,58.7,73.8,76.1,76.2,71.0,77.9,68.6,58.8,48.2,61.7,74. [...]
+Life expectancy at birth (years) total population,42.3,69.5,69.4,79.6,36.1,71.0,73.9,69.7,80.0,79.0,63.6,71.9,72.7,61.8,74.4,68.5,78.0,70.0,52.1,61.6,62.7,72.8,39.1,68.7,74.4,71.5,42.9,40.4,56.2,49.7,79.3,69.5,42.7,48.6,76.3,71.2,70.7,61.8,52.9,71.9,76.1,45.9,72.9,76.9,76.9,75.4,66.1,43.8,77.2,49.3,73.8,67.0,70.3,66.5,69.5,53.7,53.6,71.2,48.0,69.7,77.9,79.3,59.3,58.5,68.9,78.2,57.4,78.1,67.2,66.2,51.9,47.3,64.0,50.0,67.3,71.7,79.8,60.8,65.9,68.6,60.7,76.5,78.5,79.3,72.7,81.4,70.8,63.0,48 [...]
+Percentage of total life expectancy lost males,24.4,15.7,17.6, 9.8,24.8,17.2,13.6,16.3, 9.4, 9.3,17.2,20.5,13.7,16.5,13.5,14.3, 9.5,16.8,21.4,17.3,21.4,13.4,16.2,20.2,17.4,11.0,19.6,17.6,19.3,20.6,11.0,20.6,23.1,23.7,12.0,11.1,17.1,21.4,21.1,16.6,15.1,19.3,13.3,13.4,12.5,11.3,16.5,23.3, 7.3,20.9,17.1,17.2,16.4,13.7,19.1,20.2,19.0,11.7,21.3,16.2, 9.1, 8.7,16.9,19.7,12.1, 9.1,17.9, 8.6,14.8,19.2,20.1,21.4,15.9,15.5,19.1,13.8, 9.8,14.1,12.9,16.5,18.7, 8.3,10.6, 9.2,13.9, 8.3,16.7,16.7,18.1 [...]
+External resources for health as % of total expenditure on health,11.2, 3.4, .1, .0,14.2, 2.9, .3, 3.7, .0, .0, 7.7, .3, .0,13.3, 4.6, .0, .0, 6.1,21.5,38.2,12.2, 2.4, .4, .5, .0, 2.1,25.6,43.7,19.7, 6.3, .0,20.3,32.4,62.9, .1, .2, .2,39.9, 3.3,23.2, 1.3, 3.2, .1, .2, 2.3, .0, .3,18.0, .0,30.0, .9, 1.8, 1.9, 2.0, .9,10.6,52.3, .0,34.3,10.1, .0, .0, 1.8,26.6, 5.1, .0,23.2, .0, .0, 1.4,20.5,38.6, 2.2,42.9, 7.5, .0, .0, .4, 6.5, .1, .1, .0, .1, .0, 3.0, [...]
+General Government expenditure on health as % of total expenditure on health,52.6,64.6,75.0,71.0,63.1,60.9,53.4,41.2,67.9,69.3,66.9,57.0,69.0,44.2,66.3,86.7,71.7,45.1,46.9,90.6,66.3,36.8,66.2,41.6,79.4,82.1,60.1,59.0,14.9,37.1,70.8,74.2,51.2,76.0,44.0,37.2,65.7,60.0,63.8,67.6,68.5,16.0,81.8,86.2,47.7,91.4,73.4,44.4,82.4,58.8,71.3,36.1,50.3,48.9,46.7,60.4,65.1,77.8,40.5,67.1,75.6,76.0,47.9,49.4,37.8,74.9,59.6,56.0,71.9,48.3,54.1,53.8,79.9,53.4,53.1,75.0,82.9,17.9,25.1,43.5,31.8,76.0,69.2 [...]
+General Government expenditure on health as % of total general government expenditure,11.8, 7.3, 9.9,26.3, 5.5,13.0,21.3,11.5,16.8,10.7, 7.2,15.1,10.8, 8.7,11.5,14.2,13.0, 5.0,10.9, 7.5,10.3, 9.6, 7.6, 8.8, 5.1, 9.3, 8.1, 8.1,16.0, 7.8,16.2, 8.8,18.5,15.2,12.7,10.2,10.8, 5.8, 5.7, 8.9,19.5, 6.0,12.8,11.4,10.2,14.1, 3.0,10.3,12.8,13.7,10.5,13.5, 9.6, 7.4,24.0,10.0, 4.5,12.1, 4.9, 6.9,10.7,13.7, 7.3,13.6, 6.7,16.6, 8.6,11.2,12.4,15.7,11.3, 7.4, 9.3,14.1,13.8,11.5,17.5, 3.1, 3.0,12.0, 4.6, [...]
+Out-of-pocket expenditure on health as % of private expenditure on health,100.00, 65.30, 89.90, 92.60,100.00,100.00, 62.40,100.00, 59.60, 61.30, 97.70,100.00, 69.30, 93.20, 76.60, 99.70, 58.80,100.00, 99.90,100.00, 85.70,100.00, 35.30, 64.10,100.00, 98.00, 97.40,100.00, 84.60, 81.60, 52.30, 99.80, 95.40, 80.90, 59.60, 95.40, 65.20,100.00,100.00,100.00, 92.10, 89.70,100.00, 76.80, 98.00,100.00,100.00,100.00, 90.80, 55.20,100.00, 88.40, 73.80, 92.20, 94.90, 52.30,100.00, 84.70, 84.70,100.0 [...]
+Per capita total expenditure on health at average exchange rate (US$), 8, 48, 73, 1233, 31, 531, 679, 54, 1741, 1866, 11, 864, 500, 12, 613, 68, 1983, 167, 16, 9, 49, 85, 190, 222, 453, 81, 6, 4, 30, 20, 2163, 46, 12, 5, 303, 49, 105, 9, 18, 198, 293, 41, 394, 185, 932, 407, 22, 5, 2545, 58, 203, 153, 76, [...]
+Per capita total expenditure on health in international dollars, 34, 150, 169, 1821, 70, 614, 1130, 273, 2532, 2259, 48, 1220, 664, 58, 940, 464, 2481, 278, 39, 64, 125, 268, 381, 573, 638, 303, 27, 19, 184, 42, 2792, 134, 58, 17, 792, 224, 356, 29, 22, 598, 562, 127, 726, 229, 941, 1129, 44, 12, 2503, 90, 312, 353, 177, 153, [...]
+Per capita GDP in international dollars, 660, 4092, 4104, 32328, 1578, 11028, 11920, 3493, 27614, 28233, 2966, 21531, 16066, 1668, 14571, 8352, 27932, 5351, 888, 1643, 2370, 3560, 5747, 7537, 20503, 6299, 886, 529, 1563, 1269, 29235, 3640, 1289, 656, 11265, 4095, 6519, 928, 1036, 12944, 7838, 2045, 8102, 3168, 11588, 15340, 1745, 346, 29655, 1288, 5192, 5792, 3905, 3901, 4701, 5239, 629, 10127, 382, 5596, 26349, 26809, 5514, [...]
+Per capita government expenditure on health at average exchange rate (US$), 4, 31, 55, 875, 19, 323, 363, 22, 1182, 1293, 8, 492, 345, 5, 406, 59, 1421, 75, 8, 8, 33, 31, 126, 92, 359, 67, 4, 2, 4, 8, 1533, 34, 6, 4, 133, 18, 69, 6, 12, 134, 201, 7, 322, 160, 445, 372, 16, 2, 2097, 34, 145, 55, 3 [...]
+Per capita government expenditure on health in international dollars, 18, 97, 127, 1292, 44, 374, 604, 112, 1718, 1566, 32, 695, 458, 26, 623, 402, 1778, 125, 18, 58, 83, 99, 252, 238, 507, 248, 16, 11, 27, 16, 1978, 100, 30, 13, 348, 83, 234, 17, 14, 404, 385, 20, 593, 198, 449, 1031, 32, 5, 2063, 53, 222, 127, 89, 7 [...]
+Percentage of total life expectancy lost females,18.4,16.0,15.8,12.0,17.1,14.8,15.5,16.3,11.4,10.7,16.8,16.7,16.6,14.9,13.5,15.4,11.6,15.5,17.3,15.7,16.7,15.0,15.2,15.2,16.1,12.8,16.5,15.6,15.4,16.7,12.6,15.6,17.7,17.3,14.7,11.6,16.9,17.2,16.1,15.5,14.7,16.4,13.3,13.6,15.1,11.8,15.0,18.0,10.9,16.1,14.7,15.1,14.7,16.0,15.8,16.7,16.5,13.6,17.2,15.3,10.8,11.4,14.9,16.5,14.1,10.9,15.6,11.0,14.1,17.2,16.9,16.7,14.9,13.6,15.2,13.9,11.6,16.9,15.1,18.5,15.3,11.2,12.4,11.3,13.4,10.6,18.5,16.9,15 [...]
+Prepaid plans as % of private expenditure on health, .0,33.9, 5.1, .0, .0, .0,31.1, .0,24.2,23.3, 2.3, .0,27.2, .0,23.4, .3, 6.8, .0, .0, .0, 7.7, .0,20.5,35.9, .0, .0, .0, .0, .0, .0,39.3, .2, .0, .0,40.3, .4,34.8, .0, .0, .0, 1.5,10.3, .0, .0, 2.0, .0, .0, .0, 9.2, .0, .0, .4, 9.5, .5, 4.9, .0, .0, 4.8, .0, .0, 8.3,53.1, .0, .0, .3,33.5, .0, 4.4, .0, 5.3, .0, .0, .0, .0, 7.5, 1.3, .0, .0, 8.2, 2.6, .0,28.4, .0, 3.6,22.5, 1.4, 7.4, .0, [...]
+Private expenditure on health as % of total expenditure on health,47.4,35.4,25.0,29.0,36.9,39.1,46.6,58.8,32.1,30.7,33.1,43.0,31.0,55.8,33.7,13.3,28.3,54.9,53.1, 9.4,33.7,63.2,33.8,58.4,20.6,17.9,39.9,41.0,85.1,62.9,29.2,25.8,48.8,24.0,56.0,62.8,34.3,40.0,36.2,32.4,31.5,84.0,18.2,13.8,52.3, 8.6,26.6,55.6,17.6,41.2,28.7,63.9,49.7,51.1,53.3,39.6,34.9,22.2,59.5,32.9,24.4,24.0,52.1,50.6,62.2,25.1,40.4,44.0,28.1,51.7,45.9,46.2,20.1,46.6,46.9,25.0,17.1,82.1,74.9,56.5,68.2,24.0,30.8,24.7,57.9, [...]
+Social security expenditure on health as % of general government expenditure on health, .0,19.3,37.4,86.2, .0, .0,58.6, .0, .0,61.7, .0, .0, .3, .0, .0,24.8,77.6, .0, .0, .0,66.3, .0, .0, .0, .0,30.1, .0, .0, .0, .0, 2.0, .0, .0, .0,71.8,50.7,25.0, .0, .0, .0,90.8, .0,94.9, .0,48.2,86.1, .0, .0, .0, .0, .0,22.6,45.3,29.7,44.5, .0, .0,86.0, .8, .0,20.8,96.5, .0, .0,41.5,91.8, .0,35.0, .0,54.1, .0, .1, .0, .0,17.3,83.2,28.1, .0, 7.5,40.8, .0, [...]
+Total expenditure on health as % of GDP, 5.2, 3.7, 4.1, 5.7, 4.4, 5.6, 9.5, 7.8, 9.2, 8.0, 1.6, 5.7, 4.1, 3.5, 6.5, 5.6, 8.9, 5.2, 4.4, 3.9, 5.3, 7.5, 6.6, 7.6, 3.1, 4.8, 3.0, 3.6,11.8, 3.3, 9.5, 3.7, 4.5, 2.6, 7.0, 5.5, 5.5, 3.1, 2.1, 4.7, 7.2, 6.2, 9.0, 7.2, 8.1, 7.4, 2.5, 3.5, 8.4, 7.0, 6.0, 6.1, 4.5, 3.9, 8.0, 2.0, 5.7, 5.5, 3.6, 4.0, 7.0, 9.6, 3.6, 6.4, 3.6,10.8, 4.7, 9.4, 5.3, 4.8, 3.5, 5.9, 5.3, 5.0, 6.1, 6.8, 9.2, 5.1, 2.4, 6.3, 3.2, 6.5, 8.7, 8.4, 6.8, 8.0, 9.5, 3.1, 7.8, 8.6, 3 [...]
+Under-five mortality rate; both sexes (per 1000 live births), , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,
diff --git a/demo/rawdata/who2002.csv b/demo/rawdata/who2002.csv
new file mode 100644
index 0000000..fb89d0d
--- /dev/null
+++ b/demo/rawdata/who2002.csv
@@ -0,0 +1,38 @@
+Data for 2002
+Indicators,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,Austria,Azerbaijan,Bahamas,Bahrain,Bangladesh,Barbados,Belarus,Belgium,Belize,Benin,Bhutan,Bolivia,Bosnia and Herzegovina,Botswana,Brazil,Brunei Darussalam,Bulgaria,Burkina Faso,Burundi,Cambodia,Cameroon,Canada,Cape Verde,Central African Republic,Chad,Chile,China,Colombia,Comoros,Congo,Cook Islands,Costa Rica,Côte d'Ivoire,Croatia,Cuba,Cyprus,Czech Republic,Democratic People's Republic o [...]
+Adult mortality (per 1000) females,413, 94,128, 43,481,125, 90, 98, 52, 59,122,153, 82,258,103,134, 67,123,360,226,209, 90,745,136, 85, 97,522,563,298,454, 58,120,566,402, 67,104, 99,207,410,109, 74,502, 72, 89, 48, 72,191,449, 76,431,120,150,132,157,142,318,286,112,422,176, 60, 60,281,265, 86, 60,303, 48,222,162,332,383,202,438,150,112, 55,220,208,132,176, 66, 53, 49,121, 46,121,195,448,190, 63,163,306,118,139,742,471, 99,103, 64,262,610,106,202,417, 51,286,304,116, 97,176, 47,219,104,5 [...]
+Adult mortality (per 1000) males,494,167,170,113,594,195,177,204, 91,117,231,248,113,251,189,371,126,189,424,272,260,192,786,246,112,219,597,692,400,519, 95,210,620,477,134,165,236,260,474,173,127,577,178,138,102,163,236,585,123,481,206,256,216,240,257,383,350,322,487,281,135,133,342,330,207,118,354,118,261,283,401,462,299,493,269,256, 85,291,244,213,252,113, 98, 96,162, 95,191,426,509,293, 81,345,338,327,201,902,582,173,303,119,333,657,192,205,487, 87,340,393,222,170,211,109,319,160,613 [...]
+Child mortality (per 1000) females,256, 23, 43, 4,247, 18, 16, 35, 5, 4, 70, 11, 10, 73, 15, 10, 5, 34,158, 92, 73, 15,102, 34, 12, 16,217,177,124,158, 5, 30,173,180, 13, 41, 19, 72,101, 19, 10,143, 7, 7, 7, 4, 54,198, 5,144, 14, 30, 30, 39, 34,144,102, 6,168, 27, 3, 4, 79,117, 20, 4, 99, 5, 21, 50,153,198, 50,128, 42, 8, 3, 95, 36, 36,110, 6, 6, 5, 14, 4, 26, 28,113, 69, 11, 55,131, 12, 29,160,222, 17, 9, 5,125,190, 8, 43,224, 6, 36,155, 14, 24, 51, 3, 66, 41,2 [...]
+Child mortality (per 1000) males,258, 27, 54, 5,279, 22, 20, 39, 6, 6, 80, 13, 13, 71, 17, 14, 6, 44,166, 93, 78, 20,104, 42, 14, 18,232,189,149,162, 6, 42,187,202, 16, 31, 27, 80,109, 21, 12,192, 8, 8, 7, 5, 56,221, 6,156, 13, 37, 34, 38, 36,157,117, 10,185, 30, 4, 5,100,132, 26, 5,106, 7, 25, 57,163,215, 61,138, 44, 10, 4, 87, 45, 42,119, 8, 7, 5, 16, 4, 28, 38,119, 80, 14, 63,146, 15, 35,166,242, 19, 11, 5,145,197, 10, 38,233, 7, 46,186, 20, 30, 63, 5, 75, 43,212 [...]
+Infant mortality rate; both sexes (per 1000 live births), , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,
+Annual population growth rate (%), 3.8,-0.4, 1.8, 1.8, 2.9, 1.2, 1.3,-1.3, 1.2, 0.3, 1.1, 1.5, 3.0, 2.3, 0.4,-0.4, 0.3, 2.5, 2.8, 2.3, 2.2, 0.4, 2.1, 1.4, 2.6,-0.8, 2.9, 1.3, 2.8, 2.4, 1.0, 2.2, 2.1, 3.1, 1.4, 0.9, 1.8, 3.0, 3.2,-0.2, 2.4, 2.1,-0.6, 0.4, 1.2,-0.1, 0.9, 2.4, 0.3, 2.3, 0.7, 1.7, 1.8, 1.9, 1.9, 2.6, 2.4,-1.4, 2.8, 1.2, 0.3, 0.4, 2.6, 3.3,-0.5, 0.2, 2.4, 0.7,-0.5, 2.7, 2.4, 3.0, 0.4, 1.4, 2.8,-0.4, 1.0, 1.8, 1.4, 1.4, 2.9, 1.0, 2.7, 0.1, 0.9, 0.2, 3.9,-0.9, 2.3, 1.5, 2.1, 1. [...]
+Dependency ratio (per 100), 86, 53, 60, 46,101, 56, 59, 44, 48, 47, 57, 53, 47, 71, 43, 46, 53, 73, 93, 86, 77, 40, 74, 51, 51, 45,106, 99, 81, 86, 45, 80, 89, 99, 54, 44, 59, 82, 99, 65, 56, 82, 49, 44, 51, 42, 48, 98, 50, 86, 56, 59, 62, 66, 68, 91, 91, 48, 94, 57, 49, 53, 83, 80, 49, 48, 77, 49, 56, 87, 88,101, 54, 76, 80, 45, 53, 62, 54, 59, 79, 47, 61, 49, 61, 48, 69, 51, 82, 68, 38, 64, 84, 48, 56, 81, 96, 54, 51, 49, 91, 99, 60, 86,107, 47, 68, 87, 46, 61, 74, 53, 59, 57, 89, 59, [...]
+Percentage of population aged 60+ years, 4.7, 9.5, 6.1,21.6, 4.4,10.5,13.5,13.1,16.7,21.3, 9.2, 8.5, 4.0, 5.1,13.1,19.3,22.2, 5.9, 4.1, 6.5, 6.6,15.3, 4.4, 8.1, 4.5,21.7, 4.0, 4.4, 4.6, 5.6,17.1, 6.3, 6.1, 4.9,10.7,10.0, 7.1, 4.2, 4.6, 7.2, 7.9, 5.2,21.7,14.5,16.1,18.8,10.6, 4.2,20.4, 5.1,10.5, 6.9, 7.3, 6.9, 7.5, 5.9, 3.6,21.5, 4.6, 6.0,20.3,20.5, 6.2, 5.8,18.9,24.0, 5.2,23.8,10.5, 5.3, 4.5, 4.9, 7.0, 5.9, 5.4,20.0,15.3, 7.7, 7.9, 6.4, 4.6,15.3,13.1,24.5, 9.6,24.4, 4.8,11.5, 4.2, 7.0, 2 [...]
+Total fertility rate, 6.8, 2.3, 2.8, 1.3, 7.2, 1.6, 2.5, 1.2, 1.7, 1.3, 2.1, 2.3, 2.7, 3.5, 1.5, 1.2, 1.7, 3.2, 5.7, 5.1, 3.9, 1.3, 3.7, 2.2, 2.5, 1.1, 6.7, 6.8, 4.8, 4.7, 1.5, 3.4, 5.0, 6.7, 2.4, 1.8, 2.6, 4.9, 6.3, 3.2, 2.3, 4.8, 1.6, 1.6, 1.9, 1.2, 2.0, 6.7, 1.8, 5.7, 1.8, 2.7, 2.8, 3.3, 2.9, 5.9, 5.5, 1.2, 6.2, 2.9, 1.7, 1.9, 4.0, 4.8, 1.4, 1.3, 4.2, 1.3, 3.5, 4.5, 5.9, 7.1, 2.3, 4.0, 3.8, 1.2, 2.0, 3.1, 2.4, 2.4, 4.8, 1.9, 2.7, 1.2, 2.4, 1.3, 3.6, 2.0, 4.1, 4.1, 2.7, 2.7, 4.8, 1.1, [...]
+Total population (000), 22930, 3141, 31266, 69, 13184, 73, 37981, 3072, 19544, 8111, 8297, 310, 709, 143809, 269, 9940, 10296, 251, 6558, 2190, 8645, 4126, 1770, 176257, 350, 7965, 12624, 6602, 13810, 15729, 31271, 454, 3819, 8348, 15613, 1302307, 43526, 747, 3633, 18, 4094, 1636 [...]
+Expectation of lost healthy years at birth females, 7.7,10.8, 9.6, 9.1, 6.9,10.3,10.0,10.4, 8.7, 8.6,10.0, 9.5,10.1, 9.3, 9.8, 9.4, 8.2,10.2, 7.9, 9.5, 9.4,10.0, 5.2, 9.8,11.9, 8.5, 6.3, 6.2, 7.6, 7.3, 8.3,10.0, 6.1, 7.6,10.3, 7.6,10.0, 9.6, 7.2,11.5,10.3, 6.7, 9.3, 9.8,10.6, 8.1, 7.4, 7.0, 8.4, 7.4,10.2, 9.6, 9.4, 8.8,10.4, 8.5, 8.6, 8.1, 7.7, 9.7, 8.0, 8.8, 8.8, 8.4, 8.4, 7.6, 8.5, 8.1, 8.9, 9.1, 8.2, 7.2, 9.7, 6.9, 9.9, 8.6, 8.2, 8.4, 9.1,12.5,11.6, 8.2, 9.0, 7.8, 8.6, 7.5,10.9, 9.6, [...]
+Expectation of lost healthy years at birth males, 6.6, 7.8, 7.9, 7.0, 6.3, 8.9, 8.3, 7.6, 7.0, 7.1, 7.2, 8.1, 7.9, 7.3, 7.6, 6.1, 6.3, 9.0, 6.6, 7.3, 8.2, 7.0, 4.2, 8.5, 9.7, 6.2, 5.6, 5.3, 6.3, 6.0, 7.1, 7.9, 5.1, 6.4, 8.5, 6.5, 9.7, 7.8, 6.3, 8.6, 9.5, 5.4, 7.2, 7.9, 8.8, 6.6, 6.4, 6.0, 6.3, 6.1, 9.1, 7.7, 8.1, 7.4, 9.3, 7.2, 6.5, 6.0, 6.1, 7.7, 6.1, 6.6, 7.1, 6.9, 6.2, 5.9, 7.2, 6.7, 7.5, 8.2, 7.0, 6.1, 8.4, 5.6, 7.9, 6.9, 6.3, 6.8, 7.5,10.4,10.3, 6.3, 6.9, 6.0, 6.9, 6.1, 9.0, 6.1, 5. [...]
+Healthy life expectancy at age 60 (years) females, 9.5,13.9,13.3,19.9, 9.6,13.8,16.5,13.3,19.5,19.3,12.4,15.6,11.6,11.1,16.6,14.6,19.1,13.3,10.4,11.3,12.1,15.4,11.9,13.7,13.3,14.9, 9.7,10.4,11.0,10.4,19.3,13.4,10.4,10.0,16.8,14.7,15.4,11.5,11.5,12.6,16.7,10.7,16.1,16.7,15.0,16.8,13.2,10.2,17.2,10.6,15.3,13.7,15.2,11.3,14.1,10.5,11.3,16.5,10.2,11.9,18.9,20.3,11.6,11.2,14.6,19.0,11.2,18.1,12.6,13.3,10.5,10.1,12.2,11.7,13.1,15.9,18.7,11.4,11.5,11.9,10.6,17.5,18.2,19.4,14.5,21.7,12.9,12.5,11 [...]
+Healthy life expectancy at age 60 (years) males, 8.6,10.5,12.5,16.6, 8.1,11.6,13.0,10.9,16.9,16.2,10.6,13.3,11.5,11.1,13.1,10.5,15.7,11.5, 9.7,10.8,10.9,12.4,10.9,11.6,13.1,12.4, 8.6, 8.6, 9.7, 9.7,16.1,11.8, 9.6, 9.2,13.9,13.1,12.6,10.8,10.4,11.5,14.4, 9.5,12.5,15.2,14.2,13.5,12.1, 8.6,15.2, 9.8,13.8,11.3,13.2, 9.9,12.6, 9.7,10.6,11.9, 9.7,10.4,15.7,16.5,10.6,10.4,12.6,15.9,10.5,16.0,11.1,12.3, 9.6, 9.2,10.2,10.3,11.4,12.2,17.5,10.8,10.7,10.4, 9.2,14.8,16.8,16.4,13.0,17.5,11.1, 9.7,10.7 [...]
+Healthy life expectancy at birth (years) females,35.8,63.3,61.6,74.6,35.1,63.6,68.1,62.6,74.3,73.5,58.7,65.7,64.4,53.3,68.2,64.9,73.3,62.2,44.5,52.9,55.2,66.4,35.4,62.4,65.5,66.8,36.3,36.8,49.5,41.8,74.0,62.9,37.7,41.7,69.7,65.2,66.3,55.3,47.3,62.7,69.3,41.3,69.3,69.5,68.5,70.9,59.7,39.1,71.1,43.2,65.6,61.9,64.1,60.2,62.3,46.3,50.8,69.0,41.7,60.6,73.5,74.7,52.6,50.5,66.6,74.0,50.3,72.9,60.0,59.9,45.6,41.5,57.2,44.1,60.5,68.2,73.6,53.6,58.9,59.1,51.5,71.5,72.3,74.7,65.9,77.7,62.3,59.3,44. [...]
+Healthy life expectancy at birth (years) males,35.3,59.5,59.7,69.8,31.6,60.1,62.5,59.4,70.9,69.3,55.8,60.9,64.2,55.3,62.9,56.6,68.9,58.4,43.4,52.9,53.6,62.3,36.0,57.2,65.1,62.5,34.9,33.4,45.6,41.1,70.1,58.8,37.0,39.7,64.9,63.1,57.8,53.9,45.3,60.6,65.2,37.6,63.8,67.1,66.7,65.9,58.0,35.0,68.6,42.5,61.9,57.2,59.8,57.8,57.2,44.7,49.3,59.2,40.7,56.9,68.7,69.3,50.2,48.5,62.2,69.6,49.2,69.1,58.4,54.9,43.9,39.6,53.1,43.5,56.3,61.5,72.1,53.3,57.4,56.1,48.8,68.1,70.5,70.7,64.2,72.3,59.7,52.6,44.1, [...]
+Healthy life expectancy at birth (years) total population,35.5,61.4,60.6,72.2,33.4,61.9,65.3,61.0,72.6,71.4,57.2,63.3,64.3,54.3,65.6,60.7,71.1,60.3,44.0,52.9,54.4,64.3,35.7,59.8,65.3,64.6,35.6,35.1,47.5,41.5,72.0,60.8,37.4,40.7,67.3,64.1,62.0,54.6,46.3,61.6,67.2,39.5,66.6,68.3,67.6,68.4,58.8,37.1,69.8,42.9,63.7,59.6,61.9,59.0,59.7,45.5,50.0,64.1,41.2,58.8,71.1,72.0,51.4,49.5,64.4,71.8,49.8,71.0,59.2,57.4,44.8,40.5,55.2,43.8,58.4,64.9,72.8,53.5,58.1,57.6,50.1,69.8,71.4,72.7,65.1,75.0,61.0 [...]
+Life expectancy at birth (years) females,43.4,74.1,71.2,83.7,42.0,73.9,78.1,73.0,83.0,82.2,68.6,75.2,74.5,62.6,77.9,74.3,81.5,72.4,52.4,62.4,64.7,76.4,40.6,72.3,77.4,75.3,42.6,43.0,57.1,49.0,82.3,72.9,43.7,49.3,80.0,72.7,76.3,64.9,54.5,74.2,79.5,48.0,78.6,79.3,79.1,79.0,67.1,46.1,79.5,50.7,75.8,71.5,73.5,69.0,72.8,54.8,59.3,77.1,49.4,70.3,81.5,83.5,61.4,58.9,75.0,81.6,58.8,81.1,68.8,69.0,53.7,48.7,66.9,51.1,70.4,76.8,81.8,62.0,67.9,71.7,63.1,79.8,81.4,82.5,74.6,85.3,73.3,68.9,51.9,66.7,7 [...]
+Life expectancy at birth (years) males,41.9,67.3,67.5,76.8,37.9,69.0,70.8,67.0,77.9,76.4,63.0,69.0,72.1,62.6,70.5,62.6,75.2,67.4,50.1,60.2,61.8,69.3,40.2,65.7,74.8,68.7,40.6,38.7,51.9,47.2,77.2,66.6,42.1,46.1,73.4,69.6,67.5,61.6,51.6,69.2,74.8,43.1,71.0,75.0,75.5,72.4,64.4,41.0,74.8,48.6,71.0,64.9,67.9,65.3,66.5,51.9,55.8,65.1,46.8,64.6,74.8,75.9,57.3,55.4,68.4,75.6,56.3,75.8,65.9,63.1,50.9,45.7,61.5,49.1,64.2,68.4,78.4,60.1,64.9,66.5,59.1,74.4,77.3,76.8,71.1,78.4,68.6,58.7,49.8,61.8,75. [...]
+Life expectancy at birth (years) total population,42.6,70.4,69.4,80.3,39.9,71.4,74.4,70.0,80.4,79.4,65.8,72.1,73.2,62.6,74.3,68.3,78.4,69.7,51.2,61.3,63.2,72.8,40.4,68.9,76.1,71.9,41.7,40.8,54.6,48.1,79.8,70.1,42.9,47.7,76.7,71.1,71.8,63.3,53.1,71.6,77.1,45.3,74.8,77.1,77.3,75.8,65.8,43.5,77.2,49.6,73.3,68.0,70.6,67.1,69.7,53.4,57.6,71.1,48.0,67.3,78.2,79.7,59.2,57.1,71.7,78.7,57.6,78.4,67.4,65.9,52.3,47.2,64.3,50.1,67.2,72.6,80.1,61.0,66.4,68.9,61.0,77.1,79.4,79.7,72.8,81.9,70.8,63.6,50 [...]
+Percentage of total life expectancy lost males,15.8,11.6,11.7, 9.2,16.6,12.8,11.7,11.3, 9.0, 9.3,11.4,11.7,10.9,11.7,10.7, 9.7, 8.3,13.3,13.3,12.1,13.2,10.2,10.4,13.0,13.0, 9.1,13.9,13.7,12.1,12.8, 9.2,11.8,12.0,13.9,11.5, 9.3,14.4,12.6,12.2,12.5,12.8,12.6,10.1,10.5,11.7, 9.1,10.0,14.7, 8.4,12.6,12.9,11.9,11.9,11.4,14.0,13.9,11.7, 9.2,13.0,12.0, 8.1, 8.7,12.4,12.5, 9.1, 7.8,12.7, 8.9,11.4,13.0,13.7,13.4,13.6,11.4,12.4,10.0, 8.1,11.3,11.5,15.7,17.5, 8.5, 8.9, 7.8, 9.7, 7.8,13.1,10.4,11.5 [...]
+External resources for health as % of total expenditure on health, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,
+General Government expenditure on health as % of total expenditure on health, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,
+General Government expenditure on health as % of total general government expenditure, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,
+Out-of-pocket expenditure on health as % of private expenditure on health, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,
+Per capita total expenditure on health at average exchange rate (US$), , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,
+Per capita total expenditure on health in international dollars, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,
+Per capita GDP in international dollars, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,
+Per capita government expenditure on health at average exchange rate (US$), , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,
+Per capita government expenditure on health in international dollars, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,
+Percentage of total life expectancy lost females,17.7,14.6,13.5,10.8,16.4,13.9,12.8,14.2,10.4,10.5,14.6,12.6,13.6,14.8,12.5,12.6,10.1,14.1,15.0,15.2,14.6,13.1,12.9,13.6,15.4,11.3,14.8,14.4,13.3,14.8,10.0,13.7,13.8,15.5,12.9,10.4,13.1,14.8,13.2,15.5,12.9,13.9,11.8,12.4,13.4,10.3,11.0,15.1,10.5,14.7,13.5,13.4,12.8,12.8,14.3,15.5,14.5,10.5,15.6,13.8, 9.9,10.6,14.3,14.2,11.2, 9.3,14.4,10.0,12.9,13.2,15.2,14.7,14.5,13.6,14.0,11.2,10.0,13.6,13.4,17.5,18.4,10.3,11.1, 9.5,11.5, 8.8,14.9,13.9,13 [...]
+Prepaid plans as % of private expenditure on health, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,
+Private expenditure on health as % of total expenditure on health, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,
+Social security expenditure on health as % of general government expenditure on health, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,
+Total expenditure on health as % of GDP, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,
+Under-five mortality rate; both sexes (per 1000 live births), , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,
diff --git a/demo/rawdata/whoreps.csv.gz b/demo/rawdata/whoreps.csv.gz
new file mode 100644
index 0000000..34201fe
Binary files /dev/null and b/demo/rawdata/whoreps.csv.gz differ
diff --git a/demo/testrunner.py b/demo/testrunner.py
new file mode 100644
index 0000000..a3f18e6
--- /dev/null
+++ b/demo/testrunner.py
@@ -0,0 +1,144 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: testrunner.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/demo/testrunner.py,v $
+
+import sys, time
+from SOOMv0 import *
+
+import curses
+import textwrap
+import traceback
+import logging
+
+class CursesLoggingHandler(logging.Handler):
+ def __init__(self, w):
+ logging.Handler.__init__(self)
+ self.w = w
+ self.setFormatter(logging.Formatter('%(levelname)s %(message)s'))
+
+ def emit(self, record):
+ self.w.addstr('%s\n' % self.format(record))
+ self.w.refresh()
+
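+# Interactive curses front-end for stepping through a list of Test
+# instances: space/enter/arrow keys move between tests, typing a number
+# then enter jumps to that test, and 'q' (or Ctrl-D) quits.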
+class TestRunner:
+ enter_keys = 13, 10, curses.KEY_ENTER
+ dn_keys = (ord(' '), curses.KEY_DOWN, curses.KEY_RIGHT) + enter_keys
+ up_keys = curses.KEY_UP, curses.KEY_LEFT
+ quit_keys = ord('q'), ord('Q'), 4
+ bs_keys = 8, curses.KEY_BACKSPACE
+
+ def __init__(self):
+ self.w = curses.initscr()
+ self.real_stdout, sys.stdout = sys.stdout, self
+ curses.noecho()
+ curses.cbreak()
+ self.w.keypad(1)
+ self.w.scrollok(1)
+ self.w.idlok(1)
+ soom.add_logging_handler(CursesLoggingHandler(self.w))
+
+ def write(self, buf):
+# y, x = self.w.getyx()
+ self.w.addstr(buf)
+ self.w.refresh()
+
+ def close(self):
+ self.w.keypad(0)
+ curses.nocbreak()
+ curses.echo()
+ curses.endwin()
+ sys.stdout = self.real_stdout
+
+ def run(self, tests):
+ i = 0
+ while 1:
+ test = tests[i]
+ print '\n## Starting test %d of %d' % (i + 1, len(tests))
+ test.run(self.w)
+ self.w.addstr('Finished test %d of %d> ' % (i + 1, len(tests)))
+ seek = ''
+ while 1:
+ curses.flushinp()
+ c = self.w.getch()
+ if c in self.enter_keys and seek:
+ try:
+ i = int(seek) - 1
+ if i < 0 or i >= len(tests):
+ raise ValueError
+ break
+ except ValueError:
+ curses.beep()
+ continue
+ if c in self.dn_keys:
+ if i < len(tests) - 1:
+ i += 1
+ break
+ else:
+ curses.beep()
+ elif c in self.up_keys:
+ if i > 0:
+ i -= 1
+ break
+ else:
+ curses.beep()
+ elif c in self.quit_keys:
+ return
+ elif ord('0') <= c <= ord('9'):
+ seek = seek + chr(c)
+ self.w.echochar(c)
+ elif c in self.bs_keys:
+ if seek:
+ seek = seek[:-1]
+ self.w.echochar(8)
+ self.w.clrtoeol()
+# else:
+# self.w.addstr('%s\n' % c)
+
+class Test:
+ def __init__(self, commentary, fn, *args, **kwargs):
+ self.commentary = commentary
+ self.fn = fn
+ self.args = args
+ self.disabled = kwargs.pop('disabled', False)
+ self.kwargs = kwargs
+
+ def run(self, w):
+ lines = [l.strip() for l in self.commentary.split('\n')]
+ lines = textwrap.wrap(' '.join(lines), 75)
+ args = []
+ if hasattr(self.args[0], 'name'):
+ args.append(self.args[0].name)
+ else:
+ args.append(repr(self.args[0]))
+ for a in self.args[1:]:
+ args.append(repr(a))
+ for k, v in self.kwargs.items():
+ args.append('%s=%r' % (k, v))
+ print '## %s(%s)' % (self.fn.__name__, ', '.join(args))
+ if self.disabled:
+ print '## DISABLED ##'
+ else:
+ start = time.time()
+ try:
+ self.fn(*self.args, **self.kwargs)
+ except:
+ for line in traceback.format_exception(*sys.exc_info()):
+ w.addstr(line, curses.A_BOLD)
+ elapsed = time.time() - start
+ for line in lines:
+ print '## ' + line
+ if not self.disabled:
+ print '## %.3f seconds' % elapsed
diff --git a/docs/README.searchabletext b/docs/README.searchabletext
new file mode 100644
index 0000000..d915412
--- /dev/null
+++ b/docs/README.searchabletext
@@ -0,0 +1,230 @@
+Searchable text columns in SOOM
+===============================
+
+NOTE - All material associated with "NetEpi Analysis" is Copyright (C)
+2004, 2005 Health Administration Corporation (New South Wales Department
+of Health).
+
+NetEpi Analysis is licensed under the terms of the Health Administration
+Corporation Open Source License Version 1.2 (HACOS License V1.2),
+the full text of which can be found in the LICENSE file provided with
+NetEpi Analysis.
+
+
+A new SOOM column class, SearchableTextDatasetColumn, has been added,
+with a new column type of 'searchabletext'.
+
+It builds a reverse index of the column's vocabulary, keeping, for each
+word, all rows and word positions within those rows where that word
+occurs.
+
+These are stored in three files, e.g.
+
+ nursing_assessment/data.SOOMstringarray
+ nursing_assessment/inverted.SOOMstringvocab
+ nursing_assessment/occurrences.SOOMpackeddata
+
+data.SOOMstringarray contains the actual rows of the column
+
+inverted.SOOMstringvocab contains, for each word, an (offset, length)
+pair indexing into occurrences.SOOMpackeddata
+
+occurrences.SOOMpackeddata contains (many) (rownum, wordnum) pairs in
+rownum/wordnum sorted order for each word.
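+
+As an illustration only (not the on-disk SOOM implementation), the
+reverse index amounts to a mapping from each word to the sorted
+(rownum, wordnum) pairs at which it occurs:
+
+    import re
+
+    def build_index(rows):
+        # word -> list of (rownum, wordnum) pairs, in row/word order
+        index = {}
+        for rownum, text in enumerate(rows):
+            words = re.findall(r"[a-z0-9']+", text.lower())
+            for wordnum, word in enumerate(words):
+                # "'" is dropped without separating words (see below)
+                index.setdefault(word.replace("'", ''), []).append(
+                    (rownum, wordnum))
+        return index
+
+    index = build_index(['Dog ate cat', "Don't walk the dog"])
+    # index['dog'] == [(0, 0), (1, 3)]; index['dont'] == [(1, 0)]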
+
+A 'searchabletext' column may be queried using the 'contains' operator
+(and only the 'contains' operator) with an expression of the form
+
+ column contains [[search-expr]]
+
+Where column is the name of the column, e.g. nursing_assessment, and
+search-expr is a query according to the following grammar.
+
+ search-expr ::= sfactor ( '|' sfactor ) *
+ sfactor ::= sphrase ( ( '&' | '&-' ) sphrase
+ | ( '<' | '>' | '~' ) ( '[' INT ']') ? sphrase
+ | ( '-' ) ? sphrase ) *
+ sphrase ::= sterm | '"' sterm + '"'
+ sterm ::= WORD | '(' search-expr ')'
+
+Where WORD is a sequence of letters, numbers, "'" characters and '*'
+characters. The sequence must contain at least one letter or number.
+The character '*' will match zero or more characters.
+
+In other words, a basic query consists of individual WORDs, so
+
+ dog
+
+matches all records where the column contains the word 'dog' (or 'DOG',
+or 'DoG'; searching is not case-sensitive), but not records that contain
+only the word 'dogs' and not the word 'dog' on its own.
+
+All punctuation is dropped from queries but acts as a WORD separator;
+the exception is "'", which is dropped without separating WORDs.
+
+ hair-cut
+
+is two words, while
+
+ don't
+
+is one word and will match the query 'dont'.
+
+A basic query unit can also be a phrase of WORDs, i.e.
+
+ "arrived via car"
+
+means the WORD 'arrived' followed by the WORD 'via' followed by the WORD
+'car' and would match 'ARRIVED VIA CAR' but not 'ARRIVED IN CAR' or
+'ARRIVED VIA OWN CAR'. (See below for alternatives to phrases.)
+
+Individual words (or phrases) may be joined together with conjunction
+operators:
+
+ & and
+ &- and not
+ < before
+ ~ near
+ > after
+
+So
+
+-- A & B
+
+ the search-expression A must match the row and the search-expression
+ B must also match the row
+
+ dog & cat
+
+ will match 'dog ate cat' as well as 'cat scratched dog' but will
+ NOT match 'dog ate dinner', 'cat left home' or 'the weather is nice'
+
+-- A &- B (or A & - B; or A & -B)
+
+ the search-expression A must match the row and the search-expression
+ B MUST NOT match the row
+
+ dog &- cat
+
+ will match 'dog ate dinner', but will NOT match 'dog ate cat',
+ 'cat scratched dog', 'cat left home' or 'the weather is nice'
+
+** NOTE: The '&' character may be left out, which means that the
+   expression 'hair-cut' is equivalent to 'hair &- cut', which may not
+   be what is desired.
+
+-- A < B
+
+ something matching the search-expression A must occur within
+ some number of words (usually 10) before something matching the
+ search-expression B in the same row
+
+ dog < cat
+
+ will match 'dog ate cat', but will NOT match 'dog ate dinner',
+ 'cat scratched dog', 'cat left home' or 'the weather is nice'.
+
+** NOTE: 'dog < cat' will match 'the cat bit the dog then the cat left'
+ since 'dog' appears before 'cat' in that sentence.
+
+-- A > B
+
+ something matching the search-expression A must occur within
+ some number of words (usually 10) after something matching the
+ search-expression B in the same row
+
+ dog > cat
+
+ will match 'cat scratched dog', but will NOT match 'dog ate cat',
+ 'dog ate dinner', 'cat left home' or 'the weather is nice'.
+
+-- A ~ B
+
+ something matching the search-expression A must occur within
+ some number of words (usually 10) of something matching the
+ search-expression B in the same row
+
+ dog ~ cat
+
+ will match 'dog ate cat' as well as 'cat scratched dog', but will
+ NOT match 'dog ate dinner', 'cat left home' or 'the weather is nice'.
+
+** NOTE: The difference between '~' (NEAR) and '&' (AND) is that with
+ '&' the WORDs can appear anywhere in the text while with '~' the
+ WORDs must appear in proximity to one another. The text 'I took the
+   dog for a walk and along the way we went into a park and met a cat'
+ would NOT match the above query without changing the proximity value
+ for the NEAR operator.
+
+** NOTE: ~ (NEAR) may be a quicker alternative to the use of complicated
+ phrases: pick the least common words in a phrase and use ~ instead,
+ e.g. instead of
+
+ "whether 'tis nobler in the mind"
+
+ use
+
+ nobler ~ mind
+
+ which is likely to be faster without producing too many extra results.
+
+** NOTE: The degree of 'nearness' for ~, <, and > may be changed by
+ specifying a number in the query, e.g.
+
+ dog ~[2] cat
+
+ will match 'dog' and 'cat' appearing within two (2) words of each
+ other, i.e. with at most one (1) intervening word.
+
+All conjunction operators are left associative and of the same precedence,
+so expressions are evaluated left to right. Some thought must be given
+to such expressions to work out what matches. For example
+
+ bitten < ferret < hand
+
+will match 'bitten on the hand by a ferret', since (bitten < ferret)
+matches the words 'bitten' and 'ferret', and some part of that result,
+i.e. the left hand side of the second <, is before 'hand':
+
+ (bitten < ferret) matches 'bitten' and 'ferret'
+ ((bitten < ferret) < hand) matches {'bitten', 'hand'} since only
+ the word 'bitten' in the result {'bitten', 'ferret'} is before
+ {'hand'}
+
+Think of the results of an operator as a set of words: A < B means
+anything in A which is before anything in B, unioned with anything in
+B which is after anything in A, taking into account proximity
+constraints.
+
+Of lower priority than conjunction is disjunction (union). The '|'
+operator joins together the results of subqueries.
+
+-- A | B
+
+ matches anything which matched A or B
+
+ dog | cat
+
+ will match 'dog ate cat', 'cat scratched dog', 'dog ate dinner',
+ 'cat left home' but will NOT match 'the weather is nice'
+
+Sub-expressions may be grouped using parentheses to overcome operator
+precedence, e.g.
+
+ (fall | fell) (etoh | alcohol)
+
+will match anything containing the word 'fall' or 'fell' and also
+containing the word 'etoh' or the word 'alcohol'; cf.
+
+ fall | fell etoh | alcohol
+
+which will match anything containing the word 'fall', anything
+containing both the word 'fell' and the word 'etoh', and any rows
+containing the word 'alcohol'. This is quite different.
+
+** NOTE: the individual elements of a phrase can be WORDs, or
+ subexpressions, so, for example
+
+ "fell (over | down)"
+
+ would match 'fell down' as well as 'fell over'
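+
+Putting it together, such a query can be issued from Python as a
+dataset filter expression. This is a sketch only -- the dataset name
+'ed' and the summary column 'triage' are hypothetical, while
+'filterexpr' is the usual SOOM filtering argument:
+
+    from SOOMv0 import datasets, SummaryStats
+
+    ed = datasets.dsload('ed')
+    summ = ed.summ('triage', SummaryStats.freq(), filterexpr=
+        "nursing_assessment contains [[(fall | fell) (etoh | alcohol)]]")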
diff --git a/liccheck.py b/liccheck.py
new file mode 100644
index 0000000..9c6a317
--- /dev/null
+++ b/liccheck.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python
+#
+# This tool checks that all source files (extensions .py, .pyx, .html,
+# .c, .h, .tex and .sas, plus the extra files listed below) contain the
+# following license header text:
+
+"""\
+ The contents of this file are subject to the HACOS License Version 1.2
+ (the "License"); you may not use this file except in compliance with
+ the License. Software distributed under the License is distributed
+ on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ implied. See the LICENSE file for the specific language governing
+ rights and limitations under the License. The Original Software
+ is "NetEpi Analysis". The Initial Developer of the Original
+ Software is the Health Administration Corporation, incorporated in
+ the State of New South Wales, Australia.
+
+ Copyright (C) 2004,2005 Health Administration Corporation.
+ All Rights Reserved.
+"""
+
+import sys
+import os
+import re
+
+# Check all files with these extensions
+check_exts = '.py', '.pyx', '.html', '.c', '.h', '.tex', '.sas'
+# Additional files to check
+extras = [
+ 'TODO',
+ 'SOOMv0/soomparse.g',
+]
+# Ignore listed files
+ignore = [
+ 'SOOMv0/yappsrt.py', # YAPPS2 run-time
+ 'web/static/copyright.html', # Contains the full LICENSE
+ 'SOOMv0/Cstats.pyx', # Mixed LICENSE
+]
+ignore_dirs = [
+ 'build',
+ 'yapps2',
+]
+
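+# Strips comment leaders (%, #, --, or whitespace plus '*') and leading
+# whitespace at the start of each line, plus trailing whitespace and
+# semicolons, so headers compare equal across comment styles.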
+filt_re = re.compile(r'(^(%|#|--|[ \t]+\*)?[ \t]*)|([ \t\r;]+$)', re.MULTILINE)
+
+exit_status = 0
+
+def strip(buf):
+ return filt_re.sub('', buf)
+
+def check(filepath, want):
+ f = open(filepath)
+ try:
+ head = f.read(2048)
+ finally:
+ f.close()
+ if strip(head).find(want) < 0:
+ global exit_status
+ exit_status = 1
+ print filepath
+
+
+want = strip(__doc__)
+for filepath in extras:
+ check(filepath, want)
+for dirpath, dirnames, filenames in os.walk('.'):
+ dirnames[:] = [dirname
+ for dirname in dirnames
+ if dirname not in ignore_dirs]
+ for filename in filenames:
+ for ext in check_exts:
+ if filename.endswith(ext):
+ break
+ else:
+ continue
+ filepath = os.path.normpath(os.path.join(dirpath, filename))
+ if filepath in ignore:
+ continue
+ check(filepath, want)
+sys.exit(exit_status)
diff --git a/sandbox/PopRate.py b/sandbox/PopRate.py
new file mode 100644
index 0000000..b5533ee
--- /dev/null
+++ b/sandbox/PopRate.py
@@ -0,0 +1,110 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: PopRate.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/sandbox/PopRate.py,v $
+
+import Numeric
+import CrossTab
+
+def calc_directly_std_rates(summset, popset, stdpopset, conflevel=0.95,
+                            basepop=100000, ci_method='dobson',
+                            popset_popcol='_freq_',
+                            stdpopset_popcol='_stdpop_'):
+ """
+ Calculate Standardised Population Rates
+ """
+ from rpy import r
+ sumaxis = Numeric.add.reduce
+
+    stdtab = CrossTab.CrossTab(stdpopset)
+ summtab = CrossTab.CrossTab(summset, shaped_like=stdtab)
+ stdtab.replicate_axes(summtab.get_shape())
+ poptab = CrossTab.CrossTab(popset, shaped_like=summtab)
+ poptab.collapse_axes(summtab.get_shape())
+ popfreq = poptab.tables[popset_popcol].astype(Numeric.Float64)
+ stdpop = stdtab.tables[stdpopset_popcol].astype(Numeric.Float64)
+ sum_stdpop = sumaxis(stdpop)
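+    # Standardisation weights: each stratum's share of the standard population.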
+ stdwgts = stdpop / sum_stdpop
+ stdpop_sq = stdpop**2
+ sum_stdpop_sq = sum_stdpop**2
+
+ zv = r.qnorm(0.5*(1+conflevel))
+ alpha = 1.0 - conflevel
+ axis = 0
+ basepop = float(basepop)
+
+ for name, tab in summtab.tables.items():
+ colname = None
+ if name == '_freq_':
+ colname = '_rate_'
+ collabel = 'Rate'
+ wgtcolname = '_rate_by_stdwgt_'
+ wgtcollabel = 'Rate multiplied by Std Wgt'
+ elif name.startswith('freq_wgtd_by'):
+ colname = 'rate_' + name[5:]
+ label = summset[name].label[len('Frequency'):]
+ wgtcolname = 'rate_by_stdwgt_' + name[5:]
+ collabel = 'Rate' + label
+            wgtcollabel = 'Rate multiplied by Std Wgt (' + label + ')'
+ print 'shapes', name, tab.shape, popfreq.shape, stdwgts.shape
+ if colname:
+ summfreq = tab.astype(Numeric.Float64)
+ rate = summfreq / popfreq
+ wgtrate = rate * stdwgts
+
+ # Crude rate
+ cr = sumaxis(summfreq, axis) / sumaxis(popfreq, axis)
+
+ # Directly standardised rate
+ dsr = sumaxis(wgtrate, axis)*basepop
+
+ # Calculations for CI per Dobson et al.
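+            # (Interpretation, not from the original comments:) the Dobson
+            # method rescales the exact gamma-based Poisson limits for the
+            # total count N onto the DSR scale:
+            #     limit = DSR + sqrt(var(DSR) / N) * (lambda - N)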
+ se_wgtrate = summfreq*((stdwgts/(popfreq/basepop))**2)
+ stderr = stdpop_sq * rate * (1.0 - rate)
+ se_rate = sumaxis(se_wgtrate, axis)
+ sumsei = sumaxis(stderr, axis)
+ total_freq = sumaxis(summfreq, axis)
+ if total_freq == 0:
+ import math
+ u_lam = -math.log10(1 - conflevel)
+ l_lam = 0.0
+ else:
+ l_lam = r.qgamma((1 - conflevel)/2.0, total_freq, scale = 1.)
+ u_lam = r.qgamma((1 + conflevel)/2.0, total_freq + 1, scale = 1.)
+ dsr_ll = dsr + (((se_rate/total_freq)**0.5)*(l_lam - total_freq))
+ dsr_ul = dsr + (((se_rate/total_freq)**0.5)*(u_lam - total_freq))
+
+ # Calculations for CI per Selvin/Epitools
+ # dsr_var = sumaxis((stdwgts**2) * (summfreq / (popfreq**2)), axis)
+ # x2divvar = (dsr**2) / dsr_var
+ # lcf = r.qgamma(alpha/2., shape = x2divvar, scale = 1.) / x2divvar
+ # ucf = r.qgamma(1. - alpha/2., shape = x2divvar+1., scale = 1.) / x2divvar
+ # dsr_ll = dsr * lcf
+ # dsr_ul = dsr * ucf
+
+ print name, "Sex-specific CR=", cr*basepop
+ print name, "Sex-specific DSR and Dobson limits:", dsr, dsr_ll, dsr_ul
+ return summset
+
+
+if __name__ == '__main__':
+ from SOOMv0 import datasets, soom, SummaryStats
+
+ path = '../SOOM_objects'
+ # path = None
+ ds = datasets.dsload('nhds', path=path)
+ pop = datasets.dsload('nhds97pop', path=path)
+ wp = datasets.dsload("worldpop_p", path=path)
+ print wp
+ s = ds.summ('agegrp', 'sex', SummaryStats.freq(), filterexpr='year=1997',)
+ p = calc_directly_std_rates(s, pop, wp)
+
diff --git a/sandbox/cacheloadmeta.py b/sandbox/cacheloadmeta.py
new file mode 100644
index 0000000..4bcaa89
--- /dev/null
+++ b/sandbox/cacheloadmeta.py
@@ -0,0 +1,51 @@
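+# Ad hoc benchmark comparing ways of persisting load-cache metadata:
+# writing the key_to_recno map as CSV or gzipped CSV, and pickling the
+# whole object, timing each dump and load against an existing
+# metadata.pkl.gz.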
+from time import time
+import gzip
+import cPickle
+import csv
+
+t0 = time()
+m = cPickle.load(gzip.open('SOOM_objects/edis/load_cache/metadata.pkl.gz', 'rb'))
+print 'load', time() - t0
+
+if 0:
+ t0 = time()
+ f = open('/tmp/clm-a', 'w')
+ w = csv.writer(f)
+ w.writerows(m.key_to_recno.iteritems())
+ f.close()
+ print 'csv', time() - t0
+
+if 0:
+ t0 = time()
+ f = gzip.open('/tmp/clm-b', 'wb')
+ w = csv.writer(f)
+ w.writerows(m.key_to_recno.iteritems())
+ f.close()
+ print 'csv gzip', time() - t0
+
+if 0:
+ t0 = time()
+ f = gzip.open('/tmp/clm-b', 'rb')
+ map = {}
+ for key, index in csv.reader(f):
+ map[key] = int(index)
+ f.close()
+ print 'csv gzip load', time() - t0
+
+if 1:
+ t0 = time()
+ f = open('/tmp/clm-c', 'wb')
+ cPickle.dump(m, f, -1)
+ f.close()
+ print 'pickle dump', time() - t0
+
+if 1:
+ t0 = time()
+ f = open('/tmp/clm-c', 'rb')
+ m2 = cPickle.load(f)
+ f.close()
+ print 'pickle load', time() - t0
+
+
+
+
diff --git a/sandbox/ci_bars.py b/sandbox/ci_bars.py
new file mode 100644
index 0000000..34ff4ce
--- /dev/null
+++ b/sandbox/ci_bars.py
@@ -0,0 +1,446 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: ci_bars.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/sandbox/ci_bars.py,v $
+
+from rpy import *
+
+# set up dummy test data
+testdata = {
+ 'dsr':(1,2,3,4,5,6,7,8,9,10,0,1,2,3,4,5,6,7,8,9,
+ 2,3,4,5,6,7,8,9,10,11,3,4,5,6,7,8,9,10,11,12),
+ 'year':(1998,1998,1998,1998,1998,1998,1998,1998,1998,1998,
+ 1999,1999,1999,1999,1999,1999,1999,1999,1999,1999,
+ 2000,2000,2000,2000,2000,2000,2000,2000,2000,2000,
+ 2001,2001,2001,2001,2001,2001,2001,2001,2001,2001),
+'geog_area':('North','South','East','West','Middle',
+ 'North','South','East','West','Middle',
+ 'North','South','East','West','Middle',
+ 'North','South','East','West','Middle',
+ 'North','South','East','West','Middle',
+ 'North','South','East','West','Middle',
+ 'North','South','East','West','Middle',
+ 'North','South','East','West','Middle'),
+'sex':('Male','Male','Male','Male','Male',
+ 'Female','Female','Female','Female','Female',
+ 'Male','Male','Male','Male','Male',
+ 'Female','Female','Female','Female','Female',
+ 'Male','Male','Male','Male','Male',
+ 'Female','Female','Female','Female','Female',
+ 'Male','Male','Male','Male','Male',
+ 'Female','Female','Female','Female','Female'),
+'age':('Old','Old','Old','Old','Old',
+ 'Young','Young','Young','Young','Young',
+ 'Old','Old','Old','Old','Old',
+ 'Young','Young','Young','Young','Young',
+ 'Old','Old','Old','Old','Old',
+ 'Young','Young','Young','Young','Young',
+ 'Old','Old','Old','Old','Old',
+ 'Young','Young','Young','Young','Young')
+ }
+
+testdata['year'] = [str(x) for x in testdata['year']]
+
+# add dummy lower and upper confidence limits
+testdata['dsr_ll'] = [x - 0.7 for x in testdata['dsr']]
+testdata['dsr_ul'] = [x + 0.5 for x in testdata['dsr']]
+
+r.library('lattice')
+
+def errorbar_macro(plottype='lineplot',datadict=None,measure=None,measure_lower_limits=None,measure_upper_limits=None,
+ condcols=None,groups=None,horizontal=None,origin=0):
+
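+    # Compose a lattice formula from condcols (first column on an axis, the
+    # rest as panel conditioners), then build and evaluate R code for the
+    # requested plot type, drawing error bars with panel.segments between
+    # the lower and upper limit columns in each panel.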
+ if horizontal is None:
+ if plottype == 'lineplot':
+ horizontal = True
+ else:
+ horizontal = False
+
+ if not horizontal:
+ formula = measure + ' ~ ' + condcols[0]
+ else:
+ formula = condcols[0] + ' ~ ' + measure
+ if len(condcols) > 1:
+ formula += ' | ' + condcols[1]
+ if len(condcols) > 2:
+ formula += ' + ' + ' + '.join(condcols[2:])
+
+ rparameters = dict(formulastring=formula, ll=measure_lower_limits, ul=measure_upper_limits, groups=groups, origin=float(origin))
+
+ try:
+ r.remove('errorbardataframe')
+ r.remove('errorbarplot')
+ except:
+ pass
+
+ rdataframe = with_mode(NO_CONVERSION,r.as_data_frame)(datadict)
+ r.assign('errorbardataframe',rdataframe)
+
+ if plottype == 'barchart' and not horizontal:
+ rplotcode = """
+ errorbarplot <- with(errorbardataframe,barchart(%(formulastring)s,
+ origin = %(origin)f,
+ dsr_ll = %(ll)s,
+ dsr_ul = %(ul)s,
+ panel = function(x, y, ..., dsr_ll, dsr_ul, subscripts) {
+ panel.barchart(x, y, subscripts, ...)
+ dsr_ll <- dsr_ll[subscripts]
+ dsr_ul <- dsr_ul[subscripts]
+ panel.segments(as.numeric(x),
+ dsr_ll,
+ as.numeric(x),
+ dsr_ul,
+ col = 'red', lwd = 2)
+ }))
+ """
+ elif plottype == 'barchart' and horizontal:
+ rplotcode = """
+ errorbarplot <- with(errorbardataframe,barchart(%(formulastring)s,
+ origin = %(origin)f,
+ dsr_ll = %(ll)s,
+ dsr_ul = %(ul)s,
+ panel = function(x, y, ..., dsr_ll, dsr_ul, subscripts) {
+ panel.barchart(x, y, subscripts, ...)
+ dsr_ll <- dsr_ll[subscripts]
+ dsr_ul <- dsr_ul[subscripts]
+ panel.segments(dsr_ll,
+ as.numeric(y),
+ dsr_ul,
+ as.numeric(y),
+ col = 'red', lwd = 2)
+ }))
+ """
+
+ elif plottype == 'lineplot' and not horizontal:
+ rplotcode = "errorbarplot <- with(errorbardataframe,xyplot(%(formulastring)s,"
+ if groups is not None:
+ rplotcode += "groups= %(groups)s,"
+ rplotcode += """
+ pch = 16, type = 'b',
+ auto.key=TRUE,
+ origin = %(origin)f,
+ dsr_ll = %(ll)s,
+ dsr_ul = %(ul)s,
+ panel.groups =
+ function(x, y, ..., dsr_ll, dsr_ul, subscripts) {
+ dsr_ll <- dsr_ll[subscripts]
+ dsr_ul <- dsr_ul[subscripts]
+ panel.segments(as.numeric(x),
+ dsr_ll,
+ as.numeric(x),
+ dsr_ul,
+ ...)
+ panel.xyplot(x, y, ...)
+ }))
+ """
+ elif plottype == 'lineplot' and horizontal:
+ rplotcode = "errorbarplot <- with(errorbardataframe,xyplot(%(formulastring)s,"
+ if groups is not None:
+ rplotcode += "groups= %(groups)s,"
+ rplotcode += """
+ pch = 16, type = 'b',
+ auto.key=TRUE,
+ origin = %(origin)f,
+ dsr_ll = %(ll)s,
+ dsr_ul = %(ul)s,
+ panel.groups =
+ function(x, y, ..., dsr_ll, dsr_ul, subscripts) {
+ dsr_ll <- dsr_ll[subscripts]
+ dsr_ul <- dsr_ul[subscripts]
+ panel.segments(dsr_ll,
+ as.numeric(y),
+ dsr_ul,
+ as.numeric(y),
+ ...)
+ panel.xyplot(x, y, ...)
+ }))
+ """
+ elif plottype == 'dotplot' and not horizontal:
+ rplotcode = "errorbarplot <- with(errorbardataframe,dotplot(%(formulastring)s,"
+ if groups is not None:
+ rplotcode += "groups= %(groups)s,"
+ rplotcode += """pch = 16,
+ auto.key=TRUE,
+ dsr_ll = %(ll)s,
+ dsr_ul = %(ul)s,
+ panel.groups =
+ function(x, y, ..., dsr_ll, dsr_ul, subscripts) {
+ dsr_ll <- dsr_ll[subscripts]
+ dsr_ul <- dsr_ul[subscripts]
+ panel.segments(as.numeric(x),
+ dsr_ll,
+ as.numeric(x),
+ dsr_ul,
+ ...)
+ panel.xyplot(x, y, ...)
+ }))
+ """
+ elif plottype == 'dotplot' and horizontal:
+ rplotcode = "errorbarplot <- with(errorbardataframe,dotplot(%(formulastring)s,"
+ if groups is not None:
+ rplotcode += "groups= %(groups)s,"
+ rplotcode += """pch = 16,
+ auto.key=TRUE,
+ dsr_ll = %(ll)s,
+ dsr_ul = %(ul)s,
+ panel.groups =
+ function(x, y, ..., dsr_ll, dsr_ul, subscripts) {
+ dsr_ll <- dsr_ll[subscripts]
+ dsr_ul <- dsr_ul[subscripts]
+ panel.segments(dsr_ll,
+ as.numeric(y),
+ dsr_ul,
+ as.numeric(y),
+ ...)
+ panel.xyplot(x, y, ...)
+ }))
+ """
+
+
+ print rplotcode % rparameters
+ errorbarplot = with_mode(NO_CONVERSION,r)(rplotcode % rparameters)
+ r.remove('errorbardataframe')
+ return errorbarplot
+
+a = errorbar_macro(plottype='lineplot',datadict=testdata,
+ measure = "dsr",
+ measure_lower_limits='dsr_ll',
+ measure_upper_limits='dsr_ul',
+ condcols=['year','geog_area'],
+ groups='sex')
+# r.print_(a)
+
+b = errorbar_macro(plottype='lineplot',datadict=testdata,
+ measure = "dsr",
+ measure_lower_limits='dsr_ll',
+ measure_upper_limits='dsr_ul',
+ condcols=['year','geog_area'],
+ groups='sex',
+ horizontal=False)
+# r.print_(b)
+
+c = errorbar_macro(plottype='dotplot',datadict=testdata,
+ measure = "dsr",
+ measure_lower_limits='dsr_ll',
+ measure_upper_limits='dsr_ul',
+ condcols=['year','geog_area'],
+ groups='sex')
+# r.print_(c)
+
+d = errorbar_macro(plottype='dotplot',datadict=testdata,
+ measure = "dsr",
+ measure_lower_limits='dsr_ll',
+ measure_upper_limits='dsr_ul',
+ condcols=['year','geog_area'],
+ groups='sex',
+ horizontal=True)
+# r.print_(d)
+
+e = errorbar_macro(plottype='barchart',datadict=testdata,
+ measure = "dsr",
+ measure_lower_limits='dsr_ll',
+ measure_upper_limits='dsr_ul',
+ condcols=['year','geog_area','sex'])
+# r.print_(e)
+
+f = errorbar_macro(plottype='barchart',datadict=testdata,
+ measure = "dsr",
+ measure_lower_limits='dsr_ll',
+ measure_upper_limits='dsr_ul',
+ condcols=['year','geog_area','sex'],
+ horizontal=True)
+# r.print_(f)
+
+# Note that this fails because barcharts with error bars can't handle
+# groups, and without groups (or a second level of panelling), the dataset
+# needs further summarisation. This should not be a problem in SOOM.
+g = errorbar_macro(plottype='barchart',datadict=testdata,
+ measure = "dsr",
+ measure_lower_limits='dsr_ll',
+ measure_upper_limits='dsr_ul',
+ condcols=['year','geog_area'])
+
+r.print_(a)
+x = raw_input("Hit enter for next graph")
+r.print_(b)
+x = raw_input("Hit enter for next graph")
+r.print_(c)
+x = raw_input("Hit enter for next graph")
+r.print_(d)
+x = raw_input("Hit enter for next graph")
+r.print_(e)
+x = raw_input("Hit enter for next graph")
+r.print_(f)
+x = raw_input("Hit enter for next graph - which is known not to look right")
+r.print_(g)
+
+"""
+
+# this works as expected, but not sure what the error messages mean
+with(testdata,barchart(geog_area ~ dsr | year + sex,
+ origin = 0,
+ dsr_ul = dsr_ul,
+ dsr_ll = dsr_ll,
+ panel = function(x, y, ..., dsr_ll, dsr_ul, subscripts) {
+ panel.barchart(x, y, subscripts, ...)
+ dsr_ll <- dsr_ll[subscripts]
+ dsr_ul <- dsr_ul[subscripts]
+ panel.segments(dsr_ll,
+ as.numeric(y),
+ dsr_ul,
+ as.numeric(y),
+ col = 'red', lwd = 2)}
+ ))
+
+# no idea what I am doing wrong here, but there is not one bar per group... something
+# to do with panel.groups???
+with(testdata,barchart(geog_area ~ dsr | year, groups=sex,
+ origin = 0,
+ dsr_ul = dsr_ul,
+ dsr_ll = dsr_ll,
+ panel = function(x, y, ..., dsr_ll, dsr_ul, subscripts, groups) {
+ panel.barchart(x, y, subscripts, groups, ...)
+ dsr_ll <- dsr_ll[subscripts]
+ dsr_ul <- dsr_ul[subscripts]
+ panel.segments(dsr_ll,
+ as.numeric(y),
+ dsr_ul,
+ as.numeric(y),
+ col = 'red', lwd = 2)}
+ ))
+
+# successfully does dotplots with groups
+with(testdata,
+ dotplot(geog_area ~ dsr | year,
+ groups=sex, pch = 16,
+ dsr_ul = dsr_ul,
+ dsr_ll = dsr_ll,
+ panel.groups =
+ function(x, y, ...,
+ dsr_ll, dsr_ul,
+ subscripts) {
+ dsr_ll <- dsr_ll[subscripts]
+ dsr_ul <- dsr_ul[subscripts]
+ panel.segments(dsr_ll,
+ as.numeric(y),
+ dsr_ul,
+ as.numeric(y),
+ ...)
+ panel.xyplot(x, y, ...)
+ }))
+
+# with two levels of panelling
+with(testdata,
+ dotplot(geog_area ~ dsr | year + age,
+ groups=sex, pch = 16,
+ dsr_ul = dsr_ul,
+ dsr_ll = dsr_ll,
+ panel.groups =
+ function(x, y, ...,
+ dsr_ll, dsr_ul,
+ subscripts) {
+ dsr_ll <- dsr_ll[subscripts]
+ dsr_ul <- dsr_ul[subscripts]
+ panel.segments(dsr_ll,
+ as.numeric(y),
+ dsr_ul,
+ as.numeric(y),
+ ...)
+ panel.xyplot(x, y, ...)
+ }))
+
+# with no panelling
+with(testdata,
+ dotplot(geog_area ~ dsr ,
+ groups=sex, pch = 16,
+ dsr_ul = dsr_ul,
+ dsr_ll = dsr_ll,
+ panel.groups =
+ function(x, y, ...,
+ dsr_ll, dsr_ul,
+ subscripts) {
+ dsr_ll <- dsr_ll[subscripts]
+ dsr_ul <- dsr_ul[subscripts]
+ panel.segments(dsr_ll,
+ as.numeric(y),
+ dsr_ul,
+ as.numeric(y),
+ ...)
+ panel.xyplot(x, y, ...)
+ }))
+
+# vertical - note need to swap x and y in the panel.segments function, though.
+with(testdata,
+ dotplot(dsr ~ geog_area | year,
+ groups=sex, pch = 16, auto.key=TRUE,
+ dsr_ul = dsr_ul,
+ dsr_ll = dsr_ll,
+ panel.groups =
+ function(x, y, ...,
+ dsr_ll, dsr_ul,
+ subscripts) {
+ dsr_ll <- dsr_ll[subscripts]
+ dsr_ul <- dsr_ul[subscripts]
+ panel.segments(as.numeric(x),
+ dsr_ll,
+ as.numeric(x),
+ dsr_ul,
+ ...)
+ panel.xyplot(x, y, ...)
+ }))
+
+# using horizontal=TRUE - unable to get this to work...
+with(testdata,
+ dotplot(dsr ~ geog_area | year,
+ groups=sex, pch = 16, horizontal=TRUE,
+ dsr_ul = dsr_ul,
+ dsr_ll = dsr_ll,
+ panel.groups =
+ function(x, y, ...,
+ dsr_ll, dsr_ul,
+ subscripts, horizontal) {
+ dsr_ll <- dsr_ll[subscripts]
+ dsr_ul <- dsr_ul[subscripts]
+ panel.segments(dsr_ll,
+ as.numeric(y),
+ dsr_ul,
+ as.numeric(y),
+ ...)
+ panel.xyplot(x, y, ...)
+ }))
+
+
+# lineplot
+with(testdata,
+ xyplot(dsr ~ year | geog_area,
+ groups=sex, pch = 16, type="b",
+ auto.key=TRUE,
+ dsr_ul = dsr_ul,
+ dsr_ll = dsr_ll,
+ panel.groups =
+ function(x, y, ...,
+ dsr_ll, dsr_ul,
+ subscripts) {
+ dsr_ll <- dsr_ll[subscripts]
+ dsr_ul <- dsr_ul[subscripts]
+ panel.segments(as.numeric(x),
+ dsr_ll,
+ as.numeric(x),
+ dsr_ul,
+ ...)
+ panel.xyplot(x, y, ...)
+
+ }))
+"""
diff --git a/sandbox/dobson.lst b/sandbox/dobson.lst
new file mode 100644
index 0000000..f4909bf
--- /dev/null
+++ b/sandbox/dobson.lst
@@ -0,0 +1,134 @@
+ The SAS System 16:35 Thursday, June 30, 2005 14
+
+ Obs count sex agegrp
+
+ 1 685 1 1
+ 2 657 2 1
+ 3 66 1 2
+ 4 34 2 2
+ 5 58 1 3
+ 6 42 2 3
+ 7 98 1 4
+ 8 193 2 4
+ 9 108 1 5
+ 10 384 2 5
+ 11 109 1 6
+ 12 454 2 6
+ 13 154 1 7
+ 14 436 2 7
+ 15 213 1 8
+ 16 396 2 8
+ 17 223 1 9
+ 18 262 2 9
+ 19 283 1 10
+ 20 274 2 10
+ 21 280 1 11
+ 22 273 2 11
+ 23 270 1 12
+ 24 232 2 12
+ 25 314 1 13
+ 26 282 2 13
+ 27 378 1 14
+ 28 352 2 14
+ 29 420 1 15
+ 30 395 2 15
+ 31 325 1 16
+ 32 386 2 16
+ 33 195 1 17
+ 34 297 2 17
+ 35 147 1 18
+ 36 325 2 18
+ The SAS System 16:35 Thursday, June 30, 2005 15
+
+ Obs pop sex agegrp
+
+ 1 10145000 1 1
+ 2 9680000 2 1
+ 3 10413000 1 2
+ 4 9932000 2 2
+ 5 10031000 1 3
+ 6 9548000 2 3
+ 7 10011000 1 4
+ 8 9472000 2 4
+ 9 9000000 1 5
+ 10 8772000 2 5
+ 11 9596000 1 6
+ 12 9661000 2 6
+ 13 10416000 1 7
+ 14 10495000 2 7
+ 15 11316000 1 8
+ 16 11410000 2 8
+ 17 10657000 1 9
+ 18 10837000 2 9
+ 19 9138000 1 10
+ 20 9443000 2 10
+ 21 7346000 1 11
+ 22 7716000 2 11
+ 23 5614000 1 12
+ 24 6050000 2 12
+ 25 4712000 1 13
+ 26 5246000 2 13
+ 27 4432000 1 14
+ 28 5235000 2 14
+ 29 3786000 1 15
+ 30 4871000 2 15
+ 31 2903000 1 16
+ 32 4101000 2 16
+ 33 1708000 1 17
+ 34 2896000 2 17
+ 35 1097000 1 18
+ 36 2686000 2 18
+ The SAS System 16:35 Thursday, June 30, 2005 16
+
+ Obs pop sex agegrp
+
+ 1 88600 1 1
+ 2 88600 2 1
+ 3 86900 1 2
+ 4 86900 2 2
+ 5 86000 1 3
+ 6 86000 2 3
+ 7 84700 1 4
+ 8 84700 2 4
+ 9 82200 1 5
+ 10 82200 2 5
+ 11 79300 1 6
+ 12 79300 2 6
+ 13 76100 1 7
+ 14 76100 2 7
+ 15 71500 1 8
+ 16 71500 2 8
+ 17 65900 1 9
+ 18 65900 2 9
+ 19 60400 1 10
+ 20 60400 2 10
+ 21 53700 1 11
+ 22 53700 2 11
+ 23 45500 1 12
+ 24 45500 2 12
+ 25 37200 1 13
+ 26 37200 2 13
+ 27 29600 1 14
+ 28 29600 2 14
+ 29 22100 1 15
+ 30 22100 2 15
+ 31 15200 1 16
+ 32 15200 2 16
+ 33 9100 1 17
+ 34 9100 2 17
+ 35 6000 1 18
+ 36 6000 2 18
+ The SAS System 16:35 Thursday, June 30, 2005 17
+
+ Obs sp_evnt sp_pop cr_rate dst_rate lcs99 ucs99 lcs95 ucs95 sex _TYPE_ _FREQ_
+
+ 1 4326 132321000 3.26932 3.07021 2.94953 3.19434 2.97804 3.16448 Males 1 18
+ 2 5674 138051000 4.11008 3.72456 3.59202 3.86038 3.62338 3.82774 Females 1 18
+ 3 10000 270372000 3.69861 3.38200 3.29268 3.47299 3.31387 3.45115 Persons 1 18
+
+ Obs sp_rate se_rate sumsei stpop u_lam99 u_lam95 l_lam99 l_lam95 lci uci
+
+ 1 .000907488 .002244722 1699783.62 1000000 4498.32 4456.87 4158.46 4198.04 4198.04 4456.87
+ 2 .000909773 .002699302 2346329.37 1000000 5870.92 5823.59 5481.85 5527.31 5527.31 5823.59
+ 3 .000899533 .001220158 2018731.51 1000000 10260.47 10197.95 9744.30 9804.95 9804.95 10197.95
+
diff --git a/sandbox/dobson.sas b/sandbox/dobson.sas
new file mode 100644
index 0000000..af1de81
--- /dev/null
+++ b/sandbox/dobson.sas
@@ -0,0 +1,161 @@
+/*
+ * The contents of this file are subject to the HACOS License Version 1.2
+ * (the "License"); you may not use this file except in compliance with
+ * the License. Software distributed under the License is distributed
+ * on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ * implied. See the LICENSE file for the specific language governing
+ * rights and limitations under the License. The Original Software
+ * is "NetEpi Analysis". The Initial Developer of the Original
+ * Software is the Health Administration Corporation, incorporated in
+ * the State of New South Wales, Australia.
+ *
+ * Copyright (C) 2004,2005 Health Administration Corporation.
+ * All Rights Reserved.
+ */
+
+data counts ;
+input count sex $ agegrp ;
+cards ;
+685 1 1
+657 2 1
+66 1 2
+34 2 2
+58 1 3
+42 2 3
+98 1 4
+193 2 4
+108 1 5
+384 2 5
+109 1 6
+454 2 6
+154 1 7
+436 2 7
+213 1 8
+396 2 8
+223 1 9
+262 2 9
+283 1 10
+274 2 10
+280 1 11
+273 2 11
+270 1 12
+232 2 12
+314 1 13
+282 2 13
+378 1 14
+352 2 14
+420 1 15
+395 2 15
+325 1 16
+386 2 16
+195 1 17
+297 2 17
+147 1 18
+325 2 18
+;
+run ;
+
+data popn ;
+input pop sex $ agegrp ;
+cards ;
+10145000 1 1
+9680000 2 1
+10413000 1 2
+9932000 2 2
+10031000 1 3
+9548000 2 3
+10011000 1 4
+9472000 2 4
+9000000 1 5
+8772000 2 5
+9596000 1 6
+9661000 2 6
+10416000 1 7
+10495000 2 7
+11316000 1 8
+11410000 2 8
+10657000 1 9
+10837000 2 9
+9138000 1 10
+9443000 2 10
+7346000 1 11
+7716000 2 11
+5614000 1 12
+6050000 2 12
+4712000 1 13
+5246000 2 13
+4432000 1 14
+5235000 2 14
+3786000 1 15
+4871000 2 15
+2903000 1 16
+4101000 2 16
+1708000 1 17
+2896000 2 17
+1097000 1 18
+2686000 2 18
+;
+run ;
+
+data stdpop ;
+input pop sex $ agegrp ;
+cards ;
+88600 1 1
+88600 2 1
+86900 1 2
+86900 2 2
+86000 1 3
+86000 2 3
+84700 1 4
+84700 2 4
+82200 1 5
+82200 2 5
+79300 1 6
+79300 2 6
+76100 1 7
+76100 2 7
+71500 1 8
+71500 2 8
+65900 1 9
+65900 2 9
+60400 1 10
+60400 2 10
+53700 1 11
+53700 2 11
+45500 1 12
+45500 2 12
+37200 1 13
+37200 2 13
+29600 1 14
+29600 2 14
+22100 1 15
+22100 2 15
+15200 1 16
+15200 2 16
+ 9100 1 17
+ 9100 2 17
+ 6000 1 18
+ 6000 2 18
+ ;
+ run ;
+
+proc print data=counts ;
+ run ;
+
+proc print data=popn ;
+ run ;
+
+proc print data=stdpop ;
+ run ;
+
+%dstand(stdpop = %str(stdpop),
+ spevents = %str(counts),
+ sppop = %str(popn),
+ countvar = %str(count),
+ agegrps = 1 to 18,
+ basepop = 100000,
+ outds = sumevnt1
+ ) ;
+proc print data=sumevnt1 ;
+ run ;
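+
+/* %dstand is assumed to be a site macro (it is not defined in this
+   file); judging from the output in sandbox/dobson.lst it computes
+   crude and directly age-standardised rates per BASEPOP (here
+   100,000), with 95% and 99% confidence limits. */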
+
diff --git a/sandbox/martinstats.py b/sandbox/martinstats.py
new file mode 100644
index 0000000..5d1bffb
--- /dev/null
+++ b/sandbox/martinstats.py
@@ -0,0 +1,857 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: martinstats.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/sandbox/martinstats.py,v $
+
+# reverse-engineered version of OpenEpi MartinStats.js module - calculates
+# Fisher's exact and conditional maximum likelihood estimates
+
+import sys
+# Some constants
+MAXDEGREE = 100000 # max degree of polynomial
+MAXITER = 10000 # max number of iterations to bracket/converge to a root
+TOLERANCE = 0.0000001 # relative tolerance level for results
+INFINITY = -sys.maxint # used to represent infinity
+NAN = None # used to represent Not a Number
+
+def CheckData(datatype, tables):
+ curTbl = list()
+ sumA = 0
+ minSumA = 0
+ maxSumA = 0
+ for i in range(len(tables)):
+ curTbl = tables[i]
+
+
+
+class MartinAustin(object): # class for Martin-Austin exact stats
+
+ def __init__(self,datatype=1,conflevel=0.95):
+ "Datatypes: 1=stratified case-control, 2=matched case-control, 3=stratified person-time"
+ self.conflevel = conflevel
+ if datatype not in (1,):
+ raise ValueError, 'datatype must be 1' # not supporting other types yet
+ else:
+ self.datatype = datatype
+ # Some constants
+ self.MAXDEGREE = 100000 # max degree of polynomial
+ self.MAXITER = 10000 # max number of iterations to bracket/converge to a root
+ self.TOLERANCE = 0.0000001 # relative tolerance level for results
+ self.INFINITY = -sys.maxint # used to represent infinity
+ self.NAN = None # used to represent Not a Number
+ # other attributes
+ self.NumColumns = None
+ self.NumRows = None
+ self.NumStrata = None
+ self.Tables = list() # List to be filled with Table objects
+
+ self.sumA=0 # Sum of the observed "a" cells }
+ self.minSumA=0 # Lowest value of "a" cell sum w/ given margins }
+ self.maxSumA=0 # Highest value of "a" cell sum w/ given margins }
+
+ self.polyD = list() # Vector of polynomial of conditional coefficients
+ self.degD = 0 # The degree of polyD
+
+ self.value =0.0 # Used in defining Func???
+
+ self.polyN = list() # Vector - The "numerator" polynomial in Func
+ self.degN =0
+
+ def add_Table(self,e1d1,e0d1,e1d0,e0d0):
+ self.Tables.append(MAstratum(e1d1,e0d1,e1d0,e0d0,datatype=1,conflevel=self.conflevel))
+
+ def _CalcExactLim(self,pbLower, pbFisher, pvApprox, pnConfLevel):
+ pnLimit = None
+ if (self.minSumA < self.sumA) and (self.sumA < self.maxSumA):
+ pnLimit = self._GetExactLim(pbLower, pbFisher, pvApprox, pnConfLevel)
+ elif self.sumA == self.minSumA:
+ # Point estimate = 0 => pbLower pnLimit = 0
+ if pbLower:
+ pnLimit = 0
+ else:
+ pnLimit = self._GetExactLim(pbLower, pbFisher, pvApprox, pnConfLevel)
+ elif self.sumA == self.maxSumA:
+ # Point estimate = inf => upper pnLimit = inf
+ if not pbLower:
+ pnLimit = self.INFINITY
+ else:
+ pnLimit = self._GetExactLim(pbLower, pbFisher, pvApprox, pnConfLevel)
+ return pnLimit
+
+ def _GetExactLim(self, pbLower, pbFisher, pvApprox, pnConfLevel):
+ # var i, error //
+ pvLimit = None
+ if pbLower:
+ self.value = 0.5 * (1 + pnConfLevel) # = 1 - alpha / 2
+ else:
+ self.value = 0.5 * (1 - pnConfLevel) # = alpha / 2
+ if pbLower and pbFisher:
+ # Degree of numerator poly
+ self.degN = self.sumA - self.minSumA - 1
+ else:
+ self.degN = self.sumA - self.minSumA
+
+ # re-dimension polyN(degN)
+ self.polyN = self.polyD[:self.degN + 1] # self.degN != self.degD => polyN != polyD
+
+ if not pbFisher:
+ self.polyN[self.degN] = (0.5) * self.polyD[self.degN] # Mid-P adjustment
+ pvLimit = self._Converge(pvApprox) # Solves so that Func(pvLimit) = 0
+ return pvLimit
+
+ def _CalcCmle(self,approx):
+ # var cmle
+ if (self.minSumA < self.sumA) and (self.sumA < self.maxSumA):
+ # Can calc point estimate
+ cmle = self._GetCmle(approx)
+ elif self.sumA == self.minSumA:
+ # Point estimate = 0
+ cmle = 0
+ elif self.sumA == self.maxSumA:
+ # Point estimate = inf
+ cmle = self.INFINITY
+ return cmle
+
+ def _GetCmle(self,approx):
+ # var i, error,cmle
+ self.value = self.sumA # The sum of the observed "a" cells
+ self.degN = self.degD # Degree of the numerator polynomial
+ self.polyN = list()
+ for i in range(self.degN+1): # Defines the numerator polynomial
+ self.polyN.append((self.minSumA + i) * self.polyD[i])
+ cmle = self._Converge(approx) # Solves so that Func(cmle) = 0
+ return cmle
+
+ # This routine returns the exact P-values as defined in "Modern
+ # Epidemiology" by K. J. Rothman (Little, Brown, and Co., 1986).
+
+ def _CalcExactPVals(self):
+ # var i, diff # Index; sumA - minSumA
+ # var upTail, denom # Upper tail; the whole distribution
+ pValues = dict()
+ diff = self.sumA - self.minSumA
+ upTail = self.polyD[self.degD]
+ for i in range(self.degD - 1, diff - 1,-1):
+ upTail = upTail + self.polyD[i]
+ denom = upTail
+ for i in range(diff - 1, -1, -1):
+ denom = denom + self.polyD[i]
+ pValues["upFishP"] = upTail / float(denom)
+ pValues["loFishP"] = 1.0 - (upTail - self.polyD[diff]) / float(denom)
+ pValues["upMidP"] = (upTail - 0.5 * self.polyD[diff]) / float(denom)
+ pValues["loMidP"] = 1.0 - pValues["upMidP"]
+ return pValues
+
+ # The functions that follow (_BracketRoot, _Zero, and _Converge) locate a zero
+ # to the function Func .
+
+ def _Zero(self, x0, x1, f0, f1):
+ # Takes in an array of x0,x1,f0, and f1 and returns a root or an error
+ # var root
+ found = False # Flags that a root has been found
+ root = None # returned as None if no root is found
+ # var x2, f2, swap # Newest point, Func(X2), storage variable
+ # var iter # Current number of iterations
+ error = 0
+ iter = 0
+ if abs(f0) < abs(f1):
+ # Make X1 best approx to root
+ x1, x0 = x0, x1
+ f1, f0 = f0, f1
+ if f1 == 0:
+ found = True
+ if not found and (f0 * f1) > 0:
+ error = 1 # Root not bracketed
+ while not found and iter < self.MAXITER and error == 0:
+ iter += 1
+ x2 = x1 - f1 * (x1 - x0) / float(f1 - f0)
+ f2 = self._Func(x2)
+ if f1 * f2 < 0:
+ # x0 not retained}
+ x0 = x1
+ f0 = f1
+ else:
+ # x0 retained => modify f0 }
+ f0 = f0 * f1 / float(f1 + f2) # The Pegasus modification
+ x1 = x2
+ f1 = f2
+ if abs(x1 - x0) < abs(x1) * self.TOLERANCE or f1 == 0:
+ found = True
+ root = x1 # Estimated root
+ if not found and iter >= self.MAXITER and error == 0:
+ error=2 # Too many iterations
+ return root, error
+
+ def _BracketRoot(self, approx):
+ # Returns x0,x1,f1,f0
+ iter = 0
+ x1 = max(0.5, approx) # x1 is the upper bound
+ x0 = 0 # x0 is the lower bound
+ f0 = self._Func(x0) # Func at x0
+ f1 = self._Func(x1) # Func at x1
+ while f1 * f0 > 0.0 and iter < self.MAXITER:
+ iter += 1
+ x0 = x1 # previous upper bound becomes the new lower bound of the bracket
+ f0 = f1
+ x1 = x1 * 1.5 * iter
+ f1 = self._Func(x1)
+ return x0, x1, f0, f1
+
+
+ # This routine returns the root of Func above on the interval [0, infinity].
+ def _Converge(self,approx):
+ # Returns the root or an error
+ # var rootc
+ # var error
+ x0, x1, f0, f1 = self._BracketRoot(approx)
+ rootc, error = self._Zero(x0, x1, f0, f1)
+ if error==0:
+ return rootc
+ else:
+ return self.NAN
+
+ # Below is a routine for evaluating a polynomial. If the value at which the
+ # polynomial is being evaluated is > 1.0 then the polynomial is divided
+ # through by R^(degree of the poly). This helps to prevent floating point
+ # overflows but must be taken into account when evaluating Func below.
+
+ def _EvalPoly(self, c, degC, r):
+ # var i
+ # var y
+ if r == 0:
+ y = c[0]
+ elif r <= 1:
+ y = c[degC]
+ if r < 1:
+ for i in range(degC - 1, -1, -1):
+ y = y * r + c[i]
+ else:
+ for i in range(degC - 1, -1, -1):
+ y = y + c[i]
+ elif r > 1:
+ y = c[0]
+ r = 1 / float(r)
+ for i in range(1,degC +1):
+ y = y * r + c[i]
+ return y
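+ # Note (added for clarity): when r > 1 the loop above evaluates the
+ # polynomial in 1/r, so the value returned is poly(r) / r**degC rather
+ # than poly(r) itself; _Func compensates with its r**(degD - degN) factor.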
+
+ def _Func(self, r):
+ # var numer , denom
+ numer = self._EvalPoly(self.polyN, self.degN, r)
+ denom = self._EvalPoly(self.polyD, self.degD, r)
+ if r <= 1:
+ return numer / float(denom) - (self.value)
+ else:
+ return (numer / float(r**(self.degD - self.degN))) / float(denom) - self.value
+
+ # Following is the key routine which outputs the "main" polynomial of
+ # conditional distribution coefficients to be used to compute exact
+ # estimates.
+
+ def _CalcPoly(self, DataType):
+ # This routine outputs the "main" polynomial of conditional distribution
+ # coefficients which will subsequently be used to calculate the conditional
+ # maximum likelihood estimate, exact confidence limits, and exact P-values.
+ # The results are placed in the global variables, this.polyD and this.degD.
+ # For a given data set, this routine MUST be called once before calling
+ # CalcExactPVals(), CalcCmle(), and CalcExactLim(). Note that DATATYPE
+ # indicates the type of data to be analyzed (1 = stratified case-control,
+ # 2 = matched case-control, 3 = stratified person-time).
+ self.polyD = list() # start with a clean coefficient vector (appended to below)
+ poly1 = list()
+ poly2 = list() # Intermediate polynomials
+ # var i, j, deg1, deg2 # Index; degree of polynomials poly1 & poly2
+ CurTable = self.Tables[0]
+ if DataType==1:
+ self.degD = self._PolyStratCC(CurTable, self.polyD)
+ elif DataType==2:
+ self.degD = self._PolyMatchCC(CurTable, self.polyD)
+ elif DataType==3:
+ self.degD = self._PolyStratPT(CurTable, self.polyD)
+ else:
+ raise ValueError, "DataType must be 1, 2 or 3."
+
+ for i in range(1, len(self.Tables)):
+ CurTable = self.Tables[i]
+ if CurTable.informative:
+ deg1 = self.degD
+ poly1 = self.polyD[:deg1 + 1] # copy self.polyD to poly1
+ poly2 = list() # reinitialise the second factor for this stratum
+ if DataType==1:
+ deg2 = self._PolyStratCC(CurTable, poly2)
+ elif DataType==2:
+ deg2 = self._PolyMatchCC(CurTable, poly2)
+ elif DataType==3:
+ deg2 = self._PolyStratPT(CurTable, poly2)
+ else:
+ raise ValueError, "DataType must be 1, 2 or 3."
+ self.degD = self._MultPoly( poly1, poly2, deg1, deg2, self.polyD)
+
+ # This routine multiplies together two polynomials p1 and p2 to obtain
+ # the product polynomial P3. Reference: "Algorithms" 2nd ed., by R.
+ # Sedgewick (Addison-Wesley, 1988), p. 522.
+ def _MultPoly(self, p1 , p2 , deg1, deg2, p3 ):
+ # p1, p2 are the two polynomials
+ # deg1, deg2 are the degrees of the above polynomials
+ # p3 is the product polynomial of p1 * p2
+ # deg3 is the degree of the product polynomial
+ # var i, j
+ deg3 = deg1 + deg2
+ del p3[:] # reset the output list in place so the caller sees the result
+ for i in range(deg3 + 1):
+ p3.append(0.0)
+ for i in range(deg1 + 1):
+ for j in range(deg2 + 1):
+ p3[i + j] = p1[i] * p2[j] + p3[i + j]
+ return deg3
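+ # Worked example (added): p1 = [1, 1] and p2 = [1, 1], i.e. (1 + r)
+ # squared, yields p3 = [1, 2, 1] = 1 + 2r + r**2 with deg3 = 2.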
+
+ def _Comb(self, y, x):
+ # Returns the combination y choose x.
+ # var i
+ # var f
+ f = 1.0
+ for i in range(1,round(min(x,y-x)) + 1):
+ f = f * y / float(i)
+ y = y - 1.0
+ return f
+
+ # Routines are given to calculate stratum-specific polynomials
+ def _PolyStratCC(self, Table, polyDi):
+ # var i
+ # var minA, maxA, aa, bb, cc, dd
+ degDi = 0
+ polyDi.append(1.0) # Python lists do not auto-extend like JS arrays
+ if Table.informative:
+ minA = max(0, Table.m1 - Table.n0) # Min val of the "a" cell w/ these margins
+ maxA = min(Table.m1, Table.n1) # Max val of the "a" cell w/ these margins
+ degDi = int(round(maxA - minA)) # The degree of this table's polynomial
+ aa = minA # Corresponds to the "a" cell
+ bb = Table.m1 - minA + 1 # Corresponds to the "b" cell
+ cc = Table.n1 - minA + 1 # Corresponds to the "c" cell
+ dd = Table.n0 - Table.m1 + minA # Corresponds to the "d" cell
+ for i in range(1, degDi + 1):
+ polyDi.append(polyDi[i - 1] * ((bb - i) / float(aa + i)) * ((cc - i) / float(dd + i)))
+ return degDi
+
+ def _BinomialExpansion(self, c0, c1, f, p, degp):
+ # var i
+ degp = f
+ p[degp] = c1**degp
+ for i in range(degp - 1, -1, -1):
+ p[i] = p[i + 1] * c0 * (i + 1) / float(c1 *(degp - i))
+ return c0, c1, f, p, degp
+
+ def _PolyMatchCC(self, Table, polyEi):
+ # var c0, c1
+ degEi = 0
+ polyEi.append(1.0)
+ if Table.informative:
+ c0 = (self._Comb(Table.n1, 0) * self._Comb(Table.n0, Table.m1)) # Corresponds to 0 in "a" cell
+ c1 = (self._Comb(Table.n1, 1) * self._Comb(Table.n0, Table.m1 - 1)) # Corresponds to 1 in "a" cell
+ c0, c1, freq, polyEi, degEi = self._BinomialExpansion(c0, c1, Table.freq, polyEi, degEi) # freq was undefined here; Table.freq assumed
+ return len(polyEi) - 1 # Need to subtract 1???
+
+
+ def _PolyStratPT(self, Table, polyDi):
+ degDi = 0
+ polyDi.append(1.0)
+ if Table.informative:
+ self._BinomialExpansion((Table.n0 / float(Table.n1)), 1.0, round(Table.m1), polyDi, degDi)
+ return len(polyDi) - 1
+
+
+ def _CheckData(self, DataType):
+ # numTables Number of "unique" 2x2 tables
+ # var tables list of 2x2 table data
+ # var error Flags error in data
+ # This routine determines if the data allow exact estimates to be calculated.
+ # It MUST be called once prior to calling CalcPoly() given below. DATATYPE
+ # indicates the type of data to be analyzed (1 = stratified case-control,
+ # 2 = matched case-control, 3 = stratified person-time). Exact estimates
+ # can only be calculated if ERROR = 0.
+ #
+ # Errors : 0 = can calc exact estimates, i.e., no error,
+ # 1 = too much data (MAXDEGREE too small),
+ # 2 = no informative strata.
+ # 3 = More than one case in a Matched Table (added July 21, 1998)
+ # var i
+ curTbl = list()
+ error = 0
+ # Compute the global vars SUMA, MINSUMA, MAXSUMA
+ self.sumA = 0
+ self.minSumA = 0
+ self.maxSumA = 0
+ for i in range(len(self.Tables)):
+ curTbl = self.Tables[i]
+ if curTbl.informative:
+ self.sumA += int(round(curTbl.a * curTbl.freq))
+ if DataType in (1,2):
+ # Case-control data
+ self.minSumA += int(round(max(0, curTbl.m1 - curTbl.n0) * curTbl.freq))
+ self.maxSumA += int(round(min(curTbl.m1, curTbl.n1) * curTbl.freq))
+ else:
+ # Person-time data
+ self.minSumA = 0
+ self.maxSumA = int(round(curTbl.m1 * curTbl.freq)) + self.maxSumA
+
+ # Check for errors
+ if self.maxSumA - self.minSumA > self.MAXDEGREE:
+ # Poly too small
+ error = 1
+ elif self.minSumA == self.maxSumA:
+ # No informative strata
+ error = 2
+ elif DataType == 2 and curTbl.a > 1:
+ error = 3
+ return error
+
+ def _Strat2x2(self, stratum, ORbased, RRbased, assoc, references):
+ self._Process(stratum, 1, self.confLevel, ORbased, RRbased, assoc, references)
+
+ def _MatchedCC(self, stratum, ORbased, RRbased, assoc, references):
+ self._Process(stratum, 2, self.confLevel, ORbased, RRbased, assoc, references)
+
+ def _PersonTime(self, stratum, ORbased, RRbased, assoc, references):
+ self._Process(stratum, 3, self.confLevel, ORbased, RRbased, assoc, references)
+
+ def _addCCTbl(self, a, b, c, d, freq, tableArray):
+ # per the original note, tbl should be an MAstratum instance (a dict
+ # does not support attribute access); MAstratum computes m1, n1, n0
+ # and the informative flag for us
+ tbl = MAstratum(a, b, c, d)
+ tbl.freq = freq
+ tableArray.append(tbl)
+ # UP TO HERE - everything from _Process down is still largely the
+ # original OpenEpi JavaScript, kept verbatim as a reference for the
+ # remaining translation work
+
+def _Process(self, stratum, DataType, pnConfLevel, ORbased, RRbased, assoc, references):
+ # var b, c, d // { b, c, d cells of 2x2 table }
+ # var numTables // { Number of "unique" 2x2 tables }
+
+ # var cmle // { Odds Ratio (cond. max. likelihood estimate) }
+ # var loFishLim // { Lower exact Fisher confidence limit }
+ # var upFishLim // { Upper exact Fisher confidence limit }
+ # var loMidPLim // { Lower mid-P confidence limit }
+ # var upMidPLim // { Upper mid-P confidence limit }
+ # var approx // { An approximation to the exact estimate }
+ # var error // { Error in procedure CheckData }
+ # var s // temporary string
+ # var i //temporary number
+ # var errornum //
+ # this.resultString = "" //
+ # this.resultArray= new Array()
+ # var NumColumns = this.cmdObj.data[0].cols
+ # var NumRows = this.cmdObj.data[0].rows
+ # var firststratum, laststratum,NumStrata;
+ # // initialize variables
+ self.pValues = list() # Holds all 4 p values
+ self.polyD = list() # The polynomial of conditional coefficients
+ self.degD =0 # The degree of polyD
+ self.value =0.0 # Used in defining Func
+ self.polyN = list() # The "numerator" polynomial in Func
+ self.degN =0;
+ self.cl = pnConfLevel*100 # Confidence limit for output formatting
+
+ self.Tables = list() # initialize Tables array
+ # check this
+ NumStrata = this.cmdObj.data[0].strata; //May be reset to 1 if only one table is currently
+ //being processed
+ var totalstrata=this.cmdObj.data[0].strata; //Number of strata in dataset
+
+ if (stratum==0) {stratum="Adjusted"}
+ if (stratum=="Adjusted")
+ {
+
+ firststratum=1
+ laststratum=NumStrata
+ // alert("stratum=all "+firststratum+" "+laststratum)
+ }
+ else if (parseInt(stratum)>0)
+ {
+ NumStrata=1;
+ firststratum=stratum
+ laststratum=stratum
+ }
+ else if (stratum=="Crude")
+ {
+ dataTable=this.cmdObj.crudeTable()
+ firststratum=1
+ laststratum=1
+ }
+ var tbl = new Array()
+ //alert("as defined tbl="+tbl)
+ //ReDim Tables(NumStrata - 1)
+ for (i = firststratum; i<= laststratum; i++)
+ {
+ tbl=new Array()
+ //tbl=null;
+ //tbl.length=0;
+ if ( DataType == 1 )
+ {
+ // { Stratified case-control }
+ if (stratum=="Crude")
+ {
+ tbl.a = dataTable.E1D1
+ tbl.b = dataTable.E0D1
+ tbl.c = dataTable.E1D0
+ tbl.d = dataTable.E0D0
+ /*
+ alert("tbl.a crude="+tbl.a
+ +"\nb="+tbl.b
+ +"\nc="+tbl.c
+ +"\nd="+tbl.d)
+ */
+ }
+ else
+ {
+ tbl.a = this.cmdObj.data[i].E1D1
+ tbl.b = this.cmdObj.data[i].E0D1
+ tbl.c = this.cmdObj.data[i].E1D0
+ tbl.d = this.cmdObj.data[i].E0D0
+ }
+ tbl.freq = 1 //
+ tbl.m1 = tbl.a + tbl.b // { # cases }
+ tbl.n1 = tbl.a + tbl.c // { # exposed }
+ tbl.n0 = tbl.b + tbl.d // { # unexposed }
+ tbl.informative = ((tbl.a * tbl.d) != 0) || ((tbl.b * tbl.c) != 0)
+ /*
+ alert( "stratum="+stratum+
+ "\ni="+i+
+ "\na="+tbl.a+
+ "\nb="+tbl.b+
+ "\nc="+tbl.c+
+ "\nd="+tbl.d+
+ "\nfreq="+tbl.freq+
+ "\nm1="+tbl.m1+
+ "\nn1="+tbl.n1+
+ "\nn0="+tbl.n0+
+ "\ninformative="+tbl.informative+
+ "\nthis.cmdObj.data[1].E1D1="+this.cmdObj.data[1].E1D1)
+ */
+ }
+ else if(DataType == 2)
+ {
+
+ // Matched case-control
+ if (this.cmdObj.data[0].numD==2 &&
+ this.cmdObj.data[0].numE==2)
+ {
+ //Two by two table. Must be 1 to 1 matching.
+ //Other tables can be accomodated, but right now, we
+ //are setting up tables only for 1 to 1 matching.
+ //There are four possible tables, with frequencies
+ //represented by the counts of pairs entered in OpenEpi.
+ addCCTbl(1,0,0,1,this.cmdObj.data[i].E1D0,this.Tables);
+ addCCTbl(1,0,1,0,this.cmdObj.data[i].E1D1,this.Tables);
+ addCCTbl(0,1,0,1,this.cmdObj.data[i].E0D0,this.Tables);
+ addCCTbl(0,1,1,0,this.cmdObj.data[i].E0D1,this.Tables);
+
+
+ }
+ /* tbl.a = this.cmdObj.data[i].E1D1 //
+ tbl.c = this.cmdObj.data[i].E1D0 //
+ tbl.d = this.cmdObj.data[i].E0D0 //
+ // tbl.freq = pvaDataArray(2, 2, i) //
+
+ if ( tbl.a <= 1 )
+ {
+ b = 1 - tbl.a
+ }
+ else
+ {
+ b = -1
+ }
+ tbl.m1 = tbl.a + tbl.b // { # cases }
+ tbl.n1 = tbl.a + tbl.c // { # exposed }
+ tbl.n0 = tbl.b + tbl.d // { # unexposed }
+ tbl.informative = (tbl.a * tbl.d != 0) || (tbl.b * tbl.c != 0)
+ */
+ }
+ else if ( DataType == 3 )
+ {
+ // Stratified person-time
+ if (stratum=="Crude")
+ {
+ tbl.a = dataTable.E0D0
+ tbl.b = dataTable.E0D1
+ tbl.n1 = dataTable.E1D0
+ tbl.n0 = dataTable.E1D1
+ /*
+ alert("tbl.a crude="+tbl.a
+ +"\nb="+tbl.b
+ +"\nc="+tbl.c
+ +"\nd="+tbl.d)
+ */
+ }
+ else
+ {
+ tbl.a = this.cmdObj.data[i].E0D0 //
+ tbl.b = this.cmdObj.data[i].E0D1 //
+ tbl.n1 = this.cmdObj.data[i].E1D0 //
+ tbl.n0 = this.cmdObj.data[i].E1D1 //
+ }
+ tbl.freq = 1 //
+ tbl.m1 = tbl.a + tbl.b // { # cases }
+ tbl.informative = (tbl.a * tbl.n0 != 0)
+ || ((tbl.b * tbl.n1) != 0)
+ /*
+ alert( "stratum="+stratum+
+ "\ni="+i+
+ "\na="+tbl.a+
+ "\nb="+tbl.b+
+ "\nfreq="+tbl.freq+
+ "\nm1="+tbl.m1+
+ "\nn1="+tbl.n1+
+ "\nn0="+tbl.n0+
+ "\ninformative="+tbl.informative+
+ "\nthis.cmdObj.data[1].E1D1="+this.cmdObj.data[1].E1D1)
+ */
+ }
+ // End With
+
+ if (DataType != 2)
+ {
+ this.Tables[this.Tables.length]=tbl;
+ }
+ // alert("450 this.Tables.length="+this.Tables.length);
+ }//Next i
+ /* alert("this.Tables has length="+this.Tables.length)
+ for (i=0; i<this.Tables.length; i++)
+ {
+ alert ("this.Tables["+i+"].a="+this.Tables[i].a)
+ }
+ */
+ errornum=this.checkData( DataType, this.Tables)
+ if ( errornum != 0 )
+ {
+ if (errornum==1)
+ {
+ s="Exact calculations skipped, since numbers are large. Use other results."
+ this.cmdObj.title(s)
+ this.addToArray(this.resultArray, "ERROR", 1 )
+ }
+ else if (errornum==2)
+ {
+ s="All tables have zero marginals. Cannot perform exact calculations."
+ this.cmdObj.title(s)
+ this.addToArray(this.resultArray, "ERROR", 2)
+ } //
+ else if (errornum==3)
+ {
+ s="PROBLEM: Must have only one case in each table for exact calculations."
+ this.cmdObj.title(s)
+ this.addToArray(this.resultArray, "ERROR", 3 )
+ }
+ }
+
+ if (errornum==0)
+ {
+ //errornum was 0
+ // try
+ // {
+ this.calcPoly (DataType)
+ pValues=this.calcExactPVals()
+ cmle=this.calcCmle(1)
+ //alert("cmle="+cmle);
+
+ if (isNaN(cmle)||!isFinite(cmle))
+ {
+ approx = this.maxSumA
+ }
+ else
+ {
+ approx = cmle
+ }
+ upFishLim=this.calcExactLim (false, true, approx, pnConfLevel)
+ upMidPLim=this.calcExactLim (false, false, approx, pnConfLevel)
+
+ loFishLim=this.calcExactLim (true, true, approx, pnConfLevel)
+ loMidPLim=this.calcExactLim (true, false, approx, pnConfLevel)
+ // this.cmdObj.newtable(6,100)
+ //this.cmdObj.line(6)
+ var totalstrata=this.cmdObj.data[0].strata;
+ if (stratum=="Adjusted" || stratum=="Crude" || totalstrata==1)
+ {
+ editorschoice1=editorschoice;
+ }
+ else
+ {
+ editorschoice1="";
+ }
+ if (DataType==1 || DataType==2)
+ {
+ //s="Exact Odds Ratio Estimates"
+ //this.cmdObj.title(s)
+ s="CMLE OR*"
+ references[0]='newrow("span6:*Conditional maximum likelihood estimate of Odds Ratio");'
+ ORbased[0]='newrow("c:bold:Stratum","c:bold:CMLE OR*","c:bold:span2:'+editorschoice1+'Mid-P Limits","c:bold:span2:Fisher Limits");'
+
+ //newrow("span4:"+s,"",fmtSigFig(cmle,4))
+ }
+ else
+ {
+ //s="Exact Rate Ratio Estimates"
+ //this.cmdObj.title(s)
+ //s= "Conditional maximum likelihood estimate of Rate Ratio:"
+ //this.cmdObj.newrow("span5:"+s,"",fmtSigFig(cmle,4))
+ s="CMLE RR*"
+ references[0]='newrow("span6:*Conditional maximum likelihood estimate of Rate Ratio");'
+ //ORbased[0]='newrow("c:bold:Stratum","c:bold:CMLE RR*","c:bold:span2:Mid-P Limits","c:bold:span2:Fisher Limits");'
+ //ORbased[0]+='\nline(6)'
+ RRbased[0]='newrow("c:bold:Stratum","c:bold:CMLE RR*","c:bold:span2:Mid-P Limits","c:bold:span2:Fisher Limits");'
+ RRbased[0]+='\nline(6)'
+ }
+ // this.addToArray(this.resultArray, "CMLE", fmtSigFig(cmle,4))
+ //this.cmdObj.line(6)
+ // s="Lower & Upper " + pnConfLevel+"%" + " Exact Fisher Limits:"
+ // this.cmdObj.newrow("span4:"+s,fmtSigFig(loFishLim,4), fmtSigFig(upFishLim,4))
+ var index=stratum;
+
+ if(index=="Adjusted") {index=totalstrata+2}
+ if (index=="Crude") {index=totalstrata+1}
+
+ var pstratum ="";
+ if (totalstrata>1) {pstratum=stratum}
+ //alert("pstratum="+pstratum +"NumStrata="+NumStrata);
+ ORbased[index]='\nnewrow("'+pstratum +'","span2:CMLE Odds Ratio*",'+fmtSigFig(cmle,4)+','
+ ORbased[index]+='"c:span2:'+limits(loMidPLim,upMidPLim,1)+'","'+editorschoice1+'Mid-P Exact");'
+ ORbased[index]+='\nnewrow("","span2:","","c:span2:'+limits(loFishLim,upFishLim,1)+'","Fisher Exact");'
+ if (DataType==3)
+ {
+ //Person Time data
+ RRbased[index]='\nnewrow("'+pstratum +'","span2:'+editorschoice1+'CMLE Rate Ratio*",'+fmtSigFig(cmle,4)+','
+ RRbased[index]+='"c:span2:'+limits(loMidPLim,upMidPLim,1)+'","'+editorschoice1+'Mid-P Exact");'
+//alert("cmle="+cmle);
+
+ RRbased[index]+='\nnewrow("","span2:","","c:span2:'+limits(loFishLim,upFishLim,1)+'","Fisher Exact");'
+
+ }
+
+ assoc[0]='newrow("c:bold:Stratum","c:span2:bold:Value","c:bold:p-value(1-tail)","c:bold:p-value(2-tail)");'
+ assoc[0]+='\nline(5)'
+ //2-tail p is minimum value for Fisher and mid-P from the two values offered
+ //(per Rothman)
+ var FishP1Tail;
+ var FishP1TailType;
+
+ var midP1Tail;
+ var midP1TailType;
+
+
+
+ if (pValues.upFishP<pValues.loFishP)
+ {
+ FishP1TailType=""
+ FishP1Tail=pValues.upFishP;
+ }
+ else
+ {
+ //One tail type tests negative (protective) association
+ FishP1TailType="(P)"
+ FishP1Tail=pValues.loFishP;
+ }
+
+ if (pValues.upMidP<pValues.loMidP)
+ {
+ midP1TailType=""
+ midP1Tail=pValues.upMidP;
+ }
+ else
+ {
+ //One tail type tests negative (protective) association
+ midP1TailType="(P)"
+ midP1Tail=pValues.loMidP;
+ }
+
+ var FishP2Tail=2*FishP1Tail;
+ var midP2Tail=2*midP1Tail;
+
+
+ var Fish1Tailstr="c:span2:"+fmtPValue(FishP1Tail,ConfLevel)+FishP1TailType;
+ //var midP1Tailstr="c:span2:"+"lower="+fmtPValue(pValues.loMidP,ConfLevel)+"<br>upper="+fmtPValue(pValues.upMidP,ConfLevel);
+ var midP1Tailstr="c:span2:"+fmtPValue(midP1Tail,ConfLevel)+midP1TailType;
+
+ //assoc[index]='newrow("","c:span2:'+stratum+'","'+fmtPValue(FishP,ConfLevel)+'","'+fmtPValue(midP,ConfLevel)+'");'
+ assoc[index]='\nnewrow("","span2:Fisher exact","","'+Fish1Tailstr+'","c:span2:'+fmtPValue(FishP2Tail,ConfLevel)+ '");'
+ assoc[index]+='\nnewrow("","span2:'+editorschoice1+'Mid-P exact","","'+midP1Tailstr+'","c:span2:'+fmtPValue(midP2Tail,ConfLevel)+ '");'
+
+ s="(P)indicates a one-tail P-value for Protective or negative association; otherwise one-tailed exact P-values are for a positive association.";
+ s+="<br>Martin,D; Austin,H (1991): An efficient program for computing "
+ s+="conditional maximum likelihood estimates and exact confidence "
+ s+="limits for a common odds ratio. Epidemiology 2, 359-362."
+ //this.cmdObj.newrow();
+ references[0]+='\nnewrow("span6:'+s+'")'
+ s="Martin,DO; Austin,H (1996): Exact estimates for a rate ratio."
+ s+="Epidemiology 7, 29-33."
+ if (DataType==3)
+ {
+ references[0]+='\nnewrow("span6:'+s+'")'
+ }
+ //endtable();
+} //if errornum==0
+
+} //end of Process function
+
+
+
+
+
+
+
+
+
+
+
+class MAstratum(object):
+
+ def __init__(self,e1d1,e0d1,e1d0,e0d0,datatype=1,conflevel=0.95):
+ self.conflevel = conflevel
+ if datatype == 1: # Stratified case-control
+ self.a = e1d1
+ self.b = e0d1
+ self.c = e1d0
+ self.d = e0d0
+ self.freq = 1.0
+ self.m1 = self.a + self.b # cases
+ self.n1 = self.a + self.c # exposed
+ self.n0 = self.b + self.d # unexposed
+ self.informative = ((self.a * self.d) != 0) or ((self.b * self.c) != 0)
+ else:
+ raise ValueError, 'datatype must be 1' # not supporting other types yet
+
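+
+# Minimal usage sketch (added; untested against the original JavaScript).
+# The counts are the stratified case-control example from
+# sandbox/twobytwo.output:
+#
+# ma = MartinAustin(datatype=1, conflevel=0.95)
+# ma.add_Table(66, 36, 28, 32) # stratum 1: a, b, c, d cells
+# ma.add_Table(139, 93, 61, 54) # stratum 2
+# if ma._CheckData(1) == 0: # 0 => exact estimates are possible
+# ma._CalcPoly(1)
+# pvals = ma._CalcExactPVals() # dict: upFishP, loFishP, upMidP, loMidP
+# cmle = ma._CalcCmle(1.0) # conditional MLE of the common odds ratio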
+
diff --git a/sandbox/poprate.py b/sandbox/poprate.py
new file mode 100644
index 0000000..207a726
--- /dev/null
+++ b/sandbox/poprate.py
@@ -0,0 +1,203 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: poprate.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/sandbox/poprate.py,v $
+
+import SOOMv0
+from SOOMv0 import *
+import unittest
+import MA
+
+SOOMv0.soom.messages = False
+
+def _get_ds1():
+ ds = Dataset('visits')
+ ds.addcolumnfromseq('sex', label='Sex',
+ coltype='categorical', datatype='int',
+ all_value=-1,
+ data=[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,])
+ ds.addcolumnfromseq('agegrp', label='Age Group',
+ coltype='categorical', datatype='int',
+ data=[ 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ 10,11,12,13,14,15,16,17,18,
+ 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ 10,11,12,13,14,15,16,17,18,])
+ ds.addcolumnfromseq('_freq_', label='Frequency',
+ coltype='scalar', datatype='int',
+ data=[659,146,102,140,221,177,268,302,276,
+ 240,207,163,143,117, 94, 65, 43, 38,
+ 549, 97, 93,248,299,300,288,292,231,
+ 168,149,149,180,144,132,128, 67, 85,])
+ ds.addcolumnfromseq('freq_wgtd_by_wgt', label='Statistical weighting',
+ coltype='scalar', datatype='float',
+ data=[ 19380, 3831, 2592, 3624, 5676, 4522,
+ 6836, 7783, 7186, 6195, 5358, 4239,
+ 3628, 2950, 2401, 1634, 1108, 954,
+ 16419, 2566, 2337, 6512, 7907, 7808,
+ 7597, 7690, 6008, 4274, 3736, 3901,
+ 4707, 3723, 3420, 3256, 1676, 2151,])
+ return ds
+
+def _get_pop_ds1():
+ ds = Dataset('pop')
+ ds.addcolumnfromseq('agegrp', label='Age Group',
+ coltype='categorical', datatype='int',
+ data=[1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4,
+ 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6,
+ 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8,
+ 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10,
+ 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12,
+ 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14,
+ 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16,
+ 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18])
+ ds.addcolumnfromseq('race', label='Race',
+ coltype='categorical', datatype='int',
+ data=[1,1,2,2,5,5,1,1,2,2,5,5,1,1,2,2,5,5,
+ 1,1,2,2,5,5,1,1,2,2,5,5,1,1,2,2,5,5,
+ 1,1,2,2,5,5,1,1,2,2,5,5,1,1,2,2,5,5,
+ 1,1,2,2,5,5,1,1,2,2,5,5,1,1,2,2,5,5,
+ 1,1,2,2,5,5,1,1,2,2,5,5,1,1,2,2,5,5,
+ 1,1,2,2,5,5,1,1,2,2,5,5,1,1,2,2,5,5,])
+ ds.addcolumnfromseq('sex', label='Sex',
+ coltype='categorical', datatype='int',
+ data=[1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
+ 1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
+ 1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
+ 1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
+ 1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
+ 1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,])
+ ds.addcolumnfromseq('_freq_', label='Population',
+ coltype='scalar', datatype='int',
+ data=
+ [7995000,7592000,1584000,1540000, 566000, 548000,8162000,7759000,1703000,
+ 1654000, 548000, 519000,7910000,7497000,1591000,1542000, 530000, 509000,
+ 7938000,7450000,1567000,1539000, 506000, 483000,7208000,6916000,1282000,
+ 1378000, 510000, 478000,7757000,7650000,1267000,1440000, 572000, 571000,
+ 8541000,8425000,1355000,1504000, 520000, 566000,9420000,9307000,1386000,
+ 1550000, 510000, 553000,8945000,8905000,1247000,1409000, 465000, 523000,
+ 7767000,7845000, 984000,1154000, 387000, 444000,6386000,6578000, 670000,
+ 820000, 290000, 318000,4870000,5142000, 527000, 672000, 217000, 236000,
+ 4116000,4482000, 428000, 567000, 168000, 197000,3905000,4535000, 398000,
+ 529000, 129000, 171000,3396000,4318000, 289000, 418000, 101000, 135000,
+ 2628000,3683000, 208000, 327000, 67000, 91000,1558000,2632000, 111000,
+ 212000, 39000, 52000, 986000,2443000, 83000, 199000, 28000, 44000,])
+ return ds
+
+class popn_rate_test(unittest.TestCase):
+
+ def assertListNear(self, first, second, prec=2):
+ def ma_fmt(v, prec):
+ if v is None:
+ return 'None'
+ return '%.*f' % (prec, v)
+ first = ', '.join([ma_fmt(v, prec) for v in first])
+ second = ', '.join([ma_fmt(v, prec) for v in second])
+ self.assertEqual(first, second, '[%s] != [%s]' % (first, second))
+
+ def test_simple(self):
+ ds = _get_ds1()
+ pop = _get_pop_ds1()
+ calc_pop_rates(ds, pop)
+ # AM - This result has not been verified at this time...
+ self.assertListNear(ds['pop_rate_wgtd_by_wgt'],
+ [0.001910, 0.000368, 0.000258, 0.000362, 0.000631, 0.000471,
+ 0.000656, 0.000688, 0.000674, 0.000678, 0.000729, 0.000755,
+ 0.000770, 0.000666, 0.000634, 0.000563, 0.000649, 0.000870,
+ 0.001696, 0.000258, 0.000245, 0.000687, 0.000901, 0.000808,
+ 0.000724, 0.000674, 0.000554, 0.000453, 0.000484, 0.000645,
+ 0.000897, 0.000711, 0.000702, 0.000794, 0.000579, 0.000801],
+ prec=5)
+
+# From Selvin, S. Statistical Analysis of Epidemiologic Data (Monographs in
+# Epidemiology and Biostatistics, V. 35), Oxford University Press; 3rd
+# edition (May 1, 2004)
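+#
+# For reference, the directly standardised rate being tested is
+# sum_i(w_i * d_i / n_i) / sum_i(w_i), where d_i is the stratum event
+# count, n_i the stratum population at risk, and w_i the standard
+# population weight for stratum i.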
+
+agegrp_outtrans = {1:"<1",2:"1-4",3:"5-14",4:"15-24",5:"25-34",6:"35-44",7:"45-54",
+ 8:"55-64",9:"65-74",10:"75-84",11:"85+"}
+
+def _get_ds2():
+ ds = Dataset('deaths')
+ ds.addcolumnfromseq('agegrp', label='Age Group',
+ coltype='categorical', datatype='int',outtrans=agegrp_outtrans,
+ data=[1,2,3,4,5,6,7,8,9,10,11,1,2,3,4,5,6,7,8,9,10,11])
+ ds.addcolumnfromseq('_freq_', label='Frequency',
+ coltype='scalar', datatype='int',
+ data=[141,926,1253,1080,1869,4891,14956,30888,41725,26501,5928,
+ 45,201,320,670,1126,3160,9723,17935,22179,13461,2238])
+ ds.addcolumnfromseq('year', label='Year',
+ coltype='ordinal', datatype='int',
+ data=[1960]*11 + [1940]*11)
+ return ds
+
+def _get_pop_ds2():
+ ds = Dataset('pops')
+ ds.addcolumnfromseq('agegrp', label='Age Group',
+ coltype='categorical', datatype='int',outtrans=agegrp_outtrans,
+ data=[1,2,3,4,5,6,7,8,9,10,11,1,2,3,4,5,6,7,8,9,10,11])
+ ds.addcolumnfromseq('_freq_', label='Frequency',
+ coltype='scalar', datatype='int',
+ data=[1784033,7065148,15658730,10482916,9939972,10563872,
+ 9114202,6850263,4702482,1874619,330915,
+ 906897,3794573,10003544,10629526,9465330,8249558,
+ 7294330,5022499,2920220,1019504,142532])
+ ds.addcolumnfromseq('year', label='Year',
+ coltype='ordinal', datatype='int',
+ data=[1960]*11 + [1940]*11)
+ return ds
+
+def _get_std_ds2():
+ ds = Dataset('std')
+ ds.addcolumnfromseq('agegrp', label='Age Group',
+ coltype='categorical', datatype='int',outtrans=agegrp_outtrans,
+ data=[1,2,3,4,5,6,7,8,9,10,11])
+ data=[1784033,7065148,15658730,10482916,9939972,10563872,
+ 9114202,6850263,4702482,1874619,330915]
+ ds.addcolumnfromseq('_stdpop_', label='Standard population',
+ coltype='scalar', datatype='int',
+ data=data)
+ return ds
+
+class popn_rate_test2(unittest.TestCase):
+
+ def assertListNear(self, first, second, prec=2):
+ def ma_fmt(v, prec):
+ if v is None:
+ return 'None'
+ return '%.*f' % (prec, v)
+ first = ', '.join([ma_fmt(v, prec) for v in first])
+ second = ', '.join([ma_fmt(v, prec) for v in second])
+ self.assertEqual(first, second, '[%s] != [%s]' % (first, second))
+
+ def test_simple(self):
+ ds = _get_ds2()
+ pop = _get_pop_ds2()
+ std = _get_std_ds2()
+ calc_directly_std_rates(ds, pop, std)
+ # AM - This result has not been verified at this time...
+ self.assertListNear(ds['pop_rate_wgtd_by_wgt'],
+ [0.001910, 0.000368, 0.000258, 0.000362, 0.000631, 0.000471,
+ 0.000656, 0.000688, 0.000674, 0.000678, 0.000729, 0.000755,
+ 0.000770, 0.000666, 0.000634, 0.000563, 0.000649, 0.000870,
+ 0.001696, 0.000258, 0.000245, 0.000687, 0.000901, 0.000808,
+ 0.000724, 0.000674, 0.000554, 0.000453, 0.000484, 0.000645,
+ 0.000897, 0.000711, 0.000702, 0.000794, 0.000579, 0.000801],
+ prec=5)
+
+
+
+if __name__ == '__main__':
+ unittest.main()
+
diff --git a/sandbox/reformat_icd9.py b/sandbox/reformat_icd9.py
new file mode 100644
index 0000000..a9ce545
--- /dev/null
+++ b/sandbox/reformat_icd9.py
@@ -0,0 +1,54 @@
+import psyco
+psyco.full()
+from time import time
+import re
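+
+# A and B below are two implementations of the same reformatting rule,
+# raced against each other: 5-character fixed-width ICD-9 codes (padded
+# with "-") get a decimal point inserted, e.g. "001--" -> "001",
+# "0010-" -> "001.0" and "E8001" -> "E800.1" (E-codes keep four
+# characters before the point). Note both versions keep only the first
+# decimal digit for non-E codes, per the commented-out line in A.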
+
+def A(data):
+ if data and len(data) == 5:
+ if data[3] == "-":
+ return data[0:3]
+ elif data[4] == "-":
+ if data[0] == "E":
+ return data[0:4]
+ else:
+ return ".".join((data[0:3],data[3]))
+ else:
+ if data[0] == "E":
+ return ".".join((data[0:4],data[4]))
+ else:
+ # return data[0:3] + "." + data[3:5]
+ return ".".join((data[0:3],data[3]))
+ else:
+ return data
+
+def B(data):
+ if data and len(data) == 5:
+ if data[3] == "-":
+ return data[:3]
+ elif data[4] == "-":
+ if data[0] == "E":
+ return data[:4]
+ else:
+ return data[:3] + '.' + data[3]
+ else:
+ if data[0] == "E":
+ return data[:4] + '.' + data[4]
+ else:
+ return data[:3] + '.' + data[3]
+ else:
+ return data
+
+data = [l.strip() for l in open('/tmp/x', 'U')]
+
+map(A, data)
+map(B, data)
+
+st = time()
+a = map(A, data)
+print "A", time() - st
+
+st = time()
+b = map(B, data)
+print "B", time() - st
+
+print "A == B", a == b
diff --git a/sandbox/sander.py b/sandbox/sander.py
new file mode 100644
index 0000000..9f83cb5
--- /dev/null
+++ b/sandbox/sander.py
@@ -0,0 +1,286 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: sander.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/sandbox/sander.py,v $
+
+from Numeric import *
+import math
+
+def factgen():
+ n = 1
+ t = 1
+ while True:
+ t = t * n
+ n += 1
+ yield t
+
+def fact(x):
+ fg = factgen()
+ f = 1
+ for t in xrange(int(x)):
+ f = fg.next()
+ return f
+
+def logfact(x):
+ fg = factgen()
+ f = 1
+ for t in xrange(int(x)):
+ f = fg.next()
+ return math.log(f)
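+
+# NB (added): fact() and logfact() rebuild the factorial from scratch on
+# every call, which is very slow for repeated use; caching the results,
+# or math.lgamma(x + 1) on Python >= 2.6, would be much cheaper.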
+
+def exact(a, b, c, d, names=None):
+ _clevel = 0.95
+ _criter = 0.0001
+ _maxit = 1000
+ _midp = 1
+ _oflow = 708
+ _rrt = 1.0
+ if len(b) != len(a) or len(c) != len(a) or len(d) != len(a):
+ raise ValueError, "All input vectors must be same length"
+ if innerproduct(a,d) == 0 and innerproduct(b,c) == 0:
+ raise ValueError, "No tables with non-zero diagonal or off-diagonal products are allowed."
+ if names:
+ print "Exact 2x2 analysis of association of %s and %s." % (names[0], names[1])
+ print "Crude table:"
+ print sum(a), sum(b)
+ print sum(c), sum(d)
+ print 'original a,b,c,d:', a,b,c,d
+ # margins
+ m1 = (a + b != 0) and (a + c != 0) and (b + d != 0) and (c + d != 0)
+ print 'm1 non-zero margin mask:', m1
+ # delete any strata with a zero margin
+ a = compress(m1,a)
+ b = compress(m1,b)
+ c = compress(m1,c)
+ d = compress(m1,d)
+ print 'a,b,c,d after compress:', a,b,c,d
+ # reverse sort the strata
+ st = argsort(a+b+c+d).tolist()
+ print "st = ", st, type(st)
+ st.reverse()
+ print "st = ", st, type(st)
+ a = take(a, st)
+ b = take(b, st)
+ c = take(c, st)
+ d = take(d, st)
+ print '"a,b,c,d after sort:', a,b,c,d
+ # flip tables if necessary
+ if sum(a) > sum(d):
+ d, a = a, d
+ if sum(b) > sum(c):
+ c, b = b, c
+ sw = sum(a) > sum(b) # sw also used for flow control later!
+ if sw:
+ b, a = a, b
+ d, c = c, d
+ _rrt = 1.0 / _rrt
+ # number of strata
+ ns = len(a)
+ print "ns = ", ns
+ # overflow control constant
+ oflow = -_oflow / float(ns)
+ # marginal case totals
+ m1 = a + b
+ print "a, b:", a,b
+ print 'm1:', m1
+ ms = sum(m1)
+ # marginal exposure totals
+ n1 = a + c
+ n0 = b + d
+ print "m1, ms, n1, n0:", m1, ms, n1, n0
+ # bounds of stratum-specific "a" cells
+ l = maximum(0, m1 - n0)
+ u = minimum(m1, n1)
+ print "l = ", l
+ print "u = ", u
+ cl = concatenate((array([0.0]), cumsum(l)))
+ cu = concatenate((array([0.0]), cumsum(u)))
+ print 'cl = ', cl
+ print 'cu = ', cu
+ # "a" cell total
+ asum = sum(a) # renamed from "as", which is a reserved word in later Pythons
+ # compute network co-efficients
+ # initialise recursively defined co-efficient matrix
+ mm = zeros((cu[ns]+1,ns+1),typecode=Float32)
+ mm[0,0] = 1
+ print mm
+ print "l=", l
+ print "u=", u
+ print "cl=", cl
+ print "cu=", cu
+ # print "range for k:", 0,ns-1
+ k = -1
+ # for k in range(ns):
+ while True:
+ k += 1
+ j = int(cl[k+1])
+ i = j - 1
+ # print "range for i:", j-1, cu[k+1] - 1
+ # for i in range(int(j-1),int(cu[k+1])):
+ while True:
+ i += 1
+ r = int(max(l[k], i - cu[k])) - 1
+ # print "range for r:", max(l[k], i - cu[k]) - 1, min(u[k], i-cl[k]) - 1
+ # print "actual range for r:", range(int(max(l[k], i - cu[k]) - 1), int(min(u[k], i-cl[k])))
+ # for r in range(int(max(l[k], i - cu[k]) - 1), int(min(u[k], i-cl[k]))):
+ while True:
+ r += 1
+ print "k, j, i, r", k, j, i, r
+ print "[i, k]:", i, k
+ print "mm[i,k]", mm[i,k]
+ print "[i-r,k]:", i-r, k
+ print "mm[i-r,k]", mm[i-r,k]
+ #print "oflow", oflow
+ #print "n1[k], logfact(n1[k])", n1[k], logfact(n1[k])
+ #print "n1[k] - r, logfact(n1[k] - r)", n1[k] - r, logfact(n1[k] - r)
+ #print "r, logfact(r)", r, logfact(r)
+ #print "n0[k], logfact(n0[k])", n0[k], logfact(n0[k])
+ #print "n0[k] - m1[k] + r, logfact(n0[k] - m1[k] + r)", n0[k] - m1[k] + r, logfact(n0[k] - m1[k] + r)
+ #print "m1[k] - r, logfact(m1[k] - r)", m1[k] - r, logfact(m1[k] - r)
+ print (mm[i,k] + mm[i-r,k] * exp(oflow + \
+ logfact(n1[k]) - logfact(n1[k] - r) - logfact(r) + \
+ logfact(n0[k]) - logfact(n0[k] - m1[k] + r) - \
+ logfact(m1[k] - r)))
+ print (exp(oflow + \
+ logfact(n1[k]) - logfact(n1[k] - r) - logfact(r) + \
+ logfact(n0[k]) - logfact(n0[k] - m1[k] + r) - \
+ logfact(m1[k] - r)))
+ mm[i,k] = (mm[i,k] + mm[i-r,k] * exp(oflow + \
+ logfact(n1[k]) - logfact(n1[k] - r) - logfact(r) + \
+ logfact(n0[k]) - logfact(n0[k] - m1[k] + r) - \
+ logfact(m1[k] - r)))
+ print mm
+ print '--------------'
+ if r >= int(min(u[k], i-cl[k])):
+ break
+ print "==============="
+ if i >= cu[k+1]:
+ break
+ print "*****************"
+ if k >= ns-1:
+ break
+ cl = int(cl[ns])
+ cu = int(cu[ns])
+ print cl, cu, ns
+ print "mm:", mm
+ print mm[cl:cu, ns] # debug
+ print transpose(mm[cl:cu, ns]) # debug
+ lc = log(transpose(mm[cl:cu, ns]))
+ lc = lc - max(lc)
+ # compute p-values for testing _rrt
+ s = arrayrange(cl, cu - cl + 2) # double check this!
+ # null probs from 0 to asum
+ p = exp(lc + log(_rrt)*s)
+ p = p / float(sum(p))
+ # Fisher p-values
+ ip = asum - cl + 1
+ plf = sum(p[0:ip-1])
+ puf = 1.0 - plf + p[ip-1]
+ # 2-sided mid-p
+ pm = plf - p[ip-1]/2.0
+ pm = 2.0*minimum(pm, 1.- pm)
+ pf = 2.0*minimum(plf,puf)
+ if sw:
+ _rrt = 1.0/_rrt
+ if names: # print p-values
+ print "Two-sided p-values for testing OR = %d" % _rrt
+ print "(from doubling the smaller of the lower and upper p) --:"
+ print " Fisher: %d mid-p: %d" % (pf, pm)
+ # Find confidence limits
+ rr = array([-1.0, -1.0, -1.0])
+ namerr = ["Lower CL", "estimate", "Upper CL"]
+ # Alpha level for CL
+ alpha = (1.0 - _clevel) / 2.0
+ alpha = array([1.0 - alpha, 0.5, alpha])
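+ # target tail areas: 1 - alpha/2 for the lower limit, 0.5 for the
+ # median-unbiased point estimate, alpha/2 for the upper limit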
+ zedu = innerproduct(a, d) == 0
+ zedl = innerproduct(b, c) == 0
+
+ if zedu or zedl:
+ # Add a small constant to a and find only one limit.
+ n = n1 + n0
+ a = (n*a + n1*m1/n)/(n + 1)
+ b = m1 - a
+ c = n1 - a
+ d = n0 - b
+ i = 2 * int(zedu) # keep i integral: it indexes alpha and namerr below
+ while i < (3 - 2 * int(zedl)):
+ i += 1
+ if _midp:
+ m = 0.5
+ else:
+ m = (3.0 - i) / 2.0
+ targ = alpha[i-1]
+ # use bisection on log odds ratio
+ # upper and lower bracketting values
+ bh = lrr + (i - 2.0)*rad + 1.0 # NB: lrr and rad are never defined in this sandbox version
+ bl = bh - 2.0
+ # pv is hypergeometric terms, sh, sl, sm are p-vals
+ pv = exp(lc + bh*s)
+ sh = (sum(pv[0:ip-1]) - m*pv[ip-1]) / float(sum(pv))
+ pv = exp(lc + bl*s)
+ sl = (sum(pv[0:ip-1]) - m*pv[ip-1]) / float(sum(pv))
+ cnv = 0
+ iter = 0
+ while not cnv and iter < _maxit:
+ iter += 1
+ if sl < targ:
+ # decrement bl and try again
+ bl -= 1
+ pv = exp(lc + bl*s)
+ sl = (sum(pv[0:ip-1]) - m*pv[ip-1]) / float(sum(pv))
+ continue
+ elif sh > targ:
+ # increment bh and try again
+ bh += 1
+ pv = exp(lc + bh*s)
+ sh = (sum(pv[0:ip-1]) - m*pv[ip-1]) / float(sum(pv))
+ continue
+ bm = (bh + bl) / 2.0
+ pv = exp(lc + bm*s)
+ sm = (sum(pv[0:ip-1]) - m*pv[ip-1]) / float(sum(pv))
+ if ((sm - targ) >= 0) == ((sl - targ) >= 0):
+ # push up bl
+ bl = bm
+ sl = sm
+ else:
+ # push down bh
+ bh = bm
+ sh = sm
+ cnv = abs(bh - bl) < _criter and abs(sm - targ) < _criter / 10.0
+ if not cnv:
+ print "No convergence for %s after %i iteractions." % (namerr[i-1], iter)
+ else:
+ rr[i-1] = exp(bm)
+ if sw:
+ sw = (rr == 0)
+ rr = (1.0 - sw) / (array([rr[2],rr[1],rr[0]]) + sw)
+ i = 4 - i
+ return (rr[1], rr[0], rr[2], plf, puf, pm)
+
+a = array([3,2,1,1,4,4,5,4,3,8,5,8,5,4,4,7,4,5])
+b = array([5,4,4,5,1,5,3,4,2,1,1,1,3,1,2,1,2,3])
+c = array([1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1])
+d = array([0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0])
+names = ['crying','treated','strata']
+
+a = array([3,2],typecode=Float32)
+b = array([5,4],typecode=Float32)
+c = array([1,1],typecode=Float32)
+d = array([2,3],typecode=Float32)
+
+print exact(a,b,c,d,names=names)
+
diff --git a/sandbox/soomload-report.py b/sandbox/soomload-report.py
new file mode 100644
index 0000000..f12813a
--- /dev/null
+++ b/sandbox/soomload-report.py
@@ -0,0 +1,238 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+
+"""
+This script parses the output produced when loading a SOOM dataset in
+verbose mode and produces a summary report to aid performance tuning.
+"""
+
+import re
+import fileinput
+
+class Column:
+ maxname = 0
+
+ def __init__(self, name):
+ self.derived = False
+ self.name = name
+ self.totaltime = 0.0
+ self.times = {}
+ self.mem = None
+ if len(name) > Column.maxname:
+ Column.maxname = len(name)
+
+ def add_time(self, name, elap):
+ self.times[name] = self.times.get(name, 0.0) + elap
+ self.totaltime += elap
+
+ @classmethod
+ def hdr(cls):
+ print
+ print '%-*s %8s %13s' %\
+ (cls.maxname, 'Column', 'mem', 'tot time ')
+ print '-' * 79
+
+ def report(self):
+ rep = '%-*s %8s %13s' %\
+ (self.maxname, self.name,
+ kmeg(self.mem),
+ elap(self.totaltime, ralign=True))
+ extra = []
+ for key in sorted(self.times):
+ elap_time = self.times[key]
+ extra.append(' %s: %s' % (key, elap(elap_time)))
+ rep += ','.join(extra)
+ print rep
+
+
+class MatchMeta(type):
+ def __init__(cls, name, bases, dict):
+ if hasattr(cls, 're'):
+ Match.matches.append(cls())
+
+class Match:
+ __metaclass__ = MatchMeta
+ matches = []
+
+ def __init__(self):
+ self.cre = re.compile(self.re)
+
+ def match(self, state, line):
+ match = self.cre.search(line)
+ if match:
+ self.action(state, match)
+
+ def kill(self, cls):
+ Match.matches = [match for match in self.matches
+ if match.__class__ is not cls]
+
+ def set_column(self, state, name):
+ if state.cur_column is None or state.cur_column.name != name:
+ col = Column(name)
+ state.cur_column = col
+ state.columns.append(col)
+
+ @classmethod
+ def run(cls, state, line):
+ for match in cls.matches:
+ match.match(state, line)
+
+
+class DSNAME(Match):
+ re = r"Dataset '([^']+)' loaded"
+
+ def action(self, state, match):
+ state.dsname = match.group(1)
+ self.kill(DSNAME)
+
+class ChunkFlush(Match):
+ re = r'chunk flush took ([\d.]+) seconds'
+ def action(self, state, match):
+ t = float(match.group(1))
+ state.chunkflush += t
+ state.chunkflush_count += 1
+ if state.chunkflush_min is None or t < state.chunkflush_min:
+ state.chunkflush_min = t
+ if state.chunkflush_max is None or t > state.chunkflush_max:
+ state.chunkflush_max = t
+
+class Mem(Match):
+ re = r'mem delta: [\d-]+k, total: (\d+)k'
+ def action(self, state, match):
+ mem = int(match.group(1))
+ if state.mem_initial is None:
+ state.mem_initial = mem
+ if state.cur_column is not None and state.mem_final:
+ state.cur_column.mem = mem - state.mem_final
+ state.mem_final = mem
+ if mem > state.mem_peak:
+ state.mem_peak = mem
+
+class EndChunking(Match):
+ re = r'(\d+) rows read from DataSource (\S+), in ([\d.]+) seconds \((\d+) rows total\)'
+
+ def action(self, state, match):
+ self.kill(ChunkFlush)
+ state.mem_chunk = state.mem_final
+ state.rowcount = int(match.group(1))
+ state.srcname = match.group(2)
+ state.loadtime = float(match.group(3))
+ state.times.append((state.loadtime, 'chunking & loading'))
+
+
+class StoredCol(Match):
+ re = r'Stored data for column (\S+) in dataset \S+ \(([\d.]+)s\)'
+
+ def action(self, state, match):
+ storetime = float(match.group(2))
+ self.set_column(state, match.group(1))
+ storetime -= state.cur_column.times.get('wordidx', 0)
+ state.cur_column.add_time('store', storetime)
+ state.times.append((storetime, 'store %s' % state.cur_column.name))
+
+class DerivedCol(Match):
+ re = r'Creating derived column (\S+) in dataset \S+ took ([\d.]+), store took ([\d.]+)'
+
+ def action(self, state, match):
+ assert state.cur_column.name == match.group(1)
+ state.cur_column.derived = True
+ dertime = float(match.group(2))
+ state.cur_column.add_time('derive', dertime)
+ state.times.append((dertime, 'derive %s' % state.cur_column.name))
+
+class WordIndexCol(Match):
+ re = r"word index for '(\S+)' took ([\d.]+)s \([\d.]+s\+[\d.]+s\), \d+ words, \d+ overflow blocks \([\d.]+MB\)"
+
+ def action(self, state, match):
+ self.set_column(state, match.group(1))
+ wordtime = float(match.group(2))
+ state.cur_column.add_time('wordidx', wordtime)
+ state.times.append((wordtime, 'word index %s' % state.cur_column.name))
+
+
+class State:
+ dsname = None
+ srcname = None
+ chunkflush = 0.0
+ chunkflush_min = None
+ chunkflush_max = None
+ chunkflush_count = 0
+ rowcount = None
+ loadtime = None
+ mem_initial = None
+ mem_chunk = None
+ mem_final = None
+ mem_peak = 0
+ cur_column = None
+
+ def __init__(self):
+ self.columns = []
+ self.times = []
+
+def elap(sec, ralign=False):
+ if sec is None:
+ ret = 'None'
+ elif sec < 10:
+ ret = '%.1fs' % sec
+ elif sec < 90:
+ ret = '%.0fs ' % sec
+ else:
+ min, sec = divmod(sec, 60)
+ if min < 90:
+ ret = '%dm %2ds ' % (min, sec)
+ else:
+ hr, min = divmod(min, 60)
+ ret = '%dh %2dm ' % (hr, min)
+ if ralign:
+ return ret
+ return ret.rstrip()
+
+def kmeg(h, l=0):
+ if h is None or l is None:
+ return '?'
+ return '%.1fM' % ((h - l) / 1024.0)
+
+def report(state):
+ print 'Dataset: %s' % state.dsname
+ print 'Chunk flushing: %s, total: %s (chunk min %s, max %s, %d chunks)' %\
+ (elap(state.chunkflush),
+ elap(state.loadtime),
+ elap(state.chunkflush_min),
+ elap(state.chunkflush_max),
+ state.chunkflush_count)
+ print 'mem total: %s, chunk: %s, peak %s' %\
+ (kmeg(state.mem_final, state.mem_initial),
+ kmeg(state.mem_chunk, state.mem_initial),
+ kmeg(state.mem_peak, state.mem_initial))
+ Column.hdr()
+ for col in state.columns:
+ col.report()
+ print
+ print 'Top 10 time consumers:'
+ times = sorted(state.times)
+ for t, name in times[-1:-11:-1]:
+ print ' %13s %s' % (elap(t, ralign=True), name)
+
+
+
+def main():
+    state = State()
+    for line in fileinput.input():
+        Match.run(state, line)
+    report(state)
+
+if __name__ == '__main__':
+ main()
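+
+# The Match subclasses above all follow one regex-dispatch idiom: a class
+# attribute 're' paired with an action() over the shared State.  A minimal,
+# self-contained sketch of that idiom (illustrative only -- the real Match
+# base class defined earlier in this file also provides run(), kill() and
+# set_column()):
+
+def _match_pattern_demo():
+    import re
+
+    class MiniState:
+        rowcount = None
+
+    class MiniMatch(object):
+        subclasses = []
+
+        @classmethod
+        def run(cls, state, line):
+            # Try each registered pattern in turn; first hit wins
+            for sub in cls.subclasses:
+                m = re.search(sub.re, line)
+                if m:
+                    sub().action(state, m)
+                    return
+
+    class MiniEnd(MiniMatch):
+        re = r'(\d+) rows read'
+        def action(self, state, match):
+            state.rowcount = int(match.group(1))
+
+    MiniMatch.subclasses.append(MiniEnd)
+    state = MiniState()
+    MiniMatch.run(state, '1000 rows read from DataSource nhds')
+    assert state.rowcount == 1000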
diff --git a/sandbox/source_bench.py b/sandbox/source_bench.py
new file mode 100644
index 0000000..17c5e5a
--- /dev/null
+++ b/sandbox/source_bench.py
@@ -0,0 +1,32 @@
+import sys, os
+from time import time
+demodir = os.path.join(os.path.dirname(__file__), '..', 'demo')
+sys.path.insert(0, demodir)
+import loaders.nhds
+
+try:
+    # psyco (a Python 2 JIT) is optional; skip it if not installed
+    import psyco
+    psyco.full()
+except ImportError:
+    pass
+
+class options:
+    # Minimal stand-in for the option object the demo loaders expect
+    datadir = os.path.join(demodir, 'rawdata')
+
+src = loaders.nhds.nhds96_source(options)
+st = time()
+i = 0
+for line in src.get_file_iter():
+ i += 1
+elapsed = time() - st
+print '%d total, %.3f per second, took %.3f' % (i, i / elapsed, elapsed)
+
+src = loaders.nhds.nhds96_source(options)
+st = time()
+i = 0
+try:
+ while 1:
+ d = src.next_rowdict()
+ i += 1
+except StopIteration:
+ pass
+elapsed = time() - st
+print '%d total, %.3f per second, took %.3f' % (i, i / elapsed, elapsed)
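+
+# The two passes above repeat the same timing boilerplate.  A helper of
+# roughly this shape (a sketch only, not used above) would let further
+# source variants be timed in one line each; the next_rowdict() pass
+# would first need wrapping in a generator that calls it until
+# StopIteration:
+
+def bench(label, make_iter):
+    # Time full consumption of the iterable returned by make_iter()
+    st = time()
+    n = 0
+    for item in make_iter():
+        n += 1
+    elapsed = time() - st
+    print '%s: %d total, %.3f per second, took %.3f' % (
+        label, n, n / elapsed, elapsed)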
diff --git a/sandbox/summ_measure.py b/sandbox/summ_measure.py
new file mode 100644
index 0000000..241980d
--- /dev/null
+++ b/sandbox/summ_measure.py
@@ -0,0 +1,81 @@
+import math
+from time import time
+from SOOMv0 import *
+from SOOMv0.Utils import combinations
+from soomfunc import intersect
+
+ds = dsload('nhds', path='../SOOM_objects')
+
+cols = (
+ 'geog_region','sex','discharge_status','marital_status',
+ 'hosp_ownership','num_beds'
+)
+
+st = time()
+values = []
+for col in cols:
+ values.append(ds[col].inverted.items())
+
+class stats:
+ def __init__(self, name):
+ self.name = name
+ self.buckets = [0] * 1000
+ self.total = 0
+
+ def add(self, n):
+ self.total += n
+ bucket = 0
+ if n > 0:
+ bucket = int(math.log(n, 10)) + 1
+ self.buckets[bucket] += 1
+
+ def report(self):
+ print self.name, 'total:', self.total
+ for v, c in enumerate(self.buckets):
+ if c:
+ print " >%8d: %d" % (pow(10, v) - 1, c)
+
+
+if 1:
+    calls = 0
+    result = []
+    vl = stats('vector length distribution')
+    cvl = stats('cache vector length distribution')
+    for veckey in combinations(*values):
+        if len(veckey) > 0:
+            # 'keys' rather than 'values': rebinding the module-level
+            # 'values' here would break the cached pass below
+            keys, rows = zip(*veckey)
+            if len(rows) == 1:
+                rows = rows[0]
+            else:
+                calls += 1
+                rows = intersect(*rows)
+            vl.add(len(rows))
+            if len(keys) > 1 and len(keys) < len(cols):
+                cvl.add(len(rows))
+#            result.append((keys, rows))
+    elapsed = time() - st
+    print '%d intersect calls, %.3f sec, %.3f sec per call' % (
+        calls, elapsed, elapsed / calls)
+    vl.report()
+    cvl.report()
+
+if 0:   # alternative cached pass; flip to 1 to enable
+    calls = 0
+    cache = {}
+    st = time()   # restart the clock so the two passes are comparable
+    for veckey in combinations(*values):
+        if len(veckey) > 0:
+            keys, rows = zip(*veckey)
+            if len(rows) == 1:
+                rows = rows[0]
+            else:
+                calls += 1
+                partial = cache.get(keys[:-1], None)
+                if partial is not None:
+                    rows = intersect(partial, rows[-1])
+                else:
+                    rows = intersect(*rows)
+                if len(keys) > 1 and len(keys) < len(cols):
+                    cache[keys] = rows
+    elapsed = time() - st
+    print '%d intersect calls, %.3f sec, %.3f sec per call' % (
+        calls, elapsed, elapsed / calls)
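+
+# The 'if 0' pass above exploits the order in which combinations() emits
+# key tuples: each tuple's prefix has usually just been intersected, so
+# only one further intersect() is needed.  A toy illustration of that
+# prefix-cache idea over plain Python sets (illustrative only; the real
+# code intersects soomfunc row vectors):
+
+def _prefix_cache_demo():
+    sets = {'a1': set([1, 2, 3]), 'b1': set([2, 3]), 'c1': set([3])}
+    cache = {}
+    for keys in (('a1', 'b1'), ('a1', 'b1', 'c1')):
+        partial = cache.get(keys[:-1])
+        if partial is not None:
+            # Prefix already intersected: one more intersection suffices
+            rows = partial & sets[keys[-1]]
+        else:
+            rows = sets[keys[0]]
+            for k in keys[1:]:
+                rows = rows & sets[k]
+        cache[keys] = rows
+    return cache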
diff --git a/sandbox/twobytwo.output b/sandbox/twobytwo.output
new file mode 100644
index 0000000..6761dc1
--- /dev/null
+++ b/sandbox/twobytwo.output
@@ -0,0 +1,2371 @@
+OpenEpi example values
+
+Tabulated values
+================
+Stratum 1:
+a (exposed, disease): 66
+b (unexposed, disease): 36
+c (exposed, no disease): 28
+d (unexposed, no disease): 32
+-----------------------
+Stratum 2:
+a (exposed, disease): 139
+b (unexposed, disease): 93
+c (exposed, no disease): 61
+d (unexposed, no disease): 54
+-----------------------
+Unstratified (crude):
+a (exposed, disease): 205
+b (unexposed, disease): 129
+c (exposed, no disease): 89
+d (unexposed, no disease): 86
+-----------------------
+
+Measures of association
+=======================
+Stratum 1:
+Chi sq: 5.04737, p=0.02466
+Yates-corrected Chi sq: 4.33389, p=0.03736
+M-H Chi sq: 5.01621, p=0.02511
+Fisher's exact test: one-sided p=0.01881, two-sided (twice one-sided): p=0.03762, two-sided (as extreme): p=0.03204
+mid-p: one-sided p=0.01341, two-sided p=0.02682
+-----------------------
+Stratum 2:
+Chi sq: 1.48635, p=0.22278
+Yates-corrected Chi sq: 1.21829, p=0.26970
+M-H Chi sq: 1.48207, p=0.22345
+Fisher's exact test: one-sided p=0.13492, two-sided (twice one-sided): p=0.26984, two-sided (as extreme): p=0.24897
+mid-p: one-sided p=0.11306, two-sided p=0.22612
+-----------------------
+Unstratified (crude):
+Chi sq: 5.20903, p=0.02247
+Yates-corrected Chi sq: 4.78676, p=0.02868
+M-H Chi sq: 5.19879, p=0.02260
+Fisher's exact test: one-sided p=0.01448, two-sided (twice one-sided): p=0.02895, two-sided (as extreme): p=0.02371
+mid-p: one-sided p=0.01166, two-sided p=0.02331
+-----------------------
+Adjusted:
+Mantel-Haenszel chi square with continuity correction: 4.78871 (p=0.02865)
+Mantel-Haenszel chi square without continuity correction: 5.21052 (p=0.02245)
+Fisher exact test: one-sided: p=0.01438, two-sided (twice one-sided): p=0.02876, two-sided (as extreme): p=0.02371
+Mid-p exact test: one-sided: p=0.01158, two-sided: p=0.02316
+-----------------------
+
+Risk-based measures
+===================
+Stratum 1:
+Risk in exposed: 0.70213 (0.60290, 0.78549)
+Risk in unexposed: 0.52941 (0.41240, 0.64328)
+Risk in overall population: 0.62963 (0.55301, 0.70024)
+Risk ratio: 1.32624 (1.02269, 1.71988)
+Risk difference: 0.17272 (0.02231, 0.32312)
+-----------------------
+Stratum 2:
+Risk in exposed: 0.69500 (0.62791, 0.75474)
+Risk in unexposed: 0.63265 (0.55220, 0.70635)
+Risk in overall population: 0.66859 (0.61742, 0.71607)
+Risk ratio: 1.09855 (0.94210, 1.28097)
+Risk difference: 0.06235 (-0.03837, 0.16307)
+-----------------------
+Unstratified (crude):
+Risk in exposed: 0.69728 (0.64243, 0.74703)
+Risk in unexposed: 0.60000 (0.53329, 0.66320)
+Risk in overall population: 0.65619 (0.61388, 0.69616)
+Risk ratio: 1.16213 (1.01781, 1.32692)
+Risk difference: 0.09728 (0.01334, 0.18122)
+Aetiological fraction in the population: 0.08563 (0.01098, 0.16028)
+Aetiological fraction in the exposed: 0.13951 (0.01750, 0.24638)
+Prevented fraction in the population: -0.09365 (-0.19087, -0.01110)
+Prevented fraction in the exposed: -0.16213 (-0.32692, -0.01781)
+-----------------------
+Adjusted:
+Directly adjusted risk ratio: 1.15345 (1.01056, 1.31655)
+Mantel-Haenszel adjusted risk ratio: 1.16240 (1.01794, 1.32736)
+Breslow-Day chi square test for homogeneity of RR across strata: 1.49516 (p=0.22142)
+Directly adjusted risk difference: 0.09652 (0.01283, 0.18021)
+Breslow-Day chi square test for homogeneity of RD across strata: 1.42813 (p=0.23207)
+-----------------------
+
+Odds-based measures
+===================
+Stratum 1:
+Sample odds ratio: 2.09524 (1.09410, 4.01244)
+CMLE odds ratio: 2.08538 (1.03962, 4.22297)
+mid-p CMLE odds ratio: 2.08281 (1.08768, 4.02798)
+-----------------------
+Stratum 2:
+Sample odds ratio: 1.32311 (0.84313, 2.07633)
+CMLE odds ratio: 1.32201 (0.82130, 2.12749)
+mid-p CMLE odds ratio: 1.32208 (0.84088, 2.07825)
+-----------------------
+Unstratified (crude):
+Sample odds ratio: 1.53558 (1.06147, 2.22145)
+CMLE odds ratio: 1.53422 (1.04302, 2.25869)
+mid-p CMLE odds ratio: 1.53406 (1.05993, 2.22253)
+Aetiological fraction in the population: 0.21407 (0.05494, 0.37320)
+Aetiological fraction in the exposed: 0.34878 (0.05791, 0.54984)
+Prevented fraction in the population: -0.27238 (-0.59540, -0.05814)
+Prevented fraction in the exposed: -0.53558 (-1.22145, -0.06147)
+-----------------------
+Adjusted:
+Directly adjusted common odds ratio: 1.53615 (1.06078, 2.22456)
+Mantel-Haenszel common odds ratio: 1.53597 (1.06192, 2.22163)
+CMLE common odds ratio: 1.53481 (1.04352, 2.25940)
+Breslow-Day chi square test for homogeneity of OR across strata: 1.29834 (p=0.25452)
+Woolf chi square test for homogeneity of OR across strata: 1.29834, df=1.00000 (p=0.25452)
+-----------------------
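+
+A quick cross-check of the crude OpenEpi figures above, straight from the
+cell counts (a sketch only, not part of the twobytwo output itself): the
+sample odds ratio is a*d/(b*c) and the risk ratio is (a/(a+c))/(b/(b+d)).
+
+    a, b, c, d = 205, 129, 89, 86
+    print 'OR = %.5f' % (float(a) * d / (b * c))                       # 1.53558
+    print 'RR = %.5f' % ((float(a) / (a + c)) / (float(b) / (b + d)))  # 1.16213
+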
+Armitage and Berry example
+
+Tabulated values
+================
+Stratum 1:
+a (exposed, disease): 1
+b (unexposed, disease): 21
+c (exposed, no disease): 4
+d (unexposed, no disease): 16
+-----------------------
+
+Measures of association
+=======================
+Stratum 1:
+Chi sq: 2.38585, p=0.12244
+Yates-corrected Chi sq: 1.13978, p=0.28570
+M-H Chi sq: 2.32904, p=0.12698
+Fisher's exact test: one-sided p=0.14353, two-sided (twice one-sided): p=0.28705, two-sided (as extreme): p=0.17448
+mid-p: one-sided p=0.08088, two-sided p=0.16175
+-----------------------
+
+Risk-based measures
+===================
+Stratum 1:
+Risk in exposed: 0.20000 (0.02032, 0.64037)
+Risk in unexposed: 0.56757 (0.40902, 0.71340)
+Risk in overall population: 0.52381 (0.37721, 0.66642)
+Risk ratio: 0.35238 (0.05970, 2.08011)
+Risk difference: -0.36757 (-0.75281, 0.01767)
+-----------------------
+
+Odds-based measures
+===================
+Stratum 1:
+Sample odds ratio: 0.19048 (0.01937, 1.87304)
+CMLE odds ratio: 0.19763 (0.00370, 2.25204)
+mid-p CMLE odds ratio: 0.21562 (0.00738, 1.74494)
+-----------------------
+
+==============================================================
+StatExact example 14.4.1 Alcohol and Oesophageal cancer values
+Breslow-Day homogeneity of OR chi-sq should be 9.323,p=0.0968
+CMLE common OR should be 5.251 with exact CI of (3.572, 7.757)
+and mid-p exact CI of (3.630, 7.629)
+Mantel-Haenszel common OR should be 5.158 with RGB CI of (3.562, 7.468)
+All p-values should be < 0.0001
+==============================================================
+
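+The Mantel-Haenszel common OR quoted above is sum(a*d/n)/sum(b*c/n) over
+the strata.  A sketch (using the cell counts tabulated below) that
+reproduces the expected 5.158:
+
+    strata = [(1, 0, 9, 106), (4, 5, 26, 164), (25, 21, 29, 138),
+              (42, 34, 27, 139), (19, 36, 18, 88), (5, 8, 0, 31)]
+    num = sum(float(a) * d / (a + b + c + d) for a, b, c, d in strata)
+    den = sum(float(b) * c / (a + b + c + d) for a, b, c, d in strata)
+    print 'MH common OR = %.3f' % (num / den)   # 5.158
+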
+Tabulated values
+================
+Stratum 1:
+a (exposed, disease): 1
+b (unexposed, disease): 0
+c (exposed, no disease): 9
+d (unexposed, no disease): 106
+-----------------------
+Stratum 2:
+a (exposed, disease): 4
+b (unexposed, disease): 5
+c (exposed, no disease): 26
+d (unexposed, no disease): 164
+-----------------------
+Stratum 3:
+a (exposed, disease): 25
+b (unexposed, disease): 21
+c (exposed, no disease): 29
+d (unexposed, no disease): 138
+-----------------------
+Stratum 4:
+a (exposed, disease): 42
+b (unexposed, disease): 34
+c (exposed, no disease): 27
+d (unexposed, no disease): 139
+-----------------------
+Stratum 5:
+a (exposed, disease): 19
+b (unexposed, disease): 36
+c (exposed, no disease): 18
+d (unexposed, no disease): 88
+-----------------------
+Stratum 6:
+a (exposed, disease): 5
+b (unexposed, disease): 8
+c (exposed, no disease): 0
+d (unexposed, no disease): 31
+-----------------------
+Unstratified (crude):
+a (exposed, disease): 96
+b (unexposed, disease): 104
+c (exposed, no disease): 109
+d (unexposed, no disease): 666
+-----------------------
+
+Measures of association
+=======================
+Stratum 1:
+Chi sq: 10.69217, p=0.00108
+Yates-corrected Chi sq: 2.19249, p=0.13869
+M-H Chi sq: 10.60000, p=0.00113
+Fisher's exact test: one-sided p=0.08621, two-sided (twice one-sided): p=0.17241, two-sided (as extreme): p=0.08621
+mid-p: one-sided p=0.04310, two-sided p=0.08621
+-----------------------
+Stratum 2:
+Chi sq: 6.35068, p=0.01173
+Yates-corrected Chi sq: 4.17529, p=0.04102
+M-H Chi sq: 6.31877, p=0.01195
+Fisher's exact test: one-sided p=0.03103, two-sided (twice one-sided): p=0.06206, two-sided (as extreme): p=0.03103
+mid-p: one-sided p=0.01782, two-sided p=0.03564
+-----------------------
+Stratum 3:
+Chi sq: 26.06495, p=0.00000
+Yates-corrected Chi sq: 24.14739, p=0.00000
+M-H Chi sq: 25.94258, p=0.00000
+Fisher's exact test: one-sided p=0.00000, two-sided (twice one-sided): p=0.00000, two-sided (as extreme): p=0.00000
+mid-p: one-sided p=0.00000, two-sided p=0.00000
+-----------------------
+Stratum 4:
+Chi sq: 38.89813, p=0.00000
+Yates-corrected Chi sq: 37.00837, p=0.00000
+M-H Chi sq: 38.73739, p=0.00000
+Fisher's exact test: one-sided p=0.00000, two-sided (twice one-sided): p=0.00000, two-sided (as extreme): p=0.00000
+mid-p: one-sided p=0.00000, two-sided p=0.00000
+-----------------------
+Stratum 5:
+Chi sq: 6.31152, p=0.01200
+Yates-corrected Chi sq: 5.35819, p=0.02063
+M-H Chi sq: 6.27232, p=0.01226
+Fisher's exact test: one-sided p=0.01123, two-sided (twice one-sided): p=0.02247, two-sided (as extreme): p=0.01726
+mid-p: one-sided p=0.00754, two-sided p=0.01507
+-----------------------
+Stratum 6:
+Chi sq: 13.45168, p=0.00024
+Yates-corrected Chi sq: 9.90413, p=0.00165
+M-H Chi sq: 13.14596, p=0.00029
+Fisher's exact test: one-sided p=0.00119, two-sided (twice one-sided): p=0.00237, two-sided (as extreme): p=0.00119
+mid-p: one-sided p=0.00059, two-sided p=0.00119
+-----------------------
+Unstratified (crude):
+Chi sq: 110.25539, p=0.00000
+Yates-corrected Chi sq: 108.22115, p=0.00000
+M-H Chi sq: 110.14230, p=0.00000
+Fisher's exact test: one-sided p=0.00000, two-sided (twice one-sided): p=0.00000, two-sided (as extreme): p=0.00000
+mid-p: one-sided p=0.00000, two-sided p=0.00000
+-----------------------
+Adjusted:
+Warning: expected values in some strata are < 5: use of exact statistics recommended.
+Mantel-Haenszel chi square with continuity correction: 83.21453 (p=0.00000)
+Mantel-Haenszel chi square without continuity correction: 85.00950 (p=0.00000)
+Fisher exact test: one-sided: p=0.00000, two-sided (twice one-sided): p=0.00000, two-sided (as extreme): p=0.00000
+Mid-p exact test: one-sided: p=0.00000, two-sided: p=0.00000
+-----------------------
+
+Risk-based measures
+===================
+Stratum 1:
+Risk in exposed: 0.10000 (0.00000, 0.42597)
+Risk in unexposed: 0.00000 (0.00000, 0.04200)
+Risk in overall population: 0.00862 (0.00000, 0.05198)
+Risk ratio: undefined (undefined, undefined)
+Risk difference: 0.10000 (-0.08594, 0.28594)
+-----------------------
+Stratum 2:
+Risk in exposed: 0.13333 (0.04695, 0.30296)
+Risk in unexposed: 0.02959 (0.01081, 0.06927)
+Risk in overall population: 0.04523 (0.02278, 0.08490)
+Risk ratio: 4.50667 (1.28328, 15.82663)
+Risk difference: 0.10375 (-0.02055, 0.22804)
+-----------------------
+Stratum 3:
+Risk in exposed: 0.46296 (0.33688, 0.59397)
+Risk in unexposed: 0.13208 (0.08734, 0.19417)
+Risk in overall population: 0.21596 (0.16577, 0.27622)
+Risk ratio: 3.50529 (2.14484, 5.72867)
+Risk difference: 0.33089 (0.18786, 0.47391)
+-----------------------
+Stratum 4:
+Risk in exposed: 0.60870 (0.49060, 0.71533)
+Risk in unexposed: 0.19653 (0.14383, 0.26242)
+Risk in overall population: 0.31405 (0.25879, 0.37512)
+Risk ratio: 3.09719 (2.17000, 4.42053)
+Risk difference: 0.41216 (0.28268, 0.54165)
+-----------------------
+Stratum 5:
+Risk in exposed: 0.51351 (0.35894, 0.66554)
+Risk in unexposed: 0.29032 (0.21744, 0.37580)
+Risk in overall population: 0.34161 (0.27272, 0.41789)
+Risk ratio: 1.76877 (1.16538, 2.68457)
+Risk difference: 0.22319 (0.04341, 0.40297)
+-----------------------
+Stratum 6:
+Risk in exposed: 1.00000 (0.51095, 1.00000)
+Risk in unexposed: 0.20513 (0.10525, 0.35788)
+Risk in overall population: 0.29545 (0.18061, 0.44315)
+Risk ratio: 4.87500 (2.62824, 9.04241)
+Risk difference: 0.79487 (0.66814, 0.92160)
+-----------------------
+Unstratified (crude):
+Risk in exposed: 0.46829 (0.40119, 0.53656)
+Risk in unexposed: 0.13506 (0.11266, 0.16109)
+Risk in overall population: 0.20513 (0.18094, 0.23163)
+Risk ratio: 3.46717 (2.75284, 4.36685)
+Risk difference: 0.33323 (0.26078, 0.40568)
+Aetiological fraction in the population: 0.34156 (0.26368, 0.41944)
+Aetiological fraction in the exposed: 0.71158 (0.63674, 0.77100)
+Prevented fraction in the population: -0.51874 (-0.72247, -0.35811)
+Prevented fraction in the exposed: -2.46717 (-3.36685, -1.75284)
+-----------------------
+Adjusted:
+Directly adjusted risk ratio: undefined (undefined, undefined)
+Mantel-Haenszel adjusted risk ratio: 2.88779 (2.31537, 3.60174)
+Breslow-Day chi square test for homogeneity of RR across strata: undefined (p=undefined)
+Directly adjusted risk difference: 0.36144 (0.30326, 0.41962)
+Breslow-Day chi square test for homogeneity of RD across strata: 72.07754 (p=0.00000)
+-----------------------
+
+Odds-based measures
+===================
+Stratum 1:
+Sample odds ratio: undefined (undefined, undefined)
+CMLE odds ratio: inf (0.27179, inf)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 2:
+Sample odds ratio: 5.04615 (1.27161, 20.02475)
+CMLE odds ratio: 4.98366 (0.92680, 24.85832)
+mid-p CMLE odds ratio: 5.01580 (1.12490, 20.93448)
+-----------------------
+Stratum 3:
+Sample odds ratio: 5.66502 (2.79941, 11.46401)
+CMLE odds ratio: 5.60611 (2.63074, 12.15651)
+mid-p CMLE odds ratio: 5.59363 (2.77052, 11.49883)
+-----------------------
+Stratum 4:
+Sample odds ratio: 6.35948 (3.44904, 11.72585)
+CMLE odds ratio: 6.29985 (3.29887, 12.28189)
+mid-p CMLE odds ratio: 6.28295 (3.43102, 11.75666)
+-----------------------
+Stratum 5:
+Sample odds ratio: 2.58025 (1.21602, 5.47497)
+CMLE odds ratio: 2.56375 (1.13146, 5.85642)
+mid-p CMLE odds ratio: 2.56110 (1.20101, 5.50720)
+-----------------------
+Stratum 6:
+Sample odds ratio: undefined (undefined, undefined)
+CMLE odds ratio: inf (2.76183, inf)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Unstratified (crude):
+Sample odds ratio: 5.64008 (4.00059, 7.95147)
+CMLE odds ratio: 5.62677 (3.93682, 8.06148)
+mid-p CMLE odds ratio: 5.62477 (3.99201, 7.94716)
+Aetiological fraction in the population: 0.39489 (0.31250, 0.47729)
+Aetiological fraction in the exposed: 0.82270 (0.75004, 0.87424)
+Prevented fraction in the population: -0.65261 (-0.91310, -0.45455)
+Prevented fraction in the exposed: -4.64008 (-6.95147, -3.00059)
+-----------------------
+Adjusted:
+Directly adjusted common odds ratio: undefined (undefined, undefined)
+Mantel-Haenszel common odds ratio: 5.15762 (3.56213, 7.46774)
+CMLE common odds ratio: 5.25095 (3.57214, 7.75832)
+Breslow-Day chi square test for homogeneity of OR across strata: undefined (p=undefined)
+Woolf chi square test for homogeneity of OR across strata: 6.93094, df=5.00000 (p=0.22583)
+-----------------------
+
+StatExact example values
+
+Tabulated values
+================
+Stratum 1:
+a (exposed, disease): 1
+b (unexposed, disease): 0
+c (exposed, no disease): 9
+d (unexposed, no disease): 106
+-----------------------
+
+Measures of association
+=======================
+Stratum 1:
+Chi sq: 10.69217, p=0.00108
+Yates-corrected Chi sq: 2.19249, p=0.13869
+M-H Chi sq: 10.60000, p=0.00113
+Fisher's exact test: one-sided p=0.08621, two-sided (twice one-sided): p=0.17241, two-sided (as extreme): p=0.08621
+mid-p: one-sided p=0.04310, two-sided p=0.08621
+-----------------------
+
+Risk-based measures
+===================
+Stratum 1:
+Risk in exposed: 0.10000 (0.00000, 0.42597)
+Risk in unexposed: 0.00000 (0.00000, 0.04200)
+Risk in overall population: 0.00862 (0.00000, 0.05198)
+Risk ratio: undefined (undefined, undefined)
+Risk difference: 0.10000 (-0.08594, 0.28594)
+-----------------------
+
+Odds-based measures
+===================
+Stratum 1:
+Sample odds ratio: undefined (undefined, undefined)
+CMLE odds ratio: inf (0.27179, inf)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+
+==============================================================
+StatExact example 14.5.3 Extremely imbalanced minority hiring
+CMLE common OR should be +Inf with exact CI of (1.819, +Inf)
+and mid-p exact CI of (3.069, +Inf)
+Mantel-Haenszel common OR cannot be estimated
+One-sided exact p-value for common OR=1.0 should be 0.0022
+Two-sided exact p-value for common OR=1.0 should be 0.0043 or 0.0044
+MH two-sided p-value for common OR=1.0 should be 0.0063
+==============================================================
+
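+Every stratum below has b = 0 (no unexposed cases), so each b*c/n term in
+the Mantel-Haenszel denominator sum(b*c/n) vanishes and the common OR
+estimate diverges to +Inf, which is why it "cannot be estimated" above.
+A sketch of the degenerate denominator, from the first three strata below:
+
+    strata = [(4, 0, 16, 7), (4, 0, 13, 7), (2, 0, 13, 8)]
+    den = sum(float(b) * c / (a + b + c + d) for a, b, c, d in strata)
+    print den   # 0.0 -- dividing sum(a*d/n) by this yields inf
+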
+Tabulated values
+================
+Stratum 1:
+a (exposed, disease): 4
+b (unexposed, disease): 0
+c (exposed, no disease): 16
+d (unexposed, no disease): 7
+-----------------------
+Stratum 2:
+a (exposed, disease): 4
+b (unexposed, disease): 0
+c (exposed, no disease): 13
+d (unexposed, no disease): 7
+-----------------------
+Stratum 3:
+a (exposed, disease): 2
+b (unexposed, disease): 0
+c (exposed, no disease): 13
+d (unexposed, no disease): 8
+-----------------------
+Stratum 4:
+a (exposed, disease): 1
+b (unexposed, disease): 0
+c (exposed, no disease): 17
+d (unexposed, no disease): 8
+-----------------------
+Stratum 5:
+a (exposed, disease): 1
+b (unexposed, disease): 0
+c (exposed, no disease): 17
+d (unexposed, no disease): 8
+-----------------------
+Stratum 6:
+a (exposed, disease): 1
+b (unexposed, disease): 0
+c (exposed, no disease): 29
+d (unexposed, no disease): 10
+-----------------------
+Stratum 7:
+a (exposed, disease): 2
+b (unexposed, disease): 0
+c (exposed, no disease): 29
+d (unexposed, no disease): 10
+-----------------------
+Stratum 8:
+a (exposed, disease): 1
+b (unexposed, disease): 0
+c (exposed, no disease): 30
+d (unexposed, no disease): 10
+-----------------------
+Stratum 9:
+a (exposed, disease): 1
+b (unexposed, disease): 0
+c (exposed, no disease): 30
+d (unexposed, no disease): 10
+-----------------------
+Stratum 10:
+a (exposed, disease): 1
+b (unexposed, disease): 0
+c (exposed, no disease): 33
+d (unexposed, no disease): 13
+-----------------------
+Unstratified (crude):
+a (exposed, disease): 18
+b (unexposed, disease): 0
+c (exposed, no disease): 227
+d (unexposed, no disease): 91
+-----------------------
+
+Measures of association
+=======================
+Stratum 1:
+Chi sq: 1.64348, p=0.19985
+Yates-corrected Chi sq: 0.44074, p=0.50676
+M-H Chi sq: 1.58261, p=0.20839
+Fisher's exact test: one-sided p=0.27607, two-sided (twice one-sided): p=0.55214, two-sided (as extreme): p=0.54530
+mid-p: one-sided p=0.13803, two-sided p=0.27607
+-----------------------
+Stratum 2:
+Chi sq: 1.97647, p=0.15976
+Yates-corrected Chi sq: 0.64538, p=0.42177
+M-H Chi sq: 1.89412, p=0.16874
+Fisher's exact test: one-sided p=0.22398, two-sided (twice one-sided): p=0.44796, two-sided (as extreme): p=0.28327
+mid-p: one-sided p=0.11199, two-sided p=0.22398
+-----------------------
+Stratum 3:
+Chi sq: 1.16825, p=0.27976
+Yates-corrected Chi sq: 0.09241, p=0.76113
+M-H Chi sq: 1.11746, p=0.29047
+Fisher's exact test: one-sided p=0.41502, two-sided (twice one-sided): p=0.83004, two-sided (as extreme): p=0.52569
+mid-p: one-sided p=0.20751, two-sided p=0.41502
+-----------------------
+Stratum 4:
+Chi sq: 0.46222, p=0.49659
+Yates-corrected Chi sq: 0.18056, p=0.67090
+M-H Chi sq: 0.44444, p=0.50499
+Fisher's exact test: one-sided p=0.69231, two-sided (twice one-sided): p=1.38462, two-sided (as extreme): p=1.00000
+mid-p: one-sided p=0.34615, two-sided p=0.69231
+-----------------------
+Stratum 5:
+Chi sq: 0.46222, p=0.49659
+Yates-corrected Chi sq: 0.18056, p=0.67090
+M-H Chi sq: 0.44444, p=0.50499
+Fisher's exact test: one-sided p=0.69231, two-sided (twice one-sided): p=1.38462, two-sided (as extreme): p=1.00000
+mid-p: one-sided p=0.34615, two-sided p=0.69231
+-----------------------
+Stratum 6:
+Chi sq: 0.34188, p=0.55875
+Yates-corrected Chi sq: 0.34188, p=0.55875
+M-H Chi sq: 0.33333, p=0.56370
+Fisher's exact test: one-sided p=0.75000, two-sided (twice one-sided): p=1.50000, two-sided (as extreme): p=1.00000
+mid-p: one-sided p=0.37500, two-sided p=0.75000
+-----------------------
+Stratum 7:
+Chi sq: 0.67825, p=0.41019
+Yates-corrected Chi sq: 0.00042, p=0.98357
+M-H Chi sq: 0.66170, p=0.41596
+Fisher's exact test: one-sided p=0.56707, two-sided (twice one-sided): p=1.13415, two-sided (as extreme): p=1.00000
+mid-p: one-sided p=0.28354, two-sided p=0.56707
+-----------------------
+Stratum 8:
+Chi sq: 0.33065, p=0.56528
+Yates-corrected Chi sq: 0.36454, p=0.54600
+M-H Chi sq: 0.32258, p=0.57006
+Fisher's exact test: one-sided p=0.75610, two-sided (twice one-sided): p=1.51220, two-sided (as extreme): p=1.00000
+mid-p: one-sided p=0.37805, two-sided p=0.75610
+-----------------------
+Stratum 9:
+Chi sq: 0.33065, p=0.56528
+Yates-corrected Chi sq: 0.36454, p=0.54600
+M-H Chi sq: 0.32258, p=0.57006
+Fisher's exact test: one-sided p=0.75610, two-sided (twice one-sided): p=1.51220, two-sided (as extreme): p=1.00000
+mid-p: one-sided p=0.37805, two-sided p=0.75610
+-----------------------
+Stratum 10:
+Chi sq: 0.39066, p=0.53195
+Yates-corrected Chi sq: 0.25486, p=0.61368
+M-H Chi sq: 0.38235, p=0.53635
+Fisher's exact test: one-sided p=0.72340, two-sided (twice one-sided): p=1.44681, two-sided (as extreme): p=1.00000
+mid-p: one-sided p=0.36170, two-sided p=0.72340
+-----------------------
+Unstratified (crude):
+Chi sq: 7.06415, p=0.00786
+Yates-corrected Chi sq: 5.68940, p=0.01707
+M-H Chi sq: 7.04313, p=0.00796
+Fisher's exact test: one-sided p=0.00285, two-sided (twice one-sided): p=0.00569, two-sided (as extreme): p=0.00476
+mid-p: one-sided p=0.00142, two-sided p=0.00285
+-----------------------
+Adjusted:
+Warning: expected values in some strata are < 5: use of exact statistics recommended.
+Mantel-Haenszel chi square with continuity correction: 6.04838 (p=0.01392)
+Mantel-Haenszel chi square without continuity correction: 7.46154 (p=0.00630)
+Fisher exact test: one-sided: p=0.00216, two-sided (twice one-sided): p=0.00433, two-sided (as extreme): p=0.00444
+Mid-p exact test: one-sided: p=0.00108, two-sided: p=0.00216
+-----------------------
+
+Risk-based measures
+===================
+Stratum 1:
+Risk in exposed: 0.20000 (0.07491, 0.42176)
+Risk in unexposed: 0.00000 (0.00000, 0.40444)
+Risk in overall population: 0.14815 (0.05297, 0.33097)
+Risk ratio: undefined (undefined, undefined)
+Risk difference: 0.20000 (0.02470, 0.37530)
+-----------------------
+Stratum 2:
+Risk in exposed: 0.23529 (0.09047, 0.47770)
+Risk in unexposed: 0.00000 (0.00000, 0.40444)
+Risk in overall population: 0.16667 (0.06067, 0.36465)
+Risk ratio: undefined (undefined, undefined)
+Risk difference: 0.23529 (0.03365, 0.43693)
+-----------------------
+Stratum 3:
+Risk in exposed: 0.13333 (0.02479, 0.39139)
+Risk in unexposed: 0.00000 (0.00000, 0.37217)
+Risk in overall population: 0.08696 (0.01246, 0.27968)
+Risk ratio: undefined (undefined, undefined)
+Risk difference: 0.13333 (-0.03869, 0.30536)
+-----------------------
+Stratum 4:
+Risk in exposed: 0.05556 (0.00000, 0.27646)
+Risk in unexposed: 0.00000 (0.00000, 0.37217)
+Risk in overall population: 0.03846 (0.00000, 0.20449)
+Risk ratio: undefined (undefined, undefined)
+Risk difference: 0.05556 (-0.05026, 0.16137)
+-----------------------
+Stratum 5:
+Risk in exposed: 0.05556 (0.00000, 0.27646)
+Risk in unexposed: 0.00000 (0.00000, 0.37217)
+Risk in overall population: 0.03846 (0.00000, 0.20449)
+Risk ratio: undefined (undefined, undefined)
+Risk difference: 0.05556 (-0.05026, 0.16137)
+-----------------------
+Stratum 6:
+Risk in exposed: 0.03333 (0.00000, 0.18092)
+Risk in unexposed: 0.00000 (0.00000, 0.32089)
+Risk in overall population: 0.02500 (0.00000, 0.14043)
+Risk ratio: undefined (undefined, undefined)
+Risk difference: 0.03333 (-0.03090, 0.09757)
+-----------------------
+Stratum 7:
+Risk in exposed: 0.06452 (0.00760, 0.21746)
+Risk in unexposed: 0.00000 (0.00000, 0.32089)
+Risk in overall population: 0.04878 (0.00476, 0.17011)
+Risk ratio: undefined (undefined, undefined)
+Risk difference: 0.06452 (-0.02196, 0.15100)
+-----------------------
+Stratum 8:
+Risk in exposed: 0.03226 (0.00000, 0.17585)
+Risk in unexposed: 0.00000 (0.00000, 0.32089)
+Risk in overall population: 0.02439 (0.00000, 0.13736)
+Risk ratio: undefined (undefined, undefined)
+Risk difference: 0.03226 (-0.02994, 0.09445)
+-----------------------
+Stratum 9:
+Risk in exposed: 0.03226 (0.00000, 0.17585)
+Risk in unexposed: 0.00000 (0.00000, 0.32089)
+Risk in overall population: 0.02439 (0.00000, 0.13736)
+Risk ratio: undefined (undefined, undefined)
+Risk difference: 0.03226 (-0.02994, 0.09445)
+-----------------------
+Stratum 10:
+Risk in exposed: 0.02941 (0.00000, 0.16222)
+Risk in unexposed: 0.00000 (0.00000, 0.26586)
+Risk in overall population: 0.02128 (0.00000, 0.12141)
+Risk ratio: undefined (undefined, undefined)
+Risk difference: 0.02941 (-0.02738, 0.08620)
+-----------------------
+Unstratified (crude):
+Risk in exposed: 0.07347 (0.04634, 0.11377)
+Risk in unexposed: 0.00000 (0.00000, 0.04860)
+Risk in overall population: 0.05357 (0.03364, 0.08359)
+Risk ratio: undefined (undefined, undefined)
+Risk difference: 0.07347 (0.04080, 0.10614)
+Aetiological fraction in the population: 1.00000 (1.00000, 1.00000)
+Aetiological fraction in the exposed: undefined (undefined, undefined)
+Prevented fraction in the population: undefined (undefined, undefined)
+Prevented fraction in the exposed: undefined (undefined, undefined)
+-----------------------
+Adjusted:
+Directly adjusted risk ratio: undefined (undefined, undefined)
+Mantel-Haenszel adjusted risk ratio: undefined (undefined, undefined)
+Breslow-Day chi square test for homogeneity of RR across strata: undefined (p=undefined)
+Directly adjusted risk difference: 0.04701 (0.02095, 0.07307)
+Breslow-Day chi square test for homogeneity of RD across strata: 8.42520 (p=0.49193)
+-----------------------
+
+Odds-based measures
+===================
+Stratum 1:
+Sample odds ratio: undefined (undefined, undefined)
+CMLE odds ratio: inf (0.22521, inf)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 2:
+Sample odds ratio: undefined (undefined, undefined)
+CMLE odds ratio: inf (0.27102, inf)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 3:
+Sample odds ratio: undefined (undefined, undefined)
+CMLE odds ratio: inf (0.09860, inf)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 4:
+Sample odds ratio: undefined (undefined, undefined)
+CMLE odds ratio: inf (0.01141, inf)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 5:
+Sample odds ratio: undefined (undefined, undefined)
+CMLE odds ratio: inf (0.01141, inf)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 6:
+Sample odds ratio: undefined (undefined, undefined)
+CMLE odds ratio: inf (0.00856, inf)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 7:
+Sample odds ratio: undefined (undefined, undefined)
+CMLE odds ratio: inf (0.05909, inf)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 8:
+Sample odds ratio: undefined (undefined, undefined)
+CMLE odds ratio: inf (0.00829, inf)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 9:
+Sample odds ratio: undefined (undefined, undefined)
+CMLE odds ratio: inf (0.00829, inf)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 10:
+Sample odds ratio: undefined (undefined, undefined)
+CMLE odds ratio: inf (0.00982, inf)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Unstratified (crude):
+Sample odds ratio: undefined (undefined, undefined)
+CMLE odds ratio: inf (1.71298, inf)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+Aetiological fraction in the population: undefined (undefined, undefined)
+Aetiological fraction in the exposed: undefined (undefined, undefined)
+Prevented fraction in the population: undefined (undefined, undefined)
+Prevented fraction in the exposed: undefined (undefined, undefined)
+-----------------------
+Adjusted:
+Directly adjusted common odds ratio: undefined (undefined, undefined)
+Mantel-Haenszel common odds ratio: inf (nan, nan)
+CMLE common odds ratio: inf (1.81920, inf)
+Breslow-Day chi square test for homogeneity of OR across strata: undefined (p=undefined)
+Woolf chi square test for homogeneity of OR across strata: 1.23379, df=9.00000 (p=0.99868)
+-----------------------
+
+==============================================================
+Another extremely ill-conditioned data example - all zeros in cell A
+==============================================================
+
+Tabulated values
+================
+Stratum 1:
+a (exposed, disease): 0
+b (unexposed, disease): 4
+c (exposed, no disease): 16
+d (unexposed, no disease): 7
+-----------------------
+Stratum 2:
+a (exposed, disease): 0
+b (unexposed, disease): 4
+c (exposed, no disease): 13
+d (unexposed, no disease): 7
+-----------------------
+Stratum 3:
+a (exposed, disease): 0
+b (unexposed, disease): 2
+c (exposed, no disease): 13
+d (unexposed, no disease): 8
+-----------------------
+Stratum 4:
+a (exposed, disease): 0
+b (unexposed, disease): 1
+c (exposed, no disease): 17
+d (unexposed, no disease): 8
+-----------------------
+Stratum 5:
+a (exposed, disease): 0
+b (unexposed, disease): 1
+c (exposed, no disease): 17
+d (unexposed, no disease): 8
+-----------------------
+Stratum 6:
+a (exposed, disease): 0
+b (unexposed, disease): 1
+c (exposed, no disease): 29
+d (unexposed, no disease): 10
+-----------------------
+Stratum 7:
+a (exposed, disease): 0
+b (unexposed, disease): 2
+c (exposed, no disease): 29
+d (unexposed, no disease): 10
+-----------------------
+Stratum 8:
+a (exposed, disease): 0
+b (unexposed, disease): 1
+c (exposed, no disease): 30
+d (unexposed, no disease): 10
+-----------------------
+Stratum 9:
+a (exposed, disease): 0
+b (unexposed, disease): 1
+c (exposed, no disease): 30
+d (unexposed, no disease): 10
+-----------------------
+Stratum 10:
+a (exposed, disease): 0
+b (unexposed, disease): 1
+c (exposed, no disease): 33
+d (unexposed, no disease): 13
+-----------------------
+Unstratified (crude):
+a (exposed, disease): 0
+b (unexposed, disease): 18
+c (exposed, no disease): 227
+d (unexposed, no disease): 91
+-----------------------
+
+Measures of association
+=======================
+Stratum 1:
+Chi sq: 6.83004, p=0.00896
+Yates-corrected Chi sq: 4.25252, p=0.03919
+M-H Chi sq: 6.57708, p=0.01033
+Fisher's exact test: one-sided p=0.01880, two-sided (twice one-sided): p=0.03761, two-sided (as extreme): p=0.01880
+mid-p: one-sided p=0.00940, two-sided p=0.01880
+-----------------------
+Stratum 2:
+Chi sq: 5.67273, p=0.01723
+Yates-corrected Chi sq: 3.35664, p=0.06693
+M-H Chi sq: 5.43636, p=0.01972
+Fisher's exact test: one-sided p=0.03106, two-sided (twice one-sided): p=0.06211, two-sided (as extreme): p=0.03106
+mid-p: one-sided p=0.01553, two-sided p=0.03106
+-----------------------
+Stratum 3:
+Chi sq: 2.84762, p=0.09151
+Yates-corrected Chi sq: 0.88567, p=0.34665
+M-H Chi sq: 2.72381, p=0.09886
+Fisher's exact test: one-sided p=0.17787, two-sided (twice one-sided): p=0.35573, two-sided (as extreme): p=0.17787
+mid-p: one-sided p=0.08893, two-sided p=0.17787
+-----------------------
+Stratum 4:
+Chi sq: 1.96444, p=0.16104
+Yates-corrected Chi sq: 0.10876, p=0.74156
+M-H Chi sq: 1.88889, p=0.16933
+Fisher's exact test: one-sided p=0.34615, two-sided (twice one-sided): p=0.69231, two-sided (as extreme): p=0.34615
+mid-p: one-sided p=0.17308, two-sided p=0.34615
+-----------------------
+Stratum 5:
+Chi sq: 1.96444, p=0.16104
+Yates-corrected Chi sq: 0.10876, p=0.74156
+M-H Chi sq: 1.88889, p=0.16933
+Fisher's exact test: one-sided p=0.34615, two-sided (twice one-sided): p=0.69231, two-sided (as extreme): p=0.34615
+mid-p: one-sided p=0.17308, two-sided p=0.34615
+-----------------------
+Stratum 6:
+Chi sq: 2.70396, p=0.10010
+Yates-corrected Chi sq: 0.26043, p=0.60983
+M-H Chi sq: 2.63636, p=0.10444
+Fisher's exact test: one-sided p=0.27500, two-sided (twice one-sided): p=0.55000, two-sided (as extreme): p=0.27500
+mid-p: one-sided p=0.13750, two-sided p=0.27500
+-----------------------
+Stratum 7:
+Chi sq: 5.08120, p=0.02419
+Yates-corrected Chi sq: 2.12409, p=0.14500
+M-H Chi sq: 4.95726, p=0.02598
+Fisher's exact test: one-sided p=0.08049, two-sided (twice one-sided): p=0.16098, two-sided (as extreme): p=0.08049
+mid-p: one-sided p=0.04024, two-sided p=0.08049
+-----------------------
+Stratum 8:
+Chi sq: 2.79545, p=0.09453
+Yates-corrected Chi sq: 0.28032, p=0.59649
+M-H Chi sq: 2.72727, p=0.09865
+Fisher's exact test: one-sided p=0.26829, two-sided (twice one-sided): p=0.53659, two-sided (as extreme): p=0.26829
+mid-p: one-sided p=0.13415, two-sided p=0.26829
+-----------------------
+Stratum 9:
+Chi sq: 2.79545, p=0.09453
+Yates-corrected Chi sq: 0.28032, p=0.59649
+M-H Chi sq: 2.72727, p=0.09865
+Fisher's exact test: one-sided p=0.26829, two-sided (twice one-sided): p=0.53659, two-sided (as extreme): p=0.26829
+mid-p: one-sided p=0.13415, two-sided p=0.26829
+-----------------------
+Stratum 10:
+Chi sq: 2.40839, p=0.12069
+Yates-corrected Chi sq: 0.19959, p=0.65505
+M-H Chi sq: 2.35714, p=0.12471
+Fisher's exact test: one-sided p=0.29787, two-sided (twice one-sided): p=0.59574, two-sided (as extreme): p=0.29787
+mid-p: one-sided p=0.14894, two-sided p=0.29787
+-----------------------
+Unstratified (crude):
+Chi sq: 39.60810, p=0.00000
+Yates-corrected Chi sq: 36.41801, p=0.00000
+M-H Chi sq: 39.49022, p=0.00000
+Fisher's exact test: one-sided p=0.00000, two-sided (twice one-sided): p=0.00000, two-sided (as extreme): p=0.00000
+mid-p: one-sided p=0.00000, two-sided p=0.00000
+-----------------------
+Adjusted:
+Warning: expected values in some strata are < 5: use of exact statistics recommended.
+Mantel-Haenszel chi square with continuity correction: 30.22947 (p=0.00000)
+Mantel-Haenszel chi square without continuity correction: 33.09863 (p=0.00000)
+Fisher exact test: one-sided: p=0.00000, two-sided (twice one-sided): p=0.00000, two-sided (as extreme): p=0.00000
+Mid-p exact test: one-sided: p=0.00000, two-sided: p=0.00000
+-----------------------
+
+Risk-based measures
+===================
+Stratum 1:
+Risk in exposed: 0.00000 (0.00000, 0.22691)
+Risk in unexposed: 0.36364 (0.14980, 0.64806)
+Risk in overall population: 0.14815 (0.05297, 0.33097)
+Risk ratio: 0.00000 (undefined, undefined)
+Risk difference: -0.36364 (-0.64791, -0.07936)
+-----------------------
+Stratum 2:
+Risk in exposed: 0.00000 (0.00000, 0.26586)
+Risk in unexposed: 0.36364 (0.14980, 0.64806)
+Risk in overall population: 0.16667 (0.06067, 0.36465)
+Risk ratio: 0.00000 (undefined, undefined)
+Risk difference: -0.36364 (-0.64791, -0.07936)
+-----------------------
+Stratum 3:
+Risk in exposed: 0.00000 (0.00000, 0.26586)
+Risk in unexposed: 0.20000 (0.04589, 0.52063)
+Risk in overall population: 0.08696 (0.01246, 0.27968)
+Risk ratio: 0.00000 (undefined, undefined)
+Risk difference: -0.20000 (-0.44792, 0.04792)
+-----------------------
+Stratum 4:
+Risk in exposed: 0.00000 (0.00000, 0.21634)
+Risk in unexposed: 0.11111 (0.00000, 0.45671)
+Risk in overall population: 0.03846 (0.00000, 0.20449)
+Risk ratio: 0.00000 (undefined, undefined)
+Risk difference: -0.11111 (-0.31643, 0.09421)
+-----------------------
+Stratum 5:
+Risk in exposed: 0.00000 (0.00000, 0.21634)
+Risk in unexposed: 0.11111 (0.00000, 0.45671)
+Risk in overall population: 0.03846 (0.00000, 0.20449)
+Risk ratio: 0.00000 (undefined, undefined)
+Risk difference: -0.11111 (-0.31643, 0.09421)
+-----------------------
+Stratum 6:
+Risk in exposed: 0.00000 (0.00000, 0.13874)
+Risk in unexposed: 0.09091 (0.00000, 0.39906)
+Risk in overall population: 0.02500 (0.00000, 0.14043)
+Risk ratio: 0.00000 (undefined, undefined)
+Risk difference: -0.09091 (-0.26080, 0.07898)
+-----------------------
+Stratum 7:
+Risk in exposed: 0.00000 (0.00000, 0.13874)
+Risk in unexposed: 0.16667 (0.03498, 0.46001)
+Risk in overall population: 0.04878 (0.00476, 0.17011)
+Risk ratio: 0.00000 (undefined, undefined)
+Risk difference: -0.16667 (-0.37753, 0.04419)
+-----------------------
+Stratum 8:
+Risk in exposed: 0.00000 (0.00000, 0.13471)
+Risk in unexposed: 0.09091 (0.00000, 0.39906)
+Risk in overall population: 0.02439 (0.00000, 0.13736)
+Risk ratio: 0.00000 (undefined, undefined)
+Risk difference: -0.09091 (-0.26080, 0.07898)
+-----------------------
+Stratum 9:
+Risk in exposed: 0.00000 (0.00000, 0.13471)
+Risk in unexposed: 0.09091 (0.00000, 0.39906)
+Risk in overall population: 0.02439 (0.00000, 0.13736)
+Risk ratio: 0.00000 (undefined, undefined)
+Risk difference: -0.09091 (-0.26080, 0.07898)
+-----------------------
+Stratum 10:
+Risk in exposed: 0.00000 (0.00000, 0.12392)
+Risk in unexposed: 0.07143 (0.00000, 0.33539)
+Risk in overall population: 0.02128 (0.00000, 0.12141)
+Risk ratio: 0.00000 (undefined, undefined)
+Risk difference: -0.07143 (-0.20633, 0.06348)
+-----------------------
+Unstratified (crude):
+Risk in exposed: 0.00000 (0.00000, 0.02004)
+Risk in unexposed: 0.16514 (0.10619, 0.24689)
+Risk in overall population: 0.05357 (0.03364, 0.08359)
+Risk ratio: 0.00000 (undefined, undefined)
+Risk difference: -0.16514 (-0.23484, -0.09543)
+Aetiological fraction in the population: -1.00000 (-1.00000, -0.52435)
+Aetiological fraction in the exposed: undefined (undefined, undefined)
+Prevented fraction in the population: 0.67560 (0.34398, 0.50000)
+Prevented fraction in the exposed: 1.00000 (undefined, undefined)
+-----------------------
+Adjusted:
+Directly adjusted risk ratio: undefined (undefined, undefined)
+Mantel-Haenszel adjusted risk ratio: undefined (undefined, undefined)
+Breslow-Day chi square test for homogeneity of RR across strata: undefined (p=undefined)
+Directly adjusted risk difference: -0.12840 (-0.18927, -0.06752)
+Breslow-Day chi square test for homogeneity of RD across strata: 7.00864 (p=0.63622)
+-----------------------
+
+Odds-based measures
+===================
+Stratum 1:
+Sample odds ratio: 0.00000 (undefined, undefined)
+CMLE odds ratio: 0.00000 (0.00000, 0.88436)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 2:
+Sample odds ratio: 0.00000 (undefined, undefined)
+CMLE odds ratio: 0.00000 (0.00000, 1.10322)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 3:
+Sample odds ratio: 0.00000 (undefined, undefined)
+CMLE odds ratio: 0.00000 (0.00000, 3.98270)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 4:
+Sample odds ratio: 0.00000 (undefined, undefined)
+CMLE odds ratio: 0.00000 (0.00000, 20.64714)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 5:
+Sample odds ratio: 0.00000 (undefined, undefined)
+CMLE odds ratio: 0.00000 (0.00000, 20.64714)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 6:
+Sample odds ratio: 0.00000 (undefined, undefined)
+CMLE odds ratio: 0.00000 (0.00000, 14.79313)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 7:
+Sample odds ratio: 0.00000 (undefined, undefined)
+CMLE odds ratio: 0.00000 (0.00000, 2.12558)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 8:
+Sample odds ratio: 0.00000 (undefined, undefined)
+CMLE odds ratio: 0.00000 (0.00000, 14.30002)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 9:
+Sample odds ratio: 0.00000 (undefined, undefined)
+CMLE odds ratio: 0.00000 (0.00000, 14.30002)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 10:
+Sample odds ratio: 0.00000 (undefined, undefined)
+CMLE odds ratio: 0.00000 (0.00000, 16.54549)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Unstratified (crude):
+Sample odds ratio: 0.00000 (undefined, undefined)
+CMLE odds ratio: 0.00000 (0.00000, 0.09437)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+Aetiological fraction in the population: -1.00000 (-1.00000, -0.39339)
+Aetiological fraction in the exposed: undefined (undefined, undefined)
+Prevented fraction in the population: 0.71384 (0.28232, 0.50000)
+Prevented fraction in the exposed: 1.00000 (undefined, undefined)
+-----------------------
+Adjusted:
+Directly adjusted common odds ratio: undefined (undefined, undefined)
+Mantel-Haenszel common odds ratio: 0.00000 (nan, nan)
+CMLE common odds ratio: 0.00000 (0.00000, 0.11373)
+Breslow-Day chi square test for homogeneity of OR across strata: undefined (p=undefined)
+Woolf chi square test for homogeneity of OR across strata: 0.57230, df=9.00000 (p=0.99995)
+-----------------------
+
+==============================================================
+Another extremely ill-conditioned data example - all zeros in cell C
+==============================================================
+
+Tabulated values
+================
+Stratum 1:
+a (exposed, disease): 16
+b (unexposed, disease): 4
+c (exposed, no disease): 0
+d (unexposed, no disease): 7
+-----------------------
+Stratum 2:
+a (exposed, disease): 13
+b (unexposed, disease): 4
+c (exposed, no disease): 0
+d (unexposed, no disease): 7
+-----------------------
+Stratum 3:
+a (exposed, disease): 13
+b (unexposed, disease): 2
+c (exposed, no disease): 0
+d (unexposed, no disease): 8
+-----------------------
+Stratum 4:
+a (exposed, disease): 17
+b (unexposed, disease): 1
+c (exposed, no disease): 0
+d (unexposed, no disease): 8
+-----------------------
+Stratum 5:
+a (exposed, disease): 17
+b (unexposed, disease): 1
+c (exposed, no disease): 0
+d (unexposed, no disease): 8
+-----------------------
+Stratum 6:
+a (exposed, disease): 29
+b (unexposed, disease): 1
+c (exposed, no disease): 0
+d (unexposed, no disease): 10
+-----------------------
+Stratum 7:
+a (exposed, disease): 29
+b (unexposed, disease): 2
+c (exposed, no disease): 0
+d (unexposed, no disease): 10
+-----------------------
+Stratum 8:
+a (exposed, disease): 30
+b (unexposed, disease): 1
+c (exposed, no disease): 0
+d (unexposed, no disease): 10
+-----------------------
+Stratum 9:
+a (exposed, disease): 30
+b (unexposed, disease): 1
+c (exposed, no disease): 0
+d (unexposed, no disease): 10
+-----------------------
+Stratum 10:
+a (exposed, disease): 33
+b (unexposed, disease): 1
+c (exposed, no disease): 0
+d (unexposed, no disease): 13
+-----------------------
+Unstratified (crude):
+a (exposed, disease): 227
+b (unexposed, disease): 18
+c (exposed, no disease): 0
+d (unexposed, no disease): 91
+-----------------------
+
+Measures of association
+=======================
+Stratum 1:
+Chi sq: 13.74545, p=0.00021
+Yates-corrected Chi sq: 10.63152, p=0.00111
+M-H Chi sq: 13.23636, p=0.00027
+Fisher's exact test: one-sided p=0.00037, two-sided (twice one-sided): p=0.00074, two-sided (as extreme): p=0.00037
+mid-p: one-sided p=0.00019, two-sided p=0.00037
+-----------------------
+Stratum 2:
+Chi sq: 11.67914, p=0.00063
+Yates-corrected Chi sq: 8.80202, p=0.00301
+M-H Chi sq: 11.19251, p=0.00082
+Fisher's exact test: one-sided p=0.00095, two-sided (twice one-sided): p=0.00191, two-sided (as extreme): p=0.00095
+mid-p: one-sided p=0.00048, two-sided p=0.00095
+-----------------------
+Stratum 3:
+Chi sq: 15.94667, p=0.00007
+Yates-corrected Chi sq: 12.61498, p=0.00038
+M-H Chi sq: 15.25333, p=0.00009
+Fisher's exact test: one-sided p=0.00009, two-sided (twice one-sided): p=0.00018, two-sided (as extreme): p=0.00009
+mid-p: one-sided p=0.00005, two-sided p=0.00009
+-----------------------
+Stratum 4:
+Chi sq: 21.82716, p=0.00000
+Yates-corrected Chi sq: 17.85376, p=0.00002
+M-H Chi sq: 20.98765, p=0.00000
+Fisher's exact test: one-sided p=0.00001, two-sided (twice one-sided): p=0.00001, two-sided (as extreme): p=0.00001
+mid-p: one-sided p=0.00000, two-sided p=0.00001
+-----------------------
+Stratum 5:
+Chi sq: 21.82716, p=0.00000
+Yates-corrected Chi sq: 17.85376, p=0.00002
+M-H Chi sq: 20.98765, p=0.00000
+Fisher's exact test: one-sided p=0.00001, two-sided (twice one-sided): p=0.00001, two-sided (as extreme): p=0.00001
+mid-p: one-sided p=0.00000, two-sided p=0.00001
+-----------------------
+Stratum 6:
+Chi sq: 35.15152, p=0.00000
+Yates-corrected Chi sq: 30.47022, p=0.00000
+M-H Chi sq: 34.27273, p=0.00000
+Fisher's exact test: one-sided p=0.00000, two-sided (twice one-sided): p=0.00000, two-sided (as extreme): p=0.00000
+mid-p: one-sided p=0.00000, two-sided p=0.00000
+-----------------------
+Stratum 7:
+Chi sq: 31.96237, p=0.00000
+Yates-corrected Chi sq: 27.60327, p=0.00000
+M-H Chi sq: 31.18280, p=0.00000
+Fisher's exact test: one-sided p=0.00000, two-sided (twice one-sided): p=0.00000, two-sided (as extreme): p=0.00000
+mid-p: one-sided p=0.00000, two-sided p=0.00000
+-----------------------
+Stratum 8:
+Chi sq: 36.07038, p=0.00000
+Yates-corrected Chi sq: 31.30919, p=0.00000
+M-H Chi sq: 35.19062, p=0.00000
+Fisher's exact test: one-sided p=0.00000, two-sided (twice one-sided): p=0.00000, two-sided (as extreme): p=0.00000
+mid-p: one-sided p=0.00000, two-sided p=0.00000
+-----------------------
+Stratum 9:
+Chi sq: 36.07038, p=0.00000
+Yates-corrected Chi sq: 31.30919, p=0.00000
+M-H Chi sq: 35.19062, p=0.00000
+Fisher's exact test: one-sided p=0.00000, two-sided (twice one-sided): p=0.00000, two-sided (as extreme): p=0.00000
+mid-p: one-sided p=0.00000, two-sided p=0.00000
+-----------------------
+Stratum 10:
+Chi sq: 42.35924, p=0.00000
+Yates-corrected Chi sq: 37.84559, p=0.00000
+M-H Chi sq: 41.45798, p=0.00000
+Fisher's exact test: one-sided p=0.00000, two-sided (twice one-sided): p=0.00000, two-sided (as extreme): p=0.00000
+mid-p: one-sided p=0.00000, two-sided p=0.00000
+-----------------------
+Unstratified (crude):
+Chi sq: 259.90459, p=0.00000
+Yates-corrected Chi sq: 255.69426, p=0.00000
+M-H Chi sq: 259.13106, p=0.00000
+Fisher's exact test: one-sided p=0.00000, two-sided (twice one-sided): p=0.00000, two-sided (as extreme): p=0.00000
+mid-p: one-sided p=0.00000, two-sided p=0.00000
+-----------------------
+Adjusted:
+Warning: expected values in some strata are < 5: use of exact statistics recommended.
+Mantel-Haenszel chi square with continuity correction: 248.67814 (p=0.00000)
+Mantel-Haenszel chi square without continuity correction: 252.80495 (p=0.00000)
+Fisher exact test: one-sided: p=0.00000, two-sided (twice one-sided): p=0.00000, two-sided (as extreme): p=0.00000
+Mid-p exact test: one-sided: p=0.00000, two-sided: p=0.00000
+-----------------------
+
+Risk-based measures
+===================
+Stratum 1:
+Risk in exposed: 1.00000 (0.77309, 1.00000)
+Risk in unexposed: 0.36364 (0.14980, 0.64806)
+Risk in overall population: 0.74074 (0.55074, 0.87077)
+Risk ratio: 2.75000 (1.25841, 6.00959)
+Risk difference: 0.63636 (0.35209, 0.92064)
+-----------------------
+Stratum 2:
+Risk in exposed: 1.00000 (0.73414, 1.00000)
+Risk in unexposed: 0.36364 (0.14980, 0.64806)
+Risk in overall population: 0.70833 (0.50626, 0.85292)
+Risk ratio: 2.75000 (1.25841, 6.00959)
+Risk difference: 0.63636 (0.35209, 0.92064)
+-----------------------
+Stratum 3:
+Risk in exposed: 1.00000 (0.73414, 1.00000)
+Risk in unexposed: 0.20000 (0.04589, 0.52063)
+Risk in overall population: 0.65217 (0.44779, 0.81300)
+Risk ratio: 5.00000 (1.44751, 17.27099)
+Risk difference: 0.80000 (0.55208, 1.04792)
+-----------------------
+Stratum 4:
+Risk in exposed: 1.00000 (0.78366, 1.00000)
+Risk in unexposed: 0.11111 (0.00000, 0.45671)
+Risk in overall population: 0.69231 (0.49853, 0.83657)
+Risk ratio: 9.00000 (1.41815, 57.11669)
+Risk difference: 0.88889 (0.68357, 1.09421)
+-----------------------
+Stratum 5:
+Risk in exposed: 1.00000 (0.78366, 1.00000)
+Risk in unexposed: 0.11111 (0.00000, 0.45671)
+Risk in overall population: 0.69231 (0.49853, 0.83657)
+Risk ratio: 9.00000 (1.41815, 57.11669)
+Risk difference: 0.88889 (0.68357, 1.09421)
+-----------------------
+Stratum 6:
+Risk in exposed: 1.00000 (0.86126, 1.00000)
+Risk in unexposed: 0.09091 (0.00000, 0.39906)
+Risk in overall population: 0.75000 (0.59639, 0.85980)
+Risk ratio: 11.00000 (1.69748, 71.28227)
+Risk difference: 0.90909 (0.73920, 1.07898)
+-----------------------
+Stratum 7:
+Risk in exposed: 1.00000 (0.86126, 1.00000)
+Risk in unexposed: 0.16667 (0.03498, 0.46001)
+Risk in overall population: 0.75610 (0.60485, 0.86346)
+Risk ratio: 6.00000 (1.69318, 21.26177)
+Risk difference: 0.83333 (0.62247, 1.04419)
+-----------------------
+Stratum 8:
+Risk in exposed: 1.00000 (0.86529, 1.00000)
+Risk in unexposed: 0.09091 (0.00000, 0.39906)
+Risk in overall population: 0.75610 (0.60485, 0.86346)
+Risk ratio: 11.00000 (1.69748, 71.28227)
+Risk difference: 0.90909 (0.73920, 1.07898)
+-----------------------
+Stratum 9:
+Risk in exposed: 1.00000 (0.86529, 1.00000)
+Risk in unexposed: 0.09091 (0.00000, 0.39906)
+Risk in overall population: 0.75610 (0.60485, 0.86346)
+Risk ratio: 11.00000 (1.69748, 71.28227)
+Risk difference: 0.90909 (0.73920, 1.07898)
+-----------------------
+Stratum 10:
+Risk in exposed: 1.00000 (0.87608, 1.00000)
+Risk in unexposed: 0.07143 (0.00000, 0.33539)
+Risk in overall population: 0.72340 (0.58136, 0.83169)
+Risk ratio: 14.00000 (2.11782, 92.54786)
+Risk difference: 0.92857 (0.79367, 1.06348)
+-----------------------
+Unstratified (crude):
+Risk in exposed: 1.00000 (0.97996, 1.00000)
+Risk in unexposed: 0.16514 (0.10619, 0.24689)
+Risk in overall population: 0.72917 (0.67919, 0.77396)
+Risk ratio: 6.05556 (3.97042, 9.23573)
+Risk difference: 0.83486 (0.76516, 0.90457)
+Aetiological fraction in the population: 0.77353 (0.68401, 0.86304)
+Aetiological fraction in the exposed: 0.83486 (0.74814, 0.89172)
+Prevented fraction in the population: -3.41551 (-6.30161, -2.16463)
+Prevented fraction in the exposed: -5.05556 (-8.23573, -2.97042)
+-----------------------
+Adjusted:
+Directly adjusted risk ratio: 4.59323 (3.08122, 6.84722)
+Mantel-Haenszel adjusted risk ratio: 6.40835 (4.12307, 9.96030)
+Breslow-Day chi square test for homogeneity of RR across strata: 8.36993 (p=0.49734)
+Directly adjusted risk difference: 0.87160 (0.81073, 0.93248)
+Breslow-Day chi square test for homogeneity of RD across strata: 7.00864 (p=0.63622)
+-----------------------
+
+Odds-based measures
+===================
+Stratum 1:
+Sample odds ratio: undefined (undefined, undefined)
+CMLE odds ratio: inf (3.56090, inf)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 2:
+Sample odds ratio: undefined (undefined, undefined)
+CMLE odds ratio: inf (2.84581, inf)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 3:
+Sample odds ratio: undefined (undefined, undefined)
+CMLE odds ratio: inf (5.09426, inf)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 4:
+Sample odds ratio: undefined (undefined, undefined)
+CMLE odds ratio: inf (9.32047, inf)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 5:
+Sample odds ratio: undefined (undefined, undefined)
+CMLE odds ratio: inf (9.32047, inf)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 6:
+Sample odds ratio: undefined (undefined, undefined)
+CMLE odds ratio: inf (21.00941, inf)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 7:
+Sample odds ratio: undefined (undefined, undefined)
+CMLE odds ratio: inf (15.39757, inf)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 8:
+Sample odds ratio: undefined (undefined, undefined)
+CMLE odds ratio: inf (21.76866, inf)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 9:
+Sample odds ratio: undefined (undefined, undefined)
+CMLE odds ratio: inf (21.76866, inf)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 10:
+Sample odds ratio: undefined (undefined, undefined)
+CMLE odds ratio: inf (32.15157, inf)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Unstratified (crude):
+Sample odds ratio: undefined (undefined, undefined)
+CMLE odds ratio: inf (262.56410, inf)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+Aetiological fraction in the population: undefined (undefined, undefined)
+Aetiological fraction in the exposed: undefined (undefined, undefined)
+Prevented fraction in the population: undefined (undefined, undefined)
+Prevented fraction in the exposed: undefined (undefined, undefined)
+-----------------------
+Adjusted:
+Directly adjusted common odds ratio: undefined (undefined, undefined)
+Mantel-Haenszel common odds ratio: inf (nan, nan)
+CMLE common odds ratio: inf (235.90559, inf)
+Breslow-Day chi square test for homogeneity of OR across strata: undefined (p=undefined)
+Woolf chi square test for homogeneity of OR across strata: 2.88945, df=9.00000 (p=0.96852)
+-----------------------
+
+==============================================================
+Another extremely ill-conditioned data example - all zeros in cell D
+==============================================================
+
+Tabulated values
+================
+Stratum 1:
+a (exposed, disease): 16
+b (unexposed, disease): 4
+c (exposed, no disease): 7
+d (unexposed, no disease): 0
+-----------------------
+Stratum 2:
+a (exposed, disease): 13
+b (unexposed, disease): 4
+c (exposed, no disease): 7
+d (unexposed, no disease): 0
+-----------------------
+Stratum 3:
+a (exposed, disease): 13
+b (unexposed, disease): 2
+c (exposed, no disease): 8
+d (unexposed, no disease): 0
+-----------------------
+Stratum 4:
+a (exposed, disease): 17
+b (unexposed, disease): 1
+c (exposed, no disease): 8
+d (unexposed, no disease): 0
+-----------------------
+Stratum 5:
+a (exposed, disease): 17
+b (unexposed, disease): 1
+c (exposed, no disease): 8
+d (unexposed, no disease): 0
+-----------------------
+Stratum 6:
+a (exposed, disease): 29
+b (unexposed, disease): 1
+c (exposed, no disease): 10
+d (unexposed, no disease): 0
+-----------------------
+Stratum 7:
+a (exposed, disease): 29
+b (unexposed, disease): 2
+c (exposed, no disease): 10
+d (unexposed, no disease): 0
+-----------------------
+Stratum 8:
+a (exposed, disease): 30
+b (unexposed, disease): 1
+c (exposed, no disease): 10
+d (unexposed, no disease): 0
+-----------------------
+Stratum 9:
+a (exposed, disease): 30
+b (unexposed, disease): 1
+c (exposed, no disease): 10
+d (unexposed, no disease): 0
+-----------------------
+Stratum 10:
+a (exposed, disease): 33
+b (unexposed, disease): 1
+c (exposed, no disease): 13
+d (unexposed, no disease): 0
+-----------------------
+Unstratified (crude):
+a (exposed, disease): 227
+b (unexposed, disease): 18
+c (exposed, no disease): 91
+d (unexposed, no disease): 0
+-----------------------
+
+Measures of association
+=======================
+Stratum 1:
+Chi sq: 1.64348, p=0.19985
+Yates-corrected Chi sq: 0.44074, p=0.50676
+M-H Chi sq: 1.58261, p=0.20839
+Fisher's exact test: one-sided p=0.27607, two-sided (twice one-sided): p=0.55214, two-sided (as extreme): p=0.54530
+mid-p: one-sided p=0.13803, two-sided p=0.27607
+-----------------------
+Stratum 2:
+Chi sq: 1.97647, p=0.15976
+Yates-corrected Chi sq: 0.64538, p=0.42177
+M-H Chi sq: 1.89412, p=0.16874
+Fisher's exact test: one-sided p=0.22398, two-sided (twice one-sided): p=0.44796, two-sided (as extreme): p=0.28327
+mid-p: one-sided p=0.11199, two-sided p=0.22398
+-----------------------
+Stratum 3:
+Chi sq: 1.16825, p=0.27976
+Yates-corrected Chi sq: 0.09241, p=0.76113
+M-H Chi sq: 1.11746, p=0.29047
+Fisher's exact test: one-sided p=0.41502, two-sided (twice one-sided): p=0.83004, two-sided (as extreme): p=0.52569
+mid-p: one-sided p=0.20751, two-sided p=0.41502
+-----------------------
+Stratum 4:
+Chi sq: 0.46222, p=0.49659
+Yates-corrected Chi sq: 0.18056, p=0.67090
+M-H Chi sq: 0.44444, p=0.50499
+Fisher's exact test: one-sided p=0.69231, two-sided (twice one-sided): p=1.38462, two-sided (as extreme): p=1.00000
+mid-p: one-sided p=0.34615, two-sided p=0.69231
+-----------------------
+Stratum 5:
+Chi sq: 0.46222, p=0.49659
+Yates-corrected Chi sq: 0.18056, p=0.67090
+M-H Chi sq: 0.44444, p=0.50499
+Fisher's exact test: one-sided p=0.69231, two-sided (twice one-sided): p=1.38462, two-sided (as extreme): p=1.00000
+mid-p: one-sided p=0.34615, two-sided p=0.69231
+-----------------------
+Stratum 6:
+Chi sq: 0.34188, p=0.55875
+Yates-corrected Chi sq: 0.34188, p=0.55875
+M-H Chi sq: 0.33333, p=0.56370
+Fisher's exact test: one-sided p=0.75000, two-sided (twice one-sided): p=1.50000, two-sided (as extreme): p=1.00000
+mid-p: one-sided p=0.37500, two-sided p=0.75000
+-----------------------
+Stratum 7:
+Chi sq: 0.67825, p=0.41019
+Yates-corrected Chi sq: 0.00042, p=0.98357
+M-H Chi sq: 0.66170, p=0.41596
+Fisher's exact test: one-sided p=0.56707, two-sided (twice one-sided): p=1.13415, two-sided (as extreme): p=1.00000
+mid-p: one-sided p=0.28354, two-sided p=0.56707
+-----------------------
+Stratum 8:
+Chi sq: 0.33065, p=0.56528
+Yates-corrected Chi sq: 0.36454, p=0.54600
+M-H Chi sq: 0.32258, p=0.57006
+Fisher's exact test: one-sided p=0.75610, two-sided (twice one-sided): p=1.51220, two-sided (as extreme): p=1.00000
+mid-p: one-sided p=0.37805, two-sided p=0.75610
+-----------------------
+Stratum 9:
+Chi sq: 0.33065, p=0.56528
+Yates-corrected Chi sq: 0.36454, p=0.54600
+M-H Chi sq: 0.32258, p=0.57006
+Fisher's exact test: one-sided p=0.75610, two-sided (twice one-sided): p=1.51220, two-sided (as extreme): p=1.00000
+mid-p: one-sided p=0.37805, two-sided p=0.75610
+-----------------------
+Stratum 10:
+Chi sq: 0.39066, p=0.53195
+Yates-corrected Chi sq: 0.25486, p=0.61368
+M-H Chi sq: 0.38235, p=0.53635
+Fisher's exact test: one-sided p=0.72340, two-sided (twice one-sided): p=1.44681, two-sided (as extreme): p=1.00000
+mid-p: one-sided p=0.36170, two-sided p=0.72340
+-----------------------
+Unstratified (crude):
+Chi sq: 7.06415, p=0.00786
+Yates-corrected Chi sq: 5.68940, p=0.01707
+M-H Chi sq: 7.04313, p=0.00796
+Fisher's exact test: one-sided p=0.00285, two-sided (twice one-sided): p=0.00569, two-sided (as extreme): p=0.00476
+mid-p: one-sided p=0.00142, two-sided p=0.00285
+-----------------------
+Adjusted:
+Warning: expected values in some strata are < 5: use of exact statistics recommended.
+Mantel-Haenszel chi square with continuity correction: 6.04838 (p=0.01392)
+Mantel-Haenszel chi square without continuity correction: 7.46154 (p=0.00630)
+Fisher exact test: one-sided: p=0.00216, two-sided (twice one-sided): p=0.00433, two-sided (as extreme): p=0.00444
+Mid-p exact test: one-sided: p=0.00108, two-sided: p=0.00216
+-----------------------
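
Both the chi-square and the exact rows above can be reproduced with textbook
arithmetic. A sketch (illustration only, not the SOOMv0/Stats.py
implementation), checked against the crude "Chi sq" row and stratum 1's
Fisher p-values:

    from math import factorial

    def chi_sq(a, b, c, d):
        # Pearson chi-square via the 2x2 shortcut formula.
        n = a + b + c + d
        return n * (a * d - b * c) ** 2 / float(
            (a + b) * (c + d) * (a + c) * (b + d))

    def choose(n, k):
        return factorial(n) // (factorial(k) * factorial(n - k))

    def fisher_lower(a, b, c, d):
        # P(A <= a) with all margins fixed: the lower-tail Fisher p-value.
        r1, c1, n = a + b, a + c, a + b + c + d
        k_min = max(0, r1 + c1 - n)
        return sum(choose(c1, k) * choose(n - c1, r1 - k)
                   for k in range(k_min, a + 1)) / float(choose(n, r1))

    chi2 = chi_sq(227, 18, 91, 0)    # ~7.06415 (crude row); the M-H variant
    mh = chi2 * 335 / 336.0          # scales by (n-1)/n: ~7.04313
    fisher_lower(16, 4, 7, 0)        # ~0.27607 (stratum 1); with d = 0 the
                                     # table is at its extreme, so doubling
                                     # gives 0.55214 and halving the single
                                     # term gives the mid-p 0.13803
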
+
+Risk-based measures
+===================
+Stratum 1:
+Risk in exposed: 0.69565 (0.48945, 0.84585)
+Risk in unexposed: 1.00000 (0.45405, 1.00000)
+Risk in overall population: 0.74074 (0.55074, 0.87077)
+Risk ratio: 0.69565 (0.53088, 0.91157)
+Risk difference: -0.30435 (-0.49239, -0.11630)
+-----------------------
+Stratum 2:
+Risk in exposed: 0.65000 (0.43159, 0.82007)
+Risk in unexposed: 1.00000 (0.45405, 1.00000)
+Risk in overall population: 0.70833 (0.50626, 0.85292)
+Risk ratio: 0.65000 (0.47124, 0.89656)
+Risk difference: -0.35000 (-0.55904, -0.14096)
+-----------------------
+Stratum 3:
+Risk in exposed: 0.61905 (0.40804, 0.79324)
+Risk in unexposed: 1.00000 (0.29023, 1.00000)
+Risk in overall population: 0.65217 (0.44779, 0.81300)
+Risk ratio: 0.61905 (0.44260, 0.86584)
+Risk difference: -0.38095 (-0.58865, -0.17325)
+-----------------------
+Stratum 4:
+Risk in exposed: 0.68000 (0.48266, 0.82939)
+Risk in unexposed: 1.00000 (0.16750, 1.00000)
+Risk in overall population: 0.69231 (0.49853, 0.83657)
+Risk ratio: 0.68000 (0.51967, 0.88980)
+Risk difference: -0.32000 (-0.50286, -0.13714)
+-----------------------
+Stratum 5:
+Risk in exposed: 0.68000 (0.48266, 0.82939)
+Risk in unexposed: 1.00000 (0.16750, 1.00000)
+Risk in overall population: 0.69231 (0.49853, 0.83657)
+Risk ratio: 0.68000 (0.51967, 0.88980)
+Risk difference: -0.32000 (-0.50286, -0.13714)
+-----------------------
+Stratum 6:
+Risk in exposed: 0.74359 (0.58756, 0.85594)
+Risk in unexposed: 1.00000 (0.16750, 1.00000)
+Risk in overall population: 0.75000 (0.59639, 0.85980)
+Risk ratio: 0.74359 (0.61844, 0.89407)
+Risk difference: -0.25641 (-0.39345, -0.11937)
+-----------------------
+Stratum 7:
+Risk in exposed: 0.74359 (0.58756, 0.85594)
+Risk in unexposed: 1.00000 (0.29023, 1.00000)
+Risk in overall population: 0.75610 (0.60485, 0.86346)
+Risk ratio: 0.74359 (0.61844, 0.89407)
+Risk difference: -0.25641 (-0.39345, -0.11937)
+-----------------------
+Stratum 8:
+Risk in exposed: 0.75000 (0.59639, 0.85980)
+Risk in unexposed: 1.00000 (0.16750, 1.00000)
+Risk in overall population: 0.75610 (0.60485, 0.86346)
+Risk ratio: 0.75000 (0.62713, 0.89694)
+Risk difference: -0.25000 (-0.38419, -0.11581)
+-----------------------
+Stratum 9:
+Risk in exposed: 0.75000 (0.59639, 0.85980)
+Risk in unexposed: 1.00000 (0.16750, 1.00000)
+Risk in overall population: 0.75610 (0.60485, 0.86346)
+Risk ratio: 0.75000 (0.62713, 0.89694)
+Risk difference: -0.25000 (-0.38419, -0.11581)
+-----------------------
+Stratum 10:
+Risk in exposed: 0.71739 (0.57349, 0.82778)
+Risk in unexposed: 1.00000 (0.16750, 1.00000)
+Risk in overall population: 0.72340 (0.58136, 0.83169)
+Risk ratio: 0.71739 (0.59839, 0.86006)
+Risk difference: -0.28261 (-0.41273, -0.15249)
+-----------------------
+Unstratified (crude):
+Risk in exposed: 0.71384 (0.66178, 0.76079)
+Risk in unexposed: 1.00000 (0.79329, 1.00000)
+Risk in overall population: 0.72917 (0.67919, 0.77396)
+Risk ratio: 0.71384 (0.66585, 0.76528)
+Risk difference: -0.28616 (-0.33584, -0.23649)
+Aetiological fraction in the population: -0.37143 (-0.46080, -0.28206)
+Aetiological fraction in the exposed: -0.40088 (-0.50184, -0.30671)
+Prevented fraction in the population: 0.27083 (0.22000, 0.31544)
+Prevented fraction in the exposed: 0.28616 (0.23472, 0.33415)
+-----------------------
+Adjusted:
+Directly adjusted risk ratio: 0.71985 (0.67203, 0.77107)
+Mantel-Haenszel adjusted risk ratio: 0.69216 (0.62619, 0.76508)
+Breslow-Day chi square test for homogeneity of RR across strata: 2.21323 (p=0.98763)
+Directly adjusted risk difference: -0.28325 (-0.33258, -0.23391)
+Breslow-Day chi square test for homogeneity of RD across strata: 2.36692 (p=0.98425)
+-----------------------
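
The crude risk rows above are the textbook estimators: a Katz log interval for
the ratio, a Wald interval for the difference. A sketch that reproduces the
printed values (illustration only):

    import math

    def risk_measures(a, b, c, d, z=1.959964):
        p1, n1 = a / float(a + c), a + c      # risk in exposed
        p2, n2 = b / float(b + d), b + d      # risk in unexposed
        rr = p1 / p2
        se_log_rr = math.sqrt(1.0/a - 1.0/n1 + 1.0/b - 1.0/n2)
        rd = p1 - p2
        se_rd = math.sqrt(p1*(1-p1)/n1 + p2*(1-p2)/n2)
        return (rr, rr * math.exp(-z*se_log_rr), rr * math.exp(z*se_log_rr),
                rd, rd - z*se_rd, rd + z*se_rd)

    risk_measures(227, 18, 91, 0)
    # -> RR 0.71384 (0.66585, 0.76528), RD -0.28616 (-0.33584, -0.23649)
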
+
+Odds-based measures
+===================
+Stratum 1:
+Sample odds ratio: 0.00000 (undefined, undefined)
+CMLE odds ratio: 0.00000 (0.00000, 4.44021)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 2:
+Sample odds ratio: 0.00000 (undefined, undefined)
+CMLE odds ratio: 0.00000 (0.00000, 3.68979)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 3:
+Sample odds ratio: 0.00000 (undefined, undefined)
+CMLE odds ratio: 0.00000 (0.00000, 10.14190)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 4:
+Sample odds ratio: 0.00000 (undefined, undefined)
+CMLE odds ratio: 0.00000 (0.00000, 87.63122)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 5:
+Sample odds ratio: 0.00000 (undefined, undefined)
+CMLE odds ratio: 0.00000 (0.00000, 87.63122)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 6:
+Sample odds ratio: 0.00000 (undefined, undefined)
+CMLE odds ratio: 0.00000 (0.00000, 116.78832)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 7:
+Sample odds ratio: 0.00000 (undefined, undefined)
+CMLE odds ratio: 0.00000 (0.00000, 16.92419)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 8:
+Sample odds ratio: 0.00000 (undefined, undefined)
+CMLE odds ratio: 0.00000 (0.00000, 120.67392)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 9:
+Sample odds ratio: 0.00000 (undefined, undefined)
+CMLE odds ratio: 0.00000 (0.00000, 120.67392)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 10:
+Sample odds ratio: 0.00000 (undefined, undefined)
+CMLE odds ratio: 0.00000 (0.00000, 101.83928)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Unstratified (crude):
+Sample odds ratio: 0.00000 (undefined, undefined)
+CMLE odds ratio: 0.00000 (0.00000, 0.58378)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+Aetiological fraction in the population: undefined (undefined, undefined)
+Aetiological fraction in the exposed: undefined (undefined, undefined)
+Prevented fraction in the population: 1.00000 (undefined, undefined)
+Prevented fraction in the exposed: 1.00000 (undefined, undefined)
+-----------------------
+Adjusted:
+Directly adjusted common odds ratio: undefined (undefined, undefined)
+Mantel-Haenszel common odds ratio: 0.00000 (nan, nan)
+CMLE common odds ratio: 0.00000 (0.00000, 0.54969)
+Breslow-Day chi square test for homogeneity of OR across strata: undefined (p=undefined)
+Woolf chi square test for homogeneity of OR across strata: 1.23379, df=9.00000 (p=0.99868)
+-----------------------
+
+==============================================================
+Another extremely ill-conditioned data example - zeros in all cells in one stratum
+==============================================================
+
+Tabulated values
+================
+Stratum 1:
+a (exposed, disease): 16
+b (unexposed, disease): 4
+c (exposed, no disease): 7
+d (unexposed, no disease): 7
+-----------------------
+Stratum 2:
+a (exposed, disease): 0
+b (unexposed, disease): 0
+c (exposed, no disease): 0
+d (unexposed, no disease): 0
+-----------------------
+Stratum 3:
+a (exposed, disease): 13
+b (unexposed, disease): 2
+c (exposed, no disease): 8
+d (unexposed, no disease): 8
+-----------------------
+Unstratified (crude):
+a (exposed, disease): 29
+b (unexposed, disease): 6
+c (exposed, no disease): 15
+d (unexposed, no disease): 15
+-----------------------
+
+Measures of association
+=======================
+Stratum 1:
+Chi sq: 3.38656, p=0.06573
+Yates-corrected Chi sq: 2.15452, p=0.14215
+M-H Chi sq: 3.28696, p=0.06983
+Fisher's exact test: one-sided p=0.07149, two-sided (twice one-sided): p=0.14297, two-sided (as extreme): p=0.13454
+mid-p: one-sided p=0.04243, two-sided p=0.08485
+-----------------------
+Stratum 2:
+Chi sq: undefined, p=undefined
+Yates-corrected Chi sq: undefined, p=undefined
+M-H Chi sq: undefined, p=undefined
+Fisher's exact test: one-sided p=1.00000, two-sided (twice one-sided): p=2.00000, two-sided (as extreme): p=undefined
+mid-p: one-sided p=0.50000, two-sided p=1.00000
+-----------------------
+Stratum 3:
+Chi sq: 4.76317, p=0.02907
+Yates-corrected Chi sq: 3.23301, p=0.07217
+M-H Chi sq: 4.60952, p=0.03179
+Fisher's exact test: one-sided p=0.03452, two-sided (twice one-sided): p=0.06904, two-sided (as extreme): p=0.05380
+mid-p: one-sided p=0.01928, two-sided p=0.03857
+-----------------------
+Unstratified (crude):
+Chi sq: 7.97426, p=0.00474
+Yates-corrected Chi sq: 6.54263, p=0.01053
+M-H Chi sq: 7.85158, p=0.00508
+Fisher's exact test: one-sided p=0.00504, two-sided (twice one-sided): p=0.01007, two-sided (as extreme): p=0.00742
+mid-p: one-sided p=0.00296, two-sided p=0.00593
+-----------------------
+Adjusted:
+Warning: expected values in some strata are < 5: use of exact statistics recommended.
+Mantel-Haenszel chi square with continuity correction: undefined (p=undefined)
+Mantel-Haenszel chi square without continuity correction: undefined (p=undefined)
+Fisher exact test: one-sided: p=undefined, two-sided (twice one-sided): p=undefined, two-sided (as extreme): p=undefined
+Mid-p exact test: one-sided: p=undefined, two-sided: p=undefined
+-----------------------
+
+Risk-based measures
+===================
+Stratum 1:
+Risk in exposed: 0.69565 (0.48945, 0.84585)
+Risk in unexposed: 0.36364 (0.14980, 0.64806)
+Risk in overall population: 0.58824 (0.42199, 0.73657)
+Risk ratio: 1.91304 (0.83654, 4.37483)
+Risk difference: 0.33202 (-0.00883, 0.67286)
+-----------------------
+Stratum 2:
+Risk in exposed: undefined (undefined, undefined)
+Risk in unexposed: undefined (undefined, undefined)
+Risk in overall population: undefined (undefined, undefined)
+Risk ratio: undefined (undefined, undefined)
+Risk difference: undefined (undefined, undefined)
+-----------------------
+Stratum 3:
+Risk in exposed: 0.61905 (0.40804, 0.79324)
+Risk in unexposed: 0.20000 (0.04589, 0.52063)
+Risk in overall population: 0.48387 (0.31969, 0.65160)
+Risk ratio: 3.09524 (0.85699, 11.17924)
+Risk difference: 0.41905 (0.09562, 0.74247)
+-----------------------
+Unstratified (crude):
+Risk in exposed: 0.65909 (0.51084, 0.78180)
+Risk in unexposed: 0.28571 (0.13559, 0.50211)
+Risk in overall population: 0.53846 (0.41852, 0.65412)
+Risk ratio: 2.30682 (1.13543, 4.68668)
+Risk difference: 0.37338 (0.13474, 0.61202)
+Aetiological fraction in the population: 0.46939 (0.15495, 0.78383)
+Aetiological fraction in the exposed: 0.56650 (0.11928, 0.78663)
+Prevented fraction in the population: -0.88462 (-3.62600, -0.18336)
+Prevented fraction in the exposed: -1.30682 (-3.68668, -0.13543)
+-----------------------
+Adjusted:
+Directly adjusted risk ratio: undefined (undefined, undefined)
+Mantel-Haenszel adjusted risk ratio: undefined (undefined, undefined)
+Breslow-Day chi square test for homogeneity of RR across strata: undefined (p=undefined)
+Directly adjusted risk difference: undefined (undefined, undefined)
+Breslow-Day chi square test for homogeneity of RD across strata: undefined (p=undefined)
+-----------------------
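
The crude fraction rows above follow directly from the printed risk figures
(inputs rounded as shown above):

    rr, p_all, p_unexp = 2.30682, 0.53846, 0.28571
    af_exposed = 1 - 1 / rr                        # ~0.56650
    af_population = (p_all - p_unexp) / p_all      # ~0.46939
    pf_exposed = 1 - rr                            # ~-1.30682
    pf_population = (p_unexp - p_all) / p_unexp    # ~-0.88462
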
+
+Odds-based measures
+===================
+Stratum 1:
+Sample odds ratio: 4.00000 (0.87842, 18.21445)
+CMLE odds ratio: 3.82472 (0.70209, 24.37976)
+mid-p CMLE odds ratio: 3.77316 (0.83636, 19.59694)
+-----------------------
+Stratum 2:
+Sample odds ratio: undefined (undefined, undefined)
+CMLE odds ratio: undefined (undefined, undefined)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 3:
+Sample odds ratio: 6.50000 (1.09363, 38.63280)
+CMLE odds ratio: 6.10005 (0.89956, 73.42698)
+mid-p CMLE odds ratio: 5.85904 (1.09068, 51.23738)
+-----------------------
+Unstratified (crude):
+Sample odds ratio: 4.83333 (1.55582, 15.01533)
+CMLE odds ratio: 4.70752 (1.38054, 18.10658)
+mid-p CMLE odds ratio: 4.65107 (1.53799, 15.73656)
+Aetiological fraction in the population: 0.65714 (0.37891, 0.93537)
+Aetiological fraction in the exposed: 0.79310 (0.35725, 0.93340)
+Prevented fraction in the population: -1.91667 (-14.47321, -0.61008)
+Prevented fraction in the exposed: -3.83333 (-14.01533, -0.55582)
+-----------------------
+Adjusted:
+Directly adjusted common odds ratio: undefined (undefined, undefined)
+Mantel-Haenszel common odds ratio: undefined (undefined, undefined)
+CMLE common odds ratio: undefined (undefined, undefined)
+Breslow-Day chi square test for homogeneity of OR across strata: undefined (p=undefined)
+Woolf chi square test for homogeneity of OR across strata: 0.37847, df=2.00000 (p=0.82759)
+-----------------------
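
One plausible reading of why the adjusted odds rows above print undefined: each
Mantel-Haenszel term is divided by the stratum total, and the all-zero stratum
2 contributes 0/0. A sketch (not the twobytwo.py code):

    def mh_common_or(strata):
        # Mantel-Haenszel common OR: sum(a*d/n) / sum(b*c/n) over strata.
        num = den = 0.0
        for a, b, c, d in strata:
            n = a + b + c + d
            if n == 0:
                return None  # an empty stratum leaves the estimator undefined
            num += a * d / float(n)
            den += b * c / float(n)
        return num / den if den else float('inf')

    mh_common_or([(16, 4, 7, 7), (0, 0, 0, 0), (13, 2, 8, 8)])   # -> None
    # dropping the empty stratum instead would give roughly 4.96
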
+
+==============================================================
+Another extremely ill-conditioned data example - zeros in all cells in all strata
+==============================================================
+
+Tabulated values
+================
+Stratum 1:
+a (exposed, disease): 0
+b (unexposed, disease): 0
+c (exposed, no disease): 0
+d (unexposed, no disease): 0
+-----------------------
+Stratum 2:
+a (exposed, disease): 0
+b (unexposed, disease): 0
+c (exposed, no disease): 0
+d (unexposed, no disease): 0
+-----------------------
+Unstratified (crude):
+a (exposed, disease): 0
+b (unexposed, disease): 0
+c (exposed, no disease): 0
+d (unexposed, no disease): 0
+-----------------------
+
+Measures of association
+=======================
+Stratum 1:
+Chi sq: undefined, p=undefined
+Yates-corrected Chi sq: undefined, p=undefined
+M-H Chi sq: undefined, p=undefined
+Fisher's exact test: one-sided p=1.00000, two-sided (twice one-sided): p=2.00000, two-sided (as extreme): p=undefined
+mid-p: one-sided p=0.50000, two-sided p=1.00000
+-----------------------
+Stratum 2:
+Chi sq: undefined, p=undefined
+Yates-corrected Chi sq: undefined, p=undefined
+M-H Chi sq: undefined, p=undefined
+Fisher's exact test: one-sided p=1.00000, two-sided (twice one-sided): p=2.00000, two-sided (as extreme): p=undefined
+mid-p: one-sided p=0.50000, two-sided p=1.00000
+-----------------------
+Unstratified (crude):
+Chi sq: undefined, p=undefined
+Yates-corrected Chi sq: undefined, p=undefined
+M-H Chi sq: undefined, p=undefined
+Fisher's exact test: one-sided p=1.00000, two-sided (twice one-sided): p=2.00000, two-sided (as extreme): p=undefined
+mid-p: one-sided p=0.50000, two-sided p=1.00000
+-----------------------
+Adjusted:
+Warning: expected values in some strata are < 5: use of exact statistics recommended.
+Mantel-Haenszel chi square with continuity correction: undefined (p=undefined)
+Mantel-Haenszel chi square without continuity correction: undefined (p=undefined)
+Fisher exact test: one-sided: p=undefined, two-sided (twice one-sided): p=undefined, two-sided (as extreme): p=undefined
+Mid-p exact test: one-sided: p=undefined, two-sided: p=undefined
+-----------------------
+
+Risk-based measures
+===================
+Stratum 1:
+Risk in exposed: undefined (undefined, undefined)
+Risk in unexposed: undefined (undefined, undefined)
+Risk in overall population: undefined (undefined, undefined)
+Risk ratio: undefined (undefined, undefined)
+Risk difference: undefined (undefined, undefined)
+-----------------------
+Stratum 2:
+Risk in exposed: undefined (undefined, undefined)
+Risk in unexposed: undefined (undefined, undefined)
+Risk in overall population: undefined (undefined, undefined)
+Risk ratio: undefined (undefined, undefined)
+Risk difference: undefined (undefined, undefined)
+-----------------------
+Unstratified (crude):
+Risk in exposed: undefined (undefined, undefined)
+Risk in unexposed: undefined (undefined, undefined)
+Risk in overall population: undefined (undefined, undefined)
+Risk ratio: undefined (undefined, undefined)
+Risk difference: undefined (undefined, undefined)
+Aetiological fraction in the population: undefined (undefined, undefined)
+Aetiological fraction in the exposed: undefined (undefined, undefined)
+Prevented fraction in the population: undefined (undefined, undefined)
+Prevented fraction in the exposed: undefined (undefined, undefined)
+-----------------------
+Adjusted:
+Directly adjusted risk ratio: undefined (undefined, undefined)
+Mantel-Haenszel adjusted risk ratio: undefined (undefined, undefined)
+Breslow-Day chi square test for homogeneity of RR across strata: undefined (p=undefined)
+Directly adjusted risk difference: undefined (undefined, undefined)
+Breslow-Day chi square test for homogeneity of RD across strata: undefined (p=undefined)
+-----------------------
+
+Odds-based measures
+===================
+Stratum 1:
+Sample odds ratio: undefined (undefined, undefined)
+CMLE odds ratio: undefined (undefined, undefined)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 2:
+Sample odds ratio: undefined (undefined, undefined)
+CMLE odds ratio: undefined (undefined, undefined)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Unstratified (crude):
+Sample odds ratio: undefined (undefined, undefined)
+CMLE odds ratio: undefined (undefined, undefined)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+Aetiological fraction in the population: undefined (undefined, undefined)
+Aetiological fraction in the exposed: undefined (undefined, undefined)
+Prevented fraction in the population: undefined (undefined, undefined)
+Prevented fraction in the exposed: undefined (undefined, undefined)
+-----------------------
+Adjusted:
+Directly adjusted common odds ratio: undefined (undefined, undefined)
+Mantel-Haenszel common odds ratio: undefined (undefined, undefined)
+CMLE common odds ratio: undefined (undefined, undefined)
+Breslow-Day chi square test for homogeneity of OR across strata: undefined (p=undefined)
+Woolf chi square test for homogeneity of OR across strata: 0.00000, df=1.00000 (p=1.00000)
+-----------------------
+
+==============================================================
+Large single stratum
+==============================================================
+
+Tabulated values
+================
+Stratum 1:
+a (exposed, disease): 950
+b (unexposed, disease): 999
+c (exposed, no disease): 234
+d (unexposed, no disease): 789
+-----------------------
+
+Measures of association
+=======================
+Stratum 1:
+Chi sq: 187.31728, p=0.00000
+Yates-corrected Chi sq: 186.23950, p=0.00000
+M-H Chi sq: 187.25425, p=0.00000
+Fisher's exact test: one-sided p=0.00000, two-sided (twice one-sided): p=0.00000, two-sided (as extreme): p=0.00000
+mid-p: one-sided p=0.00000, two-sided p=0.00000
+-----------------------
+
+Risk-based measures
+===================
+Stratum 1:
+Risk in exposed: 0.80236 (0.77870, 0.82407)
+Risk in unexposed: 0.55872 (0.53561, 0.58159)
+Risk in overall population: 0.65579 (0.63851, 0.67266)
+Risk ratio: 1.43606 (1.36608, 1.50963)
+Risk difference: 0.24364 (0.21133, 0.27595)
+-----------------------
+
+Odds-based measures
+===================
+Stratum 1:
+Sample odds ratio: 3.20641 (2.70296, 3.80363)
+CMLE odds ratio: 3.20517 (2.69469, 3.82075)
+mid-p CMLE odds ratio: 3.20401 (2.70440, 3.80622)
+-----------------------
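
For a well-conditioned table like this one, the sample odds ratio row is
reproduced exactly by the usual Wald arithmetic:

    import math
    a, b, c, d, z = 950, 999, 234, 789, 1.959964
    or_ = a * d / float(b * c)                               # ~3.20641
    se = math.sqrt(1.0/a + 1.0/b + 1.0/c + 1.0/d)
    lo, hi = or_ * math.exp(-z*se), or_ * math.exp(z*se)     # ~2.70296, ~3.80363
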
+
+==============================================================
+Large single stratum with one small cell
+==============================================================
+
+Tabulated values
+================
+Stratum 1:
+a (exposed, disease): 950
+b (unexposed, disease): 999
+c (exposed, no disease): 23
+d (unexposed, no disease): 789
+-----------------------
+
+Measures of association
+=======================
+Stratum 1:
+Chi sq: 529.39117, p=0.00000
+Yates-corrected Chi sq: 527.38138, p=0.00000
+M-H Chi sq: 529.19943, p=0.00000
+Fisher's exact test: one-sided p=0.00000, two-sided (twice one-sided): p=0.00000, two-sided (as extreme): p=0.00000
+mid-p: one-sided p=0.00000, two-sided p=0.00000
+-----------------------
+
+Risk-based measures
+===================
+Stratum 1:
+Risk in exposed: 0.97636 (0.96460, 0.98438)
+Risk in unexposed: 0.55872 (0.53561, 0.58159)
+Risk in overall population: 0.70590 (0.68863, 0.72261)
+Risk ratio: 1.74748 (1.67504, 1.82305)
+Risk difference: 0.41764 (0.39272, 0.44255)
+-----------------------
+
+Odds-based measures
+===================
+Stratum 1:
+Sample odds ratio: 32.62175 (21.34841, 49.84814)
+CMLE odds ratio: 32.59245 (21.32196, 52.15658)
+mid-p CMLE odds ratio: 32.37603 (21.69607, 50.95631)
+-----------------------
+
+==============================================================
+Very large single stratum with a small cell
+==============================================================
+
+Tabulated values
+================
+Stratum 1:
+a (exposed, disease): 9504
+b (unexposed, disease): 8997
+c (exposed, no disease): 43
+d (unexposed, no disease): 7892
+-----------------------
+
+Measures of association
+=======================
+Stratum 1:
+Chi sq: 6218.39167, p=0.00000
+Yates-corrected Chi sq: 6216.18880, p=0.00000
+M-H Chi sq: 6218.15644, p=0.00000
+Fisher's exact test: one-sided p=0.00000, two-sided (twice one-sided): p=0.00000, two-sided (as extreme): p=0.00000
+mid-p: one-sided p=0.00000, two-sided p=0.00000
+-----------------------
+
+Risk-based measures
+===================
+Stratum 1:
+Risk in exposed: 0.99550 (0.99392, 0.99667)
+Risk in unexposed: 0.53271 (0.52518, 0.54023)
+Risk in overall population: 0.69984 (0.69429, 0.70534)
+Risk ratio: 1.86873 (1.84240, 1.89543)
+Risk difference: 0.46278 (0.45514, 0.47043)
+-----------------------
+
+Odds-based measures
+===================
+Stratum 1:
+Sample odds ratio: 193.87746 (143.47173, 261.99218)
+CMLE odds ratio: 193.77910 (143.05865, 269.55986)
+mid-p CMLE odds ratio: 193.12102 (144.91267, 264.16474)
+-----------------------
+
+==============================================================
+Very large single stratum
+==============================================================
+
+Tabulated values
+================
+Stratum 1:
+a (exposed, disease): 9504
+b (unexposed, disease): 8997
+c (exposed, no disease): 8943
+d (unexposed, no disease): 7892
+-----------------------
+
+Measures of association
+=======================
+Stratum 1:
+Chi sq: 10.83442, p=0.00100
+Yates-corrected Chi sq: 10.76435, p=0.00103
+M-H Chi sq: 10.83411, p=0.00100
+Fisher's exact test: one-sided p=undefined, two-sided (twice one-sided): p=undefined, two-sided (as extreme): p=undefined
+mid-p: one-sided p=undefined, two-sided p=undefined
+-----------------------
+
+Risk-based measures
+===================
+Stratum 1:
+Risk in exposed: 0.51521 (0.50799, 0.52241)
+Risk in unexposed: 0.53271 (0.52518, 0.54023)
+Risk in overall population: 0.52357 (0.51836, 0.52878)
+Risk ratio: 0.96713 (0.94809, 0.98656)
+Risk difference: -0.01751 (-0.02793, -0.00709)
+-----------------------
+
+Odds-based measures
+===================
+Stratum 1:
+Sample odds ratio: 0.93221 (0.89404, 0.97200)
+CMLE odds ratio: undefined (undefined, undefined)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+
+==============================================================
+Two very large single strata with small cells
+==============================================================
+
+Tabulated values
+================
+Stratum 1:
+a (exposed, disease): 9504
+b (unexposed, disease): 8997
+c (exposed, no disease): 43
+d (unexposed, no disease): 7892
+-----------------------
+Stratum 2:
+a (exposed, disease): 9763
+b (unexposed, disease): 8345
+c (exposed, no disease): 27
+d (unexposed, no disease): 8765
+-----------------------
+Unstratified (crude):
+a (exposed, disease): 19267
+b (unexposed, disease): 17342
+c (exposed, no disease): 70
+d (unexposed, no disease): 16657
+-----------------------
+
+Measures of association
+=======================
+Stratum 1:
+Chi sq: 6218.39167, p=0.00000
+Yates-corrected Chi sq: 6216.18880, p=0.00000
+M-H Chi sq: 6218.15644, p=0.00000
+Fisher's exact test: one-sided p=0.00000, two-sided (twice one-sided): p=0.00000, two-sided (as extreme): p=0.00000
+mid-p: one-sided p=0.00000, two-sided p=0.00000
+-----------------------
+Stratum 2:
+Chi sq: 7347.53732, p=0.00000
+Yates-corrected Chi sq: 7345.22169, p=0.00000
+M-H Chi sq: 7347.26418, p=0.00000
+Fisher's exact test: one-sided p=0.00000, two-sided (twice one-sided): p=0.00000, two-sided (as extreme): p=0.00000
+mid-p: one-sided p=0.00000, two-sided p=0.00000
+-----------------------
+Unstratified (crude):
+Chi sq: 13542.20306, p=0.00000
+Yates-corrected Chi sq: 13539.94400, p=0.00000
+M-H Chi sq: 13541.94915, p=0.00000
+Fisher's exact test: one-sided p=undefined, two-sided (twice one-sided): p=undefined, two-sided (as extreme): p=undefined
+mid-p: one-sided p=undefined, two-sided p=undefined
+-----------------------
+Adjusted:
+Mantel-Haenszel chi square with continuity correction: undefined (p=undefined)
+Mantel-Haenszel chi square without continuity correction: undefined (p=undefined)
+Fisher exact test: one-sided: p=undefined, two-sided (twice one-sided): p=undefined, two-sided (as extreme): p=undefined
+Mid-p exact test: one-sided: p=undefined, two-sided: p=undefined
+-----------------------
+
+Risk-based measures
+===================
+Stratum 1:
+Risk in exposed: 0.99550 (0.99392, 0.99667)
+Risk in unexposed: 0.53271 (0.52518, 0.54023)
+Risk in overall population: 0.69984 (0.69429, 0.70534)
+Risk ratio: 1.86873 (1.84240, 1.89543)
+Risk difference: 0.46278 (0.45514, 0.47043)
+-----------------------
+Stratum 2:
+Risk in exposed: 0.99724 (0.99597, 0.99812)
+Risk in unexposed: 0.48773 (0.48024, 0.49522)
+Risk in overall population: 0.67316 (0.66753, 0.67874)
+Risk ratio: 2.04467 (2.01345, 2.07639)
+Risk difference: 0.50952 (0.50195, 0.51708)
+-----------------------
+Unstratified (crude):
+Risk in exposed: 0.99638 (0.99542, 0.99714)
+Risk in unexposed: 0.51007 (0.50476, 0.51539)
+Risk in overall population: 0.68638 (0.68243, 0.69031)
+Risk ratio: 1.95340 (1.93309, 1.97393)
+Risk difference: 0.48631 (0.48093, 0.49169)
+Aetiological fraction in the population: 0.25687 (0.25225, 0.26149)
+Aetiological fraction in the exposed: 0.48807 (0.48269, 0.49340)
+Prevented fraction in the population: -0.34566 (-0.35407, -0.33735)
+Prevented fraction in the exposed: -0.95340 (-0.97393, -0.93309)
+-----------------------
+Adjusted:
+Directly adjusted risk ratio: 1.94760 (1.92739, 1.96802)
+Mantel-Haenszel adjusted risk ratio: 1.95373 (1.93341, 1.97427)
+Breslow-Day chi square test for homogeneity of RR across strata: 70.97309 (p=0.00000)
+Directly adjusted risk difference: 0.48640 (0.48103, 0.49178)
+Breslow-Day chi square test for homogeneity of RD across strata: 72.57661 (p=0.00000)
+-----------------------
+
+Odds-based measures
+===================
+Stratum 1:
+Sample odds ratio: 193.87746 (143.47173, 261.99218)
+CMLE odds ratio: 193.77910 (143.05865, 269.55986)
+mid-p CMLE odds ratio: 193.12102 (144.91267, 264.16474)
+-----------------------
+Stratum 2:
+Sample odds ratio: 379.79138 (260.00945, 554.75480)
+CMLE odds ratio: 381.08695 (258.42271, 568.77081)
+mid-p CMLE odds ratio: 377.44889 (264.15755, 567.00305)
+-----------------------
+Unstratified (crude):
+Sample odds ratio: 264.37091 (208.86876, 334.62150)
+CMLE odds ratio: undefined (undefined, undefined)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+Aetiological fraction in the population: 0.52430 (0.51914, 0.52946)
+Aetiological fraction in the exposed: 0.99622 (0.99521, 0.99701)
+Prevented fraction in the population: -1.10217 (-1.12521, -1.07962)
+Prevented fraction in the exposed: -263.37091 (-333.62150, -207.86876)
+-----------------------
+Adjusted:
+Directly adjusted common odds ratio: 251.50739 (198.68996, 318.36520)
+Mantel-Haenszel common odds ratio: undefined (undefined, undefined)
+CMLE common odds ratio: undefined (undefined, undefined)
+Breslow-Day chi square test for homogeneity of OR across strata: 7.41511 (p=0.00647)
+Woolf chi square test for homogeneity of OR across strata: 7.41511, df=1.00000 (p=0.00647)
+-----------------------
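
The Woolf row above can be reproduced from the per-stratum odds ratios; a
sketch of the precision-weighted log-OR homogeneity test (df = strata - 1):

    import math

    def woolf_test(strata):
        logs, ws = [], []
        for a, b, c, d in strata:
            logs.append(math.log(a * d / float(b * c)))
            ws.append(1.0 / (1.0/a + 1.0/b + 1.0/c + 1.0/d))
        mean = sum(w * l for w, l in zip(ws, logs)) / sum(ws)
        chi2 = sum(w * (l - mean) ** 2 for w, l in zip(ws, logs))
        return chi2, len(strata) - 1

    woolf_test([(9504, 8997, 43, 7892), (9763, 8345, 27, 8765)])
    # -> (~7.41511, 1), matching the Woolf and Breslow-Day rows above
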
+
+==============================================================
+Two very large single strata
+==============================================================
+
+Tabulated values
+================
+Stratum 1:
+a (exposed, disease): 9504
+b (unexposed, disease): 8997
+c (exposed, no disease): 8943
+d (unexposed, no disease): 7892
+-----------------------
+Stratum 2:
+a (exposed, disease): 9763
+b (unexposed, disease): 8345
+c (exposed, no disease): 7827
+d (unexposed, no disease): 8765
+-----------------------
+Unstratified (crude):
+a (exposed, disease): 19267
+b (unexposed, disease): 17342
+c (exposed, no disease): 16770
+d (unexposed, no disease): 16657
+-----------------------
+
+Measures of association
+=======================
+Stratum 1:
+Chi sq: 10.83442, p=0.00100
+Yates-corrected Chi sq: 10.76435, p=0.00103
+M-H Chi sq: 10.83411, p=0.00100
+Fisher's exact test: one-sided p=undefined, two-sided (twice one-sided): p=undefined, two-sided (as extreme): p=undefined
+mid-p: one-sided p=undefined, two-sided p=undefined
+-----------------------
+Stratum 2:
+Chi sq: 157.45921, p=0.00000
+Yates-corrected Chi sq: 157.18959, p=0.00000
+M-H Chi sq: 157.45467, p=0.00000
+Fisher's exact test: one-sided p=undefined, two-sided (twice one-sided): p=undefined, two-sided (as extreme): p=undefined
+mid-p: one-sided p=undefined, two-sided p=undefined
+-----------------------
+Unstratified (crude):
+Chi sq: 42.33512, p=0.00000
+Yates-corrected Chi sq: 42.23669, p=0.00000
+M-H Chi sq: 42.33452, p=0.00000
+Fisher's exact test: one-sided p=undefined, two-sided (twice one-sided): p=undefined, two-sided (as extreme): p=undefined
+mid-p: one-sided p=undefined, two-sided p=undefined
+-----------------------
+Adjusted:
+Mantel-Haenszel chi square with continuity correction: undefined (p=undefined)
+Mantel-Haenszel chi square without continuity correction: undefined (p=undefined)
+Fisher exact test: one-sided: p=undefined, two-sided (twice one-sided): p=undefined, two-sided (as extreme): p=undefined
+Mid-p exact test: one-sided: p=undefined, two-sided: p=undefined
+-----------------------
+
+Risk-based measures
+===================
+Stratum 1:
+Risk in exposed: 0.51521 (0.50799, 0.52241)
+Risk in unexposed: 0.53271 (0.52518, 0.54023)
+Risk in overall population: 0.52357 (0.51836, 0.52878)
+Risk ratio: 0.96713 (0.94809, 0.98656)
+Risk difference: -0.01751 (-0.02793, -0.00709)
+-----------------------
+Stratum 2:
+Risk in exposed: 0.55503 (0.54768, 0.56236)
+Risk in unexposed: 0.48773 (0.48024, 0.49522)
+Risk in overall population: 0.52184 (0.51659, 0.52710)
+Risk ratio: 1.13800 (1.11516, 1.16130)
+Risk difference: 0.06730 (0.05682, 0.07779)
+-----------------------
+Unstratified (crude):
+Risk in exposed: 0.53464 (0.52949, 0.53979)
+Risk in unexposed: 0.51007 (0.50476, 0.51539)
+Risk in overall population: 0.52272 (0.51902, 0.52641)
+Risk ratio: 1.04817 (1.03340, 1.06315)
+Risk difference: 0.02457 (0.01717, 0.03197)
+Aetiological fraction in the population: 0.02419 (0.01690, 0.03148)
+Aetiological fraction in the exposed: 0.04596 (0.03233, 0.05940)
+Prevented fraction in the population: -0.02479 (-0.03250, -0.01719)
+Prevented fraction in the exposed: -0.04817 (-0.06315, -0.03340)
+-----------------------
+Adjusted:
+Directly adjusted risk ratio: 1.04746 (1.03270, 1.06244)
+Mantel-Haenszel adjusted risk ratio: 1.04810 (1.03334, 1.06307)
+Breslow-Day chi square test for homogeneity of RR across strata: 126.08734 (p=0.00000)
+Directly adjusted risk difference: 0.02463 (0.01723, 0.03202)
+Breslow-Day chi square test for homogeneity of RD across strata: 126.36931 (p=0.00000)
+-----------------------
+
+Odds-based measures
+===================
+Stratum 1:
+Sample odds ratio: 0.93221 (0.89404, 0.97200)
+CMLE odds ratio: undefined (undefined, undefined)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Stratum 2:
+Sample odds ratio: 1.31013 (1.25596, 1.36663)
+CMLE odds ratio: undefined (undefined, undefined)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+-----------------------
+Unstratified (crude):
+Sample odds ratio: 1.10352 (1.07125, 1.13675)
+CMLE odds ratio: undefined (undefined, undefined)
+mid-p CMLE odds ratio: undefined (undefined, undefined)
+Aetiological fraction in the population: 0.04937 (0.03488, 0.06386)
+Aetiological fraction in the exposed: 0.09381 (0.06651, 0.12030)
+Prevented fraction in the population: -0.05193 (-0.06821, -0.03614)
+Prevented fraction in the exposed: -0.10352 (-0.13675, -0.07125)
+-----------------------
+Adjusted:
+Directly adjusted common odds ratio: 1.10324 (1.07095, 1.13651)
+Mantel-Haenszel common odds ratio: undefined (undefined, undefined)
+CMLE common odds ratio: undefined (undefined, undefined)
+Breslow-Day chi square test for homogeneity of OR across strata: 126.02768 (p=0.00000)
+Woolf chi square test for homogeneity of OR across strata: 126.02768, df=1.00000 (p=0.00000)
+-----------------------
diff --git a/sandbox/wordidx.py b/sandbox/wordidx.py
new file mode 100644
index 0000000..1c9b92d
--- /dev/null
+++ b/sandbox/wordidx.py
@@ -0,0 +1,338 @@
+# vim: set sw=4 et ai:
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+
+"""
+Code for experimenting with and benchmarking techniques for indexing
+the SearchableText columns.
+"""
+
+import os
+import sys
+import gzip
+import re
+import array
+from time import time
+from bsddb import db
+
+from soomfunc import strip_word
+from soomarray import ArrayVocab
+
+import SOOMv0
+from SOOMv0.ColTypes.SearchableText import Occurrences
+
+def timeit(f, *a, **kw):
+ st = time()
+ ret = f(*a, **kw)
+ el = time() - st
+ print '%s %.2fs (%.1f minutes)' % (f.__name__, el, el / 60)
+ return ret
+
+#------------------------------------------------------------------------------
+# Just time iteration over the rows
+if 0:
+ def read(f):
+ for rownum, value in enumerate(f):
+ pass
+
+ f = gzip.open(sys.argv[1])
+ timeit(read, f)
+
+#------------------------------------------------------------------------------
+# Iterate over the rows, splitting into words, and iterating over those
+if 0:
+ def read_and_split(f):
+ WORD_RE = re.compile(r"[A-Z0-9][A-Z0-9']+", re.I)
+ cnt = 0
+ for rownum, value in enumerate(f):
+ value = value[:-1]
+ if value:
+ for wordnum, match in enumerate(WORD_RE.finditer(value)):
+ word = strip_word(match.group())
+ cnt += 1
+ print cnt
+
+ f = gzip.open(sys.argv[1])
+ timeit(read_and_split, f)
+
+#------------------------------------------------------------------------------
+# As above, but record word indexes within a record
+if 0:
+ def read_split_and_line_index(f):
+ WORD_RE = re.compile(r"[A-Z0-9][A-Z0-9']+", re.I)
+ for rownum, value in enumerate(f):
+ value = value[:-1]
+ if value:
+ words = {}
+ for wordnum, match in enumerate(WORD_RE.finditer(value)):
+ word = strip_word(match.group())
+ words.setdefault(word, []).append(wordnum)
+
+ f = gzip.open(sys.argv[1])
+ timeit(read_split_and_line_index, f)
+
+#------------------------------------------------------------------------------
+# Iterate over rows, splitting into words, assign unique word number
+if 0:
+ def read_split_and_word_index(f):
+ WORD_RE = re.compile(r"[A-Z0-9][A-Z0-9']+", re.I)
+ words = {}
+ next_word_num = 0
+ for rownum, value in enumerate(f):
+ value = value[:-1]
+ if value:
+ for wordnum, match in enumerate(WORD_RE.finditer(value)):
+ word = strip_word(match.group())
+ try:
+ word_num = words[word]
+ except KeyError:
+ word_num = words[word] = next_word_num
+ next_word_num += 1
+ return words
+
+ f = gzip.open(sys.argv[1])
+ words = timeit(read_split_and_word_index, f)
+
+#------------------------------------------------------------------------------
+# Iterate rows and words, recording unique words in the Occurrences abstraction
+# (the result is conceptually like the previous test, so we get to see how much
+# impact Occurrences has).
+if 0:
+ def index_occurrences(f):
+ SOOMv0.soom.messages = 1
+ WORD_RE = re.compile(r"[A-Z0-9][A-Z0-9']+", re.I)
+ word_first_last = {}
+ occurrences = Occurrences('occurrences', 'c')
+ next_word_num = 0
+ for rownum, value in enumerate(f):
+ value = value[:-1]
+ if value:
+ words = {}
+ for wordnum, match in enumerate(WORD_RE.finditer(value)):
+ word = strip_word(match.group())
+ words.setdefault(word, []).append(wordnum)
+ for word, wordnums in words.iteritems():
+ first_last = word_first_last.get(word, None)
+ new_first_last = occurrences.add_row_wordnums(first_last, rownum, wordnums)
+ if first_last != new_first_last:
+ word_first_last[word] = new_first_last
+ if rownum % 1000 == 0:
+ occurrences.age()
+ occurrences.close()
+ return words
+
+ f = gzip.open(sys.argv[1])
+ words = timeit(index_occurrences, f)
+
+#------------------------------------------------------------------------------
+# Iterate rows and words, inserting unique words into a bsddb hash file
+# (aka SearchableText's vocab dictionary).
+if 0:
+ def index_vocab(f):
+ SOOMv0.soom.messages = 1
+ WORD_RE = re.compile(r"[A-Z0-9][A-Z0-9']+", re.I)
+ word_first_last = ArrayVocab('vocab', 'c')
+ # occurrences = Occurrences('occurrences', 'c')
+ next_word_num = 0
+ for rownum, value in enumerate(f):
+ value = value[:-1]
+ if value:
+ words = {}
+ for wordnum, match in enumerate(WORD_RE.finditer(value)):
+ word = strip_word(match.group())
+ words.setdefault(word, []).append(wordnum)
+ for word, wordnums in words.iteritems():
+ first_last = word_first_last.get(word, None)
+ if first_last is None:
+ word_first_last[word] = 0, 0
+ return words
+
+ f = gzip.open(sys.argv[1])
+ words = timeit(index_vocab, f)
+
+#------------------------------------------------------------------------------
+# Iterate rows and words, creating a Word object for each unique word to test
+# the viability of a more structured Vocab implementation.
+if 0:
+ class Word(object):
+ __slots__ = ('start','indexes')
+ def __init__(self):
+ self.start = None
+ self.indexes = array.array('L')
+
+ def read_obj(f):
+ WORD_RE = re.compile(r"[A-Z0-9][A-Z0-9']+", re.I)
+ cnt = 0
+ words = {}
+ for rownum, value in enumerate(f):
+ value = value[:-1]
+ if value:
+ for wordnum, match in enumerate(WORD_RE.finditer(value)):
+ word = strip_word(match.group())
+ try:
+ wrec = words[word]
+ except KeyError:
+ wrec = words[word] = Word()
+ wrec.indexes.append(rownum)
+ wrec.indexes.append(wordnum)
+ return words
+
+ f = gzip.open(sys.argv[1])
+ words = timeit(read_obj, f)
+
+#------------------------------------------------------------------------------
+# Iterate rows and words, recording word positions in the WordStore object.
+# When a WordStore instance has more than 500 positions, spill the positions to
+# the backing bsddb and clear the array. Finally, flush any WordStores with
+# residual positions.
+#
+# This technique is way too slow - the continual growing of bsddb values is a
+# killer for the commonly seen words. After several hours run time, I killed my
+# test fun because it was eating my machine alive, and the data file was only a
+# few hundred meg.
+
+if 0:
+ class WordStore(object):
+ __slots__ = ('start','indexes')
+ def __init__(self):
+ self.start = None
+ self.indexes = array.array('L')
+
+ def read_obj_store(f):
+ WORD_RE = re.compile(r"[A-Z0-9][A-Z0-9']+", re.I)
+ cnt = 0
+ try:
+ os.unlink('vocab')
+ except OSError:
+ pass
+ worddb = db.DB()
+ worddb.set_cachesize(0, 8<<20)
+ worddb.open('vocab', db.DB_HASH, db.DB_CREATE, 0666)
+ words = {}
+ for rownum, value in enumerate(f):
+ value = value[:-1]
+ if value:
+ for wordnum, match in enumerate(WORD_RE.finditer(value)):
+ word = strip_word(match.group())
+ try:
+ wrec = words[word]
+ except KeyError:
+ wrec = words[word] = WordStore()
+ indexes = wrec.indexes
+ indexes.append(rownum)
+ indexes.append(wordnum)
+ if len(indexes) == 1024:
+ worddb[word] = worddb.get(word, '') + indexes.tostring()
+ del indexes[:]
+ print "flush %r, %d bytes" % (word, len(worddb[word]))
+ print "done, now flushing residual"
+ for word, wrec in words.iteritems():
+ indexes = wrec.indexes
+ if indexes:
+ worddb[word] = worddb.get(word, '') + indexes.tostring()
+ del indexes[:]
+ worddb.close()
+ return words
+
+ f = gzip.open(sys.argv[1])
+ words = timeit(read_obj_store, f)
+
+#------------------------------------------------------------------------------
+# As above, but spill to a separate data file.
+
+if 0:
+ class WordStore(object):
+ __slots__ = ('occurrences','indexes')
+ def __init__(self):
+            self.occurrences = None  # created lazily on first spill
+ self.indexes = array.array('L')
+
+ def read_obj_store_sep(f):
+ WORD_RE = re.compile(r"[A-Z0-9][A-Z0-9']+", re.I)
+ cnt = 0
+ try:
+ os.unlink('vocab')
+ except OSError:
+ pass
+ worddb = db.DB()
+ worddb.set_cachesize(0, 8<<20)
+ worddb.open('vocab', db.DB_HASH, db.DB_CREATE, 0666)
+ occ = open('occurrences', 'wb')
+ occ_count = 0
+ words = {}
+ for rownum, value in enumerate(f):
+ value = value[:-1]
+ if value:
+ for wordnum, match in enumerate(WORD_RE.finditer(value)):
+ word = strip_word(match.group())
+ try:
+ wrec = words[word]
+ except KeyError:
+ wrec = words[word] = WordStore()
+ indexes = wrec.indexes
+ indexes.append(rownum)
+ indexes.append(wordnum)
+ if len(indexes) == 1024:
+ if wrec.occurrences is None:
+ wrec.occurrences = array.array('L')
+ indexes.tofile(occ)
+ wrec.occurrences.append(occ_count)
+ occ_count += 1
+ del indexes[:]
+# print "flush %r, %d bytes" % (word, len(worddb[word]))
+ print "done, %d spills, now flushing residual" % (occ_count)
+ for word, wrec in words.iteritems():
+ if wrec.indexes or wrec.occurrences is not None:
+ data = array.array('L')
+ if wrec.occurrences is not None:
+ data.append(len(wrec.occurrences))
+ data.extend(wrec.occurrences)
+ else:
+ data.append(0)
+ data.extend(wrec.indexes)
+ worddb[word] = data.tostring()
+ worddb.close()
+ return words
+
+ f = gzip.open(sys.argv[1])
+ words = timeit(read_obj_store_sep, f)
+
+
+#------------------------------------------------------------------------------
+# Read data created by the above
+if 1:
+ class Occurrences:
+ def __init__(self):
+ self.worddb = db.DB()
+ self.worddb.open('vocab', db.DB_HASH, db.DB_RDONLY)
+ self.occ = open('occurrences', 'rb')
+
+ def find(self, word):
+ data = array.array('L')
+ data.fromstring(self.worddb[word.upper()])
+ nspills = data[0]
+ spillblocks = data[1:nspills+1]
+ residual = data[nspills+1:]
+ print "found %r, nspills %s, residual len %s" %\
+ (word, nspills, len(residual))
+ pairs = array.array('L')
+ for block in spillblocks:
+ self.occ.seek(block * 4096, 0)
+ pairs.fromfile(self.occ, 1024)
+ pairs.extend(residual)
+ return pairs
+
+ occ = Occurrences()
+
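
A hypothetical query against the reader above ('diabetes' is an illustrative
term only; find() returns a flat array of interleaved rownum/wordnum values):

    pairs = occ.find('diabetes')
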
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..23c565f
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,42 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+#
+# To use:
+# python setup.py install
+#
+# $Id: setup.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/setup.py,v $
+
+from SOOMv0.common import version
+from distutils.core import setup
+
+import sys
+if 'sdist' in sys.argv:
+ sys.argv.append('--force-manifest')
+
+if 'bdist_rpm' in sys.argv:
+ version = version.replace('-', '_')
+
+setup(name = "NetEpi-Analysis",
+ version = version,
+ maintainer = "NSW Department of Health",
+ maintainer_email = "Tim CHURCHES <TCHUR at doh.health.nsw.gov.au>",
+ description = "Network-enabled tools for epidemiology and public health practice",
+ url = "http://netepi.info/",
+ packages = ['SOOMv0', 'SOOMv0.ColTypes', 'SOOMv0.Sources',
+ 'SOOMv0.Plot', 'SOOMv0.Analysis'],
+ license = 'Health Administration Corporation Open Source License Version 1.2',
+)
+
diff --git a/simpleinst/.cvsignore b/simpleinst/.cvsignore
new file mode 100644
index 0000000..b948985
--- /dev/null
+++ b/simpleinst/.cvsignore
@@ -0,0 +1,2 @@
+*.swp
+*.pyc
diff --git a/simpleinst/__init__.py b/simpleinst/__init__.py
new file mode 100644
index 0000000..25b7131
--- /dev/null
+++ b/simpleinst/__init__.py
@@ -0,0 +1,66 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "SimpleInst". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia. Copyright (C) 2004 Health
+# Administration Corporation. All Rights Reserved.
+#
+import simpleinst.defaults
+import simpleinst.platform
+import simpleinst.config_register
+import simpleinst.install_files
+import simpleinst.pyinstaller
+import simpleinst.utils
+from simpleinst.filter import Filter
+from simpleinst.utils import secret, getpass, collect, rm_pyc
+from simpleinst.usergroup import user_lookup
+
+# Config priority:
+# 10. command line
+# 20. config.py
+# 30. install.py (includes any explicitly set config attributes)
+# 40. platform defaults (from simpleinst.platform)
+# 50. defaults (from simpleinst.defaults)
+# Note that we add these in reverse order, as later, more complex configs
+# can depend on earlier ones
+config = simpleinst.config_register.Config()
+config.source(50, simpleinst.defaults.Defaults())
+config.source(40, simpleinst.platform.get_platform())
+config.source_attrs(30)
+config.source_file(20, 'config')
+config.source_cmdline(10)
+
+import os
+joinpath = os.path.join
+basename = os.path.basename
+dirname = os.path.dirname
+abspath = os.path.abspath
+normpath = os.path.normpath
+del os
+
+from py_compile import compile as _py_compile
+def py_compile(fn):
+ _py_compile(fn, doraise=True)
+
+def make_dirs(*args, **kwargs):
+ kwargs['config'] = config
+ return simpleinst.utils.make_dirs(*args, **kwargs)
+
+def install(**kwargs):
+ return simpleinst.install_files.install(config=config, **kwargs)
+
+def on_install(*args, **kwargs):
+ simpleinst.install_files.on_install(config, *args, **kwargs)
+
+def py_installer(name, *args, **kwargs):
+ return simpleinst.pyinstaller.py_installer(config, name, *args, **kwargs)
+
+python_bang_path_filter = Filter(config, pattern = '^#!.*',
+ subst = '#!%(python)s', count = 1)
+
+copy = simpleinst.install_files.copy
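
A hypothetical install.py sketch (the attribute names are illustrative;
config, joinpath and the priority machinery are real, from the module above):

    from simpleinst import *

    # Attributes set here land in the priority-30 "Installer" source, so a
    # sibling config.py (priority 20) or name=value command-line arguments
    # (priority 10) can still override them.
    config.appname = 'demo'
    config.html_dir = joinpath(config.base_dir, 'html')
    print(config)   # Config.__str__ lists every attribute and its source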
diff --git a/simpleinst/config_register.py b/simpleinst/config_register.py
new file mode 100644
index 0000000..8c70b64
--- /dev/null
+++ b/simpleinst/config_register.py
@@ -0,0 +1,185 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "SimpleInst". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia. Copyright (C) 2004 Health
+# Administration Corporation. All Rights Reserved.
+#
+
+import sys
+import os
+import imp
+import tempfile
+import bisect
+import fnmatch
+from simpleinst.utils import chown, chmod, normjoin, make_dirs
+
+class ConfigBase:
+ pass
+
+
+class ConfigAttrs(ConfigBase):
+ config_source = 'Installer'
+
+
+class ConfigCmdLine(ConfigBase):
+ """
+ Parse the command line, infering types from prior config (defaults)
+ """
+ config_source = 'Command Line'
+
+ def __init__(self, config):
+ for arg in sys.argv[1:]:
+ try:
+ a, v = arg.split('=')
+ except ValueError:
+ sys.exit('Unknown command line option: %r' % arg)
+ try:
+ t = type(getattr(config, a))
+ except AttributeError:
+ pass
+ else:
+ if t is bool:
+ v = v.lower() in ('t', 'true', 'y', 'yes', '1')
+ else:
+ try:
+ v = t(v)
+ except (ValueError, TypeError):
+ pass
+ setattr(self, a, v)
+
+
+class Config:
+ def __init__(self):
+ self._sources = []
+ self._config_attrs = ConfigAttrs()
+
+ def source(self, prio, source):
+ assert source
+ pair = prio, source
+ i = bisect.bisect(self._sources, pair)
+ self._sources.insert(i, pair)
+
+ def source_attrs(self, prio):
+ self.source(prio, self._config_attrs)
+
+ def source_file(self, prio, name='config', path='', exclude=None):
+ if not os.path.isabs(path):
+ path = normjoin(self.base_dir, path)
+ try:
+ f, filename, extras = imp.find_module(name, [path])
+ except ImportError, e:
+ return
+ config_mod = imp.load_module(name, f, filename, extras)
+ config_mod.config_source = filename
+ if exclude:
+ for attr in exclude:
+ try:
+ delattr(config_mod, attr)
+ except AttributeError:
+ pass
+ self.source(prio, config_mod)
+
+ def source_cmdline(self, prio):
+ self.source(prio, ConfigCmdLine(self))
+
+ def __getattr__(self, a):
+ for prio, source in self._sources:
+ try:
+ return getattr(source, a)
+ except AttributeError:
+ pass
+ raise AttributeError('attribute "%s" not found' % a)
+
+ def __setattr__(self, a, v):
+ if a.startswith('_'):
+ self.__dict__[a] = v
+ else:
+ setattr(self._config_attrs, a, v)
+
+ def _config_dict(self):
+ """
+ Produce a dictionary of the current config
+ """
+ class _ConfigItem(object):
+ __slots__ = 'value', 'source'
+
+ def __init__(self, value, source):
+ self.value = value
+ self.source = source
+
+ config = {}
+ for prio, source in self._sources:
+ for a in dir(source):
+ if not config.has_key(a) and not a.startswith('_') \
+ and a != 'config_source':
+ v = getattr(source, a)
+ if not callable(v):
+ config[a] = _ConfigItem(v, source.config_source)
+ return config
+
+ def write_file(self, filename, exclude=None, owner=None, mode=None):
+ if not exclude:
+ exclude = ()
+ config = self._config_dict()
+ if self.install_prefix:
+ filename = self.install_prefix + filename
+ target_dir = os.path.dirname(filename)
+ make_dirs(target_dir, owner=owner)
+ fd, tmpname = tempfile.mkstemp(dir=target_dir)
+ f = os.fdopen(fd, 'w')
+ attributes = config.keys()
+ attributes.sort()
+ try:
+ for a in attributes:
+ for e in exclude:
+ if fnmatch.fnmatch(a, e):
+ break
+ else:
+ f.write('%s=%r\n' % (a, config[a].value))
+ f.flush()
+ if owner is not None:
+ chown(tmpname, owner)
+ if mode is not None:
+ chmod(tmpname, mode)
+ os.rename(tmpname, filename)
+ finally:
+ f.close()
+ try:
+ os.unlink(tmpname)
+ except OSError:
+ pass
+
+ def __str__(self):
+ srcs = ';'.join(['%s[%s]' % (s.config_source, p)
+ for p, s in self._sources])
+ config = self._config_dict()
+ attrs = config.keys()
+ attrs.sort()
+ attrs = ['\n %s=%r (from %s)' % (a,config[a].value,config[a].source)
+ for a in attrs]
+ return '<%s %s%s>' % (self.__class__.__name__, srcs, ''.join(attrs))
+
+class Args:
+ pass
+
+def args_with_defaults(kwargs, config, arglist, conf_prefix = ''):
+ args = Args()
+ for argname in arglist:
+ try:
+ value = kwargs[argname]
+ except KeyError:
+ try:
+ value = getattr(config, conf_prefix + argname)
+ except AttributeError:
+ try:
+ value = getattr(config, argname)
+ except AttributeError:
+ value = None
+ setattr(args, argname, value)
+ return args
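+
+# Resolution sketch: args_with_defaults(kwargs, config, ('target',),
+# conf_prefix='install_') yields kwargs['target'] if given, else
+# config.install_target, else config.target, else None.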
diff --git a/simpleinst/defaults.py b/simpleinst/defaults.py
new file mode 100644
index 0000000..37201e6
--- /dev/null
+++ b/simpleinst/defaults.py
@@ -0,0 +1,25 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "SimpleInst". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia. Copyright (C) 2004 Health
+# Administration Corporation. All Rights Reserved.
+#
+import sys
+from os.path import dirname, abspath, normpath
+from distutils.sysconfig import get_config_var
+
+
+class Defaults:
+ config_source = 'Defaults'
+ install_mode = 0444
+ install_verbose = False
+ install_prefix = ''
+ base_dir = normpath(dirname(sys.modules['__main__'].__file__))
+ python = abspath(sys.executable)
+ bin_dir = get_config_var('BINDIR')
diff --git a/simpleinst/filter.py b/simpleinst/filter.py
new file mode 100644
index 0000000..df1ab2e
--- /dev/null
+++ b/simpleinst/filter.py
@@ -0,0 +1,33 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "SimpleInst". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia. Copyright (C) 2004 Health
+# Administration Corporation. All Rights Reserved.
+#
+import re
+
+class _InstanceAsDict:
+ def __init__(self, inst):
+ self.inst = inst
+
+ def __getitem__(self, a):
+ try:
+ return getattr(self.inst, a)
+ except AttributeError:
+ raise KeyError(a)
+
+class Filter:
+ def __init__(self, config, pattern, subst, count = 0):
+ self.config = _InstanceAsDict(config)
+ self.pattern = re.compile(pattern, re.MULTILINE)
+ self.subst = subst
+ self.count = count
+
+ def filter(self, data):
+ return self.pattern.sub(self.subst % self.config, data, self.count)
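+
+# Usage sketch (hypothetical pattern): the substitution may interpolate
+# config attributes, e.g. Filter(config, r'^#!.*$', '#!%(python)s')
+# rewrites a script's interpreter line to the configured python path.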
diff --git a/simpleinst/glob.py b/simpleinst/glob.py
new file mode 100644
index 0000000..2326e45
--- /dev/null
+++ b/simpleinst/glob.py
@@ -0,0 +1,49 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "SimpleInst". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia. Copyright (C) 2004 Health
+# Administration Corporation. All Rights Reserved.
+#
+"""
+The standard glob module doesn't support the concept of a base dir,
+something that we need in this application.
+"""
+import os
+import re
+import fnmatch
+
+glob_re = re.compile('[*?[]')
+
+def glob(basedir, pattern):
+ def has_glob(pattern):
+ return glob_re.search(pattern) is not None
+
+ dirs = ['']
+ pattern_components = pattern.split(os.path.sep)
+ for pc in pattern_components:
+ new_dirs = []
+ if has_glob(pc):
+ for dir in dirs:
+ try:
+ files = os.listdir(os.path.join(basedir, dir))
+ except OSError:
+ continue
+ # hidden files are skipped unless this pattern component asks for them
+ if not pc.startswith('.'):
+ files = [f for f in files if not f.startswith('.')]
+ new_dirs.extend([os.path.join(dir, f)
+ for f in fnmatch.filter(files, pc)])
+ else:
+ for dir in dirs:
+ if os.path.exists(os.path.join(basedir, dir, pc)):
+ new_dirs.append(os.path.join(dir, pc))
+ dirs = new_dirs
+ if not dirs:
+ return [pattern]
+ else:
+ return dirs
diff --git a/simpleinst/install_files.py b/simpleinst/install_files.py
new file mode 100644
index 0000000..0f32d59
--- /dev/null
+++ b/simpleinst/install_files.py
@@ -0,0 +1,190 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "SimpleInst". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia. Copyright (C) 2004 Health
+# Administration Corporation. All Rights Reserved.
+#
+import os
+import stat
+import errno
+import tempfile
+import re
+from fnmatch import fnmatch
+from simpleinst.config_register import args_with_defaults
+from simpleinst.usergroup import user_lookup
+from simpleinst.glob import glob
+from simpleinst.filter import Filter
+from simpleinst.utils import *
+
+class PostInstallAction:
+ def __init__(self, pattern, fn, args, kwargs):
+ self.pattern = pattern
+ self.fn = fn
+ self.args = args
+ self.kwargs = kwargs
+
+ def match(self, filename):
+ return fnmatch(filename, self.pattern)
+
+ def action(self, filename, verbose):
+ if verbose:
+ print ' %s(%s)' % (self.fn.__name__, filename)
+ self.fn(filename, *self.args, **self.kwargs)
+
+class PostInstallActions:
+ def __init__(self, config):
+ self.actions = []
+ self.config = config
+
+ def add(self, pattern, fn, *args, **kwargs):
+ self.actions.append(PostInstallAction(pattern, fn, args, kwargs))
+
+ def post_install(self, filename, verbose=False):
+ for action in self.actions:
+ if action.match(filename):
+ action.action(filename, verbose=verbose)
+
+post_install_actions = None
+
+class FilenameFilter:
+ def __init__(self, include, exclude):
+ if type(include) in (str, unicode):
+ include = [include]
+ if type(exclude) in (str, unicode):
+ exclude = [exclude]
+ self.include_nop = not include
+ self.exclude_nop = not exclude
+ if not self.include_nop:
+ self.path_include = [i for i in include if i.find(os.path.sep) >= 0]
+ self.name_include = [i for i in include if i.find(os.path.sep) < 0]
+ if not self.exclude_nop:
+ self.path_exclude = [i for i in exclude if i.find(os.path.sep) >= 0]
+ self.name_exclude = [i for i in exclude if i.find(os.path.sep) < 0]
+
+ def include(self, name):
+ basename = os.path.basename(name)
+
+ if not self.exclude_nop:
+ if basename != name:
+ for exclude in self.path_exclude:
+ if fnmatch(name, exclude):
+ return False
+ for exclude in self.name_exclude:
+ if fnmatch(basename, exclude):
+ return False
+
+ if self.include_nop:
+ return True
+
+ if basename != name:
+ for include in self.path_include:
+ if fnmatch(name, include):
+ return True
+ for include in self.name_include:
+ if fnmatch(basename, include):
+ return True
+
+ return False
+
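+# Matching sketch: patterns containing a path separator are matched against
+# the whole relative name, bare patterns against the basename; exclusion is
+# tested before inclusion, and an empty include list admits everything.
+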
+def copy(src, dst,
+ owner = None, mode = None, filter = None, verbose = False,
+ bufsize = 1 << 22):
+ dst_dir = os.path.dirname(dst)
+ make_dirs(dst_dir, owner)
+ r_fd = os.open(src, os.O_RDONLY)
+ try:
+ st = os.fstat(r_fd)
+ try:
+ dst_st = os.stat(dst)
+ except OSError, (eno, estr):
+ if eno != errno.ENOENT:
+ raise
+ else:
+ # Same file? mtime almost matches, and size matches...
+ if (abs(st.st_mtime - dst_st.st_mtime) <= 1 and
+ st.st_size == dst_st.st_size):
+ return False
+ w_fd, tmp_filename = tempfile.mkstemp(dir = dst_dir)
+ try:
+ while 1:
+ buf = os.read(r_fd, bufsize)
+ if not buf:
+ break
+ if filter:
+ if len(buf) == bufsize:
+ raise IOError('Can\'t filter files larger than %s' %
+ (bufsize - 1))
+ for f in filter:
+ buf = f.filter(buf)
+ os.write(w_fd, buf)
+ if mode:
+ chmod(tmp_filename, mode)
+ else:
+ os.chmod(tmp_filename, st.st_mode & 0777)
+ if owner:
+ os.chown(tmp_filename, *owner)
+ os.rename(tmp_filename, dst)
+ os.utime(dst, (st.st_atime, st.st_mtime))
+ if verbose:
+ print ' %s -> %s' % (src, dst_dir)
+ tmp_filename = None
+ return True
+ finally:
+ os.close(w_fd)
+ if tmp_filename:
+ os.unlink(tmp_filename)
+ finally:
+ os.close(r_fd)
+
+def recursive_copy(args, src):
+ fullsrc = normjoin(args.base, src)
+ src_path, src_file = os.path.split(src)
+ st = os.stat(fullsrc)
+ if stat.S_ISDIR(st.st_mode):
+ for filename in os.listdir(fullsrc):
+ recursive_copy(args, os.path.join(src, filename))
+ else:
+ if not args.filename_filter.include(src):
+ return
+ dst = normjoin(args.target, src)
+ if copy(fullsrc, dst, filter = args.filter,
+ owner = args.owner, mode = args.mode,
+ verbose = args.verbose) and post_install_actions:
+ post_install_actions.post_install(dst, verbose=args.verbose)
+
+def install(config, **kwargs):
+ args = args_with_defaults(kwargs, config,
+ ('target', 'base', 'files', 'owner', 'mode',
+ 'include', 'exclude', 'filter'),
+ conf_prefix = 'install_')
+
+ if type(args.files) in (str, unicode):
+ args.files = [args.files]
+ if isinstance(args.filter, Filter):
+ args.filter = [args.filter]
+ if config.install_prefix:
+ args.target = config.install_prefix + args.target
+ args.verbose = getattr(config, 'install_verbose')
+ args.filename_filter = FilenameFilter(args.include, args.exclude)
+ if args.base:
+ args.base = normjoin(config.base_dir, args.base)
+ else:
+ args.base = config.base_dir
+ args.owner = user_lookup(args.owner)
+
+ for pat in args.files:
+ print 'installing %s to %s' % (normjoin(args.base, pat), args.target)
+ for src in glob(args.base, pat):
+ recursive_copy(args, src)
+
+def on_install(config, pattern, fn, *args, **kwargs):
+ global post_install_actions
+ if post_install_actions is None:
+ post_install_actions = PostInstallActions(config)
+ post_install_actions.add(pattern, fn, *args, **kwargs)
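+
+# Hook sketch: on_install(config, '*.py', rm_pyc), with rm_pyc from
+# simpleinst.utils, removes stale .pyc/.pyo files beside each .py file
+# after it is installed.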
diff --git a/simpleinst/platform.py b/simpleinst/platform.py
new file mode 100644
index 0000000..8138e3d
--- /dev/null
+++ b/simpleinst/platform.py
@@ -0,0 +1,65 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "SimpleInst". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia. Copyright (C) 2004 Health
+# Administration Corporation. All Rights Reserved.
+#
+import sys, os
+import pwd
+
+class PlatformBase:
+ def __init__(self):
+ self.config_source = '%s platform' % self.platform
+
+class RedHatLinux(PlatformBase):
+ platform = "RedHat Linux"
+ html_dir = '/var/www/html'
+ cgi_dir = '/var/www/cgi-bin'
+ web_user = 'apache'
+
+ def is_platform(self):
+ return sys.platform == 'linux2' \
+ and os.path.exists('/etc/redhat-release')
+
+class DebianLinux(PlatformBase):
+ platform = "Debian Linux"
+ html_dir = '/var/www'
+ cgi_dir = '/usr/lib/cgi-bin'
+ web_user = 'www-data'
+
+ def is_platform(self):
+ return sys.platform == 'linux2' \
+ and os.path.exists('/etc/debian_version')
+
+class OSX(PlatformBase):
+ platform = "Apple OS X"
+ html_dir = '/Library/WebServer/Documents'
+ cgi_dir = '/Library/WebServer/CGI-Executables'
+ web_user = 'www'
+
+ def is_platform(self):
+ if sys.platform != 'darwin':
+ return False
+ # Leopard returns _www for this:
+ self.web_user = pwd.getpwnam('www').pw_name
+ return True
+
+def get_platform():
+ platforms = []
+ for name, var in globals().items():
+ if hasattr(var, 'is_platform'):
+ platform = var()
+ if platform.is_platform():
+ platforms.append(platform)
+ if not platforms:
+ sys.exit('Unrecognised platform')
+ if len(platforms) > 1:
+ sys.exit('Ambiguous platform detection: %s' % \
+ ', '.join([p.platform for p in platforms]))
+ return platforms[0]
diff --git a/simpleinst/pyinstaller.py b/simpleinst/pyinstaller.py
new file mode 100644
index 0000000..b765b25
--- /dev/null
+++ b/simpleinst/pyinstaller.py
@@ -0,0 +1,30 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "SimpleInst". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia. Copyright (C) 2004 Health
+# Administration Corporation. All Rights Reserved.
+#
+"""
+Import and run a user-defined python installer
+"""
+
+import os
+import imp
+
+def py_installer(config, name, *args, **kwargs):
+ print 'executing installer', name
+ path = os.path.join(config.base_dir, name)
+ gbals = {
+ '__name__': '__install__',
+ 'config': config,
+ 'args': args,
+ 'kwargs': kwargs
+ }
+ return execfile(path, gbals)
+
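+# Run sketch (hypothetical script name): py_installer(config, 'web/install.py')
+# executes that file relative to config.base_dir with __name__ set to
+# '__install__' and the live config, args and kwargs in its globals.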
diff --git a/simpleinst/usergroup.py b/simpleinst/usergroup.py
new file mode 100644
index 0000000..55f58b6
--- /dev/null
+++ b/simpleinst/usergroup.py
@@ -0,0 +1,47 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "SimpleInst". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia. Copyright (C) 2004 Health
+# Administration Corporation. All Rights Reserved.
+#
+import pwd, grp
+import os
+
+cache = {}
+
+def user_lookup(name):
+ try:
+ return cache[name]
+ except KeyError:
+ if name:
+ try:
+ user, group = name.split(':')
+ except ValueError:
+ user, group = name, None
+ else:
+ name = user = str(os.geteuid())
+ group = None
+ try:
+ uid = int(user)
+ except ValueError:
+ pw_ent = pwd.getpwnam(user)
+ uid, gid = pw_ent.pw_uid, pw_ent.pw_gid
+ else:
+ pw_ent = pwd.getpwuid(uid)
+ uid, gid = pw_ent.pw_uid, pw_ent.pw_gid
+
+ if group:
+ try:
+ gid = int(group)
+ except ValueError:
+ gid = grp.getgrnam(group).gr_gid
+ else:
+ gid = grp.getgrgid(gid).gr_gid
+
+ cache[name] = uid, gid
+ return uid, gid
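+
+# Lookup sketch (typical names): user_lookup('www-data:www-data') resolves
+# both user and group; user_lookup('1000') takes the gid from that uid's
+# passwd entry.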
diff --git a/simpleinst/utils.py b/simpleinst/utils.py
new file mode 100644
index 0000000..d12dc4e
--- /dev/null
+++ b/simpleinst/utils.py
@@ -0,0 +1,112 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "SimpleInst". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia. Copyright (C) 2004 Health
+# Administration Corporation. All Rights Reserved.
+#
+import re
+import os
+import errno
+from simpleinst.usergroup import user_lookup
+
+__all__ = 'chown', 'chmod', 'normjoin', 'make_dirs'
+
+def normjoin(*args):
+ return os.path.normpath(os.path.join(*args))
+
+def chown(filename, owner):
+ if type(owner) in (unicode, str):
+ owner = user_lookup(owner)
+ os.chown(filename, *owner)
+
+chmod_re = re.compile('^([ugoa]*)([+-=])([rwxs]+)$')
+def chmod(filename, mode):
+ if type(mode) in (unicode, str):
+ if mode.startswith('0'):
+ mode = int(mode, 8)
+ else:
+ num_mode = 0400
+ for field in mode.split(','):
+ mask = 0
+ modes = 0
+ pre, mask_str, op, mode_str, post = chmod_re.split(field)
+ if mask_str:
+ for m in mask_str:
+ # use ==, not 'is': identity of equal string literals is not guaranteed
+ if m == 'u':
+ mask |= 04700
+ elif m == 'g':
+ mask |= 02070
+ elif m == 'o':
+ mask |= 00007
+ elif m == 'a':
+ mask |= 06777
+ else:
+ mask |= 06777
+ for m in mode_str:
+ if m == 'r':
+ modes |= 00444
+ elif m == 'w':
+ modes |= 00222
+ elif m == 'x':
+ modes |= 00111
+ elif m == 's':
+ modes |= 06000
+ if op == '+':
+ num_mode |= modes & mask
+ elif op == '=':
+ num_mode = modes & mask
+ elif op == '-':
+ num_mode &= ~(modes & mask)
+ mode = num_mode
+ os.chmod(filename, mode)
+
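+# Mode sketch: chmod(path, '0644') parses the octal string, while
+# chmod(path, 'u+rwx,go+rx') composes symbolic clauses on top of a 0400
+# base, much like chmod(1).
+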
+def make_dirs(dir, owner=None, config=None):
+ if config and config.install_prefix:
+ dir = config.install_prefix + dir
+ if type(owner) in (unicode, str):
+ owner = user_lookup(owner)
+ if not os.path.exists(dir):
+ par_dir = os.path.dirname(dir)
+ make_dirs(par_dir, owner)
+ os.mkdir(dir, 0755)
+ if owner is not None:
+ chown(dir, owner)
+
+def secret(nbits=256):
+ import binascii
+ f = open('/dev/urandom', 'rb')
+ try:
+ data = f.read(nbits / 8)
+ finally:
+ f.close()
+ return binascii.b2a_base64(data).rstrip()
+
+def getpass(prompt):
+ import getpass
+ return getpass.getpass(prompt)
+
+def collect(cmd):
+ f = os.popen(cmd, 'r')
+ try:
+ return ' '.join([l.rstrip() for l in f])
+ finally:
+ f.close()
+
+def rm_pyc(fn):
+ if fn.endswith('.py'):
+ try:
+ os.unlink(fn + 'c')
+ except OSError, (eno, estr):
+ if eno != errno.ENOENT:
+ raise
+ try:
+ os.unlink(fn + 'o')
+ except OSError, (eno, estr):
+ if eno != errno.ENOENT:
+ raise
diff --git a/soomext/bad.py b/soomext/bad.py
new file mode 100644
index 0000000..68c104f
--- /dev/null
+++ b/soomext/bad.py
@@ -0,0 +1,7 @@
+from Numeric import arrayrange
+from soomfunc import union
+from mytime import timeit
+
+n = 1000
+bad = [arrayrange(i, 40000000, n * 4) for i in xrange(n)]
+_ = timeit(union,*bad)
diff --git a/soomext/blob.c b/soomext/blob.c
new file mode 100644
index 0000000..a225716
--- /dev/null
+++ b/soomext/blob.c
@@ -0,0 +1,409 @@
+/*
+ * The contents of this file are subject to the HACOS License Version 1.2
+ * (the "License"); you may not use this file except in compliance with
+ * the License. Software distributed under the License is distributed
+ * on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ * implied. See the LICENSE file for the specific language governing
+ * rights and limitations under the License. The Original Software
+ * is "NetEpi Analysis". The Initial Developer of the Original
+ * Software is the Health Administration Corporation, incorporated in
+ * the State of New South Wales, Australia.
+ *
+ * Copyright (C) 2004,2005 Health Administration Corporation.
+ * All Rights Reserved.
+ */
+#include "blobstore.h"
+
+/* Header for array BLOBs which describes the array.
+ */
+typedef struct {
+ int type_num;
+ int rank;
+ int shape[40];
+} MmapArrayDesc;
+
+static void reload_desc(BlobObject *self)
+{
+ self->desc = store_get_blobdesc(self->store->sm,
+ self->index, !self->is_raw);
+}
+
+/* Array data size calculation lifted out of Numpy source - no
+ * alignment included because we might want to append...
+ */
+static size_t array_data_size(PyArrayObject *array)
+{
+ int data_size, i;
+
+ data_size = array->descr->elsize;
+ for (i = 0; i < array->nd; i++)
+ data_size *= array->dimensions[i] ? array->dimensions[i] : 1;
+ return data_size;
+}
+
+static int array_matches_desc(PyArrayObject *array, MmapArrayDesc *desc)
+{
+ return desc->type_num == array->descr->type_num
+ && desc->rank == array->nd
+ && memcmp(desc->shape, array->dimensions,
+ desc->rank * sizeof(desc->shape[0])) == 0;
+}
+
+static int array_can_append(PyArrayObject *array, MmapArrayDesc *desc)
+{
+ return desc->type_num == array->descr->type_num
+ && desc->rank == 1
+ && array->nd == 1;
+}
+
+static char Blob_as_array__doc__[] =
+"";
+
+static PyObject *Blob_as_array(BlobObject *self, PyObject *args)
+{
+ MmapArrayDesc *arraydesc;
+ MmapBlobStore *sm;
+
+ if (self->is_raw) {
+ PyErr_SetString(PyExc_TypeError, "not supported for raw blob");
+ return NULL;
+ }
+
+ if (!PyArg_ParseTuple(args, ""))
+ return NULL;
+
+ reload_desc(self);
+ sm = self->store->sm;
+ arraydesc = store_blob_address(sm, self->desc);
+ if (self->obj != NULL && !PyArray_Check(self->obj)) {
+ Py_DECREF(self->obj);
+ self->obj = NULL;
+ }
+ if (self->obj == NULL) {
+ self->obj = PyArray_FromDimsAndData(arraydesc->rank,
+ arraydesc->shape,
+ arraydesc->type_num,
+ (char *)(arraydesc + 1));
+ if (self->obj == NULL)
+ return NULL;
+ } else
+ ((PyArrayObject*)self->obj)->data = (char *)(arraydesc + 1);
+ Py_INCREF(self->obj);
+ return self->obj;
+}
+
+static char Blob_as_str__doc__[] =
+"";
+
+static PyObject *Blob_as_str(BlobObject *self, PyObject *args)
+{
+ MmapBlobStore *sm;
+
+ if (self->is_raw) {
+ PyErr_SetString(PyExc_TypeError, "not supported for raw blob");
+ return NULL;
+ }
+
+ if (!PyArg_ParseTuple(args, ""))
+ return NULL;
+
+ if (self->obj != NULL) {
+ if (!PyString_Check(self->obj)) {
+ Py_DECREF(self->obj);
+ self->obj = NULL;
+ } else {
+ Py_INCREF(self->obj);
+ return self->obj;
+ }
+ }
+
+ reload_desc(self);
+ sm = self->store->sm;
+ self->obj = PyString_FromStringAndSize(store_blob_address(sm, self->desc),
+ self->desc->len);
+ if (self->obj == NULL)
+ return NULL;
+ Py_INCREF(self->obj);
+ return self->obj;
+}
+
+static char Blob_save_array__doc__[] =
+"";
+
+static PyObject *Blob_save_array(BlobObject *self, PyObject *args)
+{
+ PyObject *obj;
+ PyArrayObject *array;
+ size_t array_size;
+ MmapArrayDesc *arraydesc = NULL;
+ MmapBlobStore *sm;
+ int resize_blob;
+
+ if (self->is_raw) {
+ PyErr_SetString(PyExc_TypeError, "not supported for raw blob");
+ return NULL;
+ }
+
+ if (!PyArg_ParseTuple(args, "O", &obj))
+ return NULL;
+ array = (PyArrayObject *)
+ PyArray_ContiguousFromObject(obj, PyArray_NOTYPE, 0, 0);
+ if (array == NULL)
+ return NULL;
+
+ sm = self->store->sm;
+ array_size = array_data_size(array);
+ if (store_blob_size(sm, self->index) > 0) {
+ reload_desc(self);
+ arraydesc = store_blob_address(sm, self->desc);
+ resize_blob = !array_matches_desc(array, arraydesc);
+ } else
+ resize_blob = 1;
+
+ if (resize_blob) {
+ /* (re)allocate blob */
+ if (store_blob_resize(sm, self->index,
+ sizeof(*arraydesc) + array_size) < 0) {
+ Py_DECREF(array);
+ return NULL;
+ }
+ reload_desc(self);
+ arraydesc = store_blob_address(sm, self->desc);
+ /* copy array description */
+ arraydesc->type_num = array->descr->type_num;
+ arraydesc->rank = array->nd;
+ memcpy(arraydesc->shape, array->dimensions,
+ arraydesc->rank * sizeof(arraydesc->shape[0]));
+ }
+ /* copy array data */
+ memmove(arraydesc + 1, array->data, array_size);
+
+ if (self->obj) {
+ Py_DECREF(self->obj);
+ self->obj = NULL;
+ }
+
+ Py_DECREF(array);
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+static char Blob_append_array__doc__[] =
+"";
+
+static PyObject *Blob_append_array(BlobObject *self, PyObject *args)
+{
+ PyObject *obj;
+ PyArrayObject *array;
+ size_t array_size;
+ MmapArrayDesc *arraydesc = NULL;
+ MmapBlobStore *sm;
+ int blob_len, num_elems;
+
+ if (self->is_raw) {
+ PyErr_SetString(PyExc_TypeError, "not supported for raw blob");
+ return NULL;
+ }
+
+ if (!PyArg_ParseTuple(args, "O", &obj))
+ return NULL;
+ array = (PyArrayObject *)
+ PyArray_ContiguousFromObject(obj, PyArray_NOTYPE, 0, 0);
+ if (array == NULL)
+ return NULL;
+
+ sm = self->store->sm;
+ if (store_blob_size(sm, self->index) > 0) {
+ reload_desc(self);
+ arraydesc = store_blob_address(sm, self->desc);
+ if (!array_can_append(array, arraydesc)) {
+ Py_DECREF(array);
+ PyErr_SetString(PyExc_TypeError, "can only append rank-1 arrays");
+ return NULL;
+ }
+ blob_len = self->desc->len;
+ num_elems = arraydesc->shape[0];
+ } else {
+ blob_len = sizeof(*arraydesc);
+ num_elems = 0;
+ }
+
+ /* (re)allocate blob */
+ array_size = array_data_size(array);
+ if (store_blob_resize(sm, self->index, blob_len + array_size) < 0) {
+ Py_DECREF(array);
+ return NULL;
+ }
+ reload_desc(self);
+ arraydesc = store_blob_address(sm, self->desc);
+ /* copy array description */
+ arraydesc->type_num = array->descr->type_num;
+ arraydesc->rank = 1;
+ arraydesc->shape[0] = num_elems + array->dimensions[0];
+
+ /* copy array data */
+ memmove(((char*)arraydesc) + blob_len, array->data, array_size);
+
+ if (self->obj) {
+ Py_DECREF(self->obj);
+ self->obj = NULL;
+ }
+
+ Py_DECREF(array);
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+static char Blob_save_str__doc__[] =
+"";
+
+static PyObject *Blob_save_str(BlobObject *self, PyObject *args)
+{
+ char *str;
+ int len;
+ caddr_t addr = NULL;
+ MmapBlobStore *sm;
+ int resize_blob;
+
+ if (self->is_raw) {
+ PyErr_SetString(PyExc_TypeError, "not supported for raw blob");
+ return NULL;
+ }
+
+ if (!PyArg_ParseTuple(args, "s#", &str, &len))
+ return NULL;
+
+ sm = self->store->sm;
+ if (store_blob_size(sm, self->index) > 0) {
+ reload_desc(self);
+ addr = store_blob_address(sm, self->desc);
+ resize_blob = (len != self->desc->len);
+ } else
+ resize_blob = 1;
+ if (resize_blob) {
+ if (store_blob_resize(sm, self->index, len) < 0)
+ return NULL;
+ reload_desc(self);
+ addr = store_blob_address(sm, self->desc);
+ }
+ /* copy string data */
+ memmove(addr, str, len);
+
+ if (self->obj) {
+ Py_DECREF(self->obj);
+ self->obj = NULL;
+ }
+
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+static struct PyMethodDef Blob_methods[] = {
+ { "as_array", (PyCFunction)Blob_as_array, METH_VARARGS, Blob_as_array__doc__ },
+ { "as_str", (PyCFunction)Blob_as_str, METH_VARARGS, Blob_as_str__doc__ },
+ { "save_array", (PyCFunction)Blob_save_array, METH_VARARGS, Blob_save_array__doc__ },
+ { "save_str", (PyCFunction)Blob_save_str, METH_VARARGS, Blob_save_str__doc__ },
+ { "append_array", (PyCFunction)Blob_append_array, METH_VARARGS, Blob_append_array__doc__ },
+ { NULL, NULL}
+};
+
+static void Blob__del__(BlobObject *self)
+{
+ Py_XDECREF(self->obj);
+ Py_XDECREF(self->store);
+
+ PyObject_Del(self);
+}
+
+#define OFFSET(x) offsetof(BlobDesc, x)
+
+static struct memberlist Blob_memberlist[] = {
+ { "len", T_INT, OFFSET(len), RO },
+ { "loc", T_INT, OFFSET(loc), RO },
+ { "other", T_INT, OFFSET(other) },
+ { "size", T_INT, OFFSET(size), RO },
+ { "status", T_INT, OFFSET(status), RO },
+ { "type", T_INT, OFFSET(type) },
+ { NULL }
+};
+
+static PyObject *Blob__getattr__(BlobObject *self, char *name)
+{
+ PyObject *rv;
+
+ reload_desc(self);
+
+ rv = PyMember_Get((char *)self->desc, Blob_memberlist, name);
+ if (rv)
+ return rv;
+ PyErr_Clear();
+ return Py_FindMethod(Blob_methods, (PyObject *)self, name);
+}
+
+
+static int Blob__setattr__(BlobObject *self, char *name, PyObject *v)
+{
+ if (v == NULL) {
+ PyErr_SetString(PyExc_AttributeError, "Cannot delete attribute");
+ return -1;
+ }
+ reload_desc(self);
+ return PyMember_Set((char *)self->desc, Blob_memberlist, name, v);
+}
+
+static char BlobType__doc__[] =
+"";
+
+PyTypeObject BlobType = {
+ PyObject_HEAD_INIT(0)
+ 0, /*ob_size*/
+ "BlobType", /*tp_name*/
+ sizeof(BlobObject), /*tp_basicsize*/
+ 0, /*tp_itemsize*/
+ /* methods */
+ (destructor)Blob__del__, /*tp_dealloc*/
+ (printfunc)0, /*tp_print*/
+ (getattrfunc)Blob__getattr__, /*tp_getattr*/
+ (setattrfunc)Blob__setattr__, /*tp_setattr*/
+ (cmpfunc)0, /*tp_compare*/
+ (reprfunc)0, /*tp_repr*/
+ 0, /*tp_as_number*/
+ 0, /*tp_as_sequence*/
+ 0, /*tp_as_mapping*/
+ (hashfunc)0, /*tp_hash*/
+ (ternaryfunc)0, /*tp_call*/
+ (reprfunc)0, /*tp_str*/
+
+ 0L,0L,0L,0L,
+ BlobType__doc__
+};
+
+PyObject *BlobObject__init__(BlobStoreObject *store, int index, int is_raw)
+{
+ BlobObject *self;
+ MmapBlobStore *sm;
+ BlobDesc *desc;
+
+ sm = store->sm;
+ desc = store_get_blobdesc(sm, index, !is_raw);
+ if (desc == NULL)
+ return NULL;
+
+ self = PyObject_New(BlobObject, &BlobType);
+ if (self == NULL)
+ return NULL;
+
+ self->store = store;
+ Py_INCREF(store);
+ self->desc = desc;
+ self->index = index;
+ self->is_raw = is_raw;
+ self->obj = NULL;
+
+ return (PyObject*)self;
+}
+
+void blob_make_numpy_work_please(void)
+{
+ import_array();
+}
diff --git a/soomext/blobstore.c b/soomext/blobstore.c
new file mode 100644
index 0000000..e569f6d
--- /dev/null
+++ b/soomext/blobstore.c
@@ -0,0 +1,253 @@
+/*
+ * The contents of this file are subject to the HACOS License Version 1.2
+ * (the "License"); you may not use this file except in compliance with
+ * the License. Software distributed under the License is distributed
+ * on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ * implied. See the LICENSE file for the specific language governing
+ * rights and limitations under the License. The Original Software
+ * is "NetEpi Analysis". The Initial Developer of the Original
+ * Software is the Health Administration Corporation, incorporated in
+ * the State of New South Wales, Australia.
+ *
+ * Copyright (C) 2004,2005 Health Administration Corporation.
+ * All Rights Reserved.
+ */
+#include "blobstore.h"
+
+/*-------------------------------------------------------------------
+ * Provide an object which works like a Python list of BLOB objects.
+ * BLOBs can be stored as either strings or Numpy arrays.
+ */
+
+static char BlobStore_append__doc__[] =
+"append() -> int";
+
+static PyObject *BlobStore_append(BlobStoreObject *self, PyObject *args)
+{
+ int index;
+
+ if (!PyArg_ParseTuple(args, ""))
+ return NULL;
+
+ index = store_append(self->sm);
+ if (index < 0)
+ return NULL;
+
+ return PyInt_FromLong(index);
+}
+
+static char BlobStore_get__doc__[] =
+"get(int) -> blob";
+
+static PyObject *BlobStore_get(BlobStoreObject *self, PyObject *args)
+{
+ int index;
+
+ if (!PyArg_ParseTuple(args, "i", &index))
+ return NULL;
+
+ return BlobObject__init__(self, index, 1);
+}
+
+static char BlobStore_free__doc__[] =
+"free(int) -> None";
+
+static PyObject *BlobStore_free(BlobStoreObject *self, PyObject *args)
+{
+ int index;
+
+ if (!PyArg_ParseTuple(args, "i", &index))
+ return NULL;
+
+ if (store_blob_free(self->sm, index) < 0)
+ return NULL;
+
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+static char BlobStore_usage__doc__[] =
+"usage() -> (int, int)";
+
+static PyObject *BlobStore_usage(BlobStoreObject *self, PyObject *args)
+{
+ size_t used, unused;
+
+ if (!PyArg_ParseTuple(args, ""))
+ return NULL;
+
+ store_usage(self->sm, &used, &unused);
+ return Py_BuildValue("(ii)", (int)used, (int)unused);
+}
+
+static char BlobStore_header__doc__[] =
+"header() -> (table_size, table_len, table_index, table_loc, seq_size, seq_len, seq_index)";
+
+static PyObject *BlobStore_header(BlobStoreObject *self, PyObject *args)
+{
+ StoreHeader header;
+
+ if (!PyArg_ParseTuple(args, ""))
+ return NULL;
+
+ if (store_get_header(self->sm, &header) < 0)
+ return NULL;
+ return Py_BuildValue("(iiiiiii)",
+ (int)header.table_size,
+ (int)header.table_len,
+ (int)header.table_index,
+ (int)header.table_loc,
+ (int)header.seq_size,
+ (int)header.seq_len,
+ (int)header.seq_index);
+}
+
+static struct PyMethodDef BlobStore_methods[] = {
+ { "append", (PyCFunction)BlobStore_append, METH_VARARGS, BlobStore_append__doc__ },
+ { "get", (PyCFunction)BlobStore_get, METH_VARARGS, BlobStore_get__doc__ },
+ { "free", (PyCFunction)BlobStore_free, METH_VARARGS, BlobStore_free__doc__ },
+ { "usage", (PyCFunction)BlobStore_usage, METH_VARARGS, BlobStore_usage__doc__ },
+ { "header", (PyCFunction)BlobStore_header, METH_VARARGS, BlobStore_header__doc__ },
+ { NULL, NULL }
+};
+
+static void BlobStore__del__(BlobStoreObject *self)
+{
+ if (self->sm != NULL)
+ store_close(self->sm);
+ PyObject_Del(self);
+}
+
+static PyObject *BlobStore__getattr__(BlobStoreObject *self, char *name)
+{
+ return Py_FindMethod(BlobStore_methods, (PyObject *)self, name);
+}
+
+static int BlobStore__setattr__(BlobStoreObject *self, char *name, PyObject *v)
+{
+ return -1;
+}
+
+/*---------------------------------------------------------
+ * Access as a Sequence
+ *---------------------------------------------------------*/
+
+static int BlobStore__len__(BlobStoreObject *self)
+{
+ return store_num_blobs(self->sm);
+}
+
+static PyObject *BlobStore__getitem__(PyObject *self, Py_ssize_t i)
+{
+ return BlobObject__init__((BlobStoreObject *)self, i, 0);
+}
+
+static PySequenceMethods BlobStore_as_sequence = {
+ (inquiry)BlobStore__len__, /*sq_length*/
+ 0, /*sq_concat*/
+ 0, /*sq_repeat*/
+ BlobStore__getitem__, /*sq_item*/
+ 0, /*sq_slice*/
+ 0, /*sq_ass_item*/
+ 0, /*sq_ass_slice*/
+};
+
+static char BlobStoreType__doc__[] =
+"";
+
+static PyTypeObject BlobStoreType = {
+ PyObject_HEAD_INIT(0)
+ 0, /*ob_size*/
+ "BlobStore", /*tp_name*/
+ sizeof(BlobStoreObject), /*tp_basicsize*/
+ 0, /*tp_itemsize*/
+ /* methods */
+ (destructor)BlobStore__del__,/*tp_dealloc*/
+ (printfunc)0, /*tp_print*/
+ (getattrfunc)BlobStore__getattr__, /*tp_getattr*/
+ (setattrfunc)BlobStore__setattr__, /*tp_setattr*/
+ (cmpfunc)0, /*tp_compare*/
+ (reprfunc)0, /*tp_repr*/
+ 0, /*tp_as_number*/
+ &BlobStore_as_sequence, /*tp_as_sequence*/
+ 0, /*tp_as_mapping*/
+ (hashfunc)0, /*tp_hash*/
+ (ternaryfunc)0, /*tp_call*/
+ (reprfunc)0, /*tp_str*/
+
+ 0L, 0L, 0L, 0L,
+ BlobStoreType__doc__
+};
+
+static PyObject *BlobStore__init__(char *filename, char *mode)
+{
+ BlobStoreObject *self;
+
+ self = PyObject_New(BlobStoreObject, &BlobStoreType);
+ if (self == NULL)
+ return NULL;
+
+ self->sm = NULL;
+
+ /* open the BLOB store */
+ self->sm = store_open(filename, mode);
+ if (self->sm == NULL)
+ goto error;
+
+ return (PyObject*)self;
+
+error:
+ Py_DECREF(self);
+ return NULL;
+}
+
+static char blobstore_open__doc__[] =
+"";
+
+static PyObject *blobstore_open(PyObject *module, PyObject *args)
+{
+ char *filename;
+ char *mode = "r";
+
+ if (!PyArg_ParseTuple(args, "s|s", &filename, &mode))
+ return NULL;
+
+ return BlobStore__init__(filename, mode);
+}
+
+static struct PyMethodDef blobstore_methods[] = {
+ { "open", (PyCFunction)blobstore_open, METH_VARARGS, blobstore_open__doc__ },
+ { NULL, (PyCFunction)NULL, 0, NULL }
+};
+
+static char blobstore_module__doc__[] =
+"";
+
+void initblobstore(void)
+{
+ PyObject *module, *dict, *ver = NULL;
+
+ module = Py_InitModule4("blobstore", blobstore_methods,
+ blobstore_module__doc__,
+ (PyObject*)NULL, PYTHON_API_VERSION);
+
+ if (PyType_Ready(&BlobStoreType) < 0)
+ goto error;
+ if (PyType_Ready(&BlobType) < 0)
+ goto error;
+
+ blob_make_numpy_work_please();
+
+ if ((dict = PyModule_GetDict(module)) == NULL)
+ goto error;
+ if ((ver = PyString_FromString("0.11")) == NULL)
+ goto error;
+ if (PyDict_SetItemString(dict, "__version__", ver) < 0)
+ goto error;
+
+error:
+ Py_XDECREF(ver);
+
+ if (PyErr_Occurred())
+ Py_FatalError("can't initialize module blobstore");
+}
diff --git a/soomext/blobstore.h b/soomext/blobstore.h
new file mode 100644
index 0000000..3300b38
--- /dev/null
+++ b/soomext/blobstore.h
@@ -0,0 +1,44 @@
+/*
+ * The contents of this file are subject to the HACOS License Version 1.2
+ * (the "License"); you may not use this file except in compliance with
+ * the License. Software distributed under the License is distributed
+ * on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ * implied. See the LICENSE file for the specific language governing
+ * rights and limitations under the License. The Original Software
+ * is "NetEpi Analysis". The Initial Developer of the Original
+ * Software is the Health Administration Corporation, incorporated in
+ * the State of New South Wales, Australia.
+ *
+ * Copyright (C) 2004,2005 Health Administration Corporation.
+ * All Rights Reserved.
+ */
+#include "Python.h"
+#include "structmember.h"
+#include "Numeric/arrayobject.h"
+
+#include "storage.h"
+
+#if (PY_VERSION_HEX < 0x02050000)
+typedef int Py_ssize_t;
+#endif
+
+typedef struct {
+ PyObject_HEAD
+
+ MmapBlobStore *sm; /* BLOB store */
+} BlobStoreObject;
+
+typedef struct {
+ PyObject_HEAD
+
+ BlobStoreObject *store;
+ int index;
+ int is_raw;
+
+ BlobDesc *desc;
+ PyObject *obj;
+} BlobObject;
+
+extern PyTypeObject BlobType;
+PyObject *BlobObject__init__(BlobStoreObject *store, int index, int is_raw);
+void blob_make_numpy_work_please(void);
diff --git a/soomext/doc/Makefile b/soomext/doc/Makefile
new file mode 100644
index 0000000..5f16da1
--- /dev/null
+++ b/soomext/doc/Makefile
@@ -0,0 +1,52 @@
+PYTHON_SRC = ${HOME}/download/Python-2.3.4
+
+MKHOWTO = $(PYTHON_SRC)/Doc/tools/mkhowto
+
+.PHONY: html
+
+PAPER = a4
+
+DOCFILES = blobstore.tex copyright.tex installation.tex soomarray.tex \
+ storage.tex soom.tex soomfunc.tex
+
+FIGURES = storage.pdf
+
+all: pdf
+
+pdf: $(DOCFILES) $(FIGURES)
+ $(MKHOWTO) --pdf --$(PAPER) soom.tex
+
+ps: pdf
+ acroread -toPostScript -size $(PAPER) -fast -level2 -pairs soom.pdf soom.ps
+
+booklet: ps
+ cat soom.ps | psbook | psnup -2 | pstumble > soom-booklet.ps
+
+html: $(DOCFILES) $(FIGURES)
+ $(MKHOWTO) --html soom.tex
+ mkdir -p soom/icons
+ cp $(PYTHON_SRC)/Doc/html/icons/* soom/icons/
+ rm soom/soom.how
+# The iconserver option of mkhowto is broken: it writes the icon settings
+# to the end of the init_file, where they no longer take effect, so we
+# work around it:
+ for f in `find soom`; do \
+ cat $$f | sed s/\.\.\\/icons/icons/g > $${f}2; \
+ mv $${f}2 $$f; \
+ done
+ rm soom/soom2
+ rm soom/icons/icons2
+
+# convert .dia images to .eps and then to .pdf
+%.pdf: %.dia
+ dia --nosplash --export-to-format=eps-builtin \
+ --export=$(subst .dia,.eps,$<) $<
+ epstopdf $(subst .dia,.eps,$<)
+
+clean:
+ rm -f *~ *.aux *.idx *.ilg *.ind *.log *.toc *.bkm *.syn *.pla api.tex
+
+# HTML in the doc directory
+dist: html
+ rm -rf ../doc
+ mv soom ../doc
diff --git a/soomext/doc/blobstore.tex b/soomext/doc/blobstore.tex
new file mode 100644
index 0000000..2e4b759
--- /dev/null
+++ b/soomext/doc/blobstore.tex
@@ -0,0 +1,178 @@
+%
+% The contents of this file are subject to the HACOS License Version 1.2
+% (the "License"); you may not use this file except in compliance with
+% the License. Software distributed under the License is distributed
+% on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+% implied. See the LICENSE file for the specific language governing
+% rights and limitations under the License. The Original Software
+% is "NetEpi Analysis". The Initial Developer of the Original
+% Software is the Health Administration Corporation, incorporated in
+% the State of New South Wales, Australia.
+%
+% Copyright (C) 2004,2005 Health Administration Corporation.
+% All Rights Reserved.
+%
+\section{\module{blobstore}}
+
+\declaremodule{extension}{blobstore}
+
+The \module{blobstore} extension module contains a fairly thin wrapper
+on top of the memory mapped BLOB storage which is implemented in the
+Storage API.
+
+The \module{blobstore} module contains the following:
+
+\begin{datadesc}{__version__}
+A string which specifies the version of the \module{blobstore} module.
+\end{datadesc}
+
+\begin{funcdesc}{open}{filename \optional{, mode \code{= 'r'}}}
+Open the file named in the \var{filename} argument using the specified
+\var{mode}. Returns a \class{BlobStore} object.
+
+\begin{verbatim}
+>>> import blobstore
+>>> bs = blobstore.open('file.dat')
+\end{verbatim}
+\end{funcdesc}
+
+\subsection{BlobStore Objects}
+
+These objects are used to manage a sequence of BLOB items. The object
+behaves much like a Python list. Each element in the list is a
+\class{Blob} object.
+
+\class{BlobStore} objects have the following interface:
+
+\begin{methoddesc}[BlobStore]{__len__}{}
+Returns the number of blobs in the blob store.
+\end{methoddesc}
+
+\begin{methoddesc}[BlobStore]{__getitem__}{i}
+Uses the blob store sequence to retrieve the blob descriptor indexed
+by \var{i} and returns it as a new instance of the \class{Blob} class.
+The blob data is accessed via the \class{Blob} object.
+\end{methoddesc}
+
+\begin{methoddesc}[BlobStore]{append}{}
+Reserves an index for a new blob in the blob store. The reserved
+index is returned. Note that no data will be allocated in the blob
+store until data is saved to that blob index.
+\end{methoddesc}
+
+\begin{methoddesc}[BlobStore]{get}{i}
+Retrieves the raw blob descriptor indexed by \var{i} and returns it as
+a new instance of the \class{Blob} class. This by-passes the blob
+store sequence and indexes directly into the blob table. This allows
+you to see the blobs which are used to store the blob sequence and
+table as well as free blobs. The \class{Blob} objects obtained this
+way cannot be used to load or save data.
+\end{methoddesc}
+
+\begin{methoddesc}[BlobStore]{free}{i}
+Free the storage associated with the blob in the sequence at index
+\var{i}.
+\end{methoddesc}
+
+\begin{methoddesc}[BlobStore]{usage}{}
+Returns a tuple of two numbers: the amount of data storage allocated
+in the blob store, and the amount of space free or wasted in it.
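+
+As a sketch (the numbers depend on the store's history):
+
+\begin{verbatim}
+>>> bs.usage()
+(81920, 4096)
+\end{verbatim}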
+\end{methoddesc}
+
+\begin{methoddesc}[BlobStore]{header}{}
+Returns a tuple containing the following values:
+\begin{longtable}{l|l}
+table\_size & Number of blob table entries allocated. \\
+table\_len & Number of blob table entries in use. \\
+table\_index & Table index of the blob which contains the blob table. \\
+table\_loc & File offset of the start of the blob table. \\
+seq\_size & Number of blob sequence entries allocated. \\
+seq\_len & Length of the blob sequence. \\
+seq\_index & Table index of the blob which contains the blob sequence. \\
+\end{longtable}
+
+To understand these numbers you should refer to the documentation of
+the Storage API.
+\end{methoddesc}
+
+\subsection{Blob Objects}
+
+Blobs retrieved from a \class{BlobStore} object are returned as
+\class{Blob} objects. The \class{Blob} objects provide access to the
+data associated with a blob in a blob store. Each \class{Blob} object
+retains a reference to its containing \class{BlobStore} and also
+stores its index within the blob store.
+
+\class{Blob} objects have the following interface:
+
+\begin{memberdesc}[Blob]{len}
+This read-only member contains the length of the blob data.
+\end{memberdesc}
+
+\begin{memberdesc}[Blob]{size}
+This read-only member contains the allocated size of the blob. The
+blob \member{size} will always be at least as large as the blob
+\member{len}. Blob allocations are aligned to the size of an integer.
+
+When a blob is resized or freed it can leave some free space in the
+blob store. When allocating a new blob, the smallest free area which
+can contain the new data will be used in preference to growing the
+file.
+\end{memberdesc}
+
+\begin{memberdesc}[Blob]{loc}
+This read-only member contains the offset within the file of the blob
+data.
+\end{memberdesc}
+
+\begin{memberdesc}[Blob]{status}
+This read-only member contains the status of the blob. The values and
+their meaning are:
+
+\begin{longtable}{l|l}
+0 & The blob is free space. \\
+1 & The blob contains the blob table. \\
+2 & The blob contains the blob sequence. \\
+3 & The blob contains user data. \\
+\end{longtable}
+\end{memberdesc}
+
+\begin{memberdesc}[Blob]{other}
+This integer member can be used by your code. The \class{ArrayDict}
+class uses it to link the mask and data parts of a masked array.
+\end{memberdesc}
+
+\begin{memberdesc}[Blob]{type}
+This integer member can be used by your code.
+\end{memberdesc}
+
+\begin{methoddesc}[Blob]{as_array}{}
+Returns a Numpy array object which uses the memory mapped blob data as
+the contents of the array. The array object is cached internally, so
+you can call this without penalty.
+
+Every time you call this method the cached Numpy array will have its
+internal data pointer updated to protect against blob store resizing.
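+
+A sketch, assuming blob 0 already holds array data:
+
+\begin{verbatim}
+>>> blob = bs[0]
+>>> a = blob.as_array()
+>>> a is blob.as_array()
+True
+\end{verbatim}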
+\end{methoddesc}
+
+\begin{methoddesc}[Blob]{as_str}{}
+Returns a Python string which is constructed using the blob data. The
+string allocates its own copy of the data. The string is cached
+internally, so you can call this without penalty.
+\end{methoddesc}
+
+\begin{methoddesc}[Blob]{save_array}{a}
+Save the array passed in the \var{a} argument in the blob associated
+with this object.
+\end{methoddesc}
+
+\begin{methoddesc}[Blob]{append_array}{a}
+Append the array passed in the \var{a} argument to the blob associated
+with this object.
+\end{methoddesc}
+
+\begin{methoddesc}[Blob]{save_str}{s}
+Save the string passed in the \var{s} argument in the blob associated
+with this object.
+\end{methoddesc}
diff --git a/soomext/doc/copyright.tex b/soomext/doc/copyright.tex
new file mode 100644
index 0000000..297c97c
--- /dev/null
+++ b/soomext/doc/copyright.tex
@@ -0,0 +1,17 @@
+%
+% The contents of this file are subject to the HACOS License Version 1.2
+% (the "License"); you may not use this file except in compliance with
+% the License. Software distributed under the License is distributed
+% on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+% implied. See the LICENSE file for the specific language governing
+% rights and limitations under the License. The Original Software
+% is "NetEpi Analysis". The Initial Developer of the Original
+% Software is the Health Administration Corporation, incorporated in
+% the State of New South Wales, Australia.
+%
+% Copyright (C) 2004,2005 Health Administration Corporation.
+% All Rights Reserved.
+%
+\centerline{\strong{Copyright \copyright\ 2001,2004 Health Administration
+Corporation, New South Wales, Australia. All rights reserved.}}
+
diff --git a/soomext/doc/installation.tex b/soomext/doc/installation.tex
new file mode 100644
index 0000000..6a44b15
--- /dev/null
+++ b/soomext/doc/installation.tex
@@ -0,0 +1,58 @@
+%
+% The contents of this file are subject to the HACOS License Version 1.2
+% (the "License"); you may not use this file except in compliance with
+% the License. Software distributed under the License is distributed
+% on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+% implied. See the LICENSE file for the specific language governing
+% rights and limitations under the License. The Original Software
+% is "NetEpi Analysis". The Initial Developer of the Original
+% Software is the Health Administration Corporation, incorporated in
+% the State of New South Wales, Australia.
+%
+% Copyright (C) 2004,2005 Health Administration Corporation.
+% All Rights Reserved.
+%
+\section{Installation}
+
+\subsection{Prerequisites}
+
+\begin{itemize}
+\item Python 1.5.2 or later.
+
+\item C compiler
+
+The \module{SOOM} package contains extension module code written in C.
+The extension module code is used by the Python wrapper module
+\module{SOOM.py}.
+
+\item Numeric Python
+
+The \module{SOOM} package uses the Numpy extension modules for Python.
+
+Numpy can be downloaded from \url{http://numpy.sourceforge.net/}.
+\end{itemize}
+
+\subsection{Installing}
+
+The \module{SOOM} package uses the \module{distutils} package, so all
+you need to do is type the following command as root:
+
+\begin{verbatim}
+python setup.py install
+\end{verbatim}
+
+If you have problems with this step make sure that you contact the
+package author so that the installation process can be made more
+robust for other people.
+
+\subsection{Testing}
+
+The simplest way to test the \module{SOOM} package is via the
+interactive Python prompt.
+
+\begin{verbatim}
+>>> from soomfunc import *
+>>> from Numeric import *
+>>> intersect(array(range(10)) + 4, array(range(5)) * 2)
+array([4, 6, 8])
+\end{verbatim}
diff --git a/soomext/doc/soom.pdf b/soomext/doc/soom.pdf
new file mode 100644
index 0000000..88c5222
Binary files /dev/null and b/soomext/doc/soom.pdf differ
diff --git a/soomext/doc/soom.tex b/soomext/doc/soom.tex
new file mode 100644
index 0000000..0b660d7
--- /dev/null
+++ b/soomext/doc/soom.tex
@@ -0,0 +1,72 @@
+%
+% The contents of this file are subject to the HACOS License Version 1.2
+% (the "License"); you may not use this file except in compliance with
+% the License. Software distributed under the License is distributed
+% on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+% implied. See the LICENSE file for the specific language governing
+% rights and limitations under the License. The Original Software
+% is "NetEpi Analysis". The Initial Developer of the Original
+% Software is the Health Administration Corporation, incorporated in
+% the State of New South Wales, Australia.
+%
+% Copyright (C) 2004,2005 Health Administration Corporation.
+% All Rights Reserved.
+%
+\let\pdfannotlink\pdfstartlink
+\documentclass[a4paper]{howto}
+\usepackage{longtable}
+\usepackage{graphicx}
+\usepackage{color}
+\pagecolor{white}
+
+\title{SOOM Extensions for NetEpi Analysis}
+
+\author{Dave Cole and Tim Churches}
+
+% Please at least include a long-lived email address;
+% the rest is at your discretion.
+\authoraddress{E-mail: \email{djc at object-craft.com.au} \email{TCHUR at doh.health.nsw.gov.au}}
+
+\release{0.11}
+\date{December 5, 2001}
+
+\makeindex % tell \index to actually write the .idx file
+\makemodindex % If this contains a lot of module sections.
+
+\begin{document}
+
+\maketitle
+
+% This makes the contents more accessible from the front page of the HTML.
+\ifhtml
+\chapter*{Front Matter\label{front}}
+\fi
+
+\input{copyright}
+
+\begin{abstract}
+
+\noindent
+
+The SOOM (Set Operations on Ordinal Mappings) Extensions package is a collection
+of Numeric Python optimisations which accelerate a number of
+performance-critical SOOM functions.
+
+\begin{seealso}
+\seetitle[http://numpy.sourceforge.net/]
+{Numerical Python Web Site}{for information on Numpy}
+\end{seealso}
+
+\end{abstract}
+
+\tableofcontents
+
+\input{installation}
+\input{soomfunc}
+\input{soomarray}
+\input{blobstore}
+\input{storage}
+
+\input{soom.ind}
+
+\end{document}
diff --git a/soomext/doc/soomarray.tex b/soomext/doc/soomarray.tex
new file mode 100644
index 0000000..1c0c721
--- /dev/null
+++ b/soomext/doc/soomarray.tex
@@ -0,0 +1,163 @@
+%
+% The contents of this file are subject to the HACOS License Version 1.2
+% (the "License"); you may not use this file except in compliance with
+% the License. Software distributed under the License is distributed
+% on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+% implied. See the LICENSE file for the specific language governing
+% rights and limitations under the License. The Original Software
+% is "NetEpi Analysis". The Initial Developer of the Original
+% Software is the Health Administration Corporation, incorporated in
+% the State of New South Wales, Australia.
+%
+% Copyright (C) 2004,2005 Health Administration Corporation.
+% All Rights Reserved.
+%
+\section{\module{soomarray}}
+
+\declaremodule{standard}{soomarray}
+
+The \module{soomarray} module contains a Python wrapper around the
+\module{blobstore} extension module.
+
+The \module{soomarray} module contains the following:
+
+\begin{excdesc}{Error}
+Exception raised when unexpected data is located in the BLOB store.
+\end{excdesc}
+
+\begin{classdesc}{MmapArray}{blob}
+Return a new instance of the \class{MmapArray} class using the
+\class{Blob} object specified in the \var{blob} argument.
+\end{classdesc}
+
+\begin{classdesc}{ArrayDict}{filename, mode \code{= 'r'}}
+Return a new instance of the \class{ArrayDict} class.
+
+The \var{filename} argument specifies the name of the file which will
+be used to store and retrieve memory arrays. The \var{mode} argument
+can be \code{'r'} to access an existing file in read-only mode,
+\code{'r+'} to access an existing file in read-write mode, or
+\code{'w+'} to create a new file in read-write mode.
+\end{classdesc}
+
+\subsection{ArrayDict Objects}
+
+These objects behave like a Python dictionary. Each item stored must
+be either a plain Numpy array, or a masked array from the MA package.
+
+Internally the object manages a \class{BlobStore} object in which the
+BLOB at index zero contains a pickled dictionary which yields the
+index of the BLOB which contains the Numpy array. For masked arrays,
+the BLOB referenced by the dictionary contains the index of the mask
+array in the \member{other} BLOB member.
+
+You must be careful when modifying the contents of the
+\class{ArrayDict} object because the internal \class{BlobStore} may
+need to grow the memory mapped file. Growing the file will invalidate
+all of the addresses contained in arrays retrieved from the file. The
+\class{MmapArray} objects are able to automatically handle this event,
+but any arrays derived from these will not. If you slice and dice
+\class{MmapArray} objects and then modify their containing
+\class{ArrayDict}, you will get a segmentation fault.
+
+\begin{verbatim}
+>>> from Numeric import *
+>>> from soomarray import ArrayDict
+>>> ad = ArrayDict('file.dat', 'w+')
+>>> ad['a'] = array(xrange(100))
+>>> ad.keys()
+['a']
+>>> len(ad)
+1
+>>> ad['b'] = array(range(10))
+>>> ad['b']
+[0,1,2,3,4,5,6,7,8,9,]
+>>> from soomfunc import *
+>>> intersect(ad['a'], ad['b'])
+[0,1,2,3,4,5,6,7,8,9,]
+\end{verbatim}
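+
+Given the remapping caveat above, a safe pattern is to re-fetch arrays
+after any operation that may grow the store:
+
+\begin{verbatim}
+>>> ad['c'] = array(range(5))   # may grow and remap the file
+>>> a = ad['a']                 # re-fetch rather than reuse stale slices
+\end{verbatim}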
+
+\class{ArrayDict} objects have the following interface:
+
+\begin{methoddesc}[ArrayDict]{__del__}{}
+When the last reference to the object is dropped, the object will
+repickle the internal array dictionary to BLOB zero in the associated
+\class{BlobStore} object.
+\end{methoddesc}
+
+\begin{methoddesc}[ArrayDict]{__getitem__}{key}
+Returns a \class{MmapArray} object indexed by the \var{key} argument.
+This is called to evaluate \code{ad[key]}. Raises
+\exception{KeyError} if \var{key} is not a key in the dictionary.
+\end{methoddesc}
+
+\begin{methoddesc}[ArrayDict]{__setitem__}{key, a}
+Called to implement assignment of \var{a} to \code{ad[key]}.
+\end{methoddesc}
+
+\begin{methoddesc}[ArrayDict]{__delitem__}{key}
+Called to delete \code{ad[key]}. Raises \exception{KeyError} if
+\var{key} is not a key in the dictionary.
+\end{methoddesc}
+
+\begin{methoddesc}[ArrayDict]{__len__}{}
+Returns the number of items stored in the dictionary. A masked array
+counts as one item.
+\end{methoddesc}
+
+\begin{methoddesc}[ArrayDict]{clear}{}
+Deletes all contents.
+\end{methoddesc}
+
+\begin{methoddesc}[ArrayDict]{get}{key \optional{, default \code{= None}}}
+Implements the dictionary \method{get()} method.
+\end{methoddesc}
+
+\begin{methoddesc}[ArrayDict]{has_key}{key}
+Returns TRUE if \var{key} is a valid key in the dictionary.
+\end{methoddesc}
+
+\begin{methoddesc}[ArrayDict]{keys}{}
+Returns a list containing all of the keys in the dictionary.
+\end{methoddesc}
+
+\begin{methoddesc}[ArrayDict]{values}{}
+Returns a list containing all of the arrays in the dictionary.
+\end{methoddesc}
+
+\begin{methoddesc}[ArrayDict]{items}{key}
+Returns a list of tuples containing all \code{(key, array)} pairs in
+the dictionary.
+\end{methoddesc}
+
+\subsection{MmapArray Objects}
+
+Implements a Numpy array which has memory mapped data within a
+\class{BlobStore}. These objects act exactly like the \class{UserArray}
+object from the Numpy package (they should since their code was pasted
+from the Numpy source).
+
+Internally they wrap a \class{Blob} object which was retrieved from
+the parent \class{BlobStore} object. Each time the array is used the
+object calls the \class{Blob} \method{as_array()} method to refresh
+the address of the array data.
+
+Apart from the standard Numpy array methods, the \class{MmapArray}
+class implements the following:
+
+\begin{methoddesc}[MmapArray]{append}{a}
+This method appends the array in the \var{a} argument to the BLOB.
+The \class{BlobStore} container will resize as required. This is a
+handy way to build up an array which is too large to fit in memory.
+
+\begin{verbatim}
+>>> from Numeric import *
+>>> from soomarray import ArrayDict
+>>> ad = ArrayDict('file.dat', 'w+')
+>>> ad['a'] = array(xrange(100))
+>>> a = ad['a']
+>>> a.append(xrange(100))
+>>> len(a)
+200
+\end{verbatim}
+\end{methoddesc}
diff --git a/soomext/doc/soomfunc.tex b/soomext/doc/soomfunc.tex
new file mode 100644
index 0000000..42335f4
--- /dev/null
+++ b/soomext/doc/soomfunc.tex
@@ -0,0 +1,170 @@
+%
+% The contents of this file are subject to the HACOS License Version 1.2
+% (the "License"); you may not use this file except in compliance with
+% the License. Software distributed under the License is distributed
+% on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+% implied. See the LICENSE file for the specific language governing
+% rights and limitations under the License. The Original Software
+% is "NetEpi Analysis". The Initial Developer of the Original
+% Software is the Health Administration Corporation, incorporated in
+% the State of New South Wales, Australia.
+%
+% Copyright (C) 2004,2005 Health Administration Corporation.
+% All Rights Reserved.
+%
+\section{\module{soomfunc}}
+
+\declaremodule{extension}{soomfunc}
+
+The \module{soomfunc} module provides a collection of set operations
+on sorted Numpy arrays. It contains the following:
+
+\begin{datadesc}{__version__}
+A string which specifies the version of the \module{soomfunc} module.
+\end{datadesc}
+
+\begin{funcdesc}{unique}{a \optional{, b}}
+Remove all duplicate values from the sorted rank-1 array passed as the
+\var{a} argument, returning a new rank-1 array of unique values. If
+the optional second argument \var{b} is supplied, it will be used to
+store the result.
+
+\begin{verbatim}
+>>> from soomfunc import *
+>>> from Numeric import *
+>>> unique(array([1,2,3,3,4,4,5]))
+array([1, 2, 3, 4, 5])
+\end{verbatim}
+\end{funcdesc}
+
+\begin{funcdesc}{intersect}{a, b \optional{, \ldots}}
+Return the unique intersection of the sorted rank-1 arrays passed.
+All arrays must have the same typecode.
+
+The function finds the smallest array, then allocates an array of that
+length to hold the result. The result is primed by intersecting the
+smallest array with the first array in the argument list (or the
+second, if the first is the smallest). The result is then
+incrementally intersected in place with the remaining arrays.
+
+With each intersection, the function compares the lengths of the two
+arrays: if one array is at least three times larger than the other,
+the \function{sparse_intersect()} function is used, otherwise the
+\function{dense_intersect()} function is used (a sketch of this
+dispatch follows the example below).
+
+\begin{verbatim}
+>>> from soomfunc import *
+>>> from Numeric import *
+>>> intersect(array([1,2,3]), array([2,3]), array([3,4]))
+array([3])
+\end{verbatim}
+\end{funcdesc}
+
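+The selection rule can be sketched in Python as follows (illustrative
+only; the actual dispatch happens inside the C implementation of
+\function{intersect()}):
+
+\begin{verbatim}
+def choose_intersect(a, b):
+    # Pick the strategy used for one pairwise intersection step.
+    small, large = min(len(a), len(b)), max(len(a), len(b))
+    if small > 0 and large / small >= 3:
+        return sparse_intersect
+    return dense_intersect
+\end{verbatim}
+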
+\begin{funcdesc}{sparse_intersect}{a, b \optional{, \ldots}}
+Return the intersection of the sorted rank-1 arrays passed. All
+arrays must have the same typecode. This generates the same result as
+the \function{intersect()} function.
+
+When finding the intersection of two arrays it uses a binary search to
+locate matching values. The first point for the binary search is
+determined by dividing the number of elements remaining in the target
+array by the number of elements remaining in the source array.
+
+This function can be thousands of times faster than
+\function{dense_intersect()}. For arrays of similar size, however,
+\function{dense_intersect()} will be more than twice as fast as this
+function.
+
+You are encouraged to use the \function{intersect()} function as it
+will automatically use the best intersection function.
+\end{funcdesc}
+
+\begin{funcdesc}{dense_intersect}{a, b \optional{, \ldots}}
+Return the intersection of the sorted rank-1 arrays passed. All
+arrays must have the same typecode. This generates the same result as
+the \function{intersect()} function.
+
+When finding the intersection of two arrays it steps through both
+arrays one value at a time to locate matching values. The
+\function{sparse_intersect()} function can be thousands of times
+faster than this function for some inputs.
+
+You are encouraged to use the \function{intersect()} function as it
+will automatically use the best intersection function.
+\end{funcdesc}
+
+\begin{funcdesc}{outersect}{a, b \optional{, \ldots}}
+Return the unique symmetric difference of the sorted rank-1 arrays
+passed. All arrays must have the same typecode.
+
+Steps through all arrays in lock-step and finds all values which do
+not occur in every array. This is the exact opposite of the
+\function{intersect()} function.
+
+\begin{verbatim}
+>>> from soomfunc import *
+>>> from Numeric import *
+>>> outersect(array([1,2,3]), array([2,3]), array([3,4]))
+array([1, 2, 4])
+\end{verbatim}
+\end{funcdesc}
+
+\begin{funcdesc}{union}{a, b \optional{, \ldots}}
+Return the unique union of the sorted rank-1 arrays passed. All
+arrays must have the same typecode.
+
+Steps through all arrays in lock-step and finds all unique values
+across every array.
+
+\begin{verbatim}
+>>> from soomfunc import *
+>>> from Numeric import *
+>>> union(array([1,2,3,3]), array([2,2,3]), array([3,4]))
+array([1, 2, 3, 4])
+\end{verbatim}
+\end{funcdesc}
+
+\begin{funcdesc}{difference}{a, b \optional{, \ldots}}
+Return the result of subtracting the second and subsequent sorted
+rank-1 arrays from the first sorted rank-1 array. All arrays must
+have the same typecode.
+
+\begin{verbatim}
+>>> from soomfunc import *
+>>> from Numeric import *
+>>> difference(array([1,2,3,3]), array([2,2,3]), array([3,4]))
+array([1])
+\end{verbatim}
+\end{funcdesc}
+
+\begin{funcdesc}{valuepos}{a, b}
+Return an array of indexes into rank-1 array \var{a} where the values
+in rank-1 array \var{b} appear. Both arrays must have the same
+typecode.
+
+\begin{verbatim}
+>>> from soomfunc import *
+>>> from Numeric import *
+>>> valuepos(array([2,2,3,3,4,5,6]),array([3,5]))
+array([2, 3, 5])
+\end{verbatim}
+\end{funcdesc}
+
+\begin{funcdesc}{preload}{a \optional{, num \code{= -1}}}
+Step backwards over \var{num} entries of the array in the \var{a}
+argument, forcing the pages into memory. If \var{num} is less than
+zero or greater than the length of the array, the entire array is
+scanned.
+
+This function is used to optimise the use of arrays stored in memory
+mapped blobs.
+
+\begin{verbatim}
+>>> from soomfunc import *
+>>> from Numeric import *
+>>> preload(array([2,2,3,3,4,5,6]))
+\end{verbatim}
+\end{funcdesc}
diff --git a/soomext/doc/storage.dia b/soomext/doc/storage.dia
new file mode 100644
index 0000000..bd719da
Binary files /dev/null and b/soomext/doc/storage.dia differ
diff --git a/soomext/doc/storage.tex b/soomext/doc/storage.tex
new file mode 100644
index 0000000..ca175e5
--- /dev/null
+++ b/soomext/doc/storage.tex
@@ -0,0 +1,244 @@
+%
+% The contents of this file are subject to the HACOS License Version 1.2
+% (the "License"); you may not use this file except in compliance with
+% the License. Software distributed under the License is distributed
+% on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+% implied. See the LICENSE file for the specific language governing
+% rights and limitations under the License. The Original Software
+% is "NetEpi Analysis". The Initial Developer of the Original
+% Software is the Health Administration Corporation, incorporated in
+% the State of New South Wales, Australia.
+%
+% Copyright (C) 2004,2005 Health Administration Corporation.
+% All Rights Reserved.
+%
+\section{Storage API}
+
+The \module{blobstore} extension module is built on top of the Storage
+API. This API provides blob storage via a memory mapped file.
+
+The memory mapped file managed by the Storage API is logically
+arranged as shown in the following figure:
+
+\begin{figure}[h]
+\begin{center}
+\includegraphics{storage}
+\caption{Memory Mapped File Layout}
+\end{center}
+\end{figure}
+
+There is a simplification in the above diagram: the blob containing
+the blob sequence (labelled ``seq'') holds blob numbers, which are
+then used as indexes back into the blob table to locate the actual
+blob data.
+
+At offset zero in the file is the file header which is a
+\ctype{StoreHeader} structure from which everything else is located.
+
+\begin{ctypedesc}[StoreHeader]{StoreHeader}
+This structure is contained at offset zero in the memory mapped
+file. Its definition, found in \file{storage.h}, is:
+
+\begin{verbatim}
+typedef struct {
+ off_t table_loc; /* where the blob table is stored */
+ int table_size; /* allocated table size */
+ int table_len; /* entries used in the table */
+ int table_index; /* blob which contains the blob table */
+
+ int seq_index; /* blob which contains the array lookup */
+ int seq_size; /* allocated sequence size */
+ int seq_len; /* number of blobs in the sequence */
+} StoreHeader;
+\end{verbatim}
+
+The \code{table_loc} member contains the file offset of a special blob
+which stores the blob table. The blob table contains an array of
+\ctype{BlobDesc} structures. Each \ctype{BlobDesc} describes one blob
+in the file. All blob descriptors are kept together to minimise the
+amount of data which will be paged in while accessing the blob
+meta-data.
+
+As the blob table grows it will move around in the file. The
+\code{table_size} member records the number of array elements
+allocated in the blob table, while \code{table_len} records the number
+of entries in use. Once allocated, a blob cannot be deleted; it can
+only be marked as free space.
+
+The \code{table_index} member records the blob table index of the blob
+which contains the blob table.
+
+The blob table entries are always arranged in strict ascending
+sequence of the offset of the data that they describe. This means
+that as blobs are resized they may be handled by different entries in
+the blob table.
+
+To present external code with blob numbers which do not change after
+the initial blob allocation, a blob sequence is maintained. The blob
+sequence is a simple indirection table which translates an external
+blob index into an internal blob table index. This allows the table
+entry for a blob to change without affecting external code.
+
+The \code{seq_index} member records the blob table index of the blob
+which describes the blob sequence.
+\end{ctypedesc}
+
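+The practical consequence is that code using the \module{blobstore}
+Python wrapper can hold on to blob numbers indefinitely; a minimal
+sketch (the file name is illustrative):
+
+\begin{verbatim}
+>>> import blobstore
+>>> store = blobstore.open('demo.dat', 'w+')
+>>> idx = store.append()        # external blob number: never changes
+>>> store[idx].save_str('hello')
+>>> store[idx].as_str()         # valid even after other blobs resize
+'hello'
+\end{verbatim}
+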
+\begin{ctypedesc}[BlobDesc]{BlobDesc}
+The blob table is constructed from an array of these structures. Its
+definition, found in \file{storage.h}, is:
+
+\begin{verbatim}
+typedef struct {
+ off_t loc; /* location of the blob */
+ size_t size; /* size of blob */
+ size_t len; /* length of blob */
+ int status; /* status of blob */
+ int type; /* user: type of blob */
+ int other; /* user: index of related blob */
+} BlobDesc;
+\end{verbatim}
+
+The \code{loc} member contains the file offset of the start of the
+blob data. The \code{size} member contains the allocated size of the
+blob. The \code{loc} member of the next \ctype{BlobDesc} \emph{is
+always equal to} \code{loc + size} of this \ctype{BlobDesc}. The
+\code{len} member contains the size of the blob requested by the
+external code.
+
+The \code{status} member records the status of the space controlled
+by this blob. The value will be one of:
+
+\begin{longtable}{l|l}
+BLOB_FREE & The blob is free space. \\
+BLOB_TABLE & The blob contains the blob table. \\
+BLOB_SEQUENCE & The blob contains the blob sequence. \\
+BLOB_DATA & The blob contains user data. \\
+\end{longtable}
+
+The \code{type} and \code{other} members are not used by the Storage
+API. Applications are free to store whatever they wish in these
+members.
+\end{ctypedesc}
+
+\begin{ctypedesc}[MmapBlobStore]{MmapBlobStore}
+This structure is allocated by the \cfunction{store_open} function
+when the blob storage is opened. Its definition, found in
+\file{storage.h}, is:
+
+\begin{verbatim}
+typedef struct {
+ int mode; /* file open mode */
+ int fd; /* file descriptor */
+ int prot; /* mmap prot */
+ StoreHeader *header; /* address of start of store */
+ size_t size; /* size of file */
+ int cycle; /* increment when file remapped */
+} MmapBlobStore;
+\end{verbatim}
+\end{ctypedesc}
+
+\begin{cfuncdesc}{MmapBlobStore *}{store_open}{char *filename, char *mode}
+Opens the file specified in the \var{filename} argument using the mode
+specified in the \var{mode} argument.
+
+\begin{longtable}{l|l}
+\code{'r'} & Open an existing file in read only mode. \\
+\code{'r+'} & Open an existing file in read-write mode. \\
+\code{'w+'} & Create a new file in read-write mode. \\
+\end{longtable}
+
+If successful the function allocates and returns a
+\ctype{MmapBlobStore} structure which is passed as the first argument
+to all other Storage API functions. If the operation fails for any
+reason the function will set a Python exception and will return
+\NULL{}.
+\end{cfuncdesc}
+
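+The same modes are accepted by the \module{blobstore} extension built
+on top of this function; a minimal sketch (the file name is
+illustrative):
+
+\begin{verbatim}
+>>> import blobstore
+>>> store = blobstore.open('store.dat', 'w+')   # create, read-write
+>>> del store                                   # drop the last reference
+>>> store = blobstore.open('store.dat', 'r')    # reopen, read only
+\end{verbatim}
+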
+\begin{cfuncdesc}{int}{store_close}{MmapBlobStore *sm}
+Closes the blob store and frees the \ctype{MmapBlobStore} structure.
+On failure the function will set a Python exception and will return
+\code{-1}. On success the function returns \code{0}.
+\end{cfuncdesc}
+
+\begin{cfuncdesc}{int}{store_get_header}{MmapBlobStore *sm, StoreHeader *header}
+Copies the contents of the header at the start of the memory mapped
+file into the buffer supplied as the \var{header} argument.
+
+The function returns \code{0} on success and \code{-1} on failure.
+\end{cfuncdesc}
+
+\begin{cfuncdesc}{void}{store_usage}{MmapBlobStore *sm, size_t *used, size_t *unused}
+Traverses the blob table and returns the amount of space in use in the
+\var{used} argument and the amount of space which is either free or
+wasted in the \var{unused} argument.
+\end{cfuncdesc}
+
+\begin{cfuncdesc}{size_t}{store_compress}{MmapBlobStore *sm}
+Compresses the blob store to remove any space contained in free blobs
+or at the end of existing blobs. Note that this function is not
+currently implemented.
+
+Returns \code{0} on success and \code{-1} on failure.
+\end{cfuncdesc}
+
+\begin{cfuncdesc}{int}{store_cycle}{MmapBlobStore *sm}
+Every time the file is remapped due to size changes, the \code{cycle}
+member of the \ctype{MmapBlobStore} is incremented. This function
+returns the current value of the \code{cycle} member.
+\end{cfuncdesc}
+
+\begin{cfuncdesc}{int}{store_num_blobs}{MmapBlobStore *sm}
+Returns the number of blobs in the blob sequence.
+\end{cfuncdesc}
+
+\begin{cfuncdesc}{BlobDesc *}{store_get_blobdesc}{MmapBlobStore *sm, int index, int from_seq}
+Returns a pointer to the blob table entry which describes the blob
+specified by the \var{index} argument. If the \var{from_seq} argument
+is non-zero then the \var{index} argument will be used to look up the
+blob sequence to obtain the index into the blob table. When
+\var{from_seq} is zero the index is used directly to index the blob
+table.
+
+If the \var{index} argument is outside the bounds of the blob sequence
+or table the function will set a Python \exception{IndexError}
+exception and will return \NULL{}.
+\end{cfuncdesc}
+
+\begin{cfuncdesc}{void *}{store_blob_address}{MmapBlobStore *sm, BlobDesc *desc}
+Returns the address of the start of the data for the blob descriptor
+passed in \var{desc}. The \cfunction{store_get_blobdesc()} function
+returns values suitable for use as the \var{desc} argument.
+
+The function performs no checking on its arguments.
+\end{cfuncdesc}
+
+\begin{cfuncdesc}{int}{store_append}{MmapBlobStore *sm}
+Allocates a new entry in the blob sequence and returns the index.
+Note that no blob data is allocated until
+\cfunction{store_blob_resize()} is called to allocate some space for
+the blob.
+
+If the operation fails for any reason the function will set a Python
+exception and will return \code{-1}.
+\end{cfuncdesc}
+
+\begin{cfuncdesc}{int}{store_blob_size}{MmapBlobStore *sm, int index}
+Return the length of the blob identified by the blob sequence index in
+the \var{index} argument.
+\end{cfuncdesc}
+
+\begin{cfuncdesc}{int}{store_blob_resize}{MmapBlobStore *sm, int index, size_t data_len}
+Resize the blob identified by the blob sequence index in the
+\var{index} argument to be the new size specified in the
+\var{data_len} argument.
+
+If you are growing the blob, the address of the blob will almost
+certainly change after calling this function. You must call
+\cfunction{store_get_blobdesc()} and \cfunction{store_blob_address()}
+to obtain a valid address for the blob.
+\end{cfuncdesc}
+
+\begin{cfuncdesc}{int}{store_blob_free}{MmapBlobStore *sm, int index}
+Free the blob identified by the blob sequence index in the
+\var{index} argument. The space will be reused.
+\end{cfuncdesc}
diff --git a/soomext/matest.py b/soomext/matest.py
new file mode 100644
index 0000000..ede2272
--- /dev/null
+++ b/soomext/matest.py
@@ -0,0 +1,71 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: matest.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/soomext/matest.py,v $
+
+import MA
+import Numeric
+from soomarray import ArrayDict
+
+ad = ArrayDict('blah.dat', 'w+')
+a = Numeric.array([0,1,2,3,4,5,6,7,8,9],Numeric.Int)
+m = Numeric.array([0,0,0,0,0,1,0,0,1,0],Numeric.Int)
+ad['matest1'] = MA.array(a, mask = m)
+del ad
+
+ad = ArrayDict('blah.dat')
+matest = ad['matest1']
+
+print "matest: ", matest
+print "sum of matest: ", MA.sum(matest)
+print "length of matest: ", len(matest)
+print "count of matest: ", MA.count(matest)
+print "average of matest: ", MA.average(matest)
+print "minimum of matest: ", MA.minimum(matest)
+print "maximum of matest: ", MA.maximum(matest)
+
+del ad
+
+ad = ArrayDict('blah.dat', 'w+')
+
+a = Numeric.array(xrange(1000),Numeric.Int)
+m = Numeric.array(Numeric.repeat(Numeric.array([0,1],Numeric.Int),500),Numeric.Int)
+ad['matest2'] = MA.array(a, mask = m)
+
+m = Numeric.array(Numeric.repeat(Numeric.array([1,0],Numeric.Int),500),Numeric.Int)
+ad['matest3'] = MA.array(a, mask = m)
+del ad
+
+ad = ArrayDict('blah.dat')
+
+matest2 = ad['matest2']
+
+print "matest2: ", matest2
+print "sum of matest2: ", MA.sum(matest2)
+print "length of matest2: ", len(matest2)
+print "count of matest2: ", MA.count(matest2)
+print "average of matest2: ", MA.average(matest2)
+print "minimum of matest2: ", MA.minimum(matest2)
+print "maximum of matest2: ", MA.maximum(matest2)
+
+matest3 = ad['matest3']
+
+print "matest3: ", matest3
+print "sum of matest3: ", MA.sum(matest3)
+print "length of matest3: ", len(matest3)
+print "count of matest3: ", MA.count(matest3)
+print "average of matest3: ", MA.average(matest3)
+print "minimum of matest3: ", MA.minimum(matest3)
+print "maximum of matest3: ", MA.maximum(matest3)
diff --git a/soomext/mmaparray.c b/soomext/mmaparray.c
new file mode 100644
index 0000000..602d762
--- /dev/null
+++ b/soomext/mmaparray.c
@@ -0,0 +1,277 @@
+/*
+ * The contents of this file are subject to the HACOS License Version 1.2
+ * (the "License"); you may not use this file except in compliance with
+ * the License. Software distributed under the License is distributed
+ * on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ * implied. See the LICENSE file for the specific language governing
+ * rights and limitations under the License. The Original Software
+ * is "NetEpi Analysis". The Initial Developer of the Original
+ * Software is the Health Administration Corporation, incorporated in
+ * the State of New South Wales, Australia.
+ *
+ * Copyright (C) 2004,2005 Health Administration Corporation.
+ * All Rights Reserved.
+ */
+#include "Python.h"
+#include "structmember.h"
+#include "Numeric/arrayobject.h"
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+/* Implement an object which looks like UserArray but does the mmap
+ * thing.
+ */
+
+typedef struct {
+ int type_num;
+ int rank;
+ int shape[40];
+} MmapArrayDesc;
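+
+/* On-disk layout: one MmapArrayDesc header followed immediately by the
+ * raw array data (mmaparray_write() below pads the data to an int
+ * boundary). */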
+
+typedef struct {
+ PyObject_HEAD
+
+ MmapArrayDesc *desc;
+ off_t len;
+ PyArrayObject *array;
+} MmapArrayObject;
+
+staticforward PyTypeObject MmapArray_Type;
+
+static char MmapArray_as_array__doc__[] =
+"as_array() -> array";
+
+static PyObject *MmapArray_as_array(MmapArrayObject *self, PyObject *args)
+{
+ if (!PyArg_ParseTuple(args, ""))
+ return NULL;
+
+ Py_INCREF(self->array);
+ return (PyObject *)self->array;
+}
+
+static struct PyMethodDef MmapArray_methods[] = {
+ { "as_array", (PyCFunction)MmapArray_as_array, METH_VARARGS, MmapArray_as_array__doc__ },
+ { NULL, NULL}
+};
+
+#define OFFSET(x) offsetof(MmapArrayObject, x)
+
+static struct memberlist MmapArray_memberlist[] = {
+ { NULL }
+};
+
+static PyObject *MmapArray__getattr__(MmapArrayObject *self, char *name)
+{
+ PyObject *rv;
+
+ rv = PyMember_Get((char *)self->desc, MmapArray_memberlist, name);
+ if (rv)
+ return rv;
+ PyErr_Clear();
+ return Py_FindMethod(MmapArray_methods, (PyObject *)self, name);
+}
+
+static int MmapArray__setattr__(MmapArrayObject *self, char *name, PyObject *v)
+{
+ if (v == NULL) {
+ PyErr_SetString(PyExc_AttributeError, "Cannot delete attribute");
+ return -1;
+ }
+ return PyMember_Set((char *)self->desc, MmapArray_memberlist, name, v);
+}
+
+static void MmapArray__del__(MmapArrayObject *self)
+{
+ Py_XDECREF(self->array);
+ munmap(self->desc, self->len);
+ PyObject_Del(self);
+}
+
+static char MmapArray_Type__doc__[] =
+"";
+
+static PyTypeObject MmapArray_Type = {
+ PyObject_HEAD_INIT(0)
+ 0, /*ob_size*/
+ "MmapArray", /*tp_name*/
+ sizeof(MmapArrayObject), /*tp_basicsize*/
+ 0, /*tp_itemsize*/
+ /* methods */
+ (destructor)MmapArray__del__,/*tp_dealloc*/
+ (printfunc)0, /*tp_print*/
+ (getattrfunc)MmapArray__getattr__, /*tp_getattr*/
+ (setattrfunc)MmapArray__setattr__, /*tp_setattr*/
+ (cmpfunc)0, /*tp_compare*/
+ (reprfunc)0, /*tp_repr*/
+ 0, /*tp_as_number*/
+ 0, /*tp_as_sequence*/
+ 0, /*tp_as_mapping*/
+ (hashfunc)0, /*tp_hash*/
+ (ternaryfunc)0, /*tp_call*/
+ (reprfunc)0, /*tp_str*/
+
+ /* Space for future expansion */
+ (getattrofunc)0, /*tp_getattro*/
+ (setattrofunc)0, /*tp_setattro*/
+ (PyBufferProcs *)0, /*tp_as_buffer*/
+ 0, /*tp_flags*/
+ MmapArray_Type__doc__, /* Documentation string */
+};
+
+char mmaparray_read__doc__[] =
+"read(filename) -> MmapArray";
+
+PyObject *mmaparray_read(PyObject *module, PyObject *args)
+{
+ char *filename;
+ int fd;
+ struct stat st;
+ MmapArrayDesc *desc;
+ PyArrayObject *array;
+ MmapArrayObject *self;
+
+ if (!PyArg_ParseTuple(args, "s", &filename))
+ return NULL;
+ fd = open(filename, O_RDONLY);
+ if (fd < 0) {
+ PyErr_SetString(PyExc_IOError, "could not open file");
+ return NULL;
+ }
+ if (fstat(fd, &st) < 0) {
+ PyErr_SetString(PyExc_IOError, "stat failed");
+ close(fd);
+ return NULL;
+ }
+ desc = (MmapArrayDesc*)mmap(0, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
+ close(fd);
+ if ((caddr_t)desc == (caddr_t)-1) {
+ PyErr_SetString(PyExc_IOError, "mmap failed");
+ return NULL;
+ }
+
+ array = (PyArrayObject *)
+ PyArray_FromDimsAndData(desc->rank, desc->shape, desc->type_num,
+ (char *)(desc + 1));
+ if (array == NULL) {
+ munmap(desc, st.st_size);
+ return NULL;
+ }
+
+ self = PyObject_New(MmapArrayObject, &MmapArray_Type);
+ if (self == NULL) {
+ Py_XDECREF(array);
+ munmap(desc, st.st_size);
+ return NULL;
+ }
+
+ self->desc = desc;
+ self->len = st.st_size;
+ self->array = array;
+
+ return (PyObject*)self;
+}
+
+char mmaparray_write__doc__[] =
+"write(filename, array)";
+
+PyObject *mmaparray_write(PyObject *module, PyObject *args)
+{
+ char *filename;
+ int fd;
+ int data_size, i;
+ MmapArrayDesc desc;
+ PyArrayObject *array;
+ PyArrayObject *new_array;
+
+ if (!PyArg_ParseTuple(args, "sO", &filename, &array))
+ return NULL;
+
+ if (!PyArray_Check(array)) {
+ PyErr_SetString(PyExc_ValueError, "array object expected");
+ return NULL;
+ }
+
+ fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, 0666);
+ if (fd < 0) {
+ PyErr_SetString(PyExc_IOError, "could not create file");
+ return NULL;
+ }
+
+ new_array = (PyArrayObject*)
+ PyArray_ContiguousFromObject((PyObject*)array,
+ array->descr->type_num, 0, 0);
+ if (new_array == NULL) {
+ close(fd);
+ unlink(filename);
+ return NULL;
+ }
+
+ memset(&desc, 0, sizeof(desc));
+ desc.type_num = new_array->descr->type_num;
+ desc.rank = new_array->nd;
+ memcpy(desc.shape, new_array->dimensions, desc.rank * sizeof(desc.shape[0]));
+
+ data_size = new_array->descr->elsize;
+ for (i = 0; i < new_array->nd; i++)
+ data_size *= new_array->dimensions[i] ? new_array->dimensions[i] : 1;
+    /* Make sure we're aligned on ints. */
+    if (data_size % sizeof(int))
+        data_size += sizeof(int) - data_size % sizeof(int);
+
+ if (write(fd, &desc, sizeof(desc)) != sizeof(desc)
+ || write(fd, new_array->data, data_size) != data_size) {
+ close(fd);
+ unlink(filename);
+ Py_DECREF(new_array);
+ PyErr_SetString(PyExc_IOError, "could not write file");
+ return NULL;
+ }
+ close(fd);
+
+ Py_DECREF(new_array);
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+static struct PyMethodDef mmaparray_methods[] = {
+ { "read", (PyCFunction)mmaparray_read, METH_VARARGS, mmaparray_read__doc__ },
+ { "write", (PyCFunction)mmaparray_write, METH_VARARGS, mmaparray_write__doc__ },
+ { NULL, (PyCFunction)NULL, 0, NULL }
+};
+
+char mmaparray_module__doc__[] =
+"Implements a simple mmap file based Numeric array. To create a new\n"
+"memory mapped array you call the write() function.\n"
+"\n"
+">>> a = Numeric.array(range(100))\n"
+">>> mmaparray.write('blah.dat', a)\n"
+"\n"
+"To access the array in the file you call the read function.\n"
+"\n"
+">>> b = mmaparray.read('blah.dat')\n"
+"\n"
+"The object returned has the type MmapArray. The only method\n"
+"implemented in the MmapArray type is as_array(). This allows\n"
+"MmapArray objects to be used in by soomarray.MmapArray UserArray\n"
+"class.\n"
+"\n"
+">>> c = soomarray.MmapArray(b)\n"
+;
+
+void initmmaparray(void)
+{
+ PyObject *module;
+
+ module = Py_InitModule4("mmaparray", mmaparray_methods,
+ mmaparray_module__doc__,
+ (PyObject*)NULL, PYTHON_API_VERSION);
+
+ if (PyType_Ready(&MmapArray_Type) < 0)
+ return;
+
+ import_array();
+}
diff --git a/soomext/setup.py b/soomext/setup.py
new file mode 100644
index 0000000..07b2cf3
--- /dev/null
+++ b/soomext/setup.py
@@ -0,0 +1,43 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+#
+# $Id: setup.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/soomext/setup.py,v $
+
+# To use:
+# python setup.py install
+#
+
+import distutils, os
+from distutils.core import setup, Extension
+
+setup(
+ name = "NetEpi-Analysis-SOOM",
+ version = "0.11",
+ maintainer = "NSW Department of Health",
+ maintainer_email = "Tim CHURCHES <TCHUR at doh.health.nsw.gov.au>",
+ description = "NetEpi-Analysis SOOM Numpy Utilities",
+ py_modules=["soomarray"],
+ ext_modules = [Extension('soomfunc',
+ ['soomfunc.c']),
+ Extension('mmaparray',
+ ['mmaparray.c']),
+ Extension('blobstore',
+ ['blobstore.c', 'blob.c', 'storage.c'])
+ ],
+ url = "http://netepi.info/",
+ license = 'Health Administration Corporation Open Source License Version 1.2',
+ )
+
diff --git a/soomext/soomarray.py b/soomext/soomarray.py
new file mode 100644
index 0000000..87fe913
--- /dev/null
+++ b/soomext/soomarray.py
@@ -0,0 +1,542 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: soomarray.py 3701 2009-02-26 05:56:34Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/soomext/soomarray.py,v $
+
+import cPickle
+import string
+import blobstore
+import mmaparray
+import MA
+import Numeric
+import types
+from mx import DateTime
+import struct
+import bsddb
+
+class Error(Exception):
+ pass
+
+
+class MmapArray:
+ def __init__(self, blob):
+ self.blob = blob
+ array = blob.as_array()
+ self.shape = array.shape
+ self._typecode = array.typecode()
+ self.name = string.split(str(self.__class__))[0]
+
+ def append(self, a):
+ self.blob.append_array(a)
+ self.shape = self.blob.as_array().shape
+
+ def __repr__(self):
+ return repr(self.blob.as_array())
+
+ def __array__(self,t=None):
+ if t:
+ return Numeric.asarray(self.blob.as_array(),t)
+ return Numeric.asarray(self.blob.as_array())
+
+ def __float__(self):
+ return float(Numeric.asarray(self.blob.as_array()))
+
+ # Array as sequence
+ def __len__(self):
+ return len(self.blob.as_array())
+
+ def __getitem__(self, index):
+ return self.blob.as_array()[index]
+
+ def take(self, rows):
+ array = self.blob.as_array()
+ if MA.isMaskedArray(array):
+ return MA.take(array, rows)
+ else:
+ return Numeric.take(array, rows)
+
+# __getslice__ is deprecated - slice object passed as index to __getitem__
+# def __getslice__(self, i, j):
+# return self.blob.as_array()[i:j]
+
+ def __setitem__(self, index, value):
+ self.blob.as_array()[index] = Numeric.asarray(value,self._typecode)
+
+ def __setslice__(self, i, j, value):
+ self.blob.as_array()[i:j] = Numeric.asarray(value,self._typecode)
+
+ def __abs__(self):
+ return Numeric.absolute(self.blob.as_array())
+
+ def __neg__(self):
+ return -self.blob.as_array()
+
+ def __add__(self, other):
+ return self.blob.as_array()+Numeric.asarray(other)
+ __radd__ = __add__
+
+ def __sub__(self, other):
+ return self.blob.as_array()-Numeric.asarray(other)
+
+ def __rsub__(self, other):
+ return Numeric.asarray(other)-self.blob.as_array()
+
+ def __mul__(self, other):
+ return Numeric.multiply(self.blob.as_array(),Numeric.asarray(other))
+ __rmul__ = __mul__
+
+ def __div__(self, other):
+ return Numeric.divide(self.blob.as_array(),Numeric.asarray(other))
+
+ def __rdiv__(self, other):
+ return Numeric.divide(Numeric.asarray(other),self.blob.as_array())
+
+ def __pow__(self,other):
+ return Numeric.power(self.blob.as_array(),Numeric.asarray(other))
+
+ def __rpow__(self,other):
+ return Numeric.power(Numeric.asarray(other),self.blob.as_array())
+
+ def __sqrt__(self):
+ return Numeric.sqrt(self.blob.as_array())
+
+ def tostring(self):
+ return self.blob.as_array().tostring()
+
+ def byteswapped(self):
+ return self.blob.as_array().byteswapped()
+
+ def astype(self, typecode):
+ return self.blob.as_array().astype(typecode)
+
+ def typecode(self):
+ return self._typecode
+
+ def itemsize(self):
+ return self.blob.as_array().itemsize()
+
+ def iscontiguous(self):
+ return self.blob.as_array().iscontiguous()
+
+ def __setattr__(self,attr,value):
+ if attr=='shape':
+ self.blob.as_array().shape=value
+ self.__dict__[attr]=value
+
+ def __getattr__(self,attr):
+ # for .attributes for example, and any future attributes
+ return getattr(self.blob.as_array(), attr)
+
+
+class ArrayFile(MmapArray):
+ def __init__(self, filename, array = None):
+ if array is not None:
+ mmaparray.write(filename, array)
+ blob = mmaparray.read(filename)
+ MmapArray.__init__(self, blob)
+
+
+BLOB_DICT = 0
+BLOB_ARRAY = 1
+BLOB_FILLED = 2
+BLOB_MASK = 3
+BLOB_STRING = 4
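+
+# A masked array is stored as a pair of linked blobs: a BLOB_FILLED blob
+# holding the filled data and a BLOB_MASK blob holding the mask, each
+# recording its partner's index in the blob's 'other' slot.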
+
+class ArrayDict:
+ def __init__(self, filename, mode = 'r'):
+ self.dict_dirty = 0
+ self.store = blobstore.open(filename, mode)
+ if len(self.store) > 0:
+ blob = self.store[0]
+ self.dict = cPickle.loads(blob.as_str())
+ else:
+ self.dict_dirty = 1
+ self.store.append()
+ self.dict = {}
+
+ def __del__(self):
+ if self.dict_dirty:
+ blob = self.store[0]
+ blob.type = BLOB_DICT
+ blob.save_str(cPickle.dumps(self.dict))
+
+ def __getitem__(self, key):
+ index = self.dict[key]
+ blob = self.store[index]
+ if blob.type == BLOB_ARRAY:
+ return MmapArray(blob)
+ elif blob.type == BLOB_FILLED:
+ data = MmapArray(blob)
+ blob = self.store[blob.other]
+ mask = MmapArray(blob)
+ return MA.array(data, mask = mask)
+ elif blob.type == BLOB_STRING:
+ return blob.as_str()
+ else:
+ raise Error('bad BLOB type %s in index' % blob.type)
+
+ def __setitem__(self, key, a):
+ index = self.dict.get(key)
+ if index is None:
+ if MA.isMaskedArray(a):
+ index = self._save_new_masked(a)
+ elif type(a) == Numeric.ArrayType:
+ index = self._save_new_array(a)
+ elif type(a) == types.StringType:
+ index = self._save_new_str(a)
+ else:
+ index = self._save_new_str(repr(a))
+ self.dict[key] = index
+ self.dict_dirty = 1
+ else:
+ if MA.isMaskedArray(a):
+ self._save_masked(index, a)
+ elif type(a) == Numeric.ArrayType:
+ self._save_array(index, a)
+ elif type(a) == types.StringType:
+ self._save_str(index, a)
+ else:
+                self._save_str(index, repr(a))
+
+ def __delitem__(self, key):
+ index = self.dict.get(key)
+ if index is None:
+ raise KeyError, key
+ blob = self.store[index]
+ if blob.type == BLOB_FILLED:
+ self.store.free(blob.other)
+ self.store.free(index)
+ del self.dict[key]
+ self.dict_dirty = 1
+
+ def __getslice__(self, i, j):
+ slice = []
+ if j > len(self):
+ j = len(self)
+ for s in range(i,j):
+ slice.append(self[s])
+ return slice
+
+ def clear(self):
+ for key in self.dict.keys():
+ del self[key]
+
+ def get(self, key, default = None):
+ if self.dict.has_key(key):
+ return self[key]
+ return default
+
+ def has_key(self, key):
+ return self.dict.has_key(key)
+
+ def keys(self):
+ return self.dict.keys()
+
+ def values(self):
+ values = []
+ for key in self.dict.keys():
+ values.append(self[key])
+ return values
+
+ def items(self):
+ items = []
+ for key in self.dict.keys():
+ items.append((key, self[key]))
+ return items
+
+ def _save_new_masked(self, a):
+ index = self.store.append()
+ blob = self.store[index]
+ blob.type = BLOB_FILLED
+ blob.save_array(a.filled())
+ blob.other = self.store.append()
+ blob = self.store[blob.other]
+ blob.type = BLOB_MASK
+ blob.save_array(a.mask())
+ blob.other = index
+ return index
+
+ def _save_new_array(self, a):
+ index = self.store.append()
+ blob = self.store[index]
+ blob.type = BLOB_ARRAY
+ blob.save_array(a)
+ return index
+
+ def _save_new_str(self, a):
+ index = self.store.append()
+ blob = self.store[index]
+ blob.type = BLOB_STRING
+ blob.save_str(str(a))
+ return index
+
+ def _save_masked(self, index, a):
+ blob = self.store[index]
+ if blob.type == BLOB_FILLED:
+ blob.save_array(a.filled())
+ blob = self.store[blob.other]
+ blob.save_array(a.mask())
+ elif blob.type == BLOB_ARRAY:
+ blob.type = BLOB_FILLED
+ blob.save_array(a.filled())
+ blob.other = self.store.append()
+ blob = self.store[blob.other]
+ blob.type = BLOB_MASK
+ blob.save_array(a.mask())
+ else:
+ raise Error('unexpected BLOB type %s in index' % blob.type)
+
+ def _save_array(self, index, a):
+ blob = self.store[index]
+ if blob.type == BLOB_FILLED:
+ blob.type = BLOB_ARRAY
+ blob.save_array(a)
+ self.store.free(blob.other)
+ blob.other = 0
+ elif blob.type == BLOB_ARRAY:
+ blob.save_array(a)
+ else:
+ raise Error('unexpected BLOB type %s in index' % blob.type)
+
+ def _save_str(self, index, a):
+ blob = self.store[index]
+ if blob.type == BLOB_STRING:
+ blob.save_str(a)
+ else:
+ raise Error('unexpected BLOB type %s in index' % blob.type)
+
+ def __len__(self):
+ return len(self.dict)
+
+
+class RNArrayBase:
+
+ """
+ An array-like object backed by a bsddb "recno" database (in fact,
+ a btree indexed by an integer 1-based record number).
+ """
+
+ def __init__(self, filename=None, mode='c'):
+ self.store = bsddb.rnopen(filename, mode)
+
+ def __del__(self):
+ try:
+ self.store.close()
+ except AttributeError:
+ pass
+
+ def __delitem__(self, i):
+ del self.store[i+1]
+
+ def __setitem__(self, i, a):
+ self.store[i+1] = self._pack(a)
+
+ def __getitem__(self, i):
+ if isinstance(i, slice):
+ return [self._unpack(self.store[s+1])
+ for s in xrange(*i.indices(len(self)))]
+ try:
+ a = self.store[i+1]
+ except KeyError:
+ raise IndexError('index %d out of range' % i)
+ return self._unpack(a)
+
+ def take(self, vec):
+ return [self._unpack(self.store[i+1]) for i in vec]
+
+ def __len__(self):
+ try:
+ return self.store.last()[0]
+ except bsddb.db.DBNotFoundError:
+ return 0
+# Some previously tried __len__ implementations:
+# This is too slow (essentially takes as long as len(store.keys()):
+# stat = self.store.db.stat()
+# return stat['ndata']
+# This is too slow (see above) and only works on first call?
+# return len(self.store)
+# Additionally, since the rn db can have "holes", the number of records is not
+# the array length.
+
+
+class ArrayString(RNArrayBase):
+
+ def _pack(self, a):
+ if a is None:
+ return ''
+ elif type(a) is str:
+ return a
+ else:
+ raise TypeError('Cannot store a non-string in an ArrayString: %r' % a)
+
+ def _unpack(self, a):
+ return a
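+
+    # A hedged usage sketch (the file name is illustrative):
+    #
+    #   >>> names = ArrayString('names.db', 'c')
+    #   >>> names[0] = 'alpha'
+    #   >>> names[1] = 'beta'
+    #   >>> names.take([1, 0])
+    #   ['beta', 'alpha']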
+
+
+class ArrayTuple(RNArrayBase):
+
+ def _pack(self, a):
+ if type(a) is not tuple:
+ raise TypeError('Cannot store a non-tuple in an ArrayTuple: %r' % a)
+ try:
+ hash(a)
+ except:
+ raise ValueError('Cannot store a non-hashable value in an ArrayTuple: %r' % a)
+ return cPickle.dumps(a,-1)
+
+ def _unpack(self, a):
+ return cPickle.loads(a)
+
+
+class ArrayDateTime(RNArrayBase):
+
+ def _pack(self, a):
+ if a is None:
+ return 'None'
+ try:
+ absdt = a.absvalues()
+ except AttributeError:
+ raise TypeError('ArrayDateTime values must be mx.DateTimes or None')
+ return struct.pack('ld', absdt[0], absdt[1])
+
+ def _unpack(self, a):
+ if a == 'None':
+ return None
+ absdt = struct.unpack('ld', a)
+ return DateTime.DateTimeFromAbsDateTime(absdt[0], absdt[1])
+
+
+class ArrayDate(RNArrayBase):
+
+ def _pack(self, a):
+ if a is None:
+ return 'None'
+ try:
+ absdt = a.absvalues()
+ except AttributeError:
+ raise TypeError('ArrayDate values must be mx.DateTimes or None')
+ return struct.pack('l',absdt[0])
+
+ def _unpack(self, a):
+ if a == 'None':
+ return None
+ absdt = struct.unpack('l', a)
+ return DateTime.DateTimeFromAbsDateTime(absdt[0], 0)
+
+
+class ArrayTime(RNArrayBase):
+
+ def _pack(self, a):
+ if a is None:
+ return 'None'
+ try:
+ absdt = a.absvalues()
+ except AttributeError:
+ raise TypeError('ArrayTime values must be mx.DateTimes or None')
+ return struct.pack('d', absdt[1])
+
+ def _unpack(self, a):
+ if a == 'None':
+ return None
+ absdt = struct.unpack('d', a)
+ return DateTime.DateTimeDeltaFromSeconds(absdt[0])
+
+
+RECODE_META_IDX = 0
+RECODE_DATA_IDX = 1
+
+class RecodeBlobArray:
+ def __init__(self, size, filename, mode='r'):
+ self.dirty = False
+ self.store = None
+ self.store = blobstore.open(filename, mode)
+ if len(self.store):
+ metadata = cPickle.loads(self.store[RECODE_META_IDX].as_str())
+ self.obj_to_code, self.code_to_obj, self.next_code = metadata
+ else:
+ self.store.append()
+ self.store.append()
+ self.obj_to_code = {}
+ self.code_to_obj = [None]
+ self.next_code = 1
+ zeros = Numeric.zeros(size)
+ self.store[RECODE_DATA_IDX].save_array(zeros)
+
+ def __del__(self):
+ if self.dirty:
+ metadata = self.obj_to_code, self.code_to_obj, self.next_code
+ self.store[0].save_str(cPickle.dumps(metadata))
+
+ def __len__(self):
+ return len(self.store[RECODE_DATA_IDX].as_array())
+
+ def __getitem__(self, i):
+ array = self.store[RECODE_DATA_IDX].as_array()
+ if type(i) is slice:
+ return [self.code_to_obj[v] for v in array[i]]
+ else:
+ return self.code_to_obj[array[i]]
+
+ def take(self, rows):
+ array = self.store[RECODE_DATA_IDX].as_array()
+ return [self.code_to_obj[v] for v in Numeric.take(array, rows)]
+
+ def __setitem__(self, i, v):
+ code = self.obj_to_code.get(v, None)
+ if code is None:
+ code = self.next_code
+ self.next_code += 1
+ self.obj_to_code[v] = code
+ self.code_to_obj.append(v)
+ array = self.store[RECODE_DATA_IDX].as_array()
+ array[i] = code
+ self.dirty = True
+
+
+class RecodeNumericArray:
+ def __init__(self, size):
+ self.obj_to_code = {}
+ self.code_to_obj = [None]
+ self.next_code = 1
+ self.data = Numeric.zeros(size)
+
+ def __len__(self):
+ return len(self.data)
+
+ def __getitem__(self, i):
+ if type(i) is slice:
+ return [self.code_to_obj[v] for v in self.data[i]]
+ else:
+ return self.code_to_obj[self.data[i]]
+
+ def take(self, rows):
+ return [self.code_to_obj[v] for v in Numeric.take(self.data, rows)]
+
+ def __setitem__(self, i, v):
+ code = self.obj_to_code.get(v, None)
+ if code is None:
+ code = self.next_code
+ self.next_code += 1
+ self.obj_to_code[v] = code
+ self.code_to_obj.append(v)
+ self.data[i] = code
+
+def get_recode_array(size, filename=None, mode='r'):
+ if filename:
+ return RecodeBlobArray(size, filename, mode)
+ else:
+ return RecodeNumericArray(size)
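+
+# A hedged usage sketch: recode arrays store one small integer code per
+# row and translate codes back to the original values on access.
+#
+#   >>> ra = get_recode_array(3)                # in-memory variant
+#   >>> ra[0] = 'NSW'; ra[1] = 'VIC'; ra[2] = 'NSW'
+#   >>> ra[0:3]
+#   ['NSW', 'VIC', 'NSW']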
+
diff --git a/soomext/soomfunc.c b/soomext/soomfunc.c
new file mode 100644
index 0000000..3b1c93e
--- /dev/null
+++ b/soomext/soomfunc.c
@@ -0,0 +1,1517 @@
+/*
+ * The contents of this file are subject to the HACOS License Version 1.2
+ * (the "License"); you may not use this file except in compliance with
+ * the License. Software distributed under the License is distributed
+ * on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ * implied. See the LICENSE file for the specific language governing
+ * rights and limitations under the License. The Original Software
+ * is "NetEpi Analysis". The Initial Developer of the Original
+ * Software is the Health Administration Corporation, incorporated in
+ * the State of New South Wales, Australia.
+ *
+ * Copyright (C) 2004,2005 Health Administration Corporation.
+ * All Rights Reserved.
+ */
+#include "Python.h"
+#include "structmember.h"
+#include "Numeric/arrayobject.h"
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <ctype.h>
+
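+/* Optional tracing hook: when instrument_func is set (elsewhere in this
+ * module) to something other than Py_None, the INSTRUMENT() macro below
+ * calls it with the operation name, a phase string and the array
+ * lengths involved. */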
+static PyObject *instrument_func = NULL;
+
+#define INSTRUMENT(args) \
+ if (instrument_func != Py_None) { \
+ PyObject *args_tuple, *result; \
+ \
+ args_tuple = result = NULL; \
+ args_tuple = Py_BuildValue args; \
+ if (args_tuple != NULL) \
+ result = PyObject_CallObject(instrument_func, args_tuple); \
+ if (result == NULL || args_tuple == NULL) \
+ PyErr_Clear(); \
+ Py_XDECREF(result); \
+ Py_XDECREF(args_tuple); \
+ }
+
+typedef struct {
+ PyArrayObject *array; /* must DECREF once finished */
+ char *data; /* step through array data */
+ int stride; /* distance between each element */
+ int index; /* current array index */
+ int len; /* array length */
+} ArrayInfo;
+
+static void free_array_info(ArrayInfo *array_info, int num_arrays)
+{
+ ArrayInfo *info;
+ int i;
+
+ for (i = 0, info = array_info; i < num_arrays; i++, info++)
+ Py_XDECREF(info->array);
+ free(array_info);
+}
+
+static void set_array_info(ArrayInfo *info, PyArrayObject *array)
+{
+ info->array = array;
+ info->data = array->data;
+ info->stride = array->strides[0];
+ info->index = 0;
+ info->len = array->dimensions[0];
+}
+
+static ArrayInfo *check_array_args(PyObject *args,
+ int min_arrays, int max_arrays,
+ int contiguous)
+{
+ char msg[128];
+ ArrayInfo *array_info, *info;
+ int num_arrays, type_num, i;
+
+ num_arrays = PyTuple_Size(args);
+ if (num_arrays < min_arrays) {
+ sprintf(msg, "at least %d arrays required", min_arrays);
+ PyErr_SetString(PyExc_ValueError, msg);
+ return NULL;
+ }
+ if (max_arrays > 0 && num_arrays > max_arrays) {
+ sprintf(msg, "more than %d arrays not supported", max_arrays);
+ PyErr_SetString(PyExc_ValueError, msg);
+ return NULL;
+ }
+ type_num = -1;
+ array_info = malloc(sizeof(*array_info) * num_arrays);
+ if (array_info == NULL)
+ return (ArrayInfo*)PyErr_NoMemory();
+ memset(array_info, 0, sizeof(*array_info) * num_arrays);
+
+ for (i = 0, info = array_info; i < num_arrays; i++, info++) {
+ PyObject *arg;
+ PyArrayObject *array;
+
+ arg = PyTuple_GetItem(args, i);
+ if (arg == NULL)
+ goto error;
+ if (contiguous)
+ array = (PyArrayObject *)
+ PyArray_ContiguousFromObject(arg, PyArray_NOTYPE, 0, 0);
+ else
+ array = (PyArrayObject *)
+ PyArray_FromObject(arg, PyArray_NOTYPE, 0, 0);
+ if (array == NULL)
+ goto error;
+ if (i == 0)
+ type_num = array->descr->type_num;
+ else if (array->descr->type_num != type_num) {
+ PyErr_SetString(PyExc_ValueError,
+ "arrays must have the same typecode");
+ goto error;
+ }
+ if (array->nd != 1) {
+ PyErr_SetString(PyExc_ValueError, "arrays must be rank-1");
+ goto error;
+ }
+ if (array->descr->type_num == PyArray_CFLOAT
+ || array->descr->type_num == PyArray_CDOUBLE
+ || array->descr->type_num == PyArray_OBJECT
+ || array->descr->type_num == PyArray_NTYPES
+ || array->descr->type_num == PyArray_NOTYPE) {
+ PyErr_SetString(PyExc_ValueError, "unhandled array type");
+ goto error;
+ }
+ set_array_info(info, array);
+ }
+ return array_info;
+
+error:
+ free_array_info(array_info, num_arrays);
+ return NULL;
+}
+
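+/* unique_func expands to a function that copies the first element of
+ * each run of equal values from the sorted input (info1) to the output
+ * (info2), recording the number of unique values in info2->len. */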
+#define unique_func(NAME, TYPE) \
+static void NAME(ArrayInfo *info1, ArrayInfo *info2) \
+{ \
+ while (info1->index < info1->len && info2->index < info2->len) { \
+ char *value = info1->data; \
+ \
+ *(TYPE *)info2->data = *(TYPE *)info1->data; \
+ info1->index++; \
+ info2->index++; \
+ info1->data += info1->stride; \
+ info2->data += info2->stride; \
+ \
+ while (info1->index < info1->len \
+ && *(TYPE *)info1->data == *(TYPE *)value) { \
+ info1->index++; \
+ info1->data += info1->stride; \
+ } \
+ } \
+ info2->len = info2->index; \
+}
+
+unique_func(unique_schar, signed char)
+unique_func(unique_uchar, unsigned char)
+unique_func(unique_short, short)
+unique_func(unique_int, int)
+unique_func(unique_long, long)
+unique_func(unique_float, float)
+unique_func(unique_double, double)
+
+static char soomfunc_unique__doc__[] =
+"unique(a [,b]) -> array\n"
+"\n"
+" Remove all duplicate values from the input rank-1 array returning\n"
+" a new rank-1 array of unique values. If a second argument is\n"
+" supplied it will be used to store the result.\n";
+
+static PyObject *soomfunc_unique(PyObject *module, PyObject *args)
+{
+ ArrayInfo *array_info, *info, info_ret;
+ int type_num, num_arrays;
+ PyArrayObject *ret;
+
+ array_info = check_array_args(args, 1, 2, 0);
+ if (array_info == NULL)
+ return NULL;
+ type_num = array_info->array->descr->type_num;
+ num_arrays = PyTuple_Size(args);
+
+ INSTRUMENT(("ssi", "unique", "enter", array_info[0].len));
+
+ if (num_arrays != 2) {
+ int shape[1];
+
+ /* Build the array to capture the unique values.
+ */
+ shape[0] = array_info->len;
+ ret = (PyArrayObject *)PyArray_FromDims(1, shape, type_num);
+ if (ret == NULL) {
+ free_array_info(array_info, num_arrays);
+ return NULL;
+ }
+ set_array_info(&info_ret, ret);
+ info = &info_ret;
+ } else {
+ info = &array_info[1];
+ ret = info->array;
+ }
+
+ /* Generate the unique array.
+ */
+ switch (type_num) {
+ case PyArray_CHAR:
+ case PyArray_SBYTE:
+ unique_schar(array_info, info);
+ break;
+ case PyArray_UBYTE:
+ unique_uchar(array_info, info);
+ break;
+ case PyArray_SHORT:
+ unique_short(array_info, info);
+ break;
+ case PyArray_INT:
+ unique_int(array_info, info);
+ break;
+ case PyArray_LONG:
+ unique_long(array_info, info);
+ break;
+ case PyArray_FLOAT:
+ unique_float(array_info, info);
+ break;
+ case PyArray_DOUBLE:
+ unique_double(array_info, info);
+ break;
+ }
+
+ ret->dimensions[0] = info->len;
+
+ free_array_info(array_info, num_arrays);
+
+ INSTRUMENT(("ssi", "unique", "exit", ret->dimensions[0]));
+ return (PyObject *)ret;
+}
+
+#define intersect_func(NAME, TYPE) \
+inline int NAME##_search(char *key, char *base, int num, int stride, int mid) \
+{ \
+ int low, high; \
+ char *val; \
+ \
+ low = 0; \
+ high = num - 1; \
+ while (low <= high) { \
+ val = base + mid * stride; \
+ if (*(TYPE*)key < *(TYPE*)val) \
+ high = mid - 1; \
+ else if (*(TYPE*)key > *(TYPE*)val) \
+ low = mid + 1; \
+ else \
+ return mid; \
+ mid = (low + high) / 2; \
+ } \
+ return mid + 1; \
+} \
+ \
+static void sparse_##NAME(ArrayInfo *info1, ArrayInfo *info2, ArrayInfo *info_res) \
+{ \
+ char *data1 = info1->array->data; \
+ char *data2 = info2->array->data; \
+ char *data_res = info_res->array->data; \
+ int index1, index2, index_res; \
+ int stride1, stride2, stride_res; \
+ int len1, len2; \
+ int skip1 = 0, skip2 = 0; \
+ \
+ index1 = index2 = index_res = 0; \
+ stride1 = info1->stride; \
+ stride2 = info2->stride; \
+ stride_res = info_res->stride; \
+ len1 = info1->len; \
+ len2 = info2->len; \
+ \
+ INSTRUMENT(("ssii", "sparse_intersect", "enter", len1, len2)); \
+ \
+ while (index1 < len1 && index2 < len2) { \
+ if (*(TYPE *)data1 < *(TYPE *)data2) { \
+ int num1, mid; \
+ \
+ num1 = len1 - index1; \
+ mid = skip1 * 3; \
+ if (!mid || mid > num1 / 2) \
+ mid = num1 / 2; \
+ skip1 = NAME##_search(data2, data1, num1, stride1, mid); \
+ index1 += skip1; \
+ data1 += skip1 * stride1; \
+ } else if (*(TYPE *)data2 < *(TYPE *)data1) { \
+ int num2, mid; \
+ \
+ num2 = len2 - index2; \
+ mid = skip2 * 3; \
+ if (!mid || mid > num2 / 2) \
+ mid = num2 / 2; \
+ skip2 = NAME##_search(data1, data2, num2, stride2, mid); \
+ index2 += skip2; \
+ data2 += skip2 * stride2; \
+ } else { \
+ char *match_value; \
+ \
+ match_value = data1; \
+ *(TYPE *)data_res = *(TYPE *)data1; \
+ index_res++; \
+ data_res += stride_res; \
+ \
+ index1++; \
+ data1 += stride1; \
+ while (index1 < len1 && *(TYPE *)data1 == *(TYPE *)match_value) { \
+ index1++; \
+ data1 += stride1; \
+ } \
+ index2++; \
+ data2 += stride2; \
+ while (index2 < len2 && *(TYPE *)data2 == *(TYPE *)match_value) { \
+ index2++; \
+ data2 += stride2; \
+ } \
+ } \
+ } \
+ info_res->len = index_res; \
+ \
+ INSTRUMENT(("ssi", "sparse_intersect", "exit", index_res)); \
+ \
+} \
+ \
+static void dense_##NAME(ArrayInfo *info1, ArrayInfo *info2, ArrayInfo *info_res) \
+{ \
+ char *data1 = info1->array->data; \
+ char *data2 = info2->array->data; \
+ char *data_res = info_res->array->data; \
+ int index1, index2, index_res; \
+ int stride1, stride2, stride_res; \
+ int len1, len2; \
+ \
+ index1 = index2 = index_res = 0; \
+ stride1 = info1->stride; \
+ stride2 = info2->stride; \
+ stride_res = info_res->stride; \
+ len1 = info1->len; \
+ len2 = info2->len; \
+ \
+ INSTRUMENT(("ssii", "dense_intersect", "enter", len1, len2)); \
+ \
+ while (index1 < len1 && index2 < len2) { \
+ if (*(TYPE *)data1 < *(TYPE *)data2) { \
+ index1++; \
+ data1 += stride1; \
+ } else if (*(TYPE *)data2 < *(TYPE *)data1) { \
+ index2++; \
+ data2 += stride2; \
+ } else { \
+ char *match_value; \
+ \
+ match_value = data1; \
+ *(TYPE *)data_res = *(TYPE *)data1; \
+ index_res++; \
+ data_res += info_res->stride; \
+ \
+ index1++; \
+ data1 += stride1; \
+ while (index1 < len1 \
+ && *(TYPE *)data1 == *(TYPE *)match_value) { \
+ index1++; \
+ data1 += stride1; \
+ } \
+ index2++; \
+ data2 += stride2; \
+ while (index2 < len2 \
+ && *(TYPE *)data2 == *(TYPE *)match_value) { \
+ index2++; \
+ data2 += stride2; \
+ } \
+ } \
+ } \
+ info_res->len = index_res; \
+ \
+ INSTRUMENT(("ssi", "dense_intersect", "exit", index_res)); \
+ \
+} \
+ \
+static void NAME(ArrayInfo *info1, ArrayInfo *info2, ArrayInfo *info_res) \
+{ \
+ if (info1->len > info2->len) { \
+ if (info2->len > 0 && info1->len / info2->len >= 3) \
+ sparse_##NAME(info1, info2, info_res); \
+ else \
+ dense_##NAME(info1, info2, info_res); \
+ } else if (info1->len > 0 && info2->len / info1->len >= 3) \
+ sparse_##NAME(info1, info2, info_res); \
+ else \
+ dense_##NAME(info1, info2, info_res); \
+}
+
+intersect_func(intersect_schar, signed char)
+intersect_func(intersect_uchar, unsigned char)
+intersect_func(intersect_short, short)
+intersect_func(intersect_int, int)
+intersect_func(intersect_long, long)
+intersect_func(intersect_float, float)
+intersect_func(intersect_double, double)
+
+typedef void (*IntersectFunc)(ArrayInfo *info1, ArrayInfo *info2,
+ ArrayInfo *info_res);
+
+typedef struct {
+ IntersectFunc schar_func;
+ IntersectFunc uchar_func;
+ IntersectFunc short_func;
+ IntersectFunc int_func;
+ IntersectFunc long_func;
+ IntersectFunc float_func;
+ IntersectFunc double_func;
+} IntersectTable;
+
+static int ai_size_cmp(const void *info1, const void *info2)
+{
+ return (((ArrayInfo *)info1)->len - ((ArrayInfo *)info2)->len);
+}
+
+static PyObject *intersect_with(IntersectTable *table, PyObject *args)
+{
+ ArrayInfo *array_info, info1, *info2, info_ret;
+ PyArrayObject *ret;
+ int shape[1];
+ int alloc_size, i;
+ int type_num, num_arrays;
+
+ array_info = check_array_args(args, 2, -1, 0);
+ if (array_info == NULL)
+ return NULL;
+ type_num = array_info->array->descr->type_num;
+ num_arrays = PyTuple_Size(args);
+
+ /* These algorithms are much faster if they are applied smallest array to
+ * largest, so we sort the array list (this will, however, be a net loss
+ * in the degenerate case of lots of small arrays) */
+ qsort(array_info, num_arrays, sizeof(*array_info), ai_size_cmp);
+
+ alloc_size = array_info->len;
+
+ /* Build the array to capture the intersection */
+ shape[0] = alloc_size;
+ ret = (PyArrayObject *)PyArray_FromDims(1, shape, type_num);
+ if (ret == NULL) {
+ free_array_info(array_info, num_arrays);
+ return NULL;
+ }
+ set_array_info(&info_ret, ret);
+
+ /* Generate the intersection. Intersect the smallest array with
+     * each other array - after each operation substitute the result
+ * for the smallest array.
+ */
+ info1 = *array_info;
+ i = 1;
+ info2 = array_info + 1;
+ while (i < num_arrays && info1.len > 0)
+ {
+ switch (type_num) {
+ case PyArray_CHAR:
+ case PyArray_SBYTE:
+ table->schar_func(&info1, info2, &info_ret);
+ break;
+ case PyArray_UBYTE:
+ table->uchar_func(&info1, info2, &info_ret);
+ break;
+ case PyArray_SHORT:
+ table->short_func(&info1, info2, &info_ret);
+ break;
+ case PyArray_INT:
+ table->int_func(&info1, info2, &info_ret);
+ break;
+ case PyArray_LONG:
+ table->long_func(&info1, info2, &info_ret);
+ break;
+ case PyArray_FLOAT:
+ table->float_func(&info1, info2, &info_ret);
+ break;
+ case PyArray_DOUBLE:
+ table->double_func(&info1, info2, &info_ret);
+ break;
+ }
+ info1 = info_ret;
+ i++;
+ info2++;
+ }
+ free_array_info(array_info, num_arrays);
+
+ ret->dimensions[0] = info_ret.len;
+ return (PyObject *)ret;
+}
+
+static char soomfunc_intersect__doc__[] =
+"intersect(a, b, ...) -> array\n"
+"\n"
+" Return the intersection of the rank-1 arrays passed. All arrays\n"
+" must have the same typecode.\n";
+
+static PyObject *soomfunc_intersect(PyObject *module, PyObject *args)
+{
+ static IntersectTable intersect = {
+ intersect_schar,
+ intersect_uchar,
+ intersect_short,
+ intersect_int,
+ intersect_long,
+ intersect_float,
+ intersect_double
+ };
+
+ return intersect_with(&intersect, args);
+}
+
+static char soomfunc_dense_intersect__doc__[] =
+"dense_intersect(a, b, ...) -> array\n"
+"\n"
+" Return the intersection of the rank-1 arrays passed. All arrays\n"
+" must have the same typecode.\n";
+
+static PyObject *soomfunc_dense_intersect(PyObject *module, PyObject *args)
+{
+ static IntersectTable dense_intersect = {
+ dense_intersect_schar,
+ dense_intersect_uchar,
+ dense_intersect_short,
+ dense_intersect_int,
+ dense_intersect_long,
+ dense_intersect_float,
+ dense_intersect_double
+ };
+
+ return intersect_with(&dense_intersect, args);
+}
+
+static char soomfunc_sparse_intersect__doc__[] =
+"sparse_intersect(a, b, ...) -> array\n"
+"\n"
+" Return the intersection of the rank-1 arrays passed. All arrays\n"
+" must have the same typecode.\n";
+
+static PyObject *soomfunc_sparse_intersect(PyObject *module, PyObject *args)
+{
+ static IntersectTable sparse_intersect = {
+ sparse_intersect_schar,
+ sparse_intersect_uchar,
+ sparse_intersect_short,
+ sparse_intersect_int,
+ sparse_intersect_long,
+ sparse_intersect_float,
+ sparse_intersect_double
+ };
+
+ return intersect_with(&sparse_intersect, args);
+}
+
+#define outersect_func(NAME, TYPE) \
+static PyObject *NAME(ArrayInfo *array_info, int num_arrays) \
+{ \
+ ArrayInfo *info; \
+ int type_num, i, out_size, out_len; \
+ TYPE *out_data; \
+ \
+ out_size = type_num = 0; \
+ for (info = array_info, i = 0; i < num_arrays; info++, i++) { \
+ type_num = info->array->descr->type_num; \
+ if (info->len > out_size) \
+ out_size = info->len; \
+ } \
+ out_size /= 2; \
+ \
+ out_len = 0; \
+ out_data = malloc(out_size * sizeof(TYPE)); \
+ if (out_data == NULL) { \
+ PyErr_SetString(PyExc_MemoryError, "can't allocate memory for array"); \
+ return NULL; \
+ } \
+ \
+ for (;;) { \
+ ArrayInfo *use_info = NULL; \
+ int duplicate_count = 0; \
+ \
+ /* Find the minimum unique value \
+ */ \
+ for (info = array_info, i = 0; i < num_arrays; info++, i++) { \
+ if (info->index < info->len) { \
+ if (use_info == NULL) { \
+ /* Minimum unique value is here (I think) \
+ */ \
+ use_info = info; \
+ duplicate_count = 1; \
+ } else if (*(TYPE *)info->data < *(TYPE *)use_info->data) { \
+ /* Nope - minimum unique value is here (I think) \
+ */ \
+ use_info = info; \
+ duplicate_count = 1; \
+ } else if (*(TYPE *)info->data == *(TYPE *)use_info->data) { \
+ /* Nope - minimum (I think) value has duplicate \
+ */ \
+ duplicate_count++; \
+ } \
+ } \
+ } \
+ /* Now handle minimum value search result \
+ */ \
+ if (duplicate_count) { \
+ char *data = use_info->data; \
+ \
+ if (duplicate_count < num_arrays) { \
+ /* Woohoo - found a minimum value which is not in all sets \
+ */ \
+ if (out_len == out_size) { \
+ TYPE *new_data; \
+ \
+ out_size *= 2; \
+ new_data = realloc(out_data, out_size * sizeof(TYPE)); \
+ if (new_data == NULL) { \
+ PyErr_SetString(PyExc_MemoryError, "can't allocate memory for array"); \
+ free(out_data); \
+ return NULL; \
+ } \
+ out_data = new_data; \
+ } \
+ out_data[out_len++] = *(TYPE *)use_info->data; \
+ } \
+ /* Skip over all duplicate values \
+ */ \
+ for (info = array_info, i = 0; i < num_arrays; info++, i++) { \
+ while (info->index < info->len \
+ && *(TYPE *)info->data == *(TYPE *)data) { \
+ info->data += info->stride; \
+ info->index++; \
+ } \
+ } \
+ } else { \
+ /* Finished scanning all arrays \
+ */ \
+ PyArrayObject *ret; \
+ int shape[1]; \
+ \
+ shape[0] = out_len; \
+ ret = (PyArrayObject *) \
+ PyArray_FromDimsAndData(1, shape, type_num, (char *)out_data); \
+ if (ret) \
+ ret->flags |= OWN_DATA; \
+ else \
+ free(out_data); \
+ \
+ return (PyObject *)ret; \
+ } \
+ } \
+}
+
+outersect_func(outersect_schar, signed char)
+outersect_func(outersect_uchar, unsigned char)
+outersect_func(outersect_short, short)
+outersect_func(outersect_int, int)
+outersect_func(outersect_long, long)
+outersect_func(outersect_float, float)
+outersect_func(outersect_double, double)
+
+static char soomfunc_outersect__doc__[] =
+"outersect(a, b, ...) -> array\n"
+"\n"
+" Return the symmetric difference of the rank-1 arrays\n"
+" passed. All arrays must have the same typecode.\n";
+
+static PyObject *soomfunc_outersect(PyObject *module, PyObject *args)
+{
+ ArrayInfo *array_info;
+ PyArrayObject *ret;
+ int type_num, num_arrays;
+
+ array_info = check_array_args(args, 2, -1, 0);
+ if (array_info == NULL)
+ return NULL;
+ type_num = array_info->array->descr->type_num;
+ num_arrays = PyTuple_Size(args);
+
+ INSTRUMENT(("ssi", "outersect", "enter", num_arrays));
+
+ /* Scan all the arrays simultaneously, emitting each value that does
+ * not appear in every array (for two arrays this is the symmetric
+ * difference).
+ */
+ ret = NULL;
+ switch (type_num) {
+ case PyArray_CHAR:
+ case PyArray_SBYTE:
+ ret = (PyArrayObject *)outersect_schar(array_info, num_arrays);
+ break;
+ case PyArray_UBYTE:
+ ret = (PyArrayObject *)outersect_uchar(array_info, num_arrays);
+ break;
+ case PyArray_SHORT:
+ ret = (PyArrayObject *)outersect_short(array_info, num_arrays);
+ break;
+ case PyArray_INT:
+ ret = (PyArrayObject *)outersect_int(array_info, num_arrays);
+ break;
+ case PyArray_LONG:
+ ret = (PyArrayObject *)outersect_long(array_info, num_arrays);
+ break;
+ case PyArray_FLOAT:
+ ret = (PyArrayObject *)outersect_float(array_info, num_arrays);
+ break;
+ case PyArray_DOUBLE:
+ ret = (PyArrayObject *)outersect_double(array_info, num_arrays);
+ break;
+ default:
+ PyErr_SetString(PyExc_ValueError, "bogus - unhandled array type");
+ }
+ free_array_info(array_info, num_arrays);
+
+ INSTRUMENT(("ssi", "outersect", "exit", ret->dimensions[0]));
+ return (PyObject *)ret;
+}
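+
+/* Usage sketch (same assumptions as the intersect example above):
+ * outersect keeps the values that do not occur in every input, which
+ * for two arrays is the symmetric difference.
+ *
+ *     import Numeric, soomfunc
+ *
+ *     a = Numeric.array([0, 2, 3, 4, 5, 7, 8, 9], typecode='l')
+ *     b = Numeric.array([0, 1, 2, 3, 5, 6, 8], typecode='l')
+ *     soomfunc.outersect(a, b)  # -> [1, 4, 6, 7, 9]
+ */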
+
+#define sparse_union_func_old(NAME, TYPE) \
+static PyObject *sparse_##NAME(ArrayInfo *array_info, int num_arrays) \
+{ \
+ ArrayInfo *info; \
+ int type_num, i, out_size, out_len; \
+ TYPE *out_data; \
+ \
+ out_size = type_num = 0; \
+ for (info = array_info, i = 0; i < num_arrays; info++, i++) { \
+ type_num = info->array->descr->type_num; \
+ if (info->len > out_size) \
+ out_size = info->len; \
+ } \
+ \
+ out_len = 0; \
+ out_data = malloc(out_size * sizeof(TYPE)); \
+ if (out_data == NULL) { \
+ PyErr_SetString(PyExc_MemoryError, "can't allocate memory for array"); \
+ return NULL; \
+ } \
+ \
+ for (;;) { \
+ ArrayInfo *use_info = NULL; \
+ int have_duplicate = 0; \
+ \
+ /* Find the minimum value \
+ */ \
+ for (info = array_info, i = 0; i < num_arrays; info++, i++) { \
+ if (info->index < info->len) { \
+ if (use_info == NULL) \
+ /* Minimum value is here (I think) \
+ */ \
+ use_info = info; \
+ else if (*(TYPE *)info->data < *(TYPE *)use_info->data) { \
+ /* Nope - minimum value is here (I think) \
+ */ \
+ use_info = info; \
+ have_duplicate = 0; \
+ } else if (*(TYPE *)info->data == *(TYPE *)use_info->data) { \
+ /* Nope - minimum (I think) value has duplicate \
+ */ \
+ have_duplicate = 1; \
+ } \
+ } \
+ } \
+ /* Now handle minimum value search result \
+ */ \
+ if (use_info) { \
+ /* Woohoo - found a minimum value \
+ */ \
+ char *data = use_info->data; \
+ \
+ if (out_len == out_size) { \
+ TYPE *new_data; \
+ \
+ out_size *= 2; \
+ new_data = realloc(out_data, out_size * sizeof(TYPE)); \
+ if (new_data == NULL) { \
+ PyErr_SetString(PyExc_MemoryError, "can't allocate memory for array"); \
+ free(out_data); \
+ return NULL; \
+ } \
+ out_data = new_data; \
+ } \
+ out_data[out_len++] = *(TYPE *)use_info->data; \
+ \
+ if (have_duplicate) { \
+ /* Skip over all duplicate values \
+ */ \
+ char *data = use_info->data; \
+ for (info = array_info, i = 0; i < num_arrays; info++, i++) { \
+ while (info->index < info->len \
+ && *(TYPE *)info->data == *(TYPE *)data) { \
+ info->data += info->stride; \
+ info->index++; \
+ } \
+ } \
+ } else { \
+ while (use_info->index < use_info->len \
+ && *(TYPE *)use_info->data == *(TYPE *)data) { \
+ use_info->data += use_info->stride; \
+ use_info->index++; \
+ } \
+ } \
+ } else { \
+ /* Finished scanning all arrays \
+ */ \
+ PyArrayObject *ret; \
+ int shape[1]; \
+ \
+ shape[0] = out_len; \
+ ret = (PyArrayObject *) \
+ PyArray_FromDimsAndData(1, shape, type_num, (char *)out_data); \
+ if (ret) \
+ ret->flags |= OWN_DATA; \
+ else \
+ free(out_data); \
+ return (PyObject *)ret; \
+ } \
+ } \
+}
+
+#define sparse_union_func(NAME, TYPE) \
+static inline int \
+_soomfunc_bisect_##NAME(ArrayInfo **array_info, int low, int high, \
+ ArrayInfo *insert_info) \
+{ \
+ int mid; \
+ TYPE d = *(TYPE *)insert_info->data; \
+ \
+ while (low < high) { \
+ mid = (low + high) / 2; \
+ if (d > *(TYPE *)array_info[mid]->data) \
+ low = mid + 1; \
+ else \
+ high = mid; \
+ } \
+ return low; \
+} \
+ \
+static PyObject *sparse_##NAME(ArrayInfo *unsorted_array_info, \
+ int num_unsorted_arrays) \
+{ \
+ ArrayInfo **array_info, *info; \
+ PyArrayObject *ret; \
+ int shape[1]; \
+ int type_num, num_arrays, i, j; \
+ int out_len = 0, out_size = 0; \
+ TYPE *out_data; \
+ TYPE data; \
+ \
+ type_num = unsorted_array_info->array->descr->type_num; \
+ \
+ array_info = malloc(sizeof(*array_info) * num_unsorted_arrays); \
+ if (array_info == NULL) \
+ return (PyObject *)PyErr_NoMemory(); \
+ num_arrays = 0; \
+ \
+ /* Sort the arrays by their first entry, ignoring empty arrays */ \
+ for (i = 0; i < num_unsorted_arrays; ++i) { \
+ info = unsorted_array_info + i; \
+ if (!info->len) \
+ continue; \
+ if (info->len > out_size) \
+ out_size = info->len; \
+ j = _soomfunc_bisect_##NAME(array_info, 0, num_arrays, info); \
+ memmove(array_info + j + 1, array_info + j, \
+ (num_arrays - j) * sizeof(*array_info)); \
+ array_info[j] = info; \
+ ++num_arrays; \
+ } \
+ /* \
+ for (i = 1; i < num_arrays; ++i) { \
+ if (*(int *)array_info[i]->data < *(int *)array_info[i-1]->data) \
+ printf("%5d: %d\n", i, *(int *)array_info[i]->data); \
+ } \
+ */ \
+ out_data = malloc(out_size * sizeof(TYPE)); \
+ if (out_data == NULL) { \
+ PyErr_SetString(PyExc_MemoryError, "can't allocate memory for array"); \
+ free(array_info); \
+ return NULL; \
+ } \
+ \
+ /* we now know the first array contains the lowest value - we return this \
+ * value, and look at the next value in the array. If the array is no \
+ * longer the one with the lowest first element, we move it to its new \
+ * sorted position in the list of arrays, then repeat */ \
+ while (num_arrays) { \
+ info = *array_info; \
+ if (out_len == out_size) { \
+ TYPE *new_data; \
+\
+ out_size *= 2; \
+ new_data = realloc(out_data, out_size * sizeof(TYPE)); \
+ if (new_data == NULL) { \
+ PyErr_SetString(PyExc_MemoryError, "can't allocate memory for array"); \
+ free(array_info); \
+ free(out_data); \
+ return NULL; \
+ } \
+ out_data = new_data; \
+ } \
+ data = *(TYPE *)info->data; \
+ out_data[out_len++] = data; \
+ \
+ /* Remove head of first array and any duplicates in other arrays */ \
+ for (i = 0; i < num_arrays; ++i) { \
+ info = array_info[i]; \
+ if (*(TYPE *)info->data != data) \
+ break; \
+ ++info->index; \
+ info->data += info->stride; \
+ } \
+ /* Arrays up to i are not necessarily in their correct positions now, \
+ * or they may be empty, in which case they can be discarded */ \
+ for (; i > 0; --i) { \
+ info = array_info[i - 1]; \
+ if (info->index >= info->len) { \
+ /* empty, remove */ \
+ memmove(array_info + i - 1, array_info + i, \
+ (num_arrays - i) * sizeof(*array_info)); \
+ --num_arrays; \
+ } else { \
+ if (i < num_arrays && \
+ *(TYPE *)array_info[i]->data < *(TYPE *)info->data) { \
+ /* needs to be moved */ \
+ j = _soomfunc_bisect_##NAME(array_info, i, num_arrays, info); \
+ /* \
+ printf("i %d, j %d, num_arrays %d, memmove %d (data %d, insert data %d)\n", \
+ i, j, num_arrays, (j - i), \
+ data, *(int *)array_info[j]->data); \
+ */ \
+ memmove(array_info + i - 1, array_info + i, \
+ (j - i) * sizeof(*array_info)); \
+ array_info[j-1] = info; \
+ } \
+ } \
+ } \
+ } \
+ \
+ free(array_info); \
+ \
+ shape[0] = out_len; \
+ ret = (PyArrayObject *) \
+ PyArray_FromDimsAndData(1, shape, type_num, (char *)out_data); \
+ if (ret) \
+ ret->flags |= OWN_DATA; \
+ else \
+ free(out_data); \
+ return (PyObject *)ret; \
+}
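+
+/* The control flow above is easier to see in a high-level model. The
+ * Python sketch below (illustrative only) computes the same union of
+ * sorted sequences; it re-sorts the whole work list each round where
+ * the C code bisects only the disturbed prefix, and like the C code it
+ * assumes no input array repeats a value internally.
+ *
+ *     def sparse_union(arrays):
+ *         work = sorted([list(a) for a in arrays if a],
+ *                       key=lambda a: a[0])
+ *         out = []
+ *         while work:
+ *             value = work[0][0]
+ *             out.append(value)
+ *             # advance every array whose head matches the value just
+ *             # emitted, dropping arrays that become empty
+ *             work = [a[1:] if a[0] == value else a for a in work]
+ *             work = sorted([a for a in work if a],
+ *                           key=lambda a: a[0])
+ *         return out
+ */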
+
+#define dense_union_func(NAME, TYPE) \
+static PyObject *dense_##NAME(ArrayInfo *array_info, int num_arrays) \
+{ \
+ int type_num = 0, i, v; \
+ ArrayInfo *info; \
+ TYPE min = 0, max = 0, *p; \
+ unsigned long arg_tot_len, range, out_size; \
+ unsigned char *map; \
+ int shape[1]; \
+ PyArrayObject *ret; \
+ \
+ info = array_info; \
+ arg_tot_len = 0; \
+ for (i = 0; i < num_arrays; i++) { \
+ type_num = info->array->descr->type_num; \
+ if (info->len) { \
+ if (!arg_tot_len || *(TYPE *)info->data < min) \
+ min = *(TYPE *)info->data; \
+ if (!arg_tot_len || *(TYPE *)(info->data + info->stride * (info->len - 1)) > max) \
+ max = *(TYPE *)(info->data + info->stride * (info->len - 1)); \
+ arg_tot_len += info->len; \
+ } \
+ info++; \
+ } \
+ if (!arg_tot_len) { \
+ shape[0] = 0; \
+ return (PyObject *)PyArray_FromDims(1, shape, type_num); \
+ } \
+ range = max - min + 1; \
+ /* \
+ printf("num_arrays %d, min %ld, max %ld, range %ld, arg_tot_len %ld, sparse %ld\n", \
+ num_arrays, (long)min, (long)max, (long)range, (long)arg_tot_len, \
+ (range / arg_tot_len)); \
+ */ \
+ if ((range / arg_tot_len) >= 3) \
+ return sparse_##NAME(array_info, num_arrays); \
+ map = malloc(range); \
+ if (map == NULL) { \
+ PyErr_SetString(PyExc_MemoryError, "can't allocate memory for array"); \
+ return NULL; \
+ } \
+ memset(map, 0, range); \
+ out_size = 0; \
+ for (info = array_info, i = 0; i < num_arrays; info++, i++) { \
+ for (; info->index < info->len; ++info->index) { \
+ v = *(TYPE *)info->data - min; \
+ if (v < 0 || v >= range) { \
+ PyErr_SetString(PyExc_ValueError, "arrays must be sorted"); \
+ free(map); \
+ return NULL; \
+ } \
+ if (!map[v]) { \
+ ++out_size; \
+ map[v] = 1; \
+ } \
+ info->data += info->stride; \
+ } \
+ } \
+ shape[0] = out_size; \
+ ret = (PyArrayObject *)PyArray_FromDims(1, shape, type_num); \
+ if (ret == NULL) { \
+ free(map); \
+ return NULL; \
+ } \
+ for (i = 0, p = (TYPE*)ret->data; i < range; ++i) \
+ if (map[i]) \
+ *p++ = i + min; \
+ free(map); \
+ return (PyObject *)ret; \
+}
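+
+/* Sketch of the dense strategy in Python (illustrative; sparse_union
+ * is the model given above): mark each value in a map covering
+ * [min, max], then read the set positions back in order. The
+ * range/length >= 3 test above is the density heuristic that falls
+ * back to the sparse merge.
+ *
+ *     def dense_union(arrays):
+ *         values = [v for a in arrays for v in a]
+ *         if not values:
+ *             return []
+ *         lo, hi = min(values), max(values)
+ *         if (hi - lo + 1) // len(values) >= 3:
+ *             return sparse_union(arrays)  # too sparse for a bitmap
+ *         seen = [False] * (hi - lo + 1)
+ *         for v in values:
+ *             seen[v - lo] = True
+ *         return [i + lo for i, hit in enumerate(seen) if hit]
+ */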
+
+sparse_union_func(union_schar, signed char)
+sparse_union_func(union_uchar, unsigned char)
+sparse_union_func(union_short, short)
+sparse_union_func(union_int, int)
+sparse_union_func(union_long, long)
+sparse_union_func(union_float, float)
+sparse_union_func(union_double, double)
+dense_union_func(union_schar, signed char)
+dense_union_func(union_uchar, unsigned char)
+dense_union_func(union_short, short)
+dense_union_func(union_int, int)
+dense_union_func(union_long, long)
+
+static char soomfunc_union__doc__[] =
+"union(a, b, ...) -> array\n"
+"\n"
+" Return the union of the rank-1 arrays passed. All arrays must\n"
+" have the same typecode.\n";
+
+static PyObject *soomfunc_union(PyObject *module, PyObject *args)
+{
+ ArrayInfo *array_info;
+ PyArrayObject *ret;
+ int type_num, num_arrays;
+
+ array_info = check_array_args(args, 2, -1, 0);
+ if (array_info == NULL)
+ return NULL;
+ type_num = array_info->array->descr->type_num;
+ num_arrays = PyTuple_Size(args);
+
+ INSTRUMENT(("ssi", "union", "enter", num_arrays));
+
+ /* Dispatch on element type: the integer types use the dense (bitmap)
+ * union, which itself falls back to the sparse merge for wide value
+ * ranges; the float types always use the sparse merge.
+ */
+ ret = NULL;
+ switch (type_num) {
+ case PyArray_CHAR:
+ case PyArray_SBYTE:
+ ret = (PyArrayObject *)dense_union_schar(array_info, num_arrays);
+ break;
+ case PyArray_UBYTE:
+ ret = (PyArrayObject *)dense_union_uchar(array_info, num_arrays);
+ break;
+ case PyArray_SHORT:
+ ret = (PyArrayObject *)dense_union_short(array_info, num_arrays);
+ break;
+ case PyArray_INT:
+ ret = (PyArrayObject *)dense_union_int(array_info, num_arrays);
+ break;
+ case PyArray_LONG:
+ ret = (PyArrayObject *)dense_union_long(array_info, num_arrays);
+ break;
+ case PyArray_FLOAT:
+ ret = (PyArrayObject *)sparse_union_float(array_info, num_arrays);
+ break;
+ case PyArray_DOUBLE:
+ ret = (PyArrayObject *)sparse_union_double(array_info, num_arrays);
+ break;
+ default:
+ PyErr_SetString(PyExc_ValueError, "bogus - unhandled array type");
+ }
+ free_array_info(array_info, num_arrays);
+
+ INSTRUMENT(("ssi", "union", "exit", ret->dimensions[0]));
+ return (PyObject *)ret;
+}
+
+#define difference_func(NAME, TYPE) \
+static void NAME(ArrayInfo *info1, ArrayInfo *info2, ArrayInfo *info_res) \
+{ \
+ char *prev; \
+ \
+ info1->data = info1->array->data; \
+ info2->data = info2->array->data; \
+ info_res->data = info_res->array->data; \
+ info1->index = info2->index = info_res->index = 0; \
+ \
+ INSTRUMENT(("ssii", "difference", "enter", info1->len, info2->len)); \
+ \
+ while (info1->index < info1->len && info2->index < info2->len) { \
+ if (*(TYPE *)info1->data < *(TYPE *)info2->data) { \
+ *(TYPE *)info_res->data = *(TYPE *)info1->data; \
+ info_res->index++; \
+ info_res->data += info_res->stride; \
+ \
+ prev = info1->data; \
+ info1->index++; \
+ info1->data += info1->stride; \
+ \
+ while (info1->index < info1->len \
+ && *(TYPE *)info1->data == *(TYPE *)prev) { \
+ info1->index++; \
+ info1->data += info1->stride; \
+ } \
+ } else if (*(TYPE *)info2->data < *(TYPE *)info1->data) { \
+ info2->index++; \
+ info2->data += info2->stride; \
+ } else { \
+ prev = info1->data; \
+ \
+ info1->index++; \
+ info1->data += info1->stride; \
+ while (info1->index < info1->len \
+ && *(TYPE *)info1->data == *(TYPE *)prev) { \
+ info1->index++; \
+ info1->data += info1->stride; \
+ } \
+ info2->index++; \
+ info2->data += info2->stride; \
+ while (info2->index < info2->len \
+ && *(TYPE *)info2->data == *(TYPE *)prev) { \
+ info2->index++; \
+ info2->data += info2->stride; \
+ } \
+ } \
+ } \
+ while (info1->index < info1->len) { \
+ *(TYPE *)info_res->data = *(TYPE *)info1->data; \
+ info_res->index++; \
+ info_res->data += info_res->stride; \
+ \
+ prev = info1->data; \
+ info1->index++; \
+ info1->data += info1->stride; \
+ \
+ while (info1->index < info1->len \
+ && *(TYPE *)info1->data == *(TYPE *)prev) { \
+ info1->index++; \
+ info1->data += info1->stride; \
+ } \
+ } \
+ \
+ info_res->len = info_res->index; \
+ \
+ INSTRUMENT(("ssi", "difference", "exit", info_res->len)); \
+}
+
+difference_func(difference_schar, signed char)
+difference_func(difference_uchar, unsigned char)
+difference_func(difference_short, short)
+difference_func(difference_int, int)
+difference_func(difference_long, long)
+difference_func(difference_float, float)
+difference_func(difference_double, double)
+
+static char soomfunc_difference__doc__[] =
+"difference(a, b, ...) -> array\n"
+"\n"
+" Return the result of subtracting the second and subsequent rank-1\n"
+" arrays from the first rank-1 array. All arrays must have the same\n"
+" typecode.\n";
+
+static PyObject *soomfunc_difference(PyObject *module, PyObject *args)
+{
+ ArrayInfo *array_info, info1, *info2, info_ret;
+ PyArrayObject *ret;
+ int shape[1];
+ int alloc_size, i;
+ int type_num, num_arrays;
+
+ array_info = check_array_args(args, 2, -1, 0);
+ if (array_info == NULL)
+ return NULL;
+ type_num = array_info->array->descr->type_num;
+ num_arrays = PyTuple_Size(args);
+
+ alloc_size = array_info->len;
+
+ /* Build the array to capture the difference */
+ shape[0] = alloc_size;
+ ret = (PyArrayObject *)PyArray_FromDims(1, shape, type_num);
+ if (ret == NULL) {
+ free_array_info(array_info, num_arrays);
+ return NULL;
+ }
+ set_array_info(&info_ret, ret);
+
+ /* Generate the difference. Subtract the second array from the
+ * first - after each operation substitute the result for the first
+ * array.
+ */
+ info1 = array_info[0];
+ for (i = 1, info2 = array_info + 1; i < num_arrays; i++, info2++) {
+ switch (type_num) {
+ case PyArray_CHAR:
+ case PyArray_SBYTE:
+ difference_schar(&info1, info2, &info_ret);
+ break;
+ case PyArray_UBYTE:
+ difference_uchar(&info1, info2, &info_ret);
+ break;
+ case PyArray_SHORT:
+ difference_short(&info1, info2, &info_ret);
+ break;
+ case PyArray_INT:
+ difference_int(&info1, info2, &info_ret);
+ break;
+ case PyArray_LONG:
+ difference_long(&info1, info2, &info_ret);
+ break;
+ case PyArray_FLOAT:
+ difference_float(&info1, info2, &info_ret);
+ break;
+ case PyArray_DOUBLE:
+ difference_double(&info1, info2, &info_ret);
+ break;
+ }
+ info1 = info_ret;
+ }
+ free_array_info(array_info, num_arrays);
+
+ ret->dimensions[0] = info_ret.len;
+ return (PyObject *)ret;
+}
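+
+/* Usage sketch (same assumptions as the intersect example above):
+ * difference returns the values of the first array that appear in
+ * none of the later arrays.
+ *
+ *     import Numeric, soomfunc
+ *
+ *     a = Numeric.array([0, 2, 3, 4, 5, 7, 8, 9], typecode='l')
+ *     b = Numeric.array([0, 1, 2, 3, 5, 6, 8], typecode='l')
+ *     soomfunc.difference(a, b)  # -> [4, 7, 9]
+ */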
+
+#define valuepos_func(NAME, TYPE) \
+static void NAME(ArrayInfo *info1, ArrayInfo *info2, ArrayInfo *info_res) \
+{ \
+ int index1, index_res; \
+ TYPE *data1; \
+ long *data_res; \
+ \
+ data1 = (TYPE*)info1->array->data; \
+ data_res = (long*)info_res->array->data; \
+ for (index1 = index_res = 0; index1 < info1->len; index1++, data1++) { \
+ int index2; \
+ TYPE *data2; \
+ \
+ data2 = (TYPE*)info2->array->data; \
+ for (index2 = 0; index2 < info2->len; index2++, data2++) \
+ if (*data1 == *data2) { \
+ *data_res++ = (long)index1; \
+ index_res++; \
+ break; /* record each index at most once, per the docstring */ \
+ } \
+ } \
+ info_res->len = index_res; \
+}
+
+valuepos_func(valuepos_schar, signed char)
+valuepos_func(valuepos_uchar, unsigned char)
+valuepos_func(valuepos_short, short)
+valuepos_func(valuepos_int, int)
+valuepos_func(valuepos_long, long)
+valuepos_func(valuepos_float, float)
+valuepos_func(valuepos_double, double)
+
+static char soomfunc_valuepos__doc__[] =
+"valuepos(a, b) -> array\n"
+"\n"
+" Return the equivalent of the following (where the result is c):\n"
+" c = []\n"
+" for i in range(len(a)):\n"
+" if a[i] in b:\n"
+" c.append(i)\n"
+" c = array(c)\n";
+
+static PyObject *soomfunc_valuepos(PyObject *module, PyObject *args)
+{
+ ArrayInfo *array_info, info_ret;
+ PyArrayObject *ret;
+ int shape[1];
+ int alloc_size;
+ int type_num, num_arrays;
+
+ array_info = check_array_args(args, 2, 2, 0);
+ if (array_info == NULL)
+ return NULL;
+ type_num = array_info->array->descr->type_num;
+ num_arrays = PyTuple_Size(args);
+
+ alloc_size = array_info->len;
+
+ INSTRUMENT(("ssii", "valuepos", "enter", array_info[0].len, array_info[1].len));
+
+ /* Build the array to capture the result */
+ shape[0] = alloc_size;
+ ret = (PyArrayObject *)PyArray_FromDims(1, shape, PyArray_LONG);
+ if (ret == NULL) {
+ free_array_info(array_info, num_arrays);
+ return NULL;
+ }
+ set_array_info(&info_ret, ret);
+
+ /* Generate the valuepos.
+ */
+ switch (type_num) {
+ case PyArray_CHAR:
+ case PyArray_SBYTE:
+ valuepos_schar(&array_info[0], &array_info[1], &info_ret);
+ break;
+ case PyArray_UBYTE:
+ valuepos_uchar(&array_info[0], &array_info[1], &info_ret);
+ break;
+ case PyArray_SHORT:
+ valuepos_short(&array_info[0], &array_info[1], &info_ret);
+ break;
+ case PyArray_INT:
+ valuepos_int(&array_info[0], &array_info[1], &info_ret);
+ break;
+ case PyArray_LONG:
+ valuepos_long(&array_info[0], &array_info[1], &info_ret);
+ break;
+ case PyArray_FLOAT:
+ valuepos_float(&array_info[0], &array_info[1], &info_ret);
+ break;
+ case PyArray_DOUBLE:
+ valuepos_double(&array_info[0], &array_info[1], &info_ret);
+ break;
+ }
+ free_array_info(array_info, num_arrays);
+
+ ret->dimensions[0] = info_ret.len;
+
+ INSTRUMENT(("ssi", "valuepos", "exit", info_ret.len));
+ return (PyObject *)ret;
+}
+
+#define preload_func(NAME, TYPE) \
+static void NAME(PyArrayObject *array, int preload) \
+{ \
+ int step; \
+ volatile TYPE *data; \
+ \
+ step = 4096 / sizeof(TYPE) / 2; \
+ for (data = ((volatile TYPE *)array->data) + preload; \
+ preload >= 0; preload -= step, data -= step) \
+ *data; \
+}
+
+preload_func(preload_schar, signed char)
+preload_func(preload_uchar, unsigned char)
+preload_func(preload_short, short)
+preload_func(preload_int, int)
+preload_func(preload_long, long)
+preload_func(preload_float, float)
+preload_func(preload_double, double)
+
+static char soomfunc_preload__doc__[] =
+"preload(a, num) -> array\n"
+"\n"
+" Step backwards over num entries of the array forcing the pages\n"
+" into memory.\n";
+
+static PyObject *soomfunc_preload(PyObject *module, PyObject *args)
+{
+ PyObject *obj;
+ PyArrayObject *array;
+ int preload = -1;
+
+ if (!PyArg_ParseTuple(args, "O|i", &obj, &preload))
+ return NULL;
+ array = (PyArrayObject *)PyArray_FromObject(obj, PyArray_NOTYPE, 0, 0);
+ if (array == NULL)
+ return NULL;
+ if (array->nd != 1) {
+ PyErr_SetString(PyExc_ValueError, "arrays must be rank-1");
+ Py_DECREF(array);
+ return NULL;
+ }
+
+ if (preload < 0 || preload >= array->dimensions[0])
+ preload = array->dimensions[0] - 1;
+
+ INSTRUMENT(("ssi", "preload", "enter", preload));
+
+ switch (array->descr->type_num) {
+ case PyArray_CHAR:
+ case PyArray_SBYTE:
+ preload_schar(array, preload);
+ break;
+ case PyArray_UBYTE:
+ preload_uchar(array, preload);
+ break;
+ case PyArray_SHORT:
+ preload_short(array, preload);
+ break;
+ case PyArray_INT:
+ preload_int(array, preload);
+ break;
+ case PyArray_LONG:
+ preload_long(array, preload);
+ break;
+ case PyArray_FLOAT:
+ preload_float(array, preload);
+ break;
+ case PyArray_DOUBLE:
+ preload_double(array, preload);
+ break;
+ }
+
+ Py_DECREF(array);
+ Py_INCREF(Py_None);
+
+ INSTRUMENT(("ss", "preload", "exit"));
+ return Py_None;
+}
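+
+/* Usage sketch (same assumptions as the intersect example above):
+ * preload touches the pages of an array from the back towards the
+ * front so that a following sequential pass does not fault them in
+ * one at a time; it is chiefly useful for memory-mapped arrays.
+ *
+ *     import Numeric, soomfunc
+ *
+ *     a = Numeric.arrayrange(100000)
+ *     soomfunc.preload(a)        # touch pages for the whole array
+ *     soomfunc.preload(a, 4096)  # touch pages for entries 0..4096
+ */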
+
+static char soomfunc_set_instrument__doc__[] =
+"set_instrument(func) -> old_func";
+
+static PyObject *soomfunc_set_instrument(PyObject *module, PyObject *args)
+{
+ PyObject *obj, *res;
+
+ if (!PyArg_ParseTuple(args, "O", &obj))
+ return NULL;
+
+ res = instrument_func;
+ instrument_func = obj;
+ Py_INCREF(instrument_func);
+ return res;
+}
+
+static char soomfunc_strip_word__doc__[] =
+"strip_word(string) -> string\n"
+"\n"
+" Remove apostrophes from a string and convert it to uppercase.\n";
+
+static PyObject *soomfunc_strip_word(PyObject *module, PyObject *args)
+{
+ PyObject *res;
+ char *word, *new_word, *p, *q;
+ int word_length;
+
+ if (!PyArg_ParseTuple(args, "s#", &word, &word_length))
+ return NULL;
+ if (!(new_word = malloc(word_length))) {
+ PyErr_SetString(PyExc_MemoryError, "could not allocate new string");
+ return NULL;
+ }
+ for (p = word, q = new_word; word_length; word_length--) {
+ if (*p == '\'')
+ p++;
+ else
+ *q++ = toupper(*p++);
+ }
+
+ res = Py_BuildValue("s#", new_word, q - new_word);
+ free(new_word);
+ return res;
+}
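+
+/* Usage sketch (assumes the module is importable as "soomfunc"):
+ *
+ *     soomfunc.strip_word("don't")  # -> "DONT"
+ */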
+
+static struct PyMethodDef soomfunc_methods[] = {
+ { "unique", (PyCFunction)soomfunc_unique, METH_VARARGS, soomfunc_unique__doc__ },
+ { "intersect", (PyCFunction)soomfunc_intersect, METH_VARARGS, soomfunc_intersect__doc__ },
+ { "sparse_intersect", (PyCFunction)soomfunc_sparse_intersect, METH_VARARGS, soomfunc_sparse_intersect__doc__ },
+ { "dense_intersect", (PyCFunction)soomfunc_dense_intersect, METH_VARARGS, soomfunc_dense_intersect__doc__ },
+ { "outersect", (PyCFunction)soomfunc_outersect, METH_VARARGS, soomfunc_outersect__doc__ },
+ { "union", (PyCFunction)soomfunc_union, METH_VARARGS, soomfunc_union__doc__ },
+ { "difference", (PyCFunction)soomfunc_difference, METH_VARARGS, soomfunc_difference__doc__ },
+ { "valuepos", (PyCFunction)soomfunc_valuepos, METH_VARARGS, soomfunc_valuepos__doc__ },
+ { "preload", (PyCFunction)soomfunc_preload, METH_VARARGS, soomfunc_preload__doc__ },
+
+ { "set_instrument", (PyCFunction)soomfunc_set_instrument, METH_VARARGS, soomfunc_set_instrument__doc__ },
+ { "strip_word", (PyCFunction)soomfunc_strip_word, METH_VARARGS, soomfunc_strip_word__doc__ },
+ { NULL, (PyCFunction)NULL, 0, NULL } /* sentinel */
+};
+
+static char soomfunc_module__doc__[] =
+"A collection of Numpy extension and other utility functions\n"
+"intended for use in the SOOM and prototype mortality analysis\n"
+"system project.\n";
+
+void initsoomfunc(void)
+{
+ PyObject *module, *dict, *ver = NULL;
+
+ /* Create the module and add the functions */
+ module = Py_InitModule4("soomfunc", soomfunc_methods,
+ soomfunc_module__doc__,
+ (PyObject*)NULL, PYTHON_API_VERSION);
+ if (module == NULL)
+ goto error;
+
+ /* Set instrument func to None */
+ instrument_func = Py_None;
+ Py_INCREF(instrument_func);
+
+ import_array();
+ if ((dict = PyModule_GetDict(module)) == NULL)
+ goto error;
+ if ((ver = PyString_FromString("0.8")) == NULL)
+ goto error;
+ if (PyDict_SetItemString(dict, "__version__", ver) < 0)
+ goto error;
+
+error:
+ Py_XDECREF(ver);
+ /* Check for errors */
+ if (PyErr_Occurred())
+ Py_FatalError("can't initialize module soomfunc");
+}
diff --git a/soomext/storage.c b/soomext/storage.c
new file mode 100644
index 0000000..44bba55
--- /dev/null
+++ b/soomext/storage.c
@@ -0,0 +1,623 @@
+/*
+ * The contents of this file are subject to the HACOS License Version 1.2
+ * (the "License"); you may not use this file except in compliance with
+ * the License. Software distributed under the License is distributed
+ * on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ * implied. See the LICENSE file for the specific language governing
+ * rights and limitations under the License. The Original Software
+ * is "NetEpi Analysis". The Initial Developer of the Original
+ * Software is the Health Administration Corporation, incorporated in
+ * the State of New South Wales, Australia.
+ *
+ * Copyright (C) 2004,2005 Health Administration Corporation.
+ * All Rights Reserved.
+ */
+/*-------------------------------------------------------------------
+ * Implement a simple sequential storage manager. Each BLOB is
+ * accessed via an integer index.
+ *
+ * All of the BLOB data is managed by a table of BlobDesc structures.
+ * By keeping the structures which control the data in one location we
+ * can avoid scanning (and paging in) the entire file.
+ */
+#include "Python.h"
+#include "storage.h"
+
+#define GROW_INC (64 * 1024)
+#define TABLE_INC 1024
+
+static PyObject *index_error(void)
+{
+ static PyObject *str;
+
+ if (str == NULL)
+ str = PyString_FromString("list index out of range");
+ return str;
+}
+
+static int grow_file_error(MmapBlobStore *sm, size_t old_size, char *reason)
+{
+ PyErr_SetFromErrnoWithFilename(PyExc_IOError, reason);
+ fprintf(stderr, "grow_file() %s failed: old-size:%ul new-size:%ul\n",
+ reason, old_size, sm->size);
+ return -1;
+}
+
+/* Grow the data store file to at least the specified min_size -
+ * return success status.
+ */
+static int grow_file(MmapBlobStore *sm, off_t min_size)
+{
+ size_t old_size;
+
+ old_size = sm->size;
+ sm->cycle++;
+ sm->size = min_size;
+ if (sm->size % GROW_INC)
+ sm->size += GROW_INC - sm->size % GROW_INC;
+ if (sm->header != (StoreHeader*)-1) {
+ if (munmap(sm->header, old_size) < 0)
+ return grow_file_error(sm, old_size, "munmap");
+ sm->header = (StoreHeader*)-1;
+ }
+ if (lseek(sm->fd, sm->size - 1, SEEK_SET) < 0)
+ return grow_file_error(sm, old_size, "lseek");
+ if (write(sm->fd, "", 1) != 1)
+ return grow_file_error(sm, old_size, "write");
+ sm->header = (StoreHeader*)
+ mmap(0, sm->size, sm->prot, MAP_SHARED, sm->fd, 0);
+ if (sm->header == (StoreHeader*)-1)
+ return grow_file_error(sm, old_size, "mmap");
+ return 0;
+}
+
+/* Return a pointer to the BlobDesc specified by index.
+ */
+static BlobDesc *get_desc(MmapBlobStore *sm, int index)
+{
+ return (BlobDesc*)((((char*)sm->header) + sm->header->table_loc)) + index;
+}
+
+/* Return the address of the BLOB data managed by desc.
+ */
+void *store_blob_address(MmapBlobStore *sm, BlobDesc *desc)
+{
+ return ((char*)sm->header) + desc->loc;
+}
+
+/* Retrieve the BLOB sequence
+ */
+static int *get_sequence(MmapBlobStore *sm)
+{
+ return store_blob_address(sm, get_desc(sm, sm->header->seq_index));
+}
+
+/* Enforce data alignment.
+ */
+static off_t data_align(off_t offset)
+{
+ int frag;
+
+ frag = offset % sizeof(int);
+ if (frag)
+ return offset + sizeof(int) - frag;
+ return offset;
+}
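+
+/* Worked example of the alignment arithmetic in Python (a model only;
+ * the alignment word here is sizeof(int), assumed to be 4 bytes):
+ *
+ *     def data_align(offset, word=4):
+ *         frag = offset % word
+ *         return offset + word - frag if frag else offset
+ *
+ *     data_align(5)  # -> 8
+ *     data_align(8)  # -> 8
+ */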
+
+/* Initialise a new BLOB store file.
+ */
+static int init_store(MmapBlobStore *sm)
+{
+ BlobDesc *table_desc;
+
+ /* allocate initial block of data*/
+ if (grow_file(sm, GROW_INC) < 0)
+ return -1;
+ /* initialise the header */
+ sm->header->table_loc = sizeof(*sm->header);
+ sm->header->table_size = TABLE_INC;
+ sm->header->table_len = 1;
+ sm->header->table_index = 0;
+
+ sm->header->seq_index = -1;
+ sm->header->seq_size = 0;
+ sm->header->seq_len = 0;
+
+ table_desc = get_desc(sm, 0);
+ table_desc->loc = sm->header->table_loc;
+ table_desc->len = sm->header->table_size * sizeof(*table_desc);
+ table_desc->size = data_align(table_desc->len);
+ table_desc->status = BLOB_TABLE;
+ table_desc->type = table_desc->other = 0;
+ return 0;
+}
+
+/* Grow the table which contains all of the BlobDesc structures.
+ * This is a bit complex because the table is actually contained in
+ * space managed by the table.
+ */
+static int grow_table(MmapBlobStore *sm)
+{
+ BlobDesc *last_desc, *table_desc, *new_table_desc;
+ int old_table_index;
+ off_t new_table_loc;
+ size_t data_len, data_size;
+
+ /* the location of the new table will be at the end of the file */
+ last_desc = get_desc(sm, sm->header->table_len - 1);
+ new_table_loc = last_desc->loc + last_desc->size;
+ data_len = (sm->header->table_size + TABLE_INC) * sizeof(*last_desc);
+ data_size = data_align(data_len);
+ if (new_table_loc + data_size >= sm->size) {
+ /* grow the file */
+ if (grow_file(sm, new_table_loc + data_size) < 0)
+ return -1;
+ }
+
+ /* current table is stored in desc (sm->header->table_index) */
+ old_table_index = sm->header->table_index;
+ table_desc = get_desc(sm, old_table_index);
+ /* we want to move it to address of new_table_loc */
+ memmove(((char*)sm->header) + new_table_loc,
+ store_blob_address(sm, table_desc),
+ sm->header->table_len * sizeof(*table_desc));
+
+ /* update the file header */
+ sm->header->table_loc = new_table_loc;
+ sm->header->table_size += TABLE_INC;
+ sm->header->table_index = sm->header->table_len;
+ /* now setup new desc for new desc table */
+ new_table_desc = get_desc(sm, sm->header->table_len);
+ new_table_desc->loc = new_table_loc;
+ new_table_desc->size = data_size;
+ new_table_desc->len = data_len;
+ new_table_desc->status = BLOB_TABLE;
+ new_table_desc->type = new_table_desc->other = 0;
+ /* use up the desc used to store the new desc table */
+ sm->header->table_len++;
+ /* free desc which held old desc table */
+ table_desc = get_desc(sm, old_table_index);
+ table_desc->status = BLOB_FREE;
+
+ return 0;
+}
+
+/* Find the index of a free BlobDesc.
+ */
+static int find_free_blob(MmapBlobStore *sm, size_t data_len)
+{
+ BlobDesc *table_desc, *desc;
+ int i, best, wasted;
+
+ table_desc = get_desc(sm, sm->header->table_index);
+ desc = store_blob_address(sm, table_desc);
+ best = -1;
+ wasted = 0;
+ for (i = 0; i < sm->header->table_size; i++, desc++)
+ if (desc->status == BLOB_FREE && desc->size > data_len) {
+ if (best < 0 || desc->size - data_len < wasted) {
+ best = i;
+ wasted = desc->size - data_len;
+ }
+ }
+ return best;
+}
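+
+/* The scan above is a classic best-fit search. Python model
+ * (illustrative only; "table" stands for the descriptor entries and
+ * mirrors the BlobDesc fields):
+ *
+ *     def find_free_blob(table, data_len):
+ *         best, wasted = -1, 0
+ *         for i, desc in enumerate(table):
+ *             if desc.status == BLOB_FREE and desc.size > data_len:
+ *                 if best < 0 or desc.size - data_len < wasted:
+ *                     best, wasted = i, desc.size - data_len
+ *         return best
+ */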
+
+/* Reuse a free BlobDesc.
+ */
+static void reuse_free_blob(MmapBlobStore *sm, int index, size_t data_len)
+{
+ BlobDesc *desc;
+
+ desc = get_desc(sm, index);
+ desc->len = data_len;
+ desc->status = BLOB_DATA;
+ desc->type = desc->other = 0;
+}
+
+/* Allocate a new BlobDesc.
+ */
+static int allocate_new_blob(MmapBlobStore *sm, size_t data_len, int status)
+{
+ BlobDesc *desc, *prev_desc;
+ off_t desc_loc;
+ int index;
+ size_t data_size;
+
+ data_size = data_align(data_len);
+ if (sm->header->table_len == sm->header->table_size) {
+ /* grow the table */
+ if (grow_table(sm) < 0)
+ return -1;
+ }
+ prev_desc = get_desc(sm, sm->header->table_len - 1);
+ desc_loc = prev_desc->loc + prev_desc->size;
+ if (desc_loc + data_size >= sm->size) {
+ /* grow the file */
+ if (grow_file(sm, desc_loc + data_size) < 0)
+ return -1;
+ }
+ index = sm->header->table_len;
+ desc = get_desc(sm, index);
+ sm->header->table_len++;
+ desc->loc = desc_loc;
+ desc->size = data_size;
+ desc->len = data_len;
+ desc->status = status;
+ desc->type = desc->other = 0;
+ return index;
+}
+
+static int grow_last_desc(MmapBlobStore *sm, size_t data_len)
+{
+ size_t data_size;
+ BlobDesc *desc;
+
+ desc = get_desc(sm, sm->header->table_len - 1);
+ data_size = data_align(data_len);
+ if (desc->loc + data_size >= sm->size) {
+ /* grow the file */
+ if (grow_file(sm, desc->loc + data_size) < 0)
+ return -1;
+ desc = get_desc(sm, sm->header->table_len - 1);
+ }
+ desc->len = data_len;
+ desc->size = data_size;
+ return 0;
+}
+
+/* Find a free BLOB or allocate a new BLOB for specified size.
+ */
+static int allocate_blob(MmapBlobStore *sm, size_t data_len, int status)
+{
+ int index;
+
+ /* first look for a free BLOB */
+ index = find_free_blob(sm, data_len);
+ if (index < 0) {
+ /* allocate a new BLOB */
+ index = allocate_new_blob(sm, data_len, status);
+ if (index < 0)
+ return -1;
+ } else
+ reuse_free_blob(sm, index, data_len);
+ return index;
+}
+
+/* Change the size of a BLOB.
+ */
+static int grow_blob(MmapBlobStore *sm, int index, size_t data_len)
+{
+ int new_index;
+ BlobDesc *desc, *new_desc;
+
+ if (index == sm->header->table_len - 1) {
+ /* this is the last BLOB in the file - just make it bigger */
+ if (grow_last_desc(sm, data_len) < 0)
+ return -1;
+ return index;
+ }
+
+ desc = get_desc(sm, index);
+ new_index = allocate_blob(sm, data_len, desc->status);
+ if (new_index < 0)
+ return -1;
+ desc = get_desc(sm, index);
+ new_desc = get_desc(sm, new_index);
+ new_desc->type = desc->type;
+ new_desc->other = desc->other;
+ memmove(store_blob_address(sm, new_desc),
+ store_blob_address(sm, desc), desc->size);
+ desc->status = BLOB_FREE;
+ return new_index;
+}
+
+/* Add a new entry to the BLOB sequence - return the index assigned.
+ */
+static int array_append(MmapBlobStore *sm, int index)
+{
+ BlobDesc *desc;
+ int *seq;
+
+ /* Initialise the BLOB sequence if it does not exist.
+ */
+ if (sm->header->seq_index < 0) {
+ sm->header->seq_index = allocate_new_blob(sm, TABLE_INC * sizeof(*seq), BLOB_SEQUENCE);
+ if (sm->header->seq_index < 0)
+ return -1;
+ sm->header->seq_size = TABLE_INC;
+ desc = get_desc(sm, sm->header->seq_index);
+ seq = store_blob_address(sm, desc);
+ memset(seq, 0, TABLE_INC * sizeof(*seq));
+ } else
+ desc = get_desc(sm, sm->header->seq_index);
+ /* Grow the BLOB sequence if necessary.
+ */
+ if (sm->header->seq_len == sm->header->seq_size) {
+ size_t new_size;
+
+ sm->header->seq_size += TABLE_INC;
+ new_size = sm->header->seq_size * sizeof(*seq);
+ sm->header->seq_index = grow_blob(sm, sm->header->seq_index, new_size);
+ if (sm->header->seq_index < 0)
+ return -1;
+ desc = get_desc(sm, sm->header->seq_index);
+ }
+ seq = store_blob_address(sm, desc);
+ /* Place BLOB desc into sequence and return new sequence index.
+ */
+ seq[sm->header->seq_len] = index;
+ return sm->header->seq_len++;
+}
+
+/* Return the number of BLOBs in the store.
+ */
+int store_num_blobs(MmapBlobStore *sm)
+{
+ return sm->header->seq_len;
+}
+
+/* Open a BLOB store.
+ */
+MmapBlobStore *store_open(char *filename, char *mode)
+{
+ MmapBlobStore *sm;
+ struct stat st;
+
+ sm = malloc(sizeof(*sm));
+ if (sm == NULL)
+ return (MmapBlobStore*)PyErr_NoMemory();
+
+ memset(sm, 0, sizeof(*sm));
+ sm->fd = -1;
+ sm->header = (StoreHeader*)-1;
+ sm->cycle = 0;
+
+ /* make sure mode is valid */
+ if (strcmp(mode, "r") == 0) {
+ sm->mode = O_RDONLY;
+ sm->prot = PROT_READ;
+ } else if (strcmp(mode, "r+") == 0) {
+ sm->mode = O_RDWR;
+ sm->prot = PROT_READ|PROT_WRITE;
+ } else if (strcmp(mode, "w+") == 0 || strcmp(mode, "w") == 0) {
+ sm->mode = O_RDWR|O_CREAT|O_TRUNC;
+ sm->prot = PROT_READ|PROT_WRITE;
+ } else {
+ PyErr_SetString(PyExc_ValueError, "mode must be 'r', 'r+', 'w' or 'w+'");
+ goto error;
+ }
+ /* open the file */
+ if (sm->mode & O_CREAT)
+ sm->fd = open(filename, sm->mode, 0666);
+ else
+ sm->fd = open(filename, sm->mode);
+ if (sm->fd < 0) {
+ PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename);
+ goto error;
+ }
+ if (fstat(sm->fd, &st) < 0) {
+ PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename);
+ goto error;
+ }
+ /* map the file */
+ sm->size = st.st_size;
+ if (sm->size > 0) {
+ sm->header = (StoreHeader*)
+ mmap(0, sm->size, sm->prot, MAP_SHARED, sm->fd, 0);
+ if ((caddr_t)sm->header == (caddr_t)-1) {
+ PyErr_SetFromErrnoWithFilename(PyExc_IOError, "mmap");
+ goto error;
+ }
+ } else if (sm->mode & O_CREAT)
+ /* we just created the store, initialise internal structures */
+ if (init_store(sm) < 0)
+ goto error;
+
+ return sm;
+
+error:
+ if (sm->fd >= 0)
+ close(sm->fd);
+ if ((caddr_t)sm->header != (caddr_t)-1)
+ munmap(sm->header, sm->size);
+ free(sm);
+ return NULL;
+}
+
+/* Allocate a new index in the BLOB sequence.
+ *
+ * The first time the BLOB is accessed a BlobDesc will be allocated in
+ * the table.
+ */
+int store_append(MmapBlobStore *sm)
+{
+ return array_append(sm, -1);
+}
+
+/* Return the BlobDesc referenced by index.
+ */
+BlobDesc *store_get_blobdesc(MmapBlobStore *sm, int index, int from_seq)
+{
+ if (from_seq) {
+ int *seq, raw_index;
+
+ if (index >= sm->header->seq_len) {
+ PyErr_SetObject(PyExc_IndexError, index_error());
+ return NULL;
+ }
+ seq = get_sequence(sm);
+ if (seq[index] < 0) {
+ /* allocate empty BLOB descriptor */
+ raw_index = allocate_blob(sm, 0, BLOB_DATA);
+ if (raw_index < 0)
+ return NULL;
+ seq = get_sequence(sm);
+ seq[index] = raw_index;
+ }
+ return get_desc(sm, seq[index]);
+ }
+
+ if (index >= sm->header->table_len) {
+ PyErr_SetObject(PyExc_IndexError, index_error());
+ return NULL;
+ }
+
+ return get_desc(sm, index);
+}
+
+/* Return the BLOB store header.
+ */
+int store_get_header(MmapBlobStore *sm, StoreHeader *header)
+{
+ *header = *sm->header;
+ return 0;
+}
+
+int store_blob_size(MmapBlobStore *sm, int index)
+{
+ int *seq;
+ BlobDesc *desc;
+
+ if (index >= sm->header->seq_len) {
+ PyErr_SetObject(PyExc_IndexError, index_error());
+ return -1;
+ }
+
+ seq = get_sequence(sm);
+ if (seq[index] < 0)
+ return -1;
+ desc = get_desc(sm, seq[index]);
+ return desc->len;
+}
+
+int store_blob_resize(MmapBlobStore *sm, int index, size_t data_len)
+{
+ int *seq, raw_index;
+ BlobDesc *desc;
+
+ if (index >= sm->header->seq_len) {
+ PyErr_SetObject(PyExc_IndexError, index_error());
+ return -1;
+ }
+
+ /* get current desc for BLOB */
+ seq = get_sequence(sm);
+ raw_index = seq[index];
+ if (raw_index < 0) {
+ raw_index = allocate_blob(sm, data_len, BLOB_DATA);
+ if (raw_index < 0)
+ return -1;
+ seq = get_sequence(sm);
+ seq[index] = raw_index;
+ return 0;
+ }
+
+ desc = get_desc(sm, raw_index);
+ if (desc->size < data_len) {
+ /* need to resize the blob */
+ int new_index;
+
+ /* grow BLOB */
+ new_index = grow_blob(sm, raw_index, data_len);
+ if (new_index < 0)
+ return -1;
+ if (new_index != raw_index) {
+ /* update the BLOB sequence to point at the new BLOB */
+ seq = get_sequence(sm);
+ seq[index] = new_index;
+ }
+ } else
+ desc->len = data_len;
+
+ return 0;
+}
+
+int store_blob_free(MmapBlobStore *sm, int index)
+{
+ int *seq, raw_index;
+ BlobDesc *desc;
+
+ if (index >= sm->header->seq_len) {
+ PyErr_SetObject(PyExc_IndexError, index_error());
+ return -1;
+ }
+
+ seq = get_sequence(sm);
+ if (seq[index] < 0)
+ return 0;
+
+ raw_index = seq[index];
+ seq[index] = -1;
+ desc = get_desc(sm, raw_index);
+ desc->status = BLOB_FREE;
+ return 0;
+}
+
+/* Compress all free space out of the BLOB store - return amount of
+ * space freed. Not yet implemented: currently a no-op that always
+ * reports zero bytes freed.
+ */
+size_t store_compress(MmapBlobStore *sm)
+{
+ return 0;
+}
+
+/* Return mmap() cycle
+ */
+int store_cycle(MmapBlobStore *sm)
+{
+ return sm->cycle;
+}
+
+/* Return amount of space in use and amount free.
+ */
+void store_usage(MmapBlobStore *sm, size_t *used, size_t *unused)
+{
+ BlobDesc *table_desc, *desc;
+ int i;
+
+ *used = *unused = 0;
+ table_desc = get_desc(sm, sm->header->table_index);
+ desc = store_blob_address(sm, table_desc);
+ for (i = 0; i < sm->header->table_size; i++, desc++)
+ if (desc->status == BLOB_FREE)
+ *unused += desc->size;
+ else {
+ *used += desc->len;
+ *unused += desc->size - desc->len;
+ }
+}
+
+/* Close the BLOB store - return success status
+ */
+int store_close(MmapBlobStore *sm)
+{
+ BlobDesc *desc;
+ off_t file_size;
+
+ desc = get_desc(sm, sm->header->table_len - 1);
+ file_size = data_align(desc->loc + desc->size);
+
+ if (sm->header != (StoreHeader*)-1) {
+ if (munmap(sm->header, sm->size) < 0) {
+ PyErr_SetFromErrnoWithFilename(PyExc_IOError, "munmap");
+ return -1;
+ }
+ sm->header = (StoreHeader*)-1;
+ }
+
+ if (sm->fd >= 0 && file_size != sm->size) {
+ if (ftruncate(sm->fd, file_size) < 0) {
+ PyErr_SetFromErrnoWithFilename(PyExc_IOError, "ftruncate");
+ return -1;
+ }
+ }
+ if (sm->fd >= 0) {
+ if (close(sm->fd) < 0) {
+ PyErr_SetFromErrnoWithFilename(PyExc_IOError, "close");
+ return -1;
+ }
+ sm->fd = -1;
+ }
+
+ free(sm);
+ return 0;
+}
diff --git a/soomext/storage.h b/soomext/storage.h
new file mode 100644
index 0000000..8574f62
--- /dev/null
+++ b/soomext/storage.h
@@ -0,0 +1,69 @@
+/*
+ * The contents of this file are subject to the HACOS License Version 1.2
+ * (the "License"); you may not use this file except in compliance with
+ * the License. Software distributed under the License is distributed
+ * on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ * implied. See the LICENSE file for the specific language governing
+ * rights and limitations under the License. The Original Software
+ * is "NetEpi Analysis". The Initial Developer of the Original
+ * Software is the Health Administration Corporation, incorporated in
+ * the State of New South Wales, Australia.
+ *
+ * Copyright (C) 2004,2005 Health Administration Corporation.
+ * All Rights Reserved.
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+enum {
+ BLOB_FREE, /* blob is free space */
+ BLOB_TABLE, /* blob contains the blob table! */
+ BLOB_SEQUENCE, /* blob contains the sequence lookup table */
+ BLOB_DATA /* blob contains data */
+};
+
+typedef struct {
+ off_t loc; /* location of the blob */
+ size_t size; /* size of blob */
+ size_t len; /* length of blob */
+ int status; /* status of blob */
+ int type; /* user: type of blob */
+ int other; /* user: index of related blob */
+} BlobDesc;
+
+typedef struct {
+ off_t table_loc; /* where the blob table is stored */
+ int table_size; /* allocated table size */
+ int table_len; /* entries used in the table */
+ int table_index; /* blob which contains the blob table */
+
+ int seq_index; /* blob which contains the array lookup */
+ int seq_size; /* allocated sequence size */
+ int seq_len; /* number of blobs in the sequence */
+} StoreHeader;
+
+typedef struct {
+ int mode; /* file open mode */
+ int fd; /* file descriptor */
+ int prot; /* mmap prot */
+ StoreHeader *header; /* address of start of store */
+ size_t size; /* size of file */
+ int cycle; /* increment when file remapped */
+} MmapBlobStore;
+
+MmapBlobStore *store_open(char *filename, char *mode);
+int store_append(MmapBlobStore *sm);
+void *store_blob_address(MmapBlobStore *sm, BlobDesc *desc);
+int store_num_blobs(MmapBlobStore *sm);
+int store_get_header(MmapBlobStore *sm, StoreHeader *header);
+void store_usage(MmapBlobStore *sm, size_t *used, size_t *unused);
+int store_close(MmapBlobStore *sm);
+size_t store_compress(MmapBlobStore *sm);
+int store_cycle(MmapBlobStore *sm);
+BlobDesc *store_get_blobdesc(MmapBlobStore *sm, int index, int from_seq);
+int store_blob_resize(MmapBlobStore *sm, int index, size_t data_len);
+int store_blob_free(MmapBlobStore *sm, int index);
+int store_blob_size(MmapBlobStore *sm, int index);
diff --git a/soomext/test/Makefile b/soomext/test/Makefile
new file mode 100644
index 0000000..f19a60b
--- /dev/null
+++ b/soomext/test/Makefile
@@ -0,0 +1,12 @@
+# Run test suite.
+#
+# $Id: Makefile 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/soomext/test/Makefile,v $
+
+PYTHON=PYTHONPATH=.. python
+
+all:
+ $(PYTHON) all.py
+
+clean:
+ rm -rf *.pyc
diff --git a/soomext/test/all.py b/soomext/test/all.py
new file mode 100644
index 0000000..6c08f45
--- /dev/null
+++ b/soomext/test/all.py
@@ -0,0 +1,39 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+"""
+Test package modules
+
+The module files must have a function "suite" which returns
+an instance of unittest.TestSuite or unittest.TestCase.
+
+$Id: all.py 2626 2007-03-09 04:35:54Z andrewm $
+$Source: /usr/local/cvsroot/NSWDoH/SOOMv0/soomext/test/all.py,v $
+"""
+
+import unittest
+
+class AllTestSuite(unittest.TestSuite):
+ all_tests = [
+ "soomfunctest",
+ ]
+
+ def __init__(self):
+ unittest.TestSuite.__init__(self)
+ for module_name in self.all_tests:
+ module = __import__(module_name, globals())
+ self.addTest(module.suite())
+
+if __name__ == '__main__':
+ unittest.main(defaultTest='AllTestSuite')
diff --git a/soomext/test/soomarraytest.py b/soomext/test/soomarraytest.py
new file mode 100644
index 0000000..9d0550d
--- /dev/null
+++ b/soomext/test/soomarraytest.py
@@ -0,0 +1,228 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: soomarraytest.py 2736 2007-07-17 02:58:24Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/soomext/test/soomarraytest.py,v $
+
+import os
+import gc
+import random
+import unittest
+import soomarray
+import tempfile
+from mx.DateTime import DateTime, Date, Time
+import MA, Numeric
+
+class TempFile:
+ def __init__(self, name):
+ self.tempdir = tempfile.mkdtemp()
+ self.tempfilename = os.path.join(self.tempdir, name)
+
+ def fn(self):
+ return self.tempfilename
+
+ def done(self):
+ try:
+ os.unlink(self.tempfilename)
+ except OSError:
+ pass
+ os.rmdir(self.tempdir)
+
+ def __del__(self):
+ try:
+ self.done()
+ except:
+ pass
+
+
+class ArrayDictTest(unittest.TestCase):
+ # This also tests MmapArray
+ def test_arraydict(self):
+ def _check(a):
+ self.assertEqual(a['na'], na)
+ self.assertEqual(a['maa'], maa)
+ self.assertEqual(a['na'][500:-500], na[500:-500])
+ self.assertEqual(a['maa'][500:-500], maa[500:-500])
+ scatter = Numeric.array(xrange(0, len(na), 3))
+ self.assertEqual(a['na'].take(scatter), Numeric.take(na, scatter))
+ self.assertEqual(Numeric.take(a['na'],scatter),
+ Numeric.take(na, scatter))
+ self.assertEqual(MA.take(a['maa'], scatter), MA.take(maa, scatter))
+
+ tmpfile = TempFile('soomtestarray_tmpfile')
+ try:
+ a = soomarray.ArrayDict(tmpfile.fn(), 'w')
+ self.assertEqual(len(a), 0)
+ self.assertRaises(KeyError, a.__getitem__, 'not_found')
+ na = Numeric.arrayrange(10000)
+ a['na'] = na
+ na[1000] = -2222
+ # MmapArray __setitem__
+ a['na'][1000] = -2222
+
+ maa = MA.arrayrange(10000)
+ for i in xrange(0, len(maa), 5):
+ maa[i] = MA.masked
+ a['maa'] = maa
+ maa[1000] = -2222
+ a['maa'][1000] = -2222
+
+ _check(a)
+ del a
+
+ a = soomarray.ArrayDict(tmpfile.fn(), 'r')
+ _check(a)
+ finally:
+ try: del a
+ except UnboundLocalError: pass
+ tmpfile.done()
+
+
+class _BSDDB_Base(unittest.TestCase):
+ def _test_array(self, cls, data):
+ def _set(a, data):
+ # __setitem__
+ for i, d in enumerate(data):
+ a[i] = d
+ # catch bad type assignment?
+ self.assertRaises(TypeError, a.__setitem__, 0, object)
+
+ def _check(a, data):
+ # __len__ method
+ self.assertEqual(len(a), len(data))
+ # __getitem__
+ for i, d in enumerate(data):
+ self.assertEqual(a[i], d)
+ # iteration (if supported)
+ self.assertEqual(list(a), data)
+ # slices
+ self.assertEqual(a[-2:], data[-2:])
+
+ # Disk backed
+ tempfile = TempFile('soomarraytest_tmpfile')
+ try:
+ a=cls(tempfile.fn())
+ _set(a, data)
+ _check(a, data)
+ del a
+ a=cls(tempfile.fn())
+ _check(a, data)
+ finally:
+ try: del a
+ except UnboundLocalError: pass
+ tempfile.done()
+
+ # Unbacked
+ a=cls()
+ _set(a, data)
+ _check(a, data)
+ del a
+
+
+class TupleString(_BSDDB_Base):
+ def test_tuple(self):
+ data = ['', 'qsl', 'rst', 'qrm', 'qsy']
+ self._test_array(soomarray.ArrayString, data)
+
+
+class TupleTest(_BSDDB_Base):
+ def test_tuple(self):
+ data = [(), ('abc', 'def'), ('pqr',), ('qsl', 'rst', 'qrm', 'qsy')]
+ self._test_array(soomarray.ArrayTuple, data)
+
+
+class DateTimeTest(_BSDDB_Base):
+ def test_datetime(self):
+ data = [DateTime(2004,1,1,0,0),
+ DateTime(1900,12,31,23,59,59),
+ DateTime(2050,2,28),
+ None]
+ self._test_array(soomarray.ArrayDateTime, data)
+
+ def test_date(self):
+ data = [Date(2004,1,1),
+ Date(1900,12,31),
+ Date(2050,2,28),
+ None]
+ self._test_array(soomarray.ArrayDate, data)
+
+ def test_time(self):
+ data = [Time(0,0),
+ Time(23,59,59),
+ Time(12,0,0),
+ None]
+ self._test_array(soomarray.ArrayTime, data)
+
+
+class RecodeArray(unittest.TestCase):
+ def _check_data(self, r):
+ self.assertEqual(list(r), ['def', None, None, 'abc'])
+ self.assertEqual(list(r[2:]), [None, 'abc'])
+ self.assertEqual(r[0], 'def')
+ self.assertEqual(r[3], 'abc')
+ self.assertEqual(r[-1], 'abc')
+
+ def _test_array(self, r):
+ self.assertEqual(list(r), [None] * 4)
+ self.assertEqual(r[0], None)
+ self.assertEqual(r[3], None)
+ self.assertRaises(IndexError, r.__getitem__, 4)
+ r[3] = 'abc'
+ r[0] = 'def'
+ self._check_data(r)
+
+ def test_numeric_array(self):
+ self._test_array(soomarray.RecodeNumericArray(4))
+
+ def test_blobstore_array(self):
+ tempfile = TempFile('recode_array_tmpfile')
+ try:
+ self._test_array(soomarray.RecodeBlobArray(4, tempfile.fn(), 'w'))
+ self._check_data(soomarray.RecodeBlobArray(4, tempfile.fn()))
+ finally:
+ tempfile.done()
+
+ def test_blobstore_array_big(self):
+ for dictpath in ('/usr/dict/words', '/usr/share/dict/words'):
+ if os.path.exists(dictpath):
+ break
+ else:
+ print "skipped test_blobstore_array_big - no dictionary found?"
+ return
+ words = [w.strip() for w in open(dictpath, 'U')]
+ tempfile = TempFile('recode_array_tmpfile')
+ size = 100000
+ try:
+ data = []
+ array = soomarray.RecodeBlobArray(size, tempfile.fn(), 'w')
+ for i in xrange(size):
+ word = random.choice(words)
+ data.append(word)
+ array[i] = word
+ del array
+ gc.collect()
+ array = soomarray.RecodeBlobArray(size, tempfile.fn())
+ for i in xrange(size):
+ a = array[i]
+ b = data[i]
+ self.assertEqual(a, b,
+ 'Data at index %d does not match: %r vs %r'
+ % (i, a, b))
+ del array
+ gc.collect()
+ finally:
+ tempfile.done()
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/soomext/test/soomfunctest.py b/soomext/test/soomfunctest.py
new file mode 100644
index 0000000..70ae12d
--- /dev/null
+++ b/soomext/test/soomfunctest.py
@@ -0,0 +1,313 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+"""
+Test soomfuncs
+
+$Id: soomfunctest.py 3015 2008-01-18 04:22:02Z andrewm $
+$Source: /usr/local/cvsroot/NSWDoH/SOOMv0/soomext/test/soomfunctest.py,v $
+"""
+
+import unittest
+
+import Numeric
+import RandomArray
+import soomfunc
+
+
+class _soomfuncTest(unittest.TestCase):
+ def _test_soomfunc(self, fn, want, *args):
+ args = [Numeric.array(a, typecode='l') for a in args]
+ result = fn(*args)
+ self.assertEqual(result.typecode(), 'l')
+ self.assertEqual(list(result), want)
+
+ def _test_soomfunc_bothways(self, fn, want, *args):
+ args = [Numeric.array(a, typecode='l') for a in args]
+ result = fn(*args)
+ self.assertEqual(result.typecode(), 'l')
+ self.assertEqual(list(result), want)
+
+ args.reverse()
+ result = fn(*args)
+ self.assertEqual(result.typecode(), 'l')
+ self.assertEqual(list(result), want)
+
+class uniqueCase(_soomfuncTest):
+ def test_dups(self):
+ self._test_soomfunc(soomfunc.unique,
+ [5, 6, 7, 8, 9], [5, 6, 6, 7, 8, 8, 8, 8, 9])
+ def test_nodups(self):
+ self._test_soomfunc(soomfunc.unique,
+ [5, 6, 7, 8, 9], [5, 6, 7, 8, 9])
+ def test_one(self):
+ self._test_soomfunc(soomfunc.unique,
+ [4], [4])
+ def test_empty(self):
+ self._test_soomfunc(soomfunc.unique,
+ [], [])
+
+class uniqueSuite(unittest.TestSuite):
+ test_list = (
+ "test_dups",
+ "test_nodups",
+ "test_one",
+ "test_empty",
+ )
+ def __init__(self):
+ unittest.TestSuite.__init__(self, map(uniqueCase, self.test_list))
+
+
+class intersectCase(_soomfuncTest):
+ def test_dense(self):
+ self._test_soomfunc_bothways(soomfunc.intersect,
+ [0, 2, 3, 5, 8],
+ [0, 2, 3, 4, 5, 7, 8, 9],
+ [0, 1, 2, 3, 5, 6, 8])
+
+ def test_sparse(self):
+ self._test_soomfunc_bothways(soomfunc.intersect,
+ [1, 5, 16],
+ [0, 1, 3, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19],
+ [1, 5, 8, 16])
+ def test_dups(self):
+ self._test_soomfunc_bothways(soomfunc.intersect,
+ [1, 5, 16],
+ [0, 1, 3, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19],
+ [1, 5, 5, 5, 8, 8, 16])
+ def test_nointersect(self):
+ self._test_soomfunc_bothways(soomfunc.intersect,
+ [],
+ [0, 1, 3, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19],
+ [2, 4, 8, 18])
+ def test_one(self):
+ self._test_soomfunc_bothways(soomfunc.intersect,
+ [5],
+ [5],
+ [5])
+ def test_empty(self):
+ self._test_soomfunc_bothways(soomfunc.intersect,
+ [],
+ [],
+ [])
+
+ def test_sparse_vs_dense(self):
+ RandomArray.seed(0) # For reproducibility
+ for s, l in (100, 100000), (10000, 100000), (100000, 100000):
+ small = Numeric.sort(RandomArray.randint(0, 100000, (s,)))
+ large = Numeric.sort(RandomArray.randint(0, 100000, (l,)))
+
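+            # The sparse and dense strategies should agree with each other
+            # and be insensitive to operand order.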
+ sparse1 = soomfunc.sparse_intersect(small, large)
+ sparse2 = soomfunc.sparse_intersect(large, small)
+ dense1 = soomfunc.dense_intersect(small, large)
+ dense2 = soomfunc.dense_intersect(large, small)
+
+ self.assertEqual(sparse1, sparse2)
+ self.assertEqual(dense1, dense2)
+ self.assertEqual(sparse1, dense1)
+
+
+
+class intersectSuite(unittest.TestSuite):
+ test_list = (
+ "test_dense",
+ "test_sparse",
+ "test_dups",
+ "test_nointersect",
+ "test_one",
+ "test_empty",
+ "test_sparse_vs_dense",
+ )
+ def __init__(self):
+ unittest.TestSuite.__init__(self, map(intersectCase, self.test_list))
+
+
+class outersectCase(_soomfuncTest):
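+    # outersect appears to compute the symmetric difference of its inputs.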
+ def test_simple(self):
+ self._test_soomfunc_bothways(soomfunc.outersect,
+ [1, 4, 6, 7, 9],
+ [0, 2, 3, 4, 5, 7, 8, 9],
+ [0, 1, 2, 3, 5, 6, 8])
+
+ def test_dups(self):
+ self._test_soomfunc_bothways(soomfunc.outersect,
+ [1, 4, 6, 7, 9],
+ [0, 2, 3, 3, 4, 4, 5, 7, 8, 9],
+ [0, 1, 2, 3, 3, 5, 6, 8, 8])
+ def test_nooutersect(self):
+ self._test_soomfunc_bothways(soomfunc.outersect,
+ [],
+ [0, 1, 2, 3, 5, 6, 8],
+ [0, 1, 2, 3, 5, 6, 8, 8])
+ def test_one(self):
+ self._test_soomfunc_bothways(soomfunc.outersect,
+ [],
+ [5],
+ [5])
+ def test_empty(self):
+ self._test_soomfunc_bothways(soomfunc.outersect,
+ [],
+ [],
+ [])
+
+class outersectSuite(unittest.TestSuite):
+ test_list = (
+ "test_simple",
+ "test_dups",
+ "test_nooutersect",
+ "test_one",
+ "test_empty",
+ )
+ def __init__(self):
+ unittest.TestSuite.__init__(self, map(outersectCase, self.test_list))
+
+
+class unionCase(_soomfuncTest):
+ def test_simple(self):
+ self._test_soomfunc_bothways(soomfunc.union,
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
+ [0, 2, 3, 4, 5, 7, 8, 9],
+ [0, 1, 2, 3, 5, 6, 8])
+
+ def test_one(self):
+ self._test_soomfunc_bothways(soomfunc.union,
+ [5],
+ [],
+ [5])
+ def test_empty(self):
+ self._test_soomfunc_bothways(soomfunc.union,
+ [],
+ [],
+ [])
+
+class unionSuite(unittest.TestSuite):
+ test_list = (
+ "test_simple",
+ "test_one",
+ "test_empty",
+ )
+ def __init__(self):
+ unittest.TestSuite.__init__(self, map(unionCase, self.test_list))
+
+
+class differenceCase(_soomfuncTest):
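+    # difference appears to return the elements of the first array that are
+    # not present in any of the subsequent arrays.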
+ def test_simple(self):
+ self._test_soomfunc(soomfunc.difference,
+ [4, 7, 9],
+ [0, 2, 3, 4, 5, 7, 8, 9],
+ [0, 1, 2, 3, 5, 6, 8])
+ def test_more(self):
+ self._test_soomfunc(soomfunc.difference,
+ [4, 7, 9],
+ [0, 2, 3, 4, 5, 7, 8, 9],
+ [0, 1, 2, 3],
+ [5, 6, 8])
+
+ def test_one(self):
+ self._test_soomfunc(soomfunc.difference,
+ [], [], [5])
+ self._test_soomfunc(soomfunc.difference,
+ [5], [5], [])
+ def test_empty(self):
+ self._test_soomfunc_bothways(soomfunc.difference,
+ [],
+ [],
+ [])
+
+class differenceSuite(unittest.TestSuite):
+ test_list = (
+ "test_simple",
+ "test_more",
+ "test_one",
+ "test_empty",
+ )
+ def __init__(self):
+ unittest.TestSuite.__init__(self, map(differenceCase, self.test_list))
+
+
+class valueposCase(_soomfuncTest):
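+    # valuepos appears to return the indices in the first array whose
+    # values occur in the second array.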
+ def test_simple(self):
+ self._test_soomfunc(soomfunc.valuepos,
+ [1, 2, 3, 4, 8],
+ [0, 2, 3, 3, 5, 7, 7, 7, 8, 9],
+ [1, 2, 3, 5, 6, 8])
+
+ def test_one(self):
+ self._test_soomfunc(soomfunc.valuepos,
+ [], [], [5])
+ self._test_soomfunc(soomfunc.valuepos,
+ [], [5], [])
+
+ def test_empty(self):
+ self._test_soomfunc_bothways(soomfunc.valuepos,
+ [],
+ [],
+ [])
+
+class valueposSuite(unittest.TestSuite):
+ test_list = (
+ "test_simple",
+ "test_one",
+ "test_empty",
+ )
+ def __init__(self):
+ unittest.TestSuite.__init__(self, map(valueposCase, self.test_list))
+
+
+class stripwordCase(unittest.TestCase):
+ def test_empty(self):
+ self.assertEqual(soomfunc.strip_word(""), "")
+
+ def test_upper(self):
+ self.assertEqual(soomfunc.strip_word("hello"), "HELLO")
+
+ def test_quote(self):
+ self.assertEqual(soomfunc.strip_word("'START"), "START")
+ self.assertEqual(soomfunc.strip_word("END'"), "END")
+ self.assertEqual(soomfunc.strip_word("DON'T"), "DONT")
+ self.assertEqual(soomfunc.strip_word("CAN''T"), "CANT")
+ self.assertEqual(soomfunc.strip_word("'''"), "")
+
+ def test_both(self):
+ self.assertEqual(soomfunc.strip_word("don't"), "DONT")
+ self.assertEqual(soomfunc.strip_word("'confusion'"), "CONFUSION")
+
+class stripwordSuite(unittest.TestSuite):
+ test_list = (
+ "test_empty",
+ "test_upper",
+ "test_quote",
+ "test_both",
+ )
+ def __init__(self):
+ unittest.TestSuite.__init__(self, map(stripwordCase, self.test_list))
+
+
+class soomfuncSuite(unittest.TestSuite):
+ def __init__(self):
+ unittest.TestSuite.__init__(self)
+ self.addTest(uniqueSuite())
+ self.addTest(intersectSuite())
+ self.addTest(outersectSuite())
+ self.addTest(unionSuite())
+ self.addTest(differenceSuite())
+ self.addTest(valueposSuite())
+ self.addTest(stripwordSuite())
+
+def suite():
+ return soomfuncSuite()
+
+if __name__ == '__main__':
+ unittest.main(defaultTest='soomfuncSuite')
+
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..b872200
--- /dev/null
+++ b/test.py
@@ -0,0 +1,64 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: test.py 3689 2009-02-09 05:04:55Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/test.py,v $
+
+import sys
+import os
+import unittest
+import optparse
+
+topdir = os.path.dirname(__file__)
+sys.path.insert(0, topdir)
+sys.path.insert(0, os.path.join(topdir, 'soomext'))
+
+# Import SOOMv0 only after sys.path is set up, so the in-tree copy is found.
+from SOOMv0 import soom
+
+test_dirs = 'tests', 'soomext/test',
+sys.path[0:0] = test_dirs
+
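+# Test modules skipped when run with --skip-slow.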
+slow_tests = (
+ 'stats', 'indirect_std_SAS',
+)
+
+if __name__ == '__main__':
+ parser = optparse.OptionParser()
+ parser.add_option('-v', '--verbose', dest='verbosity',
+ action='count', default=1)
+ parser.add_option('-m', '--messages', dest='messages',
+ action='store_true', default=False)
+ parser.add_option('--skip-slow', dest='skip_slow',
+ action='store_true', default=False)
+ options, args = parser.parse_args()
+ soom.messages = options.messages
+ if options.messages:
+ options.verbosity = max(2, options.verbosity)
+ if args:
+ tests = args
+ else:
+ tests = []
+ for test_dir in test_dirs:
+ for f in os.listdir(test_dir):
+ if f.endswith('.py') and not f.startswith('.') and f != 'all.py':
+ tests.append(f[:-len('.py')])
+ if options.skip_slow:
+ for test in slow_tests:
+ if test in tests:
+ tests.remove(test)
+ test_suite = unittest.defaultTestLoader.loadTestsFromNames(tests)
+ test_runner = unittest.TextTestRunner(verbosity=options.verbosity)
+ result = test_runner.run(test_suite)
+ sys.exit(not result.wasSuccessful())
+
diff --git a/tests/SAS/Stats_py_test.sas b/tests/SAS/Stats_py_test.sas
new file mode 100644
index 0000000..f1dfc96
--- /dev/null
+++ b/tests/SAS/Stats_py_test.sas
@@ -0,0 +1,436 @@
+/*
+ * The contents of this file are subject to the HACOS License Version 1.2
+ * (the "License"); you may not use this file except in compliance with
+ * the License. Software distributed under the License is distributed
+ * on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ * implied. See the LICENSE file for the specific language governing
+ * rights and limitations under the License. The Original Software
+ * is "NetEpi Analysis". The Initial Developer of the Original
+ * Software is the Health Administration Corporation, incorporated in
+ * the State of New South Wales, Australia.
+ *
+ * Copyright (C) 2004,2005 Health Administration Corporation.
+ * All Rights Reserved.
+ */
+
+options pageno=1 ;
+%macro misc(exclnpwgts,vdef,wvdef) ;
+
+data a ;
+ x = . ;
+ wgt = . ;
+ output ;
+ run ;
+
+proc summary data=a nway vardef=&vdef;
+ var x ;
+ output out=a_quantiles n=n nmiss=nmiss
+ sum=sum mean=mean min=min max=max sumwgt=sumwgt
+ stderr=stderr stddev=stddev cv=cv lclm=lclm uclm=uclm var=var
+ skew=skew kurt=kurt t=t probt=probt ;
+ run ;
+
+proc summary data=a nway alpha=0.05 vardef=&wvdef
+%if &exclnpwgts = 1 %then exclnpwgts ; ;
+ var x ;
+ weight wgt ;
+ output out=a_quantiles_wgted n=n nmiss=nmiss sum=sum mean=mean min=min max=max sumwgt=sumwgt
+ stderr=stderr stddev=stddev cv=cv lclm=lclm uclm=uclm var=var
+ skew=skew kurt=kurt t=t probt=probt ;
+ run ;
+
+data quantiles_a ;
+ set a_quantiles
+ a_quantiles_wgted(in=w) ;
+ if w then wgted = 1 ;
+ else wgted = 0 ;
+ exclnpwgts = &exclnpwgts ;
+ vardef = "&vdef" ;
+ wvardef = "&wvdef" ;
+ run ;
+
+title "[]" ;
+proc print data=quantiles_a ;
+ format var 20.10 stderr 20.10 cv 20.10 lclm 20.10 uclm 20.10 t 20.10 skew 20.10 kurt 20.10 probt 20.10 ;
+ run ;
+
+data b ;
+ x = 1 ;
+ wgt = x ;
+ output ;
+ x = 2 ;
+ wgt = x ;
+ output ;
+ x = 3 ;
+ wgt = x ;
+ output ;
+ x = 3 ;
+ wgt = x ;
+ output ;
+ x = 5 ;
+ wgt = x ;
+ output ;
+ run ;
+
+proc summary data=b nway vardef=&vdef;
+ var x ;
+ output out=b_quantiles n=n nmiss=nmiss
+ sum=sum mean=mean min=min max=max sumwgt=sumwgt
+ stderr=stderr stddev=stddev cv=cv lclm=lclm uclm=uclm var=var
+ skew=skew kurt=kurt t=t probt=probt ;
+ run ;
+
+proc summary data=b nway alpha=0.05 vardef=&wvdef
+%if &exclnpwgts = 1 %then exclnpwgts ; ;
+ var x ;
+ weight wgt ;
+ output out=b_quantiles_wgted n=n nmiss=nmiss sum=sum mean=mean min=min max=max sumwgt=sumwgt
+ stderr=stderr stddev=stddev cv=cv lclm=lclm uclm=uclm var=var
+ skew=skew kurt=kurt t=t probt=probt ;
+ run ;
+
+data quantiles_b ;
+ set b_quantiles
+ b_quantiles_wgted(in=w) ;
+ if w then wgted = 1 ;
+ else wgted = 0 ;
+ exclnpwgts = &exclnpwgts ;
+ vardef = "&vdef" ;
+ wvardef = "&wvdef" ;
+ run ;
+
+title "[1,2,3,3,5]" ;
+proc print data=quantiles_b ;
+ format var 20.10 stderr 20.10 cv 20.10 lclm 20.10 uclm 20.10 t 20.10 skew 20.10 kurt 20.10 probt 20.10 ;
+ run ;
+
+data c ;
+ x = . ;
+ wgt = x ;
+ output ;
+ x = 2 ;
+ wgt = x ;
+ output ;
+ x = . ;
+ wgt = x ;
+ output ;
+ x = 5 ;
+ wgt = x ;
+ output ;
+ x = . ;
+ wgt = x ;
+ output ;
+ run ;
+
+proc summary data=c nway vardef=&vdef;
+ var x ;
+ output out=c_quantiles n=n nmiss=nmiss
+ sum=sum mean=mean min=min max=max sumwgt=sumwgt
+ stderr=stderr stddev=stddev cv=cv lclm=lclm uclm=uclm var=var
+ skew=skew kurt=kurt t=t probt=probt ;
+ run ;
+
+proc summary data=c nway alpha=0.05 vardef=&wvdef
+%if &exclnpwgts = 1 %then exclnpwgts ; ;
+ var x ;
+ weight wgt ;
+ output out=c_quantiles_wgted n=n nmiss=nmiss sum=sum mean=mean min=min max=max sumwgt=sumwgt
+ stderr=stderr stddev=stddev cv=cv lclm=lclm uclm=uclm var=var
+ skew=skew kurt=kurt t=t probt=probt ;
+ run ;
+
+data quantiles_c ;
+ set c_quantiles
+ c_quantiles_wgted(in=w) ;
+ if w then wgted = 1 ;
+ else wgted = 0 ;
+ exclnpwgts = &exclnpwgts ;
+ vardef = "&vdef" ;
+ wvardef = "&wvdef" ;
+ run ;
+
+title "[.,2,.,5,.]" ;
+proc print data=quantiles_c ;
+ format var 20.10 stderr 20.10 cv 20.10 lclm 20.10 uclm 20.10 t 20.10 skew 20.10 kurt 20.10 probt 20.10 ;
+ run ;
+
+data d ;
+ x = 2 ;
+ wgt = x ;
+ output ;
+ x = 5 ;
+ wgt = x ;
+ output ;
+ run ;
+
+proc summary data=d nway vardef=&vdef;
+ var x ;
+ output out=d_quantiles n=n nmiss=nmiss
+ sum=sum mean=mean min=min max=max sumwgt=sumwgt
+ stderr=stderr stddev=stddev cv=cv lclm=lclm uclm=uclm var=var
+ skew=skew kurt=kurt t=t probt=probt ;
+ run ;
+
+proc summary data=d nway alpha=0.05 vardef=&wvdef
+%if &exclnpwgts = 1 %then exclnpwgts ; ;
+ var x ;
+ weight wgt ;
+ output out=d_quantiles_wgted n=n nmiss=nmiss sum=sum mean=mean min=min max=max sumwgt=sumwgt
+ stderr=stderr stddev=stddev cv=cv lclm=lclm uclm=uclm var=var
+ skew=skew kurt=kurt t=t probt=probt ;
+ run ;
+
+data quantiles_d ;
+ set d_quantiles
+ d_quantiles_wgted(in=w) ;
+ if w then wgted = 1 ;
+ else wgted = 0 ;
+ exclnpwgts = &exclnpwgts ;
+ vardef = "&vdef" ;
+ wvardef = "&wvdef" ;
+ run ;
+
+title "[2,5]" ;
+proc print data=quantiles_d ;
+ format var 20.10 stderr 20.10 cv 20.10 lclm 20.10 uclm 20.10 t 20.10 skew 20.10 kurt 20.10 probt 20.10 ;
+ run ;
+
+data e ;
+ x = 2 ;
+ wgt = x ;
+ output ;
+ run ;
+
+proc summary data=e nway vardef=&vdef;
+ var x ;
+ output out=e_quantiles n=n nmiss=nmiss
+ sum=sum mean=mean min=min max=max sumwgt=sumwgt
+ stderr=stderr stddev=stddev cv=cv lclm=lclm uclm=uclm var=var
+ skew=skew kurt=kurt t=t probt=probt ;
+ run ;
+
+proc summary data=e nway alpha=0.05 vardef=&wvdef
+%if &exclnpwgts = 1 %then exclnpwgts ; ;
+ var x ;
+ weight wgt ;
+ output out=e_quantiles_wgted n=n nmiss=nmiss sum=sum mean=mean min=min max=max sumwgt=sumwgt
+ stderr=stderr stddev=stddev cv=cv lclm=lclm uclm=uclm var=var
+ skew=skew kurt=kurt t=t probt=probt ;
+ run ;
+
+data quantiles_e ;
+ set e_quantiles
+ e_quantiles_wgted(in=w) ;
+ if w then wgted = 1 ;
+ else wgted = 0 ;
+ exclnpwgts = &exclnpwgts ;
+ vardef = "&vdef" ;
+ wvardef = "&wvdef" ;
+ run ;
+
+title "[2]" ;
+proc print data=quantiles_e ;
+ format var 20.10 stderr 20.10 cv 20.10 lclm 20.10 uclm 20.10 t 20.10 skew 20.10 kurt 20.10 probt 20.10 ;
+ run ;
+
+data f ;
+ x = -2 ;
+ wgt = x ;
+ output ;
+ run ;
+
+proc summary data=f nway vardef=&vdef;
+ var x ;
+ output out=f_quantiles n=n nmiss=nmiss
+ sum=sum mean=mean min=min max=max sumwgt=sumwgt
+ stderr=stderr stddev=stddev cv=cv lclm=lclm uclm=uclm var=var
+ skew=skew kurt=kurt t=t probt=probt ;
+ run ;
+
+proc summary data=f nway alpha=0.05 vardef=&wvdef
+%if &exclnpwgts = 1 %then exclnpwgts ; ;
+ var x ;
+ weight wgt ;
+ output out=f_quantiles_wgted n=n nmiss=nmiss sum=sum mean=mean min=min max=max sumwgt=sumwgt
+ stderr=stderr stddev=stddev cv=cv lclm=lclm uclm=uclm var=var
+ skew=skew kurt=kurt t=t probt=probt ;
+ run ;
+
+data quantiles_f ;
+ set f_quantiles
+ f_quantiles_wgted(in=w) ;
+ if w then wgted = 1 ;
+ else wgted = 0 ;
+ exclnpwgts = &exclnpwgts ;
+ vardef = "&vdef" ;
+ wvardef = "&wvdef" ;
+ run ;
+
+title "[-2]" ;
+proc print data=quantiles_f ;
+ format var 20.10 stderr 20.10 cv 20.10 lclm 20.10 uclm 20.10 t 20.10 skew 20.10 kurt 20.10 probt 20.10 ;
+ run ;
+
+
+data g ;
+ x = -3 ;
+ wgt = x ;
+ output ;
+ x = -4 ;
+ wgt = x ;
+ output ;
+ x = -5 ;
+ wgt = x ;
+ output ;
+ x = -2 ;
+ wgt = x ;
+ output ;
+ x = -76 ;
+ wgt = x ;
+ output ;
+ run ;
+
+proc summary data=g nway vardef=&vdef;
+ var x ;
+ output out=g_quantiles n=n nmiss=nmiss
+ sum=sum mean=mean min=min max=max sumwgt=sumwgt
+ stderr=stderr stddev=stddev cv=cv lclm=lclm uclm=uclm var=var
+ skew=skew kurt=kurt t=t probt=probt ;
+ run ;
+
+proc summary data=g nway alpha=0.05 vardef=&wvdef
+%if &exclnpwgts = 1 %then exclnpwgts ; ;
+ var x ;
+ weight wgt ;
+ output out=g_quantiles_wgted n=n nmiss=nmiss sum=sum mean=mean min=min max=max sumwgt=sumwgt
+ stderr=stderr stddev=stddev cv=cv lclm=lclm uclm=uclm var=var
+ skew=skew kurt=kurt t=t probt=probt ;
+ run ;
+
+data quantiles_g ;
+ set g_quantiles
+ g_quantiles_wgted(in=w) ;
+ if w then wgted = 1 ;
+ else wgted = 0 ;
+ exclnpwgts = &exclnpwgts ;
+ vardef = "&vdef" ;
+ wvardef = "&wvdef" ;
+ run ;
+
+title "[-3,-4,-5,-2,-76]" ;
+proc print data=quantiles_g ;
+ format var 20.10 stderr 20.10 cv 20.10 lclm 20.10 uclm 20.10 t 20.10 skew 20.10 kurt 20.10 probt 20.10 ;
+ run ;
+
+%mend ;
+
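+* Exercise all four combinations: exclnpwgts 0/1 with vardef pairs (DF,WDF) and (N,WEIGHT) ;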
+%misc(0,DF,WDF)
+%misc(1,DF,WDF)
+%misc(0,N,WEIGHT)
+%misc(1,N,WEIGHT)
+
+********************************************************************************************* ;
+options pageno=1 ;
+%macro pc(n,miss_x,miss_wgt,exclnpwgts,vdef,wvdef) ;
+
+data a(drop=y) ;
+ do y = &n to -10 by -1 ;
+ x = y ;
+ wgt = x / 3 ;
+%if &miss_x = 1 %then %do ;
+ if mod(x,7) = 0 and x < 500 then x = . ;
+%end ;
+%if &miss_wgt = 1 %then %do ;
+ if mod(x,13) = 0 and x > 500 then wgt = . ;
+%end ;
+ output ;
+end ;
+run ;
+
+%do def = 1 %to 5 ;
+
+proc summary data=a nway pctldef=&def vardef=&vdef;
+ var x ;
+ output out=quantiles_&def n=n nmiss=nmiss
+ sum=sum mean=mean min=min max=max sumwgt=sumwgt
+ stderr=stderr stddev=stddev cv=cv lclm=lclm uclm=uclm var=var
+ skew=skew kurt=kurt t=t probt=probt
+ median=median p1=p1 p10=p10 p25=p25 p75=p75 p90=p90 p99=p99 ;
+ run ;
+%end ;
+
+proc summary data=a nway alpha=0.05 vardef=&wvdef
+%if &exclnpwgts = 1 %then exclnpwgts ; ;
+ var x ;
+ weight wgt ;
+ output out=quantiles_wgted n=n nmiss=nmiss sum=sum mean=mean min=min max=max sumwgt=sumwgt
+ stderr=stderr stddev=stddev cv=cv lclm=lclm uclm=uclm var=var
+ skew=skew kurt=kurt t=t probt=probt
+ median=median p1=p1 p10=p10 p25=p25 p75=p75 p90=p90 p99=p99 ;
+ run ;
+
+data quantiles ;
+ set quantiles_1(in=a)
+ quantiles_2(in=b)
+ quantiles_3(in=c)
+ quantiles_4(in=d)
+ quantiles_5(in=e)
+ quantiles_wgted(in=w) ;
+ select ;
+ when (a) def = 1 ;
+ when (b) def = 2 ;
+ when (c) def = 3 ;
+ when (d) def = 4 ;
+ when (e) def = 5 ;
+ otherwise ;
+ end ;
+ if w then wgted = 1 ;
+ else wgted = 0 ;
+ negten_to_ = &n ;
+ missing_x = &miss_x ;
+ missing_wgt = &miss_wgt ;
+ exclnpwgts = &exclnpwgts ;
+ vardef = "&vdef" ;
+ wvardef = "&wvdef" ;
+ run ;
+
+proc print data=quantiles ;
+ format var 20.10 stderr 20.10 cv 20.10 lclm 20.10 uclm 20.10 t 20.10 skew 20.10 kurt 20.10 probt 20.10 ;
+ run ;
+%mend ;
+
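+* Percentile checks across every combination of: series end point (990/995), ;
+* missing x values, missing weights, exclnpwgts, and both vardef conventions ;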
+%pc(990,0,0,0,DF,WDF)
+%pc(995,0,0,0,DF,WDF)
+%pc(990,1,0,0,DF,WDF)
+%pc(995,1,0,0,DF,WDF)
+%pc(990,0,1,0,DF,WDF)
+%pc(995,0,1,0,DF,WDF)
+%pc(990,1,1,0,DF,WDF)
+%pc(995,1,1,0,DF,WDF)
+
+%pc(990,0,0,1,DF,WDF)
+%pc(995,0,0,1,DF,WDF)
+%pc(990,1,0,1,DF,WDF)
+%pc(995,1,0,1,DF,WDF)
+%pc(990,0,1,1,DF,WDF)
+%pc(995,0,1,1,DF,WDF)
+%pc(990,1,1,1,DF,WDF)
+%pc(995,1,1,1,DF,WDF)
+
+%pc(990,0,0,0,N,WEIGHT)
+%pc(995,0,0,0,N,WEIGHT)
+%pc(990,1,0,0,N,WEIGHT)
+%pc(995,1,0,0,N,WEIGHT)
+%pc(990,0,1,0,N,WEIGHT)
+%pc(995,0,1,0,N,WEIGHT)
+%pc(990,1,1,0,N,WEIGHT)
+%pc(995,1,1,0,N,WEIGHT)
+
+%pc(990,0,0,1,N,WEIGHT)
+%pc(995,0,0,1,N,WEIGHT)
+%pc(990,1,0,1,N,WEIGHT)
+%pc(995,1,0,1,N,WEIGHT)
+%pc(990,0,1,1,N,WEIGHT)
+%pc(995,0,1,1,N,WEIGHT)
+%pc(990,1,1,1,N,WEIGHT)
+%pc(995,1,1,1,N,WEIGHT)
diff --git a/tests/SAS/indirect_std_check.sas b/tests/SAS/indirect_std_check.sas
new file mode 100644
index 0000000..333425d
--- /dev/null
+++ b/tests/SAS/indirect_std_check.sas
@@ -0,0 +1,335 @@
+/*
+ * The contents of this file are subject to the HACOS License Version 1.2
+ * (the "License"); you may not use this file except in compliance with
+ * the License. Software distributed under the License is distributed
+ * on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ * implied. See the LICENSE file for the specific language governing
+ * rights and limitations under the License. The Original Software
+ * is "NetEpi Analysis". The Initial Developer of the Original
+ * Software is the Health Administration Corporation, incorporated in
+ * the State of New South Wales, Australia.
+ *
+ * Copyright (C) 2004,2005 Health Administration Corporation.
+ * All Rights Reserved.
+ */
+
+/* indirect_std_check.sas
+ Written by Tim Churches, November 2005. Checks NetEpi Analysis indirect standardisation
+ results.
+*/
+
+/* Adjust the paths below to point to the CSV files of synthetic death and population data
+   left behind in the /demo/scratch subdirectory of the unpacked NetEpi-Analysis distribution
+   after running the SOOM_demo_data_load.py program with either no options or with the
+   --datasets="syndeath" option. Note that you must first decompress (gunzip) those files,
+   because SAS cannot read gzipped files directly (only via a pipe through an external
+   program). */
+
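+/* A sketch of the pipe alternative mentioned above, assuming a Unix-like host with
+   gzip on the PATH; the file path shown is illustrative only:
+
+   filename syndths pipe "gzip -dc /path/to/demo/scratch/synthetic_deaths.csv.gz" ;
+*/
+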
+%let path = C:\USERDATA\;
+
+filename syndths "&path.synthetic_deaths.csv" ;
+filename synpopns "&path.synthetic_pops.csv" ;
+
+ /**********************************************************************
+ * PRODUCT: SAS
+ * VERSION: 8.2
+ * CREATOR: External File Interface
+ * DATE: 17NOV05
+ * DESC: Generated SAS Datastep Code
+ * TEMPLATE SOURCE: (None Specified.)
+ ***********************************************************************/
+ data WORK.syndeaths ;
+ %let _EFIERR_ = 0; /* set the ERROR detection macro variable */
+ infile syndths delimiter = ',' MISSOVER DSD lrecl=32767 firstobs=2 ;
+ informat agegrp best32. ;
+ informat sex best32. ;
+ informat region best32. ;
+ informat year best32. ;
+ informat causeofdeath best32. ;
+ format agegrp best12. ;
+ format sex best12. ;
+ format region best12. ;
+ format year best12. ;
+ format causeofdeath best12. ;
+ input
+ agegrp
+ sex
+ region
+ year
+ causeofdeath
+ ;
+ if _ERROR_ then call symput('_EFIERR_',1); /* set ERROR detection macro variable */
+ run;
+
+/**********************************************************************
+ * PRODUCT: SAS
+ * VERSION: 8.2
+ * CREATOR: External File Interface
+ * DATE: 17NOV05
+ * DESC: Generated SAS Datastep Code
+ * TEMPLATE SOURCE: (None Specified.)
+ ***********************************************************************/
+ data WORK.synpops ;
+ %let _EFIERR_ = 0; /* set the ERROR detection macro variable */
+ infile synpopns delimiter = ',' MISSOVER DSD lrecl=32767 firstobs=2 ;
+ informat agegrp best32. ;
+ informat sex best32. ;
+ informat region best32. ;
+ informat year best32. ;
+ informat pop best32. ;
+ format agegrp best12. ;
+ format sex best12. ;
+ format region best12. ;
+ format year best12. ;
+ format pop best12. ;
+ input
+ agegrp
+ sex
+ region
+ year
+ pop
+ ;
+ if _ERROR_ then call symput('_EFIERR_',1); /* set ERROR detection macro variable */
+ run;
+
+%macro smrcalc(stdcod,targcod,conflev) ;
+
+** Step 1a: Get the number of events in the Standard numerator dataset by age & sex ** ;
+data events1 ;
+set syndeaths ;
+where causeofdeath = &stdcod ;
+run ;
+
+proc summary data=events1 nway ;
+var year ;
+class agegrp sex ;
+output out=st_evnts n=st_evnti ;
+run ;
+
+** Step 1b: Get the number of events in the Target numerator dataset ** ;
+data events2 ;
+set syndeaths ;
+where causeofdeath = &targcod ;
+run ;
+
+** Step 2: Get the Standard Population by age and sex ** ;
+data st_pops ;
+set synpops ;
+run ;
+
+proc summary data=st_pops nway ;
+class agegrp sex ;
+var pop ;
+output out=st_pops sum=pop ;
+run ;
+
+** Step 3: Calculate the Standard age&sex-specific rates ** ;
+proc sql ;
+create table st_rate as
+select * from st_pops as a left outer join st_evnts as b
+on a.agegrp=b.agegrp and a.sex=b.sex ;
+
+data st_rate ;
+set st_rate ;
+if st_ratei=. then st_ratei=0 ;
+if st_evnti > 0 then st_ratei = (st_evnti/pop) ;
+else st_ratei = 0 ;
+run ;
+
+** Step 4: Get the Target Population Totals** ;
+data sp_pops ;
+set synpops ;
+run ;
+
+proc summary data=sp_pops nway ;
+class agegrp sex region ;
+var pop ;
+output out=sp_pops sum=sp_popi ;
+run ;
+
+** Step 5: Calculate the Target Population Expected Number of Events (Deaths) by Age&Sex ** ;
+proc sql ;
+create table sp_expi as
+select * from st_rate as a right join sp_pops as b
+on a.agegrp=b.agegrp and a.sex=b.sex ;
+
+data sp_expi ;
+set sp_expi ;
+sp_expi=st_ratei*sp_popi ;
+run ;
+
+proc summary data=sp_expi nway ;
+var sp_expi ;
+class region sex ;
+output out=sp_exp sum=sp_exp ;
+run ;
+
+** Step 6: Get the Target Population Observed events ** ;
+proc summary data=events2 nway ;
+var year ;
+class region sex ;
+output out=sp_evnts n=sp_evnt ;
+run ;
+
+** Step 7: Calculate the Indirectly Standardised Mortality Ratio (SMR) and Confidence Intervals** ;
+proc sql ;
+create table smr as
+select * from sp_exp as a left outer join sp_evnts as b
+on a.region=b.region and a.sex=b.sex ;
+
+data smr ;
+set smr ;
+attrib smr label="Std Mortality Ratio"
+l99 label="SMR lower CL"
+u99 label="SMR upper CL"
+p label="SMR p value"
+sp_exp label="Expected" ;
+if sp_evnt = . then sp_evnt = 0 ;
+smr=(sp_evnt/sp_exp)*100 ;
+
+* Calculate a p-value. ;
+select ;
+when (sp_evnt=0) do ;
+p_u = poisson(sp_exp,sp_evnt) ;
+p_l = poisson(sp_exp,sp_evnt) ;
+p = p_u + p_l ;
+end ;
+when (sp_evnt ge sp_exp) do ;
+p_u = 1 - poisson(sp_exp,(sp_evnt-1)) ;
+p_l = poisson(sp_exp,round(sp_exp*(sp_exp/sp_evnt))) ;
+p = p_u + p_l ;
+end ;
+when (sp_evnt lt sp_exp) do ;
+if (sp_exp + (sp_exp-sp_evnt)) = int(sp_exp+(sp_exp-sp_evnt)) then
+p_u = 1 - poisson(sp_exp,(int(sp_exp+(sp_exp-sp_evnt))-1)) ;
+else
+p_u = 1 - poisson(sp_exp,(int(sp_exp*(sp_exp/sp_evnt)))) ;
+p_l = poisson(sp_exp,sp_evnt) ;
+p = p_u + p_l ;
+end ;
+otherwise ;
+end ;
+if p > 1 then p = 1 ;
+* Calculate Confidence Intervals ;
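+* For d observed events and E expected, exact Poisson limits come from gamma ;
+* quantiles: lower = gaminv((1-conflev)/2, d)/E and upper = gaminv((1+conflev)/2, d+1)/E, ;
+* then scaled to a ratio per 100. ;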
+if sp_evnt = 0 then do ;
+l99 = 0 ;
+u_lam99 = -log(1 - &conflev) ;
+if u_lam99 ne . then u99 = u_lam99/sp_exp ;
+else u99 = . ;
+end ;
+else do ;
+l_lam99 = gaminv((1 - &conflev)/2,sp_evnt) ;
+if sp_exp > 0 then l99 = l_lam99/sp_exp ;
+else l99 = . ;
+u_lam99 = gaminv((1 + &conflev)/2,sp_evnt+1) ;
+if u_lam99 ne . then u99 = u_lam99/sp_exp ;
+else u99 = . ;
+end ;
+if l99 ne . then l99 = 100*l99 ;
+if u99 ne . then u99 = 100*u99 ;
+run ;
+
+filename smr_out "&path.smr_results_&stdcod._&targcod._CL&conflev..csv" ;
+
+title "Std causeofdeath=&stdcod., target causeofdeath=&targcod., conf. level=&conflev." ;
+proc print data=smr label noobs ;
+ var region sex sp_evnt sp_exp smr l99 u99 p ;
+ run ;
+
+/**********************************************************************
+* PRODUCT: SAS
+* VERSION: 8.2
+* CREATOR: External File Interface
+* DATE: 17NOV05
+* DESC: Generated SAS Datastep Code
+* TEMPLATE SOURCE: (None Specified.)
+***********************************************************************/
+ data _null_;
+ set WORK.SMR end=EFIEOD;
+ %let _EFIERR_ = 0; /* set the ERROR detection macro variable */
+ %let _EFIREC_ = 0; /* clear export record count macro variable */
+ file smr_out delimiter=',' DSD DROPOVER lrecl=32767;
+ format region best12. ;
+ format sex best12. ;
+ format _TYPE_ best12. ;
+ format _FREQ_ best12. ;
+ format sp_evnt best12. ;
+ format sp_exp best12. ;
+ format smr best12. ;
+ format l99 best12. ;
+ format u99 best12. ;
+ format p best12. ;
+ format p_u best12. ;
+ format p_l best12. ;
+ format u_lam99 best12. ;
+ format l_lam99 best12. ;
+ if _n_ = 1 then /* write column names */
+ do;
+ put
+ 'region'
+ ','
+ 'sex'
+ ','
+ '_TYPE_'
+ ','
+ '_FREQ_'
+ ','
+ 'sp_evnt'
+ ','
+ 'sp_exp'
+ ','
+ 'smr'
+ ','
+ 'l99'
+ ','
+ 'u99'
+ ','
+ 'p'
+ ','
+ 'p_u'
+ ','
+ 'p_l'
+ ','
+ 'u_lam99'
+ ','
+ 'l_lam99'
+ ;
+ end;
+ do;
+ EFIOUT + 1;
+ put region @;
+ put sex @;
+ put _TYPE_ @;
+ put _FREQ_ @;
+ put sp_evnt @;
+ put sp_exp @;
+ put smr @;
+ put l99 @;
+ put u99 @;
+ put p @;
+ put p_u @;
+ put p_l @;
+ put u_lam99 @;
+ put l_lam99 ;
+ ;
+ end;
+ if _ERROR_ then call symput('_EFIERR_',1); /* set ERROR detection macro variable */
+ If EFIEOD then
+ call symput('_EFIREC_',EFIOUT);
+ run;
+
+filename smr_out ;
+
+%mend ;
+
+%smrcalc(37,37,0.99) ;
+%smrcalc(37,37,0.90) ;
+
+%smrcalc(95,95,0.99) ;
+%smrcalc(95,95,0.90) ;
+
+%smrcalc(37,95,0.99) ;
+%smrcalc(37,95,0.90) ;
+
+%smrcalc(95,37,0.99) ;
+%smrcalc(95,37,0.90) ;
diff --git a/tests/SAS/summ_higher_order.sas b/tests/SAS/summ_higher_order.sas
new file mode 100644
index 0000000..39454bd
--- /dev/null
+++ b/tests/SAS/summ_higher_order.sas
@@ -0,0 +1,321 @@
+/*
+ * The contents of this file are subject to the HACOS License Version 1.2
+ * (the "License"); you may not use this file except in compliance with
+ * the License. Software distributed under the License is distributed
+ * on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ * implied. See the LICENSE file for the specific language governing
+ * rights and limitations under the License. The Original Software
+ * is "NetEpi Analysis". The Initial Developer of the Original
+ * Software is the Health Administration Corporation, incorporated in
+ * the State of New South Wales, Australia.
+ *
+ * Copyright (C) 2004,2005 Health Administration Corporation.
+ * All Rights Reserved.
+ */
+
+options nodate pageno=1 linesize=100 pagesize=60;
+
+data testdata;
+file 'c:\temp\testdata.txt' ;
+keep z a b c d e f ;
+letters = 'abc' ;
+attrib cola length=$2000
+ colb length=$2000
+ colc length=$2000
+ cold length=$2000
+ cole length=$32000
+ colf length=$32000
+ z length=$1
+ a length=$1
+ b length=$1
+ c length=$1
+ d length=$1
+ e length=8
+ f length=8
+ sume length=8
+ sumf length=8 ;
+sume = 0 ;
+sumf = 0 ;
+z = 'z' ;
+do t = 1 to 200 ;
+ a = substr(letters,int(ranuni(123)*3 + 1),1) ;
+ b = substr(letters,int(ranuni(456)*3 + 1),1) ;
+ c = substr(letters,int(ranuni(789)*3 + 1),1) ;
+ d = substr(letters,int(ranuni(246)*3 + 1),1) ;
+ e = ranuni(999) ;
+ f = ranuni(888) ;
+ cola = trim(left(cola)) || "'" || a || "', " ;
+ colb = trim(left(colb)) || "'" || b || "', " ;
+ colc = trim(left(colc)) || "'" || c || "', " ;
+ cold = trim(left(cold)) || "'" || d || "', " ;
+ cole = trim(left(cole)) || put(e,18.16) || ", " ;
+ colf = trim(left(colf)) || put(f,18.16) || ", " ;
+ output ;
+ sume = sume + e ;
+ sumf = sumf + f ;
+end ;
+put cola= ;
+put colb= ;
+put colc= ;
+put cold= ;
+put cole= ;
+put colf= ;
+file log ;
+put sume 18.16 ;
+put sumf 18.16 ;
+run ;
+
+
+proc summary data=testdata ;
+ class a b c ;
+ var e ;
+ freq f ;
+ output out=test_out n=n mean=wgt_meane;
+ run;
+
+data test_out ;
+ set test_out ;
+ if _type_ = '000'b then level = 0 ;
+ if _type_ = '100'b or _type_ = '010'b or _type_ = '001'b then level = 1 ;
+ if _type_ = '110'b or _type_ = '101'b or _type_ = '011'b then level = 2 ;
+ if _type_ = '111'b then level = 3 ;
+ if a = ' ' then a = 'z' ;
+ if b = ' ' then b = 'z' ;
+ if c = ' ' then c = 'z' ;
+run ;
+
+
+data level_111 ;
+ set test_out ;
+ * if _type_ = '111'b ;
+ n_111 = n ;
+ drop n ;
+ run ;
+data level_011 ;
+ set test_out ;
+ if _type_ = '011'b ;
+ n_011 = n ;
+ drop n ;
+ run ;
+proc sort data=level_011 ;
+ by b c ;
+ run ;
+data level_101 ;
+ set test_out ;
+ if _type_ = '101'b ;
+ n_101 = n ;
+ drop n ;
+run ;
+proc sort data=level_101 ;
+ by a c ;
+ run ;
+data level_110 ;
+ set test_out ;
+ if _type_ = '110'b ;
+ n_110 = n ;
+ drop n ;
+run ;
+proc sort data=level_110 ;
+ by a b ;
+ run ;
+data level_001 ;
+ set test_out ;
+ if _type_ = '001'b ;
+ n_001 = n ;
+ drop n ;
+ run ;
+proc sort data=level_001 ;
+ by c ;
+ run ;
+data level_010 ;
+ set test_out ;
+ if _type_ = '010'b ;
+ n_010 = n ;
+ drop n ;
+ run ;
+proc sort data=level_010 ;
+ by b ;
+ run ;
+data level_100 ;
+ set test_out ;
+ if _type_ = '100'b ;
+ n_100 = n ;
+ drop n ;
+ run ;
+proc sort data=level_100 ;
+ by a ;
+ run ;
+data level_000 ;
+ set test_out ;
+ if _type_ = '000'b ;
+ n_000 = n ;
+ drop n ;
+ run ;
+
+*******;
+
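+* Merge each marginal summary back onto the full cross-classification so the ;
+* per-level denominators (n_011, n_101, n_110, ...) appear on every row for ;
+* the of_all_* proportion calculations below. ;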
+proc sort data=level_111 ;
+ by b c ;
+ run ;
+data testprops ;
+ merge level_011 level_111(in=b111) ;
+ by b c;
+ if b111 ;
+ run ;
+proc sort data=testprops ;
+ by b ;
+ run ;
+data testprops ;
+ merge level_011 testprops(in=b111) ;
+ by b ;
+ if b111 ;
+ run ;
+proc sort data=testprops ;
+ by c ;
+ run ;
+data testprops ;
+ merge level_011 testprops(in=b111) ;
+ by c ;
+ if b111 ;
+ run ;
+
+
+ proc sort data=testprops ;
+ by a c ;
+ run ;
+data testprops ;
+ merge level_101 testprops(in=b111) ;
+ by a c;
+ if b111 ;
+ run ;
+ proc sort data=testprops ;
+ by a ;
+ run ;
+data testprops ;
+ merge level_101 testprops(in=b111) ;
+ by a ;
+ if b111 ;
+ run ;
+ proc sort data=testprops ;
+ by c ;
+ run ;
+data testprops ;
+ merge level_101 testprops(in=b111) ;
+ by c;
+ if b111 ;
+ run ;
+
+proc sort data=testprops ;
+ by a b ;
+ run ;
+data testprops ;
+ merge level_110 testprops(in=b111) ;
+ by a b;
+ if b111 ;
+ run ;
+proc sort data=testprops ;
+ by a ;
+ run ;
+data testprops ;
+ merge level_110 testprops(in=b111) ;
+ by a ;
+ if b111 ;
+ run ;
+proc sort data=testprops ;
+ by b ;
+ run ;
+data testprops ;
+ merge level_110 testprops(in=b111) ;
+ by b;
+ if b111 ;
+ run ;
+
+proc sort data=testprops ;
+ by a ;
+ run ;
+data testprops ;
+ merge level_100 testprops(in=b111) ;
+ by a ;
+ if b111 ;
+ run ;
+proc sort data=testprops ;
+ by b ;
+ run ;
+data testprops ;
+ merge level_010 testprops(in=b111) ;
+ by b ;
+ if b111 ;
+ run ;
+proc sort data=testprops ;
+ by c ;
+ run ;
+data testprops ;
+ merge level_001 testprops(in=b111) ;
+ by c ;
+ if b111 ;
+ run ;
+data testprops ;
+ set testprops ;
+ n_000 = 200 ;
+ drop _freq_ _type_ ;
+
+select ;
+ when (_type_ = '111'b) n = n_111 ;
+ when (_type_ = '011'b) n = n_011 ;
+ when (_type_ = '101'b) n = n_101 ;
+ when (_type_ = '110'b) n = n_110 ;
+ when (_type_ = '100'b) n = n_100 ;
+ when (_type_ = '010'b) n = n_010 ;
+ when (_type_ = '001'b) n = n_001 ;
+ when (_type_ = '000'b) n = n_000 ;
+ otherwise n = . ;
+ end ;
+ of_all_abc = n / n_000 ;
+ of_all_ab = n / n_001 ;
+ of_all_ac = n / n_010 ;
+ of_all_bc = n / n_100 ;
+ of_all_a = n / n_011 ;
+ of_all_b = n / n_101 ;
+ of_all_c = n / n_110 ;
+ of_all = n / n_000 ;
+ run ;
+proc sort data=testprops ;
+ by level a b c ;
+ run ;
+proc print data=testprops ;
+ where level = 3 ;
+ run ;
+proc print data=testprops ;
+ run ;
+
+******************************** ;
+ proc summary data=testdata ;
+ * where c ne 'c' and f > 0.1 ;
+ class a b d ;
+ var e ;
+ weight f ;
+ output out=test_out n=n p25=p25_cole mean=mean_cole stderr=stderr_cole;
+ run;
+data test_out ;
+ set test_out ;
+ if _type_ = '000'b then level = 0 ;
+ if _type_ = '100'b or _type_ = '010'b or _type_ = '001'b then level = 1 ;
+ if _type_ = '110'b or _type_ = '101'b or _type_ = '011'b then level = 2 ;
+ if _type_ = '111'b then level = 3 ;
+ if a = ' ' then a = 'z' ;
+ if b = ' ' then b = 'z' ;
+ if c = ' ' then c = 'z' ;
+ if d = ' ' then d = 'z' ;
+run ;
+proc sort data=test_out ;
+ by level a b d ;
+ run ;
+title 'Unfiltered, weighted' ;
+proc print data=test_out ;
+ run ;
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..6d55fee
--- /dev/null
+++ b/tests/__init__.py
@@ -0,0 +1,16 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: __init__.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/tests/__init__.py,v $
diff --git a/tests/column.py b/tests/column.py
new file mode 100644
index 0000000..ae6bef6
--- /dev/null
+++ b/tests/column.py
@@ -0,0 +1,335 @@
+# vim: set ts=4 sw=4 et:
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: column.py 3690 2009-02-09 05:58:21Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/tests/column.py,v $
+
+import os, shutil
+import errno
+import unittest
+import SOOMv0
+import soomfunc
+import array
+
+class column_basic_test(unittest.TestCase):
+ data = [3,1,4,1,5,9,2,6,5,4]
+
+    def test_00_instantiate(self):
+ ds = SOOMv0.Dataset('testds')
+ col = ds.addcolumn('test')
+
+ def test_01_badargs(self):
+ ds = SOOMv0.Dataset('testds')
+ self.assertRaises(SOOMv0.Error, ds.addcolumn, '!test') # Bad name
+ self.assertRaises(SOOMv0.Error, ds.addcolumn,
+ 'test', coltype='UnknownColType')
+ self.assertRaises(SOOMv0.Error, ds.addcolumn,
+ 'test', datatype='UnknownDataType')
+ self.assertRaises(SOOMv0.Error, ds.addcolumn,
+ 'test', datatype=int, all_value='BadValue')
+
+ def test_02_data(self):
+ ds = SOOMv0.Dataset('testds')
+ ds.addcolumnfromseq('test', self.data)
+ self.assertEqual(list(ds['test']), self.data)
+ self.assertEqual(len(ds['test']), len(self.data))
+ self.assertEqual(str(ds['test']), '''\
+Ordinal test
+------- ----
+ 0 3
+ 1 1
+ 2 4
+ 3 1
+ 4 5
+ 5 9
+ 6 2
+ 7 6
+ 8 5
+ 9 4 ''')
+ self.assertEqual(list(ds['test'][2:5]), self.data[2:5])
+
+ def test_03_cardinality(self):
+ ds = SOOMv0.Dataset('testds')
+ ds.addcolumnfromseq('test', self.data)
+ self.assertEqual(ds['test'].cardinality(), 7)
+
+class dataset_mixin_base:
+ def _asserts(self, ds, expect):
+ self.assertEqual(list(ds['test']), list(expect))
+ self.assertEqual(len(ds['test']), len(expect))
+ self.assertEqual(list(ds['row_ordinal']), range(len(expect)))
+ self.assertEqual(len(ds['row_ordinal']), len(expect))
+
+class non_persistent_dataset_mixin(dataset_mixin_base):
+ def _test(self, data, expect = None, **kwargs):
+        if expect is None:
+ expect = data
+ ds = SOOMv0.Dataset('testds', backed = False)
+ ds.addcolumnfromseq('test', data, **kwargs)
+ self._asserts(ds, expect)
+ return ds['test']
+
+class persistent_dataset_mixin(dataset_mixin_base):
+ def setUp(self):
+ SOOMv0.dsunload('testds')
+ self.path = os.path.join(os.path.dirname(__file__), 'test_objects')
+ self.saved_writepath, SOOMv0.soom.writepath = \
+ SOOMv0.soom.writepath, self.path
+
+ def _test(self, data, expect = None, **kwargs):
+        if expect is None:
+ expect = data
+ ds = SOOMv0.makedataset('testds', path=self.path, backed = True)
+ ds.writepath = self.path
+ ds.addcolumnfromseq('test', data, **kwargs)
+ self._asserts(ds, expect)
+ ds.save()
+ ds.unload()
+ ds = SOOMv0.dsload('testds')
+ self._asserts(ds, expect)
+ return ds['test']
+
+ def tearDown(self):
+ if hasattr(self, 'path') and self.path:
+ SOOMv0.soom.writepath = self.saved_writepath
+ try:
+ shutil.rmtree(self.path)
+ except OSError, (eno, estr):
+ if eno != errno.ENOENT:
+ raise
+
+class datatypes_test_mixin:
+ def test_int(self):
+ data = [3,1,4,1,5,9,2,6,5,4]
+ self._test(data, datatype=int)
+
+ def test_float(self):
+ data = [0.16, 0.78, 0.34, 0.85, 0.87, 0.10, 0.79, 0.28,
+ 0.14, 0.95, 0.47, 0.71, 0.35, 0.65, 0.34, 0.01]
+ self._test(data, datatype=float)
+
+ def test_str(self):
+ data = ['pickle', 'cheese', 'salami', 'pickle',
+ 'cheese', 'cheese', 'salami', 'salami']
+ self._test(data, datatype=str)
+
+ def test_tuple(self):
+ data = [
+ ('285.9', '285.9'),
+ ('276.7', '285.9', '009.2'),
+ ('250.01',),
+ ('009.2',),
+ ('276.7', '244.9'),
+ ('250.00', '276.7', '285.9'),
+ ('276.7', '079.9', '079.9', '276.7'),
+ ('250.01',),
+ (),
+ ]
+ col = self._test(data, datatype=tuple)
+ self.assertEqual(col.cardinality(), 7)
+ self.assertEqual(list(col.inverted['276.7']), [1,4,5,6])
+
+ def test_recode(self):
+ data = [None, 1, 2.3, 'A', 'A', 1, None]
+ col = self._test(data, datatype='recode')
+ self.assertEqual(col.cardinality(), 4)
+ self.assertEqual(list(col), data)
+
+ def test_missing(self):
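+        # Both None and the declared missing value 4 appear to be stored
+        # as 0, the missing marker for int columns.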
+ data = [3,None,1,4,1,5,9,2,6,5,4]
+ expect = [3,0,1,0,1,5,9,2,6,5,0]
+ self._test(data, expect=expect, datatype=int, missingvalues = {4: True})
+
+class column_datatypes_persistent_test(datatypes_test_mixin,
+ persistent_dataset_mixin,
+ unittest.TestCase):
+ pass
+
+class column_datatypes_non_persistent_test(datatypes_test_mixin,
+ non_persistent_dataset_mixin,
+ unittest.TestCase):
+ pass
+
+class column_calculatedby(unittest.TestCase):
+ def test_calculatedby(self):
+ realdata = [3,1,4,1,5,9,2,6,5,4]
+ data = [None] * len(realdata)
+ fn = iter(realdata).next
+ ds = SOOMv0.Dataset('testds')
+ ds.addcolumnfromseq('test', data, datatype='int', calculatedby=fn)
+ self.assertEqual(list(ds['test']), list(realdata))
+ self.assertEqual(len(ds['test']), len(realdata))
+
+ def test_calculatedby_witharg(self):
+ def fn(arg):
+ return arg.next()
+ realdata = [3,1,4,1,5,9,2,6,5,4]
+ data = [None] * len(realdata)
+ ds = SOOMv0.Dataset('testds')
+ ds.addcolumnfromseq('test', data, datatype='int',
+ calculatedby=fn, calculatedargs=(iter(realdata),))
+ self.assertEqual(list(ds['test']), list(realdata))
+ self.assertEqual(len(ds['test']), len(realdata))
+
+class searchabletext_coltype(persistent_dataset_mixin, unittest.TestCase):
+ data = [
+ 'this would be line one',
+ 'and this is another line',
+ " ".join(["amphibian"] * 50),
+ 'one more line',
+ " ".join(["zog"] * 100),
+ " ".join(["amphibian"] * 50),
+ ]
+
+ def _assertword(self, col, word, positions):
+ w=col.word_occurrences(word)
+ if positions is not None:
+ pos = []
+ for line, word in positions:
+ pos.append(line)
+ pos.append(word)
+ want = array.array('L')
+ want.fromlist(pos)
+ else:
+ want = None
+        self.assertEqual(w, want)
+
+ def _assertrows(self, col, query, rows, message = None):
+ sexpr = SOOMv0.soomparse.parse('sgoal', query)
+ result = list(col.op_contains(sexpr))
+ self.assertEqual(result, rows, message)
+
+ def test_null(self):
+ col = self._test([], datatype='str', coltype='searchabletext')
+ self.assertEqual(len(col), 0)
+
+ def test_basics(self):
+ col = self._test(self.data, datatype='str', coltype='searchabletext')
+ self._assertword(col, 'line', [(0, 3), (1, 4), (3, 2)])
+ self._assertword(col, 'one', [(0, 4), (3, 0)])
+ self._assertword(col, 'missing', None)
+ # these test words which should take up more than one block in the index file
+ self._assertword(col, 'amphibian', [ (2, x) for x in range(50) ] + [ (5, x) for x in range(50) ])
+ self._assertword(col, 'zog', [ (4, x) for x in range(100) ])
+
+ def test_bug_zero_word_idx(self):
+ # If dataset creation was interrupted during the indexing of a text
+ # column, the bsddb word index file was not being zeroed before the
+ # next creation attempt, which then referred to non-existent blocks in
+ # the occurrences file. In this test, we start the indexing, then
+ # "abort" before flushing, then restart the attempt with a fresh
+ # Dataset object.
+ ds = SOOMv0.makedataset('testds', path=self.path, backed = True)
+ ds.writepath = self.path
+ ds.addcolumnfromseq('test', self.data,
+ datatype='str', coltype='searchabletext')
+ # Now destroy the dataset without flushing it...
+ del SOOMv0.datasets.datasets['testds']
+ # And restart the creation
+ ds = SOOMv0.makedataset('testds', path=self.path, backed = True)
+ ds.addcolumnfromseq('test', self.data,
+ datatype='str', coltype='searchabletext')
+ ds.save()
+ ds.unload()
+ # And check that the resulting column does the right things.
+ ds = SOOMv0.dsload('testds')
+ self._asserts(ds, self.data)
+ col = ds['test']
+ self._assertword(col, 'amphibian',
+ [ (2, x) for x in range(50) ] + [ (5, x) for x in range(50) ])
+ self._assertword(col, 'zog', [ (4, x) for x in range(100) ])
+
+ def test_query(self):
+ data = """
+Sir Walter Elliot, of Kellynch Hall, in Somersetshire, was a man who,
+for his own amusement, never took up any book but the Baronetage;
+there he found occupation for an idle hour, and consolation in a
+distressed one; there his faculties were roused into admiration and
+respect, by contemplating the limited remnant of the earliest patents;
+there any unwelcome sensations, arising from domestic affairs
+changed naturally into pity and contempt as he turned over
+the almost endless creations of the last century; and there,
+if every other leaf were powerless, he could read his own history
+with an interest which never failed. This was the page at which
+the favourite volume always opened:
+
+ "ELLIOT OF KELLYNCH HALL.
+
+"Walter Elliot, born March 1, 1760, married, July 15, 1784, Elizabeth,
+daughter of James Stevenson, Esq. of South Park, in the county of
+Gloucester, by which lady (who died 1800) he has issue Elizabeth,
+born June 1, 1785; Anne, born August 9, 1787; a still-born son,
+November 5, 1789; Mary, born November 20, 1791."
+
+Precisely such had the paragraph originally stood from the printer's hands;
+but Sir Walter had improved it by adding, for the information of
+himself and his family, these words, after the date of Mary's birth--
+"Married, December 16, 1810, Charles, son and heir of Charles
+Musgrove, Esq. of Uppercross, in the county of Somerset,"
+and by inserting most accurately the day of the month on which
+he had lost his wife.
+
+Then followed the history and rise of the ancient and respectable family,
+in the usual terms; how it had been first settled in Cheshire;
+how mentioned in Dugdale, serving the office of high sheriff,
+representing a borough in three successive parliaments,
+exertions of loyalty, and dignity of baronet, in the first year
+of Charles II, with all the Marys and Elizabeths they had married;
+forming altogether two handsome duodecimo pages, and concluding with
+the arms and motto:--"Principal seat, Kellynch Hall, in the county
+of Somerset," and Sir Walter's handwriting again in this finale:--
+
+"Heir presumptive, William Walter Elliot, Esq., great grandson of
+the second Sir Walter."
+
+Vanity was the beginning and the end of Sir Walter Elliot's character;
+vanity of person and of situation. He had been remarkably handsome
+in his youth; and, at fifty-four, was still a very fine man.
+Few women could think more of their personal appearance than he did,
+nor could the valet of any new made lord be more delighted with
+the place he held in society. He considered the blessing of beauty
+as inferior only to the blessing of a baronetcy; and the Sir Walter Elliot,
+who united these gifts, was the constant object of his warmest respect
+and devotion.
+ """.strip().split("\n\n")
+ col = self._test(data, datatype='str', coltype='searchabletext')
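+        # Query syntax exercised below (as inferred from these assertions):
+        # whitespace = AND, '|' = OR, '~' = proximity (optionally '~[n]'),
+        # '"..."' = phrase, '<' / '>' = ordered occurrence, '*' = wildcard.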
+ self._assertrows(col, 'elliot', [0, 1, 2, 5, 6])
+ self._assertrows(col, 'eliot', [])
+ self._assertrows(col, 'elliot baronetage', [0])
+ self._assertrows(col, 'elliot baronetcy', [6])
+ self._assertrows(col, 'elliot Kellynch', [0, 1])
+ self._assertrows(col, 'elliot Cheshire', [])
+ self._assertrows(col, 'elliot | Cheshire', [0, 1, 2, 4, 5, 6])
+ self._assertrows(col, 'in the', [0, 2, 3, 4, 6])
+ self._assertrows(col, 'the in', [0, 2, 3, 4, 6])
+ self._assertrows(col, 'in ~ the', [2, 3, 4, 6])
+ self._assertrows(col, 'the ~ in', [2, 3, 4, 6])
+ self._assertrows(col, '"in the"', [2, 3, 4])
+ self._assertrows(col, '"the in"', [])
+ self._assertrows(col, '"sir walter elliot"', [0, 6])
+ self._assertrows(col, 'in ~[11] the', [2, 3, 4, 6])
+ self._assertrows(col, 'in ~[12] the', [0, 2, 3, 4, 6])
+ self._assertrows(col, 'elliot < kellynch', [0, 1])
+ self._assertrows(col, 'elliot > kellynch', [])
+ self._assertrows(col, 'in < the', [2, 3, 4, 6])
+ self._assertrows(col, 'in > the', [4, 6])
+ self._assertrows(col, 'elliot walter', [0, 2, 5, 6])
+ self._assertrows(col, 'elliot walter', [0, 2, 5, 6])
+ self._assertrows(col, 'son', [2, 3])
+ self._assertrows(col, '*son', [2, 3, 5, 6])
+ self._assertrows(col, 'person*', [6])
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tests/csv_source.py b/tests/csv_source.py
new file mode 100644
index 0000000..941c3c9
--- /dev/null
+++ b/tests/csv_source.py
@@ -0,0 +1,61 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: csv_source.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/tests/csv_source.py,v $
+
+import os, sys
+import unittest
+import SOOMv0
+from SOOMv0 import DataSourceColumn
+from SOOMv0.Sources.CSV import CSVDataSource
+
+def data_dir():
+ return os.path.join(os.path.dirname(sys.modules[__name__].__file__), 'data')
+
+class DummyDataType:
+ def __init__(self, name):
+ self.name = name
+
+class DummyCol:
+ def __init__(self, name, datatype):
+ self.name = name
+ self.datatype = DummyDataType(datatype)
+
+class csv_data_source_test(unittest.TestCase):
+ def test_source(self):
+ columns = [
+ DataSourceColumn('a_col', label='A Col', ordinalpos=1, posbase=1),
+ DataSourceColumn('b_col', label='B Col', ordinalpos=2, posbase=1),
+ ]
+ dummy_dataset = [
+ DummyCol('a_col', 'str'),
+ DummyCol('b_col', 'str'),
+ ]
+ source = CSVDataSource('test', columns,
+ path=data_dir(),
+ filename='csv_data')
+ source.register_dataset_types(dummy_dataset)
+ expect = [
+ ('1', '2'),
+ ('a,b', 'c'),
+ ('d','e\nf'),
+ ]
+ for db_row in source:
+ row_expect = dict(zip(('a_col', 'b_col'), expect.pop(0)))
+ self.assertEqual(db_row, row_expect)
+ self.failIf(expect)
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tests/data/csv_data b/tests/data/csv_data
new file mode 100644
index 0000000..d996eca
--- /dev/null
+++ b/tests/data/csv_data
@@ -0,0 +1,4 @@
+1,2
+"a,b",c
+d,"e
+f"
diff --git a/tests/data/smr_results_37_37_CL0.90.csv b/tests/data/smr_results_37_37_CL0.90.csv
new file mode 100644
index 0000000..0e5a2f2
--- /dev/null
+++ b/tests/data/smr_results_37_37_CL0.90.csv
@@ -0,0 +1,35 @@
+region,sex,_TYPE_,_FREQ_,sp_evnt,sp_exp,smr,l99,u99,p,p_u,p_l,u_lam99,l_lam99
+1,1,3,18,3,3.1461459203,95.354763447,25.990258172,246.44936136,1,0.385473014,0.614526986,7.7536565279,0.8176914472
+1,2,3,18,0,1.2463621675,0,0,184.74446297,0.5750979028,0.2875489514,0.2875489514,2.302585093,
+2,1,3,18,0,3.1658820306,0,0,72.731234795,0.0843538481,0.0421769241,0.0421769241,2.302585093,
+2,2,3,18,0,1.2282650863,0,0,187.46646132,0.5856002404,0.2928001202,0.2928001202,2.302585093,
+3,1,3,18,7,3.7560135593,186.36780431,87.468153137,350.05501431,0.3629053437,0.0868299025,0.2760754412,13.148113802,3.2853156919
+3,2,3,18,0,1.3292660788,0,0,173.22228633,0.5293428761,0.264671438,0.264671438,2.302585093,
+4,1,3,18,10,7.339643813,136.24639362,73.919196017,231.10411987,0.4649902336,0.2055447083,0.2594455254,16.962219236,5.4254056971
+4,2,3,18,2,2.7597268023,72.470941628,12.876691649,228.13104604,0.7782248395,0.2991162421,0.4791085974,6.2957936219,0.3553615107
+5,1,3,18,1,0.8266012096,120.97732115,6.2053253481,573.90002135,1,0.5624661521,0.7991998558,4.7438645184,0.0512932944
+5,2,3,18,0,0.2638784018,0,0,872.59323883,1,0.7680669295,0.7680669295,2.302585093,
+6,1,3,18,5,11.269627974,44.367036886,17.481939712,93.286441517,0.0320216183,0.0001194766,0.0319021416,10.513034909,1.9701495681
+6,2,3,18,3,5.1580920364,58.161040532,15.852595134,150.32024386,0.322435795,0.0789043472,0.2435314478,7.7536565279,0.8176914472
+7,1,3,18,5,3.4931459505,143.1374489,56.400436626,300.96179941,0.5953783336,0.2732615546,0.3221167789,10.513034909,1.9701495681
+7,2,3,18,0,1.0566962901,0,0,217.90415227,0.6952045852,0.3476022926,0.3476022926,2.302585093,
+8,1,3,18,15,7.2898041436,205.7668451,126.83921692,316.8415681,0.1562487621,0.0080920375,0.1481567246,23.09712976,9.246330491
+8,2,3,18,13,3.016764811,430.92520679,254.89485503,685.12364639,0.1966753138,0.0000170987,0.1966582151,20.668569076,7.6895782916
+9,1,3,18,16,11.311048695,141.4546116,88.727022601,214.84465621,0.3156706623,0.1101383857,0.2055322766,24.301183684,10.035956732
+9,2,3,18,9,4.5948413452,195.87183374,102.18475868,341.80105998,0.2080027485,0.044814631,0.1631881175,15.705216422,4.6952275403
+10,1,3,18,5,9.0566695807,55.207932182,21.753576748,116.08058365,0.1240313208,0.0117390791,0.1122922417,10.513034909,1.9701495681
+10,2,3,18,1,2.5604257083,39.056005286,2.0033111767,185.27639771,0.2910619788,0.0159413397,0.2751206392,4.7438645184,0.0512932944
+11,1,3,18,1,3.3186722553,30.132532624,1.5455968665,142.94465236,0.1565194202,0.0001797473,0.1563396729,4.7438645184,0.0512932944
+11,2,3,18,0,1.4870153647,0,0,154.84608617,0.4520926311,0.2260463155,0.2260463155,2.302585093,
+12,1,3,18,4,8.4071076766,47.578788733,16.251943585,108.87833698,0.0812595291,0.0026821881,0.078577341,9.1535190266,1.3663183967
+12,2,3,18,2,2.61913992,76.360945238,13.567870429,240.37637599,0.7818039748,0.2681686921,0.5136352827,6.2957936219,0.3553615107
+13,1,3,18,3,4.6630365015,64.335760593,17.535600395,166.27913003,0.4161598518,0.1006199652,0.3155398867,7.7536565279,0.8176914472
+13,2,3,18,0,1.4968562704,0,0,153.82806877,0.4476654496,0.2238327248,0.2238327248,2.302585093,
+14,1,3,18,2,2.70333968,73.98256367,13.145277796,232.88947624,0.7794509958,0.286644252,0.4928067439,6.2957936219,0.3553615107
+14,2,3,18,0,0.96446211,0,0,238.74292926,0.7623763639,0.3811881819,0.3811881819,2.302585093,
+15,1,3,18,1,2.4304199272,41.145153099,2.1104704505,195.18703189,0.3394104408,0.0375339287,0.301876512,4.7438645184,0.0512932944
+15,2,3,18,0,0.8266567315,0,0,278.54186694,0.8750191117,0.4375095559,0.4375095559,2.302585093,
+16,1,3,18,6,2.379016776,252.20503111,109.83591081,497.78529399,0.3474613197,0.0344237308,0.3130375888,11.842395652,2.6130147442
+16,2,3,18,2,0.8869035785,225.50365661,40.06766004,709.86224148,0.6346584415,0.2227291548,0.4119292868,6.2957936219,0.3553615107
+17,1,3,18,2,1.4438243069,138.52100913,24.612517534,436.04984289,1,0.4232002669,0.5767997331,6.2957936219,0.3553615107
+17,2,3,18,0,0.5046472973,0,0,456.27611704,1,0.603718471,0.603718471,2.302585093,
diff --git a/tests/data/smr_results_37_37_CL0.99.csv b/tests/data/smr_results_37_37_CL0.99.csv
new file mode 100644
index 0000000..ffba811
--- /dev/null
+++ b/tests/data/smr_results_37_37_CL0.99.csv
@@ -0,0 +1,35 @@
+region,sex,_TYPE_,_FREQ_,sp_evnt,sp_exp,smr,l99,u99,p,p_u,p_l,u_lam99,l_lam99
+1,1,3,18,3,3.1461459203,95.354763447,10.73896117,348.91825661,1,0.385473014,0.614526986,10.977477495,0.3378633887
+1,2,3,18,0,1.2463621675,0,0,369.48892594,0.5750979028,0.2875489514,0.2875489514,4.605170186,
+2,1,3,18,0,3.1658820306,0,0,145.46246959,0.0843538481,0.0421769241,0.0421769241,4.605170186,
+2,2,3,18,0,1.2282650863,0,0,374.93292265,0.5856002404,0.2928001202,0.2928001202,4.605170186,
+3,1,3,18,7,3.7560135593,186.36780431,54.242016077,456.16430821,0.3629053437,0.0868299025,0.2760754412,17.133593269,2.0373374787
+3,2,3,18,0,1.3292660788,0,0,346.44457266,0.5293428761,0.264671438,0.264671438,4.605170186,
+4,1,3,18,10,7.339643813,136.24639362,50.641723579,291.53768282,0.4649902336,0.2055447083,0.2594455254,21.3978275,3.7169221315
+4,2,3,18,2,2.7597268023,72.470941628,3.7501736281,336.04022258,0.7782248395,0.2991162421,0.4791085974,9.2737920893,0.1034945467
+5,1,3,18,1,0.8266012096,120.97732115,0.606403882,898.87716276,1,0.5624661521,0.7991998558,7.4301295003,0.0050125418
+5,2,3,18,0,0.2638784018,0,0,1745.1864777,1,0.7680669295,0.7680669295,4.605170186,
+6,1,3,18,5,11.269627974,44.367036886,9.5648964028,125.55657954,0.0320216183,0.0001194766,0.0319021416,14.149759411,1.0779282407
+6,2,3,18,3,5.1580920364,58.161040532,6.550162082,212.82050452,0.322435795,0.0789043472,0.2435314478,10.977477495,0.3378633887
+7,1,3,18,5,3.4931459505,143.1374489,30.858379693,405.07209293,0.5953783336,0.2732615546,0.3221167789,14.149759411,1.0779282407
+7,2,3,18,0,1.0566962901,0,0,435.80830453,0.6952045852,0.3476022926,0.3476022926,4.605170186,
+8,1,3,18,15,7.2898041436,205.7668451,94.561661657,386.34861685,0.1562487621,0.0080920375,0.1481567246,28.16405748,6.8933599298
+8,2,3,18,13,3.016764811,430.92520679,184.97029277,845.16658513,0.1966753138,0.0000170987,0.1966582151,25.496688134,5.5801187031
+9,1,3,18,16,11.311048695,141.4546116,66.899332295,260.64747604,0.3156706623,0.1101383857,0.2055322766,29.481962938,7.5670160527
+9,2,3,18,9,4.5948413452,195.87183374,68.172154531,435.23642394,0.2080027485,0.044814631,0.1631881175,19.998423156,3.1324023423
+10,1,3,18,5,9.0566695807,55.207932182,11.902037841,156.23579159,0.1240313208,0.0117390791,0.1122922417,14.149759411,1.0779282407
+10,2,3,18,1,2.5604257083,39.056005286,0.19576986,290.19117704,0.2910619788,0.0159413397,0.2751206392,7.4301295003,0.0050125418
+11,1,3,18,1,3.3186722553,30.132532624,0.15104058,223.88861956,0.1565194202,0.0001797473,0.1563396729,7.4301295003,0.0050125418
+11,2,3,18,0,1.4870153647,0,0,309.69217234,0.4520926311,0.2260463155,0.2260463155,4.605170186,
+12,1,3,18,4,8.4071076766,47.578788733,7.9956932796,149.8028843,0.0812595291,0.0026821881,0.078577341,12.594089786,0.6722065435
+12,2,3,18,2,2.61913992,76.360945238,3.9514707083,354.07776494,0.7818039748,0.2681686921,0.5136352827,9.2737920893,0.1034945467
+13,1,3,18,3,4.6630365015,64.335760593,7.2455660301,235.41478802,0.4161598518,0.1006199652,0.3155398867,10.977477495,0.3378633887
+13,2,3,18,0,1.4968562704,0,0,307.65613754,0.4476654496,0.2238327248,0.2238327248,4.605170186,
+14,1,3,18,2,2.70333968,73.98256367,3.8283959472,343.04945685,0.7794509958,0.286644252,0.4928067439,9.2737920893,0.1034945467
+14,2,3,18,0,0.96446211,0,0,477.48585853,0.7623763639,0.3811881819,0.3811881819,4.605170186,
+15,1,3,18,1,2.4304199272,41.145153099,0.2062418007,305.71381584,0.3394104408,0.0375339287,0.301876512,7.4301295003,0.0050125418
+15,2,3,18,0,0.8266567315,0,0,557.08373387,0.8750191117,0.4375095559,0.4375095559,4.605170186,
+16,1,3,18,6,2.379016776,252.20503111,64.602815522,658.24146216,0.3474613197,0.0344237308,0.3130375888,15.659674811,1.536911819
+16,2,3,18,2,0.8869035785,225.50365661,11.669199365,1045.6370134,0.6346584415,0.2227291548,0.4119292868,9.2737920893,0.1034945467
+17,1,3,18,2,1.4438243069,138.52100913,7.1680845275,642.30751933,1,0.4232002669,0.5767997331,9.2737920893,0.1034945467
+17,2,3,18,0,0.5046472973,0,0,912.55223407,1,0.603718471,0.603718471,4.605170186,
diff --git a/tests/data/smr_results_37_95_CL0.90.csv b/tests/data/smr_results_37_95_CL0.90.csv
new file mode 100644
index 0000000..bd5cfb0
--- /dev/null
+++ b/tests/data/smr_results_37_95_CL0.90.csv
@@ -0,0 +1,35 @@
+region,sex,_TYPE_,_FREQ_,sp_evnt,sp_exp,smr,l99,u99,p,p_u,p_l,u_lam99,l_lam99
+1,1,3,18,980,3.1461459203,31149.222726,29530.819954,32836.383727,0.0430176011,0,0.0430176011,1033.080547,929.0826872
+1,2,3,18,176,1.2463621675,14121.096146,12417.070941,16001.524858,0.2875489514,0,0.2875489514,199.43695205,154.76167453
+2,1,3,18,1128,3.1658820306,35629.880997,33903.05576,37424.979752,0.0421769241,0,0.0421769241,1184.8307089,1073.3307501
+2,2,3,18,260,1.2282650863,21168.068921,19055.991768,23458.264928,0.2928001202,0,0.2928001202,288.12967797,234.05809374
+3,1,3,18,1049,3.7560135593,27928.546674,26525.478802,29389.185157,0.0233767449,0,0.0233767449,1103.8617795,996.30058046
+3,2,3,18,238,1.3292660788,17904.616977,16039.342134,19934.648261,0.264671438,0,0.264671438,264.98451725,213.20553425
+4,1,3,18,2409,7.339643813,32821.756224,31729.612075,33943.244561,0.0006492817,0,0.0006492817,2491.3132493,2328.8405095
+4,2,3,18,561,2.7597268023,20328.099127,18937.297085,21797.591355,0.0633090619,0,0.0633090619,601.5539709,522.6176633
+5,1,3,18,195,0.8266012096,23590.577625,20882.291221,26564.478155,0.4375338479,0,0.4375338479,219.58229776,172.61327183
+5,2,3,18,31,0.2638784018,11747.835286,8505.626693,15854.890011,0.7680669295,0,0.7680669295,41.837630371,22.444511782
+6,1,3,18,2528,11.269627974,22431.97385,21703.204866,23179.850483,0.0000127545,0,0.0000127545,2612.2829142,2445.8704468
+6,2,3,18,1099,5.1580920364,21306.327848,20260.312866,22394.253259,0.0057526651,0,0.0057526651,1155.1161939,1045.0455845
+7,1,3,18,697,3.4931459505,19953.360377,18726.687325,21242.100305,0.0304050687,0,0.0304050687,742.01756661,654.15051996
+7,2,3,18,206,1.0566962901,19494.721608,17315.660344,21881.410696,0.3476022926,0,0.3476022926,231.22005505,182.97394046
+8,1,3,18,2563,7.2898041436,35158.694932,34024.232169,36322.695516,0.0006824617,0,0.0006824617,2647.8533628,2480.2998865
+8,2,3,18,615,3.016764811,20386.07709,19053.037552,21791.050822,0.0489593552,0,0.0489593552,657.38475312,574.7853323
+9,1,3,18,3303,11.311048695,29201.536383,28370.838271,30051.254127,0.000012237,0,0.000012237,3399.1119878,3209.0393321
+9,2,3,18,1152,4.5948413452,25071.594718,23869.074093,26321.150784,0.0101038237,0,0.0101038237,1209.4151187,1096.7460851
+10,1,3,18,2423,9.0566695807,26753.763935,25866.089388,27665.218864,0.0001166107,0,0.0001166107,2505.5474613,2342.6062493
+10,2,3,18,653,2.5604257083,25503.571452,23884.457028,27207.402742,0.0772718382,0,0.0772718382,696.62533439,611.54377806
+11,1,3,18,1411,3.3186722553,42517.003532,40672.519458,44426.540062,0.0362008654,0,0.0362008654,1474.371259,1349.7876188
+11,2,3,18,361,1.4870153647,24276.817077,22214.06337,26486.181823,0.2260463155,0,0.2260463155,393.85359323,330.32653544
+12,1,3,18,2216,8.4071076766,26358.648958,25444.448177,27298.476518,0.0002232747,0,0.0002232747,2295.012315,2139.142156
+12,2,3,18,669,2.61913992,25542.736182,23940.372698,27227.902716,0.0728655061,0,0.0728655061,713.1368694,627.03185833
+13,1,3,18,1370,4.6630365015,29379.997338,28086.676667,30719.622634,0.0094377611,0,0.0094377611,1432.4672166,1309.6919851
+13,2,3,18,333,1.4968562704,22246.624916,20280.064045,24358.950979,0.2238327248,0,0.2238327248,364.61848513,303.5634103
+14,1,3,18,637,2.70333968,23563.446529,22049.097841,25158.048229,0.0669814422,0,0.0669814422,680.1075005,596.06201103
+14,2,3,18,103,0.96446211,10679.527888,9009.5912797,12579.371503,0.3811881819,0,0.3811881819,121.32327182,86.894094156
+15,1,3,18,603,2.4304199272,24810.527319,23172.346686,26538.009868,0.0879998713,0,0.0879998713,644.98508012,563.18533146
+15,2,3,18,124,0.8266567315,15000.180278,12855.343401,17412.3968,0.4375095559,0,0.4375095559,143.94075026,106.26956158
+16,1,3,18,639,2.379016776,26859.835813,25136.303562,28674.559553,0.0926416202,0,0.0926416202,682.17258221,597.99687861
+16,2,3,18,106,0.8869035785,11951.6938,10108.47925,14044.792582,0.4119292868,0,0.4119292868,124.563768,89.652464199
+17,1,3,18,331,1.4438243069,22925.227011,20892.688527,25108.894001,0.2360234046,0,0.2360234046,362.52831478,301.65371532
+17,2,3,18,69,0.5046472973,13672.91579,11082.687552,16706.019743,0.603718471,0,0.603718471,84.306477127,55.928483201
diff --git a/tests/data/smr_results_37_95_CL0.99.csv b/tests/data/smr_results_37_95_CL0.99.csv
new file mode 100644
index 0000000..39a979c
--- /dev/null
+++ b/tests/data/smr_results_37_95_CL0.99.csv
@@ -0,0 +1,35 @@
+region,sex,_TYPE_,_FREQ_,sp_evnt,sp_exp,smr,l99,u99,p,p_u,p_l,u_lam99,l_lam99
+1,1,3,18,980,3.1461459203,31149.222726,28645.928571,33804.999294,0.0430176011,0,0.0430176011,1063.5546061,901.24271308
+1,2,3,18,176,1.2463621675,14121.096146,11530.103003,17101.323541,0.2875489514,0,0.2875489514,213.14442676,143.7068417
+2,1,3,18,1128,3.1658820306,35629.880997,32956.623986,38454.583355,0.0421769241,0,0.0421769241,1217.4267444,1043.3678367
+2,2,3,18,260,1.2282650863,21168.068921,17939.541156,24790.22955,0.2928001202,0,0.2928001202,304.48973439,220.34512067
+3,1,3,18,1049,3.7560135593,27928.546674,25757.421851,30227.359296,0.0233767449,0,0.0233767449,1135.3437138,967.45225727
+3,2,3,18,238,1.3292660788,17904.616977,15056.515947,21116.701734,0.264671438,0,0.264671438,280.6971531,200.14115912
+4,1,3,18,2409,7.339643813,32821.756224,31124.848457,34583.826129,0.0006492817,0,0.0006492817,2538.3296548,2284.4530141
+4,2,3,18,561,2.7597268023,20328.099127,18185.473597,22645.025797,0.0633090619,0,0.0633090619,624.94084632,501.86938898
+5,1,3,18,195,0.8266012096,23590.577625,19466.412361,28301.078617,0.4375338479,0,0.4375338479,233.93705818,160.90960005
+5,2,3,18,31,0.2638784018,11747.835286,7023.7683555,18356.582582,0.7680669295,0,0.7680669295,48.439056745,18.534207684
+6,1,3,18,2528,11.269627974,22431.97385,21299.443555,23606.937239,0.0000127545,0,0.0000127545,2660.4140028,2400.368049
+6,2,3,18,1099,5.1580920364,21306.327848,19687.263076,23018.354784,0.0057526651,0,0.0057526651,1187.307925,1015.4871489
+7,1,3,18,697,3.4931459505,19953.360377,18060.371893,21983.897899,0.0304050687,0,0.0304050687,767.92963923,630.87514944
+7,2,3,18,206,1.0566962901,19494.721608,16173.899968,23273.979666,0.3476022926,0,0.3476022926,245.93527969,170.90900093
+8,1,3,18,2563,7.2898041436,35158.694932,33395.610455,36987.376213,0.0006824617,0,0.0006824617,2696.3072838,2434.4745947
+8,2,3,18,615,3.016764811,20386.07709,18330.912879,22600.612658,0.0489593552,0,0.0489593552,681.80732973,553.00052925
+9,1,3,18,3303,11.311048695,29201.536383,27909.360177,30535.962288,0.000012237,0,0.000012237,3453.937564,3156.8413202
+9,2,3,18,1152,4.5948413452,25071.594718,23209.775175,27037.752731,0.0101038237,0,0.0101038237,1242.3418413,1066.4523459
+10,1,3,18,2423,9.0566695807,26753.763935,25374.517207,28185.818088,0.0001166107,0,0.0001166107,2552.6964128,2298.0861811
+10,2,3,18,653,2.5604257083,25503.571452,23006.198219,28188.662098,0.0772718382,0,0.0772718382,721.74975121,589.05661372
+11,1,3,18,1411,3.3186722553,42517.003532,39658.096719,45520.263077,0.0362008654,0,0.0362008654,1510.6683413,1316.1222528
+11,2,3,18,361,1.4870153647,24276.817077,21111.9772,27766.013173,0.2260463155,0,0.2260463155,412.88488204,313.93834476
+12,1,3,18,2216,8.4071076766,26358.648958,24938.697115,27835.502331,0.0002232747,0,0.0002232747,2340.1606533,2096.6231196
+12,2,3,18,669,2.61913992,25542.736182,23070.744886,28198.214198,0.0728655061,0,0.0728655061,738.55068479,604.25508915
+13,1,3,18,1370,4.6630365015,29379.997338,27375.688979,31487.051935,0.0094377611,0,0.0094377611,1468.252725,1276.5383696
+13,2,3,18,333,1.4968562704,22246.624916,19231.949057,25583.704473,0.2238327248,0,0.2238327248,382.95128459,287.87463537
+14,1,3,18,637,2.70333968,23563.446529,21228.113647,26076.595683,0.0669814422,0,0.0669814422,704.93895829,573.86801955
+14,2,3,18,103,0.96446211,10679.527888,8163.8253076,13701.141094,0.3811881819,0,0.3811881819,132.14231448,78.737001816
+15,1,3,18,603,2.4304199272,24810.527319,22285.317929,27533.577401,0.0879998713,0,0.0879998713,669.18155181,541.62680779
+15,2,3,18,124,0.8266567315,15000.180278,11757.688331,18831.647239,0.4375095559,0,0.4375095559,155.67307955,97.195722051
+16,1,3,18,639,2.379016776,26859.835813,24201.848708,29719.876979,0.0926416202,0,0.0926416202,707.04085913,575.76604086
+16,2,3,18,106,0.8869035785,11951.6938,9173.3818042,15279.967486,0.4119292868,0,0.4119292868,135.51857842,81.359051487
+17,1,3,18,331,1.4438243069,22925.227011,19809.612684,26375.1012,0.2360234046,0,0.2360234046,380.81012209,286.01600303
+17,2,3,18,69,0.5046472973,13672.91579,9805.2415159,18512.616946,0.603718471,0,0.603718471,93.423421081,49.481886307
diff --git a/tests/data/smr_results_95_37_CL0.90.csv b/tests/data/smr_results_95_37_CL0.90.csv
new file mode 100644
index 0000000..2fde840
--- /dev/null
+++ b/tests/data/smr_results_95_37_CL0.90.csv
@@ -0,0 +1,35 @@
+region,sex,_TYPE_,_FREQ_,sp_evnt,sp_exp,smr,l99,u99,p,p_u,p_l,u_lam99,l_lam99
+1,1,3,18,3,1122.3446554,0.2672975708,0.0728556458,0.6908445183,0,0,0,7.7536565279,0.8176914472
+1,2,3,18,0,274.84175425,0,0,0.8377857648,8.68506E-120,4.34253E-120,4.34253E-120,2.302585093,
+2,1,3,18,0,1206.3764712,0,0,0.1908678715,0,0,0,2.302585093,
+2,2,3,18,0,294.92659408,0,0,0.7807315919,1.64451E-128,8.22257E-129,8.22257E-129,2.302585093,
+3,1,3,18,7,1052.2923871,0.6652143535,0.3122055934,1.2494734319,0,0,0,13.148113802,3.2853156919
+3,2,3,18,0,277.10687434,0,0,0.8309375574,9.01662E-121,4.50831E-121,4.50831E-121,2.302585093,
+4,1,3,18,10,2182.0974548,0.4582746741,0.2486326028,0.7773355493,0,0,0,16.962219236,5.4254056971
+4,2,3,18,2,594.63055986,0.336343292,0.0597617302,1.0587739761,1.00961E-253,0,1.00961E-253,6.2957936219,0.3553615107
+5,1,3,18,1,235.47706841,0.4246698019,0.0217827132,2.0145760054,1.28055E-100,0,1.28055E-100,4.7438645184,0.0512932944
+5,2,3,18,0,57.947952225,0,0,3.9735400555,1.363183E-25,6.815914E-26,6.815914E-26,2.302585093,
+6,1,3,18,5,3344.8511471,0.1494834831,0.0589009639,0.3143050153,0,0,0,10.513034909,1.9701495681
+6,2,3,18,3,1020.4731831,0.2939812677,0.0801286561,0.7598099251,0,0,0,7.7536565279,0.8176914472
+7,1,3,18,5,731.03251638,0.6839641039,0.2695023168,1.4381077002,5.7556E-306,0,5.7556E-306,10.513034909,1.9701495681
+7,2,3,18,0,202.78608728,0,0,1.1354748858,1.706677E-88,8.533384E-89,8.533384E-89,2.302585093,
+8,1,3,18,15,2082.6570925,0.7202337847,0.4439679736,1.1090222122,0,0,0,23.09712976,9.246330491
+8,2,3,18,13,605.93368185,2.1454493106,1.269046188,3.4110282519,1.70999E-237,0,1.70999E-237,20.668569076,7.6895782916
+9,1,3,18,16,3390.1347442,0.471957642,0.2960341547,0.7168205844,0,0,0,24.301183684,10.035956732
+9,2,3,18,9,982.85427448,0.9157003468,0.4777134986,1.5979191249,0,0,0,15.705216422,4.6952275403
+10,1,3,18,5,2000.5238408,0.249934537,0.098481684,0.5255141026,0,0,0,10.513034909,1.9701495681
+10,2,3,18,1,544.95818662,0.183500317,0.0094123358,0.870500643,1.16098E-234,0,1.16098E-234,4.7438645184,0.0512932944
+11,1,3,18,1,1307.6619931,0.076472361,0.0039225193,0.36277452,0,0,0,4.7438645184,0.0512932944
+11,2,3,18,0,347.84974981,0,0,0.6619481814,1.70531E-151,8.52655E-152,8.52655E-152,2.302585093,
+12,1,3,18,4,1880.3878951,0.2127220671,0.0726615184,0.4867888722,0,0,0,9.1535190266,1.3663183967
+12,2,3,18,2,532.44321966,0.3756269075,0.0667416726,1.1824347441,8.2413E-227,0,8.2413E-227,6.2957936219,0.3553615107
+13,1,3,18,3,1369.6191042,0.2190390007,0.0597021058,0.566117726,0,0,0,7.7536565279,0.8176914472
+13,2,3,18,0,349.10613699,0,0,0.6595659168,4.85469E-152,2.42734E-152,2.42734E-152,2.302585093,
+14,1,3,18,2,761.24734896,0.2627266949,0.0466814776,0.827036525,0,0,0,6.2957936219,0.3553615107
+14,2,3,18,0,198.47571965,0,0,1.1601343968,1.270926E-86,6.354628E-87,6.354628E-87,2.302585093,
+15,1,3,18,1,736.00971558,0.1358677717,0.0069691056,0.6445383013,0.00000E-309,0,0.00000E-309,4.7438645184,0.0512932944
+15,2,3,18,0,184.68173694,0,0,1.2467854868,1.243858E-80,6.219289E-81,6.219289E-81,2.302585093,
+16,1,3,18,6,696.61162885,0.8613120642,0.3751035205,1.6999997074,4.67495E-289,0,4.67495E-289,11.842395652,2.6130147442
+16,2,3,18,2,185.95734916,1.0755154389,0.1910983956,3.3856116202,3.035407E-77,0,3.035407E-77,6.2957936219,0.3553615107
+17,1,3,18,2,417.6749363,0.4788412773,0.0850808798,1.5073429298,3.5384E-177,0,3.5384E-177,6.2957936219,0.3553615107
+17,2,3,18,0,107.02693971,0,0,2.1514070188,6.604207E-47,3.302103E-47,3.302103E-47,2.302585093,
diff --git a/tests/data/smr_results_95_37_CL0.99.csv b/tests/data/smr_results_95_37_CL0.99.csv
new file mode 100644
index 0000000..58a9db5
--- /dev/null
+++ b/tests/data/smr_results_95_37_CL0.99.csv
@@ -0,0 +1,35 @@
+region,sex,_TYPE_,_FREQ_,sp_evnt,sp_exp,smr,l99,u99,p,p_u,p_l,u_lam99,l_lam99
+1,1,3,18,3,1122.3446554,0.2672975708,0.0301033544,0.978084356,0,0,0,10.977477495,0.3378633887
+1,2,3,18,0,274.84175425,0,0,1.6755715297,8.68506E-120,4.34253E-120,4.34253E-120,4.605170186,
+2,1,3,18,0,1206.3764712,0,0,0.381735743,0,0,0,4.605170186,
+2,2,3,18,0,294.92659408,0,0,1.5614631839,1.64451E-128,8.22257E-129,8.22257E-129,4.605170186,
+3,1,3,18,7,1052.2923871,0.6652143535,0.1936094477,1.6282160243,0,0,0,17.133593269,2.0373374787
+3,2,3,18,0,277.10687434,0,0,1.6618751148,9.01662E-121,4.50831E-121,4.50831E-121,4.605170186,
+4,1,3,18,10,2182.0974548,0.4582746741,0.1703371279,0.9806082425,0,0,0,21.3978275,3.7169221315
+4,2,3,18,2,594.63055986,0.336343292,0.0174048483,1.5595888801,1.00961E-253,0,1.00961E-253,9.2737920893,0.1034945467
+5,1,3,18,1,235.47706841,0.4246698019,0.0021286751,3.1553516231,1.28055E-100,0,1.28055E-100,7.4301295003,0.0050125418
+5,2,3,18,0,57.947952225,0,0,7.947080111,1.363183E-25,6.815914E-26,6.815914E-26,4.605170186,
+6,1,3,18,5,3344.8511471,0.1494834831,0.0322264936,0.4230310644,0,0,0,14.149759411,1.0779282407
+6,2,3,18,3,1020.4731831,0.2939812677,0.0331085024,1.07572425,0,0,0,10.977477495,0.3378633887
+7,1,3,18,5,731.03251638,0.6839641039,0.1474528446,1.9355855032,5.7556E-306,0,5.7556E-306,14.149759411,1.0779282407
+7,2,3,18,0,202.78608728,0,0,2.2709497716,1.706677E-88,8.533384E-89,8.533384E-89,4.605170186,
+8,1,3,18,15,2082.6570925,0.7202337847,0.3309887141,1.3523137141,0,0,0,28.16405748,6.8933599298
+8,2,3,18,13,605.93368185,2.1454493106,0.920912448,4.2078347677,1.70999E-237,0,1.70999E-237,25.496688134,5.5801187031
+9,1,3,18,16,3390.1347442,0.471957642,0.2232069408,0.8696398569,0,0,0,29.481962938,7.5670160527
+9,2,3,18,9,982.85427448,0.9157003468,0.3187046568,2.0347292244,0,0,0,19.998423156,3.1324023423
+10,1,3,18,5,2000.5238408,0.249934537,0.0538822992,0.7073027135,0,0,0,14.149759411,1.0779282407
+10,2,3,18,1,544.95818662,0.183500317,0.000919803,1.3634311187,1.16098E-234,0,1.16098E-234,7.4301295003,0.0050125418
+11,1,3,18,1,1307.6619931,0.076472361,0.0003833209,0.5681995454,0,0,0,7.4301295003,0.0050125418
+11,2,3,18,0,347.84974981,0,0,1.3238963629,1.70531E-151,8.52655E-152,8.52655E-152,4.605170186,
+12,1,3,18,4,1880.3878951,0.2127220671,0.0357482914,0.6697602031,0,0,0,12.594089786,0.6722065435
+12,2,3,18,2,532.44321966,0.3756269075,0.0194376683,1.7417429215,8.2413E-227,0,8.2413E-227,9.2737920893,0.1034945467
+13,1,3,18,3,1369.6191042,0.2190390007,0.0246684197,0.8014985671,0,0,0,10.977477495,0.3378633887
+13,2,3,18,0,349.10613699,0,0,1.3191318336,4.85469E-152,2.42734E-152,2.42734E-152,4.605170186,
+14,1,3,18,2,761.24734896,0.2627266949,0.0135953901,1.2182363724,0,0,0,9.2737920893,0.1034945467
+14,2,3,18,0,198.47571965,0,0,2.3202687936,1.270926E-86,6.354628E-87,6.354628E-87,4.605170186,
+15,1,3,18,1,736.00971558,0.1358677717,0.0006810429,1.0095151386,0.00000E-309,0,0.00000E-309,7.4301295003,0.0050125418
+15,2,3,18,0,184.68173694,0,0,2.4935709737,1.243858E-80,6.219289E-81,6.219289E-81,4.605170186,
+16,1,3,18,6,696.61162885,0.8613120642,0.2206267819,2.2479778061,4.67495E-289,0,4.67495E-289,15.659674811,1.536911819
+16,2,3,18,2,185.95734916,1.0755154389,0.0556549914,4.9870532846,3.035407E-77,0,3.035407E-77,9.2737920893,0.1034945467
+17,1,3,18,2,417.6749363,0.4788412773,0.0247787305,2.2203372248,3.5384E-177,0,3.5384E-177,9.2737920893,0.1034945467
+17,2,3,18,0,107.02693971,0,0,4.3028140377,6.604207E-47,3.302103E-47,3.302103E-47,4.605170186,
diff --git a/tests/data/smr_results_95_95_CL0.90.csv b/tests/data/smr_results_95_95_CL0.90.csv
new file mode 100644
index 0000000..753fe1d
--- /dev/null
+++ b/tests/data/smr_results_95_95_CL0.90.csv
@@ -0,0 +1,35 @@
+region,sex,_TYPE_,_FREQ_,sp_evnt,sp_exp,smr,l99,u99,p,p_u,p_l,u_lam99,l_lam99
+1,1,3,18,980,1122.3446554,87.317206463,82.780515122,92.046640219,8.6420493E-6,9.5088721E-7,7.6911621E-6,1033.080547,929.0826872
+1,2,3,18,176,274.84175425,64.036849306,56.30937517,72.564284343,1.139262E-10,0,1.139262E-10,199.43695205,154.76167453
+2,1,3,18,1128,1206.3764712,93.50314988,88.97145922,98.214010068,0.0200568739,0.0082111084,0.0118457655,1184.8307089,1073.3307501
+2,2,3,18,260,294.92659408,88.157529778,79.361474495,97.695387176,0.0326435648,0.0117565355,0.0208870293,288.12967797,234.05809374
+3,1,3,18,1049,1052.2923871,99.687122407,94.679063793,104.90067143,0.9263273816,0.4585978319,0.4677295498,1103.8617795,996.30058046
+3,2,3,18,238,277.10687434,85.887439844,76.939821416,95.625385651,0.01281423,0.0038281995,0.0089860306,264.98451725,213.20553425
+4,1,3,18,2409,2182.0974548,110.398369,106.72486256,114.17057675,5.2652047E-6,9.2202175E-7,4.3431829E-6,2491.3132493,2328.8405095
+4,2,3,18,561,594.63055986,94.344293393,87.889472654,101.16432143,0.1578725749,0.0716925632,0.0861800117,601.5539709,522.6176633
+5,1,3,18,195,235.47706841,82.810611374,73.303643956,93.249970894,0.0047117085,0.0009579423,0.0037537662,219.58229776,172.61327183
+5,2,3,18,31,57.947952225,53.496282111,38.732191424,72.198634749,0.000077491,1.4640308E-9,0.0000774895,41.837630371,22.444511782
+6,1,3,18,2528,3344.8511471,75.578849067,73.12344673,78.098629785,1.446359E-49,0,1.446359E-49,2612.2829142,2445.8704468
+6,2,3,18,1099,1020.4731831,107.69513773,102.40794191,113.19417434,0.0192644181,0.0078132019,0.0114512162,1155.1161939,1045.0455845
+7,1,3,18,697,731.03251638,95.344596087,89.483094843,101.50267601,0.2023315739,0.0955213025,0.1068102714,742.01756661,654.15051996
+7,2,3,18,206,202.78608728,101.58487831,90.230026581,114.0216561,0.8607549987,0.4200202504,0.4407347483,231.22005505,182.97394046
+8,1,3,18,2563,2082.6570925,123.06394601,119.09305163,127.13822992,4.971464E-19,0,4.971464E-19,2647.8533628,2480.2998865
+8,2,3,18,615,605.93368185,101.49625585,94.859445763,108.49120503,0.7298905584,0.3616908242,0.3681997342,657.38475312,574.7853323
+9,1,3,18,3303,3390.1347442,97.429755726,94.658164772,100.26480492,0.1308032466,0.0628834836,0.067919763,3399.1119878,3209.0393321
+9,2,3,18,1152,982.85427448,117.20964439,111.58786339,123.05131596,1.47506E-6,7.9960441E-8,1.3950995E-6,1209.4151187,1096.7460851
+10,1,3,18,2423,2000.5238408,121.11827665,117.09964168,125.24456895,5.247818E-16,0,5.247818E-16,2505.5474613,2342.6062493
+10,2,3,18,653,544.95818662,119.825707,112.21847714,127.83096969,0.0000450154,3.8489409E-6,0.0000411664,696.62533439,611.54377806
+11,1,3,18,1411,1307.6619931,107.90250137,103.22144606,112.74865117,0.0063684449,0.0024594916,0.0039089533,1474.371259,1349.7876188
+11,2,3,18,361,347.84974981,103.78043974,94.962418579,113.22520526,0.5028141147,0.2472696861,0.2555444285,393.85359323,330.32653544
+12,1,3,18,2216,1880.3878951,117.84802518,113.76068531,122.04994092,9.188348E-12,2.78666E-14,9.160482E-12,2295.012315,2139.142156
+12,2,3,18,669,532.44321966,125.64720055,117.76501891,133.93669843,6.4435419E-7,6.907649E-9,6.3744654E-7,713.1368694,627.03185833
+13,1,3,18,1370,1369.6191042,100.02781034,95.624541232,104.58872924,1,0.4994871244,0.5005128756,1432.4672166,1309.6919851
+13,2,3,18,333,349.10613699,95.386464092,86.954475483,104.44344757,0.3920922821,0.1895574322,0.2025348499,364.61848513,303.5634103
+14,1,3,18,637,761.24734896,83.678452328,78.300701058,89.341197893,2.1092076E-6,9.0663114E-8,2.0185445E-6,680.1075005,596.06201103
+14,2,3,18,103,198.47571965,51.89551658,43.780717515,61.127513244,6.085805E-14,0,6.085805E-14,121.32327182,86.894094156
+15,1,3,18,603,736.00971558,81.928266331,76.518736036,87.63268561,2.4015678E-7,3.4552518E-9,2.3670153E-7,644.98508012,563.18533146
+15,2,3,18,124,184.68173694,67.142535074,57.541998109,77.939894137,1.3264282E-6,2.252935E-10,1.3262029E-6,143.94075026,106.26956158
+16,1,3,18,639,696.61162885,91.729734839,85.843654318,97.927245823,0.0236084717,0.009276598,0.0143318737,682.17258221,597.99687861
+16,2,3,18,106,185.95734916,57.002318262,48.211304691,66.985127805,1.253304E-10,0,1.253304E-10,124.563768,89.652464199
+17,1,3,18,331,417.6749363,79.248231395,72.222125176,86.796760655,6.4095126E-6,1.17849E-7,6.2916636E-6,362.52831478,301.65371532
+17,2,3,18,69,107.02693971,64.469749566,52.256453703,78.77126764,0.0000565373,5.0101732E-8,0.0000564872,84.306477127,55.928483201
diff --git a/tests/data/smr_results_95_95_CL0.99.csv b/tests/data/smr_results_95_95_CL0.99.csv
new file mode 100644
index 0000000..af4dd0c
--- /dev/null
+++ b/tests/data/smr_results_95_95_CL0.99.csv
@@ -0,0 +1,35 @@
+region,sex,_TYPE_,_FREQ_,sp_evnt,sp_exp,smr,l99,u99,p,p_u,p_l,u_lam99,l_lam99
+1,1,3,18,980,1122.3446554,87.317206463,80.299995971,94.761854214,8.6420493E-6,9.5088721E-7,7.6911621E-6,1063.5546061,901.24271308
+1,2,3,18,176,274.84175425,64.036849306,52.287121399,77.551690549,1.139262E-10,0,1.139262E-10,213.14442676,143.7068417
+2,1,3,18,1128,1206.3764712,93.50314988,86.487747527,100.91598878,0.0200568739,0.0082111084,0.0118457655,1217.4267444,1043.3678367
+2,2,3,18,260,294.92659408,88.157529778,74.711852065,103.24254933,0.0326435648,0.0117565355,0.0208870293,304.48973439,220.34512067
+3,1,3,18,1049,1052.2923871,99.687122407,91.937589698,107.89241923,0.9263273816,0.4585978319,0.4677295498,1135.3437138,967.45225727
+3,2,3,18,238,277.10687434,85.887439844,72.225259514,101.29562963,0.01281423,0.0038281995,0.0089860306,280.6971531,200.14115912
+4,1,3,18,2409,2182.0974548,110.398369,104.69069606,116.32521954,5.2652047E-6,9.2202175E-7,4.3431829E-6,2538.3296548,2284.4530141
+4,2,3,18,561,594.63055986,94.344293393,84.400201209,105.09733076,0.1578725749,0.0716925632,0.0861800117,624.94084632,501.86938898
+5,1,3,18,195,235.47706841,82.810611374,68.333447979,99.346004159,0.0047117085,0.0009579423,0.0037537662,233.93705818,160.90960005
+5,2,3,18,31,57.947952225,53.496282111,31.984232354,83.590627252,0.000077491,1.4640308E-9,0.0000774895,48.439056745,18.534207684
+6,1,3,18,2528,3344.8511471,75.578849067,71.763075349,79.537590338,1.446359E-49,0,1.446359E-49,2660.4140028,2400.368049
+6,2,3,18,1099,1020.4731831,107.69513773,99.511399785,116.34876298,0.0192644181,0.0078132019,0.0114512162,1187.307925,1015.4871489
+7,1,3,18,697,731.03251638,95.344596087,86.299191255,105.04726151,0.2023315739,0.0955213025,0.1068102714,767.92963923,630.87514944
+7,2,3,18,206,202.78608728,101.58487831,84.280437194,121.27818185,0.8607549987,0.4200202504,0.4407347483,245.93527969,170.90900093
+8,1,3,18,2563,2082.6570925,123.06394601,116.89272341,129.46477332,4.971464E-19,0,4.971464E-19,2696.3072838,2434.4745947
+8,2,3,18,615,605.93368185,101.49625585,91.264200327,112.52177427,0.7298905584,0.3616908242,0.3681997342,681.80732973,553.00052925
+9,1,3,18,3303,3390.1347442,97.429755726,93.118461608,101.88201427,0.1308032466,0.0628834836,0.067919763,3453.937564,3156.8413202
+9,2,3,18,1152,982.85427448,117.20964439,108.50564255,126.40142832,1.47506E-6,7.9960441E-8,1.3950995E-6,1242.3418413,1066.4523459
+10,1,3,18,2423,2000.5238408,121.11827665,114.87422115,127.60139923,5.247818E-16,0,5.247818E-16,2552.6964128,2298.0861811
+10,2,3,18,653,544.95818662,119.825707,108.09207535,132.44130814,0.0000450154,3.8489409E-6,0.0000411664,721.74975121,589.05661372
+11,1,3,18,1411,1307.6619931,107.90250137,100.64697604,115.52437475,0.0063684449,0.0024594916,0.0039089533,1510.6683413,1316.1222528
+11,2,3,18,361,347.84974981,103.78043974,90.25113427,118.69632859,0.5028141147,0.2472696861,0.2555444285,412.88488204,313.93834476
+12,1,3,18,2216,1880.3878951,117.84802518,111.49950098,124.45095288,9.188348E-12,2.78666E-14,9.160482E-12,2340.1606533,2096.6231196
+12,2,3,18,669,532.44321966,125.64720055,113.48723523,138.70975487,6.4435419E-7,6.907649E-9,6.3744654E-7,738.55068479,604.25508915
+13,1,3,18,1370,1369.6191042,100.02781034,93.2038963,107.20153658,1,0.4994871244,0.5005128756,1468.252725,1276.5383696
+13,2,3,18,333,349.10613699,95.386464092,82.4604912,109.69480167,0.3920922821,0.1895574322,0.2025348499,382.95128459,287.87463537
+14,1,3,18,637,761.24734896,83.678452328,75.385224044,92.603141312,2.1092076E-6,9.0663114E-8,2.0185445E-6,704.93895829,573.86801955
+14,2,3,18,103,198.47571965,51.89551658,39.670848381,66.578579341,6.085805E-14,0,6.085805E-14,132.14231448,78.737001816
+15,1,3,18,603,736.00971558,81.928266331,73.589627464,90.920206302,2.4015678E-7,3.4552518E-9,2.3670153E-7,669.18155181,541.62680779
+15,2,3,18,124,184.68173694,67.142535074,52.628767555,84.292622608,1.3264282E-6,2.252935E-10,1.3262029E-6,155.67307955,97.195722051
+16,1,3,18,639,696.61162885,91.729734839,82.652372859,101.49713698,0.0236084717,0.009276598,0.0143318737,707.04085913,575.76604086
+16,2,3,18,106,185.95734916,57.002318262,43.751457985,72.876161674,1.253304E-10,0,1.253304E-10,135.51857842,81.359051487
+17,1,3,18,331,417.6749363,79.248231395,68.478134112,91.173802638,6.4095126E-6,1.17849E-7,6.2916636E-6,380.81012209,286.01600303
+17,2,3,18,69,107.02693971,64.469749566,46.233113308,87.289631314,0.0000565373,5.0101732E-8,0.0000564872,93.423421081,49.481886307
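The smr_results_* fixtures above share one layout: sp_evnt is the observed
count, sp_exp the expected count, smr the ratio expressed as a percentage, and
l99/u99 the limits at the confidence level encoded in the file name (with
u_lam99/l_lam99 the unscaled limits on the count itself). The reference values
are consistent with exact Poisson limits taken from chi-square quantiles, with
a one-sided upper limit of -ln(alpha) for zero counts (2.302585093 = -ln 0.1
in the CL0.90 files, 4.605170186 = -ln 0.01 in the CL0.99 files). A minimal
sketch of that calculation, assuming SciPy is available - an illustration
only, not the code path the package itself uses:

    import math
    from scipy.stats import chi2

    def smr_with_ci(observed, expected, conflev=0.90):
        # SMR as a percentage, plus exact Poisson confidence limits on the
        # observed count (u_lam/l_lam in the fixtures) scaled by expected.
        alpha = 1.0 - conflev
        if observed > 0:
            l_lam = chi2.ppf(alpha / 2.0, 2 * observed) / 2.0
            u_lam = chi2.ppf(1.0 - alpha / 2.0, 2 * (observed + 1)) / 2.0
        else:
            l_lam = 0.0
            u_lam = -math.log(alpha)  # one-sided limit for zero counts
        smr = 100.0 * observed / expected
        return smr, 100.0 * l_lam / expected, 100.0 * u_lam / expected

For example, smr_with_ci(3, 3.1461459203, conflev=0.99) reproduces the first
row of smr_results_37_37_CL0.99.csv: smr 95.35, l99 10.74, u99 348.92.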
diff --git a/tests/db_source.py b/tests/db_source.py
new file mode 100644
index 0000000..125a928
--- /dev/null
+++ b/tests/db_source.py
@@ -0,0 +1,74 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: db_source.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/tests/db_source.py,v $
+
+import unittest
+import SOOMv0
+from SOOMv0 import DataSourceColumn
+from SOOMv0.Sources.DB import DBDataSource
+
+class DummyCursor:
+ def __init__(self, rows):
+ self.rows = rows
+ self.description = [('a_col',), ('b_col',)]
+
+ def execute(self, cmd, *args, **kwargs):
+ pass
+
+ def fetchmany(self, count):
+ try:
+ return self.rows[:count]
+ finally:
+ self.rows = self.rows[count:]
+
+class DummyDB:
+ def __init__(self, rows):
+ self.rows = rows
+
+ def cursor(self):
+ return DummyCursor(self.rows[:])
+
+class DummyDataType:
+ def __init__(self, name):
+ self.name = name
+
+class DummyCol:
+ def __init__(self, name, datatype):
+ self.name = name
+ self.datatype = DummyDataType(datatype)
+
+class db_data_source_test(unittest.TestCase):
+ def test_source(self):
+ rows = zip(range(30), range(29,-1,-1))
+ columns = [
+ DataSourceColumn('a_col', label='A Column'),
+ DataSourceColumn('b_col', label='B Column'),
+ ]
+ dummy_dataset = [
+ DummyCol('a_col', 'int'),
+ DummyCol('b_col', 'int'),
+ ]
+ source = DBDataSource('test', columns,
+ db=DummyDB(rows), table='',
+ fetchcount=5)
+ source.register_dataset_types(dummy_dataset)
+ for db_row in source:
+ row = rows.pop(0)
+ self.assertEqual(db_row, {'a_col': row[0], 'b_col': row[1]})
+        self.failIf(rows) # every source row should have been consumed
+
+if __name__ == '__main__':
+ unittest.main()
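The stubs above exercise DBDataSource's chunked reads: DummyCursor.fetchmany
hands back rows five at a time (the fetchcount given to the source), and the
assertion checks that each yielded row is a dict keyed by the names in
cursor.description. The pattern under test is roughly this - a hypothetical
helper, not the library's actual implementation:

    def iter_dict_rows(cursor, fetchcount):
        # Drain a DB-API cursor in fetchcount-sized chunks, yielding one
        # {column_name: value} dict per row, as the test expects.
        names = [d[0] for d in cursor.description]
        while True:
            rows = cursor.fetchmany(fetchcount)
            if not rows:
                break
            for row in rows:
                yield dict(zip(names, row))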
diff --git a/tests/filters.py b/tests/filters.py
new file mode 100644
index 0000000..cf83305
--- /dev/null
+++ b/tests/filters.py
@@ -0,0 +1,649 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: filters.py 3699 2009-02-20 05:46:02Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/tests/filters.py,v $
+
+import unittest
+from SOOMv0 import Filter, DatasetColumn, Soom, Dataset, \
+ ColumnNotFound, ExpressionError
+from mx import DateTime
+
+
+class DummyDataset:
+
+ name = 'dummyds'
+ label = 'dummyds'
+ backed = False
+ length = 0
+ generation = 0
+
+ def __init__(self, cols):
+ for i, (name, (datatype, items)) in enumerate(cols.iteritems()):
+ if self.length:
+ assert self.length == len(items)
+ else:
+ self.length = len(items)
+ col = DatasetColumn.get_dataset_col(self, name, i, datatype=datatype)
+ col.store_column(items)
+ setattr(self, name, col)
+
+ def get_column(self, name):
+ return getattr(self, name)
+
+ def __len__(self):
+ return self.length
+
+
+class filter_test(unittest.TestCase):
+
+ words = [
+        'adulate', 'adulterate', 'advocating', 'aesthete', 'afterword', None
+ ]
+
+ def _test(self, dataset, filterexpr, expected_record_ids):
+ filter = Filter.DatasetFilter(dataset, 'test_filter', filterexpr)
+ self.assertEqual(list(filter.record_ids), expected_record_ids,
+ 'expr %r returned record ids %r, expected %r' %\
+ (filterexpr, list(filter.record_ids),
+ expected_record_ids))
+
+ def _try_all(self, dataset, col, ops, value, expect):
+ for op in ops:
+ if type(value) is str:
+ filterexpr = '%s %s "%s"' % (col, op, value)
+ else:
+ filterexpr = '%s %s %s' % (col, op, value)
+ self._test(dataset, filterexpr, expect)
+
+
+class filter_logical_categorical_op_test(filter_test):
+
+ data = [1,2,1,2,-1,None]
+ datatype = int
+
+ def test_eq(self):
+ ds = DummyDataset({'a': (self.datatype, self.data)})
+ assert ds.a.coltype == 'categorical'
+ ops = '=', '==', 'equal to', 'equals', 'eq'
+ self._try_all(ds, 'a', ops, 1, [0, 2])
+
+ def test_eq_null(self):
+ ds = DummyDataset({'a': (self.datatype, self.data)})
+ ops = '=', '==', 'equal to', 'equals', 'eq'
+ self._try_all(ds, 'a', ops, None, [5])
+
+ def test_not(self):
+ ds = DummyDataset({'a': (self.datatype, self.data)})
+ self._test(ds, 'not a=1', [1,3,4,5])
+
+ def test_gt(self):
+ ds = DummyDataset({'a': (self.datatype, self.data)})
+ ops = 'greaterthan', 'gt', '>', 'greater than'
+ self._try_all(ds, 'a', ops, 1, [1, 3])
+
+ def test_ge(self):
+ ds = DummyDataset({'a': (self.datatype, self.data)})
+ ops = 'greaterthanorequalto', 'greaterequal', '>=', '=>', 'ge', \
+ 'greater than or equal to'
+ self._try_all(ds, 'a', ops, 1, [0, 1, 2, 3])
+
+ def test_lt(self):
+ ds = DummyDataset({'a': (self.datatype, self.data)})
+ ops = 'lessthan', 'lt', '<', 'less than'
+ self._try_all(ds, 'a', ops, 1, [4])
+
+ def test_le(self):
+ ds = DummyDataset({'a': (self.datatype, self.data)})
+ ops = 'lessthanorequalto', 'lessequal', '<=', '=<', 'le',\
+ 'less than or equal to'
+ self._try_all(ds, 'a', ops, 1, [0,2,4])
+
+ def test_ne(self):
+ ds = DummyDataset({'a': (self.datatype, self.data)})
+ ops = 'notequalto', 'notequal', '!=', '<>', 'doesnotequal', \
+ 'ne', '!==', '#', 'does not equal', 'not equal to'
+ self._try_all(ds, 'a', ops, 1, [1,3,4,5])
+
+ def test_ne_null(self):
+ ds = DummyDataset({'a': (self.datatype, self.data)})
+ ops = 'notequalto', 'notequal', '!=', '<>', 'doesnotequal', \
+ 'ne', '!==', '#', 'does not equal', 'not equal to'
+ self._try_all(ds, 'a', ops, None, [0,1,2,3,4])
+
+ def test_in(self):
+ ds = DummyDataset({'a': (self.datatype, self.data)})
+ ops = 'in',
+ self._try_all(ds, 'a', ops, (1,2), [0, 1, 2, 3])
+
+ def test_in_col(self):
+ ds = DummyDataset({'a': (str, self.words)})
+ ops = 'in:',
+        self._try_all(ds, 'a', ops, ('adu', 'af'), [0, 1, 4])
+
+ def test_not_in(self):
+ ds = DummyDataset({'a': (self.datatype, self.data)})
+ ops = 'notin',
+ self._try_all(ds, 'a', ops, (1,2), [4,5])
+
+ def test_not_in_col(self):
+ ds = DummyDataset({'a': (str, self.words)})
+ ops = 'notin:',
+        self._try_all(ds, 'a', ops, ('adu', 'af'), [2, 3])
+
+ def test_regexp(self):
+ ds = DummyDataset({'a': (str, self.words)})
+ ops = '~',
+ self._try_all(ds, 'a', ops, '[ae]te', [0, 1, 3])
+
+ def test_not_regexp(self):
+ ds = DummyDataset({'a': (str, self.words)})
+ ops = '!~',
+ self._try_all(ds, 'a', ops, '[ae]te', [2, 4])
+
+
+
+class filter_logical_scalar_op_test(filter_test):
+ # Derived from filter_logical_categorical_op_test
+
+ data = [1.0, 2.0, 1.0, 2.0, -1.0, None]
+ datatype = float
+
+ def test_eq(self):
+ ds = DummyDataset({'a': (self.datatype, self.data)})
+ assert ds.a.coltype == 'scalar'
+ ops = '=', '==', 'equal to', 'equals', 'eq'
+ self._try_all(ds, 'a', ops, 1, [0, 2])
+
+ def test_eq_null(self):
+ ds = DummyDataset({'a': (self.datatype, self.data)})
+ ops = '=', '==', 'equal to', 'equals', 'eq'
+ self._try_all(ds, 'a', ops, None, [5])
+
+ def test_not(self):
+ ds = DummyDataset({'a': (self.datatype, self.data)})
+ self._test(ds, 'not a=1', [1,3,4,5])
+
+ def test_gt(self):
+ ds = DummyDataset({'a': (self.datatype, self.data)})
+ ops = 'greaterthan', 'gt', '>', 'greater than'
+ self._try_all(ds, 'a', ops, 1, [1, 3])
+
+ def test_ge(self):
+ ds = DummyDataset({'a': (self.datatype, self.data)})
+ ops = 'greaterthanorequalto', 'greaterequal', '>=', '=>', 'ge', \
+ 'greater than or equal to'
+ self._try_all(ds, 'a', ops, 1, [0, 1, 2, 3])
+
+ def test_lt(self):
+ ds = DummyDataset({'a': (self.datatype, self.data)})
+ ops = 'lessthan', 'lt', '<', 'less than'
+ self._try_all(ds, 'a', ops, 1, [4])
+
+ def test_le(self):
+ ds = DummyDataset({'a': (self.datatype, self.data)})
+ ops = 'lessthanorequalto', 'lessequal', '<=', '=<', 'le',\
+ 'less than or equal to'
+ self._try_all(ds, 'a', ops, 1, [0,2,4])
+
+ def test_ne(self):
+ ds = DummyDataset({'a': (self.datatype, self.data)})
+ ops = 'notequalto', 'notequal', '!=', '<>', 'doesnotequal', \
+ 'ne', '!==', '#', 'does not equal', 'not equal to'
+ self._try_all(ds, 'a', ops, 1, [1,3,4,5])
+
+ def test_ne_null(self):
+ ds = DummyDataset({'a': (self.datatype, self.data)})
+ ops = 'notequalto', 'notequal', '!=', '<>', 'doesnotequal', \
+ 'ne', '!==', '#', 'does not equal', 'not equal to'
+ self._try_all(ds, 'a', ops, None, [0,1,2,3,4])
+
+
+
+class filter_startswith_test(filter_test):
+
+ def test_eq_col(self):
+ ds = DummyDataset({'a': (str, self.words)})
+ ops = '=:', 'starting with', '==:', 'startingwith', 'eq:'
+ self._try_all(ds, 'a', ops, 'ad', [0,1,2])
+
+ def test_ne_col(self):
+ ds = DummyDataset({'a': (str, self.words)})
+ ops = 'notequalto:', 'notequal:', '!=:', '<>:', 'doesnotequal:', \
+ 'ne:', '!==:', '#:', 'does not equal:', 'not equal to:'
+ self._try_all(ds, 'a', ops, 'ad', [3,4])
+
+ def test_lt_col(self):
+ ds = DummyDataset({'a': (str, self.words)})
+ ops = 'lessthan:', 'lt:', '<:', 'less than:'
+ self._try_all(ds, 'a', ops, 'adv', [0, 1])
+
+ def test_le_col(self):
+ ds = DummyDataset({'a': (str, self.words)})
+ ops = 'lessthanorequalto:', 'lessequal:', 'le:', '<=:', '=<:', \
+ 'less than or equal to:'
+ self._try_all(ds, 'a', ops, 'adv', [0, 1, 2])
+
+ def test_gt_col(self):
+ ds = DummyDataset({'a': (str, self.words)})
+ ops = 'greaterthan:', 'gt:', '>:', 'greater than:'
+ self._try_all(ds, 'a', ops, 'adu', [2, 3, 4])
+
+ def test_ge_col(self):
+ ds = DummyDataset({'a': (str, self.words)})
+ ops = 'greaterthanorequalto:', 'greaterequal:', 'ge:', '>=:', '=>:', \
+ 'greater than or equal to:'
+ self._try_all(ds, 'a', ops, 'adu', [0, 1, 2, 3, 4])
+
+
+class filter_combinational_test(filter_test):
+
+ def _test_ds(self):
+ return DummyDataset({'a': (int, [1,2,3,4,5]),
+ 'b': (str, ['a','b','c','d','e']),
+ 'c': (str, ['one','two','three','four','five']),
+ 'd': (int, [1,2,1,2,-1]),
+ 'e': (int, [1,2,-3,4,5]),
+ 'f': (float, [1.2,3.45678,-9.34,0.1,0.321]),
+ 'g': (int, [1,1,2,1,1]),
+ 'h': (str, ['a','a','b','a','a']),
+ 'i': (str, ['one','one','two','one','one'])})
+
+ def test_and(self):
+ self._test(self._test_ds(), 'd=1 and b="c"', [2])
+
+ def test_or(self):
+ self._test(self._test_ds(), 'd=1 or b="d"', [0,2,3])
+
+ def test_andnot(self):
+ self._test(self._test_ds(), 'd=1 and not b="c"', [0])
+
+ def test_ornot(self):
+ self._test(self._test_ds(), 'd=1 or not b="c"', [0,1,2,3,4])
+
+ def test_andor(self):
+ ds = self._test_ds()
+ self._test(ds, 'd=1 or (b="d" and d=2)', [0,2,3])
+ self._test(ds, 'b="d" and d=2 or d=1', [0,2,3])
+ self._test(ds, 'd=1 or d=2 and b="d"', [0,2,3])
+
+ def test_oror(self):
+ ds = self._test_ds()
+ self._test(ds, 'a=1 or b="c" or c="five"', [0,2,4])
+ self._test(ds, 'a=1 or b="a" or c="one"', [0])
+ self._test(ds, 'a=1 or b="a" or c="five"', [0,4])
+ self._test(ds, 'a=1 or b="f" or c="five"', [0,4])
+
+ def test_andand(self):
+ ds = self._test_ds()
+ self._test(ds, 'a=1 and b="a" and c="one"', [0])
+ self._test(ds, 'a=1 and b="a" and c="two"', [])
+ self._test(ds, 'a=1 and b="b" and c="two"', [])
+ self._test(ds, 'a=1 and b="b" and c="three"', [])
+ self._test(ds, 'g=1 and h="a" and i="one"', [0,1,3,4])
+ self._test(ds, 'g=1 and h="b" and i="one"', [])
+ self._test(ds, 'g=1 and h="a" and i="two"', [])
+ self._test(ds, 'g=1 and h="a" and i="three"', [])
+ self._test(ds, 'g=11 and h="z" and i="fred"', [])
+ self._test(ds, 'g=11 and h="z" and i=3', [])
+
+ def test_andandnot(self):
+ ds = self._test_ds()
+ self._test(ds, 'a=1 and b="a" and not c="one"', [])
+ self._test(ds, 'a=1 and b="a" and not c="one"', [])
+ self._test(ds, 'a=1 and b="a" and not c="two"', [0])
+ self._test(ds, 'a=1 and not b="a" and c="one"', [])
+ self._test(ds, 'a=1 and not b="b" and c="one"', [0])
+
+ def test_andnotandnot(self):
+ ds = self._test_ds()
+ self._test(ds, 'a=1 and not b="a" and not c="one"', [])
+ self._test(ds, 'a=1 and not b="b" and not c="three"', [0])
+ self._test(ds, 'a=1 and not b="a" and not c="three"', [])
+ self._test(ds, 'a=4 and not b="z" and not c="eight"', [3])
+
+ def test_notandnotandnot(self):
+ ds = self._test_ds()
+ self._test(ds, 'not a=4 and not b="z" and not c="eight"', [0,1,2,4])
+ self._test(ds, 'not a=9 and not b="z" and not c="eight"', [0,1,2,3,4])
+ self._test(ds, 'not a=1 and not b="a" and not c="one"', [1,2,3,4])
+ self._test(ds, 'not a=1 and not b="b" and not c="three"', [3,4])
+
+ def test_ornotornot(self):
+ ds = self._test_ds()
+ self._test(ds, 'a=1 or not b="a" or not c="one"', [0,1,2,3,4])
+
+ def test_notornotornot(self):
+ ds = self._test_ds()
+ self._test(ds, 'not a=1 or not b="a" or not c="one"', [1,2,3,4])
+ self._test(ds, 'not a=1 or not b="b" or not c="three"', [0,1,2,3,4])
+ self._test(ds, 'not a=1 or not a=2 or not a=3', [0,1,2,3,4])
+
+ def test_notandnotornot(self):
+ ds = self._test_ds()
+ self._test(ds, 'not a=1 and not b="a" or not c="one"', [1,2,3,4])
+ self._test(ds, 'not a=1 or not b="b" and not c="three"', [0,1,2,3,4])
+
+ def test_andin(self):
+ ds = self._test_ds()
+ self._test(ds, 'a in (1,3,5) and b in ("b","d")', [])
+ self._test(ds, 'a in (1,3,5) and b in ("b","d") '
+ 'or c in ("four","eleven")', [3])
+ self._test(ds, 'a in (1,3,5) and b in ("b","d") '
+ 'or c in ("fourth","eleven")', [])
+ self._test(ds, 'a in (1,3,4) and b in ("b","d") '
+ 'or c in: ("fourth","eleven")', [3])
+
+ def test_orin(self):
+ ds = self._test_ds()
+ self._test(ds, 'a in (1,3,5) or b in ("b","d") or '
+ 'c in ("fourth","eleven")', [0,1,2,3,4])
+
+ def test_notinnotin(self):
+ ds = self._test_ds()
+ self._test(ds, 'a notin (1,3,5) and b notin ("b","d")', [])
+
+ def test_notnotinnotnotin(self):
+ ds = self._test_ds()
+ self._test(ds, 'not a notin (1,3,5) and not b notin ("b","d")', [])
+ self._test(ds, 'not a notin (1,3,5) and not b notin ("a","c") '
+ 'and not c notin ("three","five")', [2])
+
+ def test_notinnotnotin(self):
+ ds = self._test_ds()
+ self._test(ds, 'a notin (1,3,5) and not b notin ("b","d")', [1,3])
+
+ def test_negnums1(self):
+ ds = self._test_ds()
+ self._test(ds, 'e=-3 and f=-9.34', [2])
+ self._test(ds, 'e=-3 and f=-9.34 and c startingwith "th"', [2])
+
+
+class filter_paren_test(filter_test):
+
+ def _test_ds(self):
+ ds = Dataset('paren')
+ for name in 'abcdefghij':
+ ds.addcolumnfromseq(name, datatype='int', data=range(10))
+ return ds
+
+ def test_parentheses(self):
+ ds = self._test_ds()
+ self._test(ds, 'a=0 or b=0 and c=1 or d=1 and e=2 '
+ 'or f=2 and g=3 or h=3 and i=4 or j=4', [0,4])
+
+ self._test(ds, '(a=0 or b=0) and (c=1 or d=1) and '
+ '(e=2 or f=2) and (g=3 or h=3) and (i=4 or j=4)', [])
+
+ self._test(ds, 'a=0 and b=0 or c=1 and d=1 or e=2 and f=2 '
+ 'or g=3 and h=3 or i=4 and j=4', [0,1,2,3,4])
+
+ self._test(ds, 'a=0 and (b=0 or c=1) and (d=1 or e=2) and '
+ '(f=2 or g=3) and (h=3 or i=4) and j=4', [])
+
+ self._test(ds, 'a=0 and (b=0 or c=1) and (d=0 or e=2) '
+ 'and (f=0 or g=3) and (h=0 or i=4) and j=0', [0])
+
+ self._test(ds, '(a=0 and b=0) or (c=1 and d=0) or (((e=2 and '
+ 'f=0 or g=3) and (h=0 or i=4)) or j=3)', [0,3])
+
+ self._test(ds, '(a=2 and (b=0 or (c=2 and (d in (2,3) and (e=2 or '
+ '(f=3 and (g=3 or (h=4 and (i=3 or j=4)))))))))', [2])
+
+ self._test(ds, '(((((((((a=2 and b=0) or c=2) and d in (2,3)) and e=2)'
+ 'or f=3) and g=3) or h=4) and i=3) or j=4)', [3,4])
+
+ def test_manyin(self):
+ ds = self._test_ds()
+ self._test(ds,
+ 'a in (1,2,3,4,5,6,7,8,9) and b in (0,2,3,4,5,6,7,8,9) and '
+ 'c in (0,1,3,4,5,6,7,8,9) and d in (0,1,2,4,5,6,7,8,9) and '
+ 'e in (0,1,2,3,5,6,7,8,9) and f in (0,1,2,3,4,6,7,8,9) and '
+ 'g in (0,1,2,3,4,5,7,8,9) and h in (0,1,2,3,4,5,6,8,9) and '
+ 'i in (0,1,2,3,4,5,6,7,9) and j in (0,1,2,3,4,5,6,7,8)',
+ [])
+
+ self._test(ds,
+ 'a in (1,2,3,4,5,6,7,8,9) and b in (0,2,3,4,5,6,7,8,9) and '
+ 'c in (0,1,3,4,5,6,7,8,9) and d in (0,1,2,3,5,6,7,8,9) and '
+ 'e in (0,1,2,3,5,6,7,8,9) and f in (0,1,2,3,4,6,7,8,9) and '
+ 'g in (0,1,2,3,4,5,7,8,9) and h in (0,1,2,3,4,5,6,8,9) and '
+ 'i in (0,1,2,3,4,5,6,7,9) and j in (0,1,3,4,5,6,7,8,9)',
+ [3,9])
+
+ self._test(ds,
+ 'a in (11) and b in (0,2,3,4,5,6,7,8,9) and '
+ 'c in (0,1,3,4,5,6,7,8,9) and d in (0,1,2,3,5,6,7,8,9) and '
+ 'e in (0,1,2,3,5,6,7,8,9) and f in (0,1,2,3,4,6,7,8,9) and '
+ 'g in (0,1,2,3,4,5,7,8,9) and h in (0,1,2,3,4,5,6,8,9) and '
+ 'i in (0,1,2,3,4,5,6,7,9) and j in (0,1,3,4,5,6,7,8,9)',
+ [])
+
+ self._test(ds,
+ 'a notin (0,1,2,4,5,6,7,8) and b notin (0,2,8) and '
+ 'c notin (0,4,5,6,7,8) and d notin (0,1,2,5,6,7) and '
+ 'e notin (0,1,2,5,6,7,8) and f notin (0,8,8,8,8,8,8,8) and '
+ 'g notin (0,1,2,4,5,7,8) and h notin (4,5,6,8) and '
+ 'i notin (0,6,7) and j notin (0,1,4,5,6,7,8)',
+ [3,9])
+
+
+class datetime_test(filter_test):
+
+ def _get_dates_ds(self):
+ ds = Dataset('dates_and_times')
+ ds.addcolumnfromseq('a', label='Date 1',
+ datatype='date',
+ data=[DateTime.Date(1956,4,23),
+ DateTime.Date(2003,9,30),
+ DateTime.Date(2002,3,1),
+ DateTime.Date(2000,6,21),
+ DateTime.Date(2009,5,27),
+ DateTime.Date(3003,9,11),
+ DateTime.Date(1903,4,2),
+ DateTime.Date(1803,9,9),
+ DateTime.Date(1803,9,9),
+ DateTime.Date(103,9,29),
+ None])
+ ds.addcolumnfromseq('b', label='Time 1',
+ datatype='time',
+ data=[DateTime.Time(1,4,23.1),
+ DateTime.Time(20,9,30.2),
+ DateTime.Time(8,3,1.3),
+ DateTime.Time(18,6,21.44),
+ DateTime.Time(0,0,0.0),
+ DateTime.Time(12,9,11.5),
+ DateTime.Time(19,4,2),
+ DateTime.Time(18,9,9.789876353663554648477647863563),
+ DateTime.Time(18,9,9),
+ DateTime.Time(23,59,59.9999999999999999999999),
+ None])
+ ds.addcolumnfromseq('c', label='Datetime 1',
+ datatype='datetime',
+ data=[DateTime.DateTime(1956,4,23,23,59,59.9999999999999999999999),
+ DateTime.DateTime(2003,9,30,18,9,9),
+ DateTime.DateTime(2002,3,1,18,9,9.789876353663554648477647863563),
+ DateTime.DateTime(2000,6,21,19,4,2),
+ DateTime.DateTime(2009,5,27,12,9,11.5),
+ DateTime.DateTime(3003,9,11,0,0,0.0),
+ DateTime.DateTime(1903,4,2,18,6,21.44),
+ DateTime.DateTime(1803,9,9,8,3,1.3),
+ DateTime.DateTime(1803,9,9,20,9,30.2),
+ DateTime.DateTime(103,9,29,1,4,23.1),
+ None])
+ return ds
+
+ def _get_reldate_ds(self):
+ def rdt(**kwargs):
+ return now + DateTime.RelativeDateTime(hour=12, **kwargs)
+ now = DateTime.now()
+ data = [
+ rdt(days=1), # tomorrow
+ now,
+ rdt(days=-1), # yesterday
+ rdt(days=-7), # last week
+ rdt(days=-7, weekday=(DateTime.Monday,0)), # Monday a week ago
+ rdt(months=-1), # a month ago
+            rdt(months=-1, day=1), # beginning of last month
+            rdt(years=-1), # a year ago
+            rdt(years=-1, month=DateTime.January, day=1), # beginning of last year
+ ]
+ ds = Dataset('reldate')
+ ds.addcolumnfromseq('a', label='dates relative to today',
+ datatype= 'date', data=data)
+ return ds
+
+ def test_dates_simple(self):
+ ds = self._get_dates_ds()
+ self._test(ds, 'a ge date(1960,1,1)', [1,2,3,4,5])
+ self._test(ds, 'a ge date 1960-1-1', [1,2,3,4,5])
+ self._test(ds, 'a le date(1903,4,2)', [6,7,8,9])
+ self._test(ds, 'a le date 1903-4-2', [6,7,8,9])
+ self._test(ds, 'a == date(3003,9,11)', [5])
+ self._test(ds, 'a == date 3003-9-11', [5])
+ self._test(ds, 'a > date(4000,1,1)', [])
+ self._test(ds, 'a > date 4000-1-1', [])
+ self._test(ds, 'a < date(103,9,29)', [])
+
+ def test_dates_comb(self):
+ ds = self._get_dates_ds()
+ self._test(ds, 'a ge date(100,1,1) and a le date(200,1,1)', [9])
+
+ def test_dates_between(self):
+ ds = self._get_dates_ds()
+ self._test(ds, 'a between (date(1850,1,1),date(1950,1,1))', [6])
+ self._test(ds, 'a between (date(100,1,1),date(4000,1,1))',
+ [0,1,2,3,4,5,6,7,8,9])
+
+ def test_reldat(self):
+        # Picking values for relative datetime tests is tricky - the clock
+        # keeps running, and first-of-month (etc.) alignments change the result.
+ ds = self._get_reldate_ds()
+ self.assertRaises(ValueError, Filter.DatasetFilter, ds, 'test_filter',
+ 'a >= reldate(days=-1, months=-1)')
+ self.assertRaises(TypeError, Filter.DatasetFilter, ds, 'test_filter',
+ 'a >= reldate(poo=1)')
+ self.assertRaises(ValueError, Filter.DatasetFilter, ds, 'test_filter',
+ 'a >= reldate(align="xxx")')
+ self._test(ds, 'a >= reldate(days=+1)', [0])
+ self._test(ds, 'a >= reldate()', [0, 1])
+ self._test(ds, 'a >= reldate(days=0)', [0, 1])
+ self._test(ds, 'a >= reldate(days=-1)', [0, 1, 2])
+ if DateTime.now().day_of_week == 0:
+ # We have a special version of the test for Mondays!
+ self._test(ds, 'a >= reldate(days=-7)', [0, 1, 2, 3, 4])
+ else:
+ self._test(ds, 'a >= reldate(days=-7)', [0, 1, 2, 3])
+ self._test(ds, 'a >= reldate(days=-7, align="monday")', [0, 1, 2, 3, 4])
+ if DateTime.now().day == 1:
+ expect = [0, 1, 2, 3, 4, 5, 6]
+ else:
+ expect = [0, 1, 2, 3, 4, 5]
+ self._test(ds, 'a >= reldate(months=-1)', expect)
+ self._test(ds, 'a >= reldate(months=-1, align="bom")', [0, 1, 2, 3, 4, 5, 6])
+ if DateTime.now().day == 1 and DateTime.now().month == DateTime.January:
+ expect = [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ else:
+ expect = [0, 1, 2, 3, 4, 5, 6, 7]
+ self._test(ds, 'a >= reldate(years=-1)', expect)
+ self._test(ds, 'a >= reldate(years=-1, align="boy")', [0, 1, 2, 3, 4, 5, 6, 7, 8])
+ if DateTime.now().day_of_week == 0:
+            self._test(ds, 'a between(reldate(days=-7), reldate(days=-2))', [3, 4]) # on Mondays, "Monday a week ago" also falls in range
+ else:
+ self._test(ds, 'a between(reldate(days=-7), reldate(days=-2))', [3])
+        # Note: currently no support for times or datetimes in filters
+
+
+class sort_test(unittest.TestCase):
+
+ def _test_ds(self):
+ words = ['framers', "expertism's", 'cumulonimbus', 'Keynes', 'halters']
+ ds = Dataset('dummyds', 'dummyds')
+ ds.addcolumnfromseq('a', [1, 3, 2, 4, 0], datatype=int)
+ ds.addcolumnfromseq('b', words, datatype='str')
+ ds.addcolumnfromseq('c', words, datatype='recode')
+ ds.addcolumnfromseq('d',
+ [0.3, 1.8, 3.5, 0.4, 9.6], datatype=float)
+ ds.addcolumnfromseq('e',
+ [DateTime.DateTime(2003,9,30,18,9,9),
+ DateTime.DateTime(2002,3,1,18,9,9.7),
+ DateTime.DateTime(2009,5,27,12,9,11.5),
+ DateTime.DateTime(3003,9,11,0,0,0.0),
+ DateTime.DateTime(1903,4,2,18,6,21.44)],
+ datatype='datetime')
+ ds.addcolumnfromseq('f', [1, 1, 2, 0, 0], datatype=int)
+ return ds
+
+ def _test_idx(self, expected_record_ids, *args):
+ dataset = self._test_ds()
+ filter = Filter.sorted_ds(dataset, *args)
+ self.assertEqual(list(filter.record_ids), expected_record_ids,
+ 'sort %r returned record ids %r, expected %r' %\
+ (filter.filter_label, list(filter.record_ids),
+ expected_record_ids))
+
+ def _test_col(self, col):
+ dataset = self._test_ds()
+ data_in = list(dataset[col].data)
+ data_in.sort()
+ sorted_ds = Filter.sorted_ds(dataset, col)
+ data_out = list(sorted_ds[col].data)
+ self.assertEqual(data_in, data_out)
+
+ def test_oops(self):
+ dataset = self._test_ds()
+ self.assertRaises(ColumnNotFound, Filter.sorted_ds, dataset, 'foo')
+ self.assertRaises(ExpressionError, Filter.sorted_ds, dataset, '')
+ self.assertRaises(ExpressionError, Filter.sorted_ds, dataset, ',')
+ self.assertRaises(ExpressionError, Filter.sorted_ds, dataset, 'a x')
+ self.assertRaises(ExpressionError, Filter.sorted_ds, dataset, 'a asc x')
+
+ def test_int(self):
+ self._test_col('a')
+ self._test_idx([4, 0, 2, 1, 3], 'a')
+ self._test_idx([4, 0, 2, 1, 3], 'a asc')
+ self._test_idx([3, 1, 2, 0, 4], 'a desc')
+
+ def test_int_two(self):
+ self._test_idx([4, 3, 0, 1, 2], 'f', 'a')
+ self._test_idx([3, 4, 1, 0, 2], 'f', 'a desc')
+ self._test_idx([3, 4, 1, 0, 2], 'f, a desc')
+ self._test_idx([2, 0, 1, 4, 3], 'f desc', 'a')
+ self._test_idx([2, 0, 1, 4, 3], 'f desc, a')
+ self._test_idx([2, 1, 0, 3, 4], 'f desc', 'a desc')
+ self._test_idx([2, 1, 0, 3, 4], 'f desc, a desc')
+
+ def test_str(self):
+ self._test_col('b')
+ self._test_idx([3, 2, 1, 0, 4], 'b')
+ self._test_idx([4, 0, 1, 2, 3], 'b desc')
+
+ def test_recode(self):
+ self._test_col('c')
+ self._test_idx([3, 2, 1, 0, 4], 'c')
+ self._test_idx([4, 0, 1, 2, 3], 'c desc')
+
+ def test_float(self):
+ self._test_col('d')
+ self._test_idx([0, 3, 1, 2, 4], 'd')
+ self._test_idx([4, 2, 1, 3, 0], 'd desc')
+
+ def test_datetime(self):
+ self._test_col('e')
+ self._test_idx([4, 1, 0, 2, 3], 'e')
+ self._test_idx([3, 2, 0, 1, 4], 'e desc')
+
+
+if __name__ == '__main__':
+ unittest.main()
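The operator tests above also pin down the filter language's null semantics:
equality never matches a null unless the comparison value is itself null,
while the not-equal forms do include nulls (test_ne expects record 5 for
'a != 1'). In plain Python the expected record-id lists reduce to something
like this hypothetical miniature:

    data = [1, 2, 1, 2, -1, None]

    # 'a = 1'  -> rows whose value equals 1; the null row is excluded
    assert [i for i, v in enumerate(data) if v == 1] == [0, 2]

    # 'a != 1' -> everything else, null included
    assert [i for i, v in enumerate(data) if v != 1] == [1, 3, 4, 5]

    # 'a in (1,2)' -> set membership, nulls excluded
    assert [i for i, v in enumerate(data) if v in (1, 2)] == [0, 1, 2, 3]

    # the ':' operator variants ('=:', 'lt:', ...) compare on string prefix
    words = ['adulate', 'adulterate', 'advocating', 'aesthete', 'afterword']
    assert [i for i, v in enumerate(words) if v.startswith('ad')] == [0, 1, 2]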
diff --git a/tests/indirect_std_SAS.py b/tests/indirect_std_SAS.py
new file mode 100644
index 0000000..b3c8d9d
--- /dev/null
+++ b/tests/indirect_std_SAS.py
@@ -0,0 +1,176 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+
+# Compare our calc_indirectly_std_ratios against verified SAS results; see
+# tests/SAS/indirect_std_check.sas and the data files tests/data/smr_results*
+
+import sys, os
+import unittest
+import csv
+import itertools
+import Numeric
+
+import SOOMv0
+from SOOMv0.Sources.CSV import CSVDataSource
+from SOOMv0.DataSourceColumn import DataSourceColumn
+from SOOMv0.Analysis import calc_indirectly_std_ratios as calc_ind
+from SOOMv0.CrossTab import CrossTab
+
+thisdir = os.path.abspath(os.path.dirname(__file__))
+soomobj = os.path.join(thisdir, '..', '..', 'SOOM_objects')
+
+try:
+ event_ds = SOOMv0.dsload('syndeath', path=soomobj)
+ pop_ds = SOOMv0.dsload('synpop', path=soomobj)
+ skip = 0
+except SOOMv0.DatasetNotFound:
+    sys.stderr.write('WARNING - %s tests skipped because syndeath datasets '
+                     'were not found\n' % __file__)
+ skip = 1
+
+
+colnames = [ 'observed', 'expected', 'isr', 'isr_ll', 'isr_ul', ]
+
+if not skip:
+ class CSVCols(SOOMv0.Dataset):
+ def __init__(self, filename):
+ SOOMv0.Dataset.__init__(self, filename)
+ f = open(filename, 'rb')
+ try:
+ reader = csv.reader(f)
+ self.colmap = dict([(c, i)
+ for i, c in enumerate(reader.next())])
+ self.cols = []
+ for i in xrange(len(self.colmap)):
+ self.cols.append([])
+ for row in reader:
+ for i, v in enumerate(row):
+ if v:
+ v = float(v)
+ self.cols[i].append(v)
+ finally:
+ f.close()
+
+ def __getitem__(self, col):
+ return self.cols[self.colmap[col]]
+
+ def csv_to_ds(filename):
+ name = os.path.basename(filename).replace('.', '_')
+ SOOMv0.soom.writepath = '/tmp' # XXX
+ ds = SOOMv0.Dataset(name, summary=True, path='/tmp')
+ ds.addcolumn('sex', datatype='int', coltype='categorical')
+ ds.addcolumn('region', datatype='int', coltype='categorical')
+ ds.addcolumn('observed', datatype='float', coltype='scalar')
+ ds.addcolumn('expected', datatype='float', coltype='scalar')
+ ds.addcolumn('isr', datatype='float', coltype='scalar')
+ ds.addcolumn('isr_ll', datatype='float', coltype='scalar')
+ ds.addcolumn('isr_ul', datatype='float', coltype='scalar')
+ cols = [
+ DataSourceColumn('region', ordinalpos=0),
+ DataSourceColumn('sex', ordinalpos=1),
+ DataSourceColumn('observed', ordinalpos=4),
+ DataSourceColumn('expected', ordinalpos=5),
+ DataSourceColumn('isr', ordinalpos=6),
+ DataSourceColumn('isr_ll', ordinalpos=7),
+ DataSourceColumn('isr_ul', ordinalpos=8),
+ ]
+ source = CSVDataSource(name, cols, filename, header_rows=1)
+ ds.lock()
+ ds.loaddata(source, initialise=1, finalise=1)
+ return ds
+
+
+ class _BaseIndirectSAS(unittest.TestCase):
+ def basetest(self):
+ sasdata = csv_to_ds(os.path.join(thisdir, self.sasresults_file))
+ summset = event_ds.summ('sex', 'region', zeros=True,
+ filterexpr=self.e_filterexpr)
+ stdsumset = event_ds.summ('agegrp', 'sex', zeros=True,
+ filterexpr=self.s_filterexpr)
+ ind = calc_ind(summset, pop_ds, stdsumset, pop_ds,
+ conflev=self.conflev,
+ popset_popcol='pop', stdpopset_popcol='pop')
+ sasct = CrossTab.from_summset(sasdata)
+ indct = CrossTab.from_summset(ind, shaped_like=sasct)
+ for colname in colnames:
+ a = sasct[colname].data
+ b = indct[colname].data.filled(0)
+ if not Numeric.allclose(a, b):
+                    if 0: # set to 1 to print the mismatching arrays when debugging
+ print
+ print ind
+ print a
+ print b
+ self.fail('%s not equal' % colname)
+
+ class test_37_37_90(_BaseIndirectSAS):
+ sasresults_file = 'data/smr_results_37_37_CL0.90.csv'
+ s_filterexpr = 'causeofdeath = 37'
+ e_filterexpr = 'causeofdeath = 37'
+ conflev = 0.90
+ test = _BaseIndirectSAS.basetest
+
+ class test_37_37_99(_BaseIndirectSAS):
+ sasresults_file = 'data/smr_results_37_37_CL0.99.csv'
+ s_filterexpr = 'causeofdeath = 37'
+ e_filterexpr = 'causeofdeath = 37'
+ conflev = 0.99
+ test = _BaseIndirectSAS.basetest
+
+ class test_37_95_90(_BaseIndirectSAS):
+ sasresults_file = 'data/smr_results_37_95_CL0.90.csv'
+ s_filterexpr = 'causeofdeath = 37'
+ e_filterexpr = 'causeofdeath = 95'
+ conflev = 0.90
+ test = _BaseIndirectSAS.basetest
+
+ class test_37_95_99(_BaseIndirectSAS):
+ sasresults_file = 'data/smr_results_37_95_CL0.99.csv'
+ s_filterexpr = 'causeofdeath = 37'
+ e_filterexpr = 'causeofdeath = 95'
+ conflev = 0.99
+ test = _BaseIndirectSAS.basetest
+
+ class test_95_37_90(_BaseIndirectSAS):
+ sasresults_file = 'data/smr_results_95_37_CL0.90.csv'
+ s_filterexpr = 'causeofdeath = 95'
+ e_filterexpr = 'causeofdeath = 37'
+ conflev = 0.90
+ test = _BaseIndirectSAS.basetest
+
+ class test_95_37_99(_BaseIndirectSAS):
+ sasresults_file = 'data/smr_results_95_37_CL0.99.csv'
+ s_filterexpr = 'causeofdeath = 95'
+ e_filterexpr = 'causeofdeath = 37'
+ conflev = 0.99
+ test = _BaseIndirectSAS.basetest
+
+ class test_95_95_90(_BaseIndirectSAS):
+ sasresults_file = 'data/smr_results_95_95_CL0.90.csv'
+ s_filterexpr = 'causeofdeath = 95'
+ e_filterexpr = 'causeofdeath = 95'
+ conflev = 0.90
+ test = _BaseIndirectSAS.basetest
+
+ class test_95_95_99(_BaseIndirectSAS):
+ sasresults_file = 'data/smr_results_95_95_CL0.99.csv'
+ s_filterexpr = 'causeofdeath = 95'
+ e_filterexpr = 'causeofdeath = 95'
+ conflev = 0.99
+ test = _BaseIndirectSAS.basetest
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tests/poprate.py b/tests/poprate.py
new file mode 100644
index 0000000..0411835
--- /dev/null
+++ b/tests/poprate.py
@@ -0,0 +1,298 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: poprate.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/tests/poprate.py,v $
+
+from SOOMv0 import *
+from SOOMv0 import Analysis
+import unittest
+import MA
+
+# The following data and results are originally from Selvin, S. Statistical Analysis of
+# Epidemiologic Data (Monographs in Epidemiology and Biostatistics, V. 35), Oxford University Press;
+# 3rd edition (May 1, 2004), via the EpiTools package for R (see http://www.medepi.net/epitools/ )
+
+agegrp_outtrans = {1:"<1",2:"1-4",3:"5-14",4:"15-24",5:"25-34",6:"35-44",7:"45-54",
+ 8:"55-64",9:"65-74",10:"75-84",11:"85+"}
+
+def _get_ds1():
+ ds = Dataset('deaths')
+ ds.addcolumnfromseq('agegrp', label='Age Group',
+ coltype='categorical', datatype='int',outtrans=agegrp_outtrans,
+ data=[1,2,3,4,5,6,7,8,9,10,11,1,2,3,4,5,6,7,8,9,10,11])
+ ds.addcolumnfromseq('_freq_', label='Frequency',
+ coltype='scalar', datatype='int',
+ data=[141,926,1253,1080,1869,4891,14956,30888,41725,26501,5928,
+ 45,201,320,670,1126,3160,9723,17935,22179,13461,2238])
+ ds.addcolumnfromseq('year', label='Year',
+ coltype='ordinal', datatype='int',
+ data=[1960]*11 + [1940]*11)
+ return ds
+
+def _get_pop_ds1():
+ ds = Dataset('pops')
+ ds.addcolumnfromseq('agegrp', label='Age Group',
+ coltype='categorical', datatype='int',outtrans=agegrp_outtrans,
+ data=[1,2,3,4,5,6,7,8,9,10,11,1,2,3,4,5,6,7,8,9,10,11])
+ ds.addcolumnfromseq('_freq_', label='Frequency',
+ coltype='scalar', datatype='int',
+ data=[1784033,7065148,15658730,10482916,9939972,10563872,
+ 9114202,6850263,4702482,1874619,330915,
+ 906897,3794573,10003544,10629526,9465330,8249558,
+ 7294330,5022499,2920220,1019504,142532])
+ ds.addcolumnfromseq('year', label='Year',
+ coltype='ordinal', datatype='int',
+ data=[1960]*11 + [1940]*11)
+ return ds
+
+def _get_ds1_40():
+ ds = Dataset('deaths40')
+ ds.addcolumnfromseq('agegrp', label='Age Group',
+ coltype='categorical', datatype='int',outtrans=agegrp_outtrans,
+ data=[1,2,3,4,5,6,7,8,9,10,11])
+ ds.addcolumnfromseq('_freq_', label='Frequency',
+ coltype='scalar', datatype='int',
+ data=[45,201,320,670,1126,3160,9723,17935,22179,13461,2238])
+ ds.addcolumnfromseq('year', label='Year',
+ coltype='ordinal', datatype='int',
+ data=[1940]*11)
+ return ds
+
+def _get_pop_ds1_40():
+ ds = Dataset('pops40')
+ ds.addcolumnfromseq('agegrp', label='Age Group',
+ coltype='categorical', datatype='int',outtrans=agegrp_outtrans,
+ data=[1,2,3,4,5,6,7,8,9,10,11])
+ ds.addcolumnfromseq('_freq_', label='Frequency',
+ coltype='scalar', datatype='int',
+ data=[906897,3794573,10003544,10629526,9465330,8249558,
+ 7294330,5022499,2920220,1019504,142532])
+ ds.addcolumnfromseq('year', label='Year',
+ coltype='ordinal', datatype='int',
+ data=[1940]*11)
+ return ds
+
+def _get_ds1_60():
+ ds = Dataset('deaths60')
+ ds.addcolumnfromseq('agegrp', label='Age Group',
+ coltype='categorical', datatype='int',outtrans=agegrp_outtrans,
+ data=[1,2,3,4,5,6,7,8,9,10,11])
+ ds.addcolumnfromseq('_freq_', label='Frequency',
+ coltype='scalar', datatype='int',
+ data=[141,926,1253,1080,1869,4891,14956,30888,41725,26501,5928])
+ ds.addcolumnfromseq('year', label='Year',
+ coltype='ordinal', datatype='int',
+ data=[1960]*11)
+ return ds
+
+def _get_pop_ds1_60():
+ ds = Dataset('pops60')
+ ds.addcolumnfromseq('agegrp', label='Age Group',
+ coltype='categorical', datatype='int',outtrans=agegrp_outtrans,
+ data=[1,2,3,4,5,6,7,8,9,10,11])
+ ds.addcolumnfromseq('_freq_', label='Frequency',
+ coltype='scalar', datatype='int',
+ data=[1784033,7065148,15658730,10482916,9939972,10563872,
+ 9114202,6850263,4702482,1874619,330915])
+ ds.addcolumnfromseq('year', label='Year',
+ coltype='ordinal', datatype='int',
+ data=[1960]*11)
+ return ds
+
+def _get_std_ds1():
+ ds = Dataset('std')
+ ds.addcolumnfromseq('agegrp', label='Age Group',
+ coltype='categorical', datatype='int',outtrans=agegrp_outtrans,
+ data=[1,2,3,4,5,6,7,8,9,10,11])
+ data=[1784033,7065148,15658730,10482916,9939972,10563872,
+ 9114202,6850263,4702482,1874619,330915]
+ ds.addcolumnfromseq('_stdpop_', label='Standard population',
+ coltype='scalar', datatype='int',
+ data=data)
+ return ds
+
+class dir_indir_std_rate_test1(unittest.TestCase):
+
+ def assertListNear(self, first, second, prec=2):
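+ # Compare two sequences element-wise by formatting each value to
+ # prec decimal places (None/masked values render as 'None').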
+ def ma_fmt(v, prec):
+ if v is None:
+ return 'None'
+ return '%.*f' % (prec, v)
+ first = ', '.join([ma_fmt(v, prec) for v in first])
+ second = ', '.join([ma_fmt(v, prec) for v in second])
+ self.assertEqual(first, second, '[%s] != [%s]' % (first, second))
+
+ def test_dsr_and_cr(self):
+ ds = _get_ds1()
+ pop = _get_pop_ds1()
+ std = _get_std_ds1()
+ results = Analysis.calc_directly_std_rates(ds, pop, std)
+ self.assertListNear(results['dsr'], [139.25054, 166.08744])
+ self.assertListNear(results['dsr_ll'], [138.21410, 165.18636])
+ self.assertListNear(results['dsr_ul'], [140.29275, 166.99223])
+ self.assertListNear(results['cr'], [119.52864, 166.08744])
+ self.assertListNear(results['cr_ll'], [118.65139, 165.18636])
+ self.assertListNear(results['cr_ul'], [120.41077, 166.99223])
+
+ # AM - disabled because test is broken
+ def XXX_test_isr(self):
+ ds = _get_ds1()
+ pop = _get_pop_ds1()
+ #ds.makefilter('d40',expr='year == 1940')
+ #ds.makefilter('d60',expr='year == 1960')
+ #pop.makefilter('p40',expr='year == 1940')
+ #pop.makefilter('p60',expr='year == 1960')
+ #dths40 = ds.filter(name='d40')
+ #dths60 = ds.filter(name='d60')
+ #pops40 = pop.filter(name='p40')
+ #pops60 = pop.filter(name='p60')
+
+ dths40 = _get_ds1_40()
+ dths60 = _get_ds1_60()
+ pops40 = _get_pop_ds1_40()
+ pops60 = _get_pop_ds1_60()
+
+ dthsumm40 = dths40.summ(SummaryStats.asum('_freq_'))
+
+ if 0:
+ print dths40
+ print dthsumm40
+ print dthsumm40.describe_with_cols()
+ print dths60
+ print pops40
+ print pops60
+
+ results = Analysis.calc_indirectly_std_ratios(dths40, pops40, dths60, pops60,
+ popset_popcol='_freq_', stdpopset_popcol='_freq_')
+ if 0:
+ print results
+ #self.assertListNear(results['dsr'], [139.25054, 166.08744])
+ #self.assertListNear(results['dsr_ll'], [138.21410, 165.18636])
+ #self.assertListNear(results['dsr_ul'], [140.29275, 166.99223])
+ #self.assertListNear(results['cr'], [119.52864, 166.08744])
+ #self.assertListNear(results['cr_ll'], [118.65139, 165.18636])
+ #self.assertListNear(results['cr_ul'], [120.41077, 166.99223])
+
+# The following are synthetic test data - counts and populations are
+# arbitrarily chosen numbers. Results were checked against those produced by
+# known-good SAS macros written and used by the Centre for Epidemiology
+# and Research, NSW Dept of Health - see
+# http://www.health.nsw.gov.au/public-health/chorep/toc/app_methods.htm#3.2
+
+def _get_ds2():
+ ds = Dataset('visits')
+ ds.addcolumnfromseq('sex', label='Sex',
+ coltype='categorical', datatype='int',
+ all_value=-1,
+ data=[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,])
+ ds.addcolumnfromseq('agegrp', label='Age Group',
+ coltype='categorical', datatype='int',
+ data=[ 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ 10,11,12,13,14,15,16,17,18,
+ 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ 10,11,12,13,14,15,16,17,18,])
+ ds.addcolumnfromseq('_freq_', label='Frequency',
+ coltype='scalar', datatype='int',
+ data=[659,146,102,140,221,177,268,302,276,
+ 240,207,163,143,117, 94, 65, 43, 38,
+ 549, 97, 93,248,299,300,288,292,231,
+ 168,149,149,180,144,132,128, 67, 85,])
+ ds.addcolumnfromseq('freq_wgtd_by_wgt', label='Statistical weighting',
+ coltype='scalar', datatype='float',
+ data=[ 19380, 3831, 2592, 3624, 5676, 4522,
+ 6836, 7783, 7186, 6195, 5358, 4239,
+ 3628, 2950, 2401, 1634, 1108, 954,
+ 16419, 2566, 2337, 6512, 7907, 7808,
+ 7597, 7690, 6008, 4274, 3736, 3901,
+ 4707, 3723, 3420, 3256, 1676, 2151,])
+ return ds
+
+def _get_pop_ds2():
+ ds = Dataset('pop')
+ ds.addcolumnfromseq('agegrp', label='Age Group',
+ coltype='categorical', datatype='int',
+ data=[1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4,
+ 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6,
+ 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8,
+ 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10,
+ 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12,
+ 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14,
+ 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16,
+ 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18])
+ ds.addcolumnfromseq('race', label='Race',
+ coltype='categorical', datatype='int',
+ data=[1,1,2,2,5,5,1,1,2,2,5,5,1,1,2,2,5,5,
+ 1,1,2,2,5,5,1,1,2,2,5,5,1,1,2,2,5,5,
+ 1,1,2,2,5,5,1,1,2,2,5,5,1,1,2,2,5,5,
+ 1,1,2,2,5,5,1,1,2,2,5,5,1,1,2,2,5,5,
+ 1,1,2,2,5,5,1,1,2,2,5,5,1,1,2,2,5,5,
+ 1,1,2,2,5,5,1,1,2,2,5,5,1,1,2,2,5,5,])
+ ds.addcolumnfromseq('sex', label='Sex',
+ coltype='categorical', datatype='int',
+ data=[1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
+ 1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
+ 1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
+ 1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
+ 1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,
+ 1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,])
+ ds.addcolumnfromseq('_freq_', label='Population',
+ coltype='scalar', datatype='int',
+ data=
+ [7995000,7592000,1584000,1540000, 566000, 548000,8162000,7759000,1703000,
+ 1654000, 548000, 519000,7910000,7497000,1591000,1542000, 530000, 509000,
+ 7938000,7450000,1567000,1539000, 506000, 483000,7208000,6916000,1282000,
+ 1378000, 510000, 478000,7757000,7650000,1267000,1440000, 572000, 571000,
+ 8541000,8425000,1355000,1504000, 520000, 566000,9420000,9307000,1386000,
+ 1550000, 510000, 553000,8945000,8905000,1247000,1409000, 465000, 523000,
+ 7767000,7845000, 984000,1154000, 387000, 444000,6386000,6578000, 670000,
+ 820000, 290000, 318000,4870000,5142000, 527000, 672000, 217000, 236000,
+ 4116000,4482000, 428000, 567000, 168000, 197000,3905000,4535000, 398000,
+ 529000, 129000, 171000,3396000,4318000, 289000, 418000, 101000, 135000,
+ 2628000,3683000, 208000, 327000, 67000, 91000,1558000,2632000, 111000,
+ 212000, 39000, 52000, 986000,2443000, 83000, 199000, 28000, 44000,])
+ return ds
+
+if 0:
+ # Test broken by API changes
+ class popn_rate_test(unittest.TestCase):
+
+ def assertListNear(self, first, second, prec=2):
+ def ma_fmt(v, prec):
+ if v is None:
+ return 'None'
+ return '%.*f' % (prec, v)
+ first = ', '.join([ma_fmt(v, prec) for v in first])
+ second = ', '.join([ma_fmt(v, prec) for v in second])
+ self.assertEqual(first, second, '[%s] != [%s]' % (first, second))
+
+ def test_simple(self):
+ ds = _get_ds1()
+ pop = _get_pop_ds1()
+ Analysis.calc_directly_std_rates(ds, pop)
+ # AM - This result has not been verified at this time...
+ self.assertListNear(ds['pop_rate_wgtd_by_wgt'],
+ [0.001910, 0.000368, 0.000258, 0.000362, 0.000631, 0.000471,
+ 0.000656, 0.000688, 0.000674, 0.000678, 0.000729, 0.000755,
+ 0.000770, 0.000666, 0.000634, 0.000563, 0.000649, 0.000870,
+ 0.001696, 0.000258, 0.000245, 0.000687, 0.000901, 0.000808,
+ 0.000724, 0.000674, 0.000554, 0.000453, 0.000484, 0.000645,
+ 0.000897, 0.000711, 0.000702, 0.000794, 0.000579, 0.000801],
+ prec=5)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tests/source_datatypes.py b/tests/source_datatypes.py
new file mode 100644
index 0000000..c0d85ae
--- /dev/null
+++ b/tests/source_datatypes.py
@@ -0,0 +1,179 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+import unittest
+from mx.DateTime import DateTime, Time
+import SOOMv0
+from SOOMv0 import SourceDataTypes
+
+class TestSourceDataTypes(unittest.TestCase):
+ def test_int1(self):
+ conv = SourceDataTypes.get_conversion('int')
+ self.assertEqual(conv('123'), 123)
+ self.assertRaises(TypeError, conv, None)
+ self.assertRaises(ValueError, conv, '123.4')
+ self.assertRaises(ValueError, conv, 'abc')
+ self.assertRaises(ValueError, conv, '12 3')
+
+ def test_int2(self):
+ conv = SourceDataTypes.get_conversion('int')
+ self.assertEqual(conv('-123'), -123)
+ self.assertRaises(ValueError, conv, '-123.4')
+ self.assertRaises(ValueError, conv, '12,3')
+
+ def test_int3(self):
+ conv = SourceDataTypes.get_conversion('int')
+ self.assertEqual(conv('1234567890'), 1234567890)
+
+ def test_int4(self):
+ conv = SourceDataTypes.get_conversion('int')
+ self.assertEqual(conv('12345678901234567890123456789012345678901234567890'), 12345678901234567890123456789012345678901234567890)
+
+ def test_long1(self):
+ conv = SourceDataTypes.get_conversion('long')
+ self.assertEqual(conv('123'), 123)
+ self.assertRaises(TypeError, conv, None)
+ self.assertRaises(ValueError, conv, '123.4')
+ self.assertRaises(ValueError, conv, 'abc')
+ self.assertRaises(ValueError, conv, '12 3')
+ self.assertRaises(ValueError, conv, '-123.4')
+ self.assertRaises(ValueError, conv, '12,3')
+
+ def test_long2(self):
+ conv = SourceDataTypes.get_conversion('long')
+ self.assertEqual(conv('1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890'), \
+ 1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890)
+
+ def test_str(self):
+ conv = SourceDataTypes.get_conversion('str')
+ self.assertEqual(conv('123'), '123')
+ self.assertEqual(conv(123), '123')
+ self.assertEqual(conv(123.1), '123.1')
+ self.assertEqual(conv(None), 'None')
+ self.assertNotEqual(conv(123.45678901234567890123456789012345678901234567890), \
+ '123.45678901234567890123456789012345678901234567890')
+
+ def test_float(self):
+ conv = SourceDataTypes.get_conversion('float')
+ self.assertEqual(conv('123'), 123)
+ self.assertEqual(conv('123.4'), 123.4)
+ self.assertEqual(conv('-123.4'), -123.4)
+ self.assertEqual(conv('123.45678901234567890123456789012345678901234567890'), \
+ 123.45678901234567890123456789012345678901234567890)
+ self.assertRaises(TypeError, conv, None)
+
+ def test_date(self):
+ conv = SourceDataTypes.get_conversion('date')
+ self.assertEqual(conv('23/12/1970'), DateTime(1970,12,23))
+ self.assertEqual(conv('23/12/70'), DateTime(1970,12,23))
+ self.assertEqual(conv('25/12/00'), DateTime(2000,12,25))
+ self.assertEqual(conv('25/12/1900'), DateTime(1900,12,25))
+ self.assertEqual(conv('25/12/900'), DateTime(900,12,25))
+ self.assertEqual(conv('25/12/9'), DateTime(2009,12,25))
+ self.assertEqual(conv('3/2/2004'), DateTime(2004,2,3))
+ self.assertEqual(conv('3/2/04'), DateTime(2004,2,3))
+ self.assertEqual(conv('03/2/04'), DateTime(2004,2,3))
+ self.assertEqual(conv('3/02/04'), DateTime(2004,2,3))
+ self.assertEqual(conv('03/02/04'), DateTime(2004,2,3))
+ self.assertEqual(conv('29/02/04'), DateTime(2004,2,29))
+ self.assertEqual(conv(None), None)
+ self.assertEqual(conv(''), None)
+ self.assertRaises(ValueError, conv, '2 3 04')
+ self.assertRaises(ValueError, conv, '23-12-1970')
+ # how to trap this error?
+ # self.assertRaises(RangeError, conv, '29/02/2003')
+
+ def test_early_date(self):
+ "This test illustrates inability to handle dates earlier than 100 AD."
+ conv = SourceDataTypes.get_conversion('date')
+ self.assertNotEqual(conv('25/12/0000'), DateTime(0,12,25))
+
+ def test_iso_date(self):
+ conv = SourceDataTypes.get_conversion('iso-date')
+ self.assertEqual(conv('1970-12-23'), DateTime(1970,12,23))
+ self.assertEqual(conv('70-12-23'), DateTime(1970,12,23))
+ self.assertEqual(conv('2004-2-3'), DateTime(2004,2,3))
+ self.assertEqual(conv('04-2-3'), DateTime(2004,2,3))
+ self.assertEqual(conv('00-12-25'), DateTime(2000,12,25))
+ self.assertEqual(conv('1900-12-25'), DateTime(1900,12,25))
+ self.assertEqual(conv('900-12-25'), DateTime(900,12,25))
+ self.assertEqual(conv('9-12-25'), DateTime(2009,12,25))
+ self.assertEqual(conv('04-2-03'), DateTime(2004,2,3))
+ self.assertEqual(conv('04-02-3'), DateTime(2004,2,3))
+ self.assertEqual(conv('04-02-03'), DateTime(2004,2,3))
+ self.assertEqual(conv('04-02-29'), DateTime(2004,2,29))
+ self.assertEqual(conv(None), None)
+ self.assertEqual(conv(''), None)
+ self.assertRaises(ValueError, conv, '2 3 04')
+ self.assertRaises(ValueError, conv, '1970/12/23')
+ # how to trap this error?
+ # self.assertRaises(RangeError, conv, '2003-02-29')
+
+ def test_us_date(self):
+ conv = SourceDataTypes.get_conversion('us-date')
+ self.assertEqual(conv('12/23/1970'), DateTime(1970,12,23))
+ self.assertEqual(conv('12/23/70'), DateTime(1970,12,23))
+ self.assertEqual(conv('12/25/00'), DateTime(2000,12,25))
+ self.assertEqual(conv('12/25/1900'), DateTime(1900,12,25))
+ self.assertEqual(conv('12/25/900'), DateTime(900,12,25))
+ self.assertEqual(conv('12/25/9'), DateTime(2009,12,25))
+ self.assertEqual(conv('2/3/2004'), DateTime(2004,2,3))
+ self.assertEqual(conv('2/3/04'), DateTime(2004,2,3))
+ self.assertEqual(conv('2/03/04'), DateTime(2004,2,3))
+ self.assertEqual(conv('02/3/04'), DateTime(2004,2,3))
+ self.assertEqual(conv('02/03/04'), DateTime(2004,2,3))
+ self.assertEqual(conv('02/29/04'), DateTime(2004,2,29))
+ self.assertEqual(conv(None), None)
+ self.assertEqual(conv(''), None)
+ self.assertRaises(ValueError, conv, '3 2 04')
+ self.assertRaises(ValueError, conv, '12-23-1970')
+ # how to trap this error?
+ # self.assertRaises(RangeError, conv, '02/29/2003')
+
+ def test_iso_time(self):
+ conv = SourceDataTypes.get_conversion('iso-time')
+ self.assertEqual(conv('01:12:23.34'), Time(1,12,23.34))
+ self.assertEqual(conv('1:12:23.4'), Time(1,12,23.4))
+ self.assertEqual(conv('1:2:3.4'), Time(1,2,3.4))
+ self.assertEqual(conv('01:02:03.4'), Time(1,2,3.4))
+ self.assertEqual(conv(None), None)
+ self.assertEqual(conv(''), None)
+
+ def test_iso_time_leap_seconds(self):
+ """Err, shouldn't some of these throw errors, even allowing for leap seconds?"""
+ conv = SourceDataTypes.get_conversion('iso-time')
+ self.assertEqual(conv('01:02:60.4'), Time(1,2,60.4))
+ self.assertEqual(conv('01:02:61.4'), Time(1,2,61.4))
+ self.assertEqual(conv('01:02:62.4'), Time(1,2,62.4))
+
+ def test_iso_datetime(self):
+ conv = SourceDataTypes.get_conversion('iso-datetime')
+ self.assertEqual(conv('1970-12-23 01:12:23.34'),
+ DateTime(1970,12,23,1,12,23.34))
+ self.assertEqual(conv('1970-12-23 01:12:23.34'),
+ DateTime(1970,12,23,1,12,23.34))
+ self.assertEqual(conv('70-12-23 1:12:23.4'),
+ DateTime(1970,12,23,1,12,23.4))
+ self.assertEqual(conv('70-12-23 1:2:3.4'),
+ DateTime(1970,12,23,1,2,3.4))
+ self.assertEqual(conv(None), None)
+ self.assertEqual(conv(''), None)
+
+ def test_time_optional_seconds(self):
+ conv = SourceDataTypes.get_format('time', 'HH:MM')
+ self.assertEqual(conv('12:23'), Time(12,23))
+ self.assertEqual(conv('12:23'), Time(12,23,0.0))
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tests/stats.py b/tests/stats.py
new file mode 100644
index 0000000..6188b79
--- /dev/null
+++ b/tests/stats.py
@@ -0,0 +1,2894 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: stats.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/tests/stats.py,v $
+
+# sys is needed below even when the Cstats import succeeds
+import sys
+import unittest
+from SOOMv0 import Stats
+try:
+ from SOOMv0 import Cstats
+except ImportError:
+ sys.stderr.write('WARNING - Cstats module not available - tests skipped\n')
+ Cstats = None
+# Set rpy_tests to False to disable the R-based stat method tests
+rpy_tests = True
+if not rpy_tests:
+ sys.stderr.write('WARNING - R-based stat method tests disabled\n')
+import Numeric, MA
+
+empty_numeric = Numeric.array([])
+empty_ma = MA.array([],mask=[])
+populated_numeric = Numeric.array([1,2,3,3,5])
+populated_ma = MA.array([1,2,3,3,5],mask=[0,0,0,0,0])
+null_mask = MA.array([1,2,3,5,3], mask=[0,0,0,0,0])
+full_mask = MA.array([1,2,3,5,3], mask=[1,1,1,1,1])
+partial_mask = MA.array([1,2,3,5,3], mask=[1,0,1,0,1])
+two_elements_numeric = Numeric.array([2,5])
+two_elements_ma = MA.array([2,5],mask=[0,0])
+one_element_numeric = Numeric.array([2])
+one_element_ma = MA.array([2],mask=[0])
+one_masked_element_ma = MA.array([2],mask=[1])
+one_neg_element_numeric = Numeric.array([-2])
+one_neg_element_ma = MA.array([-2],mask=[0])
+all_neg_numeric = Numeric.array([-3,-4,-5,-2,-76])
+all_neg_ma = MA.array([-3,-4,-5,-2,-76],mask=[0,0,0,0,0])
+twenty_ele = Numeric.array([4, 5, 14, 19, 17, 13, 12, 9, 8, 0,
+ 15, 18, 2, 1, 16, 6, 7, 10, 11, 3])
+
+n1001_nomissing_numpy = Numeric.arrayrange(990, -11, -1, typecode=Numeric.Int)
+w1001_nomissing_numpy = Numeric.arrayrange(990, -11, -1,
+ typecode=Numeric.Float) / 3.0
+n1006_nomissing_numpy = Numeric.arrayrange(995, -11, -1, typecode=Numeric.Int)
+w1006_nomissing_numpy = Numeric.arrayrange(995, -11, -1,
+ typecode=Numeric.Float) / 3.0
+n1001_nomissing_MA = MA.array(range(990, -11, -1),
+ typecode=MA.Int, mask=Numeric.zeros(1001))
+w1001_nomissing_MA = MA.array(range(990, -11, -1),
+ typecode=MA.Float, mask=Numeric.zeros(1001)) / 3.0
+n1006_nomissing_MA = MA.array(range(995, -11, -1),
+ typecode=MA.Int, mask=Numeric.zeros(1006))
+w1006_nomissing_MA = MA.array(range(995, -11, -1),
+ typecode=MA.Float, mask=Numeric.zeros(1006)) / 3.0
+
+def makemask(n, mmod):
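+ # Build a 0/1 mask over range(n, -11, -1): multiples of mmod are flagged
+ # as missing - below 500 when mmod is 7, above 500 when mmod is 13.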
+ mmask = []
+ for z in range(n, -11, -1):
+ if (z % mmod == 0 and mmod == 7 and z < 500) or \
+ (z % mmod == 0 and mmod == 13 and z > 500):
+ mmask.append(1)
+ else:
+ mmask.append(0)
+ return Numeric.array(mmask, typecode=Numeric.Int)
+
+n1001_missing = MA.array(range(990, -11, -1),
+ typecode=MA.Int, mask=makemask(990, 7))
+w1001_missing = MA.array(range(990, -11, -1),
+ typecode=MA.Int, mask=makemask(990, 13)) / 3.0
+n1006_missing = MA.array(range(995, -11, -1),
+ typecode=MA.Int, mask=makemask(995, 7))
+w1006_missing = MA.array(range(995, -11, -1),
+ typecode=MA.Int, mask=makemask(995, 13)) / 3.0
+
+class StatsTests(unittest.TestCase):
+ def _test(self, fn, data, expect, **kwargs):
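+ # Apply fn to data (a tuple is unpacked as positional arguments) and
+ # compare with expect: MA.masked compares by identity, floats are
+ # rounded to 4 decimal places.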
+ if type(data) is tuple:
+ result = fn(*data,**kwargs)
+ else:
+ result = fn(data,**kwargs)
+ if result is MA.masked or expect is MA.masked:
+ ok = result is expect
+ elif type(result) in (float, int) or type(expect) in (float, int):
+ ok = round(result, 4) == round(expect, 4)
+ else:
+ ok = result == expect
+ self.failUnless(ok, '%s(%s) - expected %s, got %s' %
+ (fn.__name__, data, expect, result))
+
+ def _stricttest(self, fn, data, expect, **kwargs):
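+ # Like _test, but rounds to 7 decimal places and also compares
+ # tuple/list results element-wise (None compares by equality).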
+ if type(data) is tuple:
+ result = fn(*data,**kwargs)
+ else:
+ result = fn(data,**kwargs)
+ if result is MA.masked or expect is MA.masked:
+ ok = result is expect
+ elif type(result) in (float, int) or type(expect) in (float, int):
+ ok = round(result, 7) == round(expect, 7)
+ elif type(result) in (tuple, list) or type(expect) in (tuple, list):
+ oks = []
+ for i in range(len(result)):
+ r_val = result[i]
+ e_val = expect[i]
+ if e_val is None or r_val is None:
+ oks.append(r_val == e_val)
+ else:
+ oks.append(round(r_val, 7) == round(e_val, 7))
+ ok = False not in oks
+ else:
+ ok = result == expect
+ self.failUnless(ok, '%s(%s) - expected %s, got %s' %
+ (fn.__name__, data, expect, result))
+
+ def test_wn_misc(self):
+ self._test(Stats.wn, (empty_numeric,), 0)
+ self._test(Stats.wn, (empty_ma,), 0)
+ self._test(Stats.wn, (populated_numeric,), 14)
+ self._test(Stats.wn, (populated_ma,), 14)
+ self._test(Stats.wn, (full_mask,), 0)
+ self._test(Stats.wn, (null_mask,), 14)
+ self._test(Stats.wn, (partial_mask,), 7)
+ self._test(Stats.wn, (two_elements_numeric,), 7)
+ self._test(Stats.wn, (two_elements_ma,), 7)
+ self._test(Stats.wn, (one_element_numeric,), 2)
+ self._test(Stats.wn, (one_element_ma,), 2)
+ self._test(Stats.wn, (one_neg_element_numeric,), 0)
+ self._test(Stats.wn, (one_neg_element_ma,), 0)
+ self._test(Stats.wn, (one_masked_element_ma,), 0)
+ self._test(Stats.wn, (all_neg_numeric,), 0)
+ self._test(Stats.wn, (all_neg_ma,), 0)
+
+ def test_wn_misc_exclnpwgts(self):
+ # repeat with exclude_nonpositive_weights=True
+ self._test(Stats.wn, (empty_numeric,), 0,exclude_nonpositive_weights=True)
+ self._test(Stats.wn, (empty_ma,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wn, (populated_numeric,), 14, exclude_nonpositive_weights=True)
+ self._test(Stats.wn, (populated_ma,), 14, exclude_nonpositive_weights=True)
+ self._test(Stats.wn, (full_mask,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wn, (null_mask,), 14, exclude_nonpositive_weights=True)
+ self._test(Stats.wn, (partial_mask,), 7, exclude_nonpositive_weights=True)
+ self._test(Stats.wn, (two_elements_numeric,), 7, exclude_nonpositive_weights=True)
+ self._test(Stats.wn, (two_elements_ma,), 7, exclude_nonpositive_weights=True)
+ self._test(Stats.wn, (one_element_numeric,), 2, exclude_nonpositive_weights=True)
+ self._test(Stats.wn, (one_element_ma,), 2, exclude_nonpositive_weights=True)
+ self._test(Stats.wn, (one_neg_element_numeric,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wn, (one_neg_element_ma,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wn, (one_masked_element_ma,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wn, (all_neg_numeric,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wn, (all_neg_ma,), 0, exclude_nonpositive_weights=True)
+
+ def test_wn_1001(self):
+ # now with 1001 element arrays
+ self._test(Stats.wn, (w1001_nomissing_numpy,), 163515)
+ self._test(Stats.wn, (w1001_nomissing_MA,), 163515)
+ self._test(Stats.wn, (w1001_missing,), 154046.66666667)
+
+ def test_wn_1001_exclnpwgts(self):
+ # repeat with exclude_nonpositive_weights=True
+ self._test(Stats.wn, (w1001_nomissing_numpy,), 163515, exclude_nonpositive_weights=True)
+ self._test(Stats.wn, (w1001_nomissing_MA,), 163515, exclude_nonpositive_weights=True)
+ self._test(Stats.wn, (w1001_missing,), 154046.66666667, exclude_nonpositive_weights=True)
+
+ def test_nonmissing_misc(self):
+ self._test(Stats.nonmissing, empty_numeric, 0)
+ self._test(Stats.nonmissing, empty_ma, 0)
+ self._test(Stats.nonmissing, populated_numeric, 5)
+ self._test(Stats.nonmissing, populated_ma, 5)
+ self._test(Stats.nonmissing, null_mask, 5)
+ self._test(Stats.nonmissing, full_mask, 0)
+ self._test(Stats.nonmissing, partial_mask, 2)
+ self._test(Stats.nonmissing, two_elements_numeric, 2)
+ self._test(Stats.nonmissing, two_elements_ma, 2)
+ self._test(Stats.nonmissing, one_element_numeric, 1)
+ self._test(Stats.nonmissing, one_element_ma, 1)
+ self._test(Stats.nonmissing, one_neg_element_numeric, 1)
+ self._test(Stats.nonmissing, one_neg_element_ma, 1)
+ self._test(Stats.nonmissing, one_masked_element_ma, 0)
+ self._test(Stats.nonmissing, all_neg_numeric, 5)
+ self._test(Stats.nonmissing, all_neg_ma, 5)
+
+ def test_nonmissing_1001(self):
+ self._test(Stats.nonmissing, n1001_nomissing_numpy, 1001)
+ self._test(Stats.nonmissing, n1001_nomissing_MA, 1001)
+ self._test(Stats.nonmissing, n1001_missing, 928)
+ self._test(Stats.nonmissing, n1006_nomissing_numpy, 1006)
+ self._test(Stats.nonmissing, n1006_nomissing_MA, 1006)
+ self._test(Stats.nonmissing, n1006_missing, 933)
+
+ def test_wnonmissing_misc(self):
+ self._test(Stats.wnonmissing, (empty_numeric,empty_numeric), 0)
+ self._test(Stats.wnonmissing, (empty_ma,empty_numeric), 0)
+ self._test(Stats.wnonmissing, (empty_numeric,empty_ma), 0)
+ self._test(Stats.wnonmissing, (empty_ma,empty_ma), 0)
+ self._test(Stats.wnonmissing, (populated_numeric,populated_numeric), 5)
+ self._test(Stats.wnonmissing, (populated_ma,populated_numeric), 5)
+ self._test(Stats.wnonmissing, (populated_numeric,populated_ma), 5)
+ self._test(Stats.wnonmissing, (populated_ma,populated_ma), 5)
+ self._test(Stats.wnonmissing, (populated_numeric,full_mask), 0)
+ self._test(Stats.wnonmissing, (populated_ma,full_mask), 0)
+ self._test(Stats.wnonmissing, (null_mask,null_mask), 5)
+ self._test(Stats.wnonmissing, (null_mask,partial_mask), 2)
+ self._test(Stats.wnonmissing, (null_mask,full_mask), 0)
+ self._test(Stats.wnonmissing, (partial_mask,null_mask), 2)
+ self._test(Stats.wnonmissing, (partial_mask,partial_mask), 2)
+ self._test(Stats.wnonmissing, (partial_mask,full_mask), 0)
+ self._test(Stats.wnonmissing, (full_mask,null_mask), 0)
+ self._test(Stats.wnonmissing, (full_mask,partial_mask), 0)
+ self._test(Stats.wnonmissing, (full_mask,full_mask), 0)
+ self._test(Stats.wnonmissing, (two_elements_numeric,two_elements_numeric), 2)
+ self._test(Stats.wnonmissing, (two_elements_ma,two_elements_numeric), 2)
+ self._test(Stats.wnonmissing, (one_element_numeric,one_element_numeric), 1)
+ self._test(Stats.wnonmissing, (one_element_ma,one_element_ma), 1)
+ self._test(Stats.wnonmissing, (one_element_ma,one_element_numeric), 1)
+ self._test(Stats.wnonmissing, (one_element_numeric,one_element_ma), 1)
+ self._test(Stats.wnonmissing, (one_element_numeric,one_neg_element_numeric), 1)
+ self._test(Stats.wnonmissing, (one_element_ma,one_neg_element_numeric), 1)
+ self._test(Stats.wnonmissing, (one_element_numeric,one_neg_element_ma), 1)
+ self._test(Stats.wnonmissing, (one_element_ma,one_neg_element_ma), 1)
+ self._test(Stats.wnonmissing, (one_masked_element_ma,one_element_ma), 0)
+ self._test(Stats.wnonmissing, (one_masked_element_ma,one_neg_element_numeric), 0)
+ self._test(Stats.wnonmissing, (one_element_ma,one_masked_element_ma), 0)
+ self._test(Stats.wnonmissing, (all_neg_numeric,all_neg_numeric), 5)
+ self._test(Stats.wnonmissing, (all_neg_numeric,all_neg_ma), 5)
+ self._test(Stats.wnonmissing, (all_neg_ma,all_neg_numeric), 5)
+ self._test(Stats.wnonmissing, (all_neg_ma,all_neg_ma), 5)
+
+ def test_wnonmissing_misc_exclnpwgts(self):
+ # repeat with exclude_nonpositive_weights=True
+ self._test(Stats.wnonmissing, (empty_numeric,empty_numeric,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (empty_ma,empty_numeric,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (empty_numeric,empty_ma,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (empty_ma,empty_ma,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (populated_numeric,populated_numeric,), 5, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (populated_ma,populated_numeric,), 5, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (populated_numeric,populated_ma,), 5, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (populated_ma,populated_ma,), 5, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (populated_numeric,full_mask,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (populated_ma,full_mask,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (null_mask,null_mask,), 5, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (null_mask,partial_mask,), 2, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (null_mask,full_mask,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (partial_mask,null_mask,), 2, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (partial_mask,partial_mask,), 2, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (partial_mask,full_mask,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (full_mask,null_mask,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (full_mask,partial_mask,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (full_mask,full_mask,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (two_elements_numeric,two_elements_numeric,), 2, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (two_elements_ma,two_elements_numeric,), 2, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (one_element_numeric,one_element_numeric,), 1, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (one_element_ma,one_element_ma,), 1, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (one_element_ma,one_element_numeric,), 1, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (one_element_numeric,one_element_ma,), 1, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (one_element_numeric,one_neg_element_numeric,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (one_element_ma,one_neg_element_numeric,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (one_element_numeric,one_neg_element_ma,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (one_element_ma,one_neg_element_ma,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (one_masked_element_ma,one_element_ma,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (one_masked_element_ma,one_neg_element_numeric,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (one_element_ma,one_masked_element_ma,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (all_neg_numeric,all_neg_numeric,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (all_neg_numeric,all_neg_ma,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (all_neg_ma,all_neg_numeric,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (all_neg_ma,all_neg_ma,), 0, exclude_nonpositive_weights=True)
+
+ def test_wnonmissing_1001(self):
+ # now with 1001 element arrays
+ self._test(Stats.wnonmissing, (n1001_nomissing_numpy,w1001_nomissing_numpy), 1001)
+ self._test(Stats.wnonmissing, (n1001_nomissing_MA,w1001_nomissing_numpy), 1001)
+ self._test(Stats.wnonmissing, (n1001_nomissing_numpy,w1001_nomissing_MA), 1001)
+ self._test(Stats.wnonmissing, (n1001_nomissing_MA,w1001_nomissing_MA), 1001)
+ self._test(Stats.wnonmissing, (n1001_missing,w1001_nomissing_numpy), 928)
+ self._test(Stats.wnonmissing, (n1001_missing,w1001_nomissing_MA), 928)
+ self._test(Stats.wnonmissing, (n1001_nomissing_numpy,w1001_missing), 963)
+ self._test(Stats.wnonmissing, (n1001_nomissing_MA,w1001_missing), 963)
+ self._test(Stats.wnonmissing, (n1001_missing,w1001_missing), 890)
+
+ def test_wnonmissing_1001_exclnpwgts(self):
+ # repeat with exclude_nonpositive_weights=True
+ self._test(Stats.wnonmissing, (n1001_nomissing_numpy,w1001_nomissing_numpy,), 990, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (n1001_nomissing_MA,w1001_nomissing_numpy,), 990, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (n1001_nomissing_numpy,w1001_nomissing_MA,), 990, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (n1001_nomissing_MA,w1001_nomissing_MA,), 990, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (n1001_missing,w1001_nomissing_numpy,), 919, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (n1001_missing,w1001_nomissing_MA,), 919, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (n1001_nomissing_numpy,w1001_missing,), 952, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (n1001_nomissing_MA,w1001_missing,), 952, exclude_nonpositive_weights=True)
+ self._test(Stats.wnonmissing, (n1001_missing,w1001_missing,), 881, exclude_nonpositive_weights=True)
+
+ def test_missing_misc(self):
+ self._test(Stats.missing, empty_numeric, 0)
+ self._test(Stats.missing, empty_ma, 0)
+ self._test(Stats.missing, populated_numeric, 0)
+ self._test(Stats.missing, populated_ma, 0)
+ self._test(Stats.missing, null_mask, 0)
+ self._test(Stats.missing, full_mask, 5)
+ self._test(Stats.missing, partial_mask, 3)
+ self._test(Stats.missing, two_elements_numeric, 0)
+ self._test(Stats.missing, two_elements_ma, 0)
+ self._test(Stats.missing, one_element_numeric, 0)
+ self._test(Stats.missing, one_element_ma, 0)
+ self._test(Stats.missing, one_neg_element_numeric, 0)
+ self._test(Stats.missing, one_neg_element_ma, 0)
+ self._test(Stats.missing, one_masked_element_ma, 1)
+ self._test(Stats.missing, all_neg_numeric, 0)
+ self._test(Stats.missing, all_neg_ma, 0)
+
+ def test_wmissing_misc(self):
+ self._test(Stats.wmissing, (empty_numeric,empty_numeric), 0)
+ self._test(Stats.wmissing, (empty_ma,empty_numeric), 0)
+ self._test(Stats.wmissing, (empty_numeric,empty_ma), 0)
+ self._test(Stats.wmissing, (empty_ma,empty_ma), 0)
+ self._test(Stats.wmissing, (populated_numeric,populated_numeric), 0)
+ self._test(Stats.wmissing, (populated_ma,populated_numeric), 0)
+ self._test(Stats.wmissing, (populated_numeric,populated_ma), 0)
+ self._test(Stats.wmissing, (populated_ma,populated_ma), 0)
+ self._test(Stats.wmissing, (populated_numeric,full_mask), 5)
+ self._test(Stats.wmissing, (populated_ma,full_mask), 5)
+ self._test(Stats.wmissing, (null_mask,null_mask), 0)
+ self._test(Stats.wmissing, (null_mask,partial_mask), 3)
+ self._test(Stats.wmissing, (null_mask,full_mask), 5)
+ self._test(Stats.wmissing, (partial_mask,null_mask), 3)
+ self._test(Stats.wmissing, (partial_mask,partial_mask), 3)
+ self._test(Stats.wmissing, (partial_mask,full_mask), 5)
+ self._test(Stats.wmissing, (full_mask,null_mask), 5)
+ self._test(Stats.wmissing, (full_mask,partial_mask), 5)
+ self._test(Stats.wmissing, (full_mask,full_mask), 5)
+ self._test(Stats.wmissing, (two_elements_numeric,two_elements_numeric), 0)
+ self._test(Stats.wmissing, (two_elements_ma,two_elements_numeric), 0)
+ self._test(Stats.wmissing, (one_element_numeric,one_element_numeric), 0)
+ self._test(Stats.wmissing, (one_element_ma,one_element_ma), 0)
+ self._test(Stats.wmissing, (one_element_ma,one_element_numeric), 0)
+ self._test(Stats.wmissing, (one_element_numeric,one_element_ma), 0)
+ self._test(Stats.wmissing, (one_element_numeric,one_neg_element_numeric), 0)
+ self._test(Stats.wmissing, (one_element_ma,one_neg_element_numeric), 0)
+ self._test(Stats.wmissing, (one_element_numeric,one_neg_element_ma), 0)
+ self._test(Stats.wmissing, (one_element_ma,one_neg_element_ma), 0)
+ self._test(Stats.wmissing, (one_masked_element_ma,one_neg_element_ma,), 1, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (one_element_ma,one_masked_element_ma,), 1, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (all_neg_numeric,all_neg_numeric), 0)
+ self._test(Stats.wmissing, (all_neg_numeric,all_neg_ma), 0)
+ self._test(Stats.wmissing, (all_neg_ma,all_neg_numeric), 0)
+ self._test(Stats.wmissing, (all_neg_ma,all_neg_ma), 0)
+
+ # Note that the results of wmissing() with exclude_nonpositive_weights=True differ from
+ # those returned by SAS. SAS distinguishes between excluded observations (excluded because
+ # they have non-positive weights) and missing values, so in SAS, with the EXCLNPWGTS option
+ # set, the number of missing plus the number of non-missing observations does not equal the
+ # total number of observations (because some are "excluded"). NetEpi SOOM does not
+ # distinguish between missing and "excluded"; rather, it excludes observations by setting
+ # the data value to missing.
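+ # For example, with data one_element_numeric ([2]) and weights
+ # one_neg_element_numeric ([-2]), SAS under EXCLNPWGTS reports the
+ # observation as excluded rather than missing, whereas
+ # Stats.wmissing(one_element_numeric, one_neg_element_numeric,
+ # exclude_nonpositive_weights=True) returns 1 missing - as asserted
+ # in test_wmissing_misc_exclnpwgts below.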
+
+ def test_wmissing_misc_exclnpwgts(self):
+ # repeat with exclude_nonpositive_weights=True
+ self._test(Stats.wmissing, (empty_numeric,empty_numeric,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (empty_ma,empty_numeric,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (empty_numeric,empty_ma,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (empty_ma,empty_ma,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (populated_numeric,populated_numeric,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (populated_ma,populated_numeric,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (populated_numeric,populated_ma,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (populated_ma,populated_ma,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (populated_numeric,full_mask,), 5, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (populated_ma,full_mask,), 5, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (null_mask,null_mask,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (null_mask,partial_mask,), 3, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (null_mask,full_mask,), 5, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (partial_mask,null_mask,), 3, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (partial_mask,partial_mask,), 3, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (partial_mask,full_mask,), 5, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (full_mask,null_mask,), 5, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (full_mask,partial_mask,), 5, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (full_mask,full_mask,), 5, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (two_elements_numeric,two_elements_numeric,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (two_elements_ma,two_elements_numeric,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (one_element_numeric,one_element_numeric,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (one_element_ma,one_element_ma,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (one_element_ma,one_element_numeric,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (one_element_numeric,one_element_ma,), 0, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (one_element_numeric,one_neg_element_numeric,), 1, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (one_element_ma,one_neg_element_numeric,), 1, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (one_element_numeric,one_neg_element_ma,), 1, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (one_element_ma,one_neg_element_ma,), 1, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (one_masked_element_ma,one_neg_element_ma,), 1, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (one_element_ma,one_masked_element_ma,), 1, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (all_neg_numeric,all_neg_numeric,), 5, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (all_neg_numeric,all_neg_ma,), 5, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (all_neg_ma,all_neg_numeric,), 5, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (all_neg_ma,all_neg_ma,), 5, exclude_nonpositive_weights=True)
+
+ def test_missing_1001(self):
+ self._test(Stats.missing, n1001_nomissing_numpy, 0)
+ self._test(Stats.missing, n1001_nomissing_MA, 0)
+ self._test(Stats.missing, n1001_missing, 73)
+
+ def test_missing_1006(self):
+ self._test(Stats.missing, n1006_nomissing_numpy, 0)
+ self._test(Stats.missing, n1006_nomissing_MA, 0)
+ self._test(Stats.missing, n1006_missing, 73)
+
+ def test_wmissing_1001(self):
+ # now with 1001 element arrays
+ self._test(Stats.wmissing, (n1001_nomissing_numpy,w1001_nomissing_numpy), 0)
+ self._test(Stats.wmissing, (n1001_nomissing_MA,w1001_nomissing_numpy), 0)
+ self._test(Stats.wmissing, (n1001_nomissing_numpy,w1001_nomissing_MA), 0)
+ self._test(Stats.wmissing, (n1001_nomissing_MA,w1001_nomissing_MA), 0)
+ self._test(Stats.wmissing, (n1001_missing,w1001_nomissing_numpy), 73)
+ self._test(Stats.wmissing, (n1001_missing,w1001_nomissing_MA), 73)
+ self._test(Stats.wmissing, (n1001_nomissing_numpy,w1001_missing), 38)
+ self._test(Stats.wmissing, (n1001_nomissing_MA,w1001_missing), 38)
+ self._test(Stats.wmissing, (n1001_missing,w1001_missing), 111)
+
+ def test_wmissing_1001_exclnpwgts(self):
+ # repeat with exclude_nonpositive_weights=True
+ self._test(Stats.wmissing, (n1001_nomissing_numpy,w1001_nomissing_numpy,), 11, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (n1001_nomissing_MA,w1001_nomissing_numpy,), 11, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (n1001_nomissing_numpy,w1001_nomissing_MA,), 11, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (n1001_nomissing_MA,w1001_nomissing_MA,), 11, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (n1001_missing,w1001_nomissing_numpy,), 82, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (n1001_missing,w1001_nomissing_MA,), 82, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (n1001_nomissing_numpy,w1001_missing,), 49, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (n1001_nomissing_MA,w1001_missing,), 49, exclude_nonpositive_weights=True)
+ self._test(Stats.wmissing, (n1001_missing,w1001_missing,), 120, exclude_nonpositive_weights=True)
+
+ def test_aminimum_misc(self):
+ self._test(Stats.aminimum, empty_numeric, None)
+ self._test(Stats.aminimum, empty_ma, None)
+ self._test(Stats.aminimum, populated_numeric, 1)
+ self._test(Stats.aminimum, populated_ma, 1)
+ self._test(Stats.aminimum, null_mask, 1)
+ self._test(Stats.aminimum, full_mask, None)
+ self._test(Stats.aminimum, partial_mask, 2)
+ self._test(Stats.aminimum, two_elements_numeric, 2)
+ self._test(Stats.aminimum, two_elements_ma, 2)
+ self._test(Stats.aminimum, one_element_numeric, 2)
+ self._test(Stats.aminimum, one_element_ma, 2)
+ self._test(Stats.aminimum, one_masked_element_ma, None)
+ self._test(Stats.aminimum, one_neg_element_numeric, -2)
+ self._test(Stats.aminimum, one_neg_element_ma, -2)
+ self._test(Stats.aminimum, all_neg_numeric, -76)
+ self._test(Stats.aminimum, all_neg_ma, -76)
+
+ def test_wminimum_misc(self):
+ self._test(Stats.wminimum, (empty_numeric,empty_numeric), None)
+ self._test(Stats.wminimum, (empty_ma,empty_numeric), None)
+ self._test(Stats.wminimum, (empty_numeric,empty_ma), None)
+ self._test(Stats.wminimum, (empty_ma,empty_ma), None)
+ self._test(Stats.wminimum, (populated_numeric,populated_numeric), 1)
+ self._test(Stats.wminimum, (populated_ma,populated_numeric), 1)
+ self._test(Stats.wminimum, (populated_numeric,populated_ma), 1)
+ self._test(Stats.wminimum, (populated_ma,populated_ma), 1)
+ self._test(Stats.wminimum, (populated_numeric,full_mask), None)
+ self._test(Stats.wminimum, (populated_ma,full_mask), None)
+ self._test(Stats.wminimum, (null_mask,null_mask), 1)
+ self._test(Stats.wminimum, (null_mask,partial_mask), 2)
+ self._test(Stats.wminimum, (null_mask,full_mask), None)
+ self._test(Stats.wminimum, (partial_mask,null_mask), 2)
+ self._test(Stats.wminimum, (partial_mask,partial_mask), 2)
+ self._test(Stats.wminimum, (partial_mask,full_mask), None)
+ self._test(Stats.wminimum, (full_mask,null_mask), None)
+ self._test(Stats.wminimum, (full_mask,partial_mask), None)
+ self._test(Stats.wminimum, (full_mask,full_mask), None)
+ self._test(Stats.wminimum, (two_elements_numeric,two_elements_numeric), 2)
+ self._test(Stats.wminimum, (two_elements_ma,two_elements_numeric), 2)
+ self._test(Stats.wminimum, (one_element_numeric,one_element_numeric), 2)
+ self._test(Stats.wminimum, (one_element_ma,one_element_ma), 2)
+ self._test(Stats.wminimum, (one_element_ma,one_element_numeric), 2)
+ self._test(Stats.wminimum, (one_element_numeric,one_element_ma), 2)
+ self._test(Stats.wminimum, (one_element_numeric,one_neg_element_numeric), 2)
+ self._test(Stats.wminimum, (one_element_ma,one_neg_element_numeric), 2)
+ self._test(Stats.wminimum, (one_element_numeric,one_neg_element_ma), 2)
+ self._test(Stats.wminimum, (one_element_ma,one_neg_element_ma), 2)
+ self._test(Stats.wminimum, (one_masked_element_ma,one_neg_element_ma), None)
+ self._test(Stats.wminimum, (one_element_ma,one_masked_element_ma), None)
+ self._test(Stats.wminimum, (all_neg_numeric,all_neg_numeric), -76)
+ self._test(Stats.wminimum, (all_neg_numeric,all_neg_ma), -76)
+ self._test(Stats.wminimum, (all_neg_ma,all_neg_numeric), -76)
+ self._test(Stats.wminimum, (all_neg_ma,all_neg_ma), -76)
+
+ def test_wminimum_misc_exclnpwgts(self):
+ # repeat with exclude_nonpositive_weights=True
+ self._test(Stats.wminimum, (empty_numeric,empty_numeric,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (empty_ma,empty_numeric,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (empty_numeric,empty_ma,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (empty_ma,empty_ma,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (populated_numeric,populated_numeric,), 1, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (populated_ma,populated_numeric,), 1, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (populated_numeric,populated_ma,), 1, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (populated_ma,populated_ma,), 1, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (populated_numeric,full_mask,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (populated_ma,full_mask,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (null_mask,null_mask,), 1, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (null_mask,partial_mask,), 2, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (null_mask,full_mask,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (partial_mask,null_mask,), 2, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (partial_mask,partial_mask,), 2, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (partial_mask,full_mask,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (full_mask,null_mask,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (full_mask,partial_mask,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (full_mask,full_mask,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (one_element_numeric,one_element_numeric,), 2, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (one_element_ma,one_element_ma,), 2, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (one_element_ma,one_element_numeric,), 2, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (one_element_numeric,one_element_ma,), 2, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (one_element_numeric,one_neg_element_numeric,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (one_element_ma,one_neg_element_numeric,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (one_element_numeric,one_neg_element_ma,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (one_element_ma,one_neg_element_ma,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (one_masked_element_ma,one_neg_element_ma,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (one_element_ma,one_masked_element_ma,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (two_elements_numeric,two_elements_numeric,), 2, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (two_elements_ma,two_elements_numeric,), 2, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (all_neg_numeric,all_neg_numeric,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (all_neg_numeric,all_neg_ma,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (all_neg_ma,all_neg_numeric,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (all_neg_ma,all_neg_ma,), None, exclude_nonpositive_weights=True)
+
+ def test_aminimum_1001(self):
+ self._test(Stats.aminimum, n1001_nomissing_numpy, -10)
+ self._test(Stats.aminimum, n1001_nomissing_MA, -10)
+ self._test(Stats.aminimum, n1001_missing, -10)
+
+ def test_aminimum_1006(self):
+ self._test(Stats.aminimum, n1006_nomissing_numpy, -10)
+ self._test(Stats.aminimum, n1006_nomissing_MA, -10)
+ self._test(Stats.aminimum, n1006_missing, -10)
+
+ def test_wminimum_1001(self):
+ # now with 1001 element arrays
+ self._test(Stats.wminimum, (n1001_nomissing_numpy,w1001_nomissing_numpy), -10)
+ self._test(Stats.wminimum, (n1001_nomissing_MA,w1001_nomissing_numpy), -10)
+ self._test(Stats.wminimum, (n1001_nomissing_numpy,w1001_nomissing_MA), -10)
+ self._test(Stats.wminimum, (n1001_nomissing_MA,w1001_nomissing_MA), -10)
+ self._test(Stats.wminimum, (n1001_missing,w1001_nomissing_numpy), -10)
+ self._test(Stats.wminimum, (n1001_missing,w1001_nomissing_MA), -10)
+ self._test(Stats.wminimum, (n1001_nomissing_numpy,w1001_missing), -10)
+ self._test(Stats.wminimum, (n1001_nomissing_MA,w1001_missing), -10)
+ self._test(Stats.wminimum, (n1001_missing,w1001_missing), -10)
+
+ def test_wminimum_1001_exclnpwgts(self):
+ # repeat with exclude_nonpositive_weights=True
+ self._test(Stats.wminimum, (n1001_nomissing_numpy,w1001_nomissing_numpy,), 1, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (n1001_nomissing_MA,w1001_nomissing_numpy,), 1, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (n1001_nomissing_numpy,w1001_nomissing_MA,), 1, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (n1001_nomissing_MA,w1001_nomissing_MA,), 1, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (n1001_missing,w1001_nomissing_numpy,), 1, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (n1001_missing,w1001_nomissing_MA,), 1, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (n1001_nomissing_numpy,w1001_missing,), 1, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (n1001_nomissing_MA,w1001_missing,), 1, exclude_nonpositive_weights=True)
+ self._test(Stats.wminimum, (n1001_missing,w1001_missing,), 1, exclude_nonpositive_weights=True)
+
+ def test_amaximum_misc(self):
+ self._test(Stats.amaximum, empty_numeric, None)
+ self._test(Stats.amaximum, empty_ma, None)
+ self._test(Stats.amaximum, populated_numeric, 5)
+ self._test(Stats.amaximum, populated_ma, 5)
+ self._test(Stats.amaximum, null_mask, 5)
+ self._test(Stats.amaximum, full_mask, None)
+ self._test(Stats.amaximum, partial_mask, 5)
+ self._test(Stats.amaximum, two_elements_numeric, 5)
+ self._test(Stats.amaximum, two_elements_ma, 5)
+ self._test(Stats.amaximum, one_element_numeric, 2)
+ self._test(Stats.amaximum, one_element_ma, 2)
+ self._test(Stats.amaximum, one_masked_element_ma, None)
+ self._test(Stats.amaximum, one_neg_element_numeric, -2)
+ self._test(Stats.amaximum, one_neg_element_ma, -2)
+ self._test(Stats.amaximum, all_neg_numeric, -2)
+ self._test(Stats.amaximum, all_neg_ma, -2)
+
+ def test_wmaximum_misc(self):
+ self._test(Stats.wmaximum, (empty_numeric,empty_numeric), None)
+ self._test(Stats.wmaximum, (empty_ma,empty_numeric), None)
+ self._test(Stats.wmaximum, (empty_numeric,empty_ma), None)
+ self._test(Stats.wmaximum, (empty_ma,empty_ma), None)
+ self._test(Stats.wmaximum, (populated_numeric,populated_numeric), 5)
+ self._test(Stats.wmaximum, (populated_ma,populated_numeric), 5)
+ self._test(Stats.wmaximum, (populated_numeric,populated_ma), 5)
+ self._test(Stats.wmaximum, (populated_ma,populated_ma), 5)
+ self._test(Stats.wmaximum, (populated_numeric,full_mask), None)
+ self._test(Stats.wmaximum, (populated_ma,full_mask), None)
+ self._test(Stats.wmaximum, (null_mask,null_mask), 5)
+ self._test(Stats.wmaximum, (null_mask,partial_mask), 5)
+ self._test(Stats.wmaximum, (null_mask,full_mask), None)
+ self._test(Stats.wmaximum, (partial_mask,null_mask), 5)
+ self._test(Stats.wmaximum, (partial_mask,partial_mask), 5)
+ self._test(Stats.wmaximum, (partial_mask,full_mask), None)
+ self._test(Stats.wmaximum, (full_mask,null_mask), None)
+ self._test(Stats.wmaximum, (full_mask,partial_mask), None)
+ self._test(Stats.wmaximum, (full_mask,full_mask), None)
+ self._test(Stats.wmaximum, (two_elements_numeric,two_elements_numeric), 5)
+ self._test(Stats.wmaximum, (two_elements_ma,two_elements_numeric), 5)
+ self._test(Stats.wmaximum, (one_element_numeric,one_element_numeric), 2)
+ self._test(Stats.wmaximum, (one_element_ma,one_element_ma), 2)
+ self._test(Stats.wmaximum, (one_element_ma,one_element_numeric), 2)
+ self._test(Stats.wmaximum, (one_element_numeric,one_element_ma), 2)
+ self._test(Stats.wmaximum, (one_element_numeric,one_neg_element_numeric), 2)
+ self._test(Stats.wmaximum, (one_element_ma,one_neg_element_numeric), 2)
+ self._test(Stats.wmaximum, (one_element_numeric,one_neg_element_ma), 2)
+ self._test(Stats.wmaximum, (one_element_ma,one_neg_element_ma), 2)
+ self._test(Stats.wmaximum, (one_masked_element_ma,one_neg_element_ma), None)
+ self._test(Stats.wmaximum, (one_element_ma,one_masked_element_ma), None)
+ self._test(Stats.wmaximum, (all_neg_numeric,all_neg_numeric), -2)
+ self._test(Stats.wmaximum, (all_neg_numeric,all_neg_ma), -2)
+ self._test(Stats.wmaximum, (all_neg_ma,all_neg_numeric), -2)
+ self._test(Stats.wmaximum, (all_neg_ma,all_neg_ma), -2)
+
+ def test_wmaximum_misc_exclnpwgts(self):
+        # repeat with exclude_nonpositive_weights=True (semantics sketched after this method)
+ self._test(Stats.wmaximum, (empty_numeric,empty_numeric,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (empty_ma,empty_numeric,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (empty_numeric,empty_ma,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (empty_ma,empty_ma,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (populated_numeric,populated_numeric,), 5, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (populated_ma,populated_numeric,), 5, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (populated_numeric,populated_ma,), 5, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (populated_ma,populated_ma,), 5, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (populated_numeric,full_mask,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (populated_ma,full_mask,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (null_mask,null_mask,), 5, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (null_mask,partial_mask,), 5, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (null_mask,full_mask,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (partial_mask,null_mask,), 5, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (partial_mask,partial_mask,), 5, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (partial_mask,full_mask,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (full_mask,null_mask,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (full_mask,partial_mask,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (full_mask,full_mask,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (two_elements_numeric,two_elements_numeric,), 5, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (two_elements_ma,two_elements_numeric,), 5, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (one_element_numeric,one_element_numeric,), 2, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (one_element_ma,one_element_ma,), 2, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (one_element_ma,one_element_numeric,), 2, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (one_element_numeric,one_element_ma,), 2, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (one_element_numeric,one_neg_element_numeric,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (one_element_ma,one_neg_element_numeric,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (one_element_numeric,one_neg_element_ma,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (one_element_ma,one_neg_element_ma,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (one_masked_element_ma,one_neg_element_ma,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (one_element_ma,one_masked_element_ma,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (all_neg_numeric,all_neg_numeric,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (all_neg_numeric,all_neg_ma,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (all_neg_ma,all_neg_numeric,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (all_neg_ma,all_neg_ma,), None, exclude_nonpositive_weights=True)
+
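+    # The exclnpwgts tests above and below assume (this is a sketch of the
+    # intended semantics over plain Python sequences, not the Stats.py
+    # implementation) that exclude_nonpositive_weights=True drops every
+    # observation whose weight is zero or negative before the statistic is
+    # computed - which is why a pair whose only weight is negative behaves
+    # like an empty array and yields None. The hypothetical helper below
+    # illustrates just that filtering step.
+    def _sketch_exclude_nonpositive_weights(self, data, weights):
+        # Keep only observations carrying a strictly positive weight.
+        pairs = [(d, w) for d, w in zip(data, weights) if w > 0]
+        if not pairs:
+            # Nothing survives the filter: the statistic is undefined (None).
+            return [], []
+        return [d for d, w in pairs], [w for d, w in pairs]
+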
+ def test_amaximum_1001(self):
+ self._test(Stats.amaximum, n1001_nomissing_numpy, 990)
+ self._test(Stats.amaximum, n1001_nomissing_MA, 990)
+ self._test(Stats.amaximum, n1001_missing, 990)
+
+ def test_amaximum_1006(self):
+ self._test(Stats.amaximum, n1006_nomissing_numpy, 995)
+ self._test(Stats.amaximum, n1006_nomissing_MA, 995)
+ self._test(Stats.amaximum, n1006_missing, 995)
+
+ def test_wmaximum_1001(self):
+        # now with 1001-element arrays
+ self._test(Stats.wmaximum, (n1001_nomissing_numpy,w1001_nomissing_numpy), 990)
+ self._test(Stats.wmaximum, (n1001_nomissing_MA,w1001_nomissing_numpy), 990)
+ self._test(Stats.wmaximum, (n1001_nomissing_numpy,w1001_nomissing_MA), 990)
+ self._test(Stats.wmaximum, (n1001_nomissing_MA,w1001_nomissing_MA), 990)
+ self._test(Stats.wmaximum, (n1001_missing,w1001_nomissing_numpy), 990)
+ self._test(Stats.wmaximum, (n1001_missing,w1001_nomissing_MA), 990)
+ self._test(Stats.wmaximum, (n1001_nomissing_numpy,w1001_missing), 990)
+ self._test(Stats.wmaximum, (n1001_nomissing_MA,w1001_missing), 990)
+ self._test(Stats.wmaximum, (n1001_missing,w1001_missing), 990)
+
+ def test_wmaximum_1001_exclnpwgts(self):
+ # repeat with exclude_nonpositive_weights=True
+ self._test(Stats.wmaximum, (n1001_nomissing_numpy,w1001_nomissing_numpy,), 990, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (n1001_nomissing_MA,w1001_nomissing_numpy,), 990, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (n1001_nomissing_numpy,w1001_nomissing_MA,), 990, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (n1001_nomissing_MA,w1001_nomissing_MA,), 990, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (n1001_missing,w1001_nomissing_numpy,), 990, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (n1001_missing,w1001_nomissing_MA,), 990, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (n1001_nomissing_numpy,w1001_missing,), 990, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (n1001_nomissing_MA,w1001_missing,), 990, exclude_nonpositive_weights=True)
+ self._test(Stats.wmaximum, (n1001_missing,w1001_missing,), 990, exclude_nonpositive_weights=True)
+
+ def test_asum_misc(self):
+ self._test(Stats.asum, empty_numeric, None)
+ self._test(Stats.asum, empty_ma, None)
+ self._test(Stats.asum, populated_numeric, 14)
+ self._test(Stats.asum, populated_ma, 14)
+ self._test(Stats.asum, null_mask, 14)
+ self._test(Stats.asum, full_mask, None)
+ self._test(Stats.asum, partial_mask, 7)
+ self._test(Stats.asum, two_elements_numeric, 7)
+ self._test(Stats.asum, two_elements_ma, 7)
+ self._test(Stats.asum, one_element_numeric, 2)
+ self._test(Stats.asum, one_element_ma, 2)
+ self._test(Stats.asum, one_masked_element_ma, None)
+ self._test(Stats.asum, one_neg_element_numeric, -2)
+ self._test(Stats.asum, one_neg_element_ma, -2)
+ self._test(Stats.asum, all_neg_numeric, -90)
+ self._test(Stats.asum, all_neg_ma, -90)
+
+ def test_wsum_misc(self):
+ self._test(Stats.wsum, (empty_numeric,empty_numeric), None)
+ self._test(Stats.wsum, (empty_ma,empty_numeric), None)
+ self._test(Stats.wsum, (empty_numeric,empty_ma), None)
+ self._test(Stats.wsum, (empty_ma,empty_ma), None)
+ self._test(Stats.wsum, (populated_numeric,populated_numeric), 48)
+ self._test(Stats.wsum, (populated_ma,populated_numeric), 48)
+ self._test(Stats.wsum, (populated_numeric,populated_ma), 48)
+ self._test(Stats.wsum, (populated_ma,populated_ma), 48)
+ self._test(Stats.wsum, (populated_numeric,full_mask), None)
+ self._test(Stats.wsum, (populated_ma,full_mask), None)
+ self._test(Stats.wsum, (null_mask,null_mask), 48)
+ self._test(Stats.wsum, (null_mask,partial_mask), 29)
+ self._test(Stats.wsum, (null_mask,full_mask), None)
+ self._test(Stats.wsum, (partial_mask,null_mask), 29)
+ self._test(Stats.wsum, (partial_mask,partial_mask), 29)
+ self._test(Stats.wsum, (partial_mask,full_mask), None)
+ self._test(Stats.wsum, (full_mask,null_mask), None)
+ self._test(Stats.wsum, (full_mask,partial_mask), None)
+ self._test(Stats.wsum, (full_mask,full_mask), None)
+ self._test(Stats.wsum, (two_elements_numeric,two_elements_numeric), 29)
+ self._test(Stats.wsum, (two_elements_ma,two_elements_numeric), 29)
+ self._test(Stats.wsum, (one_element_numeric,one_element_numeric), 4)
+ self._test(Stats.wsum, (one_element_ma,one_element_ma), 4)
+ self._test(Stats.wsum, (one_element_ma,one_element_numeric), 4)
+ self._test(Stats.wsum, (one_element_numeric,one_element_ma), 4)
+ self._test(Stats.wsum, (one_element_numeric,one_neg_element_numeric), 0)
+ self._test(Stats.wsum, (one_element_ma,one_neg_element_numeric), 0)
+ self._test(Stats.wsum, (one_element_numeric,one_neg_element_ma), 0)
+ self._test(Stats.wsum, (one_element_ma,one_neg_element_ma), 0)
+ self._test(Stats.wsum, (one_masked_element_ma,one_neg_element_ma), None)
+ self._test(Stats.wsum, (one_element_ma,one_masked_element_ma), None)
+ self._test(Stats.wsum, (all_neg_numeric,all_neg_numeric), 0)
+ self._test(Stats.wsum, (all_neg_numeric,all_neg_ma), 0)
+ self._test(Stats.wsum, (all_neg_ma,all_neg_numeric), 0)
+ self._test(Stats.wsum, (all_neg_ma,all_neg_ma), 0)
+
+ def test_wsum_misc_exclnpwgts(self):
+ # repeat with exclude_nonpositive_weights=True
+ self._test(Stats.wsum, (empty_numeric,empty_numeric,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (empty_ma,empty_numeric,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (empty_numeric,empty_ma,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (empty_ma,empty_ma,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (populated_numeric,populated_numeric,), 48, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (populated_ma,populated_numeric,), 48, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (populated_numeric,populated_ma,), 48, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (populated_ma,populated_ma,), 48, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (populated_numeric,full_mask,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (populated_ma,full_mask,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (null_mask,null_mask,), 48, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (null_mask,partial_mask,), 29, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (null_mask,full_mask,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (partial_mask,null_mask,), 29, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (partial_mask,partial_mask,), 29, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (partial_mask,full_mask,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (full_mask,null_mask,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (full_mask,partial_mask,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (full_mask,full_mask,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (two_elements_numeric,two_elements_numeric,), 29, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (two_elements_ma,two_elements_numeric,), 29, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (one_element_numeric,one_element_numeric,), 4, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (one_element_ma,one_element_ma,), 4, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (one_element_ma,one_element_numeric,), 4, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (one_element_numeric,one_element_ma,), 4, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (one_element_numeric,one_neg_element_numeric,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (one_element_ma,one_neg_element_numeric,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (one_element_numeric,one_neg_element_ma,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (one_element_ma,one_neg_element_ma,), None, exclude_nonpositive_weights=True)
+        self._test(Stats.wsum, (one_masked_element_ma,one_neg_element_ma,), None, exclude_nonpositive_weights=True)
+        self._test(Stats.wsum, (one_element_ma,one_masked_element_ma,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (all_neg_numeric,all_neg_numeric,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (all_neg_numeric,all_neg_ma,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (all_neg_ma,all_neg_numeric,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (all_neg_ma,all_neg_ma,), None, exclude_nonpositive_weights=True)
+
+ def test_asum_1001(self):
+ self._test(Stats.asum, n1001_nomissing_numpy, 490490)
+ self._test(Stats.asum, n1001_nomissing_MA, 490490)
+ self._test(Stats.asum, n1001_missing, 472605)
+
+ def test_asum_1006(self):
+ self._test(Stats.asum, n1006_nomissing_numpy, 495455)
+ self._test(Stats.asum, n1006_nomissing_MA, 495455)
+ self._test(Stats.asum, n1006_missing, 477570)
+
+ def test_wsum_1001(self):
+        # now with 1001-element arrays
+ self._test(Stats.wsum, (n1001_nomissing_numpy,w1001_nomissing_numpy), 107974405)
+ self._test(Stats.wsum, (n1001_nomissing_MA,w1001_nomissing_numpy), 107974405)
+ self._test(Stats.wsum, (n1001_nomissing_numpy,w1001_nomissing_MA), 107974405)
+ self._test(Stats.wsum, (n1001_nomissing_MA,w1001_nomissing_MA), 107974405)
+ self._test(Stats.wsum, (n1001_missing,w1001_nomissing_numpy), 105984417)
+ self._test(Stats.wsum, (n1001_missing,w1001_nomissing_MA), 105984417)
+ self._test(Stats.wsum, (n1001_nomissing_numpy,w1001_missing), 100639410.6667)
+ self._test(Stats.wsum, (n1001_nomissing_MA,w1001_missing), 100639410.6667)
+ self._test(Stats.wsum, (n1001_missing,w1001_missing), 98649422.6667)
+
+ def test_wsum_1001_exclnpwgts(self):
+ # repeat with exclude_nonpositive_weights=True
+ self._test(Stats.wsum, (n1001_nomissing_numpy,w1001_nomissing_numpy,), 107974405, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (n1001_nomissing_MA,w1001_nomissing_numpy,), 107974405, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (n1001_nomissing_numpy,w1001_nomissing_MA,), 107974405, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (n1001_nomissing_MA,w1001_nomissing_MA,), 107974405, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (n1001_missing,w1001_nomissing_numpy,), 105984417, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (n1001_missing,w1001_nomissing_MA,), 105984417, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (n1001_nomissing_numpy,w1001_missing,), 100639410.6667, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (n1001_nomissing_MA,w1001_missing,), 100639410.6667, exclude_nonpositive_weights=True)
+ self._test(Stats.wsum, (n1001_missing,w1001_missing,), 98649422.6667, exclude_nonpositive_weights=True)
+
+ def test_amean_misc(self):
+ self._test(Stats.amean, empty_numeric, None)
+ self._test(Stats.amean, empty_ma, None)
+ self._test(Stats.amean, populated_numeric, 2.8)
+ self._test(Stats.amean, populated_ma, 2.8)
+ self._test(Stats.amean, null_mask, 2.8)
+ self._test(Stats.amean, full_mask, None)
+ self._test(Stats.amean, partial_mask, 3.5)
+ self._test(Stats.amean, two_elements_numeric, 3.5)
+ self._test(Stats.amean, two_elements_ma, 3.5)
+ self._test(Stats.amean, one_element_numeric, 2)
+ self._test(Stats.amean, one_element_ma, 2)
+ self._test(Stats.amean, one_masked_element_ma, None)
+ self._test(Stats.amean, one_neg_element_numeric, -2)
+ self._test(Stats.amean, one_neg_element_ma, -2)
+ self._test(Stats.amean, all_neg_numeric, -18)
+ self._test(Stats.amean, all_neg_ma, -18)
+
+ def test_wamean_misc(self):
+ self._test(Stats.wamean, (empty_numeric,empty_numeric), None)
+ self._test(Stats.wamean, (empty_ma,empty_numeric), None)
+ self._test(Stats.wamean, (empty_numeric,empty_ma), None)
+ self._test(Stats.wamean, (empty_ma,empty_ma), None)
+ self._test(Stats.wamean, (populated_numeric,populated_numeric), 3.42857142857)
+ self._test(Stats.wamean, (populated_ma,populated_numeric), 3.42857142857)
+ self._test(Stats.wamean, (populated_numeric,populated_ma), 3.42857142857)
+ self._test(Stats.wamean, (populated_ma,populated_ma), 3.42857142857)
+ self._test(Stats.wamean, (populated_numeric,full_mask), None)
+ self._test(Stats.wamean, (populated_ma,full_mask), None)
+ self._test(Stats.wamean, (null_mask,null_mask), 3.42857142857)
+ self._test(Stats.wamean, (null_mask,partial_mask), 4.142857142)
+ self._test(Stats.wamean, (null_mask,full_mask), None)
+ self._test(Stats.wamean, (partial_mask,null_mask), 4.142857142)
+ self._test(Stats.wamean, (partial_mask,partial_mask), 4.142857142)
+ self._test(Stats.wamean, (partial_mask,full_mask), None)
+ self._test(Stats.wamean, (full_mask,null_mask), None)
+ self._test(Stats.wamean, (full_mask,partial_mask), None)
+ self._test(Stats.wamean, (full_mask,full_mask), None)
+ self._test(Stats.wamean, (two_elements_numeric,two_elements_numeric), 4.142857142)
+ self._test(Stats.wamean, (two_elements_ma,two_elements_numeric), 4.142857142)
+ self._test(Stats.wamean, (one_element_numeric,one_element_numeric), 2)
+ self._test(Stats.wamean, (one_element_ma,one_element_ma), 2)
+ self._test(Stats.wamean, (one_element_ma,one_element_numeric), 2)
+ self._test(Stats.wamean, (one_element_numeric,one_element_ma), 2)
+ self._test(Stats.wamean, (one_element_numeric,one_neg_element_numeric), None)
+ self._test(Stats.wamean, (one_element_ma,one_neg_element_numeric), None)
+ self._test(Stats.wamean, (one_element_numeric,one_neg_element_ma), None)
+ self._test(Stats.wamean, (one_element_ma,one_neg_element_ma), None)
+ self._test(Stats.wamean, (one_masked_element_ma,one_neg_element_ma), None)
+ self._test(Stats.wamean, (one_element_ma,one_masked_element_ma), None)
+ self._test(Stats.wamean, (all_neg_numeric,all_neg_numeric), None)
+ self._test(Stats.wamean, (all_neg_numeric,all_neg_ma), None)
+ self._test(Stats.wamean, (all_neg_ma,all_neg_numeric), None)
+ self._test(Stats.wamean, (all_neg_ma,all_neg_ma), None)
+
+ def test_wamean_misc_exclnpwgts(self):
+ # repeat with exclude_nonpositive_weights=True
+ self._test(Stats.wamean, (empty_numeric,empty_numeric,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (empty_ma,empty_numeric,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (empty_numeric,empty_ma,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (empty_ma,empty_ma,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (populated_numeric,populated_numeric,), 3.42857142857, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (populated_ma,populated_numeric,), 3.42857142857, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (populated_numeric,populated_ma,), 3.42857142857, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (populated_ma,populated_ma,), 3.42857142857, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (populated_numeric,full_mask,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (populated_ma,full_mask,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (null_mask,null_mask,), 3.42857142857, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (null_mask,partial_mask,), 4.142857142, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (null_mask,full_mask,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (partial_mask,null_mask,), 4.142857142, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (partial_mask,partial_mask,), 4.142857142, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (partial_mask,full_mask,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (full_mask,null_mask,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (full_mask,partial_mask,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (full_mask,full_mask,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (two_elements_numeric,two_elements_numeric,), 4.142857142, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (two_elements_ma,two_elements_numeric,), 4.142857142, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (one_element_numeric,one_element_numeric,), 2, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (one_element_ma,one_element_ma,), 2, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (one_element_ma,one_element_numeric,), 2, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (one_element_numeric,one_element_ma,), 2, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (one_element_numeric,one_neg_element_numeric,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (one_element_ma,one_neg_element_numeric,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (one_element_numeric,one_neg_element_ma,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (one_element_ma,one_neg_element_ma,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (one_masked_element_ma,one_neg_element_ma,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (one_element_ma,one_masked_element_ma,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (all_neg_numeric,all_neg_numeric,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (all_neg_numeric,all_neg_ma,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (all_neg_ma,all_neg_numeric,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (all_neg_ma,all_neg_ma,), None, exclude_nonpositive_weights=True)
+
+ def test_amean_1001(self):
+ self._test(Stats.amean, n1001_nomissing_numpy, 490.0)
+ self._test(Stats.amean, n1001_nomissing_MA, 490.0)
+ self._test(Stats.amean, n1001_missing, 509.272629)
+
+ def test_amean_1006(self):
+ self._test(Stats.amean, n1006_nomissing_numpy, 492.5)
+ self._test(Stats.amean, n1006_nomissing_MA, 492.5)
+ self._test(Stats.amean, n1006_missing, 511.865)
+
+ def test_ameancl_1001(self):
+ self._stricttest(Stats.ameancl, n1001_nomissing_numpy, (490.0,472.0684723242,507.9315276758))
+ self._stricttest(Stats.ameancl, n1001_nomissing_MA, (490.0,472.0684723242,507.9315276758))
+ self._stricttest(Stats.ameancl, n1001_missing, (509.2726293103,490.6730245555,527.8722340652))
+
+ def test_ameancl_1006(self):
+ self._stricttest(Stats.ameancl, n1006_nomissing_numpy, (492.5,474.5238970310,510.4761029690))
+ self._stricttest(Stats.ameancl, n1006_nomissing_MA, (492.5,474.5238970310,510.4761029690))
+ self._stricttest(Stats.ameancl, n1006_missing, (511.8649517686,493.2264201498,530.5034833872))
+
+ def test_wamean_1001(self):
+        # now with 1001-element arrays
+ self._test(Stats.wamean, (n1001_nomissing_numpy,w1001_nomissing_numpy), 660.3333)
+ self._test(Stats.wamean, (n1001_nomissing_MA,w1001_nomissing_numpy), 660.3333)
+ self._test(Stats.wamean, (n1001_nomissing_numpy,w1001_nomissing_MA), 660.3333)
+ self._test(Stats.wamean, (n1001_nomissing_MA,w1001_nomissing_MA), 660.3333)
+ self._test(Stats.wamean, (n1001_missing,w1001_nomissing_numpy), 672.699107)
+ self._test(Stats.wamean, (n1001_missing,w1001_nomissing_MA), 672.699107)
+ self._test(Stats.wamean, (n1001_nomissing_numpy,w1001_missing), 653.304695547)
+ self._test(Stats.wamean, (n1001_nomissing_MA,w1001_missing), 653.304695547)
+ self._test(Stats.wamean, (n1001_missing,w1001_missing), 666.1780537)
+
+ def test_wamean_1001_exclnpwgts(self):
+        # repeat with exclude_nonpositive_weights=True
+ self._test(Stats.wamean, (n1001_nomissing_numpy,w1001_nomissing_numpy,), 660.3333, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (n1001_nomissing_MA,w1001_nomissing_numpy,), 660.3333, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (n1001_nomissing_numpy,w1001_nomissing_MA,), 660.3333, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (n1001_nomissing_MA,w1001_nomissing_MA,), 660.3333, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (n1001_missing,w1001_nomissing_numpy,), 672.699107, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (n1001_missing,w1001_nomissing_MA,), 672.699107, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (n1001_nomissing_numpy,w1001_missing,), 653.304695547, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (n1001_nomissing_MA,w1001_missing,), 653.304695547, exclude_nonpositive_weights=True)
+ self._test(Stats.wamean, (n1001_missing,w1001_missing,), 666.1780537, exclude_nonpositive_weights=True)
+
+    # Note that the following values have NOT been checked against SAS, because SAS v8.2 does not produce
+    # confidence limits for weighted means (despite the SAS documentation giving the formula to be used).
+    # However, the values appear to be at least nominally correct: they have been checked against SAS
+    # PROC SURVEYMEANS using no CLASS or STRATA statements, and the confidence limits are very nearly the
+    # same, but not identical (they differ at the third decimal place) - probably because PROC SURVEYMEANS
+    # uses a Taylor series expansion to calculate the standard error. The formula assumed here is sketched
+    # after test_wameancl_1001 below.
+ def test_wameancl_1001(self):
+        # now with 1001-element arrays
+ self._stricttest(Stats.wameancl, (n1001_nomissing_numpy,w1001_nomissing_numpy), (660.33333333332018, 645.84588892710951, 674.82077773953085))
+ self._stricttest(Stats.wameancl, (n1001_nomissing_MA,w1001_nomissing_numpy), (660.33333333332018, 645.84588892710951, 674.82077773953085))
+ self._stricttest(Stats.wameancl, (n1001_nomissing_numpy,w1001_nomissing_MA), (660.33333333332018, 645.84588892710951, 674.82077773953085))
+ self._stricttest(Stats.wameancl, (n1001_nomissing_MA,w1001_nomissing_MA), (660.33333333332018, 645.84588892710951, 674.82077773953085))
+ self._stricttest(Stats.wameancl, (n1001_missing,w1001_nomissing_numpy), (672.69910695583076, 658.02179536228903, 687.37641854937249))
+ self._stricttest(Stats.wameancl, (n1001_missing,w1001_nomissing_MA), (672.69910695583076, 658.02179536228903, 687.37641854937249))
+ self._stricttest(Stats.wameancl, (n1001_nomissing_numpy,w1001_missing), (653.30469554679121, 638.35896025595366, 668.25043083762876))
+ self._stricttest(Stats.wameancl, (n1001_nomissing_MA,w1001_missing), (653.30469554679121, 638.35896025595366, 668.25043083762876))
+ self._stricttest(Stats.wameancl, (n1001_missing,w1001_missing), (666.17805369972359, 650.9948410875752, 681.36126631187199))
+
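+    # A minimal sketch of the confidence-limit calculation assumed above,
+    # following the formula given in the SAS documentation for a weighted
+    # mean (an illustration over plain sequences, not the Stats.wameancl
+    # implementation): the weighted mean is sum(w*x)/sum(w), its standard
+    # error is sqrt(s2w/sum(w)) with s2w = sum(w*(x - mean)**2)/(n - 1),
+    # and the limits are mean +/- t*SE, where t is the 97.5th percentile of
+    # Student's t on n - 1 degrees of freedom (passed in here rather than
+    # computed, to keep the sketch self-contained).
+    def _sketch_wameancl(self, data, weights, t975):
+        n = len(data)
+        sumw = float(sum(weights))
+        mean = sum(w * x for x, w in zip(data, weights)) / sumw
+        s2w = sum(w * (x - mean) ** 2
+                  for x, w in zip(data, weights)) / (n - 1)
+        se = (s2w / sumw) ** 0.5
+        return mean, mean - t975 * se, mean + t975 * se
+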
+ def test_ameancl_misc(self):
+ self._stricttest(Stats.ameancl, empty_numeric, (None,None,None))
+ self._stricttest(Stats.ameancl, empty_ma, (None,None,None))
+ self._stricttest(Stats.ameancl, populated_numeric, (2.8,0.9583146670,4.6416853330))
+ self._stricttest(Stats.ameancl, populated_ma, (2.8,0.9583146670,4.6416853330))
+ self._stricttest(Stats.ameancl, null_mask, (2.8,0.9583146670,4.6416853330))
+ self._stricttest(Stats.ameancl, full_mask, (None,None,None))
+ self._stricttest(Stats.ameancl, partial_mask, (3.5,-15.5593071043,22.5593071043))
+ self._stricttest(Stats.ameancl, two_elements_numeric, (3.5,-15.5593071043,22.5593071043))
+ self._stricttest(Stats.ameancl, two_elements_ma, (3.5,-15.5593071043,22.5593071043))
+ self._stricttest(Stats.ameancl, one_element_numeric, (2,None,None))
+ self._stricttest(Stats.ameancl, one_element_ma, (2,None,None))
+ self._stricttest(Stats.ameancl, one_masked_element_ma, (None,None,None))
+ self._stricttest(Stats.ameancl, one_neg_element_numeric, (-2,None,None))
+ self._stricttest(Stats.ameancl, one_neg_element_ma, (-2,None,None))
+ self._stricttest(Stats.ameancl, all_neg_numeric, (-18,-58.2823817862,22.2823817862))
+ self._stricttest(Stats.ameancl, all_neg_ma, (-18,-58.2823817862,22.2823817862))
+
+ def test_wameancl_misc(self):
+ self._stricttest(Stats.wameancl, (empty_numeric,empty_numeric), (None,None,None))
+ self._stricttest(Stats.wameancl, (empty_ma,empty_numeric), (None,None,None))
+ self._stricttest(Stats.wameancl, (empty_numeric,empty_ma), (None,None,None))
+ self._stricttest(Stats.wameancl, (empty_ma,empty_ma), (None,None,None))
+ self._stricttest(Stats.wameancl, (populated_numeric,populated_numeric), (3.4285714285714284, 1.6327300183955371, 5.2244128387473197))
+ self._stricttest(Stats.wameancl, (populated_ma,populated_numeric), (3.4285714285714284, 1.6327300183955371, 5.2244128387473197))
+ self._stricttest(Stats.wameancl, (populated_numeric,populated_ma), (3.4285714285714284, 1.6327300183955371, 5.2244128387473197))
+ self._stricttest(Stats.wameancl, (populated_ma,populated_ma), (3.4285714285714284, 1.6327300183955371, 5.2244128387473197))
+ self._stricttest(Stats.wameancl, (populated_numeric,full_mask), (None,None,None))
+ self._stricttest(Stats.wameancl, (populated_ma,full_mask), (None,None,None))
+ self._stricttest(Stats.wameancl, (null_mask,null_mask), (3.4285714285714284, 1.6327300183955371, 5.2244128387473197))
+ self._stricttest(Stats.wameancl, (null_mask,partial_mask), (4.1428571428571432, -13.077377449741839, 21.363091735456123))
+ self._stricttest(Stats.wameancl, (null_mask,full_mask), (None,None,None))
+ self._stricttest(Stats.wameancl, (partial_mask,null_mask),(4.1428571428571432, -13.077377449741839, 21.363091735456123) )
+ self._stricttest(Stats.wameancl, (partial_mask,partial_mask), (4.1428571428571432, -13.077377449741839, 21.363091735456123))
+ self._stricttest(Stats.wameancl, (partial_mask,full_mask), (None,None,None))
+ self._stricttest(Stats.wameancl, (full_mask,null_mask), (None,None,None))
+ self._stricttest(Stats.wameancl, (full_mask,partial_mask), (None,None,None))
+ self._stricttest(Stats.wameancl, (full_mask,full_mask), (None,None,None))
+ self._stricttest(Stats.wameancl, (two_elements_numeric,two_elements_numeric), (4.1428571428571432, -13.077377449741839, 21.363091735456123))
+ self._stricttest(Stats.wameancl, (two_elements_ma,two_elements_numeric), (4.1428571428571432, -13.077377449741839, 21.363091735456123))
+ self._stricttest(Stats.wameancl, (one_element_numeric,one_element_numeric), (2,None,None))
+ self._stricttest(Stats.wameancl, (one_element_ma,one_element_ma), (2,None,None))
+ self._stricttest(Stats.wameancl, (one_element_ma,one_element_numeric), (2,None,None))
+ self._stricttest(Stats.wameancl, (one_element_numeric,one_element_ma), (2,None,None))
+ self._stricttest(Stats.wameancl, (one_element_numeric,one_neg_element_numeric), (None,None,None))
+ self._stricttest(Stats.wameancl, (one_element_ma,one_neg_element_numeric), (None,None,None))
+ self._stricttest(Stats.wameancl, (one_element_numeric,one_neg_element_ma), (None,None,None))
+ self._stricttest(Stats.wameancl, (one_element_ma,one_neg_element_ma), (None,None,None))
+ self._stricttest(Stats.wameancl, (one_masked_element_ma,one_neg_element_ma), (None,None,None))
+ self._stricttest(Stats.wameancl, (one_element_ma,one_masked_element_ma), (None,None,None))
+ self._stricttest(Stats.wameancl, (all_neg_numeric,all_neg_numeric), (None,None,None))
+ self._stricttest(Stats.wameancl, (all_neg_numeric,all_neg_ma), (None,None,None))
+ self._stricttest(Stats.wameancl, (all_neg_ma,all_neg_numeric), (None,None,None))
+ self._stricttest(Stats.wameancl, (all_neg_ma,all_neg_ma), (None,None,None))
+
+    # To-Do: tests for arange and wrange are required!
+
+    # To-Do: more tests for geomean are required here. (The assumed
+    # definition is sketched after test_geomean below.)
+ def test_geomean(self):
+ self._test(Stats.geomean, empty_numeric, None)
+ self._test(Stats.geomean, empty_ma, None)
+ self._test(Stats.geomean, populated_numeric, 2.4595)
+ self._test(Stats.geomean, populated_ma, 2.4595)
+ self._test(Stats.geomean, null_mask, 2.4595)
+ self._test(Stats.geomean, full_mask, None)
+ self._test(Stats.geomean, partial_mask, 3.1623)
+
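+    # A minimal sketch of the geometric mean exercised above (an assumption
+    # about what Stats.geomean computes, not its code): the exponential of
+    # the arithmetic mean of the natural logs. For the two unmasked values
+    # behind partial_mask (2 and 5) this gives exp((ln 2 + ln 5)/2) =
+    # sqrt(10) ~= 3.1623, matching the expected value above.
+    def _sketch_geomean(self, data):
+        import math
+        if len(data) == 0:
+            return None
+        return math.exp(sum(math.log(x) for x in data) / len(data))
+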
+ def test_median_empty(self):
+        # with the default exclude_nonpositive_weights=False
+ self._test(Stats.median, (empty_numeric,1), None)
+ self._test(Stats.median, (empty_numeric,2), None)
+ self._test(Stats.median, (empty_numeric,3), None)
+ self._test(Stats.median, (empty_numeric,4), None)
+ self._test(Stats.median, (empty_numeric,5), None)
+
+ self._test(Stats.median, (empty_ma,1), None)
+ self._test(Stats.median, (empty_ma,2), None)
+ self._test(Stats.median, (empty_ma,3), None)
+ self._test(Stats.median, (empty_ma,4), None)
+ self._test(Stats.median, (empty_ma,5), None)
+
+ def test_wmedian_empty(self):
+ self._test(Stats.wmedian, (empty_numeric,empty_numeric) , None)
+ self._test(Stats.wmedian, (empty_numeric,empty_ma) , None)
+ self._test(Stats.wmedian, (empty_ma,empty_numeric) , None)
+ self._test(Stats.wmedian, (empty_ma,empty_ma) , None)
+
+ self._test(Stats.wmedian, (populated_numeric,full_mask) , None)
+ self._test(Stats.wmedian, (populated_ma,full_mask) , None)
+
+        self._test(Stats.wmedian, (populated_numeric,all_neg_numeric) , None)
+        self._test(Stats.wmedian, (populated_ma,all_neg_numeric) , None)
+
+ def test_median_misc(self):
+ self._test(Stats.median, (one_element_numeric,1), 2)
+ self._test(Stats.median, (one_element_ma,1), 2)
+ self._test(Stats.median, (one_masked_element_ma,1), None)
+ self._test(Stats.median, (one_neg_element_numeric,1), -2)
+ self._test(Stats.median, (one_neg_element_ma,1), -2)
+
+ self._test(Stats.median, (one_element_numeric,2), 2)
+ self._test(Stats.median, (one_element_ma,2), 2)
+ self._test(Stats.median, (one_masked_element_ma,2), None)
+ self._test(Stats.median, (one_neg_element_numeric,2), -2)
+ self._test(Stats.median, (one_neg_element_ma,2), -2)
+
+ self._test(Stats.median, (one_element_numeric,3), 2)
+ self._test(Stats.median, (one_element_ma,3), 2)
+ self._test(Stats.median, (one_masked_element_ma,3), None)
+ self._test(Stats.median, (one_neg_element_numeric,3), -2)
+ self._test(Stats.median, (one_neg_element_ma,3), -2)
+
+ self._test(Stats.median, (one_element_numeric,4), 2)
+ self._test(Stats.median, (one_element_ma,4), 2)
+ self._test(Stats.median, (one_masked_element_ma,4), None)
+ self._test(Stats.median, (one_neg_element_numeric,4), -2)
+ self._test(Stats.median, (one_neg_element_ma,4), -2)
+
+ self._test(Stats.median, (one_element_numeric,5), 2)
+ self._test(Stats.median, (one_element_ma,5), 2)
+ self._test(Stats.median, (one_masked_element_ma,5), None)
+ self._test(Stats.median, (one_neg_element_numeric,5), -2)
+ self._test(Stats.median, (one_neg_element_ma,5), -2)
+
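+    # In the median tests the second tuple element is the defn argument,
+    # assumed here to select among the five SAS-style percentile
+    # definitions (PCTLDEF 1-5). As one illustration (not the Stats.median
+    # code), definition 1 interpolates linearly at position n*p in the
+    # sorted data: for the 1001-element arrays used below, n*p = 500.5 at
+    # the median, so defn=1 yields 489.5 while defn=3, 4 and 5 yield 490.0.
+    def _sketch_quantile_defn1(self, sorted_data, p=0.5):
+        # SAS definition 1: writing n*p = j + g with the data 1-indexed,
+        # the result is (1 - g)*x[j] + g*x[j+1]; g == 0 returns x[j].
+        n = len(sorted_data)
+        if n == 0:
+            return None
+        np_ = n * p
+        j = int(np_)
+        g = np_ - j
+        if j == 0:
+            return float(sorted_data[0])
+        if g == 0:
+            return float(sorted_data[j - 1])
+        return (1 - g) * sorted_data[j - 1] + g * sorted_data[j]
+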
+    def test_median_misc_exclnpwgts(self):
+        # repeat with exclude_nonpositive_weights=True
+        self._test(Stats.median, (one_element_numeric,1,), 2, exclude_nonpositive_weights=True)
+        self._test(Stats.median, (one_element_ma,1,), 2, exclude_nonpositive_weights=True)
+        self._test(Stats.median, (one_masked_element_ma,1,), None, exclude_nonpositive_weights=True)
+        self._test(Stats.median, (one_neg_element_numeric,1,), -2, exclude_nonpositive_weights=True)
+        self._test(Stats.median, (one_neg_element_ma,1,), -2, exclude_nonpositive_weights=True)
+
+        self._test(Stats.median, (one_element_numeric,2,), 2, exclude_nonpositive_weights=True)
+        self._test(Stats.median, (one_element_ma,2,), 2, exclude_nonpositive_weights=True)
+        self._test(Stats.median, (one_masked_element_ma,2,), None, exclude_nonpositive_weights=True)
+        self._test(Stats.median, (one_neg_element_numeric,2,), -2, exclude_nonpositive_weights=True)
+        self._test(Stats.median, (one_neg_element_ma,2,), -2, exclude_nonpositive_weights=True)
+
+        self._test(Stats.median, (one_element_numeric,3,), 2, exclude_nonpositive_weights=True)
+        self._test(Stats.median, (one_element_ma,3,), 2, exclude_nonpositive_weights=True)
+        self._test(Stats.median, (one_masked_element_ma,3,), None, exclude_nonpositive_weights=True)
+        self._test(Stats.median, (one_neg_element_numeric,3,), -2, exclude_nonpositive_weights=True)
+        self._test(Stats.median, (one_neg_element_ma,3,), -2, exclude_nonpositive_weights=True)
+
+        self._test(Stats.median, (one_element_numeric,4,), 2, exclude_nonpositive_weights=True)
+        self._test(Stats.median, (one_element_ma,4,), 2, exclude_nonpositive_weights=True)
+        self._test(Stats.median, (one_masked_element_ma,4,), None, exclude_nonpositive_weights=True)
+        self._test(Stats.median, (one_neg_element_numeric,4,), -2, exclude_nonpositive_weights=True)
+        self._test(Stats.median, (one_neg_element_ma,4,), -2, exclude_nonpositive_weights=True)
+
+        self._test(Stats.median, (one_element_numeric,5,), 2, exclude_nonpositive_weights=True)
+        self._test(Stats.median, (one_element_ma,5,), 2, exclude_nonpositive_weights=True)
+        self._test(Stats.median, (one_masked_element_ma,5,), None, exclude_nonpositive_weights=True)
+        self._test(Stats.median, (one_neg_element_numeric,5,), -2, exclude_nonpositive_weights=True)
+        self._test(Stats.median, (one_neg_element_ma,5,), -2, exclude_nonpositive_weights=True)
+
+ def test_wmedian_misc(self):
+ self._test(Stats.wmedian, (one_element_numeric,one_element_numeric), 2)
+ self._test(Stats.wmedian, (one_element_ma,one_element_numeric), 2)
+ self._test(Stats.wmedian, (one_element_numeric,one_element_ma), 2)
+ self._test(Stats.wmedian, (one_element_ma,one_element_ma), 2)
+ self._test(Stats.wmedian, (one_masked_element_ma,one_element_numeric), None)
+ self._test(Stats.wmedian, (one_masked_element_ma,one_element_ma), None)
+ self._test(Stats.wmedian, (one_masked_element_ma,one_masked_element_ma), None)
+ self._test(Stats.wmedian, (one_neg_element_numeric,one_element_ma), -2)
+ self._test(Stats.wmedian, (one_neg_element_numeric,one_element_numeric), -2)
+ self._test(Stats.wmedian, (one_neg_element_ma,one_element_ma), -2)
+ self._test(Stats.wmedian, (one_neg_element_ma,one_element_numeric), -2)
+ self._test(Stats.wmedian, (one_neg_element_ma,one_neg_element_ma), -2)
+
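+    # A sketch of one common weighted-median definition (an assumption -
+    # the exact interpolation rule lives in Stats.wmedian): the smallest
+    # value whose cumulative weight reaches half the total weight. The
+    # single-element cases above are consistent with this reading. Operates
+    # on plain Python sequences; masked handling is omitted.
+    def _sketch_wmedian(self, data, weights):
+        total = float(sum(weights))
+        if len(data) == 0 or total <= 0:
+            return None
+        cum = 0.0
+        for d, w in sorted(zip(data, weights)):
+            cum += w
+            if cum >= total / 2.0:
+                return d
+        return max(data)
+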
+ def test_wmedian_misc_exclnpwgts(self):
+ self._test(Stats.wmedian, (one_element_numeric,one_element_numeric,), 2, exclude_nonpositive_weights=True)
+ self._test(Stats.wmedian, (one_element_ma,one_element_numeric,), 2, exclude_nonpositive_weights=True)
+ self._test(Stats.wmedian, (one_element_numeric,one_element_ma,), 2, exclude_nonpositive_weights=True)
+ self._test(Stats.wmedian, (one_element_ma,one_element_ma,), 2, exclude_nonpositive_weights=True)
+ self._test(Stats.wmedian, (one_masked_element_ma,one_element_numeric,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wmedian, (one_masked_element_ma,one_element_ma,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wmedian, (one_masked_element_ma,one_masked_element_ma,), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wmedian, (one_neg_element_numeric,one_element_ma,), -2, exclude_nonpositive_weights=True)
+ self._test(Stats.wmedian, (one_neg_element_numeric,one_element_numeric,), -2, exclude_nonpositive_weights=True)
+ self._test(Stats.wmedian, (one_neg_element_ma,one_element_ma,), -2, exclude_nonpositive_weights=True)
+ self._test(Stats.wmedian, (one_neg_element_ma,one_element_numeric,), -2, exclude_nonpositive_weights=True)
+ self._test(Stats.wmedian, (one_neg_element_ma,one_neg_element_ma,), None, exclude_nonpositive_weights=True)
+
+ def test_median_nonmissing_odd(self):
+ self._test(Stats.median, (n1001_nomissing_numpy,1) , 489.5)
+ self._test(Stats.median, (n1001_nomissing_numpy,2) , 489.0)
+ self._test(Stats.median, (n1001_nomissing_numpy,3) , 490.0)
+ self._test(Stats.median, (n1001_nomissing_numpy,4) , 490.0)
+ self._test(Stats.median, (n1001_nomissing_numpy,5) , 490.0)
+
+ self._test(Stats.median, (n1001_nomissing_MA,1) , 489.5)
+ self._test(Stats.median, (n1001_nomissing_MA,2) , 489.0)
+ self._test(Stats.median, (n1001_nomissing_MA,3) , 490.0)
+ self._test(Stats.median, (n1001_nomissing_MA,4) , 490.0)
+ self._test(Stats.median, (n1001_nomissing_MA,5) , 490.0)
+
+ def test_wmedian_nonmissing_odd(self):
+ self._test(Stats.wmedian, (n1001_nomissing_numpy,w1001_nomissing_numpy) , 700.0)
+ self._test(Stats.wmedian, (n1001_nomissing_numpy,w1001_nomissing_MA) , 700.0)
+ self._test(Stats.wmedian, (n1001_nomissing_numpy,w1001_missing) , 693.0)
+ self._test(Stats.wmedian, (n1001_nomissing_MA,w1001_nomissing_numpy) , 700.0)
+ self._test(Stats.wmedian, (n1001_nomissing_MA,w1001_nomissing_MA) , 700.0)
+ self._test(Stats.wmedian, (n1001_nomissing_MA,w1001_missing) , 693.0)
+
+ def test_median_nonmissing_even(self):
+ self._test(Stats.median, (n1006_nomissing_numpy,1) , 492.0)
+ self._test(Stats.median, (n1006_nomissing_numpy,2) , 492.0)
+ self._test(Stats.median, (n1006_nomissing_numpy,3) , 492.0)
+ self._test(Stats.median, (n1006_nomissing_numpy,4) , 492.5)
+ self._test(Stats.median, (n1006_nomissing_numpy,5) , 492.5)
+
+ self._test(Stats.median, (n1006_nomissing_MA,1) , 492.0)
+ self._test(Stats.median, (n1006_nomissing_MA,2) , 492.0)
+ self._test(Stats.median, (n1006_nomissing_MA,3) , 492.0)
+ self._test(Stats.median, (n1006_nomissing_MA,4) , 492.5)
+ self._test(Stats.median, (n1006_nomissing_MA,5) , 492.5)
+
+ def test_wmedian_nonmissing_even(self):
+ self._test(Stats.wmedian, (n1006_nomissing_numpy,w1006_nomissing_numpy) , 704.0)
+ self._test(Stats.wmedian, (n1006_nomissing_numpy,w1006_nomissing_MA) , 704.0)
+ self._test(Stats.wmedian, (n1006_nomissing_numpy,w1006_missing) , 696.0)
+ self._test(Stats.wmedian, (n1006_nomissing_MA,w1006_nomissing_numpy) , 704.0)
+ self._test(Stats.wmedian, (n1006_nomissing_MA,w1006_nomissing_MA) , 704.0)
+ self._test(Stats.wmedian, (n1006_nomissing_MA,w1006_missing) , 696.0)
+
+    def test_median_missing_odd(self):
+ self._test(Stats.median, (n1001_missing,1) , 526.0)
+ self._test(Stats.median, (n1001_missing,2) , 526.0)
+ self._test(Stats.median, (n1001_missing,3) , 526.0)
+ self._test(Stats.median, (n1001_missing,4) , 526.5)
+ self._test(Stats.median, (n1001_missing,5) , 526.5)
+
+    def test_median_missing_even(self):
+ self._test(Stats.median, (n1006_missing,1) , 528.5)
+ self._test(Stats.median, (n1006_missing,2) , 528.0)
+ self._test(Stats.median, (n1006_missing,3) , 529.0)
+ self._test(Stats.median, (n1006_missing,4) , 529.0)
+ self._test(Stats.median, (n1006_missing,5) , 529.0)
+
+    def test_wmedian_missing_odd(self):
+ self._test(Stats.wmedian, (n1001_missing,w1001_nomissing_numpy) , 713.0)
+ self._test(Stats.wmedian, (n1001_missing,w1001_nomissing_MA) , 713.0)
+ self._test(Stats.wmedian, (n1001_missing,w1001_missing) , 707.0)
+
+    def test_wmedian_missing_even(self):
+ self._test(Stats.wmedian, (n1006_missing,w1006_nomissing_numpy) , 717.0)
+ self._test(Stats.wmedian, (n1006_missing,w1006_nomissing_MA) , 717.0)
+ self._test(Stats.wmedian, (n1006_missing,w1006_missing) , 710.0)
+
+ # with exclude_nonpositive_weights = True
+ def test_wmedian_nonmissing_odd_exclnpwgts(self):
+ self._test(Stats.wmedian, (n1001_nomissing_numpy,w1001_nomissing_numpy,) , 700.0, exclude_nonpositive_weights=True)
+ self._test(Stats.wmedian, (n1001_nomissing_numpy,w1001_nomissing_MA,) , 700.0, exclude_nonpositive_weights=True)
+ self._test(Stats.wmedian, (n1001_nomissing_numpy,w1001_missing,) , 693.0, exclude_nonpositive_weights=True)
+
+ self._test(Stats.wmedian, (n1001_nomissing_MA,w1001_nomissing_numpy,) , 700.0, exclude_nonpositive_weights=True)
+ self._test(Stats.wmedian, (n1001_nomissing_MA,w1001_nomissing_MA,) , 700.0, exclude_nonpositive_weights=True)
+ self._test(Stats.wmedian, (n1001_nomissing_MA,w1001_missing,) , 693.0, exclude_nonpositive_weights=True)
+
+ def test_wmedian_nonmissing_even_exclnpwgts(self):
+ self._test(Stats.wmedian, (n1006_nomissing_numpy,w1006_nomissing_numpy,) , 704.0, exclude_nonpositive_weights=True)
+ self._test(Stats.wmedian, (n1006_nomissing_numpy,w1006_nomissing_MA,) , 704.0, exclude_nonpositive_weights=True)
+ self._test(Stats.wmedian, (n1006_nomissing_numpy,w1006_missing,) , 696.0, exclude_nonpositive_weights=True)
+
+ self._test(Stats.wmedian, (n1006_nomissing_MA,w1006_nomissing_numpy,) , 704.0, exclude_nonpositive_weights=True)
+ self._test(Stats.wmedian, (n1006_nomissing_MA,w1006_nomissing_MA,) , 704.0, exclude_nonpositive_weights=True)
+ self._test(Stats.wmedian, (n1006_nomissing_MA,w1006_missing,) , 696.0, exclude_nonpositive_weights=True)
+
+ def test_wmedian_missing_odd_exclnpwgts(self):
+ self._test(Stats.wmedian, (n1001_missing,w1001_nomissing_numpy,) , 713.0, exclude_nonpositive_weights=True)
+ self._test(Stats.wmedian, (n1001_missing,w1001_nomissing_MA,) , 713.0, exclude_nonpositive_weights=True)
+ self._test(Stats.wmedian, (n1001_missing,w1001_missing,) , 707.0, exclude_nonpositive_weights=True)
+
+ def test_wmedian_missing_even_exclnpwgts(self):
+ self._test(Stats.wmedian, (n1006_missing,w1006_nomissing_numpy,) , 717.0, exclude_nonpositive_weights=True)
+ self._test(Stats.wmedian, (n1006_missing,w1006_nomissing_MA,) , 717.0, exclude_nonpositive_weights=True)
+ self._test(Stats.wmedian, (n1006_missing,w1006_missing,) , 710.0, exclude_nonpositive_weights=True)
+
+ def test_quantile_p0_odd_nonmissing(self):
+ # 0th percentile
+ self._test(Stats.quantile, (n1001_nomissing_numpy,), -10.0, p=0.0, defn=1)
+ self._test(Stats.quantile, (n1001_nomissing_numpy,) , -10.0, p=0.0, defn=2)
+ self._test(Stats.quantile, (n1001_nomissing_numpy,) , -10.0, p=0.0, defn=3)
+ self._test(Stats.quantile, (n1001_nomissing_numpy,) , -10.0, p=0.0, defn=4)
+ self._test(Stats.quantile, (n1001_nomissing_numpy,) , -10.0, p=0.0, defn=5)
+
+ self._test(Stats.quantile, (n1001_nomissing_MA,), -10.0, p=0.0, defn=1)
+ self._test(Stats.quantile, (n1001_nomissing_MA,) , -10.0, p=0.0, defn=2)
+ self._test(Stats.quantile, (n1001_nomissing_MA,) , -10.0, p=0.0, defn=3)
+ self._test(Stats.quantile, (n1001_nomissing_MA,) , -10.0, p=0.0, defn=4)
+ self._test(Stats.quantile, (n1001_nomissing_MA,) , -10.0, p=0.0, defn=5)
+
+ def test_quantile_p0_even_nonmissing(self):
+ # 0th percentile
+ self._test(Stats.quantile, (n1006_nomissing_numpy,), -10.0, p=0.0, defn=1)
+ self._test(Stats.quantile, (n1006_nomissing_numpy,) , -10.0, p=0.0, defn=2)
+ self._test(Stats.quantile, (n1006_nomissing_numpy,) , -10.0, p=0.0, defn=3)
+ self._test(Stats.quantile, (n1006_nomissing_numpy,) , -10.0, p=0.0, defn=4)
+ self._test(Stats.quantile, (n1006_nomissing_numpy,) , -10.0, p=0.0, defn=5)
+
+ self._test(Stats.quantile, (n1006_nomissing_MA,), -10.0, p=0.0, defn=1)
+ self._test(Stats.quantile, (n1006_nomissing_MA,) , -10.0, p=0.0, defn=2)
+ self._test(Stats.quantile, (n1006_nomissing_MA,) , -10.0, p=0.0, defn=3)
+ self._test(Stats.quantile, (n1006_nomissing_MA,) , -10.0, p=0.0, defn=4)
+ self._test(Stats.quantile, (n1006_nomissing_MA,) , -10.0, p=0.0, defn=5)
+
+ def test_quantile_p1_odd_nonmissing(self):
+ # 1st percentile
+ self._test(Stats.quantile, (n1001_nomissing_numpy,), -0.99, p=0.01, defn=1)
+ self._test(Stats.quantile, (n1001_nomissing_numpy,) , -1.0, p=0.01, defn=2)
+ self._test(Stats.quantile, (n1001_nomissing_numpy,) , 0.0, p=0.01, defn=3)
+ self._test(Stats.quantile, (n1001_nomissing_numpy,) , -0.98, p=0.01, defn=4)
+ self._test(Stats.quantile, (n1001_nomissing_numpy,) , 0.0, p=0.01, defn=5)
+
+ self._test(Stats.quantile, (n1001_nomissing_MA,) , -0.99, p=0.01, defn=1)
+ self._test(Stats.quantile, (n1001_nomissing_MA,) , -1.0, p=0.01, defn=2)
+ self._test(Stats.quantile, (n1001_nomissing_MA,) , 0.0, p=0.01, defn=3)
+ self._test(Stats.quantile, (n1001_nomissing_MA,) , -0.98, p=0.01, defn=4)
+ self._test(Stats.quantile, (n1001_nomissing_MA,) , 0.0, p=0.01, defn=5)
+
+ def test_wquantile_p1_odd_nonmissing(self):
+ self._test(Stats.wquantile, (n1001_nomissing_numpy,w1001_nomissing_numpy,) , 99.0, p=0.01)
+ self._test(Stats.wquantile, (n1001_nomissing_numpy,w1001_nomissing_MA,) , 99.0, p=0.01)
+ self._test(Stats.wquantile, (n1001_nomissing_numpy,w1001_missing,) , 96.0, p=0.01)
+ self._test(Stats.wquantile, (n1001_nomissing_MA,w1001_nomissing_numpy,) , 99.0, p=0.01)
+ self._test(Stats.wquantile, (n1001_nomissing_MA,w1001_nomissing_MA,) , 99.0, p=0.01)
+ self._test(Stats.wquantile, (n1001_nomissing_MA,w1001_missing,) , 96.0, p=0.01)
+
+ def test_quantile_p1_even_nonmissing(self):
+ self._test(Stats.quantile, (n1006_nomissing_numpy,) , -0.94, p=0.01, defn=1)
+ self._test(Stats.quantile, (n1006_nomissing_numpy,) , -1.0, p=0.01, defn=2)
+ self._test(Stats.quantile, (n1006_nomissing_numpy,) , 0.0, p=0.01, defn=3)
+ self._test(Stats.quantile, (n1006_nomissing_numpy,) , -0.93, p=0.01, defn=4)
+ self._test(Stats.quantile, (n1006_nomissing_numpy,) , 0.0, p=0.01, defn=5)
+
+ self._test(Stats.quantile, (n1006_nomissing_MA,) , -0.94, p=0.01, defn=1)
+ self._test(Stats.quantile, (n1006_nomissing_MA,) , -1.0, p=0.01, defn=2)
+ self._test(Stats.quantile, (n1006_nomissing_MA,) , 0.0, p=0.01, defn=3)
+ self._test(Stats.quantile, (n1006_nomissing_MA,) , -0.93, p=0.01, defn=4)
+ self._test(Stats.quantile, (n1006_nomissing_MA,) , 0.0, p=0.01, defn=5)
+
+ def test_wquantile_p1_even_nonmissing(self):
+ self._test(Stats.wquantile, (n1006_nomissing_numpy,w1006_nomissing_numpy,) , 100.0, p=0.01)
+ self._test(Stats.wquantile, (n1006_nomissing_numpy,w1006_nomissing_MA,) , 100.0, p=0.01)
+ self._test(Stats.wquantile, (n1006_nomissing_numpy,w1006_missing,) , 97.0, p=0.01)
+ self._test(Stats.wquantile, (n1006_nomissing_MA,w1006_nomissing_numpy,) , 100.0, p=0.01)
+ self._test(Stats.wquantile, (n1006_nomissing_MA,w1006_nomissing_MA,) , 100.0, p=0.01)
+ self._test(Stats.wquantile, (n1006_nomissing_MA,w1006_missing,) , 97.0, p=0.01)
+
+ def test_quantile_p1_odd_missing(self):
+ self._test(Stats.quantile, (n1001_missing,) , -0.44, p=0.01, defn=1)
+ self._test(Stats.quantile, (n1001_missing,) , -1.0, p=0.01, defn=2)
+ self._test(Stats.quantile, (n1001_missing,) , 1.0, p=0.01, defn=3)
+ self._test(Stats.quantile, (n1001_missing,) , -0.42, p=0.01, defn=4)
+ self._test(Stats.quantile, (n1001_missing,) , 1.0, p=0.01, defn=5)
+
+ def test_quantile_p1_even_missing(self):
+ self._test(Stats.quantile, (n1006_missing,) , -0.34, p=0.01, defn=1)
+ self._test(Stats.quantile, (n1006_missing,) , -1.0, p=0.01, defn=2)
+ self._test(Stats.quantile, (n1006_missing,) , 1.0, p=0.01, defn=3)
+ self._test(Stats.quantile, (n1006_missing,) , -0.32, p=0.01, defn=4)
+ self._test(Stats.quantile, (n1006_missing,) , 1.0, p=0.01, defn=5)
+
+ def test_wquantile_p1_odd_missing(self):
+ self._test(Stats.wquantile, (n1001_missing,w1001_nomissing_numpy,) , 106.0, p=0.01)
+ self._test(Stats.wquantile, (n1001_missing,w1001_nomissing_MA,) , 106.0, p=0.01)
+ self._test(Stats.wquantile, (n1001_missing,w1001_missing,) , 102.0, p=0.01)
+
+ def test_wquantile_p1_even_missing(self):
+ self._test(Stats.wquantile, (n1006_missing,w1006_nomissing_numpy,) , 106.0, p=0.01)
+ self._test(Stats.wquantile, (n1006_missing,w1006_nomissing_MA,) , 106.0, p=0.01)
+ self._test(Stats.wquantile, (n1006_missing,w1006_missing,) , 102.0, p=0.01)
+
+ # with exclude_nonpositive_weights = True
+ def test_wquantile_p1_odd_nonmissing_exclnpwgts(self):
+ self._test(Stats.wquantile, (n1001_nomissing_numpy,w1001_nomissing_numpy) , 99.0, exclude_nonpositive_weights=True, p=0.01)
+ self._test(Stats.wquantile, (n1001_nomissing_numpy,w1001_nomissing_MA) , 99.0, exclude_nonpositive_weights=True, p=0.01)
+ self._test(Stats.wquantile, (n1001_nomissing_numpy,w1001_missing) , 96.0, exclude_nonpositive_weights=True, p=0.01)
+
+ self._test(Stats.wquantile, (n1001_nomissing_MA,w1001_nomissing_numpy) , 99.0, exclude_nonpositive_weights=True, p=0.01)
+ self._test(Stats.wquantile, (n1001_nomissing_MA,w1001_nomissing_MA) , 99.0, exclude_nonpositive_weights=True, p=0.01)
+ self._test(Stats.wquantile, (n1001_nomissing_MA,w1001_missing) , 96.0, exclude_nonpositive_weights=True, p=0.01)
+
+ def test_wquantile_p1_even_nonmissing_exclnpwgts(self):
+ self._test(Stats.wquantile, (n1006_nomissing_numpy,w1006_nomissing_numpy) , 100.0, exclude_nonpositive_weights=True, p=0.01)
+ self._test(Stats.wquantile, (n1006_nomissing_numpy,w1006_nomissing_MA) , 100.0, exclude_nonpositive_weights=True, p=0.01)
+ self._test(Stats.wquantile, (n1006_nomissing_numpy,w1006_missing) , 97.0, exclude_nonpositive_weights=True, p=0.01)
+
+ self._test(Stats.wquantile, (n1006_nomissing_MA,w1006_nomissing_numpy) , 100.0, exclude_nonpositive_weights=True, p=0.01)
+ self._test(Stats.wquantile, (n1006_nomissing_MA,w1006_nomissing_MA) , 100.0, exclude_nonpositive_weights=True, p=0.01)
+ self._test(Stats.wquantile, (n1006_nomissing_MA,w1006_missing) , 97.0, exclude_nonpositive_weights=True, p=0.01)
+
+ def test_wquantile_p1_odd_missing_exclnpwgts(self):
+ self._test(Stats.wquantile, (n1001_missing,w1001_nomissing_numpy) , 106.0, exclude_nonpositive_weights=True, p=0.01)
+ self._test(Stats.wquantile, (n1001_missing,w1001_nomissing_MA) , 106.0, exclude_nonpositive_weights=True, p=0.01)
+ self._test(Stats.wquantile, (n1001_missing,w1001_missing) , 102.0, exclude_nonpositive_weights=True, p=0.01)
+
+    def test_wquantile_p1_even_missing_exclnpwgts(self):
+ self._test(Stats.wquantile, (n1006_missing,w1006_nomissing_numpy) , 106.0, exclude_nonpositive_weights=True, p=0.01)
+ self._test(Stats.wquantile, (n1006_missing,w1006_nomissing_MA) , 106.0, exclude_nonpositive_weights=True, p=0.01)
+ self._test(Stats.wquantile, (n1006_missing,w1006_missing) , 102.0, exclude_nonpositive_weights=True, p=0.01)
+
+ # 100th percentile
+ def test_quantile_p100_odd_nonmissing(self):
+ self._test(Stats.quantile, (n1001_nomissing_numpy,), 990.0, p=1.0, defn=1)
+ self._test(Stats.quantile, (n1001_nomissing_numpy,) , 990.0, p=1.0, defn=2)
+ self._test(Stats.quantile, (n1001_nomissing_numpy,) , 990.0, p=1.0, defn=3)
+ self._test(Stats.quantile, (n1001_nomissing_numpy,) , 990.0, p=1.0, defn=4)
+ self._test(Stats.quantile, (n1001_nomissing_numpy,) , 990.0, p=1.0, defn=5)
+
+ self._test(Stats.quantile, (n1001_nomissing_MA,) , 990.0, p=1.0, defn=1)
+ self._test(Stats.quantile, (n1001_nomissing_MA,) , 990.0, p=1.0, defn=2)
+ self._test(Stats.quantile, (n1001_nomissing_MA,) , 990.0, p=1.0, defn=3)
+ self._test(Stats.quantile, (n1001_nomissing_MA,) , 990.0, p=1.0, defn=4)
+ self._test(Stats.quantile, (n1001_nomissing_MA,) , 990.0, p=1.0, defn=5)
+
+ def test_quantile_p100_even_nonmissing(self):
+ self._test(Stats.quantile, (n1006_nomissing_numpy,), 995.0, p=1.0, defn=1)
+ self._test(Stats.quantile, (n1006_nomissing_numpy,) , 995.0, p=1.0, defn=2)
+ self._test(Stats.quantile, (n1006_nomissing_numpy,) , 995.0, p=1.0, defn=3)
+ self._test(Stats.quantile, (n1006_nomissing_numpy,) , 995.0, p=1.0, defn=4)
+ self._test(Stats.quantile, (n1006_nomissing_numpy,) , 995.0, p=1.0, defn=5)
+
+ self._test(Stats.quantile, (n1006_nomissing_MA,) , 995.0, p=1.0, defn=1)
+ self._test(Stats.quantile, (n1006_nomissing_MA,) , 995.0, p=1.0, defn=2)
+ self._test(Stats.quantile, (n1006_nomissing_MA,) , 995.0, p=1.0, defn=3)
+ self._test(Stats.quantile, (n1006_nomissing_MA,) , 995.0, p=1.0, defn=4)
+ self._test(Stats.quantile, (n1006_nomissing_MA,) , 995.0, p=1.0, defn=5)
+
+ def test_wquantile_p100_odd_nonmissing(self):
+ self._test(Stats.wquantile, (n1001_nomissing_numpy,w1001_nomissing_numpy,) , 990.0, p=1.0)
+ self._test(Stats.wquantile, (n1001_nomissing_numpy,w1001_nomissing_MA,) , 990.0, p=1.0)
+ self._test(Stats.wquantile, (n1001_nomissing_numpy,w1001_missing,) , 990.0, p=1.0)
+ self._test(Stats.wquantile, (n1001_nomissing_MA,w1001_nomissing_numpy,) , 990.0, p=1.0)
+ self._test(Stats.wquantile, (n1001_nomissing_MA,w1001_nomissing_MA,) , 990.0, p=1.0)
+ self._test(Stats.wquantile, (n1001_nomissing_MA,w1001_missing,) , 990.0, p=1.0)
+
+ def test_wquantile_p100_even_nonmissing(self):
+ self._test(Stats.wquantile, (n1006_nomissing_numpy,w1006_nomissing_numpy,) , 995.0, p=1.0)
+ self._test(Stats.wquantile, (n1006_nomissing_numpy,w1006_nomissing_MA,) , 995.0, p=1.0)
+ self._test(Stats.wquantile, (n1006_nomissing_numpy,w1006_missing,) , 995.0, p=1.0)
+ self._test(Stats.wquantile, (n1006_nomissing_MA,w1006_nomissing_numpy,) , 995.0, p=1.0)
+ self._test(Stats.wquantile, (n1006_nomissing_MA,w1006_nomissing_MA,) , 995.0, p=1.0)
+ self._test(Stats.wquantile, (n1006_nomissing_MA,w1006_missing,) , 995.0, p=1.0)
+
+ # 99th percentile
+ def test_quantile_p99_odd_nonmissing(self):
+ self._test(Stats.quantile, (n1001_nomissing_numpy,), 979.99, p=0.99, defn=1)
+ self._test(Stats.quantile, (n1001_nomissing_numpy,) , 980.0, p=0.99, defn=2)
+ self._test(Stats.quantile, (n1001_nomissing_numpy,) , 980.0, p=0.99, defn=3)
+ self._test(Stats.quantile, (n1001_nomissing_numpy,) , 980.98, p=0.99, defn=4)
+ self._test(Stats.quantile, (n1001_nomissing_numpy,) , 980.0, p=0.99, defn=5)
+
+ self._test(Stats.quantile, (n1001_nomissing_MA,) , 979.99, p=0.99, defn=1)
+ self._test(Stats.quantile, (n1001_nomissing_MA,) , 980.0, p=0.99, defn=2)
+ self._test(Stats.quantile, (n1001_nomissing_MA,) , 980.0, p=0.99, defn=3)
+ self._test(Stats.quantile, (n1001_nomissing_MA,) , 980.98, p=0.99, defn=4)
+ self._test(Stats.quantile, (n1001_nomissing_MA,) , 980.0, p=0.99, defn=5)
+
+ def test_wquantile_p99_odd_nonmissing(self):
+ self._test(Stats.wquantile, (n1001_nomissing_numpy,w1001_nomissing_numpy,) , 986.0, p=0.99)
+ self._test(Stats.wquantile, (n1001_nomissing_numpy,w1001_nomissing_MA,) , 986.0, p=0.99)
+ self._test(Stats.wquantile, (n1001_nomissing_numpy,w1001_missing,) , 985.0, p=0.99)
+ self._test(Stats.wquantile, (n1001_nomissing_MA,w1001_nomissing_numpy,) , 986.0, p=0.99)
+ self._test(Stats.wquantile, (n1001_nomissing_MA,w1001_nomissing_MA,) , 986.0, p=0.99)
+ self._test(Stats.wquantile, (n1001_nomissing_MA,w1001_missing,) , 985.0, p=0.99)
+
+ def test_quantile_p99_even_nonmissing(self):
+ self._test(Stats.quantile, (n1006_nomissing_numpy,) , 984.94, p=0.99, defn=1)
+ self._test(Stats.quantile, (n1006_nomissing_numpy,) , 985.0, p=0.99, defn=2)
+ self._test(Stats.quantile, (n1006_nomissing_numpy,) , 985.0, p=0.99, defn=3)
+ self._test(Stats.quantile, (n1006_nomissing_numpy,) , 985.93, p=0.99, defn=4)
+ self._test(Stats.quantile, (n1006_nomissing_numpy,) , 985.0, p=0.99, defn=5)
+
+ self._test(Stats.quantile, (n1006_nomissing_MA,) , 984.94, p=0.99, defn=1)
+ self._test(Stats.quantile, (n1006_nomissing_MA,) , 985.0, p=0.99, defn=2)
+ self._test(Stats.quantile, (n1006_nomissing_MA,) , 985.0, p=0.99, defn=3)
+ self._test(Stats.quantile, (n1006_nomissing_MA,) , 985.93, p=0.99, defn=4)
+ self._test(Stats.quantile, (n1006_nomissing_MA,) , 985.0, p=0.99, defn=5)
+
+ def test_wquantile_p99_even_nonmissing(self):
+ self._test(Stats.wquantile, (n1006_nomissing_numpy,w1006_nomissing_numpy,) , 991.0, p=0.99)
+ self._test(Stats.wquantile, (n1006_nomissing_numpy,w1006_nomissing_MA,) , 991.0, p=0.99)
+ self._test(Stats.wquantile, (n1006_nomissing_numpy,w1006_missing,) , 991.0, p=0.99)
+ self._test(Stats.wquantile, (n1006_nomissing_MA,w1006_nomissing_numpy,) , 991.0, p=0.99)
+ self._test(Stats.wquantile, (n1006_nomissing_MA,w1006_nomissing_MA,) , 991.0, p=0.99)
+ self._test(Stats.wquantile, (n1006_nomissing_MA,w1006_missing,) , 991.0, p=0.99)
+
+    def test_quantile_p99_odd_missing(self):
+ self._test(Stats.quantile, (n1001_missing,) , 980.72, p=0.99, defn=1)
+ self._test(Stats.quantile, (n1001_missing,) , 981.0, p=0.99, defn=2)
+ self._test(Stats.quantile, (n1001_missing,) , 981.0, p=0.99, defn=3)
+ self._test(Stats.quantile, (n1001_missing,) , 981.71, p=0.99, defn=4)
+ self._test(Stats.quantile, (n1001_missing,) , 981.0, p=0.99, defn=5)
+
+    def test_wquantile_p99_odd_missing(self):
+ self._test(Stats.wquantile, (n1001_missing,w1001_nomissing_numpy,) , 986.0, p=0.99)
+ self._test(Stats.wquantile, (n1001_missing,w1001_nomissing_MA,) , 986.0, p=0.99)
+ self._test(Stats.wquantile, (n1001_missing,w1001_missing,) , 985.0, p=0.99)
+
+ def test_quantile_p99_even_missing(self):
+ self._test(Stats.quantile, (n1006_missing,) , 985.67, p=0.99, defn=1)
+ self._test(Stats.quantile, (n1006_missing,) , 986.0, p=0.99, defn=2)
+ self._test(Stats.quantile, (n1006_missing,) , 986.0, p=0.99, defn=3)
+ self._test(Stats.quantile, (n1006_missing,) , 986.66, p=0.99, defn=4)
+ self._test(Stats.quantile, (n1006_missing,) , 986.0, p=0.99, defn=5)
+
+ def test_wquantile_p99_even_missing(self):
+ self._test(Stats.wquantile, (n1006_missing,w1006_nomissing_numpy,) , 991.0, p=0.99)
+ self._test(Stats.wquantile, (n1006_missing,w1006_nomissing_MA,) , 991.0, p=0.99)
+ self._test(Stats.wquantile, (n1006_missing,w1006_missing,) , 991.0, p=0.99)
+
+ # with exclude_nonpositive_weights = True
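+    # The flag should drop observations whose weight is zero or negative
+    # before the percentile is computed; the expectations below match the
+    # default runs above, so these weight fixtures apparently contain no
+    # nonpositive weights to exclude.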
+ def test_wquantile_p99_odd_nonmissing_exclnpwgts(self):
+ self._test(Stats.wquantile, (n1001_nomissing_numpy,w1001_nomissing_numpy) , 986.0, exclude_nonpositive_weights=True, p=0.99)
+ self._test(Stats.wquantile, (n1001_nomissing_numpy,w1001_nomissing_MA) , 986.0, exclude_nonpositive_weights=True, p=0.99)
+ self._test(Stats.wquantile, (n1001_nomissing_numpy,w1001_missing) , 985.0, exclude_nonpositive_weights=True, p=0.99)
+
+ self._test(Stats.wquantile, (n1001_nomissing_MA,w1001_nomissing_numpy) , 986.0, exclude_nonpositive_weights=True, p=0.99)
+ self._test(Stats.wquantile, (n1001_nomissing_MA,w1001_nomissing_MA) , 986.0, exclude_nonpositive_weights=True, p=0.99)
+ self._test(Stats.wquantile, (n1001_nomissing_MA,w1001_missing) , 985.0, exclude_nonpositive_weights=True, p=0.99)
+
+ def test_wquantile_p99_even_nonmissing_exclnpwgts(self):
+ self._test(Stats.wquantile, (n1006_nomissing_numpy,w1006_nomissing_numpy) , 991.0, exclude_nonpositive_weights=True, p=0.99)
+ self._test(Stats.wquantile, (n1006_nomissing_numpy,w1006_nomissing_MA) , 991.0, exclude_nonpositive_weights=True, p=0.99)
+ self._test(Stats.wquantile, (n1006_nomissing_numpy,w1006_missing) , 991.0, exclude_nonpositive_weights=True, p=0.99)
+
+ self._test(Stats.wquantile, (n1006_nomissing_MA,w1006_nomissing_numpy) , 991.0, exclude_nonpositive_weights=True, p=0.99)
+ self._test(Stats.wquantile, (n1006_nomissing_MA,w1006_nomissing_MA) , 991.0, exclude_nonpositive_weights=True, p=0.99)
+ self._test(Stats.wquantile, (n1006_nomissing_MA,w1006_missing) , 991.0, exclude_nonpositive_weights=True, p=0.99)
+
+ def test_wquantile_p99_odd_missing_exclnpwgts(self):
+ self._test(Stats.wquantile, (n1001_missing,w1001_nomissing_numpy) , 986.0, exclude_nonpositive_weights=True, p=0.99)
+ self._test(Stats.wquantile, (n1001_missing,w1001_nomissing_MA) , 986.0, exclude_nonpositive_weights=True, p=0.99)
+ self._test(Stats.wquantile, (n1001_missing,w1001_missing) , 985.0, exclude_nonpositive_weights=True, p=0.99)
+
+ def test_wquantile_p99_even_missing_exclnpwgts(self):
+ self._test(Stats.wquantile, (n1006_missing,w1006_nomissing_numpy) , 991.0, exclude_nonpositive_weights=True, p=0.99)
+ self._test(Stats.wquantile, (n1006_missing,w1006_nomissing_MA) , 991.0, exclude_nonpositive_weights=True, p=0.99)
+ self._test(Stats.wquantile, (n1006_missing,w1006_missing) , 991.0, exclude_nonpositive_weights=True, p=0.99)
+
+ # 75th percentile
+ def test_quantile_p75_odd_nonmissing(self):
+ self._test(Stats.quantile, (n1001_nomissing_numpy,), 739.75, p=0.75, defn=1)
+ self._test(Stats.quantile, (n1001_nomissing_numpy,) , 740.0, p=0.75, defn=2)
+ self._test(Stats.quantile, (n1001_nomissing_numpy,) , 740.0, p=0.75, defn=3)
+ self._test(Stats.quantile, (n1001_nomissing_numpy,) , 740.5, p=0.75, defn=4)
+ self._test(Stats.quantile, (n1001_nomissing_numpy,) , 740.0, p=0.75, defn=5)
+
+ self._test(Stats.quantile, (n1001_nomissing_MA,) , 739.75, p=0.75, defn=1)
+ self._test(Stats.quantile, (n1001_nomissing_MA,) , 740.0, p=0.75, defn=2)
+ self._test(Stats.quantile, (n1001_nomissing_MA,) , 740.0, p=0.75, defn=3)
+ self._test(Stats.quantile, (n1001_nomissing_MA,) , 740.5, p=0.75, defn=4)
+ self._test(Stats.quantile, (n1001_nomissing_MA,) , 740.0, p=0.75, defn=5)
+
+ def test_wquantile_p75_odd_nonmissing(self):
+ self._test(Stats.wquantile, (n1001_nomissing_numpy,w1001_nomissing_numpy,) , 858.0, p=0.75)
+ self._test(Stats.wquantile, (n1001_nomissing_numpy,w1001_nomissing_MA,) , 858.0, p=0.75)
+ self._test(Stats.wquantile, (n1001_nomissing_numpy,w1001_missing,) , 854.0, p=0.75)
+ self._test(Stats.wquantile, (n1001_nomissing_MA,w1001_nomissing_numpy,) , 858.0, p=0.75)
+ self._test(Stats.wquantile, (n1001_nomissing_MA,w1001_nomissing_MA,) , 858.0, p=0.75)
+ self._test(Stats.wquantile, (n1001_nomissing_MA,w1001_missing,) , 854.0, p=0.75)
+
+ def test_quantile_p75_even_nonmissing(self):
+ self._test(Stats.quantile, (n1006_nomissing_numpy,) , 743.5, p=0.75, defn=1)
+ self._test(Stats.quantile, (n1006_nomissing_numpy,) , 743.0, p=0.75, defn=2)
+ self._test(Stats.quantile, (n1006_nomissing_numpy,) , 744.0, p=0.75, defn=3)
+ self._test(Stats.quantile, (n1006_nomissing_numpy,) , 744.25, p=0.75, defn=4)
+ self._test(Stats.quantile, (n1006_nomissing_numpy,) , 744.0, p=0.75, defn=5)
+
+ self._test(Stats.quantile, (n1006_nomissing_MA,) , 743.5, p=0.75, defn=1)
+ self._test(Stats.quantile, (n1006_nomissing_MA,) , 743.0, p=0.75, defn=2)
+ self._test(Stats.quantile, (n1006_nomissing_MA,) , 744.0, p=0.75, defn=3)
+ self._test(Stats.quantile, (n1006_nomissing_MA,) , 744.25, p=0.75, defn=4)
+ self._test(Stats.quantile, (n1006_nomissing_MA,) , 744.0, p=0.75, defn=5)
+
+ def test_wquantile_p75_even_nonmissing(self):
+ self._test(Stats.wquantile, (n1006_nomissing_numpy,w1006_nomissing_numpy,) , 862.0, p=0.75)
+ self._test(Stats.wquantile, (n1006_nomissing_numpy,w1006_nomissing_MA,) , 862.0, p=0.75)
+ self._test(Stats.wquantile, (n1006_nomissing_numpy,w1006_missing,) , 860.0, p=0.75)
+ self._test(Stats.wquantile, (n1006_nomissing_MA,w1006_nomissing_numpy,) , 862.0, p=0.75)
+ self._test(Stats.wquantile, (n1006_nomissing_MA,w1006_nomissing_MA,) , 862.0, p=0.75)
+ self._test(Stats.wquantile, (n1006_nomissing_MA,w1006_missing,) , 860.0, p=0.75)
+
+ def test_quantile_p75_odd_missing(self):
+ self._test(Stats.quantile, (n1001_missing,) , 758.0, p=0.75, defn=1)
+ self._test(Stats.quantile, (n1001_missing,) , 758.0, p=0.75, defn=2)
+ self._test(Stats.quantile, (n1001_missing,) , 758.0, p=0.75, defn=3)
+ self._test(Stats.quantile, (n1001_missing,) , 758.75, p=0.75, defn=4)
+ self._test(Stats.quantile, (n1001_missing,) , 758.5, p=0.75, defn=5)
+
+ def test_wquantile_p75_odd_missing(self):
+ self._test(Stats.wquantile, (n1001_missing,w1001_nomissing_numpy,) , 863.0, p=0.75)
+ self._test(Stats.wquantile, (n1001_missing,w1001_nomissing_MA,) , 863.0, p=0.75)
+ self._test(Stats.wquantile, (n1001_missing,w1001_missing,) , 860.0, p=0.75)
+
+ def test_quantile_p75_even_missing(self):
+ self._test(Stats.quantile, (n1006_missing,) , 761.75, p=0.75, defn=1)
+ self._test(Stats.quantile, (n1006_missing,) , 762.0, p=0.75, defn=2)
+ self._test(Stats.quantile, (n1006_missing,) , 762.0, p=0.75, defn=3)
+ self._test(Stats.quantile, (n1006_missing,) , 762.5, p=0.75, defn=4)
+ self._test(Stats.quantile, (n1006_missing,) , 762.0, p=0.75, defn=5)
+
+ def test_wquantile_p75_even_missing(self):
+ self._test(Stats.wquantile, (n1006_missing,w1006_nomissing_numpy,) , 867.0, p=0.75)
+ self._test(Stats.wquantile, (n1006_missing,w1006_nomissing_MA,) , 867.0, p=0.75)
+ self._test(Stats.wquantile, (n1006_missing,w1006_missing,) , 865.0, p=0.75)
+
+ # with exclude_nonpositive_weights = True
+ def test_wquantile_p75_odd_nonmissing_exclnpwgts(self):
+ self._test(Stats.wquantile, (n1001_nomissing_numpy,w1001_nomissing_numpy) , 858.0, exclude_nonpositive_weights=True, p=0.75)
+ self._test(Stats.wquantile, (n1001_nomissing_numpy,w1001_nomissing_MA) , 858.0, exclude_nonpositive_weights=True, p=0.75)
+ self._test(Stats.wquantile, (n1001_nomissing_numpy,w1001_missing) , 854.0, exclude_nonpositive_weights=True, p=0.75)
+
+ self._test(Stats.wquantile, (n1001_nomissing_MA,w1001_nomissing_numpy) , 858.0, exclude_nonpositive_weights=True, p=0.75)
+ self._test(Stats.wquantile, (n1001_nomissing_MA,w1001_nomissing_MA) , 858.0, exclude_nonpositive_weights=True, p=0.75)
+ self._test(Stats.wquantile, (n1001_nomissing_MA,w1001_missing) , 854.0, exclude_nonpositive_weights=True, p=0.75)
+
+ def test_wquantile_p75_even_nonmissing_exclnpwgts(self):
+ self._test(Stats.wquantile, (n1006_nomissing_numpy,w1006_nomissing_numpy) , 862.0, exclude_nonpositive_weights=True, p=0.75)
+ self._test(Stats.wquantile, (n1006_nomissing_numpy,w1006_nomissing_MA) , 862.0, exclude_nonpositive_weights=True, p=0.75)
+ self._test(Stats.wquantile, (n1006_nomissing_numpy,w1006_missing) , 860.0, exclude_nonpositive_weights=True, p=0.75)
+
+ self._test(Stats.wquantile, (n1006_nomissing_MA,w1006_nomissing_numpy) , 862.0, exclude_nonpositive_weights=True, p=0.75)
+ self._test(Stats.wquantile, (n1006_nomissing_MA,w1006_nomissing_MA) , 862.0, exclude_nonpositive_weights=True, p=0.75)
+ self._test(Stats.wquantile, (n1006_nomissing_MA,w1006_missing) , 860.0, exclude_nonpositive_weights=True, p=0.75)
+
+ def test_wquantile_p75_odd_missing_exclnpwgts(self):
+ self._test(Stats.wquantile, (n1001_missing,w1001_nomissing_numpy) , 863.0, exclude_nonpositive_weights=True, p=0.75)
+ self._test(Stats.wquantile, (n1001_missing,w1001_nomissing_MA) , 863.0, exclude_nonpositive_weights=True, p=0.75)
+ self._test(Stats.wquantile, (n1001_missing,w1001_missing) , 860.0, exclude_nonpositive_weights=True, p=0.75)
+
+ def test_wquantile_p75_even_missing_exclnpwgts(self):
+ self._test(Stats.wquantile, (n1006_missing,w1006_nomissing_numpy) , 867.0, exclude_nonpositive_weights=True, p=0.75)
+ self._test(Stats.wquantile, (n1006_missing,w1006_nomissing_MA) , 867.0, exclude_nonpositive_weights=True, p=0.75)
+ self._test(Stats.wquantile, (n1006_missing,w1006_missing) , 865.0, exclude_nonpositive_weights=True, p=0.75)
+
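+    # Degenerate inputs: empty and fully-masked vectors should yield None
+    # for any percentile, and a single-element vector should return its
+    # sole element regardless of p.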
+    def test_quantile_p25_p75_misc(self):
+ self._test(Stats.quantile, (empty_numeric,), None, p=0.25)
+ self._test(Stats.quantile, (empty_ma,), None, p=0.25)
+ self._test(Stats.quantile, (populated_numeric,), 2.0, p=0.25)
+ self._test(Stats.quantile, (populated_ma,), 2.0, p=0.25)
+ self._test(Stats.quantile, (null_mask,), 2.0, p=0.25)
+ self._test(Stats.quantile, (full_mask,), None, p=0.25)
+ self._test(Stats.quantile, (partial_mask,), 2.0, p=0.25)
+ self._test(Stats.quantile, (two_elements_numeric,), 2.0, p=0.25)
+ self._test(Stats.quantile, (two_elements_ma,), 2.0, p=0.25)
+ self._test(Stats.quantile, (twenty_ele,), 4.5, p=0.25)
+ self._test(Stats.quantile, (twenty_ele,), 14.5, p=0.75)
+ self._test(Stats.quantile, one_element_numeric, 2.0, p=0.75)
+ self._test(Stats.quantile, one_element_ma, 2.0, p=0.25)
+ self._test(Stats.quantile, one_neg_element_numeric, -2.0, p=0.75)
+ self._test(Stats.quantile, one_neg_element_ma, -2.0, p=0.25)
+
+ def test_quantile_p0_p100_misc_def5(self):
+ self._test(Stats.quantile, (empty_numeric,), None, p=1.0,defn=5)
+ self._test(Stats.quantile, (empty_ma,), None, p=1.0,defn=5)
+ self._test(Stats.quantile, (empty_numeric,), None, p=0.0,defn=5)
+ self._test(Stats.quantile, (empty_ma,), None, p=0.0,defn=5)
+ self._test(Stats.quantile, (populated_numeric,), 5.0, p=1.0,defn=5)
+ self._test(Stats.quantile, (populated_ma,), 5.0, p=1.0,defn=5)
+ self._test(Stats.quantile, (populated_numeric,), 1.0, p=0.0,defn=5)
+ self._test(Stats.quantile, (populated_ma,), 1.0, p=0.0,defn=5)
+ self._test(Stats.quantile, (null_mask,), 5.0, p=1.0,defn=5)
+ self._test(Stats.quantile, (full_mask,), None, p=1.0,defn=5)
+ self._test(Stats.quantile, (null_mask,), 1.0, p=0.0,defn=5)
+ self._test(Stats.quantile, (full_mask,), None, p=0.0,defn=5)
+ self._test(Stats.quantile, (partial_mask,), 5.0, p=1.0,defn=5)
+ self._test(Stats.quantile, (partial_mask,), 2.0, p=0.0,defn=5)
+ self._test(Stats.quantile, (two_elements_numeric,), 5.0, p=1.0,defn=5)
+ self._test(Stats.quantile, (two_elements_ma,), 5.0, p=1.0,defn=5)
+ self._test(Stats.quantile, (two_elements_numeric,), 2.0, p=0.0,defn=5)
+ self._test(Stats.quantile, (two_elements_ma,), 2.0, p=0.0,defn=5)
+ self._test(Stats.quantile, (twenty_ele,), 19.0, p=1.0,defn=5)
+ self._test(Stats.quantile, (twenty_ele,), 0.0, p=0.0,defn=5)
+ self._test(Stats.quantile, one_element_numeric, 2.0, p=1.0,defn=5)
+ self._test(Stats.quantile, one_element_ma, 2.0, p=1.0,defn=5)
+ self._test(Stats.quantile, one_element_numeric, 2.0, p=0.0,defn=5)
+ self._test(Stats.quantile, one_element_ma, 2.0, p=0.0,defn=5)
+ self._test(Stats.quantile, one_neg_element_numeric, -2.0, p=1.0,defn=5)
+ self._test(Stats.quantile, one_neg_element_ma, -2.0, p=1.0,defn=5)
+ self._test(Stats.quantile, one_neg_element_numeric, -2.0, p=0.0,defn=5)
+ self._test(Stats.quantile, one_neg_element_ma, -2.0, p=0.0,defn=5)
+
+ def test_quantile_p0_p100_misc_def4(self):
+ self._test(Stats.quantile, (empty_numeric,), None, p=1.0,defn=4)
+ self._test(Stats.quantile, (empty_ma,), None, p=1.0,defn=4)
+ self._test(Stats.quantile, (empty_numeric,), None, p=0.0,defn=4)
+ self._test(Stats.quantile, (empty_ma,), None, p=0.0,defn=4)
+ self._test(Stats.quantile, (populated_numeric,), 5.0, p=1.0,defn=4)
+ self._test(Stats.quantile, (populated_ma,), 5.0, p=1.0,defn=4)
+ self._test(Stats.quantile, (populated_numeric,), 1.0, p=0.0,defn=4)
+ self._test(Stats.quantile, (populated_ma,), 1.0, p=0.0,defn=4)
+ self._test(Stats.quantile, (null_mask,), 5.0, p=1.0,defn=4)
+ self._test(Stats.quantile, (full_mask,), None, p=1.0,defn=4)
+ self._test(Stats.quantile, (null_mask,), 1.0, p=0.0,defn=4)
+ self._test(Stats.quantile, (full_mask,), None, p=0.0,defn=4)
+ self._test(Stats.quantile, (partial_mask,), 5.0, p=1.0,defn=4)
+ self._test(Stats.quantile, (partial_mask,), 2.0, p=0.0,defn=4)
+ self._test(Stats.quantile, (two_elements_numeric,), 5.0, p=1.0,defn=4)
+ self._test(Stats.quantile, (two_elements_ma,), 5.0, p=1.0,defn=4)
+ self._test(Stats.quantile, (two_elements_numeric,), 2.0, p=0.0,defn=4)
+ self._test(Stats.quantile, (two_elements_ma,), 2.0, p=0.0,defn=4)
+ self._test(Stats.quantile, (twenty_ele,), 19.0, p=1.0,defn=4)
+ self._test(Stats.quantile, (twenty_ele,), 0.0, p=0.0,defn=4)
+ self._test(Stats.quantile, one_element_numeric, 2.0, p=1.0,defn=4)
+ self._test(Stats.quantile, one_element_ma, 2.0, p=1.0,defn=4)
+ self._test(Stats.quantile, one_element_numeric, 2.0, p=0.0,defn=4)
+ self._test(Stats.quantile, one_element_ma, 2.0, p=0.0,defn=4)
+ self._test(Stats.quantile, one_neg_element_numeric, -2.0, p=1.0,defn=4)
+ self._test(Stats.quantile, one_neg_element_ma, -2.0, p=1.0,defn=4)
+ self._test(Stats.quantile, one_neg_element_numeric, -2.0, p=0.0,defn=4)
+ self._test(Stats.quantile, one_neg_element_ma, -2.0, p=0.0,defn=4)
+
+ def test_quantile_p0_p100_misc_def3(self):
+ self._test(Stats.quantile, (empty_numeric,), None, p=1.0,defn=3)
+ self._test(Stats.quantile, (empty_ma,), None, p=1.0,defn=3)
+ self._test(Stats.quantile, (empty_numeric,), None, p=0.0,defn=3)
+ self._test(Stats.quantile, (empty_ma,), None, p=0.0,defn=3)
+ self._test(Stats.quantile, (populated_numeric,), 5.0, p=1.0,defn=3)
+ self._test(Stats.quantile, (populated_ma,), 5.0, p=1.0,defn=3)
+ self._test(Stats.quantile, (populated_numeric,), 1.0, p=0.0,defn=3)
+ self._test(Stats.quantile, (populated_ma,), 1.0, p=0.0,defn=3)
+ self._test(Stats.quantile, (null_mask,), 5.0, p=1.0,defn=3)
+ self._test(Stats.quantile, (full_mask,), None, p=1.0,defn=3)
+ self._test(Stats.quantile, (null_mask,), 1.0, p=0.0,defn=3)
+ self._test(Stats.quantile, (full_mask,), None, p=0.0,defn=3)
+ self._test(Stats.quantile, (partial_mask,), 5.0, p=1.0,defn=3)
+ self._test(Stats.quantile, (partial_mask,), 2.0, p=0.0,defn=3)
+ self._test(Stats.quantile, (two_elements_numeric,), 5.0, p=1.0,defn=3)
+ self._test(Stats.quantile, (two_elements_ma,), 5.0, p=1.0,defn=3)
+ self._test(Stats.quantile, (two_elements_numeric,), 2.0, p=0.0,defn=3)
+ self._test(Stats.quantile, (two_elements_ma,), 2.0, p=0.0,defn=3)
+ self._test(Stats.quantile, (twenty_ele,), 19.0, p=1.0,defn=3)
+ self._test(Stats.quantile, (twenty_ele,), 0.0, p=0.0,defn=3)
+ self._test(Stats.quantile, one_element_numeric, 2.0, p=1.0,defn=3)
+ self._test(Stats.quantile, one_element_ma, 2.0, p=1.0,defn=3)
+ self._test(Stats.quantile, one_element_numeric, 2.0, p=0.0,defn=3)
+ self._test(Stats.quantile, one_element_ma, 2.0, p=0.0,defn=3)
+ self._test(Stats.quantile, one_neg_element_numeric, -2.0, p=1.0,defn=3)
+ self._test(Stats.quantile, one_neg_element_ma, -2.0, p=1.0,defn=3)
+ self._test(Stats.quantile, one_neg_element_numeric, -2.0, p=0.0,defn=3)
+ self._test(Stats.quantile, one_neg_element_ma, -2.0, p=0.0,defn=3)
+
+ def test_quantile_p0_p100_misc_def2(self):
+ self._test(Stats.quantile, (empty_numeric,), None, p=1.0,defn=2)
+ self._test(Stats.quantile, (empty_ma,), None, p=1.0,defn=2)
+ self._test(Stats.quantile, (empty_numeric,), None, p=0.0,defn=2)
+ self._test(Stats.quantile, (empty_ma,), None, p=0.0,defn=2)
+ self._test(Stats.quantile, (populated_numeric,), 5.0, p=1.0,defn=2)
+ self._test(Stats.quantile, (populated_ma,), 5.0, p=1.0,defn=2)
+ self._test(Stats.quantile, (populated_numeric,), 1.0, p=0.0,defn=2)
+ self._test(Stats.quantile, (populated_ma,), 1.0, p=0.0,defn=2)
+ self._test(Stats.quantile, (null_mask,), 5.0, p=1.0,defn=2)
+ self._test(Stats.quantile, (full_mask,), None, p=1.0,defn=2)
+ self._test(Stats.quantile, (null_mask,), 1.0, p=0.0,defn=2)
+ self._test(Stats.quantile, (full_mask,), None, p=0.0,defn=2)
+ self._test(Stats.quantile, (partial_mask,), 5.0, p=1.0,defn=2)
+ self._test(Stats.quantile, (partial_mask,), 2.0, p=0.0,defn=2)
+ self._test(Stats.quantile, (two_elements_numeric,), 5.0, p=1.0,defn=2)
+ self._test(Stats.quantile, (two_elements_ma,), 5.0, p=1.0,defn=2)
+ self._test(Stats.quantile, (two_elements_numeric,), 2.0, p=0.0,defn=2)
+ self._test(Stats.quantile, (two_elements_ma,), 2.0, p=0.0,defn=2)
+ self._test(Stats.quantile, (twenty_ele,), 19.0, p=1.0,defn=2)
+ self._test(Stats.quantile, (twenty_ele,), 0.0, p=0.0,defn=2)
+ self._test(Stats.quantile, one_element_numeric, 2.0, p=1.0,defn=2)
+ self._test(Stats.quantile, one_element_ma, 2.0, p=1.0,defn=2)
+ self._test(Stats.quantile, one_element_numeric, 2.0, p=0.0,defn=2)
+ self._test(Stats.quantile, one_element_ma, 2.0, p=0.0,defn=2)
+ self._test(Stats.quantile, one_neg_element_numeric, -2.0, p=1.0,defn=2)
+ self._test(Stats.quantile, one_neg_element_ma, -2.0, p=1.0,defn=2)
+ self._test(Stats.quantile, one_neg_element_numeric, -2.0, p=0.0,defn=2)
+ self._test(Stats.quantile, one_neg_element_ma, -2.0, p=0.0,defn=2)
+
+ def test_quantile_p0_p100_misc_def1(self):
+ self._test(Stats.quantile, (empty_numeric,), None, p=1.0,defn=1)
+ self._test(Stats.quantile, (empty_ma,), None, p=1.0,defn=1)
+ self._test(Stats.quantile, (empty_numeric,), None, p=0.0,defn=1)
+ self._test(Stats.quantile, (empty_ma,), None, p=0.0,defn=1)
+ self._test(Stats.quantile, (populated_numeric,), 5.0, p=1.0,defn=1)
+ self._test(Stats.quantile, (populated_ma,), 5.0, p=1.0,defn=1)
+ self._test(Stats.quantile, (populated_numeric,), 1.0, p=0.0,defn=1)
+ self._test(Stats.quantile, (populated_ma,), 1.0, p=0.0,defn=1)
+ self._test(Stats.quantile, (null_mask,), 5.0, p=1.0,defn=1)
+ self._test(Stats.quantile, (full_mask,), None, p=1.0,defn=1)
+ self._test(Stats.quantile, (null_mask,), 1.0, p=0.0,defn=1)
+ self._test(Stats.quantile, (full_mask,), None, p=0.0,defn=1)
+ self._test(Stats.quantile, (partial_mask,), 5.0, p=1.0,defn=1)
+ self._test(Stats.quantile, (partial_mask,), 2.0, p=0.0,defn=1)
+ self._test(Stats.quantile, (two_elements_numeric,), 5.0, p=1.0,defn=1)
+ self._test(Stats.quantile, (two_elements_ma,), 5.0, p=1.0,defn=1)
+ self._test(Stats.quantile, (two_elements_numeric,), 2.0, p=0.0,defn=1)
+ self._test(Stats.quantile, (two_elements_ma,), 2.0, p=0.0,defn=1)
+ self._test(Stats.quantile, (twenty_ele,), 19.0, p=1.0,defn=1)
+ self._test(Stats.quantile, (twenty_ele,), 0.0, p=0.0,defn=1)
+ self._test(Stats.quantile, one_element_numeric, 2.0, p=1.0,defn=1)
+ self._test(Stats.quantile, one_element_ma, 2.0, p=1.0,defn=1)
+ self._test(Stats.quantile, one_element_numeric, 2.0, p=0.0,defn=1)
+ self._test(Stats.quantile, one_element_ma, 2.0, p=0.0,defn=1)
+ self._test(Stats.quantile, one_neg_element_numeric, -2.0, p=1.0,defn=1)
+ self._test(Stats.quantile, one_neg_element_ma, -2.0, p=1.0,defn=1)
+ self._test(Stats.quantile, one_neg_element_numeric, -2.0, p=0.0,defn=1)
+ self._test(Stats.quantile, one_neg_element_ma, -2.0, p=0.0,defn=1)
+
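+    # Stats.quantiles evaluates several percentiles in one call, returning
+    # the results as a tuple in the same order as p; p itself may be any
+    # sequence (both tuples and lists are exercised below).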
+ def test_quantiles_p1_p99_p75_odd_nonmissing(self):
+        # 1st, 99th and 75th percentiles computed in a single call
+ self._test(Stats.quantiles, (n1001_nomissing_numpy,), (-0.99000000000000021,979.99,739.75), p=(0.01,0.99,0.75), defn=1)
+ self._test(Stats.quantiles, (n1001_nomissing_numpy,), (-1.0,980.0,740.0), p=(0.01,0.99,0.75), defn=2)
+ self._test(Stats.quantiles, (n1001_nomissing_numpy,), (0.0,980.0,740.0), p=(0.01,0.99,0.75), defn=3)
+ self._test(Stats.quantiles, (n1001_nomissing_numpy,), (-0.98000000000000043,980.98,740.5), p=(0.01,0.99,0.75), defn=4)
+ self._test(Stats.quantiles, (n1001_nomissing_numpy,), (0.0,980.0,740.0), p=(0.01,0.99,0.75), defn=5)
+
+ self._test(Stats.quantiles, (n1001_nomissing_MA,), (-0.99000000000000021,979.99,739.75), p=(0.01,0.99,0.75), defn=1)
+ self._test(Stats.quantiles, (n1001_nomissing_MA,), (-1.0,980.0,740.0), p=(0.01,0.99,0.75), defn=2)
+ self._test(Stats.quantiles, (n1001_nomissing_MA,), (0.0,980.0,740.0), p=(0.01,0.99,0.75), defn=3)
+ self._test(Stats.quantiles, (n1001_nomissing_MA,), (-0.98000000000000043,980.98,740.5), p=(0.01,0.99,0.75), defn=4)
+ self._test(Stats.quantiles, (n1001_nomissing_MA,), (0.0,980.0,740.0), p=(0.01,0.99,0.75), defn=5)
+
+    def test_wquantiles_p1_p75_p99_even_missing_exclnpwgts(self):
+ self._test(Stats.wquantiles, (n1006_missing,w1006_nomissing_numpy) , (106.0,867.0,991.0), exclude_nonpositive_weights=True, p=[0.01,0.75,0.99])
+ self._test(Stats.wquantiles, (n1006_missing,w1006_nomissing_MA) , (106.0,867.0,991.0), exclude_nonpositive_weights=True, p=(0.01,0.75,0.99))
+ self._test(Stats.wquantiles, (n1006_missing,w1006_missing) , (102.0,865.0,991.0), exclude_nonpositive_weights=True, p=[0.01,0.75,0.99])
+
+ def test_quantiles_misc(self):
+ self._test(Stats.quantiles, (empty_numeric,), (None,None,None,None), p=(0.01,0.25,0.73,1))
+ self._test(Stats.quantiles, (empty_ma,), (None,None,None,None), p=(0.01,0.25,0.73,1))
+ self._test(Stats.quantiles, (full_mask,), (None,None,None,None), p=(0.01,0.25,0.73,1))
+ self._test(Stats.quantiles, one_element_numeric, (2.0,2.0,2.0,2.0,2.0), p=(0,0.000000001,0.01,0.1,0.75))
+ self._test(Stats.quantiles, one_element_ma, (2.0,2.0,2.0,2.0,2.0), p=(0,0.000000001,0.01,0.1,0.75))
+ self._test(Stats.quantiles, one_neg_element_numeric, (-2.0,-2.0,-2.0,-2.0,-2.0), p=(0,0.000000001,0.01,0.1,0.75))
+ self._test(Stats.quantiles, one_neg_element_ma, (-2.0,-2.0,-2.0,-2.0,-2.0), p=(0,0.000000001,0.01,0.1,0.75))
+
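+    # samplevar uses the n-1 (Bessel-corrected) denominator, populationvar
+    # (tested further below) the n denominator: hence a single-element
+    # sample variance is undefined (None) while the corresponding
+    # population variance is 0.0, and the two-element fixtures give 4.5
+    # versus 2.25.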
+ def test_samplevar_misc(self):
+ self._test(Stats.samplevar, empty_numeric, None)
+ self._test(Stats.samplevar, empty_ma, None)
+ self._test(Stats.samplevar, populated_numeric, 2.2)
+ self._test(Stats.samplevar, populated_ma, 2.2)
+ self._test(Stats.samplevar, null_mask, 2.2)
+ self._test(Stats.samplevar, full_mask, None)
+ self._test(Stats.samplevar, partial_mask, 4.5)
+ self._test(Stats.samplevar, two_elements_numeric, 4.5)
+ self._test(Stats.samplevar, two_elements_ma, 4.5)
+ self._test(Stats.samplevar, one_element_numeric, None)
+ self._test(Stats.samplevar, one_element_ma, None)
+ self._test(Stats.samplevar, one_neg_element_numeric, None)
+ self._test(Stats.samplevar, one_neg_element_ma, None)
+ self._test(Stats.samplevar, all_neg_numeric, 1052.50000)
+ self._test(Stats.samplevar, all_neg_ma, 1052.50000)
+
+ def test_wsamplevar_misc(self):
+ self._test(Stats.wsamplevar, (empty_numeric,empty_numeric), None)
+ self._test(Stats.wsamplevar, (empty_ma,empty_numeric), None)
+ self._test(Stats.wsamplevar, (empty_numeric,empty_ma), None)
+ self._test(Stats.wsamplevar, (empty_ma,empty_ma), None)
+ self._test(Stats.wsamplevar, (populated_numeric,populated_numeric), 1.8021978022)
+ self._test(Stats.wsamplevar, (populated_numeric,populated_ma), 1.8021978022)
+ self._test(Stats.wsamplevar, (populated_ma,populated_numeric), 1.8021978022)
+ self._test(Stats.wsamplevar, (populated_ma,populated_ma), 1.8021978022)
+ self._test(Stats.wsamplevar, (null_mask,null_mask), 1.8021978022)
+ self._test(Stats.wsamplevar, (full_mask,null_mask), None)
+ self._test(Stats.wsamplevar, (full_mask,partial_mask), None)
+ self._test(Stats.wsamplevar, (full_mask,full_mask), None)
+ self._test(Stats.wsamplevar, (null_mask,full_mask), None)
+ self._test(Stats.wsamplevar, (partial_mask,full_mask), None)
+ self._test(Stats.wsamplevar, (partial_mask,partial_mask), 2.1428571429)
+ self._test(Stats.wsamplevar, (two_elements_numeric,two_elements_numeric), 2.1428571429)
+ self._test(Stats.wsamplevar, (two_elements_ma,two_elements_numeric), 2.1428571429)
+ self._test(Stats.wsamplevar, (two_elements_numeric,two_elements_ma), 2.1428571429)
+ self._test(Stats.wsamplevar, (two_elements_ma,two_elements_ma), 2.1428571429)
+ self._test(Stats.wsamplevar, (one_element_numeric,one_element_numeric), 0.0)
+ self._test(Stats.wsamplevar, (one_element_ma,one_element_numeric), 0.0)
+ self._test(Stats.wsamplevar, (one_element_numeric,one_element_ma), 0.0)
+ self._test(Stats.wsamplevar, (one_element_ma,one_element_ma), 0.0)
+ self._test(Stats.wsamplevar, (one_neg_element_numeric,one_neg_element_ma), None)
+ self._test(Stats.wsamplevar, (one_neg_element_ma,one_neg_element_ma), None)
+ self._test(Stats.wsamplevar, (one_neg_element_numeric,one_neg_element_numeric), None)
+ self._test(Stats.wsamplevar, (one_neg_element_ma,one_neg_element_numeric), None)
+ self._test(Stats.wsamplevar, (all_neg_numeric,all_neg_numeric), None)
+ self._test(Stats.wsamplevar, (all_neg_ma,all_neg_numeric), None)
+ self._test(Stats.wsamplevar, (all_neg_numeric,all_neg_ma), None)
+ self._test(Stats.wsamplevar, (all_neg_ma,all_neg_ma), None)
+
+ def test_wsamplevar_misc_exclnpwgts(self):
+ self._test(Stats.wsamplevar, (empty_numeric,empty_numeric), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (empty_ma,empty_numeric), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (empty_numeric,empty_ma), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (empty_ma,empty_ma), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (populated_numeric,populated_numeric), 1.8021978022, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (populated_numeric,populated_ma), 1.8021978022, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (populated_ma,populated_numeric), 1.8021978022, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (populated_ma,populated_ma), 1.8021978022, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (null_mask,null_mask), 1.8021978022, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (full_mask,null_mask), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (full_mask,partial_mask), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (full_mask,full_mask), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (null_mask,full_mask), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (partial_mask,full_mask), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (partial_mask,partial_mask), 2.1428571429, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (two_elements_numeric,two_elements_numeric), 2.1428571429, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (two_elements_ma,two_elements_numeric), 2.1428571429, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (two_elements_numeric,two_elements_ma), 2.1428571429, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (two_elements_ma,two_elements_ma), 2.1428571429, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (one_element_numeric,one_element_numeric), 0.0, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (one_element_ma,one_element_numeric), 0.0, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (one_element_numeric,one_element_ma), 0.0, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (one_element_ma,one_element_ma), 0.0, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (one_neg_element_numeric,one_neg_element_ma), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (one_neg_element_ma,one_neg_element_ma), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (one_neg_element_numeric,one_neg_element_numeric), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (one_neg_element_ma,one_neg_element_numeric), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (all_neg_numeric,all_neg_numeric), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (all_neg_ma,all_neg_numeric), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (all_neg_numeric,all_neg_ma), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (all_neg_ma,all_neg_ma), None, exclude_nonpositive_weights=True)
+
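+    # For a vector of n consecutive integers the population variance is
+    # (n**2 - 1)/12, so n1001 (apparently -10..990) gives
+    # (1001**2 - 1)/12 = 83500, and the sample variance scales this by
+    # n/(n-1): 83500 * 1001/1000 = 83583.5, as asserted below.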
+ def test_samplevar_1001(self):
+ self._test(Stats.samplevar, n1001_nomissing_numpy, 83583.5)
+ self._test(Stats.samplevar, n1001_nomissing_MA, 83583.5)
+ self._test(Stats.samplevar, n1001_missing, 83353.6095197)
+
+ def test_samplevar_1006(self):
+ self._test(Stats.samplevar, n1006_nomissing_numpy, 84420.1666667)
+ self._test(Stats.samplevar, n1006_nomissing_MA, 84420.1666667)
+ self._test(Stats.samplevar, n1006_missing, 84155.0396823)
+
+ def test_wsamplevar_1001(self):
+ self._test(Stats.wsamplevar, (n1001_nomissing_numpy,w1001_nomissing_numpy), 54505.2222235835)
+ self._test(Stats.wsamplevar, (n1001_nomissing_MA,w1001_nomissing_numpy), 54505.2222235835)
+ self._test(Stats.wsamplevar, (n1001_nomissing_numpy,w1001_nomissing_MA), 54505.2222235835)
+ self._test(Stats.wsamplevar, (n1001_nomissing_MA,w1001_nomissing_MA), 54505.2222235835)
+ self._test(Stats.wsamplevar, (n1001_missing,w1001_nomissing_numpy), 51849.4940772112)
+ self._test(Stats.wsamplevar, (n1001_missing,w1001_nomissing_MA), 51849.4940772112)
+ self._test(Stats.wsamplevar, (n1001_nomissing_numpy,w1001_missing), 55798.5328446147)
+ self._test(Stats.wsamplevar, (n1001_nomissing_MA,w1001_missing), 55798.5328446147)
+ self._test(Stats.wsamplevar, (n1001_missing,w1001_missing), 53205.0071243415)
+
+ def test_wsamplevar_1001_exclnpwgts(self):
+ self._test(Stats.wsamplevar, (n1001_nomissing_numpy,w1001_nomissing_numpy,), 54505.2222235835, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (n1001_nomissing_MA,w1001_nomissing_numpy,), 54505.2222235835, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (n1001_nomissing_numpy,w1001_nomissing_MA,), 54505.2222235835, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (n1001_nomissing_MA,w1001_nomissing_MA,), 54505.2222235835, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (n1001_missing,w1001_nomissing_numpy,), 51849.4940772112, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (n1001_missing,w1001_nomissing_MA,), 51849.4940772112, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (n1001_nomissing_numpy,w1001_missing,), 55798.5328446147, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (n1001_nomissing_MA,w1001_missing,), 55798.5328446147, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (n1001_missing,w1001_missing,), 53205.0071243415, exclude_nonpositive_weights=True)
+
+ def test_wsamplevar_1006(self):
+ self._test(Stats.wsamplevar, (n1006_nomissing_numpy,w1006_nomissing_numpy), 55056.8888902326)
+ self._test(Stats.wsamplevar, (n1006_nomissing_MA,w1006_nomissing_numpy), 55056.8888902326)
+ self._test(Stats.wsamplevar, (n1006_nomissing_numpy,w1006_nomissing_MA), 55056.8888902326)
+ self._test(Stats.wsamplevar, (n1006_nomissing_MA,w1006_nomissing_MA), 55056.8888902326)
+ self._test(Stats.wsamplevar, (n1006_missing,w1006_nomissing_numpy), 52365.9372357347)
+ self._test(Stats.wsamplevar, (n1006_missing,w1006_nomissing_MA), 52365.9372357347)
+ self._test(Stats.wsamplevar, (n1006_nomissing_numpy,w1006_missing), 56418.9816615686)
+ self._test(Stats.wsamplevar, (n1006_nomissing_MA,w1006_missing), 56418.9816615686)
+ self._test(Stats.wsamplevar, (n1006_missing,w1006_missing), 53784.5056180943)
+
+ def test_wsamplevar_1006_exclnpwgts(self):
+ self._test(Stats.wsamplevar, (n1006_nomissing_numpy,w1006_nomissing_numpy,), 55056.8888902326, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (n1006_nomissing_MA,w1006_nomissing_numpy,), 55056.8888902326, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (n1006_nomissing_numpy,w1006_nomissing_MA,), 55056.8888902326, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (n1006_nomissing_MA,w1006_nomissing_MA,), 55056.8888902326, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (n1006_missing,w1006_nomissing_numpy,), 52365.9372357347, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (n1006_missing,w1006_nomissing_MA,), 52365.9372357347, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (n1006_nomissing_numpy,w1006_missing,), 56418.9816615686, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (n1006_nomissing_MA,w1006_missing,), 56418.9816615686, exclude_nonpositive_weights=True)
+ self._test(Stats.wsamplevar, (n1006_missing,w1006_missing,), 53784.5056180943, exclude_nonpositive_weights=True)
+
+ def test_populationvar_misc(self):
+ self._test(Stats.populationvar, empty_numeric, None)
+ self._test(Stats.populationvar, empty_ma, None)
+ self._test(Stats.populationvar, populated_numeric, 1.7600000)
+ self._test(Stats.populationvar, populated_ma, 1.7600000)
+ self._test(Stats.populationvar, null_mask, 1.7600000)
+ self._test(Stats.populationvar, full_mask, None)
+ self._test(Stats.populationvar, partial_mask, 2.25000)
+ self._test(Stats.populationvar, two_elements_numeric, 2.25000)
+ self._test(Stats.populationvar, two_elements_ma, 2.25000)
+ self._test(Stats.populationvar, one_element_numeric, 0.0)
+ self._test(Stats.populationvar, one_element_ma, 0.0)
+ self._test(Stats.populationvar, one_neg_element_numeric, 0.0)
+ self._test(Stats.populationvar, one_neg_element_ma, 0.0)
+ self._test(Stats.populationvar, all_neg_numeric, 842.00000)
+ self._test(Stats.populationvar, all_neg_ma, 842.00000)
+
+ def test_wpopulationvar_misc(self):
+ self._test(Stats.wpopulationvar, (empty_numeric,empty_numeric), None)
+ self._test(Stats.wpopulationvar, (empty_ma,empty_numeric), None)
+ self._test(Stats.wpopulationvar, (empty_numeric,empty_ma), None)
+ self._test(Stats.wpopulationvar, (empty_ma,empty_ma), None)
+ self._test(Stats.wpopulationvar, (populated_numeric,populated_numeric), 1.6734693878)
+ self._test(Stats.wpopulationvar, (populated_numeric,populated_ma), 1.6734693878)
+ self._test(Stats.wpopulationvar, (populated_ma,populated_numeric), 1.6734693878)
+ self._test(Stats.wpopulationvar, (populated_ma,populated_ma), 1.6734693878)
+ self._test(Stats.wpopulationvar, (null_mask,null_mask), 1.6734693878)
+ self._test(Stats.wpopulationvar, (full_mask,null_mask), None)
+ self._test(Stats.wpopulationvar, (full_mask,partial_mask), None)
+ self._test(Stats.wpopulationvar, (full_mask,full_mask), None)
+ self._test(Stats.wpopulationvar, (null_mask,full_mask), None)
+ self._test(Stats.wpopulationvar, (partial_mask,full_mask), None)
+ self._test(Stats.wpopulationvar, (partial_mask,partial_mask), 1.8367346939)
+ self._test(Stats.wpopulationvar, (two_elements_numeric,two_elements_numeric), 1.8367346939)
+ self._test(Stats.wpopulationvar, (two_elements_ma,two_elements_numeric), 1.8367346939)
+ self._test(Stats.wpopulationvar, (two_elements_numeric,two_elements_ma), 1.8367346939)
+ self._test(Stats.wpopulationvar, (two_elements_ma,two_elements_ma), 1.8367346939)
+ self._test(Stats.wpopulationvar, (one_element_numeric,one_element_numeric), 0.0)
+ self._test(Stats.wpopulationvar, (one_element_ma,one_element_numeric), 0.0)
+ self._test(Stats.wpopulationvar, (one_element_numeric,one_element_ma), 0.0)
+ self._test(Stats.wpopulationvar, (one_element_ma,one_element_ma), 0.0)
+ self._test(Stats.wpopulationvar, (one_neg_element_numeric,one_neg_element_ma), None)
+ self._test(Stats.wpopulationvar, (one_neg_element_ma,one_neg_element_ma), None)
+ self._test(Stats.wpopulationvar, (one_neg_element_numeric,one_neg_element_numeric), None)
+ self._test(Stats.wpopulationvar, (one_neg_element_ma,one_neg_element_numeric), None)
+ self._test(Stats.wpopulationvar, (all_neg_numeric,all_neg_numeric), None)
+ self._test(Stats.wpopulationvar, (all_neg_ma,all_neg_numeric), None)
+ self._test(Stats.wpopulationvar, (all_neg_numeric,all_neg_ma), None)
+ self._test(Stats.wpopulationvar, (all_neg_ma,all_neg_ma), None)
+
+ def test_wpopulationvar_misc_exclnpwgts(self):
+ self._test(Stats.wpopulationvar, (empty_numeric,empty_numeric), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (empty_ma,empty_numeric), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (empty_numeric,empty_ma), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (empty_ma,empty_ma), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (populated_numeric,populated_numeric), 1.6734693878, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (populated_numeric,populated_ma), 1.6734693878, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (populated_ma,populated_numeric), 1.6734693878, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (populated_ma,populated_ma), 1.6734693878, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (null_mask,null_mask), 1.6734693878, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (full_mask,null_mask), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (full_mask,partial_mask), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (full_mask,full_mask), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (null_mask,full_mask), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (partial_mask,full_mask), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (partial_mask,partial_mask), 1.8367346939, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (two_elements_numeric,two_elements_numeric), 1.8367346939, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (two_elements_ma,two_elements_numeric), 1.8367346939, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (two_elements_numeric,two_elements_ma), 1.8367346939, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (two_elements_ma,two_elements_ma), 1.8367346939, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (one_element_numeric,one_element_numeric), 0.0, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (one_element_ma,one_element_numeric), 0.0, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (one_element_numeric,one_element_ma), 0.0, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (one_element_ma,one_element_ma), 0.0, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (one_neg_element_numeric,one_neg_element_ma), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (one_neg_element_ma,one_neg_element_ma), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (one_neg_element_numeric,one_neg_element_numeric), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (one_neg_element_ma,one_neg_element_numeric), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (all_neg_numeric,all_neg_numeric), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (all_neg_ma,all_neg_numeric), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (all_neg_numeric,all_neg_ma), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (all_neg_ma,all_neg_ma), None, exclude_nonpositive_weights=True)
+
+ def test_populationvar_1001(self):
+ self._test(Stats.populationvar, n1001_nomissing_numpy, 83500.0000)
+ self._test(Stats.populationvar, n1001_nomissing_MA, 83500.0000)
+ self._test(Stats.populationvar, n1001_missing, 83263.7888198109)
+
+ def test_populationvar_1006(self):
+ self._test(Stats.populationvar, n1006_nomissing_numpy, 84336.2500)
+ self._test(Stats.populationvar, n1006_nomissing_MA, 84336.2500)
+ self._test(Stats.populationvar, n1006_missing, 84064.8413546869)
+
+ def test_wpopulationvar_1001(self):
+ self._test(Stats.wpopulationvar, (n1001_nomissing_numpy,w1001_nomissing_numpy), 54504.8888888911)
+ self._test(Stats.wpopulationvar, (n1001_nomissing_MA,w1001_nomissing_numpy), 54504.8888888911)
+ self._test(Stats.wpopulationvar, (n1001_nomissing_numpy,w1001_nomissing_MA), 54504.8888888911)
+ self._test(Stats.wpopulationvar, (n1001_nomissing_MA,w1001_nomissing_MA), 54504.8888888911)
+ self._test(Stats.wpopulationvar, (n1001_missing,w1001_nomissing_numpy), 51849.1649806388)
+ self._test(Stats.wpopulationvar, (n1001_missing,w1001_nomissing_MA), 51849.1649806388)
+ self._test(Stats.wpopulationvar, (n1001_nomissing_numpy,w1001_missing), 55798.170622262425)
+ self._test(Stats.wpopulationvar, (n1001_nomissing_MA,w1001_missing), 55798.170622262425)
+ self._test(Stats.wpopulationvar, (n1001_missing,w1001_missing), 53204.6478317361)
+
+ def test_wpopulationvar_1006(self):
+ self._test(Stats.wpopulationvar, (n1006_nomissing_numpy,w1006_nomissing_numpy), 55056.5555555538)
+ self._test(Stats.wpopulationvar, (n1006_nomissing_MA,w1006_nomissing_numpy), 55056.5555555538)
+ self._test(Stats.wpopulationvar, (n1006_nomissing_numpy,w1006_nomissing_MA), 55056.5555555538)
+ self._test(Stats.wpopulationvar, (n1006_nomissing_MA,w1006_nomissing_MA), 55056.5555555538)
+ self._test(Stats.wpopulationvar, (n1006_missing,w1006_nomissing_numpy), 52365.6083163646)
+ self._test(Stats.wpopulationvar, (n1006_missing,w1006_nomissing_MA), 52365.6083163646)
+ self._test(Stats.wpopulationvar, (n1006_nomissing_numpy,w1006_missing), 56418.6193084682)
+ self._test(Stats.wpopulationvar, (n1006_nomissing_MA,w1006_missing), 56418.6193084682)
+ self._test(Stats.wpopulationvar, (n1006_missing,w1006_missing), 53784.14642265374)
+
+ def test_wpopulationvar_1001_exclnpwgts(self):
+ self._test(Stats.wpopulationvar, (n1001_nomissing_numpy,w1001_nomissing_numpy,), 54504.8888888911, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (n1001_nomissing_MA,w1001_nomissing_numpy,), 54504.8888888911, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (n1001_nomissing_numpy,w1001_nomissing_MA,), 54504.8888888911, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (n1001_nomissing_MA,w1001_nomissing_MA,), 54504.8888888911, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (n1001_missing,w1001_nomissing_numpy,), 51849.1649806388, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (n1001_missing,w1001_nomissing_MA,), 51849.1649806388, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (n1001_nomissing_numpy,w1001_missing,), 55798.170622262425, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (n1001_nomissing_MA,w1001_missing,), 55798.170622262425, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (n1001_missing,w1001_missing,), 53204.6478317361, exclude_nonpositive_weights=True)
+
+ def test_wpopulationvar_1006_exclnpwgts(self):
+ self._test(Stats.wpopulationvar, (n1006_nomissing_numpy,w1006_nomissing_numpy,), 55056.5555555538, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (n1006_nomissing_MA,w1006_nomissing_numpy,), 55056.5555555538, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (n1006_nomissing_numpy,w1006_nomissing_MA,), 55056.5555555538, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (n1006_nomissing_MA,w1006_nomissing_MA,), 55056.5555555538, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (n1006_missing,w1006_nomissing_numpy,), 52365.6083163646, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (n1006_missing,w1006_nomissing_MA,), 52365.6083163646, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (n1006_nomissing_numpy,w1006_missing,), 56418.6193084682, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (n1006_nomissing_MA,w1006_missing,), 56418.6193084682, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulationvar, (n1006_missing,w1006_missing,), 53784.14642265374, exclude_nonpositive_weights=True)
+
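+    # The standard deviations are just square roots of the variances
+    # asserted above, e.g. sqrt(83583.5) ~= 289.108111266 for the n1001
+    # sample standard deviation.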
+ def test_sample_stddev_1001(self):
+ self._test(Stats.sample_stddev, n1001_nomissing_numpy, 289.108111266)
+ self._test(Stats.sample_stddev, n1001_nomissing_MA, 289.108111266)
+ self._test(Stats.sample_stddev, n1001_missing, 288.710251844)
+
+ def test_sample_stddev_1006(self):
+ self._test(Stats.sample_stddev, n1006_nomissing_numpy, 290.551487118)
+ self._test(Stats.sample_stddev, n1006_nomissing_MA, 290.551487118)
+ self._test(Stats.sample_stddev, n1006_missing, 290.094880483)
+
+ def test_wsample_stddev_1001(self):
+ self._test(Stats.wsample_stddev, (n1001_nomissing_numpy,w1001_nomissing_numpy), 233.463535105)
+ self._test(Stats.wsample_stddev, (n1001_nomissing_MA,w1001_nomissing_numpy), 233.463535105)
+ self._test(Stats.wsample_stddev, (n1001_nomissing_numpy,w1001_nomissing_MA), 233.463535105)
+ self._test(Stats.wsample_stddev, (n1001_nomissing_MA,w1001_nomissing_MA), 233.463535105)
+ self._test(Stats.wsample_stddev, (n1001_missing,w1001_nomissing_numpy), 227.704839819)
+ self._test(Stats.wsample_stddev, (n1001_missing,w1001_nomissing_MA), 227.704839819)
+ self._test(Stats.wsample_stddev, (n1001_nomissing_numpy,w1001_missing), 236.217130718)
+ self._test(Stats.wsample_stddev, (n1001_nomissing_MA,w1001_missing), 236.217130718)
+ self._test(Stats.wsample_stddev, (n1001_missing,w1001_missing), 230.662105957)
+
+ def test_wsample_stddev_1001_exclnpwgts(self):
+ self._test(Stats.wsample_stddev, (n1001_nomissing_numpy,w1001_nomissing_numpy,), 233.463535105, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_stddev, (n1001_nomissing_MA,w1001_nomissing_numpy,), 233.463535105, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_stddev, (n1001_nomissing_numpy,w1001_nomissing_MA,), 233.463535105, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_stddev, (n1001_nomissing_MA,w1001_nomissing_MA,), 233.463535105, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_stddev, (n1001_missing,w1001_nomissing_numpy,), 227.704839819, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_stddev, (n1001_missing,w1001_nomissing_MA,), 227.704839819, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_stddev, (n1001_nomissing_numpy,w1001_missing,), 236.217130718, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_stddev, (n1001_nomissing_MA,w1001_missing,), 236.217130718, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_stddev, (n1001_missing,w1001_missing,), 230.662105957, exclude_nonpositive_weights=True)
+
+ def test_population_stddev_1001(self):
+ self._test(Stats.population_stddev, n1001_nomissing_numpy, 288.963665536)
+ self._test(Stats.population_stddev, n1001_nomissing_MA, 288.963665536)
+ self._test(Stats.population_stddev, n1001_missing, 288.554654823)
+
+ def test_population_stddev_1006(self):
+ self._test(Stats.population_stddev, n1006_nomissing_numpy, 290.407)
+ self._test(Stats.population_stddev, n1006_nomissing_MA, 290.407)
+ self._test(Stats.population_stddev, n1006_missing, 289.939375309)
+
+ def test_wpopulation_stddev_1006(self):
+ self._test(Stats.wpopulation_stddev, (n1006_nomissing_numpy,w1006_nomissing_numpy), 234.64133386)
+ self._test(Stats.wpopulation_stddev, (n1006_nomissing_MA,w1006_nomissing_numpy), 234.64133386)
+ self._test(Stats.wpopulation_stddev, (n1006_nomissing_numpy,w1006_nomissing_MA), 234.64133386)
+ self._test(Stats.wpopulation_stddev, (n1006_nomissing_MA,w1006_nomissing_MA), 234.64133386)
+ self._test(Stats.wpopulation_stddev, (n1006_missing,w1006_nomissing_numpy), 228.835330131)
+ self._test(Stats.wpopulation_stddev, (n1006_missing,w1006_nomissing_MA), 228.835330131)
+ self._test(Stats.wpopulation_stddev, (n1006_nomissing_numpy,w1006_missing), 237.526)
+ self._test(Stats.wpopulation_stddev, (n1006_nomissing_MA,w1006_missing), 237.526)
+ self._test(Stats.wpopulation_stddev, (n1006_missing,w1006_missing), 231.914092773)
+
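+    # sample_cv reports the coefficient of variation as a percentage,
+    # 100 * sample_stddev / mean.  For n1001 (mean 490, if the fixture is
+    # the integers -10..990) that is 100 * 289.108111266 / 490
+    # ~= 59.0016553605, matching test_sample_cv_1001 below.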
+ def test_sample_cv_misc(self):
+ self._test(Stats.sample_cv, empty_numeric, None)
+ self._test(Stats.sample_cv, empty_ma, None)
+ self._test(Stats.sample_cv, populated_numeric, 52.9728463364)
+ self._test(Stats.sample_cv, populated_ma, 52.9728463364)
+ self._test(Stats.sample_cv, null_mask, 52.9728463364)
+ self._test(Stats.sample_cv, full_mask, None)
+ self._test(Stats.sample_cv, partial_mask, 60.6091526731)
+ self._test(Stats.sample_cv, two_elements_numeric, 60.6091526731)
+ self._test(Stats.sample_cv, two_elements_ma, 60.6091526731)
+ self._test(Stats.sample_cv, one_element_numeric, None)
+ self._test(Stats.sample_cv, one_element_ma, None)
+ self._test(Stats.sample_cv, one_neg_element_numeric, None)
+ self._test(Stats.sample_cv, one_neg_element_ma, None)
+ self._test(Stats.sample_cv, all_neg_numeric, -180.2347577501)
+ self._test(Stats.sample_cv, all_neg_ma, -180.2347577501)
+
+ def test_wsample_cv_misc(self):
+ self._test(Stats.wsample_cv, (empty_numeric,empty_numeric), None)
+ self._test(Stats.wsample_cv, (empty_ma,empty_numeric), None)
+ self._test(Stats.wsample_cv, (empty_numeric,empty_ma), None)
+ self._test(Stats.wsample_cv, (empty_ma,empty_ma), None)
+ self._test(Stats.wsample_cv, (populated_numeric,populated_numeric), 39.1550719335)
+ self._test(Stats.wsample_cv, (populated_numeric,populated_ma), 39.1550719335)
+ self._test(Stats.wsample_cv, (populated_ma,populated_numeric), 39.1550719335)
+ self._test(Stats.wsample_cv, (populated_ma,populated_ma), 39.1550719335)
+ self._test(Stats.wsample_cv, (null_mask,null_mask), 39.1550719335)
+ self._test(Stats.wsample_cv, (full_mask,null_mask), None)
+ self._test(Stats.wsample_cv, (full_mask,partial_mask), None)
+ self._test(Stats.wsample_cv, (full_mask,full_mask), None)
+ self._test(Stats.wsample_cv, (null_mask,full_mask), None)
+ self._test(Stats.wsample_cv, (partial_mask,full_mask), None)
+ self._test(Stats.wsample_cv, (partial_mask,partial_mask), 35.3343129861)
+ self._test(Stats.wsample_cv, (two_elements_numeric,two_elements_numeric), 35.3343129861)
+ self._test(Stats.wsample_cv, (two_elements_ma,two_elements_numeric), 35.3343129861)
+ self._test(Stats.wsample_cv, (two_elements_numeric,two_elements_ma), 35.3343129861)
+ self._test(Stats.wsample_cv, (two_elements_ma,two_elements_ma), 35.3343129861)
+ self._test(Stats.wsample_cv, (one_element_numeric,one_element_numeric), 0.0)
+ self._test(Stats.wsample_cv, (one_element_ma,one_element_numeric), 0.0)
+ self._test(Stats.wsample_cv, (one_element_numeric,one_element_ma), 0.0)
+ self._test(Stats.wsample_cv, (one_element_ma,one_element_ma), 0.0)
+ self._test(Stats.wsample_cv, (one_neg_element_numeric,one_neg_element_ma), None)
+ self._test(Stats.wsample_cv, (one_neg_element_ma,one_neg_element_ma), None)
+ self._test(Stats.wsample_cv, (one_neg_element_numeric,one_neg_element_numeric), None)
+ self._test(Stats.wsample_cv, (one_neg_element_ma,one_neg_element_numeric), None)
+ self._test(Stats.wsample_cv, (all_neg_numeric,all_neg_numeric), None)
+ self._test(Stats.wsample_cv, (all_neg_ma,all_neg_numeric), None)
+ self._test(Stats.wsample_cv, (all_neg_numeric,all_neg_ma), None)
+ self._test(Stats.wsample_cv, (all_neg_ma,all_neg_ma), None)
+
+ def test_wsample_cv_misc_exclnpwgts(self):
+ self._test(Stats.wsample_cv, (empty_numeric,empty_numeric), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (empty_ma,empty_numeric), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (empty_numeric,empty_ma), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (empty_ma,empty_ma), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (populated_numeric,populated_numeric), 39.1550719335, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (populated_numeric,populated_ma), 39.1550719335, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (populated_ma,populated_numeric), 39.1550719335, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (populated_ma,populated_ma), 39.1550719335, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (null_mask,null_mask), 39.1550719335, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (full_mask,null_mask), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (full_mask,partial_mask), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (full_mask,full_mask), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (null_mask,full_mask), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (partial_mask,full_mask), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (partial_mask,partial_mask), 35.3343129861, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (two_elements_numeric,two_elements_numeric), 35.3343129861, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (two_elements_ma,two_elements_numeric), 35.3343129861, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (two_elements_numeric,two_elements_ma), 35.3343129861, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (two_elements_ma,two_elements_ma), 35.3343129861, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (one_element_numeric,one_element_numeric), 0.0, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (one_element_ma,one_element_numeric), 0.0, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (one_element_numeric,one_element_ma), 0.0, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (one_element_ma,one_element_ma), 0.0, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (one_neg_element_numeric,one_neg_element_ma), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (one_neg_element_ma,one_neg_element_ma), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (one_neg_element_numeric,one_neg_element_numeric), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (one_neg_element_ma,one_neg_element_numeric), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (all_neg_numeric,all_neg_numeric), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (all_neg_ma,all_neg_numeric), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (all_neg_numeric,all_neg_ma), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (all_neg_ma,all_neg_ma), None, exclude_nonpositive_weights=True)
+
+ def test_sample_cv_1001(self):
+ self._test(Stats.sample_cv, n1001_nomissing_numpy, 59.0016553605)
+ self._test(Stats.sample_cv, n1001_nomissing_MA, 59.0016553605)
+ self._test(Stats.sample_cv, n1001_missing, 56.6907065543)
+
+ def test_sample_cv_1006(self):
+ self._test(Stats.sample_cv, n1006_nomissing_numpy, 58.9952258108)
+ self._test(Stats.sample_cv, n1006_nomissing_MA, 58.9952258108)
+ self._test(Stats.sample_cv, n1006_missing, 56.6741050507)
+
+ def test_wsample_cv_1001(self):
+ self._test(Stats.wsample_cv, (n1001_nomissing_numpy,w1001_nomissing_numpy), 35.3554066287)
+ self._test(Stats.wsample_cv, (n1001_nomissing_MA,w1001_nomissing_numpy), 35.3554066287)
+ self._test(Stats.wsample_cv, (n1001_nomissing_numpy,w1001_nomissing_MA), 35.3554066287)
+ self._test(Stats.wsample_cv, (n1001_nomissing_MA,w1001_nomissing_MA), 35.3554066287)
+ self._test(Stats.wsample_cv, (n1001_missing,w1001_nomissing_numpy), 33.8494339393)
+ self._test(Stats.wsample_cv, (n1001_missing,w1001_nomissing_MA), 33.8494339393)
+ self._test(Stats.wsample_cv, (n1001_nomissing_numpy,w1001_missing), 36.1572681672)
+ self._test(Stats.wsample_cv, (n1001_nomissing_MA,w1001_missing), 36.1572681672)
+ self._test(Stats.wsample_cv, (n1001_missing,w1001_missing), 34.6246930045)
+
+ def test_wsample_cv_1001_exclnpwgts(self):
+ self._test(Stats.wsample_cv, (n1001_nomissing_numpy,w1001_nomissing_numpy,), 35.3554066287, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (n1001_nomissing_MA,w1001_nomissing_numpy,), 35.3554066287, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (n1001_nomissing_numpy,w1001_nomissing_MA,), 35.3554066287, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (n1001_nomissing_MA,w1001_nomissing_MA,), 35.3554066287, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (n1001_missing,w1001_nomissing_numpy,), 33.8494339393, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (n1001_missing,w1001_nomissing_MA,), 33.8494339393, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (n1001_nomissing_numpy,w1001_missing,), 36.1572681672, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (n1001_nomissing_MA,w1001_missing,), 36.1572681672, exclude_nonpositive_weights=True)
+ self._test(Stats.wsample_cv, (n1001_missing,w1001_missing,), 34.6246930045, exclude_nonpositive_weights=True)
+
+ def test_population_cv_misc(self):
+ self._test(Stats.population_cv, empty_numeric, None)
+ self._test(Stats.population_cv, empty_ma, None)
+ self._test(Stats.population_cv, populated_numeric, 47.3803541479)
+ self._test(Stats.population_cv, populated_ma, 47.3803541479)
+ self._test(Stats.population_cv, null_mask, 47.3803541479)
+ self._test(Stats.population_cv, full_mask, None)
+ self._test(Stats.population_cv, partial_mask, 42.8571428571)
+ self._test(Stats.population_cv, two_elements_numeric, 42.8571428571)
+ self._test(Stats.population_cv, two_elements_ma, 42.8571428571)
+ self._test(Stats.population_cv, one_element_numeric, 0.0)
+ self._test(Stats.population_cv, one_element_ma, 0.0)
+ self._test(Stats.population_cv, one_neg_element_numeric, 0.0)
+ self._test(Stats.population_cv, one_neg_element_ma, 0.0)
+ self._test(Stats.population_cv, all_neg_numeric, -161.2068680950)
+ self._test(Stats.population_cv, all_neg_ma, -161.2068680950)
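+
+    def test_population_cv_two_elements_reference(self):
+        # Added cross-check: for the hand-computable pair [2.0, 5.0] the
+        # mean is 3.5 and the population standard deviation is 1.5, so the
+        # population CV is 100 * 1.5 / 3.5 = 42.857142857..., the same
+        # figure the two_elements_* fixtures expect above.
+        self._test(Stats.population_cv, Numeric.array([2.0, 5.0]),
+                   42.8571428571)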
+
+ def test_wpopulation_cv_misc(self):
+ self._test(Stats.wpopulation_cv, (empty_numeric,empty_numeric), None)
+ self._test(Stats.wpopulation_cv, (empty_ma,empty_numeric), None)
+ self._test(Stats.wpopulation_cv, (empty_numeric,empty_ma), None)
+ self._test(Stats.wpopulation_cv, (empty_ma,empty_ma), None)
+ self._test(Stats.wpopulation_cv, (populated_numeric,populated_numeric), 37.7307714089)
+ self._test(Stats.wpopulation_cv, (populated_numeric,populated_ma), 37.7307714089)
+ self._test(Stats.wpopulation_cv, (populated_ma,populated_numeric), 37.7307714089)
+ self._test(Stats.wpopulation_cv, (populated_ma,populated_ma), 37.7307714089)
+ self._test(Stats.wpopulation_cv, (null_mask,null_mask), 37.7307714089)
+ self._test(Stats.wpopulation_cv, (full_mask,null_mask), None)
+ self._test(Stats.wpopulation_cv, (full_mask,partial_mask), None)
+ self._test(Stats.wpopulation_cv, (full_mask,full_mask), None)
+ self._test(Stats.wpopulation_cv, (null_mask,full_mask), None)
+ self._test(Stats.wpopulation_cv, (partial_mask,full_mask), None)
+ self._test(Stats.wpopulation_cv, (full_mask,full_mask), None)
+ self._test(Stats.wpopulation_cv, (partial_mask,partial_mask), 32.7132171742)
+ self._test(Stats.wpopulation_cv, (two_elements_numeric,two_elements_numeric), 32.7132171742)
+ self._test(Stats.wpopulation_cv, (two_elements_ma,two_elements_numeric), 32.7132171742)
+ self._test(Stats.wpopulation_cv, (two_elements_numeric,two_elements_ma), 32.7132171742)
+ self._test(Stats.wpopulation_cv, (two_elements_ma,two_elements_ma), 32.7132171742)
+ self._test(Stats.wpopulation_cv, (one_element_numeric,one_element_numeric), 0.0)
+ self._test(Stats.wpopulation_cv, (one_element_ma,one_element_numeric), 0.0)
+ self._test(Stats.wpopulation_cv, (one_element_numeric,one_element_ma), 0.0)
+ self._test(Stats.wpopulation_cv, (one_element_ma,one_element_ma), 0.0)
+ self._test(Stats.wpopulation_cv, (one_neg_element_numeric,one_neg_element_ma), None)
+ self._test(Stats.wpopulation_cv, (one_neg_element_ma,one_neg_element_ma), None)
+ self._test(Stats.wpopulation_cv, (one_neg_element_numeric,one_neg_element_numeric), None)
+ self._test(Stats.wpopulation_cv, (one_neg_element_ma,one_neg_element_numeric), None)
+ self._test(Stats.wpopulation_cv, (all_neg_numeric,all_neg_numeric), None)
+ self._test(Stats.wpopulation_cv, (all_neg_ma,all_neg_numeric), None)
+ self._test(Stats.wpopulation_cv, (all_neg_numeric,all_neg_ma), None)
+ self._test(Stats.wpopulation_cv, (all_neg_ma,all_neg_ma), None)
+
+ def test_wpopulation_cv_misc_exclnpwgts(self):
+ self._test(Stats.wpopulation_cv, (empty_numeric,empty_numeric), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (empty_ma,empty_numeric), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (empty_numeric,empty_ma), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (empty_ma,empty_ma), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (populated_numeric,populated_numeric), 37.7307714089, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (populated_numeric,populated_ma), 37.7307714089, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (populated_ma,populated_numeric), 37.7307714089, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (populated_ma,populated_ma), 37.7307714089, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (null_mask,null_mask), 37.7307714089, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (full_mask,null_mask), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (full_mask,partial_mask), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (full_mask,full_mask), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (null_mask,full_mask), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (partial_mask,full_mask), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (full_mask,full_mask), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (partial_mask,partial_mask), 32.7132171742, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (two_elements_numeric,two_elements_numeric), 32.7132171742, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (two_elements_ma,two_elements_numeric), 32.7132171742, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (two_elements_numeric,two_elements_ma), 32.7132171742, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (two_elements_ma,two_elements_ma), 32.7132171742, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (one_element_numeric,one_element_numeric), 0.0, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (one_element_ma,one_element_numeric), 0.0, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (one_element_numeric,one_element_ma), 0.0, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (one_element_ma,one_element_ma), 0.0, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (one_neg_element_numeric,one_neg_element_ma), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (one_neg_element_ma,one_neg_element_ma), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (one_neg_element_numeric,one_neg_element_numeric), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (one_neg_element_ma,one_neg_element_numeric), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (all_neg_numeric,all_neg_numeric), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (all_neg_ma,all_neg_numeric), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (all_neg_numeric,all_neg_ma), None, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (all_neg_ma,all_neg_ma), None, exclude_nonpositive_weights=True)
+
+ def test_population_cv_1001(self):
+ self._test(Stats.population_cv, n1001_nomissing_numpy, 58.9721766400)
+ self._test(Stats.population_cv, n1001_nomissing_MA, 58.9721766400)
+ self._test(Stats.population_cv, n1001_missing, 56.6601537596)
+
+ def test_population_cv_1006(self):
+ self._test(Stats.population_cv, n1006_nomissing_numpy, 58.9658968377)
+ self._test(Stats.population_cv, n1006_nomissing_MA, 58.9658968377)
+ self._test(Stats.population_cv, n1006_missing, 56.6437249332)
+
+ def test_wpopulation_cv_1006(self):
+ self._test(Stats.wpopulation_cv, (n1006_nomissing_numpy,w1006_nomissing_numpy), 35.3552989241)
+ self._test(Stats.wpopulation_cv, (n1006_nomissing_MA,w1006_nomissing_numpy), 35.3552989241)
+ self._test(Stats.wpopulation_cv, (n1006_nomissing_numpy,w1006_nomissing_MA), 35.3552989241)
+ self._test(Stats.wpopulation_cv, (n1006_nomissing_MA,w1006_nomissing_MA), 35.3552989241)
+ self._test(Stats.wpopulation_cv, (n1006_missing,w1006_nomissing_numpy), 33.8499398934)
+ self._test(Stats.wpopulation_cv, (n1006_missing,w1006_nomissing_MA), 33.8499398934)
+ self._test(Stats.wpopulation_cv, (n1006_nomissing_numpy,w1006_missing), 36.1577798983)
+ self._test(Stats.wpopulation_cv, (n1006_nomissing_MA,w1006_missing), 36.1577798983)
+ self._test(Stats.wpopulation_cv, (n1006_missing,w1006_missing), 34.6248792762)
+
+ def test_wpopulation_cv_1006_exclnpwgts(self):
+ self._test(Stats.wpopulation_cv, (n1006_nomissing_numpy,w1006_nomissing_numpy,), 35.3552989241, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (n1006_nomissing_MA,w1006_nomissing_numpy,), 35.3552989241, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (n1006_nomissing_numpy,w1006_nomissing_MA,), 35.3552989241, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (n1006_nomissing_MA,w1006_nomissing_MA,), 35.3552989241, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (n1006_missing,w1006_nomissing_numpy,), 33.8499398934, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (n1006_missing,w1006_nomissing_MA,), 33.8499398934, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (n1006_nomissing_numpy,w1006_missing,), 36.1577798983, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (n1006_nomissing_MA,w1006_missing,), 36.1577798983, exclude_nonpositive_weights=True)
+ self._test(Stats.wpopulation_cv, (n1006_missing,w1006_missing,), 34.6248792762, exclude_nonpositive_weights=True)
+
+ def test_stderr_misc(self):
+ self._test(Stats.stderr, empty_numeric, None)
+ self._test(Stats.stderr, empty_ma, None)
+ self._test(Stats.stderr, populated_numeric, 0.6633249581)
+ self._test(Stats.stderr, populated_ma, 0.6633249581)
+ self._test(Stats.stderr, null_mask, 0.6633249581)
+ self._test(Stats.stderr, full_mask, None)
+ self._test(Stats.stderr, partial_mask, 1.5)
+ self._test(Stats.stderr, two_elements_numeric, 1.5)
+ self._test(Stats.stderr, two_elements_ma, 1.5)
+ self._test(Stats.stderr, one_element_numeric, None)
+ self._test(Stats.stderr, one_element_ma, None)
+ self._test(Stats.stderr, one_neg_element_numeric, None)
+ self._test(Stats.stderr, one_neg_element_ma, None)
+ self._test(Stats.stderr, all_neg_numeric, 14.5086181285)
+ self._test(Stats.stderr, all_neg_ma, 14.5086181285)
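+
+    def test_stderr_two_elements_reference(self):
+        # Added cross-check: for [2.0, 5.0] the sample standard deviation is
+        # 3/sqrt(2), so the standard error is (3/sqrt(2))/sqrt(2) = 1.5,
+        # matching the two_elements_* rows above.
+        self._test(Stats.stderr, Numeric.array([2.0, 5.0]), 1.5)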
+
+ def test_stderr_1001(self):
+ self._test(Stats.stderr, n1001_nomissing_numpy, 9.1378334412)
+ self._test(Stats.stderr, n1001_nomissing_MA, 9.1378334412)
+ self._test(Stats.stderr, n1001_missing, 9.4773783249)
+
+ def test_stderr_1006(self):
+ self._test(Stats.stderr, n1006_nomissing_numpy, 9.1606040558)
+ self._test(Stats.stderr, n1006_nomissing_MA, 9.1606040558)
+ self._test(Stats.stderr, n1006_missing, 9.4972800124)
+
+ def test_wstderr_1006(self):
+ self._test(Stats.wstderr, (n1006_nomissing_numpy,w1006_nomissing_numpy), 7.40153)
+ self._test(Stats.wstderr, (n1006_nomissing_MA,w1006_nomissing_numpy), 7.40153)
+ self._test(Stats.wstderr, (n1006_nomissing_numpy,w1006_nomissing_MA), 7.40153)
+ self._test(Stats.wstderr, (n1006_nomissing_MA,w1006_nomissing_MA), 7.40153)
+ self._test(Stats.wstderr, (n1006_missing,w1006_nomissing_numpy), 7.49575)
+ self._test(Stats.wstderr, (n1006_missing,w1006_nomissing_MA), 7.49575)
+ self._test(Stats.wstderr, (n1006_nomissing_numpy,w1006_missing), 7.63832)
+ self._test(Stats.wstderr, (n1006_nomissing_MA,w1006_missing), 7.63832)
+ self._test(Stats.wstderr, (n1006_missing,w1006_missing), 7.75637)
+
+ def test_wstderr_misc(self):
+ self._test(Stats.wstderr, (Numeric.array([1,2,3,4,5]),Numeric.array([2,2,1,1,1])), 0.699854212224)
+
+ def test_t_misc(self):
+ self._test(Stats.t, empty_numeric, None)
+ self._test(Stats.t, empty_ma, None)
+ self._test(Stats.t, populated_numeric, 4.2211588241)
+ self._test(Stats.t, populated_ma, 4.2211588241)
+ self._test(Stats.t, null_mask, 4.2211588241)
+ self._test(Stats.t, full_mask, None)
+ self._test(Stats.t, partial_mask, 2.333333333)
+ self._test(Stats.t, two_elements_numeric, 2.333333333)
+ self._test(Stats.t, two_elements_ma, 2.333333333)
+ self._test(Stats.t, one_element_numeric, None)
+ self._test(Stats.t, one_element_ma, None)
+ self._test(Stats.t, one_neg_element_numeric, None)
+ self._test(Stats.t, one_neg_element_ma, None)
+ self._test(Stats.t, all_neg_numeric, -1.2406419302)
+ self._test(Stats.t, all_neg_ma, -1.2406419302)
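+
+    def test_t_two_elements_reference(self):
+        # Added cross-check: the fixtures above imply t = mean / stderr for
+        # the one-sample t statistic, so for [2.0, 5.0] (mean 3.5, standard
+        # error 1.5) t is 3.5 / 1.5 = 2.333..., as expected for the
+        # two_elements_* rows above.
+        self._test(Stats.t, Numeric.array([2.0, 5.0]), 3.5 / 1.5)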
+
+ def test_t_1001(self):
+ self._test(Stats.t, n1001_nomissing_numpy, 53.6232142061)
+ self._test(Stats.t, n1001_nomissing_MA, 53.6232142061)
+ self._test(Stats.t, n1001_missing, 53.7356019620)
+
+ def test_t_1006(self):
+ self._test(Stats.t, n1006_nomissing_numpy, 53.7628301585)
+ self._test(Stats.t, n1006_nomissing_MA, 53.7628301585)
+ self._test(Stats.t, n1006_missing, 53.8959524307)
+
+ def test_probit(self):
+        # all results calculated with the SAS V8.2 probit() function on
+        # Windows, except where indicated
+ self._stricttest(Stats.probit,0.0000001, -5.199337582)
+ self._stricttest(Stats.probit,0.000001, -4.753424309)
+ self._stricttest(Stats.probit,0.00001, -4.264890794)
+ self._stricttest(Stats.probit,0.0001, -3.719016485)
+ self._stricttest(Stats.probit,0.001, -3.090232306)
+ self._stricttest(Stats.probit,0.01, -2.326347874)
+ self._stricttest(Stats.probit,0.3000007, -0.524398510595) # SAS probit(0.3000007) gives -0.524398499
+ self._stricttest(Stats.probit,0.300007, -0.52438038)
+ self._stricttest(Stats.probit,0.30007, -0.524199196)
+ self._stricttest(Stats.probit,0.3007, -0.522388301)
+ self._stricttest(Stats.probit,0.307, -0.504371986)
+ self._stricttest(Stats.probit,0.37, -0.331853358115) # SAS probit(0.37) gives -0.331853346
+ self._stricttest(Stats.probit,0.5000001, 2.5066283e-7)
+ self._stricttest(Stats.probit,0.500001, 2.5066283e-6)
+ self._stricttest(Stats.probit,0.50001, 0.0000250663)
+ self._stricttest(Stats.probit,0.5001, 0.0002506628)
+ self._stricttest(Stats.probit,0.501, 0.0025066309)
+ self._stricttest(Stats.probit,0.51, 0.0250689083)
+ self._stricttest(Stats.probit,0.8456789, 1.0180752422)
+ self._stricttest(Stats.probit,0.845678, 1.01807143956) # SAS probit(0.845678) gives 1.0180714543
+ self._stricttest(Stats.probit,0.84567, 1.0180377846)
+ self._stricttest(Stats.probit,0.8456, 1.0177432241)
+ self._stricttest(Stats.probit,0.845, 1.0152220332)
+ self._stricttest(Stats.probit,0.84, 0.9944578832)
+ self._stricttest(Stats.probit,0.9999999, 5.1993375823)
+ self._stricttest(Stats.probit,0.999999, 4.7534243088)
+ self._stricttest(Stats.probit,0.99999, 4.2648907939)
+ self._stricttest(Stats.probit,0.9999, 3.7190164855)
+ self._stricttest(Stats.probit,0.999, 3.0902323062)
+ self._stricttest(Stats.probit,0.99, 2.326347874)
+ self._stricttest(Stats.probit,0.975, 1.9599639845)
+        self._stricttest(Stats.probit,0.025, -1.959963985)
+ self._stricttest(Stats.probit,0.0, -1.0e20) # SAS returns an error
+ self._stricttest(Stats.probit,0.1, -1.281551566)
+ self._stricttest(Stats.probit,0.2, -0.841621234)
+ self._stricttest(Stats.probit,0.3, -0.524400513)
+ self._stricttest(Stats.probit,0.4, -0.253347103)
+        self._stricttest(Stats.probit,0.5, -4.06379E-17) # NetEpi Analysis probit() returns 0.0
+ self._stricttest(Stats.probit,0.6, 0.2533471031)
+ self._stricttest(Stats.probit,0.7, 0.5244005127)
+ self._stricttest(Stats.probit,0.8, 0.8416212336)
+ self._stricttest(Stats.probit,0.9, 1.2815515655)
+ self._stricttest(Stats.probit,1.0, 1.0e20) # SAS returns an error
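+
+    def test_probit_cdf_roundtrip(self):
+        # Added sanity check (not among the SAS-derived values): probit() is
+        # the inverse of the standard normal CDF, so cdf_gauss_GL(probit(p))
+        # should recover p for any p strictly inside (0, 1).
+        for p in (0.025, 0.1, 0.5, 0.9, 0.975):
+            self.assertAlmostEqual(Stats.cdf_gauss_GL(Stats.probit(p)), p, 6)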
+
+ def test_cdf_gauss_GL(self):
+        # all results calculated with the SAS V8.2 probnorm() function on
+        # Windows, except where indicated
+ self._stricttest(Stats.cdf_gauss_GL,-10,7.619853E-24)
+ self._stricttest(Stats.cdf_gauss_GL,-9.5, 1.049452E-21)
+ self._stricttest(Stats.cdf_gauss_GL,-9.0,1.128588E-19)
+ self._stricttest(Stats.cdf_gauss_GL,-8.5, 9.479535E-18)
+ self._stricttest(Stats.cdf_gauss_GL,-8.0,6.220961E-16)
+ self._stricttest(Stats.cdf_gauss_GL,-7.5, 3.190892E-14)
+ self._stricttest(Stats.cdf_gauss_GL,-7.0,1.279813E-12)
+ self._stricttest(Stats.cdf_gauss_GL,-6.5, 4.016001E-11)
+ self._stricttest(Stats.cdf_gauss_GL,-6.0,9.865876E-10)
+ self._stricttest(Stats.cdf_gauss_GL,-5.5, 1.8989562E-8)
+ self._stricttest(Stats.cdf_gauss_GL,-5.0,2.8665157E-7)
+ self._stricttest(Stats.cdf_gauss_GL,-4.5, 3.3976731E-6)
+ self._stricttest(Stats.cdf_gauss_GL,-4.0,0.0000316712)
+ self._stricttest(Stats.cdf_gauss_GL,-3.5, 0.0002326291)
+ self._stricttest(Stats.cdf_gauss_GL,-3.0,0.001349898)
+ self._stricttest(Stats.cdf_gauss_GL,-2.5, 0.0062096653)
+ self._stricttest(Stats.cdf_gauss_GL,-2.0,0.0227501319)
+ self._stricttest(Stats.cdf_gauss_GL,-1.9, 0.0287165598)
+ self._stricttest(Stats.cdf_gauss_GL,-1.8, 0.0359303191)
+ self._stricttest(Stats.cdf_gauss_GL,-1.7, 0.0445654628)
+ self._stricttest(Stats.cdf_gauss_GL,-1.6, 0.0547992917)
+ self._stricttest(Stats.cdf_gauss_GL,-1.5, 0.0668072013)
+ self._stricttest(Stats.cdf_gauss_GL,-1.4, 0.0807566592)
+ self._stricttest(Stats.cdf_gauss_GL,-1.3, 0.0968004846)
+ self._stricttest(Stats.cdf_gauss_GL,-1.2, 0.1150696702)
+ self._stricttest(Stats.cdf_gauss_GL,-1.1, 0.1356660609)
+ self._stricttest(Stats.cdf_gauss_GL,-1.0,0.1586552539)
+ self._stricttest(Stats.cdf_gauss_GL,-0.9, 0.1840601253)
+ self._stricttest(Stats.cdf_gauss_GL,-0.8, 0.2118553986)
+ self._stricttest(Stats.cdf_gauss_GL,-0.7, 0.2419636522)
+ self._stricttest(Stats.cdf_gauss_GL,-0.6, 0.2742531178)
+ self._stricttest(Stats.cdf_gauss_GL,-0.5, 0.3085375387)
+ self._stricttest(Stats.cdf_gauss_GL,-0.4, 0.3445782584)
+ self._stricttest(Stats.cdf_gauss_GL,-0.3, 0.3820885778)
+ self._stricttest(Stats.cdf_gauss_GL,-0.2, 0.4207402906)
+ self._stricttest(Stats.cdf_gauss_GL,-0.1, 0.4601721627)
+ self._stricttest(Stats.cdf_gauss_GL,0.0,0.5)
+ self._stricttest(Stats.cdf_gauss_GL,0.1, 0.5398278373)
+ self._stricttest(Stats.cdf_gauss_GL,0.2, 0.5792597094)
+ self._stricttest(Stats.cdf_gauss_GL,0.3, 0.6179114222)
+ self._stricttest(Stats.cdf_gauss_GL,0.4, 0.6554217416)
+ self._stricttest(Stats.cdf_gauss_GL,0.5, 0.6914624613)
+ self._stricttest(Stats.cdf_gauss_GL,0.6, 0.7257468822)
+ self._stricttest(Stats.cdf_gauss_GL,0.7, 0.7580363478)
+ self._stricttest(Stats.cdf_gauss_GL,0.8, 0.7881446014)
+ self._stricttest(Stats.cdf_gauss_GL,0.9, 0.8159398747)
+ self._stricttest(Stats.cdf_gauss_GL,1.0,0.8413447461)
+ self._stricttest(Stats.cdf_gauss_GL,1.1, 0.8643339391)
+ self._stricttest(Stats.cdf_gauss_GL,1.2, 0.8849303298)
+ self._stricttest(Stats.cdf_gauss_GL,1.3, 0.9031995154)
+ self._stricttest(Stats.cdf_gauss_GL,1.4, 0.9192433408)
+ self._stricttest(Stats.cdf_gauss_GL,1.5, 0.9331927987)
+ self._stricttest(Stats.cdf_gauss_GL,1.6, 0.9452007083)
+ self._stricttest(Stats.cdf_gauss_GL,1.7, 0.9554345372)
+ self._stricttest(Stats.cdf_gauss_GL,1.8, 0.9640696809)
+ self._stricttest(Stats.cdf_gauss_GL,1.9, 0.9712834402)
+ self._stricttest(Stats.cdf_gauss_GL,2.0,0.9772498681)
+ self._stricttest(Stats.cdf_gauss_GL,2.5, 0.9937903347)
+ self._stricttest(Stats.cdf_gauss_GL,3.0,0.998650102)
+ self._stricttest(Stats.cdf_gauss_GL,3.5, 0.9997673709)
+ self._stricttest(Stats.cdf_gauss_GL,4.0,0.9999683288)
+ self._stricttest(Stats.cdf_gauss_GL,4.5, 0.9999966023)
+ self._stricttest(Stats.cdf_gauss_GL,5.0,0.9999997133)
+ self._stricttest(Stats.cdf_gauss_GL,5.5, 0.999999981)
+ self._stricttest(Stats.cdf_gauss_GL,6.0,0.999999999)
+ self._stricttest(Stats.cdf_gauss_GL,6.5, 1)
+ self._stricttest(Stats.cdf_gauss_GL,7.0,1)
+ self._stricttest(Stats.cdf_gauss_GL,7.5, 1)
+ self._stricttest(Stats.cdf_gauss_GL,8.0,1)
+ self._stricttest(Stats.cdf_gauss_GL,8.5, 1)
+ self._stricttest(Stats.cdf_gauss_GL,9.0,1)
+ self._stricttest(Stats.cdf_gauss_GL,9.5, 1)
+        self._stricttest(Stats.cdf_gauss_GL,10.0,1)
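+
+    def test_cdf_gauss_GL_symmetry(self):
+        # Added sanity check: the standard normal CDF satisfies
+        # F(-x) + F(x) = 1, as the SAS-checked values above already show
+        # (e.g. 0.1586552539 + 0.8413447461 at x = 1.0).
+        for x in (0.5, 1.0, 2.0, 3.0):
+            self.assertAlmostEqual(Stats.cdf_gauss_GL(x)
+                                   + Stats.cdf_gauss_GL(-x), 1.0, 8)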
+
+ def test_cdf_gauss(self):
+ if Cstats is None:
+ return
+        # all results calculated with the SAS V8.2 probnorm() function on
+        # Windows, except where indicated
+ self._stricttest(Cstats.cdf_gauss,-10,7.619853E-24)
+ self._stricttest(Cstats.cdf_gauss,-9.5, 1.049452E-21)
+ self._stricttest(Cstats.cdf_gauss,-9.0,1.128588E-19)
+ self._stricttest(Cstats.cdf_gauss,-8.5, 9.479535E-18)
+ self._stricttest(Cstats.cdf_gauss,-8.0,6.220961E-16)
+ self._stricttest(Cstats.cdf_gauss,-7.5, 3.190892E-14)
+ self._stricttest(Cstats.cdf_gauss,-7.0,1.279813E-12)
+ self._stricttest(Cstats.cdf_gauss,-6.5, 4.016001E-11)
+ self._stricttest(Cstats.cdf_gauss,-6.0,9.865876E-10)
+ self._stricttest(Cstats.cdf_gauss,-5.5, 1.8989562E-8)
+ self._stricttest(Cstats.cdf_gauss,-5.0,2.8665157E-7)
+ self._stricttest(Cstats.cdf_gauss,-4.5, 3.3976731E-6)
+ self._stricttest(Cstats.cdf_gauss,-4.0,0.0000316712)
+ self._stricttest(Cstats.cdf_gauss,-3.5, 0.0002326291)
+ self._stricttest(Cstats.cdf_gauss,-3.0,0.001349898)
+ self._stricttest(Cstats.cdf_gauss,-2.5, 0.0062096653)
+ self._stricttest(Cstats.cdf_gauss,-2.0,0.0227501319)
+ self._stricttest(Cstats.cdf_gauss,-1.9, 0.0287165598)
+ self._stricttest(Cstats.cdf_gauss,-1.8, 0.0359303191)
+ self._stricttest(Cstats.cdf_gauss,-1.7, 0.0445654628)
+ self._stricttest(Cstats.cdf_gauss,-1.6, 0.0547992917)
+ self._stricttest(Cstats.cdf_gauss,-1.5, 0.0668072013)
+ self._stricttest(Cstats.cdf_gauss,-1.4, 0.0807566592)
+ self._stricttest(Cstats.cdf_gauss,-1.3, 0.0968004846)
+ self._stricttest(Cstats.cdf_gauss,-1.2, 0.1150696702)
+ self._stricttest(Cstats.cdf_gauss,-1.1, 0.1356660609)
+ self._stricttest(Cstats.cdf_gauss,-1.0,0.1586552539)
+ self._stricttest(Cstats.cdf_gauss,-0.9, 0.1840601253)
+ self._stricttest(Cstats.cdf_gauss,-0.8, 0.2118553986)
+ self._stricttest(Cstats.cdf_gauss,-0.7, 0.2419636522)
+ self._stricttest(Cstats.cdf_gauss,-0.6, 0.2742531178)
+ self._stricttest(Cstats.cdf_gauss,-0.5, 0.3085375387)
+ self._stricttest(Cstats.cdf_gauss,-0.4, 0.3445782584)
+ self._stricttest(Cstats.cdf_gauss,-0.3, 0.3820885778)
+ self._stricttest(Cstats.cdf_gauss,-0.2, 0.4207402906)
+ self._stricttest(Cstats.cdf_gauss,-0.1, 0.4601721627)
+ self._stricttest(Cstats.cdf_gauss,0.0,0.5)
+ self._stricttest(Cstats.cdf_gauss,0.1, 0.5398278373)
+ self._stricttest(Cstats.cdf_gauss,0.2, 0.5792597094)
+ self._stricttest(Cstats.cdf_gauss,0.3, 0.6179114222)
+ self._stricttest(Cstats.cdf_gauss,0.4, 0.6554217416)
+ self._stricttest(Cstats.cdf_gauss,0.5, 0.6914624613)
+ self._stricttest(Cstats.cdf_gauss,0.6, 0.7257468822)
+ self._stricttest(Cstats.cdf_gauss,0.7, 0.7580363478)
+ self._stricttest(Cstats.cdf_gauss,0.8, 0.7881446014)
+ self._stricttest(Cstats.cdf_gauss,0.9, 0.8159398747)
+ self._stricttest(Cstats.cdf_gauss,1.0,0.8413447461)
+ self._stricttest(Cstats.cdf_gauss,1.1, 0.8643339391)
+ self._stricttest(Cstats.cdf_gauss,1.2, 0.8849303298)
+ self._stricttest(Cstats.cdf_gauss,1.3, 0.9031995154)
+ self._stricttest(Cstats.cdf_gauss,1.4, 0.9192433408)
+ self._stricttest(Cstats.cdf_gauss,1.5, 0.9331927987)
+ self._stricttest(Cstats.cdf_gauss,1.6, 0.9452007083)
+ self._stricttest(Cstats.cdf_gauss,1.7, 0.9554345372)
+ self._stricttest(Cstats.cdf_gauss,1.8, 0.9640696809)
+ self._stricttest(Cstats.cdf_gauss,1.9, 0.9712834402)
+ self._stricttest(Cstats.cdf_gauss,2.0,0.9772498681)
+ self._stricttest(Cstats.cdf_gauss,2.5, 0.9937903347)
+ self._stricttest(Cstats.cdf_gauss,3.0,0.998650102)
+ self._stricttest(Cstats.cdf_gauss,3.5, 0.9997673709)
+ self._stricttest(Cstats.cdf_gauss,4.0,0.9999683288)
+ self._stricttest(Cstats.cdf_gauss,4.5, 0.9999966023)
+ self._stricttest(Cstats.cdf_gauss,5.0,0.9999997133)
+ self._stricttest(Cstats.cdf_gauss,5.5, 0.999999981)
+ self._stricttest(Cstats.cdf_gauss,6.0,0.999999999)
+ self._stricttest(Cstats.cdf_gauss,6.5, 1)
+ self._stricttest(Cstats.cdf_gauss,7.0,1)
+ self._stricttest(Cstats.cdf_gauss,7.5, 1)
+ self._stricttest(Cstats.cdf_gauss,8.0,1)
+ self._stricttest(Cstats.cdf_gauss,8.5, 1)
+ self._stricttest(Cstats.cdf_gauss,9.0,1)
+ self._stricttest(Cstats.cdf_gauss,9.5, 1)
+        self._stricttest(Cstats.cdf_gauss,10.0,1)
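+
+    def test_cdf_gauss_impls_agree(self):
+        # Added sanity check: the compiled Cstats.cdf_gauss and the Python
+        # Stats.cdf_gauss_GL are verified against the same SAS values above,
+        # so the two implementations should also agree with each other.
+        if Cstats is None:
+            return
+        for x in (-3.0, -1.0, 0.0, 1.0, 3.0):
+            self.assertAlmostEqual(Cstats.cdf_gauss(x),
+                                   Stats.cdf_gauss_GL(x), 8)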
+
+ if rpy_tests:
+ def test_propcl_wald_openepi(self):
+            # results checked against those given by OpenEpi for conflev=0.95 - see
+ # http://www.openepi.com
+ self._stricttest(Stats.propcl,(81,263),(0.30798479087452474,0.2521901258121082, 0.36377945593694128),method='wald')
+ self._stricttest(Stats.propcl,(15,148),(0.10135135135135136,0.052730006059349958, 0.14997269664335275),method='wald')
+ self._stricttest(Stats.propcl,(0,20),(0.0,0.0, 0.0),method='wald')
+ self._stricttest(Stats.propcl,(1,29),(0.034482758620689655,0.0, 0.10089224371696039),method='wald')
+ self._stricttest(Stats.propcl,(37,12345),(0.0029971648440664236,0.0020328784065148978, 0.003961451281617949),method='wald')
+ self._stricttest(Stats.propcl,(15.45,148.3897),(0.10411773863010707, 0.054977863564014226, 0.15325761369619989),method='wald',noninteger='accept')
+ self._stricttest(Stats.propcl,(15.45,148.3897),(0.10135135135135136,0.052730006059349958, 0.14997269664335275),method='wald',noninteger='truncate')
+ self._stricttest(Stats.propcl,(14.65,148.3897),(0.10135135135135136,0.052730006059349958, 0.14997269664335275),method='wald',noninteger='round')
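+
+    if rpy_tests:
+        def test_propcl_wald_reference(self):
+            # Added cross-check (not from OpenEpi): the Wald limits are
+            #     p +/- z * sqrt(p * (1 - p) / n), clipped to [0, 1],
+            # with z = probit(1 - alpha/2); recomputing the (1, 29) row
+            # above by hand confirms both the clipped lower limit of 0.0
+            # and the upper limit.
+            import math
+            p = 1.0 / 29.0
+            z = Stats.probit(0.975)
+            se = math.sqrt(p * (1.0 - p) / 29.0)
+            self.assertEqual(max(0.0, p - z * se), 0.0)
+            self.assertAlmostEqual(p + z * se, 0.10089224371696039, 8)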
+
+ if rpy_tests:
+ def test_propcl_wilsonscore_openepi(self):
+            # results checked against those given by OpenEpi for conflev=0.95 - see
+ # http://www.openepi.com
+ self._stricttest(Stats.propcl,(81,263),(0.30798479087452474,0.25528851952360065, 0.36620957741579707),method='wilsonscore')
+ self._stricttest(Stats.propcl,(15,148),(0.10135135135135136,0.062386399313902226, 0.16048724222274599),method='wilsonscore')
+ self._stricttest(Stats.propcl,(0,20),(0.0, 0.0, 0.16112515999567781),method='wilsonscore')
+ self._stricttest(Stats.propcl,(1,29),(0.034482758620689655,0.0061132142310903054, 0.17175522023715417),method='wilsonscore')
+ self._stricttest(Stats.propcl,(0,1234567890),(0.0, 0.0, 3.111581708121877e-09),method='wilsonscore')
+ self._stricttest(Stats.propcl,(1,1234567890),(8.1000000737100002e-10,1.4298488937441067e-10, 4.5885968284487044e-09),method='wilsonscore')
+ self._stricttest(Stats.propcl,(1234567889,1234567890),(0.99999999919000004,0.99999999541140339, 0.99999999985701526),method='wilsonscore')
+ self._stricttest(Stats.propcl,(1234567890,1234567890),(1.0,0.99999999688841834, 1.0),method='wilsonscore')
+ self._stricttest(Stats.propcl,(37,12345),(0.0029971648440664236,0.0021753177561375232, 0.004128225708659488),method='wilsonscore')
+ self._stricttest(Stats.propcl,(15.45,148.3897),(0.10411773863010707,0.064573854610111614, 0.1636413093053814),method='wilsonscore',noninteger='accept')
+ self._stricttest(Stats.propcl,(15.45,148.3897),(0.10135135135135136,0.062386399313902226, 0.16048724222274599),method='wilsonscore',noninteger='truncate')
+ self._stricttest(Stats.propcl,(15.45,148.3897),(0.10135135135135136,0.062386399313902226, 0.16048724222274599),method='wilsonscore',noninteger='round')
+
+ if rpy_tests:
+ def test_propcl_fleiss_openepi(self):
+            # results checked against those given by OpenEpi for conflev=0.95 - see
+ # http://www.openepi.com
+ self._stricttest(Stats.propcl,(81,263),(0.30798479087452474,0.25350868194154358, 0.36817620147624969),method='fleissquadratic')
+ self._stricttest(Stats.propcl,(15,148),(0.10135135135135136,0.059778208217201693, 0.16444977986733081),method='fleissquadratic')
+ self._stricttest(Stats.propcl,(0,20),(0.0, 0.0, 0.20045334688974203),method='fleissquadratic')
+ self._stricttest(Stats.propcl,(1,29),(0.034482758620689655,0.001802640192242886, 0.19628175244935558),method='fleissquadratic')
+ self._stricttest(Stats.propcl,(37,12345),(0.0029971648440664236,0.0021412605029732137, 0.0041751050961838752),method='fleissquadratic')
+ self._stricttest(Stats.propcl,(15.45,148.3897),(0.10411773863010707,0.061959128597117254, 0.167581040827433),method='fleissquadratic',noninteger='accept')
+ self._stricttest(Stats.propcl,(15.45,148.3897),(0.10135135135135136,0.059778208217201693, 0.16444977986733081),method='fleissquadratic',noninteger='truncate')
+ self._stricttest(Stats.propcl,(15.45,147.8897),(0.10135135135135136,0.059778208217201693, 0.16444977986733081),method='fleissquadratic',noninteger='round')
+
+ if rpy_tests:
+ def test_propcl_exact_openepi(self):
+            # results checked against those given by OpenEpi for conflev=0.95 - see
+ # http://www.openepi.com
+ self._stricttest(Stats.propcl,(81,263),(0.30798479087452474,0.25273674558527148, 0.36762192260135129),method='exact')
+ self._stricttest(Stats.propcl,(15,148),(0.10135135135135136,0.057844010083448541, 0.16165049034947895),method='exact')
+ self._stricttest(Stats.propcl,(0,20),(0.0, 0.0, 0.16843347098308536),method='exact')
+ self._stricttest(Stats.propcl,(1,29),(0.034482758620689655,0.00087264688357992194, 0.17764429548872296),method='exact')
+ self._stricttest(Stats.propcl,(37,12345),(0.0029971648440664236,0.0021111318528365865, 0.0041288536907305717),method='exact')
+            # note that OpenEpi gives (0.0629, 0.1612) for the following, but Agresti's
+            # R code gives the same answers as Stats.propcl
+ self._stricttest(Stats.propcl,(15.45,148.3897),(0.10411773863010707,0.060026025569085137, 0.16482490419242998),method='exact',noninteger='accept')
+
+ if rpy_tests:
+ def test_propcl_modwald_openepi(self):
+            # results checked against those given by OpenEpi for conflev=0.95 - see
+ # http://www.openepi.com
+ self._stricttest(Stats.propcl,(81,263),(0.30798479087452474,0.25522066483389699, 0.36627743210550062),method='modwald')
+ self._stricttest(Stats.propcl,(15,148),(0.10135135135135136,0.061385974691729225, 0.16148766684491894),method='modwald')
+ self._stricttest(Stats.propcl,(0,20),(0.0, 0.0, 0.18980956277351352),method='modwald')
+ self._stricttest(Stats.propcl,(1,29),(0.034482758620689655,0.0, 0.18628651021203402),method='modwald')
+ self._stricttest(Stats.propcl,(37,12345),(0.0029971648440664236,0.0021631574386132956, 0.0041403860261837152),method='modwald')
+ self._stricttest(Stats.propcl,(15.45,148.3897),(0.10411773863010707,0.063601455596412559, 0.16461370831908045),method='modwald',noninteger='accept')
+
+ if rpy_tests:
+ def test_propcl_blaker_agresti(self):
+            # results checked against those given by Alan Agresti's R code at
+ # http://web.stat.ufl.edu/~aa/cda/R/one_sample/R1/
+ # which is a bit circular, since the NetEpi code is derived from this same R code...
+ self._stricttest(Stats.propcl,(81,263),(0.30798479087452474,0.25391674558527266, 0.36647192260135014),method='blaker')
+ self._stricttest(Stats.propcl,(15,148),(0.10135135135135136,0.057954010083448602, 0.16012049034947745),method='blaker')
+ self._stricttest(Stats.propcl,(0,20),(0.0, 0.0, 0.16013347098307706),method='blaker')
+ self._stricttest(Stats.propcl,(1,29),(0.034482758620689655,0.0017626468835799233, 0.16604429548871136),method='blaker')
+ self._stricttest(Stats.propcl,(81,263),(0.30798479087452474,0.23771735816553957, 0.38513832566974371),method='blaker',conflev=0.99)
+ self._stricttest(Stats.propcl,(15,148),(0.10135135135135136,0.048324559728836966, 0.181747531538276),method='blaker',conflev=0.99)
+ self._stricttest(Stats.propcl,(0,20),(0.0, 0.0, 0.224209500989066),method='blaker',conflev=0.99)
+ self._stricttest(Stats.propcl,(1,29),(0.034482758620689655,0.00034283133272151867, 0.22375030657503353),method='blaker',conflev=0.99)
+ self._stricttest(Stats.propcl,(37,12345),(0.0029971648440664236,0.0021311318528365866, 0.004098853690730573),method='blaker')
+            # the following values cause the Agresti R code to fail (probably reasonably
+            # so, as non-integer inputs aren't entirely kosher), so the results are not
+            # independently checked, but they are almost identical to the exact results
+ self._stricttest(Stats.propcl,(15.45,148.3897),(0.10411773863010707,0.06002602556908513, 0.16482490419243001),method='blaker',noninteger='accept')
+
+ if rpy_tests:
+ def test_propcl_badinpts(self):
+ # tests for bad inputs (code shared by all methods)
+ self.assertRaises(ValueError, Stats.propcl,-1,20)
+ self.assertRaises(ValueError, Stats.propcl,1,-20)
+ self.assertRaises(ValueError, Stats.propcl,-1,-20)
+ self.assertRaises(ValueError, Stats.propcl,21,20)
+ self.assertRaises(ValueError, Stats.propcl,20.0000000001,20.0,noninteger='accept')
+ self.assertRaises(ValueError, Stats.propcl,21,0)
+ self.assertRaises(ValueError, Stats.propcl,21,42,method='humperdinck')
+ self.assertRaises(ValueError, Stats.propcl,21,42,conflev=95)
+ self.assertRaises(ValueError, Stats.propcl,21,42,conflev=0.45)
+ self.assertRaises(ValueError, Stats.propcl,21,42,conflev=-0.95)
+ self.assertRaises(ValueError, Stats.propcl,21.3,42,conflev=0.95)
+ self.assertRaises(ValueError, Stats.propcl,21,42.2,conflev=0.95)
+ self.assertRaises(ValueError, Stats.propcl,21.4,42.2,conflev=0.95)
+ self.assertRaises(ValueError, Stats.propcl,21.4,42.2,conflev=0.95,noninteger='reject')
+ self.assertRaises(ValueError, Stats.propcl,21.4,42.2,conflev=0.95,noninteger='undecided')
+
+ if rpy_tests:
+ def test_ratecl_rg_openepi(self):
+            # results checked against those given by OpenEpi for conflev=0.95 - see
+            # http://www.openepi.com, except for the case of 123456789,123456789987654321
+            # (and vice versa), where results were checked against those given by
+            # SAS V8.2 using the Daly macro at
+ # http://www.listserv.uga.edu/cgi-bin/wa?A2=ind9809d&L=sas-l&F=&S=&P=17761
+ self._stricttest(Stats.ratecl,(5,25),(20.0, 8.324556212833075, 48.050609518782899),method='rg',basepop=100)
+ self._stricttest(Stats.ratecl,(66,2098),(3.145853194, 2.4715123437810247, 4.0041848627893932),method='rg',basepop=100)
+ self._stricttest(Stats.ratecl,(66,123456789),(5.3460000486486007e-05,4.2000386849949533e-05, 6.8046317340491808e-05),method='rg',basepop=100)
+ self._stricttest(Stats.ratecl,(123456789,123456789987654321),(9.9999999199999995e-08,9.9982360291405311e-08, 1.0001764045305388e-07),basepop=100,method='rg')
+ self._stricttest(Stats.ratecl,(123456789987654321,123456789),(100000000800.00002,100000000242.1844, 100000001357.81537),method='rg',basepop=100)
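+
+    if rpy_tests:
+        def test_ratecl_point_estimate(self):
+            # Added illustration: ratecl() returns a (rate, lower, upper)
+            # tuple whose point estimate is events / person-time * basepop
+            # (here 5 / 25 * 100 = 20.0, as above); only the confidence
+            # limits differ between methods.
+            rate, lcl, ucl = Stats.ratecl(5, 25, method='rg', basepop=100)
+            self.assertEqual(rate, 20.0)
+            self.assert_(lcl < rate < ucl)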
+
+ if rpy_tests:
+ def test_ratecl_byar_openepi(self):
+            # results checked against those given by OpenEpi for conflev=0.95 - see
+            # http://www.openepi.com, except for the case of 123456789,123456789987654321
+            # (and vice versa), where results were checked against those given by
+            # SAS V8.2 using the Daly macro at
+ # http://www.listserv.uga.edu/cgi-bin/wa?A2=ind9809d&L=sas-l&F=&S=&P=17761
+ self._stricttest(Stats.ratecl,(5,25),(20.0, 6.4453908037658438, 46.67265670862578),method='byar',basepop=100)
+ self._stricttest(Stats.ratecl,(66,2098),(3.145853194, 2.4328873947163197, 4.0023584323110661),method='byar',basepop=100)
+ self._stricttest(Stats.ratecl,(66,123456789),(5.3460000486486007e-05,4.1344002184560613e-05, 6.8015279345946824e-05),method='byar',basepop=100)
+ self._stricttest(Stats.ratecl,(123456789,123456789987654321),(9.9999999199999995e-08,9.9982360291405311e-08, 1.0001764045305388e-07),method='byar',basepop=100)
+ self._stricttest(Stats.ratecl,(123456789987654321,123456789),(100000000800.00002,100000000242.18448, 100000001357.81552),method='byar',basepop=100)
+
+ if rpy_tests:
+ def test_ratecl_normal_openepi(self):
+            # results checked against those given by OpenEpi for conflev=0.95 - see
+            # http://www.openepi.com, except for the case of 123456789,123456789987654321
+            # (and vice versa), where results were checked against those given by
+            # SAS V8.2 using the Daly macro at
+ # http://www.listserv.uga.edu/cgi-bin/wa?A2=ind9809d&L=sas-l&F=&S=&P=17761
+ self._stricttest(Stats.ratecl,(5,25),(20.0, 2.4695490624761534, 37.530450937523852),method='normal',basepop=100)
+ self._stricttest(Stats.ratecl,(66,2098),(3.145853194, 2.3869007246642115, 3.9048056623710594),method='normal',basepop=100)
+ self._stricttest(Stats.ratecl,(66,123456789),(5.3460000486486007e-05,4.0562513903917558e-05, 6.6357487069054457e-05),method='normal',basepop=100)
+ self._stricttest(Stats.ratecl,(123456789,123456789987654321),(9.9999999199999995e-08,9.9982360291405311e-08, 1.0001764045305388e-07),method='normal',basepop=100)
+ self._stricttest(Stats.ratecl,(123456789987654321,123456789),(100000000800.00002,100000000242.18448, 100000001357.81555),method='normal',basepop=100)
+
+ if rpy_tests:
+ def test_ratecl_daly_openepi(self):
+ # results checked against those given by SAS V8.2 using macro by Daly at
+ # http://www.listserv.uga.edu/cgi-bin/wa?A2=ind9809d&L=sas-l&F=&S=&P=17761
+ self._stricttest(Stats.ratecl,(5,25),(20.0, 6.4939455604736835, 46.673328317290668),method='daly',basepop=100)
+ self._stricttest(Stats.ratecl,(66,2098),(3.145853194, 2.4330026319657412, 4.0022965892520039),method='daly',basepop=100)
+ self._stricttest(Stats.ratecl,(5,25),(20.0, 4.3117129626092785, 56.599037644092057),method='daly',conflev=0.99,basepop=100)
+ self._stricttest(Stats.ratecl,(66,2098),(3.145853194,2.2379465861483445, 4.2877203401224167),method='daly',conflev=0.99,basepop=100)
+ self._stricttest(Stats.ratecl,(66,123456789),(5.3460000486486007e-05,4.1345960503347653e-05, 6.8014228397360178e-05),method='daly',basepop=100)
+ self._stricttest(Stats.ratecl,(123456789,123456789987654321),(9.9999999199999995e-08,9.9982360291405311e-08, 1.0001764045305388e-07),method='daly',basepop=100)
+ self._stricttest(Stats.ratecl,(123456789987654321,123456789),(100000000800.00002,100000000242.18448, 100000001357.81554),method='daly',basepop=100)
+
+ if rpy_tests:
+ def test_ratecl_badinpts(self):
+ # tests for bad inputs (code shared by all methods)
+ self.assertRaises(ValueError, Stats.ratecl,-1,20)
+ self.assertRaises(ValueError, Stats.ratecl,1,-20)
+ self.assertRaises(ValueError, Stats.ratecl,-1,-20)
+ self.assertRaises(ValueError, Stats.ratecl,21,0)
+ self.assertRaises(ValueError, Stats.ratecl,21,42,method='humperdinck')
+ self.assertRaises(ValueError, Stats.ratecl,21,42,conflev=95)
+ self.assertRaises(ValueError, Stats.ratecl,21,42,conflev=0.45)
+ self.assertRaises(ValueError, Stats.ratecl,21,42,conflev=-0.95)
+ self.assertRaises(ValueError, Stats.ratecl,21,42,basepop=0)
+ self.assertRaises(ValueError, Stats.ratecl,21,42,basepop=-100)
+
+ if rpy_tests:
+ def test_freqcl_byar_openepi(self):
+            # results checked against those given by OpenEpi for conflev=0.95 - see
+            # http://www.openepi.com, except for the 123456789 and 123456789987654321
+            # cases, where results were checked against those given by SAS V8.2
+            # using the Daly macro at
+ # http://www.listserv.uga.edu/cgi-bin/wa?A2=ind9809d&L=sas-l&F=&S=&P=17761
+ self._stricttest(Stats.freqcl,(66,),(66.0, 51.041977541148384, 83.969479909886161),method='byar')
+ self._stricttest(Stats.freqcl,(3,),(3.0, 0.602972562598596, 8.7653644442764342),method='byar')
+ self._stricttest(Stats.freqcl,(123456789,),(123456789.0, 123435012.56950366, 123478568.32489048),method='byar')
+ self._stricttest(Stats.freqcl,(123456789987654321,),(1.2345678998765432e+17, 1.2345678929899318e+17, 1.2345679067631546e+17),method='byar')
+ self._stricttest(Stats.freqcl,(0,),(0.0, 0.0, 3.6680118656769207),method='byar')
+
+ if rpy_tests:
+ def test_freqcl_daly_openepi(self):
+ # results checked against those given by SAS V8.2 using macro by Daly at
+ # http://www.listserv.uga.edu/cgi-bin/wa?A2=ind9809d&L=sas-l&F=&S=&P=17761
+ self._stricttest(Stats.freqcl,(66,),(66.0, 51.044395218641284, 83.968182442506816),method='daly')
+ self._stricttest(Stats.freqcl,(3,),(3.0, 0.61867212289560147, 8.7672730697423251),method='daly')
+ self._stricttest(Stats.freqcl,(123456789,),(123456789.0, 123435012.5696615, 123478568.32473257),method='daly')
+ self._stricttest(Stats.freqcl,(123456789987654321,),(1.2345678998765432e+17, 1.2345678929899317e+17, 1.2345679067631547e+17),method='daly')
+ self._stricttest(Stats.freqcl,(0,),(0.0, 0.0, 3.6888794541139354),method='daly')
+
+ def test_freqcl_badinpts(self):
+ # tests for bad inputs (code shared by all methods)
+ self.assertRaises(ValueError, Stats.freqcl,-1)
+ self.assertRaises(ValueError, Stats.freqcl,21,method='humperdinck')
+ self.assertRaises(ValueError, Stats.freqcl,21,conflev=95)
+ self.assertRaises(ValueError, Stats.freqcl,21,conflev=0.45)
+ self.assertRaises(ValueError, Stats.freqcl,21,conflev=-0.95)
+
+ if rpy_tests:
+ def test_wncl_misc_daly(self):
+ self._stricttest(Stats.wncl, (empty_numeric,), (0.0,0.0,0.0), conflev=0.95, method='daly')
+ self._stricttest(Stats.wncl, (empty_ma,), (0.0,0.0,0.0), conflev=0.95, method='daly')
+ self._stricttest(Stats.wncl, (populated_numeric,), (14.0, 4.545761892331579, 32.671329822103466), conflev=0.95, method='daly')
+ self._stricttest(Stats.wncl, (populated_ma,), (14.0, 4.545761892331579, 32.671329822103466),conflev=0.95, method='daly')
+ self._stricttest(Stats.wncl, (full_mask,),(0.0,0.0,0.0) ,conflev=0.95, method='daly')
+ self._stricttest(Stats.wncl, (null_mask,), (14.0, 4.545761892331579, 32.671329822103466), conflev=0.95, method='daly')
+ self._stricttest(Stats.wncl, (partial_mask,), (7.0, 0.84773247490387726, 25.286406837033859), conflev=0.95, method='daly')
+ self._stricttest(Stats.wncl, (two_elements_numeric,), (7.0, 0.84773247490387726, 25.286406837033859), conflev=0.95, method='daly')
+ self._stricttest(Stats.wncl, (two_elements_ma,), (7.0, 0.84773247490387726, 25.286406837033859), conflev=0.95, method='daly')
+ self._stricttest(Stats.wncl, (one_element_numeric,), (2.0, 0.050635615968579795, 11.143286781877794), conflev=0.95, method='daly')
+ self._stricttest(Stats.wncl, (one_element_ma,), (2.0, 0.050635615968579795, 11.143286781877794), conflev=0.95, method='daly')
+ self._stricttest(Stats.wncl, (one_neg_element_numeric,), (0.0,0.0,0.0),conflev=0.95, method='daly')
+ self._stricttest(Stats.wncl, (one_neg_element_ma,), (0.0,0.0,0.0),conflev=0.95, method='daly')
+ self._stricttest(Stats.wncl, (one_masked_element_ma,), (0.0,0.0,0.0),conflev=0.95, method='daly')
+ self._stricttest(Stats.wncl, (all_neg_numeric,), (0.0,0.0,0.0),conflev=0.95, method='daly')
+ self._stricttest(Stats.wncl, (all_neg_ma,), (0.0,0.0,0.0),conflev=0.95, method='daly')
+
+ if rpy_tests:
+        def test_wncl_misc_daly_exclnpwgts(self):
+ # repeat with exclude_nonpositive_weights=True
+ #self._stricttest(Stats.wncl, (empty_numeric,), 0,exclude_nonpositive_weights=True,conflev=0.95, method='daly')
+ #self._stricttest(Stats.wncl, (empty_ma,), 0, exclude_nonpositive_weights=True,conflev=0.95, method='daly')
+ #self._stricttest(Stats.wncl, (populated_numeric,), 14, exclude_nonpositive_weights=True,conflev=0.95, method='daly')
+ #self._stricttest(Stats.wncl, (populated_ma,), 14, exclude_nonpositive_weights=True,conflev=0.95, method='daly')
+ #self._stricttest(Stats.wncl, (full_mask,), 0, exclude_nonpositive_weights=True,conflev=0.95, method='daly')
+ #self._stricttest(Stats.wncl, (null_mask,), 14, exclude_nonpositive_weights=True,conflev=0.95, method='daly')
+ #self._stricttest(Stats.wncl, (partial_mask,), 7, exclude_nonpositive_weights=True,conflev=0.95, method='daly')
+ #self._stricttest(Stats.wncl, (two_elements_numeric,), 7, exclude_nonpositive_weights=True,conflev=0.95, method='daly')
+ #self._stricttest(Stats.wncl, (two_elements_ma,), 7, exclude_nonpositive_weights=True,conflev=0.95, method='daly')
+ #self._stricttest(Stats.wncl, (one_element_numeric,), 2, exclude_nonpositive_weights=True,conflev=0.95, method='daly')
+ #self._stricttest(Stats.wncl, (one_element_ma,), 2, exclude_nonpositive_weights=True,conflev=0.95, method='daly')
+ #self._stricttest(Stats.wncl, (one_neg_element_numeric,), 0, exclude_nonpositive_weights=True,conflev=0.95, method='daly')
+ #self._stricttest(Stats.wncl, (one_neg_element_ma,), 0, exclude_nonpositive_weights=True,conflev=0.95, method='daly')
+ #self._stricttest(Stats.wncl, (one_masked_element_ma,), 0, exclude_nonpositive_weights=True,conflev=0.95, method='daly')
+ #self._stricttest(Stats.wncl, (all_neg_numeric,), 0, exclude_nonpositive_weights=True,conflev=0.95, method='daly')
+ #self._stricttest(Stats.wncl, (all_neg_ma,), 0, exclude_nonpositive_weights=True,conflev=0.95, method='daly')
+            pass
+
+ if rpy_tests:
+ def test_wncl_1001(self):
+        # now with 1001-element arrays
+ #self._stricttest(Stats.wncl, (w1001_nomissing_numpy,), 163515)
+ #self._stricttest(Stats.wncl, (w1001_nomissing_MA,), 163515)
+ #self._stricttest(Stats.wncl, (w1001_missing,), 154046.66666667)
+            pass
+
+ if rpy_tests:
+ def test_wncl_1001_exclnpwgts(self):
+ # repeat with exclude_nonpositive_weights=True
+ #self._stricttest(Stats.wncl, (w1001_nomissing_numpy,), 163515, exclude_nonpositive_weights=True)
+ #self._stricttest(Stats.wncl, (w1001_nomissing_MA,), 163515, exclude_nonpositive_weights=True)
+ #self._stricttest(Stats.wncl, (w1001_missing,), 154046.66666667, exclude_nonpositive_weights=True)
+            pass
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tests/summ.py b/tests/summ.py
new file mode 100644
index 0000000..44b72ea
--- /dev/null
+++ b/tests/summ.py
@@ -0,0 +1,423 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: summ.py 2859 2007-10-18 07:45:37Z andrewm $
+
+from SOOMv0 import *
+import MA
+import unittest
+
+def _get_ds():
+ ds = Dataset('apples')
+ ds.addcolumnfromseq('variety', label='Variety',
+ coltype='categorical', datatype='int',
+ all_value=-1,
+ data=[1, 1, 2, 3, 4, 1, 2, 5],
+ use_outtrans=True, outtrans={
+ 1: 'Granny Smith',
+ 2: 'Golden Delicious',
+ 3: 'Fuji',
+ 4: 'Braeburn',
+ 5: 'Pink Lady',
+ })
+ ds.addcolumnfromseq('grade', label='Grade',
+ coltype='categorical', datatype='int',
+ all_value=-1,
+ data=[1, 3, 2, 1, 1, 1, 3, 0],
+ use_outtrans=True, outtrans={
+ 0: 'Extra Fine',
+ 1: 'Fine',
+ 2: 'Good',
+ 3: 'Poor',
+ })
+ ds.addcolumnfromseq('size', label='Size (in cm)',
+ coltype='scalar', datatype='float',
+ data=[6.0, 8.4, 6.5, 6.6, 9.2, 6.8, 9.2, 6.5])
+ ds.addcolumnfromseq('supplier', label='Supplier',
+ coltype='categorical', datatype='int',
+ data=[1, 1, 0, 0, 2, 2, 1, 0],
+ missingvalues={0: None},
+ use_outtrans=True, outtrans={
+ 0: 'No',
+ 1: 'Mistyvale',
+ 2: 'Moss Glen',
+ })
+ ds.addcolumnfromseq('weighting', label='Statistical weighting',
+ coltype='scalar', datatype='float',
+ data=[8.9, 1.7, 2.8, 2.2, 4.1, 3.7, 7.1, 7.6])
+ return ds
+
+class summ_test(unittest.TestCase):
+
+ def assertListNear(self, first, second, prec=2):
+ def ma_round(v, prec):
+ if v is None or type(v) is MA.MaskedScalar:
+ return None
+ return round(v, prec)
+ def ma_fmt(v, prec):
+ if v is None:
+ return 'None'
+ return '%.*f' % (prec, v)
+ first = [ma_round(v, prec) for v in first]
+ second = [ma_round(v, prec) for v in second]
+ first_str = ', '.join([ma_fmt(v, prec) for v in first])
+ second_str = ', '.join([ma_fmt(v, prec) for v in second])
+ self.assertEqual(first, second,
+ '[%s] != [%s]' % (first_str, second_str))
+
+ def test_none(self):
+ ds = _get_ds()
+ summ = ds.summ()
+ self.assertEqual(list(summ['row_ordinal']), [0])
+ self.assertEqual(list(summ['_freq_']), [8])
+
+ def test_one(self):
+ ds = _get_ds()
+ summ = ds.summ('variety')
+ self.assertEqual(list(summ['row_ordinal']), range(5))
+ self.assertEqual(list(summ['variety']), range(1,6))
+ self.assertEqual(list(summ['_freq_']), [3, 2, 1, 1, 1])
+ self.assertEqual(list(summ['_condcols_']), [('variety',)] * 5)
+
+ def test_errors(self):
+ ds = _get_ds()
+ self.assertRaises(Error, ds.summ, 'size')
+ self.assertRaises(Error, ds.summ, condcol)
+ self.assertRaises(Error, ds.summ, condcol('size'))
+ self.assertRaises(Error, condcol, 'variety', 0)
+ self.assertRaises(Error, condcol, 'variety', order)
+ self.assertRaises(Error, condcol, 'variety', order(), order())
+ self.assertRaises(TypeError, coalesce, foo=1)
+
+ def test_two(self):
+ ds = _get_ds()
+ summ = ds.summ('variety', 'grade')
+ self.assertEqual(list(summ['row_ordinal']), range(20))
+ self.assertEqual(list(summ['variety']), [1, 1, 1, 1, 2, 2, 2, 2, 3, 3,
+ 3, 3, 4, 4, 4, 4, 5, 5, 5, 5])
+ self.assertEqual(list(summ['grade']), [0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
+ 2, 3, 0, 1, 2, 3, 0, 1, 2, 3])
+ self.assertEqual(list(summ['_freq_']), [0, 2, 0, 1, 0, 0, 1, 1, 0, 1,
+ 0, 0, 0, 1, 0, 0, 1, 0, 0, 0])
+ self.assertEqual(list(summ['_condcols_']), [('variety', 'grade')] * 20)
+
+ def test_allcalc(self):
+ ds = _get_ds()
+ summ = ds.summ('variety', 'grade', allcalc=True)
+ self.assertEqual(list(summ['row_ordinal']), range(30))
+ self.assertEqual(list(summ['variety']),
+ [-1, 1, 2, 3, 4, 5, -1, -1, -1, -1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5])
+ self.assertEqual(list(summ['grade']),
+ [-1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 0, 1, 2, 3,
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3])
+ self.assertEqual(list(summ['_freq_']),
+ [8, 3, 2, 1, 1, 1, 1, 4, 1, 2, 0, 2, 0, 1, 0,
+ 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0])
+ self.assertEqual(list(summ['_condcols_']),
+ [()] + [('variety',)] * 5 + [('grade',)] * 4 +
+ [('variety', 'grade')] * 20)
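+
+    # Note (added for clarity): allcalc=True prepends the grand-total row
+    # and the one-way marginal rows, filling summed-over positions with each
+    # column's all_value (-1 here); '_condcols_' records which columns each
+    # summary row is conditioned on.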
+
+ def test_stats(self):
+ ds = _get_ds()
+ summ = ds.summ('variety', minimum('size'), mean('size'))
+ self.assertEqual(list(summ['row_ordinal']), range(5))
+ self.assertEqual(list(summ['variety']), range(1,6))
+ self.assertEqual(list(summ['_freq_']), [3, 2, 1, 1, 1])
+ self.assertListNear(summ['minimum_of_size'],
+ [6.0, 6.5, 6.6, 9.2, 6.5])
+ self.assertListNear(summ['mean_of_size'],
+ [7.07, 7.85, 6.6, 9.2, 6.5], prec=2)
+ self.assertEqual(list(summ['_condcols_']), [('variety',)] * 5)
+
+ def test_stats_percentile(self):
+ ds = _get_ds()
+ summ = ds.summ('variety', p25('size'), p75('size'))
+ self.assertEqual(list(summ['row_ordinal']), range(5))
+ self.assertEqual(list(summ['variety']), range(1,6))
+ self.assertEqual(list(summ['_freq_']), [3, 2, 1, 1, 1])
+ self.assertListNear(summ['p25_of_size'],
+ [6.0, 6.5, 6.6, 9.2, 6.5])
+ self.assertListNear(summ['p75_of_size'],
+ [8.4, 9.2, 6.6, 9.2, 6.5])
+ self.assertEqual(list(summ['_condcols_']), [('variety',)] * 5)
+
+ def test_stats_wgtfreq1(self):
+ def check():
+ self.assertEqual(list(summ['row_ordinal']), range(5))
+ self.assertEqual(list(summ['_freq_']), [3, 2, 1, 1, 1])
+ self.assertListNear(summ['freq_wgtd_by_weighting'],
+ [14.3, 9.9, 2.2, 4.1, 7.6], prec=4)
+ self.assertEqual(list(summ['_condcols_']), [('variety',)] * 5)
+ ds = _get_ds()
+ summ = ds.summ('variety', freq(), weightcol='weighting')
+ check()
+ summ = ds.summ('variety', freq(weightcol='weighting'))
+ check()
+ ds.weightcol = 'weighting'
+ summ = ds.summ('variety', freq())
+ check()
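+
+    # Note (added for clarity): the three check() passes above exercise the
+    # three equivalent ways of supplying a weight column: per-statistic
+    # (freq(weightcol=...)), per-summary (ds.summ(..., weightcol=...)), and
+    # as the dataset default (ds.weightcol = ...).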
+
+ def test_stats_wgtmean(self):
+ def check():
+ self.assertEqual(list(summ['row_ordinal']), range(5))
+ self.assertEqual(list(summ['_freq_']), [3, 2, 1, 1, 1])
+ self.assertListNear(list(summ['mean_of_size_wgtd_by_weighting']),
+ [6.49, 8.44, 6.6, 9.2, 6.5])
+ self.assertEqual(list(summ['_condcols_']), [('variety',)] * 5)
+ ds = _get_ds()
+ summ = ds.summ('variety', mean('size'), weightcol='weighting')
+ check()
+ summ = ds.summ('variety', mean('size', weightcol='weighting'))
+ check()
+ ds.weightcol = 'weighting'
+ summ = ds.summ('variety', mean('size'))
+ check()
+
+ def test_stats_applyto(self):
+ ds = _get_ds()
+ summ = ds.summ('variety', applyto('size', minimum, mean))
+ self.assertEqual(list(summ['row_ordinal']), range(5))
+ self.assertEqual(list(summ['variety']), range(1,6))
+ self.assertEqual(list(summ['_freq_']), [3, 2, 1, 1, 1])
+ self.assertEqual(list(summ['minimum_of_size']),
+ [6.0, 6.5, 6.6, 9.2, 6.5])
+ self.assertEqual(list(summ['mean_of_size']),
+ [7.0666666666666666, 7.85, 6.6, 9.2, 6.5])
+ self.assertEqual(list(summ['_condcols_']), [('variety',)] * 5)
+
+ def test_filternamed(self):
+ ds = _get_ds()
+ ds.makefilter('not_delicious', expr='variety != 2',
+ label='Not Delicious!')
+ summ = ds.summ('variety', filtername='not_delicious')
+ self.assertEqual(list(summ['row_ordinal']), range(4))
+ self.assertEqual(list(summ['variety']), [1, 3, 4, 5])
+ self.assertEqual(list(summ['_freq_']), [3, 1, 1, 1])
+ self.assertEqual(list(summ['_condcols_']), [('variety',)] * 4)
+
+ def test_filteranon(self):
+ ds = _get_ds()
+ summ = ds.summ('variety',
+ filterexpr='variety != 2', filterlabel='Not Delicious!')
+ self.assertEqual(list(summ['row_ordinal']), range(4))
+ self.assertEqual(list(summ['variety']), [1, 3, 4, 5])
+ self.assertEqual(list(summ['_freq_']), [3, 1, 1, 1])
+ self.assertEqual(list(summ['_condcols_']), [('variety',)] * 4)
+
+ def test_missing(self):
+ ds = _get_ds()
+ summ = ds.summ('variety', 'supplier')
+ self.assertEqual(list(summ['row_ordinal']), range(15))
+ self.assertEqual(list(summ['variety']), [1, 1, 1, 2, 2, 2, 3, 3,
+ 3, 4, 4, 4, 5, 5, 5])
+ self.assertEqual(list(summ['supplier']), [0, 1, 2, 0, 1, 2, 0, 1,
+ 2, 0, 1, 2, 0, 1, 2])
+ self.assertEqual(list(summ['_freq_']), [0, 2, 1, 1, 1, 0, 1, 0,
+ 0, 0, 0, 1, 1, 0, 0])
+ self.assertEqual(list(summ['_condcols_']),
+ [('variety', 'supplier')] * 15)
+
+ def test_proportions(self):
+ def check():
+ self.assertListNear(summ['_prop_of_all-grade-supplier'],
+ [ 1, 0.125, 0.5, 0.125, 0.25,
+ 0.375, 0.375, 0.25, 0.125, 0,
+ 0, 0.125, 0.125, 0.25, 0.125,
+ 0, 0, 0, 0.25, 0], prec=4)
+ self.assertListNear(summ['_prop_of_all-supplier'],
+ [1, 1, 1, 1, 1,
+ 0.375, 0.375, 0.25, 1, 0,
+ 0, 0.25, 0.25, 0.5, 1,
+ 0, 0, 0, 1, 0], prec=4)
+ self.assertListNear(summ['_prop_of_all-grade'],
+ [1, 0.125, 0.5, 0.125, 0.25,
+ 1, 1, 1, 0.3333, 0,
+ 0, 0.3333, 0.3333, 1, 0.3333,
+ 0, 0, 0, 0.6667, 0], prec=4)
+ ds = _get_ds()
+ summ = ds.summ('grade', 'supplier', proportions=True)
+ check()
+ summ = ds.summ('grade', 'supplier',
+ freq(weightcol='weighting'), proportions=True)
+ check()
+
+ def test_weighted_proportions1(self):
+ def check(wgted_freq_col_name, wgted_freq_col_values):
+ self.assertEqual(list(summ['row_ordinal']), range(5))
+ self.assertEqual(list(summ['_freq_']), [8, 1, 4, 1, 2])
+ self.assertListNear(summ[wgted_freq_col_name],
+ wgted_freq_col_values, prec=4)
+ self.assertListNear(summ['_prop_of_all-grade'],
+ [1.0, 0.1995, 0.4961,
+ 0.0735, 0.2310], prec=4)
+ self.assertEqual(list(summ['_condcols_']), [()] + [('grade',)] * 4)
+
+ ds = _get_ds()
+ colname = 'freq_wgtd_by_weighting'
+ colvalues = [38.1, 7.6, 18.9, 2.8, 8.8]
+ summ = ds.summ('grade', freq(), weightcol='weighting', proportions=True)
+ check(colname, colvalues)
+ summ = ds.summ('grade', freq(weightcol='weighting'),
+ weightcol='weighting', proportions=True)
+ check(colname, colvalues)
+ summ = ds.summ('grade', freq(), weightcol='weighting', proportions=True)
+ check(colname, colvalues)
+ ds.weightcol = 'weighting'
+ summ = ds.summ('grade', freq(), proportions=True)
+ check(colname, colvalues)
+ summ = ds.summ('grade', freq(weightcol='weighting'), proportions=True)
+ check(colname, colvalues)
+ colname = 'freq_wgtd_by_size'
+ colvalues = [59.2, 6.5, 28.6, 6.5, 17.6]
+ summ = ds.summ('grade', freq(weightcol='size'), proportions=True)
+ check(colname, colvalues)
+ ds.weightcol = None
+ summ = ds.summ('grade', freq(weightcol='size'),
+ weightcol='weighting', proportions=True)
+ check(colname, colvalues)
+
+ def test_weighted_proportions2(self):
+ def check():
+ self.assertListNear(summ['_prop_of_all-grade-supplier'],
+ [1.00, 0.20, 0.50, 0.07, 0.23,
+ 0.33, 0.46, 0.20, 0.20, 0.00,
+ 0.00, 0.06, 0.23, 0.20, 0.07,
+ 0.00, 0.00, 0.00, 0.23, 0.00])
+ self.assertListNear(summ['_prop_of_all-supplier'],
+ [1.00, 1.00, 1.00, 1.00, 1.00,
+ 0.33, 0.46, 0.20, 1.00, 0.00,
+ 0.00, 0.12, 0.47, 0.41, 1.00,
+ 0.00, 0.00, 0.00, 1.00, 0.00])
+ self.assertListNear(summ['_prop_of_all-grade'],
+ [1.00, 0.20, 0.50, 0.07, 0.23,
+ 1.00, 1.00, 1.00, 0.60, 0.00,
+ 0.00, 0.17, 0.50, 1.00, 0.22,
+ 0.00, 0.00, 0.00, 0.50, 0.00])
+ ds = _get_ds()
+ summ = ds.summ('grade', 'supplier',
+ proportions=True, weightcol='weighting')
+ check()
+ ds.weightcol = 'weighting'
+ summ = ds.summ('grade', 'supplier',
+ proportions=True)
+ check()
+ summ = ds.summ('grade', 'supplier',
+ freq(weightcol='weighting'), proportions=True )
+ check()
+
+ def test_weighted_and_filtered_proportions(self):
+ ds = _get_ds()
+ summ = ds.summ('grade', 'supplier',
+ proportions=True, weightcol='weighting',
+ filterexpr='size > 6.5')
+ # AM - checked these results by hand, 15-Nov-04
+ self.assertListNear(summ['_prop_of_all-grade-supplier'],
+ [1.00, 0.53, 0.47, 0.12, 0.47, 0.41,
+ 0.12, 0.00, 0.41, 0.00, 0.47, 0.00])
+ self.assertListNear(summ['_prop_of_all-supplier'],
+ [1.00, 1.00, 1.00, 0.12, 0.47, 0.41,
+ 0.22, 0.00, 0.78, 0.00, 1.00, 0.00])
+ self.assertListNear(summ['_prop_of_all-grade'],
+ [1.00, 0.53, 0.47, 1.00, 1.00, 1.00,
+ 1.00, 0.00, 1.00, 0.00, 1.00, 0.00])
+
+ def test_value_suppression(self):
+ ds = _get_ds()
+ summ = ds.summ(condcol('grade', suppress(3)),
+ condcol('supplier', suppress(None)),
+ mean('size'))
+ self.assertListNear(summ['row_ordinal'], [0, 1, 2, 3, 4, 5])
+ self.assertListNear(summ['grade'], [0, 0, 1, 1, 2, 2])
+ self.assertListNear(summ['supplier'], [1, 2, 1, 2, 1, 2])
+ self.assertListNear(summ['mean_of_size'],
+ [None, None, 6.0, 8.0, None, None])
+ # And some boundary cases:
+ summ = ds.summ(condcol('grade', suppress()), 'supplier')
+ self.assertEqual(len(summ), 12)
+
+ def test_value_suppression_and_filter(self):
+ ds = _get_ds()
+ summ = ds.summ(condcol('grade', suppress(3)),
+ condcol('supplier', suppress(None)),
+ mean('size'),
+ filterexpr='supplier != 1')
+ self.assertListNear(summ['row_ordinal'], [0, 1, 2])
+ self.assertListNear(summ['grade'], [0, 1, 2])
+ self.assertListNear(summ['supplier'], [2, 2, 2])
+ self.assertListNear(summ['mean_of_size'], [None, 8.0, None])
+
+ def test_value_suppression_and_propn_nomt(self):
+ ds = _get_ds()
+ summ = ds.summ(condcol('grade', suppress(3)),
+ condcol('supplier', suppress(None)),
+ mean('size'),
+ proportions=True, nomt=True)
+ self.assertListNear(summ['grade'], [0, 0, 1, 1, 2, 2])
+ self.assertListNear(summ['supplier'], [1, 2, 1, 2, 1, 2])
+ self.assertListNear(summ['mean_of_size'],
+ [None, None, 6.0, 8.0, None, None])
+ self.assertListNear(summ['_prop_of_all-grade-supplier'],
+ [ 0.00, 0.00, 0.13, 0.25, 0.00, 0.00])
+ self.assertListNear(summ['_prop_of_all-supplier'],
+ [ 0.00, 0.00, 0.25, 0.50, 0.00, 0.00])
+ self.assertListNear(summ['_prop_of_all-grade'],
+ [ 0.00, 0.00, 0.33, 1.00, 0.00, 0.00])
+
+ def test_coalesce(self):
+ ds = _get_ds()
+ summ = ds.summ(condcol('grade', coalesce(2,3), coalesce(0, 1)),
+ 'supplier', mean('size'))
+ self.assertListNear(summ['grade'], [0, 0, 0, 2, 2, 2])
+ self.assertListNear(summ['supplier'], [None, 1, 2, None, 1, 2])
+ self.assertListNear(summ['mean_of_size'],
+ [6.55, 6.0, 8.0, 6.5, 8.8, None])
+ translated = map(summ['grade'].do_outtrans, summ['grade'])
+ self.assertEqual(translated,
+ ['Extra Fine, Fine'] * 3 + ['Good, Poor'] * 3)
+
+ def test_coalesce_value_label(self):
+ ds = _get_ds()
+ grade = condcol('grade',
+ coalesce(2,3, value=9, label='Cheap'),
+ coalesce(0,1, value=8, label='Expensive'))
+ summ = ds.summ(grade, 'supplier', mean('size'))
+ self.assertListNear(summ['grade'], [8, 8, 8, 9, 9, 9])
+ self.assertListNear(summ['supplier'], [None, 1, 2, None, 1, 2])
+ self.assertListNear(summ['mean_of_size'],
+ [6.55, 6.0, 8.0, 6.5, 8.8, None])
+ translated = map(summ['grade'].do_outtrans, summ['grade'])
+ self.assertEqual(translated, ['Expensive'] * 3 + ['Cheap'] * 3)
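+        # Behaviour asserted by the two coalesce tests above: coalesce(a, b)
+        # folds the listed category values into one group, keyed by the first
+        # listed value and labelled with the comma-joined member labels by
+        # default, or by an explicit value=/label= as here.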
+
+ def test_coalesce_value_error(self):
+ ds = _get_ds()
+ grade = condcol('grade', coalesce(2,3, value=0))
+ self.assertRaises(Error, ds.summ, grade)
+
+ def test_order(self):
+ ds = _get_ds()
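+        # order(...) pins the output order of a discrete column's values,
+        # while reversed() below is the SOOMv0 conditioning parameter (not
+        # the builtin), yielding the column's values in reverse order.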
+ summ = ds.summ(condcol('variety', order(4, 3, 2, 1, 5)),
+ condcol('grade', reversed()),
+ mean('size'))
+ self.assertListNear(summ['variety'],
+ [4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 5, 5, 5, 5])
+ self.assertListNear(summ['grade'],
+ [3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0])
+ self.assertListNear(summ['mean_of_size'],
+ [None, None, 9.20, None, None, None, 6.60, None, 9.20, 6.50,
+ None, None, 8.40, None, 6.40, None, None, None, None, 6.50])
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tests/summ_higher_order.py b/tests/summ_higher_order.py
new file mode 100644
index 0000000..8e424cf
--- /dev/null
+++ b/tests/summ_higher_order.py
@@ -0,0 +1,340 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: summ_higher_order.py 2626 2007-03-09 04:35:54Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/tests/summ_higher_order.py,v $
+
+from SOOMv0 import *
+import unittest
+
+def _get_ds():
+ cola_data=['c','c','c','a','b','b','a','c','c','c','a','b','c','a','c','c','c','c','c','b','b','c','b','a','a','b','b','a','c','a',
+ 'b','c','b','b','c','c','b','a','a','a','b','c','b','a','b','b','a','c','c','a','c','a','b','a','c','c','b','c','c','b','a','b','c',
+ 'c','a','c','a','a','b','b','a','c','c','a','a','b','b','c','c','c','c','a','c','a','a','a','c','a','c','a','a','a','c','a','b','b',
+ 'a','c','b','c','b','c','a','a','b','a','b','c','a','b','a','a','b','b','c','b','c','c','c','c','a','b','c','c','b','a','a','c','a',
+ 'c','c','c','c','a','a','a','b','b','c','c','b','a','b','c','c','b','a','a','a','b','b','b','a','c','b','c','a','c','c','c','a','b',
+ 'c','c','a','c','c','c','c','c','c','c','b','c','b','c','a','a','a','a','b','a','b','b','c','b','c','c','b','a','b','c','b','a','a',
+ 'a','a','c','c','a']
+ colb_data=['a','b','a','c','c','a','a','c','a','b','a','c','c','a','c','c','c','c','b','b','c','b','b','b','b','a','b','c','b','b',
+ 'b','a','b','c','a','a','a','c','b','c','a','b','c','c','c','c','c','c','c','b','a','c','a','a','c','b','c','a','a','a','c','a','a',
+ 'c','c','b','b','c','a','c','b','a','c','c','a','c','b','b','a','b','b','c','a','c','c','b','a','b','c','b','c','c','a','c','c','c',
+ 'c','a','b','a','c','c','a','a','a','b','b','a','c','c','b','a','b','c','a','c','b','b','b','c','b','a','c','c','b','c','a','b','a',
+ 'a','a','c','a','c','a','a','a','b','a','b','b','b','c','c','a','b','b','c','b','c','c','a','a','b','c','a','a','a','a','c','a','c',
+ 'c','c','b','a','b','c','c','b','a','b','a','b','c','c','c','a','a','b','b','c','c','c','c','b','a','c','b','c','a','a','b','c','a',
+ 'b','a','a','c','a']
+ colc_data=['a','a','c','b','a','b','b','c','b','c','c','c','a','c','a','c','a','b','a','a','b','a','c','c','a','c','b','a','b','b',
+ 'b','a','c','c','b','c','b','c','c','a','b','c','a','a','c','b','c','b','b','c','a','a','c','b','b','c','b','c','b','b','b','c','b',
+ 'b','b','c','c','a','a','a','a','b','b','b','a','b','a','b','a','c','a','b','c','c','a','a','c','b','b','a','a','a','c','c','c','c',
+ 'a','a','c','c','b','b','c','a','b','c','b','c','c','a','b','b','a','b','a','c','a','a','b','c','b','a','b','c','b','c','b','a','c',
+ 'b','a','a','c','a','a','c','b','b','a','a','c','b','c','b','b','b','a','b','a','c','b','c','b','b','c','a','b','b','b','b','b','a',
+ 'b','a','b','a','c','a','b','c','c','b','c','b','c','b','a','b','c','c','a','c','c','a','b','a','a','a','c','b','c','a','c','b','a',
+ 'b','c','b','b','c']
+ cold_data=['c','a','b','c','b','a','c','c','c','c','c','b','b','a','b','a','b','c','b','a','c','a','a','b','b','c','b','b','b','a',
+ 'c','c','c','a','c','b','a','c','b','b','a','c','c','b','a','c','b','c','b','b','c','b','b','c','a','b','b','c','c','c','c','c','c',
+ 'b','a','a','c','a','b','b','c','a','c','b','b','c','a','b','b','c','c','a','a','a','c','a','a','b','b','a','a','b','c','a','b','c',
+ 'b','c','c','b','b','b','b','b','b','a','c','b','a','a','b','b','c','b','b','b','b','b','c','a','c','b','c','b','b','a','b','a','c',
+ 'a','b','b','c','c','b','b','a','b','a','a','c','b','c','a','c','b','b','c','c','a','c','b','c','b','c','b','a','c','c','b','a','a',
+ 'a','a','c','c','b','b','b','b','a','b','c','c','a','a','a','a','c','b','b','a','b','b','a','b','a','c','b','b','b','c','c','c','a',
+ 'b','a','b','b','a']
+ cole_data=[0.3571170775020100,0.7761841811128800,0.5312500551954100,0.7021558627961900,0.6575277478702000,0.5282215660103600,
+ 0.4827106695076000,0.0941159148207400,0.7926955035853600,0.0969011011053300,0.2589461045614100,0.0754211275258200,0.3515378927586300,
+ 0.2391162716034400,0.8484617233502000,0.2886365928168500,0.8726803776215200,0.9333756696122500,0.7385261527907600,0.2568027839329100,
+ 0.6309593360083900,0.4043286244405100,0.3900779971806600,0.3176672413561800,0.5117277393637800,0.6056445071500000,0.4005583056251300,
+ 0.4108953747949000,0.9142303824956600,0.6488470810692900,0.5778574815848100,0.0083997203076257,0.7854928871549100,0.7203341115826400,
+ 0.4442541131024500,0.8191027561291600,0.9306504092787600,0.5744151415184200,0.1756906007303300,0.5825717545033300,0.3044757914284500,
+ 0.7436330764291900,0.8762406203319500,0.9753779065680500,0.8142601027219800,0.7156344501840100,0.1328585614137600,0.8701852205536200,
+ 0.3867031793979400,0.6085042164700500,0.1060311734238700,0.0560597325936238,0.2751670974656700,0.5376034726098100,0.1633436969310700,
+ 0.7105775245980200,0.8224636012792800,0.5036567745281600,0.7212445092020700,0.6967697305124100,0.2162879888044100,0.5146122800719900,
+ 0.0257929023475353,0.4412799968576400,0.2694013594972900,0.1086779018438700,0.4722444617525800,0.6883156228290400,0.7134534626796100,
+ 0.8128526773363500,0.5603648477980700,0.8197487182075800,0.6852849874111200,0.0947032622502600,0.7302073937515700,0.9391698352709200,
+ 0.6110993859409800,0.1954966574886300,0.4773301693970900,0.0624214066483180,0.9930433849771700,0.3388694158470500,0.5998717279172800,
+ 0.5288271343004000,0.2736410071484900,0.4660192087600000,0.4013773917226900,0.0678981151747900,0.5408607626058400,0.9314801548288500,
+ 0.9592310557883300,0.7224279272940100,0.0443864157630068,0.5673041355643900,0.6706950290457700,0.8633093986955000,0.2843092732524000,
+ 0.7915918556002800,0.8056836886357900,0.5476102016622200,0.3147646092412800,0.6972641277579800,0.6712932245206500,0.0859611123269200,
+ 0.2142461488089700,0.7444272505791900,0.1335863262105600,0.5768507391106500,0.2414977560944300,0.1012862436945000,0.3856627360850800,
+ 0.3012459978001400,0.8644447768407100,0.1341387895560500,0.0402606548928938,0.3713916844555100,0.7549977678596000,0.5735631238546000,
+ 0.2245133962596300,0.8326630880276900,0.1983434442423000,0.7670700684036400,0.7767444787438700,0.1167921992562600,0.4297029853005400,
+ 0.6873500154760400,0.9997965442015700,0.2789010392869300,0.5395008155794300,0.3255139204326600,0.9606196684579400,0.0727675473656300,
+ 0.9884289121201300,0.5951148516475700,0.0480222492702409,0.4014916207648300,0.6481790387296000,0.5991297110911100,0.1195951048841600,
+ 0.8376663508069000,0.9869529795772100,0.2947044290111800,0.9515349324566900,0.1491484279507500,0.2987826407415700,0.2679868532661200,
+ 0.1220279699759600,0.1839851612150600,0.4334474887854600,0.8688170844078100,0.0692642056705700,0.8229022756325500,0.3538812824310100,
+ 0.8505122772653100,0.1458675321870700,0.9539552028076500,0.4509884950010900,0.6192829784095600,0.3153717817344500,0.6486643611680000,
+ 0.7771429791008700,0.5022215496293300,0.6774720599304200,0.8373987105849100,0.9223509467776600,0.1408785675377000,0.8263698037836500,
+ 0.3677158394678100,0.7338101885904600,0.5453924134119300,0.9750851713936200,0.4240025828704200,0.6555361038332500,0.0099530573980664,
+ 0.2133658422219400,0.1942526098313900,0.2494984191141500,0.2456394770395100,0.3090840961360700,0.4396128251401700,0.2391627208512100,
+ 0.2948221393371100,0.8492826022437200,0.6634358785410500,0.4008184133101300,0.4450878186361300,0.9385009617258300,0.4723023080603600,
+ 0.1978112199333500,0.0051124254265392,0.6313251567219000,0.8746297265750400,0.9285209001640400,0.8961469954327400,0.2281481024008900,
+ 0.0463868235453902,0.8852949775221200,0.1832539025616100,0.2388267806911900,0.4134999259437900]
+ colf_data=[0.2211140195006100,0.4360741406847100,0.8613446042227300,0.3287501173693400,0.3726444083138500,0.5584100981049200,
+ 0.1459010635250700,0.6773561475227300,0.3194984334146100,0.0721468879245900,0.8571478104484900,0.6273517909587100,0.1998612257651300,
+ 0.0229027066486435,0.1169962129169100,0.3450669489545100,0.7447339197363300,0.2099804762797300,0.4145630017922000,0.1287495378073000,
+ 0.4100549069280000,0.9491590778106600,0.4594824097396200,0.7963610574586100,0.0886591603460900,0.7485923505148800,0.8800058294460200,
+ 0.0742016998465100,0.7864648824494600,0.9806749285108700,0.4340192626388800,0.2946438674324300,0.5858129312218200,0.1684787795732100,
+ 0.5006322164557000,0.1411859226139100,0.6483001386040300,0.8667062259589800,0.8874083281901700,0.9374856529466200,0.8792743053656400,
+ 0.3914916330908800,0.7249504866660300,0.6859824143750500,0.3820324355652700,0.4205282663090700,0.5164968615940200,0.1421931712618900,
+ 0.0196812832819676,0.9981697062021900,0.1755885231194900,0.2947326103666400,0.6474645760131300,0.2692372092368200,0.1501168111106900,
+ 0.8747214311150400,0.5941169679137400,0.8134219678181300,0.8300854032067900,0.5348064496809600,0.6361397489142300,0.4672722781390200,
+ 0.4085832379798300,0.3521629848294700,0.9214896629198800,0.5397186472731200,0.5769544260469000,0.3518576432726600,0.2548196559095800,
+ 0.2568630926575800,0.6790804218869100,0.9233051151611400,0.5564388272149600,0.4809615563046900,0.2671961096428300,0.5309178873574900,
+ 0.9366439906585200,0.7178008867044900,0.4742393938238900,0.2739507305780100,0.0325503824430287,0.3058399694533200,0.2035991578379600,
+ 0.7624100748274500,0.3256649041202200,0.6021165203312900,0.2312960425537500,0.3223130597371200,0.1910035122144000,0.9777761488118000,
+ 0.4510701496391800,0.3411166976863100,0.0590524105630128,0.1893069600636600,0.3624322551127600,0.5505328232192100,0.2980210265601100,
+ 0.8214905093524000,0.5951572915516500,0.0164013002144179,0.4369477822617300,0.1448122440580300,0.0540642389348076,0.7410481887595000,
+ 0.4306576612548200,0.6152215048741600,0.6732566979123500,0.2016790216796400,0.4125241885951300,0.6613378965581400,0.6762378791702100,
+ 0.6273308562242100,0.3980484234160000,0.3758690256513000,0.9505785540447400,0.5432862451035900,0.3546959964347500,0.9624781957652700,
+ 0.1521718400307800,0.4872834875654800,0.0711031100112400,0.5547880174381600,0.9489611549996500,0.6918532027359300,0.9653979674751800,
+ 0.1580477748802100,0.5239184789936600,0.6256258937649500,0.6644906581679700,0.8498434479580400,0.0884186807500200,0.7239693048056000,
+ 0.5220856892513500,0.4746197459635400,0.0132281947942582,0.4744862464603400,0.8283824826722800,0.0824273471173000,0.2823508127044600,
+ 0.9465440269310600,0.0735692624345200,0.7231765024937500,0.7558123528751600,0.1957031610401800,0.1186832571954800,0.2554836055522200,
+ 0.2569638924938400,0.2698718464327300,0.0796054196914600,0.8639274262189500,0.0600095764081970,0.8431675736062000,0.1715707034671500,
+ 0.5270473810504400,0.9663848676562200,0.0477994550242086,0.5613348076871200,0.3687937689799700,0.8370007960298100,0.8878275844677400,
+ 0.9222255008864300,0.6077968169040000,0.7749797104741300,0.1546510803302000,0.1648641033865800,0.7828307150783100,0.2208448561005500,
+ 0.8652610689705500,0.1270430177110400,0.4437622211145900,0.0762391863745800,0.7827070801438300,0.2073762911406200,0.1463289685297400,
+ 0.0503129400547189,0.8952161883447300,0.5186702029400800,0.9281161268838200,0.3755391353627300,0.9205843191224000,0.8542798887259700,
+ 0.7465386962269100,0.5741802410102300,0.0629949341821400,0.7173693341749500,0.7518019935823000,0.2204383091164900,0.3672280033897700,
+ 0.3966629590823600,0.3096910791051000,0.8951317094709400,0.1297102375560000,0.9097227886830000,0.4116849542649800,0.3119660850204300,
+ 0.2198845791723000,0.4694267620655800,0.3389506350918400,0.0473834290389826,0.4535701896313400]
+ ds = Dataset('randomdata')
+ ds.addcolumnfromseq('cola', label='Column A',
+ coltype='ordinal', datatype='str',
+ all_value='.',
+ data=cola_data
+ )
+ ds.addcolumnfromseq('colb', label='Column B',
+ coltype='ordinal', datatype='str',
+ all_value='.',
+ data=colb_data
+ )
+ ds.addcolumnfromseq('colc', label='Column C',
+ coltype='ordinal', datatype='str',
+ all_value='.',
+ data=colc_data
+ )
+ ds.addcolumnfromseq('cold', label='Column D',
+ coltype='ordinal', datatype='str',
+ all_value='.',
+ data=cold_data
+ )
+ ds.addcolumnfromseq('cole', label='Column E',
+ coltype='scalar', datatype='float',
+ data=cole_data
+ )
+ ds.addcolumnfromseq('colf', label='Column F',
+ coltype='scalar', datatype='float',
+ data=colf_data
+ )
+ return ds
+
+class summ_test(unittest.TestCase):
+
+ def assertListNear(self, first, second, prec=2):
+ first = [round(v, prec) for v in first]
+ second = [round(v, prec) for v in second]
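+        # Both sides are rounded to `prec` digits before comparison, so e.g.
+        # assertListNear([0.1234], [0.1239], prec=2) passes (illustrative
+        # values, not from the test data).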
+ first_str = ', '.join(['%.*f' % (prec, v) for v in first])
+ second_str = ', '.join(['%.*f' % (prec, v) for v in second])
+ self.assertEqual(first, second,
+ '[%s]:%d != [%s]:%d' % (first_str, len(first), second_str, len(second)))
+
+ def assertOrdinal(self, a, b):
+ a=list(a)
+ b=list(b)
+ self.assertEqual(a, b)
+
+ def test_props(self):
+ ds = _get_ds()
+ ds_summ = ds.summ('cola','colb','colc',proportions=True)
+ freq = ds_summ['_freq_'][37:]
+ all_cola_colb_colc = ds_summ['_prop_of_all-cola-colb-colc'][37:]
+ all_colb_colc = ds_summ['_prop_of_all-colb-colc'][37:]
+ all_cola_colc = ds_summ['_prop_of_all-cola-colc'][37:]
+ all_cola_colb = ds_summ['_prop_of_all-cola-colb'][37:]
+ all_colc = ds_summ['_prop_of_all-colc'][37:]
+ all_colb = ds_summ['_prop_of_all-colb'][37:]
+ all_cola = ds_summ['_prop_of_all-cola'][37:]
+ sas_freq = [ 4, 8, 8, 6, 7, 6,11, 8, 7, 2, 6, 6, 5, 6, 6, 6, 7,11,12,10,10, 8, 6, 7, 7, 16, 4]
+ sas_all_cola_colb_colc = [0.020,0.040,0.040,0.030,0.035,0.030,0.055,0.040,0.035,0.010,0.030,0.030,0.025,0.030,0.030,0.030,0.035,
+ 0.055,0.060,0.050,0.050,0.040,0.030,0.035,0.035,0.080,0.020]
+ sas_all_colb_colc = [0.06154,0.12308,0.12308,0.09231,0.10769,0.09231,0.16923,0.12308,0.10769,0.03636,0.10909,0.10909,
+ 0.09091,0.10909,0.10909,0.10909,0.12727,0.20000,0.15000,0.12500,0.12500,0.10000,0.07500,0.08750,0.08750,0.20000,0.05000]
+ sas_all_cola_colc = [0.06061,0.12121,0.12121,0.10526,0.12281,0.10526,0.14286,0.10390,0.09091,0.03030,0.09091,0.09091,0.08772,
+ 0.10526,0.10526,0.07792,0.09091,0.14286,0.18182,0.15152,0.15152,0.14035,0.10526,0.12281,0.09091,0.20779,0.05195]
+ sas_all_cola_colb = [0.06557,0.10811,0.12308,0.09836,0.09459,0.09231,0.18033,0.10811,0.10769,0.03279,0.08108,0.09231,0.08197,
+ 0.08108,0.09231,0.09836,0.09459,0.16923,0.19672,0.13514,0.15385,0.13115,0.08108,0.10769,0.11475,0.21622,0.06154]
+ sas_all_colc = [0.20000,0.40000,0.40000,0.31579,0.36842,0.31579,0.42308,0.30769,0.26923,0.14286,0.42857,0.42857,0.29412,
+ 0.35294,0.35294,0.25000,0.29167,0.45833,0.37500,0.31250,0.31250,0.38095,0.28571,0.33333,0.25926,0.59259,0.14815]
+ sas_all_colb = [0.19048,0.34783,0.38095,0.28571,0.30435,0.28571,0.52381,0.34783,0.33333,0.15385,0.31579,0.26087,0.38462,
+ 0.31579,0.26087,0.46154,0.36842,0.47826,0.44444,0.31250,0.47619,0.29630,0.18750,0.33333,0.25926,0.50000,0.19048]
+ sas_all_cola = [0.22222,0.33333,0.33333,0.31579,0.36842,0.31579,0.45833,0.25806,0.31818,0.11111,0.25000,0.25000,0.26316,
+ 0.31579,0.31579,0.25000,0.22581,0.50000,0.66667,0.41667,0.41667,0.42105,0.31579,0.36842,0.29167,0.51613,0.18182]
+
+ self.assertEqual(freq, sas_freq)
+ self.assertListNear(all_cola_colb_colc,sas_all_cola_colb_colc,prec=5)
+ self.assertListNear(all_colb_colc,sas_all_colb_colc,prec=5)
+ self.assertListNear(all_cola_colc,sas_all_cola_colc,prec=5)
+ self.assertListNear(all_cola_colb,sas_all_cola_colb,prec=5)
+ self.assertListNear(all_colc,sas_all_colc,prec=5)
+ self.assertListNear(all_colb,sas_all_colb,prec=5)
+ self.assertListNear(all_cola,sas_all_cola,prec=5)
+
+ def test_means(self):
+ ds = _get_ds()
+ ds_summ = ds.summ('cola','colb','colc',mean('cole'),allcalc=True)
+ meane = ds_summ['mean_of_cole']
+ sas_meane = [ 0.50458, 0.44366, 0.56386, 0.51332, 0.50864, 0.49714, 0.50660, 0.53073, 0.46595, 0.52402,
+ 0.44798, 0.43934, 0.44351, 0.59345, 0.52470, 0.57434, 0.50945, 0.52714, 0.50715, 0.47214, 0.40956, 0.45255,
+ 0.57775, 0.49251, 0.61496, 0.55367, 0.49071, 0.49590, 0.46343, 0.50073, 0.55046, 0.56836,
+ 0.39956, 0.52351, 0.55141, 0.47971, 0.49562, 0.27308, 0.51863, 0.46478, 0.50418, 0.36631, 0.45969,
+ 0.52704, 0.33833, 0.43244, 0.74026, 0.55376, 0.58420, 0.48332, 0.40147, 0.68242, 0.60226, 0.51806,
+ 0.59493, 0.48074, 0.45459, 0.59876, 0.66965, 0.43645, 0.44200, 0.54612, 0.53363, 0.33305]
+ self.assertListNear(meane,sas_meane,prec=5)
+
+ def test_wgt_means(self):
+ ds = _get_ds()
+ ds_summ = ds.summ('cola','colb','colc',mean('cole'),weightcol='colf',allcalc=True)
+ wgt_meane = ds_summ['mean_of_cole_wgtd_by_colf']
+ sas_wgt_meane = [ 0.51011, 0.46426, 0.56850, 0.50277, 0.50307, 0.51894, 0.50849, 0.51706, 0.49314,
+ 0.52289, 0.45728, 0.48350, 0.45242, 0.59330, 0.51480, 0.59430, 0.47552, 0.55798, 0.48309, 0.54532, 0.42662, 0.43904,
+ 0.54814, 0.51334, 0.62257, 0.48056, 0.52840, 0.48942, 0.38417, 0.52220, 0.55449,
+ 0.57464, 0.44026, 0.53482, 0.54327, 0.50189, 0.48420, 0.24795, 0.52692, 0.45436, 0.62705, 0.41431,
+ 0.44871, 0.58258, 0.32366, 0.41221, 0.75019, 0.55875, 0.59515, 0.48492, 0.37255, 0.70013, 0.55947,
+ 0.61484, 0.60037, 0.35862, 0.49284, 0.61293, 0.60316, 0.53818, 0.50413, 0.47065, 0.55433, 0.29520]
+ self.assertListNear(wgt_meane,sas_wgt_meane,prec=5)
+
+ def _get_nofilt_summds(self, *extras, **kwargs):
+ kwargs['allcalc'] = True
+ ds = _get_ds()
+ ds_summ = ds.summ('cola','colb','cold', *extras, **kwargs)
+ self.assertOrdinal(ds_summ['cola'], '.abc......aaabbbcccaaabbbccc.........aaaaaaaaabbbbbbbbbccccccccc')
+ self.assertOrdinal(ds_summ['colb'], '....abc...abcabcabc.........aaabbbcccaaabbbcccaaabbbcccaaabbbccc')
+ self.assertOrdinal(ds_summ['cold'], '.......abc.........abcabcabcabcabcabcabcabcabcabcabcabcabcabcabc')
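+        # In the ordinal strings above, '.' is each column's all_value (set
+        # in _get_ds), i.e. the marginal "all values" level that allcalc=True
+        # adds; the strings spell out the expected conditioning level of
+        # every summary row.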
+ return ds_summ
+
+ def test_unwgt_nofilt_mean(self):
+ ds_summ = self._get_nofilt_summds(mean('cole'))
+ unwgt_nofilt_mean_cole = ds_summ['mean_of_cole']
+ sas_unwgt_nofilt_mean_cole = [0.50458,0.44366,0.56386,0.51332,0.50864,0.49714,0.50660,0.51620,0.47888,0.52696,0.44798,0.43934,0.44351,
+ 0.51694,0.36432,0.47279,0.59345,0.52470,0.57434,0.52998,0.49459,0.67090,0.50945,0.52714,0.50715,0.50646,0.56066,0.46411,0.54617,
+ 0.53420,0.46103,0.53788,0.44752,0.54443,0.48288,0.46730,0.59361,0.46283,0.46257,0.41362,0.69769,0.29699,0.51735,0.48251,0.36270,
+ 0.49168,0.60288,0.57069,0.61814,0.41933,0.36849,0.72608,0.53671,0.53720,0.64900,0.59710,0.56558,0.43953,0.48115,0.65337,0.35508,
+ 0.44297,0.48595,0.63867]
+        # SAS emits the marginal summary blocks in a different order from
+        # SOOM, so reorder the SAS expected values before comparing.
+ s = sas_unwgt_nofilt_mean_cole
+ sas_unwgt_nofilt_mean_cole = s[0:13] + s[16:19] + s[22:25] + s[13:16] + s[19:22] + s[25:]
+ self.assertListNear(unwgt_nofilt_mean_cole,sas_unwgt_nofilt_mean_cole,prec=5)
+
+ def _get_filt_summds(self, *extras, **kwargs):
+ kwargs['allcalc'] = True
+ kwargs['filterexpr'] = "colc ne 'c' and colf > 0.1"
+ ds = _get_ds()
+ ds_summ = ds.summ('cola','colb','cold', *extras, **kwargs)
+ self.assertOrdinal(ds_summ['cola'], '.abc......aaabbbcccaaabbbccc.........aaaaaaaaabbbbbbbbbccccccccc')
+ self.assertOrdinal(ds_summ['colb'], '....abc...abcabcabc.........aaabbbcccaaabbbcccaaabbbcccaaabbbccc')
+ self.assertOrdinal(ds_summ['cold'], '.......abc.........abcabcabcabcabcabcabcabcabcabcabcabcabcabcabc')
+ return ds_summ
+
+ def test_unwgt_filt_mean(self):
+ ds_summ = self._get_filt_summds(mean('cole'))
+ unwgt_filt_mean_cole = ds_summ['mean_of_cole']
+ sas_unwgt_filt_mean_cole = [0.49498,0.45467,0.53575,0.50219,0.47171,0.47439,0.52672,0.50175,0.46328,0.52891,0.47212,0.44457,
+ 0.44962,0.52113,0.35965,0.52039,0.60038,0.42263,0.59157,0.48537,0.47839,0.67933,0.42001,0.53714,0.55883,0.49449,0.53636,0.47342,
+ 0.52640,0.45128,0.44512,0.57904,0.42062,0.47044,0.42548,0.50170,0.65115,0.42548,0.52930,0.45807,0.68212,0.18334,0.74136,0.50106,
+ 0.38865,0.47789,0.60288,0.56492,0.69677,0.43395,0.35650,0.52530,0.30175,0.54835,0.79050,0.55084,0.23361,0.42277,0.57427,0.63590,
+ 0.11723,0.40374,0.55750,0.74758]
+ # need to adjust order of results as calculated by SAS...
+ s = sas_unwgt_filt_mean_cole
+ sas_unwgt_filt_mean_cole = s[0:13] + s[16:19] + s[22:25] + s[13:16] + s[19:22] + s[25:]
+ self.assertListNear(unwgt_filt_mean_cole,sas_unwgt_filt_mean_cole,prec=5)
+
+ def test_wgt_nofilt_mean(self):
+ ds_summ = self._get_nofilt_summds(mean('cole'),weightcol='colf')
+ wgt_nofilt_mean_cole = ds_summ['mean_of_cole_wgtd_by_colf']
+ sas_wgt_nofilt_mean_cole = [0.51011,0.46426,0.56850,0.50277,0.50307,0.51894,0.50849,0.53046,0.46285,0.55382,0.45728,0.48350,0.45242,0.53660,
+ 0.39202,0.47259,0.59330,0.51480,0.59430,0.56511,0.48186,0.67457,0.47552,0.55798,0.48309,0.49845,0.50888,0.49880,0.54893,
+ 0.47541,0.48861,0.58780,0.45281,0.59885,0.47339,0.46614,0.59850,0.51168,0.44388,0.39149,0.71972,0.34020,0.54318,0.43487,
+ 0.43394,0.49991,0.58433,0.58928,0.61409,0.51459,0.35895,0.69267,0.57281,0.51157,0.68635,0.56097,0.37039,0.48800,0.49992,
+ 0.62638,0.35855,0.45023,0.45355,0.57727]
+ # need to adjust order of results as calculated by SAS...
+ s = sas_wgt_nofilt_mean_cole
+ sas_wgt_nofilt_mean_cole = s[0:13] + s[16:19] + s[22:25] + s[13:16] + s[19:22] + s[25:]
+ self.assertListNear(wgt_nofilt_mean_cole,sas_wgt_nofilt_mean_cole,prec=5)
+
+ def test_wgt_filt_mean(self):
+ ds_summ = self._get_filt_summds(mean('cole'), weightcol='colf')
+ wgt_filt_mean_cole = ds_summ['mean_of_cole_wgtd_by_colf']
+ sas_wgt_filt_mean_cole = [0.50331,0.48098,0.52792,0.50444,0.47379,0.51078,0.52242,0.52194,0.45893,0.54800,0.46007,
+ 0.51715,0.47039,0.54293,0.40475,0.50732,0.59180,0.42452,0.59276,0.51071,0.45889,0.66665,0.42522,0.57561,0.52822,0.50896,
+ 0.49810,0.50816,0.53654,0.38694,0.46685,0.62624,0.44756,0.47325,0.40233,0.50930,0.66051,0.46592,0.44992,0.47023,0.71378,
+ 0.24441,0.63108,0.45359,0.48090,0.47352,0.58433,0.56410,0.69677,0.56828,0.37085,0.45494,0.29330,0.55509,0.81168,0.55984,
+ 0.18526,0.44533,0.57132,0.60836,0.11933,0.40501,0.50971,0.72437]
+ # need to adjust order of results as calculated by SAS...
+ s = sas_wgt_filt_mean_cole
+ sas_wgt_filt_mean_cole = s[0:13] + s[16:19] + s[22:25] + s[13:16] + s[19:22] + s[25:]
+ self.assertListNear(wgt_filt_mean_cole,sas_wgt_filt_mean_cole,prec=5)
+
+ def test_unwgt_nofilt_stderr(self):
+ ds_summ = self._get_nofilt_summds(stderr('cole'))
+ unwgt_nofilt_stderr_cole = ds_summ['stderr_of_cole']
+ sas_unwgt_nofilt_stderr_cole = [0.02020,0.03275,0.03669,0.03406,0.03492,0.03768,0.03324,0.03666,0.03111,0.03806,0.05767,
+ 0.05978,0.05514,0.05439,0.05685,0.05202,0.05592,0.06595,0.06322,0.07350,0.05223,0.06473,0.05700,0.06955,0.05432,0.06798,
+ 0.04739,0.06513,0.06679,0.06486,0.05296,0.07472,0.04751,0.08253,0.05542,0.05163,0.06737,0.10143,0.13339,0.04980,0.09700,
+ 0.06077,0.11763,0.07707,0.11231,0.10276,0.13042,0.10656,0.03922,0.10332,0.05341,0.11031,0.13120,0.09163,0.12445,0.12432,
+ 0.10559,0.08192,0.14137,0.06976,0.16797,0.10474,0.07288,0.12727]
+ # need to adjust order of results as calculated by SAS...
+ s = sas_unwgt_nofilt_stderr_cole
+ sas_unwgt_nofilt_stderr_cole = s[0:13] + s[16:19] + s[22:25] + s[13:16] + s[19:22] + s[25:]
+ self.assertListNear(unwgt_nofilt_stderr_cole,sas_unwgt_nofilt_stderr_cole,prec=5)
+
+ def test_unwgt_filt_stderr(self):
+ ds_summ = self._get_filt_summds(stderr('cole'))
+ unwgt_filt_stderr_cole = [v for v in ds_summ['stderr_of_cole']]
+        unwgt_filt_stderr_cole[48] = 9999.99 # this value is supposed to be missing; use 9999.99 as a sentinel
+ sas_unwgt_filt_stderr_cole = [0.02607,0.04803,0.04815,0.04015,0.04655,0.04894,0.04142,0.04716,0.04197,0.04716,0.08235,
+ 0.10281,0.07489,0.07834,0.08476,0.07621,0.08484,0.06868,0.08840,0.09212,0.06827,0.08991,0.07028,0.08269,0.05880,0.08129,
+ 0.06090,0.07185,0.08374,0.10545,0.06735,0.08106,0.06484,0.13497,0.07845,0.06468,0.06656,0.12767,0.20621,0.05445,0.13539,
+ 0.06678,0.18099,0.13933,0.14212,0.12049,0.13042,0.17602,9999.99,0.17715,0.04287,0.21261,0.20047,0.13841,0.07101,0.19565,
+ 0.12866,0.08569,0.13732,0.09648,0.10728,0.11965,0.07959,0.08064]
+ # need to adjust order of results as calculated by SAS...
+ s = sas_unwgt_filt_stderr_cole
+ sas_unwgt_filt_stderr_cole = s[0:13] + s[16:19] + s[22:25] + s[13:16] + s[19:22] + s[25:]
+ self.assertListNear(unwgt_filt_stderr_cole,sas_unwgt_filt_stderr_cole,prec=5)
+
+ def test_wgt_nofilt_stderr(self):
+ ds_summ = self._get_nofilt_summds(stderr('cole'),weightcol='colf')
+ wgt_nofilt_stderr_cole = ds_summ['stderr_of_cole_wgtd_by_colf']
+ sas_wgt_nofilt_stderr_cole = [0.01937,0.03210,0.03651,0.03182,0.03308,0.03378,0.03384,0.03534,0.03017,0.03545,0.05913,
+ 0.05569,0.05440,0.05551,0.05511,0.04763,0.05669,0.06222,0.06571,0.07145,0.05178,0.06450,0.05264,0.05954,0.05463,0.06203,
+ 0.04917,0.05853,0.06162,0.06361,0.05008,0.07188,0.04339,0.07182,0.05453,0.05480,0.06620,0.10208,0.13542,0.05630,0.09605,
+ 0.05596,0.07591,0.07481,0.12058,0.09567,0.13175,0.10332,0.03846,0.08945,0.04058,0.11625,0.13744,0.09822,0.11889,0.11329,
+ 0.09552,0.07623,0.13715,0.07260,0.15026,0.09479,0.08045,0.13000]
+ # need to adjust order of results as calculated by SAS...
+ s = sas_wgt_nofilt_stderr_cole
+ sas_wgt_nofilt_stderr_cole = s[0:13] + s[16:19] + s[22:25] + s[13:16] + s[19:22] + s[25:]
+ self.assertListNear(wgt_nofilt_stderr_cole,sas_wgt_nofilt_stderr_cole,prec=5)
+
+ def test_wgt_filt_stderr(self):
+ ds_summ = self._get_filt_summds(stderr('cole',weightcol='colf'))
+ wgt_filt_stderr_cole = [v for v in ds_summ['stderr_of_cole_wgtd_by_colf']]
+        wgt_filt_stderr_cole[48] = 9999.99 # this value is supposed to be missing; use 9999.99 as a sentinel
+ sas_wgt_filt_stderr_cole = [0.02509,0.04724,0.04714,0.03830,0.04662,0.04397,0.04081,0.04475,0.04091,0.04511,
+ 0.09045,0.09084,0.07378,0.07934,0.08389,0.07411,0.08551,0.06196,0.09154,0.08979,0.06214,0.09908,0.06841,0.07346,0.05724,
+ 0.07265,0.06518,0.06598,0.07734,0.10902,0.06777,0.07537,0.05802,0.11836,0.07156,0.06518,0.06385,0.13645,0.21210,0.05514,
+ 0.13067,0.06559,0.14352,0.13709,0.14179,0.11976,0.13175,0.18098,9999.99,0.11548,0.04170,0.21781,0.20029,0.13807,0.06798,
+ 0.16915,0.13090,0.08252,0.13654,0.09936,0.10726,0.10117,0.08789,0.06618]
+ # need to adjust order of results as calculated by SAS...
+ s = sas_wgt_filt_stderr_cole
+ sas_wgt_filt_stderr_cole = s[0:13] + s[16:19] + s[22:25] + s[13:16] + s[19:22] + s[25:]
+ self.assertListNear(wgt_filt_stderr_cole,sas_wgt_filt_stderr_cole,prec=5)
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/web/install.py b/web/install.py
new file mode 100644
index 0000000..eee674b
--- /dev/null
+++ b/web/install.py
@@ -0,0 +1,80 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+#
+# $Id: install.py 3686 2009-02-06 09:03:06Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/web/install.py,v $
+
+import sys, os
+parent_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), '..'))
+sys.path.insert(0, parent_dir)
+from simpleinst import *
+
+config.appname = 'nea'
+config.apptitle = 'NetEpi-Analysis'
+config.debug = False
+config.install_owner = 'root'
+config.install_exclude = 'CVS'
+config.soompath = 'SOOM_objects'
+config.session_secret = secret()
+
+# script_dir contains application and libraries
+config.script_dir = joinpath(config.cgi_dir, config.appname)
+
+# data_dir is a directory writeable by the web user, but NOT PUBLISHED
+# by the web server (contains confidential data)
+config.data_dir = joinpath(config.cgi_dir, config.appname, 'data')
+
+# static_target contains content that does not change, such as images and
+# style sheets. This directory should be published by the web server.
+config.static_target = joinpath(config.html_dir, config.appname)
+
+# dynamic_target contains content that is generated by the application and is
+# intended to be published by the web server.
+config.dynamic_target = joinpath(config.html_dir, config.appname, 'dynamic')
+
+# Load any existing run-time config file
+config.source_file(15, 'config', config.script_dir,
+ exclude=['cgi_target', 'html_target', 'scratchdir'])
+
+config_exclude = (
+ 'base_dir', 'cgi_dir', 'compile_py', 'create_db', 'html_dir',
+ 'install_*', 'platform', 'python', 'web_user',
+)
+
+config.config_owner = '%s:%s' % (config.install_owner,
+ user_lookup(config.web_user)[1])
+config.write_file(joinpath(config.script_dir, 'config.py'),
+ config_exclude, owner=config.config_owner, mode=0640)
+
+on_install('*.py', py_compile)
+
+# Sundry static content
+install(target = config.static_target,
+ base = 'static', files = ['*.css', '*.png', '*.ico', '*.html', '*.js'])
+
+# App modules and pages
+install(target = config.script_dir,
+ files = ['pages', 'libsoomexplorer'],
+ include = ['*.py', '*.html'])
+
+# Application
+install(target = config.script_dir,
+ filter = python_bang_path_filter,
+ files = 'nea.py', mode = 0755)
+
+make_dirs(config.data_dir, owner=config.web_user)
+make_dirs(config.dynamic_target, owner=config.web_user)
diff --git a/web/libsoomexplorer/__init__.py b/web/libsoomexplorer/__init__.py
new file mode 100644
index 0000000..a83eeb0
--- /dev/null
+++ b/web/libsoomexplorer/__init__.py
@@ -0,0 +1,16 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: __init__.py 3673 2009-02-02 06:01:30Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/web/libsoomexplorer/__init__.py,v $
diff --git a/web/libsoomexplorer/colvals.py b/web/libsoomexplorer/colvals.py
new file mode 100644
index 0000000..72d36d8
--- /dev/null
+++ b/web/libsoomexplorer/colvals.py
@@ -0,0 +1,190 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: colvals.py 3685 2009-02-06 07:14:49Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/web/libsoomexplorer/colvals.py,v $
+
+import re
+import sets
+import fnmatch
+import cPickle, binascii
+from libsoomexplorer.common import *
+
+def make_re(pattern):
+ if pattern.find('*') < 0:
+ pattern = '*%s*' % pattern
+ return re.compile(fnmatch.translate(pattern), re.IGNORECASE)
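+# Illustrative behaviour (example patterns, not from the original source):
+# a bare substring is wrapped in '*' wildcards, so make_re('fuji') matches
+# any value containing "fuji", case-insensitively, while an explicit glob
+# such as make_re('f*i') is used as-is.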
+
+def shorten_trans(col, v, split_long=50):
+ if v is None:
+ v = 'None'
+ v_trans = col.do_format(col.do_outtrans(v)).strip()
+ if len(v_trans) > split_long:
+ n = split_long / 2 - 2
+ v_trans = '%s ... %s' % (v_trans[:n], v_trans[-n:])
+ return v_trans
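+# With the default split_long=50, an over-long formatted value keeps its
+# first and last 23 characters around ' ... ' (n = 50/2 - 2), so e.g. a
+# 60-character label comes back as 51 characters (illustrative arithmetic,
+# not from the original source).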
+
+def to_datatype(col, v):
+ if not v:
+ return None
+ try:
+ return col.datatype.as_pytype(v)
+ except (ValueError, TypeError):
+ return v
+
+def encode_val(v):
+ return binascii.b2a_base64(cPickle.dumps(v, -1)).strip()
+
+def decode_val(d):
+ if d:
+ return cPickle.loads(binascii.a2b_base64(d))
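+# encode_val/decode_val round-trip arbitrary picklable values through a
+# printable base64 string, e.g. (illustrative)
+# decode_val(encode_val((1, 'a'))) == (1, 'a'); decode_val('') returns None.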
+
+class ColValSelect:
+ too_many_results = 200
+
+ def __init__(self, name, value, ignore_vals=None, multiple=True):
+ self.name = name
+ self.multiple = multiple
+ if self.multiple:
+ if value is None:
+ self.value = []
+ else:
+ self.value = [encode_val(v) for v in value]
+ else:
+ self.value = encode_val(value)
+ self.search_pat = ''
+ self.errorstr = ''
+ if ignore_vals is None:
+ ignore_vals = sets.ImmutableSet()
+ self.ignore_vals = ignore_vals
+
+ def cardinality_is_high(self, workspace):
+ col = workspace.get_dataset()[self.name]
+ return col.cardinality() > 100
+
+ def select_values(self, workspace):
+ col = workspace.get_dataset()[self.name]
+ values = col.inverted.keys()
+ if col.is_ordered():
+ values.sort()
+ values = [(shorten_trans(col, v), encode_val(v))
+ for v in values
+ if v not in self.ignore_vals]
+ if not col.is_ordered():
+ values.sort()
+ return [(v, l) for l, v in values]
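+    # Note: ordered columns are sorted by underlying value (before
+    # translation), unordered columns alphabetically by display label (after
+    # translation); the result is a list of (encoded value, label) pairs.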
+
+ def search(self, workspace):
+ self.errorstr = ''
+ pattern = self.search_pat
+ if not pattern:
+ return []
+ col = workspace.get_dataset()[self.name]
+ values = []
+ ignore_vals = sets.Set(self.ignore_vals)
+ if self.multiple:
+ ignore_vals.union_update(self.value)
+ else:
+ ignore_vals.add(self.value)
+ if pattern.find('*') < 0:
+ pattern = '*%s*' % pattern
+ pattern = make_re(pattern)
+ ignored = False
+ for v in col.inverted.keys():
+ if v not in ignore_vals:
+                v_trans = col.do_format(col.do_outtrans(v))
+ if pattern.match(v_trans):
+ values.append((v_trans, encode_val(v)))
+ else:
+ ignored = True
+ if not values:
+ if ignored:
+ self.errorstr = 'No more matches'
+ else:
+ self.errorstr = 'No matches'
+ elif len(values) > self.too_many_results:
+ self.errorstr = 'Too many matches (%d)' % len(values)
+ values = []
+ values.sort()
+ return [(v, l) for l, v in values]
+
+ def trans_values(self, workspace):
+ values = self.value
+ if not self.multiple:
+ values = [self.value]
+ col = workspace.get_dataset()[self.name]
+ values = [(col.do_outtrans(decode_val(v)), v) for v in values]
+ values.sort()
+ return [(v, l) for l, v in values]
+
+ def __nonzero__(self):
+ return len(self) > 0
+
+ def __len__(self):
+ if self.multiple:
+ return len(self.value)
+ elif self.value is None:
+ return 0
+ else:
+ return 1
+
+ def pretty_value(self, workspace):
+ values = self.value
+ if not self.multiple:
+ values = [self.value]
+ col = workspace.get_dataset()[self.name]
+ values = [shorten_trans(col, decode_val(v)) for v in values]
+ return ', '.join(values)
+
+ def op_add(self, workspace, value):
+ if self.multiple:
+ if value not in self.value:
+ self.value.append(value)
+ else:
+ self.value = value
+
+ def op_del(self, workspace, value):
+ assert self.multiple
+ try:
+ self.value.remove(value)
+ except ValueError:
+ pass
+
+ def op_all(self, workspace, value):
+ assert self.multiple
+ for v, l in self.search(workspace):
+ self.value.append(v)
+
+ def op_none(self, workspace, value):
+ assert self.multiple
+ self.value = []
+
+ def op_clr(self, workspace, value):
+ self.search_pat = ''
+ self.errorstr = ''
+
+ def sop(self, workspace, op, field):
+ meth = getattr(self, 'op_' + op)
+ return meth(workspace, field)
+
+ def as_pytypes(self, workspace):
+ col = workspace.get_dataset()[self.name]
+ if self.multiple:
+ values = []
+ for value in self.value:
+ values.append(decode_val(value))
+ return tuple(values)
+ else:
+ assert type(self.value) not in (tuple, list)
+ return decode_val(self.value)
diff --git a/web/libsoomexplorer/common.py b/web/libsoomexplorer/common.py
new file mode 100644
index 0000000..9e164a8
--- /dev/null
+++ b/web/libsoomexplorer/common.py
@@ -0,0 +1,64 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: common.py 3673 2009-02-02 06:01:30Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/web/libsoomexplorer/common.py,v $
+
+import SOOMv0
+from time import time as _time
+
+class UIError(Exception): pass
+
+ConversionError = ValueError, TypeError
+
+class Timer:
+ def __init__(self):
+ self.reset()
+
+ def reset(self):
+ self.last_t = None
+ self.times = []
+ self.last_name = None
+ self.start_t = None
+
+ def _flush(self, now):
+ if self.last_name:
+ self.times.append((self.last_name, now - self.last_t))
+ self.last_name = None
+
+ def __len__(self):
+ self._flush(_time())
+ return len(self.times)
+
+ def __call__(self, name):
+ now = _time()
+ if self.start_t is None:
+ self.start_t = now
+ self._flush(now)
+ self.last_name = name
+ self.last_t = now
+
+ def end(self):
+ now = _time()
+ self._flush(now)
+ if self.start_t is not None:
+ self.times.append(('TOTAL', now - self.start_t))
+ try:
+ return self.times
+ finally:
+ self.reset()
+
+# The timer is global, but we only service one request at a time, so this is
+# acceptable.
+timer = Timer()
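+#
+# Illustrative usage (hypothetical step names, not from the original source):
+#
+#   timer('load dataset')
+#   # ... work ...
+#   timer('render page')
+#   # ... work ...
+#   for name, elapsed in timer.end():
+#       print '%s took %.3fs' % (name, elapsed)
+#
+# end() yields one (name, seconds) pair per step plus a final ('TOTAL', ...)
+# entry, then resets the timer for the next request.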
diff --git a/web/libsoomexplorer/condcol.py b/web/libsoomexplorer/condcol.py
new file mode 100644
index 0000000..329f93b
--- /dev/null
+++ b/web/libsoomexplorer/condcol.py
@@ -0,0 +1,222 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: condcol.py 3673 2009-02-02 06:01:30Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/web/libsoomexplorer/condcol.py,v $
+
+import sets
+import copy
+import fnmatch
+from libsoomexplorer.common import *
+from libsoomexplorer import colvals
+
+class _ValSelectBase(colvals.ColValSelect):
+ param_ctor = None
+
+ def to_param(self, workspace, params):
+ values = self.as_pytypes(workspace)
+ if values:
+ params.insert(0, self.param_ctor(values))
+
+
+class _SuppressVals(_ValSelectBase):
+ op = 'suppress'
+ param_ctor = SOOMv0.suppress
+
+class _RetainVals(_ValSelectBase):
+ op = 'retain'
+ param_ctor = SOOMv0.retain
+
+class _CoalesceVals(_ValSelectBase):
+ op = 'coalesce'
+ param_ctor = SOOMv0.coalesce
+
+ def __init__(self, name, values, label, ignore_vals, idx=None):
+ self.label = label
+ self.idx = idx
+ _ValSelectBase.__init__(self, name, values, ignore_vals)
+
+ def to_param(self, workspace, params):
+ values = self.as_pytypes(workspace)
+ param = self.param_ctor(values, label=self.label)
+ if self.idx is None:
+ if values:
+ params.append(param)
+ else:
+ if values:
+ params[self.idx] = param
+ else:
+ del params[self.idx]
+
+
+class _ColParams:
+ """
+    Represents the pre-conditioning being applied to a column (zero,
+    one or more of suppress, retain, coalesce).
+ """
+ _ops = _SuppressVals, _RetainVals, _CoalesceVals
+ op_map = dict([(m.op, m) for m in _ops])
+
+ def __init__(self, name, label, params):
+ self.name = name
+ self.label = label
+ self.edit = None
+ self.params = params
+
+ def __cmp__(self, other):
+ return cmp(self.label, other.label)
+
+ def __repr__(self):
+ return 'colparams(%r, edit=%s, %r)' %\
+ (self.name, bool(self.edit), self.params)
+
+ def describe(self, workspace):
+ col = workspace.get_dataset()[self.name]
+ res = []
+ for param in self.params:
+ s = ', '.join([colvals.shorten_trans(col, v) for v in param.values])
+ label = getattr(param, 'label', None)
+ if label:
+ s = '%r: %s' % (label, s)
+ res.append((param.__class__.__name__, s))
+ return res
+
+ def do_clear(self, workspace):
+ self.params = []
+
+ def _single(self, workspace, cls):
+ values = []
+ params = []
+ for param in self.params:
+ if not isinstance(param, (SOOMv0.suppress, SOOMv0.retain)):
+ params.append(param)
+ if isinstance(param, cls.param_ctor):
+ values.extend(param.values)
+ self.params = params
+ self.edit = cls(self.name, values)
+
+ def do_suppress(self, workspace):
+ self._single(workspace, _SuppressVals)
+
+ def do_retain(self, workspace):
+ self._single(workspace, _RetainVals)
+
+ def do_coalesce(self, workspace, idx=None):
+ ignore_vals = sets.Set()
+ for param in self.params:
+ if isinstance(param, SOOMv0.coalesce):
+ ignore_vals.union_update(param.values)
+ if not idx:
+ self.edit = _CoalesceVals(self.name, [], '', ignore_vals)
+ else:
+ idx = int(idx)
+ param = self.params[idx]
+ ignore_vals.difference_update(param.values)
+ self.edit = _CoalesceVals(self.name, param.values, param.label,
+ ignore_vals, idx)
+
+ def do_del(self, workspace, idx):
+ idx = int(idx)
+ del self.params[idx]
+
+ def maybe_search(self, workspace):
+ if self.edit is not None:
+ return self.edit.search(workspace)
+ return []
+
+ def done_edit(self, workspace):
+ if self.edit is not None:
+ self.edit.to_param(workspace, self.params)
+ self.edit = None
+
+
+class CondColParams:
+ """
+ A collection of column parameters (_ColParams), one per conditioning
+ column.
+ """
+ inhibit_suppress = False
+
+ def __init__(self, workspace, param_map, condcols):
+ self.edit_col = None
+ ds = workspace.get_dataset()
+ self.init_cols(ds, condcols, param_map)
+
+ def init_cols(self, ds, colnames, param_map):
+ """
+ Given a list of conditioning column names, populate our collection
+ of _ColParams, potentially initialising the _ColParams with any
+ previously set column operations.
+ """
+ self.cols = []
+ for colname in colnames:
+ col = ds[colname]
+ if col.is_discrete():
+ self.cols.append(self.new_colparam(col, param_map))
+ self.cols.sort()
+
+ def new_colparam(self, col, param_map):
+ return _ColParams(col.name, col.label, param_map.get(col.name, []))
+
+ def __getitem__(self, i):
+ return self.cols[i]
+
+ def __len__(self):
+ return len(self.cols)
+
+ def clear(self, workspace):
+ self.done_edit(workspace)
+ for col in self.cols:
+ col.do_clear(workspace)
+
+ def do_col(self, workspace, op, colname, *args):
+ """
+ Dispatch a web "click" to the appropriate _ColParams instance
+ and method.
+ """
+ self.done_edit(workspace)
+ for col in self.cols:
+ if col.name == colname:
+ break
+ else:
+ return
+ col_op = getattr(col, 'do_' + op)
+ col_op(workspace, *args)
+ self.edit_col = col
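+        # e.g. a (hypothetical) web click with op='coalesce' and
+        # colname='grade' dispatches to the grade column's
+        # _ColParams.do_coalesce(workspace).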
+
+ def done_edit(self, workspace):
+ if self.edit_col is not None:
+ self.edit_col.done_edit(workspace)
+ self.edit_col = None
+
+ def maybe_search(self, workspace):
+ if self.edit_col is not None:
+ return self.edit_col.maybe_search(workspace)
+ return []
+
+ def get_map(self, workspace):
+ self.done_edit(workspace)
+ param_map = {}
+ for col in self.cols:
+ param_map[col.name] = [p for p in col.params if p.values]
+ return param_map
+
+
+class StratifyParams(CondColParams):
+ inhibit_suppress = True
+
+ def __init__(self, workspace, param_map, condcols):
+ self.edit_col = None
+ ds = workspace.get_dataset()
+ self.init_cols(ds, condcols[2:], param_map)
diff --git a/web/libsoomexplorer/dsparams.py b/web/libsoomexplorer/dsparams.py
new file mode 100644
index 0000000..eb83078
--- /dev/null
+++ b/web/libsoomexplorer/dsparams.py
@@ -0,0 +1,177 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: dsparams.py 3703 2009-03-03 04:58:06Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/web/libsoomexplorer/dsparams.py,v $
+
+import copy
+import SOOMv0
+from libsoomexplorer.common import *
+from libsoomexplorer.filterstore import filterstore
+from libsoomexplorer.filterparse import parse, FilterParseError
+
+class DSParams:
+ """
+ Dataset-specific parameters (only filter at this time).
+ """
+
+ def __init__(self, dsname=None):
+ self.dsname = dsname
+ self.clear()
+
+ def __getstate__(self):
+ state = dict(self.__dict__)
+ state['_DSParams__filtered_ds'] = None
+ return state
+
+ def __setstate__(self, state):
+ self.__dict__.update(state)
+ if self.filter:
+ self.filter.loaded()
+
+ def set_dsname(self, dsname):
+ if dsname is not None and self.dsname != dsname:
+ self.dsname = dsname
+ self.clear()
+
+ def clear(self):
+ self.filterexpr = None
+ self.filterlabel = None
+ self.filter = None
+ self.filtername = None
+ self.__filtered_ds = None
+
+ def get_dataset(self):
+ return SOOMv0.dsload(self.dsname)
+
+ def get_label(self):
+ ds = self.get_dataset()
+ return ds.label or ds.name
+
+ def have_filterexpr(self):
+        # Has the user supplied a filterexpr that doesn't match the current self.filter?
+ if (self.filterexpr and (self.filter is None
+ or self.filter.as_string() != self.filterexpr)):
+ return True
+ return False
+
+ def filter_modified(self):
+ return (self.filter is not None and self.filter.modified())
+
+ def _clean_filterexpr(self):
+ return self.filterexpr.replace('\n', ' ').replace('\r', '')
+
+ def filter_args(self, kw):
+ if self.filterexpr:
+ kw['filterexpr'] = self._clean_filterexpr()
+ if self.filterlabel:
+ kw['filterlabel'] = self.filterlabel
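+        # After this call kw might contain, e.g. (illustrative values),
+        # {'filterexpr': "variety != 2", 'filterlabel': 'Not Delicious!'},
+        # ready to pass on to dataset filter/summary calls.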
+
+ def available_filters(self):
+ if not self.dsname:
+ return []
+ filter_list = [(f.label.lower(), f.name, f.label)
+ for f in filterstore.available_filters(self.dsname)]
+ filter_list.sort()
+ if self.have_filterexpr() or self.filter_modified():
+ filter_list.insert(0, ('', '', '%s<modified>' % self.filterlabel))
+ return [nl[1:] for nl in filter_list]
+
+ def use_filter(self, filter):
+ self.filterexpr = filter.as_string()
+ if filter.label:
+ self.filterlabel = filter.label
+ else:
+ label = self.filterexpr
+ if len(label) > 60:
+ label = label[:60] + ' ...'
+ self.filterlabel = label
+ self.filter = filter
+ if filter.modified():
+ self.filtername = ''
+ else:
+ self.filtername = filter.name
+
+ def save_filter(self, filter):
+ filter.clear_edit()
+ filterstore.update_filter(filter)
+ self.use_filter(filter)
+
+ def delete_filter(self, filter):
+ if filter.name:
+ filterstore.delete_filter(filter)
+ self.clear()
+
+ def get_filtered_dataset(self):
+ if self.__filtered_ds is None:
+ kw = {}
+ self.filter_args(kw)
+ self.__filtered_ds = self.get_dataset().filter(kwargs=kw)
+ return self.__filtered_ds
+
+ # page_process() methods
+ def do_clear(self, ctx):
+ self.clear()
+
+ def do_new(self, ctx):
+ if self.have_filterexpr() or self.filter_modified():
+ ctx.msg('warn', 'New filter (select "Abandon" to return to '
+ 'previously unsaved filter)')
+ self.edit_filter = filterstore.new_filter(self.dsname)
+ ctx.push_page('filter', self)
+
+ def do_edit(self, ctx):
+        # User-supplied filterexpr that doesn't match the current self.filter? Parse and edit it.
+ if self.have_filterexpr():
+ self.filterlabel = self.filtername = ''
+ try:
+ root = parse(self.get_dataset(), self.filterexpr)
+ except FilterParseError, e:
+ self.edit_filter = None
+ raise UIError(str(e))
+ else:
+ self.edit_filter = filterstore.new_filter(self.dsname)
+ self.edit_filter.set_root(root)
+ if self.filter is not None:
+ self.edit_filter.name = self.filter.name
+ self.edit_filter.label = self.filter.label
+ # User selected filtername? load & edit
+ elif self.filtername:
+ if self.filtername == self.filter.name:
+ self.edit_filter = self.filter
+ else:
+ self.edit_filter = self.load_filter(self.filtername)
+ if self.filter_modified():
+ ctx.msg('warn', 'Loaded %r (select "Abandon" to return to '
+ 'previously unsaved filter)' % self.edit_filter.label)
+ else:
+ ctx.msg('info', 'Loaded %r' % self.edit_filter.label)
+ elif self.filter:
+ # Edit previously edited filter
+ self.edit_filter = copy.deepcopy(self.filter)
+ else:
+ # No other request - create new filter
+ self.edit_filter = filterstore.new_filter(self.dsname)
+ ctx.push_page('filter', self)
+
+ def load_filter(self, filtername):
+ return filterstore.load_filter(self.dsname, filtername)
+
+ def do_load(self, ctx):
+ if self.filtername:
+ if self.have_filterexpr() or self.filter_modified():
+ ctx.msg('warn', 'Previously modified filter has been discarded')
+ self.use_filter(self.load_filter(self.filtername))
+ else:
+ ctx.msg('warn', 'Not loading - modified filter is already active')
diff --git a/web/libsoomexplorer/fields.py b/web/libsoomexplorer/fields.py
new file mode 100644
index 0000000..c67cd07
--- /dev/null
+++ b/web/libsoomexplorer/fields.py
@@ -0,0 +1,735 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+"""
+Abstractions used to describe the markup elements that make up a
+plot/table configuration page.
+
+Plot type descriptions subclass PlotTypeBase. Class attributes on the
+concrete subclass describe aspects of the resulting form. Attributes include:
+
+ name internal name of plot type
+ label user visible name of plot type
+ fields a list of _FieldBase subclass instances describing markup
+ elements within the form.
+ options used by the OptionsField, describes markup elements within
+ the options field.
+
+
+Fields include:
+
+ _FieldBase abstract base
+ ColField select a column
+ MeasureColField select a measure column
+ ColsField select zero or more columns
+ GroupByColField select a discrete group-by column
+ StatColsField select zero or more stat methods and cols
+ CondColParamsField buttons that push the suppress/coalesce pages
+ OutputField select plot output options
+ OptionsField select options (described by options attr)
+
+ _SimpleFieldBase abstract base, args: param, label, default
+ BoolField checkbox
+ ChooseOneField radio, args: + list of 2-tuple name/labels,
+ pytype.
+ ChooseManyField checkbox, args: + list of 2-tuple name/labels,
+ pytype.
+ DropField option list, args: + list of 2-tuple name/label,
+ pytype.
+ TextField
+ TextAreaField
+ FloatField
+ IntField
+
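+ For example, a minimal plot type declaration built from these pieces
+ might look like the following sketch (the class name, parameters and
+ field choices are illustrative, not part of this module):
+
+     class DemoScatterType(PlotTypeBase):
+         name = 'demoscatter'
+         label = 'Scatter plot (demo)'
+         fields = [
+             ColField('xcolname', 'X column', colfilter=scalarcol),
+             ColField('ycolname', 'Y column', colfilter=scalarcol),
+             GroupByColField(),
+             OutputField(),
+         ]
+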
+"""
+# $Id: fields.py 3701 2009-02-26 05:56:34Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/web/libsoomexplorer/fields.py,v $
+
+import SOOMv0
+# Application modules
+from common import UIError, ConversionError
+
+def _propn_optionexpr(ds, cols):
+ return SOOMv0.propn_names_and_labels(ds, filter(None, cols))
+
+def _get_measure(methname, colname, weightcol, conflev=None):
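+ # Build a SOOMv0 summary measure from the form parameters; for
+ # example _get_measure('mean', 'age', '_default_') would yield
+ # SOOMv0.mean('age') (method and column names illustrative).
+ # 'freq'/'freqcl' measures take no column argument.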
+ kwargs = {}
+ method = getattr(SOOMv0, methname)
+ if weightcol == '_none_':
+ kwargs['weightcol'] = None
+ elif weightcol != '_default_':
+ kwargs['weightcol'] = weightcol
+ if methname.endswith('cl') and conflev:
+ kwargs['conflev'] = conflev
+ if methname in ('freq', 'freqcl'):
+ return method(**kwargs)
+ else:
+ if not colname:
+ raise UIError('%r statistical method requires that a scalar '
+ 'column be specified' % method.__doc__)
+ return method(colname, **kwargs)
+
+def _get_param(ns, param, pytype=None):
+ value = getattr(ns, param, None)
+ if str(value).lower() == 'other':
+ value = getattr(ns, 'other_' + param)
+ if value == 'None':
+ value = None
+ if pytype and value is not None:
+ value = pytype(value)
+ return value
+
+def set_target(kwargs, targets, name, value):
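+ # If target parameter collections are named, hand the value to the
+ # set() method of each one already present in kwargs; otherwise
+ # store the value directly as kwargs[name].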
+ if targets:
+ assert isinstance(targets, list)
+ for target in targets:
+ try:
+ param = kwargs[target]
+ except KeyError:
+ pass
+ else:
+ param.set(name, value)
+ else:
+ kwargs[name] = value
+
+def _get_conflev(ns):
+ try:
+ return _get_param(ns, 'conflev', float) / 100
+ except (AttributeError, ValueError):
+ return None
+
+
+# Column filters, used by _ColFieldBase and derived classes
+def anycol(col, workspace):
+ return True
+anyds = anycol
+
+def discretecol(col, workspace):
+ return col.is_discrete()
+
+def scalarcol(col, workspace):
+ return col.is_scalar()
+
+def ordinalcol(col, workspace):
+ return col.is_scalar() or col.is_ordered()
+
+def weightingcol(col, workspace):
+ return col.is_weighting()
+
+def datetimecol(col, workspace):
+ return col.is_datetimetype()
+
+def notstandardisecol(col, workspace):
+ return col.name != workspace.params.standardiseby
+
+def notcol(param):
+ "exclude a column that appears in another parameter"
+ def _notcol(col, workspace):
+ return col.name != getattr(workspace.params, param)
+ return _notcol
+
+def filterlist(filters):
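+ # Combine several column/dataset filters into a single filter that
+ # passes only if all of them pass (logical AND).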
+ def _filterlist(o, workspace):
+ for filter in filters:
+ if not filter(o, workspace):
+ return False
+ return True
+ return _filterlist
+
+
+class _FieldBase:
+ """
+ Abstract base class for plot parameter fields
+ """
+ default = None
+ note = None
+ target = None
+
+ def __init__(self, param=None, label=None, target=None,
+ note=None, default=None):
+ if param is not None:
+ self.param = param
+ if label is not None:
+ self.label = label
+ if target is not None:
+ self.target = target.split('|')
+ if note is not None:
+ self.note = note
+ if default is not None:
+ self.default = default
+# AM - debug aid - find targetless fields
+# elif getattr(self, 'param', None):
+# print self.param
+
+ def get_params(self, ns, kwargs):
+ pass
+
+ def set_default(self, workspace, ns):
+ pass
+
+ def enabled(self, workspace):
+ return True
+
+
+class ShowDatasetField(_FieldBase):
+ markup = 'showdataset'
+ label = 'Dataset'
+
+ def __init__(self, label=None, target=None, note=None):
+ _FieldBase.__init__(self, label=label, target=target, note=note)
+
+
+class AnalysisTypeField(_FieldBase):
+ label = 'Analysis type'
+ markup = 'analysis'
+
+
+class FilterField(_FieldBase):
+ label = 'Filter'
+ markup = 'filter'
+ param = 'dsparams'
+
+
+class _SimpleFieldBase(_FieldBase):
+ def _find_leaf(self, ns):
+ path = self.param.split('.')
+ ns = reduce(getattr, path[:-1], ns)
+ attr = path[-1]
+ return ns, attr, getattr(ns, attr, None)
+
+ def set_default(self, workspace, ns):
+ ns, attr, value = self._find_leaf(ns)
+ ns.set_default(attr, self.default)
+
+ def get_params(self, ns, kwargs):
+ value = getattr(ns, self.param, None)
+ set_target(kwargs, self.target, self.param, value)
+
+
+class _ColsAttrMixin:
+ # A field containing a re-orderable list of columns, with optional extra
+ # attributes. Examples include condcols, statcols, and ordercols.
+ attr_options = ('availablecols',)
+
+ def attr_meths(self):
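+ # Yield an (attribute name template, option list method, css width)
+ # triple per option column; the remaining '%s' in the template is
+ # intended to be filled with the row index by the form markup.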
+ width_css = 'width: %.0f%%;' % (100.0 / len(self.attr_options))
+ for i, option_fn in enumerate(self.attr_options):
+ if len(self.attr_options) > 1:
+ attr_name = 'workspace.params.%s[%%s][%s]' % (self.param, i)
+ else:
+ attr_name = 'workspace.params.%s[%%s]' % self.param
+ option_meth = getattr(self, option_fn)
+ yield attr_name, option_meth, width_css
+
+
+class BoolField(_SimpleFieldBase):
+ markup = 'bool'
+
+ def get_params(self, ns, kwargs):
+ value = getattr(ns, self.param, None) == 'True'
+ set_target(kwargs, self.target, self.param, value)
+
+
+class DropField(_SimpleFieldBase):
+ markup = 'drop'
+
+ def __init__(self, param, label, options, target=None,
+ note=None, default=None):
+ _SimpleFieldBase.__init__(self, param, label, target=target,
+ note=note, default=default)
+ self.options = options
+
+ def set_default(self, workspace, ns):
+ ns, attr, value = self._find_leaf(ns)
+ default = self.default
+ if default is None:
+ default = self.options[0][0]
+ ns.set_default(attr, default)
+
+
+class ChooseOneField(_SimpleFieldBase):
+ markup = 'chooseone'
+
+ def __init__(self, param, label, options, target=None, note=None,
+ default=None, horizontal=False,
+ pytype=None):
+ _SimpleFieldBase.__init__(self, param, label, target=target,
+ note=note, default=default)
+ self.options = options
+ self.horizontal = horizontal
+ self.pytype = pytype
+
+ def set_default(self, workspace, ns):
+ value = getattr(ns, self.param, None)
+ optval = [o[0] for o in self.options]
+ default = self.default
+ if default is None:
+ default = optval[0]
+ if value not in optval:
+ setattr(ns, self.param, default)
+
+ def get_params(self, ns, kwargs):
+ try:
+ value = _get_param(ns, self.param, self.pytype)
+ except (TypeError, ValueError), e:
+ raise UIError('Bad value for %s field' % self.label)
+ set_target(kwargs, self.target, self.param, value)
+
+ def onchangejs(self):
+ return r"document.nea.elements['workspace.params.%s'][%d].checked = 1;" % (self.param, len(self.options)-1)
+
+
+class ChooseManyField(_SimpleFieldBase):
+ markup = 'choosemany'
+
+ def __init__(self, param, label, options, target=None, note=None,
+ default=None, horizontal=False, pytype=None):
+ _SimpleFieldBase.__init__(self, param, label, target=target,
+ note=note, default=default)
+ self.options = options
+ self.horizontal = horizontal
+ self.pytype = pytype
+
+ def set_default(self, workspace, ns):
+ ns, attr, values = self._find_leaf(ns)
+ if values is not None:
+ # Remove any illegal values
+ okay = [option[0] for option in self.options]
+ values = [value for value in values if value in okay]
+ if not values:
+ # Removed all values, so set default
+ values = None
+ if values is None:
+ if self.default is None:
+ values = []
+ else:
+ values = self.default
+ if not isinstance(values, list):
+ values = [values]
+ setattr(ns, attr, values)
+
+ def get_params(self, ns, kwargs):
+ ns, attr, value = self._find_leaf(ns)
+ if self.pytype:
+ try:
+ value = [self.pytype(v) for v in value]
+ except (TypeError, ValueError), e:
+ raise UIError('Bad value for %s field' % self.label)
+ set_target(kwargs, self.target, self.param, value)
+
+
+class TextField(_SimpleFieldBase):
+ markup = 'text'
+
+
+class TextAreaField(_SimpleFieldBase):
+ markup = 'textarea'
+
+
+class FloatField(_SimpleFieldBase):
+ markup = 'float'
+
+ def get_params(self, ns, kwargs):
+ ns, attr, value = self._find_leaf(ns)
+ try:
+ value = float(value)
+ except ConversionError:
+ value = None
+ set_target(kwargs, self.target, self.param, value)
+
+
+class IntField(_SimpleFieldBase):
+ markup = 'int'
+
+ def get_params(self, ns, kwargs):
+ ns, attr, value = self._find_leaf(ns)
+ try:
+ value = int(value)
+ except ConversionError:
+ value = None
+ set_target(kwargs, self.target, self.param, value)
+
+
+class DatasetField(_FieldBase):
+ label = 'Dataset'
+ markup = 'dataset'
+
+ def __init__(self, param=None, label=None, target=None,
+ note=None, dsfilter=None):
+ _FieldBase.__init__(self, param, label, target=target, note=note)
+ if dsfilter is not None:
+ self.dsfilter = dsfilter
+
+ def set_default(self, workspace, ns):
+ avail = [n for n, l in self.availablesets(workspace)]
+ if avail and getattr(ns, self.param, None) not in avail:
+ setattr(ns, self.param, avail[0])
+
+ def availablesets(self, workspace):
+ filter = self.dsfilter
+ if filter is None:
+ filter = anyds
+ elif type(filter) in (tuple, list):
+ filter = filterlist(filter)
+ return workspace.available_datasets(filter)
+
+
+class ProportionColsField(_FieldBase):
+ label = 'Proportions'
+ markup = 'propcols'
+
+ def propcols(self, workspace):
+ ds = workspace.get_dataset()
+ return _propn_optionexpr(ds, workspace.plottype.get_collist())
+
+
+class OutputField(_FieldBase):
+ label = 'Output'
+ markup = 'output'
+
+
+class _ColFieldBase(_FieldBase):
+ """
+ Abstract base class for fields that manipulate column names.
+ """
+ colfilter = None
+
+ def __init__(self, param=None, label=None, target=None,
+ note=None, colfilter=None):
+ _FieldBase.__init__(self, param, label, target=target, note=note)
+ if colfilter is not None:
+ self.colfilter = colfilter
+
+ def _getds(self, workspace):
+ return workspace.get_dataset()
+
+ def availablecols(self, workspace, filter=None):
+ if filter is None:
+ filter = self.colfilter
+ if filter is None:
+ filter = anycol
+ elif type(filter) in (tuple, list):
+ filter = filterlist(filter)
+ ds = self._getds(workspace)
+ if ds is None:
+ return []
+ cols = [(col.label.lower(), col.name, col.label)
+ for col in ds.get_columns()
+ if filter(col, workspace)]
+ cols.sort()
+ return [col[1:] for col in cols]
+
+
+class ColsField(_ColFieldBase, _ColsAttrMixin):
+ """
+ Base class for fields that manipulate lists of column names.
+ """
+ markup = 'colset'
+ target = ['stratacols', 'summcols', 'plotcols']
+ min = 0
+
+ def __init__(self, param=None, label=None, target=None, note=None,
+ colfilter=None, min=None):
+ _ColFieldBase.__init__(self, param, label, target=target,
+ note=note, colfilter=colfilter)
+ if min is not None:
+ self.min = min
+
+ def set_default(self, workspace, ns):
+ # We remove any values not permitted by this plottype
+ values = getattr(ns, self.param, [])
+ okay = [v[0] for v in self.availablecols(workspace)]
+ values = [v for v in values if v in okay]
+ while len(values) < self.min:
+ try:
+ v = okay.pop(0)
+ except IndexError:
+ break
+ if v not in values:
+ values.append(v)
+ setattr(ns, self.param, values)
+
+ def get_params(self, ns, kwargs):
+ cols = filter(None, getattr(ns, self.param, []))
+ if len(cols) < self.min:
+ raise UIError('Must specify at least %d %s' %
+ (self.min, self.label.lower()))
+ if self.target:
+ set_target(kwargs, self.target, self.param, cols)
+
+
+class OrderColsField(ColsField):
+ target = ['orderby']
+ # _ColsAttrMixin
+ attr_options = ('availablecols', 'availabledirs')
+
+ def __init__(self, param='orderby', label='Order by',
+ target=None, note=None, colfilter=None):
+ ColsField.__init__(self, param, label, target=target,
+ note=note, colfilter=colfilter)
+
+ def availabledirs(self, workspace):
+ return [('asc', 'Ascending'), ('desc', 'Descending')]
+
+ def get_params(self, ns, kwargs):
+ if self.target:
+ col_dir = getattr(ns, self.param, [])
+ orders = ['%s %s' % (c, d) for c, d in col_dir if c]
+ set_target(kwargs, self.target, self.param, orders)
+
+
+class StratifyColField(_ColFieldBase):
+ markup = 'groupbycol'
+ label = 'Stratify by'
+ target = ['stratacols', 'summcols', 'plotcols']
+ allow_stack=False
+
+ def __init__(self, param=None, label=None, target=None, note=None,
+ colfilter=discretecol):
+ _ColFieldBase.__init__(self, param, label, target=target, note=note,
+ colfilter=colfilter)
+
+ def groupbycols(self, workspace):
+ return [('', 'None')] + self.availablecols(workspace)
+
+ def set_default(self, workspace, ns):
+ ns.set_default(self.param, [])
+
+ def get_params(self, ns, kwargs):
+ value = getattr(ns, self.param, None)
+ if value and self.target:
+ set_target(kwargs, self.target, self.param, value)
+
+
+class GroupByColField(StratifyColField):
+ label = 'Group-by Column'
+ target = ['stratacols', 'summcols', 'plotkw']
+
+ def __init__(self, param='groupby', label=None, target=None, note=None,
+ colfilter=discretecol, allow_stack=False):
+ StratifyColField.__init__(self, param, label, target=target, note=note,
+ colfilter=colfilter)
+ self.allow_stack = allow_stack
+
+ def set_default(self, workspace, ns):
+ StratifyColField.set_default(self, workspace, ns)
+ ns.set_default('stack', False)
+
+ def get_params(self, ns, kwargs):
+ value = getattr(ns, self.param, None)
+ if value:
+ if self.allow_stack and ns.stack == 'True':
+ param = 'stackby'
+ else:
+ param = 'groupby'
+ set_target(kwargs, self.target, param, value)
+
+
+class WeightColField(_ColFieldBase):
+ label = 'Weight by column'
+ markup = 'weightcol'
+
+ def weightcols(self, workspace):
+ collist = self.availablecols(workspace, weightingcol)
+ collist.insert(0, ('', 'No weighting'))
+ return collist
+
+
+class _StatColFieldBase(_ColFieldBase):
+ param = 'statcols'
+ target = ['summcols', 'measures']
+
+ def statcols(self, workspace):
+ collist = self.availablecols(workspace, scalarcol)
+ collist.insert(0, ('', ''))  # blank "no column" entry
+ return collist
+
+ def statmethods(self, workspace):
+ stat_methods = [(m.__doc__, m.__name__)
+ for m in SOOMv0.stat_methods()
+ if m.__name__ not in ('applyto', 'quantile')]
+ stat_methods.sort()
+ return [(n, l) for l, n in stat_methods]
+
+ def weightcols(self, workspace):
+ collist = self.availablecols(workspace, weightingcol)
+ collist.insert(0, ('_none_', 'No weighting'))
+ return collist
+
+
+class MeasureColField(_StatColFieldBase):
+ label = 'Measure Column'
+ markup = 'measurecol'
+ target = ['summcols', 'measures', 'plotkw']
+
+ def statmethods(self, workspace):
+ ds = workspace.get_dataset()
+ stat_methods = []
+ condcols = workspace.plottype.get_collist()
+ stat_methods.extend(_propn_optionexpr(ds, condcols))
+ stat_methods.extend(_StatColFieldBase.statmethods(self, workspace))
+ return stat_methods
+
+ def get_params(self, ns, kwargs):
+ if getattr(ns, 'measure_stat', None):
+ propn_cols = SOOMv0.extract_propn_cols(ns.measure_stat)
+ if propn_cols:
+ value = propn_cols
+ if ns.measure_weight != '_none_':
+ set_target(kwargs, self.target, 'weightcol', ns.measure_weight)
+ else:
+ value = _get_measure(ns.measure_stat, ns.measure_col,
+ ns.measure_weight, _get_conflev(ns))
+ set_target(kwargs, self.target, self.param, value)
+
+ def set_default(self, workspace, ns):
+ if not hasattr(ns, 'measure_stat'):
+ ns.measure_stat = 'freq'
+ ns.measure_col = ''
+ ds = workspace.get_dataset()
+ ns.measure_weight = ds.weightcol
+
+
+class StatColsField(_StatColFieldBase, _ColsAttrMixin):
+ # _StatColFieldBase
+ label = 'Statistic(s)'
+ markup = 'colset'
+ # _ColsAttrMixin
+ attr_options = 'statmethods', 'statcols', 'weightcols'
+
+ def weightcols(self, workspace):
+ collist = _StatColFieldBase.weightcols(self, workspace)
+ collist.insert(0, ('_default_', 'Default weighting'))
+ return collist
+
+ def get_params(self, ns, kwargs):
+ for meth_args in ns.statcols:
+ if meth_args[0]:
+ value = _get_measure(*meth_args + [_get_conflev(ns)])
+ if self.target:
+ set_target(kwargs, self.target, self.param, value)
+
+
+class CondColParamsField(_FieldBase):
+ label = 'Column Parameters'
+ markup = 'condcolparams'
+
+
+class ColField(_ColFieldBase):
+ markup = 'colname'
+ target = ['stratacols', 'summcols', 'plotcols']
+
+ def __init__(self, param, label, target=None, note=None, colfilter=None,
+ logscale_attr=None):
+ _ColFieldBase.__init__(self, param, label, target=target, note=note,
+ colfilter=colfilter)
+ self.logscale_attr = logscale_attr
+
+ def get_params(self, ns, kwargs):
+ value = getattr(ns, self.param, None)
+ if value and self.target:
+ set_target(kwargs, self.target, self.param, value)
+ if self.logscale_attr:
+ logscale = getattr(ns, self.logscale_attr, None)
+ if logscale and logscale != 'No':
+ try:
+ value = float(logscale)
+ except (ValueError, TypeError):
+ pass
+ else:
+ set_target(kwargs, self.target, self.logscale_attr, value)
+
+ def set_default(self, workspace, ns):
+ value = getattr(ns, self.param, None)
+ cols = self.availablecols(workspace)
+ if not cols:
+ raise UIError('Dataset does not support this analysis type')
+ # is the current value one of the available cols? Fix, if not.
+ for name, label in cols:
+ if value == name:
+ break
+ else:
+ setattr(ns, self.param, cols[0][0])
+ if self.logscale_attr and not hasattr(ns, self.logscale_attr):
+ setattr(ns, self.logscale_attr, 'No')
+
+
+class TwoByTwoColField(ColField):
+ markup = 'twobytwocolname'
+
+ def get_condcolparams(self, workspace):
+ return getattr(workspace.params, self.param+'_params')
+
+
+def dssummarised(ds, workspace):
+ return ds.is_summarised()
+
+def dshascols(target):
+ def _dshascols(ds, workspace):
+ for colname in workspace.plottype.get_collist(target):
+ if not ds.has_column(colname):
+ return False
+ return True
+ return _dshascols
+
+
+class PopulationDSField(_ColFieldBase, DatasetField):
+ markup = 'popdataset'
+ dsfilter = [dssummarised]
+
+ def __init__(self, param=None, label=None, target=None,
+ note=None, colfilter=scalarcol, dsfilter=None):
+ _ColFieldBase.__init__(self, param, label, target=target, note=note,
+ colfilter=colfilter)
+ if dsfilter is not None:
+ self.dsfilter = dsfilter
+
+ def _getds(self, workspace):
+ dsname = getattr(workspace.params, self.param, None)
+ if dsname:
+ return SOOMv0.dsload(dsname)
+ return None
+
+ def set_default(self, workspace, ns):
+ if not hasattr(ns, self.param):
+ cols = self.availablecols(workspace)
+ if cols:
+ setattr(ns, self.param + '_popcol', cols[0][0])
+
+
+class ConfLevField(ChooseOneField):
+ options = [
+ (None, 'None'),
+ ('90', '90%'),
+ ('95', '95%'),
+ ('99', '99%'),
+ ('other', 'Other'),
+ ]
+ def __init__(self, param='conflev', label='Confidence limits',
+ target=None, note=None, default='95', optional=False):
+ options = self.options
+ if not optional:
+ options = self.options[1:]
+ ChooseOneField.__init__(self, param=param, label=label,
+ options=options, target=target, note=note,
+ horizontal=True, default=default, pytype=float)
+
+ def get_params(self, ns, kwargs):
+ try:
+ value = _get_param(ns, self.param, self.pytype)
+ except (TypeError, ValueError), e:
+ raise UIError('Bad value for %s field - %s' % (self.label, e))
+ if value is not None:
+ value /= 100
+ set_target(kwargs, self.target, self.param, value)
diff --git a/web/libsoomexplorer/filter.py b/web/libsoomexplorer/filter.py
new file mode 100644
index 0000000..b5127e4
--- /dev/null
+++ b/web/libsoomexplorer/filter.py
@@ -0,0 +1,662 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: filter.py 3695 2009-02-11 02:29:17Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/web/libsoomexplorer/filter.py,v $
+
+# Standard libraries
+import time
+import re
+import copy
+
+# eGenix mx.Tools, http://www.egenix.com/files/python/mxTools.html
+import mx
+
+# SOOM NSWDoH
+import SOOMv0
+
+# Application modules
+from libsoomexplorer import colvals
+from libsoomexplorer.undo import UndoMixin
+
+class Node:
+
+ def __init__(self):
+ self.children = []
+
+ def update_path(self, path='0'):
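+ # Assign each node a stable string path ('0', '0_0', '0_1_2', ...)
+ # so that form submissions can address nodes via find_node().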
+ self.path = path
+ for i, child in enumerate(self.children):
+ child.update_path('%s_%d' % (path, i))
+
+ def height(self):
+ return sum([child.height() for child in self.children]) or 1
+
+ def find_node(self, path):
+ """ Given a PATH, find a node """
+ if path == self.path:
+ return self
+ for child in self.children:
+ node = child.find_node(path)
+ if node:
+ return node
+
+ def find_parent_node(self, child_node):
+ """ Given a node, find the parent """
+ if child_node in self.children:
+ return self
+ for child in self.children:
+ node = child.find_parent_node(child_node)
+ if node:
+ return node
+
+
+class ContainerNode(Node):
+
+ def __init__(self, name, *children):
+ Node.__init__(self)
+ self.name = name
+ self.children = list(children)
+
+ def describe(self, dsname):
+ return self.name
+
+ def as_string(self):
+ children_as_str = [child.as_string()
+ for child in self.children
+ if child.is_complete()]
+ if not children_as_str:
+ return ''
+ return '(%s)' % ((' %s ' % self.name).join(children_as_str))
+
+ def is_complete(self):
+ for child in self.children:
+ if child.is_complete():
+ return True
+ return False
+
+
+class LeafNode(Node):
+
+ def __init__(self, colname, op, value):
+ Node.__init__(self)
+ self.colname = colname
+ if value is None:
+ if op == '==':
+ op = 'is null'
+ elif op == '!=':
+ op = 'is not null'
+ self.op = op
+ self.value = value
+
+ def describe(self, dsname):
+ def fmt(v):
+ return col.do_format(col.do_outtrans(v))
+
+ if not self.colname:
+ return '???'
+ col = SOOMv0.dsload(dsname).get_column(self.colname)
+ if type(self.value) is tuple:
+ value = ', '.join([fmt(v) for v in self.value])
+ else:
+ value = fmt(self.value)
+ if self.op == 'contains':
+ value = "[[%s]]" % value
+ elif self.op.startswith('is'):
+ value = ''
+ return '%s %s %s' % (col.label, self.op, value)
+
+ def as_string(self):
+ if not self.is_complete():
+ return ''
+ value = self.value
+ op = self.op
+ if type(value) is mx.DateTime.DateTimeType:
+ value = 'date(%s,%s,%s)' % (value.year, value.month, value.day)
+ elif type(value) is tuple:
+ # Join by hand so a 1-tuple is rendered without Python's trailing comma
+ value = '(%s)' % (', '.join([repr(v) for v in value]))
+ elif op == "contains":
+ value = "[[%s]]" % value
+ elif op == 'is null':
+ op = '=='
+ value = 'null'
+ elif op == 'is not null':
+ op = '!='
+ value = 'null'
+ else:
+ value = repr(value)
+ return '%s %s %s' % (self.colname, op, value)
+
+ def is_complete(self):
+ return self.colname and self.op
+
+class FilterError(Exception): pass
+
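+# Each entry is (column method required to support the operator,
+# operator token, user-visible label); ExpressionEdit.col_ops() filters
+# these groups against the methods the selected column implements.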
+ops = [
+ ('Simple', (
+ ('op_equal', '==', 'equal'),
+ ('op_not_equal', '!=', 'not equal'),
+ ('op_less_than', '<', 'less than'),
+ ('op_less_equal', '<=', 'less than or equal'),
+ ('op_greater_than', '>', 'greater than'),
+ ('op_greater_equal', '>=', 'greater than or equal'),
+ ('op_equal', 'is null', 'is null'),
+ ('op_not_equal', 'is not null', 'is not null'),
+ )),
+ ('Regular Expression', (
+ ('op_regexp', '~', 'regexp'),
+ ('op_not_regexp', '!~', 'not regexp'),
+ )),
+ ('Prefix', (
+ ('op_equal_col', '==:', 'starts with'),
+ ('op_not_equal_col', '!=:', 'does not start with'),
+ ('op_less_than_col', '<:', 'less than starting'),
+ ('op_less_equal_col', '<=:', 'less than or equal starting'),
+ ('op_greater_than_col', '>:', 'greater than starting'),
+ ('op_greater_equal_col','>=:', 'greater than or equal starting'),
+ )),
+ ('Sets', (
+ ('op_in', 'in', 'in'),
+ ('op_not_in', 'notin', 'not in'),
+ ('op_in_col', 'in:', 'prefix in'),
+ ('op_not_in_col', 'notin:', 'prefix not in'),
+ )),
+ ('Free text', (
+ ('op_contains', 'contains', 'contains'),
+ )),
+]
+
+
+class ExprValueBase:
+ markup = 'none'
+
+ def __init__(self, name, value, multiple=False):
+ pass
+
+ def search(self, workspace):
+ pass
+
+ def as_pytypes(self, workspace):
+ return None
+
+ def pretty_value(self, workspace):
+ return ''
+
+ def __nonzero__(self):
+ return False
+
+ def show_search_box(self, workspace):
+ return False
+
+
+class ExprValueNull(ExprValueBase):
+ pass
+
+
+class ExprValue(ExprValueBase):
+
+ markup = 'general'
+ split_re = re.compile('[ ]*,[ ]*')
+
+ def __init__(self, name, value, multiple=False):
+ self.name = name
+ self.value = value
+ self.multiple = multiple
+
+ def search(self, workspace):
+ pass
+
+ def as_pytypes(self, workspace):
+ col = workspace.get_dataset()[self.name]
+ if self.multiple:
+ value = self.value
+ if type(self.value) in (str, unicode):
+ value = self.split_re.split(self.value.strip())
+ elif self.value is None:
+ value = []
+ return tuple([colvals.to_datatype(col, v) for v in value])
+ else:
+ return colvals.to_datatype(col, self.value)
+
+ def strval(self):
+ if self.value is None:
+ return ''
+ if self.multiple:
+ return ', '.join(self.value)
+ else:
+ return str(self.value)
+
+ def pretty_value(self, workspace):
+ col = workspace.get_dataset()[self.name]
+ value = self.as_pytypes(workspace)
+ if self.multiple:
+ return ', '.join([colvals.shorten_trans(col, v) for v in value])
+ else:
+ return colvals.shorten_trans(col, value)
+
+ def __repr__(self):
+ return '%s(%r, %r, %r)' %\
+ (self.__class__.__name__,
+ self.name,
+ self.value,
+ self.multiple)
+
+
+class ExprTextArea(ExprValue):
+
+ markup = 'textarea'
+
+
+class ExprValueDiscrete(colvals.ColValSelect, ExprValueBase):
+
+ markup = 'discrete'
+
+ def show_search_box(self, workspace):
+ return self.cardinality_is_high(workspace)
+
+
+class ExprValueDate(ExprValueBase):
+
+ markup = 'date'
+
+ def __init__(self, name, value, multiple=False):
+ if not value:
+ value = mx.DateTime.now()
+ self.year = value.year
+ self.month = value.month
+ self.day = value.day
+
+ def search(self, workspace):
+ pass
+
+ def as_pytypes(self, workspace):
+ try:
+ return mx.DateTime.DateTime(int(self.year),
+ int(self.month),
+ int(self.day))
+ except mx.DateTime.Error, e:
+ raise FilterError('date: %s' % e)
+
+ def pretty_value(self, workspace):
+ return '%s-%s-%s' % (self.year, self.month, self.day)
+
+ def yearopt(self):
+ return range(2050, 1900, -1)
+
+ def monthopt(self):
+ months = [(i, n) for i, n in mx.DateTime.Month.items()
+ if type(i) is int]
+ months.sort()
+ return months
+
+ def dayopt(self):
+ return range(1, 32)
+
+
+class ExpressionEdit:
+ type = 'expr'
+
+ EDITCOL = 0
+ EDITOP = 1
+ EDITVALUE = 2
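+ # The editor walks through column -> operator -> value; next()
+ # advances the state once the current step has been completed.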
+
+ modes = [
+ (EDITCOL, 'Column'),
+ (EDITOP, 'Expression'),
+ (EDITVALUE, 'Value'),
+ ]
+
+ grouplabels = (
+ 'Discrete',
+ 'Scalar',
+ 'Date/Time',
+ 'Free text',
+ 'Other',
+ )
+
+ def __init__(self, dsname, node):
+ self.dsname = dsname
+ self.state = self.EDITCOL
+ self.colname = node.colname
+ self.op = node.op
+ self.node = node
+ self.__value_type = None
+ self.set_value(node.value)
+
+ def get_column(self):
+ if self.colname:
+ ds = SOOMv0.dsload(self.dsname)
+ return ds.get_column(self.colname)
+
+ def col_ops(self):
+ if not self.colname:
+ return ops
+ col = self.get_column()
+ col_ops = []
+ for grouplabel, group in ops:
+ group = [(op, label)
+ for method, op, label in group
+ if hasattr(col, method)]
+ if group:
+ col_ops.append((grouplabel, group))
+ return col_ops
+
+ def is_set_op(self):
+ return self.op in ('in', 'in:', 'notin', 'notin:')
+
+ def is_pattern_op(self):
+ return self.op.endswith(':') or self.op.endswith('~')
+
+ def next(self):
+ if ((self.state == self.EDITCOL and self.colname)
+ or (self.state == self.EDITOP and self.op)):
+ self.state += 1
+ self.set_value()
+
+ def set_value(self, value=None):
+ col = self.get_column()
+ if (not self.colname or not self.op
+ or self.op in ('is null', 'is not null')):
+ value_method = ExprValueNull
+ elif col.is_searchabletext():
+ value_method = ExprTextArea
+ elif self.is_pattern_op():
+ value_method = ExprValue
+ elif col.is_datetimetype():
+ value_method = ExprValueDate
+ elif not col.is_discrete():
+ value_method = ExprValue
+ else:
+ value_method = ExprValueDiscrete
+ value_type = self.colname, value_method, self.is_set_op()
+ if value_type != self.__value_type:
+ self.value = value_method(self.colname, value,
+ multiple=self.is_set_op())
+ self.__value_type = value_type
+
+ def colgroups(self, filter=None):
+ ds = SOOMv0.dsload(self.dsname)
+ cols = [(col.label, col) for col in ds.get_columns()]
+ cols.sort()
+ colgroups = {}
+ for label, col in cols:
+ groups = []
+ if col.is_datetimetype():
+ groups.append('Date/Time')
+ if col.is_discrete():
+ groups.append('Discrete')
+ if col.is_searchabletext():
+ groups.append('Free text')
+ if col.is_scalar():
+ groups.append('Scalar')
+ if not groups:
+ groups.append('Other')
+ for group in groups:
+ colgroups.setdefault(group, []).append(col)
+ return [(gl, [(col.name, col.label) for col in colgroups[gl]])
+ for gl in self.grouplabels if gl in colgroups]
+
+ def colname_select(self, filter=None):
+ if filter is None:
+ filter = True
+ elif filter == 'datetime':
+ filter = lambda c: c.is_datetimetype()
+ elif filter == 'discrete':
+ filter = lambda c: c.is_discrete() and not c.is_datetimetype() \
+ and not c.is_searchabletext()
+ elif filter == 'text':
+ filter = lambda c: c.is_searchabletext()
+ elif filter == 'other':
+ filter = lambda c: not c.is_discrete() and not c.is_datetimetype() \
+ and not c.is_searchabletext()
+ else:
+ raise ValueError('bad column filter value')
+ ds = SOOMv0.dsload(self.dsname)
+ cols = [(col.label, col.name)
+ for col in ds.get_columns()
+ if filter(col)]
+ cols.sort()
+ cols.insert(0, ('-- select --', ''))
+ return [(name, label) for label, name in cols]
+
+ def get_available_values(self):
+ """
+ For discrete columns, return a list of potential values
+ """
+ col = self.get_column()
+ if not col.is_discrete():
+ return
+ values = [(v, str(col.do_outtrans(v))[:45])
+ for v in col.inverted.keys()]
+ if col.is_ordered():
+ values.sort()
+ else:
+ values = [(l, v) for v, l in values]
+ values.sort()
+ values = [(v, l) for l, v in values]
+ return values
+
+ def pretty_value(self, workspace):
+ return self.value.pretty_value(workspace)
+
+ def show_search_box(self, workspace):
+ return (self.state == self.EDITVALUE
+ and self.value.show_search_box(workspace))
+
+
+class FilterInfo:
+ def __init__(self, filter):
+ self.name = filter.name
+ self.label = filter.label
+
+
+class AndOrEdit:
+ type = 'andor'
+
+ def __init__(self, dsname, node):
+ self.dsname = dsname
+ self.node = node
+ self.name = node.name
+
+
+class Filter(UndoMixin):
+ def __init__(self, dsname):
+ UndoMixin.__init__(self)
+ assert dsname
+ self.dsname = dsname
+ self.name = None
+ self.label = None
+ self.copy_buffer = None
+ self.root = LeafNode(None, None, None)
+ self.root.update_path()
+ self.clear_edit()
+ self.clear_undo()
+ self.updatetime = time.time()
+
+ def set_root(self, root):
+ self._replace_node(None, root)
+
+ def clear_edit(self):
+ self.edit_expr = None
+ self.edit_info = None
+ self.edit_andor = None
+
+ def node_is_selected(self, node):
+ return ((self.edit_expr and self.edit_expr.node == node)
+ or (self.edit_andor and self.edit_andor.node == node))
+
+ def undo(self):
+ self.clear_edit()
+ UndoMixin.undo(self)
+
+ def redo(self):
+ self.clear_edit()
+ UndoMixin.redo(self)
+
+ def as_string(self):
+ return self.root.as_string()
+
+ def in_edit(self):
+ return self.edit_expr or self.edit_andor or self.edit_info
+
+ def loaded(self):
+ try:
+ self.copy_buffer
+ except AttributeError:
+ self.copy_buffer = None
+
+ def _edit_node(self, node):
+ self.clear_edit()
+ if isinstance(node, ContainerNode):
+ self.edit_andor = AndOrEdit(self.dsname, node)
+ else:
+ self.edit_expr = ExpressionEdit(self.dsname, node)
+
+ # Basic operations - these should either come in complementary pairs, or
+ # be involutions (self-inverse), so "undo/redo" works.
+ #
+ # AM - I'm not convinced it's safe saving node instances in
+ # the undo data, but it would be hard to do it any other way.
+
+ def _replace_node(self, node, new_node):
+ parent = self.root.find_parent_node(node)
+ if parent is None:
+ self._record_undo('_replace_node', None, self.root)
+ self.root = new_node
+ else:
+ i = parent.children.index(node)
+ parent.children[i] = new_node
+ self._record_undo('_replace_node', new_node, node)
+ self.root.update_path()
+ self._edit_node(new_node)
+
+ def _add_node(self, node, new_node, index=None):
+ self._record_undo('_del_node', new_node)
+ if node is None:
+ self.root = new_node
+ elif index is None:
+ node.children.append(new_node)
+ else:
+ node.children.insert(index, new_node)
+ self.root.update_path()
+ self._edit_node(new_node)
+
+ def _del_node(self, node):
+ parent = self.root.find_parent_node(node)
+ if parent is None:
+ # root node
+ self._replace_node(None, LeafNode(None, None, None))
+ else:
+ i = parent.children.index(node)
+ if len(parent.children) == 2:
+ # If parent left with only one child, we eliminate it.
+ self._replace_node(parent, parent.children[i ^ 1])
+ else:
+ del parent.children[i]
+ self._record_undo('_add_node', parent, node, i)
+ self.root.update_path()
+ self._edit_node(parent)
+
+ def _set_expr(self, node, colname, op, value):
+ self._record_undo('_set_expr', node, node.colname, node.op, node.value)
+ node.colname = colname
+ node.op = op
+ node.value = value
+
+ def _set_info(self, name, label):
+ self._record_undo('_set_info', self.name, self.label)
+ self.name = name
+ if not label:
+ label = self.name
+ self.label = label
+
+ def _set_andor(self, node, name):
+ self._record_undo('_set_andor', node, node.name)
+ node.name = name
+ if not self.edit_info:
+ self.start_edit_node(node.path)
+
+
+ # Higher level operations
+ def start_edit_node(self, path):
+ node = self.root.find_node(path)
+ if not node:
+ raise LookupError('node path %s not found' % path)
+ self._edit_node(node)
+
+ def add_expr(self, node):
+ self._add_node(node, LeafNode(None, None, None))
+
+ def del_node(self):
+ edit = self.edit_andor or self.edit_expr
+ assert edit is not None
+ self._del_node(edit.node)
+ self.copy_buffer = edit.node
+
+ def splice_filter(self, filter):
+ # Import another filter at /node/, replacing /node/
+ edit = self.edit_andor or self.edit_expr
+ assert edit is not None
+ self.clear_edit()
+ self._replace_node(edit.node, filter.root)
+
+ def paste(self):
+ if self.copy_buffer is not None:
+ edit = self.edit_andor or self.edit_expr
+ assert edit is not None
+ self._replace_node(edit.node, copy.deepcopy(self.copy_buffer))
+
+ def add_andor(self, node, name):
+ # Insert at "node" a new and/or node, moving "node" down, and adding
+ # a new leaf node.
+ new_node = ContainerNode(name)
+ new_node.children.append(node)
+ new_expr_node = LeafNode(None, None, None)
+ new_node.children.append(new_expr_node)
+ self._replace_node(node, new_node)
+ self._edit_node(new_expr_node)
+
+ def expr_mode(self, mode):
+ self.edit_expr.state = mode
+ self.edit_expr.set_value()
+
+ def expr_next(self):
+ if self.edit_expr:
+ self.edit_expr.next()
+
+ def commit_edit_node(self, workspace):
+ self._set_expr(self.edit_expr.node, self.edit_expr.colname,
+ self.edit_expr.op,
+ self.edit_expr.value.as_pytypes(workspace))
+ self.clear_edit()
+
+ def commit_add_edit_node(self, workspace):
+ node = self.edit_expr.node
+ parent = self.root.find_parent_node(node)
+ self.commit_edit_node(workspace)
+ if parent is None:
+ self.add_andor(node, 'and')
+ else:
+ self.add_expr(parent)
+
+ def start_edit_info(self):
+ self.edit_info = FilterInfo(self)
+
+ def apply_info(self):
+ self._set_info(self.edit_info.name, self.edit_info.label)
+ self.clear_edit()
+
+ def set_andor(self, node, name):
+ self._set_andor(node, name)
diff --git a/web/libsoomexplorer/filterparse.py b/web/libsoomexplorer/filterparse.py
new file mode 100644
index 0000000..58011b7
--- /dev/null
+++ b/web/libsoomexplorer/filterparse.py
@@ -0,0 +1,635 @@
+# vim: set ts=4 sw=4 et:
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+
+import mx
+
+from SOOMv0.soomparse import dequote, relativeDate
+from libsoomexplorer.filter import LeafNode, ContainerNode
+
+# Lines starting with #XX have been disabled for expedience when porting this to NEA-web
+# These will need to be addressed at some point.
+
+
+# Begin -- grammar generated by Yapps
+import sys, re
+import yappsrt
+
+class filterparseScanner(yappsrt.Scanner):
+ patterns = [
+ ('"reldate"', re.compile('reldate')),
+ ('"date"', re.compile('date')),
+ ('","', re.compile(',')),
+ ('"\\\\]\\\\]"', re.compile('\\]\\]')),
+ ('"\\\\[\\\\["', re.compile('\\[\\[')),
+ ('"\\\\)"', re.compile('\\)')),
+ ('"\\\\("', re.compile('\\(')),
+ ('"and"', re.compile('and')),
+ ('"or"', re.compile('or')),
+ ('"!~"', re.compile('!~')),
+ ('"~"', re.compile('~')),
+ ('"contains"', re.compile('contains')),
+ ('"notin:"', re.compile('notin:')),
+ ('"notin"', re.compile('notin')),
+ ('"in:"', re.compile('in:')),
+ ('"in"', re.compile('in')),
+ ('"=="', re.compile('==')),
+ ('"is"', re.compile('is')),
+ ('"="', re.compile('=')),
+ ('"eq"', re.compile('eq')),
+ ('"equalto"', re.compile('equalto')),
+ ('"equals"', re.compile('equals')),
+ ('"to"', re.compile('to')),
+ ('"equal"', re.compile('equal')),
+ ('"notstartswith"', re.compile('notstartswith')),
+ ('"notstartingwith"', re.compile('notstartingwith')),
+ ('"<>:"', re.compile('<>:')),
+ ('"#:"', re.compile('#:')),
+ ('"!==:"', re.compile('!==:')),
+ ('"!=:"', re.compile('!=:')),
+ ('"ne:"', re.compile('ne:')),
+ ('"doesnotequal:"', re.compile('doesnotequal:')),
+ ('"notequal:"', re.compile('notequal:')),
+ ('"notequalto:"', re.compile('notequalto:')),
+ ('"<>"', re.compile('<>')),
+ ('"is not"', re.compile('is not')),
+ ('"#"', re.compile('#')),
+ ('"!=="', re.compile('!==')),
+ ('"!="', re.compile('!=')),
+ ('"ne"', re.compile('ne')),
+ ('"doesnotequal"', re.compile('doesnotequal')),
+ ('"notequal"', re.compile('notequal')),
+ ('"notequalto"', re.compile('notequalto')),
+ ('"=>:"', re.compile('=>:')),
+ ('">=:"', re.compile('>=:')),
+ ('"ge:"', re.compile('ge:')),
+ ('"greaterequal:"', re.compile('greaterequal:')),
+ ('"greaterthanorequalto:"', re.compile('greaterthanorequalto:')),
+ ('"=>"', re.compile('=>')),
+ ('">="', re.compile('>=')),
+ ('"ge"', re.compile('ge')),
+ ('"greaterequal"', re.compile('greaterequal')),
+ ('"greaterthanorequalto"', re.compile('greaterthanorequalto')),
+ ('">:"', re.compile('>:')),
+ ('"gt:"', re.compile('gt:')),
+ ('"greaterthan:"', re.compile('greaterthan:')),
+ ('">"', re.compile('>')),
+ ('"gt"', re.compile('gt')),
+ ('"greaterthan"', re.compile('greaterthan')),
+ ('"=<:"', re.compile('=<:')),
+ ('"<=:"', re.compile('<=:')),
+ ('"le:"', re.compile('le:')),
+ ('"lessequal:"', re.compile('lessequal:')),
+ ('"lessthanorequalto:"', re.compile('lessthanorequalto:')),
+ ('"=<"', re.compile('=<')),
+ ('"<="', re.compile('<=')),
+ ('"le"', re.compile('le')),
+ ('"lessequal"', re.compile('lessequal')),
+ ('"lessthanorequalto"', re.compile('lessthanorequalto')),
+ ('"<:"', re.compile('<:')),
+ ('"lt:"', re.compile('lt:')),
+ ('"lessthan:"', re.compile('lessthan:')),
+ ('"<"', re.compile('<')),
+ ('"lt"', re.compile('lt')),
+ ('"lessthan"', re.compile('lessthan')),
+ ('"eq:"', re.compile('eq:')),
+ ('"startswith"', re.compile('startswith')),
+ ('"startingwith"', re.compile('startingwith')),
+ ('"==:"', re.compile('==:')),
+ ('"=:"', re.compile('=:')),
+ ('"with"', re.compile('with')),
+ ('"starting"', re.compile('starting')),
+ ('[ \t\n\r]+', re.compile('[ \t\n\r]+')),
+ ('END', re.compile('$')),
+ ('DATE', re.compile('\\d{4}-\\d{1,2}-\\d{1,2}')),
+ ('INT', re.compile('[-+]?[0-9]+')),
+ ('FLOAT', re.compile('[-+]?([0-9]*\\.[0-9]+)|([0-9]+\\.)|(([0-9]*\\.)?[0-9]+[eE][-+]?[0-9]+)')),
+ ('ID', re.compile('[a-zA-Z0-9_]+')),
+ ('STR', re.compile('[rR]?\'([^\\n\'\\\\]|\\\\.)*\'|[rR]?"([^\\n"\\\\]|\\\\.)*"')),
+ ('WORD', re.compile("[a-zA-Z0-9'*]?[a-zA-Z0-9][a-zA-Z0-9'*]*")),
+ ('SEXPR', re.compile('.*?(?=]])')),
+ ('NULL', re.compile('[Nn][Oo][Nn][Ee]|[Nn][Uu][Ll][Ll]')),
+ ]
+ def __init__(self, str):
+ yappsrt.Scanner.__init__(self,None,['[ \t\n\r]+'],str)
+
+class filterparse(yappsrt.Parser):
+ Context = yappsrt.Context
+ def starts_with(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'starts_with', [])
+ _token = self._peek('"starting"', '"=:"', '"==:"', '"startingwith"', '"startswith"', '"eq:"')
+ if _token == '"starting"':
+ self._scan('"starting"')
+ self._scan('"with"')
+ return '==:'
+ elif _token == '"=:"':
+ self._scan('"=:"')
+ return '==:'
+ elif _token == '"==:"':
+ self._scan('"==:"')
+ return '==:'
+ elif _token == '"startingwith"':
+ self._scan('"startingwith"')
+ return '==:'
+ elif _token == '"startswith"':
+ self._scan('"startswith"')
+ return '==:'
+ else: # == '"eq:"'
+ self._scan('"eq:"')
+ return '==:'
+
+ def lt(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'lt', [])
+ _token = self._peek('"lessthan"', '"lt"', '"<"')
+ if _token == '"lessthan"':
+ self._scan('"lessthan"')
+ return '<'
+ elif _token == '"lt"':
+ self._scan('"lt"')
+ return '<'
+ else: # == '"<"'
+ self._scan('"<"')
+ return '<'
+
+ def lt_col(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'lt_col', [])
+ _token = self._peek('"lessthan:"', '"lt:"', '"<:"')
+ if _token == '"lessthan:"':
+ self._scan('"lessthan:"')
+ return '<:'
+ elif _token == '"lt:"':
+ self._scan('"lt:"')
+ return '<:'
+ else: # == '"<:"'
+ self._scan('"<:"')
+ return '<:'
+
+ def le(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'le', [])
+ _token = self._peek('"lessthanorequalto"', '"lessequal"', '"le"', '"<="', '"=<"')
+ if _token == '"lessthanorequalto"':
+ self._scan('"lessthanorequalto"')
+ return '<='
+ elif _token == '"lessequal"':
+ self._scan('"lessequal"')
+ return '<='
+ elif _token == '"le"':
+ self._scan('"le"')
+ return '<='
+ elif _token == '"<="':
+ self._scan('"<="')
+ return '<='
+ else: # == '"=<"'
+ self._scan('"=<"')
+ return '<='
+
+ def le_col(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'le_col', [])
+ _token = self._peek('"lessthanorequalto:"', '"lessequal:"', '"le:"', '"<=:"', '"=<:"')
+ if _token == '"lessthanorequalto:"':
+ self._scan('"lessthanorequalto:"')
+ return '<=:'
+ elif _token == '"lessequal:"':
+ self._scan('"lessequal:"')
+ return '<=:'
+ elif _token == '"le:"':
+ self._scan('"le:"')
+ return '<=:'
+ elif _token == '"<=:"':
+ self._scan('"<=:"')
+ return '<=:'
+ else: # == '"=<:"'
+ self._scan('"=<:"')
+ return '<=:'
+
+ def gt(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'gt', [])
+ _token = self._peek('"greaterthan"', '"gt"', '">"')
+ if _token == '"greaterthan"':
+ self._scan('"greaterthan"')
+ return '>'
+ elif _token == '"gt"':
+ self._scan('"gt"')
+ return '>'
+ else: # == '">"'
+ self._scan('">"')
+ return '>'
+
+ def gt_col(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'gt_col', [])
+ _token = self._peek('"greaterthan:"', '"gt:"', '">:"')
+ if _token == '"greaterthan:"':
+ self._scan('"greaterthan:"')
+ return '>:'
+ elif _token == '"gt:"':
+ self._scan('"gt:"')
+ return '>:'
+ else: # == '">:"'
+ self._scan('">:"')
+ return '>:'
+
+ def ge(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'ge', [])
+ _token = self._peek('"greaterthanorequalto"', '"greaterequal"', '"ge"', '">="', '"=>"')
+ if _token == '"greaterthanorequalto"':
+ self._scan('"greaterthanorequalto"')
+ return '>='
+ elif _token == '"greaterequal"':
+ self._scan('"greaterequal"')
+ return '>='
+ elif _token == '"ge"':
+ self._scan('"ge"')
+ return '>='
+ elif _token == '">="':
+ self._scan('">="')
+ return '>='
+ else: # == '"=>"'
+ self._scan('"=>"')
+ return '>='
+
+ def ge_col(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'ge_col', [])
+ _token = self._peek('"greaterthanorequalto:"', '"greaterequal:"', '"ge:"', '">=:"', '"=>:"')
+ if _token == '"greaterthanorequalto:"':
+ self._scan('"greaterthanorequalto:"')
+ return '>=:'
+ elif _token == '"greaterequal:"':
+ self._scan('"greaterequal:"')
+ return '>=:'
+ elif _token == '"ge:"':
+ self._scan('"ge:"')
+ return '>=:'
+ elif _token == '">=:"':
+ self._scan('">=:"')
+ return '>=:'
+ else: # == '"=>:"'
+ self._scan('"=>:"')
+ return '>=:'
+
+ def ne(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'ne', [])
+ _token = self._peek('"notequalto"', '"notequal"', '"doesnotequal"', '"ne"', '"!="', '"!=="', '"#"', '"is not"', '"<>"')
+ if _token == '"notequalto"':
+ self._scan('"notequalto"')
+ return '!='
+ elif _token == '"notequal"':
+ self._scan('"notequal"')
+ return '!='
+ elif _token == '"doesnotequal"':
+ self._scan('"doesnotequal"')
+ return '!='
+ elif _token == '"ne"':
+ self._scan('"ne"')
+ return '!='
+ elif _token == '"!="':
+ self._scan('"!="')
+ return '!='
+ elif _token == '"!=="':
+ self._scan('"!=="')
+ return '!='
+ elif _token == '"#"':
+ self._scan('"#"')
+ return '!='
+ elif _token == '"is not"':
+ self._scan('"is not"')
+ return '!='
+ else: # == '"<>"'
+ self._scan('"<>"')
+ return '!='
+
+ def ne_col(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'ne_col', [])
+ _token = self._peek('"notequalto:"', '"notequal:"', '"doesnotequal:"', '"ne:"', '"!=:"', '"!==:"', '"#:"', '"<>:"', '"notstartingwith"', '"notstartswith"')
+ if _token == '"notequalto:"':
+ self._scan('"notequalto:"')
+ return '!=:'
+ elif _token == '"notequal:"':
+ self._scan('"notequal:"')
+ return '!=:'
+ elif _token == '"doesnotequal:"':
+ self._scan('"doesnotequal:"')
+ return '!=:'
+ elif _token == '"ne:"':
+ self._scan('"ne:"')
+ return '!=:'
+ elif _token == '"!=:"':
+ self._scan('"!=:"')
+ return '!=:'
+ elif _token == '"!==:"':
+ self._scan('"!==:"')
+ return '!=:'
+ elif _token == '"#:"':
+ self._scan('"#:"')
+ return '!=:'
+ elif _token == '"<>:"':
+ self._scan('"<>:"')
+ return '!=:'
+ elif _token == '"notstartingwith"':
+ self._scan('"notstartingwith"')
+ return '!=:'
+ else: # == '"notstartswith"'
+ self._scan('"notstartswith"')
+ return '!=:'
+
+ def eq(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'eq', [])
+ _token = self._peek('"equal"', '"equals"', '"equalto"', '"eq"', '"="', '"is"', '"=="')
+ if _token == '"equal"':
+ self._scan('"equal"')
+ self._scan('"to"')
+ return '=='
+ elif _token == '"equals"':
+ self._scan('"equals"')
+ return '=='
+ elif _token == '"equalto"':
+ self._scan('"equalto"')
+ return '=='
+ elif _token == '"eq"':
+ self._scan('"eq"')
+ return '=='
+ elif _token == '"="':
+ self._scan('"="')
+ return '=='
+ elif _token == '"is"':
+ self._scan('"is"')
+ return '=='
+ else: # == '"=="'
+ self._scan('"=="')
+ return '=='
+
+ def in_op(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'in_op', [])
+ self._scan('"in"')
+ return 'in'
+
+ def in_col(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'in_col', [])
+ self._scan('"in:"')
+ return 'in:'
+
+ def not_in(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'not_in', [])
+ self._scan('"notin"')
+ return 'notin'
+
+ def not_in_col(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'not_in_col', [])
+ self._scan('"notin:"')
+ return 'notin:'
+
+ def contains(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'contains', [])
+ self._scan('"contains"')
+ return 'contains'
+
+ def regexp(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'regexp', [])
+ self._scan('"~"')
+ return '~'
+
+ def not_regexp(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'not_regexp', [])
+ self._scan('"!~"')
+ return '!~'
+
+ def op(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'op', [])
+ _token = self._peek('"starting"', '"=:"', '"==:"', '"startingwith"', '"startswith"', '"eq:"', '"lessthan"', '"lt"', '"<"', '"lessthan:"', '"lt:"', '"<:"', '"greaterthan"', '"gt"', '">"', '"greaterthan:"', '"gt:"', '">:"', '"greaterthanorequalto"', '"greaterequal"', '"ge"', '">="', '"=>"', '"greaterthanorequalto:"', '"greaterequal:"', '"ge:"', '">=:"', '"=>:"', '"lessthanorequalto"', '"lessequal"', '"le"', '"<="', '"=<"', '"lessthanorequalto:"', '"lessequal:"', '"le:"', '"<=:"', ' [...]
+ if _token in ['"starting"', '"=:"', '"==:"', '"startingwith"', '"startswith"', '"eq:"']:
+ starts_with = self.starts_with(_context)
+ return starts_with
+ elif _token in ['"lessthan"', '"lt"', '"<"']:
+ lt = self.lt(_context)
+ return lt
+ elif _token in ['"lessthan:"', '"lt:"', '"<:"']:
+ lt_col = self.lt_col(_context)
+ return lt_col
+ elif _token in ['"greaterthan"', '"gt"', '">"']:
+ gt = self.gt(_context)
+ return gt
+ elif _token in ['"greaterthan:"', '"gt:"', '">:"']:
+ gt_col = self.gt_col(_context)
+ return gt_col
+ elif _token in ['"greaterthanorequalto"', '"greaterequal"', '"ge"', '">="', '"=>"']:
+ ge = self.ge(_context)
+ return ge
+ elif _token in ['"greaterthanorequalto:"', '"greaterequal:"', '"ge:"', '">=:"', '"=>:"']:
+ ge_col = self.ge_col(_context)
+ return ge_col
+ elif _token in ['"lessthanorequalto"', '"lessequal"', '"le"', '"<="', '"=<"']:
+ le = self.le(_context)
+ return le
+ elif _token in ['"lessthanorequalto:"', '"lessequal:"', '"le:"', '"<=:"', '"=<:"']:
+ le_col = self.le_col(_context)
+ return le_col
+ elif _token in ['"notequalto"', '"notequal"', '"doesnotequal"', '"ne"', '"!="', '"!=="', '"#"', '"is not"', '"<>"']:
+ ne = self.ne(_context)
+ return ne
+ elif _token not in ['"equal"', '"equals"', '"equalto"', '"eq"', '"="', '"is"', '"=="', '"in"', '"in:"', '"notin"', '"notin:"', '"contains"', '"~"', '"!~"']:
+ ne_col = self.ne_col(_context)
+ return ne_col
+ elif _token not in ['"in"', '"in:"', '"notin"', '"notin:"', '"contains"', '"~"', '"!~"']:
+ eq = self.eq(_context)
+ return eq
+ elif _token == '"in"':
+ in_op = self.in_op(_context)
+ return in_op
+ elif _token == '"in:"':
+ in_col = self.in_col(_context)
+ return in_col
+ elif _token == '"notin"':
+ not_in = self.not_in(_context)
+ return not_in
+ elif _token == '"notin:"':
+ not_in_col = self.not_in_col(_context)
+ return not_in_col
+ elif _token == '"contains"':
+ contains = self.contains(_context)
+ return contains
+ elif _token == '"~"':
+ regexp = self.regexp(_context)
+ return regexp
+ else: # == '"!~"'
+ not_regexp = self.not_regexp(_context)
+ return not_regexp
+
+ def goal(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'goal', [])
+ expr = self.expr(_context)
+ END = self._scan('END')
+ return expr
+
+ def expr(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'expr', [])
+ factor = self.factor(_context)
+ n = [factor]
+ while self._peek('"or"', 'END', '"\\\\)"') == '"or"':
+ self._scan('"or"')
+ factor = self.factor(_context)
+ n.append(factor)
+ if self._peek() not in ['"or"', 'END', '"\\\\)"']:
+ raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['"or"', 'END', '"\\\\)"']))
+ return container('or', n)
+
+ def factor(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'factor', [])
+ comparison = self.comparison(_context)
+ n = [comparison]
+ while self._peek('"and"', '"or"', 'END', '"\\\\)"') == '"and"':
+ self._scan('"and"')
+ comparison = self.comparison(_context)
+ n.append(comparison)
+ if self._peek() not in ['"and"', '"or"', 'END', '"\\\\)"']:
+ raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['"and"', '"or"', 'END', '"\\\\)"']))
+ return container('and', n)
+
+ def comparison(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'comparison', [])
+ _token = self._peek('"\\\\("', 'ID')
+ if _token == 'ID':
+ col = self.col(_context)
+ op = self.op(_context)
+ term = self.term(_context)
+ return LeafNode(col, op, term)
+ else: # == '"\\\\("'
+ self._scan('"\\\\("')
+ expr = self.expr(_context)
+ self._scan('"\\\\)"')
+ return expr
+
+ def term(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'term', [])
+ _token = self._peek('NULL', 'INT', 'FLOAT', 'STR', '"\\\\[\\\\["', '"\\\\("', '"date"', '"reldate"')
+ if _token == 'NULL':
+ NULL = self._scan('NULL')
+ return None
+ elif _token == 'INT':
+ INT = self._scan('INT')
+ return int(INT)
+ elif _token == 'FLOAT':
+ FLOAT = self._scan('FLOAT')
+ return float(FLOAT)
+ elif _token == 'STR':
+ STR = self._scan('STR')
+ return dequote(STR)
+ elif _token == '"\\\\[\\\\["':
+ self._scan('"\\\\[\\\\["')
+ SEXPR = self._scan('SEXPR')
+ self._scan('"\\\\]\\\\]"')
+ return SEXPR
+ elif _token == '"\\\\("':
+ self._scan('"\\\\("')
+ while 1:
+ term = self.term(_context)
+ term_list = [term]
+ while self._peek('","', '"\\\\)"', 'NULL', 'INT', 'FLOAT', 'STR', '"\\\\[\\\\["', '"\\\\("', '"date"', '"reldate"') == '","':
+ self._scan('","')
+ term = self.term(_context)
+ term_list.append(term)
+ if self._peek() not in ['","', '"\\\\)"', 'NULL', 'INT', 'FLOAT', 'STR', '"\\\\[\\\\["', '"\\\\("', '"date"', '"reldate"']:
+ raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['","', '"\\\\)"', 'NULL', 'INT', 'FLOAT', 'STR', '"\\\\[\\\\["', '"\\\\("', '"date"', '"reldate"']))
+ if self._peek('NULL', 'INT', 'FLOAT', 'STR', '"\\\\[\\\\["', '"\\\\("', '"date"', '"reldate"', '"\\\\)"', '","') not in ['NULL', 'INT', 'FLOAT', 'STR', '"\\\\[\\\\["', '"\\\\("', '"date"', '"reldate"']: break
+ self._scan('"\\\\)"')
+ return term_list
+ elif _token == '"date"':
+ self._scan('"date"')
+ _token = self._peek('"\\\\("', 'DATE')
+ if _token == '"\\\\("':
+ self._scan('"\\\\("')
+ INT = self._scan('INT')
+ year = int(INT)
+ self._scan('","')
+ INT = self._scan('INT')
+ month = int(INT)
+ self._scan('","')
+ INT = self._scan('INT')
+ day = int(INT)
+ self._scan('"\\\\)"')
+ return mx.DateTime.Date(year, month, day)
+ else: # == 'DATE'
+ DATE = self._scan('DATE')
+ return mx.DateTime.ISO.ParseDate(DATE)
+ else: # == '"reldate"'
+ self._scan('"reldate"')
+ kwargs = self.kwargs(_context)
+ return relativeDate(**kwargs)
+
+ def col(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'col', [])
+ ID = self._scan('ID')
+ return self.ds.get_column(ID).name
+
+ def kwargs(self, _parent=None):
+ _context = self.Context(_parent, self._scanner, self._pos, 'kwargs', [])
+ self._scan('"\\\\("')
+ kwargs = {}
+ _token = self._peek('ID', '"\\\\)"', '","')
+ if _token != 'ID':
+ pass
+ else: # == 'ID'
+ ID = self._scan('ID')
+ self._scan('"="')
+ term = self.term(_context)
+ kwargs[ID] = term
+ while self._peek('","', '"\\\\)"') == '","':
+ self._scan('","')
+ ID = self._scan('ID')
+ self._scan('"="')
+ term = self.term(_context)
+ kwargs[ID] = term
+ if self._peek() not in ['","', '"\\\\)"']:
+ raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['","', '"\\\\)"']))
+ self._scan('"\\\\)"')
+ return kwargs
+
+
+def parse(rule, text):
+ P = filterparse(filterparseScanner(text))
+ return yappsrt.wrap_error_reporter(P, rule)
+
+# End -- grammar generated by Yapps
+
+
+
+def container(op, nodes):
+ if len(nodes) == 1:
+ return nodes[0]
+ return ContainerNode(op, *nodes)
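+# e.g. container('or', [node]) returns the single node unchanged, while
+# container('or', [a, b]) returns ContainerNode('or', a, b) (illustrative;
+# a, b and node stand for parse nodes).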
+
+class FilterParseError(Exception): pass
+
+class FilterParse(filterparse):
+ def __init__(self, ds):
+ self.ds = ds
+
+ def parse(self, expr):
+ scanner = filterparseScanner(expr)
+ try:
+ filterparse.__init__(self, scanner)
+ root = self.goal()
+ except yappsrt.SyntaxError, s:
+ raise FilterParseError('Syntax error in filter expression %s' %
+ (s.msg,))
+ except yappsrt.NoMoreTokens:
+            raise FilterParseError('Could not complete parsing filter '
+                                   'expression; stopped around here: %s'
+                                   % scanner)
+ else:
+# while len(root.children) == 1:
+# # Strip out unnecessary ()
+# root = root.children[0]
+ return root
+
+def parse(ds, expr):
+ return FilterParse(ds).parse(expr)
diff --git a/web/libsoomexplorer/filterstore.py b/web/libsoomexplorer/filterstore.py
new file mode 100644
index 0000000..689f25a
--- /dev/null
+++ b/web/libsoomexplorer/filterstore.py
@@ -0,0 +1,184 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: filterstore.py 3703 2009-03-03 04:58:06Z andrewm $
+
+# Standard libraries
+import os
+import cPickle
+import fcntl
+import time
+import errno
+import re
+import tempfile
+
+from libsoomexplorer.filter import Filter, FilterError
+
+import config
+
+__all__ = ('filterstore',)
+
+safe_dsname_re = re.compile(r'^[a-z0-9_-]{1,16}$', re.IGNORECASE)
+
+def dsfilters_filename(dsname):
+ if not safe_dsname_re.match(dsname):
+ # Shouldn't happen, but, because we use this as a filename,
+ # we need to be paranoid.
+ raise FilterError('Bad dataset name: %r' % dsname)
+ return os.path.join(config.data_dir, '%s_filters.pkl' % dsname)
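+# Example (illustrative; assumes config.data_dir == '/var/lib/nea'):
+#   dsfilters_filename('nhds')   -> '/var/lib/nea/nhds_filters.pkl'
+#   dsfilters_filename('../etc') -> raises FilterError (fails safe_dsname_re)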
+
+
+class DSFilterStore:
+ def __init__(self, dsname):
+ self.dsname = dsname
+ self.mtime = None
+ self.filters = {}
+
+ def refresh(self):
+ filename = dsfilters_filename(self.dsname)
+ try:
+ mtime = os.path.getmtime(filename)
+ except OSError, (eno, estr):
+ if eno == errno.ENOENT:
+ self.filters = {}
+ return
+ raise
+ if self.mtime is None or self.mtime < mtime:
+ try:
+ f = open(filename, 'rb')
+ except IOError, (eno, estr):
+ if eno != errno.ENOENT:
+ raise
+ self.filters = {}
+ else:
+ try:
+ self.filters = cPickle.load(f)
+ finally:
+ f.close()
+ for filter in self.filters.itervalues():
+ filter.loaded()
+
+ def _update(self, filter, delete=False):
+ """
+ Merge the given filter into the saved filters for this dataset
+
+ We first obtain an exclusive lock on the current file,
+ read it, and check to make sure someone else hasn't already
+ updated the filter in question. If this condition is met,
+        a new temporary filters pickle is written, then renamed into
+ place (an atomic operation).
+ """
+ if not filter.name:
+ raise FilterError('A filter name must be specified!')
+# if not self.name_re.match(filter.name):
+# raise FilterError('Invalid filter name %r' % filter.name)
+ filename = dsfilters_filename(self.dsname)
+ readit = True
+ try:
+ f = open(filename, 'r+b')
+ except IOError, (eno, estr):
+ if eno != errno.ENOENT:
+ raise
+ f = open(filename, 'wb')
+ readit = False
+ try:
+ fcntl.lockf(f, fcntl.LOCK_EX)
+ if readit:
+ self.filters = cPickle.load(f)
+ f.seek(0)
+ else:
+ self.filters = {}
+ # AM Feb09 - this check is unreliable
+ #old_filter = self.filters.get(filter.name.lower())
+ #if old_filter and old_filter.updatetime != filter.updatetime:
+ # raise FilterError('filter %r updated by another user!' %
+ # filter.name)
+ if delete:
+ try:
+ del self.filters[filter.name.lower()]
+ except KeyError:
+ pass
+ else:
+ filter.clear_undo()
+ filter.updatetime = time.time()
+ self.filters[filter.name.lower()] = filter
+ dir, fn = os.path.split(filename)
+ tf, tempname = tempfile.mkstemp(prefix='.filter.', dir=dir)
+ tf = os.fdopen(tf, 'wb')
+ try:
+ cPickle.dump(self.filters, tf, -1)
+ except:
+ tf.close()
+ os.unlink(tempname)
+ raise
+ else:
+ tf.close()
+ os.rename(tempname, filename)
+ self.mtime = os.path.getmtime(filename)
+ finally:
+ f.close()
+
+ def update(self, filter):
+ # We save a copy of the filter, minus the undo information
+ self._update(filter.copy())
+
+ def delete(self, filter):
+ self._update(filter, delete=True)
+
+ def __getitem__(self, name):
+ return self.filters[name.lower()]
+
+ def values(self):
+ return self.filters.values()
+
+
+class FilterStore:
+ """
+ This represents "saved" filters, and provides
+ concurrent-access-safe methods for loading, saving and deleting
+ filters.
+ """
+
+ def __init__(self):
+ self._dsfilters = {}
+
+ def __getstate__(self):
+ raise NotImplementedError
+
+ def _get_dsfilters(self, dsname):
+ try:
+ dsfilters = self._dsfilters[dsname]
+ except KeyError:
+ dsfilters = self._dsfilters[dsname] = DSFilterStore(dsname)
+ dsfilters.refresh()
+ return dsfilters
+
+ def available_filters(self, dsname):
+ return self._get_dsfilters(dsname).values()
+
+ def load_filter(self, dsname, filtername):
+ filter = self._get_dsfilters(dsname)[filtername]
+ filter.clear_undo()
+ return filter
+
+ def update_filter(self, filter):
+ self._get_dsfilters(filter.dsname).update(filter)
+
+ def delete_filter(self, filter):
+ self._get_dsfilters(filter.dsname).delete(filter)
+
+ def new_filter(self, dsname):
+ return Filter(dsname)
+
+filterstore = FilterStore()
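+# Typical use of the module-level singleton (illustrative; the dataset and
+# filter names are hypothetical):
+#   from libsoomexplorer.filterstore import filterstore
+#   f = filterstore.new_filter('nhds')
+#   f.name = 'males_only'
+#   filterstore.update_filter(f)              # stored under 'males_only'
+#   f = filterstore.load_filter('nhds', 'males_only')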
diff --git a/web/libsoomexplorer/output/__init__.py b/web/libsoomexplorer/output/__init__.py
new file mode 100644
index 0000000..5fb0fc8
--- /dev/null
+++ b/web/libsoomexplorer/output/__init__.py
@@ -0,0 +1,16 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: __init__.py 3673 2009-02-02 06:01:30Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/web/libsoomexplorer/output/__init__.py,v $
diff --git a/web/libsoomexplorer/output/base.py b/web/libsoomexplorer/output/base.py
new file mode 100644
index 0000000..bc6f1d5
--- /dev/null
+++ b/web/libsoomexplorer/output/base.py
@@ -0,0 +1,147 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: base.py 3673 2009-02-02 06:01:30Z andrewm $
+# $HeadURL: https://wwwepi4.health.nsw.gov.au/svn/netepi/Analysis/trunk/web/libsoomexplorer/output/base.py $
+
+import os
+import sys
+import tempfile
+import csv
+
+import config
+
+class OutputError(Exception):
+ pass
+
+
+class _OutputFile(object):
+ def __init__(self, fn, label):
+ self.fn = fn
+ self.label = label
+
+ def delete(self):
+ try:
+ os.unlink(self.fn)
+ except OSError:
+ pass
+
+ def url(self):
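+        # e.g. '/nea/dynamic/soomAb12x.png', assuming config.appname is
+        # 'nea' (illustrative values)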
+ return '/%s/dynamic/%s' % (config.appname, os.path.basename(self.fn))
+
+ def exists(self):
+ return os.path.exists(self.fn)
+
+
+class DownloadBase:
+ def __init__(self, file_name, content_type=None):
+ self.file_name = file_name
+ if content_type is None:
+ content_type = 'application/unknown'
+ self.content_type = content_type
+
+ def set_headers(self, ctx):
+ ctx.set_save_session(False)
+ # IE will not download via SSL if caching is disabled.
+ # See: http://support.microsoft.com/?kbid=323308
+ ctx.del_header('Cache-Control')
+ ctx.del_header('Pragma')
+ ctx.set_header('Content-Type', self.content_type)
+ ctx.set_header('Content-Disposition',
+ 'attachment; filename="%s"' % self.file_name)
+
+
+class Download(DownloadBase):
+ def __init__(self, file_name, data=None,
+ content_type='application/unknown'):
+ DownloadBase.__init__(self, file_name, content_type)
+ self.data = []
+ if data:
+ self.data.append(data)
+
+ def write(self, data):
+ self.data.append(data)
+
+ def send(self, ctx):
+ self.set_headers(ctx)
+ ctx.send_content(''.join(self.data))
+
+
+class CSVdownload(DownloadBase):
+ def __init__(self, rowgen, file_name,
+ content_type='application/vnd.ms-excel'):
+ DownloadBase.__init__(self, file_name, content_type)
+ self.rowgen = rowgen
+
+ def send(self, ctx):
+ class ReqFile:
+ def __init__(self, out_fn):
+ self.write = out_fn
+ self.set_headers(ctx)
+ ctx.write_headers()
+ csv.writer(ReqFile(ctx.request.write_content)).writerows(self.rowgen)
+
+
+class OutputBase(object):
+
+ def __init__(self):
+ self._reset()
+
+ def _reset(self):
+ self._files = []
+ self._download = None
+
+ def clear(self):
+ for of in self._files:
+ of.delete()
+ self._reset()
+
+ def files(self):
+ return [of for of in self._files if of.exists()]
+
+ def have_files(self):
+ return len(self.files()) > 0
+
+ def tempfile(self, ext, label=None):
+ f, path = tempfile.mkstemp('.' + ext.lower(), 'soom',
+ config.dynamic_target)
+ os.close(f)
+ of = _OutputFile(path, label)
+ self._files.append(of)
+ return of
+
+ def output_names(self):
+ return [of.url() for of in self._files]
+
+ def select_page(self, page):
+ pass
+
+ def have_download(self):
+ return self._download is not None
+
+ def download(self, *a, **kw):
+ self._download = Download(*a, **kw)
+
+ def csv_download(self, *a, **kw):
+ self._download = CSVdownload(*a, **kw)
+
+ def send_download(self, ctx):
+ if self._download is not None:
+ self._download.send(ctx)
+
+
+class NullOut(OutputBase):
+
+ def clear(self):
+ pass
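+# Sketch of the download flow (illustrative; ctx is the application's
+# request context and rows is any iterable of row sequences):
+#   out = OutputBase()
+#   out.csv_download(rows, 'table.csv')   # or out.download('x.txt', data)
+#   if out.have_download():
+#       out.send_download(ctx)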
diff --git a/web/libsoomexplorer/output/plot.py b/web/libsoomexplorer/output/plot.py
new file mode 100644
index 0000000..1e9234d
--- /dev/null
+++ b/web/libsoomexplorer/output/plot.py
@@ -0,0 +1,59 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: plot.py 3673 2009-02-02 06:01:30Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/web/libsoomexplorer/output/plot.py,v $
+
+import os
+
+import SOOMv0
+
+import config
+
+from libsoomexplorer.output.base import OutputBase, OutputError
+
+class ImageOut(OutputBase):
+ horizontal = True
+ format = 'PNG'
+ formats = ['PNG', 'JPEG', 'PDF', 'SVG']
+ size = '750x550'
+ want_inline = False
+ markup = 'imageout'
+
+ def __init__(self):
+ super(ImageOut, self).__init__()
+ self.inline = False
+
+ def start(self, methodname):
+ self.clear()
+ of = self.tempfile(self.format)
+ kwargs = {}
+ kwargs['file'] = of.fn
+ if self.format in ('PNG','JPEG'):
+ w, h = [int(r) for r in self.size.split('x')]
+ if w > 10000 or h > 10000:
+                raise OutputError('bad output resolution')
+ kwargs['height'] = h
+ kwargs['width'] = w
+ self.inline = True
+ elif self.format == 'PDF':
+ kwargs['horizontal'] = (self.horizontal == 'True')
+ self.inline = False
+ elif self.format == 'SVG':
+ self.inline = (self.want_inline == 'True')
+ else:
+            raise OutputError('bad output format')
+ SOOMv0.plot.output(self.format, **kwargs)
+ return getattr(SOOMv0.plot, methodname)
+
diff --git a/web/libsoomexplorer/output/table.py b/web/libsoomexplorer/output/table.py
new file mode 100644
index 0000000..3d319ed
--- /dev/null
+++ b/web/libsoomexplorer/output/table.py
@@ -0,0 +1,382 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: table.py 3673 2009-02-02 06:01:30Z andrewm $
+# $HeadURL: https://wwwepi4.health.nsw.gov.au/svn/netepi/Analysis/trunk/web/libsoomexplorer/output/table.py $
+
+import MA
+
+from libsoomexplorer.output.base import OutputBase
+
+class AxisBase(object):
+ __slots__ = 'label', 'span', 'total'
+
+ def __init__(self, label, span, total = False):
+ self.label, self.span, self.total = label, span, total
+
+ def __repr__(self):
+ return '<label %r, span %s>' % (self.label, self.span)
+
+class AxisHeader(AxisBase):
+ __slots__ = ()
+ markup = 'label'
+
+class AxisLabel(AxisBase):
+ __slots__ = ()
+ markup = 'value'
+
+class HeaderLevelBase:
+ def __init__(self, col, prev, mt_lead = False):
+ self.next = None
+ if prev:
+ prev.next = self
+ self.col = col
+ self.values = col.inverted.keys()
+ if col.is_ordered():
+ self.values.sort()
+ if col.all_value is not None:
+ # Move all_value to end of list
+ try:
+ self.values.remove(col.all_value)
+ if mt_lead:
+ self.values.insert(0, col.all_value)
+ else:
+ self.values.append(col.all_value)
+ except ValueError:
+ pass
+
+ def get_values(self):
+ """ enumerate all values below this node """
+ if self.next:
+ sub_values = self.next.get_values()
+ return [(v,) + sv for v in self.values for sv in sub_values]
+ else:
+ return [(v,) for v in self.values]
+
+ def span(self):
+ span = len(self.values)
+ if self.next:
+ span *= self.next.span()
+ return span
+
+class BannerLevel(HeaderLevelBase):
+ """
+    This represents one level in the hierarchical column banner -
+    a linked list of these objects is used to represent the whole
+    banner (strictly, it's a tree, but sub-nodes are simply repeated).
+ """
+ def get_col_headers(self):
+ headings = []
+ sub_span = 1
+ if self.next:
+ sub_span = self.next.span()
+ headings.append((AxisHeader(self.col.label, sub_span * len(self.values)),))
+ labels = []
+ for v in self.values:
+ strvalue = self.col.do_format(self.col.do_outtrans(v))
+ labels.append(AxisLabel(strvalue, sub_span))
+ headings.append(labels)
+ if self.next:
+ for sh in self.next.get_col_headers():
+ headings.append(sh * len(self.values))
+ return headings
+
+ def get_tot_flags(self):
+ if self.next:
+ return self.next.get_tot_flags() * len(self.values)
+ return [v == self.col.all_value for v in self.values]
+
+class StubLevel(HeaderLevelBase):
+ """
+    This represents one level in the hierarchical row stub -
+    a linked list of these objects is used to represent the whole
+    stub (strictly, it's a tree, but sub-nodes are simply repeated).
+ """
+ def get_row_headers(self):
+ label = AxisHeader(self.col.label, 1)
+ if self.next:
+ return (label,) + self.next.get_row_headers()
+ else:
+ return (label,)
+
+ def get_row_labels(self):
+ """
+ Returns a list of tuples of AxisLabel instances, each
+        tuple representing a row in the output table. The AxisLabel
+        instances know the appropriate rowspan (None is used where
+        no table element should be generated).
+ """
+ headings = []
+ sub_headings = [()]
+ sub_span = 1
+ if self.next:
+ sub_headings = self.next.get_row_labels()
+ sub_span = self.next.span()
+ for v in self.values:
+ for sh in sub_headings:
+ axisvalue = None
+ if sh is sub_headings[0]:
+ strvalue = self.col.do_format(self.col.do_outtrans(v))
+ axisvalue = AxisLabel(strvalue, sub_span,
+ self.col.all_value == v)
+ headings.append((axisvalue,) + sh)
+ return headings
+
+ def get_lv_headings(self):
+ """
+        Similar to get_row_labels, but includes row header elements
+        inline - not currently used.
+ """
+ headings = []
+ sub_headings = [()]
+ sub_span = 1
+ if self.next:
+ sub_headings = self.next.get_lv_headings()
+ sub_span = self.next.span()
+ span = sub_span * len(self.values)
+ for v in self.values:
+ for sh in sub_headings:
+ a = b = None
+ if sh is sub_headings[0]:
+ strvalue = self.col.do_format(self.col.do_outtrans(v))
+ b = AxisLabel(strvalue, sub_span)
+ if v is self.values[0]:
+ a = AxisHeader(self.col.label, span)
+ headings.append((a, b) + sh)
+ return headings
+
+class HeaderBase:
+ def __init__(self, summset, colnames, mt_lead = False):
+ self.axislabels = None
+ self.len = len(colnames)
+ axislabel = None
+ for col in summset.get_columns(colnames):
+ axislabel = self.axis_level_class(col, axislabel, mt_lead)
+ if not self.axislabels:
+ self.axislabels = axislabel
+ self.values = self.axislabels.get_values()
+
+ def __len__(self):
+ return self.len
+
+class Banner(HeaderBase):
+ axis_level_class = BannerLevel
+
+ def __init__(self, summset, colnames, mt_lead):
+ HeaderBase.__init__(self, summset, colnames, mt_lead)
+ self.header_and_values = zip(self.axislabels.get_tot_flags(),
+ self.values)
+
+ def get_col_headers(self):
+ return self.axislabels.get_col_headers()
+
+ def col_group_count(self):
+ axislabel = self.axislabels
+ count = 1
+ while axislabel:
+ if axislabel.next:
+ count *= len(axislabel.values)
+ else:
+ return [len(axislabel.values)] * count
+ axislabel = axislabel.next
+
+class Stub(HeaderBase):
+ axis_level_class = StubLevel
+
+ def grouped_rows(self):
+ if self.len == 1:
+ return [zip(self.get_row_labels(), self.values)]
+ last = None
+ groups = []
+ group = []
+ for labels, row_values in zip(self.get_row_labels(), self.values):
+ if row_values[:-1] != last:
+ last = row_values[:-1]
+ if group:
+ groups.append(group)
+ group = []
+ group.append((labels, row_values))
+ if group:
+ groups.append(group)
+ return groups
+
+ def get_lv_headings(self):
+ return self.axislabels.get_lv_headings()
+
+ def get_row_headers(self):
+ return self.axislabels.get_row_headers()
+
+ def get_row_labels(self):
+ return self.axislabels.get_row_labels()
+
+class TableOutBase(OutputBase):
+ def __init__(self):
+ super(TableOutBase, self).__init__()
+ self.summaryset = None
+ self.inline = True
+ self.title = ''
+ self.subtitle = ''
+
+ def clear(self):
+ self.summaryset = None
+
+
+class CrosstabOut(TableOutBase):
+ markup = 'crosstab'
+
+ def __init__(self):
+ super(CrosstabOut, self).__init__()
+ self.proptype = 'density'
+
+ def clear(self):
+ super(CrosstabOut, self).clear()
+ try:
+ del self.summaryset, self.rowcols, self.colcols, \
+ self.statcols, self.propcols, self.marginal_totals, \
+ self.show_limits, self.rounding, self.simple_table, \
+ self.row_axis, self.col_axis, self.value_to_rownum_map
+ except AttributeError:
+ pass # No analysis run yet
+
+ def go(self, summaryset, rowcols, colcols, statcols, propcols,
+ marginal_totals='none', show_limits=False, rounding=None,
+ simple_table=False):
+ self.summaryset = summaryset
+ self.rowcols = rowcols
+ self.colcols = colcols
+ self.statcols = statcols
+ self.propcols = propcols
+ self.marginal_totals = marginal_totals
+ self.show_limits = show_limits
+ self.rounding = rounding
+ self.simple_table = simple_table
+ mt_lead = (marginal_totals == 'before')
+ assert rowcols
+ assert colcols
+ self.row_axis = Stub(self.summaryset, rowcols, mt_lead)
+ self.col_axis = Banner(self.summaryset, colcols, mt_lead)
+ self.value_to_rownum_map = self.make_condcol_index(summaryset,
+ rowcols + colcols)
+
+ def get_rownum(self, row_values, col_values):
+ return self.value_to_rownum_map[tuple(row_values + col_values)]
+
+ def make_condcol_index(self, summset, colnames):
+ valuemap = [[] for i in xrange(len(summset))]
+ for col in summset.get_columns(colnames):
+ for value, rows in col.inverted.iteritems():
+ for row in rows:
+ valuemap[row].append(value)
+ return dict([(tuple(v), i) for i, v in enumerate(valuemap)])
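+    # Illustrative: if the summary rows carry condition values ('M', 2004),
+    # ('M', 2005), ... in row order, the map sends each value tuple to its
+    # row number, so get_rownum(['M'], [2005]) -> 1.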
+
+ def colours(self, v):
+ colmap = ['#ffc6c6', '#ffd2c6', '#ffddc6', '#ffe8c6', '#fff3c6',
+ '#ffffc6', '#f3ffc6', '#e8ffc6', '#ddffc6', '#d2ffc6',
+ '#c6ffc6', '#c6ffd2', '#c6ffdd', '#c6ffe8', '#c6fff3',
+ '#c6ffff', '#c6f3ff', '#c6e8ff', '#c6ddff', '#c6d2ff']
+ if type(v) is MA.MaskedScalar:
+ return '#ffffff'
+ return colmap[int(round((1-v) * (len(colmap) - 1)))]
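+    # colours() maps a proportion v in [0, 1] onto the 20-entry ramp above:
+    # v == 1.0 selects colmap[0] (red), v == 0.0 selects colmap[-1] (blue),
+    # and masked values render as white.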
+
+ def format_cell(self, colname, index):
+ col = self.summaryset[colname]
+ data = col.data[index]
+ if type(data) is MA.MaskedScalar:
+ return '--'
+ if self.rounding is None:
+ fmt = col.do_format
+ else:
+ def fmt(v):
+ if type(v) is MA.MaskedScalar:
+ return '--'
+ if self.rounding < 1:
+ try:
+ return '%d' % round(v, self.rounding)
+ except:
+ print repr(v), type(v)
+ raise
+ else:
+ return '%.*f' % (self.rounding, v)
+ if self.show_limits:
+ try:
+ ul = self.summaryset[colname + '_ul'].data[index]
+ ll = self.summaryset[colname + '_ll'].data[index]
+ except KeyError:
+ pass
+ else:
+ return '%s (%s..%s)' % (fmt(data).strip(),
+ fmt(ll).strip(), fmt(ul).strip())
+ return fmt(data)
+
+ def propn2perc(self, v):
+ if type(v) is MA.MaskedScalar:
+ return '--'
+ return '%.1f%%' % (v * 100)
+
+
+class TableOut(TableOutBase):
+ markup = 'tableout'
+
+class DatasetRowsOut(TableOutBase):
+ markup = 'dsrows'
+
+ def clear(self):
+ self.dsrows = None
+ self.startrow = 0
+
+ def has_rows(self):
+ return len(self.dsrows) > 0
+
+ def has_prevpage(self):
+ return self.pagesize and self.startrow > 0
+
+ def has_nextpage(self):
+ return self.pagesize and self.startrow + self.pagesize < len(self.dsrows)
+
+ def page_rows(self):
+ dslen = len(self.dsrows)
+ if self.pagesize:
+ endrow = min(dslen, self.startrow + self.pagesize)
+ return xrange(self.startrow, endrow)
+ else:
+ return xrange(dslen)
+
+ def prev_page(self):
+ if self.has_prevpage():
+ self.startrow -= self.pagesize
+
+ def next_page(self):
+ if self.has_nextpage():
+ self.startrow += self.pagesize
+
+ def page(self):
+ return self.startrow / self.pagesize + 1
+
+ def pages(self):
+ if self.pagesize:
+ return (len(self.dsrows) + self.pagesize - 1) / self.pagesize
+ return 0
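+    # Worked example (illustrative): with pagesize == 25 and 101 rows,
+    # pages() == (101 + 24) / 25 == 5, and with startrow == 50, page() == 3.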
+
+ def select_page(self, select_page):
+ cur_page = self.page()
+ for page in select_page:
+ try:
+ page = int(page)
+ except (ValueError, TypeError):
+ continue
+ if page != cur_page:
+ startrow = self.pagesize * (page - 1)
+ if 0 <= startrow < len(self.dsrows):
+ self.startrow = startrow
+ return
diff --git a/web/libsoomexplorer/output/twobytwo.py b/web/libsoomexplorer/output/twobytwo.py
new file mode 100644
index 0000000..b00fa96
--- /dev/null
+++ b/web/libsoomexplorer/output/twobytwo.py
@@ -0,0 +1,105 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: twobytwo.py 3673 2009-02-02 06:01:30Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/web/libsoomexplorer/output/twobytwo.py,v $
+
+import Numeric
+
+import SOOMv0
+
+from libsoomexplorer.output.base import OutputBase
+
+class _AxisLabel:
+ def __init__(self, label, values):
+ self.label = label
+ self.values = values
+
+class _XTab:
+ def __init__(self, label, data):
+ self.label = label
+ self.data = data.copy()
+ self.htot = Numeric.add.reduce(self.data, 1)
+ self.vtot = Numeric.add.reduce(self.data, 0)
+ self.tot = Numeric.add.reduce(self.data.flat)
+
+class TwoByTwoDisplay:
+ """
+    A bit silly - we need this "display" class to attach the twobytwo
+    analysis object to, because the "output" class gets pickled, and
+    the twobytwo analysis object does not survive pickling.
+ """
+ def __init__(self, summaryset, measurecol, conflev=None):
+ from SOOMv0 import Analysis
+ def add_stratum(a, label):
+ self.analysis.add_stratum(a[0,0], a[1,0], a[0,1], a[1,1],
+ label=label)
+ self.xtabs.append(_XTab(label, a))
+
+ # CrossTab object
+ self.ct = summaryset.crosstab()
+ # Counts array
+ self.freq = self.ct[measurecol].data.filled()
+ # TwoByTwo analysis object and _XTab objects
+ analargs = {}
+ if conflev is not None:
+ analargs['conflev'] = conflev
+ self.analysis = Analysis.twobytwotable(**analargs)
+ self.xtabs = []
+ if len(self.freq.shape) == 2:
+ add_stratum(self.freq, '')
+ else:
+ axis = self.ct.axes[2]
+ labels = ['%s: %s' % (axis.label, axis.col.do_outtrans(v))
+ for v in axis.values]
+ for i in range(self.freq.shape[2]):
+ add_stratum(self.freq[:,:,i], labels[i])
+ self.xtabs.append(_XTab('Unstratified (crude)',
+ Numeric.add.reduce(self.freq, 2)))
+
+ # TwoByTwo report object
+ sections = list(Analysis.twobytwotable.sections)
+ sections.remove('counts')
+ self.report = list(self.analysis.report(sections))
+ # _XTab labels
+ self.axislabels = []
+ for axis in self.ct.axes[:2]:
+ values = [axis.col.do_format(axis.col.do_outtrans(v))
+ for v in axis.values]
+ self.axislabels.append(_AxisLabel(axis.label, values))
+
+class TwoByTwoOut(OutputBase):
+ inline = True
+ markup = 'twobytwo'
+
+ def __init__(self):
+ super(TwoByTwoOut, self).__init__()
+ self.summaryset = None
+ self.title = ''
+ self.footer = ''
+
+ def clear(self):
+ super(TwoByTwoOut, self).clear()
+ self.summaryset = None
+
+ def plotstart(self):
+        # tempfile() returns an _OutputFile; pass its path to the backend
+        of = self.tempfile('png')
+        SOOMv0.plot.output('PNG', file=of.fn)
+
+ def display(self):
+ return TwoByTwoDisplay(self.summaryset, '_freq_', conflev=self.conflev)
+
+ def go(self, summaryset, conflev=None):
+ self.summaryset = summaryset
+ self.conflev = conflev
diff --git a/web/libsoomexplorer/parameters.py b/web/libsoomexplorer/parameters.py
new file mode 100644
index 0000000..6f744b2
--- /dev/null
+++ b/web/libsoomexplorer/parameters.py
@@ -0,0 +1,83 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: parameters.py 3701 2009-02-26 05:56:34Z andrewm $
+# $HeadURL: https://wwwepi4.health.nsw.gov.au/svn/netepi/Analysis/trunk/web/libsoomexplorer/parameters.py $
+
+import SOOMv0
+
+from libsoomexplorer.dsparams import DSParams
+
+class Parameters:
+ """
+ A container for plot parameters
+
+ It must be possible to pickle this and unpickle against a different
+ version of the code, so keep things simple: all referenced data must
+    be self-contained, accesses to this data must cope if an attribute
+    is missing, etc.
+ """
+ def __init__(self, dsname=None, plottype=None):
+ self.dsparams = DSParams(dsname)
+ self.plottype = plottype
+ self.loaded_from = ''
+ self.save_as = ''
+
+ def set_default(self, attr, value):
+ """Set an attribute if it doesn't already exist"""
+ if not hasattr(self, attr):
+ setattr(self, attr, value)
+
+ def do_colset(self, ctx, op, field, index):
+ colset = getattr(self, field)
+ getattr(self, 'colset_' + op)(colset, int(index))
+
+ def colset_add(self, colset, field_count):
+ if field_count == 1:
+ v = None
+ else:
+ v = [None] * field_count
+ colset.append(v)
+
+ def colset_del(self, colset, index):
+ del colset[int(index)]
+
+ def colset_up(self, colset, index):
+ if index > 0:
+ colset[index], colset[index-1] = colset[index-1], colset[index]
+
+ def colset_dn(self, colset, index):
+        if index < len(colset) - 1:
+ colset[index], colset[index+1] = colset[index+1], colset[index]
+
+ def do_filter(self, ctx, op, field, *args):
+ getattr(getattr(self, field), 'do_' + op)(ctx, *args)
+
+ def has_changed(self, field):
+ old_value = getattr(self, '_last_' + field, None)
+ cur_value = getattr(self, field, None)
+ setattr(self, '_last_' + field, cur_value)
+ return old_value != cur_value
+
+ # "sys" fields know whether they've been set by the user or
+ # set to a default by the system.
+ def is_sys(self, field):
+ value = getattr(self, field, None)
+ if not value:
+ return True
+ return value.replace('\r', '') == getattr(self, '_sys_' + field, None)
+
+ def set_sys(self, field, value):
+ setattr(self, field, value)
+ setattr(self, '_sys_' + field, value)
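+    # Example (illustrative): after set_sys('title', 'NHDS by sex'),
+    # is_sys('title') stays True until the user edits the title; once the
+    # stored value differs from _sys_title, the user's text takes priority.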
diff --git a/web/libsoomexplorer/paramstore.py b/web/libsoomexplorer/paramstore.py
new file mode 100644
index 0000000..364071f
--- /dev/null
+++ b/web/libsoomexplorer/paramstore.py
@@ -0,0 +1,179 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: paramstore.py 3675 2009-02-02 06:50:31Z andrewm $
+# $HeadURL: https://wwwepi4.health.nsw.gov.au/svn/netepi/Analysis/trunk/web/libsoomexplorer/paramstore.py $
+
+"""
+Abstractions representing the stored parameter sets (analyses)
+"""
+
+# Standard Libraries
+import os
+import cPickle as pickle
+import fcntl
+import errno
+import re
+import time
+try:
+ set
+except NameError:
+ from sets import Set as set
+
+# 3rd Party
+from mx import DateTime
+
+# SOOM
+import SOOMv0
+
+# Application
+from libsoomexplorer.common import *
+import config
+
+class Analysis:
+ def __init__(self, label, dsname):
+ self.label = label
+ self.dsname = dsname
+ self.fn = None
+
+ def get_dataset(self):
+ class BadDS:
+ def __init__(self, label):
+ self.label = '??? %s' % label
+ self.date_updated = DateTime.now()
+ try:
+ return SOOMv0.dsload(self.dsname)
+ except Exception, e:
+ return BadDS(str(e))
+
+ def make_fn(self):
+ fn = '%s_%s_%s' % (self.label[:30], time.time(), self.dsname)
+ fn = re.sub('[^a-zA-Z0-9_]', '', fn)
+ return fn
+
+ def write(self, dir, params):
+ if self.fn is None:
+ self.fn = self.make_fn()
+ f = open(os.path.join(dir, self.fn), 'wb')
+ try:
+ pickle.dump(params, f, -1)
+ finally:
+ f.close()
+
+
+class Analyses:
+
+ def __init__(self):
+ self.dir = os.path.join(config.data_dir, 'analyses')
+ self.index_fn = os.path.join(self.dir, '.index')
+
+ def load_index(self):
+ try:
+ f = open(self.index_fn, 'rb')
+ except IOError, (eno, estr):
+ if eno != errno.ENOENT:
+ raise
+ return []
+ try:
+ fcntl.lockf(f, fcntl.LOCK_SH)
+ return pickle.load(f)
+ finally:
+ f.close()
+
+ def __len__(self):
+ return len(self.load_index())
+
+ def available(self):
+ try:
+ files = set(os.listdir(self.dir))
+ except OSError, (eno, estr):
+ if eno != errno.ENOENT:
+ raise
+ return []
+ available = []
+ for an in self.load_index():
+ try:
+ files.remove(an.fn)
+ except KeyError:
+ pass
+ else:
+ available.append((an.label, an))
+ # XXX At this point, /files/ contains all the entries that don't appear
+ # in the index - we could/should add these to the index?
+ available.sort()
+ return [an for label, an in available]
+
+ def update_index(self, cb):
+ try:
+ f = open(self.index_fn, 'r+b')
+ except IOError, (eno, estr):
+ if eno != errno.ENOENT:
+ raise
+ f = None
+ try:
+ if f is None:
+ os.mkdir(self.dir)
+ fd = os.open(self.index_fn, os.O_WRONLY|os.O_CREAT|os.O_EXCL, 0666)
+ f = os.fdopen(fd, 'wb')
+ fcntl.lockf(f, fcntl.LOCK_EX)
+ index = []
+ else:
+ fcntl.lockf(f, fcntl.LOCK_EX)
+ index = pickle.load(f)
+ f.seek(0, 0)
+ cb(index)
+ pickle.dump(index, f, -1)
+ f.truncate()
+ finally:
+ f.close()
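+    # update_index() hands the unpickled index list to the callback while
+    # the exclusive lock is held; callbacks mutate it in place
+    # (illustrative; the entry is hypothetical):
+    #   def _add(index):
+    #       index.append(Analysis('My analysis', 'nhds'))
+    #   analyses.update_index(_add)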
+
+ def save(self, params, dsname, label, overwrite=False):
+ def _save(index):
+ if overwrite:
+ for an in index:
+ if an.dsname == dsname and an.label == label:
+ an.write(self.dir, params)
+ return
+ an = Analysis(label, dsname)
+ an.write(self.dir, params)
+ index.append(an)
+ self.update_index(_save)
+
+ def load(self, fn):
+ assert not os.path.dirname(fn)
+ f = open(os.path.join(self.dir, fn))
+ try:
+ try:
+ return pickle.load(f)
+ except Exception, e:
+ raise UIError('Unable to load this analysis: %s' % e)
+ finally:
+ f.close()
+
+ def find(self, fn):
+ for an in self.load_index():
+ if an.fn == fn:
+ return an
+ return None
+
+ def delete(self, fn):
+ def _delete(index):
+ for i, an in enumerate(index):
+ if an.fn == fn:
+ del index[i]
+ break
+ self.update_index(_delete)
+
+
+analyses = Analyses()
diff --git a/web/libsoomexplorer/plotform.py b/web/libsoomexplorer/plotform.py
new file mode 100644
index 0000000..7d2966b
--- /dev/null
+++ b/web/libsoomexplorer/plotform.py
@@ -0,0 +1,606 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+"""
+Machinery for collecting plot and table arguments and producing an
+output object from them.
+"""
+# $Id: plotform.py 3706 2009-03-03 05:43:38Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/web/libsoomexplorer/plotform.py,v $
+
+import sets
+import re
+import cgi
+import SOOMv0, SOOMv0.SummaryStats
+from SOOMv0.soomparse import soomparseScanner
+from SOOMv0.Filter import sorted_ds
+import Numeric, MA
+# Application modules
+from libsoomexplorer.common import UIError, ConversionError, timer
+from libsoomexplorer.dsparams import DSParams
+from libsoomexplorer.condcol import CondColParams, StratifyParams
+from libsoomexplorer.twobytwoparams import TwoByTwoColParams
+from libsoomexplorer.fields import set_target
+
+
+class DupCol(Exception): pass
+
+class ColList(list):
+ def __init__(self, colparams=None, nodups=True):
+ self.colparams = colparams
+ self.nodups = nodups
+ self.cols = sets.Set()
+
+ def set(self, name, cols):
+ if type(cols) not in (list, tuple):
+ cols = [cols]
+ for col in cols:
+ if col in self.cols:
+ if self.nodups:
+ raise DupCol(col)
+ else:
+ self.cols.add(col)
+ if self.colparams is not None:
+ params = self.colparams.get(col)
+ if params:
+ col = SOOMv0.condcol(col, *params)
+ self.append(col)
+
+
+class KWArgs(dict):
+ def set(self, name, value):
+ self[name] = value
+
+class _FieldsMeta(type):
+ def __init__(cls, name, bases, ns):
+ targets_fields = {}
+ if hasattr(cls, 'fields'):
+ for field in cls.fields:
+ if field.target:
+ for target in field.target:
+ targets_fields.setdefault(target, []).append(field)
+ cls.targets_fields = targets_fields
+ super(_FieldsMeta, cls).__init__(name, bases, ns)
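+    # e.g. (illustrative) a field declared with target ('summkw', 'plotkw')
+    # ends up in both cls.targets_fields['summkw'] and
+    # cls.targets_fields['plotkw'].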
+
+
+class _SummaryBase(object):
+ __metaclass__ = _FieldsMeta
+ options = []
+
+ def __init__(self, workspace):
+ self.workspace = workspace
+ self.hide_params = False
+
+ def get_target(self, target, ns, param, no_verify=True):
+ kwargs = {target: param}
+ for field in self.targets_fields[target]:
+ try:
+ field.get_params(ns, kwargs)
+ except UIError:
+ if not no_verify:
+ raise
+ return param
+
+ def set_default(self):
+ params = self.workspace.params
+ params.set_default('condcolparams', {})
+ params.set_default('sys_title', '')
+ params.set_default('sys_subtitle', '')
+ for field in self.fields:
+ field.set_default(self.workspace, params)
+
+ def condcol_params(self, condcol):
+ params = self.workspace.params.condcolparams.get(condcol)
+ if params:
+ return SOOMv0.condcol(condcol, *params)
+ return condcol
+
+ def new_args(self):
+ params = self.workspace.params
+ return {
+ 'stratacols': ColList(),
+ 'stndrdcols': ColList(params.condcolparams),
+ 'summcols': ColList(params.condcolparams),
+ 'plotcols': ColList(params.condcolparams),
+ 'measures': ColList(),
+ 'summkw': KWArgs(),
+ 'plotkw': KWArgs(),
+ 'outputkw': KWArgs(),
+ }
+
+ def get_params(self):
+ kwargs = self.new_args()
+ try:
+ for field in self.fields:
+ field.get_params(self.workspace.params, kwargs)
+ except DupCol, colname:
+ col = self.workspace.get_dataset()[str(colname)]
+ raise UIError('%r column used more than once' %
+ (col.label or col.name))
+ return kwargs
+
+ def get_collist(self, target='stratacols', nodups=False):
+ return self.get_target(target, self.workspace.params,
+ ColList(nodups=nodups))
+
+ def get_condcolparams(self, workspace):
+ return CondColParams(workspace, workspace.params.condcolparams,
+ self.get_collist())
+
+ def refresh(self):
+ pass
+
+ def user_title(self):
+ """
+ If the user has set a title, return that, but if no title
+ is set, or if the title matches the last system generated
+ title, return None.
+ """
+ if not self.workspace.params.is_sys('title'):
+ return self.workspace.params.title.replace('\r', '')
+ return None
+
+ def user_subtitle(self):
+ if not self.workspace.params.is_sys('subtitle'):
+ return self.workspace.params.subtitle.replace('\r', '')
+ return None
+
+ def generate_title(self, ds, condcols):
+ """
+ Generate a title and subtitle if the user has not supplied
+ an explicit one.
+ """
+ def get_col_label(ds, colname):
+ col = ds.get_column(colname)
+ return col.label or col.name
+ params = self.workspace.params
+ if params.is_sys('title'):
+ labels = []
+ for condcol in condcols:
+ if SOOMv0.isstatmethod(condcol):
+ labels.append(condcol.get_label(ds, None))
+ else:
+ try:
+ colname = condcol.get_colname()
+ except AttributeError:
+ colname = condcol
+ labels.append(get_col_label(ds, colname))
+ title = ' by '.join(labels)
+ params.set_sys('title', title)
+ if params.is_sys('subtitle'):
+ params.set_sys('subtitle', ds.short_description())
+ self.workspace.output.title = params.title
+ self.workspace.output.subtitle = params.subtitle
+
+
+class PlotTypeBase(_SummaryBase):
+ def __init__(self, workspace):
+ _SummaryBase.__init__(self, workspace)
+ self.workspace.set_outtype('image')
+
+ def get_params(self):
+ kwargs = _SummaryBase.get_params(self)
+ plotkw = kwargs['plotkw']
+ for kw in ('groupby', 'stackby'):
+ if kw in plotkw:
+ plotkw.set(kw, self.condcol_params(plotkw[kw]))
+ kwargs.pop('title', None)
+ kwargs.pop('subtitle', None)
+ return kwargs
+
+ def _getplot(self, ds):
+ params = self.workspace.params
+ kwargs = self.get_params()
+ params.dsparams.filter_args(kwargs['plotkw'])
+ kwargs['title'] = self.user_title()
+ kwargs['footer'] = self.user_subtitle()
+ timer('plotmethod')
+ plot_method = getattr(SOOMv0.plot, self.name)(ds)
+ plot_method.procargs(ds, *kwargs['plotcols'], **kwargs['plotkw'])
+ timer('filter')
+ filtered_ds = plot_method.get_filtered_ds(ds)
+ if params.is_sys('title'):
+ params.set_sys('title', plot_method.get_title(filtered_ds))
+ if not self.user_subtitle():
+ params.set_sys('subtitle', plot_method.get_footer(filtered_ds))
+ timer('post-filter')
+ return plot_method
+
+ def refresh(self):
+ super(PlotTypeBase, self).refresh()
+# AM Jan '09 - disabled - requires running filter and doing dataset
+# summarisation, which can take many seconds, which significantly slows down
+# the interface. The downside of turning this off is that plot titles and
+# footers no longer automatically update.
+# ds = self.workspace.get_dataset()
+# try:
+# plot_method = self._getplot(ds)
+# except TypeError:
+# # Most likely due to missing args at this stage - ignore
+# pass
+
+ def go(self):
+ self.workspace.output.start(self.name)
+ ds = self.workspace.get_dataset()
+ plot_method = self._getplot(ds)
+ timer('plot')
+ plot_method.plot(ds)
+ self.hide_params = self.workspace.output.inline
+
+
+class TableTypeBase(_SummaryBase):
+ def __init__(self, workspace):
+ super(TableTypeBase, self).__init__(workspace)
+ self.workspace.set_outtype('table')
+
+ def set_default(self):
+ super(TableTypeBase, self).set_default()
+ self.workspace.params.set_default('statcols', [])
+
+ def go(self):
+ kwargs = self.get_params()
+ self.workspace.params.dsparams.filter_args(kwargs['summkw'])
+ ds = self.workspace.get_dataset()
+ self.generate_title(ds, kwargs['stratacols'])
+ self.workspace.output.summaryset = ds.summ(*kwargs['summcols'],
+ **kwargs['summkw'])
+ self.hide_params = True
+
+
+class CrosstabBase(_SummaryBase):
+ def __init__(self, workspace):
+ super(CrosstabBase, self).__init__(workspace)
+ self.workspace.set_outtype('crosstab')
+
+ def new_args(self):
+ kwargs = super(CrosstabBase, self).new_args()
+ kwargs['rowcols'] = ColList()
+ kwargs['colcols'] = ColList()
+ return kwargs
+
+ def set_default(self):
+ super(CrosstabBase, self).set_default()
+ ds = self.workspace.get_dataset()
+ self.workspace.params.set_default('statcols', [])
+ self.workspace.params.set_default('propcols', [])
+ self.workspace.params.set_default('proptype', 'density')
+ self.workspace.params.set_default('heatmap', False)
+ self.workspace.params.set_default('weightcol', ds.weightcol)
+
+ def go(self):
+ ds = self.workspace.get_dataset()
+ params = self.workspace.params
+ if not params.statcols and not params.propcols:
+ params.statcols = [['freq', None, '_default_']]
+ # Generate the summary!
+ kwargs = self.get_params()
+ params.dsparams.filter_args(kwargs['summkw'])
+ if kwargs['outputkw']['marginal_totals'] in ('before', 'after'):
+ kwargs['summkw'].set('allcalc', True)
+ if params.propcols:
+ available = [n for n, l in SOOMv0.propn_names_and_labels(ds, kwargs['stratacols'])]
+ for propcol in params.propcols:
+ if propcol not in available:
+ raise UIError('Conditioning columns have changed, reselect proportions')
+ kwargs['summkw'].set('proportions', True)
+ self.workspace.output.proptype = params.proptype
+ self.workspace.output.heatmap = str(params.heatmap) == 'True'
+ for condcol in kwargs['summcols']:
+ if hasattr(condcol, 'conflev'):
+ kwargs['outputkw'].set('show_limits', True)
+ break
+ summaryset = ds.summ(*kwargs['summcols'], **kwargs['summkw'])
+ # XXX
+ statcolnames = [summaryset.get_method_statcolname(col)
+ for col in kwargs['measures']]
+ self.workspace.output.go(summaryset,
+ kwargs['rowcols'], kwargs['colcols'],
+ statcolnames, params.propcols,
+ **kwargs['outputkw'])
+ self.generate_title(ds, kwargs['rowcols'] + kwargs['colcols'])
+ self.hide_params = True
+
+
+class DatasetRowsBase(_SummaryBase):
+    MAX_CSV = 65536             # Excel does not allow more than this many rows,
+                                # plus we need an upper bound as this is a
+                                # relatively expensive operation.
+
+ def __init__(self, workspace):
+ super(DatasetRowsBase, self).__init__(workspace)
+ self.workspace.set_outtype('dsrows')
+
+ def set_default(self):
+ cols = getattr(self.workspace.params, 'condcols', [])
+ # Allows loading of old (pre r3700) parameter sets
+ self.workspace.params.set_default('outcols', cols)
+ super(DatasetRowsBase, self).set_default()
+ self.workspace.params.set_default('orderby', [])
+
+ def generate_title(self, ds, filterexpr):
+ params = self.workspace.params
+ if params.is_sys('title'):
+ params.set_sys('title', filterexpr)
+ if params.is_sys('subtitle'):
+ params.set_sys('subtitle', ds.short_description())
+ self.workspace.output.title = params.title
+ self.workspace.output.subtitle = params.subtitle
+
+ # we know our filter expression is well-formed so we can write clean REs
+ SEARCH_EXPR_RE = re.compile(r'(\w+)\s+contains\s+\[\[(.*?)\]\]', re.I | re.S)
+ NEARNESS_RE = re.compile(r'\[\s*\d+\s*\]', re.S)
+ WORD_RE = dict(soomparseScanner.patterns)['WORD']
+
+ def get_highlight_fns(self, filterexpr):
+ # Find all words used to search specific columns and generate a specific
+ # regular expression based outtrans function for that column
+ outtrans_fns = {}
+ if filterexpr:
+ for mexpr in self.SEARCH_EXPR_RE.finditer(filterexpr):
+ column, sexpr = mexpr.group(1), mexpr.group(2)
+ # remove [n] nearness specifiers
+ sexpr = self.NEARNESS_RE.sub('', sexpr)
+ terms = sets.Set()
+ for mword in self.WORD_RE.finditer(sexpr):
+ word = "'?".join(list(mword.group().replace("'", "")))
+ term_re = word.replace('*', r'\S*?')
+ terms.add(term_re)
+ trans_re = re.compile(r"\b(%s)\b" % "|".join(terms), re.I)
+ outtrans_fns[column] = Highlighter(trans_re)
+ return outtrans_fns
+
+ def yield_rows(self, ds, dsrows):
+ cols = ds.get_columns(self.workspace.output.colnames)
+ yield [col.label for col in cols]
+ for i in dsrows:
+ yield [col.do_format(col.do_outtrans(col[i])) for col in cols]
+
+ def go(self):
+ kwargs = self.get_params()
+ filterds = ds = self.workspace.get_dataset()
+ params = self.workspace.params
+ filterexpr = params.dsparams.filterexpr
+ if filterexpr:
+ timer('filter')
+ try:
+ filterds = filterds.filter(filterexpr)
+ except Exception, e:
+ raise UIError('Could not make filter: %s' % e)
+ if params.orderby:
+ timer('sort')
+ orderby = ['%s %s' % (c, d) for c, d in params.orderby if c]
+ filterds = sorted_ds(filterds, *orderby)
+ if filterds is ds:
+ dsrows = Numeric.arrayrange(len(ds))
+ else:
+ dsrows = filterds.record_ids
+ if params.pagesize == 'csv':
+ if not dsrows:
+ raise UIError('No rows returned')
+ if len(dsrows) > self.MAX_CSV:
+ raise UIError('More than %s rows returned' % self.MAX_CSV)
+ self.workspace.output.csv_download(self.yield_rows(ds, dsrows), 'dsrows.csv')
+ else:
+ # set any output translations
+ self.generate_title(ds, filterexpr)
+ self.workspace.output.dsrows = dsrows
+ self.workspace.output.highlight_fns = self.get_highlight_fns(filterexpr)
+ self.workspace.output.colnames = self.workspace.params.outcols
+ self.workspace.output.pagesize = int(self.workspace.params.pagesize)
+ self.hide_params = True
+
+
+class Highlighter:
+ def __init__(self, trans_re):
+ self.trans_re = trans_re
+
+ def __call__(self, v):
+ return self.trans_re.sub(r'<b style="color:black;background-color:#fdd">\1</b>', cgi.escape(v))
+
+ def __repr__(self):
+ return "<Highlighter: %s>" % self.trans_re.pattern
+
+
+class PopRateBase(_SummaryBase):
+
+ def set_default(self):
+ super(PopRateBase, self).set_default()
+ params = self.workspace.params
+ params.set_default('popsetparams', DSParams())
+ params.set_default('stdpopsetparams', DSParams())
+ params.set_default('stdsetparams', DSParams())
+
+ def refresh(self):
+ super(PopRateBase, self).refresh()
+ params = self.workspace.params
+ params.popsetparams.set_dsname(getattr(params, 'popset', None))
+ params.stdpopsetparams.set_dsname(getattr(params, 'stdpopset', None))
+ params.stdsetparams.set_dsname(getattr(params, 'stdset', None))
+
+
+class DSRMixin(PopRateBase):
+ measure = 'dsr'
+
+ def calc_rates(self, kwargs):
+ from SOOMv0 import Analysis
+ params = self.workspace.params
+ ds = self.workspace.get_dataset()
+ popset = params.popsetparams.get_filtered_dataset()
+ stdpopset = SOOMv0.dsload(params.stdpopset)
+ summcols = kwargs['stndrdcols'] + kwargs['summcols']
+ summset = ds.summ(*summcols, **kwargs['summkw'])
+ return Analysis.calc_directly_std_rates(summset, popset, stdpopset,
+ popset_popcol=params.popset_popcol,
+ stdpopset_popcol=params.stdpopset_popcol,
+ conflev=kwargs['conflev'])
+
+
+class SRMixin(PopRateBase):
+ measure = 'sr'
+
+ def calc_rates(self, kwargs):
+ from SOOMv0 import Analysis
+ params = self.workspace.params
+ ds = self.workspace.get_dataset()
+ popset = params.popsetparams.get_filtered_dataset()
+ summset = ds.summ(*kwargs['summcols'], **kwargs['summkw'])
+ return Analysis.calc_stratified_rates(summset, popset,
+ popset_popcol=params.popset_popcol,
+ conflev=kwargs['conflev'])
+
+
+class ISRMixin(PopRateBase):
+ measure = 'isr'
+
+ def calc_rates(self, kwargs):
+ from SOOMv0 import Analysis
+ params = self.workspace.params
+ params.dsparams.filter_args(kwargs['summkw'])
+ kwargs['summkw'].set('zeros', True)
+ ds = self.workspace.get_dataset()
+ popset = params.popsetparams.get_filtered_dataset()
+ stdpopset = params.stdpopsetparams.get_filtered_dataset()
+ summset = ds.summ(*kwargs['summcols'], **kwargs['summkw'])
+ stdset = SOOMv0.dsload(params.stdset)
+ stdsetkw = {'zeros': True}
+ params.stdsetparams.filter_args(stdsetkw)
+ stdsummset = stdset.summ(*kwargs['stndrdcols'], **stdsetkw)
+ return Analysis.calc_indirectly_std_ratios(summset, popset,
+ stdsummset, stdpopset,
+ popset_popcol=params.popset_popcol,
+ stdpopset_popcol=params.stdpopset_popcol,
+ conflev=kwargs['conflev'])
+
+
+class RatePlotBase(PlotTypeBase):
+
+ def _getplot(self, ds):
+ params = self.workspace.params
+ kwargs = self.get_params()
+ if params.is_sys('title'):
+ self.generate_title(ds, kwargs['stratacols'])
+ params.set_sys('title', self.label + ' of ' + params.title)
+ kwargs['title'] = self.user_title()
+ kwargs['footer'] = self.user_subtitle()
+
+ def go(self):
+ params = self.workspace.params
+ kwargs = self.get_params()
+ params.dsparams.filter_args(kwargs['summkw'])
+ output_type = kwargs.pop('output_type')
+ if output_type == 'barchart_horizontal':
+ kwargs['plotkw'].set('horizontal', True)
+ output_type = 'barchart'
+ elif output_type not in ('barchart', 'lineplot'):
+ raise UIError('Unsupported plot type')
+ plotmethod = self.workspace.output.start(output_type)
+ rateset = self.calc_rates(kwargs)
+ plot = plotmethod(rateset)
+ kwargs['plotkw'].set('measure', self.measure)
+ plot.procargs(rateset, *kwargs['plotcols'], **kwargs['plotkw'])
+ plot.plot(rateset)
+
+
+class RateTabBase(CrosstabBase):
+ def go(self):
+ params = self.workspace.params
+ kwargs = self.get_params()
+ params.dsparams.filter_args(kwargs['summkw'])
+ rateset = self.calc_rates(kwargs)
+ self.workspace.output.go(rateset, kwargs['rowcols'], kwargs['colcols'],
+ propcols=[],
+ show_limits=True, **kwargs['outputkw'])
+ self.generate_title(rateset, kwargs['rowcols'] + kwargs['colcols'])
+ self.hide_params = True
+
+
+class TwoByTwoBase(_SummaryBase):
+ def __init__(self, workspace):
+ _SummaryBase.__init__(self, workspace)
+ self.workspace.set_outtype('twobytwo')
+
+ def set_default(self):
+ super(TwoByTwoBase, self).set_default()
+ params = self.workspace.params
+ if not hasattr(params, 'exposure_params'):
+ params.exposure_params = TwoByTwoColParams('exposed')
+ params.exposure_params.set_colname(self.workspace, params.exposure)
+ if not hasattr(params, 'outcome_params'):
+ params.outcome_params = TwoByTwoColParams('outcome')
+ params.outcome_params.set_colname(self.workspace, params.outcome)
+
+ def get_params(self):
+ params = self.workspace.params
+ condcolparams = params.condcolparams
+ condcolparams.update(params.exposure_params.get_map(self.workspace))
+ condcolparams.update(params.outcome_params.get_map(self.workspace))
+ return super(TwoByTwoBase, self).get_params()
+
+ def get_condcolparams(self, workspace):
+ return StratifyParams(workspace, workspace.params.condcolparams,
+ self.get_collist())
+
+ def refresh(self):
+ super(TwoByTwoBase, self).refresh()
+ params = self.workspace.params
+ if params.exposure:
+ params.exposure_params.set_colname(self.workspace, params.exposure)
+ if params.outcome:
+ params.outcome_params.set_colname(self.workspace, params.outcome)
+
+ def do_fourfold(self, ds, summaryset, summcols, kwargs, conflev, std):
+ if not hasattr(SOOMv0.plot, 'fourfold') or std is None:
+ return
+ margin = None
+ std_labels = {
+ 'margins_1': 'standardised by row',
+ 'margins_2': 'standardised by column',
+ 'margins_12': 'standardised by rows and columns',
+ 'ind.max': 'individually standardised',
+ 'all.max': 'simultaneously standardised',
+ }
+ std_label = std_labels[std]
+ if std.startswith('margins_'):
+ std, margin = std.split('_')
+ margin = {'1': 1, '2': 2, '12': (1,2)}[margin]
+ plotargs = dict(title='', footer='', conflev=conflev,
+ std=std, margin=margin)
+ if len(summcols) > 2:
+ s_label = 'Stratified'
+ else:
+ s_label = 'Unstratified'
+ p_label = ' (%g%% conf. limits, %s)' % (conflev * 100, std_label)
+ of = self.workspace.output.tempfile('png', label=s_label + p_label)
+ SOOMv0.plot.output('PNG', file=of.fn, width=800)
+ SOOMv0.plot.fourfold(summaryset, *summcols, **plotargs)
+ if len(summcols) > 2:
+ unstratified_summcols = summcols[:2]
+ of = self.workspace.output.tempfile('png',
+ label='Unstratified' + p_label)
+ SOOMv0.plot.output('PNG', file=of.fn, width=800)
+ SOOMv0.plot.fourfold(ds, *unstratified_summcols,
+ **plotargs)
+
+ def go(self):
+ params = self.workspace.params
+ kwargs = self.get_params()
+ std = kwargs.pop('std')
+ ds = self.workspace.get_dataset()
+ params.dsparams.filter_args(kwargs['summkw'])
+ summcols = kwargs['summcols']
+ summaryset = ds.summ(*summcols, **kwargs['summkw'])
+ self.generate_title(summaryset, summcols)
+ conflev = kwargs['conflev']
+ self.do_fourfold(ds, summaryset, summcols, kwargs, conflev, std)
+ self.workspace.output.go(summaryset, conflev=conflev)
diff --git a/web/libsoomexplorer/plottypes.py b/web/libsoomexplorer/plottypes.py
new file mode 100644
index 0000000..f160d0f
--- /dev/null
+++ b/web/libsoomexplorer/plottypes.py
@@ -0,0 +1,456 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: plottypes.py 3701 2009-02-26 05:56:34Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/web/libsoomexplorer/plottypes.py,v $
+
+from libsoomexplorer.fields import *
+from libsoomexplorer import plotform
+
+common_first_fields = [
+ ShowDatasetField(),
+ AnalysisTypeField(),
+ FilterField(),
+]
+
+crosstab_common_fields = [
+ ChooseOneField('rounding', 'Rounding', target='outputkw',
+ options = [
+ (None, 'None'),
+ (-5, '100k'),
+ (-4, '10k'),
+ (-3, '1000'),
+ (-2, '100'),
+ (-1, '10'),
+ (0, '1'),
+ (2, '2dp'),
+ (4, '4dp'),
+ ],
+ pytype=int, default=False, horizontal=True),
+ BoolField('simple_table', 'Alternate rendering', target='outputkw',
+ default=False),
+ ]
+
+
+class Scatter(plotform.PlotTypeBase):
+ name = 'scatterplot'
+ label = 'Scatter Plot'
+ sample = True
+
+ fields = common_first_fields + [
+ ColField('xcolname', 'X Column',
+ colfilter=ordinalcol, logscale_attr='logxscale'),
+ ColField('ycolname', 'Y Column',
+ colfilter=ordinalcol, logscale_attr='logyscale'),
+ ColsField('condcols', 'Panel(s)'),
+ TextAreaField('title', 'Title'),
+ TextAreaField('subtitle', 'Footer'),
+ OutputField(),
+ ]
+
+class ScatterMatrix(plotform.PlotTypeBase):
+ name = 'scattermatrix'
+ label = 'Scatter Matrix Plot'
+ sample = True
+
+ fields = common_first_fields + [
+ ColsField('condcols', 'Measures', colfilter=scalarcol),
+ GroupByColField(),
+ TextAreaField('title', 'Title'),
+ TextAreaField('subtitle', 'Footer'),
+ OutputField(),
+ ]
+
+class Box(plotform.PlotTypeBase):
+ name = 'boxplot'
+ label = 'Box and Whisker Plot'
+ sample = True
+
+ fields = common_first_fields + [
+ ColField('ycolname', 'Y Column', colfilter=scalarcol),
+ ColField('xcolname', 'X Column', colfilter=discretecol),
+ ColsField('condcols', 'Panel(s)'),
+ TextAreaField('title', 'Title'),
+ TextAreaField('subtitle', 'Footer'),
+ BoolField('horizontal', 'Horizontal', target='plotkw', default=False),
+ BoolField('notches', 'Notches', target='plotkw', default=True),
+ BoolField('outliers', 'Outliers', target='plotkw', default=True),
+ BoolField('variable_width', 'Variable Width', target='plotkw',
+ default=True),
+ BoolField('violins', 'Violins', target='plotkw', default=False),
+ OutputField(),
+ ]
+
+
+class Histogram(plotform.PlotTypeBase):
+ name = 'histogram'
+ label = 'Histogram Plot'
+ sample = True
+
+ fields = common_first_fields + [
+ ColField('xcolname', 'X Column', colfilter=scalarcol),
+ ColsField('condcols', 'Panel(s)'),
+ TextAreaField('title', 'Title'),
+ TextAreaField('subtitle', 'Footer'),
+ ChooseOneField('hist_type', 'Type', target='plotkw',
+ options = [
+ ('percent', 'Percent'),
+ ('count', 'Frequency'),
+ ('density', 'Density'),
+ ],
+ default = 'percent'),
+ IntField('bins', 'Bins', target='plotkw', default=''),
+ OutputField(),
+ ]
+
+class Density(plotform.PlotTypeBase):
+ name = 'densityplot'
+ label = 'Density Plot'
+ sample = True
+
+ fields = common_first_fields + [
+ ColField('xcolname', 'X Column', colfilter=scalarcol),
+ ColsField('condcols', 'Panel(s)'),
+ TextAreaField('title', 'Title'),
+ TextAreaField('subtitle', 'Footer'),
+ IntField('bins', 'Bins', target='plotkw', default=''),
+ OutputField(),
+ ]
+
+class Line(plotform.PlotTypeBase):
+ name = 'lineplot'
+ label = 'Line Plot'
+
+ fields = common_first_fields + [
+# DateTimeColField('xcolname', 'X Column'),
+ ColField('xcolname', 'X Column', colfilter=discretecol),
+ GroupByColField(),
+ ColsField('condcols', 'Panel(s)', colfilter=discretecol),
+ MeasureColField('measure'),
+ CondColParamsField(),
+ TextAreaField('title', 'Title'),
+ TextAreaField('subtitle', 'Footer'),
+ ConfLevField(note='(when applicable)'),
+ OutputField(),
+ ]
+
+
+class CatChartBase(plotform.PlotTypeBase):
+ stack = False
+
+class Barchart(CatChartBase):
+ name = 'barchart'
+ label = 'Bar Chart'
+
+ fields = common_first_fields + [
+ ColField('xcolname', 'X Column', colfilter=discretecol),
+ MeasureColField('measure'),
+ GroupByColField(allow_stack=True),
+ ColsField('condcols', 'Panel(s)', colfilter=discretecol),
+ CondColParamsField(),
+ TextAreaField('title', 'Title'),
+ TextAreaField('subtitle', 'Footer'),
+ ConfLevField(note='(when applicable)'),
+ BoolField('horizontal', 'Horizontal', target='plotkw', default=False),
+ FloatField('origin', 'Origin', target='plotkw', default=0),
+ OutputField(),
+ ]
+
+class Dotchart(CatChartBase):
+ name = 'dotchart'
+ label = 'Dot Chart'
+
+ fields = common_first_fields + [
+ ColField('xcolname', 'X Column', colfilter=discretecol),
+ MeasureColField('measure'),
+ GroupByColField(),
+ ColsField('condcols', 'Panel(s)', colfilter=discretecol),
+ CondColParamsField(),
+ TextAreaField('title', 'Title'),
+ TextAreaField('subtitle', 'Footer'),
+ ConfLevField(note='(when applicable)'),
+ BoolField('horizontal', 'Horizontal', target='plotkw', default=False),
+ OutputField(),
+ ]
+
+class SummTable(plotform.TableTypeBase):
+ name = 'summtable'
+ label = 'Summary Table'
+
+ fields = common_first_fields + [
+ ColsField('condcols', 'Column(s)', colfilter=discretecol, min=1),
+ StatColsField(),
+ CondColParamsField(),
+ TextAreaField('title', 'Title'),
+ TextAreaField('subtitle', 'Footer'),
+ ConfLevField(note='(when applicable)'),
+ ]
+
+crosstabcols = [
+ ColsField('colcols', 'Column(s)', target='stratacols|summcols|colcols',
+ colfilter=discretecol, min=1),
+ ColsField('rowcols', 'Row(s)', target='stratacols|summcols|rowcols',
+ colfilter=discretecol, min=1),
+]
+
+class CrossTab(plotform.CrosstabBase):
+ name = 'crosstab'
+ label = 'Crosstab'
+
+ fields = common_first_fields + crosstabcols + [
+ WeightColField(),
+ StatColsField(),
+ ProportionColsField(),
+ CondColParamsField(),
+ TextAreaField('title', 'Title'),
+ TextAreaField('subtitle', 'Footer'),
+ ConfLevField(note='(when applicable)'),
+ ChooseOneField('marginal_totals', 'Marginal Totals', target='outputkw',
+ options = [
+ ('none', 'None'),
+ ('before', 'Top/Left'),
+ ('after', 'Bottom/Right'),
+ ],
+ default='none'),
+ ] + crosstab_common_fields
+
+class DatasetRows(plotform.DatasetRowsBase):
+ name = 'dsrows'
+ label = 'Dataset Rows'
+
+ fields = common_first_fields + [
+ ColsField('outcols', 'Column(s)', target='stratacols|outcols'),
+ OrderColsField(),
+ TextAreaField('title', 'Title'),
+ TextAreaField('subtitle', 'Footer'),
+ ChooseOneField('pagesize', 'Rows per page',
+ options = [
+ ('25', '25'),
+ ('50', '50'),
+ ('100', '100'),
+ ('1000', '1000'),
+ ('csv', 'CSV'),
+ ],
+ default='100', horizontal=True),
+ ]
+
+rate_common_fields = [
+ TextAreaField('title', 'Title'),
+ TextAreaField('subtitle', 'Footer'),
+ ConfLevField(optional=True),
+ ]
+
+rateplot_common_fields = rate_common_fields + [
+ DropField('output_type', 'Output type',
+ options = [
+ ('barchart', 'Bar Chart (vertical)'),
+ ('barchart_horizontal', 'Bar Chart (horizontal)'),
+ ('lineplot', 'Line Plot'),
+ ]),
+ OutputField(),
+ ]
+
+ratetab_common_fields = rate_common_fields + crosstab_common_fields
+
+class DSRPlot(plotform.DSRMixin, plotform.RatePlotBase):
+ name = 'dsrplot'
+ label = 'Directly Standardised Event Rates Plot'
+
+ fields = [
+ ShowDatasetField('Numerator Dataset'),
+ AnalysisTypeField(),
+ FilterField(label='Numerator Filter'),
+ ColField('standardiseby', 'Standardise by',
+ target='stndrdcols', colfilter=discretecol),
+ ColField('xcolname', 'X Column',
+ colfilter=(discretecol, notstandardisecol)),
+ GroupByColField(colfilter=(discretecol, notstandardisecol)),
+ ColsField('condcols', 'Panel(s)',
+ colfilter=(discretecol, notstandardisecol)),
+ PopulationDSField('popset', 'Denominator Population',
+ dsfilter=dssummarised),
+ FilterField('popsetparams', label='Denom. Popn. filter'),
+ PopulationDSField('stdpopset', 'Standard Population',
+ dsfilter=[dssummarised, dshascols('stndrdcols')]),
+ FilterField('stdpopsetparams', label='Std. Pop. filter'),
+ ] + rateplot_common_fields
+
+
+class DSRCrosstab(plotform.DSRMixin, plotform.RateTabBase):
+ name = 'dsrcrosstab'
+ label = 'Directly Standardised Event Rates Crosstab'
+
+ fields = [
+ ShowDatasetField('Numerator Dataset'),
+ AnalysisTypeField(),
+ FilterField(label='Numerator Filter'),
+ ColField('standardiseby', 'Standardise by',
+ target='stndrdcols', colfilter=discretecol),
+ ] + crosstabcols + [
+ PopulationDSField('popset', 'Denominator Population',
+ dsfilter=dssummarised),
+ FilterField('popsetparams', label='Denom. Popn. filter'),
+ PopulationDSField('stdpopset', 'Standard Population',
+ dsfilter=[dssummarised, dshascols('stndrdcols')]),
+ FilterField('stdpopsetparams', label='Std. Pop. filter'),
+ ChooseManyField('statcols', 'Measures', target='outputkw',
+ options = [
+ ('summfreq', 'Events'),
+ ('popfreq', 'Population at risk'),
+ ('std_strata_summfreq', 'Std Events'),
+ ('cr', 'CR'),
+ ('dsr', 'DSR'),
+ ],
+ horizontal=True, default='dsr'),
+ ] + ratetab_common_fields
+
+
+class SRPlot(plotform.SRMixin, plotform.RatePlotBase):
+ name = 'srplot'
+ label = 'Stratified Population Rates Plot'
+
+ fields = [
+ ShowDatasetField('Numerator Dataset'),
+ AnalysisTypeField(),
+ FilterField(label='Numerator Filter'),
+ ColField('xcolname', 'Stratify by', colfilter=discretecol),
+ GroupByColField(),
+ ColsField('condcols', 'Panel(s)', colfilter=discretecol),
+ PopulationDSField('popset', 'Denominator Population',
+ dsfilter=dssummarised),
+ FilterField('popsetparams', label='Denom. Popn. filter'),
+ ] + rateplot_common_fields
+
+
+class SRCrosstab(plotform.SRMixin, plotform.RateTabBase):
+ name = 'srcrosstab'
+ label = 'Stratified Population Rates Crosstab'
+
+ fields = [
+ ShowDatasetField('Numerator Dataset'),
+ AnalysisTypeField(),
+ FilterField(label='Numerator Filter'),
+ ] + crosstabcols + [
+ PopulationDSField('popset', 'Denominator Population',
+ dsfilter=dssummarised),
+ FilterField('popsetparams', label='Denom. Pop. filter'),
+ ChooseManyField('statcols', 'Measures', target='outputkw',
+ options = [
+ ('summfreq', 'Events'),
+ ('popfreq', 'Population at risk'),
+ ('sr', 'SR'),
+ ],
+ horizontal=True, default='sr'),
+ ] + ratetab_common_fields
+
+
+class ISRPlot(plotform.ISRMixin, plotform.RatePlotBase):
+ name = 'iserp'
+ label = 'Indirectly Standardised Event Ratio Plot'
+
+ fields = [
+ ShowDatasetField('Events dataset'),
+ AnalysisTypeField(),
+ FilterField(label='Events filter'),
+ ColsField('stdcols', 'Standardisation', min=1,
+ target='stndrdcols', colfilter=discretecol),
+ ColField('xcolname', 'X Column',
+ colfilter=(discretecol,)),
+ GroupByColField(colfilter=(discretecol, )),
+ ColsField('condcols', 'Panel(s)',
+ colfilter=(discretecol, )),
+ PopulationDSField('popset', 'Population for events',
+ dsfilter=[dssummarised,
+ dshascols('stratacols'),
+ dshascols('stndrdcols')]),
+ FilterField('popsetparams', label='Event Pop. filter'),
+ DatasetField('stdset', 'Standard events dataset',
+ dsfilter=dshascols('stndrdcols')),
+ FilterField('stdsetparams', label='Std. events filter'),
+ PopulationDSField('stdpopset', 'Std. events population',
+ dsfilter=[dssummarised, dshascols('stndrdcols')]),
+ FilterField('stdpopsetparams', label='Std. Pop. filter'),
+ ] + rateplot_common_fields
+
+
+class ISRCrosstab(plotform.ISRMixin, plotform.RateTabBase):
+ name = 'isercrosstab'
+ label = 'Indirectly Standardised Event Ratio Crosstab'
+
+ fields = [
+ ShowDatasetField('Events Dataset'),
+ AnalysisTypeField(),
+ FilterField(label='Events filter'),
+ ColsField('stdcols', 'Standardisation', target='stndrdcols',
+ colfilter=discretecol),
+ ] + crosstabcols + [
+ PopulationDSField('popset', 'Population for events',
+ dsfilter=[dssummarised,
+ dshascols('stratacols'),
+ dshascols('stndrdcols')]),
+ FilterField('popsetparams', label='Event Pop. filter'),
+ DatasetField('stdset', 'Standard events dataset',
+ dsfilter=dshascols('stndrdcols')),
+ FilterField('stdsetparams', label='Std. events filter'),
+ PopulationDSField('stdpopset', 'Std. events population',
+ dsfilter=[dssummarised, dshascols('stndrdcols')]),
+ FilterField('stdpopsetparams', label='Std. Pop. filter'),
+ ChooseManyField('statcols', 'Measures', target='outputkw',
+ options = [
+ ('isr', 'ISR'),
+ ('observed', 'Observed'),
+ ('expected', 'Expected'),
+ ('popfreq', 'Population at risk'),
+ ],
+ horizontal=True, default='isr'),
+ ] + ratetab_common_fields
+
+
+class TwoByTwo(plotform.TwoByTwoBase):
+ name = 'twobytwo'
+ label = '2 x 2 x k Analysis'
+
+ fields = common_first_fields + [
+ TwoByTwoColField('exposure', 'Exposure Column',
+ colfilter=discretecol),
+ TwoByTwoColField('outcome', 'Outcome Column',
+ colfilter=(discretecol,notcol('exposure'))),
+ StratifyColField('stratacolname'),
+ CondColParamsField(label='Stratify Parameters'),
+ TextAreaField('title', 'Title'),
+ TextAreaField('subtitle', 'Footer'),
+ ChooseOneField('std', 'Fourfold standardise',
+ options = [
+ ('None', 'No Fourfold plot'),
+ ('margins_1', 'Standardise row'),
+ ('margins_2', 'Standardise column'),
+ ('margins_12', 'Standardise rows and columns'),
+ ('ind.max', 'Individually standardised'),
+ ('all.max', 'Simultaneously standardised'),
+ ],
+ default='ind.max'),
+ ConfLevField(),
+ ]
+
+
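+# A minimal sketch of how another analysis type could be added (the
+# 'Pie' class here is hypothetical, shown for illustration only):
+#
+# class Pie(CatChartBase):
+#     name = 'piechart'
+#     label = 'Pie Chart'
+#     fields = common_first_fields + [
+#         ColField('xcolname', 'X Column', colfilter=discretecol),
+#         MeasureColField('measure'),
+#         OutputField(),
+#     ]
+#
+# New classes only take effect once appended to the plottypes tuple below.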
+plottypes = (
+ Line, Barchart, Dotchart,
+ Scatter, ScatterMatrix, Histogram, Density, Box,
+ SummTable, CrossTab, DatasetRows,
+ TwoByTwo,
+ DSRPlot, DSRCrosstab,
+ SRPlot, SRCrosstab,
+ ISRPlot, ISRCrosstab,
+)
+
diff --git a/web/libsoomexplorer/twobytwoparams.py b/web/libsoomexplorer/twobytwoparams.py
new file mode 100644
index 0000000..25368c9
--- /dev/null
+++ b/web/libsoomexplorer/twobytwoparams.py
@@ -0,0 +1,213 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: twobytwoparams.py 3673 2009-02-02 06:01:30Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/web/libsoomexplorer/twobytwoparams.py,v $
+
+import copy
+import sets
+import fnmatch
+import SOOMv0
+from libsoomexplorer.common import *
+from libsoomexplorer import colvals
+
+
+def short_desc(col, values):
+ limit = 200
+ desc = []
+ length = 0
+ for value in values:
+ value = colvals.shorten_trans(col, value)
+ length += len(value)
+ desc.append(value)
+ if length > limit:
+ break
+ remaining = len(values) - len(desc)
+ if remaining:
+ desc.append('and %d more ...' % remaining)
+ return ', '.join(desc)
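+# e.g. short_desc(col, values) yields something like
+# 'A, B, and 12 more ...' once roughly 200 characters of translated
+# values have been emitted, keeping the on-screen summary line short.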
+
+
+class TwoByTwoColParams:
+ too_many_results = 200
+
+ def __init__(self, label):
+ self.label = label
+ self.colname = None
+ self.saved = None
+ self.clear()
+
+ def clear(self, colname=None):
+ self.colname = colname
+ self.positive_label = self.label.capitalize()
+ self.negative_label = 'Not ' + self.label
+ self.inc = []
+ self.low_cardinality = False
+ self.high_cardinality = False
+ self.cardinality = 0
+ self.clear_search()
+
+ def _get_values(self, col):
+ return [value
+ for value, rows in col.inverted.items()
+ if value is not None and len(rows) > 0]
+
+ def _get_positive(self, col):
+ inc = sets.Set(self.inc)
+ return [v for v in self._get_values(col) if str(v) in inc]
+
+ def _get_negative(self, col):
+ inc = sets.Set(self.inc)
+ return [v for v in self._get_values(col) if str(v) not in inc]
+
+ def get_col(self, workspace):
+ return workspace.get_filtered_dataset()[self.colname]
+
+ def set_colname(self, workspace, colname):
+ if colname != self.colname:
+ self.clear(colname)
+ col = self.get_col(workspace)
+ values = self._get_values(col)
+ values.sort()
+ self.cardinality = len(values)
+ if self.cardinality < 2:
+ self.low_cardinality = True
+ elif self.cardinality == 2:
+ if col.do_format(col.do_outtrans(values[0])).lower() in ('yes', 'positive'):
+ i = 0
+ else:
+ i = 1
+ self.inc = [str(values[i])]
+ self.positive_label = col.do_outtrans(values[i])
+ self.negative_label = col.do_outtrans(values[not i])
+ elif self.cardinality > 30:
+ self.high_cardinality = True
+
+ def is_okay(self):
+ return len(self.inc) > 0 and len(self.inc) < self.cardinality
+
+ def desc_positive(self, workspace):
+ col = self.get_col(workspace)
+ return short_desc(col, self._get_positive(col))
+
+ def desc_negative(self, workspace):
+ col = self.get_col(workspace)
+ return short_desc(col, self._get_negative(col))
+
+ def options(self, workspace):
+ col = self.get_col(workspace)
+ return [(v, col.do_outtrans(v)) for v in self._get_values(col)]
+
+ def inc_options(self, workspace):
+ col = self.get_col(workspace)
+ return [(v, col.do_outtrans(v)) for v in self._get_positive(col)]
+
+ def result_options(self, workspace):
+ col = self.get_col(workspace)
+ return [(v, col.do_outtrans(v)) for v in self.result]
+
+ def do_res(self, ctx, op, collection):
+ col = self.get_col(ctx.locals.workspace)
+ inc = sets.Set(self.inc)
+ if collection == 'checked':
+ collection = sets.Set(self.result_inc)
+ else:
+ collection = sets.Set([str(v) for v in self.result])
+ if op == 'add':
+ inc |= collection
+ else:
+ inc -= collection
+ self.inc = list(inc)
+
+ def do_swap(self, ctx):
+ col = self.get_col(ctx.locals.workspace)
+ self.inc = self._get_negative(col)
+ self.negative_label, self.positive_label = \
+ self.positive_label, self.negative_label
+
+ def do_clear(self, ctx):
+ colname = self.colname
+ self.clear()
+ self.set_colname(ctx.locals.workspace, colname)
+
+ def get_map(self, workspace):
+ if self.colname is None:
+ return {}
+ col = self.get_col(workspace)
+ positive = []
+ negative = []
+ ignore = []
+ for value, rows in col.inverted.items():
+ if value is None or len(rows) == 0:
+ ignore.append(value)
+ elif str(value) in self.inc:
+ positive.append(value)
+ else:
+ negative.append(value)
+ if not positive:
+ raise UIError('%s: at least one value must be selected' %
+ self.positive_label)
+ if not negative:
+ raise UIError('%s: at least one value must be selected' %
+ self.negative_label)
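+ # A sketch of the map built below for colname 'smoker' with
+ # inc == ['1'] (the column values shown are illustrative):
+ # {'smoker': (SOOMv0.coalesce([1], value=-2, label='Smoker'),
+ # SOOMv0.coalesce([0, 9], value=-1, label='Not smoker'),
+ # SOOMv0.suppress([None]), SOOMv0.order(-2))}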
+ param_map = {
+ self.colname: (
+ SOOMv0.coalesce(positive, value=-2, label=self.positive_label),
+ SOOMv0.coalesce(negative, value=-1, label=self.negative_label),
+ SOOMv0.suppress(ignore),
+ SOOMv0.order(-2),
+ )
+ }
+ return param_map
+
+ def clear_search(self):
+ self.pattern = None
+ self.result_inc = []
+ self.result = []
+ self.search_error = ''
+
+ def do_search_clear(self, ctx):
+ self.clear_search()
+
+ def search(self, workspace):
+ col = self.get_col(workspace)
+ self.result = []
+ self.search_error = ''
+ if self.pattern:
+ pattern = colvals.make_re(self.pattern)
+ for v in self._get_values(col):
+ if self.pattern == str(v):
+ self.result.append(v)
+ elif pattern.match(str(col.do_outtrans(v))):
+ self.result.append(v)
+ if not self.result:
+ self.search_error = 'No matches'
+ elif len(self.result) > self.too_many_results:
+ self.search_error = 'Too many matches (%d)' % len(self.result)
+ self.result = []
+ self.result.sort()
+
+
+ def save_undo(self):
+ self.saved = self.inc, self.positive_label, self.negative_label
+ self.inc = self.inc[:]
+
+ def clear_undo(self):
+ self.saved = None
+
+ def undo(self):
+ if self.saved is not None:
+ self.inc, self.positive_label, self.negative_label = self.saved
+ self.saved = None
+
diff --git a/web/libsoomexplorer/undo.py b/web/libsoomexplorer/undo.py
new file mode 100644
index 0000000..bc286de
--- /dev/null
+++ b/web/libsoomexplorer/undo.py
@@ -0,0 +1,105 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: undo.py 3703 2009-03-03 04:58:06Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/web/libsoomexplorer/undo.py,v $
+
+import copy
+
+class _UMC(object):
+ __slots__ = 'method', 'args', 'kwargs'
+
+ def __init__(self, method, *args, **kwargs):
+ self.method = method
+ self.args = args
+ self.kwargs = kwargs
+
+ def undo(self, obj):
+ getattr(obj, self.method)(*self.args, **self.kwargs)
+
+class UndoError(Exception): pass
+
+class UndoMixin:
+ def __init__(self):
+ self.clear_undo()
+
+ def undo(self):
+ try:
+ last_event = self.__undo.pop(-1)
+ except IndexError:
+ raise UndoError('nothing to undo')
+ self.__redo, self.__undo, real_undo = [], self.__redo, self.__undo
+ try:
+ last_event.undo(self)
+ finally:
+ self.__redo, self.__undo = self.__undo, real_undo
+
+ def redo(self):
+ try:
+ next_event = self.__redo.pop(-1)
+ except IndexError:
+ raise UndoError('nothing to redo')
+ real_redo, self.__redo = self.__redo, []
+ try:
+ next_event.undo(self)
+ finally:
+ self.__redo = real_redo
+
+ def _record_undo(self, method, *args, **kwargs):
+ self.__undo.append(_UMC(method, *args, **kwargs))
+ self.__redo = []
+
+ def modified(self):
+ return len(self.__undo) > 0
+
+ def clear_undo(self):
+ self.__undo = []
+ self.__redo = []
+
+ def copy(self):
+ o = copy.deepcopy(self)
+ o.clear_undo()
+ return o
+
+
+if __name__ == '__main__':
+ class A(UndoMixin):
+ def __init__(self):
+ UndoMixin.__init__(self)
+ self.b = []
+
+ def add(self, n):
+ self._record_undo('remove', -1)
+ self.b.append(n)
+
+ def remove(self, i):
+ v = self.b.pop(i)
+ self._record_undo('add', v)
+
+ a = A()
+ a.add(4)
+ a.add(5)
+ print 'before', a.b, a.b == [4,5]
+ a.undo()
+ print 'undo one', a.b, a.b == [4]
+ a.undo()
+ print 'undo two', a.b, a.b == []
+ a.redo()
+ print 'redo', a.b, a.b == [4]
+ a.undo()
+ print 'undo redo', a.b, a.b == []
+ a.redo()
+ print 'redo one', a.b, a.b == [4]
+ a.redo()
+ print 'redo two', a.b, a.b == [4, 5]
diff --git a/web/libsoomexplorer/workspace.py b/web/libsoomexplorer/workspace.py
new file mode 100644
index 0000000..356e3d3
--- /dev/null
+++ b/web/libsoomexplorer/workspace.py
@@ -0,0 +1,124 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+# $Id: workspace.py 3676 2009-02-03 03:32:48Z andrewm $
+# $HeadURL: https://wwwepi4.health.nsw.gov.au/svn/netepi/Analysis/trunk/web/libsoomexplorer/workspace.py $
+
+from time import time
+import SOOMv0
+from libsoomexplorer import plottypes
+from libsoomexplorer.parameters import Parameters
+from libsoomexplorer.paramstore import analyses
+from libsoomexplorer.dsparams import DSParams
+from libsoomexplorer.output.base import NullOut, OutputError
+from libsoomexplorer.output.plot import ImageOut
+from libsoomexplorer.output.table import TableOut, CrosstabOut, DatasetRowsOut
+from libsoomexplorer.output.twobytwo import TwoByTwoOut
+
+
+class Workspace:
+ """
+ Analysis workspace
+
+ Main Attributes:
+ output Currently selected output style
+ outputs Available output styles (and output params)
+ params Analysis-type (plottype) parameters
+ plottype Currently selected analysis ("plottype")
+ """
+
+ def __init__(self):
+ self.params = Parameters()
+ self.plottype = None
+ self.outputs = {
+ 'image': ImageOut(),
+ 'table': TableOut(),
+ 'crosstab': CrosstabOut(),
+ 'dsrows': DatasetRowsOut(),
+ 'twobytwo': TwoByTwoOut(),
+ }
+ self.output = NullOut()
+
+ def clear_params(self):
+ self.params = Parameters(self.params.dsparams.dsname,
+ self.params.plottype)
+ self.plottype.set_default()
+
+ def datasets(self, filter=None, orderby='label'):
+ datasets = []
+ for dsname in SOOMv0.soom.available_datasets():
+ ds = SOOMv0.dsload(dsname)
+ if filter is None or filter(ds, self):
+ datasets.append((getattr(ds, orderby), ds))
+ datasets.sort()
+ return [ds for o, ds in datasets]
+
+ def available_datasets(self, filter=None):
+ return [(ds.name, ds.label) for ds in self.datasets(filter)]
+
+ def get_dataset(self):
+ return self.params.dsparams.get_dataset()
+
+ def get_filtered_dataset(self):
+ return self.params.dsparams.get_filtered_dataset()
+
+ def available_plottypes(self):
+ return [(p.name, p.label) for p in plottypes.plottypes]
+
+ def set_dataset(self, dsname):
+ self.params.dsparams.set_dsname(dsname)
+ self.set_plottype()
+
+ def set_plottype(self):
+ if not self.params.plottype:
+ self.params.plottype = plottypes.plottypes[0].name
+ if self.plottype is None or self.plottype.name != self.params.plottype:
+ for plottype in plottypes.plottypes:
+ if self.params.plottype == plottype.name:
+ break
+ else:
+ raise ValueError('bad plottype %r' % self.params.plottype)
+ self.output.clear()
+ self.plottype = plottype(self)
+ self.plottype.set_default()
+ self.params.loaded_from = ''
+ return True
+ return False
+
+ def get_condcolparams(self):
+ return self.plottype.get_condcolparams(self)
+
+ def set_outtype(self, outtype):
+ self.output = self.outputs[outtype]
+
+ def refresh(self):
+ self.set_plottype()
+ self.plottype.refresh()
+
+ def go(self):
+ start = time()
+ self.output.clear()
+ self.plottype.go()
+ self.output.elapsed = time() - start
+
+ def save(self):
+ self.output.clear()
+ overwrite=(self.params.save_as == self.params.loaded_from)
+ analyses.save(self.params, self.params.dsparams.dsname,
+ self.params.save_as, overwrite=overwrite)
+
+ def load(self, fn):
+ self.params = analyses.load(fn)
+ self.params.loaded_from = self.params.save_as
+ self.refresh()
diff --git a/web/libsoomexplorer/yappsrt.py b/web/libsoomexplorer/yappsrt.py
new file mode 100644
index 0000000..414afa4
--- /dev/null
+++ b/web/libsoomexplorer/yappsrt.py
@@ -0,0 +1,328 @@
+#
+# Yapps 2 Runtime, part of Yapps 2 - yet another python parser system
+# Copyright 1999-2003 by Amit J. Patel <amitp at cs.stanford.edu>
+#
+# This version of the Yapps 2 Runtime can be distributed under the
+# terms of the MIT open source license, either found in the LICENSE file
+# included with the Yapps distribution
+# <http://theory.stanford.edu/~amitp/yapps/> or at
+# <http://www.opensource.org/licenses/mit-license.php>
+#
+
+# Copyright 2004 Amit J. Patel
+# see: http://theory.stanford.edu/~amitp/Yapps/
+#
+# <http://www.opensource.org/licenses/mit-license.php>
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+"""Run time libraries needed to run parsers generated by Yapps.
+
+This module defines parse-time exception classes, a scanner class, a
+base class for parsers produced by Yapps, and a context class that
+keeps track of the parse stack.
+
+"""
+
+# TODO: it should be possible to embed yappsrt into the generated
+# grammar to make a standalone module.
+
+import sys, re
+
+class SyntaxError(Exception):
+ """When we run into an unexpected token, this is the exception to use"""
+ def __init__(self, charpos=-1, msg="Bad Token", context=None):
+ Exception.__init__(self)
+ self.charpos = charpos
+ self.msg = msg
+ self.context = context
+
+ def __str__(self):
+ if self.charpos < 0: return 'SyntaxError'
+ else: return 'SyntaxError at char %s (%s)' % (repr(self.charpos), self.msg)
+
+class NoMoreTokens(Exception):
+ """Another exception object, for when we run out of tokens"""
+ pass
+
+class Scanner:
+ """Yapps scanner.
+
+ The Yapps scanner can work in context sensitive or context
+ insensitive modes. The token(i) method is used to retrieve the
+ i-th token. It takes a restrict set that limits the set of tokens
+ it is allowed to return. In context sensitive mode, this restrict
+ set guides the scanner. In context insensitive mode, there is no
+ restriction (the set is always the full set of tokens).
+
+ """
+
+ def __init__(self, patterns, ignore, input):
+ """Initialize the scanner.
+
+ Parameters:
+ patterns : [(terminal, uncompiled regex), ...] or None
+ ignore : [terminal,...]
+ input : string
+
+ If patterns is None, we assume that the subclass has
+ defined self.patterns : [(terminal, compiled regex), ...].
+ Note that the patterns parameter expects uncompiled regexes,
+ whereas the self.patterns field expects compiled regexes.
+ """
+ self.tokens = [] # [(begin char pos, end char pos, token name, matched text), ...]
+ self.restrictions = []
+ self.input = input
+ self.pos = 0
+ self.ignore = ignore
+ self.first_line_number = 1
+
+ if patterns is not None:
+ # Compile the regex strings into regex objects
+ self.patterns = []
+ for terminal, regex in patterns:
+ self.patterns.append( (terminal, re.compile(regex)) )
+
+ def get_token_pos(self):
+ """Get the current token position in the input text."""
+ return len(self.tokens)
+
+ def get_char_pos(self):
+ """Get the current char position in the input text."""
+ return self.pos
+
+ def get_prev_char_pos(self, i=None):
+ """Get the previous position (one token back) in the input text."""
+ if self.pos == 0: return 0
+ if i is None: i = -1
+ return self.tokens[i][0]
+
+ def get_line_number(self):
+ """Get the line number of the current position in the input text."""
+ # TODO: make this work at any token/char position
+ return self.first_line_number + self.get_input_scanned().count('\n')
+
+ def get_column_number(self):
+ """Get the column number of the current position in the input text."""
+ s = self.get_input_scanned()
+ i = s.rfind('\n') # may be -1, but that's okay in this case
+ return len(s) - (i+1)
+
+ def get_input_scanned(self):
+ """Get the portion of the input that has been tokenized."""
+ return self.input[:self.pos]
+
+ def get_input_unscanned(self):
+ """Get the portion of the input that has not yet been tokenized."""
+ return self.input[self.pos:]
+
+ def token(self, i, restrict=None):
+ """Get the i'th token in the input.
+
+ If i is one past the end, then scan for another token.
+
+ Args:
+
+ restrict : [token, ...] or None; if restrict is None, then any
+ token is allowed. You may call token(i) more than once.
+ However, the restrict set may never be larger than what was
+ passed in on the first call to token(i).
+
+ """
+ if i == len(self.tokens):
+ self.scan(restrict)
+ if i < len(self.tokens):
+ # Make sure the restriction is more restricted. This
+ # invariant is needed to avoid ruining tokenization at
+ # position i+1 and higher.
+ if restrict and self.restrictions[i]:
+ for r in restrict:
+ if r not in self.restrictions[i]:
+ raise NotImplementedError("Unimplemented: restriction set changed")
+ return self.tokens[i]
+ raise NoMoreTokens()
+
+ def __repr__(self):
+ """Print the last 10 tokens that have been scanned in"""
+ output = ''
+ for t in self.tokens[-10:]:
+ output = '%s\n (@%s) %s = %s' % (output,t[0],t[2],repr(t[3]))
+ return output
+
+ def scan(self, restrict):
+ """Should scan another token and add it to the list, self.tokens,
+ and add the restriction to self.restrictions"""
+ # Keep looking for a token, ignoring any in self.ignore
+ while 1:
+ # Search the patterns for the longest match, with earlier
+ # tokens in the list having preference
+ best_match = -1
+ best_pat = '(error)'
+ for p, regexp in self.patterns:
+ # First check to see if we're ignoring this token
+ if restrict and p not in restrict and p not in self.ignore:
+ continue
+ m = regexp.match(self.input, self.pos)
+ if m and len(m.group(0)) > best_match:
+ # We got a match that's better than the previous one
+ best_pat = p
+ best_match = len(m.group(0))
+
+ # If we didn't find anything, raise an error
+ if best_pat == '(error)' and best_match < 0:
+ msg = 'Bad Token'
+ if restrict:
+ msg = 'Trying to find one of '+', '.join(restrict)
+ raise SyntaxError(self.pos, msg)
+
+ # If we found something that isn't to be ignored, return it
+ if best_pat not in self.ignore:
+ # Create a token with this data
+ token = (self.pos, self.pos+best_match, best_pat,
+ self.input[self.pos:self.pos+best_match])
+ self.pos = self.pos + best_match
+ # Only add this token if it's not in the list
+ # (to prevent looping)
+ if not self.tokens or token != self.tokens[-1]:
+ self.tokens.append(token)
+ self.restrictions.append(restrict)
+ return
+ else:
+ # This token should be ignored ..
+ self.pos = self.pos + best_match
+
+class Parser:
+ """Base class for Yapps-generated parsers.
+
+ """
+
+ def __init__(self, scanner):
+ self._scanner = scanner
+ self._pos = 0
+
+ def _peek(self, *types):
+ """Returns the token type for lookahead; if there are any args
+ then the list of args is the set of token types to allow"""
+ tok = self._scanner.token(self._pos, types)
+ return tok[2]
+
+ def _scan(self, type):
+ """Returns the matched text, and moves to the next token"""
+ tok = self._scanner.token(self._pos, [type])
+ if tok[2] != type:
+ raise SyntaxError(tok[0], 'Trying to find '+type+': '+', '.join(self._scanner.restrictions[self._pos]))
+ self._pos = 1 + self._pos
+ return tok[3]
+
+class Context:
+ """Class to represent the parser's call stack.
+
+ Every rule creates a Context that links to its parent rule. The
+ contexts can be used for debugging.
+
+ """
+
+ def __init__(self, parent, scanner, tokenpos, rule, args=()):
+ """Create a new context.
+
+ Args:
+ parent: Context object or None
+ scanner: Scanner object
+ tokenpos: integer (scanner token position)
+ rule: string (name of the rule)
+ args: tuple listing parameters to the rule
+
+ """
+ self.parent = parent
+ self.scanner = scanner
+ self.tokenpos = tokenpos
+ self.rule = rule
+ self.args = args
+
+ def __str__(self):
+ output = ''
+ if self.parent: output = str(self.parent) + ' > '
+ output += self.rule
+ return output
+
+def print_line_with_pointer(text, p):
+ """Print the line of 'text' that includes position 'p',
+ along with a second line with a single caret (^) at position p"""
+
+ # TODO: separate out the logic for determining the line/character
+ # location from the logic for determining how to display an
+ # 80-column line to stderr.
+
+ # Now try printing part of the line
+ text = text[max(p-80, 0):p+80]
+ p = p - max(p-80, 0)
+
+ # Strip to the left
+ i = text[:p].rfind('\n')
+ j = text[:p].rfind('\r')
+ if i < 0 or (0 <= j < i): i = j
+ if 0 <= i < p:
+ p = p - i - 1
+ text = text[i+1:]
+
+ # Strip to the right
+ i = text.find('\n', p)
+ j = text.find('\r', p)
+ if i < 0 or (0 <= j < i): i = j
+ if i >= 0:
+ text = text[:i]
+
+ # Now shorten the text
+ while len(text) > 70 and p > 60:
+ # Cut off 10 chars
+ text = "..." + text[10:]
+ p = p - 7
+
+ # Now print the string, along with an indicator
+ print >>sys.stderr, '> ',text
+ print >>sys.stderr, '> ',' '*p + '^'
+
+def print_error(input, err, scanner):
+ """Print error messages, the parser stack, and the input text -- for human-readable error messages."""
+ # NOTE: this function assumes 80 columns :-(
+ # Figure out the line number
+ line_number = scanner.get_line_number()
+ column_number = scanner.get_column_number()
+ print >>sys.stderr, '%d:%d: %s' % (line_number, column_number, err.msg)
+
+ context = err.context
+ if not context:
+ print_line_with_pointer(input, err.charpos)
+
+ while context:
+ # TODO: add line number
+ print >>sys.stderr, 'while parsing %s%s:' % (context.rule, tuple(context.args))
+ print_line_with_pointer(input, context.scanner.get_prev_char_pos(context.tokenpos))
+ context = context.parent
+
+def wrap_error_reporter(parser, rule):
+ try:
+ return getattr(parser, rule)()
+ except SyntaxError, e:
+ input = parser._scanner.input
+ print_error(input, e, parser._scanner)
+ except NoMoreTokens:
+ print >>sys.stderr, 'Could not complete parsing; stopped around here:'
+ print >>sys.stderr, parser._scanner
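+
+# Typical use from Yapps-generated code, as a sketch (the parser class
+# and rule names are illustrative, not defined in this module):
+# parser = CalcParser(CalcScanner(text))
+# result = wrap_error_reporter(parser, 'goal')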
diff --git a/web/nea-standalone.py b/web/nea-standalone.py
new file mode 100755
index 0000000..c92bcff
--- /dev/null
+++ b/web/nea-standalone.py
@@ -0,0 +1,90 @@
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+#
+# $Id: nea-standalone.py 3666 2009-01-29 10:25:19Z andrewm $
+# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/web/nea-standalone.py,v $
+
+import os
+import sys
+import optparse
+from albatross import httpdapp
+
+from SOOMv0 import soom
+
+
+appdir = os.path.abspath(os.path.dirname(__file__))
+default_dynamic_dir = os.path.join(appdir, 'dynamic')
+default_static_dir = os.path.join(appdir, 'static')
+default_data_dir = appdir
+
+if __name__ == '__main__':
+ sys.path.append(appdir)
+
+ opt_parse = optparse.OptionParser()
+ opt_parse.add_option('-p', '--port',
+ type='int', dest='port', default=8080,
+ help='listen on PORT (default: 8080)')
+ opt_parse.add_option('-S', '--soompath',
+ dest='soompath',
+ default='SOOM_objects:../SOOM_objects',
+ help='SOOM search path')
+ opt_parse.add_option('-N', '--appname',
+ dest='appname', default='nea',
+ help='application name (affects paths)')
+ opt_parse.add_option('-T', '--apptitle',
+ dest='apptitle',
+ default='NetEpi Analysis',
+ help='web application title')
+ opt_parse.add_option('--session-secret',
+ dest='session_secret',
+ help='Session signing secret')
+ opt_parse.add_option('--datadir',
+ dest='data_dir', default=default_data_dir,
+ help='A writeable directory NOT published by '
+ 'the web server (contains private data)')
+ opt_parse.add_option('--dynamicdir',
+ dest='dynamic_target', default=default_dynamic_dir,
+ help='A writeable directory published by '
+ 'the web server, contains files generated '
+ 'by the application')
+ opt_parse.add_option('--staticdir',
+ dest='static_target', default=default_static_dir,
+ help='A NON-writeable directory published by '
+ 'the web server, contains static content '
+ 'used by the application (css, images)')
+ opt_parse.add_option('-D', '--debug', action='store_true',
+ dest='debug', default=False,
+ help='Enable developer features')
+ options, args = opt_parse.parse_args()
+ if not options.session_secret:
+ import binascii
+ f = open('/dev/urandom', 'rb')
+ try:
+ data = f.read(32)
+ finally:
+ f.close()
+ options.session_secret = binascii.b2a_base64(data).rstrip()
+
+ static_resources = [
+ ('/nea/dynamic', options.dynamic_target),
+ ('/nea', options.static_target),
+ ]
+ sys.modules['config'] = options # XXX Dodgy - "import config"
+
+ # Create the HTTP server and serve requests.
+ from nea import app
+ httpd = httpdapp.HTTPServer(app, options.port,
+ static_resources = static_resources)
+ httpd.serve_forever()
diff --git a/web/nea.py b/web/nea.py
new file mode 100644
index 0000000..fafbd87
--- /dev/null
+++ b/web/nea.py
@@ -0,0 +1,583 @@
+#!/usr/bin/python
+#
+# The contents of this file are subject to the HACOS License Version 1.2
+# (the "License"); you may not use this file except in compliance with
+# the License. Software distributed under the License is distributed
+# on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the LICENSE file for the specific language governing
+# rights and limitations under the License. The Original Software
+# is "NetEpi Analysis". The Initial Developer of the Original
+# Software is the Health Administration Corporation, incorporated in
+# the State of New South Wales, Australia.
+#
+# Copyright (C) 2004,2005 Health Administration Corporation.
+# All Rights Reserved.
+#
+#
+# $Id: nea.py 3703 2009-03-03 04:58:06Z andrewm $
+# $HeadURL: https://wwwepi4.health.nsw.gov.au/svn/netepi/Analysis/trunk/web/nea.py $
+
+# Standard libraries
+import sys, os
+import copy
+import traceback
+
+# Albatross, http://www.object-craft.com.au/projects/albatross/
+from albatross import SimpleApp, SimpleAppContext
+
+def is_fcgi():
+ # If there's a better way of detecting a FastCGI environment, I'd love to
+ # hear it.
+ import socket, errno
+ try:
+ s=socket.fromfd(sys.stdin.fileno(), socket.AF_INET,
+ socket.SOCK_STREAM)
+ except socket.error:
+ return False
+ try:
+ try:
+ s.getpeername()
+ except socket.error, (eno, errmsg):
+ return eno == errno.ENOTCONN
+ finally:
+ s.close()
+
+use_fcgi = is_fcgi()
+if use_fcgi:
+ from albatross.fcgiapp import Request, running
+else:
+ from albatross.cgiapp import Request
+
+# SOOM, NSWDoH
+import SOOMv0
+
+# Application modules
+from libsoomexplorer.workspace import Workspace
+from libsoomexplorer.paramstore import analyses
+from libsoomexplorer.filter import ExpressionEdit, Filter, FilterError
+from libsoomexplorer.undo import UndoError
+from libsoomexplorer.output.base import OutputError
+from libsoomexplorer.common import *
+
+import config
+
+app_dir = os.path.dirname(__file__)
+page_dir = os.path.join(app_dir, 'pages')
+sys.path.insert(0, app_dir)
+
+catchable_errors = (SOOMv0.Error, UIError, OutputError)
+
+
+class PageBase:
+ def page_display(self, ctx):
+ workspace = getattr(ctx.locals, 'workspace', None)
+ if workspace and workspace.output.have_download():
+ try:
+ ctx.locals.workspace.output.send_download(ctx)
+ except Exception:
+ traceback.print_exc(None, sys.stderr)
+ else:
+ ctx.locals.title = ctx.locals.appname
+ ctx.run_template(self.name + '.html')
+
+ def dispatch(self, ctx, *objects):
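+ # Fields named 'verb/arg1/arg2' (with the '.x'/'.y' suffixes browsers
+ # append to image submits) are routed to do_verb(ctx, 'arg1', 'arg2')
+ # on the first of *objects that defines it; e.g. an image input named
+ # 'delete/frob.x' invokes do_delete(ctx, 'frob').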
+ for field in ctx.request.field_names():
+ if field.endswith('.x'):
+ field = field[:-2]
+ elif field.endswith('.y'):
+ continue
+ subfields = field.split('/')
+ for o in objects:
+ meth = getattr(o, 'do_' + subfields[0], None)
+ if meth:
+ ctx.catchcall(meth, ctx, *subfields[1:])
+ return True
+ return False
+
+ def page_process(self, ctx):
+ self.dispatch(ctx, self)
+
+
+class StartPage(PageBase):
+ name = 'start'
+
+ def page_enter(self, ctx):
+ if not analyses:
+ ctx.set_page('newanalysis')
+ ctx.locals.delete_an = None
+ ctx.add_session_vars('delete_an')
+
+ def page_leave(self, ctx):
+ ctx.del_session_vars('delete_an')
+
+ def do_new(self, ctx):
+ ctx.set_page('newanalysis')
+
+ def do_analyse(self, ctx, fn):
+ ctx.locals.workspace = Workspace()
+ ctx.locals.workspace.load(fn)
+ ctx.add_session_vars('workspace')
+ ctx.set_page('params')
+
+ def do_delete(self, ctx, fn):
+ ctx.locals.delete_an = analyses.find(fn)
+
+ def do_delete_confirm(self, ctx):
+ analyses.delete(ctx.locals.delete_an.fn)
+ self.do_delete_cancel(ctx)
+
+ def do_delete_cancel(self, ctx):
+ ctx.locals.delete_an = None
+
+
+class NewAnalysisPage(PageBase):
+ name = 'newanalysis'
+
+ def page_enter(self, ctx):
+ ctx.locals.workspace = Workspace()
+ ctx.add_session_vars('workspace')
+
+ def do_analyse(self, ctx, dsname):
+ ctx.locals.workspace.set_dataset(dsname)
+ ctx.set_page('params')
+
+ def do_about(self, ctx, dsname):
+ ctx.push_page('explore', dsname)
+
+ def do_show_prepared(self, ctx):
+ ctx.set_page('start')
+
+
+class ParamPage(PageBase):
+ name = 'params'
+
+ def page_display(self, ctx):
+ workspace = ctx.locals.workspace
+ label = workspace.params.dsparams.get_label()
+ ctx.locals.title = 'Dataset %r - %s' % (label, ctx.locals.appname)
+ ctx.locals.condcolparams = workspace.get_condcolparams()
+ PageBase.page_display(self, ctx)
+
+ def do_info(self, ctx, param):
+ dsname = getattr(ctx.locals.workspace.params, param + 'params').dsname
+ if dsname:
+ ctx.push_page('explore', dsname)
+
+ def do_back(self, ctx):
+ ctx.set_page('start')
+
+ def do_save(self, ctx):
+ ctx.push_page('paramsave')
+
+ def do_ok(self, ctx):
+ ctx.locals.workspace.go()
+ if ctx.locals.workspace.output.inline:
+ ctx.push_page('result')
+
+ def do_plottype_reset(self, ctx):
+ ctx.locals.workspace.clear_params()
+
+ def do_hide_params(self, ctx):
+ ctx.locals.workspace.plottype.hide_params = True
+ if ctx.locals.workspace.output.have_files() and ctx.locals.workspace.output.inline:
+ ctx.push_page('result')
+
+ def do_edit_condcolparams(self, ctx):
+ if not len(ctx.locals.workspace.get_condcolparams()):
+ raise UIError('No conditioning columns?')
+ ctx.push_page('condcolparams')
+
+ def do_edit_exposed(self, ctx):
+ ctx.push_page('twobytwoparams',
+ ctx.locals.workspace.params.exposure_params)
+
+ def do_edit_outcome(self, ctx):
+ ctx.push_page('twobytwoparams',
+ ctx.locals.workspace.params.outcome_params)
+
+ def page_process(self, ctx):
+ timer('refresh')
+ ctx.catchcall(ctx.locals.workspace.refresh)
+ timer('dispatch')
+ self.dispatch(ctx, self, ctx.locals.workspace.params)
+
+
+class ParamSavePage(PageBase):
+ name = 'paramsave'
+
+ def page_enter(self, ctx):
+ params = ctx.locals.workspace.params
+ if params.loaded_from:
+ params.save_as = params.loaded_from
+ else:
+ params.save_as = params.title
+
+ def do_back(self, ctx):
+ ctx.pop_page()
+
+ def do_save(self, ctx):
+ ctx.locals.workspace.save()
+ ctx.pop_page()
+
+
+class ResultPage(PageBase):
+ name = 'result'
+
+ def do_back(self, ctx):
+ ctx.set_page('start')
+
+ def do_show_params(self, ctx):
+ ctx.locals.workspace.plottype.hide_params = False
+ ctx.pop_page()
+
+ def do_prev_page(self, ctx):
+ ctx.locals.workspace.output.prev_page()
+
+ def do_next_page(self, ctx):
+ ctx.locals.workspace.output.next_page()
+
+ def page_process(self, ctx):
+ if not self.dispatch(ctx, self):
+ page = getattr(ctx.locals, 'select_page', [])
+ ctx.locals.workspace.output.select_page(page)
+ ctx.locals.select_page = None
+
+
+class FilterPage(PageBase):
+ name = 'filter'
+ colnamefields = 'dtcol', 'disccol', 'textcol', 'othercol'
+
+ def page_enter(self, ctx, dsparams):
+ ctx.locals.dsparams = dsparams
+ ctx.add_session_vars('dsparams')
+ filter = ctx.locals.dsparams.edit_filter
+ if not filter.name:
+ filter.start_edit_node(filter.root.path)
+ ctx.locals.filter = filter
+ ctx.add_session_vars('filter')
+ for a in self.colnamefields:
+ setattr(ctx.locals, a, '')
+ ctx.add_session_vars(*self.colnamefields)
+ ctx.locals.colvalselect = None
+ ctx.add_session_vars('colvalselect')
+ ctx.locals.delete_confirm = False
+ ctx.add_session_vars('delete_confirm')
+ ctx.locals.want_import = False
+ ctx.add_session_vars('want_import')
+ ctx.locals.confirm_quit = False
+ ctx.add_session_vars('confirm_quit')
+
+ def page_leave(self, ctx):
+ ctx.locals.dsparams.edit_filter = None
+ ctx.del_session_vars('dsparams')
+ ctx.del_session_vars('filter')
+ ctx.del_session_vars(*self.colnamefields)
+ ctx.del_session_vars('colvalselect')
+ ctx.del_session_vars('delete_confirm')
+ ctx.del_session_vars('want_import')
+ ctx.del_session_vars('confirm_quit')
+
+ def page_display(self, ctx):
+ filter = ctx.locals.filter
+ workspace = ctx.locals.workspace
+ if filter.edit_expr and hasattr(filter.edit_expr, 'colname'):
+ ctx.locals.colvalselect = filter.edit_expr.value
+ ctx.locals.search_result = ctx.locals.colvalselect.search(workspace)
+ for a in self.colnamefields:
+ setattr(ctx.locals, a, filter.edit_expr.colname)
+ else:
+ ctx.locals.colvalselect = None
+ ctx.locals.search_result = []
+ PageBase.page_display(self, ctx)
+
+ def do_cancel(self, ctx):
+ if ctx.locals.filter.modified():
+ ctx.locals.confirm_quit = True
+ else:
+ ctx.pop_page()
+
+ def do_cancel_confirm(self, ctx):
+ ctx.pop_page()
+
+ def do_cancel_cancel(self, ctx):
+ ctx.locals.confirm_quit = False
+
+ def do_delete(self, ctx):
+ ctx.locals.delete_confirm = True
+
+ def do_delete_confirm(self, ctx):
+ try:
+ ctx.locals.dsparams.delete_filter(ctx.locals.filter)
+ finally:
+ ctx.locals.delete_confirm = False
+ ctx.pop_page()
+
+ def do_delete_cancel(self, ctx):
+ ctx.locals.delete_confirm = False
+
+ def do_save(self, ctx):
+ if ctx.locals.filter.edit_info:
+ ctx.locals.filter.apply_info()
+ if not ctx.locals.filter.name:
+ ctx.msg('warn', 'Give the filter a name before saving')
+ self.do_edit_info(ctx)
+ elif not ctx.locals.filter.modified():
+ ctx.msg('warn', 'Filter has not been modified.')
+ else:
+ ctx.locals.dsparams.save_filter(ctx.locals.filter)
+ ctx.pop_page()
+
+ def do_okay(self, ctx):
+ if ctx.locals.filter.edit_info:
+ ctx.locals.filter.apply_info()
+ if ctx.locals.filter.edit_expr:
+ ctx.locals.filter.commit_edit_node(ctx.locals.workspace)
+ ctx.locals.dsparams.use_filter(ctx.locals.filter)
+ if ctx.locals.filter.modified():
+ ctx.msg('warn', 'Filter has been modified, but not saved.')
+ ctx.pop_page()
+
+ def do_undo(self, ctx):
+ ctx.locals.filter.undo()
+
+ def do_redo(self, ctx):
+ ctx.locals.filter.redo()
+
+ def do_edit_info(self, ctx):
+ ctx.locals.filter.start_edit_info()
+
+ def do_info_edit_apply(self, ctx):
+ ctx.locals.filter.apply_info()
+
+ def do_clear_edit(self, ctx):
+ ctx.locals.filter.clear_edit()
+
+ def do_andor_insert(self, ctx, op):
+ ctx.locals.filter.add_andor(ctx.locals.filter.edit_andor.node, op)
+
+ def do_andor_change(self, ctx, op):
+ ctx.locals.filter.set_andor(ctx.locals.filter.edit_andor.node, op)
+
+ def do_andor_add_expr(self, ctx):
+ ctx.locals.filter.add_expr(ctx.locals.filter.edit_andor.node)
+
+ def do_expr_insert(self, ctx, op):
+ ctx.locals.filter.add_andor(ctx.locals.filter.edit_expr.node, op)
+
+ def do_expr_select(self, ctx, mode):
+ ctx.locals.filter.expr_mode(int(mode))
+
+ def do_delete_node(self, ctx):
+ ctx.locals.filter.del_node()
+
+ def do_expr_okay(self, ctx):
+ ctx.locals.filter.commit_edit_node(ctx.locals.workspace)
+
+ def do_expr_okay_add(self, ctx):
+ ctx.locals.filter.commit_add_edit_node(ctx.locals.workspace)
+
+ def do_req_import(self, ctx):
+ ctx.locals.want_import = True
+
+ def do_paste(self, ctx):
+ ctx.locals.filter.paste()
+
+ def do_import_cancel(self, ctx):
+ ctx.locals.want_import = False
+
+ def do_import(self, ctx):
+ try:
+ filter = ctx.locals.dsparams.load_filter(ctx.locals.import_filter)
+ ctx.locals.filter.splice_filter(filter)
+ finally:
+ ctx.locals.want_import = False
+
+ def do_node(self, ctx, *args):
+ ctx.locals.filter.start_edit_node(*args)
+
+ def do_sop(self, ctx, *args):
+ ctx.locals.colvalselect.sop(ctx.locals.workspace, *args)
+
+ def page_process(self, ctx):
+ if not self.dispatch(ctx, self):
+ ctx.locals.filter.expr_next()
+
+
+class DSMeta:
+ def __init__(self, dsname):
+ self.dsname = dsname
+ self.show_col = None
+
+ def get_dataset(self):
+ return SOOMv0.dsload(self.dsname)
+
+ def describe_ds(self):
+ return self.get_dataset().describe(SOOMv0.SOME_DETAIL).describe_tuples()
+
+ def describe_cols(self):
+ return self.get_dataset().describe_cols()
+
+ def describe_col(self, colname):
+ return self.get_dataset()[colname].describe(SOOMv0.SOME_DETAIL).describe_tuples()
+
+
+class ExplorePage(PageBase):
+ name = 'explore'
+
+ def page_enter(self, ctx, dsname):
+ ctx.locals.dsmeta = DSMeta(dsname)
+ ctx.add_session_vars('dsmeta')
+
+ def page_leave(self, ctx):
+ ctx.del_session_vars('dsmeta')
+
+ def do_back(self, ctx):
+ ctx.pop_page()
+
+ def do_allcols(self, ctx):
+ ctx.locals.dsmeta.show_col = None
+
+ def do_view(self, ctx, show_col):
+ ctx.locals.dsmeta.show_col = show_col
+
+
+class CondColParamsPage(PageBase):
+ name = 'condcolparams'
+
+ def page_enter(self, ctx):
+ workspace = ctx.locals.workspace
+ ctx.locals.condcolparams = workspace.get_condcolparams()
+ ctx.locals.colvalselect = None
+ ctx.add_session_vars('condcolparams', 'colvalselect')
+
+ def page_leave(self, ctx):
+ ctx.del_session_vars('condcolparams', 'colvalselect')
+
+ def page_display(self, ctx):
+ condcolparams = ctx.locals.condcolparams
+ workspace = ctx.locals.workspace
+ # We want the search result to be transient
+ ctx.locals.search_result = condcolparams.maybe_search(workspace)
+ PageBase.page_display(self, ctx)
+
+ def do_okay(self, ctx):
+ ctx.pop_page()
+ param_map = ctx.locals.condcolparams.get_map(ctx.locals.workspace)
+ ctx.locals.workspace.params.condcolparams.update(param_map)
+
+ def do_back(self, ctx):
+ ctx.pop_page()
+
+ def do_clear(self, ctx):
+ ctx.locals.condcolparams.clear(ctx.locals.workspace)
+
+ def do_edit_okay(self, ctx):
+ ctx.locals.condcolparams.done_edit(ctx.locals.workspace)
+ ctx.locals.colvalselect = None
+
+ def do_col(self, ctx, *fields):
+ ctx.locals.condcolparams.do_col(ctx.locals.workspace, *fields)
+ ctx.locals.colvalselect = ctx.locals.condcolparams.edit_col.edit
+
+ def do_sop(self, ctx, *fields):
+ ctx.locals.colvalselect.sop(ctx.locals.workspace, *fields)
+
+
+class TwoByTwoParamsPage(PageBase):
+ name = 'twobytwoparams'
+
+ def page_enter(self, ctx, params):
+ params.save_undo()
+ ctx.locals.params = params
+ ctx.add_session_vars('params')
+
+ def page_leave(self, ctx):
+ ctx.del_session_vars('params')
+
+ def do_back(self, ctx):
+ ctx.locals.params.undo()
+ ctx.pop_page()
+
+ def do_okay(self, ctx):
+ param_map = ctx.locals.params.get_map(ctx.locals.workspace)
+ ctx.locals.workspace.params.condcolparams.update(param_map)
+ ctx.locals.params.clear_undo()
+ ctx.pop_page()
+
+ def page_process(self, ctx):
+ self.dispatch(ctx, self, ctx.locals.params)
+ ctx.locals.params.search(ctx.locals.workspace)
+
+
+class Context(SimpleAppContext):
+ def __init__(self, app):
+ SimpleAppContext.__init__(self, app)
+ for attr in ('appname', 'apptitle'):
+ setattr(self.locals, attr, getattr(config, attr))
+ self.add_session_vars(attr)
+ self.locals.soomversion = SOOMv0.version
+ self.locals.msgs = []
+ timer.reset()
+ timer('init context')
+ self.run_template_once('macros.html')
+
+ def msg(self, lvl, msg):
+ if lvl not in ('info', 'warn', 'err'):
+ lvl = 'warn'
+ self.locals.msgs.append((lvl, msg))
+
+ def catchcall(self, fn, *a, **kw):
+ try:
+ fn(*a, **kw)
+ return False
+ except (SOOMv0.ExpressionError, SOOMv0.yappsrt.SyntaxError), e:
+ self.msg('err', 'Filter: %s' % e)
+ except (UIError, UndoError, FilterError), e:
+ self.msg('err', e)
+ except catchable_errors, e:
+ self.msg('err', '%s: %s' % (e.__class__.__name__, e))
+ return True
+
+
+class Application(SimpleApp):
+ pages = (
+ CondColParamsPage,
+ ExplorePage,
+ FilterPage,
+ NewAnalysisPage,
+ ParamPage,
+ ParamSavePage,
+ ResultPage,
+ StartPage,
+ TwoByTwoParamsPage,
+ )
+
+ def __init__(self):
+ SimpleApp.__init__(self,
+ base_url = 'nea.py',
+ # module_path=page_dir,
+ template_path=page_dir,
+ start_page='start',
+ secret=config.session_secret)
+ for page in self.pages:
+ self.register_page(page.name, page())
+
+ def create_context(self):
+ return Context(self)
+
+
+SOOMv0.soom.setpath(config.soompath, config.data_dir)
+app = Application()
+
+if __name__ == "__main__":
+ import signal, rpy
+ signal.signal(signal.SIGSEGV, signal.SIG_DFL)
+
+ if not os.path.isabs(config.soompath):
+ config.soompath = os.path.join(app_dir, config.soompath)
+
+ if use_fcgi:
+ while running():
+ app.run(Request())
+ else:
+ app.run(Request())
diff --git a/web/pages/colvalselect.html b/web/pages/colvalselect.html
new file mode 100644
index 0000000..dee990d
--- /dev/null
+++ b/web/pages/colvalselect.html
@@ -0,0 +1,113 @@
+<al-comment>
+
+ The contents of this file are subject to the HACOS License Version 1.2
+ (the "License"); you may not use this file except in compliance with
+ the License. Software distributed under the License is distributed
+ on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ implied. See the LICENSE file for the specific language governing
+ rights and limitations under the License. The Original Software
+ is "NetEpi Analysis". The Initial Developer of the Original
+ Software is the Health Administration Corporation, incorporated in
+ the State of New South Wales, Australia.
+
+ Copyright (C) 2004,2005 Health Administration Corporation.
+ All Rights Reserved.
+
+</al-comment>
+
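+<!-- Column-value selection widgets: high-cardinality columns get a search
+     box plus add/remove lists; low-cardinality columns get a plain
+     (multi-)select. See the value_edit macro at the bottom of this file. -->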
+<al-macro name="high_card_values">
+ <al-if expr="colvalselect">
+ <table width="100%" cellpadding="0" cellspacing="0">
+ <tr>
+ <td width="100%">
+ <table width="100%">
+ <al-for iter="ii" expr="colvalselect.trans_values(workspace)">
+ <tr>
+ <td width="17">
+ <al-if expr="colvalselect.multiple">
+ <al-input type="image" src="/nea/button-del.png"
+ nameexpr="'sop/del/%s' % ii.value()[0]" />
+ </al-if>
+ </td>
+ <td>
+ <al-value expr="ii.value()[1]" />
+ </td>
+ </tr>
+ </al-for>
+ </table>
+ </td>
+ <td valign="middle" class="submit">
+ <al-if expr="colvalselect.multiple">
+ <al-input type="submit" class="submit" value="None" name="sop/none/" />
+ </al-if>
+ </td>
+ </tr>
+ </table>
+ <al-else>
+ <div class="warn" align="center">Nothing selected</div>
+ </al-if>
+</al-macro>
+
+<al-macro name="colval_search">
+ <table width="100%" cellpadding="0" cellspacing="0" class="search">
+ <tr>
+ <td colspan="2" nowrap>
+ <b>Search:</b>
+ <al-input name="colvalselect.search_pat" />
+ <al-input type="submit" class="submit" value="Search" name="search" />
+ <al-input type="submit" class="submit" value="Clear" name="sop/clr/" />
+ </td>
+ </tr>
+ <al-if expr="colvalselect.errorstr">
+ <tr>
+ <td class="error" colspan="2">
+ <al-value expr="colvalselect.errorstr" />
+ </td>
+ </tr>
+ </al-if>
+ <tr>
+ <td width="100%">
+ <al-if expr="search_result">
+ <table width="100%">
+ <al-for iter="ii" expr="search_result">
+ <tr>
+ <td width="17">
+ <al-input type="image" src="/nea/button-add.png"
+ nameexpr="'sop/add/%s' % ii.value()[0]" />
+ </td>
+ <td width="100%">
+ <al-value expr="ii.value()[1]" />
+ </td>
+ </tr>
+ </al-for>
+ </table>
+ </al-if>
+ </td>
+ <td valign="middle" class="submit" align="right">
+ <al-if expr="colvalselect.multiple">
+ <al-input type="submit" class="submit" value="All" name="sop/all/" />
+ </al-if>
+ </td>
+ </tr>
+ </table>
+</al-macro>
+
+<al-macro name="value_edit">
+ <al-if expr="colvalselect.cardinality_is_high(workspace)">
+ <al-expand name="high_card_values" />
+ <al-else>
+ <table width="100%" cellpadding="0" cellspacing="0">
+ <tr>
+ <td>
+ <al-if expr="colvalselect.multiple">
+ <al-select name="colvalselect.value" multiple size="8"
+ optionexpr="colvalselect.select_values(workspace)" />
+ <al-else>
+ <al-select name="colvalselect.value" size="8"
+ optionexpr="colvalselect.select_values(workspace)" />
+ </al-if>
+ </td>
+ </tr>
+ </table>
+ </al-if>
+</al-macro>
diff --git a/web/pages/condcolparams.html b/web/pages/condcolparams.html
new file mode 100644
index 0000000..4829a3d
--- /dev/null
+++ b/web/pages/condcolparams.html
@@ -0,0 +1,134 @@
+<al-comment>
+
+ The contents of this file are subject to the HACOS License Version 1.2
+ (the "License"); you may not use this file except in compliance with
+ the License. Software distributed under the License is distributed
+ on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ implied. See the LICENSE file for the specific language governing
+ rights and limitations under the License. The Original Software
+ is "NetEpi Analysis". The Initial Developer of the Original
+ Software is the Health Administration Corporation, incorporated in
+ the State of New South Wales, Australia.
+
+ Copyright (C) 2004,2005 Health Administration Corporation.
+ All Rights Reserved.
+
+</al-comment>
+
+<al-include name="colvalselect.html" />
+
+<al-macro name="condcolparams_buttons">
+ <tr>
+ <td colspan="3" class="buttons">
+ <table width="100%" cellpadding="0" cellspacing="0">
+ <tr>
+ <td align="left" class="butt">
+ <al-input type="submit" name="back" value="Cancel" />
+ </td>
+ <td align="center" class="butt">
+ <al-input type="submit" name="clear" value="Clear" />
+ </td>
+ <td align="right" class="butt">
+ <al-input type="submit" name="okay" value="Okay" />
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+</al-macro>
+
+<al-macro name="colbuttons">
+ <td class="butt" nowrap>
+ <al-if expr="not condcolparams.inhibit_suppress">
+ <al-input type="submit" value="Suppress"
+ nameexpr="'col/suppress/%s' % colparams.name" />
+ <al-input type="submit" value="Retain"
+ nameexpr="'col/retain/%s' % colparams.name" />
+ <br />
+ </al-if>
+ <al-input type="submit" value="Coalesce"
+ nameexpr="'col/coalesce/%s' % colparams.name" />
+ <al-input type="submit" value="Clear"
+ nameexpr="'col/clear/%s' % colparams.name" />
+ </td>
+</al-macro>
+
+<al-macro name="displaycol">
+ <table width="100%" class="display" cellpadding="0" cellspacing="0">
+ <al-exec expr="desc = colparams.describe(workspace)" />
+ <al-if expr="not desc or desc[0][0] not in ('suppress', 'retain')">
+ <tr>
+ <td></td><td width="100%"></td>
+ <al-expand name="colbuttons" />
+ </tr>
+ </al-if>
+ <al-for iter="ii" expr="desc">
+ <tr>
+ <td><al-value expr="ii.value()[0]" />:</td>
+ <td width="100%"><al-value expr="ii.value()[1]" /></td>
+ <al-if expr="ii.index() == 0 and ii.value()[0] in ('suppress', 'retain')">
+ <al-expand name="colbuttons" />
+ </al-if>
+ <al-if expr="ii.value()[0] == 'coalesce'">
+ <td class="butt">
+ <al-input type="submit" value="Edit"
+ nameexpr="'col/coalesce/%s/%d' % (colparams.name, ii.index())" />
+ <al-input type="submit" value="Delete"
+ nameexpr="'col/del/%s/%d' % (colparams.name, ii.index())" />
+ </td>
+ </al-if>
+ </tr>
+ </al-for>
+ </table>
+</al-macro>
+
+<al-macro name="editcol">
+ <table width="100%" cellpadding="0" cellspacing="0">
+ <al-if expr="colparams.edit.op == 'coalesce'">
+ <tr>
+ <td>Label:</td>
+ <td width="100%"><al-input name="condcolparams.edit_col.edit.label"></td>
+ </tr>
+ </al-if>
+ <tr>
+ <td></td>
+ <td width="100%">
+ <al-expand name="value_edit" />
+ <al-if expr="colvalselect.cardinality_is_high(workspace)">
+ <al-expand name="colval_search" />
+ </al-if>
+ </td>
+ <td class="butt" valign="top" width="100%">
+ <al-input type="submit" value="Okay" name="edit_okay" />
+ </td>
+ </tr>
+ </table>
+</al-macro>
+
+<al-expand name="page_layout">
+ <table class="condcol">
+ <al-expand name="condcolparams_buttons" />
+ <tr>
+ <th>Column</th>
+ <th>Parameters</th>
+ </tr>
+ <al-for iter="i" expr="condcolparams">
+ <al-exec expr="colparams = i.value()" />
+ <tr>
+ <td><al-value expr="colparams.label" /></td>
+ <al-if expr="colparams.edit is None">
+ <td>
+ <al-expand name="displaycol" />
+ </td>
+ <al-else>
+ <td class="edit">
+ <b>Edit <al-value expr="colparams.edit.op" /></b><br />
+ <al-expand name="editcol" />
+ </td>
+ </al-if>
+ </tr>
+ </al-for>
+ <al-expand name="condcolparams_buttons" />
+ </table>
+</al-expand>
+
diff --git a/web/pages/explore.html b/web/pages/explore.html
new file mode 100644
index 0000000..ad9d3cf
--- /dev/null
+++ b/web/pages/explore.html
@@ -0,0 +1,82 @@
+<al-comment>
+
+ The contents of this file are subject to the HACOS License Version 1.2
+ (the "License"); you may not use this file except in compliance with
+ the License. Software distributed under the License is distributed
+ on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ implied. See the LICENSE file for the specific language governing
+ rights and limitations under the License. The Original Software
+ is "NetEpi Analysis". The Initial Developer of the Original
+ Software is the Health Administration Corporation, incorporated in
+ the State of New South Wales, Australia.
+
+ Copyright (C) 2004,2005 Health Administration Corporation.
+ All Rights Reserved.
+
+</al-comment>
+<al-expand name="page_layout">
+ <center>
+ <table class="explore">
+ <al-for iter="meta_i" expr="dsmeta.describe_ds()">
+ <al-exec expr="label, value = meta_i.value()" />
+ <tr>
+ <th class="label"><al-value expr="label" />:</th>
+ <td><al-value expr="value" /><td>
+ </tr>
+ </al-for>
+ <al-if expr="dsmeta.show_col">
+ <al-expand name="coldetail" />
+ <al-else>
+ <tr>
+ <td colspan="2">
+ <al-expand name="showcols" />
+ </td>
+ </tr>
+ </al-if>
+ <tr>
+ <td colspan="2" align="center">
+ <al-if expr="dsmeta.show_col">
+ <al-input type="submit" name="allcols" value="All Columns" />
+ </al-if>
+ <al-input type="submit" name="back" value="Back" />
+ </td>
+ </tr>
+ </table>
+ </center>
+</al-expand>
+
+<al-macro name="showcols">
+ <table cellspacing="0">
+ <al-exec expr="colshead, colsdata = dsmeta.describe_cols()" />
+ <tr>
+ <td></td>
+ <al-for iter="colhead_i" expr="colshead">
+ <th><al-value expr="colhead_i.value()" /></th>
+ </al-for>
+ </tr>
+ <al-for iter="rows_i" expr="zip(*colsdata)">
+ <tr>
+ <td><al-input type="submit"
+ nameexpr="'view/' + rows_i.value()[0]" value="View" />
+ </td>
+ <al-for iter="row_i" expr="rows_i.value()">
+ <td><al-value expr="row_i.value()" /></td>
+ </al-for>
+ </tr>
+ </al-for>
+ </table>
+</al-macro>
+
+<al-macro name="coldetail">
+ <tr>
+ <th colspan="2" class="detail" align="center">Column detail</th>
+ </tr>
+ <al-for iter="meta_i" expr="dsmeta.describe_col(dsmeta.show_col)">
+ <al-exec expr="label, value = meta_i.value()" />
+ <tr>
+ <th class="detail label"><al-value expr="label" />:</th>
+ <td class="detail"><al-value expr="value" /><td>
+ </tr>
+ </al-for>
+</al-macro>
+
diff --git a/web/pages/fields.html b/web/pages/fields.html
new file mode 100644
index 0000000..52d0377
--- /dev/null
+++ b/web/pages/fields.html
@@ -0,0 +1,445 @@
+<al-comment>
+
+ The contents of this file are subject to the HACOS License Version 1.2
+ (the "License"); you may not use this file except in compliance with
+ the License. Software distributed under the License is distributed
+ on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ implied. See the LICENSE file for the specific language governing
+ rights and limitations under the License. The Original Software
+ is "NetEpi Analysis". The Initial Developer of the Original
+ Software is the Health Administration Corporation, incorporated in
+ the State of New South Wales, Australia.
+
+ Copyright (C) 2004,2005 Health Administration Corporation.
+ All Rights Reserved.
+
+</al-comment>
+
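+<!-- Layout helpers: "field" renders the label/note cell for one parameter
+     field; simplefield, rowtable and fillfield wrap the caller's content
+     cell in the common variants used by the field_markup items below. -->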
+<al-macro name="field">
+ <tr>
+ <td class="section">
+ <al-value expr="field.label" />:
+ <al-if expr="field.note">
+ <div class="note"><al-value expr="field.note" /></div>
+ </al-if>
+ <al-if expr="config.debug">
+ <div class="note">[<al-value expr="field.markup" />]</div>
+ </al-if>
+ </td>
+ <al-usearg />
+ </tr>
+</al-macro>
+
+<al-macro name="simplefield">
+ <al-expand name="field">
+ <td><al-usearg /></td>
+ </al-expand>
+</al-macro>
+
+<al-macro name="rowtable">
+ <al-expand name="field">
+ <td>
+ <table class="rowtable">
+ <tr>
+ <al-usearg>
+ </tr>
+ </table>
+ </td>
+ </al-expand>
+</al-macro>
+
+<al-macro name="fillfield">
+ <al-expand name="field">
+ <td class="fill"><al-usearg /></td>
+ </al-expand>
+</al-macro>
+
+
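+<!-- field_markup maps a field's markup type to its edit widget; params.html
+     renders each enabled field through this lookup. -->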
+<al-lookup name="field_markup">
+
+ <al-item expr="'showdataset'">
+ <al-expand name="rowtable">
+ <td class="fill" style="white-space: normal;"><al-value expr="workspace.params.dsparams.get_label()"/></td>
+ <td>
+ <al-input class="submit" type="submit" name="back" value="Change" />
+ <al-input class="submit" type="submit" name="info/ds" value="Info" />
+ </td>
+ </al-expand>
+ </al-item>
+
+ <al-item expr="'analysis'">
+ <al-expand name="rowtable">
+ <td class="fill">
+ <al-select name="workspace.params.plottype" onchange="submit();"
+ optionexpr="workspace.available_plottypes()" />
+ </td>
+ <td>
+ <al-input class="submit" type="submit" name="plottype_change" value="Change" />
+ <al-input class="submit" type="submit" name="plottype_reset" value="Reset" />
+ </td>
+ </al-expand>
+ </al-item>
+
+ <al-item expr="'filter'">
+ <al-expand name="rowtable">
+ <td class="fill" width="70%">
+ <al-textarea cols="40" rows="3"
+ nameexpr="'workspace.params.%s.filterexpr' % field.param" />
+ </td>
+ <td width="30%">
+ <table class="rowtable">
+ <al-exec expr="avail = getattr(workspace.params, field.param).available_filters()" />
+ <al-if expr="avail">
+ <tr>
+ <td colspan="2" class="fill">
+ <al-select nameexpr="'workspace.params.%s.filtername' % field.param"
+ optionexpr="avail" />
+ </td>
+ <td><al-input class="submit" type="submit" value="Load"
+ nameexpr="'filter/load/' + field.param" /></td>
+ </tr>
+ </al-if>
+ <tr>
+ <td><al-input class="submit" type="submit" value="Clear"
+ nameexpr="'filter/clear/' + field.param" /></td>
+ <td><al-input class="submit" type="submit" value="Edit"
+ nameexpr="'filter/edit/' + field.param" /></td>
+ <td><al-input class="submit" type="submit" value="New"
+ nameexpr="'filter/new/' + field.param" /></td>
+ </tr>
+ </table>
+ </td>
+ </al-expand>
+ </al-item>
+
+ <al-item expr="'colname'">
+ <al-expand name="rowtable">
+ <td class="fill">
+ <al-select nameexpr="'workspace.params.%s' % field.param"
+ optionexpr="field.availablecols(workspace)"
+ onchange="submit();" />
+ </td>
+ <al-if expr="field.logscale_attr">
+ <td nowrap>
+ Logscale:
+ <al-for iter="base_i" expr="['No', 2, 10, 100]">
+ <al-value expr="base_i.value()" />
+ <al-input type="radio" valueexpr="base_i.value()"
+ nameexpr="'workspace.params.%s' % field.logscale_attr" />
+ </al-for>
+ </td>
+ </al-if>
+ </al-expand>
+ </al-item>
+
+ <al-item expr="'twobytwocolname'">
+ <al-expand name="rowtable">
+ <td class="fill">
+ <table>
+ <tr>
+ <td>
+ <al-select nameexpr="'workspace.params.%s' % field.param"
+ optionexpr="field.availablecols(workspace)"
+ onchange="submit();" />
+ </td>
+ </tr>
+ <al-exec expr="params = field.get_condcolparams(workspace)" />
+ <al-if expr="params.low_cardinality">
+ <tr>
+ <td class="error">
+ Column does not have enough values (at least two are required)
+ </td>
+ </tr>
+ <al-else>
+ <al-if expr="not params.is_okay()">
+ <tr>
+ <td class="warn">
+ Column values must be assigned
+ </td>
+ </tr>
+ </al-if>
+ <tr>
+ <td>
+ <div class="leftcol">
+ <b>Positive: </b>
+ <al-value expr="params.positive_label" />:
+ <al-value expr="params.desc_positive(workspace)" />
+ </div>
+ <div class="rightcol">
+ <b>Negative: </b>
+ <al-value expr="params.negative_label" />:
+ <al-value expr="params.desc_negative(workspace)" />
+ </div>
+ </td>
+ </tr>
+ </al-if>
+ </table>
+ </td>
+ <td>
+ <al-input class="submit" type="submit" nameexpr="'edit_' + params.label" value="Edit" />
+ </td>
+ </al-expand>
+ </al-item>
+
+ <al-item expr="'measurecol'">
+ <al-expand name="simplefield">
+ <al-select style="width: 34%;" name="workspace.params.measure_stat"
+ optionexpr="field.statmethods(workspace)" />
+ <al-select style="width: 33%;" name="workspace.params.measure_col"
+ optionexpr="field.statcols(workspace)" />
+ <al-select style="width: 33%;" name="workspace.params.measure_weight"
+ optionexpr="field.weightcols(workspace)" />
+ </al-expand>
+ </al-item>
+
+ <al-item expr="'colset'">
+ <al-expand name="simplefield">
+ <table class="rowtable">
+ <al-exec expr="condcols = getattr(workspace.params, field.param)" />
+ <al-for iter="condcol_i" expr="condcols">
+ <al-exec expr="idx = condcol_i.index()" />
+ <tr>
+ <td class="arrows">
+ <al-if expr="idx > 0">
+ <al-input type="image" src="/nea/button-up.png"
+ nameexpr="'colset/up/%s/%s' % (field.param, idx)" />
+ </al-if></td>
+ <td class="arrows">
+ <al-if expr="idx < len(condcols)-1">
+ <al-input type="image" src="/nea/button-down.png"
+ nameexpr="'colset/dn/%s/%s' % (field.param, idx)" />
+ </al-if>
+ </td>
+ <td class="fill">
+ <al-for vars="attr_name,option_meth,width_css"
+ expr="field.attr_meths()">
+ <al-select styleexpr="width_css"
+ nameexpr="attr_name % condcol_i.index()"
+ optionexpr="option_meth(workspace)" />
+ </al-for>
+ </td>
+ <td class="butt"><al-input class="submit" type="submit" value="Remove"
+ nameexpr="'colset/del/%s/%d' % (field.param, idx)" /></td>
+ </tr>
+ </al-for>
+ <tr>
+ <td colspan="3"></td>
+ <td class="butt" align="right">
+ <al-input class="submit" type="submit" value="Add"
+ nameexpr="'colset/add/%s/%s' % (field.param, len(field.attr_options))"/>
+ </td>
+ </tr>
+ </table>
+ </al-expand>
+ </al-item>
+
+ <al-item expr="'groupbycol'">
+ <al-expand name="rowtable">
+ <td class="fill">
+ <al-select nameexpr="'workspace.params.%s' % field.param"
+ optionexpr="field.groupbycols(workspace)"
+ onchange="submit();" />
+ </td>
+ <al-if expr="field.allow_stack">
+ <td><al-input type="checkbox" name="workspace.params.stack"
+ value="True" /> Stack</td>
+ </al-if>
+ </al-expand>
+ </al-item>
+
+ <al-item expr="'propcols'">
+ <tr>
+ <td class="section"><al-value expr="field.label" />: <br />
+ <al-input class="submit" type="submit" name="refresh" value="Refresh" /></td>
+ <td>
+ <table class="rowtable">
+ <tr>
+ <td class="fill">
+ <al-select name="workspace.params.propcols" multiple="multiple"
+ optionexpr="field.propcols(workspace)" />
+ </td>
+ <td>
+ <al-input type="radio" name="workspace.params.proptype"
+ value="density" /> Density<br />
+ <al-input type="radio" name="workspace.params.proptype"
+ value="percent" /> Percent<br />
+ <al-input type="checkbox" name="workspace.params.heatmap"
+ value="True" /> Heatmap<br />
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </al-item>
+
+ <al-item expr="'weightcol'">
+ <al-expand name="fillfield">
+ <al-select name="workspace.params.weightcol"
+ optionexpr="field.weightcols(workspace)" />
+ </al-expand>
+ </al-item>
+
+ <al-item expr="'condcolparams'">
+ <al-expand name="rowtable">
+ <td class="fill">
+ <al-if expr="condcolparams">
+ <table>
+ <al-for iter="i" expr="condcolparams">
+ <al-exec expr="colparam = i.value()" />
+ <al-if expr="colparam.params">
+ <tr>
+ <td valign="baseline">
+ <al-value expr="colparam.label" />:
+ </td>
+ <td valign="baseline">
+ <table width="100%" cellspacing="0" cellpadding="0">
+ <al-for iter="ii" expr="colparam.describe(workspace)">
+ <tr>
+ <td valign="baseline"><al-value expr="ii.value()[0]" />: </td>
+ <td valign="baseline"><al-value expr="ii.value()[1]" /></td>
+ </tr>
+ </al-for>
+ </table>
+ </td>
+ </tr>
+ </al-if>
+ </al-for>
+ </table>
+ </al-if>
+ </td>
+ <td><al-input class="submit" type="submit" name="edit_condcolparams" value="Edit" /></td>
+ </al-expand>
+ </al-item>
+
+ <al-item expr="'output'">
+ <al-expand name="rowtable">
+ <td>
+ <al-select name="workspace.output.format"
+ optionexpr="workspace.output.formats"
+ onchange="submit();" class="outputsel" /><br />
+ <al-input class="submit" type="submit" name="change_output" value="Change"
+ class="outputsel" />
+ </td>
+ <td>
+<al-lookup name="output_opts">
+ <al-item expr="'PNG'">
+ <al-input type="radio" name="workspace.output.size"
+ value="400x300"> small
+ <al-input type="radio" name="workspace.output.size"
+ value="750x550"> medium
+ <al-input type="radio" name="workspace.output.size"
+ value="1000x700"> large
+ <al-input type="radio" name="workspace.output.size"
+ value="1400x1200"> extra large
+ <br />
+ </al-item>
+ <al-item expr="'PDF'">
+ <al-input type="radio" name="workspace.output.horizontal"
+ value="False"> Portrait
+ <al-input type="radio" name="workspace.output.horizontal"
+ value="True"> Landscape
+ <br />
+ </al-item>
+ <al-item expr="'SVG'">
+ <al-input type="radio" name="workspace.output.want_inline"
+ value="True"> Inline
+ <al-input type="radio" name="workspace.output.want_inline"
+ value="False"> Download
+ <br />
+ </al-item>
+</al-lookup>
+ <al-value expr="workspace.output.format" lookup="output_opts" />
+ </td>
+ </al-expand>
+ </al-item>
+
+ <al-item expr="'bool'">
+ <al-expand name="simplefield">
+ <al-input type="checkbox" value="True"
+ nameexpr="'workspace.params.%s' % field.param" />
+ </al-expand>
+ </al-item>
+
+ <al-item expr="'chooseone'">
+ <al-expand name="simplefield">
+ <al-for iter="choose_i" expr="field.options">
+ <al-input type="radio" valueexpr="choose_i.value()[0]"
+ nameexpr="'workspace.params.%s' % field.param" />
+ <al-value expr="choose_i.value()[1]" />
+ <al-if expr="str(choose_i.value()[0]).lower() == 'other'">
+ : <al-input nameexpr="'workspace.params.other_%s' % field.param"
+ onchangeexpr="field.onchangejs()"/>
+ </al-if>
+ <al-if expr="field.horizontal"> <al-else><br /></al-if>
+ </al-for>
+ </al-expand>
+ </al-item>
+
+ <al-item expr="'choosemany'">
+ <al-expand name="simplefield">
+ <al-for iter="choose_i" expr="field.options">
+ <al-input type="checkbox" list="list" valueexpr="choose_i.value()[0]"
+ nameexpr="'workspace.params.%s' % field.param" />
+ <al-value expr="choose_i.value()[1]" />
+ <al-if expr="field.horizontal"> <al-else><br /></al-if>
+ </al-for>
+ </al-expand>
+ </al-item>
+
+ <al-item expr="'drop'">
+ <al-expand name="fillfield">
+ <al-select nameexpr="'workspace.params.%s' % field.param"
+ optionexpr="field.options" onchange="submit();" />
+ </al-expand>
+ </al-item>
+
+ <al-item expr="'textarea'">
+ <al-expand name="fillfield">
+ <al-textarea cols="60" rows="3"
+ nameexpr="'workspace.params.%s' % field.param" />
+ </al-expand>
+ </al-item>
+
+ <al-item expr="'dataset'">
+ <al-expand name="fillfield">
+ <al-exec expr="available = field.availablesets(workspace)" />
+ <al-if expr="available">
+ <al-select nameexpr="'workspace.params.%s' % field.param"
+ optionexpr="field.availablesets(workspace)"
+ onchange="submit();" />
+ <al-else>
+ <div class="error">No datasets match</div>
+ </al-if>
+ </al-expand>
+ </al-item>
+
+ <al-item expr="'popdataset'">
+ <al-expand name="rowtable">
+ <al-exec expr="available = field.availablesets(workspace)" />
+ <al-if expr="available">
+ <td class="fill">
+ <al-select style="width: 50%;"
+ nameexpr="'workspace.params.%s' % field.param"
+ optionexpr="field.availablesets(workspace)"
+ onchange="submit();" />
+ <al-select style="width: 50%;"
+ nameexpr="'workspace.params.%s_popcol' % field.param"
+ optionexpr="field.availablecols(workspace)"
+ onchange="submit();" />
+ </td>
+ <td>
+ <al-input class="submit" type="submit" value="Info"
+ nameexpr="'info/%s' % field.param" />
+ </td>
+ <al-else>
+ <div class="error">No datasets match</div>
+ </al-if>
+ </al-expand>
+ </al-item>
+
+ <!-- DEFAULT: text/float/int -->
+ <al-expand name="fillfield">
+ <al-input nameexpr="'workspace.params.%s' % field.param" />
+ </al-expand>
+
+</al-lookup>
diff --git a/web/pages/filter.html b/web/pages/filter.html
new file mode 100644
index 0000000..40a17f1
--- /dev/null
+++ b/web/pages/filter.html
@@ -0,0 +1,320 @@
+<al-comment>
+
+ The contents of this file are subject to the HACOS License Version 1.2
+ (the "License"); you may not use this file except in compliance with
+ the License. Software distributed under the License is distributed
+ on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ implied. See the LICENSE file for the specific language governing
+ rights and limitations under the License. The Original Software
+ is "NetEpi Analysis". The Initial Developer of the Original
+ Software is the Health Administration Corporation, incorporated in
+ the State of New South Wales, Australia.
+
+ Copyright (C) 2004,2005 Health Administration Corporation.
+ All Rights Reserved.
+
+</al-comment>
+
+<al-include name="colvalselect.html" />
+
+<al-macro name="filterbuttons">
+ <table class="filterbuttons">
+ <tr>
+ <td align="left">
+ <al-input type="submit" class="submit" name="cancel" value="Abandon" />
+ </td>
+ <td align="center">
+ <al-input type="submit" class="submit danger" name="delete" value="Del Filter" />
+ </td>
+ <td align="center">
+ <al-input type="submit" class="submit" name="undo" value="Undo" />
+ <al-input type="submit" class="submit" name="redo" value="Redo" />
+ </td>
+ <td align="right">
+ <al-input type="submit" class="submit" name="save" value="Save" />
+ <al-input type="submit" class="submit" name="okay" value="Done" />
+ </td>
+ </tr>
+ </table>
+</al-macro>
+
+<al-macro name="colselect">
+ <al-select name="filter.edit_expr.colname" onchange="submit();">
+ <al-option label="Choose..." value="" />
+ <al-for vars="grouplabel, cols" expr="edit_expr.colgroups()">
+ <al-optgroup labelexpr="grouplabel">
+ <al-for vars="name, label" expr="cols">
+ <al-option labelexpr="label" valueexpr="name" />
+ </al-for>
+ </al-optgroup>
+ </al-for>
+ </al-select>
+</al-macro>
+
+<al-macro name="opselect">
+ <al-select name="filter.edit_expr.op" onchange="submit();">
+ <al-option label="Choose..." value="" />
+ <al-for vars="grouplabel, ops" expr="edit_expr.col_ops()">
+ <al-optgroup labelexpr="grouplabel">
+ <al-for vars="name, label" expr="ops">
+ <al-option labelexpr="label" valueexpr="name" />
+ </al-for>
+ </al-optgroup>
+ </al-for>
+ </al-select>
+</al-macro>
+
+<al-macro name="values">
+
+ <al-if expr="edit_expr.value.markup == 'general'">
+ <al-input name="filter.edit_expr.value.value"
+ expr="filter.edit_expr.value.strval()" />
+ </al-if>
+
+ <al-if expr="edit_expr.value.markup == 'textarea'">
+ <al-textarea name="filter.edit_expr.value.value" rows="6" cols="20" />
+ </al-if>
+
+ <al-if expr="edit_expr.value.markup == 'discrete'">
+ <al-expand name="value_edit" />
+ </al-if>
+
+ <al-if expr="edit_expr.value.markup == 'date'">
+ <al-select name="filter.edit_expr.value.year"
+ optionexpr="edit_expr.value.yearopt()" />
+ <al-select name="filter.edit_expr.value.month"
+ optionexpr="edit_expr.value.monthopt()" />
+ <al-select name="filter.edit_expr.value.day"
+ optionexpr="edit_expr.value.dayopt()" />
+ </al-if>
+
+</al-macro>
+
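+<!-- filter_view lays the filter expression tree out as a table: group nodes
+     span the rows of their children, leaf nodes close their row; the target
+     icon posts "node/<path>" to select that node for editing. -->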
+<al-macro name="filter_view">
+ <table class="filter">
+ <tr>
+ <al-tree iter="node_i" expr="filter.root">
+ <al-exec expr="node = node_i.value()" />
+ <al-if expr="not node.children">
+ <al-td colspanexpr="node_i.span()"
+ classexpr="filter.node_is_selected(node) and 'leafsel' or 'leaf'">
+ <al-input nameexpr="'node/' + node.path"
+ type="image" src="/nea/target.png" /> <al-value
+ expr="node.describe(filter.dsname)" />
+ </al-td>
+ </tr><tr>
+ <al-else>
+ <al-td rowspanexpr="node.height()"
+ classexpr="filter.node_is_selected(node) and 'groupsel' or 'group'">
+ <al-input nameexpr="'node/' + node.path"
+ type="image" src="/nea/target.png" /> <al-value
+ expr="node.describe(filter.dsname)" />
+ </al-td>
+ </al-if>
+ </al-tree>
+ </tr>
+ </table>
+</al-macro>
+
+<al-macro name="expression_view">
+ <div class="filteredit exprview" style="height: 40ex;">
+ <al-expand name="filterbuttons" />
+ <div class="title">Edit expression:</div>
+ <al-exec expr="edit_expr = filter.edit_expr" />
+ <table class="edit" style="width: 100%;">
+ <tr>
+ <th class="highlight">Insert</th>
+ <th> </th>
+ <al-for vars="mode, label" expr="edit_expr.modes">
+ <al-if expr="edit_expr.state == mode">
+ <th class="highlight">
+ <al-input disabled name="x" type="submit" class="selbutton"
+ expr="label" />
+ </th>
+ <al-else>
+ <th>
+ <al-input type="submit" class="selbutton" expr="label"
+ nameexpr="'expr_select/%d' % mode" />
+ </th>
+ </al-if>
+ </al-for>
+ <td rowspan="2" align="right">
+ <al-input name="clear_edit" class="submit"
+ type="submit" value="Cancel" /><br><br>
+ <al-input name="expr_okay" class="submit"
+ type="submit" value="Okay" /><br><br>
+ <al-input name="expr_okay_add" class="submit"
+ type="submit" value="Add more" /><br><br>
+ </td>
+ </tr>
+ <tr>
+ <td class="highlight">
+ <al-input name="expr_insert/and" class="submit" type="submit" value="and" /><br>
+ <al-input name="expr_insert/or" class="submit" type="submit" value="or" /><br>
+ <br>
+ <b>or</b><br>
+ <al-if expr="filter.copy_buffer">
+ <al-input name="paste" class="submit" type="submit" value="Paste" /><br>
+ </al-if>
+ <al-input name="req_import" class="submit" type="submit" value="Import" /><br>
+ <al-input name="delete_node" class="submit danger" type="submit" value="Delete" />
+ </td>
+ <td> </td>
+ <al-if expr="edit_expr.state == edit_expr.EDITCOL">
+ <td class="highlight">
+ <al-expand name="colselect" />
+ </td>
+ <al-else>
+ <td>
+ <al-if expr="edit_expr.colname">
+ <al-value expr="edit_expr.get_column().label" />
+ </al-if>
+ </td>
+ </al-if>
+ <al-if expr="edit_expr.state == edit_expr.EDITOP">
+ <td align="center" class="highlight"><al-expand name="opselect" /></td>
+ <al-else>
+ <td align="center">
+ <al-if expr="edit_expr.op">
+ <al-value expr="edit_expr.op" />
+ </al-if>
+ </td>
+ </al-if>
+ <al-if expr="edit_expr.state == edit_expr.EDITVALUE">
+ <td class="highlight"><al-expand name="values" /></td>
+ <al-else>
+ <td><al-value expr="edit_expr.pretty_value(workspace)" /></td>
+ </al-if>
+ </tr>
+ <al-if expr="edit_expr.show_search_box(workspace)">
+ <tr>
+ <td colspan="3">
+ <al-expand name="colval_search" />
+ </td>
+ </tr>
+ </al-if>
+ </table>
+ </div>
+</al-macro>
+
+<al-macro name="andor_view">
+ <div class="filteredit exprview">
+ <al-expand name="filterbuttons" />
+ <div class="title">Edit and/or expression:</div>
+ <table class="edit">
+ <tr>
+ <th class="highlight">Insert</th>
+ <th class="highlight">Replace</th>
+ <th class="highlight">Change to</th>
+ </tr>
+ <tr>
+ <td class="highlight">
+ <al-input name="andor_insert/and" class="submit" type="submit" value="and" /><br>
+ <al-input name="andor_insert/or" class="submit" type="submit" value="or" /><br>
+ </td>
+ <td class="highlight">
+ <al-if expr="filter.copy_buffer">
+ <al-input name="paste" class="submit" type="submit" value="Paste" /><br>
+ </al-if>
+ <al-input name="req_import" class="submit" type="submit" value="Import" /><br>
+ <al-input name="delete_node" class="submit danger" type="submit" value="Delete" /><br>
+ </td>
+ <td class="highlight">
+ <al-if expr="filter.edit_andor.node.name == 'and'">
+ <al-input name="andor_change/or" class="submit" type="submit" value="or" /><br>
+ <al-else>
+ <al-input name="andor_change/and" class="submit" type="submit" value="and" /><br>
+ </al-if>
+ <br><b>Add</b><br>
+ <al-input type="submit" class="submit" name="andor_add_expr" value="Expression"><br>
+ </td>
+ </tr>
+ </table>
+ </div>
+</al-macro>
+
+<al-macro name="info_edit">
+ <div class="filteredit filterlabeledit">
+ <al-expand name="filterbuttons" />
+ <div class="title">Edit filter name and label:</div>
+ <table class="edit">
+ <tr>
+ <td><label for="edit_info_name">Filter name:</label></td>
+ <td><al-input id="edit_info_name" class="field" name="filter.edit_info.name" /></td>
+ </tr>
+ <tr>
+ <td><label for="edit_info_label">Filter label:</label></td>
+ <td><al-input id="edit_info_label" class="field" name="filter.edit_info.label" /></td>
+ </tr>
+ <tr>
+ <td align="right" colspan="2">
+ <al-input type="submit" class="submit" name="clear_edit" value="Cancel" />
+ <al-input type="submit" class="submit" name="info_edit_apply" value="Apply" />
+ </td>
+ </tr>
+ </table>
+ </div>
+</al-macro>
+
+<al-expand name="body">
+ <al-if expr="filter.edit_expr">
+ <div id="filtertop" class="filtertop" style="bottom: 40ex;">
+ <al-else>
+ <div id="filtertop" class="filtertop">
+ </al-if>
+ <al-expand name="bannerbox" />
+ <al-expand name="msgs" />
+ <table class="filterlabel">
+ <tr>
+ <td class="label">Editing filter:</td>
+ <td class="value"><al-value expr="filter.name" /></td>
+ <td class="label">Description:</td>
+ <td class="value desc"><al-value expr="filter.label" /></td>
+ <td align="right">
+ <al-if expr="not filter.edit_info">
+ <al-input type="submit" class="submit"
+ name="edit_info" value="Edit Info" />
+ </al-if>
+ </td>
+ </tr>
+ </table>
+
+ <al-expand name="filter_view" />
+ </div>
+
+ <al-if expr="confirm_quit">
+ <div class="filteredit filterdelete">
+ <br>Quit and abandon changes?<br><br>
+ <al-input type="submit" class="submit" name="cancel_cancel" value="No" />
+ <al-input type="submit" class="submit" name="cancel_confirm" value="Yes" />
+ </div>
+ <al-elif expr="delete_confirm">
+ <div class="filteredit filterdelete">
+ <br>Delete this filter?<br><br>
+ <al-input type="submit" class="submit" name="delete_cancel" value="Cancel" />
+ <al-input type="submit" class="submit" name="delete_confirm" value="Delete" />
+ </div>
+ <al-elif expr="want_import">
+ <div class="filteredit exprview">
+ <al-expand name="filterbuttons" />
+ <div class="title">Import another filter at this point</div>
+ <al-select name="import_filter"
+ optionexpr="dsparams.available_filters()" />
+ <al-input class="submit" type="submit" value="Import"
+ name="import" /></td>
+ <al-input name="import_cancel" class="submit"
+ type="submit" value="Cancel" />
+ </div>
+ <al-elif expr="filter.edit_info">
+ <al-expand name="info_edit" />
+ <al-elif expr="filter.edit_expr">
+ <al-expand name="expression_view" />
+ <al-elif expr="filter.edit_andor">
+ <al-expand name="andor_view" />
+ <al-else>
+ <div class="filteredit">
+ <al-expand name="filterbuttons" />
+ </div>
+ </al-if>
+ <script>scrollToClass('filtertop', 'groupsel');scrollToClass('filtertop', 'leafsel');</script>
+</al-expand>
diff --git a/web/pages/macros.html b/web/pages/macros.html
new file mode 100644
index 0000000..081fc6e
--- /dev/null
+++ b/web/pages/macros.html
@@ -0,0 +1,123 @@
+<al-comment>
+
+ The contents of this file are subject to the HACOS License Version 1.2
+ (the "License"); you may not use this file except in compliance with
+ the License. Software distributed under the License is distributed
+ on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ implied. See the LICENSE file for the specific language governing
+ rights and limitations under the License. The Original Software
+ is "NetEpi Analysis". The Initial Developer of the Original
+ Software is the Health Administration Corporation, incorporated in
+ the State of New South Wales, Australia.
+
+ Copyright (C) 2004,2005 Health Administration Corporation.
+ All Rights Reserved.
+
+</al-comment>
+
+<al-macro name="body">
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+ "http://www.w3.org/TR/html4/loose.dtd">
+ <!-- $Id: macros.html,v 1.35 2005/10/17 06:07:38 andrewm Exp $ -->
+ <html>
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+ <meta name="Author" content="tchur at health.nsw.gov.au (Tim Churches)">
+ <meta name="robots" content="noindex,nofollow">
+ <link rel="shortcut icon" href="/<al-value expr="config.appname" />/favicon.ico" type="image/x-icon">
+ <style type="text/css">
+ <!--
+ @import "/<al-value expr="config.appname" />/style.css";
+ -->
+ </style>
+ <al-script srcexpr="'/%s/%s' % (config.appname, 'helpers.js')" type="text/javascript"></al-script>
+ <title><al-value expr="title" /></title>
+ </head>
+ <body>
+ <al-form name="nea" method="post">
+ <al-usearg />
+ </al-form>
+ </body>
+ </html>
+</al-macro>
+
+<al-macro name="msgs">
+ <al-if expr="msgs">
+ <al-for iter="m_i" expr="msgs">
+ <al-exec expr="lvl, msg = m_i.value()" />
+ <al-div classexpr="lvl + '-msg'"><al-value expr="msg" /></al-div>
+ </al-for>
+ <al-exec expr="msgs = []" />
+ </al-if>
+</al-macro>
+
+<al-macro name="helpbutt">
+ <div style="float: right">
+ <a href="/<al-value expr="config.appname" />/help.html" target="_blank">
+ <img src="/<al-value expr="config.appname" />/help.png" border="0" alt="Help"></a></div>
+</al-macro>
+
+<al-macro name="logo">
+ <img src="/<al-value expr="config.appname" />/Netepi_logo_s.png"
+ width="48" height="48" alt="Netepi Logo" />
+</al-macro>
+
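+<!-- Per-stage timing table; only rendered when config.debug is set. -->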
+<al-macro name="timedetail">
+ <al-if expr="config.debug and timer">
+ <div class="timedetail">
+ <b>Time detail:</b>
+ <table>
+ <al-for iter="li" expr="timer.end()">
+ <tr>
+ <td><al-value expr="li.value()[0]" /></td>
+ <td><al-value expr="'%.2f' % li.value()[1]" />s</td>
+ </tr>
+ </al-for>
+ </table>
+    </div>
+ </al-if>
+</al-macro>
+
+<al-macro name="page_layout_title">
+ <al-expand name="body">
+ <al-expand name="helpbutt" />
+ <table class="bodytable">
+ <tr class="bannerbox">
+ <td class="logo">
+ <img src="/<al-value expr="config.appname" />/Netepi_logo_m.png"
+ width="120" height="120" alt="Netepi Logo" />
+ </td>
+ <td>
+ <div class="tit">NetEpi Analysis</div>
+ <div class="subtit">Network-enabled tools for epidemiology and public health practice</div>
+ <div class="copyright"><a href="/<al-value expr="config.appname" />/copyright.html" target="_blank">Copyright © 2004-2009 NSW Department of Health. All Rights Reserved</a></div>
+ <div class="subtit">Version <al-value expr="soomversion" /></div>
+ </td>
+ </tr>
+ </table>
+ <al-expand name="msgs" />
+ <al-usearg />
+ </al-expand>
+</al-macro>
+
+<al-macro name="bannerbox">
+ <div class="bannerbox">
+ <al-expand name="helpbutt" />
+ <center>
+ <table border="0">
+ <tr>
+ <td><al-expand name="logo" /></td>
+ <td class="tit">NetEpi Analysis</td>
+ </tr>
+ </table>
+ </center>
+ </div>
+</al-macro>
+
+<al-macro name="page_layout">
+ <al-expand name="body">
+ <al-expand name="bannerbox" />
+ <al-expand name="msgs" />
+ <al-usearg />
+ </al-expand>
+</al-macro>
diff --git a/web/pages/newanalysis.html b/web/pages/newanalysis.html
new file mode 100644
index 0000000..74898b1
--- /dev/null
+++ b/web/pages/newanalysis.html
@@ -0,0 +1,42 @@
+<al-comment>
+
+ The contents of this file are subject to the HACOS License Version 1.2
+ (the "License"); you may not use this file except in compliance with
+ the License. Software distributed under the License is distributed
+ on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ implied. See the LICENSE file for the specific language governing
+ rights and limitations under the License. The Original Software
+ is "NetEpi Analysis". The Initial Developer of the Original
+ Software is the Health Administration Corporation, incorporated in
+ the State of New South Wales, Australia.
+
+ Copyright (C) 2004,2005 Health Administration Corporation.
+ All Rights Reserved.
+
+</al-comment>
+
+<al-expand name="page_layout_title">
+ <table class="dsview">
+ <tr>
+ <th> </th>
+ <th>Datasets</th>
+ <th>Record count</th>
+ <th>Updated</th>
+ <th> </th>
+ </tr>
+ <al-for vars="ds" expr="workspace.datasets()">
+ <tr>
+ <td><al-input type="submit" value="Analyse"
+ nameexpr="'analyse/%s' % ds.name" ></td>
+ <td><al-value expr="ds.label"></td>
+ <td align="right"><al-value expr="len(ds)"></td>
+ <td nowrap><al-value expr="ds.date_updated.strftime('%Y-%d-%m %H:%M')"></td>
+ <td><al-input type="submit" value="More"
+ nameexpr="'about/%s' % ds.name" ></td>
+ </tr>
+ </al-for>
+ </table>
+ <al-if expr="bool(analyses)">
+ <al-input type="submit" value="Prepared Analyses" name="show_prepared" />
+ </al-if>
+</al-expand>
diff --git a/web/pages/output_crosstab.html b/web/pages/output_crosstab.html
new file mode 100644
index 0000000..660f3f6
--- /dev/null
+++ b/web/pages/output_crosstab.html
@@ -0,0 +1,105 @@
+<al-comment>
+
+ The contents of this file are subject to the HACOS License Version 1.2
+ (the "License"); you may not use this file except in compliance with
+ the License. Software distributed under the License is distributed
+ on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ implied. See the LICENSE file for the specific language governing
+ rights and limitations under the License. The Original Software
+ is "NetEpi Analysis". The Initial Developer of the Original
+ Software is the Health Administration Corporation, incorporated in
+ the State of New South Wales, Australia.
+
+ Copyright (C) 2004,2005 Health Administration Corporation.
+ All Rights Reserved.
+
+</al-comment>
+
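+ <!-- data_style picks the *opening* <td> tag for a data cell, keyed on
+      (marginal-totals position, row-is-total, col-is-total); the matching
+      </td> is emitted at the end of the cell loop below. -->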
+ <al-lookup name="data_style">
+ <al-item expr="'after', True, True"><td class="data-lt"></al-item>
+ <al-item expr="'after', True, False"><td class="data-t"></al-item>
+ <al-item expr="'after', False, True"><td class="data-l"></al-item>
+ <al-item expr="'before', True, True"><td class="data-br"></al-item>
+ <al-item expr="'before', True, False"><td class="data-b"></al-item>
+ <al-item expr="'before', False, True"><td class="data-r"></al-item>
+ <td class="data">
+ </al-lookup>
+
+ <al-exec expr="output = workspace.output" />
+ <div class="header"><al-value expr="workspace.output.title" /></div>
+ <al-if expr="workspace.output.simple_table">
+ <table width="100%" class="crosstab" style="table-layout: fixed;">
+ <al-else>
+ <table width="100%" class="crosstab">
+ </al-if>
+ <colgroup span="<al-value expr='len(output.row_axis)' />" />
+ <al-for iter="colgroup_i" expr="output.col_axis.col_group_count()">
+ <colgroup span="<al-value expr='colgroup_i.value()' />" class="cgroup" />
+ </al-for>
+ <thead>
+ <al-for iter="col_head_i" expr="output.col_axis.get_col_headers()">
+ <tr class="column">
+ <al-if expr="col_head_i.index() == 0">
+ <th colspan="<al-value expr='len(output.row_axis)' />"
+ rowspan="<al-value expr='len(output.col_axis) * 2' />">
+ <al-for iter="scol_i" expr="output.statcols">
+ <al-exec expr="col = output.summaryset[scol_i.value()]" />
+ <al-value expr="col.label"><br />
+ </al-for>
+ <al-for iter="pcol_i" expr="output.propcols">
+ <al-exec expr="col = output.summaryset[pcol_i.value()]" />
+ <al-value expr="col.label"><br />
+ </al-for>
+ </th>
+ </al-if>
+ <al-for iter="col_col_i" expr="col_head_i.value()">
+ <th colspan="<al-value expr='col_col_i.value().span' />" class="<al-value expr='col_col_i.value().markup' />"><al-value expr="col_col_i.value().label" /></th>
+ </al-for>
+ </tr>
+ </al-for>
+ </thead>
+ <tr class="row">
+ <al-for iter="row_label_i" expr="output.row_axis.get_row_headers()">
+ <th class="label"><al-value expr="row_label_i.value().label" /></th>
+ </al-for>
+ </tr>
+ <al-for iter="group_i" expr="output.row_axis.grouped_rows()">
+ <tbody class="rgroup">
+ <al-for iter="row_i" expr="group_i.value()">
+ <al-exec expr="row_headers, row_values = row_i.value()" />
+ <tr class="row">
+ <al-for iter="row_head_i" expr="row_headers">
+ <al-if expr="row_head_i.value()">
+ <th rowspan="<al-value expr='row_head_i.value().span' />" class="<al-value expr='row_head_i.value().markup' />"><al-value expr="row_head_i.value().label" /></th>
+ </al-if>
+ </al-for>
+ <al-for iter="col_i" expr="output.col_axis.header_and_values">
+ <al-exec expr="tot_col, col_values = col_i.value()" />
+ <al-exec expr="rownum = output.get_rownum(row_values, col_values)" />
+ <al-value lookup="data_style" expr="output.marginal_totals,row_headers[-1].total,tot_col" />
+ <al-for iter="scol_i" expr="output.statcols">
+ <al-value expr="output.format_cell(scol_i.value(), rownum)" /><br />
+ </al-for>
+ <al-for iter="pcol_i" expr="output.propcols">
+ <al-exec expr="col = output.summaryset[pcol_i.value()]" />
+ <al-if expr="output.heatmap">
+ <div style="background-color: <al-value expr='output.colours(col[rownum])' />;">
+ </al-if>
+ <al-if expr="output.proptype == 'percent'">
+ <al-value expr="output.propn2perc(col[rownum])" /><br />
+ <al-else>
+ <al-value expr="col.do_format(col[rownum])" /><br />
+ </al-if>
+ <al-if expr="output.heatmap">
+ </div>
+ </al-if>
+ </al-for>
+
+ </td>
+ </al-for>
+ </tr>
+ </al-for>
+ </tbody>
+ </al-for>
+ </table>
+ <div class="footer"><al-value expr="workspace.output.subtitle" /></div>
diff --git a/web/pages/output_dsrows.html b/web/pages/output_dsrows.html
new file mode 100644
index 0000000..a3de9b7
--- /dev/null
+++ b/web/pages/output_dsrows.html
@@ -0,0 +1,82 @@
+<al-comment>
+
+ The contents of this file are subject to the HACOS License Version 1.2
+ (the "License"); you may not use this file except in compliance with
+ the License. Software distributed under the License is distributed
+ on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ implied. See the LICENSE file for the specific language governing
+ rights and limitations under the License. The Original Software
+ is "NetEpi Analysis". The Initial Developer of the Original
+ Software is the Health Administration Corporation, incorporated in
+ the State of New South Wales, Australia.
+
+ Copyright (C) 2004,2005 Health Administration Corporation.
+ All Rights Reserved.
+
+</al-comment>
+
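+<!-- Pagination controls for the row listing, expanded above and below the
+     result table. -->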
+<al-macro name="page_buttons">
+ <al-if expr="not output.pages()">
+ (<al-value expr="len(output.dsrows)" /> result rows)
+ <al-else>
+ <table width="100%">
+ <tr>
+ <td align="left">
+ <al-if expr="output.has_prevpage()">
+ <al-input type="submit" name="prev_page" value="Previous Page" />
+ </al-if>
+ </td>
+ <td align="center" width="100%">
+ Page <al-value expr="output.page()" /> of <al-value expr="output.pages()" /> (<al-value expr="len(output.dsrows)" /> rows overall) Goto page: <al-input name="select_page" size="8" list><al-input type="submit" name="select_page_go" value="Go">
+ </td>
+ <td align="right">
+ <al-if expr="output.has_nextpage()">
+ <al-input type="submit" name="next_page" value="Next Page" />
+ </al-if>
+ </td>
+ </tr>
+ </table>
+ </al-if>
+</al-macro>
+
+ <al-exec expr="output = workspace.output" />
+ <al-exec expr="full_ds = workspace.get_dataset()" />
+ <al-exec expr="ds = SOOMv0.filtered_ds(full_ds, output.dsrows)" />
+ <al-exec expr="cols = ds.get_columns(output.colnames)" />
+ <div class="header"><al-value expr="output.title" /></div>
+ <al-if expr="not output.has_rows()">
+ <i>No results returned by query. Try a less specific query.</i>
+ <al-else>
+ <al-expand name="page_buttons" />
+ <table class="tableout">
+ <tr>
+ <th>Row</th>
+ <al-for iter="col_i" expr="cols">
+ <th><al-value expr="col_i.value().label" /></th>
+ </al-for>
+ </tr>
+ <al-for iter="row_i" expr="output.page_rows()">
+ <tr>
+ <td class="numeric"><al-value expr="row_i.value() + 1" /></td>
+ <al-for iter="col_i" expr="cols">
+ <al-exec expr="col = col_i.value()" />
+ <al-exec expr="value = col.do_outtrans(col[row_i.value()])" />
+ <al-if expr="type(value) in (int, float)">
+ <td class="numeric">
+ <al-value expr="col.do_format(value)" />
+ </td>
+ <al-else>
+ <al-exec expr="highlight = output.highlight_fns.get(col.name)" />
+ <al-if expr="highlight is not None">
+ <td><al-value expr="col.do_format(highlight(value))" noescape /></td>
+ <al-else>
+ <td><al-value expr="col.do_format(value)" /></td>
+ </al-if>
+ </al-if>
+ </al-for>
+ </tr>
+ </al-for>
+ </table>
+ <al-expand name="page_buttons" />
+ </al-if>
+ <div class="footer"><al-value expr="output.subtitle" /></div>
diff --git a/web/pages/output_image.html b/web/pages/output_image.html
new file mode 100644
index 0000000..e8559e3
--- /dev/null
+++ b/web/pages/output_image.html
@@ -0,0 +1,25 @@
+<al-comment>
+
+ The contents of this file are subject to the HACOS License Version 1.2
+ (the "License"); you may not use this file except in compliance with
+ the License. Software distributed under the License is distributed
+ on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ implied. See the LICENSE file for the specific language governing
+ rights and limitations under the License. The Original Software
+ is "NetEpi Analysis". The Initial Developer of the Original
+ Software is the Health Administration Corporation, incorporated in
+ the State of New South Wales, Australia.
+
+ Copyright (C) 2004,2005 Health Administration Corporation.
+ All Rights Reserved.
+
+</al-comment>
+
+ <al-for iter="f_i" expr="workspace.output.output_names()">
+ <al-exec expr="filename = f_i.value()" />
+ <al-if expr="workspace.output.inline">
+ <al-img expr="filename" />
+ <al-else>
+ <al-a expr="filename">Download</al-a>
+ </al-if>
+ </al-for>
diff --git a/web/pages/output_table.html b/web/pages/output_table.html
new file mode 100644
index 0000000..d599be9
--- /dev/null
+++ b/web/pages/output_table.html
@@ -0,0 +1,42 @@
+<al-comment>
+
+ The contents of this file are subject to the HACOS License Version 1.2
+ (the "License"); you may not use this file except in compliance with
+ the License. Software distributed under the License is distributed
+ on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ implied. See the LICENSE file for the specific language governing
+ rights and limitations under the License. The Original Software
+ is "NetEpi Analysis". The Initial Developer of the Original
+ Software is the Health Administration Corporation, incorporated in
+ the State of New South Wales, Australia.
+
+ Copyright (C) 2004,2005 Health Administration Corporation.
+ All Rights Reserved.
+
+</al-comment>
+
+ <al-exec expr="ds = workspace.output.summaryset" />
+ <div class="header"><al-value expr="workspace.output.title" /></div>
+ <table class="tableout">
+ <tr>
+ <al-for iter="col_i" expr="ds.get_print_columns()">
+ <th><al-value expr="col_i.value().label" /></th>
+ </al-for>
+ </tr>
+ <al-for iter="row_i" expr="xrange(len(ds['row_ordinal']))">
+ <tr>
+ <al-for iter="col_i" expr="ds.get_print_columns()">
+ <al-exec expr="col = col_i.value()" />
+ <al-exec expr="value = col.do_outtrans(col[row_i.value()])" />
+ <al-if expr="type(value) in (int, float)">
+ <td class="numeric">
+ <al-value expr="col.do_format(value)" />
+ </td>
+ <al-else>
+ <td><al-value expr="col.do_format(value)" /></td>
+ </al-if>
+ </al-for>
+ </tr>
+ </al-for>
+ </table>
+ <div class="footer"><al-value expr="workspace.output.subtitle" /></div>
diff --git a/web/pages/output_twobytwo.html b/web/pages/output_twobytwo.html
new file mode 100644
index 0000000..906e1f5
--- /dev/null
+++ b/web/pages/output_twobytwo.html
@@ -0,0 +1,138 @@
+<al-comment>
+
+ The contents of this file are subject to the HACOS License Version 1.2
+ (the "License"); you may not use this file except in compliance with
+ the License. Software distributed under the License is distributed
+ on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ implied. See the LICENSE file for the specific language governing
+ rights and limitations under the License. The Original Software
+ is "NetEpi Analysis". The Initial Developer of the Original
+ Software is the Health Administration Corporation, incorporated in
+ the State of New South Wales, Australia.
+
+ Copyright (C) 2004,2005 Health Administration Corporation.
+ All Rights Reserved.
+
+</al-comment>
+
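+<!-- stratacounts renders one stratum's two-by-two table of counts, with
+     row (htot), column (vtot) and grand (tot) totals. -->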
+<al-macro name="stratacounts">
+ <al-if expr="xtab.label">
+ <tr>
+ <th colspan="2" class="subsechead">
+ <al-value expr="xtab.label" />
+ </th>
+ </tr>
+ </al-if>
+ <tr>
+ <td colspan="2">
+ <table class="twobytwotab">
+ <tr>
+ <th></th>
+ <th></th>
+ <th colspan="2"><al-value expr="display.axislabels[0].label" /></th>
+ </tr>
+ <tr>
+ <th></th>
+ <th></th>
+ <th class="top">
+ <al-value expr="display.axislabels[0].values[0]" />
+ </th>
+ <th class="top">
+ <al-value expr="display.axislabels[0].values[1]" />
+ </th>
+ </tr>
+ <tr>
+ <th class="side" rowspan="2">
+ <al-value expr="display.axislabels[1].label" />
+ </th>
+ <th class="side">
+ <al-value expr="display.axislabels[1].values[0]" />
+ </th>
+ <td class="cell"><al-value expr="xtab.data[0,0]" /></td>
+ <td class="cell"><al-value expr="xtab.data[1,0]" /></td>
+ <td class="mt"><al-value expr="xtab.htot[0]" /></td>
+ </tr>
+ <tr>
+ <th class="side">
+ <al-value expr="display.axislabels[1].values[1]" />
+ </th>
+ <td class="cell"><al-value expr="xtab.data[0,1]" /></td>
+ <td class="cell"><al-value expr="xtab.data[1,1]" /></td>
+ <td class="mt"><al-value expr="xtab.htot[1]" /></td>
+ </tr>
+ <tr>
+ <td></td>
+ <td></td>
+ <td class="mt"><al-value expr="xtab.vtot[0]" /></td>
+ <td class="mt"><al-value expr="xtab.vtot[1]" /></td>
+ <td class="mt"><al-value expr="xtab.tot" /></td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+</al-macro>
+
+ <al-exec expr="output = workspace.output" />
+ <div class="header"><al-value expr="workspace.output.title" /></div>
+ <div class="footer"><al-value expr="workspace.output.subtitle" /></div>
+ <table class="twobytwo">
+ <al-exec expr="display = workspace.output.display()" />
+
+ <al-if expr="workspace.output.have_files()">
+ <tbody class="section">
+ <th colspan="2" class="sechead">Fourfold Plots</th>
+ <al-for iter="ii" expr="workspace.output.files()">
+ <tr>
+ <th colspan="2" class="subsechead">
+ <al-value expr="ii.value().label" />
+ </th>
+ </tr>
+ <tr>
+ <td colspan="2" align="center">
+ <al-img expr="ii.value().url()" />
+ </td>
+ </tr>
+ </al-for>
+ </tbody>
+ </al-if>
+
+ <tbody class="section">
+ <th colspan="2" class="sechead">Counts</th>
+ <al-for iter="s_i" expr="display.xtabs">
+ <al-exec expr="xtab = s_i.value()" />
+ <al-expand name="stratacounts" />
+ </al-for>
+ </tbody>
+
+ <al-for iter="section_i" expr="display.report" >
+ <al-exec expr="section = section_i.value()" />
+ <tbody class="section">
+ <tr>
+ <th colspan="2" class="sechead"><al-value expr="section.label" /></th>
+ </tr>
+ <al-for iter="sub_i" expr="section.contents">
+ <al-exec expr="subsec = sub_i.value()" />
+ <al-if expr="subsec.label">
+ <tr>
+ <th colspan="2" class="subsechead">
+ <al-value expr="subsec.label" />
+ </th>
+ </tr>
+ </al-if>
+ <al-for iter="lines_i" expr="subsec.contents">
+ <tr>
+ <al-exec expr="fields = lines_i.value().split(':', 1)" />
+ <al-if expr="len(fields) == 1">
+ <td colspan="2">
+ <al-value expr="fields[0]" />
+ </td>
+ <al-else>
+ <td><al-value expr="fields[0]" />:</td>
+ <td><al-value expr="fields[1]" /></td>
+ </al-if>
+ </tr>
+ </al-for>
+ </al-for>
+ </tbody>
+ </al-for>
+ </table>
diff --git a/web/pages/params.html b/web/pages/params.html
new file mode 100644
index 0000000..90b1529
--- /dev/null
+++ b/web/pages/params.html
@@ -0,0 +1,57 @@
+<al-comment>
+
+ The contents of this file are subject to the HACOS License Version 1.2
+ (the "License"); you may not use this file except in compliance with
+ the License. Software distributed under the License is distributed
+ on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ implied. See the LICENSE file for the specific language governing
+ rights and limitations under the License. The Original Software
+ is "NetEpi Analysis". The Initial Developer of the Original
+ Software is the Health Administration Corporation, incorporated in
+ the State of New South Wales, Australia.
+
+ Copyright (C) 2004,2005 Health Administration Corporation.
+ All Rights Reserved.
+
+</al-comment>
+<al-include name="fields.html" />
+
+<al-expand name="page_layout">
+ <table class="bodytable">
+ <al-for iter="field_i" expr="workspace.plottype.fields">
+ <al-exec expr="field = field_i.value()" />
+ <al-if expr="field.enabled(workspace)">
+ <al-value expr="field.markup" lookup="field_markup" />
+ </al-if>
+ </al-for>
+
+ <tr>
+ <td></td>
+ <td class="actions">
+ <table width="100%" cellspacing="0" cellpadding="0">
+ <al-if expr="workspace.output.have_files() and not workspace.output.inline">
+ <al-for iter="file_i" expr="workspace.output.output_names()">
+ <tr><td align="center">
+ <al-a expr="file_i.value()">Download Result</al-a>
+ </td></tr>
+ </al-for>
+ </al-if>
+ <tr>
+ <al-if expr="workspace.output.have_files() and workspace.output.inline">
+ <td align="center">
+ <al-input class="submit" type="submit" name="hide_params" value="Prev Result" />
+ </td>
+ </al-if>
+ <td align="center">
+ <al-input class="submit" type="submit" name="save" value="Save Analysis" />
+ </td>
+ <td align="center">
+ <al-input class="submit" type="submit" name="ok" value="Run Analysis" />
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </table>
+ <al-expand name="timedetail" />
+</al-expand>
diff --git a/web/pages/paramsave.html b/web/pages/paramsave.html
new file mode 100644
index 0000000..c735551
--- /dev/null
+++ b/web/pages/paramsave.html
@@ -0,0 +1,43 @@
+<al-comment>
+
+ The contents of this file are subject to the HACOS License Version 1.2
+ (the "License"); you may not use this file except in compliance with
+ the License. Software distributed under the License is distributed
+ on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ implied. See the LICENSE file for the specific language governing
+ rights and limitations under the License. The Original Software
+ is "NetEpi Analysis". The Initial Developer of the Original
+ Software is the Health Administration Corporation, incorporated in
+ the State of New South Wales, Australia.
+
+ Copyright (C) 2004,2005 Health Administration Corporation.
+ All Rights Reserved.
+
+</al-comment>
+
+<al-expand name="page_layout">
+ <table class="bodytable">
+ <al-if expr="workspace.params.loaded_from">
+ <tr>
+ <td class="section">Loaded from</td>
+ <td class="fill"><al-value expr="workspace.params.loaded_from" /></td>
+ </tr>
+ </al-if>
+ <tr>
+ <td class="section">Save analysis as</td>
+ <td class="fill"><al-input name="workspace.params.save_as" /></td>
+ </tr>
+ <tr>
+ <td></td>
+ <td class="actions">
+ <table width="100%">
+ <tr>
+ <td align="left"><al-input type="submit" name="back" value="Cancel" /></td>
+ <td align="right"><al-input type="submit" name="save" value="Save" /></td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </table>
+</al-expand>
+
diff --git a/web/pages/result.html b/web/pages/result.html
new file mode 100644
index 0000000..e5c42f7
--- /dev/null
+++ b/web/pages/result.html
@@ -0,0 +1,56 @@
+<al-comment>
+
+ The contents of this file are subject to the HACOS License Version 1.2
+ (the "License"); you may not use this file except in compliance with
+ the License. Software distributed under the License is distributed
+ on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ implied. See the LICENSE file for the specific language governing
+ rights and limitations under the License. The Original Software
+ is "NetEpi Analysis". The Initial Developer of the Original
+ Software is the Health Administration Corporation, incorporated in
+ the State of New South Wales, Australia.
+
+ Copyright (C) 2004,2005 Health Administration Corporation.
+ All Rights Reserved.
+
+</al-comment>
+
+<al-expand name="page_layout">
+ <div align="center">
+ <al-if expr="workspace.output.inline">
+ <div class="bodytable" style="text-align: right;">
+ <al-input type="submit" name="back" value="Start again" />
+ <al-input type="submit" name="show_params" value="Parameters" />
+ </div>
+ </al-if>
+
+ <al-lookup name="output_markup">
+
+ <al-item expr="'imageout'">
+ <al-include name="output_image.html" />
+ </al-item>
+
+ <al-item expr="'tableout'">
+ <al-include name="output_table.html" />
+ </al-item>
+
+ <al-item expr="'crosstab'">
+ <al-include name="output_crosstab.html" />
+ </al-item>
+
+ <al-item expr="'dsrows'">
+ <al-include name="output_dsrows.html" />
+ </al-item>
+
+ <al-item expr="'twobytwo'">
+ <al-include name="output_twobytwo.html" />
+ </al-item>
+
+ </al-lookup>
+
+ <al-value expr="workspace.output.markup" lookup="output_markup" />
+
+ </div>
+ <div class="times">Analysis took <al-value expr="'%.1f' % workspace.output.elapsed" /> seconds</div>
+ <al-expand name="timedetail" />
+</al-expand>
diff --git a/web/pages/start.html b/web/pages/start.html
new file mode 100644
index 0000000..d098d8b
--- /dev/null
+++ b/web/pages/start.html
@@ -0,0 +1,70 @@
+<al-comment>
+
+ The contents of this file are subject to the HACOS License Version 1.2
+ (the "License"); you may not use this file except in compliance with
+ the License. Software distributed under the License is distributed
+ on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ implied. See the LICENSE file for the specific language governing
+ rights and limitations under the License. The Original Software
+ is "NetEpi Analysis". The Initial Developer of the Original
+ Software is the Health Administration Corporation, incorporated in
+ the State of New South Wales, Australia.
+
+ Copyright (C) 2004,2005 Health Administration Corporation.
+ All Rights Reserved.
+
+</al-comment>
+
+<al-expand name="page_layout_title">
+ <al-if expr="delete_an">
+ <div class="infobox">
+ <center>
+ <table style="text-align: left;" class="infobox">
+ <tr>
+ <th>Analysis name:</th>
+        <td><al-value expr="delete_an.label" /></td>
+ </tr>
+ <tr>
+ <th>Dataset:</th>
+        <td><al-value expr="delete_an.get_dataset().label" /></td>
+ </tr>
+ </table>
+ Are you sure you want to delete this analysis?
+ <table>
+ <tr>
+ <td><al-input type="submit" name="delete_cancel" value="No" /></td>
+ <td><al-input type="submit" name="delete_confirm" value="Yes" /></td>
+ </tr>
+ </table>
+ </center>
+ </div>
+ <al-else>
+ <table class="dsview">
+ <tr>
+ <td align="center"><al-input type="submit" value="New" name="new" /></td>
+ <td colspan="5">New Analysis</td>
+ </tr>
+ <tr>
+ <th> </th>
+ <th>Analysis</th>
+ <th>Dataset</th>
+ <th>Record count</th>
+ <th>Updated</th>
+ <th> </th>
+ </tr>
+ <al-for vars="an" expr="analyses.available()">
+ <al-exec expr="ds = an.get_dataset()" />
+ <tr>
+      <td align="center"><al-input type="submit" value="Analyse"
+          nameexpr="'analyse/%s' % an.fn" /></td>
+      <td><al-value expr="an.label" /></td>
+      <td><al-value expr="ds.label" /></td>
+      <td align="right"><al-value expr="len(ds)" /></td>
+      <td nowrap><al-value expr="ds.date_updated.strftime('%Y-%m-%d %H:%M')" /></td>
+      <td align="center"><al-input type="submit" value="Delete"
+          nameexpr="'delete/%s' % an.fn" /></td>
+ </tr>
+ </al-for>
+ </table>
+ </al-if>
+</al-expand>
diff --git a/web/pages/twobytwoparams.html b/web/pages/twobytwoparams.html
new file mode 100644
index 0000000..e9485f7
--- /dev/null
+++ b/web/pages/twobytwoparams.html
@@ -0,0 +1,137 @@
+<al-comment>
+
+ The contents of this file are subject to the HACOS License Version 1.2
+ (the "License"); you may not use this file except in compliance with
+ the License. Software distributed under the License is distributed
+ on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ implied. See the LICENSE file for the specific language governing
+ rights and limitations under the License. The Original Software
+ is "NetEpi Analysis". The Initial Developer of the Original
+ Software is the Health Administration Corporation, incorporated in
+ the State of New South Wales, Australia.
+
+ Copyright (C) 2004,2005 Health Administration Corporation.
+ All Rights Reserved.
+
+</al-comment>
+
+<al-include name="colvalselect.html" />
+
+<al-macro name="twobytwo_buttons">
+ <tr>
+ <td colspan="3" class="buttons">
+ <table width="100%" cellpadding="0" cellspacing="0">
+ <tr>
+ <td align="left" class="butt">
+ <al-input type="submit" name="back" value="Cancel" />
+ </td>
+ <td align="center" class="butt">
+ <al-input type="submit" name="swap" value="Swap" />
+ </td>
+ <td align="center" class="butt">
+ <al-input type="submit" name="clear" value="Clear" />
+ </td>
+ <td align="right" class="butt">
+ <al-input type="submit" name="okay" value="Okay" />
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+</al-macro>
+
+<al-expand name="page_layout">
+ <table class="twobytwoparams">
+ <al-expand name="twobytwo_buttons" />
+ <tr>
+ <td>
+ <table>
+ <tr>
+ <td colspan="2" nowrap>
+ (Checked values are positive)<br />
+ </td>
+ </tr>
+ <al-if expr="params.high_cardinality">
+ <al-if expr="not params.inc">
+ <tr><td class="warn">No values selected</td></tr>
+ <al-else>
+ <al-for iter="vi" expr="params.inc_options(workspace)">
+ <tr>
+ <td width="1em" valign="baseline">
+ <al-input type="checkbox" name="params.inc" list
+ valueexpr="vi.value()[0]" />
+ </td>
+            <td valign="baseline">
+ <al-value expr="vi.value()[1]" />
+ </td>
+ </tr>
+ </al-for>
+ </al-if>
+ <al-else>
+ <al-for iter="vi" expr="params.options(workspace)">
+ <tr>
+ <td width="1em" valign="baseline">
+ <al-input type="checkbox" name="params.inc" list
+ valueexpr="vi.value()[0]" />
+ </td>
+ <td valign="baseline">
+ <al-value expr="vi.value()[1]" />
+ </td>
+ </tr>
+ </al-for>
+ </al-if>
+ </table>
+ </td>
+ <td valign="top">
+ <table>
+ <tr>
+ <td>Positive label:</td>
+ <td><al-input name="params.positive_label"></td>
+ </tr>
+ <tr>
+ <td>Negative label:</td>
+ <td><al-input name="params.negative_label"></td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ <al-if expr="params.high_cardinality">
+ <tr>
+ <td colspan="2" class="result">
+        <b>Search:</b> <al-input name="params.pattern" />
+ <al-input class="submit" name="search_go" type="submit" value="Search" />
+ <al-input class="submit" name="search_clear" type="submit" value="Clear" />
+ <br />
+ <al-if expr="params.search_error">
+ <div class="error"><al-value expr="params.search_error" /></div>
+ </al-if>
+ <al-if expr="params.result">
+ <table width="100%">
+ <al-for iter="pi" expr="params.result_options(workspace)">
+ <tr>
+ <td width="1em" valign="baseline">
+ <al-input type="checkbox" name="params.result_inc" list
+ valueexpr="pi.value()[0]" />
+ </td>
+ <td valign="baseline">
+ <al-value expr="pi.value()[1]" />
+ </td>
+ <al-if expr="pi.index() == 0">
+ <al-td rowspanexpr="len(params.result)" class="widebutt" align="right">
+ <al-input type="submit" name="res/add/checked" value="Add Checked" /><br />
+ <al-input type="submit" name="res/del/checked" value="Del Checked" /><br />
+ <al-input type="submit" name="res/add/all" value="Add All" /><br />
+ <al-input type="submit" name="res/del/all" value="Del All" /><br />
+ </al-td>
+ </al-if>
+ </tr>
+ </al-for>
+ </table>
+ </al-if>
+ </td>
+ </tr>
+ </al-if>
+ <al-expand name="twobytwo_buttons" />
+ </table>
+</al-expand>
+
diff --git a/web/static/Netepi_logo_m.png b/web/static/Netepi_logo_m.png
new file mode 100644
index 0000000..9cf155d
Binary files /dev/null and b/web/static/Netepi_logo_m.png differ
diff --git a/web/static/Netepi_logo_s.png b/web/static/Netepi_logo_s.png
new file mode 100644
index 0000000..ec83427
Binary files /dev/null and b/web/static/Netepi_logo_s.png differ
diff --git a/web/static/add.xcf b/web/static/add.xcf
new file mode 100644
index 0000000..0712793
Binary files /dev/null and b/web/static/add.xcf differ
diff --git a/web/static/button-add.png b/web/static/button-add.png
new file mode 100644
index 0000000..2bf082c
Binary files /dev/null and b/web/static/button-add.png differ
diff --git a/web/static/button-del.png b/web/static/button-del.png
new file mode 100644
index 0000000..d72d8c1
Binary files /dev/null and b/web/static/button-del.png differ
diff --git a/web/static/button-down.png b/web/static/button-down.png
new file mode 100644
index 0000000..0d7b0b9
Binary files /dev/null and b/web/static/button-down.png differ
diff --git a/web/static/button-l.png b/web/static/button-l.png
new file mode 100644
index 0000000..4a4db0a
Binary files /dev/null and b/web/static/button-l.png differ
diff --git a/web/static/button-r.png b/web/static/button-r.png
new file mode 100644
index 0000000..993deff
Binary files /dev/null and b/web/static/button-r.png differ
diff --git a/web/static/button-up.png b/web/static/button-up.png
new file mode 100644
index 0000000..550250c
Binary files /dev/null and b/web/static/button-up.png differ
diff --git a/web/static/button.xcf b/web/static/button.xcf
new file mode 100644
index 0000000..3bbc848
Binary files /dev/null and b/web/static/button.xcf differ
diff --git a/web/static/close.png b/web/static/close.png
new file mode 100644
index 0000000..9eb9460
Binary files /dev/null and b/web/static/close.png differ
diff --git a/web/static/close.xcf b/web/static/close.xcf
new file mode 100644
index 0000000..14c3f0c
Binary files /dev/null and b/web/static/close.xcf differ
diff --git a/web/static/copyright.html b/web/static/copyright.html
new file mode 100644
index 0000000..b701040
--- /dev/null
+++ b/web/static/copyright.html
@@ -0,0 +1,567 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html>
+<head>
+ <meta http-equiv="content-type" content="text/html; charset=ISO-8859-1">
+ <title>NetEpi Copyright and Licensing Arrangements</title>
+ <style>
+ <!--
+ body {
+ background-color: white;
+ color: black;
+ }
+ pre {
+        font-family: monospace;
+ font-size: 10pt;
+ }
+ -->
+ </style>
+</head>
+<body bgcolor="white">
+<pre>
+
+COPYRIGHT AND LICENSING ARRANGEMENTS
+
+All material is copyright 2004 Health Administration Corporation (New
+South Wales Department of Health).
+
+NetEpi Analysis is licensed under the terms of the Health
+Administration Corporation Open Source Licence V1.2 (HACOS License V1.2),
+the complete text of which appears below.
+
+HEALTH ADMINISTRATION CORPORATION OPEN SOURCE LICENSE VERSION 1.2
+
+1. DEFINITIONS.
+
+ "Commercial Use" shall mean distribution or otherwise making the
+ Covered Software available to a third party.
+
+ "Contributor" shall mean each entity that creates or contributes to
+ the creation of Modifications.
+
+ "Contributor Version" shall mean in case of any Contributor the
+ combination of the Original Software, prior Modifications used by a
+ Contributor, and the Modifications made by that particular Contributor
+ and in case of Health Administration Corporation in addition the
+ Original Software in any form, including the form as Executable.
+
+ "Covered Software" shall mean the Original Software or Modifications
+ or the combination of the Original Software and Modifications, in
+ each case including portions thereof.
+
+ "Electronic Distribution Mechanism" shall mean a mechanism generally
+ accepted in the software development community for the electronic
+ transfer of data.
+
+ "Executable" shall mean Covered Software in any form other than
+ Source Code.
+
+ "Initial Developer" shall mean the individual or entity identified as
+ the Initial Developer in the Source Code notice required by Exhibit A.
+
+ "Health Administration Corporation" shall mean the Health
+ Administration Corporation as established by the Health Administration
+ Act 1982, as amended, of the State of New South Wales, Australia. The
+ Health Administration Corporation has its offices at 73 Miller Street,
+ North Sydney, New South Wales 2059, Australia.
+
+ "Larger Work" shall mean a work, which combines Covered Software or
+ portions thereof with code not governed by the terms of this License.
+
+ "License" shall mean this document.
+
+ "Licensable" shall mean having the right to grant, to the maximum
+ extent possible, whether at the time of the initial grant or
+ subsequently acquired, any and all of the rights conveyed herein.
+
+ "Modifications" shall mean any addition to or deletion from the
+ substance or structure of either the Original Software or any previous
+ Modifications. When Covered Software is released as a series of files,
+ a Modification is:
+
+ a) Any addition to or deletion from the contents of a file
+ containing Original Software or previous Modifications.
+
+ b) Any new file that contains any part of the Original Software or
+ previous Modifications.
+
+ "Original Software" shall mean the Source Code of computer software
+ code which is described in the Source Code notice required by Exhibit
+ A as Original Software, and which, at the time of its release under
+ this License is not already Covered Software governed by this License.
+
+ "Patent Claims" shall mean any patent claim(s), now owned or hereafter
+ acquired, including without limitation, method, process, and apparatus
+ claims, in any patent Licensable by grantor.
+
+ "Source Code" shall mean the preferred form of the Covered Software
+ for making modifications to it, including all modules it contains,
+ plus any associated interface definition files, scripts used to
+ control compilation and installation of an Executable, or source
+ code differential comparisons against either the Original Software or
+ another well known, available Covered Software of the Contributor's
+ choice. The Source Code can be in a compressed or archival form,
+ provided the appropriate decompression or de-archiving software is
+ widely available for no charge.
+
+ "You" (or "Your") shall mean an individual or a legal entity exercising
+ rights under, and complying with all of the terms of, this License or
+ a future version of this License issued under Section 6.1. For legal
+ entities, "You" includes an entity which controls, is controlled
+ by, or is under common control with You. For the purposes of this
+ definition, "control" means (a) the power, direct or indirect,
+ to cause the direction or management of such entity, whether by
+ contract or otherwise, or (b) ownership of more than fifty per cent
+ (50%) of the outstanding shares or beneficial ownership of such entity.
+
+2. SOURCE CODE LICENSE.
+
+2.1 Health Administration Corporation Grant.
+
+Subject to the terms of this License, Health Administration Corporation
+hereby grants You a world-wide, royalty-free, non-exclusive license,
+subject to third party intellectual property claims:
+
+a) under copyrights Licensable by Health Administration Corporation
+ to use, reproduce, modify, display, perform, sublicense and
+ distribute the Original Software (or portions thereof) with or without
+ Modifications, and/or as part of a Larger Work;
+
+b) and under Patents Claims infringed by the making, using or selling
+ of Original Software, to make, have made, use, practice, sell, and
+ offer for sale, and/or otherwise dispose of the Original Software
+ (or portions thereof).
+
+c) The licenses granted in this Section 2.1(a) and (b) are effective
+ on the date Health Administration Corporation first distributes
+ Original Software under the terms of this License.
+
+d) Notwithstanding Section 2.1(b) above, no patent license is granted:
+ 1) for code that You delete from the Original Software; 2) separate
+ from the Original Software; or 3) for infringements caused by: i)
+ the modification of the Original Software or ii) the combination of
+ the Original Software with other software or devices.
+
+2.2 Contributor Grant.
+
+Subject to the terms of this License and subject to third party
+intellectual property claims, each Contributor hereby grants You a
+world-wide, royalty-free, non-exclusive license:
+
+a) under copyrights Licensable by Contributor, to use, reproduce,
+ modify, display, perform, sublicense and distribute the Modifications
+ created by such Contributor (or portions thereof) either on an
+ unmodified basis, with other Modifications, as Covered Software and/or
+ as part of a Larger Work; and
+
+b) under Patent Claims necessarily infringed by the making, using,
+ or selling of Modifications made by that Contributor either alone
+ and/or in combination with its Contributor Version (or portions of
+ such combination), to make, use, sell, offer for sale, have made,
+ and/or otherwise dispose of: 1) Modifications made by that Contributor
+ (or portions thereof); and 2) the combination of Modifications made
+ by that Contributor with its Contributor Version (or portions of
+ such combination).
+
+c) The licenses granted in Sections 2.2(a) and 2.2(b) are effective
+ on the date Contributor first makes Commercial Use of the Covered
+ Software.
+
+d) Notwithstanding Section 2.2(b) above, no patent license is granted:
+ 1) for any code that Contributor has deleted from the Contributor
+ Version; 2) separate from the Contributor Version; 3) for infringements
+ caused by: i) third party modifications of Contributor Version or ii)
+ the combination of Modifications made by that Contributor with other
+ software (except as part of the Contributor Version) or other devices;
+ or 4) under Patent Claims infringed by Covered Software in the absence
+ of Modifications made by that Contributor.
+
+3. DISTRIBUTION OBLIGATIONS.
+
+3.1 Application of License.
+
+The Modifications which You create or to which You contribute are governed
+by the terms of this License, including without limitation Section
+2.2. The Source Code version of Covered Software may be distributed
+only under the terms of this License or a future version of this License
+released under Section 6.1, and You must include a copy of this License
+with every copy of the Source Code You distribute. You may not offer or
+impose any terms on any Source Code version that alters or restricts the
+applicable version of this License or the recipients' rights hereunder.
+
+3.2 Availability of Source Code.
+
+Any Modification which You create or to which You contribute must be made
+available in Source Code form under the terms of this License either on
+the same media as an Executable version or via an accepted Electronic
+Distribution Mechanism to anyone to whom you made an Executable version
+available; and if made available via Electronic Distribution Mechanism,
+must remain available for at least twelve (12) months after the date it
+initially became available, or at least six (6) months after a subsequent
+version of that particular Modification has been made available to
+such recipients. You are responsible for ensuring that the Source Code
+version remains available even if the Electronic Distribution Mechanism
+is maintained by a third party.
+
+3.3 Description of Modifications.
+
+You must cause all Covered Software to which You contribute to contain
+a file documenting the changes You made to create that Covered Software
+and the date of any change. You must include a prominent statement that
+the Modification is derived, directly or indirectly, from Original
+Software provided by Health Administration Corporation and including
+the name of Health Administration Corporation in (a) the Source Code,
+and (b) in any notice in an Executable version or related documentation
+in which You describe the origin or ownership of the Covered Software.
+
+3.4 Intellectual Property Matters
+
+a) Third Party Claims.
+
+ If Contributor has knowledge that a license under a third party's
+ intellectual property rights is required to exercise the rights
+ granted by such Contributor under Sections 2.1 or 2.2, Contributor
+ must include a text file with the Source Code distribution titled
+ "LEGAL'' which describes the claim and the party making the claim
+ in sufficient detail that a recipient will know whom to contact. If
+ Contributor obtains such knowledge after the Modification is made
+ available as described in Section 3.2, Contributor shall promptly
+ modify the LEGAL file in all copies Contributor makes available
+ thereafter and shall take other steps (such as notifying appropriate
+ mailing lists or newsgroups) reasonably calculated to inform those
+ who received the Covered Software that new knowledge has been obtained.
+
+b) Contributor APIs.
+
+ If Contributor's Modifications include an application programming
+ interface (API) and Contributor has knowledge of patent licenses
+ which are reasonably necessary to implement that API, Contributor
+ must also include this information in the LEGAL file.
+
+c) Representations.
+
+ Contributor represents that, except as disclosed pursuant to Section
+ 3.4(a) above, Contributor believes that Contributor's Modifications are
+ Contributor's original creation(s) and/or Contributor has sufficient
+ rights to grant the rights conveyed by this License.
+
+3.5 Required Notices.
+
+You must duplicate the notice in Exhibit A in each file of the Source
+Code. If it is not possible to put such notice in a particular Source
+Code file due to its structure, then You must include such notice in a
+location (such as a relevant directory) where a user would be likely to
+look for such a notice. If You created one or more Modification(s) You
+may add your name as a Contributor to the notice described in Exhibit
+A. You must also duplicate this License in any documentation for the
+Source Code where You describe recipients' rights or ownership rights
+relating to Covered Software. You may choose to offer, and to charge a
+fee for, warranty, support, indemnity or liability obligations to one or
+more recipients of Covered Software. However, You may do so only on Your
+own behalf, and not on behalf of Health Administration Corporation or any
+Contributor. You must make it absolutely clear that any such warranty,
+support, indemnity or liability obligation is offered by You alone,
+and You hereby agree to indemnify Health Administration Corporation and
+every Contributor for any liability incurred by Health Administration
+Corporation or such Contributor as a result of warranty, support,
+indemnity or liability terms You offer.
+
+3.6 Distribution of Executable Versions.
+
+You may distribute Covered Software in Executable form only if the
+requirements of Sections 3.1-3.5 have been met for that Covered Software,
+and if You include a notice stating that the Source Code version of the
+Covered Software is available under the terms of this License, including
+a description of how and where You have fulfilled the obligations of
+Section 3.2. The notice must be conspicuously included in any notice in
+an Executable version, related documentation or collateral in which You
+describe recipients' rights relating to the Covered Software. You may
+distribute the Executable version of Covered Software or ownership rights
+under a license of Your choice, which may contain terms different from
+this License, provided that You are in compliance with the terms of this
+License and that the license for the Executable version does not attempt
+to limit or alter the recipient's rights in the Source Code version from
+the rights set forth in this License. If You distribute the Executable
+version under a different license You must make it absolutely clear
+that any terms which differ from this License are offered by You alone,
+not by Health Administration Corporation or any Contributor. You hereby
+agree to indemnify Health Administration Corporation and every Contributor
+for any liability incurred by Health Administration Corporation or such
+Contributor as a result of any such terms You offer.
+
+3.7 Larger Works.
+
+You may create a Larger Work by combining Covered Software with other
+software not governed by the terms of this License and distribute the
+Larger Work as a single product. In such a case, You must make sure the
+requirements of this License are fulfilled for the Covered Software.
+
+4. INABILITY TO COMPLY DUE TO STATUTE OR REGULATION.
+
+If it is impossible for You to comply with any of the terms of this
+License with respect to some or all of the Covered Software due to
+statute, judicial order, or regulation then You must: (a) comply with the
+terms of this License to the maximum extent possible; and (b) describe the
+limitations and the code they affect. Such description must be included
+in the LEGAL file described in Section 3.4 and must be included with all
+distributions of the Source Code. Except to the extent prohibited by
+statute or regulation, such description must be sufficiently detailed
+for a recipient of ordinary skill to be able to understand it.
+
+5. APPLICATION OF THIS LICENSE.
+
+This License applies to code to which Health Administration Corporation
+has attached the notice in Exhibit A and to related Covered Software.
+
+6. VERSIONS OF THE LICENSE.
+
+6.1 New Versions.
+
+Health Administration Corporation may publish revised and/or new
+versions of the License from time to time. Each version will be given
+a distinguishing version number.
+
+6.2 Effect of New Versions.
+
+Once Covered Software has been published under a particular version
+of the License, You may always continue to use it under the terms of
+that version. You may also choose to use such Covered Software under
+the terms of any subsequent version of the License published by Health
+Administration Corporation. No one other than Health Administration
+Corporation has the right to modify the terms applicable to Covered
+Software created under this License.
+
+7. DISCLAIMER OF WARRANTY.
+
+COVERED SOFTWARE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS'' BASIS,
+WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
+WITHOUT LIMITATION, WARRANTIES THAT THE COVERED SOFTWARE IS FREE OF
+DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING. THE
+ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE COVERED SOFTWARE IS
+WITH YOU. SHOULD ANY COVERED SOFTWARE PROVE DEFECTIVE IN ANY RESPECT, YOU
+(NOT HEALTH ADMINISTRATION CORPORATION, ITS LICENSORS OR AFFILIATES OR
+ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY SERVICING, REPAIR
+OR CORRECTION. THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART
+OF THIS LICENSE. NO USE OF ANY COVERED SOFTWARE IS AUTHORIZED HEREUNDER
+EXCEPT UNDER THIS DISCLAIMER.
+
+8. TERMINATION.
+
+8.1 This License and the rights granted hereunder will terminate
+automatically if You fail to comply with terms herein and fail to
+cure such breach within 30 days of becoming aware of the breach. All
+sublicenses to the Covered Software which are properly granted shall
+survive any termination of this License. Provisions which, by their
+nature, must remain in effect beyond the termination of this License
+shall survive.
+
+8.2 If You initiate litigation by asserting a patent infringement claim
+(excluding declaratory judgment actions) against Health Administration
+Corporation or a Contributor (Health Administration Corporation
+or Contributor against whom You file such action is referred to as
+"Participant") alleging that:
+
+a) such Participant's Contributor Version directly or indirectly
+ infringes any patent, then any and all rights granted by such
+ Participant to You under Sections 2.1 and/or 2.2 of this License
+ shall, upon 60 days notice from Participant terminate prospectively,
+ unless if within 60 days after receipt of notice You either: (i)
+ agree in writing to pay Participant a mutually agreeable reasonable
+ royalty for Your past and future use of Modifications made by such
+ Participant, or (ii) withdraw Your litigation claim with respect to
+ the Contributor Version against such Participant. If within 60 days
+ of notice, a reasonable royalty and payment arrangement are not
+ mutually agreed upon in writing by the parties or the litigation
+ claim is not withdrawn, the rights granted by Participant to
+ You under Sections 2.1 and/or 2.2 automatically terminate at the
+ expiration of the 60 day notice period specified above.
+
+b) any software, hardware, or device, other than such Participant's
+ Contributor Version, directly or indirectly infringes any patent,
+ then any rights granted to You by such Participant under Sections
+ 2.1(b) and 2.2(b) are revoked effective as of the date You first
+ made, used, sold, distributed, or had made, Modifications made by
+ that Participant.
+
+8.3 If You assert a patent infringement claim against Participant
+alleging that such Participant's Contributor Version directly or
+indirectly infringes any patent where such claim is resolved (such as by
+license or settlement) prior to the initiation of patent infringement
+litigation, then the reasonable value of the licenses granted by such
+Participant under Sections 2.1 or 2.2 shall be taken into account in
+determining the amount or value of any payment or license.
+
+8.4 In the event of termination under Sections 8.1 or 8.2 above, all
+end user license agreements (excluding distributors and resellers) which
+have been validly granted by You or any distributor hereunder prior to
+termination shall survive termination.
+
+9. LIMITATION OF LIABILITY.
+
+9.1 UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT
+(INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, HEALTH
+ADMINISTRATION CORPORATION, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR
+OF COVERED SOFTWARE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE
+TO ANY PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL
+DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS
+OF GOODWILL, WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND
+ALL OTHER COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE
+BEEN INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF
+LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL INJURY
+RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT APPLICABLE LAW
+PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION
+OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, BUT MAY ALLOW
+LIABILITY TO BE LIMITED; IN SUCH CASES, A PARTY'S, ITS EMPLOYEES',
+LICENSORS' OR AFFILIATES' LIABILITY SHALL BE LIMITED TO AUD$100. NOTHING
+CONTAINED IN THIS LICENSE SHALL PREJUDICE THE STATUTORY RIGHTS OF ANY
+PARTY DEALING AS A CONSUMER.
+
+9.2 Notwithstanding any other clause in the licence, and to the extent
+permitted by law:
+
+(a) Health Administration Corporation ("the Corporation") excludes all
+ conditions and warranties which would otherwise be implied into
+ a supply of goods or services arising out of or in relation to
+ the granting of this licence by the Corporation or any associated
+ acquisition of software to which this licence relates;
+
+(b) Where a condition or warranty is implied into such a supply and
+ that condition or warranty cannot be excluded by law that warranty
+ or condition is implied into that supply and the liability of the
+ Health Administration Corporation for a breach of that condition or
+ warranty is limited to the fullest extent permitted by law and, in
+ respect of conditions and warranties implied by the Trade Practices
+ Act (Commonwealth of Australia) 1974, is limited, to the extent
+ permitted by law, to one or more of the following at the election
+ of the Corporation:
+
+ (A) In the case of goods: (i) the replacement of the goods or the
+ supply of equivalent goods; (ii) the repair of the goods; (iii)
+ the payment of the cost of replacing the goods or of acquiring
+ equivalent goods; (iv) the payment of the cost of having the
+ goods repaired; and
+
+ (B) in the case of services: (i) the supplying of the services again;
+ or (ii) the payment of the cost of having the services supplied
+ again.
+
+10. MISCELLANEOUS.
+
+This License represents the complete agreement concerning subject matter
+hereof. All rights in the Covered Software not expressly granted under
+this License are reserved. Nothing in this License shall grant You any
+rights to use any of the trademarks of Health Administration Corporation
+or any of its Affiliates, even if any of such trademarks are included
+in any part of Covered Software and/or documentation to it.
+
+This License is governed by the laws of the State of New South Wales,
+Australia excluding its conflict-of-law provisions. All disputes or
+litigation arising from or relating to this Agreement shall be subject
+to the jurisdiction of the Supreme Court of New South Wales. If any part
+of this Agreement is found void and unenforceable, it will not affect
+the validity of the balance of the Agreement, which shall remain valid
+and enforceable according to its terms.
+
+11. RESPONSIBILITY FOR CLAIMS.
+
+As between Health Administration Corporation and the Contributors,
+each party is responsible for claims and damages arising, directly or
+indirectly, out of its utilisation of rights under this License and You
+agree to work with Health Administration Corporation and Contributors
+to distribute such responsibility on an equitable basis. Nothing herein
+is intended or shall be deemed to constitute any admission of liability.
+
+EXHIBIT A
+
+The contents of this file are subject to the HACOS License Version 1.2
+(the "License"); you may not use this file except in compliance with
+the License.
+
+Software distributed under the License is distributed on an "AS IS"
+basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+License for the specific language governing rights and limitations under
+the License.
+
+The Original Software is "NetEpi Analysis". The Initial Developer
+of the Original Software is the Health Administration Corporation,
+incorporated in the State of New South Wales, Australia.
+
+ Copyright (C) 2004,2005 Health Administration Corporation.
+ All Rights Reserved.
+
+APPENDIX 1. DIFFERENCES BETWEEN THE HACOS LICENSE VERSION 1.2, THE
+MOZILLA PUBLIC LICENSE VERSION 1.1 AND THE NOKIA OPEN SOURCE LICENSE
+(NOKOS LICENSE) VERSION 1.0A
+
+The HACOS License Version 1.2 was derived from the Mozilla Public
+License Version 1.1 using some of the changes to the Mozilla Public
+License embodied in the Nokia Open Source License (NOKOS License)
+Version 1.0a. The differences between the HACOS License Version 1.2
+(this document), the Mozilla Public License and the NOKOS License are
+as follows:
+
+i. The title of the license was changed to "Health Administration
+ Corporation Open Source License Version 1.2".
+
+ii. Globally, all references to "Netscape Communications Corporation",
+ "Mozilla", "Nokia" and "Nokia Corporation" were changed to "Health
+ Administration Corporation".
+
+iii. Globally, the words "means" and "Covered Code" as used in the
+     Mozilla Public License were changed to "shall mean" and
+     "Covered Software" respectively, as used in the NOKOS License.
+
+iv. In Section 1 (Definitions), a definition of "Health Administration
+ Corporation" was added.
+
+v. In Section 2, the term "intellectual property rights" used in the
+ Mozilla Public License was replaced by the term "copyrights"
+ as used in the NOKOS License.
+
+vi. In Section 2.2 (Contributor Grant), the words "Subject to the
+ terms of this License" which appear in the NOKOS License were
+ added to the Mozilla Public License.
+
+vii. The sentence "However, You may include an additional document
+ offering the additional rights described in Section 3.5." which
+ appears in the Mozilla Public License was omitted.
+
+viii. Section 6.3 (Derivative Works) of the Mozilla Public License,
+ which permits modifications to the Mozilla Public License,
+ was omitted.
+
+ix. The original Section 9 (Limitation of Liability) was renumbered
+ as Section 9.1, a maximum liability of AUD$100 was specified
+ for those jurisdictions which do not allow complete exclusion of
+ liability but which do allow limitation of liability. The sentence
+ "NOTHING CONTAINED IN THE LICENSE SHALL PREJUDICE THE STATUTORY
+ RIGHTS OF ANY PARTY DEALING AS A CONSUMER.", which appears in the
+ NOKOS License but not in the Mozilla Public License, was added.
+
+x. Section 9.2 was added in order to further limit liability to the
+ maximum extent permitted by the Commonwealth of Australia Trade
+ Practices Act 1974.
+
+xi. Section 10 of the Mozilla Public License, which provides additional
+ conditions for United States Government End Users, was omitted.
+
+xii. The governing law and jurisdiction for the settlement of disputes
+ in Section 11 of the Mozilla Public License and Section 10 of the
+ NOKOS License was changed to the laws of the State of New South
+ Wales and the Supreme Court of New South Wales respectively. The
+ exclusion of the application of the United Nations Convention on
+ Contracts for the International Sale of Goods which appears in
+ the Mozilla Public License was omitted.
+
+xiii. Section 13 (Multiple-Licensed Code) of the Mozilla Public License
+ was omitted.
+
+xiv. The provisions for alternative licensing arrangement for contributed
+ code which appear in Exhibit A of the Mozilla Public License
+ were omitted.
+
+</pre>
+</body>
+</html>
diff --git a/web/static/favicon.ico b/web/static/favicon.ico
new file mode 100644
index 0000000..02d5bf7
Binary files /dev/null and b/web/static/favicon.ico differ
diff --git a/web/static/help.html b/web/static/help.html
new file mode 100644
index 0000000..7d204ab
--- /dev/null
+++ b/web/static/help.html
@@ -0,0 +1,418 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
+<!--
+ The contents of this file are subject to the HACOS License Version 1.2
+ (the "License"); you may not use this file except in compliance with
+ the License. Software distributed under the License is distributed
+ on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ implied. See the LICENSE file for the specific language governing
+ rights and limitations under the License. The Original Software
+ is "NetEpi Analysis". The Initial Developer of the Original
+ Software is the Health Administration Corporation, incorporated in
+ the State of New South Wales, Australia.
+
+ Copyright (C) 2004,2005 Health Administration Corporation.
+ All Rights Reserved.
+-->
+<HTML>
+<HEAD>
+ <META HTTP-EQUIV="CONTENT-TYPE" CONTENT="text/html; charset=utf-8">
+ <TITLE></TITLE>
+ <META NAME="GENERATOR" CONTENT="OpenOffice.org 1.1.2 (Linux)">
+ <META NAME="CREATED" CONTENT="20041215;17110100">
+ <META NAME="CHANGED" CONTENT="20041220;13145000">
+</HEAD>
+<BODY BGCOLOR="white" LANG="en-US" DIR="LTR">
+<H1>NetEpi Analysis Version 0.1 Web interface – a brief
+introduction</H1>
+<P>Tim Churches (<A HREF="mailto:tchur at doh.health.nsw.gov.au">tchur at doh.health.nsw.gov.au</A>),
+Centre for Epidemiology and Research, Population Health Division, New
+South Wales Department of Health</P>
+<P>21 December, 2004</P>
+<P STYLE="margin-top: 0.42cm; page-break-after: avoid"><FONT FACE="Albany, sans-serif"><FONT SIZE=4>Introduction</FONT></FONT></P>
+<P>This document provides a brief introduction to the Web interface
+for NetEpi Analysis (NEA) Version 0.1. Please note that this is an
+early version of the application and it is highly likely that the
+interface and capabilities of NEA will change considerably as it is
+developed further.</P>
+<P>NEA was designed and written by Tim Churches (Centre for
+Epidemiology and Research, NSW Department of Health) and Andrew
+McNamara (Object Craft Pty Ltd, working under contract to NSW
+Department of Health). Dave Cole and Ben Golding, also both of Object
+Craft Pty Ltd, contributed fast set-theoretic functions and a filter
+clause parser respectively.</P>
+<P>NEA uses several (somewhat) unusual techniques to ensure
+reasonable performance when dealing with moderately-sized datasets
+(up to about 10 million records), despite being programmed in a
+highly dynamic, late-binding, object-oriented programming language
+(Python). In particular, all datasets are stored in vertically
+partitioned form – that is, column-wise, not row-wise, and dataset
+summarisation is achieved using set-theoretic operations on ordinal
+mappings – that is, the ordinal row position in each column is used
+as an implicit row ID and set intersections (and other set
+operations) are performed on these row IDs. This approach differs
+from more commonly used bit-mapped indexes in that value indexes are
+stored as vectors of sorted integer indexes – an approach which
+sacrifices some performance on very large datasets, but which retains
+storage and processing efficiency even for columns with very high
+cardinality, without having to use complex bitmap compression
+schemes. High cardinality data are routinely encountered in health
+and epidemiological datasets. A peer-reviewed paper describing these
+techniques appeared in 2003 – please see: <A HREF="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=pubmed&dopt=Abstract&list_uids=12725961">Churches
+T. Exploratory data analysis using set operations and ordinal
+mapping. Comput Methods Programs Biomed 2003; 71(1):11-23</A>.
+Preprint copies of the paper in PDF format are available from the
+author on request (email: <A HREF="mailto:tchur at doh.health.nsw.gov.au">tchur at doh.health.nsw.gov.au</A>)
+if your institution or organisation does not have access to this
+journal.</P>
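+<P>As a concrete illustration of the ordinal mapping idea, the short
+Python sketch below shows how a conjunction of filter clauses reduces
+to a set intersection of row IDs. This is an illustrative sketch only,
+not the actual SOOM implementation; the toy columns and the
+ordinal_index helper are invented for this example.</P>
+<PRE>
+# Illustrative sketch of ordinal mapping (not the SOOM implementation).
+# Each distinct column value is mapped to the sorted row ordinals at
+# which it occurs; a conjunction of filter clauses then becomes a set
+# intersection of those row IDs.
+import numpy
+
+# Invented toy columns: six records with sex and age group.
+sex    = ['M', 'F', 'F', 'M', 'F', 'M']
+agegrp = [ 1,   2,   2,   2,   1,   2 ]
+
+def ordinal_index(values):
+    """Map each distinct value to a sorted array of row ordinals."""
+    index = {}
+    for row_id, value in enumerate(values):
+        index.setdefault(value, []).append(row_id)
+    return dict((v, numpy.array(ids)) for v, ids in index.items())
+
+sex_idx = ordinal_index(sex)
+age_idx = ordinal_index(agegrp)
+
+# The filter "sex == 'F' AND agegrp == 2" is an intersection of row IDs:
+print(numpy.intersect1d(sex_idx['F'], age_idx[2]))   # -> [1 2]
+</PRE>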
+<P>This tutorial assumes that NEA has been installed on a suitable
+computer and that the demonstration datasets have been loaded. If
+not, please see the README file in the top level NEA directory for
+instructions on creating these datasets. We also recommend that you
+run the api_demo.py and plot_demo.py programmes in the demo
+subdirectory to gain an appreciation of the programmatic interface to
+NEA, which can be used directly in any batch or interactive Python
+programme, before working through this introduction to the Web
+interface. Instructions for running demo/api_demo.py and
+demo/plot_demo.py can be found in the README file mentioned above.</P>
+<P>Please report all bugs, problems, feature requests and ideas for
+future development to the NetEpi-discuss mailing list. You need to
+subscribe to this list in order to post messages to it – please see
+the <A HREF="http://lists.sourceforge.net/mailman/listinfo/netepi-discuss">list
+management Web page</A>.</P>
+<P STYLE="margin-top: 0.42cm; page-break-after: avoid"><FONT FACE="Albany, sans-serif"><FONT SIZE=4>NEA
+home page</FONT></FONT></P>
+<P>The first step is to open your Web browser and type in the URL for
+the NEA demonstration. If you are using nea-standalone.py to host the
+demonstration Web site on your own computer, then the URL will
+generally be http://localhost:8080/cgi-bin/nea/nea.py. If the
+demonstration is being hosted on an Apache web server, the URL will
+be something like http://some_server/cgi-bin/nea/nea.py, where
+some_server is the DNS name of the host server.</P>
+<P>Note that unless your system administrator has established access
+control within Apache, no username or password is required to access
+the NEA demonstration site. Future versions of NEA will provide
+built-in user authentication and access control facilities.</P>
+<P>The home page displays two pull-down lists with buttons beside
+each.</P>
+<P>The upper pull-down list allows you to select a dataset to work
+with. As new datasets are added to directories within the SOOM engine
+search path, they automatically appear in this list. Clicking on the
+<B>[Explore]</B> button displays metadata about the selected data in
+a new browser window. You can click on the <B>[View]</B> button to
+the left of each column name to see further details for that column.</P>
+<P>The lower pull-down list allows you to select a type of analysis
+to perform on the selected dataset. At this stage the analyses are
+restricted to exploratory graphs and tables. More sophisticated and
+specialised types of epidemiological analyses will be added in future
+versions.</P>
+<P>Several of the features and facilities are common to all types of
+analyses. These will be covered in detail in the context of bar
+charts, but will not be mentioned for other types of analyses, as
+they work in the same fashion everywhere in NEA.</P>
+<P>Choose the National Hospital Discharge Survey dataset from the
+upper pull-down list, and Bar Chart from the lower analysis type
+pull-down list. Click the <B>[Analyse]</B> button.
+</P>
+<P STYLE="margin-top: 0.42cm; page-break-after: avoid"><FONT FACE="Albany, sans-serif"><FONT SIZE=4>Filters</FONT></FONT></P>
+<P>The Filter section allows you to specify filters to restrict your
+analysis to certain rows in the dataset. The text area shows the
+current filter expression. You can directly enter filter expressions
+here if you wish. The syntax is very similar to SQL WHERE clause
+syntax, with a few differences and extensions. In particular, the
+equality operator is “==”, not “=” (this may change in a
+later version). The logical operators AND, OR and NOT are supported,
+as is bracketing (and nested bracketing) of expressions. An “in”
+operator is provided, which tests for values in a list, e.g. “column_a
+in (1, 2, 3)” is equivalent to “column_a == 1 OR column_a == 2 OR
+column_a == 3”. It is also possible to test whether string values
+start with certain substrings by suffixing the operator with a colon,
+e.g. “column_b ==: 'A'” will select all values in column_b which
+start with 'A'.</P>
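+<P>For example (the column names here are hypothetical), each of the
+following is a valid filter expression of the kind described above:</P>
+<PRE>
+sex == 'F' AND agegrp in (1, 2, 3)
+(year == 1996 OR year == 1997) AND NOT (sex == 'M')
+diagnosis1 ==: '250'
+</PRE>
+<P>The last expression selects all rows whose diagnosis1 value starts
+with the string '250'.</P>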
+<P>However, it is usually easier to use the filter editor to define
+dataset filters. To do this, click the <B>[New]</B> button to the
+right of the filter text box. You will be presented with the filter
+editing page. The filter is shown diagrammatically on the left of the
+page, with the currently selected clause of the filter highlighted in
+light pink. Edit controls for the currently selected clause appear in
+a panel in the middle of the page, and various action buttons appear
+on the right.</P>
+<P>To edit a clause, click the red “target” symbol to the left of
+the clause (if the filter is new and thus undefined, a blank clause
+denoted by “???” should already be highlighted). In the edit panel
+in the middle of the page, select a column for the filter clause from
+one of the three pull-down lists provided, and then click the <B>[>>]</B>
+button (the clause editor operates as a “wizard” or “druid”,
+allowing you to move to the next step or the previous step by
+clicking the <B>[>>]</B> or <B>[<<]</B> buttons
+respectively).</P>
+<P STYLE="font-weight: medium">The operator expression section of the
+clause will then be highlighted, and you can choose an appropriate
+operator from the pull-down list. Note that the “starts with”
+operators only work with string values (which may be strings of
+numerals, eg ICD-9-CM codes). Then click the <B>[>>]</B> button
+again.
+</P>
+<P>The value section of the clause editor will display a pull-down
+selection box for categorical or ordinal columns with small or
+moderate cardinality – select a value from the list (or multiple
+values if the “in” operator has been used – hold down the <B>Ctrl</B>
+key to select multiple items). For high cardinality columns, and
+scalar columns, a text box is displayed in which an appropriate value
+can be entered. Future versions of NetEpi Analysis will have better
+methods for selecting values for high cardinality columns such as
+those which contain ICD codes. If the column you selected is a date
+column, then components of the date may be selected from pull-down
+lists (the next version will support specification of time and
+date/time values in the filter editor as well).</P>
+<P>When you have specified the value, click the <B>[>>]</B>
+button again. The filter clause editor panel will disappear from the
+middle of the page, and the filter clause will be shown on the left.
+</P>
+<P>To edit that clause again, or to add new clauses, click on the
+target symbol at the left-hand side of the clause. The clause editor
+panel will re-appear in the middle of the page.</P>
+<P>To add additional clauses to the filter, click on the <B>[and]</B>
+or <B>[or]</B> buttons in the lower left-hand corner of the clause
+editor panel. The <B>[Delete]</B> button at the lower right of the
+clause editor panel has the obvious effect.</P>
+<P>By repeating the above steps, complex filters can be built up and
+displayed in diagrammatic form. When you have finished, you can click
+the <B>[Okay]</B> button on the right of the page. The filter which
+you have defined will then appear in the filter text box on the
+analysis parameters page. Note that although you can edit the filter
+definition directly in the text box on the analysis parameter pages,
+your edits will not be reflected in the state of the filter displayed
+on the filter editor page - in other words, the connection between
+the two is only unidirectional, from filter editor to filter text
+box, at this stage.</P>
+<P>For more complex filter definitions, rather than just clicking
+<B>[Okay]</B>, it is a good idea to give the definition a name and
+description by clicking on the <B>[Edit info]</B> button, and then
+clicking the <B>[Save]</B> button to save the filter definition. Note
+that filter definitions are currently shared between all users.
+Future versions of NEA will provide user-specific workspaces.</P>
+<P>If you make a mistake while editing a filter definition, you can
+use the <B>[Undo]</B> and <B>[Redo]</B> buttons to roll back or roll
+forward the changes you have made to the definition.</P>
+<P>Saved filter definitions can be loaded by selecting the definition
+by name from the pull-down list to the right of the filter text box
+and clicking <B>[Load]</B>. Loaded or current filter definitions can
+be edited by clicking the <B>[Edit]</B> button.</P>
+<P STYLE="margin-top: 0.42cm; page-break-after: avoid"><FONT FACE="Albany, sans-serif"><FONT SIZE=4>Titles
+and Footers</FONT></FONT></P>
+<P>These text boxes allow you to specify titles and footers for your
+graphs and tables. They are automatically populated with text, which
+is adjusted each time you set various parameters. You can adjust the
+contents of the title and footer text boxes if you wish. However, if
+you do edit the text, no further automatic changes will be made until
+you start a new analysis. (Note that for Summary Tables and
+Crosstabs, the titles and footers text boxes are populated after you
+run the analysis – they can then be edited and the analysis re-run.
+In future versions the behaviour will be made the same as the plot
+analysis types.)
+</P>
+<P>Future versions will allow similar editing of graph axis labels
+and table row and column headings.</P>
+<P STYLE="margin-top: 0.42cm; page-break-after: avoid"><FONT FACE="Albany, sans-serif"><FONT SIZE=4>Output
+format</FONT></FONT></P>
+<P>For graphs, you can change the format of the output by choosing
+PNG (web graphics), PDF or SVG formats. SVG requires a browser
+plug-in or stand-alone SVG viewer – see the <A HREF="http://www.w3.org/Graphics/SVG/SVG-Implementations">W3C
+Web page on SVG implementations</A> for more information. After
+selecting a new format, click the <B>[Change]</B> button. The size or
+orientation of the selected output format can be set using the radio
+buttons on the right. Future versions will permit greater control
+over output size and orientation, as well as a wider range of output
+formats.</P>
+<P STYLE="margin-top: 0.42cm; page-break-after: avoid"><FONT FACE="Albany, sans-serif"><FONT SIZE=4>Bar
+charts</FONT></FONT></P>
+<P>You should have the Bar Chart parameter specification page
+displayed.</P>
+<P>The <B>X Column</B> selection list allows you to select a column
+to use as the X axis of the bar chart. The X axis is by convention
+the horizontal axis, but selecting the <B>Horizontal</B> check box
+makes the X axis the vertical axis (and thus the bars horizontal).
+Note that only categorical, ordinal and date columns are available
+for selection for the bar chart X axis.</P>
+<P>The <B>Measure Column</B> pull-down list is used to select the
+type of measure to use for the Y axis of the bar chart – that is,
+the quantity which determines the height (or length) of each bar. It
+defaults to <B>Frequency</B>, weighted by the default weighting
+column for the dataset, if there is one. Other weighting columns can
+be chosen if they are available for the dataset in question, or
+weighting can be disabled by selecting “No weighting” from the
+weighting pull-down selection list (the rightmost one of the three).
+For some types of measures, such as mean or median, a scalar column
+must be chosen as the basis of the measure, using the middle
+pull-down selection list.</P>
+<P>Note also that the first measure shown in the list is a proportion
+(of weighted or unweighted frequencies, as appropriate) for each
+category on the X axis. If stacking, grouping or paneling columns are
+also specified (see below), then proportions for combinations of each
+of these conditioning columns are also automatically made available
+for use. Future versions may offer automatic calculation of
+proportions of quantities other than counts, where this makes
+sense. Support for other forms of ratio may also be added.</P>
+<P>A <B>Group By</B> column can also be specified – this causes
+separate bars to be displayed for each value of the Group By column.
+Selecting the <B>Stack</B> check box causes stacking (division of
+bars) rather than side-by-side grouping.</P>
+<P>One or more paneling columns can similarly be specified by
+clicking on the <B>[Add]</B> button in the <B>Panel(s)</B> section.
+This causes separate bar charts to be created in panels for each
+value of paneling column(s). It is not a good idea to specify more
+than two or three paneling columns. Also, it is possible to use
+scalar columns for paneling, in which case the paneling columns will
+be automatically partitioned into ranges. However, for large,
+unfiltered datasets, this can be an exceedingly slow operation.
+Future versions will advise users of this fact and offer
+alternatives, such as faster binning of scalar columns in Numeric
+Python before passing data to R for plotting.</P>
+<P STYLE="font-weight: medium">Finally, the origin of bar charts can
+be specified using the <B>Origin</B> text box.</P>
+<P STYLE="margin-top: 0.42cm; page-break-after: avoid"><FONT FACE="Albany, sans-serif"><FONT SIZE=4>Dot
+charts</FONT></FONT></P>
+<P>Dot charts are very similar to bar charts, except that dots are
+used instead of bars. All of the parameters are the same as for bar
+charts, except that <B>Group-by Column</B> is replaced by <B>Stack-by
+Column</B> because dots are always stacked along a single line for
+each value of the axis column.
+</P>
+<P>Future versions will permit the size, shape and colour of the dots
+to be set by the user.</P>
+<P STYLE="margin-top: 0.42cm; page-break-after: avoid"><FONT FACE="Albany, sans-serif"><FONT SIZE=4>Line
+plots</FONT></FONT></P>
+<P>Line plots are designed to display time series data, using date,
+time or date/time columns for the X axis. However, categorical and
+ordinal columns can also be used for the X axis. In other respects,
+the parameters for line plots are similar to those for bar and dot
+charts. Once again, future versions will permit the width, style and
+colour of lines to be set by the user (this can already be done via
+the programmatic interface).</P>
+<P>When a date column is used for the X axis, NEA tries to select
+appropriate tick mark points. More control over this will be provided
+in future versions.</P>
+<P STYLE="margin-top: 0.42cm; page-break-after: avoid"><FONT FACE="Albany, sans-serif"><FONT SIZE=4>Histogram
+and density plots</FONT></FONT></P>
+<P>These allow a single scalar column to be specified. For
+histograms, the <B>Bins</B> parameter allows the number of bars in
+the histogram to be set. Future versions will also allow parameters
+for density plots to be set.</P>
+<P>Note that histogram and density plots, as well as box plots and
+scatter plots (see below), involve the transfer of all the data
+(after filtering) for the selected column from Python to R. This can
+be slow and can use a great deal of memory. Therefore, when using
+large datasets, it is recommended that filters are used to limit the
+amount of data being processed. Future versions will warn the user of
+this issue and offer optional random or stratified random sampling of
+data used for these types of plots.</P>
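+<P>When using the programmatic interface, a simple interim workaround
+(a sketch under our own assumptions, not an existing NEA option) is
+to sample the column yourself before plotting:</P>
+<PRE>
+# Sketch only: cap the number of rows transferred to R by taking
+# a simple random sample of an over-large column.
+import random
+
+def sample_rows(values, max_rows=50000):
+    if len(values) <= max_rows:
+        return values
+    return random.sample(values, max_rows)
+</PRE>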
+<P STYLE="margin-top: 0.42cm; page-break-after: avoid"><FONT FACE="Albany, sans-serif"><FONT SIZE=4>Box-and-whisker
+plots</FONT></FONT></P>
+<P>Only scalar columns can be chosen for the Y axis, and only
+categorical, ordinal or date columns can be chosen for the X axis.
+The <B>Outliers</B> and <B>Notches</B> options currently have no
+effect. Future versions may calculate the statistics needed for
+drawing the box plots in Numeric Python, which may speed this type of
+analysis considerably for large datasets.</P>
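+<P>The statistics involved are modest; a hedged sketch of how they
+might be computed with numpy (not the current implementation, which
+delegates this work to R) follows:</P>
+<PRE>
+# Sketch: Tukey box-plot statistics computed outside R.
+import numpy
+
+def boxplot_stats(values):
+    values = numpy.sort(numpy.asarray(values, dtype=float))
+    q1, med, q3 = [numpy.percentile(values, p) for p in (25, 50, 75)]
+    iqr = q3 - q1
+    # whiskers: extreme data points within 1.5 IQR of the quartiles
+    lo = values[values >= q1 - 1.5 * iqr].min()
+    hi = values[values <= q3 + 1.5 * iqr].max()
+    return lo, q1, med, q3, hi
+</PRE>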
+<P STYLE="margin-top: 0.42cm; page-break-after: avoid"><FONT FACE="Albany, sans-serif"><FONT SIZE=4>Scatter
+plots</FONT></FONT></P>
+<P>These are self-explanatory. Only scalar columns can be chosen for
+the X and Y axes. Better tick marks for log-scaled axes are on the
+TO-DO list for future versions.</P>
+<P STYLE="margin-top: 0.42cm; page-break-after: avoid"><FONT FACE="Albany, sans-serif"><FONT SIZE=4>Scatter
+matrix plots </FONT></FONT>
+</P>
+<P>The <B>Measures</B> parameter allows several scalar columns to be
+selected. A matrix of scatter plots is then displayed, one plot for
+each pair of columns.</P>
+<P STYLE="margin-top: 0.42cm; page-break-after: avoid"><FONT FACE="Albany, sans-serif"><FONT SIZE=4>Summary
+tables</FONT></FONT></P>
+<P>This produces simple summaries, similar to those produced by
+GROUP-BY in SQL or PROC SUMMARY in <A HREF="http://www.sas.com/">SAS</A><FONT FACE="Bitstream Vera Serif">®</FONT>.
+Zero or more categorical, ordinal or date columns can be chosen (by
+clicking the <B>[Add]</B> button) to condition the summary. One row
+of summary data is produced for each combination of values in each of
+the conditioning columns. Zero or more <B>Statistics</B>, which are
+equivalent to <B>Measures</B> in the plots, can be specified (if none
+are specified then unweighted frequency counts are shown). The
+inconsistency in nomenclature (<B>Measures</B> versus <B>Statistics</B>)
+will be corrected in a future version, and the ability to specify
+frequency proportions and other ratios will be added, as will the
+ability to save the output as a new dataset, and to export it as a
+CSV or other data file for use in other programmes.</P>
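+<P>In spirit, the summary is equivalent to the following pure-Python
+sketch (hypothetical, not the NEA API): one output row per
+combination of conditioning column values, with unweighted frequency
+counts as the default statistic:</P>
+<PRE>
+# Hypothetical sketch of a GROUP-BY style summary.
+def summarise(rows, by, statistic=len):
+    groups = {}
+    for row in rows:
+        key = tuple(row[col] for col in by)
+        groups.setdefault(key, []).append(row)
+    return sorted((key, statistic(group))
+                  for key, group in groups.items())
+
+# summarise(data, by=('sex', 'year')) gives one (key, count) row
+# per sex/year combination, like the default frequency counts.
+</PRE>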
+<P STYLE="margin-top: 0.42cm; page-break-after: avoid"><FONT FACE="Albany, sans-serif"><FONT SIZE=4>Crosstabs</FONT></FONT></P>
+<P>Cross-tabulations may be
+produced using this analysis type. At least one categorical, ordinal
+or date <B>Column</B> column and one categorical, ordinal or date
+<B>Row</B> column must be specified. Note however that more than one
+Row or Column column can be specified, in which case the row or
+column value headings are nested. By default, crosstabs display
+frequency counts, weighted according to the setting of the <B>Weight
+by column</B> parameter. Other statistics may be added to the table.
+Proportions of frequency counts can also be added by selecting one or
+more proportions in the <B>Proportions</B> list box (hold down the
+<B>Ctrl</B> key to select more than one). The method for selecting
+proportions will be made more like that used for the plots in a
+future version.</P>
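+<P>For a two-way table, the available proportions reduce to simple
+normalisations of the count matrix; the sketch below (illustrative
+only, using numpy) shows row, column and overall proportions:</P>
+<PRE>
+# Illustration: proportions over a matrix of frequency counts.
+import numpy
+
+counts = numpy.array([[20., 30.],
+                      [10., 40.]])
+row_props = counts / counts.sum(axis=1)[:, numpy.newaxis]
+col_props = counts / counts.sum(axis=0)
+all_props = counts / counts.sum()
+</PRE>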
+<P>The <B>Heatmap</B> check box
+demonstrates the use of table cell shading: in the current
+version, cells are shaded based on absolute value. Future versions
+will offer a range of shading options based on the expectation for
+each cell in the table. <B>Marginal Totals</B> may be added at the
+top and left of rows and columns respectively, or at the bottom and
+right (or suppressed entirely, which is the default).</P>
+<P>On some browsers, complex
+crosstabs may not display correctly – if so, retry the analysis
+after selecting the <B>Use simplified table rendering</B> check box.</P>
+<P STYLE="margin-top: 0.42cm; page-break-after: avoid"><FONT FACE="Albany, sans-serif"><FONT SIZE=4>Demonstration
+datasets</FONT></FONT></P>
+<P>Programmes to load two
+demonstration datasets are provided. The US National Hospital
+Discharge Survey provides a moderately sized dataset (just
+over 2 million records) representing a sampled survey (and yes,
+we are aware that the variance estimates do not currently account for
+the survey design effects – a future version will correct this).
+Please be sure to observe the data use restrictions which the National Center for
+Health Statistics attaches to these files (see the various README files at
+<a href="ftp://ftp.cdc.gov/pub/Health_Statistics/NCHS/Dataset_Documentation/NHDS/">ftp://ftp.cdc.gov/pub/Health_Statistics/NCHS/Dataset_Documentation/NHDS/</a> ).
+Some sample WHO national health indicator data are also provided
+(please see the README file in the demo subdirectory for
+instructions on how to obtain fresh versions of these indicator data).
+The WHO indicator dataset is small, but provides many more scalar
+columns (as well as demonstrating some current deficiencies in the
+axis labelling routines...). You are encouraged to load additional
+datasets to try out. At the moment there is no formal documentation
+of the API for loading datasets, but it should be fairly obvious from
+the examples in the demo/loaders subdirectory, as well as from the
+programme source code. Full documentation will of course be included
+in future versions.</P>
+<P STYLE="margin-top: 0.42cm; page-break-after: avoid"><FONT FACE="Albany, sans-serif"><FONT SIZE=4>Future
+directions</FONT></FONT></P>
+<P>Features with the highest
+priority for inclusion in the next version are:</P>
+<UL>
+ <LI><P>the ability to suppress
+ certain values from the table and graph output (but not from the
+ underlying computations – filtering can be used to do that);</P>
+ <LI><P>the ability to create
+ ordinal (categorical) columns from scalar (continuous or discrete
+ value) columns using a range of “binning” or histogramming
+ methods (including user-specified ranges);</P>
+ <LI><P>other forms of column
+ recoding facilities using Python syntax;</P>
+ <LI><P>the ability to specify
+ customised orders for the display of column values in tables and
+ plots;</P>
+	<LI><P>the addition of some
+ facilities for the calculation of basic epidemiological quantities
+ such as rates, relative risks and odds ratios.</P>
+ <LI><P>Please see the TODO file in
+ the top level directory of the NetEpi Analysis distribution for
+ details of other planned or possible enhancements.</P>
+</UL>
+<P><BR><BR>
+</P>
+<P><BR><BR>
+</P>
+</BODY>
+</HTML>
+
diff --git a/web/static/help.png b/web/static/help.png
new file mode 100644
index 0000000..5f5f084
Binary files /dev/null and b/web/static/help.png differ
diff --git a/web/static/netepi-2x2.svg b/web/static/netepi-2x2.svg
new file mode 100644
index 0000000..d0fe952
--- /dev/null
+++ b/web/static/netepi-2x2.svg
@@ -0,0 +1,112 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://web.resource.org/cc/"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:sodipodi="http://inkscape.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ inkscape:export-ydpi="11.050000"
+ inkscape:export-xdpi="11.050000"
+ inkscape:export-filename="/home/andrewm/oc/health/NetEpi/netepi-2x2.png"
+ sodipodi:docname="netepi-2x2.svg"
+ sodipodi:docbase="/home/andrewm/oc/health/NetEpi"
+ inkscape:version="0.41"
+ sodipodi:version="0.32"
+ id="svg2"
+ height="297mm"
+ width="210mm">
+ <defs
+ id="defs3">
+ <marker
+ inkscape:stockid="DiamondL"
+ orient="auto"
+ refY="0.0"
+ refX="0.0"
+ id="DiamondL"
+ style="overflow:visible">
+ <path
+ id="path3799"
+ d="M -2.1579186e-005,-7.0710768 L -7.0710894,-8.9383918e-006 L -2.1579186e-005,7.0710589 L 7.0710462,-8.9383918e-006 L -2.1579186e-005,-7.0710768 z "
+ style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;marker-start:none"
+ transform="scale(0.8)" />
+ </marker>
+ </defs>
+ <sodipodi:namedview
+ inkscape:window-y="208"
+ inkscape:window-x="79"
+ inkscape:window-height="908"
+ inkscape:window-width="1044"
+ inkscape:grid-points="true"
+ showguides="false"
+ showgrid="true"
+ inkscape:current-layer="layer1"
+ inkscape:document-units="px"
+ inkscape:cy="884.34839"
+ inkscape:cx="300.48822"
+ inkscape:zoom="1.0000000"
+ inkscape:pageshadow="2"
+ inkscape:pageopacity="0.0"
+ borderopacity="1.0"
+ bordercolor="#666666"
+ pagecolor="#ffffff"
+ id="base" />
+ <metadata
+ id="metadata4">
+ <rdf:RDF
+ id="RDF5">
+ <cc:Work
+ id="Work6"
+ rdf:about="">
+ <dc:format
+ id="format7">image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage"
+ id="type9" />
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <g
+ id="layer1"
+ inkscape:groupmode="layer"
+ inkscape:label="Layer 1">
+ <path
+ id="path1376"
+ d="M 129.57180,352.64808 L 631.00000,352.64808"
+ style="fill:none;fill-opacity:0.75000000;fill-rule:evenodd;stroke:#000000;stroke-width:20.000000;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4.0000000;stroke-opacity:1.0000000;marker-start:none;marker-mid:none" />
+ <path
+ id="path2136"
+ d="M 380.28590,101.93398 L 380.28590,603.36218"
+ style="fill:none;fill-opacity:0.75000000;fill-rule:evenodd;stroke:#000000;stroke-width:20.000000;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4.0000000;stroke-opacity:1.0000000;marker-start:none" />
+ <rect
+ y="142.00372"
+ x="170.00000"
+ height="143.29248"
+ width="143.39384"
+ id="rect2142"
+ style="fill:#ee7700;fill-opacity:1.0000000;stroke:#ee7700;stroke-width:74.131943;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4.0000000;stroke-opacity:1.0000000" />
+ <rect
+ y="207.50546"
+ x="433.60452"
+ height="91.252190"
+ width="91.316750"
+ id="rect3714"
+ style="fill:#008800;fill-opacity:1.0000000;stroke:#008800;stroke-width:47.209057;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4.0000000;stroke-opacity:1.0000000" />
+ <rect
+ y="409.19489"
+ x="218.21826"
+ height="105.00522"
+ width="105.07949"
+ id="rect3716"
+ style="fill:#110088;fill-opacity:1.0000000;stroke:#110088;stroke-width:54.324135;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4.0000000;stroke-opacity:1.0000000" />
+ <rect
+ y="399.65689"
+ x="427.62405"
+ height="68.132492"
+ width="68.180695"
+ id="rect3885"
+ style="fill:#bb1100;fill-opacity:1.0000000;stroke:#bb1100;stroke-width:35.248146;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4.0000000;stroke-opacity:1.0000000" />
+ </g>
+</svg>
diff --git a/web/static/style.css b/web/static/style.css
new file mode 100644
index 0000000..0d3e5f8
--- /dev/null
+++ b/web/static/style.css
@@ -0,0 +1,610 @@
+body {
+ background-color: white;
+ color: black;
+ font-size: 10pt;
+ font-family: verdana, arial, sans-serif;
+ margin: 0;
+}
+
+/* Delete buttons */
+.danger {
+ background: #fbb;
+ border: 2px outset red;
+ padding-left: 0.5ex;
+ padding-right: 0.5ex;
+}
+
+.err-msg {
+ color: #c00;
+ background-color: #fee;
+ border: 1px solid #c00;
+ border-left: 0.5em solid #c00;
+ margin-top: 0.5ex;
+ font-weight: bold;
+ padding-left: 1ex;
+}
+.warn-msg {
+ color: #c80;
+ background-color: #fed;
+ border: 1px solid #c80;
+ border-left: 0.5em solid #c80;
+ margin-top: 0.5ex;
+ font-weight: bold;
+ padding-left: 1ex;
+}
+.info-msg {
+ color: #080;
+ background-color: #efe;
+ border: 1px solid #080;
+ border-left: 0.5em solid #080;
+ margin-top: 0.5ex;
+ font-weight: bold;
+ padding-left: 1ex;
+}
+
+.infobox {
+ margin-left: auto;
+ margin-right: auto;
+ text-align: center;
+ width: 70%;
+ border: 2px solid #ecc;
+ background-color: #fee;
+}
+
+/* Banner */
+.bannerbox {
+ border-bottom: 1px solid black;
+ text-align: left;
+}
+.bannerbox .logo {
+ text-align: right;
+ border-right: 4px solid #ccc;
+ padding-right: 1ex;
+}
+.bannerbox .tit {
+ font-size: 24pt;
+}
+.bannerbox .subtit {
+ font-size: 8pt;
+}
+.bannerbox .copyright {
+ font-size: 6pt;
+}
+.bannerbox .copyright a {
+ text-decoration: none;
+ color: black;
+ background-color: inherit;
+}
+.bannerbox .copyright a:hover {
+ text-decoration: underline;
+}
+
+@media print {
+ .bannerbox {
+ display: none;
+ }
+}
+
+/* === */
+.dsview
+{
+ width: 100%;
+}
+.dsview th {
+ text-align: left;
+ border-bottom: 4px solid #ccc;
+ padding-top: 0.5ex;
+}
+.dsview td {
+ background-color: #eee;
+}
+.dsview input {
+ width: 6em;
+}
+
+/* === */
+.explore {
+ text-align: left;
+}
+
+.explore th {
+ text-align: left;
+ font-weight: bold;
+}
+
+.explore .label {
+ text-align: right;
+}
+
+.explore .detail {
+ background-color: #eee;
+ color: inherit;
+}
+
+.scrolltab {
+ overflow: auto;
+ height: 20em;
+ width: 100%;
+ border-collapse: collapse;
+}
+.scrolltab th {
+ border: 1px solid black;
+ text-align: left;
+}
+.scrolltab td {
+ border: 1px solid black;
+ text-align: left;
+}
+
+.times {
+ font-size: 50%;
+ color: #888;
+}
+.timedetail {
+ margin: 0.5ex;
+ padding: 0.5ex;
+ width: 30em;
+ border: 1px solid #ccc;
+ background-color: #eee;
+ font-size: 50%;
+ color: #888;
+}
+
+
+.submit {
+ width: 6em;
+ font-size: 9pt;
+}
+.bodytable {
+ width: 90%;
+ margin: auto;
+ border-collapse: separate;
+ border-spacing: 1ex;
+ text-align: left;
+}
+@media print {
+ .bodytable {
+ display: none;
+ }
+}
+.bodytable td {
+ vertical-align: top;
+}
+
+.bodytable .actions {
+ padding: 1ex;
+ border: 1px solid black;
+ background: #eee;
+}
+.bodytable .actions input {
+ width: 10em;
+}
+
+.bodytable .section {
+ width: 20%;
+ font-weight: bold;
+ padding-left: 4px;
+/* font-size: 90%; */
+ white-space: nowrap;
+ background: #eee;
+ border-right: 4px solid #ccc;
+ padding-right: 1ex;
+}
+.bodytable .note {
+ font-size: 70%;
+ color: #888;
+}
+.rowtable {
+ width: 100%;
+ border-collapse: collapse;
+ border-spacing: 0ex;
+ white-space: nowrap;
+}
+.rowtable .arrows {
+ width: 14px;
+ vertical-align: middle;
+}
+
+.fill, .fill * {
+ width: 99%;
+}
+
+.error {
+ color: white;
+ font-weight: bold;
+ background-color: #c33;
+}
+
+.warn {
+ color: white;
+ font-weight: bold;
+ background-color: #fc0;
+}
+
+.filteredit {
+ margin: 0;
+ border-top: 2px solid #888;
+ position: absolute;
+ height: 30ex;
+ bottom: 0;
+ left: 0;
+ right: 0;
+ border-collapse: collapse;
+}
+.filteredit .title {
+ font-weight: bold;
+}
+
+.filteredit .filterbuttons {
+ background-color: #ccc;
+ border-bottom: 2px solid #888;
+ width: 100%;
+ margin: 0;
+ border-collapse: collapse;
+ padding: 2px;
+}
+.filteredit .rightfloatbuttons {
+ float: right;
+ height: 100%;
+ vertical-align: middle;
+ width: 6em;
+}
+
+.filterlabel {
+ width: 100%;
+ background-color: #ccf;
+ border-collapse: collapse;
+}
+.filterlabel .label {
+ color: #888;
+ white-space: nowrap;
+ padding-left: 1em;
+}
+.filterlabel .value {
+ font-weight: bold;
+ white-space: nowrap;
+}
+.filterlabel .desc {
+ width: 100%;
+ white-space: normal;
+}
+.filterlabeledit {
+ background-color: #ccf;
+}
+.filterlabeledit .edit th {
+ text-align: left;
+}
+.filterlabeledit .edit td {
+ white-space: nowrap;
+}
+.filterlabeledit .edit .field {
+ width: 30em;
+}
+.filterdelete {
+ background: #fcc;
+ color: #f00;
+ text-align: center;
+}
+
+.filtertop {
+ position: absolute;
+ top: 0;
+ bottom: 30ex;
+ width: 100%;
+ overflow: auto;
+}
+.filter {
+ border-collapse: separate;
+ text-align: left;
+ width: 100%;
+}
+
+.filter .group {
+ background-color: #ccc;
+ color: inherit;
+ border-top: #933 2px solid;
+ border-left: #933 2px solid;
+ border-bottom: #933 2px solid;
+ width: 2em;
+ text-align: center;
+}
+.filter .groupsel {
+ background-color: #fcc;
+ color: inherit;
+ border-top: #933 2px solid;
+ border-left: #933 2px solid;
+ border-bottom: #933 2px solid;
+ width: 2em;
+ text-align: center;
+}
+.filter .leaf {
+ background-color: #ccc;
+ color: inherit;
+ border-top: #933 1px solid;
+ border-right: #933 1px solid;
+ border-bottom: #933 1px solid;
+ /*white-space: nowrap;*/
+}
+.filter .leafsel {
+ background-color: #fcc;
+ color: inherit;
+ border-top: #933 1px solid;
+ border-right: #933 1px solid;
+ border-bottom: #933 1px solid;
+}
+.exprview {
+ background-color: #c99;
+}
+.exprview .edit .sel {
+ width: 14em;
+}
+.exprview .edit .highlight {
+ background-color: #fcc;
+ color: inherit;
+ border-left: 3px solid #c99;
+ border-right: 3px solid #c99;
+ padding: 3px;
+ text-align: center;
+}
+.exprview .edit .selbutton {
+ width: 100%;
+ background-color: #fcc;
+ display: block;
+ font-weight: bold;
+}
+.exprview .edit .selbutton:hover {
+ background-color: #fcc;
+}
+.exprview .edit textarea {
+ width: 99%;
+}
+
+.search {
+ background-color: #ffc;
+ margin-left: auto;
+ margin-right: auto;
+}
+
+.butt input {
+ width: 6em;
+}
+
+.outputsel {
+ width: 6em;
+}
+
+.header {
+ font-weight: bold;
+ text-align: center;
+ font-size: 120%;
+ white-space: pre;
+}
+.footer {
+ font-weight: bold;
+ text-align: center;
+ white-space: pre;
+}
+
+.tableout {
+ border: solid #999;
+ border-collapse: collapse;
+}
+.tableout th {
+ border-bottom: double #ccc;
+ border-left: solid 1px #ccc;
+ border-right: solid 1px #ccc;
+ font-weight: bold;
+}
+.tableout td {
+ border-left: solid 1px #ccc;
+ border-right: solid 1px #ccc;
+ border-bottom: solid 1px #ccc;
+ text-align: left;
+ padding-left: 4px;
+ padding-right: 4px;
+}
+.tableout .numeric {
+ text-align: right;
+}
+
+.crosstab {
+ border-collapse: collapse;
+}
+
+.crosstab .rgroup {
+ border: solid 1px black;
+}
+
+.crosstab .cgroup {
+ border: solid 1px black;
+}
+.crosstab .row .label {
+ border: solid 2px black;
+ font-weight: bold;
+ white-space: normal;
+}
+
+.crosstab .row .value {
+ border: solid 1px black;
+ font-weight: bold;
+ font-size: 80%;
+ white-space: normal;
+}
+
+.crosstab .column .label {
+ border: solid 2px black;
+ font-weight: bold;
+ white-space: normal;
+}
+
+.crosstab .column .value {
+ border: solid 1px black;
+ font-weight: bold;
+ font-size: 80%;
+ white-space: normal;
+}
+.crosstab .data {
+ border: dotted 1px #99f;
+ font-size: 80%;
+ white-space: nowrap;
+}
+.crosstab .data-t {
+ border: dotted 1px #99f;
+ border-top: solid 1px #99f;
+ font-size: 80%;
+ white-space: nowrap;
+}
+.crosstab .data-l {
+ border: dotted 1px #99f;
+ border-left: solid 1px #99f;
+ font-size: 80%;
+ white-space: nowrap;
+}
+.crosstab .data-lt {
+ border: dotted 1px #99f;
+ border-left: solid 1px #99f;
+ border-top: solid 1px #99f;
+ font-size: 80%;
+ white-space: nowrap;
+}
+.crosstab .data-b {
+ border: dotted 1px #99f;
+ border-bottom: solid 1px #99f;
+ font-size: 80%;
+ white-space: nowrap;
+}
+.crosstab .data-r {
+ border: dotted 1px #99f;
+ border-right: solid 1px #99f;
+ font-size: 80%;
+ white-space: nowrap;
+}
+.crosstab .data-br {
+ border: dotted 1px #99f;
+ border-bottom: solid 1px #99f;
+ border-right: solid 1px #99f;
+ font-size: 80%;
+ white-space: nowrap;
+}
+
+.condcol {
+ margin-left: auto;
+ margin-right: auto;
+ border-collapse: collapse;
+ padding: 2px;
+}
+
+.condcol .buttons {
+ background-color: #ddd;
+}
+
+.condcol td {
+ border-width: 1px;
+ border-color: #888;
+ border-style: none solid solid solid;
+ vertical-align: top;
+}
+.condcol th {
+ border-width: 1px;
+ border-color: #888;
+ border-style: solid solid none solid;
+ border-bottom: 3px double #888;
+}
+.condcol table td {
+ /* we'd rather use a child selector above, but IE doesn't support them, so
+ * we explicitly reset stuff */
+ border-style: none;
+}
+.condcol .edit {
+ background-color: #fdd;
+ width: 100%;
+}
+.condcol .display td {
+ border-collapse: collapse;
+ border-width: 1px;
+ border-color: #888;
+ border-style: none none solid none;
+ vertical-align: top;
+}
+
+.leftcol {
+ width: 50%;
+ display: block;
+ float: left;
+ white-space: normal;
+}
+.rightcol {
+ width: 50%;
+ display: block;
+ float: left;
+ white-space: normal;
+}
+
+.twobytwoparams {
+ margin-left: auto;
+ margin-right: auto;
+ width: 80%;
+}
+.twobytwoparams th {
+ text-align: left;
+}
+.twobytwoparams .ops {
+ vertical-align: top;
+ white-space: nowrap;
+}
+.twobytwoparams .inc {
+ width: 50%;
+ vertical-align: top;
+}
+.twobytwoparams .exc {
+ vertical-align: top;
+}
+.twobytwoparams .buttons {
+ background-color: #ccc;
+}
+.twobytwoparams .widebutt input {
+ width: 8em;
+}
+.twobytwoparams .result {
+ background-color: #ffc;
+}
+
+.twobytwo {
+ text-align: left;
+}
+.twobytwo td {
+ vertical-align: top;
+}
+.twobytwo .sechead {
+ font-size: 120%;
+ font-weight: bold;
+ background-color: #ddd;
+ border-top: 2px solid black;
+}
+.twobytwo .subsechead {
+ font-size: 120%;
+ font-weight: bold;
+ background-color: #eee;
+}
+
+.twobytwo .twobytwotab {
+ border-collapse: collapse;
+}
+.twobytwo .twobytwotab .cell {
+ border: 1px solid black;
+ width: 10em;
+}
+.twobytwo .twobytwotab td {
+ text-align: center;
+}
+.twobytwo .twobytwotab .side {
+ text-align: right;
+}
+.twobytwo .twobytwotab .top {
+ text-align: center;
+}
+.twobytwo .twobytwotab .mt {
+ border: 1px solid black;
+ background-color: #eee;
+ width: 10em;
+}
diff --git a/web/static/target.png b/web/static/target.png
new file mode 100644
index 0000000..e7625d5
Binary files /dev/null and b/web/static/target.png differ
diff --git a/web/static/target.xcf b/web/static/target.xcf
new file mode 100644
index 0000000..ccddc89
Binary files /dev/null and b/web/static/target.xcf differ
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/netepi-analysis.git