[med-svn] [Git][med-team/python-datacache][upstream] New upstream version 1.1.5
Andreas Tille (@tille)
gitlab at salsa.debian.org
Fri Feb 18 21:17:41 GMT 2022
Andreas Tille pushed to branch upstream at Debian Med / python-datacache
Commits:
9f1f5c72 by Andreas Tille at 2022-02-18T18:46:02+01:00
New upstream version 1.1.5
- - - - -
27 changed files:
- − .gitignore
- − .travis.yml
- − LICENSE
- − Makefile
- + PKG-INFO
- README.md
- + datacache.egg-info/PKG-INFO
- + datacache.egg-info/SOURCES.txt
- + datacache.egg-info/dependency_links.txt
- requirements.txt → datacache.egg-info/requires.txt
- + datacache.egg-info/top_level.txt
- datacache/__init__.py
- datacache/cache.py
- datacache/common.py
- datacache/database.py
- datacache/database_helpers.py
- datacache/database_table.py
- datacache/database_types.py
- datacache/download.py
- − datacache/fasta.py
- + setup.cfg
- setup.py
- − test/__init__.py
- test/test_cache_object.py
- test/test_download.py
- − test/test_fasta_db.py
- test/test_fasta_dict.py
Changes:
=====================================
.gitignore deleted
=====================================
@@ -1,54 +0,0 @@
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-
-# C extensions
-*.so
-
-# Distribution / packaging
-.Python
-env/
-bin/
-build/
-develop-eggs/
-dist/
-eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-*.egg-info/
-.installed.cfg
-*.egg
-
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.coverage
-.cache
-nosetests.xml
-coverage.xml
-
-# Translations
-*.mo
-
-# Mr Developer
-.mr.developer.cfg
-.project
-.pydevproject
-
-# Rope
-.ropeproject
-
-# Django stuff:
-*.log
-*.pot
-
-# Sphinx documentation
-docs/_build/
-
=====================================
.travis.yml deleted
=====================================
@@ -1,11 +0,0 @@
-# What's needed to run tests via Travis https://travis-ci.org/
-language: python
-python:
-# - "2.6"
- - "2.7"
-# command to install dependencies
-install:
- - "pip install ."
- - "pip install -r requirements.txt"
-# command to run tests
-script: cd test && nose2
=====================================
LICENSE deleted
=====================================
@@ -1,201 +0,0 @@
-Apache License
- Version 2.0, January 2004
- http://www.apache.org/licenses/
-
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
- 1. Definitions.
-
- "License" shall mean the terms and conditions for use, reproduction,
- and distribution as defined by Sections 1 through 9 of this document.
-
- "Licensor" shall mean the copyright owner or entity authorized by
- the copyright owner that is granting the License.
-
- "Legal Entity" shall mean the union of the acting entity and all
- other entities that control, are controlled by, or are under common
- control with that entity. For the purposes of this definition,
- "control" means (i) the power, direct or indirect, to cause the
- direction or management of such entity, whether by contract or
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
- outstanding shares, or (iii) beneficial ownership of such entity.
-
- "You" (or "Your") shall mean an individual or Legal Entity
- exercising permissions granted by this License.
-
- "Source" form shall mean the preferred form for making modifications,
- including but not limited to software source code, documentation
- source, and configuration files.
-
- "Object" form shall mean any form resulting from mechanical
- transformation or translation of a Source form, including but
- not limited to compiled object code, generated documentation,
- and conversions to other media types.
-
- "Work" shall mean the work of authorship, whether in Source or
- Object form, made available under the License, as indicated by a
- copyright notice that is included in or attached to the work
- (an example is provided in the Appendix below).
-
- "Derivative Works" shall mean any work, whether in Source or Object
- form, that is based on (or derived from) the Work and for which the
- editorial revisions, annotations, elaborations, or other modifications
- represent, as a whole, an original work of authorship. For the purposes
- of this License, Derivative Works shall not include works that remain
- separable from, or merely link (or bind by name) to the interfaces of,
- the Work and Derivative Works thereof.
-
- "Contribution" shall mean any work of authorship, including
- the original version of the Work and any modifications or additions
- to that Work or Derivative Works thereof, that is intentionally
- submitted to Licensor for inclusion in the Work by the copyright owner
- or by an individual or Legal Entity authorized to submit on behalf of
- the copyright owner. For the purposes of this definition, "submitted"
- means any form of electronic, verbal, or written communication sent
- to the Licensor or its representatives, including but not limited to
- communication on electronic mailing lists, source code control systems,
- and issue tracking systems that are managed by, or on behalf of, the
- Licensor for the purpose of discussing and improving the Work, but
- excluding communication that is conspicuously marked or otherwise
- designated in writing by the copyright owner as "Not a Contribution."
-
- "Contributor" shall mean Licensor and any individual or Legal Entity
- on behalf of whom a Contribution has been received by Licensor and
- subsequently incorporated within the Work.
-
- 2. Grant of Copyright License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- copyright license to reproduce, prepare Derivative Works of,
- publicly display, publicly perform, sublicense, and distribute the
- Work and such Derivative Works in Source or Object form.
-
- 3. Grant of Patent License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- (except as stated in this section) patent license to make, have made,
- use, offer to sell, sell, import, and otherwise transfer the Work,
- where such license applies only to those patent claims licensable
- by such Contributor that are necessarily infringed by their
- Contribution(s) alone or by combination of their Contribution(s)
- with the Work to which such Contribution(s) was submitted. If You
- institute patent litigation against any entity (including a
- cross-claim or counterclaim in a lawsuit) alleging that the Work
- or a Contribution incorporated within the Work constitutes direct
- or contributory patent infringement, then any patent licenses
- granted to You under this License for that Work shall terminate
- as of the date such litigation is filed.
-
- 4. Redistribution. You may reproduce and distribute copies of the
- Work or Derivative Works thereof in any medium, with or without
- modifications, and in Source or Object form, provided that You
- meet the following conditions:
-
- (a) You must give any other recipients of the Work or
- Derivative Works a copy of this License; and
-
- (b) You must cause any modified files to carry prominent notices
- stating that You changed the files; and
-
- (c) You must retain, in the Source form of any Derivative Works
- that You distribute, all copyright, patent, trademark, and
- attribution notices from the Source form of the Work,
- excluding those notices that do not pertain to any part of
- the Derivative Works; and
-
- (d) If the Work includes a "NOTICE" text file as part of its
- distribution, then any Derivative Works that You distribute must
- include a readable copy of the attribution notices contained
- within such NOTICE file, excluding those notices that do not
- pertain to any part of the Derivative Works, in at least one
- of the following places: within a NOTICE text file distributed
- as part of the Derivative Works; within the Source form or
- documentation, if provided along with the Derivative Works; or,
- within a display generated by the Derivative Works, if and
- wherever such third-party notices normally appear. The contents
- of the NOTICE file are for informational purposes only and
- do not modify the License. You may add Your own attribution
- notices within Derivative Works that You distribute, alongside
- or as an addendum to the NOTICE text from the Work, provided
- that such additional attribution notices cannot be construed
- as modifying the License.
-
- You may add Your own copyright statement to Your modifications and
- may provide additional or different license terms and conditions
- for use, reproduction, or distribution of Your modifications, or
- for any such Derivative Works as a whole, provided Your use,
- reproduction, and distribution of the Work otherwise complies with
- the conditions stated in this License.
-
- 5. Submission of Contributions. Unless You explicitly state otherwise,
- any Contribution intentionally submitted for inclusion in the Work
- by You to the Licensor shall be under the terms and conditions of
- this License, without any additional terms or conditions.
- Notwithstanding the above, nothing herein shall supersede or modify
- the terms of any separate license agreement you may have executed
- with Licensor regarding such Contributions.
-
- 6. Trademarks. This License does not grant permission to use the trade
- names, trademarks, service marks, or product names of the Licensor,
- except as required for reasonable and customary use in describing the
- origin of the Work and reproducing the content of the NOTICE file.
-
- 7. Disclaimer of Warranty. Unless required by applicable law or
- agreed to in writing, Licensor provides the Work (and each
- Contributor provides its Contributions) on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- implied, including, without limitation, any warranties or conditions
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
- PARTICULAR PURPOSE. You are solely responsible for determining the
- appropriateness of using or redistributing the Work and assume any
- risks associated with Your exercise of permissions under this License.
-
- 8. Limitation of Liability. In no event and under no legal theory,
- whether in tort (including negligence), contract, or otherwise,
- unless required by applicable law (such as deliberate and grossly
- negligent acts) or agreed to in writing, shall any Contributor be
- liable to You for damages, including any direct, indirect, special,
- incidental, or consequential damages of any character arising as a
- result of this License or out of the use or inability to use the
- Work (including but not limited to damages for loss of goodwill,
- work stoppage, computer failure or malfunction, or any and all
- other commercial damages or losses), even if such Contributor
- has been advised of the possibility of such damages.
-
- 9. Accepting Warranty or Additional Liability. While redistributing
- the Work or Derivative Works thereof, You may choose to offer,
- and charge a fee for, acceptance of support, warranty, indemnity,
- or other liability obligations and/or rights consistent with this
- License. However, in accepting such obligations, You may act only
- on Your own behalf and on Your sole responsibility, not on behalf
- of any other Contributor, and only if You agree to indemnify,
- defend, and hold each Contributor harmless for any liability
- incurred by, or claims asserted against, such Contributor by reason
- of your accepting any such warranty or additional liability.
-
- END OF TERMS AND CONDITIONS
-
- APPENDIX: How to apply the Apache License to your work.
-
- To apply the Apache License to your work, attach the following
- boilerplate notice, with the fields enclosed by brackets "{}"
- replaced with your own identifying information. (Don't include
- the brackets!) The text should be enclosed in the appropriate
- comment syntax for the file format. We also recommend that a
- file or class name and description of purpose be included on the
- same "printed page" as the copyright notice for easier
- identification within third-party archives.
-
- Copyright {yyyy} {name of copyright owner}
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
\ No newline at end of file
=====================================
Makefile deleted
=====================================
@@ -1,56 +0,0 @@
-# Compatibility for us old-timers.
-
-# Note: This makefile includes remake-style target comments.
-# These comments before the targets start with #:
-# remake --tasks shows the targets and the comments
-
-PHONY=check clean dist distclean test
-GIT2CL ?= git2cl
-PYTHON ?= python
-PYTHON3 ?= python3
-
-#: the default target - same as running "check"
-all: check
-
-check:
- cd test && nose2
-
-#: Clean up temporary files
-clean:
- $(PYTHON) ./setup.py $@
-
-#: Create source (tarball) and binary (egg) distribution
-dist:
- $(PYTHON) ./setup.py sdist bdist
-
-#: Create source tarball
-sdist:
- $(PYTHON) ./setup.py sdist
-
-#: Create binary egg distribution
-bdist_egg:
- $(PYTHON) ./setup.py bdist_egg
-
-# It is too much work to figure out how to add a new command to distutils
-# to do the following. I'm sure distutils will someday get there.
-DISTCLEAN_FILES = build dist *.egg-info *.pyc *.so py*.py
-
-#: Remove ALL derived files
-distclean: clean
- -rm -fr $(DISTCLEAN_FILES) || true
-
-#: Install package locally
-install:
- $(PYTHON) ./setup.py install
-
-#: Same as 'check' target
-test: check
-
-rmChangeLog:
- rm ChangeLog || true
-
-#: Create a ChangeLog from git via git log and git2cl
-ChangeLog: rmChangeLog
- git log --pretty --numstat --summary | $(GIT2CL) >$@
-
-.PHONY: $(PHONY)
=====================================
PKG-INFO
=====================================
@@ -0,0 +1,35 @@
+Metadata-Version: 1.1
+Name: datacache
+Version: 1.1.5
+Summary: Helpers for transparently downloading datasets
+Home-page: https://github.com/openvax/datacache
+Author: Alex Rubinsteyn
+Author-email: alex.rubinsteyn@mssm.edu
+License: http://www.apache.org/licenses/LICENSE-2.0.html
+Description: DataCache
+ =========
+
+ Helpers for transparently downloading datasets
+
+ API
+ ---
+
+ - **fetch_file**\ (download_url, filename = *None*, decompress =
+ *False*, subdir = *None*)
+ - **fetch_and_transform**\ (transformed_filename, transformer, loader,
+ source_filename, source_url, subdir = *None*)
+ - **fetch_fasta_dict**\ (download_url, filename = *None*, subdir =
+ *None*)
+ - **fetch_fasta_db**\ (table_name, download_url, fasta_filename =
+ *None*, key_column = *‘id’*, value_column = *‘seq’*, subdir = *None*)
+ - **fetch_csv_db**\ (table_name, download_url, csv_filename = *None*,
+ subdir = *None*, \**pandas_kwargs)
+
+Platform: UNKNOWN
+Classifier: Development Status :: 3 - Alpha
+Classifier: Environment :: Console
+Classifier: Operating System :: OS Independent
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Programming Language :: Python
+Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
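
For illustration, a minimal sketch of the API listed above, using the signatures from this PKG-INFO; the URL and subdir here are hypothetical placeholders, not part of the commit:

    import datacache

    # Hypothetical URL, purely for illustration.
    CSV_URL = "https://example.org/data/annotations.csv.gz"

    # Download once and cache; decompress=True stores the unpacked file.
    path = datacache.fetch_file(CSV_URL, decompress=True, subdir="my-project")

    # Fetch the same CSV and load it into a cached sqlite3 table.
    connection = datacache.fetch_csv_db(
        table_name="annotations",
        download_url=CSV_URL)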
=====================================
README.md
=====================================
@@ -1,4 +1,14 @@
-datacache
+<a href="https://travis-ci.org/openvax/datacache">
+ <img src="https://travis-ci.org/openvax/datacache.svg?branch=master" alt="Build Status" />
+</a>
+<a href="https://coveralls.io/github/openvax/datacache?branch=master">
+ <img src="https://coveralls.io/repos/openvax/datacache/badge.svg?branch=master&service=github" alt="Coverage Status" />
+</a>
+<a href="https://pypi.python.org/pypi/datacache/">
+ <img src="https://img.shields.io/pypi/v/datacache.svg?maxAge=1000" alt="PyPI" />
+</a>
+
+DataCache
=========
Helpers for transparently downloading datasets
@@ -13,4 +23,3 @@ Helpers for transparently downloading datasets
key_column = *'id'*, value_column = *'seq'*, subdir = *None*)
* **fetch_csv_db**(table_name, download_url, csv_filename = *None*, subdir = *None*,
\*\*pandas_kwargs)
-
=====================================
datacache.egg-info/PKG-INFO
=====================================
@@ -0,0 +1,35 @@
+Metadata-Version: 1.1
+Name: datacache
+Version: 1.1.5
+Summary: Helpers for transparently downloading datasets
+Home-page: https://github.com/openvax/datacache
+Author: Alex Rubinsteyn
+Author-email: alex.rubinsteyn@mssm.edu
+License: http://www.apache.org/licenses/LICENSE-2.0.html
+Description: DataCache
+ =========
+
+ Helpers for transparently downloading datasets
+
+ API
+ ---
+
+ - **fetch_file**\ (download_url, filename = *None*, decompress =
+ *False*, subdir = *None*)
+ - **fetch_and_transform**\ (transformed_filename, transformer, loader,
+ source_filename, source_url, subdir = *None*)
+ - **fetch_fasta_dict**\ (download_url, filename = *None*, subdir =
+ *None*)
+ - **fetch_fasta_db**\ (table_name, download_url, fasta_filename =
+ *None*, key_column = *‘id’*, value_column = *‘seq’*, subdir = *None*)
+ - **fetch_csv_db**\ (table_name, download_url, csv_filename = *None*,
+ subdir = *None*, \**pandas_kwargs)
+
+Platform: UNKNOWN
+Classifier: Development Status :: 3 - Alpha
+Classifier: Environment :: Console
+Classifier: Operating System :: OS Independent
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Programming Language :: Python
+Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
=====================================
datacache.egg-info/SOURCES.txt
=====================================
@@ -0,0 +1,22 @@
+README.md
+setup.py
+datacache/__init__.py
+datacache/cache.py
+datacache/common.py
+datacache/database.py
+datacache/database_helpers.py
+datacache/database_table.py
+datacache/database_types.py
+datacache/download.py
+datacache.egg-info/PKG-INFO
+datacache.egg-info/SOURCES.txt
+datacache.egg-info/dependency_links.txt
+datacache.egg-info/requires.txt
+datacache.egg-info/top_level.txt
+test/test_cache_object.py
+test/test_database_objects.py
+test/test_database_types.py
+test/test_db_from_dataframes.py
+test/test_download.py
+test/test_fasta_dict.py
+test/test_names.py
\ No newline at end of file
=====================================
datacache.egg-info/dependency_links.txt
=====================================
@@ -0,0 +1 @@
+
=====================================
requirements.txt → datacache.egg-info/requires.txt
=====================================
@@ -1,7 +1,6 @@
pandas>=0.15.2
appdirs>=1.4.0
-progressbar>=2.2
-biopython>=1.65
+progressbar33>=2.4
requests>=2.5.1
-typechecks>=0.0.0
-nose>=1.3.3
+typechecks>=0.0.2
+mock
=====================================
datacache.egg-info/top_level.txt
=====================================
@@ -0,0 +1 @@
+datacache
=====================================
datacache/__init__.py
=====================================
@@ -1,4 +1,4 @@
-# Copyright (c) 2015. Mount Sinai School of Medicine
+# Copyright (c) 2015-2018. Mount Sinai School of Medicine
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -12,12 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+from __future__ import print_function, division, absolute_import
+
from .download import fetch_file, fetch_and_transform, fetch_csv_dataframe
-from .fasta import fetch_fasta_dict
from .database_helpers import (
db_from_dataframe,
db_from_dataframes,
- fetch_fasta_db,
+ db_from_dataframes_with_absolute_path,
fetch_csv_db,
connect_if_correct_version
)
@@ -28,4 +29,23 @@ from .common import (
clear_cache,
build_local_filename
)
-from .cache import Cache
\ No newline at end of file
+from .cache import Cache
+
+__version__ = '1.1.5'
+
+__all__ = [
+ 'fetch_file',
+ 'fetch_and_transform',
+ 'fetch_csv_dataframe',
+ 'db_from_dataframe',
+ 'db_from_dataframes',
+ 'db_from_dataframes_with_absolute_path',
+ 'fetch_csv_db',
+ 'connect_if_correct_version',
+ 'ensure_dir',
+ 'get_data_dir',
+ 'build_path',
+ 'clear_cache',
+ 'build_local_filename',
+ 'Cache',
+]
=====================================
datacache/cache.py
=====================================
@@ -1,4 +1,4 @@
-# Copyright (c) 2015. Mount Sinai School of Medicine
+# Copyright (c) 2015-2018. Mount Sinai School of Medicine
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -12,7 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from __future__ import print_function
+from __future__ import print_function, division, absolute_import
+
from os.path import exists, join
from os import remove
@@ -65,13 +66,19 @@ class Cache(object):
exists.
"""
return download.file_exists(
- url,
- filename=filename,
- decompress=decompress,
- subdir=self.subdir)
+ url,
+ filename=filename,
+ decompress=decompress,
+ subdir=self.subdir)
- def fetch(self, url, filename=None, decompress=False,
- force=False):
+ def fetch(
+ self,
+ url,
+ filename=None,
+ decompress=False,
+ force=False,
+ timeout=None,
+ use_wget_if_available=True):
"""
Return the local path to the downloaded copy of a given URL.
Don't download the file again if it's already present,
@@ -89,7 +96,9 @@ class Cache(object):
filename=filename,
decompress=decompress,
subdir=self.subdir,
- force=force)
+ force=force,
+ timeout=timeout,
+ use_wget_if_available=use_wget_if_available)
self._local_paths[key] = path
return path
@@ -125,5 +134,5 @@ class Cache(object):
db_filename=db_filename,
table_name=table_name,
df=df,
- key_column_name=key_column_name,
+ primary_key=key_column_name,
subdir=self.subdir)
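
The hunks above thread two new keyword arguments, timeout and use_wget_if_available, from Cache.fetch through to the download layer. A minimal sketch of a call using them (the URL is a hypothetical placeholder):

    from datacache import Cache

    cache = Cache("datacache-demo")  # subdir under the user cache directory
    path = cache.fetch(
        "ftp://example.org/pub/reference.fa.gz",  # hypothetical URL
        decompress=True,
        timeout=300,                 # new in 1.1.5: per-download timeout (s)
        use_wget_if_available=True)  # new in 1.1.5: prefer wget when present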
=====================================
datacache/common.py
=====================================
@@ -1,4 +1,4 @@
-# Copyright (c) 2015. Mount Sinai School of Medicine
+# Copyright (c) 2015-2018. Mount Sinai School of Medicine
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+from __future__ import print_function, division, absolute_import
import hashlib
from os import makedirs, environ
@@ -26,11 +27,13 @@ def ensure_dir(path):
makedirs(path)
def get_data_dir(subdir=None, envkey=None):
- if envkey and envkey in environ:
- return environ[envkey]
- if subdir is None:
- subdir = "datacache"
- return appdirs.user_cache_dir(subdir)
+ if envkey and environ.get(envkey):
+ envdir = environ[envkey]
+ if subdir:
+ return join(envdir, subdir)
+ else:
+ return envdir
+ return appdirs.user_cache_dir(subdir if subdir else "datacache")
def build_path(filename, subdir=None):
data_dir = get_data_dir(subdir)
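
The rewritten get_data_dir changes behavior when both envkey and subdir are given: the subdirectory is now joined under the environment-provided directory instead of the subdir being ignored. A sketch under a hypothetical environment variable:

    import os
    from datacache.common import get_data_dir

    os.environ["DEMO_CACHE_DIR"] = "/tmp/demo-cache"  # hypothetical variable

    get_data_dir(envkey="DEMO_CACHE_DIR")
    # -> '/tmp/demo-cache'                 (as before)

    get_data_dir(subdir="genomes", envkey="DEMO_CACHE_DIR")
    # -> '/tmp/demo-cache/genomes'         (new: subdir joined, not ignored)

    get_data_dir(subdir="genomes")
    # -> appdirs.user_cache_dir("genomes") when the variable is unset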
=====================================
datacache/database.py
=====================================
@@ -1,4 +1,4 @@
-# Copyright (c) 2015. Mount Sinai School of Medicine
+# Copyright (c) 2015-2018 Mount Sinai School of Medicine
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -12,11 +12,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+from __future__ import print_function, division, absolute_import
+
import logging
import sqlite3
from typechecks import require_integer, require_string, require_iterable_of
+
+logger = logging.getLogger(__name__)
+
METADATA_TABLE_NAME = "_datacache_metadata"
class Database(object):
@@ -60,7 +65,7 @@ class Database(object):
def execute_sql(self, sql, commit=False):
"""Log and then execute a SQL query"""
- logging.info("Running sqlite query: \"%s\"", sql)
+ logger.info("Running sqlite query: \"%s\"", sql)
self.connection.execute(sql)
if commit:
self.connection.commit()
@@ -154,7 +159,7 @@ class Database(object):
if not all(len(row) == n_columns for row in rows):
raise ValueError("Rows must all have %d values" % n_columns)
blank_slots = ", ".join("?" for _ in range(n_columns))
- logging.info("Inserting %d rows into table %s", len(rows), table_name)
+ logger.info("Inserting %d rows into table %s", len(rows), table_name)
sql = "INSERT INTO %s VALUES (%s)" % (table_name, blank_slots)
self.connection.executemany(sql, rows)
@@ -192,9 +197,10 @@ class Database(object):
Which columns should be indexed
"""
- logging.info("Creating index on %s (%s)" % (
- table_name,
- ", ".join(index_columns)))
+ logger.info(
+ "Creating index on %s (%s)",
+ table_name,
+ ", ".join(index_columns))
index_name = "%s_index_%s" % (
table_name,
"_".join(index_columns))
=====================================
datacache/database_helpers.py
=====================================
@@ -1,4 +1,4 @@
-# Copyright (c) 2015. Mount Sinai School of Medicine
+# Copyright (c) 2015-2018. Mount Sinai School of Medicine
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -12,24 +12,28 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+from __future__ import print_function, division, absolute_import
+
from os import remove
-from os.path import splitext, split, exists
+from os.path import splitext, exists
import logging
-from Bio import SeqIO
from typechecks import (
require_string,
require_integer,
require_iterable_of
)
-from .common import build_path, normalize_filename
-from .download import fetch_file, fetch_csv_dataframe
+from .common import build_path
+from .download import fetch_csv_dataframe
from .database import Database
from .database_table import DatabaseTable
from .database_types import db_type
+logger = logging.getLogger(__name__)
+
+
def connect_if_correct_version(db_path, version):
"""Return a sqlite3 database connection if the version in the database's
metadata matches the version argument.
@@ -45,37 +49,29 @@ def connect_if_correct_version(db_path, version):
return None
def _create_cached_db(
- db_filename,
+ db_path,
tables,
- subdir=None,
version=1):
"""
Either create or retrieve sqlite database.
Parameters
--------
- db_filename : str
- Name of sqlite3 database file
+ db_path : str
+ Path to sqlite3 database file
tables : dict
Dictionary mapping table names to datacache.DatabaseTable objects
- subdir : str, optional
-
version : int, optional
Version acceptable as cached data.
Returns sqlite3 connection
"""
- require_string(db_filename, "db_filename")
+ require_string(db_path, "db_path")
require_iterable_of(tables, DatabaseTable)
- if not (subdir is None or isinstance(subdir, str)):
- raise TypeError("Expected subdir to be None or str, got %s : %s" % (
- subdir, type(subdir)))
require_integer(version, "version")
- db_path = build_path(db_filename, subdir)
-
# if the database file doesn't already exist and we encounter an error
# later, delete the file before raising an exception
delete_on_error = not exists(db_path)
@@ -91,20 +87,21 @@ def _create_cached_db(
if db.has_tables(table_names) and \
db.has_version() and \
db.version() == version:
- logging.info("Found existing table in database %s", db_path)
+ logger.info("Found existing table in database %s", db_path)
else:
if len(db.table_names()) > 0:
- logging.info("Dropping tables from database %s: %s",
+ logger.info(
+ "Dropping tables from database %s: %s",
db_path,
", ".join(db.table_names()))
- db.drop_tables()
- logging.info(
+ db.drop_all_tables()
+ logger.info(
"Creating database %s containing: %s",
db_path,
", ".join(table_names))
db.create(tables, version)
except:
- logging.warning(
+ logger.warning(
"Failed to create tables %s in database %s",
table_names,
db_path)
@@ -114,38 +111,75 @@ def _create_cached_db(
raise
return db.connection
-def fetch_fasta_db(
- table_name,
- download_url,
- fasta_filename=None,
- key_column='id',
- value_column='seq',
- subdir=None,
- version=1):
+def build_tables(
+ table_names_to_dataframes,
+ table_names_to_primary_keys={},
+ table_names_to_indices={}):
"""
- Download a FASTA file from `download_url` and store it locally as a sqlite3 database.
+ Parameters
+ ----------
+ table_names_to_dataframes : dict
+ Dictionary mapping each table name to a DataFrame
+
+ table_names_to_primary_keys : dict
+ Dictionary mapping each table to its primary key
+
+ table_names_to_indices : dict
+ Dictionary mapping each table to a set of indices
+
+ Returns list of DatabaseTable objects
"""
+ tables = []
+ for table_name, df in table_names_to_dataframes.items():
+ table_indices = table_names_to_indices.get(table_name, [])
+ primary_key = table_names_to_primary_keys.get(table_name)
+ table = DatabaseTable.from_dataframe(
+ name=table_name,
+ df=df,
+ indices=table_indices,
+ primary_key=primary_key)
+ tables.append(table)
+ return tables
- base_filename = normalize_filename(split(download_url)[1])
- db_filename = "%s.%s.%s.db" % (base_filename, key_column, value_column)
+def db_from_dataframes_with_absolute_path(
+ db_path,
+ table_names_to_dataframes,
+ table_names_to_primary_keys={},
+ table_names_to_indices={},
+ overwrite=False,
+ version=1):
+ """
+ Create a sqlite3 database from a collection of DataFrame objects
- fasta_path = fetch_file(
- download_url=download_url,
- filename=fasta_filename,
- subdir=subdir,
- decompress=True)
+ Parameters
+ ----------
+ db_path : str
+ Path to database file to create
- fasta_dict = SeqIO.index(fasta_path, 'fasta')
- table = DatabaseTable.from_fasta_dict(
- table_name,
- fasta_dict,
- key_column=key_column,
- value_column=value_column)
+ table_names_to_dataframes : dict
+ Dictionary from table names to DataFrame objects
+
+ table_names_to_primary_keys : dict, optional
+ Name of primary key column for each table
+
+ table_names_to_indices : dict, optional
+ Dictionary from table names to list of column name tuples
+ overwrite : bool, optional
+ If the database already exists, overwrite it?
+
+ version : int, optional
+ """
+ if overwrite and exists(db_path):
+ remove(db_path)
+
+ tables = build_tables(
+ table_names_to_dataframes,
+ table_names_to_primary_keys,
+ table_names_to_indices)
return _create_cached_db(
- db_filename,
- tables=[table],
- subdir=subdir,
+ db_path,
+ tables=tables,
version=version)
def db_from_dataframes(
@@ -156,7 +190,9 @@ def db_from_dataframes(
subdir=None,
overwrite=False,
version=1):
- """Create a sqlite3 database with
+ """
+ Create a sqlite3 database from a collection of DataFrame objects
+
Parameters
----------
db_filename : str
@@ -178,25 +214,16 @@ def db_from_dataframes(
version : int, optional
"""
+ if not (subdir is None or isinstance(subdir, str)):
+ raise TypeError("Expected subdir to be None or str, got %s : %s" % (
+ subdir, type(subdir)))
db_path = build_path(db_filename, subdir)
-
- if overwrite and exists(db_path):
- remove(db_path)
-
- tables = []
- for table_name, df in dataframes.items():
- table_indices = indices.get(table_name, [])
- primary_key = primary_keys.get(table_name)
- table = DatabaseTable.from_dataframe(
- name=table_name,
- df=df,
- indices=table_indices,
- primary_key=primary_key)
- tables.append(table)
- return _create_cached_db(
+ return db_from_dataframes_with_absolute_path(
db_path,
- tables=tables,
- subdir=subdir,
+ table_names_to_dataframes=dataframes,
+ table_names_to_primary_keys=primary_keys,
+ table_names_to_indices=indices,
+ overwrite=overwrite,
version=version)
def db_from_dataframe(
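
Taken together, db_from_dataframes now resolves the cache path and delegates to db_from_dataframes_with_absolute_path, which builds DatabaseTable objects via build_tables. A minimal usage sketch with hypothetical table and column names, keyword names as they appear in the diff:

    import pandas as pd
    from datacache import db_from_dataframes

    df = pd.DataFrame({"id": [1, 2], "seq": ["ACGT", "TTAG"]})
    connection = db_from_dataframes(
        db_filename="demo.db",              # hypothetical filename
        dataframes={"sequences": df},
        primary_keys={"sequences": "id"},
        indices={"sequences": [("id",)]},   # list of column-name tuples
        overwrite=True)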
=====================================
datacache/database_table.py
=====================================
@@ -1,4 +1,4 @@
-# Copyright (c) 2015. Mount Sinai School of Medicine
+# Copyright (c) 2015-2018. Mount Sinai School of Medicine
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+from __future__ import print_function, division, absolute_import
+
from .database_types import db_type
class DatabaseTable(object):
=====================================
datacache/database_types.py
=====================================
@@ -1,4 +1,4 @@
-# Copyright (c) 2015. Mount Sinai School of Medicine
+# Copyright (c) 2015-2018. Mount Sinai School of Medicine
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -14,30 +14,32 @@
"""Convert from Python type names to sqlite3 column types"""
+from __future__ import print_function, division, absolute_import
+
import numpy as np
_dtype_to_db_type_dict = {
- 'int': 'INT',
- 'int8': 'INT',
- 'int16': 'INT',
- 'int32': 'INT',
- 'int64': 'INT',
-
- 'uint8': 'INT',
- 'uint16': 'INT',
- 'uint32': 'INT',
- 'uint64': 'INT',
-
- 'bool': 'INT',
-
- 'float': 'FLOAT',
- 'float32': 'FLOAT',
- 'float64': 'FLOAT',
-
- 'object': 'TEXT',
- 'object_': 'TEXT',
- 'string_': 'TEXT',
- 'str': 'TEXT',
+ 'int': 'INT',
+ 'int8': 'INT',
+ 'int16': 'INT',
+ 'int32': 'INT',
+ 'int64': 'INT',
+
+ 'uint8': 'INT',
+ 'uint16': 'INT',
+ 'uint32': 'INT',
+ 'uint64': 'INT',
+
+ 'bool': 'INT',
+
+ 'float': 'FLOAT',
+ 'float32': 'FLOAT',
+ 'float64': 'FLOAT',
+
+ 'object': 'TEXT',
+ 'object_': 'TEXT',
+ 'string_': 'TEXT',
+ 'str': 'TEXT',
}
def _lookup_type_name(type_name):
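
The reindented table above backs db_type's conversion of numpy/pandas dtype names to sqlite3 column types. A condensed sketch of the same lookup:

    import numpy as np

    dtype_to_db_type = {"int64": "INT", "float64": "FLOAT", "object": "TEXT"}
    values = np.array([1.5, 2.5])
    dtype_to_db_type[values.dtype.name]  # -> 'FLOAT'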
=====================================
datacache/download.py
=====================================
@@ -1,4 +1,4 @@
-# Copyright (c) 2015. Mount Sinai School of Medicine
+# Copyright (c) 2015-2018. Mount Sinai School of Medicine
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -12,10 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+from __future__ import print_function, division, absolute_import
+
import gzip
import logging
-from os import remove
-from os.path import exists, splitext
+import os
+import subprocess
from shutil import move
from tempfile import NamedTemporaryFile
import zipfile
@@ -23,57 +25,99 @@ import zipfile
import requests
import pandas as pd
+from six.moves import urllib
from .common import build_path, build_local_filename
-try:
- import urllib.request
- import urllib.error
- import urllib.parse
+logger = logging.getLogger(__name__)
+
- # Python 3
- def urllib_response(url):
- req = urllib.request.Request(url)
- return urllib.request.urlopen(req)
-except ImportError:
- # Python 2
- import urllib2
+def _download(download_url, timeout=None):
+ if download_url.startswith("http"):
+ response = requests.get(download_url, timeout=timeout)
+ response.raise_for_status()
+ return response.content
+ else:
+ req = urllib.request.Request(download_url)
+ response = urllib.request.urlopen(req, data=None, timeout=timeout)
+ return response.read()
- def urllib_response(url):
- req = urllib2.Request(url)
- return urllib2.urlopen(req)
-def _download(filename, full_path, download_url):
+def _download_to_temp_file(
+ download_url,
+ timeout=None,
+ base_name="download",
+ ext="tmp",
+ use_wget_if_available=False):
+
+ if not download_url:
+ raise ValueError("URL not provided")
+
+ with NamedTemporaryFile(
+ suffix='.' + ext,
+ prefix=base_name,
+ delete=False) as tmp:
+ tmp_path = tmp.name
+
+ def download_using_python():
+ with open(tmp_path, mode="w+b") as tmp_file:
+ tmp_file.write(
+ _download(download_url, timeout=timeout))
+
+ if not use_wget_if_available:
+ download_using_python()
+ else:
+ try:
+ # first try using wget to download since this works on Travis
+ # even when FTP otherwise fails
+ wget_command_list = [
+ "wget",
+ download_url,
+ "-O", tmp_path,
+ "--no-verbose",
+ ]
+ if download_url.startswith("ftp"):
+ wget_command_list.extend(["--passive-ftp"])
+ if timeout:
+ wget_command_list.extend(["-T", "%s" % timeout])
+ logger.info("Running: %s" % (" ".join(wget_command_list)))
+ subprocess.call(wget_command_list)
+ except OSError as e:
+ if e.errno == os.errno.ENOENT:
+ # wget not found
+ download_using_python()
+ else:
+ raise
+ return tmp_path
+
+
+def _download_and_decompress_if_necessary(
+ full_path,
+ download_url,
+ timeout=None,
+ use_wget_if_available=False):
"""
Downloads remote file at `download_url` to local file at `full_path`
"""
- print("Downloading %s to %s" % (download_url, full_path))
-
- base_name, ext = splitext(filename)
- if download_url.startswith("http"):
- response = requests.get(download_url)
- response.raise_for_status()
- data = response.content
- else:
- response = urllib_response(download_url)
- data = response.read()
- tmp_file = NamedTemporaryFile(
- suffix='.' + ext,
- prefix=base_name,
- delete=False)
- tmp_file.write(data)
- tmp_path = tmp_file.name
- tmp_file.close()
+ logger.info("Downloading %s to %s", download_url, full_path)
+ filename = os.path.split(full_path)[1]
+ base_name, ext = os.path.splitext(filename)
+ tmp_path = _download_to_temp_file(
+ download_url=download_url,
+ timeout=timeout,
+ base_name=base_name,
+ ext=ext,
+ use_wget_if_available=use_wget_if_available)
if download_url.endswith("zip") and not filename.endswith("zip"):
- logging.info("Decompressing zip into %s...", filename)
+ logger.info("Decompressing zip into %s...", filename)
with zipfile.ZipFile(tmp_path) as z:
names = z.namelist()
assert len(names) > 0, "Empty zip archive"
if filename in names:
chosen_filename = filename
else:
- # in case zip archive contains multiple files, choose the biggest
+ # If zip archive contains multiple files, choose the biggest.
biggest_size = 0
chosen_filename = names[0]
for info in z.infolist():
@@ -82,33 +126,34 @@ def _download(filename, full_path, download_url):
biggest_size = info.file_size
extract_path = z.extract(chosen_filename)
move(extract_path, full_path)
- remove(tmp_path)
+ os.remove(tmp_path)
elif download_url.endswith("gz") and not filename.endswith("gz"):
- logging.info("Decompressing gzip into %s...", filename)
+ logger.info("Decompressing gzip into %s...", filename)
with gzip.GzipFile(tmp_path) as src:
contents = src.read()
- remove(tmp_path)
- with open(full_path, 'w') as dst:
+ os.remove(tmp_path)
+ with open(full_path, 'wb') as dst:
dst.write(contents)
- elif download_url.endswith(("html", "htm")):
- logging.info("Extracting HTML table into CSV %s...", filename)
- df = pd.read_html(tmp_path, header=0, infer_types=False)[0]
+ elif download_url.endswith(("html", "htm")) and full_path.endswith(".csv"):
+ logger.info("Extracting HTML table into CSV %s...", filename)
+ df = pd.read_html(tmp_path, header=0)[0]
df.to_csv(full_path, sep=',', index=False, encoding='utf-8')
else:
move(tmp_path, full_path)
-def file_exists(download_url,
- filename=None,
- decompress=False,
- subdir=None):
+def file_exists(
+ download_url,
+ filename=None,
+ decompress=False,
+ subdir=None):
"""
Return True if a local file corresponding to these arguments
exists.
"""
filename = build_local_filename(download_url, filename, decompress)
full_path = build_path(filename, subdir)
- return exists(full_path)
+ return os.path.exists(full_path)
def fetch_file(
@@ -116,7 +161,9 @@ def fetch_file(
filename=None,
decompress=False,
subdir=None,
- force=False):
+ force=False,
+ timeout=None,
+ use_wget_if_available=False):
"""
Download a remote file and store it locally in a cache directory. Don't
download it again if it's already present (unless `force` is True.)
@@ -143,15 +190,27 @@ def fetch_file(
By default, a remote file is not downloaded if it's already present.
However, with this argument set to True, it will be overwritten.
+ timeout : float, optional
+ Timeout for download in seconds, default is None which uses
+ global timeout.
+
+ use_wget_if_available: bool, optional
+ If the `wget` command is available, use that for download instead
+ of Python libraries (default False)
+
Returns the full path of the local file.
"""
filename = build_local_filename(download_url, filename, decompress)
full_path = build_path(filename, subdir)
- if not exists(full_path) or force:
- logging.info("Fetching %s from URL %s", filename, download_url)
- _download(filename, full_path, download_url)
+ if not os.path.exists(full_path) or force:
+ logger.info("Fetching %s from URL %s", filename, download_url)
+ _download_and_decompress_if_necessary(
+ full_path=full_path,
+ download_url=download_url,
+ timeout=timeout,
+ use_wget_if_available=use_wget_if_available)
else:
- logging.info("Cached file %s from URL %s", filename, download_url)
+ logger.info("Cached file %s from URL %s", filename, download_url)
return full_path
@@ -168,14 +227,14 @@ def fetch_and_transform(
object.
"""
transformed_path = build_path(transformed_filename, subdir)
- if not exists(transformed_path):
+ if not os.path.exists(transformed_path):
source_path = fetch_file(source_url, source_filename, subdir)
- logging.info("Generating data file %s from %s", transformed_path, source_path)
+ logger.info("Generating data file %s from %s", transformed_path, source_path)
result = transformer(source_path, transformed_path)
else:
- logging.info("Cached data file: %s", transformed_path)
+ logger.info("Cached data file: %s", transformed_path)
result = loader(transformed_path)
- assert exists(transformed_path)
+ assert os.path.exists(transformed_path)
return result
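
One caveat in the new _download_to_temp_file: it compares e.errno against os.errno.ENOENT, and the undocumented os.errno alias is removed in Python 3.7, so on modern interpreters a missing wget raises AttributeError instead of triggering the fallback. A hypothetical standalone sketch of the same fallback using the errno module directly (helper and parameter names are illustrative, not upstream code):

    import errno
    import subprocess

    def download_with_wget_fallback(url, tmp_path, python_fallback,
                                    timeout=None):
        # Hypothetical helper: try wget first and fall back to a
        # pure-Python download when the executable is missing.
        command = ["wget", url, "-O", tmp_path, "--no-verbose"]
        if url.startswith("ftp"):
            command.append("--passive-ftp")
        if timeout:
            command.extend(["-T", str(timeout)])
        try:
            subprocess.call(command)
        except OSError as e:
            if e.errno == errno.ENOENT:  # wget not installed
                python_fallback()
            else:
                raise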
=====================================
datacache/fasta.py deleted
=====================================
@@ -1,29 +0,0 @@
-# Copyright (c) 2015. Mount Sinai School of Medicine
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from Bio import SeqIO
-
-from .download import fetch_file
-
-def fetch_fasta_dict(download_url, filename=None, subdir=None):
- """
- Download a remote FASTA file from `download_url` and save it locally as `filename`.
- Load the file using BioPython and return an index mapping from entry keys to their sequence.
- """
- fasta_path = fetch_file(
- download_url=download_url,
- filename=filename,
- decompress=True,
- subdir=subdir)
- return SeqIO.index(fasta_path, 'fasta')
=====================================
setup.cfg
=====================================
@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0
+
=====================================
setup.py
=====================================
@@ -1,4 +1,4 @@
-# Copyright (c) 2014. Mount Sinai School of Medicine
+# Copyright (c) 2014-2018. Mount Sinai School of Medicine
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -14,6 +14,7 @@
import os
import logging
+import re
from setuptools import setup
@@ -24,35 +25,32 @@ readme_path = os.path.join(current_directory, readme_filename)
try:
with open(readme_path, "r") as f:
- readme = f.read()
+ readme_markdown = f.read()
except:
logging.warn("Failed to load %s", readme_filename)
- readme = ""
+ readme_markdown = ""
try:
import pypandoc
- readme = pypandoc.convert(readme, to="rst", format="md")
+ readme_restructured = pypandoc.convert(readme_markdown, to="rst", format="md")
except:
+ readme_restructured = readme_markdown
logging.warn("Failed to convert %s to reStructuredText", readme_filename)
-
-requirements_path = os.path.join(current_directory, "requirements.txt")
-
-with open(requirements_path, "r") as f:
- requirements = [
- line.strip()
- for line in f.read().splitlines()
- if line.strip()
- ]
+with open('datacache/__init__.py', 'r') as f:
+ version = re.search(
+ r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]',
+ f.read(),
+ re.MULTILINE).group(1)
if __name__ == "__main__":
setup(
name="datacache",
- version="0.4.7",
+ version=version,
description="Helpers for transparently downloading datasets",
author="Alex Rubinsteyn",
- author_email="alex {dot} rubinsteyn {at} mssm {dot} edu",
- url="https://github.com/hammerlab/datacache",
+ author_email="alex.rubinsteyn@mssm.edu",
+ url="https://github.com/openvax/datacache",
license="http://www.apache.org/licenses/LICENSE-2.0.html",
classifiers=[
"Development Status :: 3 - Alpha",
@@ -63,7 +61,14 @@ if __name__ == "__main__":
"Programming Language :: Python",
"Topic :: Scientific/Engineering :: Bio-Informatics",
],
- install_requires=requirements,
- long_description=readme,
+ install_requires=[
+ "pandas>=0.15.2",
+ "appdirs>=1.4.0",
+ "progressbar33>=2.4",
+ "requests>=2.5.1",
+ "typechecks>=0.0.2",
+ "mock",
+ ],
+ long_description=readme_restructured,
packages=["datacache"],
)
=====================================
test/__init__.py deleted
=====================================
=====================================
test/test_cache_object.py
=====================================
@@ -29,7 +29,7 @@ def test_cache_fetch_google():
eq_(path, cache.local_path(TEST_URL, filename=TEST_FILENAME))
-@patch('datacache.cache.download._download')
+@patch('datacache.cache.download._download_and_decompress_if_necessary')
def test_cache_fetch_force(mock_download):
cache = Cache("datacache_test")
cache.fetch("http://www.google.com", filename="google", force=True)
=====================================
test/test_download.py
=====================================
@@ -18,16 +18,20 @@ FASTA_FILENAME = 'Homo_sapiens.GRCh37.75.dna_rm.chromosome.MT.fa'
URL = \
'ftp://ftp.ensembl.org/pub/release-75/fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.75.dna_rm.chromosome.MT.fa.gz'
-def test_fetch_decompress():
- path1 = fetch_file(
- URL,
- decompress=True, subdir="datacache")
- assert path1.endswith(FASTA_FILENAME)
-
- with open(path1, 'r') as f1:
- s1 = f1.read()
- assert "TCAATTTCGTGCCAG" in s1
+def test_fetch_decompress():
+ for use_wget_if_available in [True, False]:
+ for timeout in [None, 10**6]:
+ path1 = fetch_file(
+ URL,
+ decompress=True,
+ subdir="datacache",
+ use_wget_if_available=use_wget_if_available,
+ timeout=timeout)
+ assert path1.endswith(FASTA_FILENAME)
+ with open(path1, 'r') as f1:
+ s1 = f1.read()
+ assert "TCAATTTCGTGCCAG" in s1
def test_fetch_subdirs():
path = fetch_file(URL, decompress=True, subdir="datacache")
=====================================
test/test_fasta_db.py deleted
=====================================
@@ -1,23 +0,0 @@
-# Copyright (c) 2014. Mount Sinai School of Medicine
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from datacache import fetch_fasta_db
-
-FASTA_FILENAME = 'Homo_sapiens.GRCh37.75.dna_rm.chromosome.MT.fa'
-URL = \
- 'ftp://ftp.ensembl.org/pub/release-75/fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.75.dna_rm.chromosome.MT.fa.gz'
-
-def test_download_fasta_db():
- db = fetch_fasta_db("DNA", URL)
- assert db is not None
=====================================
test/test_fasta_dict.py
=====================================
@@ -12,17 +12,40 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from datacache import fetch_fasta_dict
+from datacache import fetch_file
+import gzip
+
+URL = "".join([
+ 'ftp://ftp.ensembl.org/pub/release-75',
+ '/fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.75',
+ '.dna_rm.chromosome.MT.fa.gz',
+])
+
+def fetch_fasta_dict(path_or_url):
+ path = fetch_file(path_or_url)
+ d = {}
+ value_buffer = []
+ key = None
+ if path.endswith(".gz") or path.endswith(".gzip"):
+ f = gzip.open(path, "r")
+ else:
+ f = open(path, "r")
+ for line in f.readlines():
+ if type(line) is bytes:
+ line = line.decode("ascii")
+ if line.startswith(">"):
+ if key is not None:
+ d[key] = "".join(value_buffer)
+ value_buffer = []
+ key = line.split()[0][1:]
+ else:
+ value_buffer.append(line.strip())
+ if key and value_buffer:
+ d[key] = "".join(value_buffer)
+ f.close()
+ return d
-FASTA_FILENAME = 'Homo_sapiens.GRCh37.75.dna_rm.chromosome.MT.fa'
-URL = \
- 'ftp://ftp.ensembl.org/pub/release-75/fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.75.dna_rm.chromosome.MT.fa.gz'
def test_download_fasta_dict():
d = fetch_fasta_dict(URL)
- assert hasattr(d, 'keys'), d
- assert hasattr(d, 'values'), d
assert len(d) > 0
-
- d2 = fetch_fasta_dict(URL, filename=FASTA_FILENAME)
- assert len(d) == len(d2)
\ No newline at end of file
View it on GitLab: https://salsa.debian.org/med-team/python-datacache/-/commit/9f1f5c72f159ca05891e8b34d71c7961b51f1b6e