[Python-modules-commits] [python-vertica] 02/08: Import python-vertica_0.5.6.orig.tar.gz
Jean Baptiste Favre
jbfavre-guest at moszumanska.debian.org
Sun Apr 10 15:54:49 UTC 2016
This is an automated email from the git hooks/post-receive script.
jbfavre-guest pushed a commit to branch master
in repository python-vertica.
commit ae8b3b3ef96558d8f3f28f77dfc946ea612084d5
Author: Jean Baptiste Favre <github at jbfavre.org>
Date: Sun Apr 10 17:19:23 2016 +0200
Import python-vertica_0.5.6.orig.tar.gz
---
.gitignore | 3 +
README.md | 35 ++++++++++--
Vagrantfile | 36 ++----------
setup.py | 2 +-
vertica_python/__init__.py | 2 +-
vertica_python/tests/test_commons.py | 4 +-
vertica_python/tests/unicode_tests.py | 16 ++++++
vertica_python/vertica/column.py | 66 ++++++++++++----------
vertica_python/vertica/connection.py | 3 +-
vertica_python/vertica/cursor.py | 16 +++---
.../backend_messages/parameter_description.py | 2 +-
11 files changed, 109 insertions(+), 76 deletions(-)
diff --git a/.gitignore b/.gitignore
index c5888d5..6edc000 100644
--- a/.gitignore
+++ b/.gitignore
@@ -44,3 +44,6 @@ nosetests.xml
# default virtual environment
/env/
+
+# pyenv
+.python-version
diff --git a/README.md b/README.md
index fbc33b0..f8adf91 100644
--- a/README.md
+++ b/README.md
@@ -37,12 +37,16 @@ Source code for vertica-python can be found at:
## Run unit tests
+
+To run the tests, you must have access to a Vertica database. You can
+spin one up with Vagrant that uses the default credentials using
+`vagrant up`. If you want to run them against an existing database
+instead, you can set the environment variables seen in
+`tests/test_commons.py`.
+
# install nose if you don't have it
pip install -r requirements_test.txt
- # you will need to have access to a vertica database.
- # connection info is in tests/basic_tests.py
-
# run tests
nosetests
@@ -61,7 +65,9 @@ conn_info = {'host': '127.0.0.1',
'password': 'some_password',
'database': 'a_database',
# 10 minutes timeout on queries
- 'read_timeout': 600}
+ 'read_timeout': 600,
+ # by default, raise an error on invalid UTF-8 results
+ 'unicode_error': 'strict'}
# simple connection, with manual close
connection = vertica_python.connect(**conn_info)
@@ -204,6 +210,27 @@ cur.nextset()
# None
```
+## UTF-8 encoding issues
+
+While Vertica expects stored varchars to be UTF-8 encoded, sometimes invalid strings get into the database. You can specify how to handle reading these characters using the `unicode_error` connection option. This accepts the same values as the `errors` argument of the unicode type (https://docs.python.org/2/library/functions.html#unicode).
+
+```python
+cur = vertica_python.Connection({..., 'unicode_error': 'strict'}).cursor()
+cur.execute(r"SELECT E'\xC2'")
+cur.fetchone()
+# caught 'utf8' codec can't decode byte 0xc2 in position 0: unexpected end of data
+
+cur = vertica_python.Connection({..., 'unicode_error': 'replace'}).cursor()
+cur.execute(r"SELECT E'\xC2'")
+cur.fetchone()
+# �
+
+cur = vertica_python.Connection({..., 'unicode_error': 'ignore'}).cursor()
+cur.execute(r"SELECT E'\xC2'")
+cur.fetchone()
+#
+```
+
## License
MIT License, please see `LICENSE` for details.
diff --git a/Vagrantfile b/Vagrantfile
index 61bb4a8..82c5ccc 100644
--- a/Vagrantfile
+++ b/Vagrantfile
@@ -1,6 +1,8 @@
# -*- mode: ruby -*-
# vi: set ft=ruby :
+ENV["VAGRANT_DEFAULT_PROVIDER"] ||= "docker"
+
VAGRANTFILE_API_VERSION = "2"
#######################################################################
@@ -19,39 +21,13 @@ VAGRANTFILE_API_VERSION = "2"
# >>>
#######################################################################
-
-# Globally install docker - bypass default Vagrant docker installation
-# procedure because it does not work reliably (curl does not follow
-# redirect = Docker won't be installed)
-
-$install_docker = <<SCRIPT
-curl -sSL https://get.docker.io | sh
-usermod -aG docker vagrant
-SCRIPT
-
Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
- config.vm.box = "phusion/ubuntu-14.04-amd64"
-
- # Make Vertica available on host machine on port 5433
- config.vm.network "forwarded_port", guest: 5433, host: 5433
- config.vm.provider "virtualbox" do |v|
- v.memory = 2048
- v.cpus = 2
+ config.vm.provider "docker" do |d|
+ d.image = "sumitchawla/vertica:latest"
+ d.ports = ["5433:5433"]
end
- config.vm.provision "shell", inline: $install_docker
-
- # Pull Vertica image when the box is first provisioned
- config.vm.provision "docker" do |d|
- d.pull_images 'sumitchawla/vertica:latest'
- end
-
- # Start Vertica inside container every time box is started
- config.vm.provision "docker", run: "always" do |d|
- d.run 'sumitchawla/vertica',
- cmd: "",
- args: "-d -p 5433:5433 --name vertica"
- end
+ config.vm.synced_folder ".", "/vagrant", disabled: true
end
diff --git a/setup.py b/setup.py
index 71b8559..9c9fb1b 100644
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@ opts = ReqOpts(None, 'git')
# version should use the format 'x.x.x' (instead of 'vx.x.x')
setup(
name='vertica-python',
- version='0.5.5',
+ version='0.5.6',
description='A native Python client for the Vertica database.',
author='Justin Berka, Alex Kim, Kenneth Tran',
author_email='justin.berka at gmail.com, alex.kim at uber.com, tran at uber.com',
diff --git a/vertica_python/__init__.py b/vertica_python/__init__.py
index 944ea39..5a7dc99 100644
--- a/vertica_python/__init__.py
+++ b/vertica_python/__init__.py
@@ -6,7 +6,7 @@ from vertica_python.vertica.connection import Connection
# Main module for this library.
# The version number of this library.
-version_info = (0, 5, 5)
+version_info = (0, 5, 6)
__version__ = '.'.join(map(str, version_info))
diff --git a/vertica_python/tests/test_commons.py b/vertica_python/tests/test_commons.py
index da113ec..8875c38 100644
--- a/vertica_python/tests/test_commons.py
+++ b/vertica_python/tests/test_commons.py
@@ -5,8 +5,8 @@ import vertica_python
conn_info = {'host': os.getenv('VP_TEST_HOST', '127.0.0.1'),
'port': int(os.getenv('VP_TEST_PORT', 5433)),
'user': os.getenv('VP_TEST_USER', 'dbadmin'),
- 'password': os.getenv('VP_TEST_PASSWD', 'unit_test'),
- 'database': os.getenv('VP_TEST_DB', 'unit_test')}
+ 'password': os.getenv('VP_TEST_PASSWD', ''),
+ 'database': os.getenv('VP_TEST_DB', 'docker')}
class VerticaTestCase(unittest.TestCase):
diff --git a/vertica_python/tests/unicode_tests.py b/vertica_python/tests/unicode_tests.py
new file mode 100644
index 0000000..42a5bc6
--- /dev/null
+++ b/vertica_python/tests/unicode_tests.py
@@ -0,0 +1,16 @@
+from .test_commons import conn_info, VerticaTestCase
+from .. import connect
+
+
+class UnicodeTestCase(VerticaTestCase):
+ def test_unicode_named_parameter_binding(self):
+ key = u'\u16a0'
+ value = 1
+ query = u"SELECT :%s" % key
+
+ with connect(**conn_info) as conn:
+ cur = conn.cursor()
+ cur.execute(query, {key: value})
+ res = cur.fetchone()
+
+ assert res[0] == value
diff --git a/vertica_python/vertica/column.py b/vertica_python/vertica/column.py
index 871da65..7dd4df5 100644
--- a/vertica_python/vertica/column.py
+++ b/vertica_python/vertica/column.py
@@ -92,31 +92,37 @@ ColumnTuple = namedtuple(
class Column(object):
- DATA_TYPE_CONVERSIONS = [
- ('unspecified', None),
- ('tuple', None),
- ('pos', None),
- ('record', None),
- ('unknown', None),
- ('bool', lambda s: s == 't'),
- ('integer', lambda s: int(s)),
- ('float', lambda s: float(s)),
- ('char', lambda s: unicode(s, 'utf-8')),
- ('varchar', lambda s: unicode(s, 'utf-8')),
- ('date', date_parse),
- ('time', None),
- ('timestamp', timestamp_parse),
- ('timestamp_tz', timestamp_tz_parse),
- ('interval', None),
- ('time_tz', None),
- ('numeric', lambda s: Decimal(s)),
- ('bytea', None),
- ('rle_tuple', None),
- ]
- DATA_TYPES = map(lambda x: x[0], DATA_TYPE_CONVERSIONS)
-
- def __init__(self, col):
-
+ @classmethod
+ def data_type_conversions(cls, unicode_error=None):
+ if unicode_error is None:
+ unicode_error = 'strict'
+ return [
+ ('unspecified', None),
+ ('tuple', None),
+ ('pos', None),
+ ('record', None),
+ ('unknown', None),
+ ('bool', lambda s: s == 't'),
+ ('integer', lambda s: int(s)),
+ ('float', lambda s: float(s)),
+ ('char', lambda s: unicode(s, 'utf-8', unicode_error)),
+ ('varchar', lambda s: unicode(s, 'utf-8', unicode_error)),
+ ('date', date_parse),
+ ('time', None),
+ ('timestamp', timestamp_parse),
+ ('timestamp_tz', timestamp_tz_parse),
+ ('interval', None),
+ ('time_tz', None),
+ ('numeric', lambda s: Decimal(s)),
+ ('bytea', None),
+ ('rle_tuple', None),
+ ]
+
+ @property
+ def data_types():
+ return map(lambda x: x[0], Column.data_type_conversions())
+
+ def __init__(self, col, unicode_error=None):
self.name = col['name']
self.type_code = col['data_type_oid']
self.display_size = None
@@ -124,24 +130,26 @@ class Column(object):
self.precision = None
self.scale = None
self.null_ok = None
+ self.unicode_error = unicode_error
+ self.data_type_conversions = Column.data_type_conversions(unicode_error=self.unicode_error)
# WORKAROUND: Treat LONGVARCHAR as VARCHAR
if self.type_code == 115:
self.type_code = 9
# Mark type_code as unspecified if not within known data types
- if self.type_code >= len(self.DATA_TYPE_CONVERSIONS):
+ if self.type_code >= len(self.data_type_conversions):
self.type_code = 0
#self.props = ColumnTuple(col['name'], col['data_type_oid'], None, col['data_type_size'], None, None, None)
self.props = ColumnTuple(col['name'], self.type_code, None, col['data_type_size'], None, None, None)
- #self.converter = self.DATA_TYPE_CONVERSIONS[col['data_type_oid']][1]
- self.converter = self.DATA_TYPE_CONVERSIONS[self.type_code][1]
+ #self.converter = self.data_type_conversions[col['data_type_oid']][1]
+ self.converter = self.data_type_conversions[self.type_code][1]
# things that are actually sent
# self.name = col['name']
-# self.data_type = self.DATA_TYPE_CONVERSIONS[col['data_type_oid']][0]
+# self.data_type = self.data_type_conversions[col['data_type_oid']][0]
# self.type_modifier = col['type_modifier']
# self.format = 'text' if col['format_code'] == 0 else 'binary'
# self.table_oid = col['table_oid']
diff --git a/vertica_python/vertica/connection.py b/vertica_python/vertica/connection.py
index 8e2e12c..8bc9a1e 100644
--- a/vertica_python/vertica/connection.py
+++ b/vertica_python/vertica/connection.py
@@ -28,7 +28,8 @@ class Connection(object):
)
# we only support one cursor per connection
- self._cursor = Cursor(self, None)
+ self.options.setdefault('unicode_error', None)
+ self._cursor = Cursor(self, None, unicode_error=self.options['unicode_error'])
self.options.setdefault('port', 5433)
self.options.setdefault('read_timeout', 600)
self.startup_connection()
diff --git a/vertica_python/vertica/cursor.py b/vertica_python/vertica/cursor.py
index 391c2f9..4389951 100644
--- a/vertica_python/vertica/cursor.py
+++ b/vertica_python/vertica/cursor.py
@@ -11,9 +11,10 @@ from vertica_python.vertica.column import Column
logger = logging.getLogger('vertica')
class Cursor(object):
- def __init__(self, connection, cursor_type=None):
+ def __init__(self, connection, cursor_type=None, unicode_error='strict'):
self.connection = connection
self.cursor_type = cursor_type
+ self.unicode_error = unicode_error
self._closed = False
self._message = None
@@ -64,7 +65,7 @@ class Cursor(object):
# Using a regex with word boundary to correctly handle params with similar names
# such as :s and :start
match_str = u':%s\\b' % unicode(key)
- operation = re.sub(match_str, v.decode('utf-8'), operation, re.UNICODE)
+ operation = re.sub(match_str, v.decode('utf-8'), operation, flags=re.UNICODE)
elif isinstance(parameters, tuple):
tlist = []
for p in parameters:
@@ -88,7 +89,7 @@ class Cursor(object):
if isinstance(message, messages.ErrorResponse):
raise errors.QueryError.from_error_response(message, operation)
elif isinstance(message, messages.RowDescription):
- self.description = map(lambda fd: Column(fd), message.fields)
+ self.description = map(lambda fd: Column(fd, self.unicode_error), message.fields)
elif isinstance(message, messages.DataRow):
break
elif isinstance(message, messages.ReadyForQuery):
@@ -238,13 +239,14 @@ class Cursor(object):
return self._closed or self.connection.closed()
def row_formatter(self, row_data):
- if not self.cursor_type:
+ if self.cursor_type is None:
return self.format_row_as_array(row_data)
- elif self.cursor_type == 'list':
+ elif self.cursor_type in (list, 'list'):
return self.format_row_as_array(row_data)
- elif self.cursor_type == 'dict':
+ elif self.cursor_type in (dict, 'dict'):
return self.format_row_as_dict(row_data)
- # throw some error
+ else:
+ raise Exception('Unrecognized cursor_type: %r' % self.cursor_type)
def format_row_as_dict(self, row_data):
return dict(
diff --git a/vertica_python/vertica/messages/backend_messages/parameter_description.py b/vertica_python/vertica/messages/backend_messages/parameter_description.py
index 90bc79b..0c0b9fb 100644
--- a/vertica_python/vertica/messages/backend_messages/parameter_description.py
+++ b/vertica_python/vertica/messages/backend_messages/parameter_description.py
@@ -11,7 +11,7 @@ class ParameterDescription(BackendMessage):
def __init__(self, data):
parameter_count = unpack('!H', data)[0]
parameter_type_ids = unpack_from("!{0}N".format(parameter_count), data, 2)
- self.parameter_types = [Column.DATA_TYPES[dtid] for dtid in parameter_type_ids]
+ self.parameter_types = [Column.data_types[dtid] for dtid in parameter_type_ids]
ParameterDescription._message_id('t')
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-vertica.git
More information about the Python-modules-commits
mailing list