[Python-modules-commits] [python-vertica] 02/08: Import python-vertica_0.5.6.orig.tar.gz

Jean Baptiste Favre jbfavre-guest at moszumanska.debian.org
Sun Apr 10 15:54:49 UTC 2016


This is an automated email from the git hooks/post-receive script.

jbfavre-guest pushed a commit to branch master
in repository python-vertica.

commit ae8b3b3ef96558d8f3f28f77dfc946ea612084d5
Author: Jean Baptiste Favre <github at jbfavre.org>
Date:   Sun Apr 10 17:19:23 2016 +0200

    Import python-vertica_0.5.6.orig.tar.gz
---
 .gitignore                                         |  3 +
 README.md                                          | 35 ++++++++++--
 Vagrantfile                                        | 36 ++----------
 setup.py                                           |  2 +-
 vertica_python/__init__.py                         |  2 +-
 vertica_python/tests/test_commons.py               |  4 +-
 vertica_python/tests/unicode_tests.py              | 16 ++++++
 vertica_python/vertica/column.py                   | 66 ++++++++++++----------
 vertica_python/vertica/connection.py               |  3 +-
 vertica_python/vertica/cursor.py                   | 16 +++---
 .../backend_messages/parameter_description.py      |  2 +-
 11 files changed, 109 insertions(+), 76 deletions(-)

diff --git a/.gitignore b/.gitignore
index c5888d5..6edc000 100644
--- a/.gitignore
+++ b/.gitignore
@@ -44,3 +44,6 @@ nosetests.xml
 
 # default virtual environment
 /env/
+
+# pyenv
+.python-version
diff --git a/README.md b/README.md
index fbc33b0..f8adf91 100644
--- a/README.md
+++ b/README.md
@@ -37,12 +37,16 @@ Source code for vertica-python can be found at:
 
 
 ## Run unit tests
+
+To run the tests, you must have access to a Vertica database. You can
+spin one up with Vagrant that uses the default credentials using
+`vagrant up`. If you want to run it against an existing database
+instead, you can set the environment variables seen in
+`tests/test_commons.py`.
+
     # install nose if you don't have it
     pip install -r requirements_test.txt
 
-    # you will need to have access to a vertica database.
-    # connection info is in tests/basic_tests.py
-
     # run tests
     nosetests
 
@@ -61,7 +65,9 @@ conn_info = {'host': '127.0.0.1',
              'password': 'some_password',
              'database': 'a_database',
              # 10 minutes timeout on queries
-             'read_timeout': 600}
+             'read_timeout': 600,
+             # default throw error on invalid UTF-8 results
+             'unicode_error': 'strict'}
 
 # simple connection, with manual close
 connection = vertica_python.connect(**conn_info)
@@ -204,6 +210,27 @@ cur.nextset()
 # None
 ```
 
+## UTF-8 encoding issues
+
+While Vertica expects stored varchars to be UTF-8 encoded, sometimes invalid strings get into the database. You can specify how to handle reading these characters using the `unicode_error` connection option. This uses the same values as the `errors` argument of the unicode type (https://docs.python.org/2/library/functions.html#unicode).
+
+```python
+cur = vertica_python.Connection({..., 'unicode_error': 'strict'}).cursor()
+cur.execute(r"SELECT E'\xC2'")
+cur.fetchone()
+# caught 'utf8' codec can't decode byte 0xc2 in position 0: unexpected end of data
+
+cur = vertica_python.Connection({..., 'unicode_error': 'replace'}).cursor()
+cur.execute(r"SELECT E'\xC2'")
+cur.fetchone()
+# �
+
+cur = vertica_python.Connection({..., 'unicode_error': 'ignore'}).cursor()
+cur.execute(r"SELECT E'\xC2'")
+cur.fetchone()
+# 
+```
+
 ## License
 
 MIT License, please see `LICENSE` for details.
diff --git a/Vagrantfile b/Vagrantfile
index 61bb4a8..82c5ccc 100644
--- a/Vagrantfile
+++ b/Vagrantfile
@@ -1,6 +1,8 @@
 # -*- mode: ruby -*-
 # vi: set ft=ruby :
 
+ENV["VAGRANT_DEFAULT_PROVIDER"] ||= "docker"
+
 VAGRANTFILE_API_VERSION = "2"
 
 #######################################################################
@@ -19,39 +21,13 @@ VAGRANTFILE_API_VERSION = "2"
 # >>>
 #######################################################################
 
-
-# Globally install docker - bypass default Vagrant docker installation
-# procedure because it does not work reliably (curl does not follow
-# redirect = Docker won't be installed)
-
-$install_docker = <<SCRIPT
-curl -sSL https://get.docker.io | sh
-usermod -aG docker vagrant
-SCRIPT
-
 Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
-  config.vm.box = "phusion/ubuntu-14.04-amd64"
-
-  # Make Vertica available on host machine on port 5433
-  config.vm.network "forwarded_port", guest: 5433, host: 5433
 
-  config.vm.provider "virtualbox" do |v|
-     v.memory = 2048
-     v.cpus = 2
+  config.vm.provider "docker" do |d|
+     d.image = "sumitchawla/vertica:latest"
+     d.ports = ["5433:5433"]
   end
 
-  config.vm.provision "shell", inline: $install_docker
-
-  # Pull Vertica image when the box is first provisioned
-  config.vm.provision "docker" do |d|
-    d.pull_images 'sumitchawla/vertica:latest'
-  end
-
-  # Start Vertica inside container every time box is started
-  config.vm.provision "docker", run: "always" do |d|
-    d.run 'sumitchawla/vertica',
-    cmd: "",
-    args: "-d -p 5433:5433 --name vertica"
-  end
+  config.vm.synced_folder ".", "/vagrant", disabled: true
 
 end
diff --git a/setup.py b/setup.py
index 71b8559..9c9fb1b 100644
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@ opts = ReqOpts(None, 'git')
 # version should use the format 'x.x.x' (instead of 'vx.x.x')
 setup(
     name='vertica-python',
-    version='0.5.5',
+    version='0.5.6',
     description='A native Python client for the Vertica database.',
     author='Justin Berka, Alex Kim, Kenneth Tran',
     author_email='justin.berka at gmail.com, alex.kim at uber.com, tran at uber.com',
diff --git a/vertica_python/__init__.py b/vertica_python/__init__.py
index 944ea39..5a7dc99 100644
--- a/vertica_python/__init__.py
+++ b/vertica_python/__init__.py
@@ -6,7 +6,7 @@ from vertica_python.vertica.connection import Connection
 # Main module for this library.
 
 # The version number of this library.
-version_info = (0, 5, 5)
+version_info = (0, 5, 6)
 
 __version__ = '.'.join(map(str, version_info))
 
diff --git a/vertica_python/tests/test_commons.py b/vertica_python/tests/test_commons.py
index da113ec..8875c38 100644
--- a/vertica_python/tests/test_commons.py
+++ b/vertica_python/tests/test_commons.py
@@ -5,8 +5,8 @@ import vertica_python
 conn_info = {'host': os.getenv('VP_TEST_HOST', '127.0.0.1'),
              'port': int(os.getenv('VP_TEST_PORT', 5433)),
              'user': os.getenv('VP_TEST_USER', 'dbadmin'),
-             'password': os.getenv('VP_TEST_PASSWD', 'unit_test'),
-             'database': os.getenv('VP_TEST_DB', 'unit_test')}
+             'password': os.getenv('VP_TEST_PASSWD', ''),
+             'database': os.getenv('VP_TEST_DB', 'docker')}
 
 
 class VerticaTestCase(unittest.TestCase):
diff --git a/vertica_python/tests/unicode_tests.py b/vertica_python/tests/unicode_tests.py
new file mode 100644
index 0000000..42a5bc6
--- /dev/null
+++ b/vertica_python/tests/unicode_tests.py
@@ -0,0 +1,16 @@
+from .test_commons import conn_info, VerticaTestCase
+from .. import connect
+
+
+class UnicodeTestCase(VerticaTestCase):
+    def test_unicode_named_parameter_binding(self):
+        key = u'\u16a0'
+        value = 1
+        query = u"SELECT :%s" % key
+
+        with connect(**conn_info) as conn:
+            cur = conn.cursor()
+            cur.execute(query, {key: value})
+            res = cur.fetchone()
+
+            assert res[0] == value
diff --git a/vertica_python/vertica/column.py b/vertica_python/vertica/column.py
index 871da65..7dd4df5 100644
--- a/vertica_python/vertica/column.py
+++ b/vertica_python/vertica/column.py
@@ -92,31 +92,37 @@ ColumnTuple = namedtuple(
 
 class Column(object):
 
-    DATA_TYPE_CONVERSIONS = [
-        ('unspecified', None),
-        ('tuple', None),
-        ('pos', None),
-        ('record', None),
-        ('unknown', None),
-        ('bool', lambda s: s == 't'),
-        ('integer', lambda s: int(s)),
-        ('float', lambda s: float(s)),
-        ('char', lambda s: unicode(s, 'utf-8')),
-        ('varchar', lambda s: unicode(s, 'utf-8')),
-        ('date', date_parse),
-        ('time', None),
-        ('timestamp', timestamp_parse),
-        ('timestamp_tz', timestamp_tz_parse),
-        ('interval', None),
-        ('time_tz', None),
-        ('numeric', lambda s: Decimal(s)),
-        ('bytea', None),
-        ('rle_tuple', None),
-    ]
-    DATA_TYPES = map(lambda x: x[0], DATA_TYPE_CONVERSIONS)
-
-    def __init__(self, col):
-
+    @classmethod
+    def data_type_conversions(cls, unicode_error=None):
+        if unicode_error is None:
+            unicode_error = 'strict'
+        return [
+            ('unspecified', None),
+            ('tuple', None),
+            ('pos', None),
+            ('record', None),
+            ('unknown', None),
+            ('bool', lambda s: s == 't'),
+            ('integer', lambda s: int(s)),
+            ('float', lambda s: float(s)),
+            ('char', lambda s: unicode(s, 'utf-8', unicode_error)),
+            ('varchar', lambda s: unicode(s, 'utf-8', unicode_error)),
+            ('date', date_parse),
+            ('time', None),
+            ('timestamp', timestamp_parse),
+            ('timestamp_tz', timestamp_tz_parse),
+            ('interval', None),
+            ('time_tz', None),
+            ('numeric', lambda s: Decimal(s)),
+            ('bytea', None),
+            ('rle_tuple', None),
+        ]
+
+    @property
+    def data_types():
+        return map(lambda x: x[0], Column.data_type_conversions())
+
+    def __init__(self, col, unicode_error=None):
         self.name = col['name']
         self.type_code = col['data_type_oid']
         self.display_size = None
@@ -124,24 +130,26 @@ class Column(object):
         self.precision = None
         self.scale = None
         self.null_ok = None
+        self.unicode_error = unicode_error
+        self.data_type_conversions = Column.data_type_conversions(unicode_error=self.unicode_error)
 
         # WORKAROUND: Treat LONGVARCHAR as VARCHAR
         if self.type_code == 115:
             self.type_code = 9
 
         # Mark type_code as unspecified if not within known data types
-        if self.type_code >= len(self.DATA_TYPE_CONVERSIONS):
+        if self.type_code >= len(self.data_type_conversions):
             self.type_code = 0
 
         #self.props = ColumnTuple(col['name'], col['data_type_oid'], None, col['data_type_size'], None, None, None)
         self.props = ColumnTuple(col['name'], self.type_code, None, col['data_type_size'], None, None, None)
 
-        #self.converter = self.DATA_TYPE_CONVERSIONS[col['data_type_oid']][1]
-        self.converter = self.DATA_TYPE_CONVERSIONS[self.type_code][1]
+        #self.converter = self.data_type_conversions[col['data_type_oid']][1]
+        self.converter = self.data_type_conversions[self.type_code][1]
 
         # things that are actually sent
 #        self.name = col['name']
-#        self.data_type = self.DATA_TYPE_CONVERSIONS[col['data_type_oid']][0]
+#        self.data_type = self.data_type_conversions[col['data_type_oid']][0]
 #        self.type_modifier = col['type_modifier']
 #        self.format = 'text' if col['format_code'] == 0 else 'binary'
 #        self.table_oid = col['table_oid']
diff --git a/vertica_python/vertica/connection.py b/vertica_python/vertica/connection.py
index 8e2e12c..8bc9a1e 100644
--- a/vertica_python/vertica/connection.py
+++ b/vertica_python/vertica/connection.py
@@ -28,7 +28,8 @@ class Connection(object):
         )
 
         # we only support one cursor per connection
-        self._cursor = Cursor(self, None)
+        self.options.setdefault('unicode_error', None)
+        self._cursor = Cursor(self, None, unicode_error=self.options['unicode_error'])
         self.options.setdefault('port', 5433)
         self.options.setdefault('read_timeout', 600)
         self.startup_connection()
diff --git a/vertica_python/vertica/cursor.py b/vertica_python/vertica/cursor.py
index 391c2f9..4389951 100644
--- a/vertica_python/vertica/cursor.py
+++ b/vertica_python/vertica/cursor.py
@@ -11,9 +11,10 @@ from vertica_python.vertica.column import Column
 logger = logging.getLogger('vertica')
 
 class Cursor(object):
-    def __init__(self, connection, cursor_type=None):
+    def __init__(self, connection, cursor_type=None, unicode_error='strict'):
         self.connection = connection
         self.cursor_type = cursor_type
+        self.unicode_error = unicode_error
         self._closed = False
         self._message = None
 
@@ -64,7 +65,7 @@ class Cursor(object):
                     # Using a regex with word boundary to correctly handle params with similar names
                     # such as :s and :start
                     match_str = u':%s\\b' % unicode(key)
-                    operation = re.sub(match_str, v.decode('utf-8'), operation, re.UNICODE)
+                    operation = re.sub(match_str, v.decode('utf-8'), operation, flags=re.UNICODE)
             elif isinstance(parameters, tuple):
                 tlist = []
                 for p in parameters:
@@ -88,7 +89,7 @@ class Cursor(object):
             if isinstance(message, messages.ErrorResponse):
                 raise errors.QueryError.from_error_response(message, operation)
             elif isinstance(message, messages.RowDescription):
-                self.description = map(lambda fd: Column(fd), message.fields)
+                self.description = map(lambda fd: Column(fd, self.unicode_error), message.fields)
             elif isinstance(message, messages.DataRow):
                 break
             elif isinstance(message, messages.ReadyForQuery):
@@ -238,13 +239,14 @@ class Cursor(object):
         return self._closed or self.connection.closed()
 
     def row_formatter(self, row_data):
-        if not self.cursor_type:
+        if self.cursor_type is None:
             return self.format_row_as_array(row_data)
-        elif self.cursor_type == 'list':
+        elif self.cursor_type in (list, 'list'):
             return self.format_row_as_array(row_data)
-        elif self.cursor_type == 'dict':
+        elif self.cursor_type in (dict, 'dict'):
             return self.format_row_as_dict(row_data)
-            # throw some error
+        else:
+            raise Exception('Unrecognized cursor_type: %r' % self.cursor_type)
 
     def format_row_as_dict(self, row_data):
         return dict(
diff --git a/vertica_python/vertica/messages/backend_messages/parameter_description.py b/vertica_python/vertica/messages/backend_messages/parameter_description.py
index 90bc79b..0c0b9fb 100644
--- a/vertica_python/vertica/messages/backend_messages/parameter_description.py
+++ b/vertica_python/vertica/messages/backend_messages/parameter_description.py
@@ -11,7 +11,7 @@ class ParameterDescription(BackendMessage):
     def __init__(self, data):
         parameter_count = unpack('!H', data)[0]
         parameter_type_ids = unpack_from("!{0}N".format(parameter_count), data, 2)
-        self.parameter_types = [Column.DATA_TYPES[dtid] for dtid in parameter_type_ids]
+        self.parameter_types = [Column.data_types[dtid] for dtid in parameter_type_ids]
 
 
 ParameterDescription._message_id('t')

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-vertica.git



More information about the Python-modules-commits mailing list