[pg_comparator] 02/10: Imported Upstream version 2.2.6
Sebastiaan Couwenberg
sebastic at moszumanska.debian.org
Sun Aug 16 16:10:07 UTC 2015
This is an automated email from the git hooks/post-receive script.
sebastic pushed a commit to branch master
in repository pg_comparator.
commit 91bcc305b1401f8e6e4805bdfc1a0def81748c2b
Author: Bas Couwenberg <sebastic at xs4all.nl>
Date: Sun Aug 16 17:12:08 2015 +0200
Imported Upstream version 2.2.6
---
LICENSE | 2 +-
Makefile | 5 ++-
README.pgc_checksum | 9 ++++-
fnv.c | 74 +++++++++++++++++++++++++++++++++++++
jenkins.c | 25 +++++++------
mysql_checksum.c | 84 +++++++++++++++++++++++++++++++++++++++++-
mysql_checksum.sql | 10 ++++-
pg_comparator | 99 +++++++++++++++++++++++++++++++++++++------------
pgc_checksum.c | 66 ++++++++++++++++++++++++++++++++-
pgc_checksum.sql.in | 20 +++++++++-
sqlite_checksum.c | 104 +++++++++++++++++++++++++++++++++++++++++++++++++++-
11 files changed, 455 insertions(+), 43 deletions(-)
diff --git a/LICENSE b/LICENSE
index c5a6c30..4eff9fb 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
pg_comparator is distributed under the terms of the BSD License:
-Copyright (c) 2004-2014, Fabien Coelho <fabien at coelho dot net>
+Copyright (c) 2004-2015, Fabien Coelho <fabien at coelho dot net>
All rights reserved.
Redistribution and use in source and binary forms, with or without
diff --git a/Makefile b/Makefile
index 05c166b..fc33c46 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-# $Id: Makefile 1460 2012-11-02 18:21:27Z fabien $
+# $Id: Makefile 1528 2014-08-04 07:09:24Z coelho $
#
# PostgreSQL stuff
@@ -32,6 +32,9 @@ $(name).html: $(name)
sed -e '/^<body style/a<h1>$(name)</h1>' > $@
touch -r $< $@
+# dependencies
+pgc_checksum.o: jenkins.c fnv.c
+
pgsql_install: install
pgsql_uninstall: uninstall
diff --git a/README.pgc_checksum b/README.pgc_checksum
index 560c545..72a5b89 100644
--- a/README.pgc_checksum
+++ b/README.pgc_checksum
@@ -1,5 +1,7 @@
provide fast NOT cryptographycally-secure checksum functions
for TEXT, results being of INT2, INT4 and INT8 types.
+The cksum* functions are based on Jenkins hash.
+The fnv* functions are based on FNV version 1a hash.
load with:
@@ -10,6 +12,9 @@ use as:
psql> SELECT cksum2('some text');
psql> SELECT cksum4('some text');
psql> SELECT cksum8('some text');
+ psql> SELECT fnv2('some text');
+ psql> SELECT fnv4('some text');
+ psql> SELECT fnv8('some text');
-An empty text results in hash value 0.
-A NULL value results in some predefined value.
+For cksum, a NULL text results in hash value 0 and
+an empty text results in some predefined value.
diff --git a/fnv.c b/fnv.c
new file mode 100644
index 0000000..bb27502
--- /dev/null
+++ b/fnv.c
@@ -0,0 +1,74 @@
+/*
+ * $Id: fnv.c 1529 2014-08-04 07:09:38Z coelho $
+ *
+ * https://en.wikipedia.org/wiki/Fowler_Noll_Vo_hash
+ * http://www.isthe.com/chongo/tech/comp/fnv/index.html
+ *
+ * Code adapted, simplified and slightly extended from public domain:
+ *
+ * http://www.isthe.com/chongo/src/fnv/hash_64a.c
+ *
+ * By chongo <Landon Curt Noll>
+ */
+
+#include <stdint.h>
+
+/* This prime is probably too small? It seems that it was chosen because it contains
+ * few one bits, thus allowing some optimisations on 32 bit processors which would
+ * not have a hardware 64 bit multiply operation.
+ */
+#define FNV_64_PRIME (0x100000001b3ULL)
+#define FNV1a_64_INIT (0xcbf29ce484222325ULL)
+
+/*
+ * Fold len bytes at data into the running 64-bit hash state hval.
+ *
+ * data: bytes to hash; a NULL pointer is accepted.
+ * len:  number of bytes readable at data.
+ * hval: initial hash state.
+ *
+ * Returns the updated state, or 0 when data is NULL. An empty, non-NULL
+ * input returns hval unchanged.
+ *
+ * Unless STANDARD_FNV1A_64 is defined, shifted copies of each byte are also
+ * added and xored into the state before the multiply, so the result is not
+ * the plain "xor byte, multiply by prime" step.
+ */
+static uint64_t fnv1a_64_hash_data(const void * data, const size_t len, uint64_t hval)
+{
+  // keep the const qualifier: the input is only ever read
+  const unsigned char * bp = (const unsigned char *) data;
+  const unsigned char * be;
+
+  if (!bp) // NULL
+    return 0ULL;
+
+  be = bp + len;
+  while (bp < be) {
+    const uint64_t byte = (uint64_t) (*bp++);
+#if defined(STANDARD_FNV1A_64)
+    hval ^= byte;
+#else
+    // help tweak high bits
+    hval += (byte << 11) | (byte << 31) | (byte << 53);
+    hval ^= byte | (byte << 23) | (byte << 43);
+#endif // STANDARD_FNV1A_64
+    hval *= FNV_64_PRIME;
+  }
+  return hval;
+}
+
+/* Hash len bytes at data, starting from the FNV1a_64_INIT state.
+ * The work is delegated to fnv1a_64_hash_data.
+ */
+static uint64_t fnv1a_64_hash(const void * data, const size_t len)
+{
+  const uint64_t seed = FNV1a_64_INIT;
+  return fnv1a_64_hash_data(data, len, seed);
+}
+
+/*
+ SELECT
+ (ABS(fnv8((i+1)::TEXT)) % 100) - (ABS(fnv8(i::TEXT)) % 100) AS diff,
+ COUNT(*) AS nb
+ FROM generate_series(1, 1000) as i
+ GROUP BY diff
+ ORDER BY diff;
+*/
+
+/* 16-bit checksum: the four 16-bit quarters of the 64-bit hash xored
+ * together, then narrowed to int16_t.
+ */
+static int16_t fnv_int2(const void * data, const size_t len)
+{
+  const uint64_t h = fnv1a_64_hash(data, len);
+  uint64_t folded = h;
+  folded ^= h >> 16;
+  folded ^= h >> 32;
+  folded ^= h >> 48;
+  return (int16_t) folded;
+}
+
+/* 32-bit checksum: upper half of the 64-bit hash xored into the lower
+ * half, then narrowed to int32_t.
+ */
+static int32_t fnv_int4(const void * data, const size_t len)
+{
+  const uint64_t h = fnv1a_64_hash(data, len);
+  const uint64_t folded = h ^ (h >> 32);
+  return (int32_t) folded;
+}
+
+/* 64-bit checksum: the full 64-bit hash converted to a signed integer. */
+static int64_t fnv_int8(const void * data, const size_t len)
+{
+  const uint64_t h = fnv1a_64_hash(data, len);
+  return (int64_t) h;
+}
diff --git a/jenkins.c b/jenkins.c
index 212ec7b..68205c0 100644
--- a/jenkins.c
+++ b/jenkins.c
@@ -1,8 +1,7 @@
-/* $Id: jenkins.c 1462 2012-11-03 07:17:10Z fabien $ */
+/* $Id: jenkins.c 1530 2014-08-10 21:45:12Z coelho $ */
#include <stdint.h>
-#define PN_16 15401
#define PN_32_1 433494437
#define PN_32_2 780291637
#define PN_32_3 1073676287
@@ -11,7 +10,11 @@
/* The following function is taken and adapted (wrt len) from
* http://www.burtleburtle.net/bob/hash/doobs.html,
* and is advertised public domain.
+ * This change breaks the incremental aspect of the computation.
+ *
* if hash==0, it is unchanged for the empty string.
+ *
+ * note: the jenkins function uses low-cost operators: + >> << ^
*/
static uint32_t jenkins_one_at_a_time_hash
(uint32_t hash, const unsigned char *key, size_t len)
@@ -29,32 +32,32 @@ static uint32_t jenkins_one_at_a_time_hash
}
/* checksum of sizes 2, 4 and 8.
- * checksum_int?(NULL) == some_predefined_value
- * checksum_int?('') == 0
+ * checksum_int?(NULL) == 0
+ * checksum_int?('') == some value
*/
static int16_t checksum_int2(const unsigned char *data, size_t size)
{
- uint32_t h = PN_16; // default if NULL
- if (data) h = jenkins_one_at_a_time_hash(0, data, size);
+ uint32_t h = 0; // default if NULL
+ if (data) h = jenkins_one_at_a_time_hash(PN_32_1, data, size);
return (int16_t) ((h>>16)^h);
}
// many collision, eg cksum4('16667') = cksum4('53827')
static int32_t checksum_int4(const unsigned char *data, size_t size)
{
- uint32_t h = PN_32_1; // default if NULL
- if (data) h = jenkins_one_at_a_time_hash(0, data, size);
+ uint32_t h = 0; // default if NULL
+ if (data) h = jenkins_one_at_a_time_hash(PN_32_2, data, size);
return (int32_t) h;
}
static int64_t checksum_int8(const unsigned char *data, size_t size)
{
- uint64_t h1 = PN_32_2, h2 = PN_32_3; // default if NULL
+ uint64_t h1 = 0, h2 = 0; // default if NULL
if (data) {
// the 64 bit hash is based on two hashes. first one is chsum4
- h1 = jenkins_one_at_a_time_hash(0, data, size);
+ h1 = jenkins_one_at_a_time_hash(PN_32_3, data, size);
// ensure that size==0 => checksum==0
- h2 = size? jenkins_one_at_a_time_hash(h1 ^ PN_32_4, data, size): 0;
+ h2 = jenkins_one_at_a_time_hash(h1 ^ PN_32_4, data, size);
}
return (int64_t) ((h1<<32)|h2);
}
diff --git a/mysql_checksum.c b/mysql_checksum.c
index 62c9110..b0b74d1 100644
--- a/mysql_checksum.c
+++ b/mysql_checksum.c
@@ -1,4 +1,4 @@
-/* $Id: mysql_checksum.c 1126 2012-08-08 07:47:13Z fabien $
+/* $Id: mysql_checksum.c 1525 2014-08-03 12:34:17Z coelho $
*
* This function computes a simple and fast checksum of a binary
* It is unclear to me what happends on different encodings.
@@ -9,6 +9,8 @@
#include <my_global.h>
#include <mysql.h>
+/* Jenkins-based hash functions
+ */
/* foo_init, foo, foo_deinit */
my_bool cksum8_init(UDF_INIT *, UDF_ARGS *, char *);
longlong cksum8(UDF_INIT *, UDF_ARGS *, char *, char *);
@@ -87,3 +89,83 @@ my_bool cksum8_init(
{
return 0;
}
+
+/* FNV-based hash functions
+ */
+my_bool fnv8_init(UDF_INIT *, UDF_ARGS *, char *);
+longlong fnv8(UDF_INIT *, UDF_ARGS *, char *, char *);
+my_bool fnv4_init(UDF_INIT *, UDF_ARGS *, char *);
+longlong fnv4(UDF_INIT *, UDF_ARGS *, char *, char *);
+my_bool fnv2_init(UDF_INIT *, UDF_ARGS *, char *);
+longlong fnv2(UDF_INIT *, UDF_ARGS *, char *, char *);
+
+#include "fnv.c"
+
+/* UDF fnv2: 16-bit FNV-based checksum of a single string argument.
+ * Sets *is_null and returns 0 unless called with exactly one
+ * STRING_RESULT argument.
+ */
+longlong fnv2(
+  UDF_INIT *initid __attribute__((unused)),
+  UDF_ARGS *args,
+  char *is_null,
+  char *error __attribute__((unused)))
+{
+  // if in doubt, return NULL
+  if (args->arg_count!=1 || args->arg_type[0]!=STRING_RESULT)
+  {
+    *is_null = 1;
+    return 0;
+  }
+  // hash with the FNV code from fnv.c, not the Jenkins-based checksum_int2
+  return (longlong) fnv_int2(args->args[0], args->lengths[0]);
+}
+
+/* Init hook paired with fnv2: ignores all its parameters, does no setup
+ * and always returns 0. Argument checks are done in fnv2 itself.
+ */
+my_bool fnv2_init(
+ UDF_INIT *initid __attribute__((unused)),
+ UDF_ARGS *args __attribute__((unused)),
+ char *message __attribute__((unused)))
+{
+ return 0;
+}
+
+/* UDF fnv4: 32-bit FNV-based checksum of a single string argument.
+ * Sets *is_null and returns 0 unless called with exactly one
+ * STRING_RESULT argument.
+ */
+longlong fnv4(
+  UDF_INIT *initid __attribute__((unused)),
+  UDF_ARGS *args,
+  char *is_null, // written on the error path, hence not marked unused
+  char *error __attribute__((unused)))
+{
+  // if in doubt, return NULL
+  if (args->arg_count!=1 || args->arg_type[0]!=STRING_RESULT)
+  {
+    *is_null = 1;
+    return 0;
+  }
+  // hash with the FNV code from fnv.c, not the Jenkins-based checksum_int4
+  return (longlong) fnv_int4(args->args[0], args->lengths[0]);
+}
+
+/* Init hook paired with fnv4: ignores all its parameters, does no setup
+ * and always returns 0. Argument checks are done in fnv4 itself.
+ */
+my_bool fnv4_init(
+ UDF_INIT *initid __attribute__((unused)),
+ UDF_ARGS *args __attribute__((unused)),
+ char *message __attribute__((unused)))
+{
+ return 0;
+}
+
+/* UDF fnv8: 64-bit FNV-based checksum of a single string argument.
+ * Sets *is_null and returns 0 unless called with exactly one
+ * STRING_RESULT argument.
+ */
+longlong fnv8(
+  UDF_INIT *initid __attribute__((unused)),
+  UDF_ARGS *args,
+  char *is_null, // written on the error path, hence not marked unused
+  char *error __attribute__((unused)))
+{
+  // if in doubt, return NULL
+  if (args->arg_count!=1 || args->arg_type[0]!=STRING_RESULT)
+  {
+    *is_null = 1;
+    return 0;
+  }
+  // hash with the FNV code from fnv.c, not the Jenkins-based checksum_int8
+  return (longlong) fnv_int8(args->args[0], args->lengths[0]);
+}
+
+/* Init hook paired with fnv8: ignores all its parameters, does no setup
+ * and always returns 0. Argument checks are done in fnv8 itself.
+ */
+my_bool fnv8_init(
+ UDF_INIT *initid __attribute__((unused)),
+ UDF_ARGS *args __attribute__((unused)),
+ char *message __attribute__((unused)))
+{
+ return 0;
+}
diff --git a/mysql_checksum.sql b/mysql_checksum.sql
index bcdc342..352849a 100644
--- a/mysql_checksum.sql
+++ b/mysql_checksum.sql
@@ -1,5 +1,5 @@
--
--- $Id: mysql_checksum.sql 687 2010-04-03 12:07:15Z fabien $
+-- $Id: mysql_checksum.sql 1520 2014-08-03 11:27:06Z coelho $
--
DROP FUNCTION IF EXISTS cksum8;
@@ -9,3 +9,11 @@ DROP FUNCTION IF EXISTS cksum2;
CREATE FUNCTION cksum8 RETURNS INTEGER SONAME 'mysql_checksum.so';
CREATE FUNCTION cksum4 RETURNS INTEGER SONAME 'mysql_checksum.so';
CREATE FUNCTION cksum2 RETURNS INTEGER SONAME 'mysql_checksum.so';
+
+-- FNV-based checksum functions fnv8, fnv4 and fnv2:
+-- drop any previous definitions first so that the script can be re-run.
+DROP FUNCTION IF EXISTS fnv8;
+DROP FUNCTION IF EXISTS fnv4;
+DROP FUNCTION IF EXISTS fnv2;
+
+-- all three are loaded from the mysql_checksum.so shared object
+CREATE FUNCTION fnv8 RETURNS INTEGER SONAME 'mysql_checksum.so';
+CREATE FUNCTION fnv4 RETURNS INTEGER SONAME 'mysql_checksum.so';
+CREATE FUNCTION fnv2 RETURNS INTEGER SONAME 'mysql_checksum.so';
diff --git a/pg_comparator b/pg_comparator
index aa5d9c5..66bff82 100755
--- a/pg_comparator
+++ b/pg_comparator
@@ -1,6 +1,6 @@
#!/usr/bin/perl
#
-# $Id: pg_comparator.pl 1512 2014-07-24 08:21:31Z coelho $
+# $Id: pg_comparator.pl 1540 2015-04-18 06:23:47Z coelho $
#
# HELP 1: pg_comparator --man
# HELP 2: pod2text pg_comparator
@@ -84,13 +84,13 @@ Default is B<create> because it always works for both databases.
=item C<--checksum-function=fun> or C<--cf=fun> or C<-c fun>
-Checksum function to use, either B<ck> or B<md5>.
-For PostgreSQL, MySQL and SQLite the provided B<ck> checksum functions must be
-loaded into the target databases.
+Checksum function to use, either B<ck>, B<fnv> or B<md5>.
+For PostgreSQL, MySQL and SQLite the provided B<ck> and B<fnv> checksum
+functions must be loaded into the target databases.
Choosing B<md5> does not come free either: the provided cast functions must be
loaded into the target databases and the computation is more expensive.
-Default is B<ck>, which is faster, especially if the operation is cpu-bound
+Default is B<ck>, which is fast, especially if the operation is cpu-bound
and the bandwidth is reasonably high.
=item C<--checksum-size=n> or C<--check-size=n> or C<--cs=n> or C<-z n>
@@ -116,7 +116,7 @@ Note that they are dropped implicitly by default when the connection
is closed as they are temporary, see C<-(-no)-temporary> option.
This option is useful for debugging.
-Default is B<not> to clear explicitely the checksum and summary tables,
+Default is B<not> to clear explicitly the checksum and summary tables,
as it is not needed.
=item C<--debug> or C<-d>
@@ -144,7 +144,7 @@ This option is only used for non regression tests. See the TESTS section.
Folding factor: log2 of the number of rows grouped together at each stage,
starting from the leaves so that the first round always groups as many records
-as possible. The power of two allows to use masked computations.
+as possible. The power of two allows one to use masked computations.
The minimum value of 1 builds a binary tree.
Default folding factor log2 is B<7>, i.e. size 128 folds.
@@ -169,7 +169,7 @@ Default is to build both key and tuple checksums on the fly.
=item C<--lock>, C<--no-lock>
Whether to lock tables.
-Setting the option explicitely overrides the default one way or another.
+Setting the option explicitly overrides the default one way or another.
For PostgreSQL, this option requires C<--transaction>, which is enabled by
default.
@@ -207,7 +207,7 @@ the C<--max-ratio> option, with a mimimum of 100 differences allowed.
=item C<--max-levels=0>
-Maximum number of levels used. Allows to cut-off folding. 0 means no cut-off.
+Maximum number of levels used. Allows one to cut-off folding. 0 means no cut-off.
Setting a value of 1 would only use the checksum table, without summaries.
A value of 3 or 4 would be raisonable, as the last levels of the tree are
nice for the theoretical complexity formula, but do not improve performance
@@ -275,6 +275,12 @@ this source specification so that the queries' syntax is the right one.
Default is to rely on the two URL arguments.
+=item C<--skip-inserts>, C<--skip-updates>, C<--skip-deletes>
+
+When synchronizing, do not perform these operations.
+
+Default under C<--synchronize> is to do all operations.
+
=item C<--stats=(txt|csv)>
Show various statistics about the comparison performed in this format.
@@ -593,6 +599,14 @@ C<share/contrib/pgc_casts.sql>. New checksums and casts are also available
for MySQL, see C<mysql_*.sql>. An loadable implementation of suitable
checksum functions is also available for SQLite, see C<sqlite_checksum.*>.
+The C<ck> checksum is based on
+L<Jenkins hash|https://en.wikipedia.org/wiki/Jenkins_hash>,
+which relies on simple add, shift and xor integer operations.
+The C<fnv> checksum is inspired by
+L<FNV hash|https://en.wikipedia.org/wiki/Fowler_Noll_Vo_hash>
+(64 bits 1a version) which uses xor and mult integer operations,
+although I also added some shift and add to help tweak high bits.
+
=item 3
An aggregate function is used to summarize checksums for a range of rows.
@@ -871,7 +885,7 @@ there is a lot of options the combination of which cannot all be tested.
If the tables to compare are in the same database, a simple SQL
query can extract the differences. Assuming Tables I<T1> and I<T2>
with primary key I<id> and non null contents I<data>, then their
-differences, that is how I<T1> differs from the reference I<T2>,
+differences, that is how I<T2> differs from the reference I<T1>,
is summarized by the following query:
SELECT COALESCE(T1.id, T2.id) AS key,
@@ -1012,7 +1026,7 @@ L<xSQL Software Data Compare|http://www.xsqlsoftware.com/Product/Sql_Data_Compar
=head1 TESTS
The paper reports numerous performance tests with PostgreSQL under various
-bandwith constraints.
+bandwidth constraints.
Moreover, non regression tests are run over randomly generated tables
when the software is upgraded:
@@ -1064,7 +1078,7 @@ null handling, foldings, number of key and value attributes...
=head1 BUGS
-All softwares have bugs. This is a software, hence it has bugs.
+All software has bugs. This is software, hence it has bugs.
Reporting bugs is good practice, so tell me if you find one.
If you have a fix, this is even better!
@@ -1103,6 +1117,16 @@ tables: this imply that you must be allowed to do that for the comparison...
However, read-only replicas do not allow creating objects, which mean that you
cannot use pg_comparator to compare table contents on a synchronized replica.
+=head1 TODO
+
+Allow larger checksum sizes.
+
+Make it a PostgreSQL extension.
+
+Add an option to avoid IN (x,y,...) syntax, maybe with a temporary table
+to hold values and use a JOIN on that. I'm not sure about the performance
+implications, though.
+
=head1 VERSIONS
See L<PG Foundry|http://pgfoundry.org/projects/pg-comparator/> for the latest
@@ -1110,6 +1134,16 @@ version. My L<web site|http://www.coelho.net/pg_comparator/> for the tool.
=over 4
+=item B<version 2.2.6> (r1540 on 2015-04-18)
+
+Fix some typos found by Lintian and pointed out by I<Ivan Mincik>.
+Add support for FNV (Fowler Noll Vo) version 1a inspired hash functions.
+Add option to skip inserts, updates or deletes when synchronizing,
+which may be useful to deal with foreign keys, issue pointed
+out by I<Graeme Bell>.
+The I<release> validation was run successfully
+on PostgreSQL 9.4.1 and MySQL 5.5.41.
+
=item B<version 2.2.5> (r1512 on 2014-07-24)
Fix broken URL defaults to use UNIX sockets with an empty host name,
@@ -1378,7 +1412,7 @@ Initial revision.
=head1 COPYRIGHT
-Copyright (c) 2004-2014, I<Fabien Coelho>
+Copyright (c) 2004-2015, I<Fabien Coelho>
<pg dot comparator at coelho dot net> L<http://www.coelho.net/>
This software is distributed under the terms of the BSD Licence.
@@ -1393,8 +1427,8 @@ saying so. See my webpage for current address.
=cut
-my $script_version = '2.2.5 (r1512)';
-my $revision = '$Revision: 1512 $';
+my $script_version = '2.2.6 (r1540)';
+my $revision = '$Revision: 1540 $';
$revision =~ tr/0-9//cd;
################################################################# SOME DEFAULTS
@@ -1406,6 +1440,7 @@ my ($cleanup, $size, $usekey, $usenull, $synchronize) = (0, 0, 0, 1, 0);
my ($do_it, $do_trans, $prefix, $ckcmp) = (0, 1, 'pgc_cmp', 'create');
my ($maskleft, $name, $key_size, $col_size, $where) = (1, 'none', 0, 0, '');
my ($factor, $expect_warn) = (7, 0);
+my ($skip_inserts, $skip_updates, $skip_deletes) = (0, 0, 0);
# condition, tests, max size of blobs, data sources...
my ($expect, $longreadlen, $source1, $source2, $key_cs, $tup_cs, $do_lock,
$env_pass, $max_report, $stats, $pg_copy);
@@ -1471,6 +1506,7 @@ sub firebird_cast($$) {
sub pgsql_cksum_template($$) {
my ($algo, $sz) = @_;
return "CKSUM$sz((%s)::TEXT)" if $algo eq 'ck';
+ return "FNV$sz((%s)::TEXT)" if $algo eq 'fnv';
return pgsql_cast("DECODE(MD5(%s::TEXT),'hex')::BIT(" . 8*$sz . ")", $sz)
if $algo eq 'md5';
die "unexpected checksum $algo for pgsql";
@@ -1479,6 +1515,7 @@ sub pgsql_cksum_template($$) {
sub mysql_cksum_template($$) {
my ($algo, $sz) = @_;
return "CKSUM$sz(CAST(%s AS BINARY))" if $algo eq 'ck';
+ return "FNV$sz(CAST(%s AS BINARY))" if $algo eq 'fnv';
return mysql_cast("CONV(LEFT(MD5(%s),". 2*$sz ."),16,10)", $sz)
if $algo eq 'md5';
die "unexpected checksum $algo for mysql";
@@ -1487,6 +1524,7 @@ sub mysql_cksum_template($$) {
sub sqlite_cksum_template($$) {
my ($algo, $sz) = @_;
return "CKSUM$sz(CAST(%s AS TEXT))" if $algo eq 'ck';
+ return "FNV$sz(CAST(%s AS TEXT))" if $algo eq 'fnv';
return "PGC_MD5($sz, CAST(%s AS TEXT))" if $algo eq 'md5';
die "unexpected checksum $algo for sqlite";
}
@@ -1718,7 +1756,8 @@ my %M = (
"COALESCE(DECODE(MD5(${att}::TEXT),'hex'),''::BYTEA)" .
"::BIT(" . 8*$sz . ")", $sz);
},
- 'ck' => sub { my ($sz, $att) = @_; return "CKSUM$sz(${att}::TEXT)"; }
+ 'ck' => sub { my ($sz, $att) = @_; return "CKSUM$sz(${att}::TEXT)"; },
+ 'fnv' => sub { my ($sz, $att) = @_; return "FNV$sz(${att}::TEXT)"; }
},
# sql checksum template: cksum($algo, $size)
'cksum' => \&pgsql_cksum_template,
@@ -1773,6 +1812,9 @@ my %M = (
},
'ck' => sub { my ($sz, $att) = @_;
return "CKSUM$sz(CAST($att AS BINARY))"
+ },
+ 'fnv' => sub { my ($sz, $att) = @_;
+ return "FNV$sz(CAST($att AS BINARY))"
}
},
'cksum' => \&mysql_cksum_template,
@@ -1822,6 +1864,9 @@ my %M = (
},
'ck' => sub { my ($sz, $att) = @_;
return "CKSUM$sz(CAST($att AS TEXT))";
+ },
+ 'fnv' => sub { my ($sz, $att) = @_;
+ return "FNV$sz(CAST($att AS TEXT))";
}
},
'cksum' => \&sqlite_cksum_template,
@@ -2860,6 +2905,9 @@ GetOptions(
# functions
"synchronize|sync|S!" => \$synchronize,
"do-it|do|D!" => \$do_it,
+ "skip-inserts!" => \$skip_inserts,
+ "skip-updates!" => \$skip_updates,
+ "skip-deletes!" => \$skip_deletes,
"expect|e=i" => \$expect,
"expect-warn" => \$expect_warn, # hidden option used by the validation
"report|r!" => \$report,
@@ -2957,8 +3005,8 @@ $t2 = $t1 unless defined $t2;
die "null should be 'text' or 'hash', got $null"
unless $null =~ /^(text|hash)$/i;
-die "checksum should be 'md5' or 'ck', got ($checksum)"
- unless $checksum =~ /^(md5|ck)$/i;
+die "checksum should be 'md5', 'ck' or 'fnv', got ($checksum)"
+ unless $checksum =~ /^(md5|ck|fnv)$/i;
die "checksize must be 2, 4 or 8, got ($checksize)"
unless $checksize =~ /^[248]$/;
@@ -3518,7 +3566,10 @@ if ($synchronize and
($where? "($where) AND ": '') . $where_k2;
verb 2, $del_sql;
my $del_sth = $dbh2->prepare($del_sql) if $do_it;
- for my $d (@$del, @$delb, $pg_copy? @$upt: ()) {
+ my @alldels = ();
+ push @alldels, (@$del, @$delb) unless $skip_deletes;
+ push @alldels, @$upt if $pg_copy and not $skip_updates;
+ for my $d (@alldels) {
sth_param_exec($do_it, "DELETE $t2", $del_sth, $d);
}
# undef $del_sth;
@@ -3533,7 +3584,9 @@ if ($synchronize and
$select .= "($where) AND " if $where;
$select .= "(" . join(',', @$k1) . ") IN (";
# we COPY both inserts and updates
- my @allins = (@$ins, @$insb, @$upt);
+ my @allins = ();
+ push @allins, (@$ins, @$insb) unless $skip_inserts;
+ push @allins, @$upt unless $skip_updates;
while (@allins) {
my $bulk = '';
for my $k (splice(@allins, 0, $pg_copy)) { # chunked
@@ -3561,11 +3614,11 @@ if ($synchronize and
($where? "($where) AND ": '') . $where_k1;
verb 2, $val_sql;
$val_sth = $dbh1->prepare($val_sql)
- if @$ins or @$insb or @$upt;
+ if @$ins or @$insb or @$upt;
}
# handle inserts
- if (@$ins or @$insb)
+ if ((@$ins or @$insb) and not $skip_inserts)
{
my $ins_sql = "INSERT INTO $t2(" . join(',', @$c2, @$k2) . ") " .
'VALUES(?' . ',?' x (@$k2+@$c2-1) . ')';
@@ -3591,7 +3644,7 @@ if ($synchronize and
}
# handle updates
- if (@$upt)
+ if (@$upt and not $skip_updates)
{
die "there must be some columns to update" unless $c1;
my $upt_sql = "UPDATE $t2 SET $set_c2 WHERE " .
diff --git a/pgc_checksum.c b/pgc_checksum.c
index 8d74332..8d4c3d3 100644
--- a/pgc_checksum.c
+++ b/pgc_checksum.c
@@ -1,4 +1,4 @@
-/* $Id: pgc_checksum.c 1022 2010-08-06 07:28:07Z fabien $
+/* $Id: pgc_checksum.c 1520 2014-08-03 11:27:06Z coelho $
*
* This function computes a simple and fast checksum of a text.
* It is unclear to me what happends on different encodings.
@@ -20,6 +20,8 @@ PG_FUNCTION_INFO_V1(text_checksum2);
PG_FUNCTION_INFO_V1(text_checksum4);
PG_FUNCTION_INFO_V1(text_checksum8);
+/* Jenkins-based checksums
+ */
#include "jenkins.c"
Datum text_checksum2(PG_FUNCTION_ARGS)
@@ -72,3 +74,65 @@ Datum text_checksum8(PG_FUNCTION_ARGS)
}
PG_RETURN_INT64(checksum_int8(data, size));
}
+
+/* FNV-based checksums
+ */
+extern Datum text_fnv2(PG_FUNCTION_ARGS);
+extern Datum text_fnv4(PG_FUNCTION_ARGS);
+extern Datum text_fnv8(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(text_fnv2);
+PG_FUNCTION_INFO_V1(text_fnv4);
+PG_FUNCTION_INFO_V1(text_fnv8);
+
+#include "fnv.c"
+
+/* SQL-callable text_fnv2: 16-bit FNV-based checksum of a TEXT argument.
+ * A NULL argument is handed to fnv_int2 as a NULL pointer of size 0.
+ */
+Datum text_fnv2(PG_FUNCTION_ARGS)
+{
+  unsigned char * bytes = NULL;
+  size_t nbytes = 0;
+  if (!PG_ARGISNULL(0))
+  {
+    text *arg = PG_GETARG_TEXT_P(0);
+    bytes = (unsigned char *) VARDATA(arg);
+    nbytes = VARSIZE(arg) - VARHDRSZ; // payload length without the header
+  }
+  PG_RETURN_INT16(fnv_int2(bytes, nbytes));
+}
+
+/* SQL-callable text_fnv4: 32-bit FNV-based checksum of a TEXT argument.
+ * A NULL argument is handed to fnv_int4 as a NULL pointer of size 0.
+ */
+Datum text_fnv4(PG_FUNCTION_ARGS)
+{
+  unsigned char * bytes = NULL;
+  size_t nbytes = 0;
+  if (!PG_ARGISNULL(0))
+  {
+    text *arg = PG_GETARG_TEXT_P(0);
+    bytes = (unsigned char *) VARDATA(arg);
+    nbytes = VARSIZE(arg) - VARHDRSZ; // payload length without the header
+  }
+  PG_RETURN_INT32(fnv_int4(bytes, nbytes));
+}
+
+/* SQL-callable text_fnv8: 64-bit FNV-based checksum of a TEXT argument.
+ * A NULL argument is handed to fnv_int8 as a NULL pointer of size 0.
+ */
+Datum text_fnv8(PG_FUNCTION_ARGS)
+{
+  unsigned char * bytes = NULL;
+  size_t nbytes = 0;
+  if (!PG_ARGISNULL(0))
+  {
+    text *arg = PG_GETARG_TEXT_P(0);
+    bytes = (unsigned char *) VARDATA(arg);
+    nbytes = VARSIZE(arg) - VARHDRSZ; // payload length without the header
+  }
+  PG_RETURN_INT64(fnv_int8(bytes, nbytes));
+}
diff --git a/pgc_checksum.sql.in b/pgc_checksum.sql.in
index 6380bb7..533e5d0 100644
--- a/pgc_checksum.sql.in
+++ b/pgc_checksum.sql.in
@@ -1,4 +1,4 @@
--- $Id: pgc_checksum.sql.in 1022 2010-08-06 07:28:07Z fabien $
+-- $Id: pgc_checksum.sql.in 1520 2014-08-03 11:27:06Z coelho $
LOAD 'MODULE_PATHNAME';
@@ -19,3 +19,21 @@ RETURNS INT8
LANGUAGE C
CALLED ON NULL INPUT
AS 'MODULE_PATHNAME', 'text_checksum8';
+
+-- FNV-based checksums of a TEXT value, in 2, 4 and 8 byte integer sizes.
+-- They are declared CALLED ON NULL INPUT so that the C implementation
+-- receives NULL arguments and handles them itself.
+
+-- 2-byte checksum, implemented by C function text_fnv2
+CREATE OR REPLACE FUNCTION fnv2(TEXT)
+RETURNS INT2
+LANGUAGE C
+CALLED ON NULL INPUT
+AS 'MODULE_PATHNAME', 'text_fnv2';
+
+-- 4-byte checksum, implemented by C function text_fnv4
+CREATE OR REPLACE FUNCTION fnv4(TEXT)
+RETURNS INT4
+LANGUAGE C
+CALLED ON NULL INPUT
+AS 'MODULE_PATHNAME', 'text_fnv4';
+
+-- 8-byte checksum, implemented by C function text_fnv8
+CREATE OR REPLACE FUNCTION fnv8(TEXT)
+RETURNS INT8
+LANGUAGE C
+CALLED ON NULL INPUT
+AS 'MODULE_PATHNAME', 'text_fnv8';
diff --git a/sqlite_checksum.c b/sqlite_checksum.c
index 00dd7fb..a426df5 100644
--- a/sqlite_checksum.c
+++ b/sqlite_checksum.c
@@ -1,4 +1,4 @@
-/* $Id: sqlite_checksum.c 1460 2012-11-02 18:21:27Z fabien $ */
+/* $Id: sqlite_checksum.c 1520 2014-08-03 11:27:06Z coelho $ */
/*
* SQLite extensions for pg_comparator.
*
@@ -108,6 +108,90 @@ static void sqlite_checksum_int8(
sqlite3_result_int64(ctx, checksum_int8(txt, len));
}
+static void sqlite_fnv_int2(
+ sqlite3_context * ctx,
+ int argc,
+ sqlite3_value ** argv)
+{
+ assert(argc==1);
+ const unsigned char * txt;
+ size_t len;
+ switch (sqlite3_value_type(argv[0])) {
+ case SQLITE_NULL:
+ txt = NULL;
+ len = 0;
+ break;
+ case SQLITE_TEXT:
+ txt = sqlite3_value_text(argv[0]);
+ len = sqlite3_value_bytes(argv[0]);
+ break;
+ // hmmm... should I do something else?
+ case SQLITE_INTEGER:
+ case SQLITE_FLOAT:
+ case SQLITE_BLOB:
+ default:
+ sqlite3_result_error(ctx, "expecting TEXT or NULL", -1);
+ return;
+ }
+ sqlite3_result_int(ctx, checksum_int2(txt, len));
+}
+
+static void sqlite_fnv_int4(
+ sqlite3_context * ctx,
+ int argc,
+ sqlite3_value ** argv)
+{
+ assert(argc==1);
+ const unsigned char * txt;
+ size_t len;
+ switch (sqlite3_value_type(argv[0])) {
+ case SQLITE_NULL:
+ txt = NULL;
+ len = 0;
+ break;
+ case SQLITE_TEXT:
+ txt = sqlite3_value_text(argv[0]);
+ len = sqlite3_value_bytes(argv[0]);
+ break;
+ // hmmm... should I do something else?
+ case SQLITE_INTEGER:
+ case SQLITE_FLOAT:
+ case SQLITE_BLOB:
+ default:
+ sqlite3_result_error(ctx, "expecting TEXT or NULL", -1);
+ return;
+ }
+ sqlite3_result_int(ctx, checksum_int4(txt, len));
+}
+
+static void sqlite_fnv_int8(
+ sqlite3_context * ctx,
+ int argc,
+ sqlite3_value ** argv)
+{
+ assert(argc==1);
+ const unsigned char * txt;
+ size_t len;
+ switch (sqlite3_value_type(argv[0])) {
+ case SQLITE_NULL:
+ txt = NULL;
+ len = 0;
+ break;
+ case SQLITE_TEXT:
+ txt = sqlite3_value_text(argv[0]);
+ len = sqlite3_value_bytes(argv[0]);
+ break;
+ // hmmm... should I do something else?
+ case SQLITE_INTEGER:
+ case SQLITE_FLOAT:
+ case SQLITE_BLOB:
+ default:
+ sqlite3_result_error(ctx, "expecting TEXT or NULL", -1);
+ return;
+ }
+ sqlite3_result_int64(ctx, checksum_int8(txt, len));
+}
+
/***************************************************** INTEGER XOR AGGREGATE */
static void ixor_step(
@@ -171,6 +255,24 @@ int sqlite3_extension_init(
sqlite_checksum_int8, NULL, NULL);
sqlite3_create_function(db,
+ // name, #arg, txt, data,
+ "fnv2", 1, SQLITE_UTF8, NULL,
+ // func, step, final
+ sqlite_fnv_int2, NULL, NULL);
+
+ sqlite3_create_function(db,
+ // name, #arg, txt, data,
+ "fnv4", 1, SQLITE_UTF8, NULL,
+ // func, step, final
+ sqlite_fnv_int4, NULL, NULL);
+
+ sqlite3_create_function(db,
+ // name, #arg, txt, data,
+ "fnv8", 1, SQLITE_UTF8, NULL,
+ // func, step, final
+ sqlite_fnv_int8, NULL, NULL);
+
+ sqlite3_create_function(db,
// name, #args, txt, data,
"xor", 1, SQLITE_UTF8, NULL,
// func, step, final
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-grass/pg_comparator.git
More information about the Pkg-grass-devel
mailing list