[pg_comparator] 02/10: Imported Upstream version 2.2.6

Sebastiaan Couwenberg sebastic at moszumanska.debian.org
Sun Aug 16 16:10:07 UTC 2015


This is an automated email from the git hooks/post-receive script.

sebastic pushed a commit to branch master
in repository pg_comparator.

commit 91bcc305b1401f8e6e4805bdfc1a0def81748c2b
Author: Bas Couwenberg <sebastic at xs4all.nl>
Date:   Sun Aug 16 17:12:08 2015 +0200

    Imported Upstream version 2.2.6
---
 LICENSE             |   2 +-
 Makefile            |   5 ++-
 README.pgc_checksum |   9 ++++-
 fnv.c               |  74 +++++++++++++++++++++++++++++++++++++
 jenkins.c           |  25 +++++++------
 mysql_checksum.c    |  84 +++++++++++++++++++++++++++++++++++++++++-
 mysql_checksum.sql  |  10 ++++-
 pg_comparator       |  99 +++++++++++++++++++++++++++++++++++++------------
 pgc_checksum.c      |  66 ++++++++++++++++++++++++++++++++-
 pgc_checksum.sql.in |  20 +++++++++-
 sqlite_checksum.c   | 104 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 11 files changed, 455 insertions(+), 43 deletions(-)

diff --git a/LICENSE b/LICENSE
index c5a6c30..4eff9fb 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 pg_comparator is distributed under the terms of the BSD License:
 
-Copyright (c) 2004-2014, Fabien Coelho <fabien at coelho dot net>
+Copyright (c) 2004-2015, Fabien Coelho <fabien at coelho dot net>
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without 
diff --git a/Makefile b/Makefile
index 05c166b..fc33c46 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-# $Id: Makefile 1460 2012-11-02 18:21:27Z fabien $
+# $Id: Makefile 1528 2014-08-04 07:09:24Z coelho $
 
 #
 # PostgreSQL stuff
@@ -32,6 +32,9 @@ $(name).html: $(name)
 	  sed -e '/^<body style/a<h1>$(name)</h1>' > $@
 	touch -r $< $@
 
+# dependencies
+pgc_checksum.o: jenkins.c fnv.c
+
 pgsql_install: install
 pgsql_uninstall: uninstall
 
diff --git a/README.pgc_checksum b/README.pgc_checksum
index 560c545..72a5b89 100644
--- a/README.pgc_checksum
+++ b/README.pgc_checksum
@@ -1,5 +1,7 @@
 provide fast NOT cryptographycally-secure checksum functions
 for TEXT, results being of INT2, INT4 and INT8 types.
+The cksum* functions are based on Jenkins hash.
+The fnv* functions are based on FNV version 1a hash.
 
 load with:
 
@@ -10,6 +12,9 @@ use as:
 	psql> SELECT cksum2('some text');
 	psql> SELECT cksum4('some text');
 	psql> SELECT cksum8('some text');
+	psql> SELECT fnv2('some text');
+	psql> SELECT fnv4('some text');
+	psql> SELECT fnv8('some text');
 
-An empty text results in hash value 0.
-A NULL value results in some predefined value.
+For cksum, a NULL text results in hash value 0 and
+an empty text results in some predefined value.
diff --git a/fnv.c b/fnv.c
new file mode 100644
index 0000000..bb27502
--- /dev/null
+++ b/fnv.c
@@ -0,0 +1,74 @@
+/*
+ * $Id: fnv.c 1529 2014-08-04 07:09:38Z coelho $
+ *
+ * https://en.wikipedia.org/wiki/Fowler_Noll_Vo_hash
+ * http://www.isthe.com/chongo/tech/comp/fnv/index.html
+ *
+ * Code adapted, simplified and slightly extended from public domain:
+ *
+ *   http://www.isthe.com/chongo/src/fnv/hash_64a.c
+ *
+ * By chongo <Landon Curt Noll>
+ */
+
+#include <stdint.h>
+
+/* This prime is probably too small? It seems that it was chosen because it contains
+ * few one bits, thus allowing some optimisations on 32 bit processors which would
+ * not have a hardware 64 bit multiply operation.
+ */
+#define FNV_64_PRIME (0x100000001b3ULL)
+#define FNV1a_64_INIT (0xcbf29ce484222325ULL)
+
+static uint64_t fnv1a_64_hash_data(const void * data, const size_t len, uint64_t hval)
+{
+  /* fold len bytes of data into hval; a NULL buffer always yields 0 */
+  const unsigned char * cursor = (const unsigned char *) data;
+  size_t remaining = len;
+
+  if (!cursor)
+    return 0ULL;
+  while (remaining-- > 0) {
+    const uint64_t octet = (uint64_t) *cursor++;
+#if defined(STANDARD_FNV1A_64)
+    hval ^= octet;
+#else
+    /* when STANDARD_FNV1A_64 is not defined: also mix the byte into higher bits */
+    hval += (octet << 11) | (octet << 31) | (octet << 53);
+    hval ^= octet | (octet << 23) | (octet << 43);
+#endif /* STANDARD_FNV1A_64 */
+    hval *= FNV_64_PRIME;
+  }
+  return hval;
+}
+
+static uint64_t fnv1a_64_hash(const void * data, const size_t len)
+{
+  return fnv1a_64_hash_data(data, len, (uint64_t) FNV1a_64_INIT); /* fresh hash seeded with FNV1a_64_INIT */
+}
+
+/*
+   SELECT
+     (ABS(fnv8((i+1)::TEXT)) % 100) - (ABS(fnv8(i::TEXT)) % 100) AS diff,
+     COUNT(*) AS nb
+   FROM generate_series(1, 1000) as i
+   GROUP BY diff
+   ORDER BY diff;
+*/
+
+static int16_t fnv_int2(const void * data, const size_t len)
+{
+  const uint64_t full = fnv1a_64_hash(data, len); /* xor the four 16-bit slices together */
+  return (int16_t) (full ^ (full >> 16) ^ (full >> 32) ^ (full >> 48));
+}
+
+static int32_t fnv_int4(const void * data, const size_t len)
+{
+  const uint64_t full = fnv1a_64_hash(data, len); /* xor the high half into the low half */
+  return (int32_t) (full ^ (full >> 32));
+}
+
+static int64_t fnv_int8(const void * data, const size_t len)
+{
+  return (int64_t) fnv1a_64_hash_data(data, len, FNV1a_64_INIT); /* all 64 bits, as a signed value */
+}
diff --git a/jenkins.c b/jenkins.c
index 212ec7b..68205c0 100644
--- a/jenkins.c
+++ b/jenkins.c
@@ -1,8 +1,7 @@
-/* $Id: jenkins.c 1462 2012-11-03 07:17:10Z fabien $ */
+/* $Id: jenkins.c 1530 2014-08-10 21:45:12Z coelho $ */
 
 #include <stdint.h>
 
-#define PN_16	15401
 #define PN_32_1 433494437
 #define PN_32_2 780291637
 #define PN_32_3 1073676287
@@ -11,7 +10,11 @@
 /* The following function is taken and adapted (wrt len) from
  * http://www.burtleburtle.net/bob/hash/doobs.html,
  * and is advertised public domain.
+ * This change breaks the incremental aspect of the computation.
+ *
  * if hash==0, it is unchanged for the empty string.
+ *
+ * note: the jenkins function uses low-cost operators: + >> << ^
  */
 static uint32_t jenkins_one_at_a_time_hash
   (uint32_t hash, const unsigned char *key, size_t len)
@@ -29,32 +32,32 @@ static uint32_t jenkins_one_at_a_time_hash
 }
 
 /* checksum of sizes 2, 4 and 8.
- * checksum_int?(NULL) == some_predefined_value
- * checksum_int?('') == 0
+ * checksum_int?(NULL) == 0
+ * checksum_int?('') == some value
  */
 static int16_t checksum_int2(const unsigned char *data, size_t size)
 {
-  uint32_t h = PN_16; // default if NULL
-  if (data) h = jenkins_one_at_a_time_hash(0, data, size);
+  uint32_t h = 0; // default if NULL
+  if (data) h = jenkins_one_at_a_time_hash(PN_32_1, data, size);
   return (int16_t) ((h>>16)^h);
 }
 
 // many collision, eg cksum4('16667') = cksum4('53827')
 static int32_t checksum_int4(const unsigned char *data, size_t size)
 {
-  uint32_t h = PN_32_1; // default if NULL
-  if (data) h = jenkins_one_at_a_time_hash(0, data, size);
+  uint32_t h = 0; // default if NULL
+  if (data) h = jenkins_one_at_a_time_hash(PN_32_2, data, size);
   return (int32_t) h;
 }
 
 static int64_t checksum_int8(const unsigned char *data, size_t size)
 {
-  uint64_t h1 = PN_32_2, h2 = PN_32_3; // default if NULL
+  uint64_t h1 = 0, h2 = 0; // default if NULL
   if (data) {
     // the 64 bit hash is based on two hashes. first one is chsum4
-    h1 = jenkins_one_at_a_time_hash(0, data, size);
+    h1 = jenkins_one_at_a_time_hash(PN_32_3, data, size);
     // ensure that size==0 => checksum==0
-    h2 = size? jenkins_one_at_a_time_hash(h1 ^ PN_32_4, data, size): 0;
+    h2 = jenkins_one_at_a_time_hash(h1 ^ PN_32_4, data, size);
   }
   return (int64_t) ((h1<<32)|h2);
 }
diff --git a/mysql_checksum.c b/mysql_checksum.c
index 62c9110..b0b74d1 100644
--- a/mysql_checksum.c
+++ b/mysql_checksum.c
@@ -1,4 +1,4 @@
-/* $Id: mysql_checksum.c 1126 2012-08-08 07:47:13Z fabien $
+/* $Id: mysql_checksum.c 1525 2014-08-03 12:34:17Z coelho $
  *
  * This function computes a simple and fast checksum of a binary
  * It is unclear to me what happends on different encodings.
@@ -9,6 +9,8 @@
 #include <my_global.h>
 #include <mysql.h>
 
+/* Jenkins-based hash functions
+ */
 /* foo_init, foo, foo_deinit */
 my_bool cksum8_init(UDF_INIT *, UDF_ARGS *, char *);
 longlong cksum8(UDF_INIT *, UDF_ARGS *, char *, char *);
@@ -87,3 +89,83 @@ my_bool cksum8_init(
 {
   return 0;
 }
+
+/* FNV-based hash functions
+ */
+my_bool fnv8_init(UDF_INIT *, UDF_ARGS *, char *);
+longlong fnv8(UDF_INIT *, UDF_ARGS *, char *, char *);
+my_bool fnv4_init(UDF_INIT *, UDF_ARGS *, char *);
+longlong fnv4(UDF_INIT *, UDF_ARGS *, char *, char *);
+my_bool fnv2_init(UDF_INIT *, UDF_ARGS *, char *);
+longlong fnv2(UDF_INIT *, UDF_ARGS *, char *, char *);
+
+#include "fnv.c"
+
+longlong fnv2(
+  UDF_INIT *initid __attribute__((unused)),
+  UDF_ARGS *args,
+  char *is_null, // out: set to 1 when the SQL result must be NULL
+  char *error __attribute__((unused)))
+{
+  // 16-bit FNV hash (fnv.c) of the single string argument; anything else yields SQL NULL
+  if (args->arg_count!=1 || args->arg_type[0]!=STRING_RESULT)
+  {
+    *is_null = 1;
+    return 0;
+  }
+  return (longlong) fnv_int2(args->args[0], args->lengths[0]);
+}
+// fnv2 needs no per-statement setup: the init hook just returns 0 (no error)
+my_bool fnv2_init(
+  UDF_INIT *initid __attribute__((unused)),
+  UDF_ARGS *args __attribute__((unused)),
+  char *message __attribute__((unused)))
+{
+  return 0;
+}
+
+longlong fnv4(
+  UDF_INIT *initid __attribute__((unused)),
+  UDF_ARGS *args,
+  char *is_null, // out: set to 1 when the SQL result must be NULL
+  char *error __attribute__((unused)))
+{
+  // 32-bit FNV hash (fnv.c) of the single string argument; anything else yields SQL NULL
+  if (args->arg_count!=1 || args->arg_type[0]!=STRING_RESULT)
+  {
+    *is_null = 1;
+    return 0;
+  }
+  return (longlong) fnv_int4(args->args[0], args->lengths[0]);
+}
+// fnv4 needs no per-statement setup: the init hook just returns 0 (no error)
+my_bool fnv4_init(
+  UDF_INIT *initid __attribute__((unused)),
+  UDF_ARGS *args __attribute__((unused)),
+  char *message __attribute__((unused)))
+{
+  return 0;
+}
+
+longlong fnv8(
+  UDF_INIT *initid __attribute__((unused)),
+  UDF_ARGS *args,
+  char *is_null, // out: set to 1 when the SQL result must be NULL
+  char *error __attribute__((unused)))
+{
+  // 64-bit FNV hash (fnv.c) of the single string argument; anything else yields SQL NULL
+  if (args->arg_count!=1 || args->arg_type[0]!=STRING_RESULT)
+  {
+    *is_null = 1;
+    return 0;
+  }
+  return (longlong) fnv_int8(args->args[0], args->lengths[0]);
+}
+// fnv8 needs no per-statement setup: the init hook just returns 0 (no error)
+my_bool fnv8_init(
+  UDF_INIT *initid __attribute__((unused)),
+  UDF_ARGS *args __attribute__((unused)),
+  char *message __attribute__((unused)))
+{
+  return 0;
+}
diff --git a/mysql_checksum.sql b/mysql_checksum.sql
index bcdc342..352849a 100644
--- a/mysql_checksum.sql
+++ b/mysql_checksum.sql
@@ -1,5 +1,5 @@
 --
--- $Id: mysql_checksum.sql 687 2010-04-03 12:07:15Z fabien $
+-- $Id: mysql_checksum.sql 1520 2014-08-03 11:27:06Z coelho $
 --
 
 DROP FUNCTION IF EXISTS cksum8;
@@ -9,3 +9,11 @@ DROP FUNCTION IF EXISTS cksum2;
 CREATE FUNCTION cksum8 RETURNS INTEGER SONAME 'mysql_checksum.so';
 CREATE FUNCTION cksum4 RETURNS INTEGER SONAME 'mysql_checksum.so';
 CREATE FUNCTION cksum2 RETURNS INTEGER SONAME 'mysql_checksum.so';
+
+DROP FUNCTION IF EXISTS fnv8; -- drop any earlier registration before re-creating it below
+DROP FUNCTION IF EXISTS fnv4;
+DROP FUNCTION IF EXISTS fnv2;
+-- register the FNV-based functions from the same shared library as the cksum ones
+CREATE FUNCTION fnv8 RETURNS INTEGER SONAME 'mysql_checksum.so';
+CREATE FUNCTION fnv4 RETURNS INTEGER SONAME 'mysql_checksum.so';
+CREATE FUNCTION fnv2 RETURNS INTEGER SONAME 'mysql_checksum.so';
diff --git a/pg_comparator b/pg_comparator
index aa5d9c5..66bff82 100755
--- a/pg_comparator
+++ b/pg_comparator
@@ -1,6 +1,6 @@
 #!/usr/bin/perl
 #
-# $Id: pg_comparator.pl 1512 2014-07-24 08:21:31Z coelho $
+# $Id: pg_comparator.pl 1540 2015-04-18 06:23:47Z coelho $
 #
 # HELP 1: pg_comparator --man
 # HELP 2: pod2text pg_comparator
@@ -84,13 +84,13 @@ Default is B<create> because it always works for both databases.
 
 =item C<--checksum-function=fun> or C<--cf=fun> or C<-c fun>
 
-Checksum function to use, either B<ck> or B<md5>.
-For PostgreSQL, MySQL and SQLite the provided B<ck> checksum functions must be
-loaded into the target databases.
+Checksum function to use, either B<ck>, B<fnv> or B<md5>.
+For PostgreSQL, MySQL and SQLite the provided B<ck> and B<fnv> checksum
+functions must be loaded into the target databases.
 Choosing B<md5> does not come free either: the provided cast functions must be
 loaded into the target databases and the computation is more expensive.
 
-Default is B<ck>, which is faster, especially if the operation is cpu-bound
+Default is B<ck>, which is fast, especially if the operation is cpu-bound
 and the bandwidth is reasonably high.
 
 =item C<--checksum-size=n> or C<--check-size=n> or C<--cs=n> or C<-z n>
@@ -116,7 +116,7 @@ Note that they are dropped implicitly by default when the connection
 is closed as they are temporary, see C<-(-no)-temporary> option.
 This option is useful for debugging.
 
-Default is B<not> to clear explicitely the checksum and summary tables,
+Default is B<not> to clear explicitly the checksum and summary tables,
 as it is not needed.
 
 =item C<--debug> or C<-d>
@@ -144,7 +144,7 @@ This option is only used for non regression tests. See the TESTS section.
 
 Folding factor: log2 of the number of rows grouped together at each stage,
 starting from the leaves so that the first round always groups as many records
-as possible. The power of two allows to use masked computations.
+as possible. The power of two allows one to use masked computations.
 The minimum value of 1 builds a binary tree.
 
 Default folding factor log2 is B<7>, i.e. size 128 folds.
@@ -169,7 +169,7 @@ Default is to build both key and tuple checksums on the fly.
 =item C<--lock>, C<--no-lock>
 
 Whether to lock tables.
-Setting the option explicitely overrides the default one way or another.
+Setting the option explicitly overrides the default one way or another.
 For PostgreSQL, this option requires C<--transaction>, which is enabled by
 default.
 
@@ -207,7 +207,7 @@ the C<--max-ratio> option, with a mimimum of 100 differences allowed.
 
 =item C<--max-levels=0>
 
-Maximum number of levels used. Allows to cut-off folding. 0 means no cut-off.
+Maximum number of levels used. Allows one to cut-off folding. 0 means no cut-off.
 Setting a value of 1 would only use the checksum table, without summaries.
 A value of 3 or 4 would be raisonable, as the last levels of the tree are
 nice for the theoretical complexity formula, but do not improve performance
@@ -275,6 +275,12 @@ this source specification so that the queries' syntax is the right one.
 
 Default is to rely on the two URL arguments.
 
+=item C<--skip-inserts>, C<--skip-updates>, C<--skip-deletes>
+
+When synchronizing, do not perform these operations.
+
+Default under C<--synchronize> is to do all operations.
+
 =item C<--stats=(txt|csv)>
 
 Show various statistics about the comparison performed in this format.
@@ -593,6 +599,14 @@ C<share/contrib/pgc_casts.sql>. New checksums and casts are also available
 for MySQL, see C<mysql_*.sql>. An loadable implementation of suitable
 checksum functions is also available for SQLite, see C<sqlite_checksum.*>.
 
+The C<ck> checksum is based on
+L<Jenkins hash|https://en.wikipedia.org/wiki/Jenkins_hash>,
+which relies on simple add, shift and xor integer operations.
+The C<fnv> checksum is inspired by
+L<FNV hash|https://en.wikipedia.org/wiki/Fowler_Noll_Vo_hash>
+(64 bits 1a version) which uses xor and mult integer operations,
+although I also added some shift and add to help tweak high bits.
+
 =item 3
 
 An aggregate function is used to summarize checksums for a range of rows.
@@ -871,7 +885,7 @@ there is a lot of options the combination of which cannot all be tested.
 If the tables to compare are in the same database, a simple SQL
 query can extract the differences. Assuming Tables I<T1> and I<T2>
 with primary key I<id> and non null contents I<data>, then their
-differences, that is how I<T1> differs from the reference I<T2>,
+differences, that is how I<T2> differs from the reference I<T1>,
 is summarized by the following query:
 
 	SELECT COALESCE(T1.id, T2.id) AS key,
@@ -1012,7 +1026,7 @@ L<xSQL Software Data Compare|http://www.xsqlsoftware.com/Product/Sql_Data_Compar
 =head1 TESTS
 
 The paper reports numerous performance tests with PostgreSQL under various
-bandwith constraints.
+bandwidth constraints.
 
 Moreover, non regression tests are run over randomly generated tables
 when the software is upgraded:
@@ -1064,7 +1078,7 @@ null handling, foldings, number of key and value attributes...
 
 =head1 BUGS
 
-All softwares have bugs. This is a software, hence it has bugs.
+All software has bugs. This is software, hence it has bugs.
 
 Reporting bugs is good practice, so tell me if you find one.
 If you have a fix, this is even better!
@@ -1103,6 +1117,16 @@ tables: this imply that you must be allowed to do that for the comparison...
 However, read-only replicas do not allow creating objects, which mean that you
 cannot use pg_comparator to compare table contents on a synchronized replica.
 
+=head1 TODO
+
+Allow larger checksum sizes.
+
+Make it a PostgreSQL extension.
+
+Add an option to avoid IN (x,y,...) syntax, maybe with a temporary table
+to hold values and use a JOIN on that. I'm not sure about the performance
+implications, though.
+
 =head1 VERSIONS
 
 See L<PG Foundry|http://pgfoundry.org/projects/pg-comparator/> for the latest
@@ -1110,6 +1134,16 @@ version. My L<web site|http://www.coelho.net/pg_comparator/> for the tool.
 
 =over 4
 
+=item B<version 2.2.6> (r1540 on 2015-04-18)
+
+Fix some typos found by Lintian and pointed out by I<Ivan Mincik>.
+Add support for FNV (Fowler Noll Vo) version 1a inspired hash functions.
+Add option to skip inserts, updates or deletes when synchronizing,
+which may be useful to deal with foreign keys, issue pointed
+out by I<Graeme Bell>.
+The I<release> validation was run successfully
+on PostgreSQL 9.4.1 and MySQL 5.5.41.
+
 =item B<version 2.2.5> (r1512 on 2014-07-24)
 
 Fix broken URL defaults to use UNIX sockets with an empty host name,
@@ -1378,7 +1412,7 @@ Initial revision.
 
 =head1 COPYRIGHT
 
-Copyright (c) 2004-2014, I<Fabien Coelho>
+Copyright (c) 2004-2015, I<Fabien Coelho>
 <pg dot comparator at coelho dot net> L<http://www.coelho.net/>
 
 This software is distributed under the terms of the BSD Licence.
@@ -1393,8 +1427,8 @@ saying so. See my webpage for current address.
 
 =cut
 
-my $script_version = '2.2.5 (r1512)';
-my $revision = '$Revision: 1512 $';
+my $script_version = '2.2.6 (r1540)';
+my $revision = '$Revision: 1540 $';
 $revision =~ tr/0-9//cd;
 
 ################################################################# SOME DEFAULTS
@@ -1406,6 +1440,7 @@ my ($cleanup, $size, $usekey, $usenull, $synchronize) = (0, 0, 0, 1, 0);
 my ($do_it, $do_trans, $prefix, $ckcmp) = (0, 1, 'pgc_cmp', 'create');
 my ($maskleft, $name, $key_size, $col_size, $where) = (1, 'none', 0, 0, '');
 my ($factor, $expect_warn) = (7, 0);
+my ($skip_inserts, $skip_updates, $skip_deletes) = (0, 0, 0);
 # condition, tests, max size of blobs, data sources...
 my ($expect, $longreadlen, $source1, $source2, $key_cs, $tup_cs, $do_lock,
     $env_pass, $max_report, $stats, $pg_copy);
@@ -1471,6 +1506,7 @@ sub firebird_cast($$) {
 sub pgsql_cksum_template($$) {
   my ($algo, $sz) = @_;
   return "CKSUM$sz((%s)::TEXT)" if $algo eq 'ck';
+  return "FNV$sz((%s)::TEXT)" if $algo eq 'fnv';
   return pgsql_cast("DECODE(MD5(%s::TEXT),'hex')::BIT(" . 8*$sz . ")", $sz)
     if $algo eq 'md5';
   die "unexpected checksum $algo for pgsql";
@@ -1479,6 +1515,7 @@ sub pgsql_cksum_template($$) {
 sub mysql_cksum_template($$) {
   my ($algo, $sz) = @_;
   return "CKSUM$sz(CAST(%s AS BINARY))" if $algo eq 'ck';
+  return "FNV$sz(CAST(%s AS BINARY))" if $algo eq 'fnv';
   return mysql_cast("CONV(LEFT(MD5(%s),". 2*$sz ."),16,10)", $sz)
     if $algo eq 'md5';
   die "unexpected checksum $algo for mysql";
@@ -1487,6 +1524,7 @@ sub mysql_cksum_template($$) {
 sub sqlite_cksum_template($$) {
   my ($algo, $sz) = @_;
   return "CKSUM$sz(CAST(%s AS TEXT))" if $algo eq 'ck';
+  return "FNV$sz(CAST(%s AS TEXT))" if $algo eq 'fnv';
   return "PGC_MD5($sz, CAST(%s AS TEXT))" if $algo eq 'md5';
   die "unexpected checksum $algo for sqlite";
 }
@@ -1718,7 +1756,8 @@ my %M = (
 	  "COALESCE(DECODE(MD5(${att}::TEXT),'hex'),''::BYTEA)" .
 	  "::BIT(" .  8*$sz . ")", $sz);
 	},
-      'ck' => sub { my ($sz, $att) = @_; return "CKSUM$sz(${att}::TEXT)"; }
+      'ck' => sub { my ($sz, $att) = @_; return "CKSUM$sz(${att}::TEXT)"; },
+      'fnv' => sub { my ($sz, $att) = @_; return "FNV$sz(${att}::TEXT)"; }
     },
     # sql checksum template: cksum($algo, $size)
     'cksum' => \&pgsql_cksum_template,
@@ -1773,6 +1812,9 @@ my %M = (
 	},
       'ck' => sub { my ($sz, $att) = @_;
 	return "CKSUM$sz(CAST($att AS BINARY))"
+      },
+      'fnv' => sub { my ($sz, $att) = @_;
+	return "FNV$sz(CAST($att AS BINARY))"
       }
     },
     'cksum' => \&mysql_cksum_template,
@@ -1822,6 +1864,9 @@ my %M = (
 	},
       'ck' => sub { my ($sz, $att) = @_;
         return "CKSUM$sz(CAST($att AS TEXT))";
+      },
+      'fnv' => sub { my ($sz, $att) = @_;
+        return "FNV$sz(CAST($att AS TEXT))";
       }
     },
     'cksum' => \&sqlite_cksum_template,
@@ -2860,6 +2905,9 @@ GetOptions(
   # functions
   "synchronize|sync|S!" => \$synchronize,
   "do-it|do|D!" => \$do_it,
+  "skip-inserts!" => \$skip_inserts,
+  "skip-updates!" => \$skip_updates,
+  "skip-deletes!" => \$skip_deletes,
   "expect|e=i" => \$expect,
   "expect-warn" => \$expect_warn, # hidden option used by the validation
   "report|r!" => \$report,
@@ -2957,8 +3005,8 @@ $t2 = $t1 unless defined $t2;
 die "null should be 'text' or 'hash', got $null"
   unless $null =~ /^(text|hash)$/i;
 
-die "checksum should be 'md5' or 'ck', got ($checksum)"
-  unless $checksum =~ /^(md5|ck)$/i;
+die "checksum should be 'md5', 'ck' or 'fnv', got ($checksum)"
+  unless $checksum =~ /^(md5|ck|fnv)$/i;
 
 die "checksize must be 2, 4 or 8, got ($checksize)"
   unless $checksize =~ /^[248]$/;
@@ -3518,7 +3566,10 @@ if ($synchronize and
 	($where? "($where) AND ": '') . $where_k2;
     verb 2, $del_sql;
     my $del_sth = $dbh2->prepare($del_sql) if $do_it;
-    for my $d (@$del, @$delb, $pg_copy? @$upt: ()) {
+    my @alldels = ();
+    push @alldels, (@$del, @$delb) unless $skip_deletes;
+    push @alldels, @$upt if $pg_copy and not $skip_updates;
+    for my $d (@alldels) {
       sth_param_exec($do_it, "DELETE $t2", $del_sth, $d);
     }
     # undef $del_sth;
@@ -3533,7 +3584,9 @@ if ($synchronize and
     $select .= "($where) AND " if $where;
     $select .= "(" . join(',', @$k1) . ") IN (";
     # we COPY both inserts and updates
-    my @allins = (@$ins, @$insb, @$upt);
+    my @allins = ();
+    push @allins, (@$ins, @$insb) unless $skip_inserts;
+    push @allins, @$upt unless $skip_updates;
     while (@allins) {
       my $bulk = '';
       for my $k (splice(@allins, 0, $pg_copy)) { # chunked
@@ -3561,11 +3614,11 @@ if ($synchronize and
       ($where? "($where) AND ": '') . $where_k1;
       verb 2, $val_sql;
       $val_sth = $dbh1->prepare($val_sql)
-      if @$ins or @$insb or @$upt;
+        if @$ins or @$insb or @$upt;
     }
 
     # handle inserts
-    if (@$ins or @$insb)
+    if ((@$ins or @$insb) and not $skip_inserts)
     {
       my $ins_sql = "INSERT INTO $t2(" . join(',', @$c2, @$k2) . ") " .
 	'VALUES(?' . ',?' x (@$k2+@$c2-1) . ')';
@@ -3591,7 +3644,7 @@ if ($synchronize and
     }
 
     # handle updates
-    if (@$upt)
+    if (@$upt and not $skip_updates)
     {
       die "there must be some columns to update" unless $c1;
       my $upt_sql = "UPDATE $t2 SET $set_c2 WHERE " .
diff --git a/pgc_checksum.c b/pgc_checksum.c
index 8d74332..8d4c3d3 100644
--- a/pgc_checksum.c
+++ b/pgc_checksum.c
@@ -1,4 +1,4 @@
-/* $Id: pgc_checksum.c 1022 2010-08-06 07:28:07Z fabien $
+/* $Id: pgc_checksum.c 1520 2014-08-03 11:27:06Z coelho $
  *
  * This function computes a simple and fast checksum of a text.
  * It is unclear to me what happends on different encodings.
@@ -20,6 +20,8 @@ PG_FUNCTION_INFO_V1(text_checksum2);
 PG_FUNCTION_INFO_V1(text_checksum4);
 PG_FUNCTION_INFO_V1(text_checksum8);
 
+/* Jenkins-based checksums
+ */
 #include "jenkins.c"
 
 Datum text_checksum2(PG_FUNCTION_ARGS)
@@ -72,3 +74,65 @@ Datum text_checksum8(PG_FUNCTION_ARGS)
   }
   PG_RETURN_INT64(checksum_int8(data, size));
 }
+
+/* FNV-based checksums
+ */
+extern Datum text_fnv2(PG_FUNCTION_ARGS);
+extern Datum text_fnv4(PG_FUNCTION_ARGS);
+extern Datum text_fnv8(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(text_fnv2);
+PG_FUNCTION_INFO_V1(text_fnv4);
+PG_FUNCTION_INFO_V1(text_fnv8);
+
+#include "fnv.c"
+
+Datum text_fnv2(PG_FUNCTION_ARGS)
+{
+  /* a NULL argument reaches the hash as a NULL buffer of length 0 */
+  unsigned char * payload = NULL;
+  size_t nbytes = 0;
+
+  if (!PG_ARGISNULL(0))
+  {
+    text *arg = PG_GETARG_TEXT_P(0);
+    /* hash the text payload only: total size minus the VARHDRSZ header */
+    payload = (unsigned char *) VARDATA(arg);
+    nbytes = VARSIZE(arg) - VARHDRSZ;
+  }
+
+  PG_RETURN_INT16(fnv_int2(payload, nbytes));
+}
+
+Datum text_fnv4(PG_FUNCTION_ARGS)
+{
+  /* a NULL argument reaches the hash as a NULL buffer of length 0 */
+  unsigned char * payload = NULL;
+  size_t nbytes = 0;
+
+  if (!PG_ARGISNULL(0))
+  {
+    text *arg = PG_GETARG_TEXT_P(0);
+    /* hash the text payload only: total size minus the VARHDRSZ header */
+    payload = (unsigned char *) VARDATA(arg);
+    nbytes = VARSIZE(arg) - VARHDRSZ;
+  }
+
+  PG_RETURN_INT32(fnv_int4(payload, nbytes));
+}
+
+Datum text_fnv8(PG_FUNCTION_ARGS)
+{
+  /* a NULL argument reaches the hash as a NULL buffer of length 0 */
+  unsigned char * payload = NULL;
+  size_t nbytes = 0;
+
+  if (!PG_ARGISNULL(0))
+  {
+    text *arg = PG_GETARG_TEXT_P(0);
+    /* hash the text payload only: total size minus the VARHDRSZ header */
+    payload = (unsigned char *) VARDATA(arg);
+    nbytes = VARSIZE(arg) - VARHDRSZ;
+  }
+
+  PG_RETURN_INT64(fnv_int8(payload, nbytes));
+}
diff --git a/pgc_checksum.sql.in b/pgc_checksum.sql.in
index 6380bb7..533e5d0 100644
--- a/pgc_checksum.sql.in
+++ b/pgc_checksum.sql.in
@@ -1,4 +1,4 @@
--- $Id: pgc_checksum.sql.in 1022 2010-08-06 07:28:07Z fabien $
+-- $Id: pgc_checksum.sql.in 1520 2014-08-03 11:27:06Z coelho $
 
 LOAD 'MODULE_PATHNAME';
 
@@ -19,3 +19,21 @@ RETURNS INT8
 LANGUAGE C
 CALLED ON NULL INPUT
 AS 'MODULE_PATHNAME', 'text_checksum8';
+
+CREATE OR REPLACE FUNCTION fnv2(TEXT) -- FNV-based checksum, INT2 result
+RETURNS INT2
+LANGUAGE C
+CALLED ON NULL INPUT -- the C function tests for a NULL argument itself
+AS 'MODULE_PATHNAME', 'text_fnv2';
+
+CREATE OR REPLACE FUNCTION fnv4(TEXT) -- FNV-based checksum, INT4 result
+RETURNS INT4
+LANGUAGE C
+CALLED ON NULL INPUT -- the C function tests for a NULL argument itself
+AS 'MODULE_PATHNAME', 'text_fnv4';
+
+CREATE OR REPLACE FUNCTION fnv8(TEXT) -- FNV-based checksum, INT8 result
+RETURNS INT8
+LANGUAGE C
+CALLED ON NULL INPUT -- the C function tests for a NULL argument itself
+AS 'MODULE_PATHNAME', 'text_fnv8';
diff --git a/sqlite_checksum.c b/sqlite_checksum.c
index 00dd7fb..a426df5 100644
--- a/sqlite_checksum.c
+++ b/sqlite_checksum.c
@@ -1,4 +1,4 @@
-/* $Id: sqlite_checksum.c 1460 2012-11-02 18:21:27Z fabien $ */
+/* $Id: sqlite_checksum.c 1520 2014-08-03 11:27:06Z coelho $ */
 /*
  * SQLite extensions for pg_comparator.
  *
@@ -108,6 +108,90 @@ static void sqlite_checksum_int8(
   sqlite3_result_int64(ctx, checksum_int8(txt, len));
 }
 
+static void sqlite_fnv_int2( // scalar SQL function, registered under the name "fnv2"
+  sqlite3_context * ctx,
+  int argc,
+  sqlite3_value ** argv)
+{
+  assert(argc==1);
+  const unsigned char * txt;
+  size_t len;
+  switch (sqlite3_value_type(argv[0])) {
+  case SQLITE_NULL: // NULL value: hand a NULL buffer of length 0 to the hash
+    txt = NULL;
+    len = 0;
+    break;
+  case SQLITE_TEXT: // text value: hash its bytes
+    txt = sqlite3_value_text(argv[0]);
+    len = sqlite3_value_bytes(argv[0]);
+    break;
+    // every other value type is rejected with an error (open question: coerce it instead?)
+  case SQLITE_INTEGER:
+  case SQLITE_FLOAT:
+  case SQLITE_BLOB:
+  default:
+    sqlite3_result_error(ctx, "expecting TEXT or NULL", -1);
+    return;
+  }
+  sqlite3_result_int(ctx, checksum_int2(txt, len)); // NOTE(review): Jenkins checksum_int2, not fnv_int2 -- looks like a copy/paste slip, confirm
+}
+
+static void sqlite_fnv_int4( // scalar SQL function, registered under the name "fnv4"
+  sqlite3_context * ctx,
+  int argc,
+  sqlite3_value ** argv)
+{
+  assert(argc==1);
+  const unsigned char * txt;
+  size_t len;
+  switch (sqlite3_value_type(argv[0])) {
+  case SQLITE_NULL: // NULL value: hand a NULL buffer of length 0 to the hash
+    txt = NULL;
+    len = 0;
+    break;
+  case SQLITE_TEXT: // text value: hash its bytes
+    txt = sqlite3_value_text(argv[0]);
+    len = sqlite3_value_bytes(argv[0]);
+    break;
+    // every other value type is rejected with an error (open question: coerce it instead?)
+  case SQLITE_INTEGER:
+  case SQLITE_FLOAT:
+  case SQLITE_BLOB:
+  default:
+    sqlite3_result_error(ctx, "expecting TEXT or NULL", -1);
+    return;
+  }
+  sqlite3_result_int(ctx, checksum_int4(txt, len)); // NOTE(review): Jenkins checksum_int4, not fnv_int4 -- looks like a copy/paste slip, confirm
+}
+
+static void sqlite_fnv_int8( // scalar SQL function, registered under the name "fnv8"
+  sqlite3_context * ctx,
+  int argc,
+  sqlite3_value ** argv)
+{
+  assert(argc==1);
+  const unsigned char * txt;
+  size_t len;
+  switch (sqlite3_value_type(argv[0])) {
+  case SQLITE_NULL: // NULL value: hand a NULL buffer of length 0 to the hash
+    txt = NULL;
+    len = 0;
+    break;
+  case SQLITE_TEXT: // text value: hash its bytes
+    txt = sqlite3_value_text(argv[0]);
+    len = sqlite3_value_bytes(argv[0]);
+    break;
+    // every other value type is rejected with an error (open question: coerce it instead?)
+  case SQLITE_INTEGER:
+  case SQLITE_FLOAT:
+  case SQLITE_BLOB:
+  default:
+    sqlite3_result_error(ctx, "expecting TEXT or NULL", -1);
+    return;
+  }
+  sqlite3_result_int64(ctx, checksum_int8(txt, len)); // NOTE(review): Jenkins checksum_int8, not fnv_int8 -- looks like a copy/paste slip, confirm
+}
+
 /***************************************************** INTEGER XOR AGGREGATE */
 
 static void ixor_step(
@@ -171,6 +255,24 @@ int sqlite3_extension_init(
 			  sqlite_checksum_int8, NULL, NULL);
 
   sqlite3_create_function(db,
+			  // name, #arg, txt, data,
+			  "fnv2", 1, SQLITE_UTF8, NULL,
+			  // func, step, final
+			  sqlite_fnv_int2, NULL, NULL);
+
+  sqlite3_create_function(db,
+			  // name, #arg, txt, data,
+			  "fnv4", 1, SQLITE_UTF8, NULL,
+			  // func, step, final
+			  sqlite_fnv_int4, NULL, NULL);
+
+  sqlite3_create_function(db,
+			  // name, #arg, txt, data,
+			  "fnv8", 1, SQLITE_UTF8, NULL,
+			  // func, step, final
+			  sqlite_fnv_int8, NULL, NULL);
+
+  sqlite3_create_function(db,
         // name, #args, txt, data,
         "xor", 1, SQLITE_UTF8, NULL,
         // func, step, final

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-grass/pg_comparator.git



More information about the Pkg-grass-devel mailing list