Mailing List Archive

[interchange] Updated UTF8-handling for GDBM files
commit 25f07cb57a7347398cd92ffaf42c25aa08f58c82
Author: David Christensen <david@endpoint.com>
Date: Thu Sep 22 10:47:05 2016 -0500

Updated UTF8-handling for GDBM files

Remove the existing cruft related to GDBM_ENABLE_UTF8, which
appears never to have worked entirely correctly.

In the meantime, tie this explicitly and solely to whether UTF8 is enabled
in the catalog, rather than configuring it separately.

Also make sure the :utf8 layer is set on data files as they are read in and written out.

Make sure we respect the MINIVEND_DISABLE_UTF8 environment variable.

lib/Vend/Data.pm | 5 ++++-
lib/Vend/Table/Common.pm | 26 ++++++++++++++++++++++----
lib/Vend/Table/GDBM.pm | 29 +----------------------------
3 files changed, 27 insertions(+), 33 deletions(-)
---
diff --git a/lib/Vend/Data.pm b/lib/Vend/Data.pm
index f8c97f9..5313f72 100644
--- a/lib/Vend/Data.pm
+++ b/lib/Vend/Data.pm
@@ -1248,7 +1248,10 @@ sub export_database {
or die "Couldn't exclusive lock $file: $!\n";
open(EXPORT, "+>$file") or
die "Couldn't write $file: $!\n";
-
+
+ # we should be outputting as UTF8 if we're so configured
+ binmode(\*EXPORT, ':utf8') if $::Variable->{MV_UTF8} || $Global::Variable->{MV_UTF8};
+
#::logDebug("EXPORT_SORT=" . $db->config('EXPORT_SORT'));
if($opt->{sort} ||= $db->config('EXPORT_SORT')) {
#::logDebug("Found EXPORT_SORT=$opt->{sort}");
diff --git a/lib/Vend/Table/Common.pm b/lib/Vend/Table/Common.pm
index 510a9f5..d3a39ce 100644
--- a/lib/Vend/Table/Common.pm
+++ b/lib/Vend/Table/Common.pm
@@ -1,8 +1,6 @@
# Vend::Table::Common - Common access methods for Interchange databases
#
-# $Id: Common.pm,v 2.51 2008-05-26 02:30:04 markj Exp $
-#
-# Copyright (C) 2002-2008 Interchange Development Group
+# Copyright (C) 2002-2016 Interchange Development Group
# Copyright (C) 1996-2002 Red Hat, Inc.
#
# This program was originally based on Vend 0.2 and 0.3
@@ -23,7 +21,7 @@
# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
# MA 02110-1301 USA.

-$VERSION = substr(q$Revision: 2.51 $, 10);
+$VERSION = '2.52';
use strict;

package Vend::Table::Common;
@@ -34,6 +32,19 @@ no warnings qw(uninitialized numeric);
use Symbol;
use Vend::Util;

+our $Has_Encode = 0;
+
+if ($ENV{MINIVEND_DISABLE_UTF8}) {
+ # stub routines to pass-thru data if disabled
+ *encode_utf8 = sub {@_};
+ *decode_utf8 = sub {@_};
+}
+else {
+ require Encode;
+ import Encode qw( encode_utf8 decode_utf8 );
+ $Has_Encode = 1;
+}
+
use Exporter;
use vars qw($Storable $VERSION @EXPORT @EXPORT_OK);
@EXPORT = qw(create_columns import_ascii_delimited import_csv config columns);
@@ -164,6 +175,8 @@ sub unlock_table {

sub stuff {
my ($val) = @_;
+ $val = encode_utf8($val)
+ if $Has_Encode && ($::Variable->{MV_UTF8} || $Global::Variable->{MV_UTF8});
$val =~ s,([\t\%]),$Hex_string[ord($1)],eg;
return $val;
}
@@ -171,6 +184,8 @@ sub stuff {
sub unstuff {
my ($val) = @_;
$val =~ s,%(..),chr(hex($1)),eg;
+ $val = decode_utf8($val)
+ if $Has_Encode && ($::Variable->{MV_UTF8} || $Global::Variable->{MV_UTF8});
return $val;
}

@@ -1080,6 +1095,9 @@ sub import_ascii_delimited {

new_filehandle(\*IN);

+ # we should be inputting as UTF8 if we're so configured
+ binmode(\*IN, ':utf8') if $::Variable->{MV_UTF8} || $Global::Variable->{MV_UTF8};
+
my $field_hash;
my $para_sep;
my $codere = '[\w-_#/.]+';
diff --git a/lib/Vend/Table/GDBM.pm b/lib/Vend/Table/GDBM.pm
index 55a4610..5b6a5a2 100644
--- a/lib/Vend/Table/GDBM.pm
+++ b/lib/Vend/Table/GDBM.pm
@@ -27,17 +27,8 @@ use vars qw($VERSION @ISA);
use GDBM_File;
use Vend::Table::Common;

-if ($ENV{MINIVEND_DISABLE_UTF8}) {
- sub encode($$;$){}
- sub decode($$;$){}
-}
-else {
- require Encode;
- import Encode qw( decode encode );
-}
-
@ISA = qw(Vend::Table::Common);
-$VERSION = '2.21';
+$VERSION = '2.22';

sub new {
my ($class, $obj) = @_;
@@ -120,8 +111,6 @@ sub open_table {
die ::errmsg("%s could not tie to '%s': %s", 'GDBM', $filename, $!)
unless $dbm;

- apply_utf8_filters($dbm) if $config->{GDBM_ENABLE_UTF8};
-
my $columns = [split(/\t/, $tie->{'c'})];

$config->{VERBATIM_FIELDS} = 1 unless defined $config->{VERBATIM_FIELDS};
@@ -140,22 +129,6 @@ sub open_table {
bless $s, $class;
}

-sub apply_utf8_filters {
- my ($handle) = shift;
-
-#::logDebug("applying UTF-8 filters to GDBM handle");
-
- my $out_filter = sub { $_ = encode('utf8', $_) };
- my $in_filter = sub { $_ = decode('utf8', $_) };
-
- $handle->filter_store_key($out_filter);
- $handle->filter_store_value($out_filter);
- $handle->filter_fetch_key($in_filter);
- $handle->filter_fetch_value($in_filter);
-
- return $handle;
-}
-
# Unfortunate hack need for Safe searches
*column_index = \&Vend::Table::Common::column_index;
*column_exists = \&Vend::Table::Common::column_exists;

_______________________________________________
interchange-cvs mailing list
interchange-cvs@icdevgroup.org
http://www.icdevgroup.org/mailman/listinfo/interchange-cvs