[Pkg-puppet-devel] [facter] 133/352: (FACT-134) Add module for normalizing fact values

Stig Sandbeck Mathisen ssm at debian.org
Sun Apr 6 22:21:39 UTC 2014


This is an automated email from the git hooks/post-receive script.

ssm pushed a commit to branch master
in repository facter.

commit ab4d7c4775f6948039368114f88a33c080289e68
Author: Adrien Thebo <git at somethingsinistral.net>
Date:   Thu Jan 9 11:07:44 2014 -0800

    (FACT-134) Add module for normalizing fact values
    
    This commit adds a module for normalizing fact values to UTF-8 and validating
    that each string is properly encoded; e.g. a string may claim to be UTF-8
    encoded but actually contain binary data.
---
 lib/facter/util/normalization.rb     | 98 ++++++++++++++++++++++++++++++++++++
 spec/unit/util/normalization_spec.rb | 97 +++++++++++++++++++++++++++++++++++
 2 files changed, 195 insertions(+)

diff --git a/lib/facter/util/normalization.rb b/lib/facter/util/normalization.rb
new file mode 100644
index 0000000..693deb8
--- /dev/null
+++ b/lib/facter/util/normalization.rb
@@ -0,0 +1,98 @@
+module Facter
+  module Util
+    module Normalization
+      class NormalizationError < StandardError; end
+
+      VALID_TYPES = [Integer, Float, TrueClass, FalseClass, NilClass, String, Array, Hash]
+
+      module_function
+
+      # Recursively normalize the given data structure
+      #
+      # @api public
+      # @raise [NormalizationError] If the data structure contained an invalid element.
+      # @return [void]
+      def normalize(value)
+        case value
+        when Integer, Float, TrueClass, FalseClass, NilClass
+          true
+        when String
+          normalize_string(value)
+        when Array
+          normalize_array(value)
+        when Hash
+          normalize_hash(value)
+        else
+          raise NormalizationError, "Expected #{value} to be one of #{VALID_TYPES.inspect}, but was #{value.class}"
+        end
+      end
+
+      # @!method normalize_string(value)
+      #
+      # Attempt to normalize and validate the given string.
+      #
+      # On Ruby 1.8 the string is checked by stripping out all non UTF-8
+      # characters and comparing the converted string to the original. If they
+      # do not match then the string is considered invalid.
+      #
+      # On Ruby 1.9+, the string is validated by checking that the string encoding
+      # is UTF-8 and that the string content matches the encoding. If the string
+      # is not in the expected encoding then it is converted to UTF-8.
+      #
+      # @api public
+      # @raise [NormalizationError] If the string used an unsupported encoding or did not match its encoding
+      # @param value [String]
+      # @return [void]
+
+      if RUBY_VERSION =~ /^1\.8/
+        require 'iconv'
+
+        def normalize_string(value)
+          converted = Iconv.conv('UTF-8//IGNORE', 'UTF-8', value)
+          if converted != value
+            raise NormalizationError, "String #{value.inspect} is not valid UTF-8"
+          end
+        end
+      else
+        def normalize_string(value)
+          unless value.encoding == Encoding::UTF_8
+            begin
+              value.encode!(Encoding::UTF_8)
+            rescue EncodingError
+              raise NormalizationError, "String encoding #{value.encoding} is not UTF-8 and could not be converted to UTF-8"
+            end
+          end
+
+          unless value.valid_encoding?
+            raise NormalizationError, "String #{value.inspect} doesn't match the reported encoding #{value.encoding}"
+          end
+        end
+      end
+
+      # Validate all elements of the array.
+      #
+      # @api public
+      # @raise [NormalizationError] If one of the elements failed validation
+      # @param value [Array]
+      # @return [void]
+      def normalize_array(value)
+        value.each do |elem|
+          normalize(elem)
+        end
+      end
+
+      # Validate all keys and values of the hash.
+      #
+      # @api public
+      # @raise [NormalizationError] If one of the keys or values failed normalization
+      # @param value [Hash]
+      # @return [void]
+      def normalize_hash(value)
+        value.each_pair do |k, v|
+          normalize(k)
+          normalize(v)
+        end
+      end
+    end
+  end
+end
diff --git a/spec/unit/util/normalization_spec.rb b/spec/unit/util/normalization_spec.rb
new file mode 100644
index 0000000..9e7cb0e
--- /dev/null
+++ b/spec/unit/util/normalization_spec.rb
@@ -0,0 +1,97 @@
+# encoding: utf-8
+
+require 'spec_helper'
+require 'facter/util/normalization'
+
+describe Facter::Util::Normalization do
+
+  subject { described_class }
+
+  describe "validating strings" do
+    describe "and string encoding is supported", :if => String.instance_methods.include?(:encoding) do
+      it "accepts strings that are ASCII and match their encoding and converts them to UTF-8" do
+        str = "ASCII".encode(Encoding::ASCII)
+        subject.normalize(str)
+        expect(str.encoding).to eq(Encoding::UTF_8)
+      end
+
+      it "accepts strings that are UTF-8 and match their encoding" do
+        str = "let's make a ☃!".encode(Encoding::UTF_8)
+        subject.normalize(str)
+      end
+
+      it "converts valid non UTF-8 strings to UTF-8" do
+        str = "let's make a ☃!".encode(Encoding::UTF_16LE)
+        subject.normalize(str)
+        expect(str.encoding).to eq(Encoding::UTF_8)
+      end
+
+      it "rejects strings that are not UTF-8 and do not match their claimed encoding" do
+        invalid_shift_jis = "\xFF\x5C!".force_encoding(Encoding::SHIFT_JIS)
+        expect {
+          subject.normalize(invalid_shift_jis)
+        }.to raise_error(Facter::Util::Normalization::NormalizationError, /String encoding Shift_JIS is not UTF-8 and could not be converted to UTF-8/)
+      end
+
+      it "rejects strings that claim to be UTF-8 encoded but aren't" do
+        str = "\255ay!".force_encoding(Encoding::UTF_8)
+        expect {
+          subject.normalize(str)
+        }.to raise_error(Facter::Util::Normalization::NormalizationError, /String.*doesn't match the reported encoding UTF-8/)
+      end
+    end
+
+    describe "and string encoding is not supported", :unless => String.instance_methods.include?(:encoding) do
+      it "accepts strings that are UTF-8 and match their encoding" do
+        str = "let's make a ☃!"
+        subject.normalize(str)
+      end
+
+      it "rejects strings that are not UTF-8" do
+        str = "let's make a \255\255\255!"
+        expect {
+          subject.normalize(str)
+        }.to raise_error(Facter::Util::Normalization::NormalizationError, /String .* is not valid UTF-8/)
+      end
+    end
+  end
+
+  describe "validating arrays" do
+    it "normalizes each element in the array" do
+      arr = ['first', 'second', ['third', 'fourth']]
+
+      subject.expects(:normalize).with('first')
+      subject.expects(:normalize).with('second')
+      subject.expects(:normalize).with(['third', 'fourth'])
+
+      subject.normalize_array(arr)
+    end
+  end
+
+  describe "validating hashes" do
+    it "normalizes each element in the array" do
+      hsh = {'first' => 'second', 'third' => ['fourth', 'fifth']}
+
+      subject.expects(:normalize).with('first')
+      subject.expects(:normalize).with('second')
+      subject.expects(:normalize).with('third')
+      subject.expects(:normalize).with(['fourth', 'fifth'])
+
+      subject.normalize_hash(hsh)
+    end
+  end
+
+  [1, 1.0, true, false, nil].each do |val|
+    it "accepts #{val.inspect}:#{val.class}" do
+      subject.normalize(val)
+    end
+  end
+
+  [:sym, Object.new, Set.new].each do |val|
+    it "rejects #{val.inspect}:#{val.class}" do
+      expect {
+        subject.normalize(val)
+      }.to raise_error(Facter::Util::Normalization::NormalizationError, /Expected .*but was #{val.class}/ )
+    end
+  end
+end

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-puppet/facter.git



More information about the Pkg-puppet-devel mailing list