Remove deprecated methods in ActiveSupport::Multibyte::Unicode

2020-10-29 15:43:33 +00:00 · 2020-10-29 15:43:33 +00:00 · 2c6f5c0b8a
parent 8f14d5ad4b
commit 2c6f5c0b8a
6 changed files with 15 additions and 191 deletions
--- a/activesupport/CHANGELOG.md
+++ b/activesupport/CHANGELOG.md
@ -1,3 +1,11 @@
+*   Remove deprecated `ActiveSupport::Multibyte::Unicode.pack_graphemes`,
+    `ActiveSupport::Multibyte::Unicode.unpack_graphemes`,
+    `ActiveSupport::Multibyte::Unicode.normalize`,
+    `ActiveSupport::Multibyte::Unicode.downcase`,
+    `ActiveSupport::Multibyte::Unicode.upcase` and `ActiveSupport::Multibyte::Unicode.swapcase`.
+
+    *Rafael Mendonça França*
+
 *   Remove deprecated `ActiveSupport::Multibyte::Chars#consumes?` and `ActiveSupport::Multibyte::Chars#normalize`.

    *Rafael Mendonça França*
--- a/activesupport/lib/active_support/multibyte/unicode.rb
+++ b/activesupport/lib/active_support/multibyte/unicode.rb
@ -10,13 +10,6 @@ module ActiveSupport
      # information about normalization.
      NORMALIZATION_FORMS = [:c, :kc, :d, :kd]

-      NORMALIZATION_FORM_ALIASES = { # :nodoc:
-        c: :nfc,
-        d: :nfd,
-        kc: :nfkc,
-        kd: :nfkd
-      }
-
      # The Unicode version that is supported by the implementation
      UNICODE_VERSION = RbConfig::CONFIG["UNICODE_VERSION"]

@ -25,34 +18,7 @@ module ActiveSupport
      # in NORMALIZATION_FORMS.
      #
      #   ActiveSupport::Multibyte::Unicode.default_normalization_form = :c
-      attr_accessor :default_normalization_form
-      @default_normalization_form = :kc
-
-      # Unpack the string at grapheme boundaries. Returns a list of character
-      # lists.
-      #
-      #   Unicode.unpack_graphemes('क्षि') # => [[2325, 2381], [2359], [2367]]
-      #   Unicode.unpack_graphemes('Café') # => [[67], [97], [102], [233]]
-      def unpack_graphemes(string)
-        ActiveSupport::Deprecation.warn(<<-MSG.squish)
-          ActiveSupport::Multibyte::Unicode#unpack_graphemes is deprecated and will be
-          removed from Rails 6.1. Use string.scan(/\X/).map(&:codepoints) instead.
-        MSG
-
-        string.scan(/\X/).map(&:codepoints)
-      end
-
-      # Reverse operation of unpack_graphemes.
-      #
-      #   Unicode.pack_graphemes(Unicode.unpack_graphemes('क्षि')) # => 'क्षि'
-      def pack_graphemes(unpacked)
-        ActiveSupport::Deprecation.warn(<<-MSG.squish)
-          ActiveSupport::Multibyte::Unicode#pack_graphemes is deprecated and will be
-          removed from Rails 6.1. Use array.flatten.pack("U*") instead.
-        MSG
-
-        unpacked.flatten.pack("U*")
-      end
+      attr_accessor :default_normalization_form # TODO: Deprecate

      # Decompose composed characters to the decomposed form.
      def decompose(type, codepoints)
@ -107,46 +73,6 @@ module ActiveSupport
        end
      end

-      # Returns the KC normalization of the string by default. NFKC is
-      # considered the best normalization form for passing strings to databases
-      # and validations.
-      #
-      # * <tt>string</tt> - The string to perform normalization on.
-      # * <tt>form</tt> - The form you want to normalize in. Should be one of
-      #   the following: <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>.
-      #   Default is ActiveSupport::Multibyte::Unicode.default_normalization_form.
-      def normalize(string, form = nil)
-        form ||= @default_normalization_form
-
-        # See https://www.unicode.org/reports/tr15, Table 1
-        if alias_form = NORMALIZATION_FORM_ALIASES[form]
-          ActiveSupport::Deprecation.warn(<<-MSG.squish)
-            ActiveSupport::Multibyte::Unicode#normalize is deprecated and will be
-            removed from Rails 6.1. Use String#unicode_normalize(:#{alias_form}) instead.
-          MSG
-
-          string.unicode_normalize(alias_form)
-        else
-          ActiveSupport::Deprecation.warn(<<-MSG.squish)
-            ActiveSupport::Multibyte::Unicode#normalize is deprecated and will be
-            removed from Rails 6.1. Use String#unicode_normalize instead.
-          MSG
-
-          raise ArgumentError, "#{form} is not a valid normalization variant", caller
-        end
-      end
-
-      %w(downcase upcase swapcase).each do |method|
-        define_method(method) do |string|
-          ActiveSupport::Deprecation.warn(<<-MSG.squish)
-          ActiveSupport::Multibyte::Unicode##{method} is deprecated and
-          will be removed from Rails 6.1. Use String methods directly.
-          MSG
-
-          string.public_send(method)
-        end
-      end
-
      private
        def recode_windows1252_chars(string)
          string.encode(Encoding::UTF_8, Encoding::Windows_1252, invalid: :replace, undef: :replace)
--- a/activesupport/test/multibyte_chars_test.rb
+++ b/activesupport/test/multibyte_chars_test.rb
@ -678,36 +678,6 @@ class MultibyteCharsExtrasTest < ActiveSupport::TestCase
    assert_equal BYTE_STRING.dup.mb_chars.class, ActiveSupport::Multibyte::Chars
  end

-  def test_unicode_normalize_deprecation
-    # String#unicode_normalize default form is `:nfc`, and
-    # different than Multibyte::Unicode default, `:nkfc`.
-    # Deprecation should suggest the right form if no params
-    # are given and default is used.
-    assert_deprecated(/unicode_normalize\(:nfkc\)/) do
-      ActiveSupport::Multibyte::Unicode.normalize("")
-    end
-
-    assert_deprecated(/unicode_normalize\(:nfd\)/) do
-      ActiveSupport::Multibyte::Unicode.normalize("", :d)
-    end
-  end
-
-  def test_unicode_deprecations
-    assert_deprecated { ActiveSupport::Multibyte::Unicode.downcase("") }
-    assert_deprecated { ActiveSupport::Multibyte::Unicode.upcase("") }
-    assert_deprecated { ActiveSupport::Multibyte::Unicode.swapcase("") }
-  end
-
-  def test_normalize_non_unicode_string
-    # Fullwidth Latin Capital Letter A in Windows 31J
-    str = "\u{ff21}".encode(Encoding::Windows_31J)
-    assert_raise Encoding::CompatibilityError do
-      ActiveSupport::Deprecation.silence do
-        ActiveSupport::Multibyte::Unicode.normalize(str)
-      end
-    end
-  end
-
  private
    def string_from_classes(classes)
      # Characters from the character classes as described in UAX #29
--- a/activesupport/test/multibyte_grapheme_break_conformance_test.rb
+++ b/activesupport/test/multibyte_grapheme_break_conformance_test.rb
@ -1,60 +0,0 @@
-# frozen_string_literal: true
-
-require_relative "abstract_unit"
-require_relative "multibyte_test_helpers"
-
-class MultibyteGraphemeBreakConformanceTest < ActiveSupport::TestCase
-  include MultibyteTestHelpers
-
-  UNIDATA_FILE = "/auxiliary/GraphemeBreakTest.txt"
-  RUN_P = begin
-            Downloader.download(UNIDATA_URL + UNIDATA_FILE, CACHE_DIR + UNIDATA_FILE)
-          rescue
-          end
-
-  def setup
-    skip "Unable to download test data" unless RUN_P
-  end
-
-  def test_breaks
-    ActiveSupport::Deprecation.silence do
-      each_line_of_break_tests do |*cols|
-        *clusters, comment = *cols
-        packed = ActiveSupport::Multibyte::Unicode.pack_graphemes(clusters)
-        assert_equal clusters, ActiveSupport::Multibyte::Unicode.unpack_graphemes(packed), comment
-      end
-    end
-  end
-
-  private
-    def each_line_of_break_tests(&block)
-      lines = 0
-      max_test_lines = 0 # Don't limit below 21, because that's the header of the testfile
-      File.open(File.join(CACHE_DIR, UNIDATA_FILE), "r") do | f |
-        until f.eof? || (max_test_lines > 21 && lines > max_test_lines)
-          lines += 1
-          line = f.gets.chomp!
-          next if line.empty? || line.start_with?("#")
-
-          cols, comment = line.split("#")
-          # Cluster breaks are represented by ÷
-          clusters = cols.split("÷").map { |e| e.strip }.reject { |e| e.empty? }
-          clusters = clusters.map do |cluster|
-            # Codepoints within each cluster are separated by ×
-            codepoints = cluster.split("×").map { |e| e.strip }.reject { |e| e.empty? }
-            # codepoints are in hex in the test suite, pack wants them as integers
-            codepoints.map { |codepoint| codepoint.to_i(16) }
-          end
-
-          # The tests contain a solitary U+D800 <Non Private Use High
-          # Surrogate, First> character, which Ruby does not allow to stand
-          # alone in a UTF-8 string. So we'll just skip it.
-          next if clusters.flatten.include?(0xd800)
-
-          clusters << comment.strip
-
-          yield(*clusters)
-        end
-      end
-    end
-end
--- a/activesupport/test/multibyte_test_helpers.rb
+++ b/activesupport/test/multibyte_test_helpers.rb
@ -1,32 +1,6 @@
 # frozen_string_literal: true

-require "fileutils"
-require "open-uri"
-require "tmpdir"
-
 module MultibyteTestHelpers
-  class Downloader
-    def self.download(from, to)
-      unless File.exist?(to)
-        unless File.exist?(File.dirname(to))
-          system "mkdir -p #{File.dirname(to)}"
-        end
-        URI.open(from) do |source|
-          File.open(to, "w") do |target|
-            source.each_line do |l|
-              target.write l
-            end
-          end
-        end
-      end
-      true
-    end
-  end
-
-  UNIDATA_URL = "http://www.unicode.org/Public/#{ActiveSupport::Multibyte::Unicode::UNICODE_VERSION}/ucd"
-  CACHE_DIR = "#{Dir.tmpdir}/cache/unicode_conformance/#{ActiveSupport::Multibyte::Unicode::UNICODE_VERSION}"
-  FileUtils.mkdir_p(CACHE_DIR)
-
  UNICODE_STRING = "こにちわ"
  ASCII_STRING = "ohayo"
  BYTE_STRING = (+"\270\236\010\210\245").force_encoding("ASCII-8BIT").freeze
--- a/guides/source/6_1_release_notes.md
+++ b/guides/source/6_1_release_notes.md
@ -306,6 +306,12 @@ Please refer to the [Changelog][active-support] for detailed changes.

 *   Remove deprecated `ActiveSupport::Multibyte::Chars#consumes?` and `ActiveSupport::Multibyte::Chars#normalize`.

+*   Remove deprecated `ActiveSupport::Multibyte::Unicode.pack_graphemes`,
+    `ActiveSupport::Multibyte::Unicode.unpack_graphemes`,
+    `ActiveSupport::Multibyte::Unicode.normalize`,
+    `ActiveSupport::Multibyte::Unicode.downcase`,
+    `ActiveSupport::Multibyte::Unicode.upcase` and `ActiveSupport::Multibyte::Unicode.swapcase`.
+
 ### Deprecations

 ### Notable changes