strip markup in ContentHelper#content_normalized

This commit is contained in:
Ben Balter 2017-12-20 14:17:46 -05:00
parent 7a4b22ffa3
commit ee6e66426b
No known key found for this signature in database
GPG Key ID: DBB67C246AD356C4
1 changed file with 7 additions and 1 deletion

View File

@ -4,13 +4,14 @@ require 'digest'
module Licensee
module ContentHelper
DIGEST = Digest::SHA1
END_OF_TERMS_REGEX = /^\s*end of terms and conditions\s*$/i
END_OF_TERMS_REGEX = /^[\s#]*end of terms and conditions\s*$/i
HR_REGEX = /[=\-\*][=\-\*\s]{3,}/
ALT_TITLE_REGEX = License::ALT_TITLE_REGEX
ALL_RIGHTS_RESERVED_REGEX = /\Aall rights reserved\.?$/i
WHITESPACE_REGEX = /\s+/
MARKDOWN_HEADING_REGEX = /\A\s*#+/
VERSION_REGEX = /\Aversion.*$/i
MARKUP_REGEX = /[^\w'\.\-]+/
# A set of each word in the license, without duplicates
def wordset
@ -77,6 +78,7 @@ module Licensee
end
string = strip_all_rights_reserved(string)
string, _partition, _instructions = string.partition(END_OF_TERMS_REGEX)
string = strip_markup(string)
strip_whitespace(string)
end
@ -155,6 +157,10 @@ module Licensee
strip(string, ALL_RIGHTS_RESERVED_REGEX)
end
# Removes markup characters from +string+.
#
# Delegates to #strip with MARKUP_REGEX, so every run of characters matched
# by that pattern (anything other than word characters, apostrophes, periods
# and hyphens) is replaced by a space before the result is tidied up.
#
# string - the String to clean.
#
# Returns whatever #strip returns for the given string and MARKUP_REGEX.
def strip_markup(string)
strip(string, MARKUP_REGEX)
end
# Blanks out every match of +regex+ in +string+ and tidies the whitespace
# left behind.
#
# string - the String to process (not mutated; a new String is returned).
# regex  - the pattern whose matches are each replaced with a single space.
#
# Returns a new String with consecutive spaces collapsed to one and with
# leading/trailing whitespace removed.
def strip(string, regex)
  blanked = string.gsub(regex, ' ')
  collapsed = blanked.squeeze(' ')
  collapsed.strip
end