mirror of https://github.com/licensee/licensee.git
fix normalization errors
This commit is contained in:
parent
998aeece2e
commit
cc7b1350a9
|
@ -8,27 +8,26 @@ module Licensee
|
||||||
END_OF_TERMS_REGEX = /^[\s#*_]*end of terms and conditions\s*$/i
|
END_OF_TERMS_REGEX = /^[\s#*_]*end of terms and conditions\s*$/i
|
||||||
ALT_TITLE_REGEX = License::ALT_TITLE_REGEX
|
ALT_TITLE_REGEX = License::ALT_TITLE_REGEX
|
||||||
REGEXES = {
|
REGEXES = {
|
||||||
hrs: /^\s*[=\-\*][=\-\*]{2,}\s*/,
|
hrs: /^\s*[=\-\*]{3,}\s*$/,
|
||||||
all_rights_reserved: /#{START_REGEX}all rights reserved\.?$/i,
|
all_rights_reserved: /#{START_REGEX}all rights reserved\.?$/i,
|
||||||
whitespace: /\s+/,
|
whitespace: /\s+/,
|
||||||
markdown_headings: /#{START_REGEX}#+/,
|
markdown_headings: /#{START_REGEX}#+/,
|
||||||
version: /#{START_REGEX}version.*$/i,
|
version: /#{START_REGEX}version.*$/i,
|
||||||
markup: /(?:[_*~`]+.*?[_*~`]+|^\s*[>-]|\[.*?\]\(.*?\))/,
|
span_markup: /[_*~]+(.*?)[_*~]+/,
|
||||||
|
link_markup: /\[(.+?)\]\(.+?\)/,
|
||||||
|
block_markup: /^\s*>/,
|
||||||
|
border_markup: /^[\*-](.*?)[\*-]$/,
|
||||||
url: %r{#{START_REGEX}https?://[^ ]+\n},
|
url: %r{#{START_REGEX}https?://[^ ]+\n},
|
||||||
bullet: /\n\n\s*(?:[*-]|\(?[\da-z]{1,2}[)\.])\s+/i,
|
bullet: /\n\n\s*(?:[*-]|\(?[\da-z]{1,2}[)\.])\s+/i,
|
||||||
developed_by: /#{START_REGEX}developed by:.*?\n\n/im,
|
developed_by: /#{START_REGEX}developed by:.*?\n\n/im,
|
||||||
quote_begin: /[`'"‘“]/,
|
quote_begin: /[`'"‘“]/,
|
||||||
quote_end: /['"’”]/
|
quote_end: /[`'"’”]/
|
||||||
}.freeze
|
}.freeze
|
||||||
NORMALIZATIONS = {
|
NORMALIZATIONS = {
|
||||||
lists: { from: /^\s*(\d\.|\*)/, to: '-' },
|
lists: { from: /^\s*(?:\d\.|\*)\s+([^\n])/, to: '- \1' },
|
||||||
https: { from: /http:/, to: 'https:' },
|
https: { from: /http:/, to: 'https:' },
|
||||||
ampersands: { from: '&', to: 'and' },
|
ampersands: { from: '&', to: 'and' },
|
||||||
dashes: { from: /[—–-]+/, to: '-' },
|
dashes: { from: /(?<!^)([—–-]+)(?!$)/, to: '-' },
|
||||||
copyright: {
|
|
||||||
from: /(?:copyright\ )?#{Matchers::Copyright::COPYRIGHT_SYMBOLS}/,
|
|
||||||
to: 'copyright'
|
|
||||||
},
|
|
||||||
quotes: {
|
quotes: {
|
||||||
from: /#{REGEXES[:quote_begin]}+([\w -]*?\w)#{REGEXES[:quote_end]}+/,
|
from: /#{REGEXES[:quote_begin]}+([\w -]*?\w)#{REGEXES[:quote_end]}+/,
|
||||||
to: '"\1"'
|
to: '"\1"'
|
||||||
|
@ -82,7 +81,8 @@ module Licensee
|
||||||
'owner' => 'holder'
|
'owner' => 'holder'
|
||||||
}.freeze
|
}.freeze
|
||||||
STRIP_METHODS = %i[
|
STRIP_METHODS = %i[
|
||||||
hrs markdown_headings borders markup title version url copyright
|
hrs markdown_headings borders title version url copyright
|
||||||
|
block_markup span_markup link_markup
|
||||||
all_rights_reserved developed_by end_of_terms whitespace
|
all_rights_reserved developed_by end_of_terms whitespace
|
||||||
].freeze
|
].freeze
|
||||||
|
|
||||||
|
@ -131,7 +131,7 @@ module Licensee
|
||||||
def content_without_title_and_version
|
def content_without_title_and_version
|
||||||
@content_without_title_and_version ||= begin
|
@content_without_title_and_version ||= begin
|
||||||
@_content = nil
|
@_content = nil
|
||||||
%w[markdown_headings hrs title version].each { |op| strip(op) }
|
%i[hrs markdown_headings title version].each { |op| strip(op) }
|
||||||
_content
|
_content
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -186,19 +186,21 @@ module Licensee
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.title_regex
|
def self.title_regex
|
||||||
licenses = Licensee::License.all(hidden: true, psuedo: false)
|
@title_regex ||= begin
|
||||||
titles = licenses.map(&:title_regex)
|
licenses = Licensee::License.all(hidden: true, psuedo: false)
|
||||||
|
titles = licenses.map(&:title_regex)
|
||||||
|
|
||||||
# Title regex must include the version to support matching within
|
# Title regex must include the version to support matching within
|
||||||
# families, but for sake of normalization, we can be less strict
|
# families, but for sake of normalization, we can be less strict
|
||||||
without_versions = licenses.map do |license|
|
without_versions = licenses.map do |license|
|
||||||
next if license.title == license.name_without_version
|
next if license.title == license.name_without_version
|
||||||
|
|
||||||
Regexp.new Regexp.escape(license.name_without_version), 'i'
|
Regexp.new Regexp.escape(license.name_without_version), 'i'
|
||||||
|
end
|
||||||
|
titles.concat(without_versions.compact)
|
||||||
|
|
||||||
|
/#{START_REGEX}\(?(?:the )?#{Regexp.union titles}.*?$/i
|
||||||
end
|
end
|
||||||
titles.concat(without_versions.compact)
|
|
||||||
|
|
||||||
/#{START_REGEX}\(?(the )?#{Regexp.union titles}.*$/i
|
|
||||||
end
|
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
@ -213,10 +215,10 @@ module Licensee
|
||||||
return unless _content
|
return unless _content
|
||||||
|
|
||||||
if regex_or_sym.is_a?(Symbol)
|
if regex_or_sym.is_a?(Symbol)
|
||||||
if REGEXES[regex_or_sym]
|
if respond_to?("strip_#{regex_or_sym}", true)
|
||||||
regex_or_sym = REGEXES[regex_or_sym]
|
|
||||||
elsif respond_to?("strip_#{regex_or_sym}", true)
|
|
||||||
return send("strip_#{regex_or_sym}")
|
return send("strip_#{regex_or_sym}")
|
||||||
|
elsif REGEXES[regex_or_sym]
|
||||||
|
regex_or_sym = REGEXES[regex_or_sym]
|
||||||
else
|
else
|
||||||
raise ArgumentError, "#{regex_or_sym} is an invalid regex reference"
|
raise ArgumentError, "#{regex_or_sym} is an invalid regex reference"
|
||||||
end
|
end
|
||||||
|
@ -225,12 +227,6 @@ module Licensee
|
||||||
@_content = _content.gsub(regex_or_sym, ' ').squeeze(' ').strip
|
@_content = _content.gsub(regex_or_sym, ' ').squeeze(' ').strip
|
||||||
end
|
end
|
||||||
|
|
||||||
STRIP_METHODS.each do |sym|
|
|
||||||
define_method "strip_#{sym}" do
|
|
||||||
strip(sym)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def strip_title
|
def strip_title
|
||||||
while _content =~ ContentHelper.title_regex
|
while _content =~ ContentHelper.title_regex
|
||||||
strip(ContentHelper.title_regex)
|
strip(ContentHelper.title_regex)
|
||||||
|
@ -238,7 +234,7 @@ module Licensee
|
||||||
end
|
end
|
||||||
|
|
||||||
def strip_borders
|
def strip_borders
|
||||||
normalize(/^[\*-](.*?)\*$/, '\1')
|
normalize(REGEXES[:border_markup], '\1')
|
||||||
end
|
end
|
||||||
|
|
||||||
def strip_copyright
|
def strip_copyright
|
||||||
|
@ -251,15 +247,18 @@ module Licensee
|
||||||
@_content = body
|
@_content = body
|
||||||
end
|
end
|
||||||
|
|
||||||
NORMALIZATIONS.each do |key, _op|
|
def strip_span_markup
|
||||||
define_method("normalize_#{key}") do
|
normalize(REGEXES[:span_markup], '\1')
|
||||||
normalize(key)
|
end
|
||||||
end
|
|
||||||
|
def strip_link_markup
|
||||||
|
normalize(REGEXES[:link_markup], '\1')
|
||||||
end
|
end
|
||||||
|
|
||||||
def normalize(from_or_key, to = nil)
|
def normalize(from_or_key, to = nil)
|
||||||
operation = { from: from_or_key, to: to } if to
|
operation = { from: from_or_key, to: to } if to
|
||||||
operation ||= NORMALIZATIONS[from_or_key]
|
operation ||= NORMALIZATIONS[from_or_key]
|
||||||
|
|
||||||
if operation
|
if operation
|
||||||
@_content = _content.gsub operation[:from], operation[:to]
|
@_content = _content.gsub operation[:from], operation[:to]
|
||||||
elsif respond_to?("normalize_#{from_or_key}", true)
|
elsif respond_to?("normalize_#{from_or_key}", true)
|
||||||
|
|
|
@ -55,7 +55,6 @@ module Licensee
|
||||||
def attribution
|
def attribution
|
||||||
@attribution ||= begin
|
@attribution ||= begin
|
||||||
return unless copyright? || license.content =~ /\[fullname\]/
|
return unless copyright? || license.content =~ /\[fullname\]/
|
||||||
|
|
||||||
matches = Matchers::Copyright::REGEX
|
matches = Matchers::Copyright::REGEX
|
||||||
.match(content_without_title_and_version)
|
.match(content_without_title_and_version)
|
||||||
matches[0] if matches
|
matches[0] if matches
|
||||||
|
|
|
@ -1,36 +1,36 @@
|
||||||
{
|
{
|
||||||
"upl-1.0": "093b8b048dec7bc685c9ee6a5afffa4a1d148c02",
|
"upl-1.0": "093b8b048dec7bc685c9ee6a5afffa4a1d148c02",
|
||||||
"ofl-1.1": "b6eb018d65c3ef1aecd29a99ad4653b47b34323d",
|
"ofl-1.1": "1fb0563aa1250e18a6948afde286edc95761f461",
|
||||||
"lgpl-3.0": "fde363437aa287dddb4305dbbe1a59c41e98ea2b",
|
"lgpl-3.0": "bdb3c042bd84f914eacfbe4977c5e58352745809",
|
||||||
"agpl-3.0": "8184105f82a05296bd50332643c3a3cc4067ba54",
|
"agpl-3.0": "d445855a1f169b12cbee97d320c2e3522d053016",
|
||||||
"gpl-2.0": "f5e7151e1cd9830c0caf577bc747d7736f36658e",
|
"gpl-2.0": "3becd209e8ed8039656c1debe01dd17b9a79208f",
|
||||||
"cc-by-4.0": "df16a2377ab7ea49e9bf80a8f3344e38121671f4",
|
"cc-by-4.0": "899872bc08626e6cf154dcf9e08ff0de82c9b3db",
|
||||||
"ms-rl": "eb5794a2e90d1b83fd19e46d6790b2b66b8b857b",
|
"ms-rl": "402bf344e506a8d10175c1e516b396c060ffd823",
|
||||||
"wtfpl": "dfa47b27c85780af7b2ddb2c30bdd7808e3060d7",
|
"wtfpl": "f8544c074f203d86cdcb24082fedfb2cf2fe411a",
|
||||||
"osl-3.0": "39c1c650cb46ecc34c72693eb5ba967611ac1fed",
|
"osl-3.0": "ab241ef932d3ac038e8ed62c860e9eba051ae7a0",
|
||||||
"bsl-1.0": "ca8f916d00c234719956e932061f192abb2d5bf9",
|
"bsl-1.0": "ca8f916d00c234719956e932061f192abb2d5bf9",
|
||||||
"lgpl-2.1": "cc67fdbf1313fa11056c01a573c0287fab61bc17",
|
"lgpl-2.1": "91e779a787786276618f58d6e396a5e64a981805",
|
||||||
"mpl-2.0": "b0285063c18aea6fae6a146882193f0d3de6dc44",
|
"mpl-2.0": "b4db668fa7573bfdcae74eb51eafc961034f0a61",
|
||||||
"isc": "d168f98624be864548b2bbf4f198fdbf702d6743",
|
"isc": "d168f98624be864548b2bbf4f198fdbf702d6743",
|
||||||
"cc0-1.0": "d76b663aad99ce405c971acd22cfbe23bfd29378",
|
"cc0-1.0": "ec5027313ed11fea202060f6958ac25b086d6dcb",
|
||||||
"bsd-3-clause-clear": "6cd7a95b9e5f0e866b07b46fcfebb70f1c42994f",
|
"bsd-3-clause-clear": "251d4599b622d2a87b2c4bb21dfacd438c048466",
|
||||||
"gpl-3.0": "39d041443ec3f4f2aa13e1fa2e9aef7d4356a04b",
|
"gpl-3.0": "b22f1b1f953a38a8a11686587b98831858d6468b",
|
||||||
"unlicense": "86c75861af1b9b9e0573b190dcb2c2cdbbee7037",
|
"unlicense": "86c75861af1b9b9e0573b190dcb2c2cdbbee7037",
|
||||||
"bsd-2-clause": "8c6525f4700252c313825f1f85acd04cd7c30394",
|
"bsd-2-clause": "59f0099ff04225daf184db3fe55e478256133b1a",
|
||||||
"artistic-2.0": "68cc5c6eb6563437200308f227d36af5ba32f9be",
|
"artistic-2.0": "a2ff6e7fb76e51bda9a5350c759a824f206049d1",
|
||||||
"zlib": "4768246ef0140435f718039efc0a11ef437e58fa",
|
"zlib": "8d43f632a4884e70c72a1ac5926fc87f98305490",
|
||||||
"lppl-1.3c": "7025cef767e2d508bde52922c28e6c0ec7831230",
|
"lppl-1.3c": "60961652297042d28bb689c17fac47eca7348d16",
|
||||||
"epl-2.0": "d858a8a6f0dfcc337acd93e3e791957d60f790b2",
|
"epl-2.0": "b57663bc9c3f41446a8cd3f0050149221a58fe66",
|
||||||
"mit": "d64f3bb4282a97b37454b5bb96a8a264a3363dc3",
|
"mit": "d64f3bb4282a97b37454b5bb96a8a264a3363dc3",
|
||||||
"postgresql": "87550a6bb3409db00d8552b2ac07d373ea56a024",
|
"postgresql": "87550a6bb3409db00d8552b2ac07d373ea56a024",
|
||||||
"afl-3.0": "c564c5cf16eb650c6ee784d71b90818bbbc5d3ae",
|
"afl-3.0": "4702ff33018a2874510beeef5916d6e8629cdc32",
|
||||||
"ncsa": "58a1d83992144038eab133b4af8a31ddbc575b56",
|
"ncsa": "04c052b69de47ab0641068657a14632cdf9aa48d",
|
||||||
"cc-by-sa-4.0": "145990c59e69fa6f691008c30994c909d865caa5",
|
"cc-by-sa-4.0": "d11590d97684231d5358252e0cc97373d62ec4f1",
|
||||||
"bsd-3-clause": "78f89f12ad4369a2dc932076182946195f1fdb04",
|
"bsd-3-clause": "fa22c672927af9c7334874561198799cbf4bdf31",
|
||||||
"epl-1.0": "0e1bc53f3b94e1b1e0d9e2eb565df10e6800e60d",
|
"epl-1.0": "e306464a81ab0e6688653c6509245b451637172c",
|
||||||
"ms-pl": "e72c4981307230d82983f1a3272d30c7c9fa37e1",
|
"ms-pl": "c900293d66a241e54f7817367a8f32f7f94e12ff",
|
||||||
"ecl-2.0": "8669b2b35e243e378a99d8ceee2c05f6ce3603b9",
|
"ecl-2.0": "58e7f645bfa1c5ccca7e2c37e626b3487e4d9d1b",
|
||||||
"eupl-1.2": "bab4a863ebdbd2f2f30bc333fe4635dc038136d1",
|
"eupl-1.2": "f122f96b9f1a56e4806a89cb1cc6ca2bb956f3e5",
|
||||||
"apache-2.0": "1dd463ea99a5cd7537b8230e05c9af07b6cc582f",
|
"apache-2.0": "ab3901051663cb8ee5dea9ebdff406ad136910e3",
|
||||||
"eupl-1.1": "26d0bb98b95d434f861b73cb8194b5620e945d94"
|
"eupl-1.1": "873e30dbc5f75d076d7aecb6ceb84fb6bb765452"
|
||||||
}
|
}
|
|
@ -10,7 +10,7 @@ RSpec.describe 'detect command' do
|
||||||
let(:stdout) { output[0] }
|
let(:stdout) { output[0] }
|
||||||
let(:stderr) { output[1] }
|
let(:stderr) { output[1] }
|
||||||
let(:status) { output[2] }
|
let(:status) { output[2] }
|
||||||
let(:hash) { 'd64f3bb4282a97b37454b5bb96a8a264a3363dc3' }
|
let(:hash) { license_hashes['mit'] }
|
||||||
let(:expected) do
|
let(:expected) do
|
||||||
{
|
{
|
||||||
'License' => 'MIT',
|
'License' => 'MIT',
|
||||||
|
|
|
@ -91,7 +91,9 @@ RSpec.describe Licensee::ContentHelper do
|
||||||
borders: '* Foo *',
|
borders: '* Foo *',
|
||||||
title: "The MIT License\nfoo",
|
title: "The MIT License\nfoo",
|
||||||
copyright: "The MIT License\nCopyright 2018 Ben Balter\nFoo",
|
copyright: "The MIT License\nCopyright 2018 Ben Balter\nFoo",
|
||||||
end_of_terms: "Foo\nend of terms and conditions\nbar"
|
end_of_terms: "Foo\nend of terms and conditions\nbar",
|
||||||
|
block_markup: "> Foo",
|
||||||
|
link_markup: "[Foo](http://exmaple.com)"
|
||||||
}.each do |field, fixture|
|
}.each do |field, fixture|
|
||||||
context "#strip_#{field}" do
|
context "#strip_#{field}" do
|
||||||
let(:content) { fixture }
|
let(:content) { fixture }
|
||||||
|
@ -102,12 +104,11 @@ RSpec.describe Licensee::ContentHelper do
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
context 'markup' do
|
context "span markup" do
|
||||||
let(:content) { "> foo\n_foo_ [bar](#baz) ~foo~ `bar` *baz*" }
|
let(:content) { '_foo_ *foo* **foo** ~foo~'}
|
||||||
|
|
||||||
it 'strips markup' do
|
it "strips span markup" do
|
||||||
skip 'failing'
|
expect(normalized_content).to eql('foo foo foo foo')
|
||||||
expect(normalized_content).to eql('foo foo bar foo bar baz')
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -177,6 +178,46 @@ RSpec.describe Licensee::ContentHelper do
|
||||||
end
|
end
|
||||||
|
|
||||||
context 'normalizing' do
|
context 'normalizing' do
|
||||||
|
context 'https' do
|
||||||
|
let(:content) { 'http://example.com' }
|
||||||
|
|
||||||
|
it 'normalized URL protocals' do
|
||||||
|
expect(subject.content_normalized).to eql('https://example.com')
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context 'ampersands' do
|
||||||
|
let(:content) { 'Foo & Bar' }
|
||||||
|
|
||||||
|
it 'normalized ampersands' do
|
||||||
|
expect(subject.content_normalized).to eql('foo and bar')
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context "lists" do
|
||||||
|
let(:content) { "1. Foo\n * Bar"}
|
||||||
|
|
||||||
|
it 'normalizes lists' do
|
||||||
|
expect(subject.content_normalized).to eql("- foo - bar")
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context "dashes" do
|
||||||
|
let(:content) { "Foo-Bar—–baz-buzz"}
|
||||||
|
|
||||||
|
it 'normalizes dashes' do
|
||||||
|
expect(subject.content_normalized).to eql("foo-bar-baz-buzz")
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context "quotes" do
|
||||||
|
let(:content) { "`a` 'b' \"c\" ‘d’ “e”" }
|
||||||
|
|
||||||
|
it 'normalizes quotes' do
|
||||||
|
expect(subject.content_normalized).to eql('"a" "b" "c" "d" "e"')
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
it 'strips formatting from the MPL' do
|
it 'strips formatting from the MPL' do
|
||||||
license = Licensee::License.find('mpl-2.0')
|
license = Licensee::License.find('mpl-2.0')
|
||||||
expect(license.content_normalized).to_not include('* *')
|
expect(license.content_normalized).to_not include('* *')
|
||||||
|
|
|
@ -134,3 +134,7 @@ RSpec::Matchers.define :be_detected_as do |expected|
|
||||||
|
|
||||||
diffable
|
diffable
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def license_hashes
|
||||||
|
@license_hashese ||= JSON.parse(fixture_contents('license-hashes.json'))
|
||||||
|
end
|
||||||
|
|
|
@ -5,7 +5,6 @@ RSpec.describe 'vendored licenses' do
|
||||||
end
|
end
|
||||||
let(:detected_license) { license_file.license if license_file }
|
let(:detected_license) { license_file.license if license_file }
|
||||||
let(:wtfpl) { Licensee::License.find('wtfpl') }
|
let(:wtfpl) { Licensee::License.find('wtfpl') }
|
||||||
let(:expected_hashes) { JSON.parse(fixture_contents('license-hashes.json')) }
|
|
||||||
|
|
||||||
Licensee.licenses(hidden: true).each do |license|
|
Licensee.licenses(hidden: true).each do |license|
|
||||||
next if license.pseudo_license?
|
next if license.pseudo_license?
|
||||||
|
@ -14,7 +13,7 @@ RSpec.describe 'vendored licenses' do
|
||||||
context "the #{license.name} license" do
|
context "the #{license.name} license" do
|
||||||
let(:content_with_copyright) { sub_copyright_info(license) }
|
let(:content_with_copyright) { sub_copyright_info(license) }
|
||||||
let(:content) { content_with_copyright }
|
let(:content) { content_with_copyright }
|
||||||
let(:expected_hash) { expected_hashes[license.key] }
|
let(:expected_hash) { license_hashes[license.key] }
|
||||||
let(:hash_change_msg) do
|
let(:hash_change_msg) do
|
||||||
msg = 'Did you update a vendored license? Run script/hash-licenses. '
|
msg = 'Did you update a vendored license? Run script/hash-licenses. '
|
||||||
msg << 'Changes in license hashes must be a MINOR (or MAJOR) bump.'
|
msg << 'Changes in license hashes must be a MINOR (or MAJOR) bump.'
|
||||||
|
|
Loading…
Reference in New Issue