mirror of https://github.com/licensee/licensee.git
fix normalization errors
This commit is contained in:
parent
998aeece2e
commit
cc7b1350a9
|
@ -8,27 +8,26 @@ module Licensee
|
|||
END_OF_TERMS_REGEX = /^[\s#*_]*end of terms and conditions\s*$/i
|
||||
ALT_TITLE_REGEX = License::ALT_TITLE_REGEX
|
||||
REGEXES = {
|
||||
hrs: /^\s*[=\-\*][=\-\*]{2,}\s*/,
|
||||
hrs: /^\s*[=\-\*]{3,}\s*$/,
|
||||
all_rights_reserved: /#{START_REGEX}all rights reserved\.?$/i,
|
||||
whitespace: /\s+/,
|
||||
markdown_headings: /#{START_REGEX}#+/,
|
||||
version: /#{START_REGEX}version.*$/i,
|
||||
markup: /(?:[_*~`]+.*?[_*~`]+|^\s*[>-]|\[.*?\]\(.*?\))/,
|
||||
span_markup: /[_*~]+(.*?)[_*~]+/,
|
||||
link_markup: /\[(.+?)\]\(.+?\)/,
|
||||
block_markup: /^\s*>/,
|
||||
border_markup: /^[\*-](.*?)[\*-]$/,
|
||||
url: %r{#{START_REGEX}https?://[^ ]+\n},
|
||||
bullet: /\n\n\s*(?:[*-]|\(?[\da-z]{1,2}[)\.])\s+/i,
|
||||
developed_by: /#{START_REGEX}developed by:.*?\n\n/im,
|
||||
quote_begin: /[`'"‘“]/,
|
||||
quote_end: /['"’”]/
|
||||
quote_end: /[`'"’”]/
|
||||
}.freeze
|
||||
NORMALIZATIONS = {
|
||||
lists: { from: /^\s*(\d\.|\*)/, to: '-' },
|
||||
lists: { from: /^\s*(?:\d\.|\*)\s+([^\n])/, to: '- \1' },
|
||||
https: { from: /http:/, to: 'https:' },
|
||||
ampersands: { from: '&', to: 'and' },
|
||||
dashes: { from: /[—–-]+/, to: '-' },
|
||||
copyright: {
|
||||
from: /(?:copyright\ )?#{Matchers::Copyright::COPYRIGHT_SYMBOLS}/,
|
||||
to: 'copyright'
|
||||
},
|
||||
dashes: { from: /(?<!^)([—–-]+)(?!$)/, to: '-' },
|
||||
quotes: {
|
||||
from: /#{REGEXES[:quote_begin]}+([\w -]*?\w)#{REGEXES[:quote_end]}+/,
|
||||
to: '"\1"'
|
||||
|
@ -82,7 +81,8 @@ module Licensee
|
|||
'owner' => 'holder'
|
||||
}.freeze
|
||||
STRIP_METHODS = %i[
|
||||
hrs markdown_headings borders markup title version url copyright
|
||||
hrs markdown_headings borders title version url copyright
|
||||
block_markup span_markup link_markup
|
||||
all_rights_reserved developed_by end_of_terms whitespace
|
||||
].freeze
|
||||
|
||||
|
@ -131,7 +131,7 @@ module Licensee
|
|||
def content_without_title_and_version
|
||||
@content_without_title_and_version ||= begin
|
||||
@_content = nil
|
||||
%w[markdown_headings hrs title version].each { |op| strip(op) }
|
||||
%i[hrs markdown_headings title version].each { |op| strip(op) }
|
||||
_content
|
||||
end
|
||||
end
|
||||
|
@ -186,19 +186,21 @@ module Licensee
|
|||
end
|
||||
|
||||
def self.title_regex
|
||||
licenses = Licensee::License.all(hidden: true, psuedo: false)
|
||||
titles = licenses.map(&:title_regex)
|
||||
@title_regex ||= begin
|
||||
licenses = Licensee::License.all(hidden: true, psuedo: false)
|
||||
titles = licenses.map(&:title_regex)
|
||||
|
||||
# Title regex must include the version to support matching within
|
||||
# families, but for sake of normalization, we can be less strict
|
||||
without_versions = licenses.map do |license|
|
||||
next if license.title == license.name_without_version
|
||||
# Title regex must include the version to support matching within
|
||||
# families, but for sake of normalization, we can be less strict
|
||||
without_versions = licenses.map do |license|
|
||||
next if license.title == license.name_without_version
|
||||
|
||||
Regexp.new Regexp.escape(license.name_without_version), 'i'
|
||||
Regexp.new Regexp.escape(license.name_without_version), 'i'
|
||||
end
|
||||
titles.concat(without_versions.compact)
|
||||
|
||||
/#{START_REGEX}\(?(?:the )?#{Regexp.union titles}.*?$/i
|
||||
end
|
||||
titles.concat(without_versions.compact)
|
||||
|
||||
/#{START_REGEX}\(?(the )?#{Regexp.union titles}.*$/i
|
||||
end
|
||||
|
||||
private
|
||||
|
@ -213,10 +215,10 @@ module Licensee
|
|||
return unless _content
|
||||
|
||||
if regex_or_sym.is_a?(Symbol)
|
||||
if REGEXES[regex_or_sym]
|
||||
regex_or_sym = REGEXES[regex_or_sym]
|
||||
elsif respond_to?("strip_#{regex_or_sym}", true)
|
||||
if respond_to?("strip_#{regex_or_sym}", true)
|
||||
return send("strip_#{regex_or_sym}")
|
||||
elsif REGEXES[regex_or_sym]
|
||||
regex_or_sym = REGEXES[regex_or_sym]
|
||||
else
|
||||
raise ArgumentError, "#{regex_or_sym} is an invalid regex reference"
|
||||
end
|
||||
|
@ -225,12 +227,6 @@ module Licensee
|
|||
@_content = _content.gsub(regex_or_sym, ' ').squeeze(' ').strip
|
||||
end
|
||||
|
||||
STRIP_METHODS.each do |sym|
|
||||
define_method "strip_#{sym}" do
|
||||
strip(sym)
|
||||
end
|
||||
end
|
||||
|
||||
def strip_title
|
||||
while _content =~ ContentHelper.title_regex
|
||||
strip(ContentHelper.title_regex)
|
||||
|
@ -238,7 +234,7 @@ module Licensee
|
|||
end
|
||||
|
||||
def strip_borders
|
||||
normalize(/^[\*-](.*?)\*$/, '\1')
|
||||
normalize(REGEXES[:border_markup], '\1')
|
||||
end
|
||||
|
||||
def strip_copyright
|
||||
|
@ -251,15 +247,18 @@ module Licensee
|
|||
@_content = body
|
||||
end
|
||||
|
||||
NORMALIZATIONS.each do |key, _op|
|
||||
define_method("normalize_#{key}") do
|
||||
normalize(key)
|
||||
end
|
||||
def strip_span_markup
|
||||
normalize(REGEXES[:span_markup], '\1')
|
||||
end
|
||||
|
||||
def strip_link_markup
|
||||
normalize(REGEXES[:link_markup], '\1')
|
||||
end
|
||||
|
||||
def normalize(from_or_key, to = nil)
|
||||
operation = { from: from_or_key, to: to } if to
|
||||
operation ||= NORMALIZATIONS[from_or_key]
|
||||
|
||||
if operation
|
||||
@_content = _content.gsub operation[:from], operation[:to]
|
||||
elsif respond_to?("normalize_#{from_or_key}", true)
|
||||
|
|
|
@ -55,7 +55,6 @@ module Licensee
|
|||
def attribution
|
||||
@attribution ||= begin
|
||||
return unless copyright? || license.content =~ /\[fullname\]/
|
||||
|
||||
matches = Matchers::Copyright::REGEX
|
||||
.match(content_without_title_and_version)
|
||||
matches[0] if matches
|
||||
|
|
|
@ -1,36 +1,36 @@
|
|||
{
|
||||
"upl-1.0": "093b8b048dec7bc685c9ee6a5afffa4a1d148c02",
|
||||
"ofl-1.1": "b6eb018d65c3ef1aecd29a99ad4653b47b34323d",
|
||||
"lgpl-3.0": "fde363437aa287dddb4305dbbe1a59c41e98ea2b",
|
||||
"agpl-3.0": "8184105f82a05296bd50332643c3a3cc4067ba54",
|
||||
"gpl-2.0": "f5e7151e1cd9830c0caf577bc747d7736f36658e",
|
||||
"cc-by-4.0": "df16a2377ab7ea49e9bf80a8f3344e38121671f4",
|
||||
"ms-rl": "eb5794a2e90d1b83fd19e46d6790b2b66b8b857b",
|
||||
"wtfpl": "dfa47b27c85780af7b2ddb2c30bdd7808e3060d7",
|
||||
"osl-3.0": "39c1c650cb46ecc34c72693eb5ba967611ac1fed",
|
||||
"ofl-1.1": "1fb0563aa1250e18a6948afde286edc95761f461",
|
||||
"lgpl-3.0": "bdb3c042bd84f914eacfbe4977c5e58352745809",
|
||||
"agpl-3.0": "d445855a1f169b12cbee97d320c2e3522d053016",
|
||||
"gpl-2.0": "3becd209e8ed8039656c1debe01dd17b9a79208f",
|
||||
"cc-by-4.0": "899872bc08626e6cf154dcf9e08ff0de82c9b3db",
|
||||
"ms-rl": "402bf344e506a8d10175c1e516b396c060ffd823",
|
||||
"wtfpl": "f8544c074f203d86cdcb24082fedfb2cf2fe411a",
|
||||
"osl-3.0": "ab241ef932d3ac038e8ed62c860e9eba051ae7a0",
|
||||
"bsl-1.0": "ca8f916d00c234719956e932061f192abb2d5bf9",
|
||||
"lgpl-2.1": "cc67fdbf1313fa11056c01a573c0287fab61bc17",
|
||||
"mpl-2.0": "b0285063c18aea6fae6a146882193f0d3de6dc44",
|
||||
"lgpl-2.1": "91e779a787786276618f58d6e396a5e64a981805",
|
||||
"mpl-2.0": "b4db668fa7573bfdcae74eb51eafc961034f0a61",
|
||||
"isc": "d168f98624be864548b2bbf4f198fdbf702d6743",
|
||||
"cc0-1.0": "d76b663aad99ce405c971acd22cfbe23bfd29378",
|
||||
"bsd-3-clause-clear": "6cd7a95b9e5f0e866b07b46fcfebb70f1c42994f",
|
||||
"gpl-3.0": "39d041443ec3f4f2aa13e1fa2e9aef7d4356a04b",
|
||||
"cc0-1.0": "ec5027313ed11fea202060f6958ac25b086d6dcb",
|
||||
"bsd-3-clause-clear": "251d4599b622d2a87b2c4bb21dfacd438c048466",
|
||||
"gpl-3.0": "b22f1b1f953a38a8a11686587b98831858d6468b",
|
||||
"unlicense": "86c75861af1b9b9e0573b190dcb2c2cdbbee7037",
|
||||
"bsd-2-clause": "8c6525f4700252c313825f1f85acd04cd7c30394",
|
||||
"artistic-2.0": "68cc5c6eb6563437200308f227d36af5ba32f9be",
|
||||
"zlib": "4768246ef0140435f718039efc0a11ef437e58fa",
|
||||
"lppl-1.3c": "7025cef767e2d508bde52922c28e6c0ec7831230",
|
||||
"epl-2.0": "d858a8a6f0dfcc337acd93e3e791957d60f790b2",
|
||||
"bsd-2-clause": "59f0099ff04225daf184db3fe55e478256133b1a",
|
||||
"artistic-2.0": "a2ff6e7fb76e51bda9a5350c759a824f206049d1",
|
||||
"zlib": "8d43f632a4884e70c72a1ac5926fc87f98305490",
|
||||
"lppl-1.3c": "60961652297042d28bb689c17fac47eca7348d16",
|
||||
"epl-2.0": "b57663bc9c3f41446a8cd3f0050149221a58fe66",
|
||||
"mit": "d64f3bb4282a97b37454b5bb96a8a264a3363dc3",
|
||||
"postgresql": "87550a6bb3409db00d8552b2ac07d373ea56a024",
|
||||
"afl-3.0": "c564c5cf16eb650c6ee784d71b90818bbbc5d3ae",
|
||||
"ncsa": "58a1d83992144038eab133b4af8a31ddbc575b56",
|
||||
"cc-by-sa-4.0": "145990c59e69fa6f691008c30994c909d865caa5",
|
||||
"bsd-3-clause": "78f89f12ad4369a2dc932076182946195f1fdb04",
|
||||
"epl-1.0": "0e1bc53f3b94e1b1e0d9e2eb565df10e6800e60d",
|
||||
"ms-pl": "e72c4981307230d82983f1a3272d30c7c9fa37e1",
|
||||
"ecl-2.0": "8669b2b35e243e378a99d8ceee2c05f6ce3603b9",
|
||||
"eupl-1.2": "bab4a863ebdbd2f2f30bc333fe4635dc038136d1",
|
||||
"apache-2.0": "1dd463ea99a5cd7537b8230e05c9af07b6cc582f",
|
||||
"eupl-1.1": "26d0bb98b95d434f861b73cb8194b5620e945d94"
|
||||
"afl-3.0": "4702ff33018a2874510beeef5916d6e8629cdc32",
|
||||
"ncsa": "04c052b69de47ab0641068657a14632cdf9aa48d",
|
||||
"cc-by-sa-4.0": "d11590d97684231d5358252e0cc97373d62ec4f1",
|
||||
"bsd-3-clause": "fa22c672927af9c7334874561198799cbf4bdf31",
|
||||
"epl-1.0": "e306464a81ab0e6688653c6509245b451637172c",
|
||||
"ms-pl": "c900293d66a241e54f7817367a8f32f7f94e12ff",
|
||||
"ecl-2.0": "58e7f645bfa1c5ccca7e2c37e626b3487e4d9d1b",
|
||||
"eupl-1.2": "f122f96b9f1a56e4806a89cb1cc6ca2bb956f3e5",
|
||||
"apache-2.0": "ab3901051663cb8ee5dea9ebdff406ad136910e3",
|
||||
"eupl-1.1": "873e30dbc5f75d076d7aecb6ceb84fb6bb765452"
|
||||
}
|
|
@ -10,7 +10,7 @@ RSpec.describe 'detect command' do
|
|||
let(:stdout) { output[0] }
|
||||
let(:stderr) { output[1] }
|
||||
let(:status) { output[2] }
|
||||
let(:hash) { 'd64f3bb4282a97b37454b5bb96a8a264a3363dc3' }
|
||||
let(:hash) { license_hashes['mit'] }
|
||||
let(:expected) do
|
||||
{
|
||||
'License' => 'MIT',
|
||||
|
|
|
@ -91,7 +91,9 @@ RSpec.describe Licensee::ContentHelper do
|
|||
borders: '* Foo *',
|
||||
title: "The MIT License\nfoo",
|
||||
copyright: "The MIT License\nCopyright 2018 Ben Balter\nFoo",
|
||||
end_of_terms: "Foo\nend of terms and conditions\nbar"
|
||||
end_of_terms: "Foo\nend of terms and conditions\nbar",
|
||||
block_markup: "> Foo",
|
||||
link_markup: "[Foo](http://exmaple.com)"
|
||||
}.each do |field, fixture|
|
||||
context "#strip_#{field}" do
|
||||
let(:content) { fixture }
|
||||
|
@ -102,12 +104,11 @@ RSpec.describe Licensee::ContentHelper do
|
|||
end
|
||||
end
|
||||
|
||||
context 'markup' do
|
||||
let(:content) { "> foo\n_foo_ [bar](#baz) ~foo~ `bar` *baz*" }
|
||||
context "span markup" do
|
||||
let(:content) { '_foo_ *foo* **foo** ~foo~'}
|
||||
|
||||
it 'strips markup' do
|
||||
skip 'failing'
|
||||
expect(normalized_content).to eql('foo foo bar foo bar baz')
|
||||
it "strips span markup" do
|
||||
expect(normalized_content).to eql('foo foo foo foo')
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -177,6 +178,46 @@ RSpec.describe Licensee::ContentHelper do
|
|||
end
|
||||
|
||||
context 'normalizing' do
|
||||
context 'https' do
|
||||
let(:content) { 'http://example.com' }
|
||||
|
||||
it 'normalized URL protocals' do
|
||||
expect(subject.content_normalized).to eql('https://example.com')
|
||||
end
|
||||
end
|
||||
|
||||
context 'ampersands' do
|
||||
let(:content) { 'Foo & Bar' }
|
||||
|
||||
it 'normalized ampersands' do
|
||||
expect(subject.content_normalized).to eql('foo and bar')
|
||||
end
|
||||
end
|
||||
|
||||
context "lists" do
|
||||
let(:content) { "1. Foo\n * Bar"}
|
||||
|
||||
it 'normalizes lists' do
|
||||
expect(subject.content_normalized).to eql("- foo - bar")
|
||||
end
|
||||
end
|
||||
|
||||
context "dashes" do
|
||||
let(:content) { "Foo-Bar—–baz-buzz"}
|
||||
|
||||
it 'normalizes dashes' do
|
||||
expect(subject.content_normalized).to eql("foo-bar-baz-buzz")
|
||||
end
|
||||
end
|
||||
|
||||
context "quotes" do
|
||||
let(:content) { "`a` 'b' \"c\" ‘d’ “e”" }
|
||||
|
||||
it 'normalizes quotes' do
|
||||
expect(subject.content_normalized).to eql('"a" "b" "c" "d" "e"')
|
||||
end
|
||||
end
|
||||
|
||||
it 'strips formatting from the MPL' do
|
||||
license = Licensee::License.find('mpl-2.0')
|
||||
expect(license.content_normalized).to_not include('* *')
|
||||
|
|
|
@ -134,3 +134,7 @@ RSpec::Matchers.define :be_detected_as do |expected|
|
|||
|
||||
diffable
|
||||
end
|
||||
|
||||
def license_hashes
|
||||
@license_hashese ||= JSON.parse(fixture_contents('license-hashes.json'))
|
||||
end
|
||||
|
|
|
@ -5,7 +5,6 @@ RSpec.describe 'vendored licenses' do
|
|||
end
|
||||
let(:detected_license) { license_file.license if license_file }
|
||||
let(:wtfpl) { Licensee::License.find('wtfpl') }
|
||||
let(:expected_hashes) { JSON.parse(fixture_contents('license-hashes.json')) }
|
||||
|
||||
Licensee.licenses(hidden: true).each do |license|
|
||||
next if license.pseudo_license?
|
||||
|
@ -14,7 +13,7 @@ RSpec.describe 'vendored licenses' do
|
|||
context "the #{license.name} license" do
|
||||
let(:content_with_copyright) { sub_copyright_info(license) }
|
||||
let(:content) { content_with_copyright }
|
||||
let(:expected_hash) { expected_hashes[license.key] }
|
||||
let(:expected_hash) { license_hashes[license.key] }
|
||||
let(:hash_change_msg) do
|
||||
msg = 'Did you update a vendored license? Run script/hash-licenses. '
|
||||
msg << 'Changes in license hashes must be a MINOR (or MAJOR) bump.'
|
||||
|
|
Loading…
Reference in New Issue