This commit is contained in:
Ben Balter 2018-10-24 20:40:30 -04:00
parent 1a05d304b5
commit c4076d83c8
No known key found for this signature in database
GPG Key ID: DBB67C246AD356C4
3 changed files with 51 additions and 45 deletions

View File

@ -5,8 +5,8 @@ module Licensee
module ContentHelper
DIGEST = Digest::SHA1
START_REGEX = /(?<=\A|<<endOptional>>)\s*/i
END_OF_TERMS_REGEX = /^[\s#*_]*end of terms and conditions\s*$/i
REGEXES = {
end_of_terms: /^[\s#*_]*end of terms and conditions\s*$/i,
hrs: /^\s*[=\-\*][=\-\* ]{2,}/,
alt_title: License::ALT_TITLE_REGEX,
all_rights_reserved: /#{START_REGEX}all rights reserved\.?$/i,
@ -20,11 +20,11 @@ module Licensee
developed_by: /\Adeveloped by:.*?\n\n/im,
quote_begin: /[`'"‘“]/,
quote_end: /['"’”]/
}
}.freeze
NORMALIZATIONS = {
lists: { from: /^\s*(\d\.|\*)/, to: '-' },
https: { from: /http:/, to: 'https:' },
ampersands: { from: "&", to: "and" },
ampersands: { from: '&', to: 'and' },
dashes: { from: /[—–-]+/, to: '-' },
copyright: {
from: /(?:copyright\ )?#{Matchers::Copyright::COPYRIGHT_SYMBOLS}/,
@ -34,7 +34,7 @@ module Licensee
from: /#{REGEXES[:quote_begin]}+([\w -]*?\w)#{REGEXES[:quote_end]}+/,
to: '"\1"'
}
}
}.freeze
# Legally equivalent words that should be ignored for comparison
# See https://spdx.org/spdx-license-list/matching-guidelines
@ -82,10 +82,10 @@ module Licensee
'cent' => 'percent',
'owner' => 'holder'
}.freeze
STRIP_METHODS = %i(
STRIP_METHODS = %i[
version hrs markdown_headings whitespace all_rights_reserved markup
url developed_by
)
].freeze
# A set of each word in the license, without duplicates
def wordset
@ -162,6 +162,12 @@ module Licensee
end
end
# Backwards-compatibility shim: resolve a missing constant by downcasing its
# name, removing any '_regex' text, and looking the result up as a symbol key
# in REGEXES, so older constant references keep working without a breaking
# change. Names with no REGEXES entry fall through to the default
# const_missing behavior.
def self.const_missing(const)
  lookup_name = const.to_s.downcase.gsub('_regex', '')
  regex = REGEXES[lookup_name.to_sym]
  return regex if regex

  super
end
# Wrap text to the given line length
def self.wrap(text, line_width = 80)
return if text.nil?
@ -215,7 +221,7 @@ module Licensee
if regex_or_sym.is_a?(Symbol)
if REGEXES[regex_or_sym]
regex_or_sym = REGEXES[regex_or_sym]
elsif self.respond_to?("strip_#{regex_or_sym}", true)
elsif respond_to?("strip_#{regex_or_sym}", true)
return send("strip_#{regex_or_sym}")
else
raise ArgumentError, "#{regex_or_sym} is an invalid regex reference"
@ -245,11 +251,11 @@ module Licensee
end
# Truncate the working content at the "end of terms and conditions" marker.
# String#partition splits _content around the first match of the regex; only
# the text before the match is kept in @_content. The matched line and
# everything after it are discarded. When nothing matches, partition returns
# the whole string as its first element, so the content is left as-is.
def strip_end_of_terms
body, _partition, _instructions = _content.partition(REGEXES[:end_of_terms])
body, _partition, _instructions = _content.partition(END_OF_TERMS_REGEX)
@_content = body
end
NORMALIZATIONS.each do |key, op|
NORMALIZATIONS.each do |key, _op|
define_method("normalize_#{key}") do
normalize(key)
end

View File

@ -1,36 +1,36 @@
{
"upl-1.0": "15a23e5025c5bbdc2d5d6c0ffdf2ce3ceb7dda25",
"ofl-1.1": "1f2f8b67e9eb2ce609451e872b681c2d6f054c6d",
"lgpl-3.0": "9ea522227639caaf4145825696d8d508600a7ad0",
"agpl-3.0": "2d7246023a36e6b375961b5eeeba3673a9b0e28c",
"gpl-2.0": "e7d5b8963c29bbed6cabe5dd1ea94e4a6c14babf",
"cc-by-4.0": "f6b43738f63a095ac0c6d31e26e2f3d18e8a2ed0",
"ms-rl": "9d7f8376072a8e85371a10ef68a942c26cf447ed",
"upl-1.0": "839ee26ab9bebb4f48d1171cc7d4f86016b71dd3",
"ofl-1.1": "b24a15d53991d3d94bb286b2e80fc14601286eb1",
"lgpl-3.0": "1d3a0cd71ae2cc1786c31567ce5de68556a4d173",
"agpl-3.0": "5fac5edefe89c4736a84a9f6bb2764296744fdb1",
"gpl-2.0": "0202ddb3df617688bc3b290101778a466b3f7147",
"cc-by-4.0": "a0934d57e274f8502b5feda6bfb382d7fbf654d5",
"ms-rl": "16ad951ac53fa053155fbb4104935d34a516a7a3",
"wtfpl": "f8544c074f203d86cdcb24082fedfb2cf2fe411a",
"osl-3.0": "43a0e637ee5d0bcc96f56ef62b9efb8304e26761",
"osl-3.0": "45c68fc93865c63deccd331ac7ac3c597905600d",
"bsl-1.0": "3ec243ab7ccca1840fedd6a345efe1d430e9cc0f",
"lgpl-2.1": "8368021571425b9e6a43b143ccbedaaa368cf684",
"mpl-2.0": "c959f24cafefb415c86783264ba047c997642681",
"lgpl-2.1": "4f21b7badc9d39b4d495981d8cb22a1f6344f4eb",
"mpl-2.0": "023980221c96eff137b379de20f3265fdd316d4b",
"isc": "d168f98624be864548b2bbf4f198fdbf702d6743",
"cc0-1.0": "83624ed81a5eea4456349757fe58ac7f721c154e",
"cc0-1.0": "d55df23910f8a8cb9cb46ad9e3ea144babba0aeb",
"bsd-3-clause-clear": "59222d36c8681934d15cb203bcc08928f10d4f55",
"gpl-3.0": "fb4942f66306942d6b76feff8169f4d138fae884",
"unlicense": "66a699d88454b16cf25cc4a0e366cbd70d308cb2",
"gpl-3.0": "8271a167fcded006776e9ffd4da51f97149cfd33",
"unlicense": "1ae4ac7271f1206be23241cc1f6523bab8391eed",
"bsd-2-clause": "dabf2ff1ba469c873941f9cb87bf43e4ad8a42da",
"artistic-2.0": "12c1f84dded26a6856c4cbfc7ed5a2ec589dbb65",
"zlib": "7748f3b03c052a5e5458e2096cac208160ce5128",
"lppl-1.3c": "c4512153439533a721a7e2fffd28635c2202b344",
"epl-2.0": "572122b933e7aeb55234933303d0ad51eb8781e4",
"artistic-2.0": "980d8c6b45010c92d5f08d2cce2ecb69719e1033",
"zlib": "576139d1f785b8f3539099b9702fc68005ee2213",
"lppl-1.3c": "dcea4431ea5fb8a71e2762ef4b8b9c4a95381719",
"epl-2.0": "7c7fecad29ff536138abc2c304ae8957caa66263",
"mit": "46cdc03462b9af57968df67b450cc4372ac41f53",
"postgresql": "5858b773632d5c144e88eee687230410da78db43",
"afl-3.0": "8dbee3ca3d575436f6034a76c35be0b8a7e5383b",
"ncsa": "2f7451b63136de0c04d8e66e36d9a361e57d7b39",
"cc-by-sa-4.0": "11ee387392f1aa3b5cba37ebc2c79557f08c7db8",
"afl-3.0": "53e7fcf4e526309dcd3ef726b94ef76c42d26f4a",
"ncsa": "b5781dbb5dcc5506a68616c1d273df7eed46304a",
"cc-by-sa-4.0": "f3918d28e92781e93e6fefbcbf884833da3c147e",
"bsd-3-clause": "b1393f39bd07fba46f442cc801b4e1488b982cec",
"epl-1.0": "7b680c65af131013ddb8c26f52f0fdafd5b95c9f",
"ms-pl": "d5015f1d7677886a7a978c1a8ccfee1f813f6153",
"ecl-2.0": "e9ba3d8c230860027d4be037993a6f5d98e7367a",
"eupl-1.2": "384efd90fd7ac3bc6372c3dec7c29e24c5e50f19",
"apache-2.0": "38af80cfcd81261023ea4c45f5335800a5a4cc97",
"eupl-1.1": "79f54e63b881c679e626c2631d2c40f42d8091bf"
"epl-1.0": "e64b097321924b93ba36c010cfc41e3280cd7324",
"ms-pl": "737a05312c157a683c019ba104eba473aab8dd4b",
"ecl-2.0": "bd9fc5c6637eb1320f50f25664a97a39ca1f1d37",
"eupl-1.2": "4a217833b3b39d7a55c5bb35471f35b5640566b1",
"apache-2.0": "8302d06088d817c2cb1bb34e90f83abc570aa21b",
"eupl-1.1": "50e2621d70adbc1371d4c7bfd7739a147165695f"
}

View File

@ -22,12 +22,12 @@ RSpec.describe Licensee::Matchers::Dice do
it 'sorts licenses by similarity' do
expect(subject.matches_by_similarity[0]).to eql([gpl, 100.0])
expect(subject.matches_by_similarity[1]).to eql([agpl, 95.76581285938317])
expect(subject.matches_by_similarity[1]).to eql([agpl, 95.67966280295047])
end
it 'returns a list of licenses above the confidence threshold' do
expect(subject.matches_by_similarity[0]).to eql([gpl, 100.0])
expect(subject.matches_by_similarity[1]).to eql([agpl, 95.76581285938317])
expect(subject.matches_by_similarity[1]).to eql([agpl, 95.67966280295047])
end
it 'returns the match confidence' do