Merge pull request #238 from benbalter/match-source

Allow ReferenceMatcher to match references to licenses by their source
This commit is contained in:
Ben Balter 2017-11-14 10:39:44 -05:00 committed by GitHub
commit 676c984d82
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 72 additions and 3 deletions

View File

@ -91,6 +91,9 @@ module Licensee
'bsd-3-clause-clear' => /(?:clear bsd|bsd 3-clause(?: clear)?)/i
}.freeze
# Case-insensitive pattern for the start of a source URL: an http:// or
# https:// scheme, optionally followed by "www."
SOURCE_PREFIX = %r{https?://(?:www\.)?}i
# Case-insensitive pattern for a source URL suffix: ".htm", ".html", ".txt",
# or "/". It carries no anchor of its own; callers add one where needed.
SOURCE_SUFFIX = %r{(?:\.html?|\.txt|\/)}i
include Licensee::ContentHelper
extend Forwardable
def_delegators :meta, *LicenseMeta.helper_methods
@ -119,7 +122,7 @@ module Licensee
end
def title_regex
return @regex if defined? @regex
return @title_regex if defined? @title_regex
title_regex = ALT_TITLE_REGEX[key]
title_regex ||= begin
@ -139,7 +142,26 @@ module Licensee
parts.push Regexp.new meta.nickname.sub(/\bGNU /i, '(?:GNU )?')
end
@regex = Regexp.union parts
@title_regex = Regexp.union parts
end
# Builds (and memoizes) a regex that matches the license's source URL
#
# These variations of the source are presumed to be identical:
# 1. An HTTP or HTTPS scheme
# 2. With or without a leading www.
# 3. A trailing .txt, .html, .htm, or /, or no suffix at all
#
# Returns the regex, or nil if the license metadata has no source
def source_regex
  return @source_regex if defined? @source_regex

  raw_source = meta.source
  return unless raw_source

  # Strip the fixed prefix and suffix from the stored source so that both can
  # be re-attached below as flexible patterns around the escaped remainder
  without_prefix = raw_source.sub(/\A#{SOURCE_PREFIX}/, '')
  bare_source = without_prefix.sub(/#{SOURCE_SUFFIX}\z/, '')
  escaped_source = Regexp.escape(bare_source)

  @source_regex = /#{SOURCE_PREFIX}#{escaped_source}(?:#{SOURCE_SUFFIX})?/i
end
def other?

View File

@ -4,7 +4,8 @@ module Licensee
class Reference < Licensee::Matchers::Matcher
# Looks for the first license that the file's content refers to, either by
# one of its titles or by its source URL.
#
# Returns whatever `find` yields for the collection from License.all: the
# first license whose block result is truthy (presumably nil when none is).
def match
License.all(hidden: true, psuedo: false).find do |license|
# NOTE(review): the value of this title-only match is never used; only the
# last expression of the block decides the result. It looks like the
# pre-change line shown next to its replacement below. Confirm and drop it.
/\b#{license.title_regex}\b/ =~ file.content
# source_regex returns nil for a license without a source, hence the compact
title_or_source = [license.title_regex, license.source_regex].compact
/\b#{Regexp.union(title_or_source)}\b/ =~ file.content
end
end

View File

@ -421,4 +421,41 @@ RSpec.describe Licensee::License do
end
end
end
# Checks every license's source_regex against each supported variation of
# its source URL: http/https scheme, with/without "www.", and each suffix.
context 'source regex' do
Licensee::License.all(hidden: true, psuedo: false).each do |license|
context "the #{license.title} license" do
# Parsed into a URI so the nested hooks can rewrite scheme, host, and path
let(:source) { URI.parse(license.source) }
%w[http https].each do |scheme|
context "with a #{scheme}:// scheme" do
before { source.scheme = scheme }
['www.', ''].each do |prefix|
context "with '#{prefix}' before the host" do
before do
# Reduce the host to its non-www form first, then apply the prefix under test
source.host = "#{prefix}#{source.host.sub(/\Awww\./, '')}"
end
['.html', '.htm', '.txt', ''].each do |suffix|
context "with '#{suffix}' after the path" do
before do
# NOTE(review): wtfpl keeps its original path untouched; presumably its
# source path cannot take these suffixes. Confirm against the license data.
next if license.key == 'wtfpl'
# Remove any suffix already on the path so only the one under test is present
regex = /#{Licensee::License::SOURCE_SUFFIX}\z/
source.path = source.path.sub(regex, '')
source.path = "#{source.path}#{suffix}"
end
it 'matches' do
expect(source.to_s).to match(license.source_regex)
end
end
end
end
end
end
end
end
end
end

View File

@ -39,4 +39,13 @@ RSpec.describe Licensee::Matchers::Reference do
expect(subject.match).to eql(license)
end
end
# A Markdown link pointing at the license's source URL should be enough for
# the matcher to return that license.
context 'with a license source' do
let(:license) { Licensee::License.find('mpl-2.0') }
# NOTE(review): presumably `content` feeds the file handed to `subject`,
# which is defined outside this excerpt. Confirm against the full spec.
let(:content) { "The [license](#{license.source})" }
it 'matches' do
expect(subject.match).to eql(license)
end
end
end