prioritize regex

This commit is contained in:
Ben Balter 2015-07-20 17:12:18 -04:00
parent 7cf70c48bf
commit e796c9b424
2 changed files with 31 additions and 64 deletions

View File

@ -38,13 +38,15 @@ class Licensee
#
# filename - (string) the name of the file to score
#
# Returns 1 if the file is definitely a license file
# Returns 0.5 if the file is likely a license file
# Returns 0 if the file is definitely not a license file
# Returns 1.0 if the file is definitely a license file
# Returns 0.75 if the file is probably a license file
# Returns 0.5 if the file is likely a license file
# Returns 0.0 if the file is definitely not a license file
def self.match_license_file(filename)
return 1 if self.license_file?(filename)
return 0.5 if self.maybe_license_file?(filename)
return 0
return 1.0 if filename =~ /\A(un)?licen[sc]e(\.[^.]+)?\z/i
return 0.75 if filename =~ /\Acopy(ing|right)(\.[^.]+)?\z/i
return 0.5 if filename =~ /licen[sc]e/i
return 0.0
end
private
@ -60,8 +62,7 @@ class Licensee
# Detects the license file, if any
# Returns the blob hash as detected in the tree
def license_hash
license_hash = tree.find { |blob| self.class.license_file?(blob[:name]) }
license_hash ||= tree.find { |blob| self.class.maybe_license_file?(blob[:name]) }
tree.sort_by { |blob| self.class.match_license_file(blob[:name]) }.last
end
def license_blob
@ -71,26 +72,5 @@ class Licensee
def license_path
license_hash[:name] if license_hash
end
# Regex to detect license files
#
# Examples it should match:
# - LICENSE.md
# - licence.txt
# - unlicense
# - copying
# - copyright
def self.license_file?(filename)
!!(filename =~ /\A(un)?licen[sc]e|copy(ing|right)(\.[^.]+)?\z/i)
end
# Regex to detect things that look like license files
#
# Examples it should match:
# - license-MIT.txt
# - MIT-LICENSE
def self.maybe_license_file?(filename)
!!(filename =~ /licen[sc]e/i)
end
end
end

View File

@ -79,42 +79,29 @@ class TestLicenseeProject < Minitest::Test
end
end
context "license filename matching" do
# Standard license names
["license",
"LICENSE",
"LICENCE",
"license.md",
"unlicense",
"unlicence",
"copying",
"copyRIGHT",
"license.txt"
].each do |license|
should "match #{license}" do
assert Licensee::Project.license_file?(license)
end
end
context "license filename scoring" do
should "not match MIT-LICENSE" do
refute Licensee::Project.license_file?("MIT-LICENSE")
end
EXPECTATIONS = {
"license" => 1.0,
"LICENCE" => 1.0,
"license.md" => 1.0,
"LICENSE.md" => 1.0,
"license.txt" => 1.0,
"unLICENSE" => 1.0,
"unlicence" => 1.0,
"COPYING" => 0.75,
"copyRIGHT" => 0.75,
"COPYRIGHT.txt" => 0.75,
"LICENSE-MIT" => 0.5,
"MIT-LICENSE.txt" => 0.5,
"mit-license-foo.md" => 0.5,
"README.txt" => 0.0
}
# Abnormal license names
[
"LICENSE-MIT",
"MIT-LICENSE.txt",
"mit-license-foo.md"
].each do |license|
should "match #{license}" do
assert Licensee::Project.maybe_license_file?(license)
end
end
should "return the proper license files scores" do
assert_equal 1, Licensee::Project.match_license_file("LICENSE.md")
assert_equal 0.5, Licensee::Project.match_license_file("MIT-LICENSE")
assert_equal 0, Licensee::Project.match_license_file("README.txt")
EXPECTATIONS.each do |filename, expected|
should "score a license named `#{filename}` as `#{expected}`" do
assert_equal expected, Licensee::Project.match_license_file(filename)
end
end
end
end