mirror of https://github.com/licensee/licensee.git
reimplement levenshtein as a fallback
This commit is contained in:
parent
3e6a2ed2b3
commit
526b84343d
|
@ -6,6 +6,7 @@ license = Licensee::Project.new(Dir.pwd).license_file
|
|||
if license
|
||||
puts "License: #{license.match ? license.match.meta['title'] : 'no license'}"
|
||||
puts "Confidence: #{license.confidence}%"
|
||||
puts "Method: #{license.matcher.class}"
|
||||
else
|
||||
puts "Unknown"
|
||||
end
|
||||
|
|
|
@ -1,19 +1,30 @@
|
|||
require 'yaml'
|
||||
require 'rugged'
|
||||
require 'levenshtein'
|
||||
|
||||
require_relative "licensee/license"
|
||||
require_relative "licensee/licenses"
|
||||
require_relative "licensee/license_file"
|
||||
require_relative "licensee/project"
|
||||
require_relative "licensee/matcher"
|
||||
require_relative "licensee/matchers/git"
|
||||
require_relative "licensee/matchers/levenshtein"
|
||||
|
||||
class Licensee
|
||||
CONFIDENCE_THRESHOLD = 90
|
||||
|
||||
def self.licenses
|
||||
Licensee::Licenses.list
|
||||
end
|
||||
class << self
|
||||
|
||||
def self.license(path)
|
||||
Licensee::Project.new(path).license
|
||||
def licenses
|
||||
Licensee::Licenses.list
|
||||
end
|
||||
|
||||
def license(path)
|
||||
Licensee::Project.new(path).license
|
||||
end
|
||||
|
||||
def matchers
|
||||
[Licensee::GitMatcher, Licensee::LevenshteinMatcher]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -24,16 +24,16 @@ class Licensee
|
|||
nil
|
||||
end
|
||||
|
||||
def length
|
||||
@length ||= body.length
|
||||
end
|
||||
|
||||
def body
|
||||
@body ||= parts[2]
|
||||
end
|
||||
alias_method :to_s, :body
|
||||
alias_method :text, :body
|
||||
|
||||
def body_normalized
|
||||
@content_normalized ||= body.downcase.gsub("\n", " ").strip
|
||||
end
|
||||
|
||||
def hashsig
|
||||
@hashsig ||= Rugged::Blob::HashSignature.new(
|
||||
body, Rugged::Blob::HashSignature::WHITESPACE_SMART)
|
||||
|
|
|
@ -7,43 +7,40 @@ class Licensee
|
|||
blob.hashsig(Rugged::Blob::HashSignature::WHITESPACE_SMART)
|
||||
end
|
||||
|
||||
def contents
|
||||
@contents ||= blob.content
|
||||
def content
|
||||
@contents ||= begin
|
||||
blob.content
|
||||
end
|
||||
end
|
||||
alias_method :to_s, :contents
|
||||
alias_method :content, :contents
|
||||
alias_method :to_s, :content
|
||||
alias_method :contents, :content
|
||||
|
||||
def content_wrapped
|
||||
|
||||
def length
|
||||
@length ||= blob.size
|
||||
end
|
||||
|
||||
def matches
|
||||
@matches ||= Licensee::Licenses.list.map { |l| [l, calculate_similarity(l)] }.select { |l,sim| sim > 0 }
|
||||
def content_normalized
|
||||
@content_normalized ||= content.downcase.gsub("\n", " ").strip
|
||||
end
|
||||
|
||||
def match_info
|
||||
@match_info ||= matches.max_by { |license, similarity| similarity }
|
||||
end
|
||||
|
||||
def match
|
||||
match_info ? match_info[0] : nil
|
||||
end
|
||||
|
||||
def confidence
|
||||
match_info ? match_info[1] : nil
|
||||
end
|
||||
alias_method :similarity, :confidence
|
||||
|
||||
def diff(options={})
|
||||
options = options.merge(:reverse => true)
|
||||
blob.diff(match.body, options).to_s if match
|
||||
end
|
||||
|
||||
private
|
||||
# Returns the first matcher, instantiated for this file, whose #match is
# truthy, or nil when none of the matchers recognise the file.
#
# Matcher classes are tried in the order Licensee.matchers lists them.
# The miss case is handled explicitly: Array#each evaluates to its receiver
# when the block never breaks, so relying on `each ... break` alone would
# leave the Array of matcher classes (which is truthy) cached as the matcher.
def matcher
  return @matcher if @matcher

  Licensee.matchers.each do |matcher_class|
    candidate = matcher_class.new(self)
    return @matcher = candidate if candidate.match
  end

  nil
end
|
||||
|
||||
# Pulled out for easier testing
|
||||
def calculate_similarity(other)
|
||||
blob.similarity(other.hashsig)
|
||||
# Result of the selected matcher's #match, cached once it is truthy.
# Returns nil when no matcher is available.
def match
  return unless matcher

  @match ||= matcher.match
end
|
||||
|
||||
# Confidence reported by the selected matcher, cached after the first truthy
# value. Returns nil when there is no matcher to ask, mirroring #match.
def confidence
  @confidence ||= matcher.confidence if matcher
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
class Licensee
  # Base matcher. Stores the file under inspection and answers every query
  # with a "nothing found" default ([] / nil / 0).
  class Matcher
    attr_reader :file

    class << self
      # Shortcut: build a matcher for +file+ and return the result of #match.
      def match(file)
        new(file).match
      end
    end

    # file - the object to inspect; stored as-is and exposed through #file.
    def initialize(file)
      @file = file
    end

    # Default: no candidate matches.
    def matches
      []
    end

    # Default: no license matched.
    def match
      nil
    end

    # Default: zero confidence.
    def confidence
      0
    end
    alias_method :similarity, :confidence
  end
end
|
|
@ -0,0 +1,30 @@
|
|||
class Licensee
  # Matcher that scores the file's blob against each known license's hash
  # signature (file.blob.similarity) and accepts the best score only when it
  # is above Licensee::CONFIDENCE_THRESHOLD.
  class GitMatcher < Matcher

    # Returns [license, similarity] pairs for every license whose similarity
    # to the file is greater than zero.
    def matches
      @matches ||= Licensee::Licenses.list.map { |l| [l, similarity(l)] }.select { |_l, sim| sim > 0 }
    end

    # Returns the best-scoring license, or nil when nothing clears the
    # confidence threshold.
    def match
      match_info ? match_info[0] : nil
    end

    # Returns the similarity score of the matched license, or nil when there
    # is no match.
    def confidence
      match_info ? match_info[1] : nil
    end

    private

    # Pulled out for easier testing
    def similarity(other)
      file.blob.similarity(other.hashsig)
    end

    # Highest-scoring [license, similarity] pair, kept only if its score is
    # strictly above the threshold. `max_by` returns nil for an empty
    # candidate list, so guard before indexing into the pair.
    def match_info
      @match_info ||= begin
        best = matches.max_by { |_license, score| score }
        best if best && best[1] > Licensee::CONFIDENCE_THRESHOLD
      end
    end
  end
end
|
|
@ -0,0 +1,42 @@
|
|||
class Licensee
  # Fallback matcher that compares the normalized file content with each
  # license's normalized body using Levenshtein edit distance.
  class LevenshteinMatcher < Matcher

    # Returns the first candidate license whose similarity reaches
    # Licensee::CONFIDENCE_THRESHOLD, or nil. The result (including nil) is
    # cached so the edit-distance search runs at most once per matcher.
    def match
      return @match if defined?(@match)

      @match = potential_licenses.find do |license|
        similarity(license) >= Licensee::CONFIDENCE_THRESHOLD
      end
    end

    # Licenses whose normalized length is within max_delta of the file's,
    # ordered closest-in-length first so that #match reaches the most
    # plausible candidate before spending edit-distance work on the rest.
    def potential_licenses
      @potential_licenses ||= Licensee::Licenses.list
        .select { |license| length_delta(license) <= max_delta }
        .sort_by { |license| length_delta(license) }
    end

    # Absolute difference in length between the normalized file content and
    # the normalized license body.
    def length_delta(license)
      (file.content_normalized.length - license.body_normalized.length).abs
    end

    # Largest length difference a license may have and still be considered:
    # the file's normalized length scaled by CONFIDENCE_THRESHOLD / 100.
    def max_delta
      @max_delta ||= (file.content_normalized.length * (Licensee::CONFIDENCE_THRESHOLD.to_f / 100.to_f))
    end

    # Similarity of the matched license, or 0 when nothing matched.
    def confidence
      @confidence ||= match ? similarity(match) : 0
    end

    private

    # Normalized file length as a Float, so #similarity divides in floating point.
    def length
      file.content_normalized.length.to_f
    end

    # Percentage of the file's length left after subtracting the edit distance.
    def similarity(license)
      100 * (length - distance(license)) / length
    end

    # Levenshtein distance between the normalized file and license texts.
    def distance(license)
      Levenshtein.distance(file.content_normalized, license.body_normalized).to_f
    end
  end
end
|
|
@ -16,6 +16,7 @@ Gem::Specification.new do |gem|
|
|||
gem.executables << 'licensee'
|
||||
|
||||
gem.add_dependency('rugged', '~> 0.21.1b2')
|
||||
gem.add_dependency('levenshtein-ffi', '~> 1.1')
|
||||
gem.add_development_dependency('pry', '~> 0.9')
|
||||
gem.add_development_dependency('shoulda', '~> 3.5')
|
||||
gem.add_development_dependency('rake', '~> 10.3')
|
||||
|
|
|
@ -60,7 +60,7 @@ def verify_license_file(license, chaos = false, wrap=false)
|
|||
|
||||
actual = license_file.match
|
||||
assert actual, "No match for #{expected}."
|
||||
assert_equal expected, actual.name, "expeceted #{expected} but got #{actual.name} for .match. Matches: #{license_file.matches}"
|
||||
assert_equal expected, actual.name, "expeceted #{expected} but got #{actual.name} for .match. Confidence: #{license_file.confidence}. Method: #{license_file.matcher.class}"
|
||||
end
|
||||
|
||||
def wrap(text, line_width=80)
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
require 'helper'
|
||||
|
||||
# Exercises Licensee::GitMatcher against a license file built from the MIT text.
class TestLicenseeGitMatcher < Minitest::Test

  def setup
    mit_path = Licensee::Licenses.find("mit").path
    @mit = Licensee::LicenseFile.new(FakeBlob.new(license_from_path(mit_path)))
  end

  should "match the license" do
    matched = Licensee::GitMatcher.match(@mit)
    assert_equal "mit", matched.name
  end

  should "know the match confidence" do
    git_matcher = Licensee::GitMatcher.new(@mit)
    assert_equal 94, git_matcher.confidence
  end

  should "know the matches" do
    git_matcher = Licensee::GitMatcher.new(@mit)
    assert_equal 1, git_matcher.matches.size
  end
end
|
|
@ -0,0 +1,34 @@
|
|||
require 'helper'
|
||||
|
||||
# Exercises Licensee::LevenshteinMatcher against a license file built from the MIT text.
class TestLicenseeLevenshteinMatcher < Minitest::Test

  def setup
    mit_path = Licensee::Licenses.find("mit").path
    @mit = Licensee::LicenseFile.new(FakeBlob.new(license_from_path(mit_path)))
  end

  should "match the license" do
    matched = Licensee::LevenshteinMatcher.match(@mit)
    assert_equal "mit", matched.name
  end

  should "know the match confidence" do
    matcher = Licensee::LevenshteinMatcher.new(@mit)
    assert matcher.confidence > 98, "#{matcher.confidence} < 98"
  end

  should "calculate max delta" do
    matcher = Licensee::LevenshteinMatcher.new(@mit)
    assert_equal 968.4, matcher.max_delta
  end

  should "calculate length delta" do
    isc = Licensee::Licenses.find("isc")
    mit = Licensee::Licenses.find("mit")
    assert_equal 2, Licensee::LevenshteinMatcher.new(@mit).length_delta(mit)
    assert_equal 336, Licensee::LevenshteinMatcher.new(@mit).length_delta(isc)
  end

  should "round up potential licenses" do
    matcher = Licensee::LevenshteinMatcher.new(@mit)
    assert_equal 5, matcher.potential_licenses.size
  end
end
|
|
@ -8,7 +8,6 @@ class TestLicenseeLicense < Minitest::Test
|
|||
|
||||
should "read the license body" do
|
||||
assert @license.body
|
||||
assert @license.length > 0
|
||||
assert @license.text =~ /MIT/
|
||||
end
|
||||
|
||||
|
|
|
@ -14,17 +14,6 @@ class TestLicenseeLicenseFile < Minitest::Test
|
|||
assert @file.contents =~ /MIT/
|
||||
end
|
||||
|
||||
should "known the file length" do
|
||||
assert_equal 1077, @file.length
|
||||
end
|
||||
|
||||
should "calculate similiarty" do
|
||||
actual = @file.send(:calculate_similarity, @mit)
|
||||
assert actual > Licensee::CONFIDENCE_THRESHOLD, "expected #{actual} to be > 90% for MIT"
|
||||
actual = @file.send(:calculate_similarity, @gpl)
|
||||
assert actual < 1, "expected #{actual} to be < 1% for GPL"
|
||||
end
|
||||
|
||||
should "match the license" do
|
||||
assert_equal "mit", @file.match.name
|
||||
end
|
||||
|
@ -33,4 +22,8 @@ class TestLicenseeLicenseFile < Minitest::Test
|
|||
expected = "-Copyright (c) [year] [fullname]\n+Copyright (c) 2014 Ben Balter"
|
||||
assert @file.diff.include?(expected)
|
||||
end
|
||||
|
||||
should "calculate confidence" do
|
||||
assert_equal 94, @file.confidence
|
||||
end
|
||||
end
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
require 'helper'
|
||||
|
||||
# The base matcher must tolerate any input and report no match.
class TestLicenseeMatcher < Minitest::Test
  should "match the license without raising an error" do
    result = Licensee::Matcher.match(nil)
    assert_nil result
  end
end
|
Loading…
Reference in New Issue