mirror of https://github.com/licensee/licensee.git
comment all the things
This commit is contained in:
parent
8e230f4151
commit
2e2b2c74c5
|
@ -12,14 +12,18 @@ require_relative "licensee/matchers/git_matcher"
|
|||
require_relative "licensee/matchers/levenshtein_matcher"
|
||||
|
||||
class Licensee
|
||||
|
||||
# Minimum confidence percentage for a match to be considered a match
|
||||
CONFIDENCE_THRESHOLD = 90
|
||||
|
||||
class << self
|
||||
|
||||
# Returns an array of Licensee::License instances
|
||||
def licenses
|
||||
Licensee::Licenses.list
|
||||
@licenses ||= Licensee::Licenses.list
|
||||
end
|
||||
|
||||
# Returns the license for a given git repo
|
||||
def license(path)
|
||||
Licensee::Project.new(path).license
|
||||
end
|
||||
|
|
|
@ -6,34 +6,36 @@ class Licensee
|
|||
@name=name.downcase
|
||||
end
|
||||
|
||||
# Path to vendored license file on disk
|
||||
def path
|
||||
@path ||= File.expand_path "#{@name}.txt", Licensee::Licenses.base
|
||||
end
|
||||
|
||||
# Raw content of license file, including YAML front matter
|
||||
def content
|
||||
@content ||= File.open(path).read
|
||||
end
|
||||
|
||||
def parts
|
||||
@parts ||= content.match(/^(---\n.*\n---)?(.*)/m).to_a
|
||||
end
|
||||
|
||||
# License metadata from YAML front matter
|
||||
def meta
|
||||
@meta ||= front_matter = YAML.load(parts[1]) if parts[1]
|
||||
rescue
|
||||
nil
|
||||
end
|
||||
|
||||
# The license body (i.e., content minus the YAML front matter)
|
||||
def body
|
||||
@body ||= parts[2]
|
||||
end
|
||||
alias_method :to_s, :body
|
||||
alias_method :text, :body
|
||||
|
||||
# License body, downcased and stripped, with each run of whitespace replaced with a single space
|
||||
def body_normalized
|
||||
@content_normalized ||= body.downcase.gsub(/\s+/, " ").strip
|
||||
end
|
||||
|
||||
# Git-computed hash signature for the license file
|
||||
def hashsig
|
||||
@hashsig ||= Rugged::Blob::HashSignature.new(
|
||||
body, Rugged::Blob::HashSignature::WHITESPACE_SMART)
|
||||
|
@ -42,5 +44,11 @@ class Licensee
|
|||
def inspect
|
||||
"#<Licensee::License name=\"#{name}\">"
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def parts
|
||||
@parts ||= content.match(/^(---\n.*\n---)?(.*)/m).to_a
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -7,6 +7,7 @@ class Licensee
|
|||
blob.hashsig(Rugged::Blob::HashSignature::WHITESPACE_SMART)
|
||||
end
|
||||
|
||||
# Raw file contents
|
||||
def content
|
||||
@contents ||= begin
|
||||
blob.content
|
||||
|
@ -15,23 +16,28 @@ class Licensee
|
|||
alias_method :to_s, :content
|
||||
alias_method :contents, :content
|
||||
|
||||
# File content, downcased and stripped, with each run of whitespace replaced with a single space
|
||||
def content_normalized
|
||||
@content_normalized ||= content.downcase.gsub(/\s+/, " ").strip
|
||||
end
|
||||
|
||||
# Computes a diff between the known license and the project license
|
||||
def diff(options={})
|
||||
options = options.merge(:reverse => true)
|
||||
blob.diff(match.body, options).to_s if match
|
||||
end
|
||||
|
||||
# Determines which matching strategy to use; returns an instance of that matcher
|
||||
def matcher
|
||||
@matcher ||= Licensee.matchers.map { |m| m.new(self) }.find { |m| m.match }
|
||||
end
|
||||
|
||||
# Returns a Licensee::License instance of the matched license
|
||||
def match
|
||||
@match ||= matcher.match if matcher
|
||||
end
|
||||
|
||||
# Returns the percent confidence in the match
|
||||
def confidence
|
||||
@condience ||= matcher.confidence if matcher
|
||||
end
|
||||
|
|
|
@ -1,6 +1,29 @@
|
|||
class Licensee
|
||||
class Licenses
|
||||
class << self
|
||||
|
||||
# Returns an array of Licensee::License instances
|
||||
def list
|
||||
@licenses ||= begin
|
||||
licenses = []
|
||||
names.each { |name| licenses.push License.new(name) }
|
||||
licenses
|
||||
end
|
||||
end
|
||||
|
||||
# Given a license name, attempt to return a matching Licensee::License instance
|
||||
def find(name)
|
||||
list.find { |l| l.name.downcase == name.downcase }
|
||||
end
|
||||
|
||||
# Path to vendored licenses
|
||||
def base
|
||||
@base ||= File.expand_path "../../vendor/choosealicense.com/_licenses", File.dirname(__FILE__)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
# Returns a list of potential license names, as vendored
|
||||
def names
|
||||
@names ||= begin
|
||||
names = Dir.entries(base)
|
||||
|
@ -10,22 +33,6 @@ class Licensee
|
|||
end
|
||||
end
|
||||
|
||||
def list
|
||||
@licenses ||= begin
|
||||
licenses = []
|
||||
names.each { |name| licenses.push License.new(name) }
|
||||
licenses
|
||||
end
|
||||
end
|
||||
|
||||
def base
|
||||
@base ||= File.expand_path "../../vendor/choosealicense.com/_licenses", File.dirname(__FILE__)
|
||||
end
|
||||
|
||||
def find(name)
|
||||
name = name.downcase
|
||||
list.find { |l| l.name.downcase == name }
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,3 +1,9 @@
|
|||
# Abstract class to describe different matching strategies
|
||||
# Must respond to:
|
||||
# - match
|
||||
# - confidence
|
||||
#
|
||||
# Can assume file will be a Licensee::LicenseFile instance
|
||||
class Licensee
|
||||
class Matcher
|
||||
attr_reader :file
|
||||
|
@ -10,10 +16,6 @@ class Licensee
|
|||
@file = file
|
||||
end
|
||||
|
||||
def matches
|
||||
[]
|
||||
end
|
||||
|
||||
def match
|
||||
nil
|
||||
end
|
||||
|
@ -22,6 +24,5 @@ class Licensee
|
|||
0
|
||||
end
|
||||
alias_method :similarity, :confidence
|
||||
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,11 +1,7 @@
|
|||
class Licensee
|
||||
class ExactMatcher < Matcher
|
||||
def matches
|
||||
[match]
|
||||
end
|
||||
|
||||
def match
|
||||
Licensee::Licenses.list.find { |l| l.body_normalized == file.content_normalized }
|
||||
Licensee.licenses.find { |l| l.body_normalized == file.content_normalized }
|
||||
end
|
||||
|
||||
def confidence
|
||||
|
|
|
@ -1,10 +1,6 @@
|
|||
class Licensee
|
||||
class GitMatcher < Matcher
|
||||
|
||||
def matches
|
||||
@matches ||= Licensee::Licenses.list.map { |l| [l, similarity(l)] }.select { |l,sim| sim > 0 }
|
||||
end
|
||||
|
||||
def match
|
||||
match_info[0] unless match_info.nil?
|
||||
end
|
||||
|
@ -15,6 +11,10 @@ class Licensee
|
|||
|
||||
private
|
||||
|
||||
def matches
|
||||
@matches ||= Licensee.licenses.map { |l| [l, similarity(l)] }.select { |l,sim| sim > 0 }
|
||||
end
|
||||
|
||||
def similarity(other)
|
||||
file.blob.similarity(other.hashsig)
|
||||
end
|
||||
|
|
|
@ -1,40 +1,53 @@
|
|||
class Licensee
|
||||
class LevenshteinMatcher < Matcher
|
||||
|
||||
# Return the first potential license that is more similar than the confidence threshold
|
||||
def match
|
||||
@match ||= potential_licenses.find do |license|
|
||||
similarity(license) >= Licensee::CONFIDENCE_THRESHOLD
|
||||
end
|
||||
end
|
||||
|
||||
# Sort all licenses, in descending order, by difference in length to the file
|
||||
# Difference in lengths cannot exceed the file's length * the confidence threshold / 100
|
||||
def potential_licenses
|
||||
@potential_licenses ||= begin
|
||||
Licensee::Licenses.list.select { |license| length_delta(license) <= max_delta }.sort_by { |l| length_delta(l) }.reverse
|
||||
Licensee.licenses.select { |license| length_delta(license) <= max_delta }.sort_by { |l| length_delta(l) }.reverse
|
||||
end
|
||||
end
|
||||
|
||||
# Calculate the difference between the file length and a given license's length
|
||||
def length_delta(license)
|
||||
(file_length - license.body_normalized.length).abs
|
||||
end
|
||||
|
||||
# Maximum possible difference between file length and license length
|
||||
# for a license to be a potential license to be matched
|
||||
def max_delta
|
||||
@max_delta ||= (file_length * (Licensee::CONFIDENCE_THRESHOLD.to_f / 100.to_f ))
|
||||
end
|
||||
|
||||
# Confidence that the matched license is a match
|
||||
def confidence
|
||||
@confidence ||= match ? similarity(match) : 0
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
# Length of the file, normalized to strip whitespace
|
||||
def file_length
|
||||
@file_length ||= file.content_normalized.length.to_f
|
||||
end
|
||||
|
||||
# Calculate percent similarity between file and potential license
|
||||
def similarity(license)
|
||||
100 * (file_length - distance(license)) / file_length
|
||||
end
|
||||
|
||||
# Calculate the levenshtein distance between file and license
|
||||
# Note: We used content/body normalized because white space and capitalization
|
||||
# aren't legally significant in this context. Fewer characters let Levenshtein
|
||||
# work faster. As long as they both undergo the same transformation, should match.
|
||||
def distance(license)
|
||||
Levenshtein.distance(license.body_normalized, file.content_normalized).to_f
|
||||
end
|
||||
|
|
|
@ -2,7 +2,8 @@ class Licensee
|
|||
class Project
|
||||
attr_reader :repository
|
||||
|
||||
VALID_FILENAMES = %w[
|
||||
# Array of file names to look for potential license files, in order
|
||||
LICENSE_FILENAMES = %w[
|
||||
LICENSE
|
||||
LICENSE.txt
|
||||
LICENSE.md
|
||||
|
@ -10,6 +11,10 @@ class Licensee
|
|||
COPYING
|
||||
]
|
||||
|
||||
# Initializes a new project
|
||||
#
|
||||
# path_or_repo - path to git repo or Rugged::Repository instance
|
||||
# revision - revision ref, if any
|
||||
def initialize(path_or_repo, revision = nil)
|
||||
if path_or_repo.kind_of? Rugged::Repository
|
||||
@repository = path_or_repo
|
||||
|
@ -20,22 +25,20 @@ class Licensee
|
|||
@revision = revision
|
||||
end
|
||||
|
||||
# Detects the license file, if any
|
||||
# Returns a Licensee::LicenseFile instance
|
||||
def license_file
|
||||
return @license_file if defined? @license_file
|
||||
|
||||
commit = @revision ? @repository.lookup(@revision) : @repository.last_commit
|
||||
license_blob = commit.tree.each_blob { |blob| break blob if VALID_FILENAMES.include? blob[:name] }
|
||||
|
||||
license_blob = commit.tree.each_blob { |blob| break blob if LICENSE_FILENAMES.include? blob[:name] }
|
||||
|
||||
@license_file = if license_blob
|
||||
LicenseFile.new(@repository.lookup(license_blob[:oid]))
|
||||
end
|
||||
end
|
||||
|
||||
def matches
|
||||
@matches ||= license_file.matches if license_file
|
||||
end
|
||||
|
||||
# Returns the matching Licensee::License instance if a license can be detected
|
||||
def license
|
||||
@license ||= license_file.match if license_file
|
||||
end
|
||||
|
|
|
@ -15,8 +15,4 @@ class TestLicenseeExactMatcher < Minitest::Test
|
|||
should "know the match confidence" do
|
||||
assert_equal 100, Licensee::ExactMatcher.new(@mit).confidence
|
||||
end
|
||||
|
||||
should "know the matches" do
|
||||
assert_equal 1, Licensee::ExactMatcher.new(@mit).matches.size
|
||||
end
|
||||
end
|
||||
|
|
|
@ -15,8 +15,4 @@ class TestLicenseeGitMatcher < Minitest::Test
|
|||
should "know the match confidence" do
|
||||
assert_equal 94, Licensee::GitMatcher.new(@mit).confidence
|
||||
end
|
||||
|
||||
should "know the matches" do
|
||||
assert_equal 1, Licensee::GitMatcher.new(@mit).matches.size
|
||||
end
|
||||
end
|
||||
|
|
|
@ -3,8 +3,8 @@ require 'helper'
|
|||
class TestLicenseeLicenses < Minitest::Test
|
||||
|
||||
should "know license names" do
|
||||
assert_equal Array, Licensee::Licenses.names.class
|
||||
assert_equal 15, Licensee::Licenses.names.size
|
||||
assert_equal Array, Licensee::Licenses.send(:names).class
|
||||
assert_equal 15, Licensee::Licenses.send(:names).size
|
||||
end
|
||||
|
||||
should "load the licenses" do
|
||||
|
|
Loading…
Reference in New Issue