add support for detecting non-licensed projects

This commit is contained in:
Ben Balter 2015-03-07 15:06:29 -05:00
parent d72a362f48
commit f8bd2dbeb2
11 changed files with 99 additions and 10 deletions

View File

@ -10,6 +10,7 @@ require_relative "licensee/license_file"
require_relative "licensee/project"
require_relative "licensee/matcher"
require_relative "licensee/matchers/exact_matcher"
require_relative "licensee/matchers/copyright_matcher"
require_relative "licensee/matchers/git_matcher"
require_relative "licensee/matchers/levenshtein_matcher"
@ -41,7 +42,7 @@ class Licensee
# Array of matchers to use, in order of preference
# The order should be descending order of anticipated speed to match
def matchers
[Licensee::ExactMatcher, Licensee::GitMatcher, Licensee::LevenshteinMatcher]
[Licensee::ExactMatcher, Licensee::CopyrightMatcher, Licensee::GitMatcher, Licensee::LevenshteinMatcher]
end
end
end

View File

@ -55,6 +55,8 @@ class Licensee
# Memoized Rugged hash signature computed from `body`, using the
# WHITESPACE_SMART option.
#
# Returns the Rugged::Blob::HashSignature, or nil when Rugged raises
# Rugged::InvalidError while building it.
# NOTE(review): presumably raised when the body is too short to
# fingerprint -- confirm against the Rugged documentation.
def hashsig
@hashsig ||= Rugged::Blob::HashSignature.new(
body, Rugged::Blob::HashSignature::WHITESPACE_SMART)
rescue Rugged::InvalidError
# Callers must be prepared for a nil signature.
nil
end
def inspect

View File

@ -0,0 +1,18 @@
class Licensee
  # Matcher that reports the "no-license" license when the file's
  # normalized content matches a copyright-notice pattern
  # ("Copyright 2015 ...", "(c) 2015 ...", "© 2015 ...").
  class CopyrightMatcher < Matcher
    # A copyright marker, a space, and a four digit year, then the rest of
    # the line. Matching is case-insensitive.
    # NOTE(review): `^`/`$` anchor per line, not per string -- confirm that
    # `content_normalized` cannot hand this a full license text that merely
    # contains such a line.
    REGEX = /^(©|\(c\)|Copyright) \d{4}(.*)$/i

    # The license object looked up under the "no-license" key (memoized).
    def no_license
      @no_license ||= Licensee::Licenses.find("no-license")
    end

    # Returns the "no-license" license when the normalized file content
    # matches REGEX, otherwise nil.
    def match
      return unless file.content_normalized =~ REGEX

      no_license
    end

    # This matcher always reports a confidence of 100.
    def confidence
      100
    end
  end
end

View File

@ -16,7 +16,7 @@ class Licensee
end
def similarity(other)
file.blob.similarity(other.hashsig)
other.hashsig ? file.blob.similarity(other.hashsig) : 0
end
# Pulled out for easier testing

View File

@ -1,3 +1,2 @@
rm -Rf vendor/choosealicense.com
bower install github/choosealicense.com#gh-pages
rm -f vendor/choosealicense.com/_licenses/no-license.txt

View File

@ -30,11 +30,13 @@ class FakeBlob
end
def similarity(other)
Rugged::Blob::HashSignature.compare(self.hashsig, other)
self.hashsig ? Rugged::Blob::HashSignature.compare(self.hashsig, other) : 0
end
# Memoized Rugged hash signature computed from `content`.
#
# options - value passed straight through as the second argument of
#           Rugged::Blob::HashSignature.new (defaults to 0). Because the
#           result is memoized, only the first call's options take effect.
#
# Returns the signature, or nil when Rugged raises Rugged::InvalidError
# while building it.
def hashsig(options = 0)
@hashsig ||= Rugged::Blob::HashSignature.new(content, options)
rescue Rugged::InvalidError
nil
end
end

View File

@ -3,7 +3,7 @@ require 'helper'
class TestLicensee < Minitest::Test
should "know the licenses" do
assert_equal Array, Licensee.licenses.class
assert_equal 15, Licensee.licenses.size
assert_equal 16, Licensee.licenses.size
assert_equal Licensee::License, Licensee.licenses.first.class
end

View File

@ -0,0 +1,39 @@
require 'helper'
# Tests for Licensee::CopyrightMatcher: files whose content is a bare
# copyright notice should be reported as "no-license", while a real license
# text must not be matched.
class TestLicenseeCopyrightMatcher < Minitest::Test
  def setup
    @file = license_file_for("Copyright 2015 Ben Balter")
  end

  should "match the license" do
    assert_equal "no-license", Licensee::CopyrightMatcher.match(@file).key
  end

  should "know the match confidence" do
    assert_equal 100, Licensee::CopyrightMatcher.new(@file).confidence
  end

  should "match (C) copyright notices" do
    file = license_file_for("(C) 2015 Ben Balter")
    assert_equal "no-license", Licensee::CopyrightMatcher.match(file).key
  end

  should "match © copyright notices" do
    file = license_file_for("© 2015 Ben Balter")
    assert_equal "no-license", Licensee::CopyrightMatcher.match(file).key
  end

  should "not false positive" do
    # File.read closes the handle itself; File.open(...).read left it open
    # until garbage collection.
    # Everything after the last "---" is the license body (the part before
    # it is the YAML front matter).
    text = File.read(Licensee::Licenses.find("mit").path).split("---").last
    # assert_nil rather than assert_equal nil, which Minitest deprecates.
    assert_nil Licensee::CopyrightMatcher.match(license_file_for(text))
  end

  private

  # Wraps raw text in a FakeBlob-backed Licensee::LicenseFile.
  def license_file_for(text)
    Licensee::LicenseFile.new(FakeBlob.new(text))
  end
end

View File

@ -4,12 +4,12 @@ class TestLicenseeLicenses < Minitest::Test
should "know license names" do
assert_equal Array, Licensee::Licenses.keys.class
assert_equal 15, Licensee::Licenses.keys.size
assert_equal 16, Licensee::Licenses.keys.size
end
should "load the licenses" do
assert_equal Array, Licensee::Licenses.list.class
assert_equal 15, Licensee::Licenses.list.size
assert_equal 16, Licensee::Licenses.list.size
assert_equal Licensee::License, Licensee::Licenses.list.first.class
end

View File

@ -11,21 +11,21 @@ class TestLicenseeVendor < Minitest::Test
should "detect each vendored license when modified" do
licenses = Dir["#{Licensee::Licenses.base}/*"].shuffle
licenses.each do |license|
verify_license_file(license, true)
verify_license_file(license, true) unless license =~ /no-license\.txt$/
end
end
should "detect each vendored license with different line lengths" do
licenses = Dir["#{Licensee::Licenses.base}/*"].shuffle
licenses.each do |license|
verify_license_file(license, false, 50)
verify_license_file(license, false, 50) unless license =~ /no-license\.txt$/
end
end
should "detect each vendored license with different line lengths when modified" do
licenses = Dir["#{Licensee::Licenses.base}/*"].shuffle
licenses.each do |license|
verify_license_file(license, true, 50)
verify_license_file(license, true, 50) unless license =~ /no-license\.txt$/
end
end
end

28
vendor/choosealicense.com/_licenses/no-license.txt generated vendored Normal file
View File

@ -0,0 +1,28 @@
---
layout: license
permalink: /licenses/no-license/
category: No License
class: license-types
title: No License
description: You retain all rights and do not permit distribution, reproduction, or derivative works. You may grant some rights in cases where you publish your source code to a site that requires accepting terms of service. For example, publishing code in a public repository on GitHub requires that you allow others to view and fork your code.
note: This option may be subject to the Terms Of Use of the site where you publish your source code.
how: Simply do nothing, though including a copyright notice is recommended.
required:
- include-copyright
permitted:
- commercial-use
- private-use
forbidden:
- modifications
- distribution
- sublicense
---
Copyright [year] [fullname]