From f8bd2dbeb2d8c16d32be84852344d4b647d81e83 Mon Sep 17 00:00:00 2001
From: Ben Balter
Date: Sat, 7 Mar 2015 15:06:29 -0500
Subject: [PATCH] add support for detecting non-licensed projects

---
Reviewer note (text in this section is ignored by `git am`): the line
structure of this patch was restored from a copy whose newlines and
indentation had been collapsed. Every hunk matches its hunk-header line
counts and the diffstat below, but leading whitespace is reconstructed;
use `git am --ignore-whitespace` if context lines do not match exactly.

Summary: adds Licensee::CopyrightMatcher, which returns the vendored
"no-license" license (confidence 100) when the normalized file content
matches the copyright-notice regex, registers it after ExactMatcher, and
makes `hashsig` return nil on Rugged::InvalidError so that the
`similarity` helpers can fall back to 0 instead of raising.

 lib/licensee.rb                            |  3 +-
 lib/licensee/license.rb                    |  2 +
 lib/licensee/matchers/copyright_matcher.rb | 18 +++++++++
 lib/licensee/matchers/git_matcher.rb       |  2 +-
 script/vendor-licenses                     |  1 -
 test/functions.rb                          |  4 +-
 test/test_licensee.rb                      |  2 +-
 test/test_licensee_copyright_matcher.rb    | 39 +++++++++++++++++++
 test/test_licensee_licenses.rb             |  4 +-
 test/test_licensee_vendor.rb               |  6 +--
 .../_licenses/no-license.txt               | 28 +++++++++++++
 11 files changed, 99 insertions(+), 10 deletions(-)
 create mode 100644 lib/licensee/matchers/copyright_matcher.rb
 create mode 100644 test/test_licensee_copyright_matcher.rb
 create mode 100644 vendor/choosealicense.com/_licenses/no-license.txt

diff --git a/lib/licensee.rb b/lib/licensee.rb
index ea07beb..0a4fc84 100644
--- a/lib/licensee.rb
+++ b/lib/licensee.rb
@@ -10,6 +10,7 @@ require_relative "licensee/license_file"
 require_relative "licensee/project"
 require_relative "licensee/matcher"
 require_relative "licensee/matchers/exact_matcher"
+require_relative "licensee/matchers/copyright_matcher"
 require_relative "licensee/matchers/git_matcher"
 require_relative "licensee/matchers/levenshtein_matcher"
 
@@ -41,7 +42,7 @@ class Licensee
     # Array of matchers to use, in order of preference
     # The order should be decending order of anticipated speed to match
     def matchers
-      [Licensee::ExactMatcher, Licensee::GitMatcher, Licensee::LevenshteinMatcher]
+      [Licensee::ExactMatcher, Licensee::CopyrightMatcher, Licensee::GitMatcher, Licensee::LevenshteinMatcher]
     end
   end
 end
diff --git a/lib/licensee/license.rb b/lib/licensee/license.rb
index 8f3d041..075ef89 100644
--- a/lib/licensee/license.rb
+++ b/lib/licensee/license.rb
@@ -55,6 +55,8 @@ class Licensee
     def hashsig
       @hashsig ||= Rugged::Blob::HashSignature.new(
         body, Rugged::Blob::HashSignature::WHITESPACE_SMART)
+    rescue Rugged::InvalidError
+      nil
     end
 
     def inspect
diff --git a/lib/licensee/matchers/copyright_matcher.rb b/lib/licensee/matchers/copyright_matcher.rb
new file mode 100644
index 0000000..ad28b42
--- /dev/null
+++ b/lib/licensee/matchers/copyright_matcher.rb
@@ -0,0 +1,18 @@
+class Licensee
+  class CopyrightMatcher < Matcher
+
+    REGEX = /^(©|\(c\)|Copyright) \d{4}(.*)$/i
+
+    def no_license
+      @no_license ||= Licensee::Licenses.find("no-license")
+    end
+
+    def match
+      no_license if file.content_normalized =~ REGEX
+    end
+
+    def confidence
+      100
+    end
+  end
+end
diff --git a/lib/licensee/matchers/git_matcher.rb b/lib/licensee/matchers/git_matcher.rb
index a5984ac..14a5703 100644
--- a/lib/licensee/matchers/git_matcher.rb
+++ b/lib/licensee/matchers/git_matcher.rb
@@ -16,7 +16,7 @@ class Licensee
     end
 
     def similarity(other)
-      file.blob.similarity(other.hashsig)
+      other.hashsig ? file.blob.similarity(other.hashsig) : 0
     end
 
     # Pulled out for easier testing
diff --git a/script/vendor-licenses b/script/vendor-licenses
index e296ad1..77ff532 100755
--- a/script/vendor-licenses
+++ b/script/vendor-licenses
@@ -1,3 +1,2 @@
 rm -Rf vendor/choosealicense.com
 bower install github/choosealicense.com#gh-pages
-rm -f vendor/choosealicense.com/_licenses/no-license.txt
diff --git a/test/functions.rb b/test/functions.rb
index 75ba756..d42bfaf 100644
--- a/test/functions.rb
+++ b/test/functions.rb
@@ -30,11 +30,13 @@ class FakeBlob
   end
 
   def similarity(other)
-    Rugged::Blob::HashSignature.compare(self.hashsig, other)
+    self.hashsig ? Rugged::Blob::HashSignature.compare(self.hashsig, other) : 0
   end
 
   def hashsig(options = 0)
     @hashsig ||= Rugged::Blob::HashSignature.new(content, options)
+  rescue Rugged::InvalidError
+    nil
   end
 end
 
diff --git a/test/test_licensee.rb b/test/test_licensee.rb
index 2818537..85c474a 100644
--- a/test/test_licensee.rb
+++ b/test/test_licensee.rb
@@ -3,7 +3,7 @@ require 'helper'
 class TestLicensee < Minitest::Test
   should "know the licenses" do
     assert_equal Array, Licensee.licenses.class
-    assert_equal 15, Licensee.licenses.size
+    assert_equal 16, Licensee.licenses.size
     assert_equal Licensee::License, Licensee.licenses.first.class
   end
 
diff --git a/test/test_licensee_copyright_matcher.rb b/test/test_licensee_copyright_matcher.rb
new file mode 100644
index 0000000..ddb65c0
--- /dev/null
+++ b/test/test_licensee_copyright_matcher.rb
@@ -0,0 +1,39 @@
+require 'helper'
+
+class TestLicenseeCopyrightMatcher < Minitest::Test
+
+  def setup
+    text = "Copyright 2015 Ben Balter"
+    blob = FakeBlob.new(text)
+    @file = Licensee::LicenseFile.new(blob)
+  end
+
+  should "match the license" do
+    assert_equal "no-license", Licensee::CopyrightMatcher.match(@file).key
+  end
+
+  should "know the match confidence" do
+    assert_equal 100, Licensee::CopyrightMatcher.new(@file).confidence
+  end
+
+  should "match (C) copyright notices" do
+    text = "(C) 2015 Ben Balter"
+    blob = FakeBlob.new(text)
+    file = Licensee::LicenseFile.new(blob)
+    assert_equal "no-license", Licensee::CopyrightMatcher.match(file).key
+  end
+
+  should "match © copyright notices" do
+    text = "© 2015 Ben Balter"
+    blob = FakeBlob.new(text)
+    file = Licensee::LicenseFile.new(blob)
+    assert_equal "no-license", Licensee::CopyrightMatcher.match(file).key
+  end
+
+  should "not false positive" do
+    text = File.open(Licensee::Licenses.find("mit").path).read.split("---").last
+    blob = FakeBlob.new(text)
+    file = Licensee::LicenseFile.new(blob)
+    assert_equal nil, Licensee::CopyrightMatcher.match(file)
+  end
+end
diff --git a/test/test_licensee_licenses.rb b/test/test_licensee_licenses.rb
index bf6f857..af91dfc 100644
--- a/test/test_licensee_licenses.rb
+++ b/test/test_licensee_licenses.rb
@@ -4,12 +4,12 @@ class TestLicenseeLicenses < Minitest::Test
 
   should "know license names" do
     assert_equal Array, Licensee::Licenses.keys.class
-    assert_equal 15, Licensee::Licenses.keys.size
+    assert_equal 16, Licensee::Licenses.keys.size
   end
 
   should "load the licenses" do
     assert_equal Array, Licensee::Licenses.list.class
-    assert_equal 15, Licensee::Licenses.list.size
+    assert_equal 16, Licensee::Licenses.list.size
     assert_equal Licensee::License, Licensee::Licenses.list.first.class
   end
 
diff --git a/test/test_licensee_vendor.rb b/test/test_licensee_vendor.rb
index 0328fe6..39c2d15 100644
--- a/test/test_licensee_vendor.rb
+++ b/test/test_licensee_vendor.rb
@@ -11,21 +11,21 @@ class TestLicenseeVendor < Minitest::Test
   should "detect each vendored license when modified" do
     licenses = Dir["#{Licensee::Licenses.base}/*"].shuffle
     licenses.each do |license|
-      verify_license_file(license, true)
+      verify_license_file(license, true) unless license =~ /no-license\.txt$/
     end
   end
 
   should "detect each vendored license with different line lengths" do
     licenses = Dir["#{Licensee::Licenses.base}/*"].shuffle
     licenses.each do |license|
-      verify_license_file(license, false, 50)
+      verify_license_file(license, false, 50) unless license =~ /no-license\.txt$/
     end
   end
 
   should "detect each vendored license with different line lengths when modified" do
     licenses = Dir["#{Licensee::Licenses.base}/*"].shuffle
     licenses.each do |license|
-      verify_license_file(license, true, 50)
+      verify_license_file(license, true, 50) unless license =~ /no-license\.txt$/
     end
   end
 end
diff --git a/vendor/choosealicense.com/_licenses/no-license.txt b/vendor/choosealicense.com/_licenses/no-license.txt
new file mode 100644
index 0000000..7efcf1c
--- /dev/null
+++ b/vendor/choosealicense.com/_licenses/no-license.txt
@@ -0,0 +1,28 @@
+---
+layout: license
+permalink: /licenses/no-license/
+category: No License
+class: license-types
+title: No License
+
+description: You retain all rights and do not permit distribution, reproduction, or derivative works. You may grant some rights in cases where you publish your source code to a site that requires accepting terms of service. For example, publishing code in a public repository on GitHub requires that you allow others to view and fork your code.
+
+note: This option may be subject to the Terms Of Use of the site where you publish your source code.
+
+how: Simply do nothing, though including a copyright notice is recommended.
+
+required:
+  - include-copyright
+
+permitted:
+  - commercial-use
+  - private-use
+
+forbidden:
+  - modifications
+  - distribution
+  - sublicense
+
+---
+
+Copyright [year] [fullname]
\ No newline at end of file