From c310aed48d3418aec73766ee036921620d90951a Mon Sep 17 00:00:00 2001 From: Mike Linksvayer Date: Wed, 22 Nov 2017 16:13:16 -0800 Subject: [PATCH] spdx file matcher --- lib/licensee/matchers.rb | 1 + lib/licensee/matchers/spdx.rb | 16 ++++++++ lib/licensee/project_files/license_file.rb | 12 +++--- .../project_files/package_manager_file.rb | 6 ++- lib/licensee/project_files/project_file.rb | 2 +- spec/licensee/matchers/spdx_matcher_spec.rb | 41 +++++++++++++++++++ .../project_files/license_file_spec.rb | 4 +- 7 files changed, 71 insertions(+), 11 deletions(-) create mode 100644 lib/licensee/matchers/spdx.rb create mode 100644 spec/licensee/matchers/spdx_matcher_spec.rb diff --git a/lib/licensee/matchers.rb b/lib/licensee/matchers.rb index bb18369..8994ce3 100644 --- a/lib/licensee/matchers.rb +++ b/lib/licensee/matchers.rb @@ -11,5 +11,6 @@ module Licensee autoload :NpmBower, 'licensee/matchers/npm_bower' autoload :Package, 'licensee/matchers/package' autoload :Reference, 'licensee/matchers/reference' + autoload :Spdx, 'licensee/matchers/spdx' end end diff --git a/lib/licensee/matchers/spdx.rb b/lib/licensee/matchers/spdx.rb new file mode 100644 index 0000000..6c0688e --- /dev/null +++ b/lib/licensee/matchers/spdx.rb @@ -0,0 +1,16 @@ +module Licensee + module Matchers + class Spdx < Licensee::Matchers::Package + # While we could parse the LICENSE.spdx file, prefer + # a lenient regex for speed and security. Moar parsing moar problems. + LICENSE_REGEX = /PackageLicenseDeclared:\s*([a-z\-0-9\. +()]+)\s*/i + + private + + def license_property + match = @file.content.match LICENSE_REGEX + match[1].downcase if match && match[1] + end + end + end +end diff --git a/lib/licensee/project_files/license_file.rb b/lib/licensee/project_files/license_file.rb index 4de644a..05b258f 100644 --- a/lib/licensee/project_files/license_file.rb +++ b/lib/licensee/project_files/license_file.rb @@ -7,8 +7,8 @@ module Licensee PREFERRED_EXT = %w[md markdown txt].freeze PREFERRED_EXT_REGEX = /\.#{Regexp.union(PREFERRED_EXT)}\z/ - # Regex to match any extension - ANY_EXT_REGEX = %r{\.[^./]+\z} + # Regex to match any extension except .spdx + NONSPDX_EXT_REGEX = %r{\.(?!spdx)[^./]+\z} # Regex to match, LICENSE, LICENCE, unlicense, etc. LICENSE_REGEX = /(un)?licen[sc]e/i @@ -28,17 +28,17 @@ module Licensee /\A#{LICENSE_REGEX}#{PREFERRED_EXT_REGEX}\z/ => 0.95, # LICENSE.md /\A#{COPYING_REGEX}\z/ => 0.90, # COPYING /\A#{COPYING_REGEX}#{PREFERRED_EXT_REGEX}\z/ => 0.85, # COPYING.md - /\A#{LICENSE_REGEX}#{ANY_EXT_REGEX}\z/ => 0.80, # LICENSE.textile - /\A#{COPYING_REGEX}#{ANY_EXT_REGEX}\z/ => 0.75, # COPYING.textile + /\A#{LICENSE_REGEX}#{NONSPDX_EXT_REGEX}\z/ => 0.80, # LICENSE.textile + /\A#{COPYING_REGEX}#{NONSPDX_EXT_REGEX}\z/ => 0.75, # COPYING.textile /\A#{LICENSE_REGEX}[-_]/ => 0.70, # LICENSE-MIT /\A#{COPYING_REGEX}[-_]/ => 0.65, # COPYING-MIT /[-_]#{LICENSE_REGEX}/ => 0.60, # MIT-LICENSE-MIT /[-_]#{COPYING_REGEX}/ => 0.55, # MIT-COPYING /\A#{OFL_REGEX}#{PREFERRED_EXT_REGEX}/ => 0.50, # OFL.md - /\A#{OFL_REGEX}#{ANY_EXT_REGEX}/ => 0.45, # OFL.textile + /\A#{OFL_REGEX}#{NONSPDX_EXT_REGEX}/ => 0.45, # OFL.textile /\A#{OFL_REGEX}\z/ => 0.40, # OFL /\A#{PATENTS_REGEX}\z/ => 0.35, # PATENTS - /\A#{PATENTS_REGEX}#{ANY_EXT_REGEX}\z/ => 0.30, # PATENTS.txt + /\A#{PATENTS_REGEX}#{NONSPDX_EXT_REGEX}\z/ => 0.30, # PATENTS.txt // => 0.00 # Catch all }.freeze diff --git a/lib/licensee/project_files/package_manager_file.rb b/lib/licensee/project_files/package_manager_file.rb index 021a5ac..82da50e 100644 --- a/lib/licensee/project_files/package_manager_file.rb +++ b/lib/licensee/project_files/package_manager_file.rb @@ -10,8 +10,9 @@ module Licensee # Hash of Filename => [possible matchers] FILENAMES_EXTENSIONS = { - 'DESCRIPTION' => [Matchers::Cran], - 'dist.ini' => [Matchers::DistZilla] + 'DESCRIPTION' => [Matchers::Cran], + 'dist.ini' => [Matchers::DistZilla], + 'LICENSE.spdx' => [Matchers::Spdx] }.freeze def possible_matchers @@ -21,6 +22,7 @@ module Licensee def self.name_score(filename) return 1.0 if ['.gemspec', '.cabal'].include?(File.extname(filename)) return 1.0 if filename == 'package.json' + return 1.0 if filename == 'LICENSE.spdx' return 0.8 if filename == 'dist.ini' return 0.9 if filename == 'DESCRIPTION' return 0.75 if filename == 'bower.json' diff --git a/lib/licensee/project_files/project_file.rb b/lib/licensee/project_files/project_file.rb index 6385aa0..0a1e315 100644 --- a/lib/licensee/project_files/project_file.rb +++ b/lib/licensee/project_files/project_file.rb @@ -66,7 +66,7 @@ module Licensee def copyright? return false unless is_a?(LicenseFile) return false unless matcher.is_a?(Matchers::Copyright) - filename =~ /\Acopyright(?:#{LicenseFile::ANY_EXT_REGEX})?\z/i + filename =~ /\Acopyright(?:#{LicenseFile::NONSPDX_EXT_REGEX})?\z/i end end end diff --git a/spec/licensee/matchers/spdx_matcher_spec.rb b/spec/licensee/matchers/spdx_matcher_spec.rb new file mode 100644 index 0000000..f3bce9c --- /dev/null +++ b/spec/licensee/matchers/spdx_matcher_spec.rb @@ -0,0 +1,41 @@ +RSpec.describe Licensee::Matchers::Spdx do + let(:content) { 'PackageLicenseDeclared: MIT' } + let(:file) do + Licensee::ProjectFiles::LicenseFile.new(content, 'LICENSE.spdx') + end + let(:mit) { Licensee::License.find('mit') } + let(:other) { Licensee::License.find('other') } + subject { described_class.new(file) } + + it 'matches' do + expect(subject.match).to eql(mit) + end + + it 'has a confidence' do + expect(subject.confidence).to eql(90) + end + + context 'no license field' do + let(:content) { 'foo: bar' } + + it 'returns nil' do + expect(subject.match).to be_nil + end + end + + context 'an unknown license' do + let(:content) { 'PackageLicenseDeclared: xyz' } + + it 'returns other' do + expect(subject.match).to eql(other) + end + end + + context 'a license expression' do + let(:content) { 'PackageLicenseDeclared: (MIT OR Apache-2.0)' } + + it 'returns other' do + expect(subject.match).to eql(other) + end + end +end diff --git a/spec/licensee/project_files/license_file_spec.rb b/spec/licensee/project_files/license_file_spec.rb index f62c232..9d8df6d 100644 --- a/spec/licensee/project_files/license_file_spec.rb +++ b/spec/licensee/project_files/license_file_spec.rb @@ -109,11 +109,11 @@ RSpec.describe Licensee::ProjectFiles::LicenseFile do context 'any extension regex' do it 'matches .foo' do - expect(described_class::ANY_EXT_REGEX).to match('.foo') + expect(described_class::NONSPDX_EXT_REGEX).to match('.foo') end it 'does not match .md/foo' do - expect(described_class::ANY_EXT_REGEX).to_not match('.md/foo') + expect(described_class::NONSPDX_EXT_REGEX).to_not match('.md/foo') end end