better no license matching

This commit is contained in:
Ben Balter 2015-03-07 16:06:34 -05:00
parent 64457e2fcb
commit 55e231bdee
3 changed files with 15 additions and 7 deletions

View File

@ -4,7 +4,13 @@ class Licensee
# Builds the wrapper around a single blob.
#
# blob - the object kept in @blob for later use; presumably a Rugged blob
#        (TODO confirm against the caller).
def initialize(blob)
  @blob = blob
end
# Similarity between this file's blob and the hash signature of `other`.
#
# other - any object responding to #hashsig.
#
# Returns whatever blob.similarity reports for other.hashsig, or 0 when
# `other` has no hash signature or Rugged::InvalidError is raised along the way.
def similarity(other)
  # Ask the blob for its own signature first, with the WHITESPACE_SMART option,
  # so that any Rugged::InvalidError surfaces here and is turned into 0 below.
  blob.hashsig(Rugged::Blob::HashSignature::WHITESPACE_SMART)
  return 0 unless other.hashsig

  blob.similarity(other.hashsig)
rescue Rugged::InvalidError
  0
end
# Raw file contents

View File

@ -1,11 +1,7 @@
class Licensee
class CopyrightMatcher < Matcher
# Case-insensitive pattern for a copyright notice line: "©", "(c)" or
# "Copyright", then a space, a four-digit year and the rest of the line.
# `^` and `$` anchor per line, so one matching line inside a longer text is
# enough for a match.
# NOTE(review): REGEX is assigned again a few lines further down in this class
# body (looks like diff residue) — confirm which definition is meant to be live.
REGEX = /^(©|\(c\)|Copyright) \d{4}(.*)\n?$/i
# Returns the license registered under the "no-license" key.
# The lookup result is cached in @no_license, so Licensee::Licenses.find is
# only consulted again while the cached value is still nil/false.
def no_license
  return @no_license if @no_license

  @no_license = Licensee::Licenses.find("no-license")
end
# Case-insensitive pattern that matches only when the ENTIRE string is a single
# copyright line: an optional "Copyright ", then "©" or "(c)", a space, a
# four-digit year, the remainder of that line and an optional trailing newline.
# `\A` and `\z` anchor the whole string rather than each line, and `.` does not
# cross newlines, so any additional line of text prevents a match.
REGEX = /\A(Copyright )?(©|\(c\)) \d{4}(.*)\n?\z/i
def match
no_license if file.content_normalized =~ REGEX
@ -14,5 +10,11 @@ class Licensee
# Fixed confidence value for this matcher: always returns 100.
def confidence
  100
end
private
# Returns the license registered under the "no-license" key.
# The lookup result is cached in @no_license, so Licensee::Licenses.find is
# only consulted again while the cached value is still nil/false.
def no_license
  return @no_license if @no_license

  @no_license = Licensee::Licenses.find("no-license")
end
end
end

View File

@ -12,11 +12,11 @@ class Licensee
private
# Memoizes in @matches the [license, similarity] pairs for every entry of
# Licensee.licenses whose similarity is greater than zero.
# NOTE(review): the two assignments below differ only in calling
# `similarity(l)` versus `file.similarity(l)`; they look like the old and new
# sides of a diff hunk. As written, the second `||=` cannot take effect once
# the first has stored an array — confirm which line is meant to remain.
def matches
@matches ||= Licensee.licenses.map { |l| [l, similarity(l)] }.select { |l,sim| sim > 0 }
@matches ||= Licensee.licenses.map { |l| [l, file.similarity(l)] }.select { |l,sim| sim > 0 }
end
# Returns file.similarity(other) for the given object.
# NOTE(review): the ternary on the first body line is evaluated and its value
# discarded, since only the last expression is returned. It looks like the old
# side of a diff hunk left next to its replacement — confirm before relying on it.
def similarity(other)
other.hashsig ? file.blob.similarity(other.hashsig) : 0
file.similarity(other)
end
# Pulled out for easier testing