canvas-lms/spec/lib/text_helper_spec.rb

# encoding: UTF-8
#
# Copyright (C) 2011 Instructure, Inc.
#
# This file is part of Canvas.
#
# Canvas is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, version 3 of the License.
#
# Canvas is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#

require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')

describe TextHelper do

  # Minimal host for the TextHelper mixin. The helper methods are extended
  # onto the class itself, and `t` forwards its arguments unchanged to I18n.t.
  class TestClassForMixins
    extend TextHelper

    class << self
      def t(*args)
        I18n.t(*args)
      end
    end
  end

  # Shorthand for the TextHelper-extended test class; the examples call the
  # helper methods through this accessor.
  def th
    TestClassForMixins
  end

  # Examples for TextHelper#datetime_string. Fixed dates are always built for
  # January 1st of the *current* year as seen by Time.zone, so the expected
  # strings carry no year component.
  context "datetime_string" do

    it "should just give the start if no end is provided" do
      datetime = Time.zone.parse("#{Time.zone.now.year}-01-01 12:00:00")
      th.datetime_string(datetime).should == "Jan 1 at 12pm"
    end

    it "should omit the time if shorten_midnight is true and it's (due) at midnight" do
      datetime = Time.zone.now.midnight
      th.datetime_string(datetime, :event, nil, true).should == th.date_string(datetime, :no_words)
      # for due dates, one minute before midnight is expected to be shortened too
      datetime -= 1.minute
      th.datetime_string(datetime, :due_date, nil, true).should == th.date_string(datetime, :no_words)
    end

    it "should ignore end if the type is due_date" do
      # Take the year from Time.zone.now (not Time.now), like the other
      # examples: the system clock and Time.zone can disagree about the year
      # around New Year, which would add a year to the formatted string.
      datetime = Time.zone.parse("#{Time.zone.now.year}-01-01 12:00:00")
      expected = "Jan 1 by 12pm"
      th.datetime_string(datetime, :due_date).should == expected
      th.datetime_string(datetime, :due_date, datetime + 1.hour).should == expected
    end

    it "should give a multi-day range if start and end are on different days" do
      start_datetime = Time.zone.parse("#{Time.zone.now.year}-01-01 12:00:00")
      end_datetime = start_datetime + 2.days
      th.datetime_string(start_datetime, :event, end_datetime).should ==
        "Jan 1 at 12pm to Jan 3 at 12pm"
    end

    it "should give a same-day range if start and end are on the same day" do
      start_datetime = Time.zone.parse("#{Time.zone.now.year}-01-01 12:00:00")
      end_datetime = start_datetime.advance(:hours => 1)
      # note the two spaces before "1pm": the expected string has a padded hour
      th.datetime_string(start_datetime, :event, end_datetime).should ==
        "Jan 1 from 12pm to  1pm"
    end

    it "should include the year if the current year isn't the same" do
      today = Time.zone.now
      nextyear = today.advance(:years => 1)
      datestring = th.datetime_string nextyear
      # the third whitespace-separated token is expected to be the year
      datestring.split[2].to_i.should == nextyear.year
      # the current-year rendering has exactly one token fewer (no year)
      th.datetime_string(today).split.size.should == (datestring.split.size - 1)
    end

  end

  # Examples for TextHelper#time_string, compared against the I18n time
  # formats :tiny and :tiny_on_the_hour.
  context "time_string" do

    it "should be formatted properly" do
      now = Time.zone.now
      # nudge off the top of the hour so the minutes are never zero here
      off_the_hour = now.min == 0 ? now + 1.minutes : now
      th.time_string(off_the_hour).should == I18n.l(off_the_hour, :format => :tiny)
    end

    it "should omit the minutes if it's on the hour" do
      now = Time.zone.now
      # drop the minutes component so the time sits on the hour
      on_the_hour = now - now.min.minutes
      th.time_string(on_the_hour).should == I18n.l(on_the_hour, :format => :tiny_on_the_hour)
    end

  end

  # Examples for TextHelper#date_string. All times are built through
  # Time.zone so the results do not depend on the host's local time zone.
  context "date_string" do

    it "should include the year if the current year isn't the same" do
      today = Time.zone.now
      # Shift 8 days away from now so we don't have to deal with the
      # day-of-the-week wording; shift backwards instead when going forwards
      # would cross into the next year.
      if today.year == (today + 8.days).year
        today += 8.days
      else
        today -= 8.days
      end
      nextyear = today.advance(:years => 1)
      datestring = th.date_string nextyear
      # the third whitespace-separated token is expected to be the year
      datestring.split[2].to_i.should == nextyear.year
      # the same-year rendering has exactly one token fewer (no year)
      th.date_string(today).split.size.should == (datestring.split.size - 1)
    end

    it "should say the Yesterday/Today/Tomorrow if it's yesterday/today/tomorrow" do
      today = Time.zone.now
      tomorrow = today + 1.day
      yesterday = today - 1.day
      th.date_string(today).should == "Today"
      th.date_string(tomorrow).should == "Tomorrow"
      th.date_string(yesterday).should == "Yesterday"
    end

    it "should not say the day of the week if it's exactly a few years away" do
      aday = Time.zone.now + 2.days
      nextyear = aday.advance(:years => 1)
      # two days from now is rendered as the weekday name...
      th.date_string(aday).should == aday.strftime("%A")
      # ...but the same calendar day a year later is not
      th.date_string(nextyear).should_not == nextyear.strftime("%A")
      # in fact, it carries the year as its third token
      th.date_string(nextyear).split[2].to_i.should == nextyear.year
    end

    it "should ignore the end date if it matches the start date" do
      # Parsed via Time.zone (rather than Time.parse, which uses the system's
      # local zone) so both times land on the same day regardless of where
      # the suite runs.
      start_date = Time.zone.parse("2012-01-01 12:00:00")
      end_date = Time.zone.parse("2012-01-01 13:00:00")
      th.date_string(start_date, end_date).should == th.date_string(start_date)
    end

    it "should do date ranges if the end date differs from the start date" do
      start_date = Time.zone.parse("2012-01-01 12:00:00")
      end_date = Time.zone.parse("2012-01-08 12:00:00")
      th.date_string(start_date, end_date).should == "#{th.date_string(start_date)} to #{th.date_string(end_date)}"
    end
  end

  # Examples for TextHelper#format_message; only the first element of its
  # return value is inspected here.
  context "format_message" do
    it "should detect and linkify URLs" do
      # Formats the message, parses the resulting HTML and returns the href
      # of the first anchor found in it.
      first_href = lambda do |message|
        formatted = th.format_message(message).first
        fragment = Nokogiri::HTML::DocumentFragment.parse(formatted)
        fragment.css('a').first['href']
      end

      # URL wrapped in parentheses
      first_href.call("click here: (http://www.instructure.com) to check things out\nnewline").
        should == "http://www.instructure.com"

      # URL at the end of a line
      first_href.call("click here: http://www.instructure.com\nnewline").
        should == "http://www.instructure.com"

      # scheme-less URL with a query string gets an http:// prefix
      first_href.call("click here: www.instructure.com/a/b?a=1&b=2\nnewline").
        should == "http://www.instructure.com/a/b?a=1&b=2"

      # trailing slash is kept
      first_href.call("click here: http://www.instructure.com/\nnewline").
        should == "http://www.instructure.com/"

      # non-ASCII path characters come back percent-encoded
      first_href.call("click here: http://www.instructure.com/courses/1/wiki/informação").
        should == "http://www.instructure.com/courses/1/wiki/informa%C3%A7%C3%A3o"

      # we don't match parens in a url, so the link ends on the opening paren
      first_href.call("click here: http://www.instructure.com/'onclick=alert(document.cookie)//\nnewline").
        should == "http://www.instructure.com/%27onclick=alert"
    end

    it "should handle having the placeholder in the text body" do
      placeholder = TextHelper::AUTO_LINKIFY_PLACEHOLDER
      message = "this text has the placeholder #{placeholder} embedded right in it.\nhttp://www.instructure.com/\n"
      expected = "this text has the placeholder #{placeholder} embedded right in it.<br/>\r\n<a href='http://www.instructure.com/'>http://www.instructure.com/</a><br/>\r"
      th.format_message(message).first.should == expected
    end
  end

  # Examples for TextHelper#truncate_text with the :max_length and
  # :max_words options.
  context "truncate_text" do
    it "should not split if max_length is exact text length" do
      exact = "I am an exact length"
      th.truncate_text(exact, :max_length => exact.length).should == exact
    end

    it "should split on multi-byte character boundaries" do
      text = "This\ntext\nhere\n获\nis\nutf-8"

      # [max_length, expected result], checked in this order
      expectations = [
        [9,  "This\nt..."],
        [18, "This\ntext\nhere\n..."],
        [19, "This\ntext\nhere\n获..."],
        [20, "This\ntext\nhere\n获\n..."],
        [21, "This\ntext\nhere\n获\ni..."],
        [22, "This\ntext\nhere\n获\nis..."],
        [23, "This\ntext\nhere\n获\nis\n..."],
        [80, text]
      ]

      expectations.each do |limit, expected|
        th.truncate_text(text, :max_length => limit).should == expected
      end
    end

    it "should split on words if specified" do
      sentence = "I am a sentence with areallylongwordattheendthatcantbesplit and then a few more words"

      # [options, expected result], checked in this order
      expectations = [
        [{ :max_words => 4, :max_length => 30 }, "I am a sentence"],
        [{ :max_words => 6, :max_length => 30 }, "I am a sentence with areall..."],
        [{ :max_words => 5, :max_length => 20 }, "I am a sentence with"]
      ]

      expectations.each do |options, expected|
        th.truncate_text(sentence, options).should == expected
      end
    end
  end

  # Examples for TextHelper#truncate_html with :num_words => 3. In every
  # case the expected output ends the kept text with a "<span>...</span>"
  # marker followed by a newline.
  context "truncate_html" do
    it "should truncate in the middle of an element" do
      markup = "<div>a b c d e</div>"
      truncated = th.truncate_html(markup, :num_words => 3)
      truncated.should == "<div>a b c<span>...</span>\n</div>"
    end

    it "should truncate at the end of an element" do
      markup = "<div><div>a b c</div>d e</div>"
      truncated = th.truncate_html(markup, :num_words => 3)
      truncated.should == "<div><div>a b c<span>...</span>\n</div></div>"
    end

    it "should truncate at the beginning of an element" do
      markup = "<div>a b c<div>d e</div></div>"
      truncated = th.truncate_html(markup, :num_words => 3)
      truncated.should == "<div>a b c<span>...</span>\n</div>"
    end
  end

  it "should insert reply to into subject" do
    # a bare subject gains the "Re: " prefix; an already-prefixed subject is
    # returned unchanged
    ['ohai', 'Re: ohai'].each do |subject|
      TextHelper.make_subject_reply_to(subject).should == 'Re: ohai'
    end
  end

  # Examples for TextHelper#html_to_text.
  context ".html_to_text" do
    it "should format links in markdown-like style" do
      labelled_link = "<a href='www.example.com'>Link</a>"
      bare_link = "<a href='www.example.com'>www.example.com</a>"

      th.html_to_text(labelled_link).should == "[Link](www.example.com)"
      # when the link text equals the href, only the URL is emitted
      th.html_to_text(bare_link).should == "www.example.com"
    end

    it "should turn images into urls" do
      image = "<img src='http://www.example.com/a'>"
      th.html_to_text(image).should == "http://www.example.com/a"
    end

    it "should insert newlines for ps and brs" do
      markup = "Ohai<br>Text <p>paragraph of text.</p>End"
      th.html_to_text(markup).should == "Ohai\n\nText paragraph of text.\n\nEnd"
    end

    it "should return a string with no html back unchanged" do
      plain = 'String without HTML'
      th.html_to_text(plain).should == 'String without HTML'
    end

    it "should return an empty string if passed a nil value" do
      th.html_to_text(nil).should == ''
    end
  end

  # Examples for TextHelper#html_to_simple_text and #html_to_simple_html.
  # Both are fed the same HTML fixture, built fresh before each example.
  describe "simplify html" do
    # Fixture: one paragraph per tag under test (strong, em, h1-h6, a, img).
    # The heredoc is de-indented and stripped, so it has no leading or
    # trailing whitespace.
    before(:each) do
      @body = <<-END.strip_heredoc.strip
        <p><strong>This is a bold tag</strong></p>
        <p><em>This is an em tag</em></p>
        <p><h1>This is an h1 tag</h1></p>
        <p><h2>This is an h2 tag</h2></p>
        <p><h3>This is an h3 tag</h3></p>
        <p><h4>This is an h4 tag</h4></p>
        <p><h5>This is an h5 tag</h5></p>
        <p><h6>This is an h6 tag</h6></p>
        <p><a href="http://foo.com">Link to Foo</a></p>
        <p><img src="http://google.com/someimage.png" width="50" height="50" alt="Some Image" title="Some Image" /></p>
      END
    end

    # Expected plain-text rendering: h1 is framed by rows of asterisks, h2 by
    # rows of dashes, h3-h6 are underlined with dashes, the link becomes
    # "text ( url )" and the image is reduced to the text "Some Image"
    # (the fixture's alt and title are both that value).
    it "should convert simple tags to plain text" do
      text = th.html_to_simple_text(@body)
      text.should == <<-END.strip_heredoc.strip
        This is a bold tag

        This is an em tag

        *****************
        This is an h1 tag
        *****************

        -----------------
        This is an h2 tag
        -----------------

        This is an h3 tag
        -----------------

        This is an h4 tag
        -----------------

        This is an h5 tag
        -----------------

        This is an h6 tag
        -----------------

        Link to Foo ( http://foo.com )

        Some Image
      END
    end

    # Expected minimal-HTML rendering: a single <p> whose lines are separated
    # by <br/> tags, with the link URL turned into an anchor.
    it "should convert simple tags to minimal html" do
      html = th.html_to_simple_html(@body).gsub("\r\n", "\n") # gsub only for test matching
      html.should == <<-END.strip_heredoc.strip
        <p>This is a bold tag<br/>
        <br/>
        This is an em tag<br/>
        <br/>
        This is an h1 tag<br/>
        <br/>
        This is an h2 tag<br/>
        <br/>
        This is an h3 tag<br/>
        <br/>
        This is an h4 tag<br/>
        <br/>
        This is an h5 tag<br/>
        <br/>
        This is an h6 tag<br/>
        <br/>
        Link to Foo ( <a href='http://foo.com'>http://foo.com</a> )<br/>
        <br/>
        Some Image</p>
      END
    end

    # With :base_url given, a root-relative href is expected to come back
    # prefixed with that base URL.
    it "should convert relative links to absolute links" do
      html = th.html_to_simple_html("<a href=\"/this/is/a/relative/link\">Relative Link</a>", :base_url => "http://example.com")
      html.should == "<p>Relative Link ( <a href='http://example.com/this/is/a/relative/link'>http://example.com/this/is/a/relative/link</a> )</p>"
    end
  end

  # Examples for the markdown helpers: markdown_escape, markdown_safe and the
  # markdown-translating `mt`.
  context "markdown" do
    context "safety" do
      # Markdown metacharacters are expected to be backslash-escaped, and
      # escaping an already-escaped string must not change it again.
      it "should escape Strings correctly" do
        str = "`a` **b** _c_ ![d](e)\n# f\n + g\n - h"
        expected = "\\`a\\` \\*\\*b\\*\\* \\_c\\_ \\!\\[d\\]\\(e\\)\n\\# f\n \\+ g\n \\- h"
        (escaped = th.markdown_escape(str)).should == expected
        th.markdown_escape(escaped).should == expected
      end
    end
    context "i18n" do
      # A plain String interpolation value must come out as literal text: its
      # markdown is not rendered (the expected output also shows its newlines
      # collapsed into spaces).
      it "should automatically escape Strings" do
        th.mt(:foo, "We **don't** trust the following input: %{input}", :input => "`a` **b** _c_ ![d](e)\n# f\n + g\n - h").
          should == "We <strong>don&#39;t</strong> trust the following input: `a` **b** _c_ ![d](e) # f + g - h"
      end

      # A value wrapped with markdown_safe is rendered as markdown.
      it "should not escape MarkdownSafeBuffers" do
        th.mt(:foo, "We **do** trust the following input: %{input}", :input => th.markdown_safe("`a` **b** _c_ ![d](e)\n# f\n + g\n - h")).
          should == <<-HTML.strip
<p>We <strong>do</strong> trust the following input: <code>a</code> <strong>b</strong> <em>c</em> <img src="e" alt="d" /></p>

<h1>f</h1>

<ul>
<li>g</li>
<li>h</li>
</ul>
        HTML
      end

      # A single paragraph loses its surrounding <p>; multiple paragraphs
      # keep theirs.
      it "should inlinify single paragraphs by default" do
        th.mt(:foo, "**this** is a test").
          should == "<strong>this</strong> is a test"

        th.mt(:foo, "**this** is another test\n\nwhat will happen?").
          should == "<p><strong>this</strong> is another test</p>\n\n<p>what will happen?</p>"
      end

      it "should not inlinify single paragraphs if :inlinify => :never" do
        th.mt(:foo, "**one** more test", :inlinify => :never).
          should == "<p><strong>one</strong> more test</p>"
      end

      # With a :wrapper hash, "*...*" and "**...**" spans are replaced by the
      # given templates instead of <em>/<strong>; the html_safe :name value is
      # inserted unescaped.
      it "should allow wrapper with markdown" do
        th.mt(:foo, %{Dolore jerky bacon officia t-bone aute magna. Officia corned beef et ut bacon.

Commodo in ham, *short ribs %{name} pastrami* sausage elit sunt dolore eiusmod ut ea proident ribeye.

Ad dolore andouille meatball irure, ham hock tail exercitation minim ribeye sint quis **eu short loin pancetta**.},
        :name => '<b>test</b>'.html_safe,
        :wrapper => {
          '*' => '<span>\1</span>',
          '**' => '<a>\1</a>',
        }).should == "<p>Dolore jerky bacon officia t-bone aute magna. Officia corned beef et ut bacon.</p>\n\n<p>Commodo in ham, <span>short ribs <b>test</b> pastrami</span> sausage elit sunt dolore eiusmod ut ea proident ribeye.</p>\n\n<p>Ad dolore andouille meatball irure, ham hock tail exercitation minim ribeye sint quis <a>eu short loin pancetta</a>.</p>"
      end

      # Inlinifying also applies when the paragraph contains several inline
      # elements, or a single-string :wrapper around an html_safe value.
      it "should inlinify complex single paragraphs" do
        th.mt(:foo, "**this** is a *test*").
          should == "<strong>this</strong> is a <em>test</em>"

        th.mt(:foo, "*%{button}*", :button => '<button type="submit" />'.html_safe, :wrapper => '<span>\1</span>').
          should == '<span><button type="submit" /></span>'
      end

      it "should not inlinify multiple paragraphs" do
        th.mt(:foo, "para1\n\npara2").
          should == "<p>para1</p>\n\n<p>para2</p>"
      end
    end
  end

  it "should strip out invalid utf-8" do
    # [raw input containing the stray byte \xfb, expected cleaned output];
    # the inputs differ only in what follows the bad byte
    cases = [
      ["hai\xfb", "hai"],
      ["hai\xfb there", "hai there"],
      ["hai\xfba", "haia"],
      ["hai\xfbab", "haiab"],
      ["hai\xfbabc", "haiabc"],
      ["hai\xfbabcd", "haiabcd"]
    ]

    cases.each do |raw, cleaned|
      # work on a copy explicitly tagged as UTF-8
      candidate = raw.dup.force_encoding("UTF-8")
      TextHelper.strip_invalid_utf8(candidate).should == cleaned
    end
  end

  # Examples for TextHelper.recursively_strip_invalid_utf8! and for its use
  # when serialized model columns are deserialized.
  describe "YAML invalid UTF8 stripping" do
    # Loads a YAML document whose "text" value contains the stray byte \xEA,
    # then checks that stripping fixes the nested string in place (the
    # `answer` reference taken before the call sees the change).
    it "should recursively strip out invalid utf-8" do
      data = YAML.load(%{
---
answers:
- !map:HashWithIndifferentAccess
  id: 2
  text: "t\xEAwo"
  valid_ascii: !binary |
    oHRleHSg
      }.strip)
      answer = data['answers'][0]['text']
      answer.valid_encoding?.should be_false
      TextHelper.recursively_strip_invalid_utf8!(data, true)
      answer.should == "two"
      answer.encoding.should == Encoding::UTF_8
      answer.valid_encoding?.should be_true

      # in some edge cases, Syck will return a string as ASCII-8BIT if it's not valid UTF-8
      # so we added a force_encoding step to recursively_strip_invalid_utf8!
      ascii = data['answers'][0]['valid_ascii']
      ascii.should == 'text'
      ascii.encoding.should == Encoding::UTF_8
    end

    # Writes a raw YAML blob straight into the question_data column
    # (update_all on the matching row), then checks the value read back
    # after a reload.
    it "should strip out invalid utf-8 when deserializing a column" do
      # non-binary invalid utf-8 can't even be inserted into the db in this environment,
      # so we only test the !binary case here
      yaml_blob = %{
---
 answers:
 - !map:HashWithIndifferentAccess
   weight: 0
   id: 2
   html: ab&ecirc;cd.
   valid_ascii: !binary |
     oHRleHSg
   migration_id: QUE_2
 question_text: What is the answer
 position: 2
      }.force_encoding('binary').strip
      # now actually insert it into an AR column
      aq = assessment_question_model(bank: AssessmentQuestionBank.create!(context: Course.create!))
      AssessmentQuestion.where(:id => aq).update_all(:question_data => yaml_blob)
      text = aq.reload.question_data['answers'][0]['valid_ascii']
      text.should == "text"
      text.encoding.should == Encoding::UTF_8
    end

    describe "unserialize_attribute_with_utf8_check" do
      # Reading Account#settings must not trigger the recursive strip.
      it "should not strip columns not on the list" do
        TextHelper.expects(:recursively_strip_invalid_utf8!).never
        a = Account.find(Account.default.id)
        a.settings # deserialization is lazy, trigger it
      end

      # Reading AssessmentQuestion#question_data must call the recursive
      # strip with the deserialized hash and `true`.
      it "should strip columns on the list" do
        TextHelper.unstub(:recursively_strip_invalid_utf8!)
        aq = assessment_question_model(bank: AssessmentQuestionBank.create!(context: Course.create!))
        TextHelper.expects(:recursively_strip_invalid_utf8!).with(instance_of(HashWithIndifferentAccess), true)
        # re-find the record so the attribute is deserialized afresh
        aq = AssessmentQuestion.find(aq)
        aq.question_data
      end
    end
  end
end