Format strings: a character literal should be printed with %c, not %d.

The type of a character literal is 'int' in C, but if the user writes a character /as/ a literal, we should assume they meant it to be a character and not a numeric value, and thus offer %c as a correction rather than %d. There's a special case for multi-character literals (like 'MooV'), which have implementation-defined value and usually cannot be printed with %c. These still use %d as the suggestion. In C++, the type of a character literal is 'char', and so this problem doesn't exist. <rdar://problem/12282316> llvm-svn: 169398
2012-12-05 18:44:40 +00:00 · 2012-12-05 18:44:40 +00:00 · 598ec0992d
parent 6aaa87e0d2
commit 598ec0992d
2 changed files with 51 additions and 10 deletions
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@ -2717,8 +2717,8 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
  if (!AT.isValid())
    return true;

-  QualType IntendedTy = E->getType();
-  if (AT.matchesType(S.Context, IntendedTy))
+  QualType ExprTy = E->getType();
+  if (AT.matchesType(S.Context, ExprTy))
    return true;

  // Look through argument promotions for our error message's reported type.
@ -2729,7 +2729,7 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
    if (ICE->getCastKind() == CK_IntegralCast ||
        ICE->getCastKind() == CK_FloatingCast) {
      E = ICE->getSubExpr();
-      IntendedTy = E->getType();
+      ExprTy = E->getType();

      // Check if we didn't match because of an implicit cast from a 'char'
      // or 'short' to an 'int'.  This is done because printf is a varargs
@ -2737,12 +2737,20 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
      if (ICE->getType() == S.Context.IntTy ||
          ICE->getType() == S.Context.UnsignedIntTy) {
        // All further checking is done on the subexpression.
-        if (AT.matchesType(S.Context, IntendedTy))
+        if (AT.matchesType(S.Context, ExprTy))
          return true;
      }
    }
+  } else if (const CharacterLiteral *CL = dyn_cast<CharacterLiteral>(E)) {
+    // Special case for 'a', which has type 'int' in C.
+    // Note, however, that we do /not/ want to treat multibyte constants like
+    // 'MooV' as characters! This form is deprecated but still exists.
+    if (ExprTy == S.Context.IntTy)
+      if (llvm::isUIntN(S.Context.getCharWidth(), CL->getValue()))
+        ExprTy = S.Context.CharTy;
  }

+  QualType IntendedTy = ExprTy;
  if (S.Context.getTargetInfo().getTriple().isOSDarwin()) {
    // Special-case some of Darwin's platform-independence types.
    if (const TypedefType *UserTy = IntendedTy->getAs<TypedefType>()) {
@ -2769,7 +2777,7 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,

    CharSourceRange SpecRange = getSpecifierRange(StartSpecifier, SpecifierLen);

-    if (IntendedTy != E->getType()) {
+    if (IntendedTy != ExprTy) {
      // The canonical type for formatting this value is different from the
      // actual type of the expression. (This occurs, for example, with Darwin's
      // NSInteger on 32-bit platforms, where it is typedef'd as 'int', but
@ -2809,7 +2817,7 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,

      // We extract the name from the typedef because we don't want to show
      // the underlying type in the diagnostic.
-      const TypedefType *UserTy = cast<TypedefType>(E->getType());
+      const TypedefType *UserTy = cast<TypedefType>(ExprTy);
      StringRef Name = UserTy->getDecl()->getName();

      // Finally, emit the diagnostic.
@ -2834,9 +2842,9 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
    // Since the warning for passing non-POD types to variadic functions
    // was deferred until now, we emit a warning for non-POD
    // arguments here.
-    if (S.isValidVarArgType(E->getType()) == Sema::VAK_Invalid) {
+    if (S.isValidVarArgType(ExprTy) == Sema::VAK_Invalid) {
      unsigned DiagKind;
-      if (E->getType()->isObjCObjectType())
+      if (ExprTy->isObjCObjectType())
        DiagKind = diag::err_cannot_pass_objc_interface_to_vararg_format;
      else
        DiagKind = diag::warn_non_pod_vararg_with_format_string;
@ -2844,7 +2852,7 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
      EmitFormatDiagnostic(
        S.PDiag(DiagKind)
          << S.getLangOpts().CPlusPlus0x
-          << E->getType()
+          << ExprTy
          << CallType
          << AT.getRepresentativeTypeName(S.Context)
          << CSR
@ -2855,7 +2863,7 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
    } else
      EmitFormatDiagnostic(
        S.PDiag(diag::warn_printf_conversion_argument_type_mismatch)
-          << AT.getRepresentativeTypeName(S.Context) << E->getType()
+          << AT.getRepresentativeTypeName(S.Context) << ExprTy
          << CSR
          << E->getSourceRange(),
        E->getLocStart(), /*IsStringLocation*/false, CSR);
--- a/clang/test/FixIt/format.m
+++ b/clang/test/FixIt/format.m
@ -146,4 +146,37 @@ void test_char(char c, signed char s, unsigned char u, uint8_t n) {

  NSLog(@"%c", n); // no-warning
  // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:13}:"%hhu"
+
+
+  NSLog(@"%s", 'a'); // expected-warning{{format specifies type 'char *' but the argument has type 'char'}}
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:13}:"%c"
+
+  NSLog(@"%lf", 'a'); // expected-warning{{format specifies type 'double' but the argument has type 'char'}}
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:14}:"%c"
+
+  NSLog(@"%@", 'a'); // expected-warning{{format specifies type 'id' but the argument has type 'char'}}
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:13}:"%c"
+
+  NSLog(@"%c", 'a'); // no-warning
+  // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:13}:"%c"
+
+
+  NSLog(@"%s", 'abcd'); // expected-warning{{format specifies type 'char *' but the argument has type 'int'}}
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:13}:"%d"
+
+  NSLog(@"%lf", 'abcd'); // expected-warning{{format specifies type 'double' but the argument has type 'int'}}
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:14}:"%d"
+
+  NSLog(@"%@", 'abcd'); // expected-warning{{format specifies type 'id' but the argument has type 'int'}}
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:13}:"%d"
+}
+
+void multichar_constants_false_negative() {
+  // The value of a multi-character constant is implementation-defined, but
+  // almost certainly shouldn't be printed with %c. However, the current
+  // type-checker expects %c to correspond to an integer argument, because
+  // many C library functions like fgetc() actually return an int (using -1
+  // as a sentinel).
+  NSLog(@"%c", 'abcd'); // missing-warning{{format specifies type 'char' but the argument has type 'int'}}
+  // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:13}:"%d"
 }