This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: Fix for PR objc/34033 "compiler accepts invalid string concatenation"


>> Does clang or Apple's gcc accept concat with c strings and objc
>> string?
>
> Assuming Nicola followed their lead, no.  But I was thinking the same 
> when looking at the code and testcase.
>
> Actually, I would allow only concat-ing C to ObjC, like @"ab" "cd".  I 
> don't think "cd" @"ab" should be allowed.

Excellent comments, thanks to both of you :-)

gcc-llvm does the same as GCC 4.6 used to do, but clang indeed does the same 
that you both suggest; it accepts @"ab" "cd" but rejects "ab" @"cd".

Below, a revised patch which implements this behaviour.  Summarizing:

 * the first string must start with '@' to trigger an Objective-C string concat

 * the other strings can either have one '@' or zero '@'; it doesn't really matter,
as they're all concat-ed together into a final CPP_OBJC_STRING

 * there should be no '@' with no following string at the end of the concat (eg, 
NSString *test = @"test"@; is invalid because the last '@' makes no sense)

Ok to commit ?

Thanks

PS: I had not paid enough attention to clang (I was mostly looking at the llvm-gcc
testsuite, which has got lots of testcases; clang doesn't have as many).  I'll pay
more attention in the future.

Index: gcc/c-family/ChangeLog
===================================================================
--- gcc/c-family/ChangeLog      (revision 167002)
+++ gcc/c-family/ChangeLog      (working copy)
@@ -1,3 +1,9 @@
+2010-11-22  Nicola Pero  <nicola.pero@meta-innovation.com>
+
+       PR objc/34033
+       * c-lex.c (lex_string): Check that each Objective-C string in a
+       string concatenation sequence starts with either one or zero '@'.
+
 2010-11-20  Joseph Myers  <joseph@codesourcery.com>
 
        * c-pragma.c: Remove conditionals on HANDLE_PRAGMA_PACK,
Index: gcc/c-family/c-lex.c
===================================================================
--- gcc/c-family/c-lex.c        (revision 167002)
+++ gcc/c-family/c-lex.c        (working copy)
@@ -889,10 +889,12 @@ interpret_fixed (const cpp_token *token,
 
 /* Convert a series of STRING, WSTRING, STRING16, STRING32 and/or
    UTF8STRING tokens into a tree, performing string constant
-   concatenation.  TOK is the first of these.  VALP is the location
-   to write the string into. OBJC_STRING indicates whether an '@' token
-   preceded the incoming token.
-   Returns the CPP token type of the result (CPP_STRING, CPP_WSTRING,
+   concatenation.  TOK is the first of these.  VALP is the location to
+   write the string into.  OBJC_STRING indicates whether an '@' token
+   preceded the incoming token (in that case, the strings can either
+   be ObjC strings, preceded by a single '@', or normal strings, not
+   preceded by '@'.  The result will be a CPP_OBJC_STRING).  Returns
+   the CPP token type of the result (CPP_STRING, CPP_WSTRING,
    CPP_STRING32, CPP_STRING16, CPP_UTF8STRING, or CPP_OBJC_STRING).
 
    This is unfortunately more work than it should be.  If any of the
@@ -918,6 +920,12 @@ lex_string (const cpp_token *tok, tree *
   cpp_string str = tok->val.str;
   cpp_string *strs = &str;
 
+  /* objc_at_sign_was_seen is only used when doing Objective-C string
+     concatenation.  It is 'true' if we have seen an '@' before the
+     current string, and 'false' if not.  We must see exactly one or
+     zero '@' before each string.  */
+  bool objc_at_sign_was_seen = false;
+
  retry:
   tok = cpp_get_token (parse_in);
   switch (tok->type)
@@ -925,9 +933,12 @@ lex_string (const cpp_token *tok, tree *
     case CPP_PADDING:
       goto retry;
     case CPP_ATSIGN:
-      if (c_dialect_objc ())
+      if (objc_string)
        {
-         objc_string = true;
+         if (objc_at_sign_was_seen)
+           error ("repeated %<@%> before Objective-C string");
+
+         objc_at_sign_was_seen = true;
          goto retry;
        }
       /* FALLTHROUGH */
@@ -956,9 +967,15 @@ lex_string (const cpp_token *tok, tree *
 
       concats++;
       obstack_grow (&str_ob, &tok->val.str, sizeof (cpp_string));
+      if (objc_string)
+       objc_at_sign_was_seen = false;
       goto retry;
     }
 
+  /* It is an error if we saw a '@' with no following string.  */
+  if (objc_at_sign_was_seen)
+    error ("stray %<@%> in program");
+
   /* We have read one more token than we want.  */
   _cpp_backup_tokens (parse_in, 1);
   if (concats)
Index: gcc/testsuite/ChangeLog
===================================================================
--- gcc/testsuite/ChangeLog     (revision 167002)
+++ gcc/testsuite/ChangeLog     (working copy)
@@ -1,3 +1,11 @@
+2010-11-22  Nicola Pero  <nicola.pero@meta-innovation.com>
+
+       PR objc/34033
+       * objc.dg/strings-1.m: New.
+       * objc.dg/strings-2.m: New.
+       * obj-c++.dg/strings-1.mm: New.
+       * obj-c++.dg/strings-2.mm: New. 
+
 2010-11-21  Uros Bizjak  <ubizjak@gmail.com>
 
        PR target/46533
Index: gcc/testsuite/objc.dg/strings-1.m
===================================================================
--- gcc/testsuite/objc.dg/strings-1.m   (revision 0)
+++ gcc/testsuite/objc.dg/strings-1.m   (revision 0)
@@ -0,0 +1,33 @@
+/* Contributed by Nicola Pero <nicola.pero@meta-innovation.com>, November 2010.  */
+/* { dg-do compile } */
+
+#include "../objc-obj-c++-shared/Object1.h"
+#include "../objc-obj-c++-shared/next-mapping.h"
+#ifndef __NEXT_RUNTIME__
+#include <objc/NXConstStr.h>
+#endif
+
+/* The following are correct.  */
+id test_valid1 = @"test";
+id test_valid2 = @"te" @"st";
+id test_valid3 = @"te" @"s" @"t";
+id test_valid4 = @ "t" @ "e" @ "s" @ "t";
+
+/* The following are accepted too; you can concat an ObjC string to a
+   C string, the result being an ObjC string.  */
+id test_valid5 = @"te" "st";
+id test_valid6 = @"te" "s" @"t";
+id test_valid7 = @"te" @"s" "t";
+
+/* The following are not correct.  */
+id test_invalid1          = @@"test";            /* { dg-error "stray .@. in program" } */
+const char *test_invalid2 = "test"@;             /* { dg-error "stray .@. in program" } */
+const char *test_invalid3 = "test"@@;            /* { dg-error "stray .@. in program" } */
+const char *test_invalid4 = "te" @"st";          /* { dg-error "expected" } */
+id test_invalid5          = @"te" @@"st";        /* { dg-error "repeated .@. before Objective-C string" } */
+id test_invalid6          = @@"te" @"st";        /* { dg-error "stray .@. in program" } */
+id test_invalid7          = @"te" @"s" @@"t";    /* { dg-error "repeated .@. before Objective-C string" } */
+id test_invalid8          = @"te" @@"s" @"t";    /* { dg-error "repeated .@. before Objective-C string" } */
+id test_invalid9          = @"te" @"s" @"t" @;   /* { dg-error "stray .@. in program" } */
+id test_invalidA          = @"te" @ st;          /* { dg-error "stray .@. in program" } */
+                                                 /* { dg-error "expected" "" { target *-*-* } 32 } */
Index: gcc/testsuite/objc.dg/strings-2.m
===================================================================
--- gcc/testsuite/objc.dg/strings-2.m   (revision 0)
+++ gcc/testsuite/objc.dg/strings-2.m   (revision 0)
@@ -0,0 +1,51 @@
+/* Contributed by Nicola Pero <nicola.pero@meta-innovation.com>, November 2010.  */
+
+/* { dg-do run } */
+/* { dg-xfail-run-if "Needs OBJC2 ABI" { *-*-darwin* && { lp64 && { ! objc2 } } } { "-fnext-runtime" } { "" } } */
+/* { dg-options "-fconstant-string-class=MyTestString" } */
+/* { dg-options "-mno-constant-cfstrings -fconstant-string-class=MyTestString" { target *-*-darwin* } } */
+
+/* { dg-additional-sources "../objc-obj-c++-shared/Object1.m" } */
+
+#include "../objc-obj-c++-shared/Object1.h"
+#include "../objc-obj-c++-shared/next-mapping.h"
+
+#include <stdlib.h> /* For abort() */
+
+@interface MyTestString : Object
+{
+  char *string;
+  unsigned int len;
+}
+/* All strings should contain the C string 'test'.  Call -check to
+   test that this is true.  */
+- (void) check;
+@end
+
+@implementation MyTestString
+- (void) check
+{
+  if (len != 4 || string[0] != 't' || string[1] != 'e'
+      || string[2] != 's' || string[3] != 't' || string[4] != '\0')
+    abort ();
+}
+@end
+
+int main (void)
+{
+  MyTestString *test_valid1 = @"test";
+  MyTestString *test_valid2 = @"te" @"st";
+  MyTestString *test_valid3 = @"te" @"s" @"t";
+  MyTestString *test_valid4 = @ "t" @ "e" @ "s" @ "t";
+  MyTestString *test_valid5 = @ "t" "e" "s" "t";
+  MyTestString *test_valid6 = @ "t" "e" "s" @ "t";
+
+  [test_valid1 check];
+  [test_valid2 check];
+  [test_valid3 check];
+  [test_valid4 check];
+  [test_valid5 check];
+  [test_valid6 check];
+
+  return 0;
+}
Index: gcc/testsuite/obj-c++.dg/strings-1.mm
===================================================================
--- gcc/testsuite/obj-c++.dg/strings-1.mm       (revision 0)
+++ gcc/testsuite/obj-c++.dg/strings-1.mm       (revision 0)
@@ -0,0 +1,33 @@
+/* Contributed by Nicola Pero <nicola.pero@meta-innovation.com>, November 2010.  */
+/* { dg-do compile } */
+
+#include "../objc-obj-c++-shared/Object1.h"
+#include "../objc-obj-c++-shared/next-mapping.h"
+#ifndef __NEXT_RUNTIME__
+#include <objc/NXConstStr.h>
+#endif
+
+/* The following are correct.  */
+id test_valid1 = @"test";
+id test_valid2 = @"te" @"st";
+id test_valid3 = @"te" @"s" @"t";
+id test_valid4 = @ "t" @ "e" @ "s" @ "t";
+
+/* The following are accepted too; you can concat an ObjC string to a
+   C string, the result being an ObjC string.  */
+id test_valid5 = @"te" "st";
+id test_valid6 = @"te" "s" @"t";
+id test_valid7 = @"te" @"s" "t";
+
+/* The following are not correct.  */
+id test_invalid1          = @@"test";            /* { dg-error "stray .@. in program" } */
+const char *test_invalid2 = "test"@;             /* { dg-error "stray .@. in program" } */
+const char *test_invalid3 = "test"@@;            /* { dg-error "stray .@. in program" } */
+const char *test_invalid4 = "te" @"st";          /* { dg-error "expected" } */
+id test_invalid5          = @"te" @@"st";        /* { dg-error "repeated .@. before Objective-C string" } */
+id test_invalid6          = @@"te" @"st";        /* { dg-error "stray .@. in program" } */
+id test_invalid7          = @"te" @"s" @@"t";    /* { dg-error "repeated .@. before Objective-C string" } */
+id test_invalid8          = @"te" @@"s" @"t";    /* { dg-error "repeated .@. before Objective-C string" } */
+id test_invalid9          = @"te" @"s" @"t" @;   /* { dg-error "stray .@. in program" } */
+id test_invalidA          = @"te" @ st;          /* { dg-error "stray .@. in program" } */
+                                                 /* { dg-error "expected" "" { target *-*-* } 32 } */
Index: gcc/testsuite/obj-c++.dg/strings-2.mm
===================================================================
--- gcc/testsuite/obj-c++.dg/strings-2.mm       (revision 0)
+++ gcc/testsuite/obj-c++.dg/strings-2.mm       (revision 0)
@@ -0,0 +1,51 @@
+/* Contributed by Nicola Pero <nicola.pero@meta-innovation.com>, November 2010.  */
+
+/* { dg-do run } */
+/* { dg-xfail-run-if "Needs OBJC2 ABI" { *-*-darwin* && { lp64 && { ! objc2 } } } { "-fnext-runtime" } { "" } } */
+/* { dg-options "-fconstant-string-class=MyTestString" } */
+/* { dg-options "-mno-constant-cfstrings -fconstant-string-class=MyTestString" { target *-*-darwin* } } */
+
+/* { dg-additional-sources "../objc-obj-c++-shared/Object1.mm" } */
+
+#include "../objc-obj-c++-shared/Object1.h"
+#include "../objc-obj-c++-shared/next-mapping.h"
+
+#include <stdlib.h> /* For abort() */
+
+@interface MyTestString : Object
+{
+  char *string;
+  unsigned int len;
+}
+/* All strings should contain the C string 'test'.  Call -check to
+   test that this is true.  */
+- (void) check;
+@end
+
+@implementation MyTestString
+- (void) check
+{
+  if (len != 4 || string[0] != 't' || string[1] != 'e'
+      || string[2] != 's' || string[3] != 't' || string[4] != '\0')
+    abort ();
+}
+@end
+
+int main (void)
+{
+  MyTestString *test_valid1 = @"test";
+  MyTestString *test_valid2 = @"te" @"st";
+  MyTestString *test_valid3 = @"te" @"s" @"t";
+  MyTestString *test_valid4 = @ "t" @ "e" @ "s" @ "t";
+  MyTestString *test_valid5 = @ "t" "e" "s" "t";
+  MyTestString *test_valid6 = @ "t" "e" "s" @ "t";
+
+  [test_valid1 check];
+  [test_valid2 check];
+  [test_valid3 check];
+  [test_valid4 check];
+  [test_valid5 check];
+  [test_valid6 check];
+
+  return 0;
+}






Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]