Home
Reading
Searching
Subscribe
Sponsors
Statistics
Posting
Contact
Spam
Lists
Links
About
Hosting
Filtering
Features Download
Marketing
Archives
FAQ
Blog
 
Gmane
From: Simon Josefsson <simon <at> josefsson.org>
Subject: Re: AW: treatment of U+002E that is produced by NFKC
Newsgroups: gmane.comp.gnu.libidn.general
Date: Monday 14th January 2008 10:10:44 UTC (over 9 years ago)
"Alexander Gnauck"  writes:

>> Sure, that is one way to deal with this. Libidn users may not be
>> clamoring for a resolution. Other implementations may be in more of a
>> rush to resolve the conflict. (I work for Google.)
>
> What about adding a define to deal with this, and make a note in the
> documentation about this "issue".

Yes, we should definitely document the problem in the manual.  Erik, do
you know of any good links that discuss this issue?

Fortunately, all the idna_* APIs in libidn take a 'flags' parameter.
It would be possible to add a new flag IDNA_TREAT_U2024_AS_DOT and have
the code treat U+2024 as a dot character as per RFC 3490 section 3.1 if
the flag is given.  I've confirmed that this makes libidn produce the
same output as MSIE/Firefox.  See the initial skeleton patch below.

/Simon

diff --git a/lib/idna.c b/lib/idna.c
index b815a3f..09ef929 100644
--- a/lib/idna.c
+++ b/lib/idna.c
@@ -1,5 +1,5 @@
 /* idna.c --- Convert to or from IDN strings.
- * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007  Simon Josefsson
+ * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008  Simon Josefsson
  *
  * This file is part of GNU Libidn.
  *
@@ -30,8 +30,9 @@
 
 #include "idna.h"
 
-#define DOTP(c) ((c) == 0x002E || (c) == 0x3002 ||	\
-		 (c) == 0xFF0E || (c) == 0xFF61)
+#define DOTP(c, flags) ((c) == 0x002E || (c) == 0x3002 ||	\
+			(c) == 0xFF0E || (c) == 0xFF61 ||		\
+			((flags & IDNA_TREAT_U2024_AS_DOT) && (c) == 0x2024))
 
 /* Core functions */
 
@@ -475,7 +476,7 @@ idna_to_ascii_4z (const uint32_t * input, char
**output, int flags)
       return IDNA_SUCCESS;
     }
 
-  if (DOTP (input[0]) && input[1] == 0)
+  if (DOTP (input[0], flags) && input[1] == 0)
     {
       /* Handle explicit zero-length root label. */
       *output = malloc (2);
@@ -490,7 +491,7 @@ idna_to_ascii_4z (const uint32_t * input, char
**output, int flags)
     {
       end = start;
 
-      for (; *end && !DOTP (*end); end++)
+      for (; *end && !DOTP (*end, flags); end++)
 	;
 
       if (*end == '\0' && start == end)
@@ -628,7 +629,7 @@ idna_to_unicode_4z4z (const uint32_t * input, uint32_t
** output, int flags)
     {
       end = start;
 
-      for (; *end && !DOTP (*end); end++)
+      for (; *end && !DOTP (*end, flags); end++)
 	;
 
       buflen = end - start;
diff --git a/lib/idna.h b/lib/idna.h
index f6b24ac..e968d33 100644
--- a/lib/idna.h
+++ b/lib/idna.h
@@ -1,5 +1,5 @@
 /* idna.h --- Declarations for Internationalized Domain Name in
Applications.
- * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007  Simon Josefsson
+ * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008  Simon Josefsson
  *
  * This file is part of GNU Libidn.
  *
@@ -54,7 +54,8 @@ extern "C"
   typedef enum
   {
     IDNA_ALLOW_UNASSIGNED = 0x0001,
-    IDNA_USE_STD3_ASCII_RULES = 0x0002
+    IDNA_USE_STD3_ASCII_RULES = 0x0002,
+    IDNA_TREAT_U2024_AS_DOT = 0x0004
   } Idna_flags;
 
 # ifndef IDNA_ACE_PREFIX
diff --git a/src/idn.c b/src/idn.c
index abb545e..cf4009b 100644
--- a/src/idn.c
+++ b/src/idn.c
@@ -370,8 +370,10 @@ main (int argc, char *argv[])
 				 (args_info.allow_unassigned_given ?
 				  IDNA_ALLOW_UNASSIGNED : 0) |
 				 (args_info.usestd3asciirules_given ?
-				  IDNA_USE_STD3_ASCII_RULES : 0));
-	  free (q);
+				  IDNA_USE_STD3_ASCII_RULES : 0) |
+				 (args_info.treatu2024asdot_given ?
+				  IDNA_TREAT_U2024_AS_DOT : 0));
+      free (q);
 	  if (rc != IDNA_SUCCESS)
 	    error (EXIT_FAILURE, 0, _("idna_to_ascii_4z: %s"),
 		   idna_strerror (rc));
@@ -385,7 +387,9 @@ main (int argc, char *argv[])
 					 (args_info.allow_unassigned_given ?
 					  IDNA_ALLOW_UNASSIGNED : 0) |
 					 (args_info.usestd3asciirules_given ?
-					  IDNA_USE_STD3_ASCII_RULES : 0));
+					  IDNA_USE_STD3_ASCII_RULES : 0) |
+					 (args_info.treatu2024asdot_given ?
+					  IDNA_TREAT_U2024_AS_DOT : 0));
 	      if (rc != IDNA_SUCCESS)
 		error (EXIT_FAILURE, 0, _("idna_to_unicode_8z4z (TLD): %s"),
 		       idna_strerror (rc));
@@ -450,7 +454,9 @@ main (int argc, char *argv[])
 				     (args_info.allow_unassigned_given ?
 				      IDNA_ALLOW_UNASSIGNED : 0) |
 				     (args_info.usestd3asciirules_given ?
-				      IDNA_USE_STD3_ASCII_RULES : 0));
+				      IDNA_USE_STD3_ASCII_RULES : 0) |
+				     (args_info.treatu2024asdot_given ?
+				      IDNA_TREAT_U2024_AS_DOT : 0));
 	  free (p);
 	  if (rc != IDNA_SUCCESS)
 	    error (EXIT_FAILURE, 0, _("idna_to_unicode_8z4z: %s"),
diff --git a/src/idn.ggo b/src/idn.ggo
index 620f9f6..680686f 100644
--- a/src/idn.ggo
+++ b/src/idn.ggo
@@ -1,4 +1,4 @@
-# Copyright (C) 2003, 2004, 2005, 2006, 2007 Simon Josefsson.
+# Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Simon Josefsson.
 #
 # This file is part of GNU Libidn.
 #
@@ -31,6 +31,7 @@ option "idna-to-ascii"   a "Convert to ACE according to
IDNA (default)" no
 option "idna-to-unicode" u "Convert from ACE according to IDNA" no
 option "allow-unassigned" - "Toggle IDNA AllowUnassigned flag" flag off
 option "usestd3asciirules" - "Toggle IDNA UseSTD3ASCIIRules flag" flag off
+option "treatu2024asdot" - "Toggle IDNA TreatU2024AsDot flag" flag off
 option "tld" t "Check string for TLD specific rules\nOnly for
--idna-to-ascii and --idna-to-unicode" flag on
 option "profile" p "Use specified stringprep profile instead\nValid
stringprep profiles are `Nameprep', `iSCSI', `Nodeprep', `Resourceprep',
`trace', and `SASLprep'." string no
 option "debug" - "Print debugging information" flag off
 
CD: 3ms