Use explicit codeset names instead of "latin1" and "" when R 2.10.1
calls iconv.  As the note in the m4/R.m4 hunk below says, Solaris iconv
does not accept an empty encoding name, so the locale's own codeset is
looked up (getDefaultCharmap in src/main/sysutils.c) and passed instead.

The line structure and indentation of this patch were reconstructed.
Apply it ignoring whitespace, e.g. "patch -p0 -l" from the directory
above R-2.10.1.

NOTE(review): the util.c hunk changes the string stored into 'ans' from
"latin1" to "ISO8859-1".  If that value is visible to R code as an
encoding name, callers comparing it against "latin1" will break; confirm.

--- R-2.10.1/src/main/util.c.orig	Mon Sep 28 00:05:39 2009
+++ R-2.10.1/src/main/util.c	Wed Dec 16 05:18:24 2009
@@ -913,7 +913,7 @@
     n = LENGTH(x);
     PROTECT(ans = allocVector(STRSXP, n));
     for (i = 0; i < n; i++) {
-        if(IS_LATIN1(STRING_ELT(x, i))) tmp = "latin1";
+        if(IS_LATIN1(STRING_ELT(x, i))) tmp = "ISO8859-1";
         else if(IS_UTF8(STRING_ELT(x, i))) tmp = "UTF-8";
         else tmp = "unknown";
         SET_STRING_ELT(ans, i, mkChar(tmp));
--- R-2.10.1/m4/R.m4.orig	Mon Sep 28 00:05:11 2009
+++ R-2.10.1/m4/R.m4	Wed Dec 16 05:50:14 2009
@@ -3291,7 +3291,7 @@
 if test "$ac_cv_func_iconv" != no; then
   AC_DEFINE(HAVE_ICONV, 1, [Define if you have the `iconv' function.])
 
-  AC_CACHE_CHECK([whether iconv accepts "UTF-8", "latin1" and "UCS-*"],
+  AC_CACHE_CHECK([whether iconv accepts "UTF-8", "ISO8859-1" and "UCS-*"],
   [r_cv_iconv_latin1],
   [AC_RUN_IFELSE([AC_LANG_SOURCE([[
 #include "confdefs.h"
@@ -3302,19 +3302,21 @@
 
 int main () {
   iconv_t cd;
-  cd = iconv_open("latin1","UTF-8");
+  cd = iconv_open("ISO8859-1","UTF-8");
   if(cd == (iconv_t)(-1)) exit(1);
   iconv_close(cd);
-  cd = iconv_open("UTF-8","latin1");
+  cd = iconv_open("UTF-8","ISO8859-1");
   if(cd == (iconv_t)(-1)) exit(1);
   iconv_close(cd);
-  cd = iconv_open("","latin1");
+/* Solaris iconv accepts an empty string for neither the from nor the to
+   encoding.  That does not matter: it is usually unused or caught gracefully.
+  cd = iconv_open("","ISO8859-1");
   if(cd == (iconv_t)(-1)) exit(1);
   iconv_close(cd);
   cd = iconv_open("","UTF-8");
   if(cd == (iconv_t)(-1)) exit(1);
   iconv_close(cd);
-  cd = iconv_open("latin1", "");
+  cd = iconv_open("ISO8859-1", "");
   if(cd == (iconv_t)(-1)) exit(1);
   iconv_close(cd);
   cd = iconv_open("UTF-8","");
@@ -3344,6 +3346,7 @@
   cd = iconv_open("", "UCS-4BE");
   if(cd == (iconv_t)(-1)) exit(1);
   iconv_close(cd);
+*/
   exit(0);
 }
   ]])], [r_cv_iconv_latin1=yes], [r_cv_iconv_latin1=no],
--- R-2.10.1/src/library/base/man/iconv.Rd.orig	Mon Sep 28 00:05:34 2009
+++ R-2.10.1/src/library/base/man/iconv.Rd	Tue Dec 29 15:34:45 2009
@@ -116,10 +116,10 @@
 charToRaw(xx <- iconv(x, "latin1", "UTF-8"))
 xx
 
-iconv(x, "latin1", "ASCII") # NA
-iconv(x, "latin1", "ASCII", "?") # "fa?ile"
-iconv(x, "latin1", "ASCII", "") # "faile"
-iconv(x, "latin1", "ASCII", "byte") # "fa<e7>ile"
+try(iconv(x, "latin1", "ASCII")) # NA
+try(iconv(x, "latin1", "ASCII", "?")) # "fa?ile"
+try(iconv(x, "latin1", "ASCII", "")) # "faile"
+try(iconv(x, "latin1", "ASCII", "byte")) # "fa<e7>ile"
 
 # Extracts from R help files
 x <- c("Ekstr\xf8m", "J\xf6reskog", "bi\xdfchen Z\xfcrcher")
@@ -126,7 +126,7 @@
 Encoding(x) <- "latin1"
 x
 try(iconv(x, "latin1", "ASCII//TRANSLIT")) # platform-dependent
-iconv(x, "latin1", "ASCII", sub="byte")
+try(iconv(x, "latin1", "ASCII", sub="byte"))
 }
 \keyword{ character }
 \keyword{ utilities }
--- R-2.10.1/src/library/tools/man/showNonASCII.Rd.orig	Mon Sep 28 00:05:23 2009
+++ R-2.10.1/src/library/tools/man/showNonASCII.Rd	Tue Dec 29 22:32:06 2009
@@ -35,7 +35,7 @@
  "}")
 cat(out, file = "my.Rd", sep = "\n")
 
-showNonASCII(readLines("my.Rd"))
+try(showNonASCII(readLines("my.Rd")))
 unlink("my.Rd")
 }
 \keyword{utilities}
--- R-2.10.1/src/library/utils/man/citEntry.Rd.orig	Mon Sep 22 00:05:11 2008
+++ R-2.10.1/src/library/utils/man/citEntry.Rd	Wed Dec 30 01:36:10 2009
@@ -115,6 +115,6 @@
 \keyword{misc}
 \examples{
 basecit <- system.file("CITATION", package="base")
-source(basecit, echo=TRUE)
+source(basecit, echo=TRUE, encoding="ISO8859-1")
 readCitationFile(basecit)
 }
--- R-2.10.1/src/main/sysutils.c.orig	Mon Sep 28 00:05:39 2009
+++ R-2.10.1/src/main/sysutils.c	Sat Jan 9 07:21:15 2010
@@ -50,6 +50,8 @@
 #ifdef HAVE_SYS_STAT_H
 # include <sys/stat.h>
 #endif
+#include <locale.h>
+#include <langinfo.h>
 
 #if HAVE_AQUA
 extern int (*ptr_CocoaSystem)(char*);
@@ -121,6 +123,10 @@
 {
     return(filename ? fopen(filename, fixmode(mode)) : NULL );
 }
+static char *getDefaultCharmap(void); /* defined below, used before that */
+static int isEmpty(const char *src) {
+    return src == NULL || src[0] == '\0';
+}
 
 /* The point of this function is to allow file names in foreign
    character sets.  On Unix-alikes in a UTF-8 locale all that is
@@ -146,8 +152,9 @@
         wcscpy(filename, L"");
         return filename;
     }
-    if(IS_LATIN1(fn)) from = "latin1";
+    if(IS_LATIN1(fn)) from = "ISO8859-1";
     if(IS_UTF8(fn)) from = "UTF-8";
+    if(isEmpty(from)) from = getDefaultCharmap();
     obj = Riconv_open("UCS-2LE", from);
     if(obj == (void *)(-1))
         error("unsupported conversion from '%s' in 'filenameToWchar' in codepage %d",
@@ -524,6 +531,50 @@
 
 #include "RBufferUtils.h"
 
+/* Name of the current locale's character encoding, for use wherever R
+   would pass "" to iconv_open() (Solaris iconv rejects the empty name).
+   The first call sets the locale from LC_ALL, LC_CTYPE or LANG and
+   caches the result; the returned string must not be freed. */
+static char *
+getDefaultCharmap(void) {
+    static char *map;
+    static int loaded = 0;
+    const char *codeset;
+    char *tmp;
+
+    if (loaded) {
+        return map;
+    }
+    tmp = getenv("LC_ALL");
+    if (!isEmpty(tmp)) {
+        tmp = setlocale(LC_ALL, tmp);
+    }
+    if (isEmpty(tmp)) {
+        tmp = getenv("LC_CTYPE");
+        if (!isEmpty(tmp)) {
+            tmp = setlocale(LC_CTYPE, tmp);
+        }
+    }
+    if (isEmpty(tmp)) {
+        tmp = getenv("LANG");
+        if (!isEmpty(tmp)) {
+            tmp = setlocale(LC_CTYPE, tmp);
+        }
+    }
+    codeset = nl_langinfo(CODESET);
+    if (isEmpty(codeset) || strncmp(codeset, "646", 3) == 0) {
+        /* report "646" (ISO 646) under the name "ASCII" */
+        map = "ASCII";
+    } else {
+        map = strdup(codeset);
+        if (map == NULL) {
+            return "ASCII"; /* out of memory: leave uncached, retry later */
+        }
+    }
+    loaded = 1; /* without this every call repeated setlocale() and strdup() */
+    return map;
+}
+
 /* iconv(x, from, to, sub, mark) */
 SEXP attribute_hidden do_iconv(SEXP call, SEXP op, SEXP args, SEXP env)
 {
@@ -574,8 +625,8 @@
         if(streql(to, "UTF-8")) isUTF8 = TRUE;
         if(streql(to, "latin1") || streql(to, "ISO_8859-1")
            || streql(to, "CP1252")) isLatin1 = TRUE;
-        if(streql(to, "") && known_to_be_latin1) isLatin1 = TRUE;
-        if(streql(to, "") && known_to_be_utf8) isUTF8 = TRUE;
+        if(isEmpty(to) && known_to_be_latin1) isLatin1 = TRUE;
+        if(isEmpty(to) && known_to_be_utf8) isUTF8 = TRUE;
         obj = Riconv_open(to, from);
         if(obj == (iconv_t)(-1))
 #ifdef Win32
@@ -671,7 +722,22 @@
     else if(!*fromcode) return iconv_open(tocode, cp);
     else return iconv_open(tocode, fromcode);
 #else
-    return iconv_open(tocode, fromcode);
+    const char *to, *from;
+    iconv_t res;
+    to = isEmpty(tocode)
+        ? getDefaultCharmap()
+        : streql(tocode,"latin1") ? "ISO8859-1" : tocode;
+    from = isEmpty(fromcode)
+        ? getDefaultCharmap()
+        : streql(fromcode,"latin1") ? "ISO8859-1" : fromcode;
+    res = iconv_open(to, from);
+    if (res == (iconv_t) -1) {
+        char *env = getenv("DEBUG");
+        if (env != NULL && strstr(env, "iconv") != NULL) {
+            fprintf(stderr,"iconv_open(%s,%s) failed\n", to , from);
+        }
+    }
+    return res;
 #endif
 }
 
@@ -714,15 +780,16 @@
 
     if(IS_LATIN1(x)) {
         if(!latin1_obj) {
-            obj = Riconv_open("", "latin1");
+            const char *to = getDefaultCharmap();
+            obj = Riconv_open(to, "ISO8859-1");
             /* should never happen */
             if(obj == (void *)(-1))
 #ifdef Win32
                 error("unsupported conversion from %s in codepage %d",
-                      "latin1", localeCP);
+                      "ISO8859-1", localeCP);
 #else
                 error(_("unsupported conversion from '%s' to '%s'"),
-                      "latin1", "");
+                      "ISO8859-1", to);
 #endif
             latin1_obj = obj;
         }
@@ -729,15 +796,16 @@
         obj = latin1_obj;
     } else {
         if(!utf8_obj) {
-            obj = Riconv_open("", "UTF-8");
+            const char *to = getDefaultCharmap();
+            obj = Riconv_open(to, "UTF-8");
             /* should never happen */
             if(obj == (void *)(-1))
 #ifdef Win32
                 error("unsupported conversion from %s in codepage %d",
-                      "latin1", localeCP);
+                      "UTF-8", localeCP);
 #else
                 error(_("unsupported conversion from '%s' to '%s'"),
-                      "latin1", "");
+                      "UTF-8", to);
 #endif
             utf8_obj = obj;
         }
@@ -814,14 +882,14 @@
     if(IS_UTF8(x)) return ans;
     if(strIsASCII(CHAR(x))) return ans;
 
-    obj = Riconv_open("UTF-8", IS_LATIN1(x) ? "latin1" : "");
+    obj = Riconv_open("UTF-8", IS_LATIN1(x) ? "ISO8859-1" : getDefaultCharmap());
     if(obj == (void *)(-1))
 #ifdef Win32
         error("unsupported conversion from %s in codepage %d",
-              "latin1", localeCP);
+              "ISO8859-1", localeCP);
 #else
         error(_("unsupported conversion from '%s' to '%s'"),
-              "latin1", "UTF-8");
+              "ISO8859-1", "UTF-8");
 #endif
     R_AllocStringBuffer(0, &cbuff);
 top_of_loop:
@@ -888,10 +956,10 @@
 
     if(IS_LATIN1(x)) {
         if(!latin1_wobj) {
-            obj = Riconv_open(TO_WCHAR, "latin1");
+            obj = Riconv_open(TO_WCHAR, "ISO8859-1");
             if(obj == (void *)(-1))
                 error(_("unsupported conversion from '%s' to '%s'"),
-                      "latin1", TO_WCHAR);
+                      "ISO8859-1", TO_WCHAR);
             latin1_wobj = obj;
         } else
             obj = latin1_wobj;
@@ -901,19 +969,20 @@
             obj = Riconv_open(TO_WCHAR, "UTF-8");
             if(obj == (void *)(-1))
                 error(_("unsupported conversion from '%s' to '%s'"),
-                      "latin1", TO_WCHAR);
+                      "UTF-8", TO_WCHAR);
             utf8_wobj = obj;
         } else
             obj = utf8_wobj;
         knownEnc = TRUE;
     } else {
-        obj = Riconv_open(TO_WCHAR, "");
+        const char *from = getDefaultCharmap();
+        obj = Riconv_open(TO_WCHAR, from);
         if(obj == (void *)(-1))
 #ifdef Win32
             error("unsupported conversion to '%s' from codepage %d",
                   TO_WCHAR, localeCP);
 #else
-            error(_("unsupported conversion from '%s' to '%s'"), "", TO_WCHAR);
+            error(_("unsupported conversion from '%s' to '%s'"), from, TO_WCHAR);
 #endif
     }
 
@@ -995,8 +1064,8 @@
     }
     case CE_LATIN1: fromcode = "CP1252"; break;
 #else
-    case CE_NATIVE: fromcode = ""; break;
-    case CE_LATIN1: fromcode = "latin1"; break;
+    case CE_NATIVE: fromcode = getDefaultCharmap(); break;
+    case CE_LATIN1: fromcode = "ISO8859-1"; break;
 #endif
     case CE_UTF8: fromcode = "UTF-8"; break;
     default: return x;
@@ -1012,9 +1081,9 @@
         break;
     }
 #else
-    case CE_NATIVE: tocode = ""; break;
+    case CE_NATIVE: tocode = getDefaultCharmap(); break;
 #endif
-    case CE_LATIN1: tocode = "latin1"; break;
+    case CE_LATIN1: tocode = "ISO8859-1"; break;
     case CE_UTF8: tocode = "UTF-8"; break;
     default: return x;
     }
@@ -1113,7 +1182,7 @@
     wcs[0] = wc;
 
     if(ucsmb_obj == NULL) {
-        if((void *)(-1) == (cd = Riconv_open("", UNICODE))) {
+        if((void *)(-1) == (cd = Riconv_open(getDefaultCharmap(), UNICODE))) {
 #ifndef Win32
             char tocode[128];
             /* locale set fuzzy case */
@@ -1162,7 +1231,7 @@
 
     if(s[0] == 0) {*wc = 0; return 1;}
 
-    if((void *)(-1) == (cd = Riconv_open(UNICODE, ""))) return (size_t)(-1);
+    if((void *)(-1) == (cd = Riconv_open(UNICODE, getDefaultCharmap()))) return (size_t)(-1);
     status = Riconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
 
     if (status == (size_t) -1) {
--- R-2.10.1/src/library/tools/R/QC.R.orig	Tue Oct 6 04:05:11 2009
+++ R-2.10.1/src/library/tools/R/QC.R	Sat Feb 20 21:33:49 2010
@@ -3542,12 +3542,11 @@
             ## abide by POSIX.  These locales need not exist, but
             ## do in glibc.
             switch(enc,
-                   "latin1" = Sys.setlocale("LC_CTYPE", "en_US"),
+                   "latin1" = Sys.setlocale("LC_CTYPE", "en_US.ISO8859-1"),
                    "utf-8"  =,  # not valid, but used
-                   "UTF-8"  = Sys.setlocale("LC_CTYPE", "en_US.utf8"),
+                   "UTF-8"  = Sys.setlocale("LC_CTYPE", "en_US.UTF-8"),
                    "latin2" = Sys.setlocale("LC_CTYPE", "pl_PL"),
-                   "latin9" = Sys.setlocale("LC_CTYPE",
-                   "fr_FR.iso885915@euro"),
+                   "latin9" = Sys.setlocale("LC_CTYPE", "fr_FR.ISO8859-15"),
                    Sys.setlocale("LC_CTYPE", "C")
                    )
         }