| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398 | /* Determine a canonical name for the current locale's character encoding.   Copyright (C) 2000-2003 Free Software Foundation, Inc.   This program is free software; you can redistribute it and/or modify it   under the terms of the GNU Library General Public License as published   by the Free Software Foundation; either version 2, or (at your option)   any later version.   This program is distributed in the hope that it will be useful,   but WITHOUT ANY WARRANTY; without even the implied warranty of   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU   Library General Public License for more details.   You should have received a copy of the GNU Library General Public   License along with this program; if not, write to the Free Software   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,   USA.  *//* Written by Bruno Haible <bruno@clisp.org>.  */#ifdef HAVE_CONFIG_H# include <config.h>#endif/* Specification.  */#include "localcharset.h"#if HAVE_STDDEF_H# include <stddef.h>#endif#include <stdio.h>#if HAVE_STRING_H# include <string.h>#else# include <strings.h>#endif#if HAVE_STDLIB_H# include <stdlib.h>#endif#if defined _WIN32 || defined __WIN32__# undef WIN32   /* avoid warning on mingw32 */# define WIN32#endif#if defined __EMX__/* Assume EMX program runs on OS/2, even if compiled under DOS.  */# define OS2#endif#if !defined WIN32# if HAVE_LANGINFO_CODESET#  include <langinfo.h># else#  if HAVE_SETLOCALE#   include <locale.h>#  endif# endif#elif defined WIN32# define WIN32_LEAN_AND_MEAN# include <windows.h>#endif#if defined OS2# define INCL_DOS# include <os2.h>#endif#if ENABLE_RELOCATABLE# include "relocatable.h"#else# define relocate(pathname) (pathname)#endif#if defined _WIN32 || defined __WIN32__ || defined __EMX__ || defined __DJGPP__  /* Win32, OS/2, DOS */# define ISSLASH(C) ((C) == '/' || (C) == '\\')#endif#ifndef DIRECTORY_SEPARATOR# define DIRECTORY_SEPARATOR '/'#endif#ifndef ISSLASH# define ISSLASH(C) ((C) == DIRECTORY_SEPARATOR)#endif#ifdef HAVE_GETC_UNLOCKED# undef getc# define getc getc_unlocked#endif/* The following static variable is declared 'volatile' to avoid a   possible multithread problem in the function get_charset_aliases. If we   are running in a threaded environment, and if two threads initialize   'charset_aliases' simultaneously, both will produce the same value,   and everything will be ok if the two assignments to 'charset_aliases'   are atomic. But I don't know what will happen if the two assignments mix.  */#if __STDC__ != 1# define volatile /* empty */#endif/* Pointer to the contents of the charset.alias file, if it has already been   read, else NULL.  Its format is:   ALIAS_1 '\0' CANONICAL_1 '\0' ... ALIAS_n '\0' CANONICAL_n '\0' '\0'  */static const char * volatile charset_aliases;/* Return a pointer to the contents of the charset.alias file.  */static const char *get_charset_aliases (){  const char *cp;  cp = charset_aliases;  if (cp == NULL)    {#if !(defined VMS || defined WIN32)      FILE *fp;      const char *dir = relocate (LIBDIR);      const char *base = "charset.alias";      char *file_name;      /* Concatenate dir and base into freshly allocated file_name.  */      {	size_t dir_len = strlen (dir);	size_t base_len = strlen (base);	int add_slash = (dir_len > 0 && !ISSLASH (dir[dir_len - 1]));	file_name = (char *) malloc (dir_len + add_slash + base_len + 1);	if (file_name != NULL)	  {	    memcpy (file_name, dir, dir_len);	    if (add_slash)	      file_name[dir_len] = DIRECTORY_SEPARATOR;	    memcpy (file_name + dir_len + add_slash, base, base_len + 1);	  }      }      if (file_name == NULL || (fp = fopen (file_name, "r")) == NULL)	/* Out of memory or file not found, treat it as empty.  */	cp = "";      else	{	  /* Parse the file's contents.  */	  int c;	  char buf1[50+1];	  char buf2[50+1];	  char *res_ptr = NULL;	  size_t res_size = 0;	  size_t l1, l2;	  for (;;)	    {	      c = getc (fp);	      if (c == EOF)		break;	      if (c == '\n' || c == ' ' || c == '\t')		continue;	      if (c == '#')		{		  /* Skip comment, to end of line.  */		  do		    c = getc (fp);		  while (!(c == EOF || c == '\n'));		  if (c == EOF)		    break;		  continue;		}	      ungetc (c, fp);	      if (fscanf (fp, "%50s %50s", buf1, buf2) < 2)		break;	      l1 = strlen (buf1);	      l2 = strlen (buf2);	      if (res_size == 0)		{		  res_size = l1 + 1 + l2 + 1;		  res_ptr = (char *) malloc (res_size + 1);		}	      else		{		  res_size += l1 + 1 + l2 + 1;		  res_ptr = (char *) realloc (res_ptr, res_size + 1);		}	      if (res_ptr == NULL)		{		  /* Out of memory. */		  res_size = 0;		  break;		}	      strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1);	      strcpy (res_ptr + res_size - (l2 + 1), buf2);	    }	  fclose (fp);	  if (res_size == 0)	    cp = "";	  else	    {	      *(res_ptr + res_size) = '\0';	      cp = res_ptr;	    }	}      if (file_name != NULL)	free (file_name);#else# if defined VMS      /* To avoid the troubles of an extra file charset.alias_vms in the	 sources of many GNU packages, simply inline the aliases here.  */      /* The list of encodings is taken from the OpenVMS 7.3-1 documentation	 "Compaq C Run-Time Library Reference Manual for OpenVMS systems"	 section 10.7 "Handling Different Character Sets".  */      cp = "ISO8859-1" "\0" "ISO-8859-1" "\0"	   "ISO8859-2" "\0" "ISO-8859-2" "\0"	   "ISO8859-5" "\0" "ISO-8859-5" "\0"	   "ISO8859-7" "\0" "ISO-8859-7" "\0"	   "ISO8859-8" "\0" "ISO-8859-8" "\0"	   "ISO8859-9" "\0" "ISO-8859-9" "\0"	   /* Japanese */	   "eucJP" "\0" "EUC-JP" "\0"	   "SJIS" "\0" "SHIFT_JIS" "\0"	   "DECKANJI" "\0" "DEC-KANJI" "\0"	   "SDECKANJI" "\0" "EUC-JP" "\0"	   /* Chinese */	   "eucTW" "\0" "EUC-TW" "\0"	   "DECHANYU" "\0" "DEC-HANYU" "\0"	   "DECHANZI" "\0" "GB2312" "\0"	   /* Korean */	   "DECKOREAN" "\0" "EUC-KR" "\0";# endif# if defined WIN32      /* To avoid the troubles of installing a separate file in the same	 directory as the DLL and of retrieving the DLL's directory at	 runtime, simply inline the aliases here.  */      cp = "CP936" "\0" "GBK" "\0"	   "CP1361" "\0" "JOHAB" "\0"	   "CP20127" "\0" "ASCII" "\0"	   "CP20866" "\0" "KOI8-R" "\0"	   "CP21866" "\0" "KOI8-RU" "\0"	   "CP28591" "\0" "ISO-8859-1" "\0"	   "CP28592" "\0" "ISO-8859-2" "\0"	   "CP28593" "\0" "ISO-8859-3" "\0"	   "CP28594" "\0" "ISO-8859-4" "\0"	   "CP28595" "\0" "ISO-8859-5" "\0"	   "CP28596" "\0" "ISO-8859-6" "\0"	   "CP28597" "\0" "ISO-8859-7" "\0"	   "CP28598" "\0" "ISO-8859-8" "\0"	   "CP28599" "\0" "ISO-8859-9" "\0"	   "CP28605" "\0" "ISO-8859-15" "\0";# endif#endif      charset_aliases = cp;    }  return cp;}/* Determine the current locale's character encoding, and canonicalize it   into one of the canonical names listed in config.charset.   The result must not be freed; it is statically allocated.   If the canonical name cannot be determined, the result is a non-canonical   name.  */#ifdef STATICSTATIC#endifconst char *locale_charset (){  const char *codeset;  const char *aliases;#if !(defined WIN32 || defined OS2)# if HAVE_LANGINFO_CODESET  /* Most systems support nl_langinfo (CODESET) nowadays.  */  codeset = nl_langinfo (CODESET);# else  /* On old systems which lack it, use setlocale or getenv.  */  const char *locale = NULL;  /* But most old systems don't have a complete set of locales.  Some     (like SunOS 4 or DJGPP) have only the C locale.  Therefore we don't     use setlocale here; it would return "C" when it doesn't support the     locale name the user has set.  */#  if HAVE_SETLOCALE && 0  locale = setlocale (LC_CTYPE, NULL);#  endif  if (locale == NULL || locale[0] == '\0')    {      locale = getenv ("LC_ALL");      if (locale == NULL || locale[0] == '\0')	{	  locale = getenv ("LC_CTYPE");	  if (locale == NULL || locale[0] == '\0')	    locale = getenv ("LANG");	}    }  /* On some old systems, one used to set locale = "iso8859_1". On others,     you set it to "language_COUNTRY.charset". In any case, we resolve it     through the charset.alias file.  */  codeset = locale;# endif#elif defined WIN32  static char buf[2 + 10 + 1];  /* Woe32 has a function returning the locale's codepage as a number.  */  sprintf (buf, "CP%u", GetACP ());  codeset = buf;#elif defined OS2  const char *locale;  static char buf[2 + 10 + 1];  ULONG cp[3];  ULONG cplen;  /* Allow user to override the codeset, as set in the operating system,     with standard language environment variables.  */  locale = getenv ("LC_ALL");  if (locale == NULL || locale[0] == '\0')    {      locale = getenv ("LC_CTYPE");      if (locale == NULL || locale[0] == '\0')	locale = getenv ("LANG");    }  if (locale != NULL && locale[0] != '\0')    {      /* If the locale name contains an encoding after the dot, return it.  */      const char *dot = strchr (locale, '.');      if (dot != NULL)	{	  const char *modifier;	  dot++;	  /* Look for the possible @... trailer and remove it, if any.  */	  modifier = strchr (dot, '@');	  if (modifier == NULL)	    return dot;	  if (modifier - dot < sizeof (buf))	    {	      memcpy (buf, dot, modifier - dot);	      buf [modifier - dot] = '\0';	      return buf;	    }	}      /* Resolve through the charset.alias file.  */      codeset = locale;    }  else    {      /* OS/2 has a function returning the locale's codepage as a number.  */      if (DosQueryCp (sizeof (cp), cp, &cplen))	codeset = "";      else	{	  sprintf (buf, "CP%u", cp[0]);	  codeset = buf;	}    }#endif  if (codeset == NULL)    /* The canonical name cannot be determined.  */    codeset = "";  /* Resolve alias. */  for (aliases = get_charset_aliases ();       *aliases != '\0';       aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)    if (strcmp (codeset, aliases) == 0	|| (aliases[0] == '*' && aliases[1] == '\0'))      {	codeset = aliases + strlen (aliases) + 1;	break;      }  /* Don't return an empty string.  GNU libc and GNU libiconv interpret     the empty string as denoting "the locale's character encoding",     thus GNU libiconv would call this function a second time.  */  if (codeset[0] == '\0')    codeset = "ASCII";  return codeset;}
 |