/* gdkanji.c (Kanji code converter) */
/* written by Masahito Yamaga (ma@yama-ga.com) */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "gd.h"
#include "gdhelpers.h"
#include <stdarg.h>
#if defined(HAVE_ICONV_H) || defined(HAVE_ICONV)
#include <iconv.h>
#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif
#endif
#if defined(HAVE_ICONV_H) && !defined(HAVE_ICONV)
#define HAVE_ICONV 1
#endif
/* Prefix put in front of every message emitted by debug() and error(). */
#define LIBNAME "any2eucjp()"
/*
 * On DOS/Windows-family compilers define SJISPRE, which makes
 * DetectKanjiCode() choose SJIS rather than EUC when the input is ambiguous.
 */
#if defined(__MSC__) || defined(__BORLANDC__) || defined(__TURBOC__) || defined(_Windows) || defined(MSDOS)
#ifndef SJISPRE
#define SJISPRE 1
#endif
#endif
/* Drop any TRUE/FALSE inherited from the included headers and use our own. */
#ifdef TRUE
#undef TRUE
#endif
#ifdef FALSE
#undef FALSE
#endif
#define TRUE 1
#define FALSE 0
/*
 * Code identifiers used by DetectKanjiCode() and dispatched on in
 * do_check_and_conv().
 */
#define NEW 1			/* set on ESC $ B */
#define OLD 2			/* set on ESC $ @ */
#define ESCI 3			/* set on ESC ( I */
#define NEC 4			/* set on ESC K */
#define EUC 5
#define SJIS 6
#define EUCORSJIS 7		/* undecided between EUC and SJIS */
#define ASCII 8			/* nothing decisive seen */
/*
 * Encoding names handed to do_convert(); with HAVE_ICONV they are passed on
 * to iconv_open() as the source (or, for EUCSTR, the target) encoding.
 */
#define NEWJISSTR "JIS7"
#define OLDJISSTR "jis"
#define EUCSTR "eucJP"
#define SJISSTR "SJIS"
/* ESC introduces the escape sequences recognised below. */
#define ESC 27
/* SS2 is the byte written in front of each half-width kana byte in the output. */
#define SS2 142
/*
 * debug() - write a printf-style diagnostic line to stdout.
 *
 * The line is prefixed with LIBNAME and ": " and terminated by a newline.
 * Unless DEBUG is defined at compile time the function body is empty.
 */
static void
debug (const char *format,...)
{
#ifdef DEBUG
  va_list ap;

  fputs (LIBNAME ": ", stdout);
  va_start (ap, format);
  vfprintf (stdout, format, ap);
  va_end (ap);
  fputc ('\n', stdout);
#endif
}
/*
 * error() - report a printf-style message as an E_WARNING via
 * php_error_docref().
 *
 * The arguments are formatted by vspprintf() into a buffer handed back
 * through tmp; the emitted text is "<LIBNAME>: <message>".  The buffer is
 * released with efree() once the warning has been raised.
 */
static void
error (const char *format,...)
{
va_list args;
char *tmp;
/* NOTE(review): presumably expands to a declaration in thread-safe builds, so it must stay with the declarations - confirm. */
TSRMLS_FETCH();
va_start(args, format);
vspprintf(&tmp, 0, format, args);
va_end(args);
php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s: %s", LIBNAME, tmp);
efree(tmp);
}
/*
 * DetectKanjiCode() derived from DetectCodeType() by Ken Lunde.
 *
 * Scans the NUL-terminated byte string str and returns one of NEW, OLD,
 * ESCI, NEC, EUC, SJIS or ASCII.  EUCORSJIS is only an intermediate state:
 * it is always resolved to EUC or SJIS before returning.
 *
 * The result of the previous call is kept in a static variable and reused
 * when the current string is ambiguous, so the return value can depend on
 * earlier calls.
 */
static int
DetectKanjiCode (unsigned char *str)
{
/* Result of the previous call; persists between calls. */
static int whatcode = ASCII;
int oldcode = ASCII;
int c, i;
char *lang = NULL;
/* Any non-NUL value so that the scanning loop below is entered. */
c = '\1';
i = 0;
/* Remember a decisive result from the previous call as a fallback, then start afresh. */
if (whatcode != EUCORSJIS && whatcode != ASCII)
{
oldcode = whatcode;
whatcode = ASCII;
}
/* Scan until a decisive code is found or the terminating NUL is read. */
while ((whatcode == EUCORSJIS || whatcode == ASCII) && c != '\0')
{
if ((c = str[i++]) != '\0')
{
/* Escape sequences: ESC $ B, ESC $ @, ESC ( I and ESC K. */
if (c == ESC)
{
c = str[i++];
if (c == '$')
{
c = str[i++];
if (c == 'B')
whatcode = NEW;
else if (c == '@')
whatcode = OLD;
}
else if (c == '(')
{
c = str[i++];
if (c == 'I')
whatcode = ESCI;
}
else if (c == 'K')
whatcode = NEC;
}
/* Bytes 129-141 and 143-159 decide SJIS on their own (142 is SS2, handled next). */
else if ((c >= 129 && c <= 141) || (c >= 143 && c <= 159))
whatcode = SJIS;
/* SS2: the following byte decides between SJIS and "still ambiguous". */
else if (c == SS2)
{
c = str[i++];
if ((c >= 64 && c <= 126) || (c >= 128 && c <= 160) || (c >= 224 && c <= 252))
whatcode = SJIS;
else if (c >= 161 && c <= 223)
whatcode = EUCORSJIS;
}
/* First byte 161-223: classify by the byte that follows. */
else if (c >= 161 && c <= 223)
{
c = str[i++];
if (c >= 240 && c <= 254)
whatcode = EUC;
else if (c >= 161 && c <= 223)
whatcode = EUCORSJIS;
else if (c >= 224 && c <= 239)
{
whatcode = EUCORSJIS;
/* Keep reading while bytes stay >= 64, looking for one that settles the question. */
while (c >= 64 && c != '\0' && whatcode == EUCORSJIS)
{
if (c >= 129)
{
if (c <= 141 || (c >= 143 && c <= 159))
whatcode = SJIS;
else if (c >= 253 && c <= 254)
whatcode = EUC;
}
c = str[i++];
}
}
/* This branch is also taken when the following byte is the terminating NUL. */
else if (c <= 159)
whatcode = SJIS;
}
/* First byte 240-254 decides EUC. */
else if (c >= 240 && c <= 254)
whatcode = EUC;
/* First byte 224-239: classify by the byte that follows. */
else if (c >= 224 && c <= 239)
{
c = str[i++];
if ((c >= 64 && c <= 126) || (c >= 128 && c <= 160))
whatcode = SJIS;
else if (c >= 253 && c <= 254)
whatcode = EUC;
else if (c >= 161 && c <= 252)
whatcode = EUCORSJIS;
}
}
}
#ifdef DEBUG
if (whatcode == ASCII)
debug ("Kanji code not included.");
else if (whatcode == EUCORSJIS)
debug ("Kanji code not detected.");
else
debug ("Kanji code detected at %d byte.", i);
#endif
/* Still ambiguous: fall back to what the previous call decided, if anything. */
if (whatcode == EUCORSJIS && oldcode != ASCII)
whatcode = oldcode;
/* Next fallback: the locale named by LC_ALL, LC_CTYPE or LANG, checked in that order. */
if (whatcode == EUCORSJIS)
{
if (getenv ("LC_ALL"))
lang = getenv ("LC_ALL");
else if (getenv ("LC_CTYPE"))
lang = getenv ("LC_CTYPE");
else if (getenv ("LANG"))
lang = getenv ("LANG");
if (lang)
{
if (strcmp (lang, "ja_JP.SJIS") == 0 ||
#ifdef hpux
strcmp (lang, "japanese") == 0 ||
#endif
strcmp (lang, "ja_JP.mscode") == 0 ||
strcmp (lang, "ja_JP.PCK") == 0)
whatcode = SJIS;
/* Any other locale starting with "ja" takes the compile-time preference. */
else if (strncmp (lang, "ja", 2) == 0)
#ifdef SJISPRE
whatcode = SJIS;
#else
whatcode = EUC;
#endif
}
}
/* Last resort: the compile-time preference (SJIS if SJISPRE is defined, otherwise EUC). */
if (whatcode == EUCORSJIS)
#ifdef SJISPRE
whatcode = SJIS;
#else
whatcode = EUC;
#endif
return whatcode;
}
/*
 * SJIStoJIS() is sjis2jis() by Ken Lunde.
 *
 * Converts one double-byte Shift-JIS character to its JIS byte pair in place.
 *
 *   p1  in: first SJIS byte,  out: first JIS byte
 *   p2  in: second SJIS byte, out: second JIS byte
 *
 * Only the low eight bits of the inputs are used to choose the offsets.
 */
static void
SJIStoJIS (int *p1, int *p2)
{
  unsigned char lead = *p1;
  unsigned char trail = *p2;
  int row_base;
  int lower_row;
  int cell_shift;

  /* Trail bytes below 159 select the lower of the two rows a lead byte covers. */
  lower_row = (trail < 159);

  if (lead < 160)
    row_base = 112;
  else
    row_base = 176;

  if (lower_row)
    cell_shift = (trail > 127) ? 32 : 31;
  else
    cell_shift = 126;

  *p1 = ((lead - row_base) << 1) - lower_row;
  *p2 = *p2 - cell_shift;
}
/*
 * han2zen() was derived from han2zen() written by Ken Lunde.
 *
 * Replaces a half-width kana code with the SJIS byte pair of the
 * corresponding full-width character.
 *
 *   p1  in: half-width kana code (161..223); out: first SJIS byte
 *   p2  in: 222 (daku-ten) or 223 (han-daku-ten) if such a mark follows the
 *           kana, anything else otherwise; out: second SJIS byte
 *
 * A mark is merged into the result only for kana that IS_DAKU()/IS_HANDAKU()
 * accept.  If *p1 is outside 161..223 there is no table entry for it; both
 * values are then left untouched instead of indexing outside the table.
 */
#define IS_DAKU(c) (((c) >= 182 && (c) <= 196) || ((c) >= 202 && (c) <= 206) || ((c) == 179))
#define IS_HANDAKU(c) ((c) >= 202 && (c) <= 206)
static void
han2zen (int *p1, int *p2)
{
  /* SJIS byte pairs for codes 161..223, in code order. */
  static const int mtable[][2] =
  {
    {129, 66}, {129, 117}, {129, 118}, {129, 65},	/* 161 */
    {129, 69}, {131, 146}, {131, 64}, {131, 66},	/* 165 */
    {131, 68}, {131, 70}, {131, 72}, {131, 131},	/* 169 */
    {131, 133}, {131, 135}, {131, 98}, {129, 91},	/* 173 */
    {131, 65}, {131, 67}, {131, 69}, {131, 71},		/* 177 */
    {131, 73}, {131, 74}, {131, 76}, {131, 78},		/* 181 */
    {131, 80}, {131, 82}, {131, 84}, {131, 86},		/* 185 */
    {131, 88}, {131, 90}, {131, 92}, {131, 94},		/* 189 */
    {131, 96}, {131, 99}, {131, 101}, {131, 103},	/* 193 */
    {131, 105}, {131, 106}, {131, 107}, {131, 108},	/* 197 */
    {131, 109}, {131, 110}, {131, 113}, {131, 116},	/* 201 */
    {131, 119}, {131, 122}, {131, 125}, {131, 126},	/* 205 */
    {131, 128}, {131, 129}, {131, 130}, {131, 132},	/* 209 */
    {131, 134}, {131, 136}, {131, 137}, {131, 138},	/* 213 */
    {131, 139}, {131, 140}, {131, 141}, {131, 143},	/* 217 */
    {131, 147}, {129, 74}, {129, 75}			/* 221 */
  };
  const int first = 161;
  const int count = (int) (sizeof (mtable) / sizeof (mtable[0]));
  int c = *p1;
  int daku = 0;
  int handaku = 0;

  /* Not a half-width kana code: nothing to look up. */
  if (c < first || c - first >= count)
    return;

  if (*p2 == 222 && IS_DAKU (c))
    daku = 1;			/* Daku-ten */
  else if (*p2 == 223 && IS_HANDAKU (c))
    handaku = 1;		/* Han-daku-ten */

  *p1 = mtable[c - first][0];
  *p2 = mtable[c - first][1];

  if (daku)
    {
      /* In these ranges the voiced form is the next code point. */
      if ((*p2 >= 74 && *p2 <= 103) || (*p2 >= 110 && *p2 <= 122))
	(*p2)++;
      else if (*p2 == 131 || *p2 == 69)
	*p2 = 148;
    }
  else if (handaku && *p2 >= 110 && *p2 <= 122)
    (*p2) += 2;
}
/* Recast strcpy to handle unsigned chars used below. */
#define ustrcpy(A,B) (strcpy((char*)(A),(const char*)(B)))
/*
 * do_convert() - convert the NUL-terminated string `from`, encoded as named
 * by `code`, into EUC-JP and store the result in `to`.
 *
 *   to    output buffer of at least BUFSIZ bytes
 *   from  NUL-terminated input; the fallback copy assumes it is shorter than
 *         BUFSIZ, which any2eucjp() checks before calling
 *   code  one of NEWJISSTR, OLDJISSTR or SJISSTR (with HAVE_ICONV: any name
 *         iconv_open() accepts as a source encoding)
 *
 * On any failure (unknown code, conversion error, result not fitting in
 * BUFSIZ bytes) a message is reported through error() and `from` is copied
 * to `to` unchanged.
 */
static void
do_convert (unsigned char *to, unsigned char *from, const char *code)
{
#ifdef HAVE_ICONV
  iconv_t cd;
  size_t from_len, to_len;
  /* iconv() advances the pointers it is given; keep `to`/`from` intact so
     that the fallback copy below still starts at the beginning. */
  char *in = (char *) from;
  char *out = (char *) to;
#ifdef HAVE_ERRNO_H
  int saved_errno;
#endif

  cd = iconv_open (EUCSTR, code);
  if (cd == (iconv_t) - 1)
    {
#ifdef HAVE_ERRNO_H
      /* error() may change errno, so capture it first. */
      saved_errno = errno;
#endif
      error ("iconv_open() error");
#ifdef HAVE_ERRNO_H
      if (saved_errno == EINVAL)
	error ("invalid code specification: \"%s\" or \"%s\"",
	       EUCSTR, code);
#endif
      ustrcpy (to, from);
      return;
    }

  /* Include the terminating NUL so that the output is terminated as well. */
  from_len = strlen ((const char *) from) + 1;
  to_len = BUFSIZ;

  if (iconv (cd, &in, &from_len, &out, &to_len) == (size_t) - 1)
    {
#ifdef HAVE_ERRNO_H
      saved_errno = errno;
      if (saved_errno == EINVAL)
	error ("invalid end of input string");
      else if (saved_errno == EILSEQ)
	error ("invalid code in input string");
      else if (saved_errno == E2BIG)
	error ("output buffer overflow at do_convert()");
      else
#endif
	error ("something happen");
      /* Release the descriptor on the failure path too. */
      if (iconv_close (cd) != 0)
	error ("iconv_close() error");
      ustrcpy (to, from);
      return;
    }

  if (iconv_close (cd) != 0)
    {
      error ("iconv_close() error");
    }
#else
  int p1, p2, i, j;
  int need;
  int jisx0208 = FALSE;
  int hankaku = FALSE;
  int overflow = FALSE;

  i = 0;
  j = 0;

  if (strcmp (code, NEWJISSTR) == 0 || strcmp (code, OLDJISSTR) == 0)
    {
      while (from[i] != '\0')
	{
	  if (from[i] == ESC)
	    {
	      i++;
	      if (from[i] == '$')
		{
		  jisx0208 = TRUE;
		  hankaku = FALSE;
		  i++;
		}
	      else if (from[i] == '(')
		{
		  jisx0208 = FALSE;
		  i++;
		  if (from[i] == 'I')	/* Hankaku Kana */
		    hankaku = TRUE;
		  else
		    hankaku = FALSE;
		}
	      /* Skip the last byte of the sequence, but never step over the
	         terminating NUL of a truncated sequence. */
	      if (from[i] != '\0')
		i++;
	      continue;
	    }

	  /* Half-width kana become SS2 + byte; everything else is one byte. */
	  need = (!jisx0208 && hankaku) ? 2 : 1;
	  if (j + need >= BUFSIZ)
	    {
	      overflow = TRUE;
	      break;
	    }
	  if (jisx0208)
	    to[j++] = from[i] + 128;
	  else if (hankaku)
	    {
	      to[j++] = SS2;
	      to[j++] = from[i] + 128;
	    }
	  else
	    to[j++] = from[i];
	  i++;
	}
    }
  else if (strcmp (code, SJISSTR) == 0)
    {
      while (from[i] != '\0')
	{
	  p1 = from[i];
	  need = (p1 < 127) ? 1 : 2;
	  if (j + need >= BUFSIZ)
	    {
	      overflow = TRUE;
	      break;
	    }
	  if (p1 < 127)
	    to[j++] = p1;
	  else if ((p1 >= 161) && (p1 <= 223))
	    {			/* Hankaku Kana */
	      to[j++] = SS2;
	      to[j++] = p1;
	    }
	  else
	    {
	      /* A lead byte with nothing after it cannot be converted; drop
	         it rather than reading beyond the end of the string. */
	      if (from[i + 1] == '\0')
		break;
	      p2 = from[++i];
	      SJIStoJIS (&p1, &p2);
	      to[j++] = p1 + 128;
	      to[j++] = p2 + 128;
	    }
	  i++;
	}
    }
  else
    {
      error ("invalid code specification: \"%s\"", code);
      /* Leave the caller with defined contents, as the other failure paths do. */
      ustrcpy (to, from);
      return;
    }

  if (overflow)
    {
      error ("output buffer overflow at do_convert()");
      ustrcpy (to, from);
    }
  else
    to[j] = '\0';
#endif /* HAVE_ICONV */
}
/*
 * do_check_and_conv() - detect the encoding of `from`, convert it to EUC-JP
 * and turn half-width kana into full-width kana.
 *
 *   to    output buffer of at least BUFSIZ bytes
 *   from  NUL-terminated input, shorter than BUFSIZ (checked by any2eucjp())
 *
 * Returns TRUE when the input was treated as Japanese text (JIS, EUC or
 * SJIS) and FALSE when it was copied through unchanged (ASCII, NEC Kanji,
 * or an unknown code).
 *
 * The intermediate result lives in a static buffer, so the function keeps
 * state between calls.
 */
static int
do_check_and_conv (unsigned char *to, unsigned char *from)
{
  static unsigned char tmp[BUFSIZ];
  int p1, p2, i, j;
  int kanji = TRUE;

  switch (DetectKanjiCode (from))
    {
    case NEW:
      debug ("Kanji code is New JIS.");
      do_convert (tmp, from, NEWJISSTR);
      break;
    case OLD:
      debug ("Kanji code is Old JIS.");
      do_convert (tmp, from, OLDJISSTR);
      break;
    case ESCI:
      debug ("This string includes Hankaku-Kana (jisx0201) escape sequence [ESC] + ( + I.");
      do_convert (tmp, from, NEWJISSTR);
      break;
    case NEC:
      debug ("Kanji code is NEC Kanji.");
      error ("cannot convert NEC Kanji.");
      ustrcpy (tmp, from);
      kanji = FALSE;
      break;
    case EUC:
      debug ("Kanji code is EUC.");
      ustrcpy (tmp, from);
      break;
    case SJIS:
      debug ("Kanji code is SJIS.");
      do_convert (tmp, from, SJISSTR);
      break;
    case EUCORSJIS:
      debug ("Kanji code is EUC or SJIS.");
      ustrcpy (tmp, from);
      kanji = FALSE;
      break;
    case ASCII:
      debug ("This is ASCII string.");
      ustrcpy (tmp, from);
      kanji = FALSE;
      break;
    default:
      debug ("This string includes unknown code.");
      ustrcpy (tmp, from);
      kanji = FALSE;
      break;
    }

  if (!kanji)
    {
      ustrcpy (to, tmp);
      return kanji;
    }

  /* Hankaku Kana ---> Zenkaku Kana */
  i = 0;
  j = 0;
  while (tmp[i] != '\0' && j < BUFSIZ - 1)
    {
      if (tmp[i] != SS2)
	{
	  to[j++] = tmp[i++];
	  continue;
	}

      p1 = tmp[i + 1];
      if (p1 < 161 || p1 > 223)
	{
	  /* SS2 is not followed by a half-width kana byte (malformed or
	     truncated input).  han2zen() has no table entry for such a
	     value, so pass SS2 through and let the next iteration deal
	     with whatever follows, including the terminating NUL. */
	  to[j++] = tmp[i++];
	  continue;
	}

      /* Two bytes are about to be written and room for the NUL must remain. */
      if (j + 2 > BUFSIZ - 1)
	break;

      i += 2;			/* consume SS2 and the kana byte */
      p2 = 0;
      /* A following SS2 + (han-)daku-ten is consumed together with the kana.
         NOTE(review): it is consumed even when han2zen() cannot attach it to
         this kana, as in the original code - confirm that is intended. */
      if (tmp[i] == SS2 && (tmp[i + 1] == 222 || tmp[i + 1] == 223))
	{
	  p2 = tmp[i + 1];
	  i += 2;
	}
      han2zen (&p1, &p2);
      SJIStoJIS (&p1, &p2);
      to[j++] = p1 + 128;
      to[j++] = p2 + 128;
    }

  if (tmp[i] != '\0')
    {
      /* Input is left over, so the output buffer was too small. */
      error ("output buffer overflow at Hankaku --> Zenkaku");
      ustrcpy (to, tmp);
    }
  else
    to[j] = '\0';

  return kanji;
}
/*
 * any2eucjp() - convert a string in JIS, Shift-JIS or EUC-JP to EUC-JP.
 *
 *   dest      output buffer of at least dest_max bytes
 *   src       NUL-terminated input, which must be shorter than BUFSIZ
 *   dest_max  size of dest in bytes; must not exceed BUFSIZ
 *
 * Returns the value of do_check_and_conv() (TRUE if the input was handled as
 * Japanese text, FALSE if it was copied unchanged) or -1 on error.
 *
 * When the converted text does not fit in dest, the unconverted source is
 * stored instead, truncated if necessary so that at most dest_max bytes
 * (including the terminating NUL) are ever written.  For the other errors
 * dest is left untouched.
 *
 * The conversion goes through a static buffer, so the function keeps state
 * between calls.
 */
int
any2eucjp (unsigned char *dest, unsigned char *src, unsigned int dest_max)
{
  static unsigned char tmp_dest[BUFSIZ];
  size_t src_len;
  int ret;

  if (dest == NULL || src == NULL)
    {
      error ("invalid argument");
      return -1;
    }

  src_len = strlen ((const char *) src);
  if (src_len >= BUFSIZ)
    {
      error ("input string too large");
      return -1;
    }
  if (dest_max > BUFSIZ)
    {
      error ("invalid maximum size of destination\nit should be less than %d.", BUFSIZ);
      return -1;
    }

  ret = do_check_and_conv (tmp_dest, src);

  if (strlen ((const char *) tmp_dest) >= dest_max)
    {
      error ("output buffer overflow");
      /* Fall back to the raw source, but stay inside dest: an unbounded
         copy here would overrun the very buffer just found too small. */
      if (dest_max > 0)
	{
	  size_t n = (src_len < dest_max) ? src_len : (size_t) dest_max - 1;

	  memmove (dest, src, n);
	  dest[n] = '\0';
	}
      return -1;
    }

  ustrcpy (dest, tmp_dest);
  return ret;
}
#if 0
/*
 * strwidth() - length in bytes of s after conversion by any2eucjp().
 *
 * Returns 0 if the work buffer cannot be allocated.  If any2eucjp() fails
 * before writing anything, the buffer is still an empty string, so 0 is
 * returned in that case too.
 */
unsigned int
strwidth (unsigned char *s)
{
  unsigned char *t;
  unsigned int i;

  t = (unsigned char *) gdMalloc (BUFSIZ);
  if (t == NULL)
    return 0;
  /* any2eucjp() leaves the buffer untouched on some errors. */
  t[0] = '\0';
  any2eucjp (t, s, BUFSIZ);
  i = (unsigned int) strlen ((const char *) t);
  gdFree (t);
  return i;
}
#ifdef DEBUG
/*
 * Debug driver: reads one line from stdin, prints its length before and
 * after conversion, then prints the converted line.
 *
 * Returns 0 on success and 1 if the output buffer cannot be allocated.
 */
int
main ()
{
  unsigned char input[BUFSIZ];
  unsigned char *output;
  unsigned char *str;
  int c, i = 0;

  /* Stop at newline or end of input, and keep one byte for the NUL. */
  while (i < BUFSIZ - 1 && (c = fgetc (stdin)) != EOF && c != '\n')
    input[i++] = c;
  input[i] = '\0';

  printf ("input : %lu bytes\n", (unsigned long) strlen ((const char *) input));
  printf ("output: %u bytes\n", strwidth (input));

  output = (unsigned char *) gdMalloc (BUFSIZ);
  if (output == NULL)
    return 1;
  /* any2eucjp() leaves the buffer untouched on some errors. */
  output[0] = '\0';
  any2eucjp (output, input, BUFSIZ);
  str = output;
  while (*str != '\0')
    putchar (*(str++));
  putchar ('\n');
  gdFree (output);
  return 0;
}
#endif
#endif