/*
 * Patch commentary (review notes only; this text precedes the diff header
 * and is not part of any hunk).
 *
 * Unified diff from NKF_python.c.org to NKF_python.c. The hunks below
 * appear with their line structure collapsed, so their text is kept
 * exactly as found.
 *
 * What the hunks show:
 *  - The getc/ungetc/putchar macros now dispatch through the function
 *    pointers fp_pynkf_getc, fp_pynkf_ungetc and fp_pynkf_putchar instead
 *    of calling pynkf_getc/pynkf_ungetc/pynkf_putchar directly.
 *  - New file-scope state: pynkf_ibufsize_prev, pynkf_inbuf_prev, chunksize
 *    and error_f (static), plus pynkf_istr, pynkf_ostr, pynkf_istr_str and
 *    pynkf_istr_str_prev, which are declared without static.
 *  - pynkf_getc_str refills its input by calling
 *    PyObject_CallMethod(pynkf_istr, "read", "i", chunksize). It returns
 *    EOF when the read yields a zero-length string and does
 *    longjmp(env, 1) when the call returns NULL.
 *  - pynkf_ungetc_str pushes a byte back through pynkf_iptr, falling back
 *    to pynkf_inbuf_prev + pynkf_ibufsize_prev when pynkf_icount is 0.
 *  - pynkf_putchar_str stores bytes through pynkf_optr; when pynkf_ocount
 *    has reached 0 it copies the buffer into a new Python string, calls
 *    pynkf_ostr with it, and allocates a fresh buffer with PyMem_Malloc.
 *  - An option-splitting loop (presumably in pynkf_parseopts; the function
 *    name is outside the hunk) now tests "p && *p", so a null option
 *    pointer skips the loop.
 *  - pynkf_convert gains a leading to_guess argument. With to_guess == 0 it
 *    installs pynkf_putchar and returns the output buffer as a string.
 *    Otherwise it installs no_putc and returns a code name; when
 *    to_guess == 1 it also sets guess_f = 1 before kanji_convert(NULL).
 *    The code name is "BINARY" when is_inputcode_mixed is non-zero,
 *    "ASCII" when input_codename is the empty string, else input_codename.
 *  - pynkf_convert_stream is the streaming counterpart: it sizes the output
 *    buffer as (chunksize + 1) * 1.5 + 256, installs the *_str callbacks,
 *    runs kanji_convert(NULL), passes any buffered output to pynkf_ostr,
 *    and returns "" (to_guess == 0) or the code name.
 *  - pynkf_nkf now calls pynkf_convert(0, ...). New entry points
 *    pynkf_guess, pynkf_nkf_stream and pynkf_guess_stream are registered in
 *    nkfmethods as "guess", "nkf_stream" and "guess_stream".
 *    pynkf_nkf_stream raises TypeError unless pynkf_ostr is callable.
 *
 * NOTE(review): items to confirm against the full file before applying.
 *  - pynkf_putchar_str and the final flush in pynkf_convert_stream never
 *    Py_DECREF(out) after Py_BuildValue("(O)", out); only arglist is
 *    released, so each flushed string looks leaked.
 *  - pynkf_putchar_str assigns a new PyMem_Malloc result to pynkf_outbuf
 *    with no PyMem_Free of the previous buffer visible in this patch.
 *  - In the else branch of pynkf_putchar_str, pynkf_ocount has already gone
 *    from 0 to -1, so the byte is stored at pynkf_outbuf[pynkf_obufsize]
 *    and the memcpy length is pynkf_obufsize + 1. That looks like one byte
 *    past the allocation.
 *  - pynkf_inbuf_prev is only ever set to 0 in the code visible here, yet
 *    pynkf_ungetc_str computes pynkf_inbuf_prev + pynkf_ibufsize_prev.
 *  - Every longjmp(env, 1) in the streaming path lands in the branch that
 *    calls PyErr_NoMemory(), so a failed read() or a failed output
 *    callback would presumably surface as MemoryError instead of the
 *    original Python exception.
 *  - The flush code after the setjmp block in pynkf_convert_stream also
 *    calls longjmp(env, 1), re-entering that same error branch.
 */
--- NKF_python.c.org Tue Jan 18 10:12:15 2005 +++ NKF_python.c Mon Nov 27 10:50:45 2006 @@ -19,21 +19,31 @@ #undef getc #undef ungetc -#define getc(f) pynkf_getc(f) -#define ungetc(c,f) pynkf_ungetc(c,f) +#define getc(f) (*fp_pynkf_getc)(f) +#define ungetc(c,f) (*fp_pynkf_ungetc)(c,f) #undef putchar #undef TRUE #undef FALSE -#define putchar(c) pynkf_putchar(c) +#define putchar(c) (*fp_pynkf_putchar)(c) static char *PYNKF_ARGV[PYNKF_OPTSLEN/2 + 3]; -static long pynkf_ibufsize, pynkf_obufsize; -static unsigned char *pynkf_inbuf, *pynkf_outbuf; +static long pynkf_ibufsize, pynkf_ibufsize_prev, pynkf_obufsize; +static unsigned char *pynkf_inbuf, *pynkf_inbuf_prev, *pynkf_outbuf; static int pynkf_icount,pynkf_ocount; static unsigned char *pynkf_iptr, *pynkf_optr; static jmp_buf env; +static void (*fp_pynkf_putchar)(int c); +static int (*fp_pynkf_getc)(FILE *f); +static int (*fp_pynkf_ungetc)(int c, FILE *f); +PyObject* pynkf_istr; +PyObject* pynkf_ostr; +PyObject* pynkf_istr_str; +PyObject* pynkf_istr_str_prev; +static int chunksize; +static int error_f; + static int pynkf_getc(FILE *f) {unsigned char c; @@ -52,6 +62,63 @@ }else{ return EOF; } } +static int +pynkf_getc_str(FILE *f) +{ + unsigned char c; + PyObject* new_istr_str; + if (pynkf_iptr >= pynkf_inbuf && pynkf_iptr <= pynkf_inbuf + pynkf_ibufsize){ + if (pynkf_icount >= pynkf_ibufsize){ + pynkf_ibufsize_prev = pynkf_ibufsize; + if (pynkf_istr_str_prev){ + Py_DECREF(pynkf_istr_str_prev); + } + pynkf_istr_str_prev = pynkf_istr_str; + pynkf_istr_str = PyObject_CallMethod(pynkf_istr, "read", "i", chunksize); + if (pynkf_istr_str == NULL){ + longjmp(env, 1); + } + PyString_AsStringAndSize(pynkf_istr_str, &pynkf_inbuf, &pynkf_ibufsize); + if (pynkf_ibufsize == 0){ + Py_DECREF(pynkf_istr_str); + return EOF; + } + new_istr_str = PyString_FromStringAndSize(pynkf_inbuf, pynkf_ibufsize); + Py_DECREF(pynkf_istr_str); + pynkf_istr_str = new_istr_str; + PyString_AsStringAndSize(pynkf_istr_str, &pynkf_inbuf, &pynkf_ibufsize); 
+ pynkf_iptr = pynkf_inbuf; + pynkf_icount = 0; + } + }else{ + if (pynkf_icount >= pynkf_ibufsize_prev){ + pynkf_iptr = pynkf_inbuf; + pynkf_icount = 0; + } + } + c = *pynkf_iptr++; + pynkf_icount++; + return (int)c; +} + +static int +pynkf_ungetc_str(int c, FILE *f) +{ + if (pynkf_icount--){ + *(--pynkf_iptr) = c; + return c; + } + if (pynkf_iptr >= pynkf_inbuf && pynkf_iptr <= pynkf_inbuf + pynkf_ibufsize){ + pynkf_iptr = pynkf_inbuf_prev + pynkf_ibufsize_prev; + pynkf_icount = pynkf_ibufsize_prev; + --pynkf_icount; + *(--pynkf_iptr) = c; + return c; + }else{ + return EOF; + } +} + static void pynkf_putchar(int c) { @@ -73,6 +140,42 @@ } } +static void +pynkf_putchar_str(int c) +{ + PyObject* arglist; + PyObject* result; + PyObject* out; + + if (pynkf_ocount--){ + *pynkf_optr++ = c; + }else{ + *pynkf_optr++ = c; + out = PyString_FromStringAndSize(NULL, pynkf_obufsize - pynkf_ocount); + if (out == NULL){ + longjmp(env, 1); + } + memcpy(PyString_AsString(out), pynkf_outbuf, pynkf_obufsize - pynkf_ocount); + arglist = Py_BuildValue("(O)", out); + result = PyObject_CallObject(pynkf_ostr, arglist); + Py_DECREF(arglist); + if (result == NULL){ + longjmp(env, 1); + } + Py_DECREF(result); + + pynkf_outbuf = (unsigned char *)PyMem_Malloc(pynkf_obufsize); + if (pynkf_outbuf == NULL){ longjmp(env, 1); } + pynkf_outbuf[0] = '\0'; + pynkf_ocount = pynkf_obufsize; + pynkf_optr = pynkf_outbuf; + /* + *pynkf_optr++ = c; + pynkf_ocount--; + */ + } +} + #define PERL_XS 1 #include "../utf8tbl.c" #include "../nkf.c" @@ -86,7 +189,7 @@ argc = 1; flg = 1; *argv++ = "nkf"; - while(*p){ + while(p && *p){ if (*p == ' '){ *p = '\0'; flg = 1; p++; continue; } if (*p != ' ' && flg){*argv++ = p; argc++; flg = 0; } p++; @@ -95,7 +198,7 @@ } static PyObject * -pynkf_convert(unsigned char* str, long strlen, char* opts, int optslen) +pynkf_convert(int to_guess, unsigned char* str, long strlen, char* opts, int optslen) { unsigned char *cp; int argc; @@ -123,6 +226,14 @@ argv = PYNKF_ARGV; 
pynkf_parseopts(opts, &argc, argv); + + if (to_guess == 0){ + fp_pynkf_putchar = pynkf_putchar; + }else{ + fp_pynkf_putchar = no_putc; + } + fp_pynkf_getc = pynkf_getc; + fp_pynkf_ungetc = pynkf_ungetc; if (setjmp(env) == 0){ @@ -137,6 +248,9 @@ if(x0201_f == WISH_TRUE) x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201); + if (to_guess == 1){ + guess_f = 1; + } kanji_convert(NULL); }else{ @@ -146,8 +260,133 @@ } *pynkf_optr = 0; - res = PyString_FromString(pynkf_outbuf); + if (to_guess == 0){ + res = PyString_FromString(pynkf_outbuf); + }else{ + char *codename = "BINARY"; + if (!is_inputcode_mixed) { + if (strcmp(input_codename, "") == 0) { + codename = "ASCII"; + } else { + codename = input_codename; + } + } + res = PyString_FromString(codename); + } + PyMem_Free(pynkf_outbuf); + return res; +} + +static PyObject * +pynkf_convert_stream(int to_guess, char* opts, int optslen) +{ + unsigned char *cp; + int argc; + char **argv; + register char **p; + PyObject* res; + PyObject* arglist; + PyObject* result; + PyObject* out; + + if (optslen > PYNKF_OPTSLEN) { + PyErr_SetString(PyExc_ValueError, "Too many options."); + return NULL; + } + pynkf_obufsize = (chunksize + 1) * 1.5 + 256; + pynkf_outbuf = (unsigned char *)PyMem_Malloc(pynkf_obufsize); + if (pynkf_outbuf == NULL){ + PyErr_NoMemory(); + return NULL; + } + argv = PYNKF_ARGV; + + pynkf_parseopts(opts, &argc, argv); + + if (to_guess == 0){ + fp_pynkf_putchar = pynkf_putchar_str; + }else{ + fp_pynkf_putchar = no_putc; + } + fp_pynkf_getc = pynkf_getc_str; + fp_pynkf_ungetc = pynkf_ungetc_str; + + pynkf_outbuf[0] = '\0'; + pynkf_ocount = pynkf_obufsize; + pynkf_optr = pynkf_outbuf; + pynkf_icount = 0; + pynkf_inbuf = 0; + pynkf_inbuf_prev = 0; + pynkf_ibufsize = 0; + pynkf_ibufsize_prev = 0; + pynkf_iptr = 0; + pynkf_istr_str = 0; + pynkf_istr_str_prev = 0; + error_f = 0; + + if (setjmp(env) == 0){ + + reinit(); + + p = argv; + for (argc--,p++; (argc > 0) && **p == '-'; argc--, p++) { + cp = *p; + options(cp); + } + + 
if(x0201_f == WISH_TRUE) + x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201); + + if (to_guess == 1){ + guess_f = 1; + } + kanji_convert(NULL); + /* testcode: noconvert + { + int cc; + while ((cc = getc(NULL)) != EOF){ + putchar(cc); + } + } + */ + + }else{ + PyMem_Free(pynkf_outbuf); + PyErr_NoMemory(); + return NULL; + } + + if (pynkf_obufsize - pynkf_ocount > 0){ + *pynkf_optr = 0; + out = PyString_FromStringAndSize(NULL, pynkf_obufsize - pynkf_ocount); + if (out == NULL){ + longjmp(env, 1); + } + memcpy(PyString_AsString(out), pynkf_outbuf, pynkf_obufsize - pynkf_ocount); + arglist = Py_BuildValue("(O)", out); + result = PyObject_CallObject(pynkf_ostr, arglist); + Py_DECREF(arglist); + if (result == NULL){ + longjmp(env, 1); + } + Py_DECREF(result); + } + PyMem_Free(pynkf_outbuf); + + if (to_guess == 0){ + res = PyString_FromString(""); + }else{ + char *codename = "BINARY"; + if (!is_inputcode_mixed) { + if (strcmp(input_codename, "") == 0) { + codename = "ASCII"; + } else { + codename = input_codename; + } + } + res = PyString_FromString(codename); + } return res; } @@ -165,7 +404,62 @@ if (!PyArg_ParseTuple(args, "s#s#", &opts, &optslen, &str, &strlen)) { return NULL; } - res = pynkf_convert(str, strlen, opts, optslen); + res = pynkf_convert(0, str, strlen, opts, optslen); + return res; +} + +#ifndef EXTERN_NKF +static +#endif +PyObject *pynkf_guess(PyObject *self, PyObject *args) +{ + unsigned char *str; + long strlen; + PyObject* res; + + if (!PyArg_ParseTuple(args, "s#", &str, &strlen)) { + return NULL; + } + res = pynkf_convert(1, str, strlen, 0, 0); + return res; +} + +#ifndef EXTERN_NKF +static +#endif +PyObject *pynkf_nkf_stream(PyObject *self, PyObject *args) +{ + char *opts; + int optslen; + PyObject* res; + + pynkf_istr = 0; + pynkf_ostr = 0; + if (!PyArg_ParseTuple(args, "s#OOi", &opts, &optslen, &pynkf_istr, &pynkf_ostr, &chunksize)) { + return NULL; + } + if (!PyCallable_Check(pynkf_ostr)) { + PyErr_SetString(PyExc_TypeError, "3rd parameter must be 
callable"); + return NULL; + } + res = pynkf_convert_stream(0, opts, optslen); + return res; +} + +#ifndef EXTERN_NKF +static +#endif +PyObject *pynkf_guess_stream(PyObject *self, PyObject *args) +{ + PyObject* res; + + pynkf_istr = 0; + pynkf_ostr = 0; + if (!PyArg_ParseTuple(args, "Oi", &pynkf_istr, &chunksize)) { + return NULL; + } + + res = pynkf_convert_stream(1, 0, 0); return res; } @@ -173,6 +467,9 @@ static PyMethodDef nkfmethods[] = { {"nkf", pynkf_nkf, METH_VARARGS}, + {"guess", pynkf_guess, METH_VARARGS}, + {"nkf_stream", pynkf_nkf_stream, METH_VARARGS}, + {"guess_stream", pynkf_guess_stream, METH_VARARGS}, {NULL, NULL} };