Ensure that the encoding and nencoding values are respected when the OCI
environment is created; ensure that these values are also used when LOBs are
read and written.
This commit is contained in:
Anthony Tuininga 2017-01-16 13:40:43 -07:00
parent 8fda704186
commit 2c57b455e6
3 changed files with 61 additions and 70 deletions

View File

@ -11,9 +11,11 @@ typedef struct {
OCIEnv *handle; OCIEnv *handle;
OCIError *errorHandle; OCIError *errorHandle;
int maxBytesPerCharacter; int maxBytesPerCharacter;
int fixedWidth; int nmaxBytesPerCharacter;
char *encoding; char *encoding;
char *nencoding; char *nencoding;
ub2 charsetId;
ub2 ncharsetId;
PyObject *cloneEnv; PyObject *cloneEnv;
udt_Buffer numberToStringFormatBuffer; udt_Buffer numberToStringFormatBuffer;
udt_Buffer numberFromStringFormatBuffer; udt_Buffer numberFromStringFormatBuffer;
@ -73,8 +75,8 @@ static udt_Environment *Environment_New(
if (!env) if (!env)
return NULL; return NULL;
env->handle = handle; env->handle = handle;
env->fixedWidth = 1;
env->maxBytesPerCharacter = 1; env->maxBytesPerCharacter = 1;
env->nmaxBytesPerCharacter = 4;
cxBuffer_Init(&env->numberToStringFormatBuffer); cxBuffer_Init(&env->numberToStringFormatBuffer);
cxBuffer_Init(&env->numberFromStringFormatBuffer); cxBuffer_Init(&env->numberFromStringFormatBuffer);
cxBuffer_Init(&env->nlsNumericCharactersBuffer); cxBuffer_Init(&env->nlsNumericCharactersBuffer);
@ -107,12 +109,19 @@ static int Environment_GetCharacterSetName(
udt_Environment *self, // environment object udt_Environment *self, // environment object
ub2 attribute, // attribute to fetch ub2 attribute, // attribute to fetch
const char *overrideValue, // override value, if specified const char *overrideValue, // override value, if specified
char **result) // place to store result char **result, // place to store result
ub2 *charsetId) // character set ID (OUT)
{ {
char charsetName[OCI_NLS_MAXBUFSZ], ianaCharsetName[OCI_NLS_MAXBUFSZ]; char charsetName[OCI_NLS_MAXBUFSZ], ianaCharsetName[OCI_NLS_MAXBUFSZ];
ub2 charsetId;
sword status; sword status;
// get character set id
status = OCIAttrGet(self->handle, OCI_HTYPE_ENV, charsetId, NULL,
attribute, self->errorHandle);
if (Environment_CheckForError(self, status,
"Environment_GetCharacterSetName(): get charset id") < 0)
return -1;
// if override value specified, use it // if override value specified, use it
if (overrideValue) { if (overrideValue) {
*result = PyMem_Malloc(strlen(overrideValue) + 1); *result = PyMem_Malloc(strlen(overrideValue) + 1);
@ -122,16 +131,9 @@ static int Environment_GetCharacterSetName(
return 0; return 0;
} }
// get character set id
status = OCIAttrGet(self->handle, OCI_HTYPE_ENV, &charsetId, NULL,
attribute, self->errorHandle);
if (Environment_CheckForError(self, status,
"Environment_GetCharacterSetName(): get charset id") < 0)
return -1;
// get character set name // get character set name
status = OCINlsCharSetIdToName(self->handle, (text*) charsetName, status = OCINlsCharSetIdToName(self->handle, (text*) charsetName,
OCI_NLS_MAXBUFSZ, charsetId); OCI_NLS_MAXBUFSZ, *charsetId);
if (Environment_CheckForError(self, status, if (Environment_CheckForError(self, status,
"Environment_GetCharacterSetName(): get Oracle charset name") < 0) "Environment_GetCharacterSetName(): get Oracle charset name") < 0)
return -1; return -1;
@ -292,23 +294,20 @@ static udt_Environment *Environment_NewFromScratch(
return NULL; return NULL;
} }
// acquire whether character set is fixed width
status = OCINlsNumericInfoGet(env->handle, env->errorHandle,
&env->fixedWidth, OCI_NLS_CHARSET_FIXEDWIDTH);
if (Environment_CheckForError(env, status,
"Environment_New(): determine if charset fixed width") < 0) {
Py_DECREF(env);
return NULL;
}
// determine encodings to use for Unicode values // determine encodings to use for Unicode values
if (Environment_GetCharacterSetName(env, OCI_ATTR_ENV_CHARSET_ID, if (Environment_GetCharacterSetName(env, OCI_ATTR_ENV_CHARSET_ID,
encoding, &env->encoding) < 0) encoding, &env->encoding, &env->charsetId) < 0)
return NULL; return NULL;
if (Environment_GetCharacterSetName(env, OCI_ATTR_ENV_NCHARSET_ID, if (Environment_GetCharacterSetName(env, OCI_ATTR_ENV_NCHARSET_ID,
nencoding, &env->nencoding) < 0) nencoding, &env->nencoding, &env->ncharsetId) < 0)
return NULL; return NULL;
// max bytes per character for NCHAR can be assigned only if it matches the
// character set used for CHAR data; OCI does not provide a way of
// determining it otherwise
if (env->ncharsetId == env->charsetId)
env->nmaxBytesPerCharacter = env->maxBytesPerCharacter;
// fill buffers for number formats // fill buffers for number formats
if (Environment_SetBuffer(&env->numberToStringFormatBuffer, "TM9", if (Environment_SetBuffer(&env->numberToStringFormatBuffer, "TM9",
env->encoding) < 0) env->encoding) < 0)
@ -339,11 +338,12 @@ static udt_Environment *Environment_Clone(
if (!env) if (!env)
return NULL; return NULL;
env->maxBytesPerCharacter = cloneEnv->maxBytesPerCharacter; env->maxBytesPerCharacter = cloneEnv->maxBytesPerCharacter;
env->fixedWidth = cloneEnv->fixedWidth;
Py_INCREF(cloneEnv); Py_INCREF(cloneEnv);
env->cloneEnv = (PyObject*) cloneEnv; env->cloneEnv = (PyObject*) cloneEnv;
env->encoding = cloneEnv->encoding; env->encoding = cloneEnv->encoding;
env->nencoding = cloneEnv->nencoding; env->nencoding = cloneEnv->nencoding;
env->charsetId = cloneEnv->charsetId;
env->ncharsetId = cloneEnv->ncharsetId;
cxBuffer_Copy(&env->numberToStringFormatBuffer, cxBuffer_Copy(&env->numberToStringFormatBuffer,
&cloneEnv->numberToStringFormatBuffer); &cloneEnv->numberToStringFormatBuffer);
cxBuffer_Copy(&env->numberFromStringFormatBuffer, cxBuffer_Copy(&env->numberFromStringFormatBuffer,

View File

@ -169,17 +169,13 @@ static int ExternalLobVar_InternalRead(
oraub8 *length, // length of data (IN/OUT) oraub8 *length, // length of data (IN/OUT)
oraub8 offset) // offset oraub8 offset) // offset
{ {
oraub8 lengthInBytes, lengthInChars; oraub8 lengthInBytes = 0, lengthInChars = 0;
ub2 charsetId; ub2 charsetId;
sword status; sword status;
if (var->lobVar->type == &vt_NCLOB || var->lobVar->type == &vt_CLOB) { if (var->lobVar->type == &vt_NCLOB || var->lobVar->type == &vt_CLOB)
lengthInBytes = 0;
lengthInChars = *length; lengthInChars = *length;
} else { else lengthInBytes = *length;
lengthInChars = 0;
lengthInBytes = *length;
}
if (var->lobVar->isFile) { if (var->lobVar->isFile) {
Py_BEGIN_ALLOW_THREADS Py_BEGIN_ALLOW_THREADS
@ -193,9 +189,9 @@ static int ExternalLobVar_InternalRead(
} }
Py_BEGIN_ALLOW_THREADS Py_BEGIN_ALLOW_THREADS
if (var->lobVar->type == &vt_NCLOB) charsetId = (var->lobVar->type->charsetForm == SQLCS_NCHAR) ?
charsetId = OCI_UTF16ID; var->lobVar->environment->ncharsetId :
else charsetId = 0; var->lobVar->environment->charsetId;
status = OCILobRead2(var->lobVar->connection->handle, status = OCILobRead2(var->lobVar->connection->handle,
var->lobVar->environment->errorHandle, var->lobVar->data[var->pos], var->lobVar->environment->errorHandle, var->lobVar->data[var->pos],
&lengthInBytes, &lengthInChars, offset, buffer, bufferSize, &lengthInBytes, &lengthInChars, offset, buffer, bufferSize,
@ -275,10 +271,11 @@ static PyObject *ExternalLobVar_Value(
else amount = 1; else amount = 1;
} }
length = amount; length = amount;
if (var->lobVar->type == &vt_CLOB) if (var->lobVar->type == &vt_CLOB)
bufferSize = amount * var->lobVar->environment->maxBytesPerCharacter; bufferSize = amount * var->lobVar->environment->maxBytesPerCharacter;
else if (var->lobVar->type == &vt_NCLOB) else if (var->lobVar->type == &vt_NCLOB)
bufferSize = amount * 2; bufferSize = amount * var->lobVar->environment->nmaxBytesPerCharacter;
else bufferSize = amount; else bufferSize = amount;
// create a string for retrieving the value // create a string for retrieving the value
@ -296,7 +293,8 @@ static PyObject *ExternalLobVar_Value(
result = cxString_FromEncodedString(buffer, length, result = cxString_FromEncodedString(buffer, length,
var->lobVar->environment->encoding); var->lobVar->environment->encoding);
} else if (var->lobVar->type == &vt_NCLOB) { } else if (var->lobVar->type == &vt_NCLOB) {
result = PyUnicode_DecodeUTF16(buffer, length, NULL, NULL); result = PyUnicode_Decode(buffer, length,
var->lobVar->environment->encoding, NULL);
} else { } else {
result = PyBytes_FromStringAndSize(buffer, length); result = PyBytes_FromStringAndSize(buffer, length);
} }
@ -413,7 +411,7 @@ static PyObject *ExternalLobVar_Str(
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
// ExternalLobVar_Write() // ExternalLobVar_Write()
// Write a value to the LOB variable; return the number of bytes written. // Write a value to the LOB at the specified offset.
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
static PyObject *ExternalLobVar_Write( static PyObject *ExternalLobVar_Write(
udt_ExternalLobVar *var, // variable to perform write against udt_ExternalLobVar *var, // variable to perform write against
@ -421,8 +419,8 @@ static PyObject *ExternalLobVar_Write(
PyObject *keywordArgs) // keyword arguments PyObject *keywordArgs) // keyword arguments
{ {
static char *keywordList[] = { "data", "offset", NULL }; static char *keywordList[] = { "data", "offset", NULL };
oraub8 amount, offset;
PyObject *dataObj; PyObject *dataObj;
oraub8 offset;
// buffer is expected, offset is optional // buffer is expected, offset is optional
offset = 1; offset = 1;
@ -433,11 +431,10 @@ static PyObject *ExternalLobVar_Write(
// perform the write, if possible // perform the write, if possible
if (ExternalLobVar_Verify(var) < 0) if (ExternalLobVar_Verify(var) < 0)
return NULL; return NULL;
if (LobVar_Write(var->lobVar, var->pos, dataObj, offset, &amount) < 0) if (LobVar_Write(var->lobVar, var->pos, dataObj, offset) < 0)
return NULL; return NULL;
// return the result Py_RETURN_NONE;
return PyLong_FromUnsignedLong(amount);
} }

View File

@ -21,7 +21,7 @@ static int LobVar_PreFetch(udt_LobVar*);
static void LobVar_Finalize(udt_LobVar*); static void LobVar_Finalize(udt_LobVar*);
static PyObject *LobVar_GetValue(udt_LobVar*, unsigned); static PyObject *LobVar_GetValue(udt_LobVar*, unsigned);
static int LobVar_SetValue(udt_LobVar*, unsigned, PyObject*); static int LobVar_SetValue(udt_LobVar*, unsigned, PyObject*);
static int LobVar_Write(udt_LobVar*, unsigned, PyObject*, oraub8, oraub8*); static int LobVar_Write(udt_LobVar*, unsigned, PyObject*, oraub8);
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
// Python type declarations // Python type declarations
@ -318,49 +318,44 @@ static int LobVar_Write(
udt_LobVar *var, // variable to perform write against udt_LobVar *var, // variable to perform write against
unsigned position, // position to perform write against unsigned position, // position to perform write against
PyObject *dataObj, // data object to write into LOB PyObject *dataObj, // data object to write into LOB
oraub8 offset, // offset into variable oraub8 offset) // offset into variable
oraub8 *amount) // amount to write
{ {
oraub8 lengthInBytes, lengthInChars = 0;
const char *encoding;
udt_Buffer buffer; udt_Buffer buffer;
ub2 charsetId;
sword status; sword status;
// verify the data type // verify the data type
if (var->type == &vt_BFILE) { if (var->type == &vt_BFILE) {
PyErr_SetString(PyExc_TypeError, "BFILEs are read only"); PyErr_SetString(PyExc_TypeError, "BFILEs are read only");
return -1; return -1;
} else if (var->type == &vt_BLOB) {
if (cxBuffer_FromObject(&buffer, dataObj,
var->environment->encoding) < 0)
return -1;
*amount = buffer.size;
#if PY_MAJOR_VERSION < 3
} else if (var->type == &vt_NCLOB) {
if (cxBuffer_FromObject(&buffer, dataObj,
var->environment->nencoding) < 0)
return -1;
*amount = buffer.size;
#endif
} else {
if (cxBuffer_FromObject(&buffer, dataObj,
var->environment->encoding) < 0)
return -1;
if (var->environment->fixedWidth
&& var->environment->maxBytesPerCharacter > 1)
*amount = buffer.size / var->environment->maxBytesPerCharacter;
else *amount = buffer.size;
} }
// determine the buffer to write
if (var->type->charsetForm == SQLCS_NCHAR) {
charsetId = var->environment->ncharsetId;
encoding = var->environment->nencoding;
} else {
charsetId = var->environment->charsetId;
encoding = var->environment->encoding;
}
if (cxBuffer_FromObject(&buffer, dataObj, encoding) < 0)
return -1;
lengthInBytes = buffer.size;
// nothing to do if no data to write // nothing to do if no data to write
if (*amount == 0) { if (lengthInBytes == 0) {
cxBuffer_Clear(&buffer); cxBuffer_Clear(&buffer);
return 0; return 0;
} }
// write the data with the correct character set
Py_BEGIN_ALLOW_THREADS Py_BEGIN_ALLOW_THREADS
status = OCILobWrite2(var->connection->handle, status = OCILobWrite2(var->connection->handle,
var->environment->errorHandle, var->data[position], amount, 0, var->environment->errorHandle, var->data[position], &lengthInBytes,
offset, (void*) buffer.ptr, buffer.size, OCI_ONE_PIECE, NULL, NULL, &lengthInChars, offset, (void*) buffer.ptr, buffer.size,
0, var->type->charsetForm); OCI_ONE_PIECE, NULL, NULL, charsetId, var->type->charsetForm);
Py_END_ALLOW_THREADS Py_END_ALLOW_THREADS
cxBuffer_Clear(&buffer); cxBuffer_Clear(&buffer);
if (Environment_CheckForError(var->environment, status, if (Environment_CheckForError(var->environment, status,
@ -393,7 +388,6 @@ static int LobVar_SetValue(
PyObject *value) // value to set PyObject *value) // value to set
{ {
boolean isTemporary; boolean isTemporary;
oraub8 amount;
sword status; sword status;
ub1 lobType; ub1 lobType;
@ -428,6 +422,6 @@ static int LobVar_SetValue(
return -1; return -1;
// set the current value // set the current value
return LobVar_Write(var, position, value, 1, &amount); return LobVar_Write(var, position, value, 1);
} }