Mailing List Archive

writelines() not thread-safe
Christian Tismer just did an exhaustive search for thread unsafe use
of Python operations, and found two weaknesses. One is
posix.listdir(), which I had already found; the other is
file.writelines(). Here's a program that demonstrates the bug;
basically, while writelines is walking down the list, another thread
could truncate the list, causing PyList_GetItem() to fail or a string
object to be deallocated while writelines is using it. On my SOlaris
7 system it typically crashes in the first or second iteration.

It's easy to fix: just don't use release the interpreter lock (get rid
of Py_BEGIN_ALLOW_THREADS c.s.). This would however prevent other
threads from doing any work while this thread may be blocked for I/O.

An alternative solution is to put Py_BEGIN_ALLOW_THREADS and
Py_END_ALLOW_THREADS just around the fwrite() call. This is safe, but
would require a lot of lock operations and would probably slow things
down too much.

Ideas?

--Guido van Rossum (home page: http://www.python.org/~guido/)
import os
import sys
import thread
import random
import time
import tempfile

def good_guy(fp, list):
t0 = time.time()
fp.seek(0)
fp.writelines(list)
t1 = time.time()
print fp.tell(), "bytes written"
return t1-t0

def bad_guy(dt, list):
time.sleep(random.random() * dt)
del list[:]

def main():
infn = "/usr/dict/words"
if sys.argv[1:]:
infn = sys.argv[1]
print "reading %s..." % infn
fp = open(infn)
list = fp.readlines()
fp.close()
print "read %d lines" % len(list)
tfn = tempfile.mktemp()
fp = None
try:
fp = open(tfn, "w")
print "calibrating..."
dt = 0.0
n = 3
for i in range(n):
dt = dt + good_guy(fp, list)
dt = dt / n # average time it took to write the list to disk
print "dt =", round(dt, 3)
i = 0
while 1:
i = i+1
print "test", i
copy = map(lambda x: x[1:], list)
thread.start_new_thread(bad_guy, (dt, copy))
good_guy(fp, copy)
finally:
if fp:
fp.close()
try:
os.unlink(tfn)
except os.error:
pass

main()
RE: writelines() not thread-safe [ In reply to ]
[Guido van Rossum]
> Christian Tismer just did an exhaustive search for thread unsafe use
> of Python operations, and found two weaknesses. One is
> posix.listdir(), which I had already found; the other is
> file.writelines(). Here's a program that demonstrates the bug;
> basically, while writelines is walking down the list, another thread
> could truncate the list, causing PyList_GetItem() to fail or a string
> object to be deallocated while writelines is using it. On my SOlaris
> 7 system it typically crashes in the first or second iteration.
>
> It's easy to fix: just don't use release the interpreter lock (get rid
> of Py_BEGIN_ALLOW_THREADS c.s.). This would however prevent other
> threads from doing any work while this thread may be blocked for I/O.
>
> An alternative solution is to put Py_BEGIN_ALLOW_THREADS and
> Py_END_ALLOW_THREADS just around the fwrite() call. This is safe, but
> would require a lot of lock operations and would probably slow things
> down too much.
>
> Ideas?

2.5:

1: Before releasing the lock, make a shallow copy of the list.

1.5: As in #1, but iteratively peeling off "the next N" values, for some N
balancing the number of lock operations against the memory burden (I don't
care about the speed of a shallow copy here ...).

2. Pull the same trick list.sort() uses: make the list object immutable for
the duration (I know you think that's a hack, and it is <wink>, but it costs
virtually nothing and would raise an approriate error when they attempted
the insane mutation).

I actually like #2 best now, but won't in the future, because
file_writelines() should really accept an argument of any sequence type.
This makes 1.5 a better long-term hack.

although-adding-1.5-to-1.6-is-confusing<wink>-ly y'rs - tim
Re: writelines() not thread-safe [ In reply to ]
OK, here's a patch for writelines() that supports arbitrary sequences
and fixes the lock problem using Tim's solution #1.5 (slicing 1000
items at a time). It contains a fast path for when the argument is a
list, using PyList_GetSlice; otherwise it uses PyObject_GetItem and a
fixed list.

Please have a good look at this; I've only tested it lightly.

--Guido van Rossum (home page: http://www.python.org/~guido/)

Index: fileobject.c
===================================================================
RCS file: /projects/cvsroot/python/dist/src/Objects/fileobject.c,v
retrieving revision 2.70
diff -c -r2.70 fileobject.c
*** fileobject.c 2000/02/29 13:59:28 2.70
--- fileobject.c 2000/03/10 14:55:47
***************
*** 884,923 ****
PyFileObject *f;
PyObject *args;
{
! int i, n;
if (f->f_fp == NULL)
return err_closed();
! if (args == NULL || !PyList_Check(args)) {
PyErr_SetString(PyExc_TypeError,
! "writelines() requires list of strings");
return NULL;
}
! n = PyList_Size(args);
! f->f_softspace = 0;
! Py_BEGIN_ALLOW_THREADS
! errno = 0;
! for (i = 0; i < n; i++) {
! PyObject *line = PyList_GetItem(args, i);
! int len;
! int nwritten;
! if (!PyString_Check(line)) {
! Py_BLOCK_THREADS
! PyErr_SetString(PyExc_TypeError,
! "writelines() requires list of strings");
return NULL;
}
! len = PyString_Size(line);
! nwritten = fwrite(PyString_AsString(line), 1, len, f->f_fp);
! if (nwritten != len) {
! Py_BLOCK_THREADS
! PyErr_SetFromErrno(PyExc_IOError);
! clearerr(f->f_fp);
! return NULL;
}
}
! Py_END_ALLOW_THREADS
Py_INCREF(Py_None);
! return Py_None;
}

static PyMethodDef file_methods[] = {
--- 884,975 ----
PyFileObject *f;
PyObject *args;
{
! #define CHUNKSIZE 1000
! PyObject *list, *line;
! PyObject *result;
! int i, j, index, len, nwritten, islist;
!
if (f->f_fp == NULL)
return err_closed();
! if (args == NULL || !PySequence_Check(args)) {
PyErr_SetString(PyExc_TypeError,
! "writelines() requires sequence of strings");
return NULL;
}
! islist = PyList_Check(args);
!
! /* Strategy: slurp CHUNKSIZE lines into a private list,
! checking that they are all strings, then write that list
! without holding the interpreter lock, then come back for more. */
! index = 0;
! if (islist)
! list = NULL;
! else {
! list = PyList_New(CHUNKSIZE);
! if (list == NULL)
return NULL;
+ }
+ result = NULL;
+
+ for (;;) {
+ if (islist) {
+ Py_XDECREF(list);
+ list = PyList_GetSlice(args, index, index+CHUNKSIZE);
+ if (list == NULL)
+ return NULL;
+ j = PyList_GET_SIZE(list);
}
! else {
! for (j = 0; j < CHUNKSIZE; j++) {
! line = PySequence_GetItem(args, index+j);
! if (line == NULL) {
! if (PyErr_ExceptionMatches(PyExc_IndexError)) {
! PyErr_Clear();
! break;
! }
! /* Some other error occurred.
! Note that we may lose some output. */
! goto error;
! }
! if (!PyString_Check(line)) {
! PyErr_SetString(PyExc_TypeError,
! "writelines() requires sequences of strings");
! goto error;
! }
! PyList_SetItem(list, j, line);
! }
! }
! if (j == 0)
! break;
!
! Py_BEGIN_ALLOW_THREADS
! f->f_softspace = 0;
! errno = 0;
! for (i = 0; i < j; i++) {
! line = PyList_GET_ITEM(list, i);
! len = PyString_GET_SIZE(line);
! nwritten = fwrite(PyString_AS_STRING(line),
! 1, len, f->f_fp);
! if (nwritten != len) {
! Py_BLOCK_THREADS
! PyErr_SetFromErrno(PyExc_IOError);
! clearerr(f->f_fp);
! Py_DECREF(list);
! return NULL;
! }
}
+ Py_END_ALLOW_THREADS
+
+ if (j < CHUNKSIZE)
+ break;
+ index += CHUNKSIZE;
}
!
Py_INCREF(Py_None);
! result = Py_None;
! error:
! Py_XDECREF(list);
! return result;
}

static PyMethodDef file_methods[] = {