[cairo] [PATCH 3/3] snapshot: fix the race condition. Protect the whole replaying function.

Uli Schlachter psychon at znc.in
Fri May 25 03:13:13 PDT 2012


Hi,

On 25.05.2012 11:13, Zhigang Gong wrote:
> Simply protect the whole replaying stage here, we may fine
> tune the protect region in the future. This commit also break
> the "self-copy" bug. Here self-copy means, replay a recording
> surface to a surface which is some of the snapshot's target
> surface. Then it may cause a "race" condition even with single
> thread.
> 
> As now the accessing from the snapshot side must hold a rdlock,
> then the surface's handling side is easier than before. We do
> not need the snapshot side to touch the surface's reference counter
> now, the wrlock at the COW function is enough to avoid race.
> 
> Slightly change the surface's pointer stealing. We set the _finishing
> before call the cairo_surface_flush, then latter the first snapshot
> will steal its pointer and then clear the bit, and the other snapshots
> will have to create new buffer.
> 
> Signed-off-by: Zhigang Gong <zhigang.gong at linux.intel.com>
> ---
>  src/cairo-image-source.c            |   33 ++---------
>  src/cairo-image-surface.c           |    1 +
>  src/cairo-pdf-surface.c             |   13 +---
>  src/cairo-ps-surface.c              |   16 +----
>  src/cairo-recording-surface.c       |  112 +++++++++++++++++++++++++++++++++++
>  src/cairo-script-surface.c          |    5 +-
>  src/cairo-surface-private.h         |    1 -
>  src/cairo-surface-snapshot-inline.h |    5 +-
>  src/cairo-surface-snapshot.c        |   39 ++----------
>  src/cairo-surface.c                 |    7 +--
>  10 files changed, 135 insertions(+), 97 deletions(-)
> 
[...]
> diff --git a/src/cairo-image-surface.c b/src/cairo-image-surface.c
> index 98f70c5..4322f0a 100644
> --- a/src/cairo-image-surface.c
> +++ b/src/cairo-image-surface.c
> @@ -768,6 +768,7 @@ _cairo_image_surface_snapshot (void *abstract_surface)
>  	clone->color = image->color;
>  
>  	clone->owns_data = FALSE;
> +	image->base._finishing = FALSE;
>  	return &clone->base;
>      }

Whoa? Why? This smells quite fishy to me. Just changing a boolean won't stop
cairo_surface_finish() from doing its job, right?

>  
> diff --git a/src/cairo-recording-surface.c b/src/cairo-recording-surface.c
> index 02d8afd..85e65f7 100644
> --- a/src/cairo-recording-surface.c
> +++ b/src/cairo-recording-surface.c
> @@ -89,6 +89,7 @@
>  #include "cairo-recording-surface-inline.h"
>  #include "cairo-surface-wrapper-private.h"
>  #include "cairo-traps-private.h"
> +#include "cairo-surface-snapshot-inline.h"
>  
>  typedef enum {
>      CAIRO_RECORDING_REPLAY,
> @@ -1287,6 +1288,42 @@ _cairo_recording_surface_get_visible_commands (cairo_recording_surface_t *surfac
>      return num_visible;
>  }
>  
> +static cairo_surface_snapshot_t *
> +_cairo_pattern_get_snapshot (cairo_pattern_t *pattern)
> +{
> +    if (pattern->type == CAIRO_PATTERN_TYPE_SURFACE) {
> +        cairo_surface_pattern_t *surface_pattern =
> +            (cairo_surface_pattern_t *) pattern;
> +        cairo_surface_t *surface = surface_pattern->surface;
> +
> +        if (_cairo_surface_is_snapshot(surface))
> +	    return (cairo_surface_snapshot_t *) surface;
> +    }
> +    return NULL;
> +}
> +
> +/*
> + * Begin to access the snapshot to replay on the target surface. To avoid race
> + * condition we need to get the rdlock. Thus other thread should not modify the
> + * source during we are accessing the original source.
> + *
> + * We need to check the target, if the target is source target of the snapshot, we
> + * need to break the self-copy here.
> + *
> + * */

Minor nitpicking: That "*/" looks misplaced.

> +static int
> +_cairo_snapshot_begin_access_target(cairo_surface_snapshot_t *snapshot,
> +				    cairo_surface_t *target)
> +{
> +    if (snapshot->target == target) {
> +	target->snapshot_detach(&snapshot->base);
> +	return 0;
> +    }
> +    CAIRO_RWLOCK_RDLOCK(snapshot->rwlock);
> +    return 1;
> +}

This function should return cairo_bool_t, not int.

>  static cairo_status_t
>  _cairo_recording_surface_replay_internal (cairo_recording_surface_t	*surface,
>  					  const cairo_rectangle_int_t *surface_extents,
> @@ -1355,21 +1397,42 @@ _cairo_recording_surface_replay_internal (cairo_recording_surface_t	*surface,
>  
>  	switch (command->header.type) {
>  	case CAIRO_COMMAND_PAINT:
> +	    snapshot = _cairo_pattern_get_snapshot(&command->paint.source.base);
> +	    if (snapshot)
> +		locked = _cairo_snapshot_begin_access_target(snapshot, target);
>  	    status = _cairo_surface_wrapper_paint (&wrapper,
>  						   command->header.op,
>  						   &command->paint.source.base,
>  						   command->header.clip);
> +	    if (locked)
> +	        CAIRO_RWLOCK_UNLOCK(snapshot->rwlock);
>  	    break;
>  
>  	case CAIRO_COMMAND_MASK:
> +
> +            snapshot = _cairo_pattern_get_snapshot(&command->mask.source.base);
> +	    if (snapshot)
> +		locked = _cairo_snapshot_begin_access_target(snapshot, target);
> +
> +	    mask_snapshot = _cairo_pattern_get_snapshot(&command->mask.mask.base);
> +	    if (mask_snapshot)
> +		mask_locked = _cairo_snapshot_begin_access_target(mask_snapshot, target);

Couldn't this deadlock?

Thread A uses mask 1 and source 2, thread B uses mask 2 and source 1. If both
first lock their source, they'd deadlock on the mask.

>  	    status = _cairo_surface_wrapper_mask (&wrapper,
>  						  command->header.op,
>  						  &command->mask.source.base,
>  						  &command->mask.mask.base,
>  						  command->header.clip);
> +	    if (mask_locked)
> +	        CAIRO_RWLOCK_UNLOCK(mask_snapshot->rwlock);
> +	    if (locked)
> +	        CAIRO_RWLOCK_UNLOCK(snapshot->rwlock);
>  	    break;
>  
>  	case CAIRO_COMMAND_STROKE:
> +	    snapshot = _cairo_pattern_get_snapshot(&command->stroke.source.base);
> +	    if (snapshot)
> +		locked = _cairo_snapshot_begin_access_target(snapshot, target);
>  	    status = _cairo_surface_wrapper_stroke (&wrapper,
>  						    command->header.op,
>  						    &command->stroke.source.base,
> @@ -1380,9 +1443,14 @@ _cairo_recording_surface_replay_internal (cairo_recording_surface_t	*surface,
>  						    command->stroke.tolerance,
>  						    command->stroke.antialias,
>  						    command->header.clip);
> +	    if (locked)
> +	        CAIRO_RWLOCK_UNLOCK(snapshot->rwlock);
>  	    break;
>  
>  	case CAIRO_COMMAND_FILL:
> +	    snapshot = _cairo_pattern_get_snapshot(&command->fill.source.base);
> +	    if (snapshot)
> +		locked = _cairo_snapshot_begin_access_target(snapshot, target);
>  	    status = CAIRO_INT_STATUS_UNSUPPORTED;
>  	    if (_cairo_surface_wrapper_has_fill_stroke (&wrapper)) {
>  		cairo_command_t *stroke_command;
> @@ -1434,9 +1502,14 @@ _cairo_recording_surface_replay_internal (cairo_recording_surface_t	*surface,
>  						      command->fill.antialias,
>  						      command->header.clip);
>  	    }
> +	    if (locked)
> +	        CAIRO_RWLOCK_UNLOCK(snapshot->rwlock);
>  	    break;
>  
>  	case CAIRO_COMMAND_SHOW_TEXT_GLYPHS:
> +	    snapshot = _cairo_pattern_get_snapshot(&command->show_text_glyphs.source.base);
> +	    if (snapshot)
> +		locked = _cairo_snapshot_begin_access_target(snapshot, target);
>  	    status = _cairo_surface_wrapper_show_text_glyphs (&wrapper,
>  							      command->header.op,
>  							      &command->show_text_glyphs.source.base,
> @@ -1446,6 +1519,8 @@ _cairo_recording_surface_replay_internal (cairo_recording_surface_t	*surface,
>  							      command->show_text_glyphs.cluster_flags,
>  							      command->show_text_glyphs.scaled_font,
>  							      command->header.clip);
> +	    if (locked)
> +	        CAIRO_RWLOCK_UNLOCK(snapshot->rwlock);
>  	    break;

Does anyone have a good idea how the locking could be simplified here? That
looks quite complex.

> @@ -1480,6 +1555,8 @@ _cairo_recording_surface_replay_one (cairo_recording_surface_t	*surface,
>      cairo_surface_wrapper_t wrapper;
>      cairo_command_t **elements, *command;
>      cairo_int_status_t status;
> +    cairo_surface_snapshot_t *snapshot, *mask_snapshot;
> +    int locked = 0, mask_locked = 0;

This is used as a boolean, so cairo_bool_t here, too, please.
>  
>      if (unlikely (surface->base.status))
>  	return surface->base.status;
> @@ -1501,25 +1578,48 @@ _cairo_recording_surface_replay_one (cairo_recording_surface_t	*surface,
>      if (index > surface->commands.num_elements)
>  	return _cairo_error (CAIRO_STATUS_READ_ERROR);
>  
> +    if (target->type == CAIRO_SURFACE_TYPE_RECORDING)
> +	_cairo_recording_surface_break_self_copy_loop((cairo_recording_surface_t*)target);
> +
>      elements = _cairo_array_index (&surface->commands, 0);
>      command = elements[index];
>      switch (command->header.type) {
>      case CAIRO_COMMAND_PAINT:
> +	snapshot = _cairo_pattern_get_snapshot(&command->paint.source.base);
> +	if (snapshot)
> +	    locked = _cairo_snapshot_begin_access_target(snapshot, target);
>  	status = _cairo_surface_wrapper_paint (&wrapper,
>  					       command->header.op,
>  					       &command->paint.source.base,
>  					       command->header.clip);
> +	if (locked)
> +	    CAIRO_RWLOCK_UNLOCK(snapshot->rwlock);
>  	break;
>  
>      case CAIRO_COMMAND_MASK:
> +	snapshot = _cairo_pattern_get_snapshot(&command->mask.source.base);
> +	if (snapshot)
> +	    locked = _cairo_snapshot_begin_access_target(snapshot, target);
> +
> +	mask_snapshot = _cairo_pattern_get_snapshot(&command->mask.mask.base);
> +	if (mask_snapshot)
> +	    mask_locked = _cairo_snapshot_begin_access_target(snapshot, target);
> +
>  	status = _cairo_surface_wrapper_mask (&wrapper,
>  					      command->header.op,
>  					      &command->mask.source.base,
>  					      &command->mask.mask.base,
>  					      command->header.clip);
> +	if (mask_locked)
> +	    CAIRO_RWLOCK_UNLOCK(mask_snapshot->rwlock);
> +	if (locked)
> +	    CAIRO_RWLOCK_UNLOCK(snapshot->rwlock);
>  	break;
>  
>      case CAIRO_COMMAND_STROKE:
> +	snapshot = _cairo_pattern_get_snapshot(&command->stroke.source.base);
> +	if (snapshot)
> +	    locked = _cairo_snapshot_begin_access_target(snapshot, target);
>  	status = _cairo_surface_wrapper_stroke (&wrapper,
>  						command->header.op,
>  						&command->stroke.source.base,
> @@ -1530,9 +1630,14 @@ _cairo_recording_surface_replay_one (cairo_recording_surface_t	*surface,
>  						command->stroke.tolerance,
>  						command->stroke.antialias,
>  						command->header.clip);
> +	if (locked)
> +	    CAIRO_RWLOCK_UNLOCK(snapshot->rwlock);
>  	break;
>  
>      case CAIRO_COMMAND_FILL:
> +	snapshot = _cairo_pattern_get_snapshot(&command->fill.source.base);
> +	if (snapshot)
> +	    locked = _cairo_snapshot_begin_access_target(snapshot, target);
>  	status = _cairo_surface_wrapper_fill (&wrapper,
>  					      command->header.op,
>  					      &command->fill.source.base,
> @@ -1541,9 +1646,14 @@ _cairo_recording_surface_replay_one (cairo_recording_surface_t	*surface,
>  					      command->fill.tolerance,
>  					      command->fill.antialias,
>  					      command->header.clip);
> +	if (locked)
> +	    CAIRO_RWLOCK_UNLOCK(snapshot->rwlock);
>  	break;
>  
>      case CAIRO_COMMAND_SHOW_TEXT_GLYPHS:
> +	snapshot = _cairo_pattern_get_snapshot(&command->show_text_glyphs.source.base);
> +	if (snapshot)
> +	    locked = _cairo_snapshot_begin_access_target(snapshot, target);
>  	status = _cairo_surface_wrapper_show_text_glyphs (&wrapper,
>  							  command->header.op,
>  							  &command->show_text_glyphs.source.base,
> @@ -1553,6 +1663,8 @@ _cairo_recording_surface_replay_one (cairo_recording_surface_t	*surface,
>  							  command->show_text_glyphs.cluster_flags,
>  							  command->show_text_glyphs.scaled_font,
>  							  command->header.clip);
> +	if (locked)
> +	    CAIRO_RWLOCK_UNLOCK(snapshot->rwlock);
>  	break;
>  
>      default:

I'd like to simplify this, too, but I don't have any good idea on how. :-(
(On and the MASK version has the same possible-deadlock)

[...]
> diff --git a/src/cairo-surface-private.h b/src/cairo-surface-private.h
> index d23fc05..321719d 100644
> --- a/src/cairo-surface-private.h
> +++ b/src/cairo-surface-private.h
> @@ -64,7 +64,6 @@ struct _cairo_surface {
>      unsigned int unique_id;
>      unsigned int serial;
>      cairo_damage_t *damage;
> -
>      unsigned _finishing : 1;
>      unsigned finished : 1;
>      unsigned is_clear : 1;

That looks quite random. Why is it in this patch?

> diff --git a/src/cairo-surface-snapshot-inline.h b/src/cairo-surface-snapshot-inline.h
> index 48b24a4..2fe05ac 100644
> --- a/src/cairo-surface-snapshot-inline.h
> +++ b/src/cairo-surface-snapshot-inline.h
> @@ -48,12 +48,9 @@ _cairo_surface_snapshot_is_reused (cairo_surface_t *surface)
>  static inline cairo_surface_t *
>  _cairo_surface_snapshot_get_target (cairo_surface_t *surface)
>  {
> -    cairo_surface_snapshot_t *snapshot = (cairo_surface_snapshot_t *) surface;
>      cairo_surface_t *target;
>  
> -    CAIRO_RWLOCK_WRLOCK (snapshot->rwlock);
> -    target = _cairo_surface_reference (snapshot->target);
> -    CAIRO_RWLOCK_UNLOCK (snapshot->rwlock);
> +    target = ((cairo_surface_snapshot_t *) surface)->target;
>  
>      return target;
>  }
> diff --git a/src/cairo-surface-snapshot.c b/src/cairo-surface-snapshot.c
> index 30e58fd..268e3b3 100644
> --- a/src/cairo-surface-snapshot.c
> +++ b/src/cairo-surface-snapshot.c
> @@ -67,15 +67,10 @@ static cairo_status_t
>  _cairo_surface_snapshot_flush (void *abstract_surface)
>  {
>      cairo_surface_snapshot_t *surface = abstract_surface;
> -    cairo_surface_t *target;
> -    cairo_status_t status;
>  
> -    target = _cairo_surface_snapshot_get_target (&surface->base);
> -    cairo_surface_flush (target);
> -    status = target->status;
> -    cairo_surface_destroy (target);
> +    cairo_surface_flush (surface->target);
>  
> -    return status;
> +    return surface->target->status;
>  }

Before, this code made sure that it held a reference to the snapshot while
reading its status. Why? Could that cause problems here (=use after free)?

[...]
> diff --git a/src/cairo-surface.c b/src/cairo-surface.c
> index 35ac391..a172992 100644
> --- a/src/cairo-surface.c
> +++ b/src/cairo-surface.c
> @@ -858,11 +858,6 @@ cairo_surface_destroy (cairo_surface_t *surface)
>  
>      if (! surface->finished) {
>  	_cairo_surface_finish_snapshots (surface);
> -	/* We may have been referenced by a snapshot prior to have
> -	 * detaching it with the copy-on-write.
> -	 */
> -	if (CAIRO_REFERENCE_COUNT_GET_VALUE (&surface->ref_count))
> -	    return;

How about adding an assert?

assert (! CAIRO_REFERENCE_COUNT_HAS_REFERENCE (&surface->ref_count));

Perhaps this assertion should best be done right before the free() at the end of
this function instead of here...

> @@ -909,10 +904,10 @@ cairo_surface_get_reference_count (cairo_surface_t *surface)
>  static void
>  _cairo_surface_finish_snapshots (cairo_surface_t *surface)
>  {
> +    surface->_finishing = TRUE;
>      cairo_surface_flush (surface);
>  
>      /* update the snapshots *before* we declare the surface as finished */
> -    surface->_finishing = TRUE;

I'm curious for why this is necessary. Could you tell me? If you remember,
perhaps this deserves a little comment on which weird edge case makes this
necessary.

Cheers,
Uli
-- 
"Do you know that books smell like nutmeg or some spice from a foreign land?"
                                                  -- Faber in Fahrenheit 451


More information about the cairo mailing list