[gegl] operations: update ctx.h dep



commit d0cba135bc3ab0eefd488671ff7766cf0d855983
Author: Øyvind Kolås <pippin gimp org>
Date:   Wed Jan 19 03:25:35 2022 +0100

    operations: update ctx.h dep

 operations/common/ctx/ctx.h     | 40490 ++++++++++++++++++++------------------
 operations/common/vector-fill.c |     2 +-
 2 files changed, 21421 insertions(+), 19071 deletions(-)
---
diff --git a/operations/common/ctx/ctx.h b/operations/common/ctx/ctx.h
index 6a177fdb3..6298e56c9 100644
--- a/operations/common/ctx/ctx.h
+++ b/operations/common/ctx/ctx.h
@@ -1,4 +1,4 @@
-/* ctx git commit: 6bfbdd4 */
+/* ctx git commit: fb20c15a */
 /* 
  * ctx.h is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -24,7 +24,7 @@
  *
  * Ctx contains a minimal default fallback font with only ascii, so
  * you probably want to also include a font, and perhaps enable
- * the cairo or SDL2 optional renderers, a more complete example
+ * the cairo or SDL2 optional backends, a more complete example
  * could be:
  *
  * #include <cairo.h>
@@ -87,12 +87,19 @@ typedef struct _CtxGlyph       CtxGlyph;
 
 /**
  * ctx_new:
+ * width: width in device units
+ * height: height in device units
+ * backend: backend to use
+ *
+ *   valid values are:
+ *     NULL/"auto", "drawlist", "sdl", "term", "ctx"; these strings are
+ *     the same ones accepted by the CTX_BACKEND environment variable.
  *
  * Create a new drawing context, this context has no pixels but
  * accumulates commands and can be played back on other ctx
- * render contexts.
+ * render contexts, this is a ctx context using the drawlist backend.
  */
-Ctx *ctx_new (void);
+Ctx *ctx_new (int width, int height, const char *backend);
 
 /**
  * ctx_new_for_framebuffer:
@@ -105,20 +112,22 @@ Ctx *ctx_new_for_framebuffer (void *data,
                               int   height,
                               int   stride,
                               CtxPixelFormat pixel_format);
+
 /**
- * ctx_new_ui:
+ * ctx_new_drawlist:
  *
- * Create a new interactive ctx context, might depend on additional
- * integration.
+ * Create a new drawing context that can record drawing commands,
+ * this is also the basis for creating more complex contexts with
+ * the backend swapped out.
  */
-Ctx *ctx_new_ui (int width, int height);
+Ctx * ctx_new_drawlist (int width, int height);
 
 /**
  * ctx_new_for_drawlist:
  *
  * Create a new drawing context for a pre-existing drawlist.
  */
-Ctx *ctx_new_for_drawlist (void *data, size_t length);
+Ctx *ctx_new_for_drawlist (int width, int height, void *data, size_t length);
 
 
 /**
@@ -206,7 +215,6 @@ const char *ctx_get_font  (Ctx *ctx);
 float ctx_get_line_width  (Ctx *ctx);
 int   ctx_width           (Ctx *ctx);
 int   ctx_height          (Ctx *ctx);
-int   ctx_rev             (Ctx *ctx);
 float ctx_x               (Ctx *ctx);
 float ctx_y               (Ctx *ctx);
 void  ctx_current_point   (Ctx *ctx, float *x, float *y);
@@ -382,6 +390,14 @@ int ctx_utf8_strlen (const char *s);
 #endif
 #endif
 
+#ifndef CTX_TFT_ESPI
+#ifdef _TFT_eSPIH_
+#define CTX_TFT_ESPI 1
+#else
+#define CTX_TFT_ESPI 0
+#endif
+#endif
+
 #ifndef CTX_SDL
 #ifdef SDL_h_
 #define CTX_SDL 1
@@ -410,6 +426,32 @@ int ctx_utf8_strlen (const char *s);
 #define ctx_unlock_mutex(a)  
 #endif
 
+
+#if CTX_TFT_ESPI
+
+typedef enum CtxTftFlags {
+  CTX_TFT_DEFAULTS   = 0,
+  CTX_TFT_GRAY       = 1 << 0,
+  CTX_TFT_HASH_CACHE = 1 << 1,
+  CTX_TFT_332        = 1 << 2, // might do a 332 render
+                               // that is tear-free but slower
+                               // before queueing slotted redraws
+                               // of higher quality tiles
+                               // this is a pre-amble to eink modes
+                               //
+  CTX_TFT_CYCLE_BUF  = 1 << 4, // if set then we free buffers after each
+                               // use, higher risk of memory fragmentation
+                               // but making each frame blit a memory use peak
+
+  CTX_TFT_SHOW_FPS   = 1 << 5,
+  CTX_TFT_AUTO_332   = 1 << 6,
+} CtxTFtFlags;
+
+Ctx *ctx_new_tft (TFT_eSPI tft, int flags);
+
+#endif
+
+
 #if CTX_CAIRO
 #ifndef CAIRO_H
 typedef struct _cairo_t cairo_t;
@@ -643,7 +685,11 @@ float ctx_glyph_width   (Ctx *ctx, int unichar);
 
 int   ctx_load_font_ttf (const char *name, const void *ttf_contents, int length);
 
+#ifdef CTX_X86_64
+int ctx_x86_64_level (void);
+#endif
 
+void ctx_drawlist_clear (Ctx *ctx);
 
 enum _CtxModifierState
 {
@@ -668,24 +714,17 @@ typedef enum _CtxScrollDirection CtxScrollDirection;
 
 typedef struct _CtxEvent CtxEvent;
 
-void ctx_set_renderer (Ctx *ctx,
-                       void *renderer);
-void *ctx_get_renderer (Ctx *ctx);
-
-int ctx_renderer_is_sdl (Ctx *ctx);
-int ctx_renderer_is_fb (Ctx *ctx);
-int ctx_renderer_is_kms (Ctx *ctx);
-int ctx_renderer_is_tiled (Ctx *ctx);
-int ctx_renderer_is_ctx (Ctx *ctx);
-int ctx_renderer_is_term (Ctx *ctx);
+void ctx_set_backend (Ctx *ctx,
+                      void *backend);
+void *ctx_get_backend (Ctx *ctx);
 
 /* the following API is only available when CTX_EVENTS is defined to 1
  *
  * it provides the ability to register callbacks with the current path
  * that get delivered with transformed coordinates.
  */
-int ctx_is_dirty (Ctx *ctx);
-void ctx_set_dirty (Ctx *ctx, int dirty);
+int ctx_need_redraw (Ctx *ctx);
+void ctx_queue_draw (Ctx *ctx);
 float ctx_get_float (Ctx *ctx, uint32_t hash);
 void ctx_set_float (Ctx *ctx, uint32_t hash, float value);
 
@@ -749,6 +788,7 @@ enum _CtxEventType {
 typedef enum _CtxEventType CtxEventType;
 
 #define CTX_CLICK   CTX_PRESS   // SHOULD HAVE MORE LOGIC
+typedef struct _CtxClient CtxClient;
 
 struct _CtxEvent {
   CtxEventType  type;
@@ -783,7 +823,13 @@ struct _CtxEvent {
                          * MESSAGE events
                          *
                          * and the data for drop events are delivered
+                         *
+                         */
+                         /* XXX lifetime of this string should be longer
+                         * than the events, preferably interned. XXX
+                         * maybe add a flag for this?
                          */
+  int owns_string; /* if 1 call free.. */
   CtxScrollDirection scroll_direction;
 
 
@@ -829,8 +875,6 @@ int   ctx_add_idle           (Ctx *ctx, int (*idle_cb)(Ctx *ctx, void *idle_data
 
 void ctx_add_hit_region (Ctx *ctx, const char *id);
 
-void ctx_set_title (Ctx *ctx, const char *title);
-
 void ctx_listen_full (Ctx     *ctx,
                       float    x,
                       float    y,
@@ -861,9 +905,9 @@ void  ctx_listen_with_finalize (Ctx          *ctx,
 void ctx_init (int *argc, char ***argv); // is a no-op but could launch
                                          // terminal
 CtxEvent *ctx_get_event (Ctx *ctx);
-int       ctx_has_event (Ctx *ctx, int timeout);
 void      ctx_get_event_fds (Ctx *ctx, int *fd, int *count);
 
+
 int   ctx_pointer_is_down (Ctx *ctx, int no);
 float ctx_pointer_x (Ctx *ctx);
 float ctx_pointer_y (Ctx *ctx);
@@ -1595,19 +1639,31 @@ struct
   CtxEntry next_entry; // also pads size of CtxCommand slightly.
 };
 
-typedef struct _CtxImplementation CtxImplementation;
-struct _CtxImplementation
+typedef struct _CtxBackend CtxBackend;
+void ctx_windowtitle (Ctx *ctx, const char *text);
+struct _CtxBackend
 {
-  void (*process)        (void *renderer, CtxCommand *entry);
-  void (*reset)          (void *renderer);
-  void (*flush)          (void *renderer);
-  char *(*get_clipboard) (void *ctxctx);
-  void (*set_clipboard)  (void *ctxctx, const char *text);
-  void (*free)           (void *renderer);
+  Ctx                      *ctx;
+  void  (*process)         (Ctx *ctx, CtxCommand *entry);
+  void  (*reset)           (Ctx *ctx);
+  void  (*flush)           (Ctx *ctx);
+
+  void  (*set_windowtitle) (Ctx *ctx, const char *text);
+
+  char *(*get_event)       (Ctx *ctx, int timout_ms);
+
+  void                     (*consume_events) (Ctx *ctx);
+  void                     (*get_event_fds)  (Ctx *ctx, int *fd, int *count);
+  char *(*get_clipboard)   (Ctx *ctx);
+  void  (*set_clipboard)   (Ctx *ctx, const char *text);
+  void (*free)             (void *backend); /* the free pointers are abused as the differentiator
+                                               between different backends   */
+  void                     *user_data; // not used by ctx core
 };
 
 CtxCommand *ctx_iterator_next (CtxIterator *iterator);
 
+void ctx_handle_events (Ctx *ctx);
 #define ctx_arg_string()  ((char*)&entry[2].data.u8[0])
 
 
@@ -1833,9 +1889,13 @@ long vt_rev               (VT *vt);
 int  vt_has_blink         (VT *vt);
 int ctx_vt_had_alt_screen (VT *vt);
 
-int  ctx_clients_need_redraw   (Ctx *ctx);
-void ctx_clients_handle_events (Ctx *ctx);
+int ctx_clients_handle_events (Ctx *ctx);
+
+typedef struct _CtxList CtxList;
+CtxList *ctx_clients (Ctx *ctx);
 
+void ctx_set_fullscreen (Ctx *ctx, int val);
+int ctx_get_fullscreen (Ctx *ctx);
 
 typedef struct _CtxBuffer CtxBuffer;
 CtxBuffer *ctx_buffer_new_for_data (void *data, int width, int height,
@@ -1844,11 +1904,36 @@ CtxBuffer *ctx_buffer_new_for_data (void *data, int width, int height,
                                     void (*freefunc) (void *pixels, void *user_data),
                                     void *user_data);
 
-
-
-
-
-
+typedef enum CtxBackendType {
+  CTX_BACKEND_NONE,
+  CTX_BACKEND_CTX,
+  CTX_BACKEND_RASTERIZER,
+  CTX_BACKEND_HASHER,
+  CTX_BACKEND_HEADLESS,
+  CTX_BACKEND_TERM,
+  CTX_BACKEND_FB,
+  CTX_BACKEND_KMS,
+  CTX_BACKEND_TERMIMG,
+  CTX_BACKEND_CAIRO,
+  CTX_BACKEND_SDL,
+  CTX_BACKEND_DRAWLIST,
+} CtxBackendType;
+
+CtxBackendType ctx_backend_type (Ctx *ctx);
+
+static inline int ctx_backend_is_tiled (Ctx *ctx)
+{
+  switch (ctx_backend_type (ctx))
+  {
+    case CTX_BACKEND_FB:
+    case CTX_BACKEND_SDL:
+    case CTX_BACKEND_KMS:
+    case CTX_BACKEND_HEADLESS:
+      return 1;
+    default:
+      return 0;
+  }
+}
 
 #endif
 
@@ -3965,7 +4050,6 @@ static inline void *ctx_calloc (size_t size, size_t count)
   return ret;
 }
 
-typedef struct _CtxList CtxList;
 struct _CtxList {
   void *data;
   CtxList *next;
@@ -4352,17 +4436,17 @@ static inline CtxList *ctx_list_find_custom (CtxList *list,
 /* size (in pixels, w*h) that we cache rasterization for
  */
 #ifndef CTX_SHAPE_CACHE_DIM
-#define CTX_SHAPE_CACHE_DIM      (16*8)
+#define CTX_SHAPE_CACHE_DIM      (64*64)
 #endif
 
 #ifndef CTX_SHAPE_CACHE_MAX_DIM
-#define CTX_SHAPE_CACHE_MAX_DIM  20
+#define CTX_SHAPE_CACHE_MAX_DIM  256
 #endif
 
 /* maximum number of entries in shape cache
  */
 #ifndef CTX_SHAPE_CACHE_ENTRIES
-#define CTX_SHAPE_CACHE_ENTRIES  160
+#define CTX_SHAPE_CACHE_ENTRIES  1024
 #endif
 
 
@@ -4381,6 +4465,13 @@ static inline CtxList *ctx_list_find_custom (CtxList *list,
 #endif
 #endif
 
+
+#ifndef CTX_FAST_FILL_RECT
+#define CTX_FAST_FILL_RECT 1    /*  matters most for tiny rectangles where it shaves overhead, for larger rectangles
+                                    a ~15-20% performance win can be seen. */
+#endif
+
+
 #ifndef CTX_COMPOSITING_GROUPS
 #define CTX_COMPOSITING_GROUPS   1
 #endif
@@ -4447,8 +4538,8 @@ static inline CtxList *ctx_list_find_custom (CtxList *list,
 #define CTX_XML             1
 #endif
 
-#ifndef CTX_VT
-#define CTX_VT              0
+#ifndef CTX_CLIENTS
+#define CTX_CLIENTS              0
 #endif
 
 /* when ctx_math is defined, which it is by default, we use ctx' own
@@ -4514,7 +4605,7 @@ static inline CtxList *ctx_list_find_custom (CtxList *list,
 #endif
 
 /*  only source-over clear and copy will work, the API still
- *  through - but the renderer is limited, for use to measure
+ *  through - but the backend is limited, for use to measure
  *  size and possibly in severely constrained ROMs.
  */
 #ifndef CTX_BLENDING_AND_COMPOSITING
@@ -4535,6 +4626,10 @@ static inline CtxList *ctx_list_find_custom (CtxList *list,
 #define CTX_INLINED_NORMAL      1
 #endif
 
+#ifndef CTX_U8_TO_FLOAT_LUT
+#define CTX_U8_TO_FLOAT_LUT  0
+#endif
+
 #ifndef CTX_INLINED_GRADIENTS
 #define CTX_INLINED_GRADIENTS   1
 #endif
@@ -4567,6 +4662,7 @@ static inline CtxList *ctx_list_find_custom (CtxList *list,
 #define CTX_EVENTS              1
 #endif
 
+
 #ifndef CTX_LIMIT_FORMATS
 #define CTX_LIMIT_FORMATS       0
 #endif
@@ -4658,7 +4754,7 @@ static inline CtxList *ctx_list_find_custom (CtxList *list,
 #endif
 
 #ifndef CTX_ALWAYS_USE_NEAREST_FOR_SCALE1
-#define CTX_ALWAYS_USE_NEAREST_FOR_SCALE1 1
+#define CTX_ALWAYS_USE_NEAREST_FOR_SCALE1 0
 #endif
 
 /* force add format if we have shape cache */
@@ -4792,6 +4888,10 @@ static inline CtxList *ctx_list_find_custom (CtxList *list,
 #define CTX_HASH_COLS            8
 #endif
 
+#ifndef CTX_INLINE_FILL_RULE
+#define CTX_INLINE_FILL_RULE 1
+#endif
+
 #ifndef CTX_MAX_THREADS
 #define CTX_MAX_THREADS          8 // runtime is max of cores/2 and this
 #endif
@@ -4812,9 +4912,13 @@ static inline CtxList *ctx_list_find_custom (CtxList *list,
 #endif
 
 
+#ifndef CTX_SYNC_FRAMES
+#define CTX_SYNC_FRAMES  1
+#endif
+
 #ifdef CTX_RASTERIZER
 #if CTX_RASTERIZER==0
-#if CTX_SDL || CTX_FB
+#if CTX_SDL || CTX_FB || CTX_HEADLESS
 #undef CTX_RASTERIZER
 #define CTX_RASTERIZER 1
 #endif
@@ -4824,16 +4928,20 @@ static inline CtxList *ctx_list_find_custom (CtxList *list,
 #endif
 #endif
 
-#if CTX_RASTERIZER
-#ifndef CTX_COMPOSITE
-#define CTX_COMPOSITE 1
+#if CTX_SDL || CTX_FB || CTX_HEADLESS
+#if CTX_EVENTS
+#undef CTX_EVENTS
 #endif
-#else
-#ifndef CTX_COMPOSITE
-#define CTX_COMPOSITE 0
+#define CTX_EVENTS 1
+#endif
+
+#if CTX_EVENTS
+#ifndef CTX_HEADLESS
+#define CTX_HEADLESS 1
 #endif
 #endif
 
+
 #ifndef CTX_GRADIENT_CACHE_ELEMENTS
 #define CTX_GRADIENT_CACHE_ELEMENTS 256
 #endif
@@ -4861,13 +4969,18 @@ static inline CtxList *ctx_list_find_custom (CtxList *list,
 #endif
 
 #ifndef CTX_TILED
-#if CTX_SDL || CTX_FB || CTX_KMS
+#if CTX_SDL || CTX_FB || CTX_KMS || CTX_HEADLESS
 #define CTX_TILED 1
 #else
 #define CTX_TILED 0
 #endif
+#if !CTX_RASTERIZER
+#undef CTX_RASTERIZER
+#define CTX_RASTERIZER 1
+#endif
 #endif
 
+
 #ifndef CTX_THREADS
 #if CTX_TILED
 #define CTX_THREADS 1
@@ -4891,7 +5004,34 @@ static inline CtxList *ctx_list_find_custom (CtxList *list,
 #define thrd_t pthread_t
 #endif
 
+#ifndef CTX_SIMD_SUFFIX
+#define CTX_SIMD_SUFFIX(symbol) symbol##_generic
+#define CTX_SIMD_BUILD 0
+#else
+
+
+#define CTX_SIMD_BUILD 1
+#ifdef CTX_COMPOSITE
+#undef CTX_COMPOSITE
+#define CTX_COMPOSITE 1
+#endif
+
+#endif
+
+
+#if CTX_RASTERIZER
+#ifndef CTX_COMPOSITE
+#define CTX_COMPOSITE 1
+#endif
+#else
+#ifndef CTX_COMPOSITE
+#define CTX_COMPOSITE 0
+#endif
+#endif
 
+#ifndef CTX_COMPOSITE
+#define CTX_COMPOSITE 0
+#endif
  /* Copyright (C) 2020 Øyvind Kolås <pippin gimp org>
  */
 
@@ -4964,13 +5104,10 @@ ctx_invsqrtf (float x)
     float f;
     uint32_t i;
   } u = { x };
-  float xhalf = 0.5f * x;
-  int i=u.i;
-  i = 0x5f3759df - (i >> 1);
-  x = u.f;
-  x *= (1.5f - xhalf * x * x);
-  x *= (1.5f - xhalf * x * x); //repeating Newton-Raphson step for higher precision
-  return x;
+  u.i = 0x5f3759df - (u.i >> 1);
+  u.f *= (1.5f - 0.5f * x * u.f * u.f);
+  u.f *= (1.5f - 0.5f * x * u.f * u.f); //repeating Newton-Raphson step for higher precision
+  return u.f;
 }
 
 static CTX_INLINE float
@@ -4981,13 +5118,8 @@ ctx_invsqrtf_fast (float x)
     float f;
     uint32_t i;
   } u = { x };
-
-//float xhalf = 0.5f * x;
-  int i=u.i;
-  i = 0x5f3759df - (i >> 1);
-  x = u.f;
-//x *= (1.5f - xhalf * x * x);
-  return x;
+  u.i = 0x5f3759df - (u.i >> 1);
+  return u.f;
 }
 
 CTX_INLINE static float ctx_sqrtf (float a)
@@ -5231,11 +5363,12 @@ void ctx_exit (Ctx *ctx);
 void ctx_list_backends(void);
 int ctx_pixel_format_ebpp (CtxPixelFormat format);
 
+
 #endif
 #ifndef __CTX_CONSTANTS
 #define __CTX_CONSTANTS
 
-#define TOKENHASH(a)    ((uint64_t)a)
+#define TOKENHASH(a)    ((uint32_t)a)
 
 #define CTX_strokeSource TOKENHASH(3387288669)
 #define CTX_add_stop TOKENHASH(3572486242)
@@ -5481,6 +5614,7 @@ int ctx_pixel_format_ebpp (CtxPixelFormat format);
 #define CTX_aqua TOKENHASH(109634)
 #define CTX_transparent TOKENHASH(3143361910)
 #define CTX_currentColor TOKENHASH(2944012414)
+#define CTX_title TOKENHASH(11313768)
 
 #endif
 
@@ -5648,14 +5782,14 @@ struct _CtxColor
 #endif
 
 #if CTX_ENABLE_CM
+  float   red;
+  float   green;
+  float   blue;
 #if CTX_BABL
   const Babl *space; // gets copied from state when color is declared
 #else
   void   *space; // gets copied from state when color is declared, 
 #endif
-  float   red;
-  float   green;
-  float   blue;
 #endif
 };
 
@@ -5663,8 +5797,8 @@ typedef struct _CtxGradientStop CtxGradientStop;
 
 struct _CtxGradientStop
 {
-  float   pos;
   CtxColor color;
+  float   pos;
 };
 
 
@@ -5681,16 +5815,15 @@ typedef enum _CtxSourceType CtxSourceType;
 
 typedef struct _CtxPixelFormatInfo CtxPixelFormatInfo;
 
-
 struct _CtxBuffer
 {
   void               *data;
   int                 width;
   int                 height;
   int                 stride;
-  char               *eid;        // might be NULL, when not - should be unique for pixel contents
   int                 frame;      // last frame used in, everything > 3 can be removed,
                                   // as clients wont rely on it.
+  char               *eid;        // might be NULL, when not - should be unique for pixel contents
   CtxPixelFormatInfo *format;
   void (*free_func) (void *pixels, void *user_data);
   void               *user_data;
@@ -5709,6 +5842,7 @@ struct _CtxBuffer
 #endif
 };
 
+
 //void _ctx_user_to_device          (CtxState *state, float *x, float *y);
 //void _ctx_user_to_device_distance (CtxState *state, float *x, float *y);
 
@@ -5724,6 +5858,7 @@ struct _CtxSource
   int type;
   CtxMatrix  set_transform;
   CtxMatrix  transform;
+  int pad; // to align next properly
   union
   {
     CtxColor color;
@@ -5768,7 +5903,6 @@ struct _CtxGState
   CtxSource     source_stroke;
   CtxSource     source_fill;
   float         global_alpha_f;
-  uint8_t       global_alpha_u8;
 
   float         line_width;
   float         line_dash_offset;
@@ -5779,12 +5913,23 @@ struct _CtxGState
   float         shadow_offset_x;
   float         shadow_offset_y;
 #endif
-  int           clipped:1;
+  unsigned int        clipped:1;
+  CtxColorModel    color_model:8;
+  /* bitfield-pack small state-parts */
+  CtxLineCap          line_cap:2;
+  CtxLineJoin        line_join:2;
+  CtxFillRule        fill_rule:1;
+  unsigned int image_smoothing:1;
+  unsigned int            font:6;
+  unsigned int            bold:1;
+  unsigned int          italic:1;
 
+  uint8_t       global_alpha_u8;
   int16_t       clip_min_x;
   int16_t       clip_min_y;
   int16_t       clip_max_x;
   int16_t       clip_max_y;
+  int           n_dashes;
 
 #if CTX_ENABLE_CM
 #if CTX_BABL
@@ -5811,17 +5956,7 @@ struct _CtxGState
   CtxBlend                  blend_mode; // non-vectorization
 
   float dashes[CTX_PARSER_MAX_ARGS];
-  int n_dashes;
 
-  CtxColorModel    color_model;
-  /* bitfield-pack small state-parts */
-  CtxLineCap          line_cap:2;
-  CtxLineJoin        line_join:2;
-  CtxFillRule        fill_rule:1;
-  unsigned int image_smoothing:1;
-  unsigned int            font:6;
-  unsigned int            bold:1;
-  unsigned int          italic:1;
 };
 
 typedef enum
@@ -5843,8 +5978,8 @@ typedef enum
 struct _CtxDrawlist
 {
   CtxEntry *entries;
-  int       count;
-  int       size;
+  unsigned int count;
+  int size;
   uint32_t  flags;
   int       bitpack_pos;  // stream is bitpacked up to this offset
 };
@@ -5868,13 +6003,21 @@ struct _CtxState
 {
   int           has_moved:1;
   int           has_clipped:1;
+  int16_t       gstate_no;
+  int8_t        source; // used for the single-shifting to stroking
+                // 0  = fill
+                // 1  = start_stroke
+                // 2  = in_stroke
+                //
+                //   if we're at in_stroke at start of a source definition
+                //   we do filling
+
   float         x;
   float         y;
   int           min_x;
   int           min_y;
   int           max_x;
   int           max_y;
-  int16_t       gstate_no;
   CtxGState     gstate;
   CtxGState     gstate_stack[CTX_MAX_STATES];//at end, so can be made dynamic
 #if CTX_GRADIENTS
@@ -5888,13 +6031,6 @@ struct _CtxState
 #endif
   CtxKeyDbEntry keydb[CTX_MAX_KEYDB];
   char          stringpool[CTX_STRINGPOOL_SIZE];
-  int8_t        source; // used for the single-shifting to stroking
-                // 0  = fill
-                // 1  = start_stroke
-                // 2  = in_stroke
-                //
-                //   if we're at in_stroke at start of a source definition
-                //   we do filling
 };
 
 
@@ -6029,21 +6165,24 @@ struct _CtxEvents
   CtxList         *idles;
   CtxList         *idles_to_remove;
   CtxList         *idles_to_add;
-  int              in_idle_dispatch;
   CtxList         *events; // for ctx_get_event
-  int              ctx_get_event_enabled;
-  int              idle_id;
   CtxBinding       bindings[CTX_MAX_KEYBINDINGS]; /*< better as list, uses no mem if unused */
   int              n_bindings;
-  int              width;
-  int              height;
+  int              in_idle_dispatch;
+  int              ctx_get_event_enabled;
+  int              idle_id;
   CtxList         *items;
   CtxItem         *last_item;
   CtxModifierState modifier_state;
+  double           tap_hysteresis;
+#if CTX_CLIENTS
+  CtxList         *clients;
+  CtxClient *active;
+  CtxClient *active_tab;
+#endif
   int              tap_delay_min;
   int              tap_delay_max;
   int              tap_delay_hold;
-  double           tap_hysteresis;
 };
 
 
@@ -6059,33 +6198,37 @@ typedef struct _CtxEidInfo
 
 struct _Ctx
 {
-  CtxImplementation *renderer;
-  CtxDrawlist        drawlist;
-  int                transformation;
-  CtxBuffer          texture[CTX_MAX_TEXTURES];
-  Ctx               *texture_cache;
-  CtxList           *eid_db;
-  int                rev;
-  void              *backend;
-  CtxState           state;        /**/
-  int                frame; /* used for texture lifetime */
+  CtxBackend       *backend;
+  CtxDrawlist       drawlist;
+  int               transformation;
+  CtxBuffer         texture[CTX_MAX_TEXTURES];
+  Ctx              *texture_cache;
+  CtxList          *eid_db;
+  CtxState          state;        /**/
+  int               frame; /* used for texture lifetime */
+  int               width;
+  int               height;
 #if CTX_EVENTS 
-  CtxCursor          cursor;
-  int                quit;
-  int                dirty;
-  CtxEvents          events;
-  int                mouse_fd;
-  int                mouse_x;
-  int                mouse_y;
+  CtxCursor         cursor;
+  int               quit;
+  int               dirty;
+  CtxEvents         events;
+  int               mouse_fd;
+  int               mouse_x;
+  int               mouse_y;
 #endif
 #if CTX_CURRENT_PATH
-  CtxDrawlist    current_path; // possibly transformed coordinates !
-  CtxIterator        current_path_iterator;
+  CtxDrawlist       current_path; // possibly transformed coordinates !
+  CtxIterator       current_path_iterator;
 #endif
 };
 
+static inline void
+ctx_process (Ctx *ctx, CtxEntry *entry)
+{
+  ctx->backend->process (ctx, (CtxCommand *) entry);
+}
 
-static void ctx_process (Ctx *ctx, CtxEntry *entry);
 CtxBuffer *ctx_buffer_new (int width, int height,
                            CtxPixelFormat pixel_format);
 void ctx_buffer_free (CtxBuffer *buffer);
@@ -6107,8 +6250,8 @@ struct _CtxInternalFsEntry
 
 struct _CtxPixelFormatInfo
 {
-  CtxPixelFormat pixel_format;
-  uint8_t        components:4; /* number of components */
+  CtxPixelFormat pixel_format:8;
+  uint8_t        components; /* number of components */
   uint8_t        bpp; /* bits  per pixel - for doing offset computations
                          along with rowstride found elsewhere, if 0 it indicates
                          1/8  */
@@ -6117,14 +6260,14 @@ struct _CtxPixelFormatInfo
                          ebpp of the working space applied */
   uint8_t        dither_red_blue;
   uint8_t        dither_green;
-  CtxPixelFormat composite_format;
+  CtxPixelFormat composite_format:8;
 
   void         (*to_comp) (CtxRasterizer *r,
                            int x, const void * __restrict__ src, uint8_t * __restrict__ comp, int count);
   void         (*from_comp) (CtxRasterizer *r,
                              int x, const uint8_t * __restrict__ comp, void *__restrict__ dst, int count);
   void         (*apply_coverage) (CtxRasterizer *r, uint8_t * __restrict__ dst, uint8_t * __restrict__ src, int x, uint8_t *coverage,
-                          int count);
+                          unsigned int count);
   void         (*setup) (CtxRasterizer *r);
 };
 
@@ -6139,8 +6282,47 @@ ctx_interpret_pos_bare (CtxState *state, CtxEntry *entry, void *data);
 static inline void
 ctx_drawlist_deinit (CtxDrawlist *drawlist);
 
-CtxPixelFormatInfo *
-ctx_pixel_format_info (CtxPixelFormat format);
+//extern CtxPixelFormatInfo *(*ctx_pixel_format_info) (CtxPixelFormat format);
+CtxPixelFormatInfo *ctx_pixel_format_info (CtxPixelFormat format);
+
+
+
+extern void (*ctx_composite_stroke_rect) (CtxRasterizer *rasterizer,
+                           float          x0,
+                           float          y0,
+                           float          x1,
+                           float          y1,
+                           float          line_width);
+
+extern void (*ctx_composite_setup) (CtxRasterizer *rasterizer);
+
+
+struct _CtxShapeEntry
+{
+  uint32_t hash;
+  uint16_t width;
+  uint16_t height;
+  int      last_frame; // xxx
+  uint32_t uses;  // instrumented for longer keep-alive
+  uint8_t  data[];
+};
+
+typedef struct _CtxShapeEntry CtxShapeEntry;
+
+extern void (*ctx_rasterizer_rasterize_edges) (CtxRasterizer *rasterizer, const int fill_rule
+#if CTX_SHAPE_CACHE
+                ,CtxShapeEntry *shape
+#endif
+                );
+
+
+
+extern void (*ctx_composite_fill_rect) (CtxRasterizer *rasterizer,
+                           float        x0,
+                           float        y0,
+                           float        x1,
+                           float        y1,
+                           uint8_t      cov);
 
 
 int ctx_utf8_len (const unsigned char first_byte);
@@ -6160,17 +6342,6 @@ typedef void (*CtxFragment) (CtxRasterizer *rasterizer, float x, float y, void *
 
 #define CTX_MAX_GAUSSIAN_KERNEL_DIM    512
 
-struct _CtxShapeEntry
-{
-  uint32_t hash;
-  uint16_t width;
-  uint16_t height;
-  int      last_frame; // xxx
-  uint32_t uses;  // instrumented for longer keep-alive
-  uint8_t  data[];
-};
-
-typedef struct _CtxShapeEntry CtxShapeEntry;
 
 
 struct _CtxShapeCache
@@ -6183,15 +6354,28 @@ typedef struct _CtxShapeCache CtxShapeCache;
 
 typedef enum {
    CTX_COV_PATH_FALLBACK =0,
-   CTX_COV_PATH_OVER,
-   CTX_COV_PATH_COPY,
-   CTX_COV_PATH_COPY_FRAGMENT,
-   CTX_COV_PATH_OVER_FRAGMENT
+   CTX_COV_PATH_RGBA8_OVER,
+   CTX_COV_PATH_RGBA8_COPY,
+   CTX_COV_PATH_RGBA8_COPY_FRAGMENT,
+   CTX_COV_PATH_RGBA8_OVER_FRAGMENT,
+   CTX_COV_PATH_GRAYA8_COPY,
+   CTX_COV_PATH_GRAY1_COPY,
+
+
+   CTX_COV_PATH_RGB565_COPY,
+   CTX_COV_PATH_RGB332_COPY,
+   CTX_COV_PATH_GRAY8_COPY,
+   CTX_COV_PATH_RGBAF_COPY,
+   CTX_COV_PATH_RGB8_COPY,
+   CTX_COV_PATH_CMYK8_COPY,
+   CTX_COV_PATH_CMYKA8_COPY
+
+
 } CtxCovPath;
 
 struct _CtxRasterizer
 {
-  CtxImplementation vfuncs;
+  CtxBackend backend;
   /* these should be initialized and used as the bounds for rendering into the
      buffer as well XXX: not yet in use, and when in use will only be
      correct for axis aligned clips - proper rasterization of a clipping path
@@ -6199,78 +6383,81 @@ struct _CtxRasterizer
    */
 
 
-#define CTX_COMPOSITE_ARGUMENTS CtxRasterizer *rasterizer, uint8_t * __restrict__ dst, uint8_t * __restrict__ src, int x0, uint8_t * __restrict__ coverage, int count
+#define CTX_COMPOSITE_ARGUMENTS CtxRasterizer *rasterizer, uint8_t * __restrict__ dst, uint8_t * __restrict__ src, int x0, uint8_t * __restrict__ coverage, unsigned int count
   void (*comp_op)(CTX_COMPOSITE_ARGUMENTS);
   CtxFragment fragment;
-  Ctx       *ctx;
+  //Ctx       *ctx;
   CtxState  *state;
   void      *buf;
   int fast_aa;
   CtxCovPath  comp;
-  float      x;  // < redundant? use state instead?
-  float      y;
+  void       (*apply_coverage) (CtxRasterizer *r, uint8_t * __restrict__ dst, uint8_t * __restrict__ src, int x, uint8_t *coverage, unsigned int count);
 
   unsigned int aa;          // level of vertical aa
-  int prev_active_edges;
-  int active_edges;
-  int pending_edges;
-  int ending_edges;
-  int edge_pos;         // where we're at in iterating all edges
+  int        uses_transforms;
+  unsigned int prev_active_edges;
+  unsigned int active_edges;
+  unsigned int pending_edges;
+  unsigned int ending_edges;
+  unsigned int edge_pos;         // where we're at in iterating all edges
   unsigned int needs_aa3; // count of how many edges implies antialiasing
   unsigned int needs_aa5; // count of how many edges implies antialiasing
   unsigned int needs_aa15; // count of how many edges implies antialiasing
-  int        horizontal_edges;
+  unsigned int horizontal_edges;
 
-  int scanline;
+  int        scanline;
   int        scan_min;
   int        scan_max;
   int        col_min;
   int        col_max;
 
-
   int        inner_x;
   int        inner_y;
 
+  float      x;
+  float      y;
+
   float      first_x;
   float      first_y;
-  int        uses_transforms;
-  int        has_shape:2;
-  int        has_prev:2;
-  int        preserve:1;
 
-  int16_t    blit_x;
-  int16_t    blit_y;
-  int16_t    blit_width;
-  int16_t    blit_height;
-  int16_t    blit_stride;
+  uint16_t    blit_x;
+  uint16_t    blit_y;
+  uint16_t    blit_width;
+  uint16_t    blit_height;
+  uint16_t    blit_stride;
 
+  unsigned int  clip_rectangle:1;
+  unsigned int  has_shape:2;
+  int  has_prev:2;
+  unsigned int  preserve:1;
+#if CTX_ENABLE_SHADOW_BLUR
+  unsigned int  in_shadow:1;
+#endif
+  unsigned int  in_text:1;
+  unsigned int  swap_red_green:1;
+
+#if CTX_BRAILLE_TEXT
+  unsigned int  term_glyphs:1; // store appropriate glyphs for redisplay
+#endif
+  int        shadow_x;
+#if CTX_BRAILLE_TEXT
+  CtxList   *glyphs;
+#endif
   CtxPixelFormatInfo *format;
   Ctx       *texture_source; /* normally same as ctx */
+  int        shadow_y;
+
+  uint8_t    color[4*5];   // in compositing format
+  uint16_t   color_native;  //
+  uint16_t   color_nativeB[5];
 
   int edges[CTX_MAX_EDGES]; // integer position in edge array
   CtxDrawlist edge_list;
 
-
-
-
-
-#if CTX_ENABLE_SHADOW_BLUR
-  int in_shadow;
-#endif
-  int in_text;
-  int shadow_x;
-  int shadow_y;
-
-  int swap_red_green;
-  uint8_t             color[4*5];
-
-  int clip_rectangle;
-
 #if CTX_ENABLE_CLIP
   CtxBuffer *clip_buffer;
 #endif
 
-
 #if CTX_COMPOSITING_GROUPS
   void      *saved_buf; // when group redirected
   CtxBuffer *group[CTX_GROUP_MAX];
@@ -6279,12 +6466,11 @@ struct _CtxRasterizer
   float      kernel[CTX_MAX_GAUSSIAN_KERNEL_DIM];
 #endif
 
+
+
 #if CTX_SHAPE_CACHE
-  CtxShapeCache shape_cache;
-#endif
-#if CTX_BRAILLE_TEXT
-  int        term_glyphs:1; // store appropriate glyphs for redisplay
-  CtxList   *glyphs;
+  CtxShapeCache shape_cache; /* needs to be at end of struct, it
+                                is excluded from clearing */
 #endif
 };
 
@@ -6301,33 +6487,15 @@ struct _CtxHasher
   int           cols;
   int           rows;
   uint8_t      *hashes;
-  CtxSHA1       sha1_fill; 
-  CtxSHA1       sha1_stroke;
+  CtxSHA1       sha1_fill[CTX_MAX_STATES]; 
+  CtxSHA1       sha1_stroke[CTX_MAX_STATES];
+  int           source_level;
 };
 
 #if CTX_RASTERIZER
 void ctx_rasterizer_deinit (CtxRasterizer *rasterizer);
 #endif
 
-#if CTX_EVENTS
-extern int ctx_native_events;
-
-#if CTX_SDL
-extern int ctx_sdl_events;
-int ctx_sdl_consume_events (Ctx *ctx);
-#endif
-
-#if CTX_FB
-extern int ctx_fb_events;
-int ctx_fb_consume_events (Ctx *ctx);
-#endif
-
-
-int ctx_nct_consume_events (Ctx *ctx);
-int ctx_ctx_consume_events (Ctx *ctx);
-
-#endif
-
 enum {
   NC_MOUSE_NONE  = 0,
   NC_MOUSE_PRESS = 1,  /* "mouse-pressed", "mouse-released" */
@@ -6343,20 +6511,10 @@ int ctx_terminal_cols   (void);
 int ctx_terminal_rows   (void);
 extern int ctx_frame_ack;
 
-int ctx_nct_consume_events (Ctx *ctx);
-
 typedef struct _CtxCtx CtxCtx;
 struct _CtxCtx
 {
-   void (*render) (void *ctxctx, CtxCommand *command);
-   void (*reset)  (void *ctxvtx);
-   void (*flush)  (void *ctxctx);
-   char *(*get_clipboard) (void *ctxctx);
-   void (*set_clipboard) (void *ctxctx, const char *text);
-   void (*free)   (void *ctxctx);
-   Ctx *ctx;
-   int  width;
-   int  height;
+   CtxBackend backend;
    int  cols;
    int  rows;
    int  was_down;
@@ -6370,30 +6528,31 @@ ctx_set (Ctx *ctx, uint32_t key_hash, const char *string, int len);
 const char *
 ctx_get (Ctx *ctx, const char *key);
 
-int ctx_renderer_is_term (Ctx *ctx);
 Ctx *ctx_new_ctx (int width, int height);
 Ctx *ctx_new_fb (int width, int height);
+Ctx *ctx_new_headless (int width, int height);
 Ctx *ctx_new_kms (int width, int height);
 Ctx *ctx_new_sdl (int width, int height);
 Ctx *ctx_new_term (int width, int height);
 Ctx *ctx_new_termimg (int width, int height);
 
 int ctx_resolve_font (const char *name);
+
+#if CTX_U8_TO_FLOAT_LUT
 extern float ctx_u8_float[256];
 #define ctx_u8_to_float(val_u8) ctx_u8_float[((uint8_t)(val_u8))]
-//#define ctx_u8_to_float(val_u8) (val_u8/255.0f)
-//
-//
-
+#else /* no lookup table: divide directly; the argument is parenthesized so an expression is scaled as a whole */
+#define ctx_u8_to_float(val_u8) ((val_u8)/255.0f)
+#endif
 
-static uint8_t ctx_float_to_u8 (float val_f)
+static inline uint8_t ctx_float_to_u8 (float val_f)
 {
+#if 1 /* bit-trick conversion; NOTE(review): unlike the #else branch it has no clamp for val_f outside 0..1 - confirm callers stay in range */
+  union { float f; uint32_t i; } u;
+  u.f = 32768.0f + val_f * (255.0f / 256.0f); /* in [2^15,2^16) one float step is 1/256 (assuming IEEE-754 single precision), so the low mantissa byte ends up as roughly round(val_f*255) */
+  return (uint8_t)u.i; /* keep only that low byte */
+#else
   return val_f < 0.0f ? 0 : val_f > 1.0f ? 0xff : 0xff * val_f +  0.5f;
-#if 0
-  int val_i = val_f * 255.999f;
-  if (val_i < 0) { return 0; }
-  else if (val_i > 255) { return 255; }
-  return val_i;
 #endif
 }
 
@@ -6528,7 +6687,6 @@ ctx_set_cmyk_space (Ctx *ctx, int device_space);
 CtxRasterizer *
 ctx_rasterizer_init (CtxRasterizer *rasterizer, Ctx *ctx, Ctx *texture_source, CtxState *state, void *data, 
int x, int y, int width, int height, int stride, CtxPixelFormat pixel_format, CtxAntialias antialias);
 
-
 CTX_INLINE static uint8_t ctx_lerp_u8 (uint8_t v0, uint8_t v1, uint8_t dx)
 {
 #if 0
@@ -6600,8 +6758,6 @@ CTX_INLINE static uint32_t ctx_lerp_RGBA8_2 (const uint32_t v0, uint32_t si_ga,
      (((di_ga + (0xff00ff + d_ga * cov))      & 0xff00ff00));
 }
 
-
-
 CTX_INLINE static float
 ctx_lerpf (float v0, float v1, float dx)
 {
@@ -6683,13 +6839,8 @@ struct _EvSource
 
 struct _CtxTiled
 {
-   void (*render)    (void *term, CtxCommand *command);
-   void (*reset)     (void *term);
-   void (*flush)     (void *term);
-   char *(*get_clipboard) (void *ctxctx);
-   void (*set_clipboard) (void *ctxctx, const char *text);
-   void (*free)      (void *term);
-   Ctx          *ctx;
+   CtxBackend backend;
+   void (*show_frame) (void *backend, int block);
    int           width;
    int           height;
    int           cols;
@@ -6729,12 +6880,74 @@ struct _CtxTiled
 #endif
 };
 
-static void
+static inline Ctx *ctx_backend_get_ctx (void *backend)
+{
+  /* the opaque pointer is viewed as a CtxBackend; NULL in gives NULL out */
+  CtxBackend *self = (CtxBackend*)backend;
+  return self ? self->ctx : NULL;
+}
+
+void
 _ctx_texture_prepare_color_management (CtxRasterizer *rasterizer,
                                       CtxBuffer     *buffer);
 
 int ctx_is_set (Ctx *ctx, uint32_t hash);
 
+static Ctx *_ctx_new_drawlist (int width, int height);
+
+/**
+ * ctx_new_ui:
+ *
+ * Create a new interactive ctx context, might depend on additional
+ * integration.
+ *
+ * The values for backend are as for the environment variable,
+ * NULL for auto.
+ */
+static Ctx *ctx_new_ui (int width, int height, const char *backend);
+
+static inline void
+_ctx_matrix_apply_transform (const CtxMatrix *m, float *x, float *y)
+{
+  /* run (*x,*y) through m in place; both inputs are copied first because *x is overwritten before *y is computed */
+  const float px = *x, py = *y;
+  *x = px * m->m[0][0] + py * m->m[1][0] + m->m[2][0];
+  *y = py * m->m[1][1] + px * m->m[0][1] + m->m[2][1];
+}
+
+static inline void
+_ctx_matrix_multiply (CtxMatrix       *result,
+                      const CtxMatrix *t,
+                      const CtxMatrix *s)
+{
+  CtxMatrix out; /* built in a temporary, so result may alias t or s */
+  for (int row = 0; row < 3; row++)
+    for (int col = 0; col < 2; col++)
+      out.m[row][col] = t->m[row][0] * s->m[0][col] + t->m[row][1] * s->m[1][col];
+  /* row 2 additionally picks up row 2 of s */
+  out.m[2][0] = out.m[2][0] + s->m[2][0];
+  out.m[2][1] = out.m[2][1] + s->m[2][1];
+  *result = out;
+}
+
+static inline void
+_ctx_matrix_identity (CtxMatrix *matrix)
+{
+  /* ones on the diagonal of rows 0 and 1, zeros everywhere else including row 2 */
+  for (int row = 0; row < 3; row++)
+    {
+      matrix->m[row][0] = (row == 0) ? 1.0f : 0.0f;
+      matrix->m[row][1] = (row == 1) ? 1.0f : 0.0f;
+    }
+}
+
+
+static int ctx_float_to_string_index (float val);
+
+static void ctx_state_set_blob (CtxState *state, uint32_t key, uint8_t *data, int len);
+
+int ctx_matrix_no_skew_or_rotate (CtxMatrix *matrix);
+
 #endif
 
 
@@ -6743,14460 +6956,15606 @@ CtxColor   *ctx_color_new      (void);
 int         ctx_get_int        (Ctx *ctx, uint32_t hash);
 int         ctx_get_is_set     (Ctx *ctx, uint32_t hash);
 Ctx        *ctx_new_for_buffer (CtxBuffer *buffer);
+#ifndef CTX_AUDIO_H
+#define CTX_AUDIO_H
 
-#if CTX_IMPLEMENTATION
+#if !__COSMOPOLITAN__
+#include <stdint.h>
+#endif
 
-#define SHA1_IMPLEMENTATION
-/* LibTomCrypt, modular cryptographic library -- Tom St Denis
- *
- * LibTomCrypt is a library that provides various cryptographic
- * algorithms in a highly modular and flexible manner.
- *
- * The library is free for all purposes without any express
- * guarantee it works.
- *
- * Tom St Denis, tomstdenis gmail com, http://libtom.org
- *
- * The plain ANSIC sha1 functionality has been extracted from libtomcrypt,
- * and is included directly in the sources. /Øyvind K. - since libtomcrypt
- * is public domain the adaptations done here to make the sha1 self contained
- * also is public domain.
+/* This enum should be kept in sync with the corresponding mmm enum.
  */
-#ifndef __SHA1_H
-#define __SHA1_H
-#if !__COSMOPOLITAN__
-#include <inttypes.h>
+typedef enum {
+  CTX_f32,
+  CTX_f32S,
+  CTX_s16,
+  CTX_s16S
+} CtxPCM;
+
+void   ctx_pcm_set_format        (Ctx *ctx, CtxPCM format);
+CtxPCM ctx_pcm_get_format        (Ctx *ctx);
+int    ctx_pcm_get_sample_rate   (Ctx *ctx);
+void   ctx_pcm_set_sample_rate   (Ctx *ctx, int sample_rate);
+int    ctx_pcm_get_frame_chunk   (Ctx *ctx);
+int    ctx_pcm_get_queued        (Ctx *ctx);
+float  ctx_pcm_get_queued_length (Ctx *ctx);
+int    ctx_pcm_queue             (Ctx *ctx, const int8_t *data, int frames);
+
 #endif
+#ifndef __CTX_CLIENTS_H
+#define __CTX_CLIENTS_H
+
+typedef enum CtxClientFlags { /* one bit per flag, so values can be OR-ed together */
+  ITK_CLIENT_UI_RESIZABLE = 1<<0,
+  ITK_CLIENT_CAN_LAUNCH   = 1<<1,
+  ITK_CLIENT_MAXIMIZED    = 1<<2,
+  ITK_CLIENT_ICONIFIED    = 1<<3,
+  ITK_CLIENT_SHADED       = 1<<4,
+  ITK_CLIENT_TITLEBAR     = 1<<5,
+  ITK_CLIENT_LAYER2       = 1<<6,  // used for having a second set
+                                   // to draw - useful for splitting
+                                   // scrolled and HUD items
+                                   // with HUD being LAYER2
+                                  
+  ITK_CLIENT_KEEP_ALIVE   = 1<<7,  // presumably: do not automatically
+                                   // remove after process quits
+  ITK_CLIENT_FINISHED     = 1<<8,  // NOTE(review): previously carried a copy of the comment above; exact meaning not visible here - confirm
+  ITK_CLIENT_PRELOAD      = 1<<9
+} CtxClientFlags;
 
+typedef void (*CtxClientFinalize)(CtxClient *client, void *user_data);
 
-int ctx_sha1_init(CtxSHA1 * sha1);
-CtxSHA1 *ctx_sha1_new (void)
-{
-  CtxSHA1 *state = (CtxSHA1*)calloc (sizeof (CtxSHA1), 1);
-  ctx_sha1_init (state);
-  return state;
-}
-void ctx_sha1_free (CtxSHA1 *sha1)
-{
-  free (sha1);
-}
+struct _CtxClient {
+  VT    *vt;        // or NULL when thread
+
+  long       rev;
+
+  CtxList *events;  // we could use this queue also for vt
 
+  Ctx     *ctx;
+  char    *title;
+  int      x;
+  int      y;
+  int      width;
+  int      height;
+  float    opacity;
+  CtxClientFlags flags;
 #if 0
-          CtxSHA1 sha1;
-          ctx_sha1_init (&sha1);
-          ctx_sha1_process(&sha1, (unsigned char*)&shape_rect, sizeof (CtxIntRectangle));
-          ctx_sha1_done(&sha1, (unsigned char*)ctx_sha1_hash);
+  int      shaded;
+  int      iconified;
+  int      maximized;
+  int      resizable;
+#endif
+  int      unmaximized_x;
+  int      unmaximized_y;
+  int      unmaximized_width;
+  int      unmaximized_height;
+  int      do_quit;
+  long     drawn_rev;
+  int      id;
+  int      internal; // render a settings window rather than a vt
+
+#if CTX_THREADS
+  thrd_t tid;     // and only split code path in processing?
+                    // -- why?
 #endif
+  void (*start_routine)(Ctx *ctx, void *user_data);
+  void    *user_data;
+  CtxClientFinalize finalize;
+  Ctx     *sub_ctx;
+  CtxList *ctx_events;
 
-#ifdef SHA1_FF0
-#undef SHA1_FF0
+
+  /* we want to keep variation at the end */
+#if CTX_THREADS
+  mtx_t    mtx;
 #endif
-#ifdef SHA1_FF1
-#undef SHA1_FF1
+#if VT_RECORD
+  Ctx     *recording;
 #endif
+};
 
-#ifdef SHA1_IMPLEMENTATION
-#if !__COSMOPOLITAN__
-#include <stdlib.h>
-#include <string.h>
-#endif
+int   ctx_client_resize        (Ctx *ctx, int id, int width, int height);
+void  ctx_client_set_font_size (Ctx *ctx, int id, float font_size);
+float ctx_client_get_font_size (Ctx *ctx, int id);
+void  ctx_client_maximize      (Ctx *ctx, int id);
 
-#define STORE64H(x,                                                             y)                           
                                          \
-   { (y)[0] = (unsigned char)(((x)>>56)&255); (y)[1] = (unsigned                char)(((x)>>48)&255);     \
-     (y)[2] = (unsigned char)(((x)>>40)&255); (y)[3] = (unsigned                char)(((x)>>32)&255);     \
-     (y)[4] = (unsigned char)(((x)>>24)&255); (y)[5] = (unsigned                char)(((x)>>16)&255);     \
-     (y)[6] = (unsigned char)(((x)>>8)&255); (y)[7] = (unsigned char)((x)&255); }
 
-#define STORE32H(x,                                                             y)                           
                                          \
-     { (y)[0] = (unsigned char)(((x)>>24)&255); (y)[1] = (unsigned              char)(((x)>>16)&255);   \
-       (y)[2] = (unsigned char)(((x)>>8)&255); (y)[3] = (unsigned               char)((x)&255); }
+CtxClient *vt_get_client (VT *vt);
+CtxClient *ctx_client_new (Ctx *ctx,
+                           const char *commandline,
+                           int x, int y, int width, int height,
+                           float font_size,
+                           CtxClientFlags flags,
+                           void *user_data,
+                           CtxClientFinalize client_finalize);
 
-#define LOAD32H(x, y)                            \
-     { x = ((unsigned long)((y)[0] & 255)<<24) | \
-           ((unsigned long)((y)[1] & 255)<<16) | \
-           ((unsigned long)((y)[2] & 255)<<8)  | \
-           ((unsigned long)((y)[3] & 255)); }
+CtxClient *ctx_client_new_argv (Ctx *ctx, char **argv, int x, int y, int width, int height, float font_size, 
CtxClientFlags flags, void *user_data,
+                CtxClientFinalize client_finalize);
+int ctx_clients_need_redraw (Ctx *ctx);
 
-/* rotates the hard way */
-#define ROL(x, y)  ((((unsigned long)(x)<<(unsigned long)((y)&31)) | (((unsigned 
long)(x)&0xFFFFFFFFUL)>>(unsigned long)(32-((y)&31)))) & 0xFFFFFFFFUL)
-#define ROLc(x, y) ROL(x,y)
+CtxClient *ctx_client_new_thread (Ctx *ctx, void (*start_routine)(Ctx *ctx, void *user_data),
+                                  int x, int y, int width, int height, float font_size, CtxClientFlags 
flags, void *user_data, CtxClientFinalize finalize);
 
-#define CRYPT_OK     0
-#define CRYPT_ERROR  1
-#define CRYPT_NOP    2
+extern float ctx_shape_cache_rate;
+extern int _ctx_max_threads;
 
-#ifndef MAX
-   #define MAX(x, y) ( ((x)>(y))?(x):(y) )
-#endif
-#ifndef MIN
-   #define MIN(x, y) ( ((x)<(y))?(x):(y) )
-#endif
+CtxEvent *ctx_event_copy (CtxEvent *event);
 
-/* a simple macro for making hash "process" functions */
-#define HASH_PROCESS(func_name, compress_name, state_var, block_size)               \
-int func_name (CtxSHA1 *sha1, const unsigned char *in, unsigned long inlen)      \
-{                                                                                   \
-    unsigned long n;                                                                \
-    int           err;                                                              \
-    assert (sha1 != NULL);                                                          \
-    assert (in != NULL);                                                            \
-    if (sha1->curlen > sizeof(sha1->buf)) {                                         \
-       return -1;                                                                   \
-    }                                                                               \
-    while (inlen > 0) {                                                             \
-        if (sha1->curlen == 0 && inlen >= block_size) {                             \
-           if ((err = compress_name (sha1, (unsigned char *)in)) != CRYPT_OK) {     \
-              return err;                                                           \
-           }                                                                        \
-           sha1->length += block_size * 8;                                          \
-           in             += block_size;                                            \
-           inlen          -= block_size;                                            \
-        } else {                                                                    \
-           n = MIN(inlen, (block_size - sha1->curlen));                             \
-           memcpy(sha1->buf + sha1->curlen, in, (size_t)n);                         \
-           sha1->curlen += n;                                                       \
-           in             += n;                                                     \
-           inlen          -= n;                                                     \
-           if (sha1->curlen == block_size) {                                        \
-              if ((err = compress_name (sha1, sha1->buf)) != CRYPT_OK) {            \
-                 return err;                                                        \
-              }                                                                     \
-              sha1->length += 8*block_size;                                         \
-              sha1->curlen = 0;                                                     \
-           }                                                                        \
-       }                                                                            \
-    }                                                                               \
-    return CRYPT_OK;                                                                \
-}
+void  ctx_client_move         (Ctx *ctx, int id, int x, int y);
+void  ctx_client_shade_toggle (Ctx *ctx, int id);
+float ctx_client_min_y_pos    (Ctx *ctx);
+float ctx_client_max_y_pos    (Ctx *ctx);
+void ctx_client_paste (Ctx *ctx, int id, const char *str);
+char  *ctx_client_get_selection        (Ctx *ctx, int id);
 
-/**********************/
+void  ctx_client_rev_inc      (CtxClient *client);
+long  ctx_client_rev          (CtxClient *client);
 
-#define F0(x,y,z)  (z ^ (x & (y ^ z)))
-#define F1(x,y,z)  (x ^ y ^ z)
-#define F2(x,y,z)  ((x & y) | (z & (x | y)))
-#define F3(x,y,z)  (x ^ y ^ z)
+int   ctx_clients_active      (Ctx *ctx);
 
-static int  ctx_sha1_compress(CtxSHA1 *sha1, unsigned char *buf)
-{
-    uint32_t a,b,c,d,e,W[80],i;
+CtxClient *ctx_client_by_id (Ctx *ctx, int id);
 
-    /* copy the state into 512-bits into W[0..15] */
-    for (i = 0; i < 16; i++) {
-        LOAD32H(W[i], buf + (4*i));
-    }
+int ctx_clients_draw (Ctx *ctx, int layer2);
 
-    /* copy state */
-    a = sha1->state[0];
-    b = sha1->state[1];
-    c = sha1->state[2];
-    d = sha1->state[3];
-    e = sha1->state[4];
+void ctx_client_feed_keystring (CtxClient *client, CtxEvent *event, const char *str);
+// need not be public?
+void ctx_client_register_events (CtxClient *client, Ctx *ctx, double x0, double y0);
 
-    /* expand it */
-    for (i = 16; i < 80; i++) {
-        W[i] = ROL(W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16], 1); 
-    }
+void ctx_client_remove (Ctx *ctx, CtxClient *client);
 
-    /* compress */
-    /* round one */
-    #define SHA1_FF0(a,b,c,d,e,i) e = (ROLc(a, 5) + F0(b,c,d) + e + W[i] + 0x5a827999UL); b = ROLc(b, 30);
-    #define SHA1_FF1(a,b,c,d,e,i) e = (ROLc(a, 5) + F1(b,c,d) + e + W[i] + 0x6ed9eba1UL); b = ROLc(b, 30);
-    #define SHA1_FF2(a,b,c,d,e,i) e = (ROLc(a, 5) + F2(b,c,d) + e + W[i] + 0x8f1bbcdcUL); b = ROLc(b, 30);
-    #define SHA1_FF3(a,b,c,d,e,i) e = (ROLc(a, 5) + F3(b,c,d) + e + W[i] + 0xca62c1d6UL); b = ROLc(b, 30);
- 
-    for (i = 0; i < 20; ) {
-       SHA1_FF0(a,b,c,d,e,i++);
-       SHA1_FF0(e,a,b,c,d,i++);
-       SHA1_FF0(d,e,a,b,c,i++);
-       SHA1_FF0(c,d,e,a,b,i++);
-       SHA1_FF0(b,c,d,e,a,i++);
-    }
+int  ctx_client_height           (Ctx *ctx, int id);
+int  ctx_client_x                (Ctx *ctx, int id);
+int  ctx_client_y                (Ctx *ctx, int id);
+void ctx_client_raise_top        (Ctx *ctx, int id);
+void ctx_client_lower_bottom     (Ctx *ctx, int id);
+void ctx_client_iconify          (Ctx *ctx, int id);
+int  ctx_client_is_iconified     (Ctx *ctx, int id);
+void ctx_client_uniconify        (Ctx *ctx, int id);
+void ctx_client_maximize         (Ctx *ctx, int id);
+int  ctx_client_is_maximized     (Ctx *ctx, int id);
+void ctx_client_unmaximize       (Ctx *ctx, int id);
+void ctx_client_maximized_toggle (Ctx *ctx, int id);
+void ctx_client_shade            (Ctx *ctx, int id);
+int  ctx_client_is_shaded        (Ctx *ctx, int id);
+void ctx_client_unshade          (Ctx *ctx, int id);
+void ctx_client_toggle_maximized (Ctx *ctx, int id);
+void ctx_client_shade_toggle     (Ctx *ctx, int id);
+void ctx_client_move             (Ctx *ctx, int id, int x, int y);
+int  ctx_client_resize           (Ctx *ctx, int id, int width, int height);
+void ctx_client_set_opacity      (Ctx *ctx, int id, float opacity);
+float ctx_client_get_opacity     (Ctx *ctx, int id);
+void ctx_client_set_title        (Ctx *ctx, int id, const char *title);
+const char *ctx_client_get_title (Ctx *ctx, int id);
+
+
+#endif
+
+#if CTX_IMPLEMENTATION || CTX_SIMD_BUILD
+#ifndef CTX_DRAWLIST_H
+#define CTX_DRAWLIST_H
 
-    /* round two */
-    for (; i < 40; )  { 
-       SHA1_FF1(a,b,c,d,e,i++);
-       SHA1_FF1(e,a,b,c,d,i++);
-       SHA1_FF1(d,e,a,b,c,i++);
-       SHA1_FF1(c,d,e,a,b,i++);
-       SHA1_FF1(b,c,d,e,a,i++);
-    }
+static int
+ctx_conts_for_entry (CtxEntry *entry);
+static void
+ctx_iterator_init (CtxIterator      *iterator,
+                   CtxDrawlist  *drawlist,
+                   int               start_pos,
+                   int               flags);
 
-    /* round three */
-    for (; i < 60; )  { 
-       SHA1_FF2(a,b,c,d,e,i++);
-       SHA1_FF2(e,a,b,c,d,i++);
-       SHA1_FF2(d,e,a,b,c,i++);
-       SHA1_FF2(c,d,e,a,b,i++);
-       SHA1_FF2(b,c,d,e,a,i++);
-    }
+int ctx_iterator_pos (CtxIterator *iterator);
 
-    /* round four */
-    for (; i < 80; )  { 
-       SHA1_FF3(a,b,c,d,e,i++);
-       SHA1_FF3(e,a,b,c,d,i++);
-       SHA1_FF3(d,e,a,b,c,i++);
-       SHA1_FF3(c,d,e,a,b,i++);
-       SHA1_FF3(b,c,d,e,a,i++);
-    }
+static void
+ctx_drawlist_resize (CtxDrawlist *drawlist, int desired_size);
+static int
+ctx_drawlist_add_single (CtxDrawlist *drawlist, CtxEntry *entry);
+static int ctx_drawlist_add_entry (CtxDrawlist *drawlist, CtxEntry *entry);
+int
+ctx_drawlist_insert_entry (CtxDrawlist *drawlist, int pos, CtxEntry *entry);
+int
+ctx_add_data (Ctx *ctx, void *data, int length);
 
-    #undef SHA1_FF0
-    #undef SHA1_FF1
-    #undef SHA1_FF2
-    #undef SHA1_FF3
+int ctx_drawlist_add_u32 (CtxDrawlist *drawlist, CtxCode code, uint32_t u32[2]);
+int ctx_drawlist_add_data (CtxDrawlist *drawlist, const void *data, int length);
 
-    /* store */
-    sha1->state[0] = sha1->state[0] + a;
-    sha1->state[1] = sha1->state[1] + b;
-    sha1->state[2] = sha1->state[2] + c;
-    sha1->state[3] = sha1->state[3] + d;
-    sha1->state[4] = sha1->state[4] + e;
+static CtxEntry
+ctx_void (CtxCode code);
+static inline CtxEntry
+ctx_f (CtxCode code, float x, float y);
+static CtxEntry
+ctx_u32 (CtxCode code, uint32_t x, uint32_t y);
+#if 0
+static CtxEntry
+ctx_s32 (CtxCode code, int32_t x, int32_t y);
+#endif
 
-    return CRYPT_OK;
-}
+static inline CtxEntry
+ctx_s16 (CtxCode code, int x0, int y0, int x1, int y1);
+static CtxEntry
+ctx_u8 (CtxCode code,
+        uint8_t a, uint8_t b, uint8_t c, uint8_t d,
+        uint8_t e, uint8_t f, uint8_t g, uint8_t h);
 
-/**
-   Initialize the hash state
-   @param md   The hash state you wish to initialize
-   @return CRYPT_OK if successful
-*/
-int ctx_sha1_init(CtxSHA1 * sha1)
-{
-   assert(sha1 != NULL);
-   sha1->state[0] = 0x67452301UL;
-   sha1->state[1] = 0xefcdab89UL;
-   sha1->state[2] = 0x98badcfeUL;
-   sha1->state[3] = 0x10325476UL;
-   sha1->state[4] = 0xc3d2e1f0UL;
-   sha1->curlen = 0;
-   sha1->length = 0;
-   return CRYPT_OK;
-}
+#define CTX_PROCESS_VOID(cmd) do { /* entry initialised from cmd alone, sent via the `ctx` in the caller's scope */ \
+  CtxEntry commands[4] = {{cmd}};\
+  ctx_process (ctx, &commands[0]);}while(0)
 
-/**
-   Process a block of memory though the hash
-   @param md     The hash state
-   @param in     The data to hash
-   @param inlen  The length of the data (octets)
-   @return CRYPT_OK if successful
-*/
-HASH_PROCESS(ctx_sha1_process, ctx_sha1_compress, sha1, 64)
+#define CTX_PROCESS_F(cmd,x,y) do { /* entry built by ctx_f(cmd,x,y), sent via the `ctx` in the caller's scope */ \
+  CtxEntry commands[4] = {ctx_f(cmd,x,y),};\
+  ctx_process (ctx, &commands[0]);}while(0)
 
-/**
-   Terminate the hash to get the digest
-   @param md  The hash state
-   @param out [out] The destination of the hash (20 bytes)
-   @return CRYPT_OK if successful
-*/
-int ctx_sha1_done(CtxSHA1 * sha1, unsigned char *out)
-{
-    int i;
+#define CTX_PROCESS_F1(cmd,x) do { /* like the two-float form, with the second float fixed to 0 */ \
+  CtxEntry commands[4] = {ctx_f(cmd,x,0),};\
+  ctx_process (ctx, &commands[0]);}while(0)
 
-    assert(sha1 != NULL);
-    assert(out != NULL);
+#define CTX_PROCESS_U32(cmd, x, y) do {\
+  CtxEntry commands[4] = {ctx_u32(cmd, x, y)};\
+  ctx_process (ctx, &commands[0]);}while(0)
 
-    if (sha1->curlen >= sizeof(sha1->buf)) {
-       return -1;
-    }
+#define CTX_PROCESS_U8(cmd, x) do {\
+  CtxEntry commands[4] = {ctx_u8(cmd, x,0,0,0,0,0,0,0)};\
+  ctx_process (ctx, &commands[0]);}while(0)
 
-    /* increase the length of the message */
-    sha1->length += sha1->curlen * 8;
 
-    /* append the '1' bit */
-    sha1->buf[sha1->curlen++] = (unsigned char)0x80;
+#if CTX_BITPACK_PACKER
+static unsigned int
+ctx_last_history (CtxDrawlist *drawlist);
+#endif
 
-    /* if the length is currently above 56 bytes we append zeros
-     * then compress.  Then we can fall back to padding zeros and length
-     * encoding like normal.
-     */
-    if (sha1->curlen > 56) {
-        while (sha1->curlen < 64) {
-            sha1->buf[sha1->curlen++] = (unsigned char)0;
-        }
-        ctx_sha1_compress(sha1, sha1->buf);
-        sha1->curlen = 0;
-    }
+#if CTX_BITPACK_PACKER
+static void
+ctx_drawlist_remove_tiny_curves (CtxDrawlist *drawlist, int start_pos);
 
-    /* pad upto 56 bytes of zeroes */
-    while (sha1->curlen < 56) {
-        sha1->buf[sha1->curlen++] = (unsigned char)0;
-    }
+static void
+ctx_drawlist_bitpack (CtxDrawlist *drawlist, unsigned int start_pos);
+#endif
 
-    /* store length */
-    STORE64H(sha1->length, sha1->buf+56);
-    ctx_sha1_compress(sha1, sha1->buf);
+static void
+ctx_process_cmd_str (Ctx *ctx, CtxCode code, const char *string, uint32_t arg0, uint32_t arg1);
+static void
+ctx_process_cmd_str_float (Ctx *ctx, CtxCode code, const char *string, float arg0, float arg1);
+static void
+ctx_process_cmd_str_with_len (Ctx *ctx, CtxCode code, const char *string, uint32_t arg0, uint32_t arg1, int 
len);
 
-    /* copy output */
-    for (i = 0; i < 5; i++) {
-        STORE32H(sha1->state[i], out+(4*i));
-    }
-    return CRYPT_OK;
+#pragma pack(push,1)
+typedef struct 
+CtxSegment { /* declared under pack(1), so the members below are laid out without padding */
+  uint16_t code;
+  union {
+   int16_t s16[4]; /* ctx_segment_s16() stores x0, y0, x1, y1 here */
+   uint32_t u32[2]; /* the same 8 bytes viewed as two 32-bit words */
+  } data;
+  int32_t val; /* NOTE(review): meaning not visible in this chunk - confirm */
+  int32_t delta; /* NOTE(review): meaning not visible in this chunk - confirm */
+} CtxSegment;
+#pragma pack(pop)
+
+
+
+static inline CtxSegment
+ctx_segment_s16 (CtxCode code, int x0, int y0, int x1, int y1)
+{
+  CtxSegment seg; /* only code and the four s16 slots are written; val and delta stay unset */
+  seg.data.s16[3] = y1;
+  seg.data.s16[2] = x1;
+  seg.data.s16[1] = y0;
+  seg.data.s16[0] = x0;
+  seg.code = code;
+  return seg;
 }
-#endif
 
-#endif
-#endif
-#ifndef CTX_AUDIO_H
-#define CTX_AUDIO_H
+static inline void
+ctx_edgelist_resize (CtxDrawlist *drawlist, int desired_size)
+{ /* (re)size the CtxSegment storage behind drawlist->entries; desired_size is clamped to the edge-list min/max */
+#if CTX_DRAWLIST_STATIC
+    {
+      static CtxSegment sbuf[CTX_MAX_EDGE_LIST_SIZE];
+      drawlist->entries = (CtxEntry*)&sbuf[0];
+      drawlist->size = CTX_MAX_EDGE_LIST_SIZE;
+    }
+#else
+  int new_size = desired_size;
+  int min_size = CTX_MIN_JOURNAL_SIZE;
+  int max_size = CTX_MAX_JOURNAL_SIZE;
+    {
+      min_size = CTX_MIN_EDGE_LIST_SIZE;
+      max_size = CTX_MAX_EDGE_LIST_SIZE;
+    }
 
-#if !__COSMOPOLITAN__
-#include <stdint.h>
+  if (CTX_UNLIKELY(drawlist->size == max_size))
+    { return; }
+  new_size = ctx_maxi (new_size, min_size);
+  //if (new_size < drawlist->count)
+  //  { new_size = drawlist->count + 4; }
+  new_size = ctx_mini (new_size, max_size);
+  if (new_size != drawlist->size)
+    {
+      int item_size = sizeof (CtxSegment);
+      //fprintf (stderr, "growing drawlist %p %i to %d from %d\n", drawlist, flags, new_size, 
drawlist->size);
+  if (drawlist->entries)
+    {
+      //printf ("grow %p to %d from %d\n", drawlist, new_size, drawlist->size);
+      CtxEntry *ne =  (CtxEntry *) malloc (item_size * new_size);
+      memcpy (ne, drawlist->entries, ctx_mini (drawlist->size, new_size) * item_size ); /* never copy more than the new buffer holds */
+      free (drawlist->entries);
+      drawlist->entries = ne;
+      //drawlist->entries = (CtxEntry*)malloc (drawlist->entries, item_size * new_size);
+    }
+  else
+    {
+      //fprintf (stderr, "allocating for %p %d\n", drawlist, new_size);
+      drawlist->entries = (CtxEntry *) malloc (item_size * new_size);
+    }
+  drawlist->size = new_size;
+    }
+  //fprintf (stderr, "drawlist %p is %d\n", drawlist, drawlist->size);
 #endif
+}
 
-/* This enum should be kept in sync with the corresponding mmm enum.
- */
-typedef enum {
-  CTX_f32,
-  CTX_f32S,
-  CTX_s16,
-  CTX_s16S
-} CtxPCM;
 
-void   ctx_pcm_set_format        (Ctx *ctx, CtxPCM format);
-CtxPCM ctx_pcm_get_format        (Ctx *ctx);
-int    ctx_pcm_get_sample_rate   (Ctx *ctx);
-void   ctx_pcm_set_sample_rate   (Ctx *ctx, int sample_rate);
-int    ctx_pcm_get_frame_chunk   (Ctx *ctx);
-int    ctx_pcm_get_queued        (Ctx *ctx);
-float  ctx_pcm_get_queued_length (Ctx *ctx);
-int    ctx_pcm_queue             (Ctx *ctx, const int8_t *data, int frames);
+static inline int
+ctx_edgelist_add_single (CtxDrawlist *drawlist, CtxEntry *entry)
+{
+  /* append *entry, read as a CtxSegment; gives back its index, or 0 when the list is within 20 slots of the cap */
+  const int slot = drawlist->count;
+  const int hard_cap = CTX_MAX_EDGE_LIST_SIZE;
+  if (CTX_UNLIKELY(slot >= hard_cap - 20))
+    return 0;
+
+  if (CTX_UNLIKELY(slot + 2 >= drawlist->size))
+    {
+      /* ask for at least double the size and at least slot+1024, without passing the cap */
+      int wanted = ctx_mini (hard_cap, ctx_maxi (drawlist->size * 2, slot + 1024));
+      ctx_edgelist_resize (drawlist, wanted);
+    }
+
+  ((CtxSegment*)(drawlist->entries))[slot] = *(CtxSegment*)entry;
+  drawlist->count++;
+  return slot;
+}
+
 
 #endif
 
-#if CTX_IMPLEMENTATION
-#if CTX_AUDIO
 
-//#include <string.h>
-//#include "ctx-internal.h"
-//#include "mmm.h"
+#if CTX_COMPOSITE
 
-#if !__COSMOPOLITAN__
+#define CTX_FULL_AA 15
+#define CTX_REFERENCE 0
 
-#include <pthread.h>
-#if CTX_ALSA_AUDIO
-#include <alsa/asoundlib.h>
-#endif
 
+#define CTX_RGBA8_R_SHIFT  0
+#define CTX_RGBA8_G_SHIFT  8
+#define CTX_RGBA8_B_SHIFT  16
+#define CTX_RGBA8_A_SHIFT  24
+/* masks use an unsigned 0xffu: 0xff << 24 does not fit a 32-bit signed int, which makes the shift undefined */
+#define CTX_RGBA8_R_MASK   (0xffu << CTX_RGBA8_R_SHIFT)
+#define CTX_RGBA8_G_MASK   (0xffu << CTX_RGBA8_G_SHIFT)
+#define CTX_RGBA8_B_MASK   (0xffu << CTX_RGBA8_B_SHIFT)
+#define CTX_RGBA8_A_MASK   (0xffu << CTX_RGBA8_A_SHIFT)
 
+#define CTX_RGBA8_RB_MASK  (CTX_RGBA8_R_MASK | CTX_RGBA8_B_MASK)
+#define CTX_RGBA8_GA_MASK  (CTX_RGBA8_G_MASK | CTX_RGBA8_A_MASK)
 
-//#include <alloca.h>
 
+CTX_INLINE static void
+ctx_RGBA8_associate_alpha (uint8_t *u8)
+{ /* scale the three colour bytes of one 4-byte pixel by its alpha byte u8[3], in place */
+#if 1
+  uint32_t val = *((uint32_t*)(u8)); /* NOTE(review): word access through a uint8_t pointer - assumes suitable alignment and that u8[3] sits at CTX_RGBA8_A_SHIFT (little-endian); confirm */
+  uint32_t a = u8[3];
+  uint32_t g = (((val & CTX_RGBA8_G_MASK) * a) >> 8) & CTX_RGBA8_G_MASK; /* >>8 divides by 256, so alpha 255 still lowers a channel slightly */
+  uint32_t rb =(((val & CTX_RGBA8_RB_MASK) * a) >> 8) & CTX_RGBA8_RB_MASK; /* the two masked channels are scaled with a single multiply */
+  *((uint32_t*)(u8)) = g|rb|(a << CTX_RGBA8_A_SHIFT); /* alpha itself is written back unchanged */
+#else
+  uint32_t a = u8[3];
+  u8[0] = (u8[0] * a + 255) >> 8;
+  u8[1] = (u8[1] * a + 255) >> 8;
+  u8[2] = (u8[2] * a + 255) >> 8;
 #endif
+}
 
-#define DESIRED_PERIOD_SIZE 1000
+CTX_INLINE static void
+ctx_RGBA8_associate_alpha_probably_opaque (uint8_t *u8)
+{
+  const uint32_t alpha = u8[3];
+  if (CTX_UNLIKELY(alpha!=255))
+    {
+      /* alpha 255 leaves the pixel untouched; otherwise each colour byte is scaled by alpha */
+      for (int c = 0; c < 3; c++)
+        u8[c] = (u8[c] * alpha + 255) >> 8;
+    }
+}
 
-int ctx_pcm_bytes_per_frame (CtxPCM format)
+CTX_INLINE static uint32_t ctx_bi_RGBA8 (uint32_t isrc00, uint32_t isrc01, uint32_t isrc10, uint32_t isrc11, 
uint8_t dx, uint8_t dy)
 {
-  switch (format)
+#if 0
+#if 0
+  uint8_t ret[4];
+  uint8_t *src00 = (uint8_t*)&isrc00;
+  uint8_t *src10 = (uint8_t*)&isrc10;
+  uint8_t *src01 = (uint8_t*)&isrc01;
+  uint8_t *src11 = (uint8_t*)&isrc11;
+  for (int c = 0; c < 4; c++)
   {
-    case CTX_f32:  return 4;
-    case CTX_f32S: return 8;
-    case CTX_s16:  return 2;
-    case CTX_s16S: return 4;
-    default: return 1;
+    ret[c] = ctx_lerp_u8 (ctx_lerp_u8 (src00[c], src01[c], dx),
+                         ctx_lerp_u8 (src10[c], src11[c], dx), dy);
   }
+  return  ((uint32_t*)&ret[0])[0];
+#else
+  return ctx_lerp_RGBA8 (ctx_lerp_RGBA8 (isrc00, isrc01, dx),
+                         ctx_lerp_RGBA8 (isrc10, isrc11, dx), dy);
+#endif
+#else
+  uint32_t s0_ga, s0_rb, s1_ga, s1_rb;
+  ctx_lerp_RGBA8_split (isrc00, isrc01, dx, &s0_ga, &s0_rb);
+  ctx_lerp_RGBA8_split (isrc10, isrc11, dx, &s1_ga, &s1_rb);
+  return ctx_lerp_RGBA8_merge (s0_ga, s0_rb, s1_ga, s1_rb, dy);
+#endif
 }
 
-static float    ctx_host_freq     = 48000;
-static CtxPCM   ctx_host_format   = CTX_s16S;
-static float    client_freq   = 48000;
-static CtxPCM   ctx_client_format = CTX_s16S;
-static int      ctx_pcm_queued    = 0;
-static int      ctx_pcm_cur_left  = 0;
-static CtxList *ctx_pcm_list;                 /* data is a blob a 32bit uint first, followed by pcm-data */
+#if CTX_GRADIENTS
+#if CTX_GRADIENT_CACHE
+static uint8_t ctx_gradient_cache_u8[CTX_GRADIENT_CACHE_ELEMENTS][4];
+extern int ctx_gradient_cache_valid;
+static int ctx_gradient_cache_elements = CTX_GRADIENT_CACHE_ELEMENTS;
+
+inline static int ctx_grad_index (float v)
+{
+  /* nearest cache slot for v, where 0..1 spans the whole cache; clamped to valid indices */
+  const int last = ctx_gradient_cache_elements - 1;
+  int slot = v * last + 0.5f;
+  return ctx_mini (last, ctx_maxi (0, slot));
+}
 
+/* Integer variant of the cache slot lookup: v is shifted right by 8 bits
+ * and the result is clamped to 0 .. index of the last slot in use.
+ */
+inline static int ctx_grad_index_i (int v)
+{
+  const int last = ctx_gradient_cache_elements - 1;
+  const int slot = ctx_mini (last, v >> 8);
+  return ctx_maxi (0, slot);
+}
 
-//static long int ctx_pcm_queued_ticks = 0;  /*  the number of ticks into the future
-  //                                      *  we've queued audio for
-                                       
+//static void
+//ctx_gradient_cache_reset (void)
+//{
+//  ctx_gradient_cache_valid = 0;
+//}
+#endif
 
 
-int
-ctx_pcm_channels (CtxPCM format)
+/* Compute one colour of the current 1D gradient directly from its stops
+ * (no cache lookup) and store it in rgba[0..3].
+ *
+ * rasterizer: the stops are read from rasterizer->state->gradient.
+ * x:          position along the gradient, clamped to 0.0 .. 1.0.
+ * y:          not used.
+ * rgba:       receives four bytes.
+ *
+ * Except for the no-stops fallback, the result is passed through
+ * ctx_RGBA8_associate_alpha before returning.
+ */
+CTX_INLINE static void
+_ctx_fragment_gradient_1d_RGBA8 (CtxRasterizer *rasterizer, float x, float y, uint8_t *rgba)
 {
-  switch (format)
+  float v = x;
+  CtxGradient *g = &rasterizer->state->gradient;
+  if (v < 0) { v = 0; }
+  if (v > 1) { v = 1; }
+
+  if (g->n_stops == 0)
+    {
+      /* no stops defined: opaque gray ramp following v */
+      rgba[0] = rgba[1] = rgba[2] = v * 255;
+      rgba[3] = 255;
+      return;
+    }
+  CtxGradientStop *stop      = NULL;
+  CtxGradientStop *next_stop = &g->stops[0];
+  CtxColor *color;
+  /* look for neighbouring stops with stop->pos <= v < next_stop->pos; an
+   * iteration that does not break clears both pointers, so both are NULL
+   * when no such pair exists */
+  for (int s = 0; s < g->n_stops; s++)
+    {
+      stop      = &g->stops[s];
+      next_stop = &g->stops[s+1];
+      if (s + 1 >= g->n_stops) { next_stop = NULL; }
+      if (v >= stop->pos && next_stop && v < next_stop->pos)
+        { break; }
+      stop = NULL;
+      next_stop = NULL;
+    }
+  /* NOTE(review): given the loop above, the first two branches look
+   * unreachable once there is at least one stop - confirm */
+  if (stop == NULL && next_stop)
+    {
+      color = & (next_stop->color);
+    }
+  else if (stop && next_stop == NULL)
+    {
+      color = & (stop->color);
+    }
+  else if (stop && next_stop)
+    {
+      uint8_t stop_rgba[4];
+      uint8_t next_rgba[4];
+      ctx_color_get_rgba8 (rasterizer->state, & (stop->color), stop_rgba);
+      ctx_color_get_rgba8 (rasterizer->state, & (next_stop->color), next_rgba);
+      /* blend weight between the two stops, scaled to 0 .. 255 */
+      int dx = (v - stop->pos) * 255 / (next_stop->pos - stop->pos);
+#if 1
+      ((uint32_t*)rgba)[0] = ctx_lerp_RGBA8 (((uint32_t*)stop_rgba)[0],
+                                             ((uint32_t*)next_rgba)[0], dx);
+#else
+      for (int c = 0; c < 4; c++)
+        { rgba[c] = ctx_lerp_u8 (stop_rgba[c], next_rgba[c], dx); }
+#endif
+      /* NOTE(review): this path returns without the swap_red_green
+       * handling that the single-colour path below applies - confirm
+       * that is intended */
+      ctx_RGBA8_associate_alpha (rgba);
+      return;
+    }
+  else
+    {
+      /* no enclosing pair: the last stop is used.
+       * NOTE(review): this is also reached when v lies before the first
+       * stop's position - confirm the last stop is wanted there */
+      color = & (g->stops[g->n_stops-1].color);
+    }
+  ctx_color_get_rgba8 (rasterizer->state, color, rgba);
+  if (rasterizer->swap_red_green)
   {
-    case CTX_s16:
-    case CTX_f32:
-      return 1;
-    case CTX_s16S:
-    case CTX_f32S:
-      return 2;
+    /* exchanges bytes 0 and 2 */
+    uint8_t tmp = rgba[0];
+    rgba[0] = rgba[2];
+    rgba[2] = tmp;
   }
-  return 0;
+  ctx_RGBA8_associate_alpha (rgba);
 }
 
-/* todo: only start audio thread on first write - enabling dynamic choice
- * of sample-rate? or is it better to keep to opening 48000 as a standard
- * and do better internal resampling for others?
- */
+#if CTX_GRADIENT_CACHE
+static void
+ctx_gradient_cache_prime (CtxRasterizer *rasterizer);
+#endif
 
-#if CTX_ALSA_AUDIO
-static snd_pcm_t *alsa_open (char *dev, int rate, int channels)
+/* Produce the four-byte colour of the 1D gradient at position x.
+ *
+ * With CTX_GRADIENT_CACHE enabled the colour is copied out of
+ * ctx_gradient_cache_u8 at the slot given by ctx_grad_index (x); rasterizer
+ * and y are not consulted on that path.  Otherwise the colour is computed
+ * by _ctx_fragment_gradient_1d_RGBA8.
+ */
+CTX_INLINE static void
+ctx_fragment_gradient_1d_RGBA8 (CtxRasterizer *rasterizer, float x, float y, uint8_t *rgba)
 {
-   snd_pcm_hw_params_t *hwp;
-   snd_pcm_sw_params_t *swp;
-   snd_pcm_t *h;
-   int r;
-   int dir;
-   snd_pcm_uframes_t period_size_min;
-   snd_pcm_uframes_t period_size_max;
-   snd_pcm_uframes_t period_size;
-   snd_pcm_uframes_t buffer_size;
+#if CTX_GRADIENT_CACHE
+  /* all four bytes are moved with a single 32-bit load and store */
+  *((uint32_t*)rgba) = *((uint32_t*)(&ctx_gradient_cache_u8[ctx_grad_index(x)][0]));
+#else
+ _ctx_fragment_gradient_1d_RGBA8 (rasterizer, x, y, rgba);
+#endif
+}
+#endif
 
-   if ((r = snd_pcm_open(&h, dev, SND_PCM_STREAM_PLAYBACK, 0) < 0))
-           return NULL;
+/* Multiply every component except the last by the last component (the
+ * alpha), in place, using the (value * alpha + 255) >> 8 approximation.
+ *
+ * components: number of bytes in u8, alpha included.
+ */
+CTX_INLINE static void
+ctx_u8_associate_alpha (int components, uint8_t *u8)
+{
+  const int alpha_index = components - 1;
+  for (int ch = 0; ch < alpha_index; ch++)
+    {
+      u8[ch] = (u8[ch] * u8[alpha_index] + 255) >> 8;
+    }
+}
 
-   hwp = alloca(snd_pcm_hw_params_sizeof());
-   memset(hwp, 0, snd_pcm_hw_params_sizeof());
-   snd_pcm_hw_params_any(h, hwp);
+#if CTX_GRADIENTS
+#if CTX_GRADIENT_CACHE
+/* Fill ctx_gradient_cache_u8 for the current fill source, unless
+ * ctx_gradient_cache_valid says the cache is already usable.
+ *
+ * The number of entries used is derived from the gradient length (linear:
+ * length, radial: the larger radius, otherwise 100) after running it
+ * through the current transform, limited to CTX_GRADIENT_CACHE_ELEMENTS and
+ * never below two.  Each entry is computed with
+ * _ctx_fragment_gradient_1d_RGBA8 at evenly spaced positions 0.0 .. 1.0.
+ */
+static void
+ctx_gradient_cache_prime (CtxRasterizer *rasterizer)
+{
+  // XXX : todo  make the number of element dynamic depending on length of gradient
+  // in device coordinates.
 
-   snd_pcm_hw_params_set_access(h, hwp, SND_PCM_ACCESS_RW_INTERLEAVED);
-   snd_pcm_hw_params_set_format(h, hwp, SND_PCM_FORMAT_S16_LE);
-   snd_pcm_hw_params_set_rate(h, hwp, rate, 0);
-   snd_pcm_hw_params_set_channels(h, hwp, channels);
-   dir = 0;
-   snd_pcm_hw_params_get_period_size_min(hwp, &period_size_min, &dir);
-   dir = 0;
-   snd_pcm_hw_params_get_period_size_max(hwp, &period_size_max, &dir);
+  if (ctx_gradient_cache_valid)
+    return;
+  
 
-   period_size = DESIRED_PERIOD_SIZE;
-
-   dir = 0;
-   r = snd_pcm_hw_params_set_period_size_near(h, hwp, &period_size, &dir);
-   r = snd_pcm_hw_params_get_period_size(hwp, &period_size, &dir);
-   buffer_size = period_size * 4;
-   r = snd_pcm_hw_params_set_buffer_size_near(h, hwp, &buffer_size);
-   r = snd_pcm_hw_params(h, hwp);
-   swp = alloca(snd_pcm_sw_params_sizeof());
-   memset(hwp, 0, snd_pcm_sw_params_sizeof());
-   snd_pcm_sw_params_current(h, swp);
-   r = snd_pcm_sw_params_set_avail_min(h, swp, period_size);
-   snd_pcm_sw_params_set_start_threshold(h, swp, 0);
-   r = snd_pcm_sw_params(h, swp);
-   r = snd_pcm_prepare(h);
+  {
+    CtxSource *source = &rasterizer->state->gstate.source_fill;
+    float length = 100;
+    if (source->type == CTX_SOURCE_LINEAR_GRADIENT)
+    {
+       length = source->linear_gradient.length;
+    }
+    else
+    if (source->type == CTX_SOURCE_RADIAL_GRADIENT)
+    {
+       length = ctx_maxf (source->radial_gradient.r1, source->radial_gradient.r0);
+    }
+  //  length = CTX_GRADIENT_CACHE_ELEMENTS;
+  {
+     float u = length; float v = length;
+     const CtxMatrix *m = &rasterizer->state->gstate.transform;
+     //CtxMatrix *transform = &source->transform;
+     //
+     //  combine with above source transform?
+     _ctx_matrix_apply_transform (m, &u, &v);
+     length = ctx_maxf (u, v);
+  }
+  
+    /* keep at least two entries: with fewer, the position computed below
+     * divides by zero and ctx_grad_index would clamp to a negative slot */
+    ctx_gradient_cache_elements = ctx_mini (ctx_maxi (2, length), CTX_GRADIENT_CACHE_ELEMENTS);
+  }
 
-   return h;
+  for (int u = 0; u < ctx_gradient_cache_elements; u++)
+  {
+    /* position of entry u, spread evenly over 0.0 .. 1.0 */
+    float v = u / (ctx_gradient_cache_elements - 1.0f);
+    _ctx_fragment_gradient_1d_RGBA8 (rasterizer, v, 0.0f, &ctx_gradient_cache_u8[u][0]);
+    //*((uint32_t*)(&ctx_gradient_cache_u8_a[u][0]))= *((uint32_t*)(&ctx_gradient_cache_u8[u][0]));
+    //memcpy(&ctx_gradient_cache_u8_a[u][0], &ctx_gradient_cache_u8[u][0], 4);
+    //ctx_RGBA8_associate_alpha (&ctx_gradient_cache_u8_a[u][0]);
+  }
+  ctx_gradient_cache_valid = 1;
 }
+#endif
 
-static  snd_pcm_t *h = NULL;
-static void *ctx_alsa_audio_start(Ctx *ctx)
+/* Compute one gray + alpha sample of the current 1D gradient from its
+ * stops.
+ *
+ * rasterizer: the stops are read from rasterizer->state->gradient.
+ * x:          position along the gradient, clamped to 0.0 .. 1.0.
+ * y:          not used.
+ * rgba:       output; despite the name only rgba[0] (gray) and rgba[1]
+ *             (alpha) are written.
+ */
+CTX_INLINE static void
+ctx_fragment_gradient_1d_GRAYA8 (CtxRasterizer *rasterizer, float x, float y, uint8_t *rgba)
 {
-//  Lyd *lyd = aux;
-  int c;
-
-  /* The audio handler is implemented as a mixer that adds data on top
-   * of 0s, XXX: it should be ensured that minimal work is there is
-   * no data available.
-   */
-  for (;;)
-  {
-    int client_channels = ctx_pcm_channels (ctx_client_format);
-    int is_float = 0;
-    int16_t data[81920*8]={0,};
-
-    if (ctx_client_format == CTX_f32 ||
-        ctx_client_format == CTX_f32S)
-      is_float = 1;
-
-    c = snd_pcm_wait(h, 1000);
+  float v = x;
+  CtxGradient *g = &rasterizer->state->gradient;
+  if (v < 0) { v = 0; }
+  if (v > 1) { v = 1; }
+  if (g->n_stops == 0)
+    {
+      /* no stops: opaque gray ramp; only two bytes belong to this sample */
+      rgba[0] = v * 255;
+      rgba[1] = 255;
+      return;
+    }
+  CtxGradientStop *stop      = NULL;
+  CtxGradientStop *next_stop = &g->stops[0];
+  CtxColor *color;
+  /* look for neighbouring stops with stop->pos <= v < next_stop->pos; an
+   * iteration that does not break clears both pointers */
+  for (int s = 0; s < g->n_stops; s++)
+    {
+      stop      = &g->stops[s];
+      next_stop = &g->stops[s+1];
+      if (s + 1 >= g->n_stops) { next_stop = NULL; }
+      if (v >= stop->pos && next_stop && v < next_stop->pos)
+        { break; }
+      stop = NULL;
+      next_stop = NULL;
+    }
+  if (stop == NULL && next_stop)
+    {
+      color = & (next_stop->color);
+    }
+  else if (stop && next_stop == NULL)
+    {
+      color = & (stop->color);
+    }
+  else if (stop && next_stop)
+    {
+      uint8_t stop_rgba[4];
+      uint8_t next_rgba[4];
+      ctx_color_get_graya_u8 (rasterizer->state, & (stop->color), stop_rgba);
+      ctx_color_get_graya_u8 (rasterizer->state, & (next_stop->color), next_rgba);
+      /* blend weight between the two stops, scaled to 0 .. 255 */
+      int dx = (v - stop->pos) * 255 / (next_stop->pos - stop->pos);
+      for (int c = 0; c < 2; c++)
+        { rgba[c] = ctx_lerp_u8 (stop_rgba[c], next_rgba[c], dx); }
+      return;
+    }
+  else
+    {
+      /* no enclosing pair: the last stop is used.
+       * NOTE(review): also reached when v lies before the first stop's
+       * position - confirm the last stop is wanted there */
+      color = & (g->stops[g->n_stops-1].color);
+    }
+  ctx_color_get_graya_u8 (rasterizer->state, color, rgba);
+}
 
-    if (c >= 0)
-       c = snd_pcm_avail_update(h);
+/* Compute one floating point RGBA colour of the current 1D gradient from
+ * its stops.
+ *
+ * rasterizer: the stops are read from rasterizer->state->gradient.
+ * v:          position along the gradient, clamped to 0.0 .. 1.0.
+ * y:          not used.
+ * rgba:       receives four floats.
+ */
+CTX_INLINE static void
+ctx_fragment_gradient_1d_RGBAF (CtxRasterizer *rasterizer, float v, float y, float *rgba)
+{
+  CtxGradient *g = &rasterizer->state->gradient;
+  if (v < 0) { v = 0; }
+  if (v > 1) { v = 1; }
+  if (g->n_stops == 0)
+    {
+      /* no stops: opaque gray ramp following v */
+      rgba[0] = rgba[1] = rgba[2] = v;
+      rgba[3] = 1.0;
+      return;
+    }
+  CtxGradientStop *stop      = NULL;
+  CtxGradientStop *next_stop = &g->stops[0];
+  CtxColor *color;
+  /* look for neighbouring stops with stop->pos <= v < next_stop->pos; an
+   * iteration that does not break clears both pointers */
+  for (int s = 0; s < g->n_stops; s++)
+    {
+      stop      = &g->stops[s];
+      next_stop = &g->stops[s+1];
+      if (s + 1 >= g->n_stops) { next_stop = NULL; }
+      if (v >= stop->pos && next_stop && v < next_stop->pos)
+        { break; }
+      stop = NULL;
+      next_stop = NULL;
+    }
+  if (stop == NULL && next_stop)
+    {
+      color = & (next_stop->color);
+    }
+  else if (stop && next_stop == NULL)
+    {
+      color = & (stop->color);
+    }
+  else if (stop && next_stop)
+    {
+      float stop_rgba[4];
+      float next_rgba[4];
+      ctx_color_get_rgba (rasterizer->state, & (stop->color), stop_rgba);
+      ctx_color_get_rgba (rasterizer->state, & (next_stop->color), next_rgba);
+      /* fractional blend weight; it has to stay a float, an integer here
+       * would always truncate to 0 and yield the left stop's colour */
+      float dx = (v - stop->pos) / (next_stop->pos - stop->pos);
+      for (int c = 0; c < 4; c++)
+        { rgba[c] = ctx_lerpf (stop_rgba[c], next_rgba[c], dx); }
+      return;
+    }
+  else
+    {
+      /* no enclosing pair: the last stop is used.
+       * NOTE(review): also reached when v lies before the first stop's
+       * position - confirm the last stop is wanted there */
+      color = & (g->stops[g->n_stops-1].color);
+    }
+  ctx_color_get_rgba (rasterizer->state, color, rgba);
+}
+#endif
 
-    if (c > 1000) c = 1000; // should use max mmm buffer sizes
+/* Sample the fill source's texture into `count` RGBA8 pixels.
+ *
+ * rasterizer: supplies the texture (source_fill.texture.buffer->color_managed),
+ *             global_alpha_u8, image_smoothing and swap_red_green.
+ * x, y:       texture coordinate of the first output pixel.
+ * out:        destination, four bytes are written per pixel.
+ * count:      number of pixels to produce.
+ * dx, dy:     added to x and y after every pixel.
+ *
+ * Coordinates outside the texture give 0.  Inside, textures with 1, 2, 3
+ * or 4 bytes per pixel are expanded to four bytes, either bilinearly
+ * blended (image_smoothing set) or read directly.  Every pixel is finally
+ * passed through ctx_RGBA8_associate_alpha_probably_opaque.
+ */
+static void
+ctx_fragment_image_RGBA8 (CtxRasterizer *rasterizer, float x, float y, void *out, int count, float dx, float 
dy)
+{
+  uint8_t *rgba = (uint8_t *) out;
+  CtxSource *g = &rasterizer->state->gstate.source_fill;
+  /* NOTE(review): color_managed is dereferenced unchecked - presumably it
+   * is always set by the time this runs; confirm */
+  CtxBuffer *buffer = g->texture.buffer->color_managed;
+  uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;
 
-    if (c == -EPIPE)
-      snd_pcm_prepare(h);
+  for (int i = 0; i < count; i ++)
+  {
 
-    if (c > 0)
+  int u = x;
+  int v = y;
+  int width = buffer->width;
+  int height = buffer->height;
+  if ( u < 0 || v < 0 ||
+       u >= width ||
+       v >= height)
     {
-      int i;
-      for (i = 0; i < c && ctx_pcm_cur_left; i ++)
+      *((uint32_t*)(rgba)) = 0;
+    }
+  else
+    {
+      int bpp = buffer->format->bpp/8;
+      if (rasterizer->state->gstate.image_smoothing)
       {
-        if (ctx_pcm_cur_left)  //  XXX  this line can be removed
-        {
-          uint32_t *packet_sizep = (ctx_pcm_list->data);
-          uint32_t packet_size = *packet_sizep;
-          uint16_t left = 0, right = 0;
-
-          if (is_float)
-          {
-            float *packet = (ctx_pcm_list->data);
-            packet += 4;
-            packet += (packet_size - ctx_pcm_cur_left) * client_channels;
-            left = right = packet[0] * (1<<15);
-            if (client_channels > 1)
-              right = packet[0] * (1<<15);
-          }
-          else // s16
-          {
-            uint16_t *packet = (ctx_pcm_list->data);
-            packet += 8;
-            packet += (packet_size - ctx_pcm_cur_left) * client_channels;
-
-            left = right = packet[0];
-            if (client_channels > 1)
-              right = packet[1];
-          }
-          data[i * 2 + 0] = left;
-          data[i * 2 + 1] = right;
+      /* src00/src01/src10/src11: the texel at (u,v) and its right, lower
+       * and lower-right neighbours; on the last column or row the
+       * neighbour falls back to the texel itself */
+      uint8_t *src00 = (uint8_t *) buffer->data;
+      src00 += v * buffer->stride + u * bpp;
+      uint8_t *src01 = src00;
+      if ( u + 1 < width)
+      {
+        src01 = src00 + bpp;
+      }
+      uint8_t *src11 = src01;
+      uint8_t *src10 = src00;
+      if ( v + 1 < height)
+      {
+        src10 = src00 + buffer->stride;
+        src11 = src01 + buffer->stride;
+      }
+      /* these shadow the dx/dy parameters inside this branch: fractional
+       * part of x and y scaled by 255.9, used as blend weights */
+      float dx = (x-(int)(x)) * 255.9;
+      float dy = (y-(int)(y)) * 255.9;
 
-          ctx_pcm_cur_left--;
-          ctx_pcm_queued --;
-          if (ctx_pcm_cur_left == 0)
-          {
-            void *old = ctx_pcm_list->data;
-            ctx_list_remove (&ctx_pcm_list, ctx_pcm_list->data);
-            free (old);
-            ctx_pcm_cur_left = 0;
-            if (ctx_pcm_list)
-            {
-              uint32_t *packet_sizep = (ctx_pcm_list->data);
-              uint32_t packet_size = *packet_sizep;
-              ctx_pcm_cur_left = packet_size;
-            }
-          }
+      switch (bpp)
+      {
+      /* 1 byte: value replicated to bytes 0..2, alpha is the global alpha */
+      case 1:
+        rgba[0] = rgba[1] = rgba[2] = ctx_lerp_u8 (ctx_lerp_u8 (src00[0], src01[0], dx),
+                               ctx_lerp_u8 (src10[0], src11[0], dx), dy);
+        rgba[3] = global_alpha_u8;
+        break;
+      /* 2 bytes: value + alpha, alpha scaled by the global alpha */
+      case 2:
+        rgba[0] = rgba[1] = rgba[2] = ctx_lerp_u8 (ctx_lerp_u8 (src00[0], src01[0], dx),
+                               ctx_lerp_u8 (src10[0], src11[0], dx), dy);
+        rgba[3] = ctx_lerp_u8 (ctx_lerp_u8 (src00[1], src01[1], dx),
+                               ctx_lerp_u8 (src10[1], src11[1], dx), dy);
+        rgba[3] = (rgba[3] * global_alpha_u8) / 255;
+        break;
+      /* 3 bytes: three channels, alpha is the global alpha */
+      case 3:
+      for (int c = 0; c < bpp; c++)
+        { rgba[c] = ctx_lerp_u8 (ctx_lerp_u8 (src00[c], src01[c], dx),
+                                 ctx_lerp_u8 (src10[c], src11[c], dx), dy);
+                
+        }
+        rgba[3]=global_alpha_u8;
+        break;
+      break;
+      /* 4 bytes: four channels, alpha scaled by the global alpha */
+      case 4:
+      for (int c = 0; c < bpp; c++)
+        { rgba[c] = ctx_lerp_u8 (ctx_lerp_u8 (src00[c], src01[c], dx),
+                                 ctx_lerp_u8 (src10[c], src11[c], dx), dy);
+                
         }
+        rgba[3] = (rgba[3] * global_alpha_u8) / 255;
       }
 
-    c = snd_pcm_writei(h, data, c);
-    if (c < 0)
-      c = snd_pcm_recover (h, c, 0);
-     }else{
-      if (getenv("LYD_FATAL_UNDERRUNS"))
+      }
+      else
+      {
+      /* no smoothing: read the texel at (u,v) directly, same per-bpp
+       * expansion as above */
+      uint8_t *src = (uint8_t *) buffer->data;
+      src += v * buffer->stride + u * bpp;
+      switch (bpp)
         {
-          printf ("dying XXxx need to add API for this debug\n");
-          //printf ("%i", lyd->active);
-          exit(0);
+          case 1:
+            for (int c = 0; c < 3; c++)
+              { rgba[c] = src[0]; }
+            rgba[3] = global_alpha_u8;
+            break;
+          case 2:
+            for (int c = 0; c < 3; c++)
+              { rgba[c] = src[0]; }
+            rgba[3] = src[1];
+            rgba[3] = (rgba[3] * global_alpha_u8) / 255;
+            break;
+          case 3:
+            for (int c = 0; c < 3; c++)
+              { rgba[c] = src[c]; }
+            rgba[3] = global_alpha_u8;
+            break;
+          case 4:
+            for (int c = 0; c < 4; c++)
+              { rgba[c] = src[c]; }
+            rgba[3] = (rgba[3] * global_alpha_u8) / 255;
+            break;
         }
-      fprintf (stderr, "ctx alsa underun\n");
-      //exit(0);
+
+      }
+      if (rasterizer->swap_red_green)
+      {
+        /* exchanges bytes 0 and 2 */
+        uint8_t tmp = rgba[0];
+        rgba[0] = rgba[2];
+        rgba[2] = tmp;
+      }
     }
+    ctx_RGBA8_associate_alpha_probably_opaque (rgba); // XXX: really?
+    rgba += 4;
+    /* here dx/dy are the function parameters again */
+    x += dx;
+    y += dy;
   }
 }
-#endif
 
-static char MuLawCompressTable[256] =
+#if CTX_DITHER
+/* Dither offset for channel c of the pixel at (x, y): a pattern value in
+ * 0 .. 255 is re-centred by subtracting 127 and then divided by divisor.
+ */
+static inline int ctx_dither_mask_a (int x, int y, int c, int divisor)
 {
-   0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,
-   4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
-   5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-   5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-   6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
-   6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
-   6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
-   6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
-   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
-   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
-   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
-   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
-   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
-   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
-   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
-   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
-};
+  /* https://pippin.gimp.org/a_dither/ */
+  int pattern = (x + c * 67) + y * 236;
+  pattern = (pattern * 119) & 255;
+  return (pattern - 127) / divisor;
+}
 
-static unsigned char LinearToMuLawSample(int16_t sample)
+/* Add the dither offset for pixel (x, y) to the first three bytes of
+ * rgba, clamping each result to 0 .. 255.  Byte 1 uses dither_green as
+ * divisor, bytes 0 and 2 use dither_red_blue.  Nothing happens when
+ * dither_red_blue is 0.
+ */
+inline static void
+ctx_dither_rgba_u8 (uint8_t *rgba, int x, int y, int dither_red_blue, int dither_green)
 {
-  const int cBias = 0x84;
-  const int cClip = 32635;
-  int sign = (sample >> 8) & 0x80;
-
-  if (sign)
-    sample = (int16_t)-sample;
-
-  if (sample > cClip)
-    sample = cClip;
-
-  sample = (int16_t)(sample + cBias);
-
-  int exponent = (int)MuLawCompressTable[(sample>>7) & 0xFF];
-  int mantissa = (sample >> (exponent+3)) & 0x0F;
-
-  int compressedByte = ~ (sign | (exponent << 4) | mantissa);
-
-  return (unsigned char)compressedByte;
+  if (dither_red_blue != 0)
+    {
+      for (int ch = 0; ch < 3; ch++)
+        {
+          int divisor  = (ch == 1) ? dither_green : dither_red_blue;
+          int dithered = rgba[ch] + ctx_dither_mask_a (x, y, 0, divisor);
+          rgba[ch] = CTX_CLAMP (dithered, 0, 255);
+        }
+    }
 }
 
-void ctx_ctx_pcm (Ctx *ctx)
+/* Add the dither offset for pixel (x, y) to the first byte of rgba,
+ * clamping to 0 .. 255, with dither_red_blue as divisor.  Nothing happens
+ * when dither_red_blue is 0; dither_green is not used.
+ */
+inline static void
+ctx_dither_graya_u8 (uint8_t *rgba, int x, int y, int dither_red_blue, int dither_green)
 {
-    int client_channels = ctx_pcm_channels (ctx_client_format);
-    int is_float = 0;
-    uint8_t data[81920*8]={0,};
-    int c;
-
-    if (ctx_client_format == CTX_f32 ||
-        ctx_client_format == CTX_f32S)
-      is_float = 1;
-
-    c = 2000;
-
-    if (c > 0)
+  if (dither_red_blue != 0)
     {
-      int i;
-      for (i = 0; i < c && ctx_pcm_cur_left; i ++)
-      {
-        if (ctx_pcm_cur_left)  //  XXX  this line can be removed
-        {
-          uint32_t *packet_sizep = (ctx_pcm_list->data);
-          uint32_t packet_size = *packet_sizep;
-          int left = 0, right = 0;
-
-          if (is_float)
-          {
-            float *packet = (ctx_pcm_list->data);
-            packet += 4;
-            packet += (packet_size - ctx_pcm_cur_left) * client_channels;
-            left = right = packet[0] * (1<<15);
-            if (client_channels > 1)
-              right = packet[1] * (1<<15);
-          }
-          else // s16
-          {
-            uint16_t *packet = (ctx_pcm_list->data);
-            packet += 8;
-            packet += (packet_size - ctx_pcm_cur_left) * client_channels;
-
-            left = right = packet[0];
-            if (client_channels > 1)
-              right = packet[1];
-          }
-          data[i] = LinearToMuLawSample((left+right)/2);
-
-          ctx_pcm_cur_left--;
-          ctx_pcm_queued --;
-          if (ctx_pcm_cur_left == 0)
-          {
-            void *old = ctx_pcm_list->data;
-            ctx_list_remove (&ctx_pcm_list, ctx_pcm_list->data);
-            free (old);
-            ctx_pcm_cur_left = 0;
-            if (ctx_pcm_list)
-            {
-              uint32_t *packet_sizep = (ctx_pcm_list->data);
-              uint32_t packet_size = *packet_sizep;
-              ctx_pcm_cur_left = packet_size;
-            }
-          }
-        }
-      }
-
-    char encoded[81920*8]="";
-
-    int encoded_len = ctx_a85enc (data, encoded, i);
-    fprintf (stdout, "\033_Af=%i;", i);
-    fwrite (encoded, 1, encoded_len, stdout);
-    fwrite ("\e\\", 1, 2, stdout);
-    fflush (stdout);
+      int dithered = rgba[0] + ctx_dither_mask_a (x, y, 0, dither_red_blue);
+      rgba[0] = CTX_CLAMP (dithered, 0, 255);
     }
 }
+#endif
 
-int ctx_pcm_init (Ctx *ctx)
-{
 #if 0
-  if (!strcmp (ctx->backend->name, "mmm") ||
-      !strcmp (ctx->backend->name, "mmm-client"))
-  {
-    return 0;
-  }
-  else
+/* Packed 32-bit variant of alpha de-association: reads one pixel from
+ * `in`, writes one pixel to `out`.  This definition sits inside an
+ * "#if 0" region and is therefore not compiled.
+ *
+ * alpha 0   -> 0 is written
+ * alpha 255 -> the pixel is copied unchanged
+ * otherwise -> the G and the R/B fields are scaled by 255 / alpha
+ */
+CTX_INLINE static void
+ctx_RGBA8_deassociate_alpha (const uint8_t *in, uint8_t *out)
+{
+    uint32_t val = *((uint32_t*)(in));
+    int a = val >> CTX_RGBA8_A_SHIFT;
+    if (a)
+    {
+    if (a ==255)
+    {
+      *((uint32_t*)(out)) = val;
+    } else
+    {
+      /* NOTE(review): the extra ">> 8" after "* 255 / a" moves the fields
+       * down by a byte before masking - looks suspicious, verify before
+       * re-enabling this code */
+      uint32_t g = (((val & CTX_RGBA8_G_MASK) * 255 / a) >> 8) & CTX_RGBA8_G_MASK;
+      uint32_t rb =(((val & CTX_RGBA8_RB_MASK) * 255 / a) >> 8) & CTX_RGBA8_RB_MASK;
+      *((uint32_t*)(out)) = g|rb|(a << CTX_RGBA8_A_SHIFT);
+    }
+    }
+    else
+    {
+      *((uint32_t*)(out)) = 0;
+    }
+}
 #endif
-  if (ctx_renderer_is_ctx (ctx))
+
+/* Undo alpha pre-multiplication for one pixel of `components` bytes whose
+ * last byte is the alpha.
+ *
+ * alpha 0   -> every output byte is 0
+ * alpha 255 -> the bytes are copied
+ * otherwise -> each colour byte becomes (value * 255) / alpha, the alpha
+ *              byte is copied
+ */
+CTX_INLINE static void
+ctx_u8_deassociate_alpha (int components, const uint8_t *in, uint8_t *out)
+{
+  const int last = components - 1;
+  if (in[last])
   {
-     ctx_host_freq = 8000;
-     ctx_host_format = CTX_s16;
-#if 0
-     pthread_t tid;
-     pthread_create(&tid, NULL, (void*)ctx_audio_start, ctx);
-#endif
+    if (in[last] == 255)
+      {
+        for (int ch = 0; ch < last; ch++)
+          out[ch] = in[ch];
+      }
+    else
+      {
+        for (int ch = 0; ch < last; ch++)
+          out[ch] = (in[ch] * 255) / in[last];
+      }
+    out[last] = in[last];
   }
   else
   {
-#if CTX_ALSA_AUDIO
-     pthread_t tid;
-     h = alsa_open("default", ctx_host_freq, ctx_pcm_channels (ctx_host_format));
-  if (!h) {
-    fprintf(stderr, "ctx unable to open ALSA device (%d channels, %f Hz), dying\n",
-            ctx_pcm_channels (ctx_host_format), ctx_host_freq);
-    return -1;
-  }
-  pthread_create(&tid, NULL, (void*)ctx_alsa_audio_start, ctx);
-#endif
+    for (int ch = components; ch-- > 0; )
+      out[ch] = 0;
   }
-  return 0;
 }
 
-int ctx_pcm_queue (Ctx *ctx, const int8_t *data, int frames)
+/* Multiply every component of rgba except the last by the last component
+ * (the alpha), in place.
+ *
+ * components: number of floats in rgba, alpha included.
+ */
+CTX_INLINE static void
+ctx_float_associate_alpha (int components, float *rgba)
 {
-  static int inited = 0;
-#if 0
-  if (!strcmp (ctx->backend->name, "mmm") ||
-      !strcmp (ctx->backend->name, "mmm-client"))
-  {
-    return mmm_pcm_queue (ctx->backend_data, data, frames);
-  }
-  else
-#endif
-  {
-    if (!inited)
-    {
-      ctx_pcm_init (ctx);
-      inited = 1;
-    }
-    float factor = client_freq * 1.0 / ctx_host_freq;
-    int   scaled_frames = frames / factor;
-    int   bpf = ctx_pcm_bytes_per_frame (ctx_client_format);
-
-    uint8_t *packet = malloc (scaled_frames * ctx_pcm_bytes_per_frame (ctx_client_format) + 16);
-    *((uint32_t *)packet) = scaled_frames;
+  const int alpha_index = components - 1;
+  const float alpha = rgba[alpha_index];
+  float *channel = rgba;
+  for (int left = alpha_index; left > 0; left--, channel++)
+    *channel = *channel * alpha;
+}
 
-    if (factor > 0.999 && factor < 1.0001)
-    {
-       memcpy (packet + 16, data, frames * bpf);
-    }
-    else
-    {
-      /* a crude nearest / sample-and hold resampler */
-      int i;
-      for (i = 0; i < scaled_frames; i++)
-      {
-        int source_frame = i * factor;
-        memcpy (packet + 16 + bpf * i, data + source_frame * bpf, bpf);
-      }
-    }
-    if (ctx_pcm_list == NULL)     // otherwise it is another frame at front
-      ctx_pcm_cur_left = scaled_frames;  // and current cur_left is valid
+/* Undo alpha pre-multiplication: every component of rgba except the last
+ * is multiplied by the reciprocal of the last component (the alpha) and
+ * stored in dst; the alpha itself is copied.  For an alpha of 0 the
+ * multiplier stays 0, so the colour components come out as 0.
+ *
+ * components: number of floats in rgba and dst, alpha included.
+ */
+CTX_INLINE static void
+ctx_float_deassociate_alpha (int components, float *rgba, float *dst)
+{
+  const int alpha_index = components - 1;
+  float scale = rgba[alpha_index];
+  if (scale != 0.0)
+    scale = 1.0/scale;
 
-    ctx_list_append (&ctx_pcm_list, packet);
-    ctx_pcm_queued += scaled_frames;
+  int ch = 0;
+  while (ch < alpha_index)
+    {
+      dst[ch] = rgba[ch] * scale;
+      ch++;
+    }
+  dst[alpha_index] = rgba[alpha_index];
+}
 
-    return frames;
-  }
-  return 0;
+/* Four-component convenience wrapper: pre-multiplies rgba[0..2] by
+ * rgba[3] in place through ctx_float_associate_alpha.
+ */
+CTX_INLINE static void
+ctx_RGBAF_associate_alpha (float *rgba)
+{
+  ctx_float_associate_alpha (4, rgba);
 }
 
-static int ctx_pcm_get_queued_frames (Ctx *ctx)
+/* Four-component convenience wrapper around ctx_float_deassociate_alpha:
+ * reads the pixel from rgba and writes the de-associated pixel to dst.
+ */
+CTX_INLINE static void
+ctx_RGBAF_deassociate_alpha (float *rgba, float *dst)
 {
-#if 0
-  if (!strcmp (ctx->backend->name, "mmm") ||
-      !strcmp (ctx->backend->name, "mmm-client"))
-  {
-    return mmm_pcm_get_queued_frames (ctx->backend_data);
-  }
-#endif
-  return ctx_pcm_queued;
+  ctx_float_deassociate_alpha (4, rgba, dst);
 }
 
-int ctx_pcm_get_queued (Ctx *ctx)
+
+/* Exchange byte 0 and byte 2 of the pixel at data; the other bytes are
+ * left alone.
+ */
+static inline void ctx_swap_red_green_u8 (void *data)
 {
-  return ctx_pcm_get_queued_frames (ctx);
+  uint8_t *px = (uint8_t*)data;
+  const uint8_t first = px[0];
+  const uint8_t third = px[2];
+  px[0] = third;
+  px[2] = first;
 }
 
-float ctx_pcm_get_queued_length (Ctx *ctx)
+/* Apply ctx_swap_red_green_u8 to `count` consecutive four-byte pixels
+ * starting at out.
+ */
+static void
+ctx_fragment_swap_red_green_u8 (void *out, int count)
 {
-  return 1.0 * ctx_pcm_get_queued_frames (ctx) / ctx_host_freq;
+  uint8_t *pixel = (uint8_t*)out;
+  for (int remaining = count; remaining > 0; remaining--, pixel += 4)
+    ctx_swap_red_green_u8 (pixel);
 }
 
-int ctx_pcm_get_frame_chunk (Ctx *ctx)
+/**** rgb8 ***/
+
+/* Box-filtered sampling of a 3-bytes-per-pixel texture into `count`
+ * RGBA8 pixels.
+ *
+ * rasterizer: supplies the texture (source_fill.texture.buffer->color_managed),
+ *             global_alpha_u8 and the transform whose scale sets the box size.
+ * x, y:       texture coordinate of the first output pixel.
+ * out:        destination, four bytes are written per pixel.
+ * count:      number of pixels to produce.
+ * dx, dy:     added to x and y after every pixel.
+ *
+ * The span is handled in three runs: leading pixels whose box does not
+ * fit inside the texture get 0, then pixels are averaged for as long as
+ * the box fits, and whatever is left gets 0.
+ */
+static void
+ctx_fragment_image_rgb8_RGBA8_box (CtxRasterizer *rasterizer,
+                                   float x,
+                                   float y,
+                                   void *out, int count, float dx, float dy)
 {
-#if 0
-  if (!strcmp (ctx->backend->name, "mmm") ||
-      !strcmp (ctx->backend->name, "mmm-client"))
+  uint8_t *rgba = (uint8_t *) out;
+  CtxSource *g = &rasterizer->state->gstate.source_fill;
+  CtxBuffer *buffer = g->texture.buffer->color_managed;
+  int width = buffer->width;
+  int height = buffer->height;
+  uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;
+  float factor = ctx_matrix_get_scale (&rasterizer->state->gstate.transform);
+  /* the box spans -dim .. +dim texels around the sample in both directions */
+  int dim = (1.0 / factor) / 3;
+
+  int i = 0;
+
+  /* leading run: the horizontal extent is checked against the width, the
+   * vertical extent against the height */
+  for (; i < count && (x - dim< 0 || y - dim < 0 || x + dim >= width || y + dim >= height); i++)
   {
-    return mmm_pcm_get_frame_chunk (ctx->backend_data);
+    *((uint32_t*)(rgba))=0;
+    rgba += 4;
+    x += dx;
+    y += dy;
   }
-#endif
-  if (ctx_renderer_is_ctx (ctx))
+
+  for (; i < count && !(
+       x - dim < 0 || y - dim < 0 ||
+       x + dim >= width ||
+       y + dim >= height); i++)
   {
-    // 300 stuttering
-    // 350 nothing
-    // 380 slight buzz
-    // 390  buzzing
-    // 400 ok - but sometimes falling out
-    // 410 buzzing
-    // 420 ok - but odd latency
-    // 450 buzzing
 
-    if (ctx_pcm_get_queued_frames (ctx) > 400)
-      return 0;
-    else
-      return 400 - ctx_pcm_get_queued_frames (ctx);
+  int u = x;
+  int v = y;
+    {
+      int bpp = 3;
+      rgba[3]=global_alpha_u8; // gets lost
+          uint64_t sum[4]={0,0,0,0};
+          /* number of texels summed; shadows the `count` parameter inside
+           * this block only */
+          int count = 0;
+
+          {
+            for (int ov = - dim; ov <= dim; ov++)
+            {
+              /* NOTE(review): rows are addressed as bpp * width bytes
+               * apart rather than through buffer->stride - confirm the
+               * texture rows carry no padding */
+              uint8_t *src = (uint8_t *) buffer->data + bpp * ((v+ov) * width + (u - dim));
+              for (int ou = - dim; ou <= dim; ou++)
+              {
+                for (int c = 0; c < bpp; c++)
+                  sum[c] += src[c];
+                count ++;
+                src += bpp;
+              }
+
+            }
+          }
 
+          /* divide by the texel count using a 16.16 fixed point reciprocal */
+          int recip = 65536/count;
+          for (int c = 0; c < bpp; c++)
+            rgba[c] = sum[c] * recip >> 16;
+          ctx_RGBA8_associate_alpha_probably_opaque (rgba);
+    }
+    rgba += 4;
+    x += dx;
+    y += dy;
   }
 
-  if (ctx_pcm_get_queued_frames (ctx) > 1000)
-    return 0;
-  else
-    return 1000 - ctx_pcm_get_queued_frames (ctx);
+  for (; i < count; i++)
+  {
+    *((uint32_t*)(rgba))= 0;
+    rgba += 4;
+  }
 }
 
-void ctx_pcm_set_sample_rate (Ctx *ctx, int sample_rate)
+/* Same as ctx_fragment_image_rgb8_RGBA8_box, followed by exchanging
+ * bytes 0 and 2 of each of the `count` produced pixels.
+ */
+static void
+ctx_fragment_image_rgb8_RGBA8_box_swap_red_green (CtxRasterizer *rasterizer,
+                                  float x,
+                                  float y,
+                                  void *out, int count, float dx, float dy)
 {
-#if 0
-  if (!strcmp (ctx->backend->name, "mmm") ||
-      !strcmp (ctx->backend->name, "mmm-client"))
-  {
-    mmm_pcm_set_sample_rate (ctx->backend_data, sample_rate);
-  }
-  else
-#endif
-    client_freq = sample_rate;
+  ctx_fragment_image_rgb8_RGBA8_box (rasterizer, x, y, out, count, dx, dy);
+  ctx_fragment_swap_red_green_u8 (out, count);
 }
 
-void ctx_pcm_set_format (Ctx *ctx, CtxPCM format)
+/* Post-process `count` RGBA8 pixels in buf in place.
+ *
+ * When the global alpha is not 255, each pixel's alpha byte is scaled by
+ * it and the pixel is passed to ctx_RGBA8_associate_alpha.  With a global
+ * alpha of 255, each pixel is only passed to
+ * ctx_RGBA8_associate_alpha_probably_opaque.
+ */
+static inline void
+ctx_RGBA8_apply_global_alpha_and_associate (CtxRasterizer *rasterizer,
+                                         uint8_t *buf, int count)
 {
-#if 0
-  if (!strcmp (ctx->backend->name, "mmm") ||
-      !strcmp (ctx->backend->name, "mmm-client"))
+  const uint8_t galpha = rasterizer->state->gstate.global_alpha_u8;
+  uint8_t *pixel = (uint8_t *) buf;
+  if (galpha != 255)
   {
-    mmm_pcm_set_format (ctx->backend_data, format);
+    for (int remaining = count; remaining > 0; remaining--, pixel += 4)
+    {
+      pixel[3] = (pixel[3] * galpha) / 255;
+      ctx_RGBA8_associate_alpha (pixel);
+    }
   }
   else
-#endif
-    ctx_client_format = format;
-}
-
-CtxPCM ctx_pcm_get_format (Ctx *ctx)
-{
-#if 0
-  if (!strcmp (ctx->backend->name, "mmm") ||
-      !strcmp (ctx->backend->name, "mmm-client"))
   {
-    return mmm_pcm_get_format (ctx->backend_data);
+    for (int remaining = count; remaining > 0; remaining--, pixel += 4)
+    {
+      ctx_RGBA8_associate_alpha_probably_opaque (pixel);
+    }
   }
-#endif
-  return ctx_client_format;
 }
 
-int ctx_pcm_get_sample_rate (Ctx *ctx)
+#if CTX_FRAGMENT_SPECIALIZE
+/* Bilinear sampling of a 3-bytes-per-pixel texture into `count` RGBA8
+ * pixels.
+ *
+ * rasterizer: supplies the texture (source_fill.texture.buffer->color_managed)
+ *             and global_alpha_u8.
+ * x, y:       texture coordinate of the first output pixel.
+ * out:        destination, four bytes are written per pixel.
+ * count:      number of pixels to produce.
+ * dx, dy:     added to x and y after every pixel.
+ *
+ * Coordinates outside the texture give 0.  Inside, the three channels are
+ * blended from the texel and its right / lower neighbours, alpha is set
+ * to the global alpha and the pixel is passed through
+ * ctx_RGBA8_associate_alpha_probably_opaque.
+ */
+static void
+ctx_fragment_image_rgb8_RGBA8_bi (CtxRasterizer *rasterizer,
+                                  float x,
+                                  float y,
+                                  void *out, int count, float dx, float dy)
 {
-#if 0
-  if (!strcmp (ctx->backend->name, "mmm") ||
-      !strcmp (ctx->backend->name, "mmm-client"))
+  uint8_t *rgba = (uint8_t *) out;
+
+  CtxSource *g = &rasterizer->state->gstate.source_fill;
+  /* NOTE(review): color_managed is dereferenced unchecked - presumably it
+   * is always set by the time this runs; confirm */
+  CtxBuffer *buffer = g->texture.buffer->color_managed;
+  int width = buffer->width;
+  int height = buffer->height;
+
+  uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;
+  for (int i = 0; i < count; i++)
   {
-    return mmm_pcm_get_sample_rate (ctx->backend_data);
+
+  int u = x;
+  int v = y;
+  if ( u < 0 || v < 0 ||
+       u >= width ||
+       v >= height)
+    {
+      *((uint32_t*)(rgba))= 0;
+    }
+  else
+    {
+      int bpp = 3;
+      /* src00/src01/src10/src11: the texel at (u,v) and its right, lower
+       * and lower-right neighbours; on the last column or row the
+       * neighbour falls back to the texel itself */
+      uint8_t *src00 = (uint8_t *) buffer->data;
+      int stride = buffer->stride;
+      src00 += v * stride + u * bpp;
+      uint8_t *src01 = src00;
+      if ( u + 1 < width)
+      {
+        src01 = src00 + bpp;
+      }
+      uint8_t *src11 = src01;
+      uint8_t *src10 = src00;
+      if ( v + 1 < height)
+      {
+        src10 = src00 + stride;
+        src11 = src01 + stride;
+      }
+      /* these shadow the dx/dy parameters inside this branch: fractional
+       * part of x and y scaled by 255.9, used as blend weights */
+      float dx = (x-(int)(x)) * 255.9f;
+      float dy = (y-(int)(y)) * 255.9f;
+      for (int c = 0; c < bpp; c++)
+      {
+        rgba[c] = ctx_lerp_u8 (ctx_lerp_u8 (src00[c], src01[c], dx),
+                               ctx_lerp_u8 (src10[c], src11[c], dx), dy);
+      }
+      rgba[3] = global_alpha_u8;
+      ctx_RGBA8_associate_alpha_probably_opaque (rgba);
+    }
+    /* here dx/dy are the function parameters again */
+    x += dx;
+    y += dy;
+    rgba += 4;
   }
-#endif
-  return client_freq;
 }
 
-#endif
- /* Copyright (C) 2020 Øyvind Kolås <pippin gimp org>
- */
-
-#if CTX_FORMATTER
-
-/* returns the maximum string length including terminating \0 */
-int ctx_a85enc_len (int input_length)
+/* Same as ctx_fragment_image_rgb8_RGBA8_bi, followed by exchanging bytes
+ * 0 and 2 of each of the `count` produced pixels.
+ */
+static void
+ctx_fragment_image_rgb8_RGBA8_bi_swap_red_green (CtxRasterizer *rasterizer,
+                                  float x,
+                                  float y,
+                                  void *out, int count, float dx, float dy)
 {
-  return (input_length / 4 + 1) * 5;
+  ctx_fragment_image_rgb8_RGBA8_bi (rasterizer, x, y, out, count, dx, dy);
+  ctx_fragment_swap_red_green_u8 (out, count);
 }
 
-int ctx_a85enc (const void *srcp, char *dst, int count)
+/**
+ * ctx_fragment_image_rgb8_RGBA8_nearest:
+ * @rasterizer: supplies the fill source texture and the global alpha
+ * @x: texture-space x of the first output pixel
+ * @y: texture-space y of the first output pixel
+ * @out: destination, @count pixels of 4 bytes each
+ * @count: number of pixels to produce
+ * @dx: texture-space x step per output pixel
+ * @dy: texture-space y step per output pixel
+ *
+ * Nearest-neighbour fetch from a 3-bytes-per-pixel texture. Texels inside
+ * the buffer are copied with byte 3 set to the global alpha; every pixel
+ * that samples outside the buffer is written as 32 bits of zero. Three
+ * code paths exist: a 1:1 horizontal copy, a generic horizontal scan
+ * (dy == 0) and a fully general scan.
+ *
+ * All sampling loops re-check the texel column on every step, so neither
+ * a step larger than one texel nor a negative step can index outside the
+ * current row.
+ */
+static CTX_INLINE void
+ctx_fragment_image_rgb8_RGBA8_nearest (CtxRasterizer *rasterizer,
+                                       float x,
+                                       float y,
+                                       void *out, int count, float dx, float dy)
 {
-  const uint8_t *src = (uint8_t*)srcp;
-  int out_len = 0;
+  CtxSource *g = &rasterizer->state->gstate.source_fill;
+  CtxBuffer *buffer = g->texture.buffer;
+  if (buffer->color_managed)
+   buffer = buffer->color_managed;
+  uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;
+  uint8_t *rgba = (uint8_t *) out;
+  uint8_t *src = (uint8_t *) buffer->data;
+  int bwidth = buffer->width;
+  int bheight = buffer->height;
+  int stride = buffer->stride;
 
-  int padding = 4-(count % 4);
-  if (padding == 4) padding = 0;
+  /* shift by half a texel before the float -> int truncation below */
+  x += 0.5f;
+  y += 0.5f;
 
-  for (int i = 0; i < (count+3)/4; i ++)
+  if (CTX_UNLIKELY (dy == 0.0f && dx > 0.999f && dx < 1.001f))
   {
-    uint32_t input = 0;
-    for (int j = 0; j < 4; j++)
+    /* 1:1 horizontal span: consecutive output pixels map to consecutive
+     * texels of a single row */
+    int v = y;
+    int u = x;
+
+    /* row 0 is a valid texture row, hence >= rather than > */
+    if (v < bheight && v >= 0)
     {
-      input = (input << 8);
-      if (i*4+j<=count)
-        input += src[i*4+j];
-    }
+      int o = v * stride + u * 3;
+      int i;
+      /* left of the image: emit zeros until column 0 is reached */
+      for (i = 0; i < count && u < bwidth && u <0; i++)
+      {
+        *((uint32_t*)(rgba))= 0;
+        rgba += 4;
+        o += 3;
+        u+=1;
+      }
 
-    int divisor = 85 * 85 * 85 * 85;
-#if 0
-    if (input == 0)
-    {
-        dst[out_len++] = 'z';
+      /* inside the image: copy three bytes, append the global alpha */
+      for (; i < count && u < bwidth; i++)
+      {
+        rgba[0] = src[o];
+        rgba[1] = src[o+1];
+        rgba[2] = src[o+2];
+        rgba[3]=global_alpha_u8;
+        rgba += 4;
+        o += 3;
+        u+=1;
+      }
+      /* right of the image: zero the remainder */
+      for (; i < count; i++)
+      {
+        *((uint32_t*)(rgba))= 0;
+        rgba += 4;
+      }
     }
-    /* todo: encode 4 spaces as 'y' */
     else
-#endif
     {
-      for (int j = 0; j < 5; j++)
+      /* row outside the image: the whole span is zero */
+      for (int i = 0; i < count; i++)
       {
-        dst[out_len++] = ((input / divisor) % 85) + '!';
-        divisor /= 85;
+        *((uint32_t*)(rgba))= 0;
+        rgba+=4;
       }
     }
   }
-  out_len -= padding;
-  dst[out_len]=0;
-  return out_len;
-}
-#endif
-
-#if CTX_PARSER
-
-int ctx_a85dec (const char *src, char *dst, int count)
-{
-  int out_len = 0;
-  uint32_t val = 0;
-  int k = 0;
-  int i = 0;
-  int p = 0;
-  for (i = 0; i < count; i ++)
+  else if (dy == 0.0f)
   {
-    p = src[i];
-    val *= 85;
-    if (CTX_UNLIKELY(p == '~'))
+    /* horizontal span with an arbitrary x step */
+    int u = x;
+    int v = y;
+    int i;
+    /* zero leading pixels whose column is outside the image; u is
+     * refreshed after stepping so the test always sees the column of the
+     * pixel about to be written */
+    for (i = 0; i < count && (u >= bwidth || u < 0); i++)
     {
-      break;
+      *((uint32_t*)(rgba))= 0;
+      x += dx;
+      rgba += 4;
+      u = x;
     }
-#if 0
-    else if (p == 'z')
+    u = x;
+    int ro = v * stride;
+    if (v >= 0 && v < bheight)
     {
-      for (int j = 0; j < 4; j++)
-        dst[out_len++] = 0;
-      k = 0;
-    }
-    else if (p == 'y') /* lets support this extension */
+    /* sample while the column stays inside the row; checking u on every
+     * step keeps steps larger than one texel and fully off-image spans
+     * from indexing outside the row */
+    for (; i < count && u >= 0 && u < bwidth; i++)
     {
-      for (int j = 0; j < 4; j++)
-        dst[out_len++] = 32;
-      k = 0;
+      int o = ro + u * 3;
+      rgba[0] = src[o];
+      rgba[1] = src[o+1];
+      rgba[2] = src[o+2];
+      rgba[3] = global_alpha_u8;
+
+      rgba += 4;
+      x += dx;
+      u = x;
     }
-#endif
-    else if (CTX_LIKELY(p >= '!' && p <= 'u'))
+    }
+    /* everything not sampled above is zero */
+    for (; i < count; i++)
     {
-      val += p-'!';
-      if (CTX_UNLIKELY (k % 5 == 4))
-      {
-         for (int j = 0; j < 4; j++)
-         {
-           dst[out_len++] = (val & (0xff << 24)) >> 24;
-           val <<= 8;
-         }
-         val = 0;
-      }
-      k++;
+      *((uint32_t*)(rgba))= 0;
+      rgba += 4;
     }
-    // we treat all other chars as whitespace
-  }
-  if (CTX_LIKELY (p != '~'))
-  { 
-    val *= 85;
   }
-  k = k % 5;
-  if (k)
+  else
   {
-    val += 84;
-    for (int j = k; j < 4; j++)
+    /* general case: both coordinates advance per pixel */
+    int u = x;
+    int v = y;
+    int i;
+    /* zero leading pixels whose column is outside the image */
+    for (i = 0; i < count && (u >= bwidth || u < 0); i++)
     {
-      val *= 85;
-      val += 84;
+      *((uint32_t*)(rgba))= 0;
+      x += dx;
+      y += dy;
+      rgba += 4;
+      u = x;
+      v = y;
     }
-
-    for (int j = 0; j < k-1; j++)
+    u = x;
+    v = y;
+    /* the column is checked on both sides so a negative dx cannot index
+     * before the start of a row */
+    for (; i < count && u >= 0 && u < bwidth; i++)
     {
-      dst[out_len++] = (val & (0xff << 24)) >> 24;
-      val <<= 8;
+    if (CTX_UNLIKELY(v < 0 || v >= bheight))
+      {
+        *((uint32_t*)(rgba))= 0;
+      }
+    else
+      {
+        int o = v * stride + u * 3;
+        rgba[0] = src[o];
+        rgba[1] = src[o+1];
+        rgba[2] = src[o+2];
+        rgba[3] = global_alpha_u8;
+      }
+
+      rgba += 4;
+      x += dx;
+      y += dy;
+      u = x;
+      v = y;
+    }
+    /* everything after the column left the image is zero */
+    for (; i < count; i++)
+    {
+      *((uint32_t*)(rgba))= 0;
+      rgba += 4;
     }
-    val = 0;
   }
-  dst[out_len] = 0;
-  return out_len;
+#if CTX_DITHER
+  //ctx_dither_rgba_u8 (rgba, x, y, rasterizer->format->dither_red_blue,
+  //                    rasterizer->format->dither_green);
+#endif
 }
 
-#if 1
-int ctx_a85len (const char *src, int count)
+
+static CTX_INLINE void /* Channel-swapping variant of the rgb8 -> RGBA8
+                        * nearest fragment: forwards every argument unchanged
+                        * to ctx_fragment_image_rgb8_RGBA8_nearest and then
+                        * passes the same output span (out, count) to
+                        * ctx_fragment_swap_red_green_u8.
+                        * NOTE(review): the swap helper is not visible here;
+                        * presumably it exchanges two channels in place -
+                        * confirm. */
+ctx_fragment_image_rgb8_RGBA8_nearest_swap_red_green (CtxRasterizer *rasterizer,
+                                                      float x,
+                                                      float y,
+                                                      void *out, int count, float dx, float dy)
 {
-  int out_len = 0;
-  int k = 0;
-  for (int i = 0; i < count; i ++)
+  ctx_fragment_image_rgb8_RGBA8_nearest (rasterizer, x, y, out, count, dx, dy); /* fill out[] first */
+  ctx_fragment_swap_red_green_u8 (out, count); /* then post-process that same span */
+}
+
+static void /* Dispatcher for rgb8 textures rendered to RGBA8. It picks one
+             * of the box / nearest / bi fragment functions from the
+             * image_smoothing flag and the scale returned by
+             * ctx_matrix_get_scale, and in each case calls the
+             * _swap_red_green variant when rasterizer->swap_red_green is
+             * set. All arguments are forwarded unchanged. */
+ctx_fragment_image_rgb8_RGBA8 (CtxRasterizer *rasterizer,
+                               float x,
+                               float y,
+                               void *out, int count, float dx, float dy)
+{
+  if (rasterizer->state->gstate.image_smoothing) /* smoothing on: choose a filter by scale */
   {
-    if (src[i] == '~')
-      break;
-    else if (src[i] == 'z')
+    float factor = ctx_matrix_get_scale (&rasterizer->state->gstate.transform);
+    if (factor <= 0.50f) /* scale of one half or less: use the box variant */
     {
-      for (int j = 0; j < 4; j++)
-        out_len++;
-      k = 0;
+      if (rasterizer->swap_red_green)
+        ctx_fragment_image_rgb8_RGBA8_box_swap_red_green (rasterizer, x, y, out, count, dx, dy);
+      else
+        ctx_fragment_image_rgb8_RGBA8_box (rasterizer, x, y, out, count, dx, dy);
     }
-    else if (src[i] >= '!' && src[i] <= 'u')
+#if CTX_ALWAYS_USE_NEAREST_FOR_SCALE1
+    else if (factor > 0.99f && factor < 1.01f) /* optional build-time shortcut: scale within 1% of 1.0 uses nearest */
     {
-      if (k % 5 == 4)
-        out_len += 4;
-      k++;
+      // XXX missing translate test
+      if (rasterizer->swap_red_green)
+        ctx_fragment_image_rgb8_RGBA8_nearest_swap_red_green (rasterizer, x, y, out, count, dx, dy);
+      else
+        ctx_fragment_image_rgb8_RGBA8_nearest (rasterizer, x, y, out, count, dx, dy);
     }
-    // we treat all other chars as whitespace
-  }
-  k = k % 5;
-  if (k)
-    out_len += k-1;
-  return out_len;
-}
-#endif
-
 #endif
-#ifndef SQUOZE_H
-#define SQUOZE_H
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-#include <assert.h>
-
-uint32_t squoze6 (const char *utf8);
-uint64_t squoze10 (const char *utf8);
-uint64_t squoze12 (const char *utf8);
-const char *squoze6_decode (uint32_t hash);
-const char *squoze10_decode (uint64_t hash);
-const char *squoze12_decode (uint64_t hash);
-
-//#define SQUOZE_NO_INTERNING  // this disables the interning - providing only a hash (and decode for 
non-overflowed hashes)
+    else /* every other scale with smoothing on: use the bi variant */
+    {
+      if (rasterizer->swap_red_green)
+        ctx_fragment_image_rgb8_RGBA8_bi_swap_red_green (rasterizer, x, y, out, count, dx, dy);
+      else
+        ctx_fragment_image_rgb8_RGBA8_bi (rasterizer, x, y, out, count, dx, dy);
+    }
+  }
+  else /* smoothing off: always nearest */
+  {
+    if (rasterizer->swap_red_green)
+      ctx_fragment_image_rgb8_RGBA8_nearest_swap_red_green (rasterizer, x, y, out, count, dx, dy);
+    else
+      ctx_fragment_image_rgb8_RGBA8_nearest (rasterizer, x, y, out, count, dx, dy);
+  }
+#if 0
+#if CTX_DITHER
+  { /* dithering of the result is compiled out by the enclosing "#if 0" */
+  uint8_t *rgba = (uint8_t*)out;
+  ctx_dither_rgba_u8 (rgba, x, y, rasterizer->format->dither_red_blue,
+                      rasterizer->format->dither_green);
+  }
+#endif
+#endif
+}
 
-#define SQUOZE_ENTER_SQUEEZE    16
 
-#define SQUOZE_SPACE            0
-#define SQUOZE_DEC_OFFSET_A     27
-#define SQUOZE_INC_OFFSET_A     28
-#define SQUOZE_DEC_OFFSET_B     29
-#define SQUOZE_INC_OFFSET_B     30
-#define SQUOZE_ENTER_UTF5       31
+/************** rgba8 */
 
-#define SQUOZE_JUMP_STRIDE      26
-#define SQUOZE_JUMP_OFFSET      19
+/**
+ * ctx_fragment_image_rgba8_RGBA8_box:
+ * @rasterizer: supplies the fill source texture, transform and global alpha
+ * @x: texture-space x of the first output pixel
+ * @y: texture-space y of the first output pixel
+ * @out: destination, @count pixels of 4 bytes each
+ * @count: number of pixels to produce
+ * @dx: texture-space x step per output pixel
+ * @dy: texture-space y step per output pixel
+ *
+ * Box-filtered fetch from a 4-bytes-per-pixel texture. Every output pixel
+ * is the per-channel average of a (2*dim+1) x (2*dim+1) texel window
+ * centred on the sample position, where dim is derived from the inverse
+ * of the transform scale. Byte 3 of the average is then scaled by the
+ * global alpha and the pixel is passed to
+ * ctx_RGBA8_associate_alpha_probably_opaque. Output pixels whose window
+ * does not fit inside the texture are written as zero.
+ */
+static void
+ctx_fragment_image_rgba8_RGBA8_box (CtxRasterizer *rasterizer,
+                                    float x,
+                                    float y,
+                                    void *out, int count, float dx, float dy)
+{
+  uint8_t *rgba = (uint8_t *) out;
+  CtxSource *g = &rasterizer->state->gstate.source_fill;
+  CtxBuffer *buffer = g->texture.buffer;
+  /* same fallback as the nearest samplers: use the colour managed copy
+   * only when one exists */
+  if (buffer->color_managed)
+    buffer = buffer->color_managed;
+  int width = buffer->width;
+  int height = buffer->height;
+  uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;
+  float factor = ctx_matrix_get_scale (&rasterizer->state->gstate.transform);
+  int dim = (1.0 / factor) / 3;
 
-static inline uint32_t squoze_utf8_to_unichar (const char *input);
-static inline int      squoze_unichar_to_utf8 (uint32_t  ch, uint8_t  *dest);
-static inline int      squoze_utf8_len        (const unsigned char first_byte);
+  int i = 0;
 
+  /* leading pixels whose window sticks out of the texture; x is compared
+   * against the width and y against the height */
+  for (; i < count && (x - dim< 0 || y - dim < 0 || x + dim >= width || y + dim >= height); i++)
+  {
+    *((uint32_t*)(rgba))=0;
+    rgba += 4;
+    x += dx;
+    y += dy;
+  }
 
-/* returns the base-offset of the segment this unichar belongs to,
- *
- * segments are 26 items long and are offset so that the 'a'-'z' is
- * one segment.
- */
-static inline int squoze_new_offset (uint32_t unichar)
-{
-  uint32_t ret = unichar - (unichar % SQUOZE_JUMP_STRIDE) + SQUOZE_JUMP_OFFSET;
-  if (ret > unichar) ret -= SQUOZE_JUMP_STRIDE;
-  return ret;
-}
+  /* pixels whose whole window lies inside the texture */
+  for (; i < count && !(
+       x - dim < 0 || y - dim < 0 ||
+       x + dim >= width ||
+       y + dim >= height); i++)
+  {
 
-static int squoze_needed_jump (uint32_t off, uint32_t unicha)
-{
-  int count = 0;
-  int unichar = unicha;
-  int offset = off;
+  int u = x;
+  int v = y;
+    {
+      int bpp = 4;
+          uint64_t sum[4]={0,0,0,0};
+          /* number of texels accumulated; named so it does not shadow
+           * the count parameter that bounds the outer loops */
+          int samples = 0;
 
-  if (unichar == 32) // space is always in range
-    return 0;
+          {
+            for (int ov = - dim; ov <= dim; ov++)
+            {
+              /* first texel of this window row; rows are addressed as
+               * width * bpp bytes apart */
+              uint8_t *src = (uint8_t *) buffer->data + bpp * ((v+ov) * width + (u - dim));
+              for (int ou = - dim; ou <= dim; ou++)
+              {
+                for (int c = 0; c < bpp; c++)
+                  sum[c] += src[c];
+                samples ++;
+                src += bpp;
+              }
 
-  /* TODO: replace this with direct computation of values instead of loops */
+            }
+          }
 
-  while (unichar < offset)
-  {
-    offset -= SQUOZE_JUMP_STRIDE;
-    count ++;
-  }
-  if (count)
-  {
-    return -count;
-  }
-  while (unichar - offset >= SQUOZE_JUMP_STRIDE)
-  {
-    offset += SQUOZE_JUMP_STRIDE;
-    count ++;
+          /* divide by the sample count via a 16.16 reciprocal */
+          int recip = 65536/samples;
+          for (int c = 0; c < bpp; c++)
+            rgba[c] = sum[c] * recip >> 16;
+          rgba[3]=rgba[3]*global_alpha_u8/255; // gets lost
+          ctx_RGBA8_associate_alpha_probably_opaque (rgba);
+    }
+    rgba += 4;
+    x += dx;
+    y += dy;
   }
-  return count;
-}
 
-static inline int
-squoze_utf5_length (uint32_t unichar)
-{
-  int octets = 0;
-  if (unichar == 0)
-    return 1;
-  while (unichar)
+
+  /* trailing pixels after the window left the texture */
+  for (; i < count; i++)
   {
-    octets ++;
-    unichar /= 16;
+    *((uint32_t*)(rgba))= 0;
+    rgba += 4;
   }
-  return octets;
+#if CTX_DITHER
+//ctx_dither_rgba_u8 (rgba, x, y, rasterizer->format->dither_red_blue,
+//                    rasterizer->format->dither_green);
+#endif
 }
 
-typedef struct EncodeUtf5 {
-  int      is_utf5;
-  int      offset;
-  int      length;
-  void    *write_data;
-  uint32_t current;
-} EncodeUtf5;
-
-static inline uint64_t
-squoze_overflow_mask_for_dim (int squoze_dim)
+/**
+ * ctx_fragment_image_rgba8_RGBA8_nearest:
+ * @rasterizer: supplies the fill source texture
+ * @x: texture-space x of the first output pixel
+ * @y: texture-space y of the first output pixel
+ * @out: destination, @scount 32-bit pixels
+ * @scount: number of pixels to produce
+ * @dx: texture-space x step per output pixel
+ * @dy: texture-space y step per output pixel
+ *
+ * Nearest-neighbour fetch from a 32-bit-per-pixel texture using 16.16
+ * fixed-point stepping. Pixels that sample outside the texture are
+ * written as zero. The general path finishes by calling
+ * ctx_RGBA8_apply_global_alpha_and_associate on the span.
+ *
+ * NOTE(review): the 1:1 fast path returns before that final call, so its
+ * output is a raw copy of the texels - confirm this is intended.
+ */
+static void
+ctx_fragment_image_rgba8_RGBA8_nearest (CtxRasterizer *rasterizer,
+                                        float x,
+                                        float y,
+                                        void *out, int scount, float dx, float dy)
 {
-  return ((uint64_t)1<<(squoze_dim * 5 + 1));
-}
+  unsigned int count = scount;
+  CtxSource *g = &rasterizer->state->gstate.source_fill;
+  CtxBuffer *buffer = g->texture.buffer;
+  if (buffer->color_managed)
+    buffer = buffer->color_managed;
+  int ideltax = dx * 65536;
+  int ideltay = dy * 65536;
+  uint32_t *src = (uint32_t *) buffer->data;
+  uint32_t *dst = (uint32_t*)out;
+  int bwidth  = buffer->width;
+  int bheight = buffer->height;
+  int bbheight = bheight << 16;
+  int bbwidth  = bwidth << 16;
+  /* shift by half a texel before truncating to integers */
+  x += 0.5f;
+  y += 0.5f;
 
-static int squoze_compute_cost_utf5 (int offset, int val, int next_val)
-{
-  int cost = 0; 
-  cost += squoze_utf5_length (val);
-  if (next_val)
+#if 1
+  /* fast path: exactly one texel per output pixel along a single row */
+  if (CTX_UNLIKELY(ideltay == 0 && ideltax == 65536))
   {
-    int no_change_cost = squoze_utf5_length (next_val);
-#if 0 // not hit in test-corpus, it is easier to specify and
-      // port the hash consistently without it
-    offset = squoze_new_offset (val);
-    int change_cost = 1;
-    int needed_jump = squoze_needed_jump (offset, next_val);
-
-    if (needed_jump == 0)
-    {
-      change_cost += 1;
-    } else if (needed_jump >= -2 && needed_jump <= 2)
-    {
-      change_cost += 2;
-    }
-    else if (needed_jump >= -10 && needed_jump <= -10)
+    unsigned int i = 0;
+    int u = x;
+    int v = y;
+    if (!(v >= 0 && v < bheight))
     {
-      change_cost += 3;
+      /* row outside the texture: the whole span is zero */
+      for (i = 0 ; i < count; i++)
+        *dst++ = 0;
+      return;
     }
-    else
+    src += bwidth * v + u;
+    /* left of the texture: zero until column 0; from here on count holds
+     * the number of pixels still to be written */
+    while (count && !(u >= 0))
     {
-      change_cost += 100;
+      *dst++ = 0;
+      src ++;
+      u++;
+      count--;
     }
-
-    if (change_cost < no_change_cost)
+    int limit = ctx_mini (count, bwidth - u);
+    if (limit>0)
     {
-      cost += change_cost;
+      /* bulk copy of the texels that are inside the row */
+      memcpy (dst, src, limit * 4);
+      dst += limit;
+      i = limit;
     }
-    else
+#if 1
+    /* zero the remaining pixels; memset takes a byte count, so the pixel
+     * count is scaled by the pixel size */
+    memset (dst, 0, (count - i) * sizeof (uint32_t));
+#else
+    for (;i < count; i++)
+      *dst++ = 0;
 #endif
-    {
-      cost += no_change_cost;
-    }
-
-  }
-
-
-
-  return cost;
-}
-
-static int squoze_compute_cost_squeezed (int offset, int val, int next_val)
-{
-  int needed_jump = squoze_needed_jump (offset, val);
-  int cost = 0;
-  if (needed_jump == 0)
-  {
-    cost += 1;
-  }
-  else if (needed_jump >= -2 && needed_jump <= 2)
-  {
-    cost += 2;
-    offset += SQUOZE_JUMP_STRIDE * needed_jump;
-  }
-  else if (needed_jump >= -10 && needed_jump <= 10)
-  {
-    cost += 3;
-    offset += SQUOZE_JUMP_STRIDE * needed_jump;
-  }
-  else
-  {
-    cost += 100; // very expensive, makes the other choice win
+    return;
   }
+#endif
 
-  if (next_val)
   {
-    int change_cost = 1 + squoze_utf5_length (next_val);
-    int no_change_cost = 0;
-    needed_jump = squoze_needed_jump (offset, next_val);
+    unsigned int i = 0;
+    int32_t ix = x * 65536;
+    int32_t iy = y * 65536;
 
-    if (needed_jump == 0)
+    /* walk backwards from the last pixel, zeroing and dropping every
+     * trailing pixel that samples outside the texture */
+    int32_t u1 = ix + ideltax * (count-1);
+    int32_t v1 = iy + ideltay * (count-1);
+    uint32_t *edst = ((uint32_t*)out)+count - 1;
+    for (; i < count; )
     {
-      no_change_cost += 1;
+      if (u1 <0 || v1 < 0 || u1 >= bbwidth || v1 >= bbheight)
+      {
+        *edst-- = 0;
+        count --;
+        u1 -= ideltax;
+        v1 -= ideltay;
+      }
+      else break;
     }
-    else if (needed_jump >= -2 && needed_jump <= 2)
+
+    /* zero the leading pixels that sample outside the texture */
+    for (i = 0; i < count; i ++)
     {
-      no_change_cost += 2;
+      if (ix < 0 || iy < 0 || ix >= bbwidth  || iy >= bbheight)
+      {
+        *dst++ = 0;
+        x += dx;
+        y += dy;
+        ix += ideltax;
+        iy += ideltay;
+      }
+      else break;
     }
-    else if (needed_jump >= -10 && needed_jump <= 10)
+
+    /* the remaining pixels are fetched without further bounds checks;
+     * the first and last of them were tested above */
+    if (ideltay == 0)
     {
-      no_change_cost += 3;
-      offset += SQUOZE_JUMP_STRIDE * needed_jump;
+      int o = (iy>>16)*bwidth;
+      for (; i < count; i ++)
+      {
+        *dst++ = src[o + (ix>>16)];
+        ix += ideltax;
+      }
     }
     else
     {
-      no_change_cost = change_cost;
+      for (; i < count; i ++)
+      {
+        *dst++ = src[(iy>>16) * bwidth + (ix>>16)];
+        ix += ideltax;
+        iy += ideltay;
+      }
     }
-    if (change_cost < no_change_cost)
-      cost += change_cost;
-    else
-      cost += no_change_cost;
   }
-
-  return cost;
+  /* count excludes the trailing pixels trimmed above, which are zero */
+  ctx_RGBA8_apply_global_alpha_and_associate (rasterizer, (uint8_t*)out, count);
 }
 
-
-static void squoze5_encode (const char *input, int inlen,
-                            char *output, int *r_outlen,
-                            int permit_squeezed,
-                            int escape_endzero)
+static void /* Interpolating fetch from a 32-bit-per-pixel texture for a span
+             * of count output pixels starting at (x, y) and stepping by
+             * (dx, dy). Neighbouring texels are blended with
+             * ctx_lerp_RGBA8_split / ctx_lerp_RGBA8_merge (horizontal scans)
+             * or ctx_bi_RGBA8 (general scans), weighted by 8-bit fractions
+             * taken from 16.16 fixed-point coordinates. Pixels judged to be
+             * outside the texture are written as zero, and the span is
+             * finally passed to ctx_RGBA8_apply_global_alpha_and_associate.
+             * A horizontal scan that starts close to a texel centre with a
+             * step close to 1 is delegated to the nearest sampler. */
+ctx_fragment_image_rgba8_RGBA8_bi (CtxRasterizer *rasterizer,
+                                   float x,
+                                   float y,
+                                   void *out, int count, float dx, float dy)
 {
-  int offset  = squoze_new_offset('a');
-  int is_utf5 = 1;
-  int len     = 0;
-
-  for (int i = 0; i < inlen; i+= squoze_utf8_len (input[i]))
-  {
-    int val = squoze_utf8_to_unichar (&input[i]);
-    int next_val = 0;
-    int first_len = squoze_utf8_len (input[i]);
-    if (i + first_len < inlen)
-      next_val = squoze_utf8_to_unichar (&input[i+first_len]);
+  uint8_t *rgba = (uint8_t *) out;
+  float ox = (x-(int)(x)); /* fractional part of the start x */
+  float oy = (y-(int)(y)); /* fractional part of the start y */
 
-    if (is_utf5)
-    {
-      int change_cost    = squoze_compute_cost_squeezed (offset, val, next_val);
-      int no_change_cost = squoze_compute_cost_utf5 (offset, val, next_val);
-  
-      if (i != 0)          /* ignore cost of initial 'G' */
-        change_cost += 1;
+  CtxSource *g = &rasterizer->state->gstate.source_fill;
+  CtxBuffer *buffer = g->texture.buffer->color_managed; /* NOTE(review): no NULL fallback here, unlike the nearest samplers */
+  const int bwidth = buffer->width;
+  const int bheight = buffer->height;
+  int i = 0;
 
-      if (permit_squeezed && change_cost <= no_change_cost)
-      {
-        output[len++] = SQUOZE_ENTER_SQUEEZE;
-        is_utf5 = 0;
-      }
-    }
-    else
+  if (dy == 0.0f && dx > 0.0f) /* horizontal scan moving right: y is constant for the span */
+  {
+    if (!(y >= 0 && y < bheight)) /* row outside the texture: zero the span and leave */
     {
-      int change_cost    = 1 + squoze_compute_cost_utf5 (offset, val, next_val);
-      int no_change_cost = squoze_compute_cost_squeezed (offset, val, next_val);
-
-      if (change_cost < no_change_cost)
-      {
-        output[len++] = SQUOZE_ENTER_UTF5;
-        is_utf5 = 1;
-      }
+      uint32_t *dst = (uint32_t*)rgba;
+      for (i = 0 ; i < count; i++)
+        *dst++ = 0;
+      return;
     }
 
-    if (!is_utf5)
+    if ((dx > 0.99f && dx < 1.01f && 
+         ox < 0.01 && oy < 0.01))
     {
-      int needed_jump = squoze_needed_jump (offset, val);
-      if (needed_jump)
-      {
-        if (needed_jump >= -2 && needed_jump <= 2)
-        {
-          switch (needed_jump)
-          {
-            case -1: output[len++] = SQUOZE_DEC_OFFSET_B; break;
-            case  1: output[len++] = SQUOZE_INC_OFFSET_B; break;
-            case -2: output[len++] = SQUOZE_DEC_OFFSET_A; break;
-            case  2: output[len++] = SQUOZE_INC_OFFSET_A; break;
-          }
-          offset += SQUOZE_JUMP_STRIDE * needed_jump;
-        }
-        else if (needed_jump >= -10 && needed_jump <= 10) {
-              int encoded_val;
-              if (needed_jump < -2)
-                encoded_val = 5 - needed_jump;
-              else
-                encoded_val = needed_jump - 3;
-
-              output[len++] = (encoded_val / 4) + SQUOZE_DEC_OFFSET_A;
-              output[len++] = (encoded_val % 4) + SQUOZE_DEC_OFFSET_A;
-
-              offset += SQUOZE_JUMP_STRIDE * needed_jump;
-        }
-        else
-        {
-          assert(0); // should not be reached
-          output[len++] = SQUOZE_ENTER_UTF5;
-          is_utf5 = 1;
-        }
-      }
+      /* TODO: this could have been rigged up in composite_setup */
+      ctx_fragment_image_rgba8_RGBA8_nearest (rasterizer,
+                                   x, y, out, count, dx, dy);
+      return;
     }
+    x+=1; // XXX off by one somewhere? ,, needed for alignment with nearest
 
-    if (is_utf5)
+    uint32_t *data = ((uint32_t*)buffer->data);
+    uint32_t yi = y * 65536; /* 16.16 fixed point */
+    uint32_t xi = x * 65536; /* 16.16 fixed point */
+    int xi_delta = dx * 65536;
+
+    for (i= 0; i < count; i ++) /* zero leading pixels until a column with a right-hand neighbour is reached */
     {
-      int octets = 0;
-      offset = squoze_new_offset (val);
-      while (val)
+      int u = xi >> 16;
+      if ( u  < 0 || u >= bwidth-1)
       {
-        int oval = val % 16;
-        int hi = 16;
-        if (val / 16) hi = 0;
-        output[len+ (octets++)] = oval + hi;
-        val /= 16;
-      }
-      for (int j = 0; j < octets/2; j++) // mirror in-place
-      {                                  // TODO refactor to be single pass
-        int tmp = output[len+j];
-        output[len+j] = output[len+octets-1-j];
-        output[len+octets-1-j] = tmp;
+        *((uint32_t*)(rgba))= 0;
+        xi += xi_delta;
+        rgba += 4;
       }
-      len += octets;
-    }
-    else 
-    {
-       if (val == ' ')
-       {
-         output[len++] = SQUOZE_SPACE;
-       }
-       else
-       {
-         output[len++] = val-offset+1;
-       }
+      else
+        break;
     }
-  }
 
-  if (escape_endzero && len && output[len-1]==0)
-  {
-    if (is_utf5)
-      output[len++] = 16;
-    else
-      output[len++] = SQUOZE_ENTER_UTF5;
-  }
-  output[len]=0;
-  if (r_outlen)
-    *r_outlen = len;
-}
+  int loaded = -4; /* column whose blended pair is cached; -4 means nothing cached yet */
+  uint32_t s0_ga = 0, s0_rb = 0, s1_ga = 0, s1_rb = 0; /* vertically blended left (s0) and right (s1) columns, split in two halves */
+ 
+  int v = yi >> 16;
+  data += bwidth * v; /* data now points at the start of row v */
+  int dv = (yi >> 8) & 0xff; /* 8-bit vertical blend fraction */
 
-static inline uint64_t _squoze (int squoze_dim, const char *utf8)
-{
-  char encoded[4096]="";
-  int  encoded_len=0;
-  squoze5_encode (utf8, strlen (utf8), encoded, &encoded_len, 1, 1);
-  uint64_t hash = 0;
-  int  utf5 = (encoded[0] != SQUOZE_ENTER_SQUEEZE);
-  uint64_t multiplier = ((squoze_dim == 6) ? 0x25bd1e975
-                                           : 0x98173415bd1e975);
+  int u = xi >> 16;
 
-  uint64_t overflowed_mask = squoze_overflow_mask_for_dim (squoze_dim);
-  uint64_t all_bits        = overflowed_mask - 1;
+  uint32_t *ndata = data;
+  if (v < bheight-1) ndata += bwidth; /* row below, or the same row again on the last row */
 
-  int rshift = (squoze_dim == 6) ? 8 : 16;
+  uint32_t *src0 = data, *src1 = ndata;
 
 
-  if (encoded_len - (!utf5) <= squoze_dim)
+  if (xi_delta == 65536 && u < bwidth -1) /* exactly one texel per pixel: the horizontal fraction is constant */
   {
-    for (int i = !utf5; i < encoded_len; i++)
+    int du = (xi >> 8) & 0xff;
+
+    src0 = data + u;
+    src1 = ndata + u;
+    ctx_lerp_RGBA8_split (src0[0],src1[0], dv, &s1_ga, &s1_rb);
+
+    int limit = bwidth-u; /* NOTE(review): compared against i, which already counts the zeroed leading pixels - confirm */
+    limit = ctx_mini(count,limit);
+
+    for (; i < limit; i ++)
     {
-      uint64_t val = encoded[i];
-      hash = hash | (val << (5*(i-(!utf5))));
+      s0_ga = s1_ga; /* the previous right-hand column becomes the left-hand one */
+      s0_rb = s1_rb;
+      ctx_lerp_RGBA8_split (src0[1],src1[1], dv, &s1_ga, &s1_rb);
+      ((uint32_t*)(&rgba[0]))[0] = 
+      ctx_lerp_RGBA8_merge (s0_ga, s0_rb, s1_ga, s1_rb, du);
+      rgba += 4;
+      u++;
+      src0 ++;
+      src1 ++;
     }
-    hash <<= 1; // make room for the bit that encodes utf5 or squeeze
   }
   else
   {
-    for (int i = 0; i < encoded_len; i++)
+    for (; (i < count) && (u< bwidth); i ++) /* arbitrary positive step */
     {
-      uint64_t val = encoded[i];
-      hash = hash ^ val;
-      hash = hash * multiplier;
-      hash = hash & all_bits;
-      hash = hash ^ ((hash >> rshift));
+      if (loaded + 1 == u) /* moved one column right: reuse the cached right-hand column */
+      {
+        s0_ga = s1_ga;
+        s0_rb = s1_rb;
+        ctx_lerp_RGBA8_split (src0[1],src1[1], dv, &s1_ga, &s1_rb);
+        src0 ++;
+        src1 ++;
+      }
+      else if (loaded != u) /* jumped further: reload both columns */
+      {
+        src0 = data + u;
+        src1 = ndata + u;
+        ctx_lerp_RGBA8_split (src0[0],src1[0], dv, &s0_ga, &s0_rb);
+        ctx_lerp_RGBA8_split (src0[1],src1[1], dv, &s1_ga, &s1_rb);
+      }
+      loaded = u;
+      ((uint32_t*)(&rgba[0]))[0] = 
+        ctx_lerp_RGBA8_merge (s0_ga, s0_rb, s1_ga, s1_rb, ((xi>>8)&0xff));
+      xi += xi_delta;
+      rgba += 4;
+      u = xi >> 16;
     }
-    hash |= overflowed_mask;
   }
-  return hash | utf5;
-}
-
-typedef struct _CashInterned CashInterned;
-
-struct _CashInterned {
-    uint64_t   hash;
-    char      *string;
-};
-
-static CashInterned *interned = NULL;
-static int n_interned = 0;
-static int s_interned = 0;
 
-static int squoze_interned_find (uint64_t hash)
-{
-#if 1
-  int min = 0;
-  int max = n_interned - 1;
-  if (max <= 0)
-    return 0;
-  do
-  {
-     int pos = (min + max)/2;
-     if (interned[pos].hash == hash)
-       return pos;
-     else if (min == max - 1)
-       return max;
-     else if (interned[pos].hash < hash)
-       min = pos;
-     else
-       max = pos;
-  } while (min != max);
-  return max;
-#else
-  for (int i = 0; i < n_interned; i++)
-    if (interned[i].hash > hash)
-      return i;
-  return 0;
-#endif
-}
-
-static inline uint64_t squoze (int squoze_dim, const char *utf8)
-{
-  uint64_t hash = _squoze (squoze_dim, utf8);
-#ifdef SQUOZE_NO_INTERNING
-  return hash;
-#endif
-  uint64_t overflowed_mask = squoze_overflow_mask_for_dim (squoze_dim);
-  if (hash & overflowed_mask)
+  }
+  else // //
   {
-    int pos = squoze_interned_find (hash);
-    if (interned && interned[pos].hash == hash)
-      return hash;
-
-    if (n_interned + 1 >= s_interned)
+    uint32_t *data = ((uint32_t*)buffer->data);
+    for (i= 0; i < count; i ++) /* zero leading pixels until the sample position reaches the texture */
     {
-       s_interned = (s_interned + 128)*2;
-       interned = (CashInterned*)realloc (interned, s_interned * sizeof (CashInterned));
+      int u = x;
+      int v = y;
+      int ut = x + 1.5;
+      int vt = y + 1.5;
+      if ( ut  <= 0 || vt  <= 0 || u >= buffer->width || v >= buffer->height)
+      {
+        *((uint32_t*)(rgba))= 0;
+      }
+      else
+        break;
+      x += dx;
+      y += dy;
+      rgba += 4;
     }
 
-    n_interned++;
+  uint32_t yi = y * 65536; /* 16.16 fixed point */
+  uint32_t xi = x * 65536; /* 16.16 fixed point */
+
+  int yi_delta = dy * 65536;
+  int xi_delta = dx * 65536;
+
+  int loaded = -4; /* texel offset the src pointers were last set up for; -4 means none */
+  uint32_t *src00=data;
+  uint32_t *src01=data;
+  uint32_t *src10=data;
+  uint32_t *src11=data;
+
+  int u = xi >> 16;
+  int v = yi >> 16;
+  int offset = bwidth * v + u;
+
+  while (i < count &&
+         !(u >= buffer->width ||
+          v  <= -65536 ||
+          u  <= -65536 ||
+          v >= buffer->height))
+  {
 #if 1
-    if (n_interned-pos)
-      memmove (&interned[pos+1], &interned[pos], (n_interned-pos) * sizeof (CashInterned));
-    // the memmove is the expensive part of testing for collisions
-    // insertions should be cheaper! at least looking up strings
-    // is cheap
-#else
-    pos = n_interned-1;
+  if (CTX_UNLIKELY(u < 0 || v < 0)) // default to next sample down and to right
+  {
+      int got_prev_pix = (u >= 0);
+      int got_prev_row = (v>=0);
+      src11 = data  + offset + bwidth + 1;
+      src10 = src11 - got_prev_pix;
+      src01 = src11 - bwidth * got_prev_row;
+      src00 = src10 - bwidth * got_prev_row;
+  }
 #endif
-    {
-      CashInterned *entry = &interned[pos];
-      entry->hash = hash;
-      entry->string = strdup (utf8);
-    }
-
+#if 1
+  else if (loaded + 1 == offset) /* moved one texel right: slide all four pointers */
+  {
+      src00++;
+      src01++;
+      src10++;
+      src11++;
   }
-  return hash;
-}
-
-uint32_t squoze6 (const char *utf8)
-{
-  return squoze (6, utf8);
-}
+#endif
+  else if (loaded != offset) /* any other move: recompute the 2x2 neighbourhood */
+  {
+      int next_row = ( v + 1 < bheight) * bwidth; /* 0 on the last row, so the same row is reused */
+      int next_pix = (u + 1 < bwidth); /* 0 in the last column, so the same texel is reused */
+      src00 = data  + offset;
+      src01 = src00 + next_pix;
+      src10 = src00 + next_row;
+      src11 = src01 + next_row;
+  }
+    loaded = offset;
+    ((uint32_t*)(&rgba[0]))[0] = ctx_bi_RGBA8 (*src00,*src01,*src10,*src11, (xi>>8),(yi>>8)); // the 
argument type does the & 0xff
+    xi += xi_delta;
+    yi += yi_delta;
+    rgba += 4;
 
-uint64_t squoze10 (const char *utf8)
-{
-  return squoze (10, utf8);
-}
+    u = xi >> 16;
+    v = yi >> 16;
+    offset = bwidth * v + u;
+    i++;
+  }
+  }
 
-uint64_t squoze12 (const char *utf8)
-{
-  return squoze (12, utf8);
-}
+  for (; i < count; i ++) /* whatever was not sampled above is zero */
+  {
+    *((uint32_t*)(rgba))= 0;
+    rgba += 4;
+  }
 
-uint32_t ctx_strhash(const char *str) {
-  return squoze (6, str);
+  ctx_RGBA8_apply_global_alpha_and_associate (rasterizer, (uint8_t*)out, count); /* final pass over the whole span */
 }
+#endif
 
-typedef struct CashUtf5Dec {
-  int       is_utf5;
-  int       offset;
-  void     *write_data;
-  uint32_t  current;
-  void    (*append_unichar) (uint32_t unichar, void *write_data);
-  int       jumped_amount;
-  int       jump_mode;
-} CashUtf5Dec;
-
-typedef struct CashUtf5DecDefaultData {
-  uint8_t *buf;
-  int      length;
-} CashUtf5DecDefaultData;
+/* Limits val to the inclusive range [min, max] using ctx_maxi and
+ * ctx_mini. */
+#define ctx_clampi(val,min,max) \
+     ctx_mini (ctx_maxi ((val), (min)), (max))
 
-static void squoze_decode_utf5_append_unichar_as_utf8 (uint32_t unichar, void *write_data)
+/* Converts one Y'CbCr sample (y, u = Cb, v = Cr) into a packed 32-bit
+ * pixel with red in bits 0-7, green in bits 8-15, blue in bits 16-23 and
+ * 0xff in bits 24-31.
+ *
+ * The arithmetic uses 16.16 fixed-point factors (76309/65536 ~ 1.164,
+ * 104597/65536 ~ 1.596, ...); these look like the common BT.601
+ * limited-range coefficients - confirm against the intended colour
+ * space. Each channel is clamped to 0..255 before packing.
+ *
+ * The channels are widened to uint32_t before shifting so that placing
+ * 0xff in the top byte never shifts a signed int into its sign bit.
+ */
+static inline uint32_t ctx_yuv_to_rgba32 (uint8_t y, uint8_t u, uint8_t v)
 {
-  CashUtf5DecDefaultData *data = (CashUtf5DecDefaultData*)write_data;
-  int length = squoze_unichar_to_utf8 (unichar, &data->buf[data->length]);
-  data->buf[data->length += length] = 0;
+  int cy  = ((y - 16) * 76309) >> 16;
+  int cr  = (v - 128);
+  int cb  = (u - 128);
+  int red = cy + ((cr * 104597) >> 16);
+  int green = cy - ((cb * 25674 + cr * 53278) >> 16);
+  int blue = cy + ((cb * 132201) >> 16);
+  return  (uint32_t) ctx_clampi (red, 0, 255) |
+          ((uint32_t) ctx_clampi (green, 0, 255) << 8) |
+          ((uint32_t) ctx_clampi (blue, 0, 255) << 16) |
+          ((uint32_t) 0xff << 24);
 }
 
-static void squoze_decode_jump (CashUtf5Dec *dec, uint8_t in)
+static void
+ctx_fragment_image_yuv420_RGBA8_nearest (CtxRasterizer *rasterizer,
+                                         float x,
+                                         float y,
+                                         void *out, int count, float dx, float dy)
 {
-  dec->offset -= SQUOZE_JUMP_STRIDE * dec->jumped_amount;
-  int jump_len = (dec->jump_mode - SQUOZE_DEC_OFFSET_A) * 4 +
-                 (in - SQUOZE_DEC_OFFSET_A);
-  if (jump_len > 7)
-    jump_len = 5 - jump_len;
-  else
-    jump_len += 3;
-  dec->offset += jump_len * SQUOZE_JUMP_STRIDE;
-  dec->jumped_amount = 0;
-}
+  uint8_t *rgba = (uint8_t *) out;
+  CtxSource *g = &rasterizer->state->gstate.source_fill;
+  CtxBuffer *buffer = g->texture.buffer;
+  if (buffer->color_managed)
+    buffer = buffer->color_managed;
+  uint8_t *src = (uint8_t *) buffer->data;
+  int bwidth  = buffer->width;
+  int bheight = buffer->height;
+  int bwidth_div_2  = bwidth/2;
+  int bheight_div_2  = bheight/2;
+  x += 0.5f;
+  y += 0.5f;
 
-static void squoze_decode_utf5 (CashUtf5Dec *dec, uint8_t in)
-{
-  if (dec->is_utf5)
   {
-    if (in >= 16)
+    int i = 0;
+
+    float  u1 = x + dx * (count-1);
+    float  v1 = y + dy * (count-1);
+    uint32_t *edst = ((uint32_t*)out)+count - 1;
+    for (; i < count; )
     {
-      if (dec->current)
+      if (u1 <0 || v1 < 0 || u1 >= bwidth || v1 >= bheight)
       {
-        dec->offset = squoze_new_offset (dec->current);
-        dec->append_unichar (dec->current, dec->write_data);
-        dec->current = 0;
+        *edst-- = 0;
+        count --;
+        u1 -= dx;
+        v1 -= dy;
       }
+      else break;
     }
-    if (in == SQUOZE_ENTER_SQUEEZE)
+
+    for (; i < count; i ++)
     {
-      if (dec->current)
+      int u = x;
+      int v = y;
+      if ((u < 0 || v < 0 || u >= bwidth || v >= bheight))
       {
-        dec->offset = squoze_new_offset (dec->current);
-        dec->append_unichar (dec->current, dec->write_data);
-        dec->current = 0;
+        *((uint32_t*)(rgba))= 0;
       }
-      dec->is_utf5 = 0;
+      else
+      {
+        break;
+      }
+      x += dx;
+      y += dy;
+      rgba += 4;
     }
-    else
+
+    uint32_t u_offset = bheight * bwidth;
+    uint32_t v_offset = u_offset + bheight_div_2 * bwidth_div_2;
+
+    if (rasterizer->swap_red_green)
     {
-      dec->current = dec->current * 16 + (in % 16);
+      v_offset = bheight * bwidth;
+      u_offset = v_offset + bheight_div_2 * bwidth_div_2;
     }
-  }
-  else
-  {
-    if (dec->jumped_amount)
+
+    // XXX this is incorrect- but fixes some bug!
+    int ix = 65536;//x * 65536;
+    int iy = y * 65536;
+
+    int ideltax = dx * 65536;
+    int ideltay = dy * 65536;
+
+    if (ideltay == 0)
     {
-      switch (in)
+      int u = ix >> 16;
+      int v = iy >> 16;
+
+      uint32_t y  = v * bwidth;
+      uint32_t uv = (v / 2) * bwidth_div_2;
+
+      if (v >= 0 && v < bheight)
+      while (i < count)// && u >= 0 && u+1 < bwidth)
       {
-        case SQUOZE_DEC_OFFSET_A:
-        case SQUOZE_DEC_OFFSET_B:
-        case SQUOZE_INC_OFFSET_A:
-        case SQUOZE_INC_OFFSET_B:
-          squoze_decode_jump (dec, in);
-          break;
-        default:
-          dec->append_unichar (dec->offset + (in - 1), dec->write_data);
-          dec->jumped_amount = 0;
-          dec->jump_mode = 0;
-          break;
+        *((uint32_t*)(rgba))= ctx_yuv_to_rgba32 (src[y+u],
+                        src[u_offset+uv+u/2], src[v_offset+uv+u/2]);
+#if 0
+#if CTX_DITHER
+       ctx_dither_rgba_u8 (rgba, x, y, rasterizer->format->dither_red_blue,
+                           rasterizer->format->dither_green);
+#endif
+#endif
+
+        ix += ideltax;
+        rgba += 4;
+        u = ix >> 16;
+        i++;
       }
     }
     else
     {
-      switch (in)
+      int u = ix >> 16;
+      int v = iy >> 16;
+
+      while (i < count)// && u >= 0 && v >= 0 && u < bwidth && v < bheight)
       {
-        case SQUOZE_ENTER_UTF5:
-          dec->is_utf5 = 1;
-          dec->jumped_amount = 0;
-          dec->jump_mode = 0;
-          break;
-        case SQUOZE_SPACE: 
-          dec->append_unichar (' ', dec->write_data);
-          dec->jumped_amount = 0;
-          dec->jump_mode = 0;
-          break;
-        case SQUOZE_DEC_OFFSET_A:
-          dec->jumped_amount = -2;
-          dec->jump_mode = in;
-          dec->offset += dec->jumped_amount * SQUOZE_JUMP_STRIDE;
-          break;
-        case SQUOZE_INC_OFFSET_A:
-          dec->jumped_amount = 2;
-          dec->jump_mode = in;
-          dec->offset += dec->jumped_amount * SQUOZE_JUMP_STRIDE;
-          break;
-        case SQUOZE_DEC_OFFSET_B:
-          dec->jumped_amount = -1;
-          dec->jump_mode = in;
-          dec->offset += dec->jumped_amount * SQUOZE_JUMP_STRIDE;
-          break;
-        case SQUOZE_INC_OFFSET_B:
-          dec->jumped_amount = 1;
-          dec->jump_mode = in;
-          dec->offset += dec->jumped_amount * SQUOZE_JUMP_STRIDE;
-          break;
-        default:
-          dec->append_unichar (dec->offset + (in - 1), dec->write_data);
-          dec->jumped_amount = 0;
-          dec->jump_mode = 0;
-      }
-    }
-  }
-}
+        uint32_t y  = v * bwidth + u;
+        uint32_t uv = (v / 2) * bwidth_div_2 + (u / 2);
 
-static void squoze_decode_utf5_bytes (int is_utf5, 
-                        const unsigned char *input, int inlen,
-                        char *output, int *r_outlen)
-{
-  CashUtf5DecDefaultData append_data = {(unsigned char*)output, };
-  CashUtf5Dec dec = {is_utf5,
-                     squoze_new_offset('a'),
-                     &append_data,
-                     0,
-                     squoze_decode_utf5_append_unichar_as_utf8,
-                     0
-                    };
-  for (int i = 0; i < inlen; i++)
-    squoze_decode_utf5 (&dec, input[i]);
-  if (dec.current)
-    dec.append_unichar (dec.current, dec.write_data);
-  if (r_outlen)
-    *r_outlen = append_data.length;
-}
+        *((uint32_t*)(rgba))= ctx_yuv_to_rgba32 (src[y],
+                        src[u_offset+uv], src[v_offset+uv]);
+#if 0
+#if CTX_DITHER
+       ctx_dither_rgba_u8 (rgba, x+i, y, rasterizer->format->dither_red_blue,
+                           rasterizer->format->dither_green);
+#endif
+#endif
 
-static const char *squoze_decode_r (int squoze_dim, uint64_t hash, char *ret, int retlen)
-{
-  uint64_t overflowed_mask = ((uint64_t)1<<(squoze_dim * 5 + 1));
+        ix += ideltax;
+        iy += ideltay;
+        rgba += 4;
+        u = ix >> 16;
+        v = iy >> 16;
+        i++;
+      }
+    }
 
-  if (hash & overflowed_mask)
-  {
-#if 0
-    for (int i = 0; i < n_interned; i++)
+    for (; i < count; i++)
     {
-      CashInterned *entry = &interned[i];
-      if (entry->hash == hash)
-        return entry->string;
+      *((uint32_t*)(rgba))= 0;
+      rgba += 4;
     }
-#else
-    int pos = squoze_interned_find (hash);
-    if (!interned || (interned[pos].hash!=hash))
-      return NULL;
-    return interned[pos].string;
-#endif
-    return NULL;
   }
 
-  uint8_t utf5[140]=""; // we newer go really high since there isnt room
-                        // in the integers
-  uint64_t tmp = hash & (overflowed_mask-1);
-  int len = 0;
-  int is_utf5 = tmp & 1;
-  tmp /= 2;
-  int in_utf5 = is_utf5;
-  while (tmp > 0)
-  {
-    uint64_t remnant = tmp % 32;
-    uint64_t val = remnant;
-
-    if      ( in_utf5 && val == SQUOZE_ENTER_SQUEEZE) in_utf5 = 0;
-    else if (!in_utf5 && val == SQUOZE_ENTER_UTF5) in_utf5 = 1;
-
-    utf5[len++] = val;
-    tmp -= remnant;
-    tmp /= 32;
-  }
-  utf5[len]=0;
-  squoze_decode_utf5_bytes (is_utf5, utf5, len, ret, &retlen);
-  //ret[retlen]=0;
-  return ret;
+  if (rasterizer->state->gstate.global_alpha_u8 != 255)
+    ctx_RGBA8_apply_global_alpha_and_associate (rasterizer, (uint8_t*)out, count);
 }
 
-/* copy the value as soon as possible, some mitigation is in place
- * for more than one value in use and cross-thread interactions.
- */
-static const char *squoze_decode (int squoze_dim, uint64_t hash)
-{
-#define THREAD __thread  // use thread local storage
-  static THREAD int no = 0;
-  static THREAD char ret[8][256];
-  no ++;
-  if (no > 7) no = 0;
-  return squoze_decode_r (squoze_dim, hash, ret[no], 256);
-#undef THREAD
-}
+#if CTX_FRAGMENT_SPECIALIZE
 
-const char *squoze6_decode (uint32_t hash)
+static void // wrapper: run the _box fragment, then ctx_fragment_swap_red_green_u8 on its output
+ctx_fragment_image_rgba8_RGBA8_box_swap_red_green (CtxRasterizer *rasterizer,
+                                    float x,
+                                    float y,
+                                    void *out, int count, float dx, float dy)
 {
-  return squoze_decode (6, hash);
+  ctx_fragment_image_rgba8_RGBA8_box (rasterizer, x, y, out, count, dx, dy); // every argument is forwarded unchanged
+  ctx_fragment_swap_red_green_u8 (out, count); // post-process the same out/count; helper is defined elsewhere (presumably an in-place channel swap -- confirm)
 }
 
-const char *squoze10_decode (uint64_t hash)
+static void // wrapper: run the _bi fragment, then ctx_fragment_swap_red_green_u8 on its output
+ctx_fragment_image_rgba8_RGBA8_bi_swap_red_green (CtxRasterizer *rasterizer,
+                                    float x,
+                                    float y,
+                                    void *out, int count, float dx, float dy)
 {
-  return squoze_decode (10, hash);
+  ctx_fragment_image_rgba8_RGBA8_bi (rasterizer, x, y, out, count, dx, dy); // every argument is forwarded unchanged
+  ctx_fragment_swap_red_green_u8 (out, count); // post-process the same out/count; helper is defined elsewhere (presumably an in-place channel swap -- confirm)
 }
 
-const char *squoze12_decode (uint64_t hash)
+static void // wrapper: run the _nearest fragment, then ctx_fragment_swap_red_green_u8 on its output
+ctx_fragment_image_rgba8_RGBA8_nearest_swap_red_green (CtxRasterizer *rasterizer,
+                                    float x,
+                                    float y,
+                                    void *out, int count, float dx, float dy)
 {
-  return squoze_decode (12, hash);
+  ctx_fragment_image_rgba8_RGBA8_nearest (rasterizer, x, y, out, count, dx, dy); // every argument is forwarded unchanged
+  ctx_fragment_swap_red_green_u8 (out, count); // post-process the same out/count; helper is defined elsewhere (presumably an in-place channel swap -- confirm)
 }
 
-static inline uint32_t
-squoze_utf8_to_unichar (const char *input)
-{
-  const uint8_t *utf8 = (const uint8_t *) input;
-  uint8_t c = utf8[0];
-  if ( (c & 0x80) == 0)
-    { return c; }
-  else if ( (c & 0xE0) == 0xC0)
-    return ( (utf8[0] & 0x1F) << 6) |
-           (utf8[1] & 0x3F);
-  else if ( (c & 0xF0) == 0xE0)
-    return ( (utf8[0] & 0xF)  << 12) |
-           ( (utf8[1] & 0x3F) << 6) |
-           (utf8[2] & 0x3F);
-  else if ( (c & 0xF8) == 0xF0)
-    return ( (utf8[0] & 0x7)  << 18) |
-           ( (utf8[1] & 0x3F) << 12) |
-           ( (utf8[2] & 0x3F) << 6) |
-           (utf8[3] & 0x3F);
-  else if ( (c & 0xFC) == 0xF8)
-    return ( (utf8[0] & 0x3)  << 24) |
-           ( (utf8[1] & 0x3F) << 18) |
-           ( (utf8[2] & 0x3F) << 12) |
-           ( (utf8[3] & 0x3F) << 6) |
-           (utf8[4] & 0x3F);
-  else if ( (c & 0xFE) == 0xFC)
-    return ( (utf8[0] & 0x1)  << 30) |
-           ( (utf8[1] & 0x3F) << 24) |
-           ( (utf8[2] & 0x3F) << 18) |
-           ( (utf8[3] & 0x3F) << 12) |
-           ( (utf8[4] & 0x3F) << 6) |
-           (utf8[5] & 0x3F);
-  return 0;
-}
-static inline int
-squoze_unichar_to_utf8 (uint32_t  ch,
-                      uint8_t  *dest)
+static void // dispatcher: forwards to the box, "bi" or nearest RGBA8 image fragment (or its _swap_red_green twin)
+ctx_fragment_image_rgba8_RGBA8 (CtxRasterizer *rasterizer,
+                                float x,
+                                float y,
+                                void *out, int count, float dx, float dy)
 {
-  /* http://www.cprogramming.com/tutorial/utf8.c  */
-  /*  Basic UTF-8 manipulation routines
-    by Jeff Bezanson
-    placed in the public domain Fall 2005 ... */
-  if (ch < 0x80)
-    {
-      dest[0] = (char) ch;
-      return 1;
-    }
-  if (ch < 0x800)
+  if (rasterizer->state->gstate.image_smoothing) // smoothing enabled: the variant depends on the transform scale
+  {
+    float factor = ctx_matrix_get_scale (&rasterizer->state->gstate.transform); // scale of the current transform as reported by ctx_matrix_get_scale
+    if (factor <= 0.50f) // scale of one half or less: box variant
     {
-      dest[0] = (ch>>6) | 0xC0;
-      dest[1] = (ch & 0x3F) | 0x80;
-      return 2;
+      if (rasterizer->swap_red_green) // use the twin that also runs the channel-swap pass
+        ctx_fragment_image_rgba8_RGBA8_box_swap_red_green (rasterizer, x, y, out, count, dx, dy);
+      else
+        ctx_fragment_image_rgba8_RGBA8_box (rasterizer, x, y, out, count, dx, dy);
     }
-  if (ch < 0x10000)
+#if CTX_ALWAYS_USE_NEAREST_FOR_SCALE1 // optional branch, only present when this macro is non-zero
+    else if (factor > 0.99f && factor < 1.01f) // scale strictly between 0.99 and 1.01: nearest variant
     {
-      dest[0] = (ch>>12) | 0xE0;
-      dest[1] = ( (ch>>6) & 0x3F) | 0x80;
-      dest[2] = (ch & 0x3F) | 0x80;
-      return 3;
+      // XXX: also verify translate == 0 for this fast path to be valid
+      if (rasterizer->swap_red_green)
+        ctx_fragment_image_rgba8_RGBA8_nearest_swap_red_green (rasterizer, x, y, out, count, dx, dy);
+      else
+        ctx_fragment_image_rgba8_RGBA8_nearest (rasterizer, x, y, out, count, dx, dy);
     }
-  if (ch < 0x110000)
+#endif
+    else // every remaining scale: "bi" variant (presumably bilinear -- confirm)
     {
-      dest[0] = (ch>>18) | 0xF0;
-      dest[1] = ( (ch>>12) & 0x3F) | 0x80;
-      dest[2] = ( (ch>>6) & 0x3F) | 0x80;
-      dest[3] = (ch & 0x3F) | 0x80;
-      return 4;
+      if (rasterizer->swap_red_green)
+        ctx_fragment_image_rgba8_RGBA8_bi_swap_red_green (rasterizer, x, y, out, count, dx, dy);
+      else
+        ctx_fragment_image_rgba8_RGBA8_bi (rasterizer, x, y, out, count, dx, dy);
     }
-  return 0;
-}
-
-static inline int
-squoze_utf8_len (const unsigned char first_byte)
-{
-  if      ( (first_byte & 0x80) == 0)
-    { return 1; } /* ASCII */
-  else if ( (first_byte & 0xE0) == 0xC0)
-    { return 2; }
-  else if ( (first_byte & 0xF0) == 0xE0)
-    { return 3; }
-  else if ( (first_byte & 0xF8) == 0xF0)
-    { return 4; }
-  return 1;
+  }
+  else // smoothing disabled: always the nearest variant, whatever the scale
+  {
+    if (rasterizer->swap_red_green)
+      ctx_fragment_image_rgba8_RGBA8_nearest_swap_red_green (rasterizer, x, y, out, count, dx, dy);
+    else
+      ctx_fragment_image_rgba8_RGBA8_nearest (rasterizer, x, y, out, count, dx, dy);
+  }
+  //ctx_fragment_swap_red_green_u8 (out, count);
+#if 0 // compiled out: a single ctx_dither_rgba_u8 call on the start of out
+#if CTX_DITHER
+  uint8_t *rgba = (uint8_t*)out;
+  ctx_dither_rgba_u8 (rgba, x, y, rasterizer->format->dither_red_blue,
+                      rasterizer->format->dither_green);
+#endif
+#endif
 }
-
 #endif
-/* atty - audio interface and driver for terminals
- * Copyright (C) 2020 Øyvind Kolås <pippin gimp org>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>. 
- */
 
-static const char *base64_map="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
-static void bin2base64_group (const unsigned char *in, int remaining, char *out)
+static void // 1-bit-per-pixel texture fetch: writes count RGBA8 pixels to out, stepping (x,y) by (dx,dy) per pixel
+ctx_fragment_image_gray1_RGBA8 (CtxRasterizer *rasterizer, float x, float y, void *out, int count, float dx, 
float dy)
 {
-  unsigned char digit[4] = {0,0,64,64};
-  int i;
-  digit[0] = in[0] >> 2;
-  digit[1] = ((in[0] & 0x03) << 4) | ((in[1] & 0xf0) >> 4);
-  if (remaining > 1)
+  uint8_t *rgba = (uint8_t *) out; // output cursor, advanced 4 bytes per pixel
+  CtxSource *g = &rasterizer->state->gstate.source_fill;
+  CtxBuffer *buffer = g->texture.buffer; // texture of the current fill source
+  for (int i = 0; i < count; i ++)
+  {
+  int u = x; // float -> int conversion truncates toward zero
+  int v = y;
+  if ( u < 0 || v < 0 || // sample outside the buffer: all four channels become 0
+       u >= buffer->width ||
+       v >= buffer->height)
     {
-      digit[2] = ((in[1] & 0x0f) << 2) | ((in[2] & 0xc0) >> 6);
-      if (remaining > 2)
-        digit[3] = ((in[2] & 0x3f));
+      rgba[0] = rgba[1] = rgba[2] = rgba[3] = 0;
     }
-  for (i = 0; i < 4; i++)
-    out[i] = base64_map[digit[i]];
-}
-
-void
-ctx_bin2base64 (const void *bin,
-                int         bin_length,
-                char       *ascii)
-{
-  /* this allocation is a hack to ensure we always produce the same result,
-   * regardless of padding data accidentally taken into account.
-   */
-  unsigned char *bin2 = (unsigned char*)calloc (bin_length + 4, 1);
-  unsigned const char *p = bin2;
-  int i;
-  memcpy (bin2, bin, bin_length);
-  for (i=0; i*3 < bin_length; i++)
-   {
-     int remaining = bin_length - i*3;
-     bin2base64_group (&p[i*3], remaining, &ascii[i*4]);
-   }
-  free (bin2);
-  ascii[i*4]=0;
-}
-
-static unsigned char base64_revmap[255];
-static void base64_revmap_init (void)
-{
-  static int done = 0;
-  if (done)
-    return;
-
-  for (int i = 0; i < 255; i ++)
-    base64_revmap[i]=255;
-  for (int i = 0; i < 64; i ++)
-    base64_revmap[((const unsigned char*)base64_map)[i]]=i;
-  /* include variants used in URI encodings for decoder,
-   * even if that is not how we encode
-  */
-  base64_revmap['-']=62;
-  base64_revmap['_']=63;
-  base64_revmap['+']=62;
-  base64_revmap['/']=63;
-
-  done = 1;
-}
-
-
-int
-ctx_base642bin (const char    *ascii,
-                int           *length,
-                unsigned char *bin)
-{
-  int i;
-  int charno = 0;
-  int outputno = 0;
-  int carry = 0;
-  base64_revmap_init ();
-  for (i = 0; ascii[i]; i++)
+  else
     {
-      int bits = base64_revmap[((const unsigned char*)ascii)[i]];
-      if (length && outputno > *length)
+      uint8_t *src = (uint8_t *) buffer->data;
+      src += v * buffer->stride + u / 8; // byte holding pixel (u,v): 8 pixels per byte, rows buffer->stride bytes apart
+      if (*src & (1<< (u & 7) ) ) // test bit (u & 7) of that byte, bit 0 being the lowest-order bit
         {
-          *length = -1;
-          return -1;
+          rgba[0] = rgba[1] = rgba[2] = rgba[3] = 0; // bit set: all four channels 0
         }
-      if (bits != 255)
+      else
         {
-          switch (charno % 4)
-            {
-              case 0:
-                carry = bits;
-                break;
-              case 1:
-                bin[outputno] = (carry << 2) | (bits >> 4);
-                outputno++;
-                carry = bits & 15;
-                break;
-              case 2:
-                bin[outputno] = (carry << 4) | (bits >> 2);
-                outputno++;
-                carry = bits & 3;
-                break;
-              case 3:
-                bin[outputno] = (carry << 6) | bits;
-                outputno++;
-                carry = 0;
-                break;
-            }
-          charno++;
+          for (int c = 0; c < 4; c++) // bit clear: all four channels 255
+            { rgba[c] = 255;
+            }//g->texture.rgba[c];
+            //}
         }
     }
-  bin[outputno]=0;
-  if (length)
-    *length= outputno;
-  return outputno;
-}
-#include <stdio.h>
-#include <string.h>
 
-#if CTX_FORMATTER
+    rgba += 4; // next output pixel
+    x += dx; // next sample position
+    y += dy;
+  }
+}
 
-static int ctx_yenc (const char *src, char *dst, int count)
+#if CTX_GRADIENTS
+static void // radial gradient fetch: writes count RGBA8 pixels to out, stepping (x,y) by (dx,dy) per pixel
+ctx_fragment_radial_gradient_RGBA8 (CtxRasterizer *rasterizer, float x, float y, void *out, int count, float 
dx, float dy)
 {
-  int out_len = 0;
-  for (int i = 0; i < count; i ++)
+  uint8_t *rgba = (uint8_t *) out; // output cursor, advanced 4 bytes per pixel
+  CtxSource *g = &rasterizer->state->gstate.source_fill;
+#if CTX_DITHER
+  int scan = rasterizer->scanline / CTX_FULL_AA; // second coordinate handed to the dither call below
+  int ox = x; // starting x truncated to int; the dither call uses ox+i
+#endif
+  for (int i = 0; i <  count; i ++)
   {
-    int o = (src[i] + 42) % 256;
-    switch (o)
-    {
-      case 0x00: //null
-      case 0x20: //space// but better safe
-      case 0x0A: //lf   // than sorry
-      case 0x0D: //cr
-      case 0x09: //tab  // not really needed
-      case 0x10: //datalink escape (used by ctx)
-      case 0x11: //xoff
-      case 0x13: //xon
-      case 0x1b: //
-      case 0xff: //
-      case 0x3D: //=
-        dst[out_len++] = '=';
-        o = (o + 64) % 256;
-        /* FALLTHROUGH */
-      default:
-        dst[out_len++] = o;
-        break;
-    }
+    float v = (ctx_hypotf_fast (g->radial_gradient.x0 - x, g->radial_gradient.y0 - y) - // ctx_hypotf_fast of the offset from (x0,y0) ...
+              g->radial_gradient.r0) * (g->radial_gradient.rdelta); // ... minus r0, multiplied by rdelta
+#if CTX_GRADIENT_CACHE
+    uint32_t *rgbap = (uint32_t*)&ctx_gradient_cache_u8[ctx_grad_index(v)][0]; // cache path: entry chosen by ctx_grad_index(v)
+    *((uint32_t*)rgba) = *rgbap; // copy the whole pixel as one 32-bit store
+#else
+    ctx_fragment_gradient_1d_RGBA8 (rasterizer, v, 0.0, rgba); // no cache: evaluate the 1d gradient at v straight into rgba
+#endif
+#
+#if CTX_DITHER
+    ctx_dither_rgba_u8 (rgba, ox+i, scan, rasterizer->format->dither_red_blue,
+                        rasterizer->format->dither_green);
+#endif
+    rgba += 4; // next output pixel
+    x += dx; // next sample position
+    y += dy;
   }
-  dst[out_len]=0;
-  return out_len;
 }
-#endif
 
-#if CTX_PARSER
-static int ctx_ydec (const char *tmp_src, char *dst, int count)
+static void // linear gradient fetch for RGBA8; the "#if 0" half below is compiled out, the "#else" half is the live code
+ctx_fragment_linear_gradient_RGBA8 (CtxRasterizer *rasterizer, float x, float y, void *out, int count, float 
dx, float dy)
 {
-  const char *src = tmp_src;
 #if 0
-  if (tmp_src == dst)
+  uint8_t *rgba = (uint8_t *) out; // disabled variant: recomputes the full gradient expression for every pixel
+  CtxSource *g = &rasterizer->state->gstate.source_fill;
+  for (int i = 0; i <  count; i ++)
   {
-    src = malloc (count);
-    memcpy (src, tmp_src, count);
-  }
+  float v = ( ( (g->linear_gradient.dx * x + g->linear_gradient.dy * y) /
+                g->linear_gradient.length) -
+              g->linear_gradient.start) * (g->linear_gradient.rdelta);
+#if CTX_GRADIENT_CACHE
+  uint32_t*rgbap = ((uint32_t*)(&ctx_gradient_cache_u8[ctx_grad_index(v)][0]));
+  *((uint32_t*)rgba) = *rgbap;
+#else
+  _ctx_fragment_gradient_1d_RGBA8 (rasterizer, v, 1.0, rgba);
 #endif
-  int out_len = 0;
-  for (int i = 0; i < count; i ++)
-  {
-    int o = src[i];
-    switch (o)
-    {
-      case '=':
-        i++;
-        o = src[i];
-        if (o == 'y')
-        {
-          dst[out_len]=0;
-#if 0
-          if (tmp_src == dst) free (src);
+#if CTX_DITHER
+  ctx_dither_rgba_u8 (rgba, x+i, y, rasterizer->format->dither_red_blue,
+                      rasterizer->format->dither_green);
 #endif
-          return out_len;
-        }
-        o = (o-42-64) % 256;
-        dst[out_len++] = o;
-        break;
-      case '\n':
-      case '\e':
-      case '\r':
-      case '\0':
-        break;
-      default:
-        o = (o-42) % 256;
-        dst[out_len++] = o;
-        break;
-    }
+    rgba += 4;
+    x += dx;
+    y += dy;
   }
-  dst[out_len]=0;
-#if 0
-  if (tmp_src == dst) free (src);
-#endif
-  return out_len;
-}
-#endif
-
-#if 0
-int main (){
-  char *input="this is a testæøåÅØ'''\"!:_asdac\n\r";
-  char  encoded[256]="";
-  char  decoded[256]="";
-  int   in_len = strlen (input);
-  int   out_len;
-  int   dec_len;
-
-  printf ("input: %s\n", input);
-
-  out_len = ctx_yenc (input, encoded, in_len);
-  printf ("encoded: %s\n", encoded);
-
-  dec_len = ydec (encoded, encoded, out_len);
+#else // live variant: the per-pixel work is reduced to a single addition
+  uint8_t *rgba = (uint8_t *) out; // output cursor, advanced 4 bytes per pixel
 
-  printf ("decoded: %s\n", encoded);
+  CtxSource *g = &rasterizer->state->gstate.source_fill;
+  float u0 = x; float v0 = y; // start position
+  float ud = dx; float vd = dy; // per-pixel step
+  float linear_gradient_rdelta = g->linear_gradient.rdelta;
+  float linear_gradient_length = g->linear_gradient.length;
+  float linear_gradient_length_recip = 1.0f/linear_gradient_length; // one division here instead of one per pixel
+  float linear_gradient_dx = g->linear_gradient.dx *linear_gradient_length_recip * linear_gradient_rdelta; // direction pre-multiplied by 1/length and rdelta
+  float linear_gradient_dy = g->linear_gradient.dy *linear_gradient_length_recip * linear_gradient_rdelta;
+  float linear_gradient_start = g->linear_gradient.start * linear_gradient_rdelta; // start offset pre-multiplied by rdelta
 
-  return 0;
-}
+#if CTX_DITHER
+  int dither_red_blue = rasterizer->format->dither_red_blue; // dither parameters read once, outside the loop
+  int dither_green = rasterizer->format->dither_green;
+  int scan = rasterizer->scanline / CTX_FULL_AA; // second coordinate handed to the dither call below
+  int ox = x; // starting x truncated to int; the dither call uses ox+i
 #endif
-#ifndef CTX_DRAWLIST_H
-#define CTX_DRAWLIST_H
-
-static int
-ctx_conts_for_entry (CtxEntry *entry);
-static void
-ctx_iterator_init (CtxIterator      *iterator,
-                   CtxDrawlist  *drawlist,
-                   int               start_pos,
-                   int               flags);
-
-int ctx_iterator_pos (CtxIterator *iterator);
-
-static void
-ctx_drawlist_resize (CtxDrawlist *drawlist, int desired_size);
-static int
-ctx_drawlist_add_single (CtxDrawlist *drawlist, CtxEntry *entry);
-static int ctx_drawlist_add_entry (CtxDrawlist *drawlist, CtxEntry *entry);
-int
-ctx_drawlist_insert_entry (CtxDrawlist *drawlist, int pos, CtxEntry *entry);
-int
-ctx_add_data (Ctx *ctx, void *data, int length);
 
-int ctx_drawlist_add_u32 (CtxDrawlist *drawlist, CtxCode code, uint32_t u32[2]);
-int ctx_drawlist_add_data (CtxDrawlist *drawlist, const void *data, int length);
+  u0 *= linear_gradient_dx; // start position projected onto the scaled gradient direction
+  v0 *= linear_gradient_dy;
+  ud *= linear_gradient_dx; // per-pixel step projected the same way
+  vd *= linear_gradient_dy;
 
-static CtxEntry
-ctx_void (CtxCode code);
-static inline CtxEntry
-ctx_f (CtxCode code, float x, float y);
-static CtxEntry
-ctx_u32 (CtxCode code, uint32_t x, uint32_t y);
-#if 0
-static CtxEntry
-ctx_s32 (CtxCode code, int32_t x, int32_t y);
+#if CTX_GRADIENT_CACHE
+  int vv = ((u0 + v0) - linear_gradient_start) * (ctx_gradient_cache_elements-1) * 256; // cache path: gradient value as an int scaled by (elements-1)*256
+  int ud_plus_vd = (ud + vd) * (ctx_gradient_cache_elements-1) * 256; // per-pixel increment with the same scaling
+#else
+  float vv = ((u0 + v0) - linear_gradient_start); // no cache: gradient value kept as float
+  float ud_plus_vd = (ud + vd); // per-pixel increment
 #endif
 
-static inline CtxEntry
-ctx_s16 (CtxCode code, int x0, int y0, int x1, int y1);
-static CtxEntry
-ctx_u8 (CtxCode code,
-        uint8_t a, uint8_t b, uint8_t c, uint8_t d,
-        uint8_t e, uint8_t f, uint8_t g, uint8_t h);
-
-#define CTX_PROCESS_VOID(cmd) do {\
-  CtxEntry commands[4] = {{cmd}};\
-  ctx_process (ctx, &commands[0]);}while(0) \
-
-#define CTX_PROCESS_F(cmd,x,y) do {\
-  CtxEntry commands[4] = {ctx_f(cmd,x,y),};\
-  ctx_process (ctx, &commands[0]);}while(0) \
-
-#define CTX_PROCESS_F1(cmd,x) do {\
-  CtxEntry commands[4] = {ctx_f(cmd,x,0),};\
-  ctx_process (ctx, &commands[0]);}while(0) \
-
-#define CTX_PROCESS_U32(cmd, x, y) do {\
-  CtxEntry commands[4] = {ctx_u32(cmd, x, y)};\
-  ctx_process (ctx, &commands[0]);}while(0)
-
-#define CTX_PROCESS_U8(cmd, x) do {\
-  CtxEntry commands[4] = {ctx_u8(cmd, x,0,0,0,0,0,0,0)};\
-  ctx_process (ctx, &commands[0]);}while(0)
-
-
-#if CTX_BITPACK_PACKER
-static int
-ctx_last_history (CtxDrawlist *drawlist);
+  for (int i = 0; i < count ; i++)
+  {
+#if CTX_GRADIENT_CACHE
+  uint32_t*rgbap = ((uint32_t*)(&ctx_gradient_cache_u8[ctx_grad_index_i (vv)][0])); // entry chosen by ctx_grad_index_i from the scaled int
+  *((uint32_t*)rgba) = *rgbap; // copy the whole pixel as one 32-bit store
+#else
+  _ctx_fragment_gradient_1d_RGBA8 (rasterizer, vv, 1.0, rgba); // no cache: evaluate the 1d gradient at vv straight into rgba
 #endif
-
-#if CTX_BITPACK_PACKER
-static void
-ctx_drawlist_remove_tiny_curves (CtxDrawlist *drawlist, int start_pos);
-
-static void
-ctx_drawlist_bitpack (CtxDrawlist *drawlist, int start_pos);
+#if CTX_DITHER
+      ctx_dither_rgba_u8 (rgba, ox+i, scan, dither_red_blue, dither_green);
 #endif
-
-static void
-ctx_process_cmd_str (Ctx *ctx, CtxCode code, const char *string, uint32_t arg0, uint32_t arg1);
-static void
-ctx_process_cmd_str_float (Ctx *ctx, CtxCode code, const char *string, float arg0, float arg1);
-static void
-ctx_process_cmd_str_with_len (Ctx *ctx, CtxCode code, const char *string, uint32_t arg0, uint32_t arg1, int 
len);
-
-#pragma pack(push,1)
-typedef struct 
-CtxSegment {
-  uint16_t code;
-  union {
-   int16_t s16[4];
-   uint32_t u32[2];
-  } data;
-  int32_t val;
-  int32_t delta;
-} CtxSegment;
-#pragma pack(pop)
-
+    rgba+= 4; // next output pixel
+    vv += ud_plus_vd; // advance the gradient value by one pixel step
+  }
 #endif
+}
 
-#ifndef __CTX_UTIL_H
-#define __CTX_UTIL_H
+#endif
 
-inline static float ctx_fast_hypotf (float x, float y)
+/* Solid-colour fragment source for RGBA8 output.
+ *
+ * Fetches the fill colour once into the first pixel of out, runs
+ * ctx_RGBA8_associate_alpha on it, exchanges bytes 0 and 2 when
+ * rasterizer->swap_red_green is set, and then replicates that pixel over
+ * the remaining count-1 pixels of out.
+ *
+ * out must have room for count * 4 bytes.  x, y, dx and dy are not used:
+ * the colour does not depend on position.
+ */
+static void
+ctx_fragment_color_RGBA8 (CtxRasterizer *rasterizer, float x, float y, void *out, int count, float dx, float 
dy)
 {
-  if (x < 0) { x = -x; }
-  if (y < 0) { y = -y; }
-  if (x < y)
-    { return 0.96f * y + 0.4f * x; }
-  else
-    { return 0.96f * x + 0.4f * y; }
+  uint8_t *rgba_out = (uint8_t *) out;
+  CtxSource *g = &rasterizer->state->gstate.source_fill;
+  ctx_color_get_rgba8 (rasterizer->state, &g->color, rgba_out);
+  ctx_RGBA8_associate_alpha (rgba_out);
+  if (rasterizer->swap_red_green)
+  {
+    int tmp = rgba_out[0];
+    rgba_out[0] = rgba_out[2];
+    rgba_out[2] = tmp;
+  }
+  /* Copy every pixel onto its right-hand neighbour.  The destination was
+   * previously rgba_out + count * 4, which wrote past the end of out and
+   * left pixels 1..count-1 unset whenever count > 1. */
+  for (int i = 1; i < count; i++, rgba_out+=4)
+    memcpy (rgba_out + 4, rgba_out, 4);
 }
+#if CTX_ENABLE_FLOAT
 
-static int ctx_str_is_number (const char *str)
+#if CTX_GRADIENTS
+static void // linear gradient fetch for float output: writes count pixels of 4 floats to out, stepping (x,y) by (dx,dy)
+ctx_fragment_linear_gradient_RGBAF (CtxRasterizer *rasterizer, float x, float y, void *out, int count, float 
dx, float dy)
 {
-  int got_digit = 0;
-  for (int i = 0; str[i]; i++)
+  float *rgba = (float *) out; // output cursor, advanced 4 floats per pixel
+  CtxSource *g = &rasterizer->state->gstate.source_fill;
+  for (int i = 0; i < count; i++)
   {
-    if (str[i] >= '0' && str[i] <= '9')
-    {
-       got_digit ++;
-    }
-    else if (str[i] == '.')
-    {
-    }
-    else
-      return 0;
+    float v = ( ( (g->linear_gradient.dx * x + g->linear_gradient.dy * y) / // dot product of the position with the gradient's (dx,dy) ...
+                  g->linear_gradient.length) - // ... divided by length ...
+                g->linear_gradient.start) * (g->linear_gradient.rdelta); // ... minus start, multiplied by rdelta
+    ctx_fragment_gradient_1d_RGBAF (rasterizer, v, 1.0f, rgba); // evaluate the 1d gradient at v straight into rgba
+    x += dx; // next sample position
+    y += dy;
+    rgba += 4; // next output pixel
   }
-  if (got_digit)
-    return 1;
-  return 0;
 }
 
-#if CTX_GET_CONTENTS
-
-typedef struct CtxFileContent
+static void // radial gradient fetch for float output: writes count pixels of 4 floats to out, stepping (x,y) by (dx,dy)
+ctx_fragment_radial_gradient_RGBAF (CtxRasterizer *rasterizer, float x, float y, void *out, int count, float 
dx, float dy)
 {
-  char *path;
-  unsigned char *contents;
-  long  length;
-  int   free_data;
-} CtxFileContent;
-
-CtxList *registered_contents = NULL;
-
-void
-ctx_register_contents (const char *path,
-                       const unsigned char *contents,
-                       long length,
-                       int  free_data)
-{
-  // if (path[0] != '/') && strchr(path, ':')) 
-  //   with this check regular use is faster, but we lose
-  //   generic filesystem overrides..
-  for (CtxList *l = registered_contents; l; l = l->next)
+  float *rgba = (float *) out; // output cursor, advanced 4 floats per pixel
+  CtxSource *g = &rasterizer->state->gstate.source_fill;
+  for (int i = 0; i < count; i++)
   {
-    CtxFileContent *c = (CtxFileContent*)l->data;
-    if (!strcmp (c->path, path))
-    {
-       if (c->free_data)
-       {
-         free (c->contents);
-       }
-       c->free_data = free_data;
-       c->contents = (unsigned char*)contents;
-       c->length = length;
-       return;
-    }
+  float v = ctx_hypotf (g->radial_gradient.x0 - x, g->radial_gradient.y0 - y); // ctx_hypotf of the offset from (x0,y0)
+        v = (v - g->radial_gradient.r0) * (g->radial_gradient.rdelta); // minus r0, multiplied by rdelta
+  ctx_fragment_gradient_1d_RGBAF (rasterizer, v, 0.0f, rgba); // evaluate the 1d gradient at v straight into rgba
+    x+=dx; // next sample position
+    y+=dy;
+    rgba +=4; // next output pixel
   }
-  CtxFileContent *c = (CtxFileContent*)calloc (sizeof (CtxFileContent), 1);
-  c->free_data = free_data;
-  c->contents = (unsigned char*)contents;
-  c->length    = length;
-  ctx_list_append (&registered_contents, c);
 }
+#endif
 
-void
-_ctx_file_set_contents (const char     *path,
-                        const unsigned char  *contents,
-                        long            length)
-{
-  FILE *file;
-  file = fopen (path, "wb");
-  if (!file)
-    { return; }
-  if (length < 0) length = strlen ((const char*)contents);
-  fwrite (contents, 1, length, file);
-  fclose (file);
-}
 
-static int
-___ctx_file_get_contents (const char     *path,
-                          unsigned char **contents,
-                          long           *length,
-                          long            max_len)
+static void // solid-colour source for float output: writes the same 4-float pixel count times; x, y, dx, dy are unused
+ctx_fragment_color_RGBAF (CtxRasterizer *rasterizer, float x, float y, void *out, int count, float dx, float 
dy)
 {
-  FILE *file;
-  long  size;
-  long  remaining;
-  char *buffer;
-  file = fopen (path, "rb");
-  if (!file)
-    { return -1; }
-  fseek (file, 0, SEEK_END);
-  size = remaining = ftell (file);
-
-  if (size > max_len)
+  float *rgba = (float *) out; // output cursor, advanced 4 floats per pixel
+  float  in[4]; // the fill colour, fetched once
+  CtxSource *g = &rasterizer->state->gstate.source_fill;
+  ctx_color_get_rgba (rasterizer->state, &g->color, in);
+  for (int c = 0; c < 3; c++) // multiply the first three components by the fourth (alpha)
+    in[c] *= in[3];
+  while (count--) // one iteration per output pixel; count is consumed
   {
-     size = remaining = max_len;
+    for (int c = 0; c < 4; c++)
+      rgba[c] = in[c];
+    rgba += 4;
   }
+}
 
-  if (length)
-    { *length =size; }
-  rewind (file);
-  buffer = (char*)malloc (size + 8);
-  if (!buffer)
-    {
-      fclose (file);
-      return -1;
-    }
-  remaining -= fread (buffer, 1, remaining, file);
-  if (remaining)
+
+/* Texture fragment source for float output.
+ *
+ * Fetches each pixel as RGBA8 through the fetcher matching the texture's
+ * bits per pixel, then converts the four bytes to float with
+ * ctx_u8_to_float.  out must have room for count * 4 floats; (x, y) is the
+ * first sample position and (dx, dy) the step between pixels.
+ *
+ * The fetch is done one pixel at a time because the scratch buffer holds
+ * exactly one RGBA8 pixel.  Passing count straight through, as was done
+ * before, made the fetchers write and the conversion loop read 4 * count
+ * bytes of a 4-byte array.
+ */
+static void ctx_fragment_image_RGBAF (CtxRasterizer *rasterizer, float x, float y, void *out, int count, 
float dx, float dy)
+{
+  float *outf = (float *) out;
+  uint8_t rgba[4];
+  CtxGState *gstate = &rasterizer->state->gstate;
+  CtxBuffer *buffer = gstate->source_fill.texture.buffer;
+  for (int i = 0; i < count; i ++)
+  {
+    switch (buffer->format->bpp)
     {
-      fclose (file);
-      free (buffer);
-      return -1;
+#if CTX_FRAGMENT_SPECIALIZE
+      case 1:  ctx_fragment_image_gray1_RGBA8 (rasterizer, x, y, rgba, 1, dx, dy); break;
+      case 24: ctx_fragment_image_rgb8_RGBA8 (rasterizer, x, y, rgba, 1, dx, dy);  break;
+      case 32: ctx_fragment_image_rgba8_RGBA8 (rasterizer, x, y, rgba, 1, dx, dy); break;
+#endif
+      default: ctx_fragment_image_RGBA8 (rasterizer, x, y, rgba, 1, dx, dy);       break;
     }
-  fclose (file);
-  *contents = (unsigned char*) buffer;
-  buffer[size] = 0;
-  return 0;
+    for (int c = 0; c < 4; c ++) { outf[c] = ctx_u8_to_float (rgba[c]); }
+    outf += 4; /* next output pixel */
+    x += dx;   /* next sample position */
+    y += dy;
+  }
 }
 
-static int
-__ctx_file_get_contents (const char     *path,
-                        unsigned char **contents,
-                        long           *length)
+static CtxFragment ctx_rasterizer_get_fragment_RGBAF (CtxRasterizer *rasterizer)
 {
-  return ___ctx_file_get_contents (path, contents, length, 1024*1024*1024);
+  CtxGState *gstate = &rasterizer->state->gstate;
+  switch (gstate->source_fill.type)
+    {
+      case CTX_SOURCE_TEXTURE:         return ctx_fragment_image_RGBAF;
+      case CTX_SOURCE_COLOR:           return ctx_fragment_color_RGBAF;
+#if CTX_GRADIENTS
+      case CTX_SOURCE_LINEAR_GRADIENT: return ctx_fragment_linear_gradient_RGBAF;
+      case CTX_SOURCE_RADIAL_GRADIENT: return ctx_fragment_radial_gradient_RGBAF;
+#endif
+    }
+  return ctx_fragment_color_RGBAF;
 }
-
-#if !__COSMOPOLITAN__
-#include <limits.h>
 #endif
 
+static CtxFragment ctx_rasterizer_get_fragment_RGBA8 (CtxRasterizer *rasterizer)
+{
+  CtxGState *gstate = &rasterizer->state->gstate;
+  CtxBuffer *buffer = gstate->source_fill.texture.buffer;
+  switch (gstate->source_fill.type)
+    {
+      case CTX_SOURCE_TEXTURE:
+        if (!buffer || !buffer->format)
+          return ctx_fragment_color_RGBA8;
 
-
-
+        if (buffer->format->pixel_format == CTX_FORMAT_YUV420)
+        {
+          return ctx_fragment_image_yuv420_RGBA8_nearest;
+        }
+        else
+#if CTX_FRAGMENT_SPECIALIZE
+        switch (buffer->format->bpp)
+          {
+            case 1: return ctx_fragment_image_gray1_RGBA8;
+#if 1
+            case 24: 
+              {
+                if (gstate->image_smoothing)
+                {
+                  float factor = ctx_matrix_get_scale (&gstate->transform);
+                          //fprintf (stderr, "{%.3f}", factor);
+                  if (factor < 0.5f)
+                  {
+                    if (rasterizer->swap_red_green)
+                      return ctx_fragment_image_rgb8_RGBA8_box_swap_red_green;
+                    return ctx_fragment_image_rgb8_RGBA8_box;
+                  }
+#if CTX_ALWAYS_USE_NEAREST_FOR_SCALE1
+                  else if (factor > 0.99f && factor < 1.01f)
+                  {
+                    if (rasterizer->swap_red_green)
+                      return ctx_fragment_image_rgb8_RGBA8_nearest_swap_red_green;
+                    return ctx_fragment_image_rgb8_RGBA8_nearest;
+                  }
 #endif
-
-
+                  else
+                  {
+                    if (rasterizer->swap_red_green)
+                      return ctx_fragment_image_rgb8_RGBA8_bi_swap_red_green;
+                    return ctx_fragment_image_rgb8_RGBA8_bi;
+                  }
+                }
+                else
+                {
+                  if (rasterizer->swap_red_green)
+                    return ctx_fragment_image_rgb8_RGBA8_nearest_swap_red_green;
+                  return ctx_fragment_image_rgb8_RGBA8_nearest;
+                }
+              }
+              break;
+#endif
+            case 32:
+              {
+                if (gstate->image_smoothing)
+                {
+                  float factor = ctx_matrix_get_scale (&gstate->transform);
+                          //fprintf (stderr, "[%.3f]", factor);
+                  if (factor < 0.5f)
+                  {
+                    if (rasterizer->swap_red_green)
+                      return ctx_fragment_image_rgba8_RGBA8_box_swap_red_green;
+                    return ctx_fragment_image_rgba8_RGBA8_box;
+                  }
+#if CTX_ALWAYS_USE_NEAREST_FOR_SCALE1
+                  else if (factor > 0.99f && factor < 1.01f)
+                  {
+                    if (rasterizer->swap_red_green)
+                      return ctx_fragment_image_rgba8_RGBA8_nearest_swap_red_green;
+                    return ctx_fragment_image_rgba8_RGBA8_nearest;
+                  }
+#endif
+                  else
+                  {
+                    if (rasterizer->swap_red_green)
+                      return ctx_fragment_image_rgba8_RGBA8_bi_swap_red_green;
+                    return ctx_fragment_image_rgba8_RGBA8_bi;
+                  }
+                }
+                else
+                {
+                  if (rasterizer->swap_red_green)
+                    return ctx_fragment_image_rgba8_RGBA8_nearest_swap_red_green;
+                  return ctx_fragment_image_rgba8_RGBA8_nearest;
+                }
+              }
+            default: return ctx_fragment_image_RGBA8;
+          }
+#else
+          return ctx_fragment_image_RGBA8;
 #endif
 
+      case CTX_SOURCE_COLOR:           return ctx_fragment_color_RGBA8;
+#if CTX_GRADIENTS
+      case CTX_SOURCE_LINEAR_GRADIENT: return ctx_fragment_linear_gradient_RGBA8;
+      case CTX_SOURCE_RADIAL_GRADIENT: return ctx_fragment_radial_gradient_RGBA8;
+#endif
+    }
+  return ctx_fragment_color_RGBA8;
+}
 
+static inline void
+ctx_init_uv (CtxRasterizer *rasterizer,
+             int x0,
+             float *u0, float *v0, float *ud, float *vd)
+{
+  CtxMatrix *transform = &rasterizer->state->gstate.source_fill.transform;
+  float u1 = x0 + 1;
+  float v1 = rasterizer->scanline / CTX_FULL_AA;
+  *v0 = v1;
+  *u0 = x0;
+  _ctx_matrix_apply_transform (transform, u0, v0);
+  _ctx_matrix_apply_transform (transform, &u1, &v1);
+  *ud = u1 - *u0;
+  *vd = v1 - *v0;
+}
 
-static CTX_INLINE int
-ctx_conts_for_entry (CtxEntry *entry)
+static void
+ctx_u8_copy_normal (int components, CTX_COMPOSITE_ARGUMENTS)
 {
-    switch (entry->code)
+  if (CTX_UNLIKELY(rasterizer->fragment))
     {
-      case CTX_DATA:
-        return entry->data.u32[1];
-      case CTX_LINEAR_GRADIENT:
-      //case CTX_DEFINE_TEXTURE:
-        return 1;
-      case CTX_RADIAL_GRADIENT:
-      case CTX_ARC:
-      case CTX_ARC_TO:
-      case CTX_REL_ARC_TO:
-      case CTX_CURVE_TO:
-      case CTX_REL_CURVE_TO:
-      case CTX_APPLY_TRANSFORM:
-      case CTX_SOURCE_TRANSFORM:
-      case CTX_COLOR:
-      case CTX_ROUND_RECTANGLE:
-      case CTX_SHADOW_COLOR:
-        return 2;
-      case CTX_FILL_RECT:
-      case CTX_STROKE_RECT:
-      case CTX_RECTANGLE:
-      case CTX_VIEW_BOX:
-      case CTX_REL_QUAD_TO:
-      case CTX_QUAD_TO:
-        return 1;
-
-      case CTX_TEXT:
-      case CTX_LINE_DASH:
-      case CTX_COLOR_SPACE:
-      case CTX_STROKE_TEXT:
-      case CTX_FONT:
-      case CTX_TEXTURE:
+      float u0 = 0; float v0 = 0;
+      float ud = 0; float vd = 0;
+      ctx_init_uv (rasterizer, x0, &u0, &v0, &ud, &vd);
+      while (count--)
+      {
+        uint8_t cov = *coverage;
+        if (CTX_UNLIKELY(cov == 0))
         {
-          int eid_len = entry[1].data.u32[1];
-          return eid_len + 1;
+          u0+=ud;
+          v0+=vd;
         }
-      case CTX_DEFINE_TEXTURE:
+        else
         {
-          int eid_len = entry[2].data.u32[1];
-          int pix_len = entry[2 + eid_len + 1].data.u32[1];
-          return eid_len + pix_len + 2 + 1;
+          rasterizer->fragment (rasterizer, u0, v0, src, 1, ud, vd);
+          u0+=ud;
+          v0+=vd;
+          if (cov == 255)
+          {
+            for (int c = 0; c < components; c++)
+              dst[c] = src[c];
+          }
+          else
+          {
+            uint8_t rcov = 255 - cov;
+            for (int c = 0; c < components; c++)
+              { dst[c] = (src[c]*cov + dst[c]*rcov)/255; }
+          }
         }
-      default:
-        return 0;
+        dst += components;
+        coverage ++;
+      }
+      return;
     }
+
+  while (count--)
+  {
+    uint8_t cov = *coverage;
+    uint8_t rcov = 255-cov;
+    for (int c = 0; c < components; c++)
+      { dst[c] = (src[c]*cov+dst[c]*rcov)/255; }
+    dst += components;
+    coverage ++;
+  }
 }
 
-// expanding arc_to to arc can be the job
-// of a layer in front of renderer?
-//   doing:
-//     rectangle
-//     arc
-//     ... etc reduction to beziers
-//     or even do the reduction to
-//     polylines directly here...
-//     making the rasterizer able to
-//     only do poly-lines? will that be faster?
+static void
+ctx_u8_clear_normal (int components, CTX_COMPOSITE_ARGUMENTS)
+{
+  while (count--)
+  {
+    uint8_t cov = *coverage;
+    for (int c = 0; c < components; c++)
+      { dst[c] = (dst[c] * (256-cov)) >> 8; }
+    coverage ++;
+    dst += components;
+  }
+}
 
-/* the iterator - should decode bitpacked data as well -
- * making the rasterizers simpler, possibly do unpacking
- * all the way to absolute coordinates.. unless mixed
- * relative/not are wanted.
- */
+typedef enum {
+  CTX_PORTER_DUFF_0,
+  CTX_PORTER_DUFF_1,
+  CTX_PORTER_DUFF_ALPHA,
+  CTX_PORTER_DUFF_1_MINUS_ALPHA,
+} CtxPorterDuffFactor;
 
+#define  \
+ctx_porter_duff_factors(mode, foo, bar)\
+{\
+  switch (mode)\
+  {\
+     case CTX_COMPOSITE_SOURCE_ATOP:\
+        f_s = CTX_PORTER_DUFF_ALPHA;\
+        f_d = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
+      break;\
+     case CTX_COMPOSITE_DESTINATION_ATOP:\
+        f_s = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
+        f_d = CTX_PORTER_DUFF_ALPHA;\
+      break;\
+     case CTX_COMPOSITE_DESTINATION_IN:\
+        f_s = CTX_PORTER_DUFF_0;\
+        f_d = CTX_PORTER_DUFF_ALPHA;\
+      break;\
+     case CTX_COMPOSITE_DESTINATION:\
+        f_s = CTX_PORTER_DUFF_0;\
+        f_d = CTX_PORTER_DUFF_1;\
+       break;\
+     case CTX_COMPOSITE_SOURCE_OVER:\
+        f_s = CTX_PORTER_DUFF_1;\
+        f_d = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
+       break;\
+     case CTX_COMPOSITE_DESTINATION_OVER:\
+        f_s = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
+        f_d = CTX_PORTER_DUFF_1;\
+       break;\
+     case CTX_COMPOSITE_XOR:\
+        f_s = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
+        f_d = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
+       break;\
+     case CTX_COMPOSITE_DESTINATION_OUT:\
+        f_s = CTX_PORTER_DUFF_0;\
+        f_d = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
+       break;\
+     case CTX_COMPOSITE_SOURCE_OUT:\
+        f_s = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
+        f_d = CTX_PORTER_DUFF_0;\
+       break;\
+     case CTX_COMPOSITE_SOURCE_IN:\
+        f_s = CTX_PORTER_DUFF_ALPHA;\
+        f_d = CTX_PORTER_DUFF_0;\
+       break;\
+     case CTX_COMPOSITE_COPY:\
+        f_s = CTX_PORTER_DUFF_1;\
+        f_d = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
+       break;\
+     default:\
+     case CTX_COMPOSITE_CLEAR:\
+        f_s = CTX_PORTER_DUFF_0;\
+        f_d = CTX_PORTER_DUFF_0;\
+       break;\
+  }\
+}
 
 static void
-ctx_iterator_init (CtxIterator      *iterator,
-                   CtxDrawlist  *drawlist,
-                   int               start_pos,
-                   int               flags)
+ctx_u8_source_over_normal_color (int components,
+                                 CtxRasterizer         *rasterizer,
+                                 uint8_t * __restrict__ dst,
+                                 uint8_t * __restrict__ src,
+                                 int                    x0,
+                                 uint8_t * __restrict__ coverage,
+                                 int                    count)
 {
-  iterator->drawlist   = drawlist;
-  iterator->flags          = flags;
-  iterator->bitpack_pos    = 0;
-  iterator->bitpack_length = 0;
-  iterator->pos            = start_pos;
-  iterator->end_pos        = drawlist->count;
-  iterator->first_run      = 1; // -1 is a marker used for first run
-  ctx_memset (iterator->bitpack_command, 0, sizeof (iterator->bitpack_command) );
+  uint8_t tsrc[5];
+  *((uint32_t*)tsrc) = *((uint32_t*)src);
+
+  while (count--)
+  {
+    for (int c = 0; c < components; c++)
+      //dst[c] =  ((tsrc[c] * *coverage)>>8) + (dst[c] * (((65536)-(tsrc[components-1] * *coverage)))>>16);
+      dst[c] =  ((((tsrc[c] * *coverage)) + (dst[c] * (((255)-(((255+(tsrc[components-1] * 
*coverage))>>8))))))>>8);
+    coverage ++;
+    dst+=components;
+  }
 }
 
-int ctx_iterator_pos (CtxIterator *iterator)
+static void
+ctx_u8_source_copy_normal_color (int components, CTX_COMPOSITE_ARGUMENTS)
 {
-  return iterator->pos;
+  while (count--)
+  {
+    for (int c = 0; c < components; c++)
+      dst[c] =  ctx_lerp_u8(dst[c],src[c],coverage[0]);
+    coverage ++;
+    dst+=components;
+  }
 }
 
-static CtxEntry *_ctx_iterator_next (CtxIterator *iterator)
+static inline void
+ctx_RGBA8_source_over_normal_buf (CTX_COMPOSITE_ARGUMENTS, uint8_t *tsrc)
 {
-  int ret = iterator->pos;
-  CtxEntry *entry = &iterator->drawlist->entries[ret];
-  if (CTX_UNLIKELY(ret >= iterator->end_pos))
-    { return NULL; }
+  while (count--)
+  {
+     uint32_t si_ga = ((*((uint32_t*)tsrc)) & 0xff00ff00) >> 8;
+     uint32_t si_rb = (*((uint32_t*)tsrc)) & 0x00ff00ff;
+//   uint32_t di_ga = ((*((uint32_t*)dst)) & 0xff00ff00) >> 8;
+//   uint32_t di_rb = (*((uint32_t*)dst)) & 0x00ff00ff;
+     uint32_t si_a  = si_ga >> 16;
+     uint32_t cov = *coverage;
+     uint32_t racov = (255-((255+si_a*cov)>>8));
+     *((uint32_t*)(dst)) =
 
-  if (CTX_UNLIKELY(iterator->first_run))
-      iterator->first_run = 0;
-  else
-     iterator->pos += (ctx_conts_for_entry (entry) + 1);
+     (((si_rb*cov+0xff00ff+(((*((uint32_t*)(dst)))&0x00ff00ff)*racov))>>8)&0x00ff00ff)|
+     ((si_ga*cov+0xff00ff+((((*((uint32_t*)(dst)))&0xff00ff00)>>8)*racov))&0xff00ff00);
 
-  if (CTX_UNLIKELY(iterator->pos >= iterator->end_pos))
-    { return NULL; }
-  return &iterator->drawlist->entries[iterator->pos];
+     coverage ++;
+     tsrc += 4;
+     dst  += 4;
+  }
 }
 
-// 6024x4008
-#if CTX_BITPACK
-static void
-ctx_iterator_expand_s8_args (CtxIterator *iterator, CtxEntry *entry)
+static inline void
+ctx_RGBA8_source_over_normal_full_cov_buf (CTX_COMPOSITE_ARGUMENTS, uint8_t *tsrc)
 {
-  int no = 0;
-  for (int cno = 0; cno < 4; cno++)
-    for (int d = 0; d < 2; d++, no++)
-      iterator->bitpack_command[cno].data.f[d] =
-        entry->data.s8[no] * 1.0f / CTX_SUBDIV;
-  iterator->bitpack_command[0].code =
-    iterator->bitpack_command[1].code =
-      iterator->bitpack_command[2].code =
-        iterator->bitpack_command[3].code = CTX_CONT;
-  iterator->bitpack_length = 4;
-  iterator->bitpack_pos = 0;
+  while (count--)
+  {
+     uint32_t si_ga = ((*((uint32_t*)tsrc)) & 0xff00ff00) >> 8;
+     uint32_t si_rb = (*((uint32_t*)tsrc)) & 0x00ff00ff;
+     uint32_t si_a  = si_ga >> 16;
+     uint32_t racov = si_a^255;
+     *((uint32_t*)(dst)) =
+     (((si_rb*255+0xff00ff+(((*((uint32_t*)(dst)))&0x00ff00ff)*racov))>>8)&0x00ff00ff)|
+     ((si_ga*255+0xff00ff+((((*((uint32_t*)(dst)))&0xff00ff00)>>8)*racov))&0xff00ff00);
+     tsrc += 4;
+     dst  += 4;
+  }
 }
 
 static void
-ctx_iterator_expand_s16_args (CtxIterator *iterator, CtxEntry *entry)
+ctx_RGBA8_source_copy_normal_buf (CTX_COMPOSITE_ARGUMENTS, uint8_t *tsrc)
 {
-  int no = 0;
-  for (int cno = 0; cno < 2; cno++)
-    for (int d = 0; d < 2; d++, no++)
-      iterator->bitpack_command[cno].data.f[d] = entry->data.s16[no] * 1.0f /
-          CTX_SUBDIV;
-  iterator->bitpack_command[0].code =
-    iterator->bitpack_command[1].code = CTX_CONT;
-  iterator->bitpack_length = 2;
-  iterator->bitpack_pos    = 0;
+  while (count--)
+  {
+    ((uint32_t*)dst)[0]=ctx_lerp_RGBA8 (((uint32_t*)dst)[0],
+                                        ((uint32_t*)tsrc)[0], coverage[0]);
+    coverage ++;
+    tsrc += 4;
+    dst  += 4;
+  }
 }
-#endif
 
-CtxCommand *
-ctx_iterator_next (CtxIterator *iterator)
+static void
+ctx_RGBA8_source_over_normal_fragment (CTX_COMPOSITE_ARGUMENTS)
 {
-  CtxEntry *ret;
-#if CTX_BITPACK
-  int expand_bitpack = iterator->flags & CTX_ITERATOR_EXPAND_BITPACK;
-again:
-  if (CTX_UNLIKELY(iterator->bitpack_length))
-    {
-      ret = &iterator->bitpack_command[iterator->bitpack_pos];
-      iterator->bitpack_pos += (ctx_conts_for_entry (ret) + 1);
-      if (iterator->bitpack_pos >= iterator->bitpack_length)
-        {
-          iterator->bitpack_length = 0;
-        }
-      return (CtxCommand *) ret;
-    }
-#endif
-  ret = _ctx_iterator_next (iterator);
-#if CTX_BITPACK
-  if (CTX_UNLIKELY(ret && expand_bitpack))
-    switch ((CtxCode)(ret->code))
-      {
-        case CTX_REL_CURVE_TO_REL_LINE_TO:
-          ctx_iterator_expand_s8_args (iterator, ret);
-          iterator->bitpack_command[0].code = CTX_REL_CURVE_TO;
-          iterator->bitpack_command[1].code =
-          iterator->bitpack_command[2].code = CTX_CONT;
-          iterator->bitpack_command[3].code = CTX_REL_LINE_TO;
-          // 0.0 here is a common optimization - so check for it
-          if (ret->data.s8[6]== 0 && ret->data.s8[7] == 0)
-            { iterator->bitpack_length = 3; }
-          else
-            iterator->bitpack_length          = 4;
-          goto again;
-        case CTX_REL_LINE_TO_REL_CURVE_TO:
-          ctx_iterator_expand_s8_args (iterator, ret);
-          iterator->bitpack_command[0].code = CTX_REL_LINE_TO;
-          iterator->bitpack_command[1].code = CTX_REL_CURVE_TO;
-          iterator->bitpack_length          = 2;
-          goto again;
-        case CTX_REL_CURVE_TO_REL_MOVE_TO:
-          ctx_iterator_expand_s8_args (iterator, ret);
-          iterator->bitpack_command[0].code = CTX_REL_CURVE_TO;
-          iterator->bitpack_command[3].code = CTX_REL_MOVE_TO;
-          iterator->bitpack_length          = 4;
-          goto again;
-        case CTX_REL_LINE_TO_X4:
-          ctx_iterator_expand_s8_args (iterator, ret);
-          iterator->bitpack_command[0].code =
-          iterator->bitpack_command[1].code =
-          iterator->bitpack_command[2].code =
-          iterator->bitpack_command[3].code = CTX_REL_LINE_TO;
-          iterator->bitpack_length          = 4;
-          goto again;
-        case CTX_REL_QUAD_TO_S16:
-          ctx_iterator_expand_s16_args (iterator, ret);
-          iterator->bitpack_command[0].code = CTX_REL_QUAD_TO;
-          iterator->bitpack_length          = 1;
-          goto again;
-        case CTX_REL_QUAD_TO_REL_QUAD_TO:
-          ctx_iterator_expand_s8_args (iterator, ret);
-          iterator->bitpack_command[0].code =
-          iterator->bitpack_command[2].code = CTX_REL_QUAD_TO;
-          iterator->bitpack_length          = 3;
-          goto again;
-        case CTX_REL_LINE_TO_X2:
-          ctx_iterator_expand_s16_args (iterator, ret);
-          iterator->bitpack_command[0].code =
-          iterator->bitpack_command[1].code = CTX_REL_LINE_TO;
-          iterator->bitpack_length          = 2;
-          goto again;
-        case CTX_REL_LINE_TO_REL_MOVE_TO:
-          ctx_iterator_expand_s16_args (iterator, ret);
-          iterator->bitpack_command[0].code = CTX_REL_LINE_TO;
-          iterator->bitpack_command[1].code = CTX_REL_MOVE_TO;
-          iterator->bitpack_length          = 2;
-          goto again;
-        case CTX_MOVE_TO_REL_LINE_TO:
-          ctx_iterator_expand_s16_args (iterator, ret);
-          iterator->bitpack_command[0].code = CTX_MOVE_TO;
-          iterator->bitpack_command[1].code = CTX_REL_MOVE_TO;
-          iterator->bitpack_length          = 2;
-          goto again;
-        case CTX_FILL_MOVE_TO:
-          iterator->bitpack_command[1]      = *ret;
-          iterator->bitpack_command[0].code = CTX_FILL;
-          iterator->bitpack_command[1].code = CTX_MOVE_TO;
-          iterator->bitpack_pos             = 0;
-          iterator->bitpack_length          = 2;
-          goto again;
-        case CTX_LINEAR_GRADIENT:
-        case CTX_QUAD_TO:
-        case CTX_REL_QUAD_TO:
-        case CTX_TEXTURE:
-        case CTX_RECTANGLE:
-        case CTX_VIEW_BOX:
-        case CTX_ARC:
-        case CTX_ARC_TO:
-        case CTX_REL_ARC_TO:
-        case CTX_COLOR:
-        case CTX_SHADOW_COLOR:
-        case CTX_RADIAL_GRADIENT:
-        case CTX_CURVE_TO:
-        case CTX_REL_CURVE_TO:
-        case CTX_APPLY_TRANSFORM:
-        case CTX_SOURCE_TRANSFORM:
-        case CTX_ROUND_RECTANGLE:
-        case CTX_TEXT:
-        case CTX_STROKE_TEXT:
-        case CTX_FONT:
-        case CTX_LINE_DASH:
-        case CTX_FILL:
-        case CTX_NOP:
-        case CTX_MOVE_TO:
-        case CTX_LINE_TO:
-        case CTX_REL_MOVE_TO:
-        case CTX_REL_LINE_TO:
-        case CTX_VER_LINE_TO:
-        case CTX_REL_VER_LINE_TO:
-        case CTX_HOR_LINE_TO:
-        case CTX_REL_HOR_LINE_TO:
-        case CTX_ROTATE:
-        case CTX_FLUSH:
-        case CTX_TEXT_ALIGN:
-        case CTX_TEXT_BASELINE:
-        case CTX_TEXT_DIRECTION:
-        case CTX_MITER_LIMIT:
-        case CTX_GLOBAL_ALPHA:
-        case CTX_COMPOSITING_MODE:
-        case CTX_BLEND_MODE:
-        case CTX_SHADOW_BLUR:
-        case CTX_SHADOW_OFFSET_X:
-        case CTX_SHADOW_OFFSET_Y:
-        case CTX_RESET:
-        case CTX_EXIT:
-        case CTX_BEGIN_PATH:
-        case CTX_CLOSE_PATH:
-        case CTX_SAVE:
-        case CTX_CLIP:
-        case CTX_PRESERVE:
-        case CTX_DEFINE_GLYPH:
-        case CTX_IDENTITY:
-        case CTX_FONT_SIZE:
-        case CTX_START_GROUP:
-        case CTX_END_GROUP:
-        case CTX_RESTORE:
-        case CTX_LINE_WIDTH:
-        case CTX_LINE_DASH_OFFSET:
-        case CTX_STROKE:
-        case CTX_KERNING_PAIR:
-        case CTX_SCALE:
-        case CTX_GLYPH:
-        case CTX_SET_PIXEL:
-        case CTX_FILL_RULE:
-        case CTX_LINE_CAP:
-        case CTX_LINE_JOIN:
-        case CTX_NEW_PAGE:
-        case CTX_SET_KEY:
-        case CTX_TRANSLATE:
-        case CTX_DEFINE_TEXTURE:
-        case CTX_GRADIENT_STOP:
-        case CTX_DATA: // XXX : would be better if we hide the DATAs
-        case CTX_CONT: // shouldnt happen
-        default:
-          iterator->bitpack_length = 0;
-          return (CtxCommand *) ret;
-#if 0
-        default: // XXX remove - and get better warnings
-          iterator->bitpack_command[0] = ret[0];
-          iterator->bitpack_command[1] = ret[1];
-          iterator->bitpack_command[2] = ret[2];
-          iterator->bitpack_command[3] = ret[3];
-          iterator->bitpack_command[4] = ret[4];
-          iterator->bitpack_pos = 0;
-          iterator->bitpack_length = 1;
-          goto again;
-#endif
-      }
-#endif
-  return (CtxCommand *) ret;
+  float u0 = 0; float v0 = 0;
+  float ud = 0; float vd = 0;
+  ctx_init_uv (rasterizer, x0, &u0, &v0, &ud, &vd);
+  uint8_t _tsrc[4 * (count)];
+  rasterizer->fragment (rasterizer, u0, v0, &_tsrc[0], count, ud, vd);
+  ctx_RGBA8_source_over_normal_buf (rasterizer,
+                       dst, src, x0, coverage, count, &_tsrc[0]);
 }
 
-static void ctx_drawlist_compact (CtxDrawlist *drawlist);
-static void
-ctx_drawlist_resize (CtxDrawlist *drawlist, int desired_size)
+static inline void
+ctx_RGBA8_source_over_normal_full_cov_fragment (CTX_COMPOSITE_ARGUMENTS, int scanlines)
 {
-  int flags=drawlist->flags;
-#if CTX_DRAWLIST_STATIC
-  if (flags & CTX_DRAWLIST_EDGE_LIST)
-    {
-      static CtxSegment sbuf[CTX_MAX_EDGE_LIST_SIZE];
-      drawlist->entries = (CtxEntry*)&sbuf[0];
-      drawlist->size = CTX_MAX_EDGE_LIST_SIZE;
-    }
-  else if (flags & CTX_DRAWLIST_CURRENT_PATH)
-    {
-      static CtxEntry sbuf[CTX_MAX_EDGE_LIST_SIZE];
-      drawlist->entries = &sbuf[0];
-      drawlist->size = CTX_MAX_EDGE_LIST_SIZE;
-    }
-  else
-    {
-      static CtxEntry sbuf[CTX_MAX_JOURNAL_SIZE];
-      drawlist->entries = &sbuf[0];
-      drawlist->size = CTX_MAX_JOURNAL_SIZE;
-      if(0)ctx_drawlist_compact (drawlist);
-    }
-#else
-  int new_size = desired_size;
-  int min_size = CTX_MIN_JOURNAL_SIZE;
-  int max_size = CTX_MAX_JOURNAL_SIZE;
-  if ((flags & CTX_DRAWLIST_EDGE_LIST))
-    {
-      min_size = CTX_MIN_EDGE_LIST_SIZE;
-      max_size = CTX_MAX_EDGE_LIST_SIZE;
-    }
-  else if (flags & CTX_DRAWLIST_CURRENT_PATH)
-    {
-      min_size = CTX_MIN_EDGE_LIST_SIZE;
-      max_size = CTX_MAX_EDGE_LIST_SIZE;
-    }
-  else
-    {
-#if 0
-      ctx_drawlist_compact (drawlist);
-#endif
-    }
+  float u0 = 0; float v0 = 0;
+  float ud = 0; float vd = 0;
+  ctx_init_uv (rasterizer, x0, &u0, &v0, &ud, &vd);
 
-  if (CTX_UNLIKELY(new_size < drawlist->size))
-    { return; }
-  if (CTX_UNLIKELY(drawlist->size == max_size))
-    { return; }
-  new_size = ctx_maxi (new_size, min_size);
-  //if (new_size < drawlist->count)
-  //  { new_size = drawlist->count + 4; }
-  new_size = ctx_mini (new_size, max_size);
-  if (new_size != drawlist->size)
-    {
-      int item_size = sizeof (CtxEntry);
-      if (flags & CTX_DRAWLIST_EDGE_LIST) item_size = sizeof (CtxSegment);
-      //fprintf (stderr, "growing drawlist %p %i to %d from %d\n", drawlist, flags, new_size, 
drawlist->size);
-  if (drawlist->entries)
-    {
-      //printf ("grow %p to %d from %d\n", drawlist, new_size, drawlist->size);
-      CtxEntry *ne =  (CtxEntry *) malloc (item_size * new_size);
-      memcpy (ne, drawlist->entries, drawlist->size * item_size );
-      free (drawlist->entries);
-      drawlist->entries = ne;
-      //drawlist->entries = (CtxEntry*)malloc (drawlist->entries, item_size * new_size);
-    }
-  else
-    {
-      //fprintf (stderr, "allocating for %p %d\n", drawlist, new_size);
-      drawlist->entries = (CtxEntry *) malloc (item_size * new_size);
-    }
-  drawlist->size = new_size;
-    }
-  //fprintf (stderr, "drawlist %p is %d\n", drawlist, drawlist->size);
-#endif
+  for (int y = 0; y < scanlines; y++)
+  {
+    uint8_t _tsrc[4 * (count)];
+    rasterizer->fragment (rasterizer, u0, v0, &_tsrc[0], count, ud, vd);
+    ctx_RGBA8_source_over_normal_full_cov_buf (rasterizer,
+                          dst, src, x0, coverage, count, &_tsrc[0]);
+    dst += rasterizer->blit_stride;
+    u0 -= vd;
+    v0 += ud;
+  }
 }
 
 static void
-ctx_edgelist_resize (CtxDrawlist *drawlist, int desired_size)
+ctx_RGBA8_source_copy_normal_fragment (CTX_COMPOSITE_ARGUMENTS)
 {
-#if CTX_DRAWLIST_STATIC
-    {
-      static CtxSegment sbuf[CTX_MAX_EDGE_LIST_SIZE];
-      drawlist->entries = (CtxEntry*)&sbuf[0];
-      drawlist->size = CTX_MAX_EDGE_LIST_SIZE;
-    }
-#else
-  int new_size = desired_size;
-  int min_size = CTX_MIN_JOURNAL_SIZE;
-  int max_size = CTX_MAX_JOURNAL_SIZE;
-    {
-      min_size = CTX_MIN_EDGE_LIST_SIZE;
-      max_size = CTX_MAX_EDGE_LIST_SIZE;
-    }
-
-  if (CTX_UNLIKELY(drawlist->size == max_size))
-    { return; }
-  new_size = ctx_maxi (new_size, min_size);
-  //if (new_size < drawlist->count)
-  //  { new_size = drawlist->count + 4; }
-  new_size = ctx_mini (new_size, max_size);
-  if (new_size != drawlist->size)
-    {
-      int item_size = item_size = sizeof (CtxSegment);
-      //fprintf (stderr, "growing drawlist %p %i to %d from %d\n", drawlist, flags, new_size, 
drawlist->size);
-  if (drawlist->entries)
-    {
-      //printf ("grow %p to %d from %d\n", drawlist, new_size, drawlist->size);
-      CtxEntry *ne =  (CtxEntry *) malloc (item_size * new_size);
-      memcpy (ne, drawlist->entries, drawlist->size * item_size );
-      free (drawlist->entries);
-      drawlist->entries = ne;
-      //drawlist->entries = (CtxEntry*)malloc (drawlist->entries, item_size * new_size);
-    }
-  else
-    {
-      //fprintf (stderr, "allocating for %p %d\n", drawlist, new_size);
-      drawlist->entries = (CtxEntry *) malloc (item_size * new_size);
-    }
-  drawlist->size = new_size;
-    }
-  //fprintf (stderr, "drawlist %p is %d\n", drawlist, drawlist->size);
-#endif
+  float u0 = 0; float v0 = 0;
+  float ud = 0; float vd = 0;
+  ctx_init_uv (rasterizer, x0, &u0, &v0, &ud, &vd);
+  uint8_t _tsrc[4 * (count)];
+  rasterizer->fragment (rasterizer, u0, v0, &_tsrc[0], count, ud, vd);
+  ctx_RGBA8_source_copy_normal_buf (rasterizer,
+                       dst, src, x0, coverage, count, &_tsrc[0]);
 }
 
 
-static inline int
-ctx_drawlist_add_single (CtxDrawlist *drawlist, CtxEntry *entry)
+static void
+ctx_RGBA8_source_over_normal_color (CTX_COMPOSITE_ARGUMENTS)
 {
-  int max_size = CTX_MAX_JOURNAL_SIZE;
-  int ret = drawlist->count;
-  int flags = drawlist->flags;
-  if (CTX_LIKELY((flags & CTX_DRAWLIST_EDGE_LIST ||
-       flags & CTX_DRAWLIST_CURRENT_PATH)))
-    {
-      max_size = CTX_MAX_EDGE_LIST_SIZE;
-    }
-  if (CTX_UNLIKELY(flags & CTX_DRAWLIST_DOESNT_OWN_ENTRIES))
-    {
-      return ret;
-    }
-  if (CTX_UNLIKELY(ret + 64 >= drawlist->size - 40))
-    {
-      int new_ = CTX_MAX (drawlist->size * 2, ret + 1024);
-      ctx_drawlist_resize (drawlist, new_);
-    }
+#if CTX_REFERENCE
+  ctx_u8_source_over_normal_color (4, rasterizer, dst, src, x0, coverage, count);
+#else
+  uint32_t si_ga = ((uint32_t*)rasterizer->color)[1];
+  uint32_t si_rb = ((uint32_t*)rasterizer->color)[2];
+  uint32_t si_a  = si_ga >> 16;
 
-  if (CTX_UNLIKELY(drawlist->count >= max_size - 20))
-    {
-      return 0;
-    }
-  if ((flags & CTX_DRAWLIST_EDGE_LIST))
-    ((CtxSegment*)(drawlist->entries))[drawlist->count] = *(CtxSegment*)entry;
-  else
-    drawlist->entries[drawlist->count] = *entry;
-  ret = drawlist->count;
-  drawlist->count++;
-  return ret;
+  while (count--)
+  {
+     uint32_t cov   = *coverage++;
+     uint32_t rcov  = (((255+si_a * cov)>>8))^255;
+     uint32_t di    = *((uint32_t*)dst);
+     uint32_t di_ga = ((di & 0xff00ff00) >> 8);
+     uint32_t di_rb = (di & 0x00ff00ff);
+     *((uint32_t*)(dst)) =
+     (((si_rb * cov + 0xff00ff + di_rb * rcov) & 0xff00ff00) >> 8)  |
+      ((si_ga * cov + 0xff00ff + di_ga * rcov) & 0xff00ff00);
+     dst+=4;
+  }
+#endif
 }
 
-static inline int
-ctx_edgelist_add_single (CtxDrawlist *drawlist, CtxEntry *entry)
+static void
+ctx_RGBA8_source_copy_normal_color (CTX_COMPOSITE_ARGUMENTS)
 {
-  int max_size = CTX_MAX_EDGE_LIST_SIZE;
-  int ret = drawlist->count;
-  if ((ret + 64 >= drawlist->size - 40))
-    {
-      int new_ = CTX_MAX (drawlist->size * 2, ret + 1024);
-      ctx_edgelist_resize (drawlist, new_);
-    }
+#if CTX_REFERENCE
+  ctx_u8_source_copy_normal_color (4, rasterizer, dst, src, x0, coverage, count);
+#else
+  uint32_t si_ga = ((uint32_t*)rasterizer->color)[1];
+  uint32_t si_rb = ((uint32_t*)rasterizer->color)[2];
 
-  if (CTX_UNLIKELY(drawlist->count >= max_size - 20))
-    {
-      return 0;
-    }
-  ((CtxSegment*)(drawlist->entries))[drawlist->count] = *(CtxSegment*)entry;
-  ret = drawlist->count;
-  drawlist->count++;
-  return ret;
-}
+  while (count--)
+  {
+     uint32_t cov   = *coverage++;
+     uint32_t di    = *((uint32_t*)dst);
+     uint32_t di_ga = (di & 0xff00ff00);
+     uint32_t di_rb = (di & 0x00ff00ff);
 
-int
-ctx_add_single (Ctx *ctx, void *entry)
-{
-  return ctx_drawlist_add_single (&ctx->drawlist, (CtxEntry *) entry);
+     uint32_t d_rb  = si_rb - di_rb;
+     uint32_t d_ga  = si_ga - (di_ga>>8);
+
+     *((uint32_t*)(dst)) =
+
+     (((di_rb + ((d_rb * cov)>>8)) & 0x00ff00ff))  |
+      ((di_ga + ((d_ga * cov)      & 0xff00ff00)));
+     dst +=4;
+  }
+#endif
 }
 
-static inline int
-ctx_drawlist_add_entry (CtxDrawlist *drawlist, CtxEntry *entry)
+static void
+ctx_RGBA8_clear_normal (CTX_COMPOSITE_ARGUMENTS)
 {
-  int length = ctx_conts_for_entry (entry) + 1;
-  int ret = 0;
-  for (int i = 0; i < length; i ++)
-    {
-      ret = ctx_drawlist_add_single (drawlist, &entry[i]);
-    }
-  return ret;
+  ctx_u8_clear_normal (4, rasterizer, dst, src, x0, coverage, count);
 }
 
-#if 0
-int
-ctx_drawlist_insert_entry (CtxDrawlist *drawlist, int pos, CtxEntry *entry)
+static void /* "Normal" blend: copies count pixels of `components` bytes from src to blended; dst is not read. */
+ctx_u8_blend_normal (int components, uint8_t * __restrict__ dst, uint8_t *src, uint8_t *blended, int count)
 {
-  int length = ctx_conts_for_entry (entry) + 1;
-  int tmp_pos = ctx_drawlist_add_entry (drawlist, entry);
-  for (int i = 0; i < length; i++)
+  for (int j = 0; j < count; j++)
   {
-    for (int j = pos + i + 1; j < tmp_pos; j++)
-      drawlist->entries[j] = entry[j-1];
-    drawlist->entries[pos + i] = entry[i];
-  }
-  return pos;
-}
-#endif
-int
-ctx_drawlist_insert_entry (CtxDrawlist *drawlist, int pos, CtxEntry *entry)
-{
-  int length = ctx_conts_for_entry (entry) + 1;
-  int tmp_pos = ctx_drawlist_add_entry (drawlist, entry);
-#if 1
-  for (int i = 0; i < length; i++)
+  switch (components) /* unrolled byte copies for the common pixel sizes */
   {
-    for (int j = tmp_pos; j > pos + i; j--)
-      drawlist->entries[j] = drawlist->entries[j-1];
-    drawlist->entries[pos + i] = entry[i];
+     case 3: /* plain byte stores: 3-byte pixels sit at odd addresses, so wider stores would be misaligned */
+       blended[2] = src[2];
+       blended[0] = src[0]; blended[1] = src[1];
+       break;
+     case 2:
+       blended[0] = src[0]; blended[1] = src[1];
+       break;
+     case 5: /* 5-byte pixels are never 4-byte aligned beyond the first one */
+       blended[0] = src[0]; blended[1] = src[1]; blended[2] = src[2]; blended[3] = src[3];
+       blended[4] = src[4];
+       break;
+     case 4:
+       blended[0] = src[0]; blended[1] = src[1]; blended[2] = src[2]; blended[3] = src[3];
+       break;
+     default: /* any other component count: generic loop */
+       {
+        for (int i = 0; i<components;i++)
+           blended[i] = src[i];
+       }
+       break;
+  }
+    blended+=components;
+    src+=components;
   }
-  return pos;
-#endif
-  return tmp_pos;
 }
 
-int ctx_append_drawlist (Ctx *ctx, void *data, int length)
+/* Saturating 8-bit addition without branches: yields a + b, clamped to 255. */
+static inline uint8_t ctx_sadd8(uint8_t a, uint8_t b)
 {
-  CtxEntry *entries = (CtxEntry *) data;
-  if (length % sizeof (CtxEntry) )
-    {
-      ctx_log("drawlist not multiple of 9\n");
-      return -1;
-    }
-  for (unsigned int i = 0; i < length / sizeof (CtxEntry); i++)
-    {
-      ctx_drawlist_add_single (&ctx->drawlist, &entries[i]);
-    }
-  return 0;
+  unsigned int total = (unsigned int) a + (unsigned int) b; /* 0..510, bit 8 is the carry */
+  return (uint8_t) (total | (0u - (total >> 8)));           /* a set carry turns into an all-ones mask */
 }
 
-int ctx_set_drawlist (Ctx *ctx, void *data, int length)
-{
-  CtxDrawlist *drawlist = &ctx->drawlist;
-  if (drawlist->flags & CTX_DRAWLIST_DOESNT_OWN_ENTRIES)
-    {
-      return -1;
-    }
-  ctx->drawlist.count = 0;
-  if (!data || length == 0)
-    return 0;
-  if (CTX_UNLIKELY(length % 9)) return -1;
-  ctx_drawlist_resize (drawlist, length/9);
-  memcpy (drawlist->entries, data, length);
-  drawlist->count = length / 9;
-  return length;
-}
+#if CTX_BLENDING_AND_COMPOSITING
 
-int ctx_get_drawlist_count (Ctx *ctx)
-{
-  return ctx->drawlist.count;
+#define ctx_u8_blend_define(name, CODE) /* emits ctx_u8_blend_<name>(), which runs CODE once per pixel */ \
+static inline void \
+ctx_u8_blend_##name (int components, uint8_t * __restrict__ dst, uint8_t *src, uint8_t *blended, int count)\
+{\
+  for (int j = 0; j < count; j++) { \
+  uint8_t *s=src; uint8_t b[components]; /* CODE sees s (source pixel), b (backdrop copy), blended and components */ \
+  ctx_u8_deassociate_alpha (components, dst, b); /* fills b from dst; presumably un-premultiplied - TODO confirm helper */ \
+    CODE;\
+  blended[components-1] = src[components-1]; /* last component of the result is taken from the source */ \
+  ctx_u8_associate_alpha (components, blended); /* presumably re-premultiplies the result - TODO confirm helper */ \
+  src += components;\
+  dst += components;\
+  blended += components;\
+  }\
 }
 
-const CtxEntry *ctx_get_drawlist (Ctx *ctx)
-{
-  return ctx->drawlist.entries;
-}
+#define ctx_u8_blend_define_seperable(name, CODE) /* per-channel variant: runs CODE with c = 0 .. components-2 */ \
+        ctx_u8_blend_define(name, for (int c = 0; c < components-1; c++) { CODE ;}) \
 
-int
-ctx_add_data (Ctx *ctx, void *data, int length)
-{
-  if (CTX_UNLIKELY(length % sizeof (CtxEntry) ))
-    {
-      //ctx_log("err\n");
-      return -1;
-    }
-  /* some more input verification might be in order.. like
-   * verify that it is well-formed up to length?
-   *
-   * also - it would be very useful to stop processing
-   * upon flush - and do drawlist resizing.
-   */
-  return ctx_drawlist_add_entry (&ctx->drawlist, (CtxEntry *) data);
-}
+ctx_u8_blend_define_seperable(multiply,     blended[c] = (b[c] * s[c])/255;) /* in all modes below: b = backdrop, s = source, values 0..255 */
+ctx_u8_blend_define_seperable(screen,       blended[c] = s[c] + b[c] - (s[c] * b[c])/255;)
+ctx_u8_blend_define_seperable(overlay,      blended[c] = b[c] < 127 ? (s[c] * b[c])/255 : /* NOTE(review): plain multiply/screen split, not the 2x-scaled W3C form - confirm intended */
+                                                         s[c] + b[c] - (s[c] * b[c])/255;)
+ctx_u8_blend_define_seperable(darken,       blended[c] = ctx_mini (b[c], s[c]))
+ctx_u8_blend_define_seperable(lighten,      blended[c] = ctx_maxi (b[c], s[c]))
+ctx_u8_blend_define_seperable(color_dodge,  blended[c] = b[c] == 0 ? 0 :
+                                     s[c] == 255 ? 255 : ctx_mini(255, (255 * b[c]) / (255-s[c])))
+ctx_u8_blend_define_seperable(color_burn,   blended[c] = b[c] == 255 ? 255 : /* full-white backdrop stays white; 255 is the 8-bit equivalent of 1.0 */
+                                     s[c] == 0 ? 0 : 255 - ctx_mini(255, (255*(255 - b[c])) / s[c]))
+ctx_u8_blend_define_seperable(hard_light,   blended[c] = s[c] < 127 ? (b[c] * s[c])/255 : /* NOTE(review): same simplification as overlay, keyed on the source */
+                                                          b[c] + s[c] - (b[c] * s[c])/255;)
+ctx_u8_blend_define_seperable(difference,   blended[c] = b[c] > s[c] ? b[c] - s[c] : s[c] - b[c]) /* absolute difference, never wraps modulo 256 */
+ctx_u8_blend_define_seperable(divide,       blended[c] = s[c]?(255 * b[c]) / s[c]:0)
+ctx_u8_blend_define_seperable(addition,     blended[c] = ctx_sadd8 (s[c], b[c]))
+ctx_u8_blend_define_seperable(subtract,     blended[c] = ctx_maxi(0, s[c]-b[c]))
+ctx_u8_blend_define_seperable(exclusion,    blended[c] = b[c] + s[c] - 2 * (b[c] * s[c]/255))
+ctx_u8_blend_define_seperable(soft_light,
+  if (s[c] <= 255/2)
+  {
+    blended[c] = b[c] - (255 - 2 * s[c]) * b[c] * (255 - b[c]) / (255 * 255);
+  }
+  else
+  {
+    int d; /* auxiliary curve value on the 0..255 scale */
+    if (b[c] <= 255/4)
+      d = (((16 * b[c] - 12 * 255)/255 * b[c] + 4 * 255) * b[c])/255;
+    else
+      d = ctx_sqrtf(b[c]/255.0) * 255.4;
+    blended[c] = b[c] + ((2 * s[c] - 255) * (d - b[c]))/255; /* only the product is rescaled; the backdrop term stays at full scale */
+  }
+)
 
-int ctx_drawlist_add_u32 (CtxDrawlist *drawlist, CtxCode code, uint32_t u32[2])
+static int ctx_int_get_max (int components, int *c) /* largest of c[0 .. components-2], never below 0 */
 {
-  CtxEntry entry[3] = {{code, {{0},}},};
-  entry[0].data.u32[0] = u32[0];
-  entry[0].data.u32[1] = u32[1];
-  return ctx_drawlist_add_single (drawlist, &entry[0]);
+  int largest = 0;
+  int idx = components - 1;
+  while (idx-- > 0)
+  {
+    largest = (c[idx] > largest) ? c[idx] : largest;
+  }
+  return largest;
 }
 
-int ctx_drawlist_add_data (CtxDrawlist *drawlist, const void *data, int length)
+static int ctx_int_get_min (int components, int *c) /* smallest of c[0 .. components-2], capped at 400 */
 {
-  CtxEntry entry[3] = {{CTX_DATA, {{0},}}};
-  entry[0].data.u32[0] = 0;
-  entry[0].data.u32[1] = 0;
-  int ret = ctx_drawlist_add_single (drawlist, &entry[0]);
-  if (CTX_UNLIKELY(!data)) { return -1; }
-  int length_in_blocks;
-  if (length <= 0) { length = strlen ( (char *) data) + 1; }
-  length_in_blocks = length / sizeof (CtxEntry);
-  length_in_blocks += (length % sizeof (CtxEntry) ) ?1:0;
-  if (drawlist->count + length_in_blocks + 4 > drawlist->size)
-    { ctx_drawlist_resize (drawlist, drawlist->count * 1.2 + length_in_blocks + 32); }
-  if (CTX_UNLIKELY(drawlist->count >= drawlist->size))
-    { return -1; }
-  drawlist->count += length_in_blocks;
-  drawlist->entries[ret].data.u32[0] = length;
-  drawlist->entries[ret].data.u32[1] = length_in_blocks;
-  memcpy (&drawlist->entries[ret+1], data, length);
+  int smallest = 400;
+  for (int idx = components - 2; idx >= 0; idx--)
   {
-    //int reverse = ctx_drawlist_add (drawlist, CTX_DATA_REV);
-    CtxEntry entry[3] = {{CTX_DATA_REV, {{0},}}};
-    entry[0].data.u32[0] = length;
-    entry[0].data.u32[1] = length_in_blocks;
-    ctx_drawlist_add_single (drawlist, &entry[0]);
-
-    /* this reverse marker exist to enable more efficient
-       front to back traversal, can be ignored in other
-       direction, is this needed after string setters as well?
-     */
+    smallest = (c[idx] < smallest) ? c[idx] : smallest;
   }
-  return ret;
+  return smallest;
 }
 
-static inline CtxEntry
-ctx_void (CtxCode code)
+static int ctx_int_get_lum (int components, int *c) /* luminance of an int-valued pixel */
 {
-  CtxEntry command;
-  command.code = code;
-  return command;
+  /* 3 or 4 components: weighted luminance via CTX_CSS_RGB_TO_LUMINANCE. */
+  if (components == 3 || components == 4)
+  {
+    return CTX_CSS_RGB_TO_LUMINANCE(c);
+  }
+  /* 1 or 2 components: the first channel is the luminance. */
+  if (components == 1 || components == 2)
+  {
+    return c[0];
+  }
+  /* Any other layout: plain average of every channel except the last. */
+  int color_channels = components - 1;
+  int total = 0;
+  int idx = 0;
+  while (idx < color_channels)
+  {
+    total += c[idx];
+    idx++;
+  }
+  return total / color_channels;
 }
 
-static inline CtxEntry
-ctx_f (CtxCode code, float x, float y)
+static int ctx_u8_get_lum (int components, uint8_t *c) /* luminance of an 8-bit pixel */
 {
-  CtxEntry command;
-  command.code = code;
-  command.data.f[0] = x;
-  command.data.f[1] = y;
-  return command;
+  /* 3 or 4 components: weighted luminance via CTX_CSS_RGB_TO_LUMINANCE. */
+  if (components == 3 || components == 4)
+  {
+    return CTX_CSS_RGB_TO_LUMINANCE(c);
+  }
+  /* 1 or 2 components: the first channel is the luminance. */
+  if (components == 1 || components == 2)
+  {
+    return c[0];
+  }
+  /* Any other layout: plain average of every channel except the last. */
+  int color_channels = components - 1;
+  int total = 0;
+  int idx = 0;
+  while (idx < color_channels)
+  {
+    total += c[idx];
+    idx++;
+  }
+  return total / color_channels;
 }
-
-static CtxEntry
-ctx_u32 (CtxCode code, uint32_t x, uint32_t y)
+static int ctx_u8_get_sat (int components, uint8_t *c) /* saturation: spread between largest and smallest channel */
 {
-  CtxEntry command = ctx_void (code);
-  command.data.u32[0] = x;
-  command.data.u32[1] = y;
-  return command;
+  if (components == 3 || components == 4)
+  {
+    /* Three color channels: brightest minus darkest of c[0..2]. */
+    int red = c[0];
+    int green = c[1];
+    int blue = c[2];
+    int hi = ctx_maxi (red, ctx_maxi (green, blue));
+    int lo = ctx_mini (red, ctx_mini (green, blue));
+    return hi - lo;
+  }
+
+  if (components == 1 || components == 2)
+  {
+    /* A single color channel has no saturation. */
+    return 0;
+  }
+
+  /* Generic layout: range of all channels except the last one. */
+  int lowest = 1000;
+  int highest = -1000;
+  for (int idx = components - 2; idx >= 0; idx--)
+  {
+    int v = c[idx];
+    if (v < lowest) lowest = v;
+    if (v > highest) highest = v;
+  }
+  return highest - lowest;
 }
 
-#if 0
-static CtxEntry
-ctx_s32 (CtxCode code, int32_t x, int32_t y)
+static void ctx_u8_set_lum (int components, uint8_t *c, uint8_t lum) /* shift the color channels of c so their luminance becomes lum, then clip back into 0..255 */
 {
-  CtxEntry command = ctx_void (code);
-  command.data.s32[0] = x;
-  command.data.s32[1] = y;
-  return command;
-}
-#endif
+  int d = lum - ctx_u8_get_lum (components, c); /* offset between requested and current luminance */
+  int tc[components]; /* int working copy; NOTE(review): only components-1 entries are filled but the lum helper may read three for components == 3 - confirm */
+  for (int i = 0; i < components - 1; i++)
+  {
+    tc[i] = c[i] + d; /* may leave the 0..255 range, hence the clipping below */
+  }
 
-static inline CtxEntry
-ctx_s16 (CtxCode code, int x0, int y0, int x1, int y1)
-{
-  CtxEntry command;
-  command.code = code;
-  command.data.s16[0] = x0;
-  command.data.s16[1] = y0;
-  command.data.s16[2] = x1;
-  command.data.s16[3] = y1;
-  return command;
-}
+  int l = ctx_int_get_lum (components, tc); /* luminance of the shifted color */
+  int n = ctx_int_get_min (components, tc); /* smallest shifted channel */
+  int x = ctx_int_get_max (components, tc); /* largest shifted channel */
 
-static inline CtxSegment
-ctx_segment_s16 (CtxCode code, int x0, int y0, int x1, int y1)
-{
-  CtxSegment command;
-  command.code = code;
-  command.data.s16[0] = x0;
-  command.data.s16[1] = y0;
-  command.data.s16[2] = x1;
-  command.data.s16[3] = y1;
-  return command;
-}
+  if (n < 0 && l!=n) /* underflow: scale channels towards l; l != n avoids a zero divisor */
+  {
+    for (int i = 0; i < components - 1; i++)
+      tc[i] = l + (((tc[i] - l) * l) / (l-n));
+  }
 
-static CtxEntry
-ctx_u8 (CtxCode code,
-        uint8_t a, uint8_t b, uint8_t c, uint8_t d,
-        uint8_t e, uint8_t f, uint8_t g, uint8_t h)
-{
-  CtxEntry command;
-  command.code = code;
-  command.data.u8[0] = a;
-  command.data.u8[1] = b;
-  command.data.u8[2] = c;
-  command.data.u8[3] = d;
-  command.data.u8[4] = e;
-  command.data.u8[5] = f;
-  command.data.u8[6] = g;
-  command.data.u8[7] = h;
-  return command;
+  if (x > 255 && x!=l) /* overflow: scale channels towards l; x != l avoids a zero divisor */
+  {
+    for (int i = 0; i < components - 1; i++)
+      tc[i] = l + (((tc[i] - l) * (255 - l)) / (x-l));
+  }
+  for (int i = 0; i < components - 1; i++)
+    c[i] = tc[i]; /* write back; the last component of c is left untouched */
 }
 
-#define CTX_PROCESS_VOID(cmd) do {\
-  CtxEntry commands[4] = {{cmd}};\
-  ctx_process (ctx, &commands[0]);}while(0) \
+static void ctx_u8_set_sat (int components, uint8_t *c, uint8_t sat) /* rescale the three color channels of c so that max - min equals sat */
+{
+  int max = 0, mid = 1, min = 2; /* channel indices, sorted by value below */
+  if (components < 3) { for (int i = 0; i < components - 1; i++) c[i] = 0; return; } /* gray layouts: zero the color channel as the max == min case does, never touch c[1..2] */
+  if (c[min] > c[mid]){int t = min; min = mid; mid = t;}
+  if (c[mid] > c[max]){int t = mid; mid = max; max = t;}
+  if (c[min] > c[mid]){int t = min; min = mid; mid = t;}
 
-#define CTX_PROCESS_F(cmd,x,y) do {\
-  CtxEntry commands[4] = {ctx_f(cmd,x,y),};\
-  ctx_process (ctx, &commands[0]);}while(0) \
+  if (c[max] > c[min])
+  {
+    c[mid] = ((c[mid]-c[min]) * sat) / (c[max] - c[min]); /* keep the middle channel's relative position */
+    c[max] = sat;
+  }
+  else
+  {
+    c[mid] = c[max] = 0; /* all channels equal: result is black */
+  }
+  c[min] = 0;
+}
 
-#define CTX_PROCESS_F1(cmd,x) do {\
-  CtxEntry commands[4] = {ctx_f(cmd,x,0),};\
-  ctx_process (ctx, &commands[0]);}while(0) \
+ctx_u8_blend_define(color, /* "color" mode: hue and saturation of the source, luminance of the backdrop */
+  for (int i = 0; i < components; i++)
+    blended[i] = s[i];
+  ctx_u8_set_lum(components, blended, ctx_u8_get_lum (components, b)); /* luminance comes from backdrop b; using s here would be a no-op */
+)
 
-#define CTX_PROCESS_U32(cmd, x, y) do {\
-  CtxEntry commands[4] = {ctx_u32(cmd, x, y)};\
-  ctx_process (ctx, &commands[0]);}while(0)
+ctx_u8_blend_define(hue, /* "hue" mode: start from the source, then impose the backdrop's saturation and luminance */
+  int in_sat = ctx_u8_get_sat(components, b); /* saturation of backdrop b */
+  int in_lum = ctx_u8_get_lum(components, b); /* luminance of backdrop b */
+  for (int i = 0; i < components; i++)
+    blended[i] = s[i]; /* copy all components of the source pixel */
+  ctx_u8_set_sat(components, blended, in_sat);
+  ctx_u8_set_lum(components, blended, in_lum);
+)
 
-#define CTX_PROCESS_U8(cmd, x) do {\
-  CtxEntry commands[4] = {ctx_u8(cmd, x,0,0,0,0,0,0,0)};\
-  ctx_process (ctx, &commands[0]);}while(0)
+ctx_u8_blend_define(saturation, /* "saturation" mode: start from the backdrop, impose the source's saturation, keep the backdrop's luminance */
+  int in_sat = ctx_u8_get_sat(components, s); /* saturation of source s, read before blended is written */
+  int in_lum = ctx_u8_get_lum(components, b); /* luminance of backdrop b */
+  for (int i = 0; i < components; i++)
+    blended[i] = b[i]; /* copy all components of the backdrop */
+  ctx_u8_set_sat(components, blended, in_sat);
+  ctx_u8_set_lum(components, blended, in_lum);
+)
 
+ctx_u8_blend_define(luminosity, /* "luminosity" mode: backdrop color with the source's luminance */
+  int in_lum = ctx_u8_get_lum(components, s); /* luminance of source s, read before blended is written */
+  for (int i = 0; i < components; i++)
+    blended[i] = b[i]; /* copy all components of the backdrop */
+  ctx_u8_set_lum(components, blended, in_lum);
+)
+#endif
 
-static void
-ctx_process_cmd_str_with_len (Ctx *ctx, CtxCode code, const char *string, uint32_t arg0, uint32_t arg1, int len)
+CTX_INLINE static void /* Dispatch to the ctx_u8_blend_<mode> routine selected by `blend`; result goes to `blended`. */
+ctx_u8_blend (int components, CtxBlend blend, uint8_t * __restrict__ dst, uint8_t *src, uint8_t *blended, int count)
 {
-  CtxEntry commands[1 + 2 + (len+1+1)/9];
-  ctx_memset (commands, 0, sizeof (commands) );
-  commands[0] = ctx_u32 (code, arg0, arg1);
-  commands[1].code = CTX_DATA;
-  commands[1].data.u32[0] = len;
-  commands[1].data.u32[1] = (len+1+1)/9 + 1;
-  memcpy( (char *) &commands[2].data.u8[0], string, len);
-  ( (char *) (&commands[2].data.u8[0]) ) [len]=0;
-  ctx_process (ctx, commands);
-}
+#if CTX_BLENDING_AND_COMPOSITING
+  switch (blend)
+  {
+    case CTX_BLEND_NORMAL: default: ctx_u8_blend_normal  (components, dst, src, blended, count); break; /* also the fallback for unknown modes, so blended is always written */
+    case CTX_BLEND_MULTIPLY:    ctx_u8_blend_multiply    (components, dst, src, blended, count); break;
+    case CTX_BLEND_SCREEN:      ctx_u8_blend_screen      (components, dst, src, blended, count); break;
+    case CTX_BLEND_OVERLAY:     ctx_u8_blend_overlay     (components, dst, src, blended, count); break;
+    case CTX_BLEND_DARKEN:      ctx_u8_blend_darken      (components, dst, src, blended, count); break;
+    case CTX_BLEND_LIGHTEN:     ctx_u8_blend_lighten     (components, dst, src, blended, count); break;
+    case CTX_BLEND_COLOR_DODGE: ctx_u8_blend_color_dodge (components, dst, src, blended, count); break;
+    case CTX_BLEND_COLOR_BURN:  ctx_u8_blend_color_burn  (components, dst, src, blended, count); break;
+    case CTX_BLEND_HARD_LIGHT:  ctx_u8_blend_hard_light  (components, dst, src, blended, count); break;
+    case CTX_BLEND_SOFT_LIGHT:  ctx_u8_blend_soft_light  (components, dst, src, blended, count); break;
+    case CTX_BLEND_DIFFERENCE:  ctx_u8_blend_difference  (components, dst, src, blended, count); break;
+    case CTX_BLEND_EXCLUSION:   ctx_u8_blend_exclusion   (components, dst, src, blended, count); break;
+    case CTX_BLEND_COLOR:       ctx_u8_blend_color       (components, dst, src, blended, count); break;
+    case CTX_BLEND_HUE:         ctx_u8_blend_hue         (components, dst, src, blended, count); break;
+    case CTX_BLEND_SATURATION:  ctx_u8_blend_saturation  (components, dst, src, blended, count); break;
+    case CTX_BLEND_LUMINOSITY:  ctx_u8_blend_luminosity  (components, dst, src, blended, count); break;
+    case CTX_BLEND_ADDITION:    ctx_u8_blend_addition    (components, dst, src, blended, count); break;
+    case CTX_BLEND_DIVIDE:      ctx_u8_blend_divide      (components, dst, src, blended, count); break;
+    case CTX_BLEND_SUBTRACT:    ctx_u8_blend_subtract    (components, dst, src, blended, count); break;
+  }
+#else /* blend modes compiled out: every mode behaves as normal */
+  switch (blend)
+  {
+    default:                    ctx_u8_blend_normal      (components, dst, src, blended, count); break;
+  }
 
-static void
-ctx_process_cmd_str (Ctx *ctx, CtxCode code, const char *string, uint32_t arg0, uint32_t arg1)
-{
-  ctx_process_cmd_str_with_len (ctx, code, string, arg0, arg1, strlen (string));
+#endif
 }
 
-static void
-ctx_process_cmd_str_float (Ctx *ctx, CtxCode code, const char *string, float arg0, float arg1)
+CTX_INLINE static void /* Generic 8-bit compositor: renders the source with `fragment`, optionally blends it, then mixes source and dst per pixel using the factor pair of compositing_mode. */
+__ctx_u8_porter_duff (CtxRasterizer         *rasterizer,
+                     int                    components,
+                     uint8_t *              dst,
+                     uint8_t *              src,
+                     int                    x0,
+                     uint8_t * __restrict__ coverage,
+                     int                    count,
+                     CtxCompositingMode     compositing_mode,
+                     CtxFragment            fragment,
+                     CtxBlend               blend)
 {
-  uint32_t iarg0;
-  uint32_t iarg1;
-  memcpy (&iarg0, &arg0, sizeof (iarg0));
-  memcpy (&iarg1, &arg1, sizeof (iarg1));
-  ctx_process_cmd_str_with_len (ctx, code, string, iarg0, iarg1, strlen (string));
-}
+  CtxPorterDuffFactor f_s, f_d; /* factor selectors for the source and destination terms */
+  ctx_porter_duff_factors (compositing_mode, &f_s, &f_d);
+  CtxGState *gstate = &rasterizer->state->gstate;
+  uint8_t global_alpha_u8 = gstate->global_alpha_u8;
+  uint8_t tsrc[components * count]; /* stack scratch buffer receiving the fragment output; the incoming src argument is replaced by it */
+  int src_step = 0; /* stays 0 for a solid color so every pixel reuses the same source sample */
 
-#if CTX_BITPACK_PACKER
-static int
-ctx_last_history (CtxDrawlist *drawlist)
-{
-  int last_history = 0;
-  int i = 0;
-  while (i < drawlist->count)
-    {
-      CtxEntry *entry = &drawlist->entries[i];
-      i += (ctx_conts_for_entry (entry) + 1);
-    }
-  return last_history;
-}
-#endif
+  if (gstate->source_fill.type == CTX_SOURCE_COLOR)
+  {
+    src = &tsrc[0];
+    fragment (rasterizer, 0, 0, src, 1, 0, 0); /* a single sample is fetched for a solid color */
+    if (blend != CTX_BLEND_NORMAL)
+      ctx_u8_blend (components, blend, dst, src, src, 1); /* blended in place, against the first dst pixel only */
+  }
+  else
+  {
+    float u0 = 0; float v0 = 0;
+    float ud = 0; float vd = 0;
+    src = &tsrc[0];
 
-#if CTX_BITPACK_PACKER
+    ctx_init_uv (rasterizer, x0, &u0, &v0, &ud, &vd); /* start coordinate and per-pixel step handed to the fragment */
+    fragment (rasterizer, u0, v0, src, count, ud, vd); /* one source sample per destination pixel */
+    if (blend != CTX_BLEND_NORMAL)
+      ctx_u8_blend (components, blend, dst, src, src, count);
+    src_step = components;
+  }
 
-static float
-find_max_dev (CtxEntry *entry, int nentrys)
-{
-  float max_dev = 0.0;
-  for (int c = 0; c < nentrys; c++)
-    {
-      for (int d = 0; d < 2; d++)
-        {
-          if (entry[c].data.f[d] > max_dev)
-            { max_dev = entry[c].data.f[d]; }
-          if (entry[c].data.f[d] < -max_dev)
-            { max_dev = -entry[c].data.f[d]; }
-        }
-    }
-  return max_dev;
-}
+  while (count--) /* one iteration per destination pixel */
+  {
+    uint32_t cov = *coverage;
 
-static void
-pack_s8_args (CtxEntry *entry, int npairs)
-{
-  for (int c = 0; c < npairs; c++)
-    for (int d = 0; d < 2; d++)
-      { entry[0].data.s8[c*2+d]=entry[c].data.f[d] * CTX_SUBDIV; }
-}
+    if (CTX_UNLIKELY(global_alpha_u8 != 255))
+      cov = (cov * global_alpha_u8 + 255) >> 8; /* fold the global alpha into the coverage */
 
-static void
-pack_s16_args (CtxEntry *entry, int npairs)
-{
-  for (int c = 0; c < npairs; c++)
-    for (int d = 0; d < 2; d++)
-      { entry[0].data.s16[c*2+d]=entry[c].data.f[d] * CTX_SUBDIV; }
-}
-#endif
+    uint8_t csrc[components]; /* source pixel scaled by coverage */
+    for (int c = 0; c < components; c++)
+      csrc[c] = (src[c] * cov + 255) >> 8;
 
-#if CTX_BITPACK_PACKER
-static void
-ctx_drawlist_remove_tiny_curves (CtxDrawlist *drawlist, int start_pos)
-{
-  CtxIterator iterator;
-  if ( (drawlist->flags & CTX_TRANSFORMATION_BITPACK) == 0)
-    { return; }
-  ctx_iterator_init (&iterator, drawlist, start_pos, CTX_ITERATOR_FLAT);
-  iterator.end_pos = drawlist->count - 5;
-  CtxCommand *command = NULL;
-  while ( (command = ctx_iterator_next (&iterator) ) )
+    for (int c = 0; c < components; c++)
     {
-      CtxEntry *entry = &command->entry;
-      /* things smaller than this have probably been scaled down
-         beyond recognition, bailing for both better packing and less rasterization work
-       */
-      if (command[0].code == CTX_REL_CURVE_TO)
-        {
-          float max_dev = find_max_dev (entry, 3);
-          if (max_dev < 1.0)
-            {
-              entry[0].code = CTX_REL_LINE_TO;
-              entry[0].data.f[0] = entry[2].data.f[0];
-              entry[0].data.f[1] = entry[2].data.f[1];
-              entry[1].code = CTX_NOP;
-              entry[2].code = CTX_NOP;
-            }
-        }
-    }
-}
-#endif
-
-#if CTX_BITPACK_PACKER
-static void
-ctx_drawlist_bitpack (CtxDrawlist *drawlist, int start_pos)
-{
-#if CTX_BITPACK
-  int i = 0;
-  if ( (drawlist->flags & CTX_TRANSFORMATION_BITPACK) == 0)
-    { return; }
-  ctx_drawlist_remove_tiny_curves (drawlist, drawlist->bitpack_pos);
-  i = drawlist->bitpack_pos;
-  if (start_pos > i)
-    { i = start_pos; }
-  while (i < drawlist->count - 4) /* the -4 is to avoid looking past
-                                    initialized data we're not ready
-                                    to bitpack yet*/
-    {
-      CtxEntry *entry = &drawlist->entries[i];
-      if (entry[0].code == CTX_SET_RGBA_U8 &&
-          entry[1].code == CTX_MOVE_TO &&
-          entry[2].code == CTX_REL_LINE_TO &&
-          entry[3].code == CTX_REL_LINE_TO &&
-          entry[4].code == CTX_REL_LINE_TO &&
-          entry[5].code == CTX_REL_LINE_TO &&
-          entry[6].code == CTX_FILL &&
-          ctx_fabsf (entry[2].data.f[0] - 1.0f) < 0.02f &&
-          ctx_fabsf (entry[3].data.f[1] - 1.0f) < 0.02f)
-        {
-          entry[0].code = CTX_SET_PIXEL;
-          entry[0].data.u16[2] = entry[1].data.f[0];
-          entry[0].data.u16[3] = entry[1].data.f[1];
-          entry[1].code = CTX_NOP;
-          entry[2].code = CTX_NOP;
-          entry[3].code = CTX_NOP;
-          entry[4].code = CTX_NOP;
-          entry[5].code = CTX_NOP;
-          entry[6].code = CTX_NOP;
-        }
-#if 1
-      else if (entry[0].code == CTX_REL_LINE_TO)
-        {
-          if (entry[1].code == CTX_REL_LINE_TO &&
-              entry[2].code == CTX_REL_LINE_TO &&
-              entry[3].code == CTX_REL_LINE_TO)
-            {
-              float max_dev = find_max_dev (entry, 4);
-              if (max_dev < 114 / CTX_SUBDIV)
-                {
-                  pack_s8_args (entry, 4);
-                  entry[0].code = CTX_REL_LINE_TO_X4;
-                  entry[1].code = CTX_NOP;
-                  entry[2].code = CTX_NOP;
-                  entry[3].code = CTX_NOP;
-                }
-            }
-          else if (entry[1].code == CTX_REL_CURVE_TO)
-            {
-              float max_dev = find_max_dev (entry, 4);
-              if (max_dev < 114 / CTX_SUBDIV)
-                {
-                  pack_s8_args (entry, 4);
-                  entry[0].code = CTX_REL_LINE_TO_REL_CURVE_TO;
-                  entry[1].code = CTX_NOP;
-                  entry[2].code = CTX_NOP;
-                  entry[3].code = CTX_NOP;
-                }
-            }
-          else if (entry[1].code == CTX_REL_LINE_TO &&
-                   entry[2].code == CTX_REL_LINE_TO &&
-                   entry[3].code == CTX_REL_LINE_TO)
-            {
-              float max_dev = find_max_dev (entry, 4);
-              if (max_dev < 114 / CTX_SUBDIV)
-                {
-                  pack_s8_args (entry, 4);
-                  entry[0].code = CTX_REL_LINE_TO_X4;
-                  entry[1].code = CTX_NOP;
-                  entry[2].code = CTX_NOP;
-                  entry[3].code = CTX_NOP;
-                }
-            }
-          else if (entry[1].code == CTX_REL_MOVE_TO)
-            {
-              float max_dev = find_max_dev (entry, 2);
-              if (max_dev < 31000 / CTX_SUBDIV)
-                {
-                  pack_s16_args (entry, 2);
-                  entry[0].code = CTX_REL_LINE_TO_REL_MOVE_TO;
-                  entry[1].code = CTX_NOP;
-                }
-            }
-          else if (entry[1].code == CTX_REL_LINE_TO)
-            {
-              float max_dev = find_max_dev (entry, 2);
-              if (max_dev < 31000 / CTX_SUBDIV)
-                {
-                  pack_s16_args (entry, 2);
-                  entry[0].code = CTX_REL_LINE_TO_X2;
-                  entry[1].code = CTX_NOP;
-                }
-            }
-        }
-#endif
-#if 1
-      else if (entry[0].code == CTX_REL_CURVE_TO)
-        {
-          if (entry[3].code == CTX_REL_LINE_TO)
-            {
-              float max_dev = find_max_dev (entry, 4);
-              if (max_dev < 114 / CTX_SUBDIV)
-                {
-                  pack_s8_args (entry, 4);
-                  entry[0].code = CTX_REL_CURVE_TO_REL_LINE_TO;
-                  entry[1].code = CTX_NOP;
-                  entry[2].code = CTX_NOP;
-                  entry[3].code = CTX_NOP;
-                }
-            }
-          else if (entry[3].code == CTX_REL_MOVE_TO)
-            {
-              float max_dev = find_max_dev (entry, 4);
-              if (max_dev < 114 / CTX_SUBDIV)
-                {
-                  pack_s8_args (entry, 4);
-                  entry[0].code = CTX_REL_CURVE_TO_REL_MOVE_TO;
-                  entry[1].code = CTX_NOP;
-                  entry[2].code = CTX_NOP;
-                  entry[3].code = CTX_NOP;
-                }
-            }
-          else
-            {
-              float max_dev = find_max_dev (entry, 3);
-              if (max_dev < 114 / CTX_SUBDIV)
-                {
-                  pack_s8_args (entry, 3);
-                  ctx_arg_s8 (6) =
-                    ctx_arg_s8 (7) = 0;
-                  entry[0].code = CTX_REL_CURVE_TO_REL_LINE_TO;
-                  entry[1].code = CTX_NOP;
-                  entry[2].code = CTX_NOP;
-                }
-            }
-        }
-#endif
-#if 1
-      else if (entry[0].code == CTX_REL_QUAD_TO)
-        {
-          if (entry[2].code == CTX_REL_QUAD_TO)
-            {
-              float max_dev = find_max_dev (entry, 4);
-              if (max_dev < 114 / CTX_SUBDIV)
-                {
-                  pack_s8_args (entry, 4);
-                  entry[0].code = CTX_REL_QUAD_TO_REL_QUAD_TO;
-                  entry[1].code = CTX_NOP;
-                  entry[2].code = CTX_NOP;
-                  entry[3].code = CTX_NOP;
-                }
-            }
-          else
-            {
-              float max_dev = find_max_dev (entry, 2);
-              if (max_dev < 3100 / CTX_SUBDIV)
-                {
-                  pack_s16_args (entry, 2);
-                  entry[0].code = CTX_REL_QUAD_TO_S16;
-                  entry[1].code = CTX_NOP;
-                }
-            }
-        }
-#endif
-#if 1
-      else if (entry[0].code == CTX_FILL &&
-               entry[1].code == CTX_MOVE_TO)
-        {
-          entry[0] = entry[1];
-          entry[0].code = CTX_FILL_MOVE_TO;
-          entry[1].code = CTX_NOP;
-        }
-#endif
-#if 1
-      else if (entry[0].code == CTX_MOVE_TO &&
-               entry[1].code == CTX_MOVE_TO &&
-               entry[2].code == CTX_MOVE_TO)
-        {
-          entry[0]      = entry[2];
-          entry[0].code = CTX_MOVE_TO;
-          entry[1].code = CTX_NOP;
-          entry[2].code = CTX_NOP;
-        }
-#endif
+      uint32_t res = 0; /* accumulates the source term plus the destination term for this channel */
 #if 1
-      else if ( (entry[0].code == CTX_MOVE_TO &&
-                 entry[1].code == CTX_MOVE_TO) ||
-                (entry[0].code == CTX_REL_MOVE_TO &&
-                 entry[1].code == CTX_MOVE_TO) )
-        {
-          entry[0]      = entry[1];
-          entry[0].code = CTX_MOVE_TO;
-          entry[1].code = CTX_NOP;
-        }
+      switch (f_s) /* source term, weighted by the destination's last component */
+      {
+        case CTX_PORTER_DUFF_0:             break;
+        case CTX_PORTER_DUFF_1:             res += (csrc[c] ); break;
+        case CTX_PORTER_DUFF_ALPHA:         res += (csrc[c] * dst[components-1] + 255) >> 8; break;
+        case CTX_PORTER_DUFF_1_MINUS_ALPHA: res += (csrc[c] * (256-dst[components-1])) >> 8; break;
+      }
+      switch (f_d) /* destination term, weighted by the coverage-scaled source's last component */
+      {
+        case CTX_PORTER_DUFF_0: break;
+        case CTX_PORTER_DUFF_1:             res += dst[c]; break;
+        case CTX_PORTER_DUFF_ALPHA:         res += (dst[c] * csrc[components-1] + 255) >> 8; break;
+        case CTX_PORTER_DUFF_1_MINUS_ALPHA: res += (dst[c] * (256-csrc[components-1])) >> 8; break;
+      }
+#else /* disabled alternative (the guard above is "#if 1") that divides by 255 instead of shifting */
+      switch (f_s)
+      {
+        case CTX_PORTER_DUFF_0:             break;
+        case CTX_PORTER_DUFF_1:             res += (csrc[c] ); break;
+        case CTX_PORTER_DUFF_ALPHA:         res += (csrc[c] * dst[components-1])/255; break;
+        case CTX_PORTER_DUFF_1_MINUS_ALPHA: res += (csrc[c] * (255-dst[components-1]))/255; break;
+      }
+      switch (f_d)
+      {
+        case CTX_PORTER_DUFF_0: break;
+        case CTX_PORTER_DUFF_1:             res += dst[c]; break;
+        case CTX_PORTER_DUFF_ALPHA:         res += (dst[c] * csrc[components-1])/255; break;
+        case CTX_PORTER_DUFF_1_MINUS_ALPHA: res += (dst[c] * (255-csrc[components-1]))/255; break;
+      }
 #endif
-      i += (ctx_conts_for_entry (entry) + 1);
-    }
-  int source = drawlist->bitpack_pos;
-  int target = drawlist->bitpack_pos;
-  int removed = 0;
-  /* remove nops that have been inserted as part of shortenings
-   */
-  while (source < drawlist->count)
-    {
-      CtxEntry *sentry = &drawlist->entries[source];
-      CtxEntry *tentry = &drawlist->entries[target];
-      while (sentry->code == CTX_NOP && source < drawlist->count)
-        {
-          source++;
-          sentry = &drawlist->entries[source];
-          removed++;
-        }
-      if (sentry != tentry)
-        { *tentry = *sentry; }
-      source ++;
-      target ++;
+      dst[c] = res; /* written in place; the last component is updated last, so earlier channels see its old value */
     }
-  drawlist->count -= removed;
-  drawlist->bitpack_pos = drawlist->count;
-#endif
+    coverage ++;
+    src+=src_step;
+    dst+=components;
+  }
 }
 
-#endif
-
-static inline void
-ctx_drawlist_compact (CtxDrawlist *drawlist)
+CTX_INLINE static void /* Forwards every argument unchanged to __ctx_u8_porter_duff. */
+_ctx_u8_porter_duff (CtxRasterizer         *rasterizer,
+                     int                    components,
+                     uint8_t *              dst,
+                     uint8_t * __restrict__ src,
+                     int                    x0,
+                     uint8_t *              coverage,
+                     int                    count,
+                     CtxCompositingMode     compositing_mode,
+                     CtxFragment            fragment,
+                     CtxBlend               blend)
 {
-#if CTX_BITPACK_PACKER
-  int last_history;
-  last_history = ctx_last_history (drawlist);
-#else
-  if (drawlist) {};
-#endif
-#if CTX_BITPACK_PACKER
-  ctx_drawlist_bitpack (drawlist, last_history);
-#endif
+  __ctx_u8_porter_duff (rasterizer, components, dst, src, x0, coverage, count, compositing_mode, fragment, 
blend);
 }
 
-uint8_t *ctx_define_texture_pixel_data (CtxEntry *entry)
-{
-  return &entry[2 + 1 + 1 + ctx_conts_for_entry (&entry[2])].data.u8[0];
+/* One switch arm of _ctx_u8_porter_duffs: calls _ctx_u8_porter_duff with MODE
+ * spelled out as a literal compositing mode.
+ */
+#define _ctx_u8_porter_duff_case(MODE, components, fragment, blend) \
+     case MODE: \
+       _ctx_u8_porter_duff (rasterizer, components, dst, src, x0, coverage, count, \
+         MODE, fragment, blend); \
+       break;
+
+/* Switches on the current gstate compositing mode and forwards to
+ * _ctx_u8_porter_duff with that mode as a constant argument.  Expects
+ * rasterizer, dst, src, x0, coverage and count to be in scope at the point of
+ * expansion; comp_format and source are accepted but not used here.
+ */
+#define _ctx_u8_porter_duffs(comp_format, components, source, fragment, blend) \
+   switch (rasterizer->state->gstate.compositing_mode) \
+   { \
+     _ctx_u8_porter_duff_case (CTX_COMPOSITE_SOURCE_ATOP,      components, fragment, blend) \
+     _ctx_u8_porter_duff_case (CTX_COMPOSITE_DESTINATION_ATOP, components, fragment, blend) \
+     _ctx_u8_porter_duff_case (CTX_COMPOSITE_DESTINATION_IN,   components, fragment, blend) \
+     _ctx_u8_porter_duff_case (CTX_COMPOSITE_DESTINATION,      components, fragment, blend) \
+     _ctx_u8_porter_duff_case (CTX_COMPOSITE_SOURCE_OVER,      components, fragment, blend) \
+     _ctx_u8_porter_duff_case (CTX_COMPOSITE_DESTINATION_OVER, components, fragment, blend) \
+     _ctx_u8_porter_duff_case (CTX_COMPOSITE_XOR,              components, fragment, blend) \
+     _ctx_u8_porter_duff_case (CTX_COMPOSITE_DESTINATION_OUT,  components, fragment, blend) \
+     _ctx_u8_porter_duff_case (CTX_COMPOSITE_SOURCE_OUT,       components, fragment, blend) \
+     _ctx_u8_porter_duff_case (CTX_COMPOSITE_SOURCE_IN,        components, fragment, blend) \
+     _ctx_u8_porter_duff_case (CTX_COMPOSITE_COPY,             components, fragment, blend) \
+     _ctx_u8_porter_duff_case (CTX_COMPOSITE_CLEAR,            components, fragment, blend) \
+   }
+
+/* Defines `static void ctx_<comp_format>_porter_duff_<source>
+ * (CTX_COMPOSITE_ARGUMENTS)` whose body is the _ctx_u8_porter_duffs switch.
+ *
+ * Generating one function per compositing_mode would be slightly more
+ * efficient, but on embedded targets it leads to more code bloat; here a
+ * slight amount of performance is traded for smaller code.
+ */
+#define ctx_u8_porter_duff(comp_format, components, source, fragment, blend) \
+static void \
+ctx_##comp_format##_porter_duff_##source (CTX_COMPOSITE_ARGUMENTS) \
+{ \
+  _ctx_u8_porter_duffs(comp_format, components, source, fragment, blend);\
 }
 
-#ifndef __CTX_TRANSFORM
-#define __CTX_TRANSFORM
+/* Instantiates ctx_RGBA8_porter_duff_generic: 4 components, using the
+ * rasterizer's current fragment and the gstate blend mode at call time.
+ */
+ctx_u8_porter_duff(RGBA8, 4,generic, rasterizer->fragment, rasterizer->state->gstate.blend_mode)
+//ctx_u8_porter_duff(comp_name, components,color_##blend_name,  NULL, blend_mode)
 
-static inline void
-_ctx_matrix_apply_transform (const CtxMatrix *m, float *x, float *y)
+/* Compositing operation with an intentionally empty body: none of the
+ * CTX_COMPOSITE_ARGUMENTS are touched.
+ */
+static void
+ctx_RGBA8_nop (CTX_COMPOSITE_ARGUMENTS)
 {
-  float x_in = *x;
-  float y_in = *y;
-  *x = ( (x_in * m->m[0][0]) + (y_in * m->m[1][0]) + m->m[2][0]);
-  *y = ( (y_in * m->m[1][1]) + (x_in * m->m[0][1]) + m->m[2][1]);
 }
 
-void
-ctx_matrix_apply_transform (const CtxMatrix *m, float *x, float *y)
-{
-  _ctx_matrix_apply_transform (m, x, y);
-}
 
 static inline void
-_ctx_user_to_device (CtxState *state, float *x, float *y)
+ctx_setup_native_color (CtxRasterizer *rasterizer)
 {
-  _ctx_matrix_apply_transform (&state->gstate.transform, x, y);
+  if (rasterizer->state->gstate.source_fill.type == CTX_SOURCE_COLOR)
+    rasterizer->format->from_comp (rasterizer, 0,
+      &rasterizer->color[0],
+      &rasterizer->color_native,
+      1);
 }
 
 static void
-_ctx_user_to_device_distance (CtxState *state, float *x, float *y)
+ctx_setup_apply_coverage (CtxRasterizer *rasterizer)
 {
-  const CtxMatrix *m = &state->gstate.transform;
-  _ctx_matrix_apply_transform (m, x, y);
-  *x -= m->m[2][0];
-  *y -= m->m[2][1];
+  rasterizer->apply_coverage = rasterizer->format->apply_coverage ?
+                               rasterizer->format->apply_coverage :
+                               rasterizer->comp_op;
 }
 
-void ctx_user_to_device          (Ctx *ctx, float *x, float *y)
+/* Configures the rasterizer for RGBA8 compositing from the current gstate:
+ * sets the fragment, the compositing function (comp_op), the coverage path
+ * (comp) and finally apply_coverage.
+ *
+ * The default is the generic Porter-Duff op with CTX_COV_PATH_FALLBACK.
+ * Specialised ops are selected for solid-colour sources and for fragment
+ * sources when the blend mode is normal and the compositing mode is COPY or
+ * SOURCE_OVER.
+ */
+static void
+ctx_setup_RGBA8 (CtxRasterizer *rasterizer)
 {
-  _ctx_user_to_device (&ctx->state, x, y);
+  CtxGState *gstate = &rasterizer->state->gstate;
+  int components       = 4;
+  rasterizer->fragment = ctx_rasterizer_get_fragment_RGBA8 (rasterizer);
+  rasterizer->comp_op  = ctx_RGBA8_porter_duff_generic;
+  rasterizer->comp = CTX_COV_PATH_FALLBACK;
+
+  int blend_mode       = gstate->blend_mode;
+  int compositing_mode = gstate->compositing_mode;
+
+  if (gstate->source_fill.type == CTX_SOURCE_COLOR)
+    {
+      /* render the solid colour once into rasterizer->color */
+      ctx_fragment_color_RGBA8 (rasterizer, 0,0, rasterizer->color, 1, 0,0);
+      if (gstate->global_alpha_u8 != 255)
+      {
+        /* scale all four channels by global alpha; (x*a + 255) >> 8 stands in
+         * for a division by 255 */
+        for (int c = 0; c < 4; c ++)
+          rasterizer->color[c] = (rasterizer->color[c] * gstate->global_alpha_u8 + 255)>>8;
+      }
+      /* split the packed pixel into its 0xff00ff00 lanes (si_ga, shifted
+       * down by 8) and its 0x00ff00ff lanes (si_rb), plus both multiplied
+       * by 255 */
+      uint32_t src_pix    = ((uint32_t*)rasterizer->color)[0];
+      uint32_t si_ga      = (src_pix & 0xff00ff00) >> 8;
+      uint32_t si_rb      = src_pix & 0x00ff00ff;
+      uint32_t si_ga_full = si_ga * 255;
+      uint32_t si_rb_full = si_rb * 255;
+//      uint32_t si_a       = si_ga >> 16;
+
+      /* cache the four values in the uint32 slots following the colour.
+       * NOTE(review): assumes rasterizer->color has room for at least five
+       * uint32 values and is suitably aligned - confirm against the struct */
+      ((uint32_t*)rasterizer->color)[1] = si_ga;
+      ((uint32_t*)rasterizer->color)[2] = si_rb;
+      ((uint32_t*)rasterizer->color)[3] = si_ga_full;
+      ((uint32_t*)rasterizer->color)[4] = si_rb_full;
+
+      if (blend_mode == CTX_BLEND_NORMAL)
+      {
+        if(compositing_mode == CTX_COMPOSITE_COPY)
+        {
+          rasterizer->comp_op = ctx_RGBA8_source_copy_normal_color;
+          rasterizer->comp = CTX_COV_PATH_RGBA8_COPY;
+        }
+        else if (compositing_mode == CTX_COMPOSITE_SOURCE_OVER)
+        {
+          /* a colour whose last (alpha) byte is 255 uses the copy path */
+          if (rasterizer->color[components-1] == 255)
+          {
+            rasterizer->comp_op = ctx_RGBA8_source_copy_normal_color;
+            rasterizer->comp = CTX_COV_PATH_RGBA8_COPY;
+          }
+          else
+          {
+            rasterizer->comp_op = ctx_RGBA8_source_over_normal_color;
+            rasterizer->comp = CTX_COV_PATH_RGBA8_OVER;
+          }
+        }
+      }
+      /* NOTE(review): this branch is only reached when blend_mode is not
+       * CTX_BLEND_NORMAL; comp stays at CTX_COV_PATH_FALLBACK - confirm
+       * that this is intended */
+      else if (compositing_mode == CTX_COMPOSITE_CLEAR)
+      {
+        rasterizer->comp_op = ctx_RGBA8_clear_normal;
+      }
+  }
+  else if (blend_mode == CTX_BLEND_NORMAL)
+  {
+    /* non-colour source with normal blending */
+    if(compositing_mode == CTX_COMPOSITE_SOURCE_OVER)
+    {
+       rasterizer->comp_op = ctx_RGBA8_source_over_normal_fragment;
+       rasterizer->comp = CTX_COV_PATH_RGBA8_OVER_FRAGMENT;
+    }
+    else if (compositing_mode == CTX_COMPOSITE_COPY)
+    {
+       rasterizer->comp_op = ctx_RGBA8_source_copy_normal_fragment;
+       rasterizer->comp = CTX_COV_PATH_RGBA8_COPY_FRAGMENT;
+    }
+  }
+  ctx_setup_apply_coverage (rasterizer);
 }
-void ctx_user_to_device_distance (Ctx *ctx, float *x, float *y)
+
+
+/* Runs the RGBA8 setup, converts a solid source colour to the native
+ * format, then always selects the fallback coverage path.
+ */
+static void
+ctx_setup_RGB (CtxRasterizer *rasterizer)
 {
-  _ctx_user_to_device_distance (&ctx->state, x, y);
+  ctx_setup_RGBA8 (rasterizer);
+  ctx_setup_native_color (rasterizer);
+
+  rasterizer->comp = CTX_COV_PATH_FALLBACK;
 }
 
 static void
-ctx_matrix_set (CtxMatrix *matrix, float a, float b, float c, float d, float e, float f)
+ctx_setup_RGB332 (CtxRasterizer *rasterizer)
 {
-  matrix->m[0][0] = a;
-  matrix->m[0][1] = b;
-  matrix->m[1][0] = c;
-  matrix->m[1][1] = d;
-  matrix->m[2][0] = e;
-  matrix->m[2][1] = f;
+  ctx_setup_RGBA8 (rasterizer);
+  ctx_setup_native_color (rasterizer);
+
+  if (rasterizer->comp == CTX_COV_PATH_RGBA8_COPY)
+    rasterizer->comp = CTX_COV_PATH_RGB332_COPY;
+  else
+    rasterizer->comp = CTX_COV_PATH_FALLBACK;
 }
 
-static inline void
-_ctx_matrix_identity (CtxMatrix *matrix)
+/* Runs the RGBA8 setup and native-colour conversion, then maps the RGBA8
+ * copy path onto CTX_COV_PATH_RGB565_COPY; every other path becomes the
+ * fallback.
+ */
+static void
+ctx_setup_RGB565 (CtxRasterizer *rasterizer)
 {
-  matrix->m[0][0] = 1.0f;
-  matrix->m[0][1] = 0.0f;
-  matrix->m[1][0] = 0.0f;
-  matrix->m[1][1] = 1.0f;
-  matrix->m[2][0] = 0.0f;
-  matrix->m[2][1] = 0.0f;
+  ctx_setup_RGBA8 (rasterizer);
+  ctx_setup_native_color (rasterizer);
+
+  rasterizer->comp = (rasterizer->comp == CTX_COV_PATH_RGBA8_COPY)
+                   ? CTX_COV_PATH_RGB565_COPY
+                   : CTX_COV_PATH_FALLBACK;
 }
 
-void
-ctx_matrix_identity (CtxMatrix *matrix)
+/* Runs the RGBA8 setup and native-colour conversion, then maps the RGBA8
+ * copy path onto CTX_COV_PATH_RGB8_COPY; every other path becomes the
+ * fallback.
+ */
+static void
+ctx_setup_RGB8 (CtxRasterizer *rasterizer)
 {
-  _ctx_matrix_identity (matrix);
+  ctx_setup_RGBA8 (rasterizer);
+  ctx_setup_native_color (rasterizer);
+
+  rasterizer->comp = (rasterizer->comp == CTX_COV_PATH_RGBA8_COPY)
+                   ? CTX_COV_PATH_RGB8_COPY
+                   : CTX_COV_PATH_FALLBACK;
 }
 
 static void
-_ctx_matrix_multiply (CtxMatrix       *result,
-                      const CtxMatrix *t,
-                      const CtxMatrix *s)
+ctx_composite_convert (CTX_COMPOSITE_ARGUMENTS)
 {
-  CtxMatrix r;
-  r.m[0][0] = t->m[0][0] * s->m[0][0] + t->m[0][1] * s->m[1][0];
-  r.m[0][1] = t->m[0][0] * s->m[0][1] + t->m[0][1] * s->m[1][1];
-  r.m[1][0] = t->m[1][0] * s->m[0][0] + t->m[1][1] * s->m[1][0];
-  r.m[1][1] = t->m[1][0] * s->m[0][1] + t->m[1][1] * s->m[1][1];
-  r.m[2][0] = t->m[2][0] * s->m[0][0] + t->m[2][1] * s->m[1][0] + s->m[2][0];
-  r.m[2][1] = t->m[2][0] * s->m[0][1] + t->m[2][1] * s->m[1][1] + s->m[2][1];
-  *result = r;
+  uint8_t pixels[count * rasterizer->format->ebpp];
+  rasterizer->format->to_comp (rasterizer, x0, dst, &pixels[0], count);
+  rasterizer->comp_op (rasterizer, &pixels[0], rasterizer->color, x0, coverage, count);
+  rasterizer->format->from_comp (rasterizer, x0, &pixels[0], dst, count);
 }
 
-void
-ctx_matrix_multiply (CtxMatrix       *result,
-                     const CtxMatrix *t,
-                     const CtxMatrix *s)
+#if CTX_ENABLE_FLOAT
+/* Coverage-weighted copy of one constant source pixel over `count` float
+ * pixels: dst = dst * (1 - cov) + src * cov for every component.  The source
+ * pointer is not advanced between pixels.
+ */
+static void
+ctx_float_copy_normal (int components, CTX_COMPOSITE_ARGUMENTS)
 {
-  _ctx_matrix_multiply (result, t, s);
+  float *out   = (float*)dst;
+  float *color = (float*)src;
+  float u0 = 0, v0 = 0;
+  float ud = 0, vd = 0;
+
+  /* the results are not used below; the call is kept as in the original */
+  ctx_init_uv (rasterizer, x0, &u0, &v0, &ud, &vd);
+
+  for (; count--; coverage++, out += components)
+  {
+    float covf = ctx_u8_to_float (*coverage);
+    for (int ch = 0; ch < components; ch++)
+      out[ch] = out[ch]*(1.0-covf) + color[ch]*covf;
+  }
 }
 
-void
-ctx_matrix_translate (CtxMatrix *matrix, float x, float y)
+/* Sets every component of `count` consecutive float pixels to 0.0f.
+ * Coverage is not consulted: the whole span is cleared.
+ *
+ * All writes go through the advancing float pointer.  The earlier 2- and
+ * 4-component fast paths stored through `dst`, which never moves, so only the
+ * first pixel of a span was cleared.
+ */
+static void
+ctx_float_clear_normal (int components, CTX_COMPOSITE_ARGUMENTS)
 {
-  CtxMatrix transform;
-  transform.m[0][0] = 1.0f;
-  transform.m[0][1] = 0.0f;
-  transform.m[1][0] = 0.0f;
-  transform.m[1][1] = 1.0f;
-  transform.m[2][0] = x;
-  transform.m[2][1] = y;
-  _ctx_matrix_multiply (matrix, &transform, matrix);
+  float *dstf = (float*)dst;
+  while (count--)
+  {
+    for (int c = 0; c < components; c++)
+      dstf[c] = 0.0f;
+    dstf += components;
+  }
 }
 
-void
-ctx_matrix_scale (CtxMatrix *matrix, float x, float y)
-{
-  CtxMatrix transform;
-  transform.m[0][0] = x;
-  transform.m[0][1] = 0.0f;
-  transform.m[1][0] = 0.0f;
-  transform.m[1][1] = y;
-  transform.m[2][0] = 0.0f;
-  transform.m[2][1] = 0.0f;
-  _ctx_matrix_multiply (matrix, &transform, matrix);
-}
 
-void
-ctx_matrix_rotate (CtxMatrix *matrix, float angle)
+/* Source-over of one constant source pixel onto `count` float pixels:
+ * dst = src * cov + dst * (1 - cov * src_alpha), where src_alpha is the last
+ * source component.  The source pointer is not advanced between pixels.
+ */
+static inline void
+ctx_float_source_over_normal_color (int components, CTX_COMPOSITE_ARGUMENTS)
 {
-  CtxMatrix transform;
-  float val_sin = ctx_sinf (angle);
-  float val_cos = ctx_cosf (angle);
-  transform.m[0][0] =  val_cos;
-  transform.m[0][1] = val_sin;
-  transform.m[1][0] = -val_sin;
-  transform.m[1][1] = val_cos;
-  transform.m[2][0] =     0.0f;
-  transform.m[2][1] = 0.0f;
-  _ctx_matrix_multiply (matrix, &transform, matrix);
+  float *out   = (float*)dst;
+  float *color = (float*)src;
+
+  for (; count--; coverage++, out += components)
+  {
+    float fcov     = ctx_u8_to_float (*coverage);
+    float keep_dst = 1.0f - fcov * color[components-1];
+    for (int ch = 0; ch < components; ch++)
+      out[ch] = color[ch]*fcov + out[ch] * keep_dst;
+  }
 }
 
-#if 0
 static void
-ctx_matrix_skew_x (CtxMatrix *matrix, float angle)
+ctx_float_source_copy_normal_color (int components, CTX_COMPOSITE_ARGUMENTS)
 {
-  CtxMatrix transform;
-  float val_tan = ctx_tanf (angle);
-  transform.m[0][0] =    1.0f;
-  transform.m[0][1] = 0.0f;
-  transform.m[1][0] = val_tan;
-  transform.m[1][1] = 1.0f;
-  transform.m[2][0] =    0.0f;
-  transform.m[2][1] = 0.0f;
-  _ctx_matrix_multiply (matrix, &transform, matrix);
-}
+  float *dstf = (float*)dst;
+  float *srcf = (float*)src;
 
-static void
-ctx_matrix_skew_y (CtxMatrix *matrix, float angle)
-{
-  CtxMatrix transform;
-  float val_tan = ctx_tanf (angle);
-  transform.m[0][0] =    1.0f;
-  transform.m[0][1] = val_tan;
-  transform.m[1][0] =    0.0f;
-  transform.m[1][1] = 1.0f;
-  transform.m[2][0] =    0.0f;
-  transform.m[2][1] = 0.0f;
-  _ctx_matrix_multiply (matrix, &transform, matrix);
+  while (count--)
+  {
+    uint8_t cov = *coverage;
+    float fcov = ctx_u8_to_float (cov);
+    float ralpha = 1.0f - fcov;
+    for (int c = 0; c < components; c++)
+      dstf[c] = (srcf[c]*fcov + dstf[c] * ralpha);
+    coverage ++;
+    dstf+= components;
+  }
 }
-#endif
-
 
-void
-ctx_identity (Ctx *ctx)
+/* Normal blend: writes the source colour channels multiplied by the source
+ * alpha (the last component) into `blended` and copies the alpha through.
+ * `dst` is not read.  The alpha is fetched before any store, so `blended`
+ * may be the same buffer as `src`.
+ */
+inline static void
+ctx_float_blend_normal (int components, float *dst, float *src, float *blended)
 {
-  CTX_PROCESS_VOID (CTX_IDENTITY);
+  const int   alpha_idx = components - 1;
+  const float alpha     = src[alpha_idx];
+
+  for (int ch = 0; ch < alpha_idx; ch++)
+    blended[ch] = src[ch] * alpha;
+  blended[alpha_idx] = alpha;
 }
 
-
-
-void
-ctx_apply_transform (Ctx *ctx, float a, float b,  // hscale, hskew
-                     float c, float d,  // vskew,  vscale
-                     float e, float f)  // htran,  vtran
+/* Largest of the first components-1 values of c (the last component is
+ * skipped).  Returns the -1000.0f seed when there is nothing to scan or when
+ * no value exceeds it.
+ */
+static float ctx_float_get_max (int components, float *c)
 {
-  CtxEntry command[3]=
+  const int color_channels = components - 1;
+  float largest = -1000.0f;
+  for (int ch = 0; ch < color_channels; ch++)
   {
-    ctx_f (CTX_APPLY_TRANSFORM, a, b),
-    ctx_f (CTX_CONT,            c, d),
-    ctx_f (CTX_CONT,            e, f)
-  };
-  ctx_process (ctx, command);
+    if (largest < c[ch]) largest = c[ch];
+  }
+  return largest;
 }
 
-void
-ctx_get_transform  (Ctx *ctx, float *a, float *b,
-                    float *c, float *d,
-                    float *e, float *f)
+/* Smallest of the first components-1 values of c (the last component is
+ * skipped).  Returns the 400.0f seed when there is nothing to scan or when
+ * no value is below it.
+ */
+static float ctx_float_get_min (int components, float *c)
 {
-  if (a) { *a = ctx->state.gstate.transform.m[0][0]; }
-  if (b) { *b = ctx->state.gstate.transform.m[0][1]; }
-  if (c) { *c = ctx->state.gstate.transform.m[1][0]; }
-  if (d) { *d = ctx->state.gstate.transform.m[1][1]; }
-  if (e) { *e = ctx->state.gstate.transform.m[2][0]; }
-  if (f) { *f = ctx->state.gstate.transform.m[2][1]; }
+  const int color_channels = components - 1;
+  float smallest = 400.0f;
+  for (int ch = 0; ch < color_channels; ch++)
+  {
+    if (smallest > c[ch]) smallest = c[ch];
+  }
+  return smallest;
 }
 
-void
-ctx_source_transform (Ctx *ctx, float a, float b,  // hscale, hskew
-                      float c, float d,  // vskew,  vscale
-                      float e, float f)  // htran,  vtran
+/* Luminance of a pixel: CTX_CSS_RGB_TO_LUMINANCE for 3 or 4 components, the
+ * first value for 1 or 2 components, and otherwise the plain average of all
+ * components except the last.
+ */
+static float ctx_float_get_lum (int components, float *c)
 {
-  CtxEntry command[3]=
+  if (components == 3 || components == 4)
   {
-    ctx_f (CTX_SOURCE_TRANSFORM, a, b),
-    ctx_f (CTX_CONT,             c, d),
-    ctx_f (CTX_CONT,             e, f)
-  };
-  ctx_process (ctx, command);
+    return CTX_CSS_RGB_TO_LUMINANCE(c);
+  }
+  if (components == 1 || components == 2)
+    return c[0];
+
+  float total = 0;
+  for (int ch = 0; ch < components - 1; ch++)
+    total += c[ch];
+  return total / (components - 1);
 }
 
-void
-ctx_source_transform_matrix (Ctx *ctx, CtxMatrix *matrix)
+/* Saturation of a pixel, measured as max - min over its colour channels:
+ * the first three values for 3 or 4 components, 0 for 1 or 2 components,
+ * and otherwise all components except the last.
+ */
+static float ctx_float_get_sat (int components, float *c)
 {
-  ctx_source_transform (ctx,
-    matrix->m[0][0], matrix->m[0][1],
-    matrix->m[1][0], matrix->m[1][1],
-    matrix->m[2][0], matrix->m[2][1]);
+  if (components == 3 || components == 4)
+  {
+    float r = c[0];
+    float g = c[1];
+    float b = c[2];
+    return ctx_maxf(r, ctx_maxf(g,b)) - ctx_minf(r,ctx_minf(g,b));
+  }
+  if (components == 1 || components == 2)
+    return 0.0;
+
+  float lowest  = 1000;
+  float highest = -1000;
+  for (int ch = 0; ch < components - 1; ch++)
+  {
+    if (c[ch] < lowest)  lowest  = c[ch];
+    if (c[ch] > highest) highest = c[ch];
+  }
+  return highest-lowest;
 }
 
-void ctx_apply_matrix (Ctx *ctx, CtxMatrix *matrix)
+/* Shifts the colour channels of c (all components except the last) so that
+ * the luminance becomes `lum`, then pulls the result back towards its
+ * luminance if any channel fell below 0 or rose above 1.  The result is
+ * written back into c; the last component is untouched.
+ */
+static void ctx_float_set_lum (int components, float *c, float lum)
 {
-  ctx_apply_transform (ctx,
-                       matrix->m[0][0], matrix->m[0][1],
-                       matrix->m[1][0], matrix->m[1][1],
-                       matrix->m[2][0], matrix->m[2][1]);
-}
+  float shift = lum - ctx_float_get_lum (components, c);
+  float shifted[components];
+  for (int ch = 0; ch < components - 1; ch++)
+  {
+    shifted[ch] = c[ch] + shift;
+  }
 
-void ctx_get_matrix (Ctx *ctx, CtxMatrix *matrix)
-{
-  *matrix = ctx->state.gstate.transform;
-}
+  /* luminance, minimum and maximum are measured once, before clipping */
+  float l = ctx_float_get_lum (components, shifted);
+  float lowest  = ctx_float_get_min (components, shifted);
+  float highest = ctx_float_get_max (components, shifted);
 
-void ctx_set_matrix (Ctx *ctx, CtxMatrix *matrix)
-{
-  ctx_identity (ctx);
-  ctx_apply_matrix (ctx, matrix);
-}
+  if (lowest < 0.0f && l != lowest)
+  {
+    for (int ch = 0; ch < components - 1; ch++)
+      shifted[ch] = l + (((shifted[ch] - l) * l) / (l-lowest));
+  }
 
-void ctx_rotate (Ctx *ctx, float x)
-{
-  if (x == 0.0f)
-    return;
-  CTX_PROCESS_F1 (CTX_ROTATE, x);
-  if (ctx->transformation & CTX_TRANSFORMATION_SCREEN_SPACE)
-    { ctx->drawlist.count--; }
+  if (highest > 1.0f && highest != l)
+  {
+    for (int ch = 0; ch < components - 1; ch++)
+      shifted[ch] = l + (((shifted[ch] - l) * (1.0f - l)) / (highest-l));
+  }
+  for (int ch = 0; ch < components - 1; ch++)
+    c[ch] = shifted[ch];
 }
 
-void ctx_scale (Ctx *ctx, float x, float y)
+/* Rescales the first three values of c in place so that the largest becomes
+ * `sat`, the smallest becomes 0 and the middle one keeps its relative
+ * position between them.  When the largest is not greater than the smallest,
+ * all three become 0.  `components` is not used.
+ */
+static void ctx_float_set_sat (int components, float *c, float sat)
 {
-  if (x == 1.0f && y == 1.0f)
-    return;
-  CTX_PROCESS_F (CTX_SCALE, x, y);
-  if (ctx->transformation & CTX_TRANSFORMATION_SCREEN_SPACE)
-    { ctx->drawlist.count--; }
-}
+  int hi = 0, md = 1, lo = 2;
+  int tmp;
+
+  /* three compare-and-swap steps order the indices so c[lo] <= c[md] <= c[hi] */
+  if (c[lo] > c[md]) { tmp = lo; lo = md; md = tmp; }
+  if (c[md] > c[hi]) { tmp = md; md = hi; hi = tmp; }
+  if (c[lo] > c[md]) { tmp = lo; lo = md; md = tmp; }
+
+  if (!(c[hi] > c[lo]))
+  {
+    c[hi] = 0.0f;
+    c[md] = 0.0f;
+  }
+  else
+  {
+    c[md] = ((c[md]-c[lo]) * sat) / (c[hi] - c[lo]);
+    c[hi] = sat;
+  }
+  c[lo] = 0.0f;
 
-void ctx_translate (Ctx *ctx, float x, float y)
-{
-  if (x == 0.0f && y == 0.0f)
-    return;
-  CTX_PROCESS_F (CTX_TRANSLATE, x, y);
-  if (ctx->transformation & CTX_TRANSFORMATION_SCREEN_SPACE)
-    { ctx->drawlist.count--; }
 }
 
-void
-ctx_matrix_invert (CtxMatrix *m)
-{
-  CtxMatrix t = *m;
-  float invdet, det = m->m[0][0] * m->m[1][1] -
-                      m->m[1][0] * m->m[0][1];
-  if (det > -0.0000001f && det < 0.0000001f)
-    {
-      m->m[0][0] = m->m[0][1] =
-                     m->m[1][0] = m->m[1][1] =
-                                    m->m[2][0] = m->m[2][1] = 0.0;
-      return;
-    }
-  invdet = 1.0f / det;
-  m->m[0][0] = t.m[1][1] * invdet;
-  m->m[1][0] = -t.m[1][0] * invdet;
-  m->m[2][0] = (t.m[1][0] * t.m[2][1] - t.m[1][1] * t.m[2][0]) * invdet;
-  m->m[0][1] = -t.m[0][1] * invdet;
-  m->m[1][1] = t.m[0][0] * invdet;
-  m->m[2][1] = (t.m[0][1] * t.m[2][0] - t.m[0][0] * t.m[2][1]) * invdet ;
+/* Defines ctx_float_blend_<name> (components, dst, src, blended).
+ *
+ * Inside CODE, `s` is the source pixel and `b` is the destination pixel after
+ * ctx_float_deassociate_alpha; CODE fills in `blended`.  Afterwards the
+ * source alpha is stored in the last component of `blended` and
+ * ctx_float_associate_alpha is applied to it.
+ *
+ * The source alpha is captured before CODE runs: ctx_float_porter_duff passes
+ * the same buffer as both `src` and `blended`, and CODE bodies that copy
+ * every component from `b` would otherwise overwrite it before it is read.
+ */
+#define ctx_float_blend_define(name, CODE) \
+static inline void \
+ctx_float_blend_##name (int components, float * __restrict__ dst, float *src, float *blended)\
+{\
+  float *s = src; float b[components];\
+  const float saved_src_alpha = s[components-1];\
+  ctx_float_deassociate_alpha (components, dst, b);\
+    CODE;\
+  blended[components-1] = saved_src_alpha;\
+  ctx_float_associate_alpha (components, blended);\
 }
 
+/* Variant of ctx_float_blend_define for per-channel blend formulas: CODE is
+ * run once for every channel index c except the last component.
+ */
+#define ctx_float_blend_define_seperable(name, CODE) \
+        ctx_float_blend_define(name, for (int c = 0; c < components-1; c++) { CODE ;})
 
+/* Per-channel blend modes; b[c] is the backdrop channel, s[c] the source
+ * channel.
+ *
+ * hard_light switches between its multiply and screen branches at
+ * s[c] == 0.5, mirroring overlay's test on b[c]; the previous threshold of
+ * 0.f made the multiply branch unreachable for non-negative input.
+ * difference is the absolute difference, so it cannot go negative.
+ */
+ctx_float_blend_define_seperable(multiply,    blended[c] = (b[c] * s[c]);)
+ctx_float_blend_define_seperable(screen,      blended[c] = b[c] + s[c] - (b[c] * s[c]);)
+ctx_float_blend_define_seperable(overlay,     blended[c] = b[c] < 0.5f ? (s[c] * b[c]) :
+                                                          s[c] + b[c] - (s[c] * b[c]);)
+ctx_float_blend_define_seperable(darken,      blended[c] = ctx_minf (b[c], s[c]))
+ctx_float_blend_define_seperable(lighten,     blended[c] = ctx_maxf (b[c], s[c]))
+ctx_float_blend_define_seperable(color_dodge, blended[c] = (b[c] == 0.0f) ? 0.0f :
+                                     s[c] == 1.0f ? 1.0f : ctx_minf(1.0f, (b[c]) / (1.0f-s[c])))
+ctx_float_blend_define_seperable(color_burn,  blended[c] = (b[c] == 1.0f) ? 1.0f :
+                                     s[c] == 0.0f ? 0.0f : 1.0f - ctx_minf(1.0f, ((1.0f - b[c])) / s[c]))
+ctx_float_blend_define_seperable(hard_light,  blended[c] = s[c] < 0.5f ? (b[c] * s[c]) :
+                                                          b[c] + s[c] - (b[c] * s[c]);)
+ctx_float_blend_define_seperable(difference,  blended[c] = ctx_maxf (b[c], s[c]) - ctx_minf (b[c], s[c]))
 
-#endif
-#ifndef __CTX_COLOR
-#define __CTX_COLOR
+ctx_float_blend_define_seperable(divide,      blended[c] = s[c]?(b[c]) / s[c]:0.0f)
+ctx_float_blend_define_seperable(addition,    blended[c] = s[c]+b[c])
+ctx_float_blend_define_seperable(subtract,    blended[c] = s[c]-b[c])
 
-int ctx_color_model_get_components (CtxColorModel model)
-{
-  switch (model)
-    {
-      case CTX_GRAY:
-        return 1;
-      case CTX_GRAYA:
-      case CTX_GRAYA_A:
-        return 1;
-      case CTX_RGB:
-      case CTX_LAB:
-      case CTX_LCH:
-      case CTX_DRGB:
-        return 3;
-      case CTX_CMYK:
-      case CTX_DCMYK:
-      case CTX_LABA:
-      case CTX_LCHA:
-      case CTX_RGBA:
-      case CTX_DRGBA:
-      case CTX_RGBA_A:
-      case CTX_RGBA_A_DEVICE:
-        return 4;
-      case CTX_DCMYKA:
-      case CTX_CMYKA:
-      case CTX_CMYKA_A:
-      case CTX_DCMYKA_A:
-        return 5;
-    }
-  return 0;
-}
+ctx_float_blend_define_seperable(exclusion,   blended[c] = b[c] + s[c] - 2.0f * b[c] * s[c])
+/* soft_light: for s[c] > 0.5 the helper value d is a float, and the
+ * polynomial is used only while b[c] <= 0.25, with the square root above
+ * that.  (Previously d was an int, truncating the value, and the test
+ * compared against the integer 255/4, so the square-root branch was never
+ * taken for channel values in 0..1.)
+ */
+ctx_float_blend_define_seperable(soft_light,
+  if (s[c] <= 0.5f)
+  {
+    blended[c] = b[c] - (1.0f - 2.0f * s[c]) * b[c] * (1.0f - b[c]);
+  }
+  else
+  {
+    float d;
+    if (b[c] <= 0.25f)
+      d = (((16.0f * b[c] - 12.0f) * b[c] + 4.0f) * b[c]);
+    else
+      d = ctx_sqrtf(b[c]);
+    blended[c] = (b[c] + (2.0f * s[c] - 1.0f) * (d - b[c]));
+  }
+)
 
-#if 0
-inline static float ctx_u8_to_float (uint8_t val_u8)
-{
-  float val_f = val_u8 / 255.0;
-  return val_f;
-}
-#else
-float ctx_u8_float[256];
-#endif
 
-CtxColor *ctx_color_new (void)
-{
-  CtxColor *color = (CtxColor*)ctx_calloc (sizeof (CtxColor), 1);
-  return color;
-}
+/* color: source colour with its luminance replaced by the backdrop's.  (It
+ * previously re-applied the source's own luminance, so the backdrop had no
+ * influence on the result.)
+ */
+ctx_float_blend_define(color,
+  for (int i = 0; i < components; i++)
+    blended[i] = s[i];
+  ctx_float_set_lum(components, blended, ctx_float_get_lum (components, b));
+)
 
-int ctx_color_is_transparent (CtxColor *color)
-{
-  return color->alpha <= 0.001f;
-}
+/* hue: starts from the source pixel, then applies the backdrop's saturation
+ * followed by the backdrop's luminance.
+ */
+ctx_float_blend_define(hue,
+  const float backdrop_sat = ctx_float_get_sat (components, b);
+  const float backdrop_lum = ctx_float_get_lum (components, b);
+  for (int ch = 0; ch < components; ch++)
+    blended[ch] = s[ch];
+  ctx_float_set_sat (components, blended, backdrop_sat);
+  ctx_float_set_lum (components, blended, backdrop_lum);
+)
 
+/* saturation: starts from the backdrop pixel, then applies the source's
+ * saturation followed by the backdrop's own luminance.
+ */
+ctx_float_blend_define(saturation,
+  const float source_sat   = ctx_float_get_sat (components, s);
+  const float backdrop_lum = ctx_float_get_lum (components, b);
+  for (int ch = 0; ch < components; ch++)
+    blended[ch] = b[ch];
+  ctx_float_set_sat (components, blended, source_sat);
+  ctx_float_set_lum (components, blended, backdrop_lum);
+)
 
-void ctx_color_free (CtxColor *color)
-{
-  free (color);
-}
+/* luminosity: starts from the backdrop pixel, then applies the source's
+ * luminance.
+ */
+ctx_float_blend_define(luminosity,
+  const float source_lum = ctx_float_get_lum (components, s);
+  for (int ch = 0; ch < components; ch++)
+    blended[ch] = b[ch];
+  ctx_float_set_lum (components, blended, source_lum);
+)
 
-static void ctx_color_set_RGBA8 (CtxState *state, CtxColor *color, uint8_t r, uint8_t g, uint8_t b, uint8_t a)
+/* Runs the ctx_float_blend_<mode> function that matches `blend`, passing
+ * components, dst, src and blended through unchanged.  A `blend` value with
+ * no matching case does nothing.
+ */
+inline static void
+ctx_float_blend (int components, CtxBlend blend, float * __restrict__ dst, float *src, float *blended)
 {
-  color->original = color->valid = CTX_VALID_RGBA_U8;
-  color->rgba[0] = r;
-  color->rgba[1] = g;
-  color->rgba[2] = b;
-  color->rgba[3] = a;
-#if CTX_ENABLE_CM
-  color->space = state->gstate.device_space;
-#endif
+#define CTX_FLOAT_BLEND_CASE(MODE, fn) \
+    case MODE: fn (components, dst, src, blended); break;
+  switch (blend)
+  {
+    CTX_FLOAT_BLEND_CASE (CTX_BLEND_NORMAL,      ctx_float_blend_normal)
+    CTX_FLOAT_BLEND_CASE (CTX_BLEND_MULTIPLY,    ctx_float_blend_multiply)
+    CTX_FLOAT_BLEND_CASE (CTX_BLEND_SCREEN,      ctx_float_blend_screen)
+    CTX_FLOAT_BLEND_CASE (CTX_BLEND_OVERLAY,     ctx_float_blend_overlay)
+    CTX_FLOAT_BLEND_CASE (CTX_BLEND_DARKEN,      ctx_float_blend_darken)
+    CTX_FLOAT_BLEND_CASE (CTX_BLEND_LIGHTEN,     ctx_float_blend_lighten)
+    CTX_FLOAT_BLEND_CASE (CTX_BLEND_COLOR_DODGE, ctx_float_blend_color_dodge)
+    CTX_FLOAT_BLEND_CASE (CTX_BLEND_COLOR_BURN,  ctx_float_blend_color_burn)
+    CTX_FLOAT_BLEND_CASE (CTX_BLEND_HARD_LIGHT,  ctx_float_blend_hard_light)
+    CTX_FLOAT_BLEND_CASE (CTX_BLEND_SOFT_LIGHT,  ctx_float_blend_soft_light)
+    CTX_FLOAT_BLEND_CASE (CTX_BLEND_DIFFERENCE,  ctx_float_blend_difference)
+    CTX_FLOAT_BLEND_CASE (CTX_BLEND_EXCLUSION,   ctx_float_blend_exclusion)
+    CTX_FLOAT_BLEND_CASE (CTX_BLEND_COLOR,       ctx_float_blend_color)
+    CTX_FLOAT_BLEND_CASE (CTX_BLEND_HUE,         ctx_float_blend_hue)
+    CTX_FLOAT_BLEND_CASE (CTX_BLEND_SATURATION,  ctx_float_blend_saturation)
+    CTX_FLOAT_BLEND_CASE (CTX_BLEND_LUMINOSITY,  ctx_float_blend_luminosity)
+    CTX_FLOAT_BLEND_CASE (CTX_BLEND_ADDITION,    ctx_float_blend_addition)
+    CTX_FLOAT_BLEND_CASE (CTX_BLEND_SUBTRACT,    ctx_float_blend_subtract)
+    CTX_FLOAT_BLEND_CASE (CTX_BLEND_DIVIDE,      ctx_float_blend_divide)
+  }
+#undef CTX_FLOAT_BLEND_CASE
 }
 
-#if 0
-static void ctx_color_set_RGBA8_ (CtxColor *color, const uint8_t *in)
+/* Generic float Porter-Duff compositor; this is the function doing the
+ * grunt work.  When it is inlined with a constant compositing_mode,
+ * code-path elimination lets it compile to efficient code.
+ *
+ * For each of `count` pixels it fetches one source pixel through `fragment`,
+ * runs ctx_float_blend on it when `blend` is not CTX_BLEND_NORMAL, scales it
+ * by coverage (and by global alpha when that is not 255), and combines it
+ * with the destination using the source/destination factors that
+ * ctx_porter_duff_factors returns for `compositing_mode`.
+ *
+ * `src` is not read; source pixels come from `fragment`.
+ */
+CTX_INLINE static void
+ctx_float_porter_duff (CtxRasterizer         *rasterizer,
+                       int                    components,
+                       uint8_t * __restrict__ dst,
+                       uint8_t * __restrict__ src,
+                       int                    x0,
+                       uint8_t * __restrict__ coverage,
+                       int                    count,
+                       CtxCompositingMode     compositing_mode,
+                       CtxFragment            fragment,
+                       CtxBlend               blend)
 {
-  ctx_color_set_RGBA8 (color, in[0], in[1], in[2], in[3]);
-}
-#endif
+  float *dstf = (float*)dst;
 
-static void ctx_color_set_graya (CtxState *state, CtxColor *color, float gray, float alpha)
-{
-  color->original = color->valid = CTX_VALID_GRAYA;
-  color->l = gray;
-  color->alpha = alpha;
-}
-#if 0
-static void ctx_color_set_graya_ (CtxColor *color, const float *in)
-{
-  return ctx_color_set_graya (color, in[0], in[1]);
-}
-#endif
+  CtxPorterDuffFactor f_s, f_d;
+  ctx_porter_duff_factors (compositing_mode, &f_s, &f_d);
+  uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;
+  float   global_alpha_f = rasterizer->state->gstate.global_alpha_f;
+  
+  {
+    float tsrc[components];
+    float u0 = 0; float v0 = 0;
+    float ud = 0; float vd = 0;
+    for (int c = 0; c < components; c++) tsrc[c] = 0.0f;
 
-void ctx_color_set_rgba (CtxState *state, CtxColor *color, float r, float g, float b, float a)
-{
-#if CTX_ENABLE_CM
-  color->original = color->valid = CTX_VALID_RGBA;
-  color->red      = r;
-  color->green    = g;
-  color->blue     = b;
-  color->space    = state->gstate.rgb_space;
-#else
-  color->original     = color->valid = CTX_VALID_RGBA_DEVICE;
-  color->device_red   = r;
-  color->device_green = g;
-  color->device_blue  = b;
+    ctx_init_uv (rasterizer, x0, &u0, &v0, &ud, &vd);
+
+    while (count--)
+    {
+      uint8_t cov = *coverage;
+#if 1
+      /* Skip pixels that cannot change: a destination whose alpha is already
+       * 1 under DESTINATION_OVER, or zero coverage in the listed modes.  The
+       * alpha test reads the current pixel through dstf; it used to read a
+       * byte of the first pixel through the non-advancing uint8_t `dst`.
+       */
+      if (
+        CTX_UNLIKELY((compositing_mode == CTX_COMPOSITE_DESTINATION_OVER && dstf[components-1] == 1.0f)||
+        (cov == 0 && (compositing_mode == CTX_COMPOSITE_SOURCE_OVER ||
+        compositing_mode == CTX_COMPOSITE_XOR               ||
+        compositing_mode == CTX_COMPOSITE_DESTINATION_OUT   ||
+        compositing_mode == CTX_COMPOSITE_SOURCE_ATOP      
+        ))))
+      {
+        u0 += ud;
+        v0 += vd;
+        coverage ++;
+        dstf+=components;
+        continue;
+      }
 #endif
-  color->alpha        = a;
+
+      fragment (rasterizer, u0, v0, tsrc, 1, ud, vd);
+      /* the blend result is written back into tsrc (src and blended alias) */
+      if (blend != CTX_BLEND_NORMAL)
+        ctx_float_blend (components, blend, dstf, tsrc, tsrc);
+      u0 += ud;
+      v0 += vd;
+      float covf = ctx_u8_to_float (cov);
+
+      if (global_alpha_u8 != 255)
+        covf = covf * global_alpha_f;
+
+      if (covf != 1.0f)
+      {
+        for (int c = 0; c < components; c++)
+          tsrc[c] *= covf;
+      }
+
+      for (int c = 0; c < components; c++)
+      {
+        /* initialised so an unexpected factor value cannot leave it unset */
+        float res = 0.0f;
+        /* these switches and this whole function are written to be
+         * inlined, so that they collapse when the enum values passed in
+         * are compile-time constants.
+         */
+        switch (f_s)
+        {
+          case CTX_PORTER_DUFF_0: res = 0.0f; break;
+          case CTX_PORTER_DUFF_1:             res = (tsrc[c]); break;
+          case CTX_PORTER_DUFF_ALPHA:         res = (tsrc[c] *       dstf[components-1]); break;
+          case CTX_PORTER_DUFF_1_MINUS_ALPHA: res = (tsrc[c] * (1.0f-dstf[components-1])); break;
+        }
+        switch (f_d)
+        {
+          case CTX_PORTER_DUFF_0: dstf[c] = res; break;
+          case CTX_PORTER_DUFF_1:             dstf[c] = res + (dstf[c]); break;
+          case CTX_PORTER_DUFF_ALPHA:         dstf[c] = res + (dstf[c] *       tsrc[components-1]); break;
+          case CTX_PORTER_DUFF_1_MINUS_ALPHA: dstf[c] = res + (dstf[c] * (1.0f-tsrc[components-1])); break;
+        }
+      }
+      coverage ++;
+      dstf     +=components;
+    }
+  }
 }
 
-static void ctx_color_set_drgba (CtxState *state, CtxColor *color, float r, float g, float b, float a)
-{
-#if CTX_ENABLE_CM
-  color->original     = color->valid = CTX_VALID_RGBA_DEVICE;
-  color->device_red   = r;
-  color->device_green = g;
-  color->device_blue  = b;
-  color->alpha        = a;
-  color->space        = state->gstate.device_space;
+/* Generating one function per compositing_mode would be slightly more
+ * efficient, but on embedded targets it leads to more code bloat; here a
+ * slight amount of performance is traded for smaller code.
+ */
+#define ctx_float_porter_duff(compformat, components, source, fragment, blend) \
+static void \
+ctx_##compformat##_porter_duff_##source (CTX_COMPOSITE_ARGUMENTS) \
+{ \
+   switch (rasterizer->state->gstate.compositing_mode) \
+   { \
+     case CTX_COMPOSITE_SOURCE_ATOP: \
+      ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count, \
+        CTX_COMPOSITE_SOURCE_ATOP, fragment, blend);\
+      break;\
+     case CTX_COMPOSITE_DESTINATION_ATOP:\
+      ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
+        CTX_COMPOSITE_DESTINATION_ATOP, fragment, blend);\
+      break;\
+     case CTX_COMPOSITE_DESTINATION_IN:\
+      ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
+        CTX_COMPOSITE_DESTINATION_IN, fragment, blend);\
+      break;\
+     case CTX_COMPOSITE_DESTINATION:\
+      ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
+        CTX_COMPOSITE_DESTINATION, fragment, blend);\
+       break;\
+     case CTX_COMPOSITE_SOURCE_OVER:\
+      ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
+        CTX_COMPOSITE_SOURCE_OVER, fragment, blend);\
+       break;\
+     case CTX_COMPOSITE_DESTINATION_OVER:\
+      ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
+        CTX_COMPOSITE_DESTINATION_OVER, fragment, blend);\
+       break;\
+     case CTX_COMPOSITE_XOR:\
+      ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
+        CTX_COMPOSITE_XOR, fragment, blend);\
+       break;\
+     case CTX_COMPOSITE_DESTINATION_OUT:\
+       ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
+        CTX_COMPOSITE_DESTINATION_OUT, fragment, blend);\
+       break;\
+     case CTX_COMPOSITE_SOURCE_OUT:\
+       ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
+        CTX_COMPOSITE_SOURCE_OUT, fragment, blend);\
+       break;\
+     case CTX_COMPOSITE_SOURCE_IN:\
+       ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
+        CTX_COMPOSITE_SOURCE_IN, fragment, blend);\
+       break;\
+     case CTX_COMPOSITE_COPY:\
+       ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
+        CTX_COMPOSITE_COPY, fragment, blend);\
+       break;\
+     case CTX_COMPOSITE_CLEAR:\
+       ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
+        CTX_COMPOSITE_CLEAR, fragment, blend);\
+       break;\
+   }\
+}
+#endif
+
+#if CTX_ENABLE_RGBAF
+
+/* RGBAF (4 float components) compositors; these variants read the blend
+ * mode from the graphics state at run time instead of fixing it at compile
+ * time.
+ */
+ctx_float_porter_duff(RGBAF, 4,color,   rasterizer->fragment, rasterizer->state->gstate.blend_mode)
+ctx_float_porter_duff(RGBAF, 4,generic, rasterizer->fragment, rasterizer->state->gstate.blend_mode)
+
+#if CTX_INLINED_NORMAL
+#if CTX_GRADIENTS
+ctx_float_porter_duff(RGBAF, 4,linear_gradient, ctx_fragment_linear_gradient_RGBAF, 
rasterizer->state->gstate.blend_mode)
+ctx_float_porter_duff(RGBAF, 4,radial_gradient, ctx_fragment_radial_gradient_RGBAF, 
rasterizer->state->gstate.blend_mode)
+#endif
+ctx_float_porter_duff(RGBAF, 4,image,           ctx_fragment_image_RGBAF,           
rasterizer->state->gstate.blend_mode)
+
+
+/* ctx_float_porter_duff_blend(comp_name, components, blend_mode, blend_name):
+ * instantiates the ctx_<comp_name>_porter_duff_<source>_<blend_name>()
+ * compositors for one fixed blend mode, one per source kind (color, generic,
+ * image and - when gradients are compiled in - linear/radial gradient).
+ *
+ * The gradient and image variants are wired to the float (RGBAF) fragment
+ * functions: these compositors operate on float pixels, so the 8-bit RGBA8
+ * fragment functions must not be used here.
+ */
+#if CTX_GRADIENTS
+#define ctx_float_porter_duff_blend(comp_name, components, blend_mode, blend_name)\
+ctx_float_porter_duff(comp_name, components,color_##blend_name,            rasterizer->fragment,               blend_mode)\
+ctx_float_porter_duff(comp_name, components,generic_##blend_name,          rasterizer->fragment,               blend_mode)\
+ctx_float_porter_duff(comp_name, components,linear_gradient_##blend_name,  ctx_fragment_linear_gradient_RGBAF, blend_mode)\
+ctx_float_porter_duff(comp_name, components,radial_gradient_##blend_name,  ctx_fragment_radial_gradient_RGBAF, blend_mode)\
+ctx_float_porter_duff(comp_name, components,image_##blend_name,            ctx_fragment_image_RGBAF,           blend_mode)
 #else
-  ctx_color_set_rgba (state, color, r, g, b, a);
+#define ctx_float_porter_duff_blend(comp_name, components, blend_mode, blend_name)\
+ctx_float_porter_duff(comp_name, components,color_##blend_name,            rasterizer->fragment,               blend_mode)\
+ctx_float_porter_duff(comp_name, components,generic_##blend_name,          rasterizer->fragment,               blend_mode)\
+ctx_float_porter_duff(comp_name, components,image_##blend_name,            ctx_fragment_image_RGBAF,           blend_mode)
 #endif
+
+ctx_float_porter_duff_blend(RGBAF, 4, CTX_BLEND_NORMAL, normal)
+
+
+/* RGBAF entry point for ctx_float_copy_normal(), fixing the component
+ * count to 4 and forwarding the compositor arguments unchanged.
+ */
+static void
+ctx_RGBAF_copy_normal (CTX_COMPOSITE_ARGUMENTS)
+{
+  ctx_float_copy_normal (4, rasterizer, dst, src, x0, coverage, count);
 }
 
-#if 0
-static void ctx_color_set_rgba_ (CtxState *state, CtxColor *color, const float *in)
+/* RGBAF entry point for ctx_float_clear_normal(), fixing the component
+ * count to 4 and forwarding the compositor arguments unchanged.
+ */
+static void
+ctx_RGBAF_clear_normal (CTX_COMPOSITE_ARGUMENTS)
 {
-  ctx_color_set_rgba (color, in[0], in[1], in[2], in[3]);
+  ctx_float_clear_normal (4, rasterizer, dst, src, x0, coverage, count);
 }
-#endif
 
-/* the baseline conversions we have whether CMYK support is enabled or not,
- * providing an effort at right rendering
- */
-static void ctx_cmyk_to_rgb (float c, float m, float y, float k, float *r, float *g, float *b)
+#if 1
+/* RGBAF entry point for ctx_float_source_over_normal_color() with 4
+ * components.  Note that the cached rasterizer->color is passed as the
+ * source rather than the src argument.
+ */
+static void
+ctx_RGBAF_source_over_normal_color (CTX_COMPOSITE_ARGUMENTS)
 {
-  *r = (1.0f-c) * (1.0f-k);
-  *g = (1.0f-m) * (1.0f-k);
-  *b = (1.0f-y) * (1.0f-k);
+  ctx_float_source_over_normal_color (4, rasterizer, dst, rasterizer->color, x0, coverage, count);
 }
+#endif
+#endif
 
-void ctx_rgb_to_cmyk (float r, float g, float b,
-                      float *c_out, float *m_out, float *y_out, float *k_out)
+/* Configure the rasterizer for RGBAF rendering from the current graphics
+ * state: picks the fragment function, the compositing function (comp_op)
+ * and the coverage path (comp), then calls ctx_setup_apply_coverage().
+ */
+static void
+ctx_setup_RGBAF (CtxRasterizer *rasterizer)
 {
-  float c = 1.0f - r;
-  float m = 1.0f - g;
-  float y = 1.0f - b;
-  float k = ctx_minf (c, ctx_minf (y, m) );
-  if (k < 1.0f)
+  CtxGState *gstate = &rasterizer->state->gstate;
+  int components = 4;
+  rasterizer->fragment = ctx_rasterizer_get_fragment_RGBAF (rasterizer);
+  rasterizer->comp = CTX_COV_PATH_FALLBACK;
+#if 1
+  if (gstate->source_fill.type == CTX_SOURCE_COLOR)
     {
-      c = (c - k) / (1.0f - k);
-      m = (m - k) / (1.0f - k);
-      y = (y - k) / (1.0f - k);
+      rasterizer->comp_op = ctx_RGBAF_porter_duff_color;
+      /* evaluate the solid color once and cache it in rasterizer->color */
+      ctx_fragment_color_RGBAF (rasterizer, 0,0, rasterizer->color, 1, 0,0);
+      /* scale every component of the cached color by the global alpha */
+      if (gstate->global_alpha_u8 != 255)
+        for (int c = 0; c < components; c ++)
+          ((float*)rasterizer->color)[c] *= gstate->global_alpha_f;
+
+      /* also keep a copy converted by the format's from_comp, if any */
+      if (rasterizer->format->from_comp)
+        rasterizer->format->from_comp (rasterizer, 0,
+          &rasterizer->color[0],
+          &rasterizer->color_native,
+          1);
     }
   else
+#endif
+  {
+    rasterizer->comp_op = ctx_RGBAF_porter_duff_generic;
+  }
+
+#if CTX_INLINED_NORMAL
+  /* override the generic choice above with specialized compositors */
+  if (gstate->compositing_mode == CTX_COMPOSITE_CLEAR)
+    rasterizer->comp_op = ctx_RGBAF_clear_normal;
+  else
+    switch (gstate->blend_mode)
     {
-      c = m = y = 0.0f;
-    }
-  *c_out = c;
-  *m_out = m;
-  *y_out = y;
-  *k_out = k;
-}
+      case CTX_BLEND_NORMAL:
+        if (gstate->compositing_mode == CTX_COMPOSITE_COPY)
+        {
+          rasterizer->comp_op = ctx_RGBAF_copy_normal;
+          if (gstate->source_fill.type == CTX_SOURCE_COLOR)
+            rasterizer->comp = CTX_COV_PATH_RGBAF_COPY;
 
-#if CTX_ENABLE_CMYK
-static void ctx_color_set_cmyka (CtxState *state, CtxColor *color, float c, float m, float y, float k, float 
a)
-{
-  color->original = color->valid = CTX_VALID_CMYKA;
-  color->cyan     = c;
-  color->magenta  = m;
-  color->yellow   = y;
-  color->key      = k;
-  color->alpha    = a;
-#if CTX_ENABLE_CM
-  color->space    = state->gstate.cmyk_space;
+        }
+        else if (gstate->global_alpha_u8 == 0)
+        {
+          /* a global alpha of zero selects the no-op compositor */
+          rasterizer->comp_op = ctx_RGBA8_nop;
+        }
+        else
+        switch (gstate->source_fill.type)
+        {
+          case CTX_SOURCE_COLOR:
+            if (gstate->compositing_mode == CTX_COMPOSITE_SOURCE_OVER)
+            {
+              rasterizer->comp_op = ctx_RGBAF_source_over_normal_color;
+              /* a (nearly) opaque cached color uses the copy coverage path */
+              if ( ((float*)rasterizer->color)[3] >= 0.999f)
+                rasterizer->comp = CTX_COV_PATH_RGBAF_COPY;
+            }
+            else
+            {
+              rasterizer->comp_op = ctx_RGBAF_porter_duff_color_normal;
+            }
+            break;
+#if CTX_GRADIENTS
+          case CTX_SOURCE_LINEAR_GRADIENT:
+            rasterizer->comp_op = ctx_RGBAF_porter_duff_linear_gradient_normal;
+            break;
+          case CTX_SOURCE_RADIAL_GRADIENT:
+            rasterizer->comp_op = ctx_RGBAF_porter_duff_radial_gradient_normal;
+            break;
 #endif
-}
-
-static void ctx_color_set_dcmyka (CtxState *state, CtxColor *color, float c, float m, float y, float k, 
float a)
-{
-  color->original       = color->valid = CTX_VALID_DCMYKA;
-  color->device_cyan    = c;
-  color->device_magenta = m;
-  color->device_yellow  = y;
-  color->device_key     = k;
-  color->alpha          = a;
-#if CTX_ENABLE_CM
-  color->space = state->gstate.device_space;
+          case CTX_SOURCE_TEXTURE:
+            rasterizer->comp_op = ctx_RGBAF_porter_duff_image_normal;
+            break;
+          default:
+            rasterizer->comp_op = ctx_RGBAF_porter_duff_generic_normal;
+            break;
+        }
+        break;
+      default:
+        /* other blend modes: compositors that read the blend mode at run time */
+        switch (gstate->source_fill.type)
+        {
+          case CTX_SOURCE_COLOR:
+            rasterizer->comp_op = ctx_RGBAF_porter_duff_color;
+            //rasterizer->fragment = NULL;
+            break;
+#if CTX_GRADIENTS
+          case CTX_SOURCE_LINEAR_GRADIENT:
+            rasterizer->comp_op = ctx_RGBAF_porter_duff_linear_gradient;
+            break;
+          case CTX_SOURCE_RADIAL_GRADIENT:
+            rasterizer->comp_op = ctx_RGBAF_porter_duff_radial_gradient;
+            break;
+#endif
+          case CTX_SOURCE_TEXTURE:
+            rasterizer->comp_op = ctx_RGBAF_porter_duff_image;
+            break;
+          default:
+            rasterizer->comp_op = ctx_RGBAF_porter_duff_generic;
+            break;
+        }
+        break;
+    }
 #endif
+  ctx_setup_apply_coverage (rasterizer);
 }
 
 #endif
+#if CTX_ENABLE_GRAYAF
 
-#if CTX_ENABLE_CM
-
-static void ctx_rgb_user_to_device (CtxState *state, float rin, float gin, float bin,
-                                    float *rout, float *gout, float *bout)
+#if CTX_GRADIENTS
+/* Linear gradient fragment source for GRAYAF: writes count (gray, alpha)
+ * float pairs to out, starting at (x, y) and stepping by (dx, dy) per
+ * pixel.  Each sample is looked up as RGBAF and reduced to gray with
+ * ctx_float_color_rgb_to_gray(); alpha is taken over unchanged.
+ */
+static void
+ctx_fragment_linear_gradient_GRAYAF (CtxRasterizer *rasterizer, float x, float y, void *out, int count, 
float dx, float dy)
 {
-#if CTX_BABL
-#if 0
-  fprintf (stderr, "-[%p %p\n",
-    state->gstate.fish_rgbaf_user_to_device,
-    state->gstate.fish_rgbaf_device_to_user);
-#endif
-  if (state->gstate.fish_rgbaf_user_to_device)
+  float rgba[4];
+  CtxSource *g = &rasterizer->state->gstate.source_fill;
+  for (int i = 0 ; i < count; i++)
   {
-    float rgbaf[4]={rin,gin,bin,1.0};
-    float rgbafo[4];
-    babl_process (state->gstate.fish_rgbaf_user_to_device,
-                  rgbaf, rgbafo, 1);
-
-    *rout = rgbafo[0];
-    *gout = rgbafo[1];
-    *bout = rgbafo[2];
-    return;
+  /* position along the gradient, computed from the gradient's dx/dy,
+   * length, start and rdelta fields */
+  float v = ( ( (g->linear_gradient.dx * x + g->linear_gradient.dy * y) /
+                g->linear_gradient.length) -
+              g->linear_gradient.start) * (g->linear_gradient.rdelta);
+  ctx_fragment_gradient_1d_RGBAF (rasterizer, v, 1.0, rgba);
+  ((float*)out)[0] = ctx_float_color_rgb_to_gray (rasterizer->state, rgba);
+  ((float*)out)[1] = rgba[3];
+     out = ((float*)(out)) + 2;
+     x += dx;
+     y += dy;
   }
-#endif
-  *rout = rin;
-  *gout = gin;
-  *bout = bin;
 }
 
-static void ctx_rgb_device_to_user (CtxState *state, float rin, float gin, float bin,
-                                    float *rout, float *gout, float *bout)
+/* Radial gradient fragment source for GRAYAF: writes count (gray, alpha)
+ * float pairs to out, starting at (x, y) and stepping by (dx, dy) per
+ * pixel.  Each sample is looked up as RGBAF and reduced to gray with
+ * ctx_float_color_rgb_to_gray(); alpha is taken over unchanged.
+ */
+static void
+ctx_fragment_radial_gradient_GRAYAF (CtxRasterizer *rasterizer, float x, float y, void *out, int count, 
float dx, float dy)
 {
-#if CTX_BABL
-#if 0
-  fprintf (stderr, "=[%p %p\n",
-    state->gstate.fish_rgbaf_user_to_device,
-    state->gstate.fish_rgbaf_device_to_user);
-#endif
-  if (state->gstate.fish_rgbaf_device_to_user)
+  float rgba[4];
+  CtxSource *g = &rasterizer->state->gstate.source_fill;
+  for (int i = 0; i < count; i ++)
   {
-    float rgbaf[4]={rin,gin,bin,1.0};
-    float rgbafo[4];
-    babl_process (state->gstate.fish_rgbaf_device_to_user,
-                  rgbaf, rgbafo, 1);
-
-    *rout = rgbafo[0];
-    *gout = rgbafo[1];
-    *bout = rgbafo[2];
-    return;
+  /* v stays 0 unless r1 is larger than r0 */
+  float v = 0.0f;
+  if ((g->radial_gradient.r1-g->radial_gradient.r0) > 0.0f)
+    {
+      /* distance from (x0, y0), offset by r0 and divided by rdelta */
+      v = ctx_hypotf (g->radial_gradient.x0 - x, g->radial_gradient.y0 - y);
+      v = (v - g->radial_gradient.r0) / (g->radial_gradient.rdelta);
+    }
+  ctx_fragment_gradient_1d_RGBAF (rasterizer, v, 0.0, rgba);
+  ((float*)out)[0] = ctx_float_color_rgb_to_gray (rasterizer->state, rgba);
+  ((float*)out)[1] = rgba[3];
+     out = ((float*)(out)) + 2;
+     x += dx;
+     y += dy;
   }
-#endif
-  *rout = rin;
-  *gout = gin;
-  *bout = bin;
 }
 #endif
 
-static void ctx_color_get_drgba (CtxState *state, CtxColor *color, float *out)
+/* Solid color fragment source for GRAYAF: fills out with count copies of
+ * the fill color as (gray, alpha) float pairs obtained from
+ * ctx_color_get_graya().  x/y are advanced but do not affect the result.
+ */
+static void
+ctx_fragment_color_GRAYAF (CtxRasterizer *rasterizer, float x, float y, void *out, int count, float dx, 
float dy)
 {
-  if (! (color->valid & CTX_VALID_RGBA_DEVICE) )
+  CtxSource *g = &rasterizer->state->gstate.source_fill;
+  for (int i = 0; i < count; i++)
+  {
+     ctx_color_get_graya (rasterizer->state, &g->color, (float*)out);
+     out = ((float*)(out)) + 2;
+     x += dx;
+     y += dy;
+  }
+}
+
+/* Texture fragment source for GRAYAF: samples count pixels from the fill
+ * texture as 8-bit RGBA and writes them to out as (gray, alpha) float
+ * pairs.
+ *
+ * The RGBA8 fragment functions write 4 bytes per requested pixel, so the
+ * scratch buffer is sized by count, and the conversion runs once per pixel
+ * over all four of that pixel's components before gray is computed.
+ */
+static void ctx_fragment_image_GRAYAF (CtxRasterizer *rasterizer, float x, float y, void *out, int count, float dx, float dy)
+{
+  if (count <= 0) /* nothing to do; also avoids a zero-length array */
+    return;
+  uint8_t rgba[4 * count];
+  float *grayaf = (float*)out;
+  CtxGState *gstate = &rasterizer->state->gstate;
+  CtxBuffer *buffer = gstate->source_fill.texture.buffer;
+  switch (buffer->format->bpp)
     {
-#if CTX_ENABLE_CM
-      if (color->valid & CTX_VALID_RGBA)
-        {
-          ctx_rgb_user_to_device (state, color->red, color->green, color->blue,
-                                  & (color->device_red), & (color->device_green), & (color->device_blue) );
-        }
-      else
-#endif
-        if (color->valid & CTX_VALID_RGBA_U8)
-          {
-            float red = ctx_u8_to_float (color->rgba[0]);
-            float green = ctx_u8_to_float (color->rgba[1]);
-            float blue = ctx_u8_to_float (color->rgba[2]);
-#if CTX_ENABLE_CM
-            ctx_rgb_user_to_device (state, red, green, blue,
-                                  & (color->device_red), & (color->device_green), & (color->device_blue) );
-#else
-            color->device_red = red;
-            color->device_green = green;
-            color->device_blue = blue;
-#endif
-            color->alpha        = ctx_u8_to_float (color->rgba[3]);
-          }
-#if CTX_ENABLE_CMYK
-        else if (color->valid & CTX_VALID_CMYKA)
-          {
-            ctx_cmyk_to_rgb (color->cyan, color->magenta, color->yellow, color->key,
-                             &color->device_red,
-                             &color->device_green,
-                             &color->device_blue);
-          }
+#if CTX_FRAGMENT_SPECIALIZE
+      case 1:  ctx_fragment_image_gray1_RGBA8 (rasterizer, x, y, rgba, count, dx, dy); break;
+      case 24: ctx_fragment_image_rgb8_RGBA8 (rasterizer, x, y, rgba, count, dx, dy);  break;
+      case 32: ctx_fragment_image_rgba8_RGBA8 (rasterizer, x, y, rgba, count, dx, dy); break;
 #endif
-        else if (color->valid & CTX_VALID_GRAYA)
-          {
-            color->device_red   =
-              color->device_green =
-                color->device_blue  = color->l;
-          }
-      color->valid |= CTX_VALID_RGBA_DEVICE;
+      default: ctx_fragment_image_RGBA8 (rasterizer, x, y, rgba, count, dx, dy);       break;
     }
-  out[0] = color->device_red;
-  out[1] = color->device_green;
-  out[2] = color->device_blue;
-  out[3] = color->alpha;
+  for (int i = 0; i < count; i ++)
+  {
+    float rgbaf[4];
+    for (int c = 0; c < 4; c ++)
+      rgbaf[c] = ctx_u8_to_float (rgba[i * 4 + c]);
+    grayaf[0] = ctx_float_color_rgb_to_gray (rasterizer->state, rgbaf);
+    grayaf[1] = rgbaf[3];
+    grayaf += 2;
+  }
 }
 
-
-static inline void
-_ctx_color_get_rgba (CtxState *state, CtxColor *color, float *out)
+/* Return the GRAYAF fragment function matching the current fill source
+ * type; source types not handled by the switch fall back to the solid
+ * color fragment.
+ */
+static CtxFragment ctx_rasterizer_get_fragment_GRAYAF (CtxRasterizer *rasterizer)
 {
-#if CTX_ENABLE_CM
-  if (! (color->valid & CTX_VALID_RGBA) )
+  CtxGState *gstate = &rasterizer->state->gstate;
+  switch (gstate->source_fill.type)
     {
-      ctx_color_get_drgba (state, color, out);
-      if (color->valid & CTX_VALID_RGBA_DEVICE)
-        {
-          ctx_rgb_device_to_user (state, color->device_red, color->device_green, color->device_blue,
-                                  & (color->red), & (color->green), & (color->blue) );
-        }
-      color->valid |= CTX_VALID_RGBA;
-    }
-  out[0] = color->red;
-  out[1] = color->green;
-  out[2] = color->blue;
-  out[3] = color->alpha;
-#else
-  ctx_color_get_drgba (state, color, out);
+      case CTX_SOURCE_TEXTURE:           return ctx_fragment_image_GRAYAF;
+      case CTX_SOURCE_COLOR:           return ctx_fragment_color_GRAYAF;
+#if CTX_GRADIENTS
+      case CTX_SOURCE_LINEAR_GRADIENT: return ctx_fragment_linear_gradient_GRAYAF;
+      case CTX_SOURCE_RADIAL_GRADIENT: return ctx_fragment_radial_gradient_GRAYAF;
 #endif
+    }
+  return ctx_fragment_color_GRAYAF;
 }
 
-void ctx_color_get_rgba (CtxState *state, CtxColor *color, float *out)
-{
-  _ctx_color_get_rgba (state, color, out);
-}
+/* GRAYAF (2 float components) compositors: first with the blend mode read
+ * from the graphics state at run time, then - when CTX_INLINED_NORMAL is
+ * set - with the blend mode fixed to CTX_BLEND_NORMAL.
+ */
+ctx_float_porter_duff(GRAYAF, 2,color,   rasterizer->fragment, rasterizer->state->gstate.blend_mode)
+ctx_float_porter_duff(GRAYAF, 2,generic, rasterizer->fragment, rasterizer->state->gstate.blend_mode)
 
+#if CTX_INLINED_NORMAL
+ctx_float_porter_duff(GRAYAF, 2,color_normal,   rasterizer->fragment, CTX_BLEND_NORMAL)
+ctx_float_porter_duff(GRAYAF, 2,generic_normal, rasterizer->fragment, CTX_BLEND_NORMAL)
 
+/* GRAYAF entry point for ctx_float_copy_normal(), fixing the component
+ * count to 2 and forwarding the compositor arguments unchanged.
+ */
+static void
+ctx_GRAYAF_copy_normal (CTX_COMPOSITE_ARGUMENTS)
+{
+  ctx_float_copy_normal (2, rasterizer, dst, src, x0, coverage, count);
+}
 
-float ctx_float_color_rgb_to_gray (CtxState *state, const float *rgb)
+/* GRAYAF entry point for ctx_float_clear_normal(), fixing the component
+ * count to 2 and forwarding the compositor arguments unchanged.
+ */
+static void
+ctx_GRAYAF_clear_normal (CTX_COMPOSITE_ARGUMENTS)
 {
-        // XXX todo replace with correct according to primaries
-  return CTX_CSS_RGB_TO_LUMINANCE(rgb);
+  ctx_float_clear_normal (2, rasterizer, dst, src, x0, coverage, count);
 }
-uint8_t ctx_u8_color_rgb_to_gray (CtxState *state, const uint8_t *rgb)
+
+/* GRAYAF entry point for ctx_float_source_copy_normal_color() with 2
+ * components.  Note that the cached rasterizer->color is passed as the
+ * source rather than the src argument.
+ */
+static void
+ctx_GRAYAF_source_copy_normal_color (CTX_COMPOSITE_ARGUMENTS)
 {
-        // XXX todo replace with correct according to primaries
-  return CTX_CSS_RGB_TO_LUMINANCE(rgb);
+  ctx_float_source_copy_normal_color (2, rasterizer, dst, rasterizer->color, x0, coverage, count);
 }
+#endif
 
-void ctx_color_get_graya (CtxState *state, CtxColor *color, float *out)
+/* Configure the rasterizer for GRAYAF rendering from the current graphics
+ * state: picks the fragment function, the compositing function (comp_op)
+ * and the coverage path (comp), then calls ctx_setup_apply_coverage().
+ *
+ * For a solid color source the color is cached in rasterizer->color as a
+ * (gray, alpha) float pair - the layout the 2-component compositors read -
+ * so it is fetched through the GRAYAF color fragment, not as RGBA.
+ */
+static void
+ctx_setup_GRAYAF (CtxRasterizer *rasterizer)
 {
-  if (! (color->valid & CTX_VALID_GRAYA) )
+  CtxGState *gstate = &rasterizer->state->gstate;
+  int components = 2;
+  rasterizer->fragment = ctx_rasterizer_get_fragment_GRAYAF (rasterizer);
+  rasterizer->comp = CTX_COV_PATH_FALLBACK;
+  if (gstate->source_fill.type == CTX_SOURCE_COLOR)
     {
-      float rgba[4];
-      ctx_color_get_drgba (state, color, rgba);
-      color->l = ctx_float_color_rgb_to_gray (state, rgba);
-      color->valid |= CTX_VALID_GRAYA;
+      rasterizer->comp_op = ctx_GRAYAF_porter_duff_color;
+      /* evaluate the solid color once as (gray, alpha) and cache it */
+      ctx_fragment_color_GRAYAF (rasterizer, 0,0, rasterizer->color, 1, 0,0);
+      /* scale both components of the cached color by the global alpha */
+      if (gstate->global_alpha_u8 != 255)
+        for (int c = 0; c < components; c ++)
+          ((float*)rasterizer->color)[c] *= gstate->global_alpha_f;
+
+      /* also keep a copy converted by the format's from_comp, if any */
+      if (rasterizer->format->from_comp)
+        rasterizer->format->from_comp (rasterizer, 0,
+          &rasterizer->color[0],
+          &rasterizer->color_native,
+          1);
     }
-  out[0] = color->l;
-  out[1] = color->alpha;
-}
+  else
+  {
+    rasterizer->comp_op = ctx_GRAYAF_porter_duff_generic;
+  }
 
-#if CTX_ENABLE_CMYK
-void ctx_color_get_cmyka (CtxState *state, CtxColor *color, float *out)
-{
-  if (! (color->valid & CTX_VALID_CMYKA) )
+#if CTX_INLINED_NORMAL
+  /* override the generic choice above with specialized compositors */
+  if (gstate->compositing_mode == CTX_COMPOSITE_CLEAR)
+    rasterizer->comp_op = ctx_GRAYAF_clear_normal;
+  else
+    switch (gstate->blend_mode)
     {
-      if (color->valid & CTX_VALID_GRAYA)
+      case CTX_BLEND_NORMAL:
+        if (gstate->compositing_mode == CTX_COMPOSITE_COPY)
         {
-          color->cyan = color->magenta = color->yellow = 0.0;
-          color->key = color->l;
+          rasterizer->comp_op = ctx_GRAYAF_copy_normal;
         }
-      else
+        else if (gstate->global_alpha_u8 == 0)
+          rasterizer->comp_op = ctx_RGBA8_nop;
+        else
+        switch (gstate->source_fill.type)
         {
-          float rgba[4];
-          ctx_color_get_rgba (state, color, rgba);
-          ctx_rgb_to_cmyk (rgba[0], rgba[1], rgba[2],
-                           &color->cyan, &color->magenta, &color->yellow, &color->key);
-          color->alpha = rgba[3];
+          case CTX_SOURCE_COLOR:
+            if (gstate->compositing_mode == CTX_COMPOSITE_SOURCE_OVER)
+            {
+              /* the last cached component is the alpha; a fully
+               * transparent color selects the no-op compositor */
+              if (((float*)rasterizer->color)[components-1] == 0.0f)
+                rasterizer->comp_op = ctx_RGBA8_nop;
+              else
+                rasterizer->comp_op = ctx_GRAYAF_source_copy_normal_color;
+            }
+            else
+            {
+              rasterizer->comp_op = ctx_GRAYAF_porter_duff_color_normal;
+            }
+            break;
+          default:
+            rasterizer->comp_op = ctx_GRAYAF_porter_duff_generic_normal;
+            break;
         }
-      color->valid |= CTX_VALID_CMYKA;
+        break;
+      default:
+        /* other blend modes: compositors that read the blend mode at run time */
+        switch (gstate->source_fill.type)
+        {
+          case CTX_SOURCE_COLOR:
+            rasterizer->comp_op = ctx_GRAYAF_porter_duff_color;
+            break;
+          default:
+            rasterizer->comp_op = ctx_GRAYAF_porter_duff_generic;
+            break;
+        }
+        break;
     }
-  out[0] = color->cyan;
-  out[1] = color->magenta;
-  out[2] = color->yellow;
-  out[3] = color->key;
-  out[4] = color->alpha;
+#endif
+  ctx_setup_apply_coverage (rasterizer);
 }
 
-#if 0
-static void ctx_color_get_cmyka_u8 (CtxState *state, CtxColor *color, uint8_t *out)
-{
-  if (! (color->valid & CTX_VALID_CMYKA_U8) )
-    {
-      float cmyka[5];
-      ctx_color_get_cmyka (color, cmyka);
-      for (int i = 0; i < 5; i ++)
-        { color->cmyka[i] = ctx_float_to_u8 (cmyka[i]); }
-      color->valid |= CTX_VALID_CMYKA_U8;
-    }
-  out[0] = color->cmyka[0];
-  out[1] = color->cmyka[1];
-  out[2] = color->cmyka[2];
-  out[3] = color->cmyka[3];
-}
-#endif
 #endif
+#if CTX_ENABLE_GRAYF
 
-static inline void
-_ctx_color_get_rgba8 (CtxState *state, CtxColor *color, uint8_t *out)
+/* Composite onto a buffer of single-float gray pixels: expands the count
+ * destination pixels into a temporary (gray, alpha) buffer with alpha set
+ * to 1.0, runs rasterizer->comp_op on it with rasterizer->color as source,
+ * then copies only the gray values back; the resulting alpha is discarded.
+ */
+static void
+ctx_composite_GRAYF (CTX_COMPOSITE_ARGUMENTS)
 {
-  if (! (color->valid & CTX_VALID_RGBA_U8) )
-    {
-      float rgba[4];
-      ctx_color_get_drgba (state, color, rgba);
-      for (int i = 0; i < 4; i ++)
-        { color->rgba[i] = ctx_float_to_u8 (rgba[i]); }
-      color->valid |= CTX_VALID_RGBA_U8;
-    }
-  out[0] = color->rgba[0];
-  out[1] = color->rgba[1];
-  out[2] = color->rgba[2];
-  out[3] = color->rgba[3];
+  float *dstf = (float*)dst;
+
+  /* stack scratch space, two floats per pixel */
+  float temp[count*2];
+  for (unsigned int i = 0; i < count; i++)
+  {
+    temp[i*2] = dstf[i];
+    temp[i*2+1] = 1.0f;
+  }
+  rasterizer->comp_op (rasterizer, (uint8_t*)temp, rasterizer->color, x0, coverage, count);
+  for (unsigned int i = 0; i < count; i++)
+  {
+    dstf[i] = temp[i*2];
+  }
 }
 
-void
-ctx_color_get_rgba8 (CtxState *state, CtxColor *color, uint8_t *out)
+#endif
+#if CTX_ENABLE_BGRA8
+
+/* Swap two channels of one 32-bit pixel in place: bits 0..7 and bits
+ * 16..23 of the word are exchanged, the other two bytes are kept.
+ *
+ * NOTE(review): despite the name, the locals (red/blue vs. green_alpha)
+ * indicate this exchanges red and blue - which is what an RGBA<->BGRA
+ * conversion needs; the name looks historical.  The pixel is accessed
+ * through a uint32_t pointer, so rgba is presumably expected to be
+ * 4-byte aligned - confirm against callers.
+ */
+inline static void
+ctx_swap_red_green (uint8_t *rgba)
 {
-  _ctx_color_get_rgba8 (state, color, out);
+  uint32_t *buf  = (uint32_t *) rgba;
+  uint32_t  orig = *buf;
+  uint32_t  green_alpha = (orig & 0xff00ff00);
+  uint32_t  red_blue    = (orig & 0x00ff00ff);
+  uint32_t  red         = red_blue << 16;
+  uint32_t  blue        = red_blue >> 16;
+  *buf = green_alpha | red | blue;
 }
 
-void ctx_color_get_graya_u8 (CtxState *state, CtxColor *color, uint8_t *out)
+/* Convert count 32-bit pixels from buf to rgba by applying
+ * ctx_swap_red_green() to a copy of each pixel.  The swap is its own
+ * inverse, so the same routine converts in either direction.  The
+ * rasterizer and x arguments are not used.
+ */
+static void
+ctx_BGRA8_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
 {
-  if (! (color->valid & CTX_VALID_GRAYA_U8) )
+  uint32_t *srci = (uint32_t *) buf;
+  uint32_t *dsti = (uint32_t *) rgba;
+  while (count--)
     {
-      float graya[2];
-      ctx_color_get_graya (state, color, graya);
-      color->l_u8 = ctx_float_to_u8 (graya[0]);
-      color->rgba[3] = ctx_float_to_u8 (graya[1]);
-      color->valid |= CTX_VALID_GRAYA_U8;
+      uint32_t val = *srci++;
+      ctx_swap_red_green ( (uint8_t *) &val);
+      *dsti++      = val;
     }
-  out[0] = color->l_u8;
-  out[1] = color->rgba[3];
-}
-
-#if 0
-void
-ctx_get_rgba (Ctx *ctx, float *rgba)
-{
-  ctx_color_get_rgba (& (ctx->state), &ctx->state.gstate.source.color, rgba);
 }
 
-void
-ctx_get_drgba (Ctx *ctx, float *rgba)
+/* Convert count pixels from rgba to buf; implemented by calling
+ * ctx_BGRA8_to_RGBA8() with source and destination passed through.
+ */
+static void
+ctx_RGBA8_to_BGRA8 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
 {
-  ctx_color_get_drgba (& (ctx->state), &ctx->state.gstate.source.color, rgba);
+  ctx_BGRA8_to_RGBA8 (rasterizer, x, rgba, (uint8_t *) buf, count);
 }
-#endif
 
-int ctx_in_fill (Ctx *ctx, float x, float y)
+/* Composite onto a BGRA8 destination: converts the count destination
+ * pixels into a temporary buffer, runs rasterizer->comp_op on that
+ * buffer with rasterizer->color as source, then converts the result back
+ * into dst.
+ */
+static void
+ctx_composite_BGRA8 (CTX_COMPOSITE_ARGUMENTS)
 {
-  float x1, y1, x2, y2;
-  ctx_path_extents (ctx, &x1, &y1, &x2, &y2);
-
-  if (x1 <= x && x <= x2 && // XXX - just bounding box for now
-      y1 <= y && y <= y2)   //
-    return 1;
-  return 0;
+  // for better performance, this could be done without a pre/post conversion,
+  // by swapping R and B of source instead... as long as it is a color instead
+  // of gradient or image
+  //
+  //
+  uint8_t pixels[count * 4];
+  ctx_BGRA8_to_RGBA8 (rasterizer, x0, dst, &pixels[0], count);
+  rasterizer->comp_op (rasterizer, &pixels[0], rasterizer->color, x0, coverage, count);
+  // the same routine converts back, since the channel swap is its own inverse
+  ctx_BGRA8_to_RGBA8  (rasterizer, x0, &pixels[0], dst, count);
 }
 
 
-#if CTX_ENABLE_CMYK
-#if 0
-void
-ctx_get_cmyka (Ctx *ctx, float *cmyka)
-{
-  ctx_color_get_cmyka (& (ctx->state), &ctx->state.gstate.source.color, cmyka);
-}
-#endif
-#endif
-#if 0
-void
-ctx_get_graya (Ctx *ctx, float *ya)
-{
-  ctx_color_get_graya (& (ctx->state), &ctx->state.gstate.source.color, ya);
-}
 #endif
-
-void ctx_stroke_source (Ctx *ctx)
+/* Composite straight into the destination: dst is handed to
+ * rasterizer->comp_op as-is, with rasterizer->color as the source and no
+ * pre/post pixel format conversion.
+ */
+static void
+ctx_composite_direct (CTX_COMPOSITE_ARGUMENTS)
 {
-  CtxEntry set_stroke = ctx_void (CTX_STROKE_SOURCE);
-  ctx_process (ctx, &set_stroke);
+  // no temporary buffer is needed here: comp_op works directly on dst
+  rasterizer->comp_op (rasterizer, dst, rasterizer->color, x0, coverage, count);
 }
 
+#if CTX_ENABLE_CMYKAF
 
-static void ctx_color_raw (Ctx *ctx, CtxColorModel model, float *components, int stroke)
+/* CMYKAF fragment source for every non-solid-color fill: renders count
+ * pixels through the matching RGBAF fragment function into a temporary
+ * buffer and converts each to five floats (c, m, y, k, alpha) in out using
+ * ctx_rgb_to_cmyk().
+ *
+ * For an unhandled source type the whole temporary buffer is zeroed, so
+ * every one of the count pixels converts from defined values.
+ */
+static void
+ctx_fragment_other_CMYKAF (CtxRasterizer *rasterizer, float x, float y, void *out, int count, float dx, float dy)
 {
-#if 0
-  CtxSource *source = stroke?
-          &ctx->state.gstate.source_stroke:
-          &ctx->state.gstate.source_fill;
-
-  if (model == CTX_RGB || model == CTX_RGBA)
-  {
-    float rgba[4];
-  // XXX it should be possible to disable this, to get a more accurate record
-  // when it is intentional
-    float a = 1.0f;
-    if (model == CTX_RGBA) a = components[3];
-    ctx_color_get_rgba (&ctx->state, &source->color, rgba);
-    if (rgba[0] == components[0] && rgba[1] == components[1] && rgba[2] == components[2] && rgba[3] == a)
-     return;
-  }
+  if (count <= 0) /* nothing to do; also avoids a zero-length array */
+    return;
+  float *cmyka = (float*)out;
+  float _rgba[4 * count];
+  float *rgba = &_rgba[0];
+  CtxGState *gstate = &rasterizer->state->gstate;
+  switch (gstate->source_fill.type)
+    {
+      case CTX_SOURCE_TEXTURE:
+        ctx_fragment_image_RGBAF (rasterizer, x, y, rgba, count, dx, dy);
+        break;
+      case CTX_SOURCE_COLOR:
+        ctx_fragment_color_RGBAF (rasterizer, x, y, rgba, count, dx, dy);
+        break;
+#if CTX_GRADIENTS
+      case CTX_SOURCE_LINEAR_GRADIENT:
+        ctx_fragment_linear_gradient_RGBAF (rasterizer, x, y, rgba, count, dx, dy);
+        break;
+      case CTX_SOURCE_RADIAL_GRADIENT:
+        ctx_fragment_radial_gradient_RGBAF (rasterizer, x, y, rgba, count, dx, dy);
+        break;
 #endif
-
-  if (stroke)
+      default:
+        /* clear all pixels, not just the first one */
+        for (int i = 0; i < 4 * count; i++)
+          rgba[i] = 0.0f;
+        break;
+    }
+  for (int i = 0; i < count; i++)
   {
-    ctx_stroke_source (ctx);
+    cmyka[4]=rgba[3];
+    ctx_rgb_to_cmyk (rgba[0], rgba[1], rgba[2], &cmyka[0], &cmyka[1], &cmyka[2], &cmyka[3]);
+    cmyka += 5;
+    rgba += 4;
   }
+}
 
-  CtxEntry command[3]= {
-  ctx_f (CTX_COLOR, model, 0)
-  };
-  switch (model)
+/* Solid color fragment source for CMYKAF: fetches the fill color once
+ * with ctx_color_get_cmyka() and writes count copies to out, five floats
+ * per pixel.  The four ink components are stored complemented (1 - value);
+ * alpha is stored unchanged.  x, y, dx and dy are not used.
+ *
+ * NOTE(review): ctx_fragment_other_CMYKAF stores the outputs of
+ * ctx_rgb_to_cmyk() without complementing them - confirm which
+ * representation the CMYKAF compositors expect.
+ */
+static void
+ctx_fragment_color_CMYKAF (CtxRasterizer *rasterizer, float x, float y, void *out, int count, float dx, 
float dy)
+{
+  CtxGState *gstate = &rasterizer->state->gstate;
+  float *cmyka = (float*)out;
+  float cmyka_in[5];
+  ctx_color_get_cmyka (rasterizer->state, &gstate->source_fill.color, cmyka_in);
+  for (int i = 0; i < count; i++)
   {
-    case CTX_RGBA:
-    case CTX_RGBA_A:
-    case CTX_RGBA_A_DEVICE:
-    case CTX_DRGBA:
-    case CTX_LABA:
-    case CTX_LCHA:
-      command[2].data.f[0]=components[3];
-      /*FALLTHROUGH*/
-    case CTX_RGB:
-    case CTX_LAB:
-    case CTX_LCH:
-    case CTX_DRGB:
-      command[0].data.f[1]=components[0];
-      command[1].data.f[0]=components[1];
-      command[1].data.f[1]=components[2];
-      break;
-    case CTX_DCMYKA:
-    case CTX_CMYKA:
-    case CTX_DCMYKA_A:
-    case CTX_CMYKA_A:
-      command[2].data.f[1]=components[4];
-      /*FALLTHROUGH*/
-    case CTX_CMYK:
-    case CTX_DCMYK:
-      command[0].data.f[1]=components[0];
-      command[1].data.f[0]=components[1];
-      command[1].data.f[1]=components[2];
-      command[2].data.f[0]=components[3];
-      break;
-    case CTX_GRAYA:
-    case CTX_GRAYA_A:
-      command[1].data.f[0]=components[1];
-      /*FALLTHROUGH*/
-    case CTX_GRAY:
-      command[0].data.f[1]=components[0];
-      break;
+    for (int c = 0; c < 4; c ++)
+    {
+      cmyka[c] = (1.0f - cmyka_in[c]);
+    }
+    cmyka[4] = cmyka_in[4];
+    cmyka += 5;
   }
-  ctx_process (ctx, command);
 }
 
-void ctx_rgba (Ctx *ctx, float r, float g, float b, float a)
+static CtxFragment ctx_rasterizer_get_fragment_CMYKAF (CtxRasterizer *rasterizer)
 {
-  float components[4]={r,g,b,a};
-  ctx_color_raw (ctx, CTX_RGBA, components, 0);
+  CtxGState *gstate = &rasterizer->state->gstate;
+  switch (gstate->source_fill.type)
+    {
+      case CTX_SOURCE_COLOR:
+        return ctx_fragment_color_CMYKAF;
+    }
+  return ctx_fragment_other_CMYKAF;
 }
 
-void ctx_rgba_stroke (Ctx *ctx, float r, float g, float b, float a)
-{
-  float components[4]={r,g,b,a};
-  ctx_color_raw (ctx, CTX_RGBA, components, 1);
-}
+ctx_float_porter_duff (CMYKAF, 5,color,           rasterizer->fragment, rasterizer->state->gstate.blend_mode)
+ctx_float_porter_duff (CMYKAF, 5,generic,         rasterizer->fragment, rasterizer->state->gstate.blend_mode)
 
-void ctx_rgb (Ctx *ctx, float   r, float   g, float   b)
-{
-  ctx_rgba (ctx, r, g, b, 1.0f);
-}
+#if CTX_INLINED_NORMAL
+ctx_float_porter_duff (CMYKAF, 5,color_normal,            rasterizer->fragment, CTX_BLEND_NORMAL)
+ctx_float_porter_duff (CMYKAF, 5,generic_normal,          rasterizer->fragment, CTX_BLEND_NORMAL)
 
-void ctx_rgb_stroke (Ctx *ctx, float   r, float   g, float   b)
+static void
+ctx_CMYKAF_copy_normal (CTX_COMPOSITE_ARGUMENTS)
 {
-  ctx_rgba_stroke (ctx, r, g, b, 1.0f);
+  ctx_float_copy_normal (5, rasterizer, dst, src, x0, coverage, count);
 }
 
-void ctx_gray_stroke   (Ctx *ctx, float gray)
-{
-  ctx_color_raw (ctx, CTX_GRAY, &gray, 1);
-}
-void ctx_gray (Ctx *ctx, float gray)
+static void
+ctx_CMYKAF_clear_normal (CTX_COMPOSITE_ARGUMENTS)
 {
-  ctx_color_raw (ctx, CTX_GRAY, &gray, 0);
+  ctx_float_clear_normal (5, rasterizer, dst, src, x0, coverage, count);
 }
 
-void ctx_drgba_stroke (Ctx *ctx, float r, float g, float b, float a)
+static void
+ctx_CMYKAF_source_copy_normal_color (CTX_COMPOSITE_ARGUMENTS)
 {
-  float components[4]={r,g,b,a};
-  ctx_color_raw (ctx, CTX_DRGBA, components, 1);
+  ctx_float_source_copy_normal_color (5, rasterizer, dst, rasterizer->color, x0, coverage, count);
 }
-void ctx_drgba (Ctx *ctx, float r, float g, float b, float a)
+#endif
+
+static void
+ctx_setup_CMYKAF (CtxRasterizer *rasterizer)
 {
-  float components[4]={r,g,b,a};
-  ctx_color_raw (ctx, CTX_DRGBA, components, 0);
-}
+  CtxGState *gstate = &rasterizer->state->gstate;
+  int components = 5;
+  rasterizer->fragment = ctx_rasterizer_get_fragment_CMYKAF (rasterizer);
+  rasterizer->comp = CTX_COV_PATH_FALLBACK;
+  if (gstate->source_fill.type == CTX_SOURCE_COLOR)
+    {
+      rasterizer->comp_op = ctx_CMYKAF_porter_duff_color;
+      rasterizer->comp_op = ctx_CMYKAF_porter_duff_generic;
+ //     rasterizer->fragment = NULL;
+      ctx_color_get_cmyka (rasterizer->state, &gstate->source_fill.color, (float*)rasterizer->color);
+      if (gstate->global_alpha_u8 != 255)
+        ((float*)rasterizer->color)[components-1] *= gstate->global_alpha_f;
 
-#if CTX_ENABLE_CMYK
+      if (rasterizer->format->from_comp)
+        rasterizer->format->from_comp (rasterizer, 0,
+          &rasterizer->color[0],
+          &rasterizer->color_native,
+          1);
+    }
+  else
+  {
+    rasterizer->comp_op = ctx_CMYKAF_porter_duff_generic;
+  }
 
-void ctx_cmyka_stroke (Ctx *ctx, float c, float m, float y, float k, float a)
-{
-  float components[5]={c,m,y,k,a};
-  ctx_color_raw (ctx, CTX_CMYKA, components, 1);
-}
-void ctx_cmyka (Ctx *ctx, float c, float m, float y, float k, float a)
-{
-  float components[5]={c,m,y,k,a};
-  ctx_color_raw (ctx, CTX_CMYKA, components, 0);
-}
-void ctx_cmyk_stroke   (Ctx *ctx, float c, float m, float y, float k)
-{
-  float components[4]={c,m,y,k};
-  ctx_color_raw (ctx, CTX_CMYK, components, 1);
+#if CTX_INLINED_NORMAL
+  if (gstate->compositing_mode == CTX_COMPOSITE_CLEAR)
+    rasterizer->comp_op = ctx_CMYKAF_clear_normal;
+#if 1
+  else
+    switch (gstate->blend_mode)
+    {
+      case CTX_BLEND_NORMAL:
+        if (gstate->compositing_mode == CTX_COMPOSITE_COPY)
+        {
+          rasterizer->comp_op = ctx_CMYKAF_copy_normal;
+        }
+        else if (gstate->global_alpha_u8 == 0)
+          rasterizer->comp_op = ctx_RGBA8_nop;
+        else
+        switch (gstate->source_fill.type)
+        {
+          case CTX_SOURCE_COLOR:
+            if (gstate->compositing_mode == CTX_COMPOSITE_SOURCE_OVER)
+            {
+              if (((float*)rasterizer->color)[components-1] == 0.0f)
+                rasterizer->comp_op = ctx_RGBA8_nop;
+#if 1
+              else if (((float*)rasterizer->color)[components-1] == 1.0f)
+                rasterizer->comp_op = ctx_CMYKAF_source_copy_normal_color;
+              else
+                rasterizer->comp_op = ctx_CMYKAF_porter_duff_color_normal;
+              //rasterizer->fragment = NULL;
+#endif
+            }
+            else
+            {
+              rasterizer->comp_op = ctx_CMYKAF_porter_duff_color_normal;
+   //         rasterizer->fragment = NULL;
+            }
+            break;
+          default:
+            rasterizer->comp_op = ctx_CMYKAF_porter_duff_generic_normal;
+            break;
+        }
+        break;
+      default:
+        switch (gstate->source_fill.type)
+        {
+          case CTX_SOURCE_COLOR:
+            rasterizer->comp_op = ctx_CMYKAF_porter_duff_color;
+    //      rasterizer->fragment = NULL;
+            break;
+          default:
+            rasterizer->comp_op = ctx_CMYKAF_porter_duff_generic;
+            break;
+        }
+        break;
+    }
+#endif
+#endif
+  ctx_setup_apply_coverage (rasterizer);
 }
-void ctx_cmyk (Ctx *ctx, float c, float m, float y, float k)
+
+static void
+ctx_setup_CMYKA8 (CtxRasterizer *rasterizer)
 {
-  float components[4]={c,m,y,k};
-  ctx_color_raw (ctx, CTX_CMYK, components, 0);
+  ctx_setup_CMYKAF (rasterizer);
+  if (rasterizer->comp_op == ctx_CMYKAF_source_copy_normal_color)
+    rasterizer->comp = CTX_COV_PATH_CMYKA8_COPY;
 }
 
-#if 0
-static void ctx_dcmyk_raw (Ctx *ctx, float c, float m, float y, float k, int stroke)
+static void
+ctx_setup_CMYK8 (CtxRasterizer *rasterizer)
 {
-  float components[5]={c,m,y,k,1.0f};
-  ctx_color_raw (ctx, CTX_DCMYKA, components, stroke);
+  ctx_setup_CMYKAF (rasterizer);
+  if (rasterizer->comp_op == ctx_CMYKAF_source_copy_normal_color)
+    rasterizer->comp = CTX_COV_PATH_CMYK8_COPY;
 }
 
-static void ctx_dcmyka_raw (Ctx *ctx, float c, float m, float y, float k, float a, int stroke)
+#endif
+#if CTX_ENABLE_CMYKA8
+
+static void
+ctx_CMYKA8_to_CMYKAF (CtxRasterizer *rasterizer, uint8_t *src, float *dst, int count)
 {
-  CtxEntry command[3]=
-  {
-    ctx_f (CTX_COLOR, CTX_DCMYKA + 512 * stroke, c),
-    ctx_f (CTX_CONT, m, y),
-    ctx_f (CTX_CONT, k, a)
-  };
-  ctx_process (ctx, command);
-}
-#endif
-
-void ctx_dcmyk_stroke   (Ctx *ctx, float c, float m, float y, float k)
-{
-  float components[5]={c,m,y,k,1.0f};
-  ctx_color_raw (ctx, CTX_DCMYK, components, 1);
+  for (int i = 0; i < count; i ++)
+    {
+      for (int c = 0; c < 4; c ++)
+        { dst[c] = ctx_u8_to_float ( (255-src[c]) ); }
+      dst[4] = ctx_u8_to_float (src[4]);
+      for (int c = 0; c < 4; c++)
+        { dst[c] *= dst[4]; }
+      src += 5;
+      dst += 5;
+    }
 }
-void ctx_dcmyk (Ctx *ctx, float c, float m, float y, float k)
+static void
+ctx_CMYKAF_to_CMYKA8 (CtxRasterizer *rasterizer, float *src, uint8_t *dst, int count)
 {
-  float components[5]={c,m,y,k,1.0f};
-  ctx_color_raw (ctx, CTX_DCMYK, components, 0);
-}
+  for (int i = 0; i < count; i ++)
+    {
+      int a = ctx_float_to_u8 (src[4]);
+      if (a != 0 && a != 255)
+      {
+        float recip = 1.0f/src[4];
+        for (int c = 0; c < 4; c++)
+        {
+          dst[c] = ctx_float_to_u8 (1.0f - src[c] * recip);
+        }
+      }
+      else
+      {
+        for (int c = 0; c < 4; c++)
+          dst[c] = 255 - ctx_float_to_u8 (src[c]);
+      }
+      dst[4]=a;
 
-void ctx_dcmyka_stroke   (Ctx *ctx, float c, float m, float y, float k, float a)
-{
-  float components[5]={c,m,y,k,a};
-  ctx_color_raw (ctx, CTX_DCMYKA, components, 1);
+      src += 5;
+      dst += 5;
+    }
 }
-void ctx_dcmyka (Ctx *ctx, float c, float m, float y, float k, float a)
+
+static void
+ctx_composite_CMYKA8 (CTX_COMPOSITE_ARGUMENTS)
 {
-  float components[5]={c,m,y,k,a};
-  ctx_color_raw (ctx, CTX_DCMYKA, components, 0);
+  float pixels[count * 5];
+  ctx_CMYKA8_to_CMYKAF (rasterizer, dst, &pixels[0], count);
+  rasterizer->comp_op (rasterizer, (uint8_t *) &pixels[0], rasterizer->color, x0, coverage, count);
+  ctx_CMYKAF_to_CMYKA8 (rasterizer, &pixels[0], dst, count);
 }
 
 #endif
+#if CTX_ENABLE_CMYK8
 
-/* XXX: missing CSS1:
- *
- *   EM { color: rgb(110%, 0%, 0%) }  // clipped to 100% 
- *
- *
- *   :first-letter
- *   :first-list
- *   :link :visited :active
- *
- */
-
-typedef struct ColorDef {
-  uint64_t name;
-  float r;
-  float g;
-  float b;
-  float a;
-} ColorDef;
-
-#if 0
-#define CTX_silver     CTX_STRH('s','i','l','v','e','r',0,0,0,0,0,0,0,0)
-#define CTX_fuchsia    CTX_STRH('f','u','c','h','s','i','a',0,0,0,0,0,0,0)
-#define CTX_gray       CTX_STRH('g','r','a','y',0,0,0,0,0,0,0,0,0,0)
-#define CTX_yellow     CTX_STRH('y','e','l','l','o','w',0,0,0,0,0,0,0,0)
-#define CTX_white      CTX_STRH('w','h','i','t','e',0,0,0,0,0,0,0,0,0)
-#define CTX_maroon     CTX_STRH('m','a','r','o','o','n',0,0,0,0,0,0,0,0)
-#define CTX_magenta    CTX_STRH('m','a','g','e','n','t','a',0,0,0,0,0,0,0)
-#define CTX_blue       CTX_STRH('b','l','u','e',0,0,0,0,0,0,0,0,0,0)
-#define CTX_green      CTX_STRH('g','r','e','e','n',0,0,0,0,0,0,0,0,0)
-#define CTX_red        CTX_STRH('r','e','d',0,0,0,0,0,0,0,0,0,0,0)
-#define CTX_purple     CTX_STRH('p','u','r','p','l','e',0,0,0,0,0,0,0,0)
-#define CTX_olive      CTX_STRH('o','l','i','v','e',0,0,0,0,0,0,0,0,0)
-#define CTX_teal        CTX_STRH('t','e','a','l',0,0,0,0,0,0,0,0,0,0)
-#define CTX_black      CTX_STRH('b','l','a','c','k',0,0,0,0,0,0,0,0,0)
-#define CTX_cyan       CTX_STRH('c','y','a','n',0,0,0,0,0,0,0,0,0,0)
-#define CTX_navy       CTX_STRH('n','a','v','y',0,0,0,0,0,0,0,0,0,0)
-#define CTX_lime       CTX_STRH('l','i','m','e',0,0,0,0,0,0,0,0,0,0)
-#define CTX_aqua       CTX_STRH('a','q','u','a',0,0,0,0,0,0,0,0,0,0)
-#define CTX_transparent CTX_STRH('t','r','a','n','s','p','a','r','e','n','t',0,0,0)
-#endif
-
-static ColorDef _ctx_colors[]={
-  {CTX_black,    0, 0, 0, 1},
-  {CTX_red,      1, 0, 0, 1},
-  {CTX_green,    0, 1, 0, 1},
-  {CTX_yellow,   1, 1, 0, 1},
-  {CTX_blue,     0, 0, 1, 1},
-  {CTX_fuchsia,  1, 0, 1, 1},
-  {CTX_cyan,     0, 1, 1, 1},
-  {CTX_white,    1, 1, 1, 1},
-  {CTX_silver,   0.75294, 0.75294, 0.75294, 1},
-  {CTX_gray,     0.50196, 0.50196, 0.50196, 1},
-  {CTX_magenta,  0.50196, 0, 0.50196, 1},
-  {CTX_maroon,   0.50196, 0, 0, 1},
-  {CTX_purple,   0.50196, 0, 0.50196, 1},
-  {CTX_green,    0, 0.50196, 0, 1},
-  {CTX_lime,     0, 1, 0, 1},
-  {CTX_olive,    0.50196, 0.50196, 0, 1},
-  {CTX_navy,     0, 0,      0.50196, 1},
-  {CTX_teal,     0, 0.50196, 0.50196, 1},
-  {CTX_aqua,     0, 1, 1, 1},
-  {CTX_transparent, 0, 0, 0, 0},
-  {CTX_none,     0, 0, 0, 0},
-};
-
-static int xdigit_value(const char xdigit)
+static void
+ctx_CMYK8_to_CMYKAF (CtxRasterizer *rasterizer, uint8_t *src, float *dst, int count)
 {
-  if (xdigit >= '0' && xdigit <= '9')
-   return xdigit - '0';
-  switch (xdigit)
-  {
-    case 'A':case 'a': return 10;
-    case 'B':case 'b': return 11;
-    case 'C':case 'c': return 12;
-    case 'D':case 'd': return 13;
-    case 'E':case 'e': return 14;
-    case 'F':case 'f': return 15;
-  }
-  return 0;
+  for (int i = 0; i < count; i ++)
+    {
+      dst[0] = ctx_u8_to_float (255-src[0]);
+      dst[1] = ctx_u8_to_float (255-src[1]);
+      dst[2] = ctx_u8_to_float (255-src[2]);
+      dst[3] = ctx_u8_to_float (255-src[3]);
+      dst[4] = 1.0f;
+      src += 4;
+      dst += 5;
+    }
 }
-
-static int
-ctx_color_parse_rgb (CtxState *ctxstate, CtxColor *color, const char *color_string)
+static void
+ctx_CMYKAF_to_CMYK8 (CtxRasterizer *rasterizer, float *src, uint8_t *dst, int count)
 {
-  float dcolor[4] = {0,0,0,1};
-  while (*color_string && *color_string != '(')
-    color_string++;
-  if (*color_string) color_string++;
-
-  {
-    int n_floats = 0;
-    char *p =    (char*)color_string;
-    char *prev = (char*)NULL;
-    for (; p && n_floats < 4 && p != prev && *p; )
+  for (int i = 0; i < count; i ++)
     {
-      float val;
-      prev = p;
-      val = _ctx_parse_float (p, &p);
-      if (p != prev)
-      {
-        if (n_floats < 3)
-          dcolor[n_floats++] = val/255.0;
-        else
-          dcolor[n_floats++] = val;
-
-        while (*p == ' ' || *p == ',')
+      float c = src[0];
+      float m = src[1];
+      float y = src[2];
+      float k = src[3];
+      float a = src[4];
+      if (a != 0.0f && a != 1.0f)
         {
-          p++;
-          prev++;
+          float recip = 1.0f/a;
+          c *= recip;
+          m *= recip;
+          y *= recip;
+          k *= recip;
         }
-      }
+      c = 1.0 - c;
+      m = 1.0 - m;
+      y = 1.0 - y;
+      k = 1.0 - k;
+      dst[0] = ctx_float_to_u8 (c);
+      dst[1] = ctx_float_to_u8 (m);
+      dst[2] = ctx_float_to_u8 (y);
+      dst[3] = ctx_float_to_u8 (k);
+      src += 5;
+      dst += 4;
     }
-  }
-  ctx_color_set_rgba (ctxstate, color, dcolor[0], dcolor[1],dcolor[2],dcolor[3]);
-  return 0;
 }
 
-static int ctx_isxdigit (uint8_t ch)
+static void
+ctx_composite_CMYK8 (CTX_COMPOSITE_ARGUMENTS)
 {
-  if (ch >= '0' && ch <= '9') return 1;
-  if (ch >= 'a' && ch <= 'f') return 1;
-  if (ch >= 'A' && ch <= 'F') return 1;
-  return 0;
+  float pixels[count * 5];
+  ctx_CMYK8_to_CMYKAF (rasterizer, dst, &pixels[0], count);
+  rasterizer->comp_op (rasterizer, (uint8_t *) &pixels[0], src, x0, coverage, count);
+  ctx_CMYKAF_to_CMYK8 (rasterizer, &pixels[0], dst, count);
 }
+#endif
 
-static int
-mrg_color_parse_hex (CtxState *ctxstate, CtxColor *color, const char *color_string)
-{
-  float dcolor[4]={0,0,0,1};
-  int string_length = strlen (color_string);
-  int i;
-  dcolor[3] = 1.0;
+#if CTX_ENABLE_RGB8
 
-  if (string_length == 7 ||  /* #rrggbb   */
-      string_length == 9)    /* #rrggbbaa */
+inline static void
+ctx_RGB8_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
+{
+  const uint8_t *pixel = (const uint8_t *) buf;
+  while (count--)
     {
-      int num_iterations = (string_length - 1) / 2;
-  
-      for (i = 0; i < num_iterations; ++i)
-        {
-          if (ctx_isxdigit (color_string[2 * i + 1]) &&
-              ctx_isxdigit (color_string[2 * i + 2]))
-            {
-              dcolor[i] = (xdigit_value (color_string[2 * i + 1]) << 4 |
-                           xdigit_value (color_string[2 * i + 2])) / 255.f;
-            }
-          else
-            {
-              return 0;
-            }
-        }
-      /* Successful #rrggbb(aa) parsing! */
-      ctx_color_set_rgba (ctxstate, color, dcolor[0], dcolor[1],dcolor[2],dcolor[3]);
-      return 1;
+      rgba[0] = pixel[0];
+      rgba[1] = pixel[1];
+      rgba[2] = pixel[2];
+      rgba[3] = 255;
+      pixel+=3;
+      rgba +=4;
     }
-  else if (string_length == 4 ||  /* #rgb  */
-           string_length == 5)    /* #rgba */
+}
+
+inline static void
+ctx_RGBA8_to_RGB8 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
+{
+  uint8_t *pixel = (uint8_t *) buf;
+  while (count--)
     {
-      int num_iterations = string_length - 1;
-      for (i = 0; i < num_iterations; ++i)
-        {
-          if (ctx_isxdigit (color_string[i + 1]))
-            {
-              dcolor[i] = (xdigit_value (color_string[i + 1]) << 4 |
-                           xdigit_value (color_string[i + 1])) / 255.f;
-            }
-          else
-            {
-              return 0;
-            }
-        }
-      ctx_color_set_rgba (ctxstate, color, dcolor[0], dcolor[1],dcolor[2],dcolor[3]);
-      /* Successful #rgb(a) parsing! */
-      return 0;
+      pixel[0] = rgba[0];
+      pixel[1] = rgba[1];
+      pixel[2] = rgba[2];
+      pixel+=3;
+      rgba +=4;
     }
-  /* String was of unsupported length. */
-  return 1;
 }
 
-//#define CTX_currentColor     CTX_STRH('c','u','r','r','e','n','t','C','o','l','o','r',0,0)
+#endif
+#if CTX_ENABLE_GRAY1
 
-int ctx_color_set_from_string (Ctx *ctx, CtxColor *color, const char *string)
+#if CTX_NATIVE_GRAYA8
+inline static void
+ctx_GRAY1_to_GRAYA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
 {
-  int i;
-  uint32_t hash = ctx_strhash (string);
-//  ctx_color_set_rgba (&(ctx->state), color, 0.4,0.1,0.9,1.0);
-//  return 0;
-    //rgba[0], rgba[1], rgba[2], rgba[3]);
-
-  if (hash == CTX_currentColor)
-  {
-    float rgba[4];
-    CtxColor ccolor;
-    ctx_get_color (ctx, CTX_color, &ccolor);
-    ctx_color_get_rgba (&(ctx->state), &ccolor, rgba);
-    ctx_color_set_rgba (&(ctx->state), color, rgba[0], rgba[1], rgba[2], rgba[3]);
-    return 0;
-  }
-
-  for (i = (sizeof(_ctx_colors)/sizeof(_ctx_colors[0]))-1; i>=0; i--)
-  {
-    if (hash == _ctx_colors[i].name)
+  const uint8_t *pixel = (uint8_t *) buf;
+  while (count--)
     {
-      ctx_color_set_rgba (&(ctx->state), color,
-       _ctx_colors[i].r, _ctx_colors[i].g, _ctx_colors[i].b, _ctx_colors[i].a);
-      return 0;
+      int bitno = x&7;
+      rgba[0] = 255 * ((*pixel) & (1<<bitno));
+      rgba[1] = 255;
+      pixel+= (bitno ==7);
+      x++;
+      rgba +=2;
     }
-  }
-
-  if (string[0] == '#')
-    mrg_color_parse_hex (&(ctx->state), color, string);
-  else if (string[0] == 'r' &&
-      string[1] == 'g' &&
-      string[2] == 'b'
-      )
-    ctx_color_parse_rgb (&(ctx->state), color, string);
-
-  return 0;
 }
 
-int ctx_color (Ctx *ctx, const char *string)
+inline static void
+ctx_GRAYA8_to_GRAY1 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
 {
-  CtxColor color = {0,};
-  ctx_color_set_from_string (ctx, &color, string);
-  float rgba[4];
-  ctx_color_get_rgba (&(ctx->state), &color, rgba);
-  ctx_color_raw (ctx, CTX_RGBA, rgba, 0);
-  return 0;
+  uint8_t *pixel = (uint8_t *) buf;
+  while (count--)
+    {
+      int gray = rgba[0];
+      int bitno = x&7;
+      if (gray >= 128)
+        *pixel |= (1<<bitno);
+      else
+        *pixel &= (~ (1<<bitno));
+      pixel+= (bitno==7);
+      x++;
+      rgba +=2;
+    }
 }
 
-void
-ctx_rgba8 (Ctx *ctx, uint8_t r, uint8_t g, uint8_t b, uint8_t a)
-{
-#if 0
-  CtxEntry command = ctx_u8 (CTX_SET_RGBA_U8, r, g, b, a, 0, 0, 0, 0);
-
-  uint8_t rgba[4];
-  ctx_color_get_rgba8 (&ctx->state, &ctx->state.gstate.source.color, rgba);
-  if (rgba[0] == r && rgba[1] == g && rgba[2] == b && rgba[3] == a)
-     return;
-
-  ctx_process (ctx, &command);
 #else
-  ctx_rgba (ctx, r/255.0f, g/255.0f, b/255.0f, a/255.0f);
-#endif
-}
 
-void ctx_rgba8_stroke (Ctx *ctx, uint8_t r, uint8_t g, uint8_t b, uint8_t a)
+inline static void
+ctx_GRAY1_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
 {
-  ctx_rgba_stroke (ctx, r/255.0f, g/255.0f, b/255.0f, a/255.0f);
+  const uint8_t *pixel = (uint8_t *) buf;
+  while (count--)
+    {
+      int bitno = x&7;
+      *((uint32_t*)(rgba))=0xff000000 + 0x00ffffff * ((*pixel & (1<< bitno ) )!=0);
+      pixel += (bitno ==7);
+      x++;
+      rgba +=4;
+    }
 }
 
-
-#endif 
-
-#if CTX_BABL
-void ctx_rasterizer_colorspace_babl (CtxState      *state,
-                                     CtxColorSpace  space_slot,
-                                     const Babl    *space)
+inline static void
+ctx_RGBA8_to_GRAY1 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
 {
-  switch (space_slot)
-  {
-    case CTX_COLOR_SPACE_DEVICE_RGB:
-      state->gstate.device_space = space;
-      break;
-    case CTX_COLOR_SPACE_DEVICE_CMYK:
-      state->gstate.device_space = space;
-      break;
-    case CTX_COLOR_SPACE_USER_RGB:
-      state->gstate.rgb_space = space;
-      break;
-    case CTX_COLOR_SPACE_USER_CMYK:
-      state->gstate.cmyk_space = space;
-      break;
-    case CTX_COLOR_SPACE_TEXTURE:
-      state->gstate.texture_space = space;
-      break;
-  }
-
-  const Babl *srgb = babl_space ("sRGB");
-  if (!state->gstate.texture_space) 
-       state->gstate.texture_space = srgb;
-  if (!state->gstate.device_space) 
-       state->gstate.device_space = srgb;
-  if (!state->gstate.rgb_space) 
-       state->gstate.rgb_space = srgb;
-
-  //fprintf (stderr, "%s\n", babl_get_name (state->gstate.device_space));
-
-  state->gstate.fish_rgbaf_device_to_user = babl_fish (
-       babl_format_with_space ("R'G'B'A float", state->gstate.device_space),
-       babl_format_with_space ("R'G'B'A float", state->gstate.rgb_space));
-  state->gstate.fish_rgbaf_user_to_device = babl_fish (
-       babl_format_with_space ("R'G'B'A float", state->gstate.rgb_space),
-       babl_format_with_space ("R'G'B'A float", state->gstate.device_space));
-  state->gstate.fish_rgbaf_texture_to_device = babl_fish (
-       babl_format_with_space ("R'G'B'A float", state->gstate.texture_space),
-       babl_format_with_space ("R'G'B'A float", state->gstate.device_space));
+  uint8_t *pixel = (uint8_t *) buf;
+  while (count--)
+    {
+      int gray = ctx_u8_color_rgb_to_gray (rasterizer->state, rgba);
+      int bitno = x&7;
+      //gray += ctx_dither_mask_a (x, rasterizer->scanline/aa, 0, 127);
+      if (gray >= 128)
+        *pixel |= (1<< bitno);
+      else
+        *pixel &= (~ (1<< bitno));
+      pixel+= (bitno ==7);
+      x++;
+      rgba +=4;
+    }
 }
 #endif
 
-void ctx_rasterizer_colorspace_icc (CtxState      *state,
-                                    CtxColorSpace  space_slot,
-                                    char          *icc_data,
-                                    int            icc_length)
-{
-#if CTX_BABL
-   const char *error = NULL;
-   const Babl *space = NULL;
-
-   if (icc_data == NULL) space = babl_space ("sRGB");
-   else if (icc_length < 32)
-   {
-      if (icc_data[0] == '0' && icc_data[1] == 'x')
-        sscanf (icc_data, "%p", &space);
-      else
-      {
-        char tmp[24];
-        int i;
-        for (i = 0; i < icc_length; i++)
-          tmp[i]= (icc_data[i]>='A' && icc_data[i]<='Z')?icc_data[i]+('a'-'A'):icc_data[i];
-        tmp[icc_length]=0;
-        if (!strcmp (tmp, "srgb"))            space = babl_space ("sRGB");
-        else if (!strcmp (tmp, "scrgb"))      space = babl_space ("scRGB");
-        else if (!strcmp (tmp, "acescg"))     space = babl_space ("ACEScg");
-        else if (!strcmp (tmp, "adobe"))      space = babl_space ("Adobe");
-        else if (!strcmp (tmp, "apple"))      space = babl_space ("Apple");
-        else if (!strcmp (tmp, "rec2020"))    space = babl_space ("Rec2020");
-        else if (!strcmp (tmp, "aces2065-1")) space = babl_space ("ACES2065-1");
-      }
-   }
-
-   if (!space)
-   {
-     space = babl_space_from_icc (icc_data, icc_length, BABL_ICC_INTENT_RELATIVE_COLORIMETRIC, &error);
-   }
-   if (space)
-   {
-     ctx_rasterizer_colorspace_babl (state, space_slot, space);
-   }
 #endif
-}
+#if CTX_ENABLE_GRAY2
 
-void ctx_colorspace (Ctx           *ctx,
-                     CtxColorSpace  space_slot,
-                     unsigned char *data,
-                     int            data_length)
+#if CTX_NATIVE_GRAYA8
+inline static void
+ctx_GRAY2_to_GRAYA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
 {
-  if (data)
-  {
-    if (data_length <= 0) data_length = (int)strlen ((char*)data);
-    ctx_process_cmd_str_with_len (ctx, CTX_COLOR_SPACE, (char*)data, space_slot, 0, data_length);
-  }
-  else
-  {
-    ctx_process_cmd_str_with_len (ctx, CTX_COLOR_SPACE, "sRGB", space_slot, 0, 4);
-  }
+  const uint8_t *pixel = (uint8_t *) buf;
+  while (count--)
+    {
+      int val = (*pixel & (3 << ( (x & 3) <<1) ) ) >> ( (x&3) <<1);
+      val <<= 6;
+      rgba[0] = val;
+      rgba[1] = 255;
+      if ( (x&3) ==3)
+        { pixel+=1; }
+      x++;
+      rgba +=2;
+    }
 }
 
-void ctx_gradient_add_stop_u8
-(Ctx *ctx, float pos, uint8_t r, uint8_t g, uint8_t b, uint8_t a)
+inline static void
+ctx_GRAYA8_to_GRAY2 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
 {
-  CtxEntry entry = ctx_f (CTX_GRADIENT_STOP, pos, 0);
-  entry.data.u8[4+0] = r;
-  entry.data.u8[4+1] = g;
-  entry.data.u8[4+2] = b;
-  entry.data.u8[4+3] = a;
-  ctx_process (ctx, &entry);
+  uint8_t *pixel = (uint8_t *) buf;
+  while (count--)
+    {
+      int val = rgba[0];
+      val >>= 6;
+      *pixel = *pixel & (~ (3 << ( (x&3) <<1) ) );
+      *pixel = *pixel | ( (val << ( (x&3) <<1) ) );
+      if ( (x&3) ==3)
+        { pixel+=1; }
+      x++;
+      rgba +=2;
+    }
 }
+#else
 
-void ctx_gradient_add_stop
-(Ctx *ctx, float pos, float r, float g, float b, float a)
+inline static void
+ctx_GRAY2_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
 {
-  int ir = r * 255;
-  int ig = g * 255;
-  int ib = b * 255;
-  int ia = a * 255;
-  ir = CTX_CLAMP (ir, 0,255);
-  ig = CTX_CLAMP (ig, 0,255);
-  ib = CTX_CLAMP (ib, 0,255);
-  ia = CTX_CLAMP (ia, 0,255);
-  ctx_gradient_add_stop_u8 (ctx, pos, ir, ig, ib, ia);
+  const uint8_t *pixel = (uint8_t *) buf;
+  while (count--)
+    {
+      int val = (*pixel & (3 << ( (x & 3) <<1) ) ) >> ( (x&3) <<1);
+      val <<= 6;
+      rgba[0] = val;
+      rgba[1] = val;
+      rgba[2] = val;
+      rgba[3] = 255;
+      if ( (x&3) ==3)
+        { pixel+=1; }
+      x++;
+      rgba +=4;
+    }
 }
 
-void ctx_gradient_add_stop_string
-(Ctx *ctx, float pos, const char *string)
+inline static void
+ctx_RGBA8_to_GRAY2 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
 {
-  CtxColor color = {0,};
-  ctx_color_set_from_string (ctx, &color, string);
-  float rgba[4];
-  ctx_color_get_rgba (&(ctx->state), &color, rgba);
-  ctx_gradient_add_stop (ctx, pos, rgba[0], rgba[1], rgba[2], rgba[3]);
+  uint8_t *pixel = (uint8_t *) buf;
+  while (count--)
+    {
+      int val = ctx_u8_color_rgb_to_gray (rasterizer->state, rgba);
+      val >>= 6;
+      *pixel = *pixel & (~ (3 << ( (x&3) <<1) ) );
+      *pixel = *pixel | ( (val << ( (x&3) <<1) ) );
+      if ( (x&3) ==3)
+        { pixel+=1; }
+      x++;
+      rgba +=4;
+    }
 }
+#endif
 
-//  deviceRGB .. settable when creating an RGB image surface..
-//               queryable when running in terminal - is it really needed?
-//               though it is settable ; and functional for changing this state at runtime..
-//
-//  userRGB - settable at any time, stored in save|restore 
-//  texture - set as the space of data on subsequent 
+#endif
+#if CTX_ENABLE_GRAY4
 
-static float ctx_state_get (CtxState *state, uint32_t hash)
+#if CTX_NATIVE_GRAYA8
+inline static void
+ctx_GRAY4_to_GRAYA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
 {
-  for (int i = state->gstate.keydb_pos-1; i>=0; i--)
+  const uint8_t *pixel = (uint8_t *) buf;
+  while (count--)
     {
-      if (state->keydb[i].key == hash)
-        { return state->keydb[i].value; }
+      int val = (*pixel & (15 << ( (x & 1) <<2) ) ) >> ( (x&1) <<2);
+      val <<= 4;
+      rgba[0] = val;
+      rgba[1] = 255;
+      if ( (x&1) ==1)
+        { pixel+=1; }
+      x++;
+      rgba +=2;
     }
-  return -0.0;
 }
 
-static void ctx_state_set (CtxState *state, uint32_t key, float value)
+inline static void
+ctx_GRAYA8_to_GRAY4 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
 {
-  if (key != CTX_new_state)
+  uint8_t *pixel = (uint8_t *) buf;
+  while (count--)
     {
-      if (ctx_state_get (state, key) == value)
-        { return; }
-      for (int i = state->gstate.keydb_pos-1;
-           i >= 0 && state->keydb[i].key != CTX_new_state;
-           i--)
-        {
-          if (state->keydb[i].key == key)
-            {
-              state->keydb[i].value = value;
-              return;
-            }
-        }
+      int val = rgba[0];
+      val >>= 4;
+      *pixel = *pixel & (~ (15 << ( (x&1) <<2) ) );
+      *pixel = *pixel | ( (val << ( (x&1) <<2) ) );
+      if ( (x&1) ==1)
+        { pixel+=1; }
+      x++;
+      rgba +=2;
     }
-  if (state->gstate.keydb_pos >= CTX_MAX_KEYDB)
-    { return; }
-  state->keydb[state->gstate.keydb_pos].key = key;
-  state->keydb[state->gstate.keydb_pos].value = value;
-  state->gstate.keydb_pos++;
 }
-
-
-#define CTX_KEYDB_STRING_START (-90000.0)
-#define CTX_KEYDB_STRING_END   (CTX_KEYDB_STRING_START + CTX_STRINGPOOL_SIZE)
-
-static int ctx_float_is_string (float val)
+#else
+inline static void
+ctx_GRAY4_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
 {
-  return val >= CTX_KEYDB_STRING_START && val <= CTX_KEYDB_STRING_END;
+  const uint8_t *pixel = (uint8_t *) buf;
+  while (count--)
+    {
+      int val = (*pixel & (15 << ( (x & 1) <<2) ) ) >> ( (x&1) <<2);
+      val <<= 4;
+      rgba[0] = val;
+      rgba[1] = val;
+      rgba[2] = val;
+      rgba[3] = 255;
+      if ( (x&1) ==1)
+        { pixel+=1; }
+      x++;
+      rgba +=4;
+    }
 }
 
-static int ctx_float_to_string_index (float val)
+inline static void
+ctx_RGBA8_to_GRAY4 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
 {
-  int idx = -1;
-  if (ctx_float_is_string (val))
-  {
-    idx = val - CTX_KEYDB_STRING_START;
-  }
-  return idx;
+  uint8_t *pixel = (uint8_t *) buf;
+  while (count--)
+    {
+      int val = ctx_u8_color_rgb_to_gray (rasterizer->state, rgba);
+      val >>= 4;
+      *pixel = *pixel & (~ (15 << ( (x&1) <<2) ) );
+      *pixel = *pixel | ( (val << ( (x&1) <<2) ) );
+      if ( (x&1) ==1)
+        { pixel+=1; }
+      x++;
+      rgba +=4;
+    }
 }
+#endif
 
-static float ctx_string_index_to_float (int index)
+#endif
+#if CTX_ENABLE_GRAY8
+
+#if CTX_NATIVE_GRAYA8
+inline static void
+ctx_GRAY8_to_GRAYA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
 {
-  return CTX_KEYDB_STRING_START + index;
+  const uint8_t *pixel = (uint8_t *) buf;
+  while (count--)
+    {
+      rgba[0] = pixel[0];
+      rgba[1] = 255;
+      pixel+=1;
+      rgba +=2;
+    }
 }
 
-static void *ctx_state_get_blob (CtxState *state, uint32_t key)
+inline static void
+ctx_GRAYA8_to_GRAY8 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
 {
-  float stored = ctx_state_get (state, key);
-  int idx = ctx_float_to_string_index (stored);
-  if (idx >= 0)
-  {
-     // can we know length?
-     return &state->stringpool[idx];
-  }
-
-  // format number as string?
-  return NULL;
+  uint8_t *pixel = (uint8_t *) buf;
+  while (count--)
+    {
+      pixel[0] = rgba[0];
+      pixel+=1;
+      rgba +=2;
+    }
+}
+#else
+inline static void
+ctx_GRAY8_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
+{
+  const uint8_t *pixel = (uint8_t *) buf;
+  while (count--)
+    {
+      rgba[0] = pixel[0];
+      rgba[1] = pixel[0];
+      rgba[2] = pixel[0];
+      rgba[3] = 255;
+      pixel+=1;
+      rgba +=4;
+    }
 }
 
-static const char *ctx_state_get_string (CtxState *state, uint32_t key)
+inline static void
+ctx_RGBA8_to_GRAY8 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
 {
-  const char *ret = (char*)ctx_state_get_blob (state, key);
-  if (ret && ret[0] == 127)
-    return NULL;
-  return ret;
+  uint8_t *pixel = (uint8_t *) buf;
+  for (int i = 0; i < count; i ++)
+    {
+      pixel[i] = ctx_u8_color_rgb_to_gray (rasterizer->state, rgba + i * 4);
+    }
 }
+#endif
 
+#endif
+#if CTX_ENABLE_GRAYA8
 
-static void ctx_state_set_blob (CtxState *state, uint32_t key, uint8_t *data, int len)
+inline static void
+ctx_GRAYA8_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
 {
-  int idx = state->gstate.stringpool_pos;
-
-  if (idx + len > CTX_STRINGPOOL_SIZE)
-  {
-    ctx_log ("blowing varpool size [%c..]\n", data[0]);
-    //fprintf (stderr, "blowing varpool size [%c%c%c..]\n", data[0],data[1], data[1]?data[2]:0);
-#if 0
-    for (int i = 0; i< CTX_STRINGPOOL_SIZE; i++)
+  const uint8_t *pixel = (const uint8_t *) buf;
+  while (count--)
     {
-       if (i==0) fprintf (stderr, "\n%i ", i);
-       else      fprintf (stderr, "%c", state->stringpool[i]);
+      rgba[0] = pixel[0];
+      rgba[1] = pixel[0];
+      rgba[2] = pixel[0];
+      rgba[3] = pixel[1];
+      pixel+=2;
+      rgba +=4;
     }
-#endif
-    return;
-  }
+}
 
-  memcpy (&state->stringpool[idx], data, len);
-  state->gstate.stringpool_pos+=len;
-  state->stringpool[state->gstate.stringpool_pos++]=0;
-  ctx_state_set (state, key, ctx_string_index_to_float (idx));
+inline static void
+ctx_RGBA8_to_GRAYA8 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
+{
+  uint8_t *pixel = (uint8_t *) buf;
+  while (count--)
+    {
+      pixel[0] = ctx_u8_color_rgb_to_gray (rasterizer->state, rgba);
+      pixel[1] = rgba[3];
+      pixel+=2;
+      rgba +=4;
+    }
 }
 
-static void ctx_state_set_string (CtxState *state, uint32_t key, const char *string)
+#if CTX_NATIVE_GRAYA8
+CTX_INLINE static void ctx_rgba_to_graya_u8 (CtxState *state, uint8_t *in, uint8_t *out)
 {
-  float old_val = ctx_state_get (state, key);
-  int   old_idx = ctx_float_to_string_index (old_val);
+  out[0] = ctx_u8_color_rgb_to_gray (state, in);
+  out[1] = in[3];
+}
 
-  if (old_idx >= 0)
+#if CTX_GRADIENTS
+static void /* Fragment source: writes count GRAYA8 pixels of the fill source's linear gradient to out, sampling from (x, y) and stepping by (dx, dy) per pixel */
+ctx_fragment_linear_gradient_GRAYA8 (CtxRasterizer *rasterizer, float x, float y, void *out, int count, 
float dx, float dy)
+{
+  CtxSource *g = &rasterizer->state->gstate.source_fill;
+        uint8_t *dst = (uint8_t*)out;
+#if CTX_DITHER
+  int scan = rasterizer->scanline / CTX_FULL_AA; /* row index passed to the dither helper */
+  int ox = x; /* integer start column; ox + i is passed to the dither helper */
+#endif
+  for (int i = 0; i < count;i ++)
   {
-    const char *old_string = ctx_state_get_string (state, key);
-    if (old_string && !strcmp (old_string, string))
-      return;
+  float v = ( ( (g->linear_gradient.dx * x + g->linear_gradient.dy * y) /
+                g->linear_gradient.length) -
+              g->linear_gradient.start) * (g->linear_gradient.rdelta); /* 1d gradient position for this sample */
+  {
+    uint8_t rgba[4];
+    ctx_fragment_gradient_1d_RGBA8 (rasterizer, v, 1.0, rgba); /* the gradient is evaluated as RGBA8 first ... */
+    ctx_rgba_to_graya_u8 (rasterizer->state, rgba, dst); /* ... then reduced to gray + alpha at dst */
+   
   }
 
-  if (ctx_str_is_number (string))
-  {
-    ctx_state_set (state, key, strtod (string, NULL));
-    return;
+#if CTX_DITHER
+  ctx_dither_graya_u8 ((uint8_t*)dst, ox + i, scan, rasterizer->format->dither_red_blue,
+                      rasterizer->format->dither_green);
+#endif
+  dst += 2; /* two bytes per GRAYA8 pixel */
+  x += dx;
+  y += dy;
   }
-  // should do same with color
- 
-  // XXX should special case when the string modified is at the
-  //     end of the stringpool.
-  //
-  //     for clips the behavior is howevre ideal, since
-  //     we can have more than one clip per save/restore level
-  ctx_state_set_blob (state, key, (uint8_t*)string, strlen(string));
 }
 
-static int ctx_state_get_color (CtxState *state, uint32_t key, CtxColor *color)
+static void /* Fragment source: writes count GRAYA8 pixels of the fill source's radial gradient to out, sampling from (x, y) and stepping by (dx, dy) per pixel */
+ctx_fragment_radial_gradient_GRAYA8 (CtxRasterizer *rasterizer, float x, float y, void *out, int count, 
float dx, float dy)
 {
-  CtxColor *stored = (CtxColor*)ctx_state_get_blob (state, key);
-  if (stored)
+  uint8_t *dst = (uint8_t*)out;
+#if CTX_DITHER
+  int scan = rasterizer->scanline / CTX_FULL_AA; /* row index passed to the dither helper */
+  int ox = x; /* integer start column; ox+i is passed to the dither helper */
+#endif
+
+  for (int i = 0; i < count;i ++)
   {
-    if (stored->magic == 127)
-    {
-      *color = *stored;
-      return 0;
-    }
+  CtxSource *g = &rasterizer->state->gstate.source_fill; /* re-read every iteration; its value does not depend on i */
+  float v = (ctx_hypotf (g->radial_gradient.x0 - x, g->radial_gradient.y0 - y) -
+              g->radial_gradient.r0) * (g->radial_gradient.rdelta); /* distance from (x0, y0), offset by r0 and scaled by rdelta */
+  {
+    uint8_t rgba[4];
+    ctx_fragment_gradient_1d_RGBA8 (rasterizer, v, 1.0, rgba); /* the gradient is evaluated as RGBA8 first ... */
+    ctx_rgba_to_graya_u8 (rasterizer->state, rgba, dst); /* ... then reduced to gray + alpha at dst */
+  }
+#if CTX_DITHER
+  ctx_dither_graya_u8 ((uint8_t*)dst, ox+i, scan, rasterizer->format->dither_red_blue,
+                      rasterizer->format->dither_green);
+#endif
+  dst += 2; /* two bytes per GRAYA8 pixel */
+  x += dx;
+  y += dy;
   }
-  return -1;
 }
+#endif
 
-static void ctx_state_set_color (CtxState *state, uint32_t key, CtxColor *color)
+static void /* Fragment source for a solid fill: writes the fill color, converted to GRAYA8, to out count times; x, y, dx and dy are not used */
+ctx_fragment_color_GRAYA8 (CtxRasterizer *rasterizer, float x, float y, void *out, int count, float dx, 
float dy)
 {
-  CtxColor mod_color;
-  CtxColor old_color;
-  mod_color = *color;
-  mod_color.magic = 127;
-  if (ctx_state_get_color (state, key, &old_color)==0)
+  CtxSource *g = &rasterizer->state->gstate.source_fill;
+  uint16_t *dst = (uint16_t*)out; /* out is written as 16-bit units, one per 2-byte pixel */
+  uint16_t pix; /* holds the two bytes of one GRAYA8 pixel */
+  ctx_color_get_graya_u8 (rasterizer->state, &g->color, (uint8_t*)&pix); /* the color is converted once, outside the loop */
+  for (int i = 0; i <count; i++)
   {
-    if (!memcmp (&mod_color, &old_color, sizeof (mod_color)))
-      return;
+    dst[i]=pix;
   }
-  ctx_state_set_blob (state, key, (uint8_t*)&mod_color, sizeof (CtxColor));
 }
 
-const char *ctx_get_string (Ctx *ctx, uint32_t hash)
-{
-  return ctx_state_get_string (&ctx->state, hash);
-}
-float ctx_get_float (Ctx *ctx, uint32_t hash)
-{
-  return ctx_state_get (&ctx->state, hash);
-}
-int ctx_get_int (Ctx *ctx, uint32_t hash)
-{
-  return ctx_state_get (&ctx->state, hash);
-}
-void ctx_set_float (Ctx *ctx, uint32_t hash, float value)
-{
-  ctx_state_set (&ctx->state, hash, value);
-}
-void ctx_set_string (Ctx *ctx, uint32_t hash, const char *value)
+static void ctx_fragment_image_GRAYA8 (CtxRasterizer *rasterizer, float x, float y, void *out, int count, 
float dx, float dy)
 {
-  ctx_state_set_string (&ctx->state, hash, value);
+  uint8_t rgba[4*count]; /* Texture fragment source for GRAYA8: samples into this temporary RGBA8 buffer (a variable-length array sized by count), then converts */
+  CtxGState *gstate = &rasterizer->state->gstate;
+  CtxBuffer *buffer = gstate->source_fill.texture.buffer;
+  switch (buffer->format->bpp) /* pick the RGBA8 sampler by the texture format's bpp value */
+    {
+#if CTX_FRAGMENT_SPECIALIZE
+      case 1:  ctx_fragment_image_gray1_RGBA8 (rasterizer, x, y, rgba, count, dx, dy); break;
+      case 24: ctx_fragment_image_rgb8_RGBA8 (rasterizer, x, y, rgba, count, dx, dy);  break;
+      case 32: ctx_fragment_image_rgba8_RGBA8 (rasterizer, x, y, rgba, count, dx, dy); break;
+#endif
+      default: ctx_fragment_image_RGBA8 (rasterizer, x, y, rgba, count, dx, dy);       break; /* generic sampler for every other bpp, and for all of them when specialization is disabled */
+    }
+  for (int i = 0; i < count; i++)
+    ctx_rgba_to_graya_u8 (rasterizer->state, &rgba[i*4], &((uint8_t*)out)[i*2]); /* 4 bytes in, 2 bytes out per pixel */
 }
-void ctx_set_color (Ctx *ctx, uint32_t hash, CtxColor *color)
+
+static CtxFragment ctx_rasterizer_get_fragment_GRAYA8 (CtxRasterizer *rasterizer) /* returns the GRAYA8 fragment function matching the current fill source type */
 {
-  ctx_state_set_color (&ctx->state, hash, color);
+  CtxGState *gstate = &rasterizer->state->gstate;
+  switch (gstate->source_fill.type)
+    {
+      case CTX_SOURCE_TEXTURE:           return ctx_fragment_image_GRAYA8;
+      case CTX_SOURCE_COLOR:           return ctx_fragment_color_GRAYA8;
+#if CTX_GRADIENTS
+      case CTX_SOURCE_LINEAR_GRADIENT: return ctx_fragment_linear_gradient_GRAYA8;
+      case CTX_SOURCE_RADIAL_GRADIENT: return ctx_fragment_radial_gradient_GRAYA8;
+#endif
+    }
+  return ctx_fragment_color_GRAYA8; /* any source type not handled above falls back to the solid-color fragment */
 }
-int  ctx_get_color (Ctx *ctx, uint32_t hash, CtxColor *color)
+
+//ctx_u8_porter_duff(GRAYA8, 2,color,   rasterizer->fragment, rasterizer->state->gstate.blend_mode)
+ctx_u8_porter_duff(GRAYA8, 2,generic, rasterizer->fragment, rasterizer->state->gstate.blend_mode) /* macro instantiation for 2-component GRAYA8; presumably defines ctx_GRAYA8_porter_duff_generic, which ctx_setup_GRAYA8 installs as comp_op - confirm against the macro */
+
+#if CTX_INLINED_NORMAL
+//ctx_u8_porter_duff(GRAYA8, 2,color_normal,   rasterizer->fragment, CTX_BLEND_NORMAL)
+ctx_u8_porter_duff(GRAYA8, 2,generic_normal, rasterizer->fragment, CTX_BLEND_NORMAL) /* same macro with the blend mode fixed to CTX_BLEND_NORMAL; presumably defines ctx_GRAYA8_porter_duff_generic_normal - confirm against the macro */
+
+static void /* GRAYA8 compositing entry point: forwards to ctx_u8_copy_normal with a component count of 2 */
+ctx_GRAYA8_copy_normal (CTX_COMPOSITE_ARGUMENTS)
 {
-  return ctx_state_get_color (&ctx->state, hash, color);
+  ctx_u8_copy_normal (2, rasterizer, dst, src, x0, coverage, count);
 }
-int ctx_is_set (Ctx *ctx, uint32_t hash)
+
+static void /* GRAYA8 compositing entry point: forwards to ctx_u8_clear_normal with a component count of 2 */
+ctx_GRAYA8_clear_normal (CTX_COMPOSITE_ARGUMENTS)
 {
-  return ctx_get_float (ctx, hash) != -0.0f;
+  ctx_u8_clear_normal (2, rasterizer, dst, src, x0, coverage, count);
 }
-int ctx_is_set_now (Ctx *ctx, uint32_t hash)
+
+static void /* GRAYA8 compositing entry point: forwards to ctx_u8_source_over_normal_color with a component count of 2 */
+ctx_GRAYA8_source_over_normal_color (CTX_COMPOSITE_ARGUMENTS)
 {
-  return ctx_is_set (ctx, hash);
+  ctx_u8_source_over_normal_color (2, rasterizer, dst, rasterizer->color, x0, coverage, count); /* rasterizer->color is passed as the source instead of the src argument */
 }
 
-#if CTX_COMPOSITE
-
-#define CTX_REFERENCE 0
-
-
-#define CTX_RGBA8_R_SHIFT  0
-#define CTX_RGBA8_G_SHIFT  8
-#define CTX_RGBA8_B_SHIFT  16
-#define CTX_RGBA8_A_SHIFT  24
-
-#define CTX_RGBA8_R_MASK   (0xff << CTX_RGBA8_R_SHIFT)
-#define CTX_RGBA8_G_MASK   (0xff << CTX_RGBA8_G_SHIFT)
-#define CTX_RGBA8_B_MASK   (0xff << CTX_RGBA8_B_SHIFT)
-#define CTX_RGBA8_A_MASK   (0xff << CTX_RGBA8_A_SHIFT)
-
-#define CTX_RGBA8_RB_MASK  (CTX_RGBA8_R_MASK | CTX_RGBA8_B_MASK)
-#define CTX_RGBA8_GA_MASK  (CTX_RGBA8_G_MASK | CTX_RGBA8_A_MASK)
-
-
-CTX_INLINE static void
-ctx_RGBA8_associate_alpha (uint8_t *u8)
+static void /* GRAYA8 compositing entry point: forwards to ctx_u8_source_copy_normal_color with a component count of 2 */
+ctx_GRAYA8_source_copy_normal_color (CTX_COMPOSITE_ARGUMENTS)
 {
-#if 1
-  uint32_t val = *((uint32_t*)(u8));
-  uint32_t a = u8[3];
-  uint32_t g = (((val & CTX_RGBA8_G_MASK) * a) >> 8) & CTX_RGBA8_G_MASK;
-  uint32_t rb =(((val & CTX_RGBA8_RB_MASK) * a) >> 8) & CTX_RGBA8_RB_MASK;
-  *((uint32_t*)(u8)) = g|rb|(a << CTX_RGBA8_A_SHIFT);
-#else
-  uint32_t a = u8[3];
-  u8[0] = (u8[0] * a + 255) >> 8;
-  u8[1] = (u8[1] * a + 255) >> 8;
-  u8[2] = (u8[2] * a + 255) >> 8;
-#endif
+  ctx_u8_source_copy_normal_color (2, rasterizer, dst, rasterizer->color, x0, coverage, count); /* rasterizer->color is passed as the source instead of the src argument */
 }
+#endif
 
-CTX_INLINE static void
-ctx_RGBA8_associate_alpha_probably_opaque (uint8_t *u8)
+inline static int /* returns 1 only when global alpha is 255 and the fill is a solid color whose GRAYA8 alpha is 255; otherwise 0 */
+ctx_is_opaque_color (CtxRasterizer *rasterizer)
 {
-  uint32_t val = *((uint32_t*)(u8));
-  uint32_t a = val>>24;//u8[3];
-  //if (CTX_UNLIKELY(a==0))
-  //   *((uint32_t*)(u8)) = 0;
-  if (CTX_UNLIKELY(a!=255))
+  CtxGState *gstate = &rasterizer->state->gstate;
+  if (gstate->global_alpha_u8 != 255)
+    return 0; /* any global translucency rules out opacity */
+  if (gstate->source_fill.type == CTX_SOURCE_COLOR)
   {
-     uint32_t g = (((val & CTX_RGBA8_G_MASK) * a) >> 8) & CTX_RGBA8_G_MASK;
-     uint32_t rb =(((val & CTX_RGBA8_RB_MASK) * a) >> 8) & CTX_RGBA8_RB_MASK;
-     *((uint32_t*)(u8)) = g|rb|(a << CTX_RGBA8_A_SHIFT);
+    uint8_t ga[2];
+    ctx_color_get_graya_u8 (rasterizer->state, &gstate->source_fill.color, ga);
+    return ga[1] == 255; /* ga[1] is the alpha byte */
   }
+  return 0; /* non-color sources (textures, gradients) are always reported as not opaque */
 }
 
-CTX_INLINE static uint32_t ctx_bi_RGBA8 (uint32_t isrc00, uint32_t isrc01, uint32_t isrc10, uint32_t isrc11, 
uint8_t dx, uint8_t dy)
+static void /* Chooses the fragment source, compositing function (comp_op) and coverage path (comp) for GRAYA8 from the current graphics state */
+ctx_setup_GRAYA8 (CtxRasterizer *rasterizer)
 {
-#if 0
-#if 0
-  uint8_t ret[4];
-  uint8_t *src00 = (uint8_t*)&isrc00;
-  uint8_t *src10 = (uint8_t*)&isrc10;
-  uint8_t *src01 = (uint8_t*)&isrc01;
-  uint8_t *src11 = (uint8_t*)&isrc11;
-  for (int c = 0; c < 4; c++)
-  {
-    ret[c] = ctx_lerp_u8 (ctx_lerp_u8 (src00[c], src01[c], dx),
-                         ctx_lerp_u8 (src10[c], src11[c], dx), dy);
-  }
-  return  ((uint32_t*)&ret[0])[0];
-#else
-  return ctx_lerp_RGBA8 (ctx_lerp_RGBA8 (isrc00, isrc01, dx),
-                         ctx_lerp_RGBA8 (isrc10, isrc11, dx), dy);
-#endif
-#else
-  uint32_t s0_ga, s0_rb, s1_ga, s1_rb;
-  ctx_lerp_RGBA8_split (isrc00, isrc01, dx, &s0_ga, &s0_rb);
-  ctx_lerp_RGBA8_split (isrc10, isrc11, dx, &s1_ga, &s1_rb);
-  return ctx_lerp_RGBA8_merge (s0_ga, s0_rb, s1_ga, s1_rb, dy);
+  CtxGState *gstate = &rasterizer->state->gstate;
+  int components = 2; /* gray + alpha */
+  rasterizer->fragment = ctx_rasterizer_get_fragment_GRAYA8 (rasterizer);
+  rasterizer->comp_op  = ctx_GRAYA8_porter_duff_generic; /* default; may be replaced by a specialised function below */
+  rasterizer->comp = CTX_COV_PATH_FALLBACK;
+  if (gstate->source_fill.type == CTX_SOURCE_COLOR)
+    {
+      ctx_fragment_color_GRAYA8 (rasterizer, 0,0, rasterizer->color, 1, 0,0); /* render one pixel of the solid color into rasterizer->color */
+      if (gstate->global_alpha_u8 != 255)
+        for (int c = 0; c < components; c ++)
+          rasterizer->color[c] = (rasterizer->color[c] * gstate->global_alpha_u8)/255; /* both gray and alpha are scaled by the global alpha */
+
+      if (rasterizer->format->from_comp)
+        rasterizer->format->from_comp (rasterizer, 0, /* when the format has a from_comp hook, it also converts the color into color_native */
+          &rasterizer->color[0],
+          &rasterizer->color_native,
+          1);
+    }
+
+#if CTX_INLINED_NORMAL
+  if (gstate->compositing_mode == CTX_COMPOSITE_CLEAR)
+    rasterizer->comp_op = ctx_GRAYA8_clear_normal; /* CLEAR is selected regardless of blend mode */
+  else
+    switch (gstate->blend_mode)
+    {
+      case CTX_BLEND_NORMAL:
+        if (gstate->compositing_mode == CTX_COMPOSITE_COPY)
+        {
+          rasterizer->comp_op = ctx_GRAYA8_copy_normal;
+        }
+        else if (gstate->global_alpha_u8 == 0)
+          rasterizer->comp_op = ctx_RGBA8_nop; /* zero global alpha: the RGBA8 nop function is installed */
+        else
+        switch (gstate->source_fill.type)
+        {
+          case CTX_SOURCE_COLOR:
+            if (gstate->compositing_mode == CTX_COMPOSITE_SOURCE_OVER)
+            {
+              if (rasterizer->color[components-1] == 0) /* color[components-1] is the alpha byte after global-alpha scaling */
+                rasterizer->comp_op = ctx_RGBA8_nop;
+              else if (rasterizer->color[components-1] == 255)
+              {
+                rasterizer->comp_op = ctx_GRAYA8_source_copy_normal_color; /* opaque source-over is handled by the copy function */
+                rasterizer->comp = CTX_COV_PATH_GRAYA8_COPY; /* the only case here that leaves the fallback coverage path */
+              }
+              else
+                rasterizer->comp_op = ctx_GRAYA8_source_over_normal_color;
+            }
+            else
+            {
+              rasterizer->comp_op = ctx_GRAYA8_porter_duff_generic_normal;
+            }
+            break;
+          default:
+            rasterizer->comp_op = ctx_GRAYA8_porter_duff_generic_normal;
+            break;
+        }
+        break;
+      default:
+        rasterizer->comp_op = ctx_GRAYA8_porter_duff_generic;
+        break;
+    }
 #endif
+  ctx_setup_apply_coverage (rasterizer); /* called last, after comp_op and comp have been chosen */
 }
 
-#if CTX_GRADIENTS
-#if CTX_GRADIENT_CACHE
-static uint8_t ctx_gradient_cache_u8[CTX_GRADIENT_CACHE_ELEMENTS][4];
-extern int ctx_gradient_cache_valid;
-
+static void /* GRAY4 setup: reuses the GRAYA8 setup, then forces the fallback coverage path */
+ctx_setup_GRAY4 (CtxRasterizer *rasterizer)
+{
+  ctx_setup_GRAYA8 (rasterizer);
+  rasterizer->comp = CTX_COV_PATH_FALLBACK; /* overrides any coverage path chosen by ctx_setup_GRAYA8 */
+}
 
-inline static int ctx_grad_index (float v)
+static void /* GRAY2 setup: reuses the GRAYA8 setup, then forces the fallback coverage path */
+ctx_setup_GRAY2 (CtxRasterizer *rasterizer)
 {
-  int ret = v * (CTX_GRADIENT_CACHE_ELEMENTS - 1) + 0.5f;
-  ret = ctx_maxi (0, ret);
-  ret = ctx_mini (CTX_GRADIENT_CACHE_ELEMENTS-1, ret);
-  return ret;
+  ctx_setup_GRAYA8 (rasterizer);
+  rasterizer->comp = CTX_COV_PATH_FALLBACK; /* overrides any coverage path chosen by ctx_setup_GRAYA8 */
 }
 
-inline static int ctx_grad_index_i (int v)
+static void /* GRAY1 setup: reuses the GRAYA8 setup, then picks the GRAY1 copy coverage path when the opaque-color copy function was selected */
+ctx_setup_GRAY1 (CtxRasterizer *rasterizer)
 {
-  v = v >> 8;
-  return ctx_maxi (0, ctx_mini (CTX_GRADIENT_CACHE_ELEMENTS-1, v));
+  ctx_setup_GRAYA8 (rasterizer);
+  if (rasterizer->comp_op == ctx_GRAYA8_source_copy_normal_color) /* NOTE(review): this function is defined inside #if CTX_INLINED_NORMAL while this reference is not guarded - confirm the build with CTX_INLINED_NORMAL=0 */
+    rasterizer->comp = CTX_COV_PATH_GRAY1_COPY;
+  else
+    rasterizer->comp = CTX_COV_PATH_FALLBACK;
 }
 
+static void /* GRAY8 setup: reuses the GRAYA8 setup, then picks the GRAY8 copy coverage path when the opaque-color copy function was selected */
+ctx_setup_GRAY8 (CtxRasterizer *rasterizer)
+{
+  ctx_setup_GRAYA8 (rasterizer);
+  if (rasterizer->comp_op == ctx_GRAYA8_source_copy_normal_color) /* NOTE(review): this function is defined inside #if CTX_INLINED_NORMAL while this reference is not guarded - confirm the build with CTX_INLINED_NORMAL=0 */
+    rasterizer->comp = CTX_COV_PATH_GRAY8_COPY;
+  else
+    rasterizer->comp = CTX_COV_PATH_FALLBACK;
+}
 
-//static void
-//ctx_gradient_cache_reset (void)
-//{
-//  ctx_gradient_cache_valid = 0;
-//}
 #endif
 
+#endif
+#if CTX_ENABLE_RGB332
 
-CTX_INLINE static void
-_ctx_fragment_gradient_1d_RGBA8 (CtxRasterizer *rasterizer, float x, float y, uint8_t *rgba)
+inline static void /* Unpacks one RGB332 byte (3 bits red, 3 bits green, 2 bits blue) into 8-bit channels written through red, green and blue */
+ctx_332_unpack (uint8_t pixel,
+                uint8_t *red,
+                uint8_t *green,
+                uint8_t *blue)
 {
-  float v = x;
-  CtxGradient *g = &rasterizer->state->gradient;
-  if (v < 0) { v = 0; }
-  if (v > 1) { v = 1; }
+  uint32_t b = (pixel & 3) <<6; /* 2-bit field moved to the top of the byte: 0, 64, 128 or 192 */
+  uint32_t g = ( (pixel >> 2) & 7) <<5; /* 3-bit field moved to the top of the byte: 0 .. 224 in steps of 32 */
+  uint32_t r = ( (pixel >> 5) & 7) <<5;
 
-  if (g->n_stops == 0)
-    {
-      rgba[0] = rgba[1] = rgba[2] = v * 255;
-      rgba[3] = 255;
-      return;
-    }
-  CtxGradientStop *stop      = NULL;
-  CtxGradientStop *next_stop = &g->stops[0];
-  CtxColor *color;
-  for (int s = 0; s < g->n_stops; s++)
-    {
-      stop      = &g->stops[s];
-      next_stop = &g->stops[s+1];
-      if (s + 1 >= g->n_stops) { next_stop = NULL; }
-      if (v >= stop->pos && next_stop && v < next_stop->pos)
-        { break; }
-      stop = NULL;
-      next_stop = NULL;
-    }
-  if (stop == NULL && next_stop)
-    {
-      color = & (next_stop->color);
-    }
-  else if (stop && next_stop == NULL)
-    {
-      color = & (stop->color);
-    }
-  else if (stop && next_stop)
-    {
-      uint8_t stop_rgba[4];
-      uint8_t next_rgba[4];
-      ctx_color_get_rgba8 (rasterizer->state, & (stop->color), stop_rgba);
-      ctx_color_get_rgba8 (rasterizer->state, & (next_stop->color), next_rgba);
-      int dx = (v - stop->pos) * 255 / (next_stop->pos - stop->pos);
 #if 1
-      ((uint32_t*)rgba)[0] = ctx_lerp_RGBA8 (((uint32_t*)stop_rgba)[0],
-                                             ((uint32_t*)next_rgba)[0], dx);
+  *blue  = (b >= 192) * 255 + (b < 192) * b; /* saturate the top code to 255; the largest blue value is 192, so a "> 224" test could never fire */
+  *green = (g >= 224) * 255 + (g < 224) * g; /* the largest green/red value is 224, so the test must include it */
+  *red   = (r >= 224) * 255 + (r < 224) * r;
 #else
-      for (int c = 0; c < 4; c++)
-        { rgba[c] = ctx_lerp_u8 (stop_rgba[c], next_rgba[c], dx); }
+  *blue  =  b;
+  *green =  g;
+  *red   =  r;
 #endif
-      ctx_RGBA8_associate_alpha (rgba);
-      return;
-    }
-  else
-    {
-      color = & (g->stops[g->n_stops-1].color);
-    }
-  ctx_color_get_rgba8 (rasterizer->state, color, rgba);
-  if (rasterizer->swap_red_green)
-  {
-    uint8_t tmp = rgba[0];
-    rgba[0] = rgba[2];
-    rgba[2] = tmp;
-  }
-  ctx_RGBA8_associate_alpha (rgba);
 }
 
-#if CTX_GRADIENT_CACHE
-static void
-ctx_gradient_cache_prime (CtxRasterizer *rasterizer);
-#endif
-
-CTX_INLINE static void
-ctx_fragment_gradient_1d_RGBA8 (CtxRasterizer *rasterizer, float x, float y, uint8_t *rgba)
+static inline uint8_t /* Packs 8-bit red, green and blue into one RGB332 byte by keeping the top 3, 3 and 2 bits respectively */
+ctx_332_pack (uint8_t red,
+              uint8_t green,
+              uint8_t blue)
 {
-#if CTX_GRADIENT_CACHE
-  *((uint32_t*)rgba) = *((uint32_t*)(&ctx_gradient_cache_u8[ctx_grad_index(x)][0]));
-#else
- _ctx_fragment_gradient_1d_RGBA8 (rasterizer, x, y, rgba);
-#endif
+  uint8_t c  = (red >> 5) << 5; /* red occupies bits 7..5 */
+  c |= (green >> 5) << 2; /* green occupies bits 4..2 */
+  c |= (blue >> 6); /* blue occupies bits 1..0 */
+  return c;
 }
-#endif
 
-CTX_INLINE static void
-ctx_u8_associate_alpha (int components, uint8_t *u8)
+static inline uint8_t /* Packs the first three bytes of a 32-bit value, taken in memory order as red, green, blue, into RGB332; the fourth byte is ignored */
+ctx_888_to_332 (uint32_t in)
 {
-  for (int c = 0; c < components-1; c++)
-    u8[c] = (u8[c] * u8[components-1] + 255)>>8;
+  uint8_t *rgb=(uint8_t*)(&in); /* byte-wise view of the by-value argument, so the channel order follows the host byte order */
+  return ctx_332_pack (rgb[0],rgb[1],rgb[2]);
 }
 
-#if CTX_GRADIENTS
-#if CTX_GRADIENT_CACHE
-static void
-ctx_gradient_cache_prime (CtxRasterizer *rasterizer)
+static inline uint32_t /* Unpacks one RGB332 byte into the first three bytes (memory order red, green, blue) of a 32-bit value; the fourth byte stays 0 */
+ctx_332_to_888 (uint8_t in)
 {
-  if (ctx_gradient_cache_valid)
-    return;
-  for (int u = 0; u < CTX_GRADIENT_CACHE_ELEMENTS; u++)
-  {
-    float v = u / (CTX_GRADIENT_CACHE_ELEMENTS - 1.0f);
-    _ctx_fragment_gradient_1d_RGBA8 (rasterizer, v, 0.0f, &ctx_gradient_cache_u8[u][0]);
-    //*((uint32_t*)(&ctx_gradient_cache_u8_a[u][0]))= *((uint32_t*)(&ctx_gradient_cache_u8[u][0]));
-    //memcpy(&ctx_gradient_cache_u8_a[u][0], &ctx_gradient_cache_u8[u][0], 4);
-    //ctx_RGBA8_associate_alpha (&ctx_gradient_cache_u8_a[u][0]);
-  }
-  ctx_gradient_cache_valid = 1;
+  uint32_t ret = 0; /* zero-initialised, so the byte that is never written (alpha position) is 0 */
+  uint8_t *rgba=(uint8_t*)&ret;
+  ctx_332_unpack (in,
+                  &rgba[0],
+                  &rgba[1],
+                  &rgba[2]);
+  //rgba[3] = 255;
+  return ret;
 }
-#endif
 
-CTX_INLINE static void
-ctx_fragment_gradient_1d_GRAYA8 (CtxRasterizer *rasterizer, float x, float y, uint8_t *rgba)
+static inline void /* RGB332 -> RGBA8: expands count one-byte pixels read from buf into 4-byte pixels written to rgba; rasterizer and x are not used */
+ctx_RGB332_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
 {
-  float v = x;
-  CtxGradient *g = &rasterizer->state->gradient;
-  if (v < 0) { v = 0; }
-  if (v > 1) { v = 1; }
-  if (g->n_stops == 0)
-    {
-      rgba[0] = rgba[1] = rgba[2] = v * 255;
-      rgba[1] = 255;
-      return;
-    }
-  CtxGradientStop *stop      = NULL;
-  CtxGradientStop *next_stop = &g->stops[0];
-  CtxColor *color;
-  for (int s = 0; s < g->n_stops; s++)
-    {
-      stop      = &g->stops[s];
-      next_stop = &g->stops[s+1];
-      if (s + 1 >= g->n_stops) { next_stop = NULL; }
-      if (v >= stop->pos && next_stop && v < next_stop->pos)
-        { break; }
-      stop = NULL;
-      next_stop = NULL;
-    }
-  if (stop == NULL && next_stop)
-    {
-      color = & (next_stop->color);
-    }
-  else if (stop && next_stop == NULL)
-    {
-      color = & (stop->color);
-    }
-  else if (stop && next_stop)
-    {
-      uint8_t stop_rgba[4];
-      uint8_t next_rgba[4];
-      ctx_color_get_graya_u8 (rasterizer->state, & (stop->color), stop_rgba);
-      ctx_color_get_graya_u8 (rasterizer->state, & (next_stop->color), next_rgba);
-      int dx = (v - stop->pos) * 255 / (next_stop->pos - stop->pos);
-      for (int c = 0; c < 2; c++)
-        { rgba[c] = ctx_lerp_u8 (stop_rgba[c], next_rgba[c], dx); }
-      return;
-    }
-  else
+  const uint8_t *pixel = (uint8_t *) buf;
+  while (count--)
     {
-      color = & (g->stops[g->n_stops-1].color);
+      ctx_332_unpack (*pixel, &rgba[0], &rgba[1], &rgba[2]);
+#if CTX_RGB332_ALPHA
+      if (*pixel == ctx_332_pack (255, 0, 255)) /* transparency key: compare the raw pixel with the value ctx_RGBA8_to_RGB332 stores for alpha 0, independent of how unpacking rounds the channels */
+        { rgba[3] = 0; }
+      else
+#endif
+        { rgba[3] = 255; } /* every other pixel is fully opaque */
+      pixel+=1; /* one source byte per pixel */
+      rgba +=4;
     }
-  ctx_color_get_graya_u8 (rasterizer->state, color, rgba);
 }
 
-CTX_INLINE static void
-ctx_fragment_gradient_1d_RGBAF (CtxRasterizer *rasterizer, float v, float y, float *rgba)
+static inline void /* RGBA8 -> RGB332: packs count 4-byte pixels read from rgba into one-byte pixels written to buf; rasterizer and x are not used */
+ctx_RGBA8_to_RGB332 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
 {
-  CtxGradient *g = &rasterizer->state->gradient;
-  if (v < 0) { v = 0; }
-  if (v > 1) { v = 1; }
-  if (g->n_stops == 0)
-    {
-      rgba[0] = rgba[1] = rgba[2] = v;
-      rgba[3] = 1.0;
-      return;
-    }
-  CtxGradientStop *stop      = NULL;
-  CtxGradientStop *next_stop = &g->stops[0];
-  CtxColor *color;
-  for (int s = 0; s < g->n_stops; s++)
-    {
-      stop      = &g->stops[s];
-      next_stop = &g->stops[s+1];
-      if (s + 1 >= g->n_stops) { next_stop = NULL; }
-      if (v >= stop->pos && next_stop && v < next_stop->pos)
-        { break; }
-      stop = NULL;
-      next_stop = NULL;
-    }
-  if (stop == NULL && next_stop)
-    {
-      color = & (next_stop->color);
-    }
-  else if (stop && next_stop == NULL)
-    {
-      color = & (stop->color);
-    }
-  else if (stop && next_stop)
-    {
-      float stop_rgba[4];
-      float next_rgba[4];
-      ctx_color_get_rgba (rasterizer->state, & (stop->color), stop_rgba);
-      ctx_color_get_rgba (rasterizer->state, & (next_stop->color), next_rgba);
-      int dx = (v - stop->pos) / (next_stop->pos - stop->pos);
-      for (int c = 0; c < 4; c++)
-        { rgba[c] = ctx_lerpf (stop_rgba[c], next_rgba[c], dx); }
-      return;
-    }
-  else
+  uint8_t *pixel = (uint8_t *) buf;
+  while (count--)
     {
-      color = & (g->stops[g->n_stops-1].color);
+#if CTX_RGB332_ALPHA
+      if (rgba[3]==0)
+        { pixel[0] = ctx_332_pack (255, 0, 255); } /* alpha 0 is stored as the packed (255, 0, 255) key value */
+      else
+#endif
+        { pixel[0] = ctx_332_pack (rgba[0], rgba[1], rgba[2]); } /* alpha is otherwise dropped */
+      pixel+=1; /* one destination byte per pixel */
+      rgba +=4;
     }
-  ctx_color_get_rgba (rasterizer->state, color, rgba);
 }
-#endif
 
 static void
-ctx_fragment_image_RGBA8 (CtxRasterizer *rasterizer, float x, float y, void *out, int count, float dx, float 
dy)
+ctx_composite_RGB332 (CTX_COMPOSITE_ARGUMENTS) /* Composites onto an RGB332 (one byte per pixel) destination: a direct path for solid-color source-over, otherwise a round trip through a temporary RGBA8 buffer */
 {
-  uint8_t *rgba = (uint8_t *) out;
-  CtxSource *g = &rasterizer->state->gstate.source_fill;
-  CtxBuffer *buffer = g->texture.buffer->color_managed;
-  ctx_assert (rasterizer);
-  ctx_assert (g);
-  ctx_assert (buffer);
-
-  for (int i = 0; i < count; i ++)
+  if (CTX_LIKELY(rasterizer->comp_op == ctx_RGBA8_source_over_normal_color))
   {
+    uint32_t si_ga = ((uint32_t*)rasterizer->color)[1]; /* NOTE(review): presumably the RGBA8 setup stores the source color pre-split into green/alpha and red/blue words at these offsets - confirm */
+    uint32_t si_rb = ((uint32_t*)rasterizer->color)[2];
+    uint32_t si_a  = si_ga >> 16; /* source alpha taken from the upper half of the green/alpha word */
 
-  int u = x;
-  int v = y;
-  int width = buffer->width;
-  int height = buffer->height;
-  if ( u < 0 || v < 0 ||
-       u >= width ||
-       v >= height)
-    {
-      *((uint32_t*)(rgba)) = 0;
-    }
-  else
-    {
-      int bpp = buffer->format->bpp/8;
-      if (rasterizer->state->gstate.image_smoothing)
-      {
-      uint8_t *src00 = (uint8_t *) buffer->data;
-      src00 += v * buffer->stride + u * bpp;
-      uint8_t *src01 = src00;
-      if ( u + 1 < width)
-      {
-        src01 = src00 + bpp;
-      }
-      uint8_t *src11 = src01;
-      uint8_t *src10 = src00;
-      if ( v + 1 < height)
-      {
-        src10 = src00 + buffer->stride;
-        src11 = src01 + buffer->stride;
-      }
-      float dx = (x-(int)(x)) * 255.9;
-      float dy = (y-(int)(y)) * 255.9;
+    uint32_t si_gaf = (((uint32_t*)rasterizer->color)[1] << 8) + 255; /* source terms pre-scaled for the full-coverage case */
+    uint32_t si_rbf = (((uint32_t*)rasterizer->color)[2] << 8) + 255;
 
-      switch (bpp)
+    while (count--)
+    {
+      if (CTX_LIKELY(*coverage == 255))
       {
-      case 1:
-        rgba[0] = rgba[1] = rgba[2] = ctx_lerp_u8 (ctx_lerp_u8 (src00[0], src01[0], dx),
-                               ctx_lerp_u8 (src10[0], src11[0], dx), dy);
-        rgba[3] = 255;
-        break;
-      case 2:
-        rgba[0] = rgba[1] = rgba[2] = ctx_lerp_u8 (ctx_lerp_u8 (src00[0], src01[0], dx),
-                               ctx_lerp_u8 (src10[0], src11[0], dx), dy);
-        rgba[3] = ctx_lerp_u8 (ctx_lerp_u8 (src00[1], src01[1], dx),
-                               ctx_lerp_u8 (src10[1], src11[1], dx), dy);
-        break;
-      case 3:
-      for (int c = 0; c < bpp; c++)
-        { rgba[c] = ctx_lerp_u8 (ctx_lerp_u8 (src00[c], src01[c], dx),
-                                 ctx_lerp_u8 (src10[c], src11[c], dx), dy);
-                
-        }
-        rgba[3]=255;
-        break;
-      break;
-      case 4:
-      for (int c = 0; c < bpp; c++)
-        { rgba[c] = ctx_lerp_u8 (ctx_lerp_u8 (src00[c], src01[c], dx),
-                                 ctx_lerp_u8 (src10[c], src11[c], dx), dy);
-                
-        }
-      }
+        uint32_t rcov  = 255-*coverage++; /* always 0 in this branch, because coverage is 255 */
+        uint32_t di    = ctx_332_to_888 (*((uint8_t*)dst));
+        uint32_t di_ga = ((di & 0xff00ff00) >> 8);
+        uint32_t di_rb = (di & 0x00ff00ff);
+        *((uint8_t*)(dst)) = /* store exactly one byte: the destination has one byte per pixel, so a 16-bit store would also overwrite the following pixel */
+        ctx_888_to_332((((si_rbf + di_rb * rcov) & 0xff00ff00) >> 8)  |
+         (((si_gaf) + di_ga * rcov) & 0xff00ff00));
+         dst+=1;
       }
       else
       {
-      uint8_t *src = (uint8_t *) buffer->data;
-      src += v * buffer->stride + u * bpp;
-      switch (bpp)
-        {
-          case 1:
-            for (int c = 0; c < 3; c++)
-              { rgba[c] = src[0]; }
-            rgba[3] = 255;
-            break;
-          case 2:
-            for (int c = 0; c < 3; c++)
-              { rgba[c] = src[0]; }
-            rgba[3] = src[1];
-            break;
-          case 3:
-            for (int c = 0; c < 3; c++)
-              { rgba[c] = src[c]; }
-            rgba[3] = 255;
-            break;
-          case 4:
-            for (int c = 0; c < 4; c++)
-              { rgba[c] = src[c]; }
-            break;
-        }
-      }
-      if (rasterizer->swap_red_green)
-      {
-        uint8_t tmp = rgba[0];
-        rgba[0] = rgba[2];
-        rgba[2] = tmp;
+        uint32_t cov   = *coverage++;
+        uint32_t rcov  = (((255+si_a * cov)>>8))^255; /* weight applied to the destination: 255 minus the coverage-scaled source alpha */
+        uint32_t di    = ctx_332_to_888 (*((uint8_t*)dst));
+        uint32_t di_ga = ((di & 0xff00ff00) >> 8);
+        uint32_t di_rb = (di & 0x00ff00ff);
+        *((uint8_t*)(dst)) = /* store exactly one byte, as in the full-coverage branch */
+        ctx_888_to_332((((si_rb * cov + 0xff00ff + di_rb * rcov) & 0xff00ff00) >> 8)  |
+         ((si_ga * cov + 0xff00ff + di_ga * rcov) & 0xff00ff00));
+         dst+=1;
       }
     }
-    ctx_RGBA8_associate_alpha_probably_opaque (rgba); // XXX: really?
-    rgba += 4;
-    x += dx;
-    y += dy;
+    return;
   }
+  uint8_t pixels[count * 4]; /* generic path: temporary RGBA8 copy of the span (variable-length array sized by count) */
+  ctx_RGB332_to_RGBA8 (rasterizer, x0, dst, &pixels[0], count);
+  rasterizer->comp_op (rasterizer, &pixels[0], rasterizer->color, x0, coverage, count); /* composite in RGBA8 with whichever comp_op is installed */
+  ctx_RGBA8_to_RGB332 (rasterizer, x0, &pixels[0], dst, count); /* pack the result back into the destination */
 }
 
-#if CTX_DITHER
-static inline int ctx_dither_mask_a (int x, int y, int c, int divisor)
+#endif
+#if CTX_ENABLE_RGB565 | CTX_ENABLE_RGB565_BYTESWAPPED
+
+static inline void /* Unpacks one RGB565 pixel into 8-bit channels written through red, green and blue; the two bytes of pixel are swapped first when byteswap is non-zero */
+ctx_565_unpack (const uint16_t pixel,
+                uint8_t *red,
+                uint8_t *green,
+                uint8_t *blue,
+                const int byteswap)
 {
-  /* https://pippin.gimp.org/a_dither/ */
-  return ( ( ( ( (x + c * 67) + y * 236) * 119) & 255 )-127) / divisor;
+  uint16_t byteswapped;
+  if (byteswap)
+    { byteswapped = (pixel>>8) | (pixel<<8); }
+  else
+    { byteswapped  = pixel; }
+  uint8_t b  =  (byteswapped & 31) <<3; /* 5-bit blue from bits 4..0, moved to the top of the byte (at most 248) */
+  uint8_t g  = ( (byteswapped>>5) & 63) <<2; /* 6-bit green from bits 10..5 (at most 252) */
+  uint8_t r  = ( (byteswapped>>11) & 31) <<3; /* 5-bit red from bits 15..11 (at most 248) */
+
+#if 0
+  *blue  = (b > 248) * 255 + (b <= 248) * b;
+  *green = (g > 248) * 255 + (g <= 248) * g;
+  *red   = (r > 248) * 255 + (r <= 248) * r;
+#else
+  *blue = b; /* active branch: the low bits stay 0, so no channel reaches 255 */
+  *green = g;
+  *red = r;
+#endif
 }
 
-inline static void
-ctx_dither_rgba_u8 (uint8_t *rgba, int x, int y, int dither_red_blue, int dither_green)
+static inline uint32_t /* Unpacks one RGB565 pixel into a 32-bit value with red in bits 0..7, green in 8..15, blue in 16..23 and 0xff in bits 24..31 */
+ctx_565_unpack_32 (const uint16_t pixel,
+                   const int byteswap)
 {
-  if (dither_red_blue == 0)
-    { return; }
-  for (int c = 0; c < 3; c ++)
-    {
-      int val = rgba[c] + ctx_dither_mask_a (x, y, 0, c==1?dither_green:dither_red_blue);
-      rgba[c] = CTX_CLAMP (val, 0, 255);
-    }
+  uint16_t byteswapped;
+  if (byteswap)
+    { byteswapped = (pixel>>8) | (pixel<<8); } /* swap the two bytes of the pixel before decoding */
+  else
+    { byteswapped  = pixel; }
+  uint32_t b   = (byteswapped & 31) <<3; /* at most 248 */
+  uint32_t g = ( (byteswapped>>5) & 63) <<2; /* at most 252 */
+  uint32_t r   = ( (byteswapped>>11) & 31) <<3; /* at most 248 */
+#if 0
+  b = (b > 248) * 255 + (b <= 248) * b;
+  g = (g > 248) * 255 + (g <= 248) * g;
+  r = (r > 248) * 255 + (r <= 248) * r;
+#endif
+
+  return r +  (g << 8) + (b << 16) + (0xff << 24); /* the top byte (alpha position) is always 0xff */
 }
 
-inline static void
-ctx_dither_graya_u8 (uint8_t *rgba, int x, int y, int dither_red_blue, int dither_green)
+static inline uint16_t /* Packs 8-bit red, green and blue into RGB565 by keeping the top 5, 6 and 5 bits; the two result bytes are swapped when byteswap is non-zero */
+ctx_565_pack (const uint8_t  red,
+              const uint8_t  green,
+              const uint8_t  blue,
+              const int      byteswap)
 {
-  if (dither_red_blue == 0)
-    { return; }
-  for (int c = 0; c < 1; c ++)
-    {
-      int val = rgba[c] + ctx_dither_mask_a (x, y, 0, dither_red_blue);
-      rgba[c] = CTX_CLAMP (val, 0, 255);
-    }
+  uint32_t c = (red >> 3) << 11; /* red occupies bits 15..11 */
+  c |= (green >> 2) << 5; /* green occupies bits 10..5 */
+  c |= blue >> 3; /* blue occupies bits 4..0 */
+  if (byteswap)
+    { return (c>>8) | (c<<8); } /* swap bytes */
+  return c;
 }
-#endif
 
-CTX_INLINE static void
-ctx_RGBA8_deassociate_alpha (const uint8_t *in, uint8_t *out)
+static inline uint16_t /* Packs the first three bytes of a 32-bit value, taken in memory order as red, green, blue, into RGB565; the fourth byte is ignored */
+ctx_888_to_565 (uint32_t in, int byteswap)
 {
-    uint32_t val = *((uint32_t*)(in));
-    int a = val >> CTX_RGBA8_A_SHIFT;
-    if (a)
-    {
-    if (a ==255)
-    {
-      *((uint32_t*)(out)) = val;
-    } else
-    {
-      uint32_t g = (((val & CTX_RGBA8_G_MASK) * 255 / a) >> 8) & CTX_RGBA8_G_MASK;
-      uint32_t rb =(((val & CTX_RGBA8_RB_MASK) * 255 / a) >> 8) & CTX_RGBA8_RB_MASK;
-      *((uint32_t*)(out)) = g|rb|(a << CTX_RGBA8_A_SHIFT);
-    }
-    }
-    else
-    {
-      *((uint32_t*)(out)) = 0;
-    }
+  uint8_t *rgb=(uint8_t*)(&in); /* byte-wise view of the by-value argument, so the channel order follows the host byte order */
+  return ctx_565_pack (rgb[0],rgb[1],rgb[2], byteswap);
 }
 
-CTX_INLINE static void
-ctx_u8_deassociate_alpha (int components, const uint8_t *in, uint8_t *out)
+static inline uint32_t /* Unpacks one RGB565 pixel into the first three bytes (memory order red, green, blue) of a 32-bit value; the fourth byte stays 0 */
+ctx_565_to_888 (uint16_t in, int byteswap)
 {
-  if (in[components-1])
-  {
-    if (in[components-1] != 255)
-    for (int c = 0; c < components-1; c++)
-      out[c] = (in[c] * 255) / in[components-1];
-    else
-    for (int c = 0; c < components-1; c++)
-      out[c] = in[c];
-    out[components-1] = in[components-1];
-  }
-  else
-  {
-  for (int c = 0; c < components; c++)
-    out[c] = 0;
-  }
+  uint32_t ret = 0; /* zero-initialised, so the byte that is never written (alpha position) is 0 */
+  uint8_t *rgba=(uint8_t*)&ret;
+  ctx_565_unpack (in,
+                  &rgba[0],
+                  &rgba[1],
+                  &rgba[2],
+                  byteswap);
+  //rgba[3]=255;
+  return ret;
 }
 
-CTX_INLINE static void
-ctx_float_associate_alpha (int components, float *rgba)
+#endif
+#if CTX_ENABLE_RGB565
+
+
+static inline void /* RGB565 -> RGBA8: expands count 16-bit pixels read from buf into 4-byte pixels written to rgba; rasterizer and x are not used */
+ctx_RGB565_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
 {
-  float alpha = rgba[components-1];
-  for (int c = 0; c < components-1; c++)
-    rgba[c] *= alpha;
+  const uint16_t *pixel = (uint16_t *) buf;
+  while (count--)
+    {
+      // XXX : checking the raw value for alpha before unpack will be faster
+      ((uint32_t*)(rgba))[0] = ctx_565_unpack_32 (*pixel, 0); /* writes all four bytes; alpha comes out as 0xff */
+#if CTX_RGB565_ALPHA
+      if (*pixel == ctx_565_pack (255, 0, 255, 0)) /* transparency key: compare the raw pixel with the value ctx_RGBA8_to_RGB565 stores for alpha 0; unpacked red/blue are at most 248, so a test against 255 can never match */
+        { rgba[3] = 0; }
+#endif
+      pixel+=1;
+      rgba +=4;
+    }
 }
 
-CTX_INLINE static void
-ctx_float_deassociate_alpha (int components, float *rgba, float *dst)
+static inline void /* RGBA8 -> RGB565: packs count 4-byte pixels read from rgba into 16-bit pixels written to buf, without byte swapping; rasterizer and x are not used */
+ctx_RGBA8_to_RGB565 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
 {
-  float ralpha = rgba[components-1];
-  if (ralpha != 0.0) ralpha = 1.0/ralpha;
-
-  for (int c = 0; c < components-1; c++)
-    dst[c] = (rgba[c] * ralpha);
-  dst[components-1] = rgba[components-1];
+  uint16_t *pixel = (uint16_t *) buf;
+  while (count--)
+    {
+#if CTX_RGB565_ALPHA
+      if (rgba[3]==0)
+        { pixel[0] = ctx_565_pack (255, 0, 255, 0); } /* alpha 0 is stored as the packed (255, 0, 255) key value */
+      else
+#endif
+        { pixel[0] = ctx_565_pack (rgba[0], rgba[1], rgba[2], 0); } /* alpha is otherwise dropped */
+      pixel+=1;
+      rgba +=4;
+    }
 }
 
-CTX_INLINE static void
-ctx_RGBAF_associate_alpha (float *rgba)
+static void /* forward declaration: ctx_composite_RGB565 compares rasterizer->comp_op against this function */
+ctx_RGBA8_source_over_normal_color (CTX_COMPOSITE_ARGUMENTS);
+static void /* forward declaration; the definition is not in this part of the file */
+ctx_RGBA8_source_copy_normal_color (CTX_COMPOSITE_ARGUMENTS);
+
+static void /* Composites onto an RGB565 (two bytes per pixel) destination: a direct path for solid-color source-over, otherwise a round trip through a temporary RGBA8 buffer */
+ctx_composite_RGB565 (CTX_COMPOSITE_ARGUMENTS)
 {
-  ctx_float_associate_alpha (4, rgba);
+#if 1
+  if (CTX_LIKELY(rasterizer->comp_op == ctx_RGBA8_source_over_normal_color))
+  {
+    uint32_t si_ga = ((uint32_t*)rasterizer->color)[1]; /* NOTE(review): presumably the RGBA8 setup stores the source color pre-split into green/alpha and red/blue words at these offsets - confirm */
+    uint32_t si_rb = ((uint32_t*)rasterizer->color)[2];
+    uint32_t si_a  = si_ga >> 16; /* source alpha taken from the upper half of the green/alpha word */
+
+    uint32_t si_gaf = (((uint32_t*)rasterizer->color)[1] << 8) + 255; /* source terms pre-scaled for the full-coverage case */
+    uint32_t si_rbf = (((uint32_t*)rasterizer->color)[2] << 8) + 255;
+
+    while (count--)
+    {
+      if (CTX_LIKELY(*coverage == 255)) // not vectorizable but we probably
+      {                                 // want to keep it like this
+        uint32_t rcov  = 255-*coverage++; /* always 0 in this branch, because coverage is 255 */
+        uint32_t di    = ctx_565_to_888 (*((uint16_t*)dst), 0);
+        uint32_t di_ga = ((di & 0xff00ff00) >> 8);
+        uint32_t di_rb = (di & 0x00ff00ff);
+        *((uint16_t*)(dst)) =
+        ctx_888_to_565((((si_rbf + di_rb * rcov) & 0xff00ff00) >> 8)  |
+         (((si_gaf) + di_ga * rcov) & 0xff00ff00), 0);
+         dst+=2; /* two destination bytes per pixel */
+      }
+      else
+      {
+        uint32_t cov   = *coverage++;
+        uint32_t rcov  = (((255+si_a * cov)>>8))^255; /* weight applied to the destination: 255 minus the coverage-scaled source alpha */
+        uint32_t di    = ctx_565_to_888 (*((uint16_t*)dst), 0);
+        uint32_t di_ga = ((di & 0xff00ff00) >> 8);
+        uint32_t di_rb = (di & 0x00ff00ff);
+        *((uint16_t*)(dst)) =
+        ctx_888_to_565((((si_rb * cov + 0xff00ff + di_rb * rcov) & 0xff00ff00) >> 8)  |
+         ((si_ga * cov + 0xff00ff + di_ga * rcov) & 0xff00ff00), 0);
+         dst+=2;
+      }
+    }
+    return;
+  }
+#endif
+
+  uint8_t pixels[count * 4]; /* generic path: temporary RGBA8 copy of the span (variable-length array sized by count) */
+  ctx_RGB565_to_RGBA8 (rasterizer, x0, dst, &pixels[0], count);
+  rasterizer->comp_op (rasterizer, &pixels[0], rasterizer->color, x0, coverage, count); /* composite in RGBA8 with whichever comp_op is installed */
+  ctx_RGBA8_to_RGB565 (rasterizer, x0, &pixels[0], dst, count); /* pack the result back into the destination */
 }
+#endif
+#if CTX_ENABLE_RGB565_BYTESWAPPED
 
-CTX_INLINE static void
-ctx_RGBAF_deassociate_alpha (float *rgba, float *dst)
+static inline void // Unpack `count` 16-bit RGB565 pixels from buf into 4-byte RGBA8 pixels at rgba (unpack flag 1, unlike ctx_RGB565_to_RGBA8).
+ctx_RGB565_BS_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count) // rasterizer and x are not used in this body
 {
-  ctx_float_deassociate_alpha (4, rgba, dst);
+  const uint16_t *pixel = (uint16_t *) buf;
+  while (count--)
+    {
+      //ctx_565_unpack (*pixel, &rgba[0], &rgba[1], &rgba[2], 1);
+      ((uint32_t*)(rgba))[0] = ctx_565_unpack_32 (*pixel, 1); // writes all four output bytes with one 32-bit store; 1 is presumably the byteswap flag
+#if CTX_RGB565_ALPHA
+      if (rgba[0]==255 && rgba[2] == 255 && rgba[1]==0) // (255,0,255) is the transparent key color
+        { rgba[3] = 0; }
+      else
+        { rgba[3] = 255; } // every other color is forced opaque
+#endif
+      pixel+=1; // next 16-bit input pixel
+      rgba +=4; // next 4-byte output pixel
+    }
 }
 
-
-static inline void ctx_swap_red_green_u8 (void *data)
+static inline void // Pack `count` 4-byte RGBA8 pixels from rgba into 16-bit RGB565 pixels at buf (pack flag 1, unlike ctx_RGBA8_to_RGB565).
+ctx_RGBA8_to_RGB565_BS (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count) // rasterizer and x are not used in this body
 {
-  uint8_t *rgba = (uint8_t*)data;
-  uint8_t tmp = rgba[0];
-  rgba[0] = rgba[2];
-  rgba[2] = tmp;
+  uint16_t *pixel = (uint16_t *) buf;
+  while (count--)
+    {
+#if CTX_RGB565_ALPHA
+      if (rgba[3]==0) // alpha 0 is stored as the key color (255,0,255)
+        { pixel[0] = ctx_565_pack (255, 0, 255, 1); }
+      else
+#endif
+        { pixel[0] = ctx_565_pack (rgba[0], rgba[1], rgba[2], 1); } // alpha is otherwise dropped
+      pixel+=1; // next 16-bit output pixel
+      rgba +=4; // next 4-byte input pixel
+    }
 }
 
 static void
-ctx_fragment_swap_red_green_u8 (void *out, int count)
+ctx_composite_RGB565_BS (CTX_COMPOSITE_ARGUMENTS) // Same structure as ctx_composite_RGB565, but every 565 conversion is called with flag 1 instead of 0.
 {
-  uint8_t *rgba = (uint8_t*)out;
-  for (int x = 0; x < count; x++)
+#if 1
+  if (CTX_LIKELY(rasterizer->comp_op == ctx_RGBA8_source_over_normal_color)) // fast path: blend in place, no RGBA8 scratch buffer
   {
-    ctx_swap_red_green_u8 (rgba);
-    rgba += 4;
+    uint32_t si_ga = ((uint32_t*)rasterizer->color)[1]; // NOTE(review): words 1 and 2 of color are presumably the pre-split green/alpha and red/blue pairs - confirm where color is filled in
+    uint32_t si_rb = ((uint32_t*)rasterizer->color)[2];
+    uint32_t si_a  = si_ga >> 16; // alpha is read from the upper half of the green/alpha word
+
+    uint32_t si_gaf = (((uint32_t*)rasterizer->color)[1] << 8) + 255; // same two words shifted up by 8 plus 255; only the full-coverage branch uses them
+    uint32_t si_rbf = (((uint32_t*)rasterizer->color)[2] << 8) + 255;
+
+    while (count--)
+    {
+      if (CTX_LIKELY(*coverage == 255))
+      {
+        uint32_t rcov  = 255-*coverage++; // always 0 in this branch, so the dst terms below vanish
+        uint32_t di    = ctx_565_to_888 (*((uint16_t*)dst), 1); // NOTE(review): si_a is not used in this branch - confirm this is intended for translucent colors
+        uint32_t di_ga = ((di & 0xff00ff00) >> 8);
+        uint32_t di_rb = (di & 0x00ff00ff);
+        *((uint16_t*)(dst)) =
+        ctx_888_to_565((((si_rbf + di_rb * rcov) & 0xff00ff00) >> 8)  |
+         (((si_gaf) + di_ga * rcov) & 0xff00ff00), 1);
+         dst+=2; // advance one 16-bit pixel
+      }
+      else
+      {
+        uint32_t cov   = *coverage++;
+        uint32_t rcov  = (((255+si_a * cov)>>8))^255; // inverse of the coverage-scaled source alpha (xor with 255 equals 255-x for x in 0..255)
+        uint32_t di    = ctx_565_to_888 (*((uint16_t*)dst), 1);
+        uint32_t di_ga = ((di & 0xff00ff00) >> 8); // two 8-bit channels per word so each multiply handles a pair
+        uint32_t di_rb = (di & 0x00ff00ff);
+        *((uint16_t*)(dst)) =
+        ctx_888_to_565((((si_rb * cov + 0xff00ff + di_rb * rcov) & 0xff00ff00) >> 8)  |
+         ((si_ga * cov + 0xff00ff + di_ga * rcov) & 0xff00ff00), 1);
+         dst+=2; // advance one 16-bit pixel
+      }
+    }
+    return;
   }
+#endif
+
+  uint8_t pixels[count * 4]; // generic path: variable-length RGBA8 scratch buffer on the stack, 4 bytes per pixel
+  ctx_RGB565_BS_to_RGBA8 (rasterizer, x0, dst, &pixels[0], count); // expand the destination span,
+  rasterizer->comp_op (rasterizer, &pixels[0], rasterizer->color, x0, coverage, count); // composite in RGBA8,
+  ctx_RGBA8_to_RGB565_BS (rasterizer, x0, &pixels[0], dst, count); // then pack the result back into dst
 }
+#endif
 
-/**** rgb8 ***/
+static inline float ctx_fmod1f (float val) // Returns val minus its integer part.
+{
+  int vali = val; // float-to-int conversion truncates toward zero; val must fit in an int
+  return val - vali; // keeps the sign of val, so negative inputs give a result in (-1, 0]
+}
 
-static void
-ctx_fragment_image_rgb8_RGBA8_box (CtxRasterizer *rasterizer,
-                                   float x,
-                                   float y,
-                                   void *out, int count, float dx, float dy)
+
+static inline uint32_t // Blend packed pixel src over dst, scaled by coverage cov (0..255); returns the blended packed pixel.
+ctx_over_RGBA8 (uint32_t dst, uint32_t src, uint32_t cov)
 {
-  uint8_t *rgba = (uint8_t *) out;
-  CtxSource *g = &rasterizer->state->gstate.source_fill;
-  CtxBuffer *buffer = g->texture.buffer->color_managed;
-  int width = buffer->width;
-  int height = buffer->height;
+  uint32_t si_ga = (src & 0xff00ff00) >> 8; // bytes 1 and 3 of src moved down into the 0x00ff00ff lanes
+  uint32_t si_rb = src & 0x00ff00ff; // bytes 0 and 2 of src
+  uint32_t si_a  = si_ga >> 16; // top byte of src, used as alpha
+  uint32_t rcov  = ((255+si_a * cov)>>8)^255; // 255 minus the coverage-scaled alpha: the weight left for dst
+  uint32_t di_ga = ( dst & 0xff00ff00) >> 8;
+  uint32_t di_rb = dst & 0x00ff00ff;
+  return // src channels are scaled by cov only, not by alpha, so src is treated as already alpha-multiplied
+     ((((si_rb * cov) + 0xff00ff + (di_rb * rcov)) & 0xff00ff00) >> 8)  | // 0xff00ff adds 255 to each lane before the divide-by-256
+      (((si_ga * cov) + 0xff00ff + (di_ga * rcov)) & 0xff00ff00);
+}
 
-  for (int i = 0; i < count; i++)
-  {
 
-  int u = x;
-  int v = y;
-  if ( u < 0 || v < 0 ||
-       u >= width ||
-       v >= height)
-    {
-      *((uint32_t*)(rgba))= 0;
-    }
-  else
-    {
-      int bpp = 3;
-      rgba[3]=255;
-      float factor = ctx_matrix_get_scale (&rasterizer->state->gstate.transform);
-          int dim = (1.0 / factor) / 2;
-          uint64_t sum[4]={0,0,0,0};
-          int count = 0;
-          for (int ou = - dim; ou < dim; ou++)
-          for (int ov = - dim; ov < dim; ov++)
-          {
-            uint8_t *src = (uint8_t *) buffer->data;
+static inline uint32_t // Blend packed pixel src over dst at full coverage; same arithmetic as ctx_over_RGBA8 with cov fixed at 255.
+ctx_over_RGBA8_full (uint32_t dst, uint32_t src)
+{
+  uint32_t si_ga = (src & 0xff00ff00) >> 8; // bytes 1 and 3 of src moved down into the 0x00ff00ff lanes
+  uint32_t si_rb = src & 0x00ff00ff; // bytes 0 and 2 of src
+  uint32_t si_a  = si_ga >> 16; // top byte of src, used as alpha
+  uint32_t rcov  = si_a^255; // 255 - alpha: the weight left for dst
+  uint32_t di_ga = (dst & 0xff00ff00) >> 8;
+  uint32_t di_rb = dst & 0x00ff00ff;
+  return
+     ((((si_rb * 255) + 0xff00ff + (di_rb * rcov)) & 0xff00ff00) >> 8)  | // 0xff00ff adds 255 to each lane before the divide-by-256
+      (((si_ga * 255) + 0xff00ff + (di_ga * rcov)) & 0xff00ff00);
+}
 
-            if (v+ov >= 0 && u+ou >=0 && u + ou < width && v + ov < height)
-            {
-              int o = (v+ov) * width + (u + ou);
-              src += o * bpp;
+static inline uint32_t // Variant of ctx_over_RGBA8 where the caller passes the source already split into si_ga, si_rb and si_a.
+ctx_over_RGBA8_2 (uint32_t dst, uint32_t si_ga, uint32_t si_rb, uint32_t si_a, uint32_t cov)
+{
+  uint32_t rcov  = ((si_a * cov)/255)^255; // dst weight; divides by 255 here, whereas ctx_over_RGBA8 uses (255 + a*cov) >> 8
+  uint32_t di_ga = (dst & 0xff00ff00) >> 8; // bytes 1 and 3 of dst moved down into the 0x00ff00ff lanes
+  uint32_t di_rb = dst & 0x00ff00ff; // bytes 0 and 2 of dst
+  return
+     ((((si_rb * cov) + 0xff00ff + (di_rb * rcov)) & 0xff00ff00) >> 8)  | // 0xff00ff adds 255 to each lane before the divide-by-256
+      (((si_ga * cov) + 0xff00ff + (di_ga * rcov)) & 0xff00ff00);
+}
 
-              for (int c = 0; c < bpp; c++)
-                sum[c] += src[c];
-              count ++;
-            }
-          }
-          if (count)
-          {
-            int recip = 65536/count;
-            for (int c = 0; c < bpp; c++)
-              rgba[c] = sum[c] * recip >> 16;
-          }
-          ctx_RGBA8_associate_alpha_probably_opaque (rgba);
-    }
-    rgba += 4;
-    x += dx;
-    y += dy;
-  }
-#if CTX_DITHER
-//ctx_dither_rgba_u8 (rgba, x, y, rasterizer->format->dither_red_blue,
-//                    rasterizer->format->dither_green);
-#endif
+static inline uint32_t // Full-coverage source-over where the caller passes the source terms ready to add to the dst product.
+ctx_over_RGBA8_full_2 (uint32_t dst, uint32_t si_ga_full, uint32_t si_rb_full, uint32_t si_a)
+{
+  uint32_t rcov = si_a^255; // 255 - alpha: the weight left for dst
+  uint32_t di_ga = ( dst & 0xff00ff00) >> 8; // bytes 1 and 3 of dst moved down into the 0x00ff00ff lanes
+  uint32_t di_rb = dst & 0x00ff00ff; // bytes 0 and 2 of dst
+  return // no source scaling or rounding bias is added here; presumably si_*_full already hold source*255 + 0xff00ff - confirm against the code that fills rasterizer->color
+     ((((si_rb_full) + (di_rb * rcov)) & 0xff00ff00) >> 8)  |
+      (((si_ga_full) + (di_ga * rcov)) & 0xff00ff00);
 }
 
-static void
-ctx_fragment_image_rgb8_RGBA8_box_swap_red_green (CtxRasterizer *rasterizer,
-                                  float x,
-                                  float y,
-                                  void *out, int count, float dx, float dy)
+static inline void ctx_span_set_color (uint32_t *dst_pix, uint32_t val, int count) // Store val into `count` consecutive 32-bit pixels starting at dst_pix.
 {
-  ctx_fragment_image_rgb8_RGBA8_box (rasterizer, x, y, out, count, dx, dy);
-  ctx_fragment_swap_red_green_u8 (out, count);
+  if (count>0) // a zero or negative count writes nothing
+  while(count--)
+    *dst_pix++=val;
 }
 
+static inline void ctx_span_set_colorb (uint32_t *dst_pix, uint32_t val, int count) // Same fill as ctx_span_set_color, but without the count > 0 guard.
+{
+  while(count--) // a negative count would not terminate at zero here, so callers must pass count >= 0
+    *dst_pix++=val;
+}
 
-#if CTX_FRAGMENT_SPECIALIZE
-static void
-ctx_fragment_image_rgb8_RGBA8_bi (CtxRasterizer *rasterizer,
-                                  float x,
-                                  float y,
-                                  void *out, int count, float dx, float dy)
+static inline void ctx_span_set_color_x4 (uint32_t *dst_pix, uint32_t *val, int count) // Repeat the four pixels val[0..3] `count` times, writing 4*count 32-bit pixels at dst_pix.
 {
-  uint8_t *rgba = (uint8_t *) out;
+  if (count>0) // a zero or negative count writes nothing
+  while(count--)
+  {
+    *dst_pix++=val[0];
+    *dst_pix++=val[1];
+    *dst_pix++=val[2];
+    *dst_pix++=val[3];
+  }
+}
+
+#if CTX_FAST_FILL_RECT
+
+static void ctx_RGBA8_image_rgba8_RGBA8_nearest_fill_rect (CtxRasterizer *rasterizer, int x0, int y0, int 
x1, int y1, int copy)
+{
+  float u0 = 0; float v0 = 0; // Fills the inclusive rect (x0,y0)-(x1,y1) from the source_fill texture with nearest sampling; copy != 0 overwrites dst, copy == 0 composites source-over.
+  float ud = 0; float vd = 0;
+  ctx_init_uv (rasterizer, x0, &u0, &v0, &ud, &vd); // yields the start coordinate (u0,v0) and per-pixel step (ud,vd) that are handed to the fragment function below
+
+#if 0
+  rasterizer->scanline = y0 * CTX_FULL_AA;
+#endif
+  uint8_t *dst = ( (uint8_t *) rasterizer->buf);
+  int blit_stride = rasterizer->blit_stride;
+
+#if 1
+  x0 = ctx_maxi (x0, rasterizer->blit_x); // clamp the rectangle to the blit area
+  y0 = ctx_maxi (y0, rasterizer->blit_y);
+  x1 = ctx_mini (x1, rasterizer->blit_x + rasterizer->blit_width - 1);
+  y1 = ctx_mini (y1, rasterizer->blit_y + rasterizer->blit_height - 1);
+#endif
+
+  dst += (y0 - rasterizer->blit_y) * blit_stride; // row offset is relative to blit_y
+  dst += (x0) * rasterizer->format->bpp/8; // column offset uses x0 directly; blit_x is not subtracted
+
+  int width = x1-x0+1; // bounds are inclusive
+  int height = y1-y0+1;
 
   CtxSource *g = &rasterizer->state->gstate.source_fill;
   CtxBuffer *buffer = g->texture.buffer->color_managed;
-  int width = buffer->width;
-  int height = buffer->height;
-
-  for (int i = 0; i < count; i++)
+  int bwidth = buffer->width;
+  int bheight = buffer->height;
+  if (copy) // overwrite destination pixels
   {
+      if (vd == 0.0 && ud == 1.0f && u0 >= 0 && v0 >=0 && u0 + ud * (width - 1)< bwidth && v0 + (height - 1) 
< bheight)
+      {
+        uint32_t *data = ((uint32_t*)buffer->data); // unit step, no vertical drift and the rect lies inside the texture: copy whole rows
+        data += ((int)(v0)) * bwidth + (int)u0; // texture rows are addressed with a pitch of bwidth 32-bit pixels
 
-  int u = x;
-  int v = y;
-  if ( u < 0 || v < 0 ||
-       u >= width ||
-       v >= height)
-    {
-      *((uint32_t*)(rgba))= 0;
-    }
-  else
-    {
-      int bpp = 3;
-      rgba[3]=255;
-      uint8_t *src00 = (uint8_t *) buffer->data;
-      int stride = buffer->stride;
-      src00 += v * stride + u * bpp;
-      uint8_t *src01 = src00;
-      if ( u + 1 < width)
+        for (int y = 0; y < height; y++)
+        {
+          memcpy (dst, data, width * 4); // 4 bytes per pixel
+          dst += blit_stride;
+          data += bwidth;
+        }
+      }
+      else
+      for (int y = 0; y < height; y++) // general case: the fragment function writes each row straight into dst
       {
-        src01 = src00 + bpp;
+        ctx_fragment_image_rgba8_RGBA8_nearest (rasterizer, u0, v0, &dst[0], width, ud, vd);
+        u0 -= vd; // step to the next row: the per-pixel step (ud,vd) turned by 90 degrees
+        v0 += ud;
+        dst += blit_stride;
       }
-      uint8_t *src11 = src01;
-      uint8_t *src10 = src00;
-      if ( v + 1 < height)
+  }
+  else // composite source-over onto the destination
+  {
+      if (vd == 0.0 && ud == 1.0f && u0 >= 0 && v0 >=0 && u0 + ud * (width-1) < bwidth && v0 + height -1 < 
bheight)
       {
-        src10 = src00 + stride;
-        src11 = src01 + stride;
+        uint32_t *data = ((uint32_t*)buffer->data); // same 1:1 case as above: composite directly from texture rows
+        data += ((int)(v0)) * bwidth + (int)u0; 
+        for (int y = 0; y < height; y++)
+        {
+          ctx_RGBA8_source_over_normal_full_cov_buf (rasterizer,
+           dst, NULL, x0, NULL, width, (uint8_t*)&data[0]);
+          u0 -= vd; // u0/v0 are not read again in this loop; rows come from data
+          v0 += ud;
+          dst += blit_stride;
+          data += bwidth;
+        }
       }
-      float dx = (x-(int)(x)) * 255.9f;
-      float dy = (y-(int)(y)) * 255.9f;
-      for (int c = 0; c < bpp; c++)
+      else
+      for (int y = 0; y < height; y++) // general case: sample into a scratch row, then composite it
       {
-        rgba[c] = ctx_lerp_u8 (ctx_lerp_u8 (src00[c], src01[c], dx),
-                               ctx_lerp_u8 (src10[c], src11[c], dx), dy);
+        uint8_t tsrc[width*4]; // variable-length scratch row on the stack, 4 bytes per pixel
+        ctx_fragment_image_rgba8_RGBA8_nearest (rasterizer, u0, v0, &tsrc[0], width, ud, vd);
+        ctx_RGBA8_source_over_normal_full_cov_buf (rasterizer,
+           dst, NULL, x0, NULL, width, &tsrc[0]);
+        u0 -= vd; // step to the next row: the per-pixel step (ud,vd) turned by 90 degrees
+        v0 += ud;
+        dst += blit_stride;
       }
-      ctx_RGBA8_associate_alpha_probably_opaque (rgba);
-    }
-    x += dx;
-    y += dy;
-    rgba += 4;
   }
-#if CTX_DITHER
-//ctx_dither_rgba_u8 (rgba, x, y, rasterizer->format->dither_red_blue,
-//                    rasterizer->format->dither_green);
-#endif
 }
 
-static void
-ctx_fragment_image_rgb8_RGBA8_bi_swap_red_green (CtxRasterizer *rasterizer,
-                                  float x,
-                                  float y,
-                                  void *out, int count, float dx, float dy)
+static inline void ctx_RGBA8_image_rgba8_RGBA8_bi_fill_rect (CtxRasterizer *rasterizer, int x0, int y0, int 
x1, int y1, int copy)
 {
-  ctx_fragment_image_rgb8_RGBA8_bi (rasterizer, x, y, out, count, dx, dy);
-  ctx_fragment_swap_red_green_u8 (out, count);
-}
+  float u0 = 0; float v0 = 0; // Fills the inclusive rect (x0,y0)-(x1,y1) from the source_fill texture with bilinear sampling; copy != 0 overwrites dst, copy == 0 composites source-over.
+  float ud = 0; float vd = 0;
+  ctx_init_uv (rasterizer, x0, &u0, &v0, &ud, &vd); // yields the start coordinate (u0,v0) and per-pixel step (ud,vd)
+
+  float ox = (u0-(int)(u0)); // fractional part of the start coordinate
+  float oy = (v0-(int)(v0));
+
+  if ((ud > 0.99f && ud < 1.01f && // close to 1:1 scale with almost no sub-pixel offset:
+         ox < 0.01 && oy < 0.01))
+  {
+    ctx_RGBA8_image_rgba8_RGBA8_nearest_fill_rect (rasterizer, x0, y0, x1, y1, copy); // delegate to the nearest-neighbour fill instead of interpolating
+    return;
+  }
+  uint8_t *dst = ( (uint8_t *) rasterizer->buf);
+  int blit_stride = rasterizer->blit_stride;
+  dst += (y0 - rasterizer->blit_y) * blit_stride; // row offset is relative to blit_y
+  dst += (x0) * rasterizer->format->bpp/8; // column offset uses x0 directly; blit_x is not subtracted
+
+  unsigned int width = x1-x0+1; // inclusive bounds; no clamping to the blit area here (ctx_composite_fill_rect_aligned clamps before calling)
+  unsigned int height = y1-y0+1;
 
-static CTX_INLINE void
-ctx_fragment_image_rgb8_RGBA8_nearest (CtxRasterizer *rasterizer,
-                                       float x,
-                                       float y,
-                                       void *out, int count, float dx, float dy)
-{
   CtxSource *g = &rasterizer->state->gstate.source_fill;
-  CtxBuffer *buffer = g->texture.buffer;
-  if (buffer->color_managed)
-   buffer = buffer->color_managed;
-  uint8_t *rgba = (uint8_t *) out;
-  uint8_t *src = (uint8_t *) buffer->data;
+  CtxBuffer *buffer = g->texture.buffer->color_managed;
+
   int bwidth = buffer->width;
   int bheight = buffer->height;
-  int stride = buffer->stride;
+  uint8_t tsrc[copy?1:width*4]; /* scratch row for the source-over path; only a 1-byte placeholder when copy is set */
 
-  x += 0.5f;
-  y += 0.5f;
+  //uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;
+  uint32_t *data = ((uint32_t*)buffer->data);
+  uint32_t rb_row[2][width*2]; // two cached, horizontally interpolated texture rows; two 32-bit words per pixel as written by ctx_lerp_RGBA8_split
+  //uint32_t ga_row[2][width];
 
-  if (CTX_UNLIKELY (dy == 0.0f && dx > 0.999f && dx < 1.001f))
-  {
-    int v = y;
-    int u = x;
-  
-    if (v < buffer->height && v > 0)
-    {
-      int o = v * stride + u * 3;
-      int i;
-      for (i = 0; i < count && u < bwidth && u <0; i++)
-      {
-        *((uint32_t*)(rgba))= 0;
-        rgba += 4;
-        o += 3;
-        u+=1;
-      }
+  uint32_t row_u = u0 * 65536; // coordinates and steps are kept in 16.16 fixed point from here on
+  uint32_t row_v = v0 * 65536;
+  int   ui_delta = ud * 65536;
+  int   vi_delta = vd * 65536;
 
-      for (; i < count && u < bwidth; i++)
-      {
-        rgba[0] = src[o];
-        rgba[1] = src[o+1];
-        rgba[2] = src[o+2]; 
-        rgba[3]=255;
-        rgba += 4;
-        o += 3;
-        u+=1;
-      }
-      for (; i < count; i++)
-      {
-        *((uint32_t*)(rgba))= 0;
-        rgba += 4;
-      }
-    }
-    else
-    {
-      for (int i = 0; i < count; i++)
+  int iter = 0; // incremented each time a new texture row is loaded; its parity selects the rb_row slot
+
+  int loaded_v = -1; // texture row currently cached for the "v + 1" neighbour; -1 means none yet
+  int top      = iter % 2;
+
+
+  { // preload previous row for first row
+    uint32_t ui  = row_u;
+    uint32_t vi  = row_v;
+    unsigned int xa=0;
+      for (unsigned int x = 0; x < width; x++, xa+=2) // xa indexes the two words stored per pixel
       {
-        *((uint32_t*)(rgba))= 0;
-        rgba+=4;
+        int u = ui >> 16; // integer texel coordinates
+        int v = vi >> 16;
+        uint32_t  blank = 0; // samples outside the texture read as 0
+        uint32_t *src0 = &blank;
+        uint32_t *src1 = src0;
+    
+        if (CTX_LIKELY (v >= 0 && v < bheight))
+        {
+          if (CTX_LIKELY (u >= 0 && u + 1 < bwidth)) // both horizontal neighbours are inside the texture
+          {
+            src0 = data + u + bwidth * (v);
+            src1 = src0 + 1;
+          }
+          else // at the left or right edge: check each neighbour on its own
+          {
+            if (u >= 0 && u < bwidth)
+              src0 = data + u + bwidth * (v);
+            if (u + 1>= 0 && u + 1 < bwidth)
+              src1 = data + (u+1) + bwidth * (v);
+          }
+        }
+    
+        ctx_lerp_RGBA8_split (*src0, *src1, ui>>8, &rb_row[!top][xa], &rb_row[!top][xa+1]); // horizontal blend stored in the slot opposite to top; ui>>8 carries the fractional u bits (presumably only its low 8 bits are used - confirm)
+        ui += ui_delta;
+        vi += vi_delta;
       }
     }
+
+  for (unsigned int y = 0; y < height; y++) // one destination row per iteration
+  {
+     int top     = iter % 2;
+     uint32_t ui = row_u;
+     uint32_t vi = row_v;
+
+     int v =  (vi >> 16) + 1; // the texture row below the current one
+     uint8_t dv = ((row_v)>> 8); // high 8 bits of the 16-bit v fraction: vertical blend weight
+
+     if (v != loaded_v) // resample only when the neighbour texture row changes
+     {
+       loaded_v = v;
+       unsigned int xa=0;
+       for (unsigned int x = 0; x < width; x++, xa+=2)
+       {
+         int u = ui >> 16;
+         v =  (vi >> 16) + 1;
+         uint32_t  blank = 0; // samples outside the texture read as 0
+         uint32_t *src0 = &blank;
+         uint32_t *src1 = src0;
+         if (CTX_LIKELY (v >= 0 && v < bheight))
+         {
+           if (CTX_LIKELY(u >= 0 && u + 1 < bwidth)) // both horizontal neighbours are inside the texture
+           {
+             src0 = data + u + bwidth * (v);
+             src1 = src0 + 1;
+           }
+           else // at the left or right edge: check each neighbour on its own
+           {
+             if (u >= 0 && u < bwidth)
+               src0 = data + u + bwidth * (v);
+             if (u + 1>= 0 && u + 1 < bwidth)
+               src1 = data + (u+1) + bwidth * (v);
+           }
+         }
+         ctx_lerp_RGBA8_split (*src0, *src1, ui>>8, &rb_row[top][xa], &rb_row[top][xa+1]); // the new row replaces the older of the two cached rows
+         ui += ui_delta;
+         vi += vi_delta;
+       }
+       iter++; // flip which slot counts as top
+       top    = iter % 2;
+     }
+     
+     {
+       uint32_t*dst_i = copy?(uint32_t*)dst:(uint32_t*)tsrc; // write straight to dst when copying, else to the scratch row
+       int ntop = !top;
+       for (unsigned int xa = 0; xa < width * 2; xa+=2)
+       {
+          *dst_i ++ =
+          ctx_lerp_RGBA8_merge (rb_row[top][xa], rb_row[top][xa+1], 
+                                rb_row[ntop][xa], rb_row[ntop][xa+1],
+                                dv); // vertical blend of the two cached rows
+       }
+       if (!copy)
+       ctx_RGBA8_source_over_normal_full_cov_buf (rasterizer,
+          dst, NULL, x0, NULL, width, &tsrc[0]); // composite the scratch row onto dst
+     }
+     row_u -= vi_delta; // step to the next row: the per-pixel step turned by 90 degrees
+     row_v += ui_delta;
+     dst += blit_stride;
   }
-  else
+}
+
+static void
+ctx_composite_fill_rect_aligned (CtxRasterizer *rasterizer,
+                                 int            x0,
+                                 int            y0,
+                                 int            x1,
+                                 int            y1,
+                                 uint8_t        cov)
+{
+  int blit_x = rasterizer->blit_x;
+  int blit_y = rasterizer->blit_y;
+  int blit_width = rasterizer->blit_width;
+  int blit_height = rasterizer->blit_height;
+  int blit_stride = rasterizer->blit_stride;
+
+  x0 = ctx_maxi (x0, blit_x);
+  x1 = ctx_mini (x1, blit_x + blit_width - 1);
+  y0 = ctx_maxi (y0, blit_y);
+  y1 = ctx_mini (y1, blit_y + blit_height - 1);
+
+  int width = x1 - x0 + 1;
+  int height= y1 - y0 + 1;
+
+  if (CTX_UNLIKELY(width <=0 || height <= 0))
+    return;
+
+  CtxCovPath comp = rasterizer->comp;
+
+  uint8_t *dst;
+
+  // this could be done here, but is not used
+  // by a couple of the cases
+#define INIT_ENV do {\
+  rasterizer->scanline = y0 * CTX_FULL_AA; \
+  dst = ( (uint8_t *) rasterizer->buf); \
+  dst += (y0 - blit_y) * blit_stride; \
+  dst += (x0 * rasterizer->format->bpp)/8;}while(0);
+
+  if (cov == 255)
   {
-    int u = x;
-    int v = y;
-    int i;
-    for (i = 0; i < count && u < bwidth && u <0; i++)
+    switch (comp)
     {
-      u = x;
-      v = y;;
-      *((uint32_t*)(rgba))= 0;
-      x += dx;
-      y += dy;
-      rgba += 4;
-    }
-    for (; i < count && u < bwidth; i++)
+    case CTX_COV_PATH_RGBA8_COPY:
     {
-      u = x;
-      v = y;
-    if (CTX_UNLIKELY(v < 0 || v >= bheight))
+      uint32_t color;
+      memcpy (&color, (uint32_t*)rasterizer->color, sizeof (color));
+      INIT_ENV;
+      if (width == 1)
       {
-        *((uint32_t*)(rgba))= 0;
+        for (int y = y0; y <= y1; y++)
+        {
+          uint32_t *dst_i = (uint32_t*)&dst[0];
+          *dst_i = color;
+          dst += blit_stride;
+        }
       }
-    else
+      else
       {
-        int o = v * stride + u * 3;
-        rgba[0] = src[o];
-        rgba[1] = src[o+1];
-        rgba[2] = src[o+2]; 
-        rgba[3]=255;
+        for (int y = y0; y <= y1; y++)
+        {
+#if 0
+          uint32_t *dst_pix = (uint32_t*)&dst[0];
+          int count = width;
+          while(count--)
+            *dst_pix++=color;
+#else
+          ctx_span_set_colorb ((uint32_t*)&dst[0], color, width);
+#endif
+          dst += blit_stride;
+        }
       }
-  
-      rgba += 4;
-      x += dx;
-      y += dy;
+      return;
     }
-      for (; i < count; i++)
+    case CTX_COV_PATH_RGBAF_COPY:
+    case CTX_COV_PATH_GRAY8_COPY:
+    case CTX_COV_PATH_GRAYA8_COPY:
+    case CTX_COV_PATH_RGB565_COPY:
+    case CTX_COV_PATH_RGB332_COPY:
+    case CTX_COV_PATH_RGB8_COPY:
+    case CTX_COV_PATH_CMYK8_COPY:
+    case CTX_COV_PATH_CMYKA8_COPY:
+    {
+      uint8_t *color = (uint8_t*)&rasterizer->color_native;
+      int bytes = rasterizer->format->bpp/8;
+      INIT_ENV;
+
+      switch (bytes)
       {
-        *((uint32_t*)(rgba))= 0;
-        rgba += 4;
-      }
-  }
-#if CTX_DITHER
-  //ctx_dither_rgba_u8 (rgba, x, y, rasterizer->format->dither_red_blue,
-  //                    rasterizer->format->dither_green);
+        case 1:
+          {
+          uint8_t col = *color;
+          if (width == 1)
+          for (int y = y0; y <= y1; y++)
+          {
+            *dst = col;
+            dst += blit_stride;
+          }
+          else
+          for (int y = y0; y <= y1; y++)
+          {
+#if 0
+            uint8_t *dst_i = (uint8_t*)&dst[0];
+            for (int x = 0; x < width; x++) *dst_i++ = col;
+#else
+            memset (dst, col, width);
 #endif
-}
-
-
-static CTX_INLINE void
-ctx_fragment_image_rgb8_RGBA8_nearest_swap_red_green (CtxRasterizer *rasterizer,
-                                                      float x,
-                                                      float y,
-                                                      void *out, int count, float dx, float dy)
-{
-  ctx_fragment_image_rgb8_RGBA8_nearest (rasterizer, x, y, out, count, dx, dy);
-  ctx_fragment_swap_red_green_u8 (out, count);
-}
-
-static void
-ctx_fragment_image_rgb8_RGBA8 (CtxRasterizer *rasterizer,
-                               float x,
-                               float y,
-                               void *out, int count, float dx, float dy)
-{
-  if (rasterizer->state->gstate.image_smoothing)
-  {
-    float factor = ctx_matrix_get_scale (&rasterizer->state->gstate.transform);
-    if (factor <= 0.50f)
+            dst += blit_stride;
+          }
+          }
+          break;
+        case 2:
+          {
+            uint16_t val = ((uint16_t*)color)[0];
+            for (int y = y0; y <= y1; y++)
+            {
+              uint16_t *dst_i = (uint16_t*)&dst[0];
+              for (int x = 0; x < width; x++) *dst_i++ = val;
+              dst += blit_stride;
+            }
+          }
+          break;
+        case 3:
+          for (int y = y0; y <= y1; y++)
+          {
+            uint8_t *dst_i = (uint8_t*)&dst[0];
+            for (int x = 0; x < width; x++)for (int b = 0; b < 3; b++) *dst_i++ = color[b];
+            dst += blit_stride;
+          }
+          break;
+        case 4:
+          {
+            uint32_t val = ((uint32_t*)color)[0];
+            if (width == 1)
+            for (int y = y0; y <= y1; y++)
+            {
+              uint32_t *dst_i = (uint32_t*)&dst[0];
+              *dst_i = val;
+              dst += blit_stride;
+            }
+            else
+            for (int y = y0; y <= y1; y++)
+            {
+              //uint32_t *dst_i = (uint32_t*)&dst[0];
+              ctx_span_set_colorb ((uint32_t*)&dst[0], val, width);
+              dst += blit_stride;
+            }
+          }
+          break;
+        case 5:
+          for (int y = y0; y <= y1; y++)
+          {
+            uint8_t *dst_i = (uint8_t*)&dst[0];
+            for (int x = 0; x < width; x++)for (int b = 0; b < 5; b++) *dst_i++ = color[b];
+            dst += blit_stride;
+          }
+          break;
+        case 16:
+          for (int y = y0; y <= y1; y++)
+          {
+            uint8_t *dst_i = (uint8_t*)&dst[0];
+            for (int x = 0; x < width; x++)for (int b = 0; b < 16; b++) *dst_i++ = color[b];
+            dst += blit_stride;
+          }
+          break;
+        default:
+          for (int y = y0; y <= y1; y++)
+          {
+            uint8_t *dst_i = (uint8_t*)&dst[0];
+            for (int x = 0; x < width; x++)
+              for (int b = 0; b < bytes; b++)
+                *dst_i++ = color[b];
+            dst += blit_stride;
+          }
+      }
+      return;
+    }
+    case CTX_COV_PATH_RGBA8_OVER:
     {
-      if (rasterizer->swap_red_green)
-        ctx_fragment_image_rgb8_RGBA8_box_swap_red_green (rasterizer, x, y, out, count, dx, dy);
+      uint32_t si_ga_full = ((uint32_t*)rasterizer->color)[3];
+      uint32_t si_rb_full = ((uint32_t*)rasterizer->color)[4];
+      uint32_t si_a  = rasterizer->color[3];
+      INIT_ENV;
+
+      if (width == 1)
+      {
+        for (int y = y0; y <= y1; y++)
+        {
+          ((uint32_t*)(dst))[0] = ctx_over_RGBA8_full_2 (
+             ((uint32_t*)(dst))[0], si_ga_full, si_rb_full, si_a);
+          dst += blit_stride;
+        }
+      }
       else
-        ctx_fragment_image_rgb8_RGBA8_box (rasterizer, x, y, out, count, dx, dy);
+      {
+        for (int y = y0; y <= y1; y++)
+        {
+          uint32_t *dst_i = (uint32_t*)&dst[0];
+          for (int i = 0; i < width; i++)
+          {
+            dst_i[i] = ctx_over_RGBA8_full_2 (dst_i[i], si_ga_full, si_rb_full, si_a);
+          }
+          dst += blit_stride;
+        }
+      }
+      return;
     }
-#if CTX_ALWAYS_USE_NEAREST_FOR_SCALE1
-    else if (factor > 0.99f && factor < 1.01f)
+    case CTX_COV_PATH_RGBA8_COPY_FRAGMENT:
     {
-      // XXX missing translate test
-      if (rasterizer->swap_red_green)
-        ctx_fragment_image_rgb8_RGBA8_nearest_swap_red_green (rasterizer, x, y, out, count, dx, dy);
+      CtxFragment fragment = rasterizer->fragment;
+      CtxGState *gstate = &rasterizer->state->gstate;
+      CtxMatrix *transform = &gstate->source_fill.transform;
+      int no_skew_or_rotate = ctx_matrix_no_skew_or_rotate (transform);
+
+      if (fragment == ctx_fragment_image_rgba8_RGBA8_bi &&
+          no_skew_or_rotate)
+      {
+        ctx_RGBA8_image_rgba8_RGBA8_bi_fill_rect (rasterizer, x0, y0, x1, y1, 1);
+      }
+      else if (fragment == ctx_fragment_image_rgba8_RGBA8_nearest
+                     // && no_skew_or_rotate
+                      )
+      {
+        ctx_RGBA8_image_rgba8_RGBA8_nearest_fill_rect (rasterizer, x0, y0, x1, y1, 1);
+      }
       else
-        ctx_fragment_image_rgb8_RGBA8_nearest (rasterizer, x, y, out, count, dx, dy);
+      {
+        INIT_ENV;
+        float u0 = 0; float v0 = 0;
+        float ud = 0; float vd = 0;
+        ctx_init_uv (rasterizer, x0, &u0, &v0, &ud, &vd);
+        for (int y = y0; y <= y1; y++)
+        {
+          fragment (rasterizer, u0, v0, &dst[0], width, ud, vd);
+          u0 -= vd;
+          v0 += ud;
+          dst += blit_stride;
+        }
+      }
+      return;
     }
-#endif
-    else
+    case CTX_COV_PATH_RGBA8_OVER_FRAGMENT:
     {
-      if (rasterizer->swap_red_green)
-        ctx_fragment_image_rgb8_RGBA8_bi_swap_red_green (rasterizer, x, y, out, count, dx, dy);
+      CtxFragment fragment = rasterizer->fragment;
+      CtxGState *gstate = &rasterizer->state->gstate;
+      CtxMatrix *transform = &gstate->source_fill.transform;
+      int no_skew_or_rotate = ctx_matrix_no_skew_or_rotate (transform);
+
+      if (fragment == ctx_fragment_image_rgba8_RGBA8_bi && no_skew_or_rotate)
+        ctx_RGBA8_image_rgba8_RGBA8_bi_fill_rect (rasterizer, x0, y0, x1, y1, 0);
+      else if (fragment == ctx_fragment_image_rgba8_RGBA8_nearest && no_skew_or_rotate)
+        ctx_RGBA8_image_rgba8_RGBA8_nearest_fill_rect (rasterizer, x0, y0, x1, y1, 0);
       else
-        ctx_fragment_image_rgb8_RGBA8_bi (rasterizer, x, y, out, count, dx, dy);
+      {
+        INIT_ENV;
+        ctx_RGBA8_source_over_normal_full_cov_fragment (rasterizer,
+                                &dst[0], NULL, x0, NULL, width, y1-y0+1);
+      }
+      return;
+    }
+    break;
+    default:
+    break;
     }
   }
   else
   {
-    if (rasterizer->swap_red_green)
-      ctx_fragment_image_rgb8_RGBA8_nearest_swap_red_green (rasterizer, x, y, out, count, dx, dy);
-    else
-      ctx_fragment_image_rgb8_RGBA8_nearest (rasterizer, x, y, out, count, dx, dy);
-  }
-#if CTX_DITHER
-  {
-  uint8_t *rgba = (uint8_t*)out;
-  ctx_dither_rgba_u8 (rgba, x, y, rasterizer->format->dither_red_blue,
-                      rasterizer->format->dither_green);
-  }
-#endif
-}
-
-
-/************** rgba8 */
-
-static void
-ctx_fragment_image_rgba8_RGBA8_box (CtxRasterizer *rasterizer,
-                                    float x,
-                                    float y,
-                                    void *out, int count, float dx, float dy)
-{
-  uint8_t *rgba = (uint8_t *) out;
-  CtxSource *g = &rasterizer->state->gstate.source_fill;
-  CtxBuffer *buffer = g->texture.buffer->color_managed;
-
-  for (int i = 0; i < count; i ++)
-  {
-
-  int u = x;
-  int v = y;
-  if ( u < 0 || v < 0 ||
-       u >= buffer->width ||
-       v >= buffer->height)
+    switch (comp)
     {
-      *((uint32_t*)(rgba))= 0;
+    case CTX_COV_PATH_RGBA8_COPY:
+    {
+      uint32_t color;
+      memcpy (&color, (uint32_t*)rasterizer->color, sizeof (color));
+      INIT_ENV;
+      {
+        for (int y = y0; y <= y1; y++)
+        {
+          uint32_t *dst_i = (uint32_t*)&dst[0];
+          for (int i = 0; i < width; i++)
+          {
+            dst_i[i] = ctx_lerp_RGBA8 (dst_i[i], color, cov);
+          }
+          dst += blit_stride;
+        }
+        return;
+      }
     }
-  else
+    case CTX_COV_PATH_RGBAF_COPY:
     {
-      int bpp = 4;
-      float factor = ctx_matrix_get_scale (&rasterizer->state->gstate.transform);
-          int dim = (1.0 / factor) / 2;
-          uint64_t sum[4]={0,0,0,0};
-          int count = 0;
-          int width = buffer->width;
-          int height = buffer->height;
-          for (int ou = - dim; ou < dim; ou++)
-          for (int ov = - dim; ov < dim; ov++)
+      float *color = ((float*)rasterizer->color);
+      float covf = cov / 255.0f;
+      INIT_ENV;
+      {
+        for (int y = y0; y <= y1; y++)
+        {
+          float *dst_f = (float*)&dst[0];
+          for (int i = 0; i < width; i++)
           {
-
-            if (v+ov >= 0 && u+ou >=0 && u + ou < width && v + ov < height)
-            {
-              int o = (v+ov) * width + (u + ou);
-              uint8_t *src = (uint8_t *) buffer->data + o * bpp;
-
-              for (int c = 0; c < bpp; c++)
-                sum[c] += src[c];
-              count ++;
-            }
+            for (int c = 0; c < 4; c++)
+              dst_f[i*4+c] = ctx_lerpf (dst_f[i*4+c], color[c], covf);
           }
-          if (count)
+          dst += blit_stride;
+        }
+        return;
+      }
+    }
+    case CTX_COV_PATH_RGBA8_OVER:
+    {
+      uint32_t color;
+      memcpy (&color, (uint32_t*)rasterizer->color, sizeof (color));
+      INIT_ENV;
+      if (width == 1)
+      {
+        for (int y = y0; y <= y1; y++)
+        {
+          uint32_t *dst_i = (uint32_t*)&dst[0];
+          *dst_i = ctx_over_RGBA8 (*dst_i, color, cov);
+          dst += blit_stride;
+        }
+      }
+      else
+      {
+        for (int y = y0; y <= y1; y++)
+        {
+          uint32_t *dst_i = (uint32_t*)&dst[0];
+          for (int i = 0; i < width; i++)
           {
-            int recip = 65536/count;
-            for (int c = 0; c < bpp; c++)
-              rgba[c] = sum[c]*recip>>16;
+            dst_i[i] = ctx_over_RGBA8 (dst_i[i], color, cov);
           }
-          ctx_RGBA8_associate_alpha_probably_opaque (rgba);
+          dst += blit_stride;
+        }
+      }
+      return;
+    }
+    break;
+    default:
+    break;
     }
-    rgba += 4;
-    x += dx;
-    y += dy;
   }
-#if CTX_DITHER
-//ctx_dither_rgba_u8 (rgba, x, y, rasterizer->format->dither_red_blue,
-//                    rasterizer->format->dither_green);
-#endif
-}
 
+  INIT_ENV;
+#undef INIT_ENV
 
-static void
-ctx_fragment_image_rgba8_RGBA8_nearest (CtxRasterizer *rasterizer,
-                                        float x,
-                                        float y,
-                                        void *out, int count, float dx, float dy)
-{
-  CtxSource *g = &rasterizer->state->gstate.source_fill;
-  CtxBuffer *buffer = g->texture.buffer;
-  if (buffer->color_managed)
-    buffer = buffer->color_managed;
-  int ideltax = dx * 65536;
-  int ideltay = dy * 65536;
-  uint32_t *src = (uint32_t *) buffer->data;
-  uint32_t *dst = (uint32_t*)out;
-  int bwidth  = buffer->width;
-  int bheight = buffer->height;
-  x += 0.5f;
-  y += 0.5f;
 
-#if 1
-  if (CTX_UNLIKELY(ideltay == 0 && ideltax == 65536))
+  /* fallback */
   {
-    int i = 0;
-    int u = x;
-    int v = y;
-    if (!(v >= 0 && v < bheight))
-    {
-      for (i = 0 ; i < count; i++)
-        *dst++ = 0;
-      return;
-    }
-    src += bwidth * v + u;
-    while (count && !(u >= 0))
-    {
-      *dst++ = 0;
-      src ++;
-      u++;
-      count--;
-    }
-    int limit = ctx_mini (count, bwidth - u);
-    if (limit>0)
+    uint8_t coverage[width];
+    memset (coverage, cov, sizeof (coverage) );
+    for (int y = y0; y <= y1; y++)
     {
-      memcpy (dst, src, limit * 4);
-      dst += limit;
-      i = limit;
+      rasterizer->apply_coverage (rasterizer, &dst[0], rasterizer->color, x0, coverage, width);
+      rasterizer->scanline += CTX_FULL_AA;
+      dst += blit_stride;
     }
-    for (;i < count; i++)
-      *dst++ = 0;
+  }
+}
+
+void
+CTX_SIMD_SUFFIX (ctx_composite_fill_rect) (CtxRasterizer *rasterizer,
+                          float          x0,
+                          float          y0,
+                          float          x1,
+                          float          y1,
+                          uint8_t        cov)
+{
+  if((ctx_fmod1f (x0) < 0.01f || ctx_fmod1f(x0) > 0.99f) &&
+     (ctx_fmod1f (y0) < 0.01f || ctx_fmod1f(y0) > 0.99f) &&
+     (ctx_fmod1f (x1) < 0.01f || ctx_fmod1f(x1) > 0.99f) &&
+     (ctx_fmod1f (y1) < 0.01f || ctx_fmod1f(y1) > 0.99f))
+  {
+    /* best-case scenario axis aligned rectangle */
+    ctx_composite_fill_rect_aligned (rasterizer, x0, y0, x1-1, y1-1, 255);
     return;
   }
-#endif
 
-  {
-    int i = 0;
-    float u1 = x + dx * (count-1);
-    float v1 = y + dy * (count-1);
-    uint32_t *edst = ((uint32_t*)out)+count;
-    for (; i < count; )
-    {
-      if ((u1 < 0.0f || v1 < 0.0f || u1 >= bwidth || v1 >= bheight))
-      {
-        *edst-- = 0;
-        count --;
-        u1 -= dx;
-        v1 -= dy;
-      }
-      else break;
-    }
+  int blit_x = rasterizer->blit_x;
+  int blit_y = rasterizer->blit_y;
+  int blit_stride = rasterizer->blit_stride;
+  int blit_width = rasterizer->blit_width;
+  int blit_height = rasterizer->blit_height;
 
+  x0 = ctx_maxf (x0, blit_x);
+  y0 = ctx_maxf (y0, blit_y);
+  x1 = ctx_minf (x1, blit_x + blit_width );
+  y1 = ctx_minf (y1, blit_y + blit_height );
 
-    for (i = 0; i < count; i ++)
-    {
-      if ((x < 0.0f || y < 0.0f || x >= bwidth || y >= bheight))
-      {
-        *dst = 0;
-        dst++;
-        x += dx;
-        y += dy;
-      }
-      else break;
-    }
+  uint8_t left = 255-ctx_fmod1f (x0) * 255;
+  uint8_t top  = 255-ctx_fmod1f (y0) * 255;
+  uint8_t right  = ctx_fmod1f (x1) * 255;
+  uint8_t bottom = ctx_fmod1f (y1) * 255;
 
+  x0 = ctx_floorf (x0);
+  y0 = ctx_floorf (y0);
+  x1 = ctx_floorf (x1+7/8.0);
+  y1 = ctx_floorf (y1+15/15.0);
 
-    uint32_t ix = x * 65536;
-    uint32_t iy = y * 65536;
+  int has_top    = (top < 255);
+  int has_bottom = (bottom <255);
+  int has_right  = (right >0);
+  int has_left   = (left >0);
 
-    for (; i < count; i ++)
-    {
-      *dst = src[(iy>>16) * bwidth + (ix>>16)];
-      ix += ideltax;
-      iy += ideltay;
-      dst++;
-    }
-  }
-}
+  int width = x1 - x0;
 
-static void
-ctx_fragment_image_rgba8_RGBA8_bi (CtxRasterizer *rasterizer,
-                                   float x,
-                                   float y,
-                                   void *out, int count, float dx, float dy)
-{
-  uint8_t *rgba = (uint8_t *) out;
-  float ox = (x-(int)(x));
-  float oy = (y-(int)(y));
+  if ((width >0))
+  {
+     uint8_t *dst = ( (uint8_t *) rasterizer->buf);
+     uint8_t coverage[width+2];
+     dst += (((int)y0) - blit_y) * blit_stride;
+     dst += ((int)x0) * rasterizer->format->bpp/8;
 
-  CtxSource *g = &rasterizer->state->gstate.source_fill;
-  CtxBuffer *buffer = g->texture.buffer->color_managed;
-  const int bwidth = buffer->width;
-  const int bheight = buffer->height;
-  int i = 0;
+     if (has_top)
+     {
+       int i = 0;
+       if (has_left)
+       {
+         coverage[i++] = (top * left + 255) >> 8;
+       }
+       for (int x = x0 + has_left; x < x1 - has_right; x++)
+         coverage[i++] = top;
+       if (has_right)
+         coverage[i++]= (top * right + 255) >> 8;
 
-  if (dy == 0.0f && dx > 0.0f)
+       rasterizer->apply_coverage (rasterizer, dst, rasterizer->color, x0, coverage, width);
+       dst += blit_stride;
+     }
+
+  if (y1-y0-has_top-has_bottom > 0)
   {
-    if (!(y >= 0 && y < bheight))
-    {
-      uint32_t *dst = (uint32_t*)rgba;
-      for (i = 0 ; i < count; i++)
-        *dst++ = 0;
-      return;
-    }
+    if (has_left)
+      ctx_composite_fill_rect_aligned (rasterizer, x0, y0 + has_top,
+                                                   x0, y1 - has_bottom-1, left);
+    if (has_right)
+      ctx_composite_fill_rect_aligned (rasterizer, x1-1, y0 + has_top,
+                                                   x1-1, y1 - has_bottom-1, right);
 
-    if ((dx > 0.99f && dx < 1.01f && 
-         ox < 0.01 && oy < 0.01))
+    if (width - has_left - has_right > 0)
+      ctx_composite_fill_rect_aligned (rasterizer, x0+has_left,y0+has_top,
+                                          x1-has_right-1,y1-has_bottom-1,255);
+
+    dst += blit_stride * ((((int)y1)-has_bottom) - (((int)y0)+has_top) );
+  }
+    if (has_bottom)
     {
-      /* TODO: this could have been rigged up in composite_setup */
-      ctx_fragment_image_rgba8_RGBA8_nearest (rasterizer,
-                                   x, y, out, count, dx, dy);
-      return;
+      int i = 0;
+      if (has_left)
+        coverage[i++] = (bottom * left + 255) >> 8;
+      for (int x = x0 + has_left; x < x1 - has_right; x++)
+        coverage[i++] = bottom;
+      coverage[i++]= (bottom * right + 255) >> 8;
+
+      rasterizer->apply_coverage (rasterizer,dst, rasterizer->color, x0, coverage, width);
     }
-    x+=1; // XXX off by one somewhere? ,, needed for alignment with nearest
+  }
+}
 
-    uint32_t *data = ((uint32_t*)buffer->data);
-    uint32_t yi = y * 65536;
-    uint32_t xi = x * 65536;
-    int xi_delta = dx * 65536;
+void
+CTX_SIMD_SUFFIX(ctx_composite_stroke_rect) (CtxRasterizer *rasterizer,
+                           float          x0,
+                           float          y0,
+                           float          x1,
+                           float          y1,
+                           float          line_width)
+{
+      float lwmod = ctx_fmod1f (line_width);
+      int lw = ctx_floorf (line_width + 0.5f);
+      int is_compat_even = (lw % 2 == 0) && (lwmod < 0.1); // only even linewidths implemented properly
+      int is_compat_odd = (lw % 2 == 1) && (lwmod < 0.1); // odd integer line widths; alignment is tested after subtracting off_x/off_y below
 
-    for (i= 0; i < count; i ++)
-    {
-      int u = xi >> 16;
-      if ( u  < 0 || u >= bwidth-1)
+      float off_x = 0;
+      float off_y = 0;
+
+      if (is_compat_odd)
       {
-        *((uint32_t*)(rgba))= 0;
-        xi += xi_delta;
-        rgba += 4;
+        off_x = 0.5f;
+        off_y = (CTX_FULL_AA/2)*1.0 / (CTX_FULL_AA);
       }
-      else
-        break;
-    }
-
-  int loaded = -4;
-  uint32_t s0_ga = 0, s0_rb = 0, s1_ga = 0, s1_rb = 0;
- 
-  int v = yi >> 16;
-  data += bwidth * v;
-  int dv = (yi >> 8) & 0xff;
 
-  int u = xi >> 16;
+      if((is_compat_odd || is_compat_even) &&
 
-  uint32_t *ndata = data;
-  if (v < bheight-1) ndata += bwidth;
+     ((ctx_fmod1f (x0-off_x) < 0.01f || ctx_fmod1f(x0-off_x) > 0.99f) &&
+     (ctx_fmod1f (y0-off_y) < 0.01f || ctx_fmod1f(y0-off_y) > 0.99f) &&
+     (ctx_fmod1f (x1-off_x) < 0.01f || ctx_fmod1f(x1-off_x) > 0.99f) &&
+     (ctx_fmod1f (y1-off_y) < 0.01f || ctx_fmod1f(y1-off_y) > 0.99f)))
 
-  uint32_t *src0 = data, *src1 = ndata;
 
+      {
+        int bw = lw/2+1;
+        int bwb = lw/2;
 
-  if (xi_delta == 65536 && u < bwidth -1)
-  {
-    int du = (xi >> 8) & 0xff;
+        if (is_compat_even)
+        {
+          bw = lw/2;
+        }
+        /* top */
+        ctx_composite_fill_rect_aligned (rasterizer,
+                                         x0-bwb, y0-bwb,
+                                         x1+bw-1, y0+bw-1, 255);
+        /* bottom */
+        ctx_composite_fill_rect_aligned (rasterizer,
+                                         x0-bwb, y1-bwb,
+                                         x1-bwb-1, y1+bw-1, 255);
+
+        /* left */
+        ctx_composite_fill_rect_aligned (rasterizer,
+                                         x0-bwb, y0+1,
+                                         x0+bw-1, y1-bwb, 255);
+        /* right */
+        ctx_composite_fill_rect_aligned (rasterizer,
+                                         x1-bwb, y0+1,
+                                         x1+bw-1, y1+bw-1, 255);
+      }
+      else
+      {
+        float hw = line_width/2;
+
+
+        /* top */
+        ctx_composite_fill_rect (rasterizer,
+                                 x0+hw, y0-hw,
+                                 x1-hw, y0+hw, 255);
+        /* bottom */
+        ctx_composite_fill_rect (rasterizer,
+                                 x0+hw, y1-hw,
+                                 x1-hw, y1+hw, 255);
+
+        /* left */
+        ctx_composite_fill_rect (rasterizer,
+                                 x0-hw, y0+hw,
+                                 x0+hw, y1-hw, 255);
+        /* right */
+
+        ctx_composite_fill_rect (rasterizer,
+                                 x1-hw, y0+hw,
+                                 x1+hw, y1-hw, 255);
+
+        /* corners */
+
+        ctx_composite_fill_rect (rasterizer,
+                                 x0-hw, y0-hw,
+                                 x0+hw, y0+hw, 255);
+        ctx_composite_fill_rect (rasterizer,
+                                 x1-hw, y1-hw,
+                                 x1+hw, y1+hw, 255);
+        ctx_composite_fill_rect (rasterizer,
+                                 x1-hw, y0-hw,
+                                 x1+hw, y0+hw, 255);
+        ctx_composite_fill_rect (rasterizer,
+                                 x0-hw, y1-hw,
+                                 x0+hw, y1+hw, 255);
+      }
+}
 
-    src0 = data + u;
-    src1 = ndata + u;
-    ctx_lerp_RGBA8_split (src0[0],src1[0], dv, &s1_ga, &s1_rb);
 
-    int limit = bwidth-u;
-    limit = ctx_mini(count,limit);
+#endif
 
-    for (; i < limit; i ++)
-    {
-      s0_ga = s1_ga;
-      s0_rb = s1_rb;
-      ctx_lerp_RGBA8_split (src0[1],src1[1], dv, &s1_ga, &s1_rb);
-      ((uint32_t*)(&rgba[0]))[0] = 
-      ctx_lerp_RGBA8_merge (s0_ga, s0_rb, s1_ga, s1_rb, du);
-      rgba += 4;
-      u++;
-      src0 ++;
-      src1 ++;
-    }
-  }
-  else
+static void
+CTX_SIMD_SUFFIX (ctx_composite_setup) (CtxRasterizer *rasterizer)
+{
+  if (CTX_UNLIKELY (rasterizer->comp_op==NULL))
   {
-
-  for (; i < count; i ++)
+    rasterizer->format->setup (rasterizer);
+#if CTX_GRADIENTS
+#if CTX_GRADIENT_CACHE
+  switch (rasterizer->state->gstate.source_fill.type)
   {
-    if (CTX_UNLIKELY(u >= bwidth-1))
-    {
+    case CTX_SOURCE_LINEAR_GRADIENT:
+    case CTX_SOURCE_RADIAL_GRADIENT:
+      ctx_gradient_cache_prime (rasterizer);
       break;
-    }
-    else if (CTX_LIKELY(loaded + 1 == u))
-    {
-      s0_ga = s1_ga;
-      s0_rb = s1_rb;
-      ctx_lerp_RGBA8_split (src0[1],src1[1], dv, &s1_ga, &s1_rb);
-      src0 ++;
-      src1 ++;
-    }
-    else if (loaded != u)
-    {
-      src0 = data + u;
-      src1 = ndata + u;
-      ctx_lerp_RGBA8_split (src0[0],src1[0], dv, &s0_ga, &s0_rb);
-      ctx_lerp_RGBA8_split (src0[1],src1[1], dv, &s1_ga, &s1_rb);
-    }
-    loaded = u;
-    ((uint32_t*)(&rgba[0]))[0] = 
-      ctx_lerp_RGBA8_merge (s0_ga, s0_rb, s1_ga, s1_rb, ((xi>>8)&0xff));
-    xi += xi_delta;
-    rgba += 4;
-    u = xi >> 16;
-  }
-  }
-
-  }
-  else // //
-  {
-    uint32_t *data = ((uint32_t*)buffer->data);
-    for (i= 0; i < count; i ++)
-    {
-      int u = x;
-      int v = y;
-      int ut = x + 1.5;
-      int vt = y + 1.5;
-      if ( ut  <= 0 || vt  <= 0 || u >= buffer->width || v >= buffer->height)
-      {
-        *((uint32_t*)(rgba))= 0;
-      }
-      else
-        break;
-      x += dx;
-      y += dy;
-      rgba += 4;
-    }
+    case CTX_SOURCE_TEXTURE:
 
-  uint32_t yi = y * 65536;
-  uint32_t xi = x * 65536;
+      _ctx_matrix_multiply (&rasterizer->state->gstate.source_fill.transform,
+                            &rasterizer->state->gstate.source_fill.set_transform,
+                            &rasterizer->state->gstate.transform);
 
-  int yi_delta = dy * 65536;
-  int xi_delta = dx * 65536;
+      ctx_matrix_invert (&rasterizer->state->gstate.source_fill.transform);
 
-  int loaded = -4;
-  uint32_t *src00=data;
-  uint32_t *src01=data;
-  uint32_t *src10=data;
-  uint32_t *src11=data;
+      if (!rasterizer->state->gstate.source_fill.texture.buffer->color_managed)
+        _ctx_texture_prepare_color_management (rasterizer,
+        rasterizer->state->gstate.source_fill.texture.buffer);
+      break;
+  }
+#endif
+#endif
+  }
+}
 
-  int u = xi >> 16;
-  int v = yi >> 16;
-  int offset = bwidth * v + u;
 
-  for (; i < count; i ++)
+CtxPixelFormatInfo CTX_SIMD_SUFFIX(ctx_pixel_formats)[]=
+{
+#if CTX_ENABLE_RGBA8
   {
-  if (CTX_UNLIKELY(
-       u >= buffer->width ||
-       v  <= -65536 ||
-       u  <= -65536 ||
-       v >= buffer->height))
-    {
-      break;
-    }
-#if 1
-  else if (CTX_UNLIKELY(u < 0 || v < 0)) // default to next sample down and to right
+    CTX_FORMAT_RGBA8, 4, 32, 4, 0, 0, CTX_FORMAT_RGBA8,
+    NULL, NULL, NULL, ctx_setup_RGBA8
+  },
+#endif
+#if CTX_ENABLE_BGRA8
   {
-      int got_prev_pix = (u >= 0);
-      int got_prev_row = (v>=0);
-      src11 = data  + offset + bwidth + 1;
-      src10 = src11 - got_prev_pix;
-      src01 = src11 - bwidth * got_prev_row;
-      src00 = src10 - bwidth * got_prev_row;
-  }
+    CTX_FORMAT_BGRA8, 4, 32, 4, 0, 0, CTX_FORMAT_RGBA8,
+    ctx_BGRA8_to_RGBA8, ctx_RGBA8_to_BGRA8, ctx_composite_BGRA8, ctx_setup_RGBA8,
+  },
 #endif
-#if 1
-  else if (loaded + 1 == offset)
+#if CTX_ENABLE_GRAYF
   {
-      src00++;
-      src01++;
-      src10++;
-      src11++;
-  }
+    CTX_FORMAT_GRAYF, 1, 32, 4 * 2, 0, 0, CTX_FORMAT_GRAYAF,
+    NULL, NULL, ctx_composite_GRAYF, ctx_setup_GRAYAF,
+  },
 #endif
-  else if (loaded != offset)
+#if CTX_ENABLE_GRAYAF
   {
-      int next_row = ( v + 1 < bheight) * bwidth;
-      int next_pix = (u + 1 < bwidth);
-      src00 = data  + offset;
-      src01 = src00 + next_pix;
-      src10 = src00 + next_row;
-      src11 = src01 + next_row;
+    CTX_FORMAT_GRAYAF, 2, 64, 4 * 2, 0, 0, CTX_FORMAT_GRAYAF,
+    NULL, NULL, NULL, ctx_setup_GRAYAF,
+  },
+#endif
+#if CTX_ENABLE_RGBAF
+  {
+    CTX_FORMAT_RGBAF, 4, 128, 4 * 4, 0, 0, CTX_FORMAT_RGBAF,
+    NULL, NULL, NULL, ctx_setup_RGBAF,
+  },
+#endif
+#if CTX_ENABLE_RGB8
+  {
+    CTX_FORMAT_RGB8, 3, 24, 4, 0, 0, CTX_FORMAT_RGBA8,
+    ctx_RGB8_to_RGBA8, ctx_RGBA8_to_RGB8, ctx_composite_convert, ctx_setup_RGB8,
+  },
+#endif
+#if CTX_ENABLE_GRAY1
+  {
+#if CTX_NATIVE_GRAYA8
+    CTX_FORMAT_GRAY1, 1, 1, 2, 1, 1, CTX_FORMAT_GRAYA8,
+    ctx_GRAY1_to_GRAYA8, ctx_GRAYA8_to_GRAY1, ctx_composite_convert, ctx_setup_GRAY1,
+#else
+    CTX_FORMAT_GRAY1, 1, 1, 4, 1, 1, CTX_FORMAT_RGBA8,
+    ctx_GRAY1_to_RGBA8, ctx_RGBA8_to_GRAY1, ctx_composite_convert, ctx_setup_RGB,
+#endif
+  },
+#endif
+#if CTX_ENABLE_GRAY2
+  {
+#if CTX_NATIVE_GRAYA8
+    CTX_FORMAT_GRAY2, 1, 2, 2, 4, 4, CTX_FORMAT_GRAYA8,
+    ctx_GRAY2_to_GRAYA8, ctx_GRAYA8_to_GRAY2, ctx_composite_convert, ctx_setup_GRAY2,
+#else
+    CTX_FORMAT_GRAY2, 1, 2, 4, 4, 4, CTX_FORMAT_RGBA8,
+    ctx_GRAY2_to_RGBA8, ctx_RGBA8_to_GRAY2, ctx_composite_convert, ctx_setup_RGB,
+#endif
+  },
+#endif
+#if CTX_ENABLE_GRAY4
+  {
+#if CTX_NATIVE_GRAYA8
+    CTX_FORMAT_GRAY4, 1, 4, 2, 16, 16, CTX_FORMAT_GRAYA8,
+    ctx_GRAY4_to_GRAYA8, ctx_GRAYA8_to_GRAY4, ctx_composite_convert, ctx_setup_GRAY4,
+#else
+    CTX_FORMAT_GRAY4, 1, 4, 4, 16, 16, CTX_FORMAT_GRAYA8,
+    ctx_GRAY4_to_RGBA8, ctx_RGBA8_to_GRAY4, ctx_composite_convert, ctx_setup_RGB,
+#endif
+  },
+#endif
+#if CTX_ENABLE_GRAY8
+  {
+#if CTX_NATIVE_GRAYA8
+    CTX_FORMAT_GRAY8, 1, 8, 2, 0, 0, CTX_FORMAT_GRAYA8,
+    ctx_GRAY8_to_GRAYA8, ctx_GRAYA8_to_GRAY8, ctx_composite_convert, ctx_setup_GRAY8,
+#else
+    CTX_FORMAT_GRAY8, 1, 8, 4, 0, 0, CTX_FORMAT_RGBA8,
+    ctx_GRAY8_to_RGBA8, ctx_RGBA8_to_GRAY8, ctx_composite_convert, ctx_setup_RGB,
+#endif
+  },
+#endif
+#if CTX_ENABLE_GRAYA8
+  {
+#if CTX_NATIVE_GRAYA8
+    CTX_FORMAT_GRAYA8, 2, 16, 2, 0, 0, CTX_FORMAT_GRAYA8,
+    ctx_GRAYA8_to_RGBA8, ctx_RGBA8_to_GRAYA8, NULL, ctx_setup_GRAYA8,
+#else
+    CTX_FORMAT_GRAYA8, 2, 16, 4, 0, 0, CTX_FORMAT_RGBA8,
+    ctx_GRAYA8_to_RGBA8, ctx_RGBA8_to_GRAYA8, ctx_composite_convert, ctx_setup_RGB,
+#endif
+  },
+#endif
+#if CTX_ENABLE_RGB332
+  {
+    CTX_FORMAT_RGB332, 3, 8, 4, 10, 12, CTX_FORMAT_RGBA8,
+    ctx_RGB332_to_RGBA8, ctx_RGBA8_to_RGB332,
+    ctx_composite_RGB332, ctx_setup_RGB332,
+  },
+#endif
+#if CTX_ENABLE_RGB565
+  {
+    CTX_FORMAT_RGB565, 3, 16, 4, 16, 32, CTX_FORMAT_RGBA8,
+    ctx_RGB565_to_RGBA8, ctx_RGBA8_to_RGB565,
+    ctx_composite_RGB565, ctx_setup_RGB565,
+  },
+#endif
+#if CTX_ENABLE_RGB565_BYTESWAPPED
+  {
+    CTX_FORMAT_RGB565_BYTESWAPPED, 3, 16, 4, 16, 32, CTX_FORMAT_RGBA8,
+    ctx_RGB565_BS_to_RGBA8,
+    ctx_RGBA8_to_RGB565_BS,
+    ctx_composite_RGB565_BS, ctx_setup_RGB565,
+  },
+#endif
+#if CTX_ENABLE_CMYKAF
+  {
+    CTX_FORMAT_CMYKAF, 5, 160, 4 * 5, 0, 0, CTX_FORMAT_CMYKAF,
+    NULL, NULL, NULL, ctx_setup_CMYKAF,
+  },
+#endif
+#if CTX_ENABLE_CMYKA8
+  {
+    CTX_FORMAT_CMYKA8, 5, 40, 4 * 5, 0, 0, CTX_FORMAT_CMYKAF,
+    NULL, NULL, ctx_composite_CMYKA8, ctx_setup_CMYKA8,
+  },
+#endif
+#if CTX_ENABLE_CMYK8
+  {
+    CTX_FORMAT_CMYK8, 5, 32, 4 * 5, 0, 0, CTX_FORMAT_CMYKAF,
+    NULL, NULL, ctx_composite_CMYK8, ctx_setup_CMYK8,
+  },
+#endif
+#if CTX_ENABLE_YUV420
+  {
+    CTX_FORMAT_YUV420, 1, 8, 4, 0, 0, CTX_FORMAT_RGBA8,
+    NULL, NULL, ctx_composite_convert, ctx_setup_RGB,
+  },
+#endif
+  {
+    CTX_FORMAT_NONE
   }
-    loaded = offset;
-    ((uint32_t*)(&rgba[0]))[0] = ctx_bi_RGBA8 (*src00,*src01,*src10,*src11, (xi>>8),(yi>>8)); // the argument type does the & 0xff
-    xi += xi_delta;
-    yi += yi_delta;
-    rgba += 4;
+};
 
-    u = xi >> 16;
-    v = yi >> 16;
-    offset = bwidth * v + u;
-  }
-  }
 
-  for (; i < count; i ++)
-  {
-    *((uint32_t*)(rgba))= 0;
-    rgba += 4;
-  }
-}
-#endif
 
-#define ctx_clampi(val,min,max) \
-     ctx_mini (ctx_maxi ((val), (min)), (max))
 
-static inline uint32_t ctx_yuv_to_rgba32 (uint8_t y, uint8_t u, uint8_t v)
-{
-  int cy  = ((y - 16) * 76309) >> 16;
-  int cr  = (v - 128);
-  int cb  = (u - 128);
-  int red = cy + ((cr * 104597) >> 16);
-  int green = cy - ((cb * 25674 + cr * 53278) >> 16);
-  int blue = cy + ((cb * 132201) >> 16);
-  return  ctx_clampi (red, 0, 255) |
-          (ctx_clampi (green, 0, 255) << 8) |
-          (ctx_clampi (blue, 0, 255) << 16) |
-          (0xff << 24);
-}
+#endif // CTX_COMPOSITE
 
-static void
-ctx_fragment_image_yuv420_RGBA8_nearest (CtxRasterizer *rasterizer,
-                                         float x,
-                                         float y,
-                                         void *out, int count, float dx, float dy)
-{
-  uint8_t *rgba = (uint8_t *) out;
-  CtxSource *g = &rasterizer->state->gstate.source_fill;
-  CtxBuffer *buffer = g->texture.buffer;
-  if (buffer->color_managed)
-    buffer = buffer->color_managed;
-  uint8_t *src = (uint8_t *) buffer->data;
-  int bwidth  = buffer->width;
-  int bheight = buffer->height;
-  int bwidth_div_2  = bwidth/2;
-  int bheight_div_2  = bheight/2;
-  x += 0.5f;
-  y += 0.5f;
+#endif // CTX_IMPLEMENTATION
 
-  {
-    int i = 0;
+#if CTX_IMPLEMENTATION || CTX_SIMD_BUILD
+#if CTX_COMPOSITE 
 
-    for (; i < count; i ++)
-    {
-      int u = x;
-      int v = y;
-      if ((u < 0 || v < 0 || u >= bwidth || v >= bheight))
-      {
-        *((uint32_t*)(rgba))= 0;
-      }
-      else
-      {
-        break;
-      }
-      x += dx;
-      y += dy;
-      rgba += 4;
-    }
+#define CTX_AA_HALFSTEP2   (CTX_FULL_AA/2)
+#define CTX_AA_HALFSTEP    ((CTX_FULL_AA/2)+1)
 
-    uint32_t u_offset = bheight * bwidth;
-    uint32_t v_offset = u_offset + bheight_div_2 * bwidth_div_2;
+CTX_INLINE static int ctx_compare_edges (const void *ap, const void *bp)
+{
+  const CtxSegment *a = (const CtxSegment *) ap;
+  const CtxSegment *b = (const CtxSegment *) bp;
+  return a->data.s16[1] - b->data.s16[1];
+}
 
-    if (rasterizer->swap_red_green)
+CTX_INLINE static int ctx_edge_qsort_partition (CtxSegment *A, int low, int high)
+{
+  CtxSegment pivot = A[ (high+low) /2];
+  int i = low;
+  int j = high;
+  while (i <= j)
     {
-      v_offset = bheight * bwidth;
-      u_offset = v_offset + bheight_div_2 * bwidth_div_2;
+      while (ctx_compare_edges (&A[i], &pivot) < 0) { i ++; }
+      while (ctx_compare_edges (&pivot, &A[j]) < 0) { j --; }
+      if (i <= j)
+        {
+          CtxSegment tmp = A[i];
+          A[i] = A[j];
+          A[j] = tmp;
+          i++;
+          j--;
+        }
     }
+  return i;
+}
 
-    // XXX this is incorrect- but fixes some bug!
-    int ix = 65536;//x * 65536;
-    int iy = y * 65536;
+static inline void ctx_edge_qsort (CtxSegment *entries, int low, int high)
+{
+  int p = ctx_edge_qsort_partition (entries, low, high);
+  if (low < p -1 )
+    { ctx_edge_qsort (entries, low, p - 1); }
+  if (low < high)
+    { ctx_edge_qsort (entries, p, high); }
+}
 
-    int ideltax = dx * 65536;
-    int ideltay = dy * 65536;
+static inline void ctx_rasterizer_sort_edges (CtxRasterizer *rasterizer)
+{
+  ctx_edge_qsort ((CtxSegment*)& (rasterizer->edge_list.entries[0]), 0, rasterizer->edge_list.count-1);
+}
 
-    for (; i < count; i ++)
+static inline void ctx_rasterizer_discard_edges (CtxRasterizer *rasterizer)
+{
+  int scanline = rasterizer->scanline;
+  int next_scanline = scanline + CTX_FULL_AA;
+  int limit3 = CTX_RASTERIZER_AA_SLOPE_LIMIT3_FAST_AA;
+  CtxSegment *segments = &((CtxSegment*)(rasterizer->edge_list.entries))[0];
+  int *edges = rasterizer->edges;
+  for (unsigned int i = 0; i < rasterizer->active_edges; i++)
     {
-      int u = ix >> 16;
-      int v = iy >> 16;
-      if (u >= 0 && v >= 0 && u < bwidth && v < bheight)
-      {
-        uint32_t y  = v * bwidth + u;
-        uint32_t uv = (v / 2) * bwidth_div_2 + (u / 2);
+      CtxSegment *segment = segments + edges[i];
+      int edge_end = segment->data.s16[3]-1;
+      if (edge_end < scanline)
+        {
 
-        *((uint32_t*)(rgba))= ctx_yuv_to_rgba32 (src[y],
-                        //127, 127);
-                        src[u_offset+uv], src[v_offset+uv]);
-        //ctx_RGBA8_associate_alpha_probably_opaque (rgba);
-#if CTX_DITHER
-       ctx_dither_rgba_u8 (rgba, x, y, rasterizer->format->dither_red_blue,
-                           rasterizer->format->dither_green);
-#endif
-      }
-      else
-      {
-        break;
-      }
-      ix += ideltax;
-      iy += ideltay;
-      rgba += 4;
+          int dx_dy = abs(segment->delta);
+          rasterizer->needs_aa3  -= (dx_dy > limit3);
+          rasterizer->needs_aa5  -= (dx_dy > CTX_RASTERIZER_AA_SLOPE_LIMIT5);
+          rasterizer->needs_aa15 -= (dx_dy > CTX_RASTERIZER_AA_SLOPE_LIMIT15);
+          rasterizer->edges[i] = rasterizer->edges[rasterizer->active_edges-1];
+          rasterizer->active_edges--;
+          i--;
+        }
+      else if (edge_end < next_scanline)
+        rasterizer->ending_edges++;
     }
-
-    for (; i < count; i++)
+#if 0
+  // perhaps we should - but for 99% of the cases we do not need to, so we skip it
+  for (int i = 0; i < rasterizer->pending_edges; i++)
     {
-      *((uint32_t*)(rgba))= 0;
-      rgba += 4;
+      int edge_end = ((CtxSegment*)(rasterizer->edge_list.entries))[rasterizer->edges[CTX_MAX_EDGES-1-i]].data.s16[3]-1;
+      if (edge_end < scanline + CTX_FULL_AA)
+        rasterizer->ending_edges++;
     }
-  }
+#endif
 }
 
-#if CTX_FRAGMENT_SPECIALIZE
+inline static void ctx_rasterizer_increment_edges (CtxRasterizer *rasterizer, int count)
+{
+  rasterizer->scanline += count;
+  CtxSegment *segments = &((CtxSegment*)(rasterizer->edge_list.entries))[0];
+  for (unsigned int i = 0; i < rasterizer->active_edges; i++)
+    {
+      CtxSegment *segment = segments + rasterizer->edges[i];
+      segment->val += segment->delta * count;
+    }
+  for (unsigned int i = 0; i < rasterizer->pending_edges; i++)
+    {
+      CtxSegment *segment = segments + rasterizer->edges[CTX_MAX_EDGES-1-i];
+      segment->val += segment->delta * count;
+    }
+}
 
-static void
-ctx_fragment_image_rgba8_RGBA8_box_swap_red_green (CtxRasterizer *rasterizer,
-                                    float x,
-                                    float y,
-                                    void *out, int count, float dx, float dy)
+/* feeds up to rasterizer->scanline,
+   keeps a pending buffer of edges - that encompass
+   the full incoming scanline,
+   feed until the start of the scanline and check for need for aa
+   in all of pending + active edges, then
+   again feed_edges until middle of scanline if doing non-AA
+   or directly render when doing AA
+*/
+inline static void ctx_edge2_insertion_sort (CtxSegment *segments, int *entries, unsigned int count)
 {
-  ctx_fragment_image_rgba8_RGBA8_box (rasterizer, x, y, out, count, dx, dy);
-  ctx_fragment_swap_red_green_u8 (out, count);
+  for(unsigned int i=1; i<count; i++)
+   {
+     int temp = entries[i];
+     int j = i-1;
+     while (j >= 0 && segments[temp].val - segments[entries[j]].val < 0)
+     {
+       entries[j+1] = entries[j];
+       j--;
+     }
+     entries[j+1] = temp;
+   }
 }
 
-static void
-ctx_fragment_image_rgba8_RGBA8_bi_swap_red_green (CtxRasterizer *rasterizer,
-                                    float x,
-                                    float y,
-                                    void *out, int count, float dx, float dy)
+inline static int ctx_edge2_compare2 (CtxSegment *segments, int a, int b)
 {
-  ctx_fragment_image_rgba8_RGBA8_bi (rasterizer, x, y, out, count, dx, dy);
-  ctx_fragment_swap_red_green_u8 (out, count);
+  CtxSegment *seg_a = &segments[a];
+  CtxSegment *seg_b = &segments[b];
+  int minval_a = ctx_mini (seg_a->val - seg_a->delta * CTX_AA_HALFSTEP2, seg_a->val + seg_a->delta * CTX_AA_HALFSTEP);
+  int minval_b = ctx_mini (seg_b->val - seg_b->delta * CTX_AA_HALFSTEP2, seg_b->val + seg_b->delta * CTX_AA_HALFSTEP);
+  return minval_a - minval_b;
 }
 
-static void
-ctx_fragment_image_rgba8_RGBA8_nearest_swap_red_green (CtxRasterizer *rasterizer,
-                                    float x,
-                                    float y,
-                                    void *out, int count, float dx, float dy)
+inline static void ctx_edge2_insertion_sort2 (CtxSegment *segments, int *entries, unsigned int count)
 {
-  ctx_fragment_image_rgba8_RGBA8_nearest (rasterizer, x, y, out, count, dx, dy);
-  ctx_fragment_swap_red_green_u8 (out, count);
+  for(unsigned int i=1; i<count; i++)
+   {
+     int temp = entries[i];
+     int j = i-1;
+     while (j >= 0 && ctx_edge2_compare2 (segments, temp, entries[j]) < 0)
+     {
+       entries[j+1] = entries[j];
+       j--;
+     }
+     entries[j+1] = temp;
+   }
 }
 
-static void
-ctx_fragment_image_rgba8_RGBA8 (CtxRasterizer *rasterizer,
-                                float x,
-                                float y,
-                                void *out, int count, float dx, float dy)
+inline static void ctx_rasterizer_feed_edges (CtxRasterizer *rasterizer, int apply2_sort)
 {
-  if (rasterizer->state->gstate.image_smoothing)
-  {
-    float factor = ctx_matrix_get_scale (&rasterizer->state->gstate.transform);
-    if (factor <= 0.50f)
-    {
-      if (rasterizer->swap_red_green)
-        ctx_fragment_image_rgba8_RGBA8_box_swap_red_green (rasterizer, x, y, out, count, dx, dy);
-      else
-        ctx_fragment_image_rgba8_RGBA8_box (rasterizer, x, y, out, count, dx, dy);
-    }
-#if CTX_ALWAYS_USE_NEAREST_FOR_SCALE1
-    else if (factor > 0.99f && factor < 1.01f)
+  int miny;
+  CtxSegment *entries = (CtxSegment*)&rasterizer->edge_list.entries[0];
+  rasterizer->horizontal_edges = 0;
+  rasterizer->ending_edges = 0;
+  for (unsigned int i = 0; i < rasterizer->pending_edges; i++)
     {
-      // XXX: also verify translate == 0 for this fast path to be valid
-      if (rasterizer->swap_red_green)
-        ctx_fragment_image_rgba8_RGBA8_nearest_swap_red_green (rasterizer, x, y, out, count, dx, dy);
-      else
-        ctx_fragment_image_rgba8_RGBA8_nearest (rasterizer, x, y, out, count, dx, dy);
+      if (entries[rasterizer->edges[CTX_MAX_EDGES-1-i]].data.s16[1] - 1 <= rasterizer->scanline &&
+          rasterizer->active_edges < CTX_MAX_EDGES-2)
+        {
+          unsigned int no = rasterizer->active_edges;
+          rasterizer->active_edges++;
+          rasterizer->edges[no] = rasterizer->edges[CTX_MAX_EDGES-1-i];
+          rasterizer->edges[CTX_MAX_EDGES-1-i] =
+            rasterizer->edges[CTX_MAX_EDGES-1-rasterizer->pending_edges + 1];
+          rasterizer->pending_edges--;
+          i--;
+        }
     }
-#endif
-    else
+  int limit3 = CTX_RASTERIZER_AA_SLOPE_LIMIT3_FAST_AA;
+  int scanline = rasterizer->scanline;
+  int next_scanline = scanline + CTX_FULL_AA;
+  unsigned int edge_pos = rasterizer->edge_pos;
+  unsigned int edge_count = rasterizer->edge_list.count;
+  int *edges = rasterizer->edges;
+  while ((edge_pos < edge_count &&
+         (miny=entries[edge_pos].data.s16[1]-1)  <= next_scanline))
     {
-      if (rasterizer->swap_red_green)
-        ctx_fragment_image_rgba8_RGBA8_bi_swap_red_green (rasterizer, x, y, out, count, dx, dy);
-      else
-        ctx_fragment_image_rgba8_RGBA8_bi (rasterizer, x, y, out, count, dx, dy);
+      if (rasterizer->active_edges < CTX_MAX_EDGES-2 &&
+      entries[edge_pos].data.s16[3]-1 /* (maxy) */  >= scanline)
+        {
+          int dy = (entries[edge_pos].data.s16[3] - 1 - miny);
+          if (dy)
+            {
+              int yd = scanline - miny;
+              unsigned int no = rasterizer->active_edges;
+              rasterizer->active_edges++;
+              unsigned int index = edges[no] = edge_pos;
+              int x0 = entries[index].data.s16[0];
+              int x1 = entries[index].data.s16[2];
+              int dx_dy = CTX_RASTERIZER_EDGE_MULTIPLIER * (x1 - x0) / dy;
+              entries[index].delta = dx_dy;
+              entries[index].val = x0 * CTX_RASTERIZER_EDGE_MULTIPLIER +
+                                         (yd * dx_dy);
+
+              {
+                int abs_dx_dy = abs(dx_dy);
+                rasterizer->needs_aa3  += (abs_dx_dy > limit3);
+                rasterizer->needs_aa5  += (abs_dx_dy > CTX_RASTERIZER_AA_SLOPE_LIMIT5);
+                rasterizer->needs_aa15 += (abs_dx_dy > CTX_RASTERIZER_AA_SLOPE_LIMIT15);
+              }
+
+              if (miny > scanline &&
+                  rasterizer->pending_edges < CTX_MAX_PENDING-1)
+              {
+                  /* it is a pending edge - we add it to the end of the array
+                     and keep a different count for items stored here, like
+                     a heap and stack growing against each other
+                  */
+                    edges[CTX_MAX_EDGES-1-rasterizer->pending_edges] =
+                    rasterizer->edges[no];
+                    rasterizer->pending_edges++;
+                    rasterizer->active_edges--;
+              }
+            }
+          else
+            rasterizer->horizontal_edges ++;
+        }
+      edge_pos++;
     }
-  }
-  else
-  {
-    if (rasterizer->swap_red_green)
-      ctx_fragment_image_rgba8_RGBA8_nearest_swap_red_green (rasterizer, x, y, out, count, dx, dy);
+    rasterizer->edge_pos = edge_pos;
+    ctx_rasterizer_discard_edges (rasterizer);
+    if (apply2_sort)
+      ctx_edge2_insertion_sort2 ((CtxSegment*)rasterizer->edge_list.entries, rasterizer->edges, rasterizer->active_edges);
     else
-      ctx_fragment_image_rgba8_RGBA8_nearest (rasterizer, x, y, out, count, dx, dy);
-  }
-  //ctx_fragment_swap_red_green_u8 (out, count);
-#if CTX_DITHER
-  uint8_t *rgba = (uint8_t*)out;
-  ctx_dither_rgba_u8 (rgba, x, y, rasterizer->format->dither_red_blue,
-                      rasterizer->format->dither_green);
-#endif
+      ctx_edge2_insertion_sort ((CtxSegment*)rasterizer->edge_list.entries, rasterizer->edges, rasterizer->active_edges);
 }
-#endif
+#undef CTX_CMPSWP
 
-static void
-ctx_fragment_image_gray1_RGBA8 (CtxRasterizer *rasterizer, float x, float y, void *out, int count, float dx, float dy)
+/* ctx_coverage_post_process:
+ *
+ * Adjusts one scanline of 8-bit coverage in place, for the columns
+ * minx..maxx (inclusive), after coverage generation:
+ *
+ *  - with CTX_ENABLE_SHADOW_BLUR and rasterizer->in_shadow set, the row is
+ *    convolved horizontally with rasterizer->kernel; the kernel width is
+ *    2 * shadow_blur + 1, capped at CTX_MAX_GAUSSIAN_KERNEL_DIM. The
+ *    outermost dim/2 columns on each side are not convolved and end up 0.
+ *  - with CTX_ENABLE_CLIP, a clip buffer and no rectangular clip, each
+ *    coverage value is scaled by the matching entry of the clip buffer row.
+ *
+ * first_col and last_col are not used by this function.
+ */
+static inline void ctx_coverage_post_process (CtxRasterizer *rasterizer, int minx, int maxx, uint8_t *coverage, int *first_col, int *last_col)
 {
-  uint8_t *rgba = (uint8_t *) out;
-  CtxSource *g = &rasterizer->state->gstate.source_fill;
-  CtxBuffer *buffer = g->texture.buffer;
-  ctx_assert (rasterizer);
-  ctx_assert (g);
-  ctx_assert (buffer);
-  for (int i = 0; i < count; i ++)
+#if CTX_ENABLE_SHADOW_BLUR
+  if (CTX_UNLIKELY(rasterizer->in_shadow))
   {
-  int u = x;
-  int v = y;
-  if ( u < 0 || v < 0 ||
-       u >= buffer->width ||
-       v >= buffer->height)
-    {
-      rgba[0] = rgba[1] = rgba[2] = rgba[3] = 0;
-    }
-  else
+    float radius = rasterizer->state->gstate.shadow_blur;
+    int dim = 2 * radius + 1;
+    if (CTX_UNLIKELY (dim > CTX_MAX_GAUSSIAN_KERNEL_DIM))
+      dim = CTX_MAX_GAUSSIAN_KERNEL_DIM;
     {
-      uint8_t *src = (uint8_t *) buffer->data;
-      src += v * buffer->stride + u / 8;
-      if (*src & (1<< (u & 7) ) )
-        {
-          rgba[0] = rgba[1] = rgba[2] = rgba[3] = 0;
-        }
-      else
+      /* accumulate in 8.8 fixed point so the weighted sum keeps precision */
+      uint16_t temp[maxx-minx+1];
+      memset (temp, 0, sizeof (temp));
+      for (int x = dim/2; x < maxx-minx + 1 - dim/2; x ++)
+        for (int u = 0; u < dim; u ++)
         {
-          for (int c = 0; c < 4; c++)
-            { rgba[c] = 255;
-            }//g->texture.rgba[c];
-            //}
+          temp[x] += coverage[minx+x+u-dim/2] * rasterizer->kernel[u] * 256;
         }
+      for (int x = 0; x < maxx-minx + 1; x ++)
+        coverage[minx+x] = temp[x] >> 8;
     }
-
-    rgba += 4;
-    x += dx;
-    y += dy;
   }
-}
+#endif
 
-#if CTX_GRADIENTS
-static void
-ctx_fragment_radial_gradient_RGBA8 (CtxRasterizer *rasterizer, float x, float y, void *out, int count, float 
dx, float dy)
-{
-  uint8_t *rgba = (uint8_t *) out;
-  CtxSource *g = &rasterizer->state->gstate.source_fill;
-  for (int i = 0; i <  count; i ++)
+#if CTX_ENABLE_CLIP
+  if (CTX_UNLIKELY(rasterizer->clip_buffer &&  !rasterizer->clip_rectangle))
   {
-    float v = (ctx_hypotf_fast (g->radial_gradient.x0 - x, g->radial_gradient.y0 - y) -
-              g->radial_gradient.r0) * (g->radial_gradient.rdelta);
-#if CTX_GRADIENT_CACHE
-    uint32_t *rgbap = (uint32_t*)&ctx_gradient_cache_u8[ctx_grad_index(v)][0];
-    *((uint32_t*)rgba) = *rgbap;
+  int scanline     = rasterizer->scanline - CTX_FULL_AA; // we do the
+                                                 // post process after
+                                                 // coverage generation increment
+    /* perhaps not working right for clear? */
+    int y = scanline / CTX_FULL_AA;//rasterizer->aa;
+    uint8_t *clip_line = &((uint8_t*)(rasterizer->clip_buffer->data))[rasterizer->blit_width*y];
+    // XXX SIMD candidate
+    for (int x = minx; x <= maxx; x ++)
+    {
+#if CTX_1BIT_CLIP
+       /* one clip bit per pixel: bit (x & 7) of byte x/8; masking the bit
+        * index with 8 instead of 7 could only ever produce bit 0 or a
+        * mask outside the byte */
+       coverage[x] = (coverage[x] * ((clip_line[x/8]&(1<<(x&7)))?255:0))/255;
 #else
-    ctx_fragment_gradient_1d_RGBA8 (rasterizer, v, 0.0, rgba);
-#endif
-#if CTX_DITHER
-    ctx_dither_rgba_u8 (rgba, x, y, rasterizer->format->dither_red_blue,
-                        rasterizer->format->dither_green);
+       coverage[x] = (255 + coverage[x] * clip_line[x-rasterizer->blit_x])>>8;
 #endif
-    rgba += 4;
-    x += dx;
-    y += dy;
+    }
   }
+#endif
 }
 
-static void
-ctx_fragment_linear_gradient_RGBA8 (CtxRasterizer *rasterizer, float x, float y, void *out, int count, float 
dx, float dy)
-{
-#if 0
-  uint8_t *rgba = (uint8_t *) out;
-  CtxSource *g = &rasterizer->state->gstate.source_fill;
-  for (int i = 0; i <  count; i ++)
-  {
-  float v = ( ( (g->linear_gradient.dx * x + g->linear_gradient.dy * y) /
-                g->linear_gradient.length) -
-              g->linear_gradient.start) * (g->linear_gradient.rdelta);
-#if CTX_GRADIENT_CACHE
-  uint32_t*rgbap = ((uint32_t*)(&ctx_gradient_cache_u8[ctx_grad_index(v)][0]));
-  *((uint32_t*)rgba) = *rgbap;
-#else
-  _ctx_fragment_gradient_1d_RGBA8 (rasterizer, v, 1.0, rgba);
-#endif
-#if CTX_DITHER
-  ctx_dither_rgba_u8 (rgba, x, y, rasterizer->format->dither_red_blue,
-                      rasterizer->format->dither_green);
-#endif
-    rgba += 4;
-    x += dx;
-    y += dy;
-  }
-#else
-  uint8_t *rgba = (uint8_t *) out;
+#define CTX_EDGE(no)      entries[edges[no]] /* segment referenced by slot `no`; needs `entries` and `edges` in the expanding scope */
+#define CTX_EDGE_YMIN     (segment->data.s16[1]-1) /* needs `segment` in scope; presumably one less than the segment's first scanline - TODO confirm */
 
-  CtxSource *g = &rasterizer->state->gstate.source_fill;
-  float u0 = x; float v0 = y;
-  float ud = dx; float vd = dy;
-  float linear_gradient_rdelta = g->linear_gradient.rdelta;
-  float linear_gradient_length = g->linear_gradient.length;
-  float linear_gradient_length_recip = 1.0f/linear_gradient_length;
-  float linear_gradient_dx = g->linear_gradient.dx *linear_gradient_length_recip * linear_gradient_rdelta;
-  float linear_gradient_dy = g->linear_gradient.dy *linear_gradient_length_recip * linear_gradient_rdelta;
-  float linear_gradient_start = g->linear_gradient.start * linear_gradient_rdelta;
+#define UPDATE_PARITY \
+        if (CTX_LIKELY(scanline!=CTX_EDGE_YMIN))\
+        { \
+          if (is_winding)\
+             parity = parity + -1+2*(segment->code == CTX_EDGE_FLIPPED);\
+          else\
+             parity = 1-parity; \
+        } /* UPDATE_PARITY reads `scanline`, `segment` and `is_winding` from the
+           * expanding scope and updates `parity`. The edge is skipped when
+           * scanline equals CTX_EDGE_YMIN. Otherwise, for winding fills,
+           * parity goes up by one for a CTX_EDGE_FLIPPED segment and down
+           * by one for any other; for non-winding fills it is replaced by
+           * 1 - parity. The expansion is a bare `if` statement rather than
+           * a do/while(0), so it must not be followed directly by `else`. */
 
-#if CTX_DITHER
-  int dither_red_blue = rasterizer->format->dither_red_blue;
-  int dither_green = rasterizer->format->dither_green;
-#endif
 
-  u0 *= linear_gradient_dx;
-  v0 *= linear_gradient_dy;
-  ud *= linear_gradient_dx;
-  vd *= linear_gradient_dy;
+inline static void
+ctx_rasterizer_generate_coverage (CtxRasterizer *rasterizer,
+                                  int            minx,
+                                  int            maxx,
+                                  uint8_t       *coverage,
+                                  int            is_winding,
+                                  const uint8_t  aa_factor,
+                                  const uint8_t  fraction)
+{ /* Adds the contribution of rasterizer->scanline to coverage[].
+   *
+   * The active edges are walked in order; whenever the parity is non-zero
+   * after an edge, the span from that edge's x (segment->val) to the next
+   * edge's x is added. The first pixel of a span receives
+   * fraction - subpixel/aa_factor, interior pixels receive fraction, and
+   * the last pixel receives subpixel/aa_factor. Spans are clamped to
+   * minx..maxx.
+   *
+   * is_winding selects how UPDATE_PARITY updates the parity.
+   */
+  CtxSegment *entries      = (CtxSegment*)(&rasterizer->edge_list.entries[0]);
+  int        *edges        = rasterizer->edges;
+  int         scanline     = rasterizer->scanline;
+  int         active_edges = rasterizer->active_edges;
+  int         parity       = 0;
+  coverage -= minx; /* rebase so coverage[] can be indexed with absolute x */
+  for (int t = 0; t < active_edges -1;t++)
+    {
+      CtxSegment *segment = &entries[edges[t]];
+      UPDATE_PARITY; /* uses scanline, segment, is_winding and parity */
 
-#if CTX_GRADIENT_CACHE
-  int vv = ((u0 + v0) - linear_gradient_start) * (CTX_GRADIENT_CACHE_ELEMENTS-1) * 256;
-  int ud_plus_vd = (ud + vd) * (CTX_GRADIENT_CACHE_ELEMENTS-1) * 256;
-#else
-  float vv = ((u0 + v0) - linear_gradient_start);
-  float ud_plus_vd = (ud + vd);
-#endif
+      if (parity)
+        {
+          CtxSegment *next_segment = &entries[edges[t+1]];
+          const int x0 = segment->val;
+          const int x1 = next_segment->val;
+          int graystart = x0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256); /* 8.8 fixed point: column above bit 8, sub-pixel position in the low byte */
+          int grayend   = x1 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
+          int first     = graystart >> 8;
+          int last      = grayend   >> 8;
 
-  for (int x = 0; x < count ; x++)
-  {
-#if CTX_GRADIENT_CACHE
-  uint32_t*rgbap = ((uint32_t*)(&ctx_gradient_cache_u8[ctx_grad_index_i (vv)][0]));
-  *((uint32_t*)rgba) = *rgbap;
-#else
-  _ctx_fragment_gradient_1d_RGBA8 (rasterizer, vv, 1.0, rgba);
-#endif
-#if CTX_DITHER
-      ctx_dither_rgba_u8 (rgba, u0, v0, dither_red_blue, dither_green);
-#endif
-    rgba+= 4;
-    vv += ud_plus_vd;
-  }
-#endif
-}
+          if (CTX_UNLIKELY (first < minx))
+          { 
+            first = minx;
+            graystart=0; /* span starts left of the window: first pixel gets the whole fraction */
+          }
+          if (CTX_UNLIKELY (last > maxx))
+          {
+            last = maxx;
+            grayend=255; /* span ends right of the window: last pixel gets 255/aa_factor */
+          }
 
-#endif
+          graystart = fraction- (graystart&0xff)/aa_factor;
+          grayend   = (grayend & 0xff) / aa_factor;
 
-static void
-ctx_fragment_color_RGBA8 (CtxRasterizer *rasterizer, float x, float y, void *out, int count, float dx, float 
dy)
-{
-  uint8_t *rgba_out = (uint8_t *) out;
-  CtxSource *g = &rasterizer->state->gstate.source_fill;
-  _ctx_color_get_rgba8 (rasterizer->state, &g->color, rgba_out);
-  ctx_RGBA8_associate_alpha (rgba_out);
-  if (rasterizer->swap_red_green)
-  {
-    int tmp = rgba_out[0];
-    rgba_out[0] = rgba_out[2];
-    rgba_out[2] = tmp;
-  }
-  for (int i = 1; i < count; i++, rgba_out+=4)
-    memcpy (rgba_out + count * 4, rgba_out, 4);
+          if (first < last)
+          {
+              coverage[first] += graystart;
+              for (int x = first + 1; x < last; x++)
+                coverage[x]  += fraction;
+              coverage[last] += grayend;
+          }
+          else if (first == last) /* span starts and ends in the same pixel */
+            coverage[first] += (graystart-(fraction-grayend));
+        }
+   }
 }
-#if CTX_ENABLE_FLOAT
 
-#if CTX_GRADIENTS
-static void
-ctx_fragment_linear_gradient_RGBAF (CtxRasterizer *rasterizer, float x, float y, void *out, int count, float 
dx, float dy)
+inline static void
+ctx_rasterizer_generate_coverage_set (CtxRasterizer *rasterizer,
+                                      int            minx,
+                                      int            maxx,
+                                      uint8_t       *coverage,
+                                      int            is_winding) /* Writes the
+ * coverage of rasterizer->scanline into coverage[] at full weight.
+ *
+ * The active edges are walked in order; whenever the parity is non-zero
+ * after an edge, the span up to the next edge is emitted: the first pixel
+ * is increased by 255 minus the start sub-pixel position, interior pixels
+ * are set to 255, and the last pixel is increased by the end sub-pixel
+ * position. Spans are clamped to minx..maxx. is_winding selects how
+ * UPDATE_PARITY updates the parity.
+ */
 {
-  float *rgba = (float *) out;
-  CtxSource *g = &rasterizer->state->gstate.source_fill;
-  for (int i = 0; i < count; i++)
-  {
-    float v = ( ( (g->linear_gradient.dx * x + g->linear_gradient.dy * y) /
-                  g->linear_gradient.length) -
-                g->linear_gradient.start) * (g->linear_gradient.rdelta);
-    ctx_fragment_gradient_1d_RGBAF (rasterizer, v, 1.0f, rgba);
-    x += dx;
-    y += dy;
-    rgba += 4;
-  }
-}
+  CtxSegment *entries = (CtxSegment*)(&rasterizer->edge_list.entries[0]);
+  int      *edges = rasterizer->edges;
+  int scanline     = rasterizer->scanline;
+  int active_edges = rasterizer->active_edges;
+  int parity = 0;
+  coverage -= minx; /* rebase so coverage[] can be indexed with absolute x */
+  for (int t = 0; t < active_edges -1;t++)
+    {
+      CtxSegment *segment = &entries[edges[t]];
+      UPDATE_PARITY; /* uses scanline, segment, is_winding and parity */
 
-static void
-ctx_fragment_radial_gradient_RGBAF (CtxRasterizer *rasterizer, float x, float y, void *out, int count, float 
dx, float dy)
-{
-  float *rgba = (float *) out;
-  CtxSource *g = &rasterizer->state->gstate.source_fill;
-  for (int i = 0; i < count; i++)
-  {
-  float v = ctx_hypotf (g->radial_gradient.x0 - x, g->radial_gradient.y0 - y);
-        v = (v - g->radial_gradient.r0) * (g->radial_gradient.rdelta);
-  ctx_fragment_gradient_1d_RGBAF (rasterizer, v, 0.0f, rgba);
-    x+=dx;
-    y+=dy;
-    rgba +=4;
-  }
-}
-#endif
+      if (parity)
+        {
+          CtxSegment *next_segment = &entries[edges[t+1]];
+          const int x0        = segment->val;
+          const int x1        = next_segment->val;
+          int graystart = x0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256); /* 8.8 fixed point: column above bit 8, sub-pixel position in the low byte */
+          int grayend   = x1 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
+          int first     = graystart >> 8;
+          int last      = grayend   >> 8;
 
+          if (CTX_UNLIKELY (first < minx))
+          { 
+            first = minx;
+            graystart=0; /* clamped start: first pixel becomes fully covered */
+          }
+          if (CTX_UNLIKELY (last > maxx))
+          {
+            last = maxx;
+            grayend=255; /* clamped end: last pixel becomes fully covered */
+          }
 
-static void
-ctx_fragment_color_RGBAF (CtxRasterizer *rasterizer, float x, float y, void *out, int count, float dx, float 
dy)
-{
-  float *rgba = (float *) out;
-  for (int i = 0; i < count; i++)
-  {
-    CtxSource *g = &rasterizer->state->gstate.source_fill;
-    ctx_color_get_rgba (rasterizer->state, &g->color, rgba);
-    for (int c = 0; c < 3; c++)
-      rgba[c] *= rgba[3];
-    rgba += 4;
-  }
-}
+          graystart = (graystart&0xff) ^ 255; /* 255 - sub-pixel position */
+          grayend   = (grayend & 0xff);
 
-static void ctx_fragment_image_RGBAF (CtxRasterizer *rasterizer, float x, float y, void *out, int count, 
float dx, float dy)
-{
-  float *outf = (float *) out;
-  uint8_t rgba[4];
-  CtxGState *gstate = &rasterizer->state->gstate;
-  CtxBuffer *buffer = gstate->source_fill.texture.buffer;
-  switch (buffer->format->bpp)
-    {
-#if CTX_FRAGMENT_SPECIALIZE
-      case 1:  ctx_fragment_image_gray1_RGBA8 (rasterizer, x, y, rgba, count, dx, dy); break;
-      case 24: ctx_fragment_image_rgb8_RGBA8 (rasterizer, x, y, rgba, count, dx, dy);  break;
-      case 32: ctx_fragment_image_rgba8_RGBA8 (rasterizer, x, y, rgba, count, dx, dy); break;
+          if (first < last)
+          {
+              coverage[first] += graystart;
+#if 0 /* disabled per-pixel form of the memset in the #else branch */
+              for (int x = first + 1; x < last; x++)
+                coverage[x] = 255;
+#else
+              memset(&coverage[first+1], 255, last-(first+1)); /* interior pixels are overwritten, not added to */
 #endif
-      default: ctx_fragment_image_RGBA8 (rasterizer, x, y, rgba, count, dx, dy);       break;
-    }
-  for (int c = 0; c < 4 * count; c ++) { outf[c] = ctx_u8_to_float (rgba[c]); }
+              coverage[last]  += grayend;
+          }
+          else if (first == last) /* span starts and ends in the same pixel */
+            coverage[first] += (graystart-(grayend^255));
+        }
+   }
 }
 
-static CtxFragment ctx_rasterizer_get_fragment_RGBAF (CtxRasterizer *rasterizer)
-{
-  CtxGState *gstate = &rasterizer->state->gstate;
-  switch (gstate->source_fill.type)
-    {
-      case CTX_SOURCE_TEXTURE:         return ctx_fragment_image_RGBAF;
-      case CTX_SOURCE_COLOR:           return ctx_fragment_color_RGBAF;
-#if CTX_GRADIENTS
-      case CTX_SOURCE_LINEAR_GRADIENT: return ctx_fragment_linear_gradient_RGBAF;
-      case CTX_SOURCE_RADIAL_GRADIENT: return ctx_fragment_radial_gradient_RGBAF;
-#endif
-    }
-  return ctx_fragment_color_RGBAF;
-}
-#endif
 
-static CtxFragment ctx_rasterizer_get_fragment_RGBA8 (CtxRasterizer *rasterizer)
+/* ctx_rasterizer_generate_coverage_apply:
+ *
+ * Generates the spans of rasterizer->scanline and composites them
+ * straight into the destination row (rasterizer->buf, row
+ * scanline / CTX_FULL_AA) instead of filling a coverage buffer; the
+ * `coverage` argument is not used by this function.
+ *
+ * For each span the partially covered first pixel is blended with its own
+ * coverage, the interior is filled by the path selected with `comp`, and
+ * the partially covered last pixel is deferred in `accumulated` /
+ * `accumulator_x`. A following span that starts in that same pixel adds
+ * the deferred coverage to its own start coverage; otherwise the deferred
+ * pixel is blended before the next span, or after the loop.
+ *
+ * rasterizer->color is read as an array of 32-bit words: [0] is the source
+ * pixel, [1]/[2] and [3]/[4] are passed to the RGBA8 lerp/over helpers.
+ * is_winding selects how UPDATE_PARITY updates the parity.
+ */
+inline static void
+ctx_rasterizer_generate_coverage_apply (CtxRasterizer *rasterizer,
+                                        int            minx,
+                                        int            maxx,
+                                        uint8_t       *coverage,
+                                        int            is_winding,
+                                        CtxCovPath     comp)
 {
-  CtxGState *gstate = &rasterizer->state->gstate;
-  CtxBuffer *buffer = gstate->source_fill.texture.buffer;
-  switch (gstate->source_fill.type)
+  CtxSegment *entries = (CtxSegment*)(&rasterizer->edge_list.entries[0]);
+  int *edges          = rasterizer->edges;
+  int scanline        = rasterizer->scanline;
+  const int bpp       = rasterizer->format->bpp;
+  int active_edges    = rasterizer->active_edges;
+  int parity          = 0;
+  uint32_t *src_pixp   = ((uint32_t*)rasterizer->color);
+  const uint32_t src_pix    = src_pixp[0];
+  const uint32_t si_ga = ((uint32_t*)rasterizer->color)[1];
+  const uint32_t si_rb = ((uint32_t*)rasterizer->color)[2];
+  const uint32_t si_ga_full = ((uint32_t*)rasterizer->color)[3];
+  const uint32_t si_rb_full = ((uint32_t*)rasterizer->color)[4];
+  const uint32_t si_a  = si_ga >> 16;
+
+  uint8_t *dst = ( (uint8_t *) rasterizer->buf) +
+         (rasterizer->blit_stride * (scanline / CTX_FULL_AA));
+  int accumulator_x=0;
+  uint8_t accumulated = 0;
+  for (int t = 0; t < active_edges -1;t++)
     {
-      case CTX_SOURCE_TEXTURE:
-        if (!buffer || !buffer->format)
-          return ctx_fragment_color_RGBA8;
+      CtxSegment *segment = &entries[edges[t]];
+      UPDATE_PARITY;
 
-        if (buffer->format->pixel_format == CTX_FORMAT_YUV420)
+       if (parity)
         {
-          return ctx_fragment_image_yuv420_RGBA8_nearest;
-        }
-        else
-#if CTX_FRAGMENT_SPECIALIZE
-        switch (buffer->format->bpp)
+          CtxSegment   *next_segment = &entries[edges[t+1]];
+          const int x0        = segment->val;
+          const int x1        = next_segment->val;
+
+          /* 8.8 fixed point: column above bit 8, sub-pixel position below */
+          int graystart = x0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
+          int first = graystart >> 8;
+          int grayend = x1 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
+          int last = grayend >> 8;
+
+          if (CTX_UNLIKELY(first < minx))
+          { 
+            graystart = 0;
+            first = minx;
+          }
+          graystart = (graystart&0xff) ^ 255;
+          if (CTX_UNLIKELY(last > maxx))
           {
-            case 1:  return ctx_fragment_image_gray1_RGBA8;
-            case 24: 
+             last = maxx;
+             grayend=255;
+          }
+          grayend = (grayend & 0xff);
+
+
+          /* flush or merge the end pixel deferred by the previous span */
+          if (accumulated)
+          {
+            if (accumulator_x == first)
+            {
+              graystart += accumulated;
+            }
+            else
+            {
+              uint32_t* dst_pix = (uint32_t*)(&dst[(accumulator_x*bpp)/8]);
+              switch (comp)
               {
-                if (gstate->image_smoothing)
-                {
-                  float factor = ctx_matrix_get_scale (&gstate->transform);
-                          //fprintf (stderr, "{%.3f}", factor);
-                  if (factor < 0.5f)
+                case CTX_COV_PATH_RGBA8_COPY:
+                  *dst_pix = ctx_lerp_RGBA8_2(*dst_pix, si_ga, si_rb, accumulated);
+                  break;
+                case CTX_COV_PATH_RGBA8_OVER:
+                  *dst_pix = ctx_over_RGBA8_2(*dst_pix, si_ga, si_rb, si_a, accumulated);
+                  break;
+                default:
+                  rasterizer->apply_coverage (rasterizer, (uint8_t*)dst_pix, rasterizer->color, accumulator_x, &accumulated, 1);
+              }
+            }
+            accumulated = 0;
+          }
+
+          if (first < last)
+          {
+            switch (comp)
+            {
+              case CTX_COV_PATH_RGBA8_COPY:
+              {
+                uint32_t* dst_pix = (uint32_t*)(&dst[(first *bpp)/8]);
+                *dst_pix = ctx_lerp_RGBA8_2(*dst_pix, si_ga, si_rb, graystart);
+
+                dst_pix++;
+                ctx_span_set_colorb (dst_pix, src_pix, last - first - 1);
+              }
+              break;
+            case CTX_COV_PATH_RGB8_COPY:
+            case CTX_COV_PATH_RGBAF_COPY:
+            case CTX_COV_PATH_RGB565_COPY:
+            case CTX_COV_PATH_RGB332_COPY:
+            case CTX_COV_PATH_GRAYA8_COPY:
+            case CTX_COV_PATH_GRAY8_COPY:
+            case CTX_COV_PATH_CMYKA8_COPY:
+            case CTX_COV_PATH_CMYK8_COPY:
+            {
+              uint8_t* dsts = (uint8_t*)(&dst[(first *bpp)/8]);
+              uint8_t  startcov = graystart;
+              rasterizer->apply_coverage (rasterizer, (uint8_t*)dsts, rasterizer->color, first, &startcov, 1);
+              uint8_t* dst_i = (uint8_t*)dsts;
+              uint8_t *color = ((uint8_t*)&rasterizer->color_native);
+              unsigned int bytes = rasterizer->format->bpp/8;
+              dst_i+=bytes;
+
+              unsigned int count = last-(first+1);//  (last - post) - (first+pre) + 1;
+
+              //for (int i = first + pre; i <= last - post; i++)
+              /* every fill below starts at dst_i, the pixel after `first` */
+              if (CTX_LIKELY(count>0))
+              switch (bytes)
+              {
+                case 1:
+#if 1
+                  memset (dst_i, color[0], count);
+#else
+                  while (count--)
                   {
-                    if (rasterizer->swap_red_green)
-                      return ctx_fragment_image_rgb8_RGBA8_box_swap_red_green;
-                    return ctx_fragment_image_rgb8_RGBA8_box;
+                    dst_i[0] = color[0];
+                    dst_i++;
                   }
-#if CTX_ALWAYS_USE_NEAREST_FOR_SCALE1
-                  else if (factor > 0.99f && factor < 1.01f)
+#endif
+                  break;
+                case 2:
                   {
-                    if (rasterizer->swap_red_green)
-                      return ctx_fragment_image_rgb8_RGBA8_nearest_swap_red_green;
-                    return ctx_fragment_image_rgb8_RGBA8_nearest;
+                    uint16_t val = ((uint16_t*)color)[0];
+                    while (count--)
+                    {
+                      ((uint16_t*)dst_i)[0] = val;
+                      dst_i+=2;
+                    }
                   }
-#endif
-                  else
+                  break;
+                case 4:
                   {
-                    if (rasterizer->swap_red_green)
-                      return ctx_fragment_image_rgb8_RGBA8_bi_swap_red_green;
-                    return ctx_fragment_image_rgb8_RGBA8_bi;
+                    uint32_t val = ((uint32_t*)color)[0];
+                    /* fill from dst_i, not from the start of the row */
+                    ctx_span_set_colorb ((uint32_t*)dst_i, val, count);
                   }
-                }
-                else
-                {
-                  if (rasterizer->swap_red_green)
-                    return ctx_fragment_image_rgb8_RGBA8_nearest_swap_red_green;
-                  return ctx_fragment_image_rgb8_RGBA8_nearest;
-                }
-              }
+                  break;
+                case 16:
+                  /* fill from dst_i, not from the start of the row */
+                  ctx_span_set_color_x4 ((uint32_t*)dst_i, (uint32_t*)color, count);
+                  break;
+                case 3:
+                 while (count--)
+                 {
+                   *dst_i ++ = color[0];
+                   *dst_i ++ = color[1];
+                   *dst_i ++ = color[2];
+                 }
+                 break;
+                case 5:
+                 while (count--)
+                 {
+                   *dst_i ++ = color[0];
+                   *dst_i ++ = color[1];
+                   *dst_i ++ = color[2];
+                   *dst_i ++ = color[3];
+                   *dst_i ++ = color[4];
+                 }
+                 break;
+                default:
+                 while (count--)
+                 {
+                   for (unsigned int b = 0; b < bytes; b++)
+                     *dst_i++ = color[b];
+                 }
+                  break;
+               }
+            }
               break;
-            case 32:
+
+              case CTX_COV_PATH_GRAY1_COPY:
               {
-                if (gstate->image_smoothing)
+                uint8_t* dstp = (uint8_t*)(&dst[(first *bpp)/8]);
+                uint8_t *srcp = (uint8_t*)src_pixp;
+                uint8_t  startcov = graystart;
+                rasterizer->apply_coverage (rasterizer, (uint8_t*)dstp, rasterizer->color, first, &startcov, 1);
+                dstp = (uint8_t*)(&dst[((first+1)*bpp)/8]);
+                unsigned int count = last - first - 1;
+                /* set bits for a source byte >= 127, clear them otherwise;
+                 * each fill runs bitwise up to a byte boundary, then in
+                 * whole bytes, then bitwise for the remainder */
+                if (srcp[0]>=127)
                 {
-                  float factor = ctx_matrix_get_scale (&gstate->transform);
-                          //fprintf (stderr, "[%.3f]", factor);
-                  if (factor < 0.5f)
+                  int x = first + 1;
+                  for (unsigned int i = 0; i < count && x & 7; count--)
                   {
-                    if (rasterizer->swap_red_green)
-                      return ctx_fragment_image_rgba8_RGBA8_box_swap_red_green;
-                    return ctx_fragment_image_rgba8_RGBA8_box;
+                     int bitno = x & 7;
+                     *dstp |= (1<<bitno);
+                     dstp += (bitno == 7);
+                     x++;
                   }
-#if CTX_ALWAYS_USE_NEAREST_FOR_SCALE1
-                  else if (factor > 0.99f && factor < 1.01f)
+
+                  for (unsigned int i = 0; i < count && count>8; count-=8)
                   {
-                    if (rasterizer->swap_red_green)
-                      return ctx_fragment_image_rgba8_RGBA8_nearest_swap_red_green;
-                    return ctx_fragment_image_rgba8_RGBA8_nearest;
+                     *dstp = 255;
+                     dstp++;
+                     x+=8;
                   }
-#endif
-                  else
+
+                  for (unsigned int i = 0; i < count; i++)
                   {
-                    if (rasterizer->swap_red_green)
-                      return ctx_fragment_image_rgba8_RGBA8_bi_swap_red_green;
-                    return ctx_fragment_image_rgba8_RGBA8_bi;
+                     int bitno = x & 7;
+                     *dstp |= (1<<bitno);
+                     dstp += (bitno == 7);
+                     x++;
                   }
                 }
                 else
                 {
-                  if (rasterizer->swap_red_green)
-                    return ctx_fragment_image_rgba8_RGBA8_nearest_swap_red_green;
-                  return ctx_fragment_image_rgba8_RGBA8_nearest;
+                  unsigned int x = first + 1;
+                  for (unsigned int i = 0; i < count && x & 7; count--)
+                  {
+                     int bitno = x & 7;
+                     *dstp &= ~(1<<bitno);
+                     dstp += (bitno == 7);
+                     x++;
+                  }
+
+                  for (unsigned int i = 0; i < count && count>8; count-=8)
+                  {
+                     *dstp = 0;
+                     dstp++;
+                     x+=8;
+                  }
+
+                  for (unsigned int i = 0; i < count; i++)
+                  {
+                     int bitno = x & 7;
+                     *dstp &= ~(1<<bitno);
+                     dstp += (bitno == 7);
+                     x++;
+                  }
+
                 }
               }
-            default: return ctx_fragment_image_RGBA8;
+              break;
+
+            case CTX_COV_PATH_RGBA8_OVER:
+            {
+              uint32_t* dst_pix = (uint32_t*)(&dst[(first *bpp)/8]);
+              *dst_pix = ctx_over_RGBA8_2(*dst_pix, si_ga, si_rb, si_a, graystart);
+              dst_pix++;
+              for (unsigned int i = first + 1; i < (unsigned)last; i++)
+              {
+                *dst_pix = ctx_over_RGBA8_full_2(*dst_pix, si_ga_full, si_rb_full, si_a);
+                dst_pix++;
+              }
+            }
+            break;
+            case CTX_COV_PATH_RGBA8_COPY_FRAGMENT:
+            {
+              float u0 = 0; float v0 = 0;
+              float ud = 0; float vd = 0;
+              uint8_t gs = graystart;
+              ctx_RGBA8_source_copy_normal_fragment (rasterizer, &dst[(first * bpp)/8], NULL, first, &gs, 1);
+              ctx_init_uv (rasterizer, first+1, &u0, &v0, &ud, &vd);
+              rasterizer->fragment (rasterizer, u0, v0, &dst[((first+1)*bpp)/8], last-first-1, ud, vd);
+            }
+            break;
+              case CTX_COV_PATH_RGBA8_OVER_FRAGMENT:
+            {
+              uint8_t gs = graystart;
+              ctx_RGBA8_source_over_normal_fragment (rasterizer, &dst[(first * bpp)/8], NULL, first, &gs, 1);
+              ctx_RGBA8_source_over_normal_full_cov_fragment (rasterizer,
+                                                     &dst[((first+1)*bpp)/8], NULL, first + 1, NULL, last-first-1, 1);
+            }
+            break;
+              default:
+            {
+              /* generic path: build an explicit coverage row for the span */
+              uint8_t opaque[last-first];
+              memset (opaque, 255, sizeof (opaque));
+              opaque[0] = graystart;
+              rasterizer->apply_coverage (rasterizer,
+                                             &dst[(first * bpp)/8],
+                                             rasterizer->color, first, opaque, last-first);
+            }
+            }
+            accumulated = grayend;
           }
-#else
-          return ctx_fragment_image_RGBA8;
-#endif
+          else if (first == last)
+          {
+            accumulated = (graystart-(grayend^255));
+          }
+          accumulator_x = last;
+        }
+   }
 
-      case CTX_SOURCE_COLOR:           return ctx_fragment_color_RGBA8;
-#if CTX_GRADIENTS
-      case CTX_SOURCE_LINEAR_GRADIENT: return ctx_fragment_linear_gradient_RGBA8;
-      case CTX_SOURCE_RADIAL_GRADIENT: return ctx_fragment_radial_gradient_RGBA8;
-#endif
-    }
-  return ctx_fragment_color_RGBA8;
+   /* blend the end pixel deferred by the final span */
+   if (accumulated)
+   {
+     uint32_t* dst_pix = (uint32_t*)(&dst[(accumulator_x*bpp)/8]);
+     switch (comp)
+     {
+       case CTX_COV_PATH_RGBA8_COPY:
+         *dst_pix = ctx_lerp_RGBA8_2(*dst_pix, si_ga, si_rb, accumulated);
+         break;
+       case CTX_COV_PATH_RGBA8_OVER:
+         *dst_pix = ctx_over_RGBA8_2(*dst_pix, si_ga, si_rb, si_a, accumulated);
+         break;
+       default:
+         rasterizer->apply_coverage (rasterizer, (uint8_t*)dst_pix, rasterizer->color, accumulator_x, &accumulated, 1);
+     }
+   }
 }
 
-static void
-ctx_init_uv (CtxRasterizer *rasterizer,
-             int x0, int count,
-             float *u0, float *v0, float *ud, float *vd)
+/* ctx_rasterizer_is_simple:
+ *
+ * Returns 1 when rasterizer->fast_aa is set, there are no ending or
+ * pending edges, and every pair of neighbouring active edges passes the
+ * ordering test below; returns 0 otherwise.
+ *
+ * Each edge is extrapolated to val + delta * CTX_AA_HALFSTEP ("end") and
+ * to val - delta * CTX_AA_HALFSTEP2 ("start"). A pair passes when the
+ * second edge's end is not left of the first edge's start or end, and the
+ * second edge's start is not left of the first edge's end.
+ */
+inline static int ctx_rasterizer_is_simple (CtxRasterizer *rasterizer)
 {
-  CtxGState *gstate = &rasterizer->state->gstate;
-  *u0 = x0;
-  *v0 = rasterizer->scanline / 15;//rasterizer->aa;
-  float u1 = *u0 + count;
-  float v1 = *v0;
-
-  _ctx_matrix_apply_transform (&gstate->source_fill.transform, u0, v0);
-  _ctx_matrix_apply_transform (&gstate->source_fill.transform, &u1, &v1);
-
-  *ud = (u1-*u0) / (count);
-  *vd = (v1-*v0) / (count);
-}
-
+  if (!rasterizer->fast_aa)
+    return 0;
+  if (rasterizer->ending_edges || rasterizer->pending_edges)
+    return 0;
+
+  const int  *edge_index   = rasterizer->edges;
+  CtxSegment *segment_base = (CtxSegment*)(rasterizer->edge_list.entries);
 
-static void
-ctx_u8_copy_normal (int components, CTX_COMPOSITE_ARGUMENTS)
-{
-  if (CTX_UNLIKELY(rasterizer->fragment))
+  const int edge_count = rasterizer->active_edges;
+  for (int pair = 0; pair + 1 < edge_count; pair++)
     {
-      float u0 = 0; float v0 = 0;
-      float ud = 0; float vd = 0;
-      ctx_init_uv (rasterizer, x0, count, &u0, &v0, &ud, &vd);
-      while (count--)
-      {
-        uint8_t cov = *coverage;
-        if (CTX_UNLIKELY(cov == 0))
-        {
-          u0+=ud;
-          v0+=vd;
-        }
-        else
-        {
-          rasterizer->fragment (rasterizer, u0, v0, src, 1, ud, vd);
-          u0+=ud;
-          v0+=vd;
-          if (cov == 255)
-          {
-            for (int c = 0; c < components; c++)
-              dst[c] = src[c];
-          }
-          else
-          {
-            uint8_t rcov = 255 - cov;
-            for (int c = 0; c < components; c++)
-              { dst[c] = (src[c]*cov + dst[c]*rcov)/255; }
-          }
-        }
-        dst += components;
-        coverage ++;
-      }
-      return;
+      const CtxSegment *left  = &segment_base[edge_index[pair]];
+      const CtxSegment *right = &segment_base[edge_index[pair + 1]];
+      const int left_delta  = left->delta;
+      const int right_delta = right->delta;
+      const int left_val    = left->val;
+      const int right_val   = right->val;
+      const int left_end    = left_val  + left_delta  * CTX_AA_HALFSTEP;
+      const int right_end   = right_val + right_delta * CTX_AA_HALFSTEP;
+      const int left_start  = left_val  - left_delta  * CTX_AA_HALFSTEP2;
+      const int right_start = right_val - right_delta * CTX_AA_HALFSTEP2;
+      /* the right edge must stay at or beyond the left edge */
+      const int ordered = right_end   >= left_end &&
+                          right_start >= left_end &&
+                          right_end   >= left_start;
+      if (!ordered)
+        return 0;
     }
-
-  while (count--)
-  {
-    uint8_t cov = *coverage;
-    uint8_t rcov = 255-cov;
-    for (int c = 0; c < components; c++)
-      { dst[c] = (src[c]*cov+dst[c]*rcov)/255; }
-    dst += components;
-    coverage ++;
-  }
+  return 1;
 }
 
-static void
-ctx_u8_clear_normal (int components, CTX_COMPOSITE_ARGUMENTS)
+
+inline static void
+ctx_rasterizer_generate_coverage_set2 (CtxRasterizer *rasterizer,
+                                         int            minx,
+                                         int            maxx,
+                                         uint8_t       *coverage,
+                                         int            is_winding)
 {
-  while (count--)
-  {
-    uint8_t cov = *coverage;
-    for (int c = 0; c < components; c++)
-      { dst[c] = (dst[c] * (256-cov)) >> 8; }
-    coverage ++;
-    dst += components;
-  }
-}
+  CtxSegment *entries = (CtxSegment*)(&rasterizer->edge_list.entries[0]);
+  int *edges  = rasterizer->edges;
+  int scanline        = rasterizer->scanline;
+  int active_edges    = rasterizer->active_edges;
+  int parity        = 0;
 
-typedef enum {
-  CTX_PORTER_DUFF_0,
-  CTX_PORTER_DUFF_1,
-  CTX_PORTER_DUFF_ALPHA,
-  CTX_PORTER_DUFF_1_MINUS_ALPHA,
-} CtxPorterDuffFactor;
+  coverage -= minx;
 
-#define  \
-ctx_porter_duff_factors(mode, foo, bar)\
-{\
-  switch (mode)\
-  {\
-     case CTX_COMPOSITE_SOURCE_ATOP:\
-        f_s = CTX_PORTER_DUFF_ALPHA;\
-        f_d = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
-      break;\
-     case CTX_COMPOSITE_DESTINATION_ATOP:\
-        f_s = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
-        f_d = CTX_PORTER_DUFF_ALPHA;\
-      break;\
-     case CTX_COMPOSITE_DESTINATION_IN:\
-        f_s = CTX_PORTER_DUFF_0;\
-        f_d = CTX_PORTER_DUFF_ALPHA;\
-      break;\
-     case CTX_COMPOSITE_DESTINATION:\
-        f_s = CTX_PORTER_DUFF_0;\
-        f_d = CTX_PORTER_DUFF_1;\
-       break;\
-     case CTX_COMPOSITE_SOURCE_OVER:\
-        f_s = CTX_PORTER_DUFF_1;\
-        f_d = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
-       break;\
-     case CTX_COMPOSITE_DESTINATION_OVER:\
-        f_s = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
-        f_d = CTX_PORTER_DUFF_1;\
-       break;\
-     case CTX_COMPOSITE_XOR:\
-        f_s = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
-        f_d = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
-       break;\
-     case CTX_COMPOSITE_DESTINATION_OUT:\
-        f_s = CTX_PORTER_DUFF_0;\
-        f_d = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
-       break;\
-     case CTX_COMPOSITE_SOURCE_OUT:\
-        f_s = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
-        f_d = CTX_PORTER_DUFF_0;\
-       break;\
-     case CTX_COMPOSITE_SOURCE_IN:\
-        f_s = CTX_PORTER_DUFF_ALPHA;\
-        f_d = CTX_PORTER_DUFF_0;\
-       break;\
-     case CTX_COMPOSITE_COPY:\
-        f_s = CTX_PORTER_DUFF_1;\
-        f_d = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
-       break;\
-     default:\
-     case CTX_COMPOSITE_CLEAR:\
-        f_s = CTX_PORTER_DUFF_0;\
-        f_d = CTX_PORTER_DUFF_0;\
-       break;\
-  }\
-}
+  const int minx_ = minx * CTX_RASTERIZER_EDGE_MULTIPLIER * CTX_SUBDIV;
+  const int maxx_ = maxx * CTX_RASTERIZER_EDGE_MULTIPLIER * CTX_SUBDIV;
 
-static void
-ctx_u8_source_over_normal_color (int components,
-                                 CtxRasterizer         *rasterizer,
-                                 uint8_t * __restrict__ dst,
-                                 uint8_t * __restrict__ src,
-                                 int                    x0,
-                                 uint8_t * __restrict__ coverage,
-                                 int                    count)
-{
-  uint8_t tsrc[5];
-  *((uint32_t*)tsrc) = *((uint32_t*)src);
+  for (int t = 0; t < active_edges -1;t++)
+    {
+      CtxSegment   *segment = &entries[edges[t]];
+      UPDATE_PARITY;
 
-  while (count--)
-  {
-    for (int c = 0; c < components; c++)
-      //dst[c] =  ((tsrc[c] * *coverage)>>8) + (dst[c] * (((65536)-(tsrc[components-1] * *coverage)))>>16);
-      dst[c] =  ((((tsrc[c] * *coverage)) + (dst[c] * (((255)-(((255+(tsrc[components-1] * 
*coverage))>>8))))))>>8);
-    coverage ++;
-    dst+=components;
-  }
-}
+       if (parity)
+        {
+          CtxSegment   *next_segment = &entries[edges[t+1]];
+          const int x0        = segment->val;
+          const int x1        = next_segment->val;
+          const int delta0    = segment->delta;
+          const int delta1    = next_segment->delta;
 
-static void
-ctx_u8_source_copy_normal_color (int components, CTX_COMPOSITE_ARGUMENTS)
-{
-  while (count--)
-  {
-    for (int c = 0; c < components; c++)
-      dst[c] =  ctx_lerp_u8(dst[c],src[c],coverage[0]);
-    coverage ++;
-    dst+=components;
-  }
-}
+          int x0_start = x0 - delta0 * CTX_AA_HALFSTEP2;
+          int x1_start = x1 - delta1 * CTX_AA_HALFSTEP2;
+          int x0_end   = x0 + delta0 * CTX_AA_HALFSTEP;
+          int x1_end   = x1 + delta1 * CTX_AA_HALFSTEP;
 
-static inline void
-ctx_RGBA8_source_over_normal_buf (CTX_COMPOSITE_ARGUMENTS, uint8_t *tsrc)
-{
-  while (count--)
-  {
-     uint32_t si_ga = ((*((uint32_t*)tsrc)) & 0xff00ff00) >> 8;
-     uint32_t si_rb = (*((uint32_t*)tsrc)) & 0x00ff00ff;
-//   uint32_t di_ga = ((*((uint32_t*)dst)) & 0xff00ff00) >> 8;
-//   uint32_t di_rb = (*((uint32_t*)dst)) & 0x00ff00ff;
-     uint32_t si_a  = si_ga >> 16;
-     uint32_t cov = *coverage;
-     uint32_t racov = (255-((255+si_a*cov)>>8));
-     *((uint32_t*)(dst)) =
+          int graystart = x0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
+          int grayend   = x1 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
+          int first     = graystart >> 8;
+          int last      = grayend   >> 8;
 
-     (((si_rb*cov+0xff00ff+(((*((uint32_t*)(dst)))&0x00ff00ff)*racov))>>8)&0x00ff00ff)|
-     ((si_ga*cov+0xff00ff+((((*((uint32_t*)(dst)))&0xff00ff00)>>8)*racov))&0xff00ff00);
+          if (CTX_UNLIKELY (first < minx))
+          { 
+            first = minx;
+            graystart=0;
+          }
+          if (CTX_UNLIKELY (last > maxx))
+          {
+            last = maxx;
+            grayend=255;
+          }
+          graystart = (graystart&0xff) ^ 255;
+          grayend   = (grayend & 0xff);
 
-     coverage ++;
-     tsrc += 4;
-     dst  += 4;
-  }
-}
+          if (first < last)
+          {
+            int pre = 1;
+            int post = 1;
 
-static inline void
-ctx_RGBA8_source_over_normal_full_cov_buf (CTX_COMPOSITE_ARGUMENTS, uint8_t *tsrc)
-{
-  while (count--)
-  {
-     uint32_t si_ga = ((*((uint32_t*)tsrc)) & 0xff00ff00) >> 8;
-     uint32_t si_rb = (*((uint32_t*)tsrc)) & 0x00ff00ff;
-     uint32_t si_a  = si_ga >> 16;
-     uint32_t racov = (255-si_a);
-     *((uint32_t*)(dst)) =
-     (((si_rb*255+0xff00ff+(((*((uint32_t*)(dst)))&0x00ff00ff)*racov))>>8)&0x00ff00ff)|
-     ((si_ga*255+0xff00ff+((((*((uint32_t*)(dst)))&0xff00ff00)>>8)*racov))&0xff00ff00);
-     tsrc += 4;
-     dst  += 4;
-  }
-}
+            if (abs(delta0) < CTX_RASTERIZER_AA_SLOPE_LIMIT3_FAST_AA)
+            {
+              coverage[first] += graystart;
+            }
+            else
+            {
+              unsigned int u0 = ctx_mini (maxx_, ctx_maxi (minx_, ctx_mini (x0_start, x0_end)));
+              unsigned int u1 = ctx_mini (maxx_, ctx_maxi (minx_, ctx_maxi (x0_start, x0_end)));
 
-static void
-ctx_RGBA8_source_copy_normal_buf (CTX_COMPOSITE_ARGUMENTS, uint8_t *tsrc)
-{
-  while (count--)
-  {
-    ((uint32_t*)dst)[0]=ctx_lerp_RGBA8 (((uint32_t*)dst)[0],
-                                        ((uint32_t*)tsrc)[0], coverage[0]);
-    coverage ++;
-    tsrc += 4;
-    dst  += 4;
-  }
-}
+              int us = u0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV);
+              int count = 0;
 
-static void
-ctx_RGBA8_source_over_normal_fragment (CTX_COMPOSITE_ARGUMENTS)
-{
-  float u0 = 0; float v0 = 0;
-  float ud = 0; float vd = 0;
-  ctx_init_uv (rasterizer, x0, count, &u0, &v0, &ud, &vd);
-  uint8_t _tsrc[4 * (count)];
-  rasterizer->fragment (rasterizer, u0, v0, &_tsrc[0], count, ud, vd);
-  ctx_RGBA8_source_over_normal_buf (rasterizer,
-                       dst, src, x0, coverage, count, &_tsrc[0]);
-}
+              int mod = ((u0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256) % 256)^255) *
+                         (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/255);
+              int sum = ((u1-u0+CTX_RASTERIZER_EDGE_MULTIPLIER * CTX_SUBDIV)/255);
 
-static inline void
-ctx_RGBA8_source_over_normal_full_cov_fragment (CTX_COMPOSITE_ARGUMENTS)
-{
-  float u0 = 0; float v0 = 0;
-  float ud = 0; float vd = 0;
-  ctx_init_uv (rasterizer, x0, count, &u0, &v0, &ud, &vd);
-  uint8_t _tsrc[4 * (count)];
-  rasterizer->fragment (rasterizer, u0, v0, &_tsrc[0], count, ud, vd);
-  ctx_RGBA8_source_over_normal_full_cov_buf (rasterizer,
-                       dst, src, x0, coverage, count, &_tsrc[0]);
-}
+              int recip = 65536/sum;
+              for (unsigned int u = u0; u < u1; u+= CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV)
+              {
+                coverage[us + count] = ((u - u0 + mod) * recip)>>16;
+                count++;
+              }
+              pre = (us+count-1)-first+1;
+            }
+  
+            if (abs(delta1) < CTX_RASTERIZER_AA_SLOPE_LIMIT3_FAST_AA)
+            {
+               coverage[last] += grayend;
+            }
+            else
+            {
+              unsigned int u0 = ctx_mini (maxx_, ctx_maxi (minx_, ctx_mini (x1_start, x1_end)));
+              unsigned int u1 = ctx_mini (maxx_, ctx_maxi (minx_, ctx_maxi (x1_start, x1_end)));
 
-static void
-ctx_RGBA8_source_copy_normal_fragment (CTX_COMPOSITE_ARGUMENTS)
-{
-  float u0 = 0; float v0 = 0;
-  float ud = 0; float vd = 0;
-  ctx_init_uv (rasterizer, x0, count, &u0, &v0, &ud, &vd);
-  uint8_t _tsrc[4 * (count)];
-  rasterizer->fragment (rasterizer, u0, v0, &_tsrc[0], count, ud, vd);
-  ctx_RGBA8_source_copy_normal_buf (rasterizer,
-                       dst, src, x0, coverage, count, &_tsrc[0]);
+              int us = u0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV);
+              int count = 0;
+              int mod = ((((u0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256) % 256)^255)+64) *
+                    (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/255));
+              int sum = ((u1-u0+CTX_RASTERIZER_EDGE_MULTIPLIER * CTX_SUBDIV * 1.25)/255);
+              int recip = 65536 / sum;
+              for (unsigned int u = u0; u < u1; u+= CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV)
+              {
+                coverage[us + count] = (((u - u0 + mod) * recip)>>16) ^ 255;
+                count++;
+              }
+              post = last-us+1;
+            }
+            for (int i = first + pre; i <= last - post; i++)
+              coverage[i] = 255;
+          }
+          else if (first == last)
+          {
+            coverage[last]+=(graystart-(grayend^255));
+          }
+        }
+   }
 }
 
 
-static void
-ctx_RGBA8_source_over_normal_color (CTX_COMPOSITE_ARGUMENTS)
+inline static void
+ctx_rasterizer_generate_coverage_apply2 (CtxRasterizer *rasterizer,
+                                         int            minx,
+                                         int            maxx,
+                                         uint8_t       *coverage,
+                                         int            is_winding,
+                                         CtxCovPath     comp)
 {
-#if CTX_REFERENCE
-  ctx_u8_source_over_normal_color (4, rasterizer, dst, src, x0, coverage, count);
-#else
-  uint32_t si_ga = ((uint32_t*)rasterizer->color)[1];
-  uint32_t si_rb = ((uint32_t*)rasterizer->color)[2];
-  uint32_t si_a  = si_ga >> 16;
+  CtxSegment *entries = (CtxSegment*)(&rasterizer->edge_list.entries[0]);
+  int *edges          = rasterizer->edges;
+  int  scanline       = rasterizer->scanline;
+  const int  bpp      = rasterizer->format->bpp;
+  int  active_edges   = rasterizer->active_edges;
+  int  parity         = 0;
 
-  while (count--)
-  {
-     uint32_t cov   = *coverage++;
-     uint32_t rcov  = (((255+si_a * cov)>>8))^255;
-     uint32_t di    = *((uint32_t*)dst);
-     uint32_t di_ga = ((di & 0xff00ff00) >> 8);
-     uint32_t di_rb = (di & 0x00ff00ff);
-     *((uint32_t*)(dst)) =
-     (((si_rb * cov + 0xff00ff + di_rb * rcov) & 0xff00ff00) >> 8)  |
-      ((si_ga * cov + 0xff00ff + di_ga * rcov) & 0xff00ff00);
-     dst+=4;
-  }
-#endif
-}
+  uint32_t *src_pixp   = ((uint32_t*)rasterizer->color);
+  const uint32_t src_pix    = src_pixp[0];
+  const uint32_t si_ga      = ((uint32_t*)rasterizer->color)[1];
+  const uint32_t si_rb      = ((uint32_t*)rasterizer->color)[2];
+  const uint32_t si_ga_full = ((uint32_t*)rasterizer->color)[3];
+  const uint32_t si_rb_full = ((uint32_t*)rasterizer->color)[4];
+  const uint32_t si_a  = src_pix >> 24;
 
-static void
-ctx_RGBA8_source_copy_normal_color (CTX_COMPOSITE_ARGUMENTS)
-{
-#if CTX_REFERENCE
-  ctx_u8_source_copy_normal_color (4, rasterizer, dst, src, x0, coverage, count);
-#else
-  uint32_t si_ga = ((uint32_t*)rasterizer->color)[1];
-  uint32_t si_rb = ((uint32_t*)rasterizer->color)[2];
+  uint8_t *dst = ( (uint8_t *) rasterizer->buf) +
+         (rasterizer->blit_stride * (scanline / CTX_FULL_AA));
 
-  while (count--)
-  {
-     uint32_t cov   = *coverage++;
-     uint32_t di    = *((uint32_t*)dst);
-     uint32_t di_ga = (di & 0xff00ff00);
-     uint32_t di_rb = (di & 0x00ff00ff);
+  coverage -= minx;
 
-     uint32_t d_rb  = si_rb - di_rb;
-     uint32_t d_ga  = si_ga - (di_ga>>8);
+  const int minx_ = minx * CTX_RASTERIZER_EDGE_MULTIPLIER * CTX_SUBDIV;
+  const int maxx_ = maxx * CTX_RASTERIZER_EDGE_MULTIPLIER * CTX_SUBDIV;
 
-     *((uint32_t*)(dst)) =
+  int accumulated_x0 = 65538;
+  int accumulated_x1 = 65536;
 
-     (((di_rb + ((d_rb * cov)>>8)) & 0x00ff00ff))  |
-      ((di_ga + ((d_ga * cov)      & 0xff00ff00)));
-     dst +=4;
-  }
-#endif
-}
+  for (int t = 0; t < active_edges -1;t++)
+    {
+      CtxSegment   *segment = &entries[edges[t]];
+      UPDATE_PARITY;
 
-static void
-ctx_RGBA8_clear_normal (CTX_COMPOSITE_ARGUMENTS)
-{
-  ctx_u8_clear_normal (4, rasterizer, dst, src, x0, coverage, count);
-}
+       if (parity)
+        {
+          CtxSegment   *next_segment = &entries[edges[t+1]];
+          const int x0        = segment->val;
+          const int x1        = next_segment->val;
+          const int delta0    = segment->delta;
+          const int delta1    = next_segment->delta;
 
-static void
-ctx_u8_blend_normal (int components, uint8_t * __restrict__ dst, uint8_t *src, uint8_t *blended, int count)
-{
-  for (int j = 0; j < count; j++)
-  {
-  switch (components)
-  {
-     case 3:
-       ((uint8_t*)(blended))[2] = ((uint8_t*)(src))[2];
-       *((uint16_t*)(blended)) = *((uint16_t*)(src));
-       break;
-     case 2:
-       *((uint16_t*)(blended)) = *((uint16_t*)(src));
-       break;
-     case 5:
-       *((uint32_t*)(blended)) = *((uint32_t*)(src));
-       ((uint8_t*)(blended))[4] = ((uint8_t*)(src))[4];
-       break;
-     case 4:
-       *((uint32_t*)(blended)) = *((uint32_t*)(src));
-       break;
-     default:
-       {
-        for (int i = 0; i<components;i++)
-           blended[i] = src[i];
-       }
-       break;
-  }
-    blended+=components;
-    src+=components;
-  }
-}
+          int x0_start = x0 - delta0 * CTX_AA_HALFSTEP2;
+          int x1_start = x1 - delta1 * CTX_AA_HALFSTEP2;
+          int x0_end   = x0 + delta0 * CTX_AA_HALFSTEP;
+          int x1_end   = x1 + delta1 * CTX_AA_HALFSTEP;
 
-/* branchless 8bit add that maxes out at 255 */
-static inline uint8_t ctx_sadd8(uint8_t a, uint8_t b)
-{
-  uint16_t s = (uint16_t)a+b;
-  return -(s>>8) | (uint8_t)s;
-}
+          int graystart = x0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
+          int grayend   = x1 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
+          int first     = graystart >> 8;
+          int last      = grayend   >> 8;
 
-#if CTX_BLENDING_AND_COMPOSITING
+          if (CTX_UNLIKELY (first < minx))
+          { 
+            first = minx;
+            graystart=0;
+          }
+          if (CTX_UNLIKELY (last > maxx))
+          {
+            last = maxx;
+            grayend=255;
+          }
+          graystart = (graystart&0xff) ^ 255;
+          grayend   = (grayend & 0xff);
 
-#define ctx_u8_blend_define(name, CODE) \
-static void \
-ctx_u8_blend_##name (int components, uint8_t * __restrict__ dst, uint8_t *src, uint8_t *blended, int count)\
-{\
-  for (int j = 0; j < count; j++) { \
-  uint8_t *s=src; uint8_t b[components];\
-  ctx_u8_deassociate_alpha (components, dst, b);\
-    CODE;\
-  blended[components-1] = src[components-1];\
-  ctx_u8_associate_alpha (components, blended);\
-  src += components;\
-  dst += components;\
-  blended += components;\
-  }\
-}
+          if (first < last)
+          {
+            int pre = 1;
+            int post = 1;
 
-#define ctx_u8_blend_define_seperable(name, CODE) \
-        ctx_u8_blend_define(name, for (int c = 0; c < components-1; c++) { CODE ;}) \
+          if (abs(delta0) < CTX_RASTERIZER_AA_SLOPE_LIMIT3_FAST_AA)
+          {
+             coverage[first] += graystart;
 
-ctx_u8_blend_define_seperable(multiply,     blended[c] = (b[c] * s[c])/255;)
-ctx_u8_blend_define_seperable(screen,       blended[c] = s[c] + b[c] - (s[c] * b[c])/255;)
-ctx_u8_blend_define_seperable(overlay,      blended[c] = b[c] < 127 ? (s[c] * b[c])/255 :
-                                                         s[c] + b[c] - (s[c] * b[c])/255;)
-ctx_u8_blend_define_seperable(darken,       blended[c] = ctx_mini (b[c], s[c]))
-ctx_u8_blend_define_seperable(lighten,      blended[c] = ctx_maxi (b[c], s[c]))
-ctx_u8_blend_define_seperable(color_dodge,  blended[c] = b[c] == 0 ? 0 :
-                                     s[c] == 255 ? 255 : ctx_mini(255, (255 * b[c]) / (255-s[c])))
-ctx_u8_blend_define_seperable(color_burn,   blended[c] = b[c] == 1 ? 1 :
-                                     s[c] == 0 ? 0 : 255 - ctx_mini(255, (255*(255 - b[c])) / s[c]))
-ctx_u8_blend_define_seperable(hard_light,   blended[c] = s[c] < 127 ? (b[c] * s[c])/255 :
-                                                          b[c] + s[c] - (b[c] * s[c])/255;)
-ctx_u8_blend_define_seperable(difference,   blended[c] = (b[c] - s[c]))
-ctx_u8_blend_define_seperable(divide,       blended[c] = s[c]?(255 * b[c]) / s[c]:0)
-ctx_u8_blend_define_seperable(addition,     blended[c] = ctx_sadd8 (s[c], b[c]))
-ctx_u8_blend_define_seperable(subtract,     blended[c] = ctx_maxi(0, s[c]-b[c]))
-ctx_u8_blend_define_seperable(exclusion,    blended[c] = b[c] + s[c] - 2 * (b[c] * s[c]/255))
-ctx_u8_blend_define_seperable(soft_light,
-  if (s[c] <= 255/2)
-  {
-    blended[c] = b[c] - (255 - 2 * s[c]) * b[c] * (255 - b[c]) / (255 * 255);
-  }
-  else
-  {
-    int d;
-    if (b[c] <= 255/4)
-      d = (((16 * b[c] - 12 * 255)/255 * b[c] + 4 * 255) * b[c])/255;
-    else
-      d = ctx_sqrtf(b[c]/255.0) * 255.4;
-    blended[c] = (b[c] + (2 * s[c] - 255) * (d - b[c]))/255;
-  }
-)
+            accumulated_x1 = first;
+            accumulated_x0 = ctx_mini (accumulated_x0, first);
+          }
+          else
+          {
+            unsigned int u0 = ctx_mini (maxx_, ctx_maxi (minx_, ctx_mini (x0_start, x0_end)));
+            unsigned int u1 = ctx_mini (maxx_, ctx_maxi (minx_, ctx_maxi (x0_start, x0_end)));
 
-static int ctx_int_get_max (int components, int *c)
-{
-  int max = 0;
-  for (int i = 0; i < components - 1; i ++)
-  {
-    if (c[i] > max) max = c[i];
-  }
-  return max;
-}
+            int mod = ((u0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256) % 256)^255) *
+                    (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/255);
+            int sum = ((u1-u0+CTX_RASTERIZER_EDGE_MULTIPLIER * CTX_SUBDIV)/255);
 
-static int ctx_int_get_min (int components, int *c)
-{
-  int min = 400;
-  for (int i = 0; i < components - 1; i ++)
-  {
-    if (c[i] < min) min = c[i];
-  }
-  return min;
-}
+            int us = u0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV);
+            int count = 0;
+            int recip = 65536/ sum;
+            for (unsigned int u = u0; u < u1; u+= CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV)
+            {
+              coverage[us + count] = ((u - u0 + mod) * recip)>>16;
+              count++;
+            }
+            pre = (us+count-1)-first+1;
 
-static int ctx_int_get_lum (int components, int *c)
-{
-  switch (components)
-  {
-    case 3:
-    case 4:
-            return CTX_CSS_RGB_TO_LUMINANCE(c);
-    case 1:
-    case 2:
-            return c[0];
-            break;
-    default:
-       {
-         int sum = 0;
-         for (int i = 0; i < components - 1; i ++)
-         {
-           sum += c[i];
-         }
-         return sum / (components - 1);
-       }
-            break;
-  }
-}
+            accumulated_x0 = ctx_mini (accumulated_x0, us);
+            accumulated_x1 = us + count - 1;
+          }
 
-static int ctx_u8_get_lum (int components, uint8_t *c)
-{
-  switch (components)
-  {
-    case 3:
-    case 4:
-            return CTX_CSS_RGB_TO_LUMINANCE(c);
-    case 1:
-    case 2:
-            return c[0];
+          if (accumulated_x1-accumulated_x0>=0)
+          {
+             switch (comp)
+             {
+                case CTX_COV_PATH_RGBA8_OVER:
+                {
+                  uint32_t *dst_i = (uint32_t*)&dst[((accumulated_x0) * bpp)/8];
+                  for (int i = 0; i < accumulated_x1-accumulated_x0+1; i++)
+                    {
+                      *dst_i = ctx_over_RGBA8_2 (*dst_i, si_ga, si_rb, si_a, coverage[accumulated_x0+i]);
+                      dst_i++;
+                    }
+                }
+                break;
+
+                case CTX_COV_PATH_RGBA8_COPY:
+                {
+                  uint32_t *dst_i = (uint32_t*)&dst[((accumulated_x0) * bpp)/8];
+                  for (int i = 0; i < accumulated_x1-accumulated_x0+1; i++)
+                  {
+                    *dst_i = ctx_lerp_RGBA8_2 (*dst_i, si_ga, si_rb, coverage[accumulated_x0+i]);
+                    dst_i++;
+                  }
+                }
+                  break;
+                case CTX_COV_PATH_RGB8_COPY:
+                {
+                  uint8_t *dst_i = (uint8_t*)&dst[((accumulated_x0) * bpp)/8];
+                  uint8_t *srcp = (uint8_t*)src_pixp;
+                  for (int i = 0; i < accumulated_x1-accumulated_x0+1; i++)
+                  {
+                    for (int c = 0; c < 3; c++)
+                      dst_i[c] = ctx_lerp_u8 (dst_i[c], srcp[c], coverage[accumulated_x0+i]);
+                    dst_i +=3;
+                  }
+                }
+                  break;
+                default:
+                rasterizer->apply_coverage (rasterizer,
+                          &dst[((accumulated_x0) * bpp)/8],
+                          rasterizer->color,
+                          accumulated_x0,
+                          &coverage[accumulated_x0],
+                          accumulated_x1-accumulated_x0+1);
+             }
+             accumulated_x0 = 65538;
+             accumulated_x1 = 65536;
+          }
+
+          if (abs(delta1) < CTX_RASTERIZER_AA_SLOPE_LIMIT3_FAST_AA)
+          {
+             coverage[last] += grayend;
+             accumulated_x1 = last;
+             accumulated_x0 = last;
+          }
+          else
+          {
+            unsigned int u0 = ctx_mini (maxx_, ctx_maxi (minx_, ctx_mini (x1_start, x1_end)));
+            unsigned int u1 = ctx_mini (maxx_, ctx_maxi (minx_, ctx_maxi (x1_start, x1_end)));
+
+            int us = u0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV);
+            int count = 0;
+
+            int mod = ((((u0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256) % 256)^255) +64) *
+                    (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/255));
+            int sum = ((u1-u0+CTX_RASTERIZER_EDGE_MULTIPLIER * CTX_SUBDIV * 1.25)/255);
+
+            int recip = 65536/ sum;
+            for (unsigned int u = u0; u < u1; u+= CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV)
+            {
+              coverage[us + count] = (((u - u0 + mod)*recip)>>16)^255;
+              count++;
+            }
+            post = last-us+1;
+
+            accumulated_x1 = us + count;
+            accumulated_x0 = us;
+          }
+          switch (comp)
+          {
+            case CTX_COV_PATH_RGBAF_COPY:
+            case CTX_COV_PATH_GRAY8_COPY:
+            case CTX_COV_PATH_RGB8_COPY:
+            case CTX_COV_PATH_GRAYA8_COPY:
+            case CTX_COV_PATH_RGB565_COPY:
+            case CTX_COV_PATH_RGB332_COPY:
+            case CTX_COV_PATH_CMYK8_COPY:
+            case CTX_COV_PATH_CMYKA8_COPY:
+            {
+              uint8_t* dsts = (uint8_t*)(&dst[(first *bpp)/8]);
+              uint8_t* dst_i = (uint8_t*)dsts;
+              uint8_t* color = ((uint8_t*)&rasterizer->color_native);
+              unsigned int bytes = rasterizer->format->bpp/8;
+              dst_i+=pre*bytes;
+
+              int scount = (last - post) - (first+pre) + 1;
+              unsigned int count = scount;
+
+              //for (int i = first + pre; i <= last - post; i++)
+              if (CTX_LIKELY(scount>0))
+              switch (bytes)
+              {
+                case 1:
+#if 1
+                  memset (dst_i, color[0], count);
+#else
+                  while (count--)
+                  {
+                    dst_i[0] = color[0];
+                    dst_i++;
+                  }
+#endif
+                  break;
+                case 2:
+                  {
+                    uint16_t val = ((uint16_t*)color)[0];
+                    while (count--)
+                    {
+                      ((uint16_t*)dst_i)[0] = val;
+                      dst_i+=2;
+                    }
+                  }
+                  break;
+                case 4:
+                  {
+                    uint32_t val = ((uint32_t*)color)[0];
+                    while (count--)
+                    {
+                      ((uint32_t*)dst_i)[0] = val;
+                      dst_i+=4;
+                    }
+                  }
+                  break;
+                case 16:
+                  ctx_span_set_color_x4 ((uint32_t*)dst, (uint32_t*)color, count);
+                  break;
+                case 3:
+                 while (count--)
+                 {
+                   *dst_i++ = color[0];
+                   *dst_i++ = color[1];
+                   *dst_i++ = color[2];
+                 }
+                 break;
+                case 5:
+                 while (count--)
+                 {
+                   *dst_i++ = color[0];
+                   *dst_i++ = color[1];
+                   *dst_i++ = color[2];
+                   *dst_i++ = color[3];
+                   *dst_i++ = color[4];
+                 }
+                 break;
+                default:
+                 while (count--)
+                 {
+                   for (unsigned int b = 0; b < bytes; b++)
+                     *dst_i++ = color[b];
+                 }
+                  break;
+               }
+             }
+             break;
+
+            case CTX_COV_PATH_RGBA8_COPY:
+            {
+              uint32_t* dst_pix = (uint32_t*)(&dst[(first *bpp)/8]);
+              dst_pix+=pre;
+              ctx_span_set_color (dst_pix, src_pix, last-first-pre-post + 1);
+            }
             break;
-    default:
-       {
-         int sum = 0;
-         for (int i = 0; i < components - 1; i ++)
-         {
-           sum += c[i];
-         }
-         return sum / (components - 1);
-       }
+
+
+            case CTX_COV_PATH_RGBA8_OVER:
+            {
+              uint32_t* dst_pix = (uint32_t*)(&dst[(first *bpp)/8]);
+              dst_pix+=pre;
+              int scount = (last - post) - (first + pre) + 1;
+              if (scount > 0)
+              {
+                unsigned int count = scount;
+                while (count--)
+                {
+                  *dst_pix = ctx_over_RGBA8_full_2(*dst_pix, si_ga_full, si_rb_full, si_a);
+                  dst_pix++;
+                }
+              }
+            }
             break;
-  }
-}
-static int ctx_u8_get_sat (int components, uint8_t *c)
-{
-  switch (components)
-  {
-    case 3:
-    case 4:
-            { int r = c[0];
-              int g = c[1];
-              int b = c[2];
-              return ctx_maxi(r, ctx_maxi(g,b)) - ctx_mini(r,ctx_mini(g,b));
+            case CTX_COV_PATH_RGBA8_COPY_FRAGMENT:
+            {
+              int width = last-first-pre-post+1;
+              if (width>0)
+              {
+                float u0 = 0; float v0 = 0;
+                float ud = 0; float vd = 0;
+                ctx_init_uv (rasterizer, first+pre, &u0, &v0, &ud, &vd);
+                rasterizer->fragment (rasterizer, u0, v0, &dst[(first+pre)*bpp/8],
+                                      width, ud, vd);
+              }
             }
             break;
-    case 1:
-    case 2:
-            return 0.0;
+            case CTX_COV_PATH_RGBA8_OVER_FRAGMENT:
+              {
+                int width = last-first-pre-post+1;
+                if (width>0)
+                ctx_RGBA8_source_over_normal_full_cov_fragment (rasterizer,
+                               &dst[((first+pre)*bpp)/8],
+                               NULL,
+                               first + pre,
+                               NULL,
+                               width, 1);
+              }
             break;
-    default:
-       {
-         int min = 1000;
-         int max = -1000;
-         for (int i = 0; i < components - 1; i ++)
-         {
-           if (c[i] < min) min = c[i];
-           if (c[i] > max) max = c[i];
-         }
-         return max-min;
-       }
-       break;
-  }
-}
-
-static void ctx_u8_set_lum (int components, uint8_t *c, uint8_t lum)
-{
-  int d = lum - ctx_u8_get_lum (components, c);
-  int tc[components];
-  for (int i = 0; i < components - 1; i++)
-  {
-    tc[i] = c[i] + d;
-  }
-
-  int l = ctx_int_get_lum (components, tc);
-  int n = ctx_int_get_min (components, tc);
-  int x = ctx_int_get_max (components, tc);
+            default:
+              {
+                int width = last-first-pre-post+1;
+                if (width > 0)
+                {
+                uint8_t opaque[width];
+                memset (opaque, 255, sizeof (opaque));
+                rasterizer->apply_coverage (rasterizer,
+                            &dst[((first + pre) * bpp)/8],
+                            rasterizer->color,
+                            first + pre,
+                            opaque,
+                            width);
+                }
+              }
+          }
+          }
+          else if (first == last)
+          {
+            coverage[last]+=(graystart-(grayend^255));
 
-  if (n < 0 && l!=n)
-  {
-    for (int i = 0; i < components - 1; i++)
-      tc[i] = l + (((tc[i] - l) * l) / (l-n));
-  }
+            accumulated_x1 = last;
+            accumulated_x0 = ctx_mini (accumulated_x0, last);
+          }
+        }
+   }
 
-  if (x > 255 && x!=l)
-  {
-    for (int i = 0; i < components - 1; i++)
-      tc[i] = l + (((tc[i] - l) * (255 - l)) / (x-l));
-  }
-  for (int i = 0; i < components - 1; i++)
-    c[i] = tc[i];
+   if (accumulated_x1-accumulated_x0>=0)
+   {
+             switch (comp)
+             {
+                case CTX_COV_PATH_RGBA8_OVER:
+                {
+                  uint32_t *dst_i = (uint32_t*)&dst[((accumulated_x0) * bpp)/8];
+                  for (int i = 0; i < accumulated_x1-accumulated_x0+1; i++)
+                    {
+                      *dst_i = ctx_over_RGBA8_2 (*dst_i, si_ga, si_rb, si_a, coverage[accumulated_x0+i]);
+                      dst_i++;
+                    }
+                }
+                break;
+                case CTX_COV_PATH_RGBA8_COPY:
+                {
+                  uint32_t *dst_i = (uint32_t*)&dst[((accumulated_x0) * bpp)/8];
+                  for (int i = 0; i < accumulated_x1-accumulated_x0+1; i++)
+                  {
+                    *dst_i = ctx_lerp_RGBA8_2 (*dst_i, si_ga, si_rb, coverage[accumulated_x0+i]);
+                    dst_i++;
+                  }
+                }
+                  break;
+                default:
+                rasterizer->apply_coverage (rasterizer,
+                          &dst[((accumulated_x0) * bpp)/8],
+                          rasterizer->color,
+                          accumulated_x0,
+                          &coverage[accumulated_x0],
+                          accumulated_x1-accumulated_x0+1);
+             }
+   }
 }
 
-static void ctx_u8_set_sat (int components, uint8_t *c, uint8_t sat)
-{
-  int max = 0, mid = 1, min = 2;
-  
-  if (c[min] > c[mid]){int t = min; min = mid; mid = t;}
-  if (c[mid] > c[max]){int t = mid; mid = max; max = t;}
-  if (c[min] > c[mid]){int t = min; min = mid; mid = t;}
+#undef CTX_EDGE_Y0
+#undef CTX_EDGE
 
-  if (c[max] > c[min])
-  {
-    c[mid] = ((c[mid]-c[min]) * sat) / (c[max] - c[min]);
-    c[max] = sat;
-  }
-  else
+static inline void
+ctx_rasterizer_reset (CtxRasterizer *rasterizer)
+{
+  rasterizer->has_shape       =   
+  rasterizer->has_prev        =   
+  rasterizer->edge_list.count =    // ready for new edges
+  rasterizer->edge_pos        =   
+  rasterizer->scanline        = 0;
+  if (CTX_LIKELY(!rasterizer->preserve))
   {
-    c[mid] = c[max] = 0;
+    rasterizer->scan_min      =
+    rasterizer->col_min       = 5000;
+    rasterizer->scan_max      =
+    rasterizer->col_max       = -5000;
   }
-  c[min] = 0;
+  //rasterizer->comp_op       = NULL; // keep comp_op cached 
+  //     between rasterizations where rendering attributes are
+  //     nonchanging
 }
 
-ctx_u8_blend_define(color,
-  for (int i = 0; i < components; i++)
-    blended[i] = s[i];
-  ctx_u8_set_lum(components, blended, ctx_u8_get_lum (components, s));
-)
-
-ctx_u8_blend_define(hue,
-  int in_sat = ctx_u8_get_sat(components, b);
-  int in_lum = ctx_u8_get_lum(components, b);
-  for (int i = 0; i < components; i++)
-    blended[i] = s[i];
-  ctx_u8_set_sat(components, blended, in_sat);
-  ctx_u8_set_lum(components, blended, in_lum);
-)
-
-ctx_u8_blend_define(saturation,
-  int in_sat = ctx_u8_get_sat(components, s);
-  int in_lum = ctx_u8_get_lum(components, b);
-  for (int i = 0; i < components; i++)
-    blended[i] = b[i];
-  ctx_u8_set_sat(components, blended, in_sat);
-  ctx_u8_set_lum(components, blended, in_lum);
-)
+static void
+ctx_rasterizer_rasterize_edges2 (CtxRasterizer *rasterizer, const int fill_rule 
+#if CTX_SHAPE_CACHE
+                                ,CtxShapeEntry *shape
+#endif
+                               )
+{
+  rasterizer->pending_edges   =   
+  rasterizer->active_edges    =   0;
+  //rasterizer->scanline        = 0;
+  int       is_winding  = fill_rule == CTX_FILL_RULE_WINDING;
+  const CtxCovPath comp = rasterizer->comp;
+  const int real_aa     = rasterizer->aa;
+  uint8_t  *dst         = ((uint8_t *) rasterizer->buf);
+  int       scan_start  = rasterizer->blit_y * CTX_FULL_AA;
+  int       scan_end    = scan_start + (rasterizer->blit_height - 1) * CTX_FULL_AA;
+  const int blit_width  = rasterizer->blit_width;
+  const int blit_max_x  = rasterizer->blit_x + blit_width;
+  int       minx        = rasterizer->col_min / CTX_SUBDIV - rasterizer->blit_x;
+  int       maxx        = (rasterizer->col_max + CTX_SUBDIV-1) / CTX_SUBDIV -
+                          rasterizer->blit_x;
+  const int blit_stride = rasterizer->blit_stride;
+  uint8_t   real_fraction = 255/real_aa;
 
-ctx_u8_blend_define(luminosity,
-  int in_lum = ctx_u8_get_lum(components, s);
-  for (int i = 0; i < components; i++)
-    blended[i] = b[i];
-  ctx_u8_set_lum(components, blended, in_lum);
-)
+  rasterizer->prev_active_edges = -1;
+  if (CTX_UNLIKELY (
+#if CTX_SHAPE_CACHE
+    !shape &&
 #endif
+    maxx > blit_max_x - 1))
+    { maxx = blit_max_x - 1; }
 
-CTX_INLINE static void
-ctx_u8_blend (int components, CtxBlend blend, uint8_t * __restrict__ dst, uint8_t *src, uint8_t *blended, int count)
-{
-#if CTX_BLENDING_AND_COMPOSITING
-  switch (blend)
-  {
-    case CTX_BLEND_NORMAL:      ctx_u8_blend_normal      (components, dst, src, blended, count); break;
-    case CTX_BLEND_MULTIPLY:    ctx_u8_blend_multiply    (components, dst, src, blended, count); break;
-    case CTX_BLEND_SCREEN:      ctx_u8_blend_screen      (components, dst, src, blended, count); break;
-    case CTX_BLEND_OVERLAY:     ctx_u8_blend_overlay     (components, dst, src, blended, count); break;
-    case CTX_BLEND_DARKEN:      ctx_u8_blend_darken      (components, dst, src, blended, count); break;
-    case CTX_BLEND_LIGHTEN:     ctx_u8_blend_lighten     (components, dst, src, blended, count); break;
-    case CTX_BLEND_COLOR_DODGE: ctx_u8_blend_color_dodge (components, dst, src, blended, count); break;
-    case CTX_BLEND_COLOR_BURN:  ctx_u8_blend_color_burn  (components, dst, src, blended, count); break;
-    case CTX_BLEND_HARD_LIGHT:  ctx_u8_blend_hard_light  (components, dst, src, blended, count); break;
-    case CTX_BLEND_SOFT_LIGHT:  ctx_u8_blend_soft_light  (components, dst, src, blended, count); break;
-    case CTX_BLEND_DIFFERENCE:  ctx_u8_blend_difference  (components, dst, src, blended, count); break;
-    case CTX_BLEND_EXCLUSION:   ctx_u8_blend_exclusion   (components, dst, src, blended, count); break;
-    case CTX_BLEND_COLOR:       ctx_u8_blend_color       (components, dst, src, blended, count); break;
-    case CTX_BLEND_HUE:         ctx_u8_blend_hue         (components, dst, src, blended, count); break;
-    case CTX_BLEND_SATURATION:  ctx_u8_blend_saturation  (components, dst, src, blended, count); break;
-    case CTX_BLEND_LUMINOSITY:  ctx_u8_blend_luminosity  (components, dst, src, blended, count); break;
-    case CTX_BLEND_ADDITION:    ctx_u8_blend_addition    (components, dst, src, blended, count); break;
-    case CTX_BLEND_DIVIDE:      ctx_u8_blend_divide      (components, dst, src, blended, count); break;
-    case CTX_BLEND_SUBTRACT:    ctx_u8_blend_subtract    (components, dst, src, blended, count); break;
-  }
+  minx = ctx_maxi (rasterizer->state->gstate.clip_min_x, minx);
+  maxx = ctx_mini (rasterizer->state->gstate.clip_max_x, maxx);
+  minx = ctx_maxi (0, minx); // redundant?
+  if (CTX_UNLIKELY (minx >= maxx))
+    {
+      return;
+    }
+#if CTX_SHAPE_CACHE
+  uint8_t _coverage[shape?2:maxx-minx+1];
 #else
-  switch (blend)
-  {
-    default:                    ctx_u8_blend_normal      (components, dst, src, blended, count); break;
-  }
-
+  uint8_t _coverage[maxx-minx+1];
 #endif
-}
+  uint8_t *coverage = &_coverage[0];
 
-CTX_INLINE static void
-__ctx_u8_porter_duff (CtxRasterizer         *rasterizer,
-                     int                    components,
-                     uint8_t *              dst,
-                     uint8_t *              src,
-                     int                    x0,
-                     uint8_t * __restrict__ coverage,
-                     int                    count,
-                     CtxCompositingMode     compositing_mode,
-                     CtxFragment            fragment,
-                     CtxBlend               blend)
-{
-  CtxPorterDuffFactor f_s, f_d;
-  ctx_porter_duff_factors (compositing_mode, &f_s, &f_d);
-  CtxGState *gstate = &rasterizer->state->gstate;
-  uint8_t global_alpha_u8 = gstate->global_alpha_u8;
-  uint8_t tsrc[components * count];
-  int src_step = 0;
+  int coverage_size;
 
-  if (gstate->source_fill.type == CTX_SOURCE_COLOR)
-  {
-    src = &tsrc[0];
-    fragment (rasterizer, 0, 0, src, 1, 0, 0);
-    if (blend != CTX_BLEND_NORMAL)
-      ctx_u8_blend (components, blend, dst, src, src, 1);
-  }
+  rasterizer->scan_min -= (rasterizer->scan_min % CTX_FULL_AA);
+#if CTX_SHAPE_CACHE
+  if (shape)
+    {
+      coverage_size = shape->width;
+      coverage = &shape->data[0];
+      scan_start = rasterizer->scan_min;
+      scan_end   = rasterizer->scan_max;
+    }
   else
+#endif
   {
-    float u0 = 0; float v0 = 0;
-    float ud = 0; float vd = 0;
-    src = &tsrc[0];
+     coverage_size = sizeof (_coverage);
+     if (rasterizer->scan_min > scan_start)
+       {
+          dst += (rasterizer->blit_stride * (rasterizer->scan_min-scan_start) / CTX_FULL_AA);
+          scan_start = rasterizer->scan_min;
+       }
+      scan_end = ctx_mini (rasterizer->scan_max, scan_end);
+  }
 
-    ctx_init_uv (rasterizer, x0, count, &u0, &v0, &ud, &vd);
-    fragment (rasterizer, u0, v0, src, count, ud, vd);
-    if (blend != CTX_BLEND_NORMAL)
-      ctx_u8_blend (components, blend, dst, src, src, count);
-    src_step = components;
+  if (CTX_UNLIKELY(rasterizer->state->gstate.clip_min_y * CTX_FULL_AA > scan_start ))
+    { 
+       dst += (rasterizer->blit_stride * (rasterizer->state->gstate.clip_min_y * CTX_FULL_AA -scan_start) / CTX_FULL_AA);
+       scan_start = rasterizer->state->gstate.clip_min_y * CTX_FULL_AA; 
+    }
+  scan_end = ctx_mini (rasterizer->state->gstate.clip_max_y * CTX_FULL_AA, scan_end);
+  if (CTX_UNLIKELY(scan_start > scan_end ||
+      (scan_start > (rasterizer->blit_y + (rasterizer->blit_height-1)) * CTX_FULL_AA) ||
+      (scan_end < (rasterizer->blit_y) * CTX_FULL_AA)))
+  { 
+    /* not affecting this rasterizers scanlines */
+    return;
   }
 
-  while (count--)
-  {
-    uint32_t cov = *coverage;
+  rasterizer->horizontal_edges =
+    rasterizer->needs_aa3  =
+    rasterizer->needs_aa5  =
+    rasterizer->needs_aa15 = 0;
 
-    if (CTX_UNLIKELY(global_alpha_u8 != 255))
-      cov = (cov * global_alpha_u8 + 255) >> 8;
+  ctx_rasterizer_sort_edges (rasterizer);
+  rasterizer->scanline = scan_start;
+  ctx_rasterizer_feed_edges (rasterizer, 0); 
 
-    uint8_t csrc[components];
-    for (int c = 0; c < components; c++)
-      csrc[c] = (src[c] * cov + 255) >> 8;
+  int avoid_direct = (0 
+#if CTX_ENABLE_CLIP
+         || rasterizer->clip_buffer
+#endif
+#if CTX_ENABLE_SHADOW_BLUR
+         || rasterizer->in_shadow
+#endif
+#if CTX_SHAPE_CACHE
+         || shape != NULL
+#endif
+         );
 
-    for (int c = 0; c < components; c++)
+  for (; rasterizer->scanline <= scan_end;)
     {
-      uint32_t res = 0;
-#if 1
-      switch (f_s)
-      {
-        case CTX_PORTER_DUFF_0:             break;
-        case CTX_PORTER_DUFF_1:             res += (csrc[c] ); break;
-        case CTX_PORTER_DUFF_ALPHA:         res += (csrc[c] * dst[components-1] + 255) >> 8; break;
-        case CTX_PORTER_DUFF_1_MINUS_ALPHA: res += (csrc[c] * (256-dst[components-1])) >> 8; break;
-      }
-      switch (f_d)
-      {
-        case CTX_PORTER_DUFF_0: break;
-        case CTX_PORTER_DUFF_1:             res += dst[c]; break;
-        case CTX_PORTER_DUFF_ALPHA:         res += (dst[c] * csrc[components-1] + 255) >> 8; break;
-        case CTX_PORTER_DUFF_1_MINUS_ALPHA: res += (dst[c] * (256-csrc[components-1])) >> 8; break;
-      }
-#else
-      switch (f_s)
-      {
-        case CTX_PORTER_DUFF_0:             break;
-        case CTX_PORTER_DUFF_1:             res += (csrc[c] ); break;
-        case CTX_PORTER_DUFF_ALPHA:         res += (csrc[c] * dst[components-1])/255; break;
-        case CTX_PORTER_DUFF_1_MINUS_ALPHA: res += (csrc[c] * (255-dst[components-1]))/255; break;
-      }
-      switch (f_d)
+
+    if (rasterizer->active_edges == 0 && rasterizer->pending_edges == 0)
+    { /* no edges */
+      ctx_rasterizer_feed_edges (rasterizer, 0);
+      ctx_rasterizer_increment_edges (rasterizer, CTX_FULL_AA);
+      dst += blit_stride;
+#if CTX_SHAPE_CACHE
+      if (shape)
       {
-        case CTX_PORTER_DUFF_0: break;
-        case CTX_PORTER_DUFF_1:             res += dst[c]; break;
-        case CTX_PORTER_DUFF_ALPHA:         res += (dst[c] * csrc[components-1])/255; break;
-        case CTX_PORTER_DUFF_1_MINUS_ALPHA: res += (dst[c] * (255-csrc[components-1]))/255; break;
+        memset (coverage, 0, coverage_size);
+        coverage += shape->width;
       }
 #endif
-      dst[c] = res;
+      rasterizer->prev_active_edges = rasterizer->active_edges;
+      continue;
     }
-    coverage ++;
-    src+=src_step;
-    dst+=components;
-  }
-}
-
-CTX_INLINE static void
-_ctx_u8_porter_duff (CtxRasterizer         *rasterizer,
-                     int                    components,
-                     uint8_t *              dst,
-                     uint8_t * __restrict__ src,
-                     int                    x0,
-                     uint8_t *              coverage,
-                     int                    count,
-                     CtxCompositingMode     compositing_mode,
-                     CtxFragment            fragment,
-                     CtxBlend               blend)
-{
-  __ctx_u8_porter_duff (rasterizer, components, dst, src, x0, coverage, count, compositing_mode, fragment, blend);
-}
-
-#define _ctx_u8_porter_duffs(comp_format, components, source, fragment, blend) \
-   switch (rasterizer->state->gstate.compositing_mode) \
-   { \
-     case CTX_COMPOSITE_SOURCE_ATOP: \
-      _ctx_u8_porter_duff (rasterizer, components, dst, src, x0, coverage, count, \
-        CTX_COMPOSITE_SOURCE_ATOP, fragment, blend);\
-      break;\
-     case CTX_COMPOSITE_DESTINATION_ATOP:\
-      _ctx_u8_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
-        CTX_COMPOSITE_DESTINATION_ATOP, fragment, blend);\
-      break;\
-     case CTX_COMPOSITE_DESTINATION_IN:\
-      _ctx_u8_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
-        CTX_COMPOSITE_DESTINATION_IN, fragment, blend);\
-      break;\
-     case CTX_COMPOSITE_DESTINATION:\
-      _ctx_u8_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
-        CTX_COMPOSITE_DESTINATION, fragment, blend);\
-       break;\
-     case CTX_COMPOSITE_SOURCE_OVER:\
-      _ctx_u8_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
-        CTX_COMPOSITE_SOURCE_OVER, fragment, blend);\
-       break;\
-     case CTX_COMPOSITE_DESTINATION_OVER:\
-      _ctx_u8_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
-        CTX_COMPOSITE_DESTINATION_OVER, fragment, blend);\
-       break;\
-     case CTX_COMPOSITE_XOR:\
-      _ctx_u8_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
-        CTX_COMPOSITE_XOR, fragment, blend);\
-       break;\
-     case CTX_COMPOSITE_DESTINATION_OUT:\
-       _ctx_u8_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
-        CTX_COMPOSITE_DESTINATION_OUT, fragment, blend);\
-       break;\
-     case CTX_COMPOSITE_SOURCE_OUT:\
-       _ctx_u8_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
-        CTX_COMPOSITE_SOURCE_OUT, fragment, blend);\
-       break;\
-     case CTX_COMPOSITE_SOURCE_IN:\
-       _ctx_u8_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
-        CTX_COMPOSITE_SOURCE_IN, fragment, blend);\
-       break;\
-     case CTX_COMPOSITE_COPY:\
-       _ctx_u8_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
-        CTX_COMPOSITE_COPY, fragment, blend);\
-       break;\
-     case CTX_COMPOSITE_CLEAR:\
-       _ctx_u8_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
-        CTX_COMPOSITE_CLEAR, fragment, blend);\
-       break;\
-   }
-
-/* generating one function per compositing_mode would be slightly more efficient,
- * but on embedded targets leads to slightly more code bloat,
- * here we trade off a slight amount of performance
- */
-#define ctx_u8_porter_duff(comp_format, components, source, fragment, blend) \
-static void \
-ctx_##comp_format##_porter_duff_##source (CTX_COMPOSITE_ARGUMENTS) \
-{ \
-  _ctx_u8_porter_duffs(comp_format, components, source, fragment, blend);\
-}
-
-ctx_u8_porter_duff(RGBA8, 4,generic, rasterizer->fragment, rasterizer->state->gstate.blend_mode)
-//ctx_u8_porter_duff(comp_name, components,color_##blend_name,  NULL, blend_mode)
-
-static void
-ctx_RGBA8_nop (CTX_COMPOSITE_ARGUMENTS)
-{
-}
+    else if (real_aa != 1 && ( (rasterizer->horizontal_edges!=0) 
+          || (rasterizer->active_edges != rasterizer->prev_active_edges)
+          || (rasterizer->active_edges + rasterizer->pending_edges == rasterizer->ending_edges)
+          ))
+    { /* needs full AA */
+        int increment = CTX_FULL_AA/real_aa;
+        memset (coverage, 0, coverage_size);
+        for (int i = 0; i < real_aa; i++)
+        {
+          ctx_rasterizer_feed_edges (rasterizer, 0);
+          ctx_rasterizer_generate_coverage (rasterizer, minx, maxx, coverage, is_winding, real_aa, real_fraction);
+          ctx_rasterizer_increment_edges (rasterizer, increment);
+        }
+    }
+    else if (! avoid_direct & (rasterizer->needs_aa3 == 0))
+    { /* can generate with direct rendering to target (we're not using shape cache) */
+      ctx_rasterizer_increment_edges (rasterizer, CTX_AA_HALFSTEP2);
+      ctx_rasterizer_feed_edges (rasterizer, 0);
 
+      ctx_rasterizer_generate_coverage_apply (rasterizer, minx, maxx, coverage, is_winding, comp);
+      ctx_rasterizer_increment_edges (rasterizer, CTX_AA_HALFSTEP);
 
-static void
-ctx_setup_RGBA8 (CtxRasterizer *rasterizer)
-{
-  CtxGState *gstate = &rasterizer->state->gstate;
-  int components       = 4;
-  rasterizer->fragment = ctx_rasterizer_get_fragment_RGBA8 (rasterizer);
-  rasterizer->comp_op  = ctx_RGBA8_porter_duff_generic;
-  rasterizer->comp = CTX_COV_PATH_FALLBACK;
+      dst += blit_stride;
+      rasterizer->prev_active_edges = rasterizer->active_edges;
+      continue;
+    }
+    else if (avoid_direct & (rasterizer->needs_aa3 == 0))
+    { /* cheap fully correct AA, to coverage mask / clipping */
+      ctx_rasterizer_increment_edges (rasterizer, CTX_AA_HALFSTEP2);
+      ctx_rasterizer_feed_edges (rasterizer, 0);
 
-  int blend_mode       = gstate->blend_mode;
-  int compositing_mode = gstate->compositing_mode;
+      memset (coverage, 0, coverage_size);
+      ctx_rasterizer_generate_coverage_set (rasterizer, minx, maxx, coverage, is_winding);
+      ctx_rasterizer_increment_edges (rasterizer, CTX_AA_HALFSTEP);
+    }
+    else if (ctx_rasterizer_is_simple (rasterizer))
+    { /* the scanline transitions does not contain multiple intersections - each aa segment is a linear ramp */
+      ctx_rasterizer_increment_edges (rasterizer, CTX_AA_HALFSTEP2);
+      ctx_rasterizer_feed_edges (rasterizer, 1);
+      memset (coverage, 0, coverage_size);
+      if (!avoid_direct)
+      {
+        ctx_rasterizer_generate_coverage_apply2 (rasterizer, minx, maxx, coverage, is_winding,
+                      comp);
+        ctx_rasterizer_increment_edges (rasterizer, CTX_AA_HALFSTEP);
 
-  if (gstate->source_fill.type == CTX_SOURCE_COLOR)
-    {
-      ctx_fragment_color_RGBA8 (rasterizer, 0,0, rasterizer->color, 1, 0,0);
-      if (gstate->global_alpha_u8 != 255)
+        dst += blit_stride;
+        rasterizer->prev_active_edges = rasterizer->active_edges;
+        continue;
+      }
+      ctx_rasterizer_generate_coverage_set2 (rasterizer, minx, maxx, coverage, is_winding);
+      ctx_rasterizer_increment_edges (rasterizer, CTX_AA_HALFSTEP);
+      if (real_aa == 1)
       {
-        for (int c = 0; c < 4; c ++)
-          rasterizer->color[c] = (rasterizer->color[c] * gstate->global_alpha_u8 + 255)>>8;
+        for (int x = minx; x <= maxx; x ++)
+          coverage[x] = coverage[x] > 127?255:0;
       }
-      uint32_t src_pix    = ((uint32_t*)rasterizer->color)[0];
-      uint32_t si_ga      = (src_pix & 0xff00ff00) >> 8;
-      uint32_t si_rb      = src_pix & 0x00ff00ff;
-      uint32_t si_ga_full = si_ga * 255;
-      uint32_t si_rb_full = si_rb * 255;
-//      uint32_t si_a       = si_ga >> 16;
-
-      ((uint32_t*)rasterizer->color)[1] = si_ga;
-      ((uint32_t*)rasterizer->color)[2] = si_rb;
-      ((uint32_t*)rasterizer->color)[3] = si_ga_full;
-      ((uint32_t*)rasterizer->color)[4] = si_rb_full;
+    }
+    else
+    { /* determine level of oversampling based on lowest steepness edges */
+      int aa = 3;
+      if (rasterizer->needs_aa5 && real_aa >=5)
+      {
+         aa = 5;
+         if (rasterizer->needs_aa15 && real_aa >=15)
+           aa = 15;
+      }
+      int scanline_increment = 15/aa;
 
-      if (blend_mode == CTX_BLEND_NORMAL &&
-           compositing_mode == CTX_COMPOSITE_COPY)
+      memset (coverage, 0, coverage_size);
+      uint8_t fraction = 255/aa;
+      for (int i = 0; i < CTX_FULL_AA; i+= scanline_increment)
       {
-        rasterizer->comp_op = ctx_RGBA8_source_copy_normal_color;
-        rasterizer->comp = CTX_COV_PATH_COPY;
+        ctx_rasterizer_feed_edges (rasterizer, 0);
+        ctx_rasterizer_generate_coverage (rasterizer, minx, maxx, coverage, is_winding, aa, fraction);
+        ctx_rasterizer_increment_edges (rasterizer, scanline_increment);
       }
-      else if (blend_mode == CTX_BLEND_NORMAL &&
-          compositing_mode == CTX_COMPOSITE_SOURCE_OVER)
-     {
-       if (rasterizer->color[components-1] == 255)
-       {
-        rasterizer->comp_op = ctx_RGBA8_source_copy_normal_color;
-        rasterizer->comp = CTX_COV_PATH_COPY;
-       }
-       else
-       {
-        rasterizer->comp_op = ctx_RGBA8_source_over_normal_color;
-        rasterizer->comp = CTX_COV_PATH_OVER;
-       }
-     }
-     else if (compositing_mode == CTX_COMPOSITE_CLEAR)
-     {
-       rasterizer->comp_op = ctx_RGBA8_clear_normal;
-     }
-  }
-  else if (blend_mode == CTX_BLEND_NORMAL &&
-           compositing_mode == CTX_COMPOSITE_SOURCE_OVER)
+    }
+
+  ctx_coverage_post_process (rasterizer, minx, maxx, coverage - minx, NULL, NULL);
+#if CTX_SHAPE_CACHE
+  if (shape == NULL)
+#endif
   {
-     rasterizer->comp_op = ctx_RGBA8_source_over_normal_fragment;
-     rasterizer->comp = CTX_COV_PATH_OVER_FRAGMENT;
+    rasterizer->apply_coverage (rasterizer,
+                         &dst[(minx * rasterizer->format->bpp) /8],
+                         rasterizer->color,
+                         minx,
+                         coverage,
+                         maxx-minx+ 1);
   }
-  else if (blend_mode == CTX_BLEND_NORMAL &&
-           compositing_mode == CTX_COMPOSITE_COPY)
+#if CTX_SHAPE_CACHE
+  else
   {
-     rasterizer->comp_op = ctx_RGBA8_source_copy_normal_fragment;
-     rasterizer->comp = CTX_COV_PATH_COPY_FRAGMENT;
+    coverage += shape->width;
   }
-}
+#endif
+      dst += blit_stride;
+      rasterizer->prev_active_edges = rasterizer->active_edges;
+    }
 
-static void
-ctx_composite_convert (CTX_COMPOSITE_ARGUMENTS)
-{
-  uint8_t pixels[count * rasterizer->format->ebpp];
-  rasterizer->format->to_comp (rasterizer, x0, dst, &pixels[0], count);
-  rasterizer->comp_op (rasterizer, &pixels[0], rasterizer->color, x0, coverage, count);
-  rasterizer->format->from_comp (rasterizer, x0, &pixels[0], dst, count);
-}
+  if (CTX_UNLIKELY(rasterizer->state->gstate.compositing_mode == CTX_COMPOSITE_SOURCE_OUT ||
+      rasterizer->state->gstate.compositing_mode == CTX_COMPOSITE_SOURCE_IN ||
+      rasterizer->state->gstate.compositing_mode == CTX_COMPOSITE_DESTINATION_IN ||
+      rasterizer->state->gstate.compositing_mode == CTX_COMPOSITE_DESTINATION_ATOP ||
+      rasterizer->state->gstate.compositing_mode == CTX_COMPOSITE_CLEAR))
+  {
+     /* fill in the rest of the blitrect when compositing mode permits it */
+     uint8_t nocoverage[rasterizer->blit_width];
+     //int gscan_start = rasterizer->state->gstate.clip_min_y * CTX_FULL_AA;
+     int gscan_start = rasterizer->state->gstate.clip_min_y * CTX_FULL_AA;
+     int gscan_end = rasterizer->state->gstate.clip_max_y * CTX_FULL_AA;
+     memset (nocoverage, 0, sizeof(nocoverage));
+     int startx   = rasterizer->state->gstate.clip_min_x;
+     int endx     = rasterizer->state->gstate.clip_max_x;
+     int clipw    = endx-startx + 1;
+     uint8_t *dst = ( (uint8_t *) rasterizer->buf);
 
-#if CTX_ENABLE_FLOAT
-static void
-ctx_float_copy_normal (int components, CTX_COMPOSITE_ARGUMENTS)
-{
-  float *dstf = (float*)dst;
-  float *srcf = (float*)src;
-  float u0 = 0; float v0 = 0;
-  float ud = 0; float vd = 0;
+     dst = (uint8_t*)(rasterizer->buf) + rasterizer->blit_stride * (gscan_start / CTX_FULL_AA);
+     for (rasterizer->scanline = gscan_start; rasterizer->scanline < scan_start;)
+     {
+       rasterizer->apply_coverage (rasterizer,
+                                   &dst[ (startx * rasterizer->format->bpp) /8],
+                                   rasterizer->color,
+                                      0,
+                                      nocoverage, clipw);
+       rasterizer->scanline += CTX_FULL_AA;
+       dst += rasterizer->blit_stride;
+     }
+     if (minx < startx)
+     {
+     dst = (uint8_t*)(rasterizer->buf) + rasterizer->blit_stride * (scan_start / CTX_FULL_AA);
+     for (rasterizer->scanline = scan_start; rasterizer->scanline < scan_end;)
+     {
+       rasterizer->apply_coverage (rasterizer,
+                                   &dst[ (startx * rasterizer->format->bpp) /8],
+                                   rasterizer->color,
+                                   0,
+                                   nocoverage, minx-startx);
+       dst += blit_stride;
+     }
+     }
 
-  ctx_init_uv (rasterizer, x0, count, &u0, &v0, &ud, &vd);
+     if (endx > maxx)
+     {
+     dst = (uint8_t*)(rasterizer->buf) + rasterizer->blit_stride * (scan_start / CTX_FULL_AA);
+     for (rasterizer->scanline = scan_start; rasterizer->scanline < scan_end;)
+     {
+       rasterizer->apply_coverage (rasterizer,
+                                   &dst[ (maxx * rasterizer->format->bpp) /8],
+                                   rasterizer->color,
+                                   0,
+                                   nocoverage, endx-maxx);
 
-  while (count--)
-  {
-    uint8_t cov = *coverage;
-    float covf = ctx_u8_to_float (cov);
-    for (int c = 0; c < components; c++)
-      dstf[c] = dstf[c]*(1.0-covf) + srcf[c]*covf;
-    dstf += components;
-    coverage ++;
-  }
-}
+       rasterizer->scanline += CTX_FULL_AA;
+       dst += rasterizer->blit_stride;
+     }
+     }
+#if 1
+     dst = (uint8_t*)(rasterizer->buf) + rasterizer->blit_stride * (scan_end / CTX_FULL_AA);
+     // XXX this crashes under valgrind/asan
+     if(0)for (rasterizer->scanline = scan_end; rasterizer->scanline/CTX_FULL_AA < gscan_end-1;)
+     {
+       rasterizer->apply_coverage (rasterizer,
+                                   &dst[ (startx * rasterizer->format->bpp) /8],
+                                   rasterizer->color,
+                                   0,
+                                   nocoverage, clipw-1);
 
-static void
-ctx_float_clear_normal (int components, CTX_COMPOSITE_ARGUMENTS)
-{
-  float *dstf = (float*)dst;
-  while (count--)
-  {
-#if 0
-    uint8_t cov = *coverage;
-    if (cov == 0)
-    {
-    }
-    else if (cov == 255)
-    {
-#endif
-      switch (components)
-      {
-        case 2:
-          ((uint64_t*)(dst))[0] = 0;
-          break;
-        case 4:
-          ((uint64_t*)(dst))[0] = 0;
-          ((uint64_t*)(dst))[1] = 0;
-          break;
-        default:
-          for (int c = 0; c < components; c++)
-            dstf[c] = 0.0f;
-      }
-#if 0
-    }
-    else
-    {
-      float ralpha = 1.0 - ctx_u8_to_float (cov);
-      for (int c = 0; c < components; c++)
-        { dstf[c] = (dstf[c] * ralpha); }
-    }
-    coverage ++;
+       rasterizer->scanline += CTX_FULL_AA;
+       dst += blit_stride;
+     }
 #endif
-    dstf += components;
   }
 }
 
 
-static inline void
-ctx_float_source_over_normal_color (int components, CTX_COMPOSITE_ARGUMENTS)
+#if CTX_INLINE_FILL_RULE
+
+void
+CTX_SIMD_SUFFIX (ctx_rasterizer_rasterize_edges) (CtxRasterizer *rasterizer, const int fill_rule 
+#if CTX_SHAPE_CACHE
+                                ,CtxShapeEntry *shape
+#endif
+                               )
 {
-  float *dstf = (float*)dst;
-  float *srcf = (float*)src;
-  while (count--)
+  if (fill_rule)
   {
-    uint8_t cov = *coverage;
-    float fcov = ctx_u8_to_float (cov);
-    float ralpha = 1.0f - fcov * srcf[components-1];
-    for (int c = 0; c < components-1; c++)
-      dstf[c] = (srcf[c]*fcov + dstf[c] * ralpha);
-    coverage ++;
-    dstf+= components;
+    ctx_rasterizer_rasterize_edges2 (rasterizer, 1
+#if CTX_SHAPE_CACHE
+                    ,shape
+#endif
+                    );
   }
-}
-
-static void
-ctx_float_source_copy_normal_color (int components, CTX_COMPOSITE_ARGUMENTS)
-{
-  float *dstf = (float*)dst;
-  float *srcf = (float*)src;
-
-  while (count--)
+  else
   {
-    uint8_t cov = *coverage;
-    float fcov = ctx_u8_to_float (cov);
-    float ralpha = 1.0f - fcov;
-    for (int c = 0; c < components-1; c++)
-      dstf[c] = (srcf[c]*fcov + dstf[c] * ralpha);
-    coverage ++;
-    dstf+= components;
+    ctx_rasterizer_rasterize_edges2 (rasterizer, 0
+#if CTX_SHAPE_CACHE
+                    ,shape
+#endif
+                    );
   }
 }
+#else
 
-inline static void
-ctx_float_blend_normal (int components, float *dst, float *src, float *blended)
+void
+CTX_SIMD_SUFFIX (ctx_rasterizer_rasterize_edges) (CtxRasterizer *rasterizer, const int fill_rule 
+#if CTX_SHAPE_CACHE
+                                ,CtxShapeEntry *shape
+#endif
+                               )
 {
-  float a = src[components-1];
-  for (int c = 0; c <  components - 1; c++)
-    blended[c] = src[c] * a;
-  blended[components-1]=a;
+    ctx_rasterizer_rasterize_edges2 (rasterizer, fill_rule
+#if CTX_SHAPE_CACHE
+                    ,shape
+#endif
+                    );
 }
 
-static float ctx_float_get_max (int components, float *c)
+#endif
+
+
+
+extern CtxPixelFormatInfo *ctx_pixel_formats;
+void CTX_SIMD_SUFFIX(ctx_simd_setup)(void)
 {
-  float max = -1000.0f;
-  for (int i = 0; i < components - 1; i ++)
-  {
-    if (c[i] > max) max = c[i];
-  }
-  return max;
+  ctx_pixel_formats         = CTX_SIMD_SUFFIX(ctx_pixel_formats);
+  ctx_composite_setup       = CTX_SIMD_SUFFIX(ctx_composite_setup);
+  ctx_rasterizer_rasterize_edges = CTX_SIMD_SUFFIX(ctx_rasterizer_rasterize_edges);
+#if CTX_FAST_FILL_RECT
+  ctx_composite_fill_rect   = CTX_SIMD_SUFFIX(ctx_composite_fill_rect);
+  ctx_composite_stroke_rect = CTX_SIMD_SUFFIX(ctx_composite_stroke_rect);
+#endif
 }
 
-static float ctx_float_get_min (int components, float *c)
+
+#endif
+#endif
+#if CTX_IMPLEMENTATION
+#if CTX_RASTERIZER
+
+
+inline static float ctx_fast_hypotf (float x, float y)
 {
-  float min = 400.0;
-  for (int i = 0; i < components - 1; i ++)
-  {
-    if (c[i] < min) min = c[i];
-  }
-  return min;
+  if (x < 0) { x = -x; }
+  if (y < 0) { y = -y; }
+  if (x < y)
+    { return 0.96f * y + 0.4f * x; }
+  else
+    { return 0.96f * x + 0.4f * y; }
 }
 
-static float ctx_float_get_lum (int components, float *c)
+
+
+static void
+ctx_rasterizer_gradient_add_stop (CtxRasterizer *rasterizer, float pos, float *rgba)
 {
-  switch (components)
-  {
-    case 3:
-    case 4:
-            return CTX_CSS_RGB_TO_LUMINANCE(c);
-    case 1:
-    case 2:
-            return c[0];
-            break;
-    default:
-       {
-         float sum = 0;
-         for (int i = 0; i < components - 1; i ++)
-         {
-           sum += c[i];
-         }
-         return sum / (components - 1);
-       }
-  }
+  /* FIXME XXX we only have one gradient, but might need separate gradients
+   * for fill/stroke !
+   * 
+   */
+  CtxGradient *gradient = &rasterizer->state->gradient;
+  CtxGradientStop *stop = &gradient->stops[gradient->n_stops];
+  stop->pos = pos;
+  ctx_color_set_rgba (rasterizer->state, & (stop->color), rgba[0], rgba[1], rgba[2], rgba[3]);
+  if (gradient->n_stops < 15) //we'll keep overwriting the last when out of stops
+    { gradient->n_stops++; }
 }
 
-static float ctx_float_get_sat (int components, float *c)
+static inline void ctx_rasterizer_update_inner_point (CtxRasterizer *rasterizer, int x, int y)
 {
-  switch (components)
-  {
-    case 3:
-    case 4:
-            { float r = c[0];
-              float g = c[1];
-              float b = c[2];
-              return ctx_maxf(r, ctx_maxf(g,b)) - ctx_minf(r,ctx_minf(g,b));
-            }
-            break;
-    case 1:
-    case 2: return 0.0;
-            break;
-    default:
-       {
-         float min = 1000;
-         float max = -1000;
-         for (int i = 0; i < components - 1; i ++)
-         {
-           if (c[i] < min) min = c[i];
-           if (c[i] > max) max = c[i];
-         }
-         return max-min;
-       }
-  }
+  rasterizer->scan_min = ctx_mini (y, rasterizer->scan_min);
+  rasterizer->scan_max = ctx_maxi (y, rasterizer->scan_max);
+  rasterizer->col_min = ctx_mini (x, rasterizer->col_min);
+  rasterizer->col_max = ctx_maxi (x, rasterizer->col_max);
+  rasterizer->inner_x = x;
+  rasterizer->inner_y = y;
 }
 
-static void ctx_float_set_lum (int components, float *c, float lum)
+static inline int ctx_rasterizer_add_point (CtxRasterizer *rasterizer, int x1, int y1)
 {
-  float d = lum - ctx_float_get_lum (components, c);
-  float tc[components];
-  for (int i = 0; i < components - 1; i++)
-  {
-    tc[i] = c[i] + d;
-  }
+  CtxSegment entry = {CTX_EDGE, {{0},}};
 
-  float l = ctx_float_get_lum (components, tc);
-  float n = ctx_float_get_min (components, tc);
-  float x = ctx_float_get_max (components, tc);
+  entry.data.s16[0]=rasterizer->inner_x;
+  entry.data.s16[1]=rasterizer->inner_y;
 
-  if (n < 0.0f && l != n)
-  {
-    for (int i = 0; i < components - 1; i++)
-      tc[i] = l + (((tc[i] - l) * l) / (l-n));
-  }
+  entry.data.s16[2]=x1;
+  entry.data.s16[3]=y1;
 
-  if (x > 1.0f && x != l)
-  {
-    for (int i = 0; i < components - 1; i++)
-      tc[i] = l + (((tc[i] - l) * (1.0f - l)) / (x-l));
-  }
-  for (int i = 0; i < components - 1; i++)
-    c[i] = tc[i];
-}
-
-static void ctx_float_set_sat (int components, float *c, float sat)
-{
-  int max = 0, mid = 1, min = 2;
-  
-  if (c[min] > c[mid]){int t = min; min = mid; mid = t;}
-  if (c[mid] > c[max]){int t = mid; mid = max; max = t;}
-  if (c[min] > c[mid]){int t = min; min = mid; mid = t;}
-
-  if (c[max] > c[min])
-  {
-    c[mid] = ((c[mid]-c[min]) * sat) / (c[max] - c[min]);
-    c[max] = sat;
-  }
-  else
-  {
-    c[mid] = c[max] = 0.0f;
-  }
-  c[min] = 0.0f;
+  ctx_rasterizer_update_inner_point (rasterizer, x1, y1);
 
+  return ctx_edgelist_add_single (&rasterizer->edge_list, (CtxEntry*)&entry);
 }
 
-#define ctx_float_blend_define(name, CODE) \
-static void \
-ctx_float_blend_##name (int components, float * __restrict__ dst, float *src, float *blended)\
-{\
-  float *s = src; float b[components];\
-  ctx_float_deassociate_alpha (components, dst, b);\
-    CODE;\
-  blended[components-1] = s[components-1];\
-  ctx_float_associate_alpha (components, blended);\
-}
+#if 0
+#define CTX_SHAPE_CACHE_PRIME1   7853
+#define CTX_SHAPE_CACHE_PRIME2   4129
+#define CTX_SHAPE_CACHE_PRIME3   3371
+#define CTX_SHAPE_CACHE_PRIME4   4221
+#else
+#define CTX_SHAPE_CACHE_PRIME1   283
+#define CTX_SHAPE_CACHE_PRIME2   599
+#define CTX_SHAPE_CACHE_PRIME3   101
+#define CTX_SHAPE_CACHE_PRIME4   661
+#endif
 
-#define ctx_float_blend_define_seperable(name, CODE) \
-        ctx_float_blend_define(name, for (int c = 0; c < components-1; c++) { CODE ;}) \
+float ctx_shape_cache_rate = 0.0;
+#if CTX_SHAPE_CACHE
+int   _ctx_shape_cache_enabled = 1;
 
-ctx_float_blend_define_seperable(multiply,    blended[c] = (b[c] * s[c]);)
-ctx_float_blend_define_seperable(screen,      blended[c] = b[c] + s[c] - (b[c] * s[c]);)
-ctx_float_blend_define_seperable(overlay,     blended[c] = b[c] < 0.5f ? (s[c] * b[c]) :
-                                                          s[c] + b[c] - (s[c] * b[c]);)
-ctx_float_blend_define_seperable(darken,      blended[c] = ctx_minf (b[c], s[c]))
-ctx_float_blend_define_seperable(lighten,     blended[c] = ctx_maxf (b[c], s[c]))
-ctx_float_blend_define_seperable(color_dodge, blended[c] = (b[c] == 0.0f) ? 0.0f :
-                                     s[c] == 1.0f ? 1.0f : ctx_minf(1.0f, (b[c]) / (1.0f-s[c])))
-ctx_float_blend_define_seperable(color_burn,  blended[c] = (b[c] == 1.0f) ? 1.0f :
-                                     s[c] == 0.0f ? 0.0f : 1.0f - ctx_minf(1.0f, ((1.0f - b[c])) / s[c]))
-ctx_float_blend_define_seperable(hard_light,  blended[c] = s[c] < 0.f ? (b[c] * s[c]) :
-                                                          b[c] + s[c] - (b[c] * s[c]);)
-ctx_float_blend_define_seperable(difference,  blended[c] = (b[c] - s[c]))
+//static CtxShapeCache ctx_cache = {{NULL,}, 0};
 
-ctx_float_blend_define_seperable(divide,      blended[c] = s[c]?(b[c]) / s[c]:0.0f)
-ctx_float_blend_define_seperable(addition,    blended[c] = s[c]+b[c])
-ctx_float_blend_define_seperable(subtract,    blended[c] = s[c]-b[c])
+static long ctx_shape_cache_hits   = 0;
+static long ctx_shape_cache_misses = 0;
 
-ctx_float_blend_define_seperable(exclusion,   blended[c] = b[c] + s[c] - 2.0f * b[c] * s[c])
-ctx_float_blend_define_seperable(soft_light,
-  if (s[c] <= 0.5f)
-  {
-    blended[c] = b[c] - (1.0f - 2.0f * s[c]) * b[c] * (1.0f - b[c]);
-  }
-  else
+
+/* This returns the buffer to use for rendering; it always
+   succeeds.
+ */
+static inline CtxShapeEntry *ctx_shape_entry_find (CtxRasterizer *rasterizer, uint32_t hash, int width, int height)
+{
+  /* use both some high and some low bits  */
+  int entry_no = ( (hash >> 10) ^ (hash & 1023) ) % CTX_SHAPE_CACHE_ENTRIES;
   {
-    int d;
-    if (b[c] <= 255/4)
-      d = (((16 * b[c] - 12.0f) * b[c] + 4.0f) * b[c]);
-    else
-      d = ctx_sqrtf(b[c]);
-    blended[c] = (b[c] + (2.0f * s[c] - 1.0f) * (d - b[c]));
+    static int i = 0;
+    i++;
+    if (i>256)
+      {
+        if (ctx_shape_cache_hits+ctx_shape_cache_misses)
+        {
+          ctx_shape_cache_rate = 
+                0.5 * ctx_shape_cache_rate +
+                0.5 * (ctx_shape_cache_hits * 100.0  / (ctx_shape_cache_hits+ctx_shape_cache_misses));
+        }
+        i = 0;
+        ctx_shape_cache_hits = 0;
+        ctx_shape_cache_misses = 0;
+      }
   }
-)
-
+// XXX: the extra + 1 below is needed to silence a false positive:
+// ==90718== Invalid write of size 1
+// ==90718==    at 0x1189EF: ctx_rasterizer_generate_coverage (ctx.h:4786)
+// ==90718==    by 0x118E57: ctx_rasterizer_rasterize_edges (ctx.h:4907)
+//
+  int size = sizeof (CtxShapeEntry) + width * height + 1;
 
-ctx_float_blend_define(color,
-  for (int i = 0; i < components; i++)
-    blended[i] = s[i];
-  ctx_float_set_lum(components, blended, ctx_float_get_lum (components, s));
-)
+  CtxShapeEntry *entry = rasterizer->shape_cache.entries[entry_no];
+  if (entry)
+    {
+      int old_size = sizeof (CtxShapeEntry) + entry->width + entry->height + 1;
+      if (entry->hash == hash &&
+          entry->width == width &&
+          entry->height == height)
+        {
+          if (entry->uses < 1<<30)
+            { entry->uses++; }
+          ctx_shape_cache_hits ++;
+          return entry;
+        }
 
-ctx_float_blend_define(hue,
-  float in_sat = ctx_float_get_sat(components, b);
-  float in_lum = ctx_float_get_lum(components, b);
-  for (int i = 0; i < components; i++)
-    blended[i] = s[i];
-  ctx_float_set_sat(components, blended, in_sat);
-  ctx_float_set_lum(components, blended, in_lum);
-)
+      if (old_size >= size)
+      {
+         rasterizer->shape_cache.size -= old_size;
+         rasterizer->shape_cache.size += (old_size-size); // slack/leaked
+      }
+      else
+      {
+        rasterizer->shape_cache.entries[entry_no] = NULL;
+        rasterizer->shape_cache.size -= entry->width * entry->height;
+        rasterizer->shape_cache.size -= sizeof (CtxShapeEntry);
+        free (entry);
+        entry = NULL;
+      }
+    }
 
-ctx_float_blend_define(saturation,
-  float in_sat = ctx_float_get_sat(components, s);
-  float in_lum = ctx_float_get_lum(components, b);
-  for (int i = 0; i < components; i++)
-    blended[i] = b[i];
-  ctx_float_set_sat(components, blended, in_sat);
-  ctx_float_set_lum(components, blended, in_lum);
-)
+  if (!entry)
+    entry = rasterizer->shape_cache.entries[entry_no] = (CtxShapeEntry *) calloc (size, 1);
 
-ctx_float_blend_define(luminosity,
-  float in_lum = ctx_float_get_lum(components, s);
-  for (int i = 0; i < components; i++)
-    blended[i] = b[i];
-  ctx_float_set_lum(components, blended, in_lum);
-)
+  rasterizer->shape_cache.size += size;
 
-inline static void
-ctx_float_blend (int components, CtxBlend blend, float * __restrict__ dst, float *src, float *blended)
-{
-  switch (blend)
-  {
-    case CTX_BLEND_NORMAL:      ctx_float_blend_normal      (components, dst, src, blended); break;
-    case CTX_BLEND_MULTIPLY:    ctx_float_blend_multiply    (components, dst, src, blended); break;
-    case CTX_BLEND_SCREEN:      ctx_float_blend_screen      (components, dst, src, blended); break;
-    case CTX_BLEND_OVERLAY:     ctx_float_blend_overlay     (components, dst, src, blended); break;
-    case CTX_BLEND_DARKEN:      ctx_float_blend_darken      (components, dst, src, blended); break;
-    case CTX_BLEND_LIGHTEN:     ctx_float_blend_lighten     (components, dst, src, blended); break;
-    case CTX_BLEND_COLOR_DODGE: ctx_float_blend_color_dodge (components, dst, src, blended); break;
-    case CTX_BLEND_COLOR_BURN:  ctx_float_blend_color_burn  (components, dst, src, blended); break;
-    case CTX_BLEND_HARD_LIGHT:  ctx_float_blend_hard_light  (components, dst, src, blended); break;
-    case CTX_BLEND_SOFT_LIGHT:  ctx_float_blend_soft_light  (components, dst, src, blended); break;
-    case CTX_BLEND_DIFFERENCE:  ctx_float_blend_difference  (components, dst, src, blended); break;
-    case CTX_BLEND_EXCLUSION:   ctx_float_blend_exclusion   (components, dst, src, blended); break;
-    case CTX_BLEND_COLOR:       ctx_float_blend_color       (components, dst, src, blended); break;
-    case CTX_BLEND_HUE:         ctx_float_blend_hue         (components, dst, src, blended); break;
-    case CTX_BLEND_SATURATION:  ctx_float_blend_saturation  (components, dst, src, blended); break;
-    case CTX_BLEND_LUMINOSITY:  ctx_float_blend_luminosity  (components, dst, src, blended); break;
-    case CTX_BLEND_ADDITION:    ctx_float_blend_addition    (components, dst, src, blended); break;
-    case CTX_BLEND_SUBTRACT:    ctx_float_blend_subtract    (components, dst, src, blended); break;
-    case CTX_BLEND_DIVIDE:      ctx_float_blend_divide      (components, dst, src, blended); break;
-  }
+  ctx_shape_cache_misses ++;
+  entry->hash   = hash;
+  entry->width  = width;
+  entry->height = height;
+  entry->uses = 0;
+  return entry;
 }
 
-/* this is the grunt working function, when inlined code-path elimination makes
- * it produce efficient code.
- */
-CTX_INLINE static void
-ctx_float_porter_duff (CtxRasterizer         *rasterizer,
-                       int                    components,
-                       uint8_t * __restrict__ dst,
-                       uint8_t * __restrict__ src,
-                       int                    x0,
-                       uint8_t * __restrict__ coverage,
-                       int                    count,
-                       CtxCompositingMode     compositing_mode,
-                       CtxFragment            fragment,
-                       CtxBlend               blend)
-{
-  float *dstf = (float*)dst;
+#endif
 
-  CtxPorterDuffFactor f_s, f_d;
-  ctx_porter_duff_factors (compositing_mode, &f_s, &f_d);
-  uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;
-  float   global_alpha_f = rasterizer->state->gstate.global_alpha_f;
-  
-  {
-    float tsrc[components];
-    float u0 = 0; float v0 = 0;
-    float ud = 0; float vd = 0;
+static uint32_t ctx_rasterizer_poly_to_hash (CtxRasterizer *rasterizer)
+{
+  int x = 0;
+  int y = 0;
 
-    ctx_init_uv (rasterizer, x0, count, &u0, &v0, &ud, &vd);
+  CtxSegment *entry = (CtxSegment*)&rasterizer->edge_list.entries[0];
 
-    while (count--)
+  int ox = entry->data.s16[2];
+  int oy = entry->data.s16[3];
+  uint32_t hash = rasterizer->edge_list.count;
+  hash = ox;//(ox % CTX_SUBDIV);
+  hash *= CTX_SHAPE_CACHE_PRIME1;
+  hash += oy; //(oy % CTX_RASTERIZER_AA);
+  for (unsigned int i = 0; i < rasterizer->edge_list.count; i++)
     {
-      uint8_t cov = *coverage;
+      CtxSegment *entry = &(((CtxSegment*)(rasterizer->edge_list.entries)))[i];
+      x = entry->data.s16[2];
+      y = entry->data.s16[3];
+      int dx = x-ox;
+      int dy = y-oy;
+      ox = x;
+      oy = y;
+      hash *= CTX_SHAPE_CACHE_PRIME3;
+      hash += dx;
+      hash *= CTX_SHAPE_CACHE_PRIME4;
+      hash += dy;
+    }
+  return hash;
+}
+
+static uint32_t ctx_rasterizer_poly_to_edges (CtxRasterizer *rasterizer)
+{
+#if CTX_SHAPE_CACHE
+  int x = 0;
+  int y = 0;
+#endif
+  unsigned int count = rasterizer->edge_list.count;
+  if (CTX_UNLIKELY (count == 0))
+     return 0;
+  CtxSegment *entry = (CtxSegment*)&rasterizer->edge_list.entries[0];
+#if CTX_SHAPE_CACHE
 #if 1
-      if (
-        CTX_UNLIKELY((compositing_mode == CTX_COMPOSITE_DESTINATION_OVER && dst[components-1] == 1.0f)||
-        (cov == 0 && (compositing_mode == CTX_COMPOSITE_SOURCE_OVER ||
-        compositing_mode == CTX_COMPOSITE_XOR               ||
-        compositing_mode == CTX_COMPOSITE_DESTINATION_OUT   ||
-        compositing_mode == CTX_COMPOSITE_SOURCE_ATOP      
-        ))))
-      {
-        u0 += ud;
-        v0 += vd;
-        coverage ++;
-        dstf+=components;
-        continue;
-      }
+  int ox = entry->data.s16[2];
+  int oy = entry->data.s16[3];
 #endif
-
-      fragment (rasterizer, u0, v0, tsrc, 1, ud, vd);
-      if (blend != CTX_BLEND_NORMAL)
-        ctx_float_blend (components, blend, dstf, tsrc, tsrc);
-      u0 += ud;
-      v0 += vd;
-      float covf = ctx_u8_to_float (cov);
-
-      if (global_alpha_u8 != 255)
-        covf = covf * global_alpha_f;
-
-      if (covf != 1.0f)
-      {
-        for (int c = 0; c < components; c++)
-          tsrc[c] *= covf;
-      }
-
-      for (int c = 0; c < components; c++)
-      {
-        float res;
-        /* these switches and this whole function is written to be
-         * inlined when compiled when the enum values passed in are
-         * constants.
-         */
-        switch (f_s)
-        {
-          case CTX_PORTER_DUFF_0: res = 0.0f; break;
-          case CTX_PORTER_DUFF_1:             res = (tsrc[c]); break;
-          case CTX_PORTER_DUFF_ALPHA:         res = (tsrc[c] *       dstf[components-1]); break;
-          case CTX_PORTER_DUFF_1_MINUS_ALPHA: res = (tsrc[c] * (1.0f-dstf[components-1])); break;
-        }
-        switch (f_d)
+  uint32_t hash = rasterizer->edge_list.count;
+  hash = (ox & CTX_SUBDIV);
+  hash *= CTX_SHAPE_CACHE_PRIME1;
+  hash += (oy & CTX_SUBDIV);
+#endif
+  //CtxSegment *entry = &(((CtxSegment*)(rasterizer->edge_list.entries)))[0];
+  for (unsigned int i = 0; i < count; i++)
+    {
+#if CTX_SHAPE_CACHE
+      x = entry->data.s16[2];
+      y = entry->data.s16[3];
+      int dx = x-ox;
+      int dy = y-oy;
+      ox = x;
+      oy = y;
+      hash *= CTX_SHAPE_CACHE_PRIME3;
+      hash += dx;
+      hash *= CTX_SHAPE_CACHE_PRIME4;
+      hash += dy;
+#endif
+#if 1
+      if (entry->data.s16[3] < entry->data.s16[1])
         {
-          case CTX_PORTER_DUFF_0: dstf[c] = res; break;
-          case CTX_PORTER_DUFF_1:             dstf[c] = res + (dstf[c]); break;
-          case CTX_PORTER_DUFF_ALPHA:         dstf[c] = res + (dstf[c] *       tsrc[components-1]); break;
-          case CTX_PORTER_DUFF_1_MINUS_ALPHA: dstf[c] = res + (dstf[c] * (1.0f-tsrc[components-1])); break;
+          *entry = ctx_segment_s16 (CTX_EDGE_FLIPPED,
+                            entry->data.s16[2], entry->data.s16[3],
+                            entry->data.s16[0], entry->data.s16[1]);
         }
-      }
-      coverage ++;
-      dstf     +=components;
+#endif
+      entry++;
     }
-  }
+#if CTX_SHAPE_CACHE
+  return hash;
+#else
+  return 0;
+#endif
 }
 
-/* generating one function per compositing_mode would be slightly more efficient,
- * but on embedded targets leads to slightly more code bloat,
- * here we trade off a slight amount of performance
- */
-#define ctx_float_porter_duff(compformat, components, source, fragment, blend) \
-static void \
-ctx_##compformat##_porter_duff_##source (CTX_COMPOSITE_ARGUMENTS) \
-{ \
-   switch (rasterizer->state->gstate.compositing_mode) \
-   { \
-     case CTX_COMPOSITE_SOURCE_ATOP: \
-      ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count, \
-        CTX_COMPOSITE_SOURCE_ATOP, fragment, blend);\
-      break;\
-     case CTX_COMPOSITE_DESTINATION_ATOP:\
-      ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
-        CTX_COMPOSITE_DESTINATION_ATOP, fragment, blend);\
-      break;\
-     case CTX_COMPOSITE_DESTINATION_IN:\
-      ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
-        CTX_COMPOSITE_DESTINATION_IN, fragment, blend);\
-      break;\
-     case CTX_COMPOSITE_DESTINATION:\
-      ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
-        CTX_COMPOSITE_DESTINATION, fragment, blend);\
-       break;\
-     case CTX_COMPOSITE_SOURCE_OVER:\
-      ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
-        CTX_COMPOSITE_SOURCE_OVER, fragment, blend);\
-       break;\
-     case CTX_COMPOSITE_DESTINATION_OVER:\
-      ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
-        CTX_COMPOSITE_DESTINATION_OVER, fragment, blend);\
-       break;\
-     case CTX_COMPOSITE_XOR:\
-      ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
-        CTX_COMPOSITE_XOR, fragment, blend);\
-       break;\
-     case CTX_COMPOSITE_DESTINATION_OUT:\
-       ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
-        CTX_COMPOSITE_DESTINATION_OUT, fragment, blend);\
-       break;\
-     case CTX_COMPOSITE_SOURCE_OUT:\
-       ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
-        CTX_COMPOSITE_SOURCE_OUT, fragment, blend);\
-       break;\
-     case CTX_COMPOSITE_SOURCE_IN:\
-       ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
-        CTX_COMPOSITE_SOURCE_IN, fragment, blend);\
-       break;\
-     case CTX_COMPOSITE_COPY:\
-       ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
-        CTX_COMPOSITE_COPY, fragment, blend);\
-       break;\
-     case CTX_COMPOSITE_CLEAR:\
-       ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
-        CTX_COMPOSITE_CLEAR, fragment, blend);\
-       break;\
-   }\
+static inline void ctx_rasterizer_finish_shape (CtxRasterizer *rasterizer)
+{
+  if (rasterizer->has_shape && rasterizer->has_prev)
+    {
+      ctx_rasterizer_line_to (rasterizer, rasterizer->first_x, rasterizer->first_y);
+      rasterizer->has_prev = 0;
+    }
 }
-#endif
 
-#if CTX_ENABLE_RGBAF
-
-ctx_float_porter_duff(RGBAF, 4,color,   rasterizer->fragment, rasterizer->state->gstate.blend_mode)
-ctx_float_porter_duff(RGBAF, 4,generic, rasterizer->fragment, rasterizer->state->gstate.blend_mode)
+static inline void ctx_rasterizer_move_to (CtxRasterizer *rasterizer, float x, float y)
+{
+  float tx = x; float ty = y;
+  rasterizer->x        = x;
+  rasterizer->y        = y;
+  rasterizer->first_x  = x;
+  rasterizer->first_y  = y;
+  rasterizer->has_prev = -1;
+  if (rasterizer->uses_transforms)
+    {
+      _ctx_user_to_device (rasterizer->state, &tx, &ty);
+    }
 
-#if CTX_INLINED_NORMAL
-#if CTX_GRADIENTS
-ctx_float_porter_duff(RGBAF, 4,linear_gradient, ctx_fragment_linear_gradient_RGBAF, rasterizer->state->gstate.blend_mode)
-ctx_float_porter_duff(RGBAF, 4,radial_gradient, ctx_fragment_radial_gradient_RGBAF, rasterizer->state->gstate.blend_mode)
-#endif
-ctx_float_porter_duff(RGBAF, 4,image,           ctx_fragment_image_RGBAF,           rasterizer->state->gstate.blend_mode)
+  tx = (tx - rasterizer->blit_x) * CTX_SUBDIV;
+  ty = ty * CTX_FULL_AA;
 
+  ctx_rasterizer_update_inner_point (rasterizer, tx, ty);
+}
 
-#if CTX_GRADIENTS
-#define ctx_float_porter_duff_blend(comp_name, components, blend_mode, blend_name)\
-ctx_float_porter_duff(comp_name, components,color_##blend_name,            rasterizer->fragment,                               blend_mode)\
-ctx_float_porter_duff(comp_name, components,generic_##blend_name,          rasterizer->fragment,               blend_mode)\
-ctx_float_porter_duff(comp_name, components,linear_gradient_##blend_name,  ctx_fragment_linear_gradient_RGBA8, blend_mode)\
-ctx_float_porter_duff(comp_name, components,radial_gradient_##blend_name,  ctx_fragment_radial_gradient_RGBA8, blend_mode)\
-ctx_float_porter_duff(comp_name, components,image_##blend_name,            ctx_fragment_image_RGBAF,           blend_mode)
-#else
-#define ctx_float_porter_duff_blend(comp_name, components, blend_mode, blend_name)\
-ctx_float_porter_duff(comp_name, components,color_##blend_name,            rasterizer->fragment,                               blend_mode)\
-ctx_float_porter_duff(comp_name, components,generic_##blend_name,          rasterizer->fragment,               blend_mode)\
-ctx_float_porter_duff(comp_name, components,image_##blend_name,            ctx_fragment_image_RGBAF,           blend_mode)
-#endif
+static inline void
+ctx_rasterizer_line_to (CtxRasterizer *rasterizer, float x, float y)
+{
+  rasterizer->has_shape = 1;
+  rasterizer->y         = y;
+  rasterizer->x         = x;
 
-ctx_float_porter_duff_blend(RGBAF, 4, CTX_BLEND_NORMAL, normal)
+  float tx = x;
+  float ty = y;
+  //float ox = rasterizer->x;
+  //float oy = rasterizer->y;
+  if (rasterizer->uses_transforms)
+    {
+      _ctx_user_to_device (rasterizer->state, &tx, &ty);
+    }
+  tx -= rasterizer->blit_x;
+#define MIN_Y -1000
+#define MAX_Y 1400
 
+  ty = ctx_maxf (MIN_Y, ty);
+  ty = ctx_minf (MAX_Y, ty);
+  
+  ctx_rasterizer_add_point (rasterizer, tx * CTX_SUBDIV, ty * CTX_FULL_AA);//rasterizer->aa);
 
-static void
-ctx_RGBAF_copy_normal (CTX_COMPOSITE_ARGUMENTS)
-{
-  ctx_float_copy_normal (4, rasterizer, dst, src, x0, coverage, count);
+  if (CTX_UNLIKELY(rasterizer->has_prev<=0))
+    {
+      CtxSegment *entry = & ((CtxSegment*)rasterizer->edge_list.entries)[rasterizer->edge_list.count-1];
+      entry->code = CTX_NEW_EDGE;
+      rasterizer->has_prev = 1;
+    }
 }
 
-static void
-ctx_RGBAF_clear_normal (CTX_COMPOSITE_ARGUMENTS)
+
+CTX_INLINE static float
+ctx_bezier_sample_1d (float x0, float x1, float x2, float x3, float dt)
 {
-  ctx_float_clear_normal (4, rasterizer, dst, src, x0, coverage, count);
+  float ab   = ctx_lerpf (x0, x1, dt);
+  float bc   = ctx_lerpf (x1, x2, dt);
+  float cd   = ctx_lerpf (x2, x3, dt);
+  float abbc = ctx_lerpf (ab, bc, dt);
+  float bccd = ctx_lerpf (bc, cd, dt);
+  return ctx_lerpf (abbc, bccd, dt);
 }
 
-#if 0
-static void
-ctx_RGBAF_source_over_normal_color (CTX_COMPOSITE_ARGUMENTS)
+CTX_INLINE static void
+ctx_bezier_sample (float x0, float y0,
+                   float x1, float y1,
+                   float x2, float y2,
+                   float x3, float y3,
+                   float dt, float *x, float *y)
 {
-  ctx_float_source_over_normal_color (4, rasterizer, dst, rasterizer->color, x0, coverage, count);
+  *x = ctx_bezier_sample_1d (x0, x1, x2, x3, dt);
+  *y = ctx_bezier_sample_1d (y0, y1, y2, y3, dt);
 }
-#endif
-#endif
 
-static void
-ctx_setup_RGBAF (CtxRasterizer *rasterizer)
+static inline void
+ctx_rasterizer_bezier_divide (CtxRasterizer *rasterizer,
+                              float ox, float oy,
+                              float x0, float y0,
+                              float x1, float y1,
+                              float x2, float y2,
+                              float sx, float sy,
+                              float ex, float ey,
+                              float s,
+                              float e,
+                              int   iteration,
+                              float tolerance)
 {
-  CtxGState *gstate = &rasterizer->state->gstate;
-  int components = 4;
-  rasterizer->fragment = ctx_rasterizer_get_fragment_RGBAF (rasterizer);
-  rasterizer->comp = CTX_COV_PATH_FALLBACK;
-#if 1
-  if (gstate->source_fill.type == CTX_SOURCE_COLOR)
+  float t = (s + e) * 0.5f;
+  float x, y, lx, ly, dx, dy;
+  ctx_bezier_sample (ox, oy, x0, y0, x1, y1, x2, y2, t, &x, &y);
+  if (iteration)
     {
-      rasterizer->comp_op = ctx_RGBAF_porter_duff_color;
-      ctx_fragment_color_RGBAF (rasterizer, 0,0, rasterizer->color, 1, 0,0);
-      if (gstate->global_alpha_u8 != 255)
-        for (int c = 0; c < components; c ++)
-          ((float*)rasterizer->color)[c] *= gstate->global_alpha_f;
+      lx = ctx_lerpf (sx, ex, t);
+      ly = ctx_lerpf (sy, ey, t);
+      dx = lx - x;
+      dy = ly - y;
+      if (CTX_UNLIKELY( (dx*dx+dy*dy) < tolerance))
+        /* bailing - because at the mid-point the difference from the
+           straight line is tiny */
+        { return; }
+      dx = sx - ex;
+      dy = sy - ey;
+      if (CTX_UNLIKELY( (dx*dx+dy*dy) < tolerance))
+        /* bailing on tiny segments */
+        { return; }
     }
-  else
-#endif
+  if (iteration < 5)
   {
-    rasterizer->comp_op = ctx_RGBAF_porter_duff_generic;
+    ctx_rasterizer_bezier_divide (rasterizer, ox, oy, x0, y0, x1, y1, x2, y2,
+                                  sx, sy, x, y, s, t, iteration + 1,
+                                  tolerance);
+    ctx_rasterizer_line_to (rasterizer, x, y);
+    ctx_rasterizer_bezier_divide (rasterizer, ox, oy, x0, y0, x1, y1, x2, y2,
+                                  x, y, ex, ey, t, e, iteration + 1,
+                                  tolerance);
   }
-
-#if CTX_INLINED_NORMAL
-  if (gstate->compositing_mode == CTX_COMPOSITE_CLEAR)
-    rasterizer->comp_op = ctx_RGBAF_clear_normal;
-  else
-    switch (gstate->blend_mode)
-    {
-      case CTX_BLEND_NORMAL:
-        if (gstate->compositing_mode == CTX_COMPOSITE_COPY)
-        {
-          rasterizer->comp_op = ctx_RGBAF_copy_normal;
-        }
-        else if (gstate->global_alpha_u8 == 0)
-        {
-          rasterizer->comp_op = ctx_RGBA8_nop;
-        }
-        else
-        switch (gstate->source_fill.type)
-        {
-          case CTX_SOURCE_COLOR:
-            //if (gstate->compositing_mode == CTX_COMPOSITE_SOURCE_OVER)
-            //{
-            //  rasterizer->comp_op = ctx_RGBAF_source_over_normal_color;
-           // }
-           // else
-            {
-              rasterizer->comp_op = ctx_RGBAF_porter_duff_color_normal;
-            }
-            break;
-#if CTX_GRADIENTS
-          case CTX_SOURCE_LINEAR_GRADIENT:
-            rasterizer->comp_op = ctx_RGBAF_porter_duff_linear_gradient_normal;
-            break;
-          case CTX_SOURCE_RADIAL_GRADIENT:
-            rasterizer->comp_op = ctx_RGBAF_porter_duff_radial_gradient_normal;
-            break;
-#endif
-          case CTX_SOURCE_TEXTURE:
-            rasterizer->comp_op = ctx_RGBAF_porter_duff_image_normal;
-            break;
-          default:
-            rasterizer->comp_op = ctx_RGBAF_porter_duff_generic_normal;
-            break;
-        }
-        break;
-      default:
-        switch (gstate->source_fill.type)
-        {
-          case CTX_SOURCE_COLOR:
-            rasterizer->comp_op = ctx_RGBAF_porter_duff_color;
-            //rasterizer->fragment = NULL;
-            break;
-#if CTX_GRADIENTS
-          case CTX_SOURCE_LINEAR_GRADIENT:
-            rasterizer->comp_op = ctx_RGBAF_porter_duff_linear_gradient;
-            break;
-          case CTX_SOURCE_RADIAL_GRADIENT:
-            rasterizer->comp_op = ctx_RGBAF_porter_duff_radial_gradient;
-            break;
-#endif
-          case CTX_SOURCE_TEXTURE:
-            rasterizer->comp_op = ctx_RGBAF_porter_duff_image;
-            break;
-          default:
-            rasterizer->comp_op = ctx_RGBAF_porter_duff_generic;
-            break;
-        }
-        break;
-    }
-#endif
 }
 
-#endif
-#if CTX_ENABLE_GRAYAF
-
-#if CTX_GRADIENTS
 static void
-ctx_fragment_linear_gradient_GRAYAF (CtxRasterizer *rasterizer, float x, float y, void *out, int count, float dx, float dy)
+ctx_rasterizer_curve_to (CtxRasterizer *rasterizer,
+                         float x0, float y0,
+                         float x1, float y1,
+                         float x2, float y2)
 {
-  float rgba[4];
-  CtxSource *g = &rasterizer->state->gstate.source_fill;
-  for (int i = 0 ; i < count; i++)
-  {
-  float v = ( ( (g->linear_gradient.dx * x + g->linear_gradient.dy * y) /
-                g->linear_gradient.length) -
-              g->linear_gradient.start) * (g->linear_gradient.rdelta);
-  ctx_fragment_gradient_1d_RGBAF (rasterizer, v, 1.0, rgba);
-  ((float*)out)[0] = ctx_float_color_rgb_to_gray (rasterizer->state, rgba);
-  ((float*)out)[1] = rgba[3];
-     out = ((float*)(out)) + 2;
-     x += dx;
-     y += dy;
-  }
-}
+  //float tolerance =
+  //  1.0f*(ctx_pow2 (rasterizer->state->gstate.transform.m[0][0]) +
+  //  ctx_pow2 (rasterizer->state->gstate.transform.m[1][1]));
+  float tolerance = ctx_matrix_get_scale (&rasterizer->state->gstate.transform);
+  float ox = rasterizer->x;
+  float oy = rasterizer->y;
+  //tolerance *= tolerance;
+  tolerance = 1.0/(tolerance);
 
-static void
-ctx_fragment_radial_gradient_GRAYAF (CtxRasterizer *rasterizer, float x, float y, void *out, int count, float dx, float dy)
-{
-  float rgba[4];
-  CtxSource *g = &rasterizer->state->gstate.source_fill;
-  for (int i = 0; i < count; i ++)
+  tolerance *= 1.2;
+  tolerance = tolerance * tolerance;
+  ox = rasterizer->state->x;
+  oy = rasterizer->state->y;
+#if 0 // skipping this to preserve hash integrity
+  if (tolerance == 1.0f || 1)
   {
-  float v = 0.0f;
-  if ((g->radial_gradient.r1-g->radial_gradient.r0) > 0.0f)
+  float maxx = ctx_maxf (x1,x2);
+  maxx = ctx_maxf (maxx, ox);
+  maxx = ctx_maxf (maxx, x0);
+  float maxy = ctx_maxf (y1,y2);
+  maxy = ctx_maxf (maxy, oy);
+  maxy = ctx_maxf (maxy, y0);
+  float minx = ctx_minf (x1,x2);
+  minx = ctx_minf (minx, ox);
+  minx = ctx_minf (minx, x0);
+  float miny = ctx_minf (y1,y2);
+  miny = ctx_minf (miny, oy);
+  miny = ctx_minf (miny, y0);
+  
+  _ctx_user_to_device (rasterizer->state, &minx, &miny);
+  _ctx_user_to_device (rasterizer->state, &maxx, &maxy);
+#if 1
+    if(
+        (minx > rasterizer->blit_x + rasterizer->blit_width) ||
+        (miny > rasterizer->blit_y + rasterizer->blit_height) ||
+        (maxx < rasterizer->blit_x) ||
+        (maxy < rasterizer->blit_y) )
     {
-      v = ctx_hypotf (g->radial_gradient.x0 - x, g->radial_gradient.y0 - y);
-      v = (v - g->radial_gradient.r0) / (g->radial_gradient.rdelta);
     }
-  ctx_fragment_gradient_1d_RGBAF (rasterizer, v, 0.0, rgba);
-  ((float*)out)[0] = ctx_float_color_rgb_to_gray (rasterizer->state, rgba);
-  ((float*)out)[1] = rgba[3];
-     out = ((float*)(out)) + 2;
-     x += dx;
-     y += dy;
-  }
-}
+    else
 #endif
-
-static void
-ctx_fragment_color_GRAYAF (CtxRasterizer *rasterizer, float x, float y, void *out, int count, float dx, float dy)
-{
-  CtxSource *g = &rasterizer->state->gstate.source_fill;
-  for (int i = 0; i < count; i++)
-  {
-     ctx_color_get_graya (rasterizer->state, &g->color, (float*)out);
-     out = ((float*)(out)) + 2;
-     x += dx;
-     y += dy;
-  }
-}
-
-static void ctx_fragment_image_GRAYAF (CtxRasterizer *rasterizer, float x, float y, void *out, int count, float dx, float dy)
-{
-  uint8_t rgba[4];
-  float rgbaf[4];
-  CtxGState *gstate = &rasterizer->state->gstate;
-  CtxBuffer *buffer = gstate->source_fill.texture.buffer;
-  switch (buffer->format->bpp)
     {
-#if CTX_FRAGMENT_SPECIALIZE
-      case 1:  ctx_fragment_image_gray1_RGBA8 (rasterizer, x, y, rgba, count, dx, dy); break;
-      case 24: ctx_fragment_image_rgb8_RGBA8 (rasterizer, x, y, rgba, count, dx, dy);  break;
-      case 32: ctx_fragment_image_rgba8_RGBA8 (rasterizer, x, y, rgba, count, dx, dy); break;
-#endif
-      default: ctx_fragment_image_RGBA8 (rasterizer, x, y, rgba, count, dx, dy);       break;
+      ctx_rasterizer_bezier_divide (rasterizer,
+                                    ox, oy, x0, y0,
+                                    x1, y1, x2, y2,
+                                    ox, oy, x2, y2,
+                                    0.0f, 1.0f, 0.0f, tolerance);
     }
-  for (int c = 0; c < 2 * count; c ++) { 
-    rgbaf[c] = ctx_u8_to_float (rgba[c]);
-    ((float*)out)[0] = ctx_float_color_rgb_to_gray (rasterizer->state, rgbaf);
-    ((float*)out)[1] = rgbaf[3];
-    out = ((float*)out) + 2;
   }
-}
-
-static CtxFragment ctx_rasterizer_get_fragment_GRAYAF (CtxRasterizer *rasterizer)
-{
-  CtxGState *gstate = &rasterizer->state->gstate;
-  switch (gstate->source_fill.type)
-    {
-      case CTX_SOURCE_TEXTURE:           return ctx_fragment_image_GRAYAF;
-      case CTX_SOURCE_COLOR:           return ctx_fragment_color_GRAYAF;
-#if CTX_GRADIENTS
-      case CTX_SOURCE_LINEAR_GRADIENT: return ctx_fragment_linear_gradient_GRAYAF;
-      case CTX_SOURCE_RADIAL_GRADIENT: return ctx_fragment_radial_gradient_GRAYAF;
+  else
 #endif
+    {
+      ctx_rasterizer_bezier_divide (rasterizer,
+                                    ox, oy, x0, y0,
+                                    x1, y1, x2, y2,
+                                    ox, oy, x2, y2,
+                                    0.0f, 1.0f, 0, tolerance);
     }
-  return ctx_fragment_color_GRAYAF;
+  ctx_rasterizer_line_to (rasterizer, x2, y2);
 }
 
-ctx_float_porter_duff(GRAYAF, 2,color,   rasterizer->fragment, rasterizer->state->gstate.blend_mode)
-ctx_float_porter_duff(GRAYAF, 2,generic, rasterizer->fragment, rasterizer->state->gstate.blend_mode)
-
-#if CTX_INLINED_NORMAL
-ctx_float_porter_duff(GRAYAF, 2,color_normal,   rasterizer->fragment, CTX_BLEND_NORMAL)
-ctx_float_porter_duff(GRAYAF, 2,generic_normal, rasterizer->fragment, CTX_BLEND_NORMAL)
-
 static void
-ctx_GRAYAF_copy_normal (CTX_COMPOSITE_ARGUMENTS)
+ctx_rasterizer_rel_move_to (CtxRasterizer *rasterizer, float x, float y)
 {
-  ctx_float_copy_normal (2, rasterizer, dst, src, x0, coverage, count);
+  //if (CTX_UNLIKELY(x == 0.f && y == 0.f))
+  //{ return; }
+  x += rasterizer->x;
+  y += rasterizer->y;
+  ctx_rasterizer_move_to (rasterizer, x, y);
 }
 
 static void
-ctx_GRAYAF_clear_normal (CTX_COMPOSITE_ARGUMENTS)
+ctx_rasterizer_rel_line_to (CtxRasterizer *rasterizer, float x, float y)
 {
-  ctx_float_clear_normal (2, rasterizer, dst, src, x0, coverage, count);
+  //if (CTX_UNLIKELY(x== 0.f && y==0.f))
+  //  { return; }
+  x += rasterizer->x;
+  y += rasterizer->y;
+  ctx_rasterizer_line_to (rasterizer, x, y);
 }
 
 static void
-ctx_GRAYAF_source_copy_normal_color (CTX_COMPOSITE_ARGUMENTS)
+ctx_rasterizer_rel_curve_to (CtxRasterizer *rasterizer,
+                             float x0, float y0, float x1, float y1, float x2, float y2)
 {
-  ctx_float_source_copy_normal_color (2, rasterizer, dst, rasterizer->color, x0, coverage, count);
+  x0 += rasterizer->x;
+  y0 += rasterizer->y;
+  x1 += rasterizer->x;
+  y1 += rasterizer->y;
+  x2 += rasterizer->x;
+  y2 += rasterizer->y;
+  ctx_rasterizer_curve_to (rasterizer, x0, y0, x1, y1, x2, y2);
+}
+
+
+/* Look up a texture slot by its eid string.
+ *
+ * Scans the CTX_MAX_TEXTURES slots of rasterizer->texture_source and
+ * returns the index of the first slot that has both pixel data and an eid
+ * equal to the requested one, or -1 when there is no such slot.
+ */
+static int
+ctx_rasterizer_find_texture (CtxRasterizer *rasterizer,
+                             const char *eid)
+{
+  for (int slot = 0; slot < CTX_MAX_TEXTURES; slot++)
+  {
+    if (!rasterizer->texture_source->texture[slot].data)
+      continue;
+    if (!rasterizer->texture_source->texture[slot].eid)
+      continue;
+    if (strcmp (rasterizer->texture_source->texture[slot].eid, eid) == 0)
+      return slot;
+  }
+  return -1;
 }
-#endif
 
 static void
-ctx_setup_GRAYAF (CtxRasterizer *rasterizer)
+ctx_rasterizer_set_texture (CtxRasterizer *rasterizer,
+                            const char *eid,
+                            float x,
+                            float y)
 {
-  CtxGState *gstate = &rasterizer->state->gstate;
-  int components = 2;
-  rasterizer->fragment = ctx_rasterizer_get_fragment_GRAYAF (rasterizer);
-  rasterizer->comp = CTX_COV_PATH_FALLBACK;
-  if (gstate->source_fill.type == CTX_SOURCE_COLOR)
+  int is_stroke = (rasterizer->state->source != 0);
+  CtxSource *source = is_stroke && (rasterizer->state->gstate.source_stroke.type != CTX_SOURCE_INHERIT_FILL)?
+                        &rasterizer->state->gstate.source_stroke:
+                        &rasterizer->state->gstate.source_fill;
+  rasterizer->state->source = 0;
+
+  int no = ctx_rasterizer_find_texture (rasterizer, eid);
+  if (no < 0 || no >= CTX_MAX_TEXTURES) { no = 0; }
+  if (rasterizer->texture_source->texture[no].data == NULL)
     {
-      rasterizer->comp_op = ctx_GRAYAF_porter_duff_color;
-  //  rasterizer->fragment = NULL;
-      ctx_color_get_rgba (rasterizer->state, &gstate->source_fill.color, (float*)rasterizer->color);
-      if (gstate->global_alpha_u8 != 255)
-        for (int c = 0; c < components; c ++)
-          ((float*)rasterizer->color)[c] *= gstate->global_alpha_f;
+      fprintf (stderr, "ctx tex fail %p %s %i\n", rasterizer->texture_source, eid, no);
+      return;
     }
   else
   {
-    rasterizer->comp_op = ctx_GRAYAF_porter_duff_generic;
+    rasterizer->texture_source->texture[no].frame = rasterizer->texture_source->frame;
   }
-
-#if CTX_INLINED_NORMAL
-  if (gstate->compositing_mode == CTX_COMPOSITE_CLEAR)
-    rasterizer->comp_op = ctx_GRAYAF_clear_normal;
-  else
-    switch (gstate->blend_mode)
-    {
-      case CTX_BLEND_NORMAL:
-        if (gstate->compositing_mode == CTX_COMPOSITE_COPY)
-        {
-          rasterizer->comp_op = ctx_GRAYAF_copy_normal;
-        }
-        else if (gstate->global_alpha_u8 == 0)
-          rasterizer->comp_op = ctx_RGBA8_nop;
-        else
-        switch (gstate->source_fill.type)
-        {
-          case CTX_SOURCE_COLOR:
-            if (gstate->compositing_mode == CTX_COMPOSITE_SOURCE_OVER)
-            {
-              if (((float*)rasterizer->color)[components-1] == 0.0f)
-                rasterizer->comp_op = ctx_RGBA8_nop;
-#if 1
-              else //if (((float*)rasterizer->color)[components-1] == 0.0f)
-                rasterizer->comp_op = ctx_GRAYAF_source_copy_normal_color;
-#endif
-              //else
-          //      rasterizer->comp_op = ctx_GRAYAF_porter_duff_color_normal;
-//            rasterizer->fragment = NULL;
-            }
-            else
-            {
-              rasterizer->comp_op = ctx_GRAYAF_porter_duff_color_normal;
-//            rasterizer->fragment = NULL;
-            }
-            break;
-          default:
-            rasterizer->comp_op = ctx_GRAYAF_porter_duff_generic_normal;
-            break;
-        }
-        break;
-      default:
-        switch (gstate->source_fill.type)
-        {
-          case CTX_SOURCE_COLOR:
-            rasterizer->comp_op = ctx_GRAYAF_porter_duff_color;
-//          rasterizer->fragment = NULL;
-            break;
-          default:
-            rasterizer->comp_op = ctx_GRAYAF_porter_duff_generic;
-            break;
-        }
-        break;
-    }
-#endif
+  source->type = CTX_SOURCE_TEXTURE;
+  source->texture.buffer = &rasterizer->texture_source->texture[no];
+  ctx_matrix_identity (&source->set_transform);
+  ctx_matrix_translate (&source->set_transform, x, y);
 }
 
-#endif
-#if CTX_ENABLE_GRAYF
 
 static void
-ctx_composite_GRAYF (CTX_COMPOSITE_ARGUMENTS)
+ctx_rasterizer_define_texture (CtxRasterizer *rasterizer,
+                               const char    *eid,
+                               int            width,
+                               int            height,
+                               int            format,
+                               char unsigned *data)
 {
-  float *dstf = (float*)dst;
+  _ctx_texture_lock (); // we're using the same texture_source from all threads, keeping allocaitons down
+                        // need synchronizing (it could be better to do a pre-pass)
+  ctx_texture_init (rasterizer->texture_source,
+                    eid,
+                    width,
+                    height,
+                    ctx_pixel_format_get_stride ((CtxPixelFormat)format, width),
+                    (CtxPixelFormat)format,
+#if CTX_ENABLE_CM
+                    (void*)rasterizer->state->gstate.texture_space,
+#else
+                    NULL,
+#endif
+                    data,
+                    ctx_buffer_pixels_free, (void*)23);
+                    /*  when userdata for ctx_buffer_pixels_free is 23, texture_init dups the data on
+                     *  use
+                     */
 
-  float temp[count*2];
-  for (int i = 0; i < count; i++)
-  {
-    temp[i*2] = dstf[i];
-    temp[i*2+1] = 1.0f;
-  }
-  rasterizer->comp_op (rasterizer, (uint8_t*)temp, rasterizer->color, x0, coverage, count);
-  for (int i = 0; i < count; i++)
-  {
-    dstf[i] = temp[i*2];
-  }
+  ctx_rasterizer_set_texture (rasterizer, eid, 0.0, 0.0);
+  _ctx_texture_unlock ();
 }
 
-#endif
-#if CTX_ENABLE_BGRA8
 
-inline static void
-ctx_swap_red_green (uint8_t *rgba)
+/* Return 1 when painting cannot produce visible output: either the global
+ * alpha is 0, or the fill source is a plain color whose alpha is 0.
+ * Returns 0 otherwise.  The stroke argument is currently not consulted;
+ * only the fill source is inspected.
+ */
+inline static int
+ctx_is_transparent (CtxRasterizer *rasterizer, int stroke)
 {
-  uint32_t *buf  = (uint32_t *) rgba;
-  uint32_t  orig = *buf;
-  uint32_t  green_alpha = (orig & 0xff00ff00);
-  uint32_t  red_blue    = (orig & 0x00ff00ff);
-  uint32_t  red         = red_blue << 16;
-  uint32_t  blue        = red_blue >> 16;
-  *buf = green_alpha | red | blue;
+  CtxGState *gs = &rasterizer->state->gstate;
+  uint8_t gray_alpha[2];
+
+  if (gs->global_alpha_u8 == 0)
+    return 1;
+  if (gs->source_fill.type != CTX_SOURCE_COLOR)
+    return 0;
+
+  /* gray_alpha[1] is the alpha component of the fill color */
+  ctx_color_get_graya_u8 (rasterizer->state, &gs->source_fill.color, gray_alpha);
+  return gray_alpha[1] == 0;
 }
 
-static void
-ctx_BGRA8_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
-{
-  uint32_t *srci = (uint32_t *) buf;
-  uint32_t *dsti = (uint32_t *) rgba;
-  while (count--)
-    {
-      uint32_t val = *srci++;
-      ctx_swap_red_green ( (uint8_t *) &val);
-      *dsti++      = val;
-    }
-}
 
-static void
-ctx_RGBA8_to_BGRA8 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
-{
-  ctx_BGRA8_to_RGBA8 (rasterizer, x, rgba, (uint8_t *) buf, count);
-}
 
 static void
-ctx_composite_BGRA8 (CTX_COMPOSITE_ARGUMENTS)
+ctx_rasterizer_fill (CtxRasterizer *rasterizer)
 {
-  // for better performance, this could be done without a pre/post conversion,
-  // by swapping R and B of source instead... as long as it is a color instead
-  // of gradient or image
-  //
-  //
-  uint8_t pixels[count * 4];
-  ctx_BGRA8_to_RGBA8 (rasterizer, x0, dst, &pixels[0], count);
-  rasterizer->comp_op (rasterizer, &pixels[0], rasterizer->color, x0, coverage, count);
-  ctx_BGRA8_to_RGBA8  (rasterizer, x0, &pixels[0], dst, count);
-}
+  unsigned int preserved_count =
+          (rasterizer->preserve&&rasterizer->edge_list.count)?
+             rasterizer->edge_list.count:1;
+  int blit_x = rasterizer->blit_x;
+  int blit_y = rasterizer->blit_y;
+  int blit_width = rasterizer->blit_width;
+  int blit_height = rasterizer->blit_height;
+#if CTX_SHAPE_CACHE
+  int blit_stride = rasterizer->blit_stride;
+#endif
 
+  CtxSegment temp[preserved_count]; /* copy of already built up path's poly line
+                                       XXX - by building a large enough path
+                                       the stack can be smashed!
+                                     */
+  if (CTX_UNLIKELY(rasterizer->preserve))
+    { memcpy (temp, rasterizer->edge_list.entries, sizeof (temp) ); }
 
+#if CTX_ENABLE_SHADOW_BLUR
+  if (CTX_UNLIKELY(rasterizer->in_shadow))
+  {
+  for (unsigned int i = 0; i < rasterizer->edge_list.count; i++)
+    {
+      CtxSegment *entry = &((CtxSegment*)rasterizer->edge_list.entries)[i];
+      entry->data.s16[2] += rasterizer->shadow_x * CTX_SUBDIV;
+      entry->data.s16[3] += rasterizer->shadow_y * CTX_FULL_AA;
+    }
+    rasterizer->scan_min += rasterizer->shadow_y * CTX_FULL_AA;
+    rasterizer->scan_max += rasterizer->shadow_y * CTX_FULL_AA;
+    rasterizer->col_min  += (rasterizer->shadow_x - rasterizer->state->gstate.shadow_blur * 3 + 1) * 
CTX_SUBDIV;
+    rasterizer->col_max  += (rasterizer->shadow_x + rasterizer->state->gstate.shadow_blur * 3 + 1) * 
CTX_SUBDIV;
+  }
 #endif
-#if CTX_ENABLE_CMYKAF
 
-static void
-ctx_fragment_other_CMYKAF (CtxRasterizer *rasterizer, float x, float y, void *out, int count, float dx, 
float dy)
-{
-  float *cmyka = (float*)out;
-  float _rgba[4 * count];
-  float *rgba = &_rgba[0];
-  CtxGState *gstate = &rasterizer->state->gstate;
-  switch (gstate->source_fill.type)
+  if (CTX_UNLIKELY(ctx_is_transparent (rasterizer, 0) ||
+      rasterizer->scan_min > CTX_FULL_AA * (blit_y + blit_height) ||
+      rasterizer->scan_max < CTX_FULL_AA * blit_y ||
+      rasterizer->col_min > CTX_SUBDIV * (blit_x + blit_width) ||
+      rasterizer->col_max < CTX_SUBDIV * blit_x))
     {
-      case CTX_SOURCE_TEXTURE:
-        ctx_fragment_image_RGBAF (rasterizer, x, y, rgba, count, dx, dy);
-        break;
-      case CTX_SOURCE_COLOR:
-        ctx_fragment_color_RGBAF (rasterizer, x, y, rgba, count, dx, dy);
-        break;
-#if CTX_GRADIENTS
-      case CTX_SOURCE_LINEAR_GRADIENT:
-        ctx_fragment_linear_gradient_RGBAF (rasterizer, x, y, rgba, count, dx, dy);
-        break;
-      case CTX_SOURCE_RADIAL_GRADIENT:
-        ctx_fragment_radial_gradient_RGBAF (rasterizer, x, y, rgba, count, dx, dy);
-        break;
-#endif
-      default:
-        rgba[0]=rgba[1]=rgba[2]=rgba[3]=0.0f;
-        break;
     }
-  for (int i = 0; i < count; i++)
+  else
   {
-    cmyka[4]=rgba[3];
-    ctx_rgb_to_cmyk (rgba[0], rgba[1], rgba[2], &cmyka[0], &cmyka[1], &cmyka[2], &cmyka[3]);
-    cmyka += 5;
-    rgba += 4;
-  }
-}
+    ctx_composite_setup (rasterizer);
 
-static void
-ctx_fragment_color_CMYKAF (CtxRasterizer *rasterizer, float x, float y, void *out, int count, float dx, 
float dy)
-{
-  CtxGState *gstate = &rasterizer->state->gstate;
-  float *cmyka = (float*)out;
-  float cmyka_in[5];
-  ctx_color_get_cmyka (rasterizer->state, &gstate->source_fill.color, cmyka_in);
-  for (int i = 0; i < count; i++)
-  {
-    for (int c = 0; c < 4; c ++)
-    {
-      cmyka[c] = (1.0f - cmyka_in[c]);
-    }
-    cmyka[4] = cmyka_in[4];
-    cmyka += 5;
-  }
-}
+    rasterizer->state->min_x = ctx_mini (rasterizer->state->min_x, rasterizer->col_min / CTX_SUBDIV);
+    rasterizer->state->max_x = ctx_maxi (rasterizer->state->min_x, rasterizer->col_max / CTX_SUBDIV);
+    rasterizer->state->min_y = ctx_mini (rasterizer->state->min_y, rasterizer->scan_min / CTX_FULL_AA);
+    rasterizer->state->max_y = ctx_maxi (rasterizer->state->max_y, rasterizer->scan_max / CTX_FULL_AA);
 
-static CtxFragment ctx_rasterizer_get_fragment_CMYKAF (CtxRasterizer *rasterizer)
-{
-  CtxGState *gstate = &rasterizer->state->gstate;
-  switch (gstate->source_fill.type)
+#if CTX_FAST_FILL_RECT
+  if (rasterizer->edge_list.count == 5)
     {
-      case CTX_SOURCE_COLOR:
-        return ctx_fragment_color_CMYKAF;
-    }
-  return ctx_fragment_other_CMYKAF;
-}
+      CtxSegment *entry0 = &(((CtxSegment*)(rasterizer->edge_list.entries)))[0];
+      CtxSegment *entry1 = &(((CtxSegment*)(rasterizer->edge_list.entries)))[1];
+      CtxSegment *entry2 = &(((CtxSegment*)(rasterizer->edge_list.entries)))[2];
+      CtxSegment *entry3 = &(((CtxSegment*)(rasterizer->edge_list.entries)))[3];
 
-ctx_float_porter_duff (CMYKAF, 5,color,           rasterizer->fragment, rasterizer->state->gstate.blend_mode)
-ctx_float_porter_duff (CMYKAF, 5,generic,         rasterizer->fragment, rasterizer->state->gstate.blend_mode)
 
-#if CTX_INLINED_NORMAL
-ctx_float_porter_duff (CMYKAF, 5,color_normal,            rasterizer->fragment, CTX_BLEND_NORMAL)
-ctx_float_porter_duff (CMYKAF, 5,generic_normal,          rasterizer->fragment, CTX_BLEND_NORMAL)
+      if (
+          (!(rasterizer->state->gstate.clipped != 0)) &
+          (entry0->data.s16[2] == entry1->data.s16[2]) &
+          (entry0->data.s16[3] == entry3->data.s16[3]) &
+          (entry1->data.s16[3] == entry2->data.s16[3]) &
+          (entry2->data.s16[2] == entry3->data.s16[2])
+#if CTX_ENABLE_SHADOW_BLUR
+           && !rasterizer->in_shadow
+#endif
+         )
+       {
+         float x0 = entry3->data.s16[2] * (1.0f / CTX_SUBDIV);
+         float y0 = entry3->data.s16[3] * (1.0f / CTX_FULL_AA);
+         float x1 = entry1->data.s16[2] * (1.0f / CTX_SUBDIV);
+         float y1 = entry1->data.s16[3] * (1.0f / CTX_FULL_AA);
 
-static void
-ctx_CMYKAF_copy_normal (CTX_COMPOSITE_ARGUMENTS)
-{
-  ctx_float_copy_normal (5, rasterizer, dst, src, x0, coverage, count);
-}
+         if (x1 > x0 && y1 > y0)
+         {
+           ctx_composite_fill_rect (rasterizer, x0, y0, x1, y1, 255);
+           goto done;
+         }
+       }
+    }
+#endif
 
-static void
-ctx_CMYKAF_clear_normal (CTX_COMPOSITE_ARGUMENTS)
-{
-  ctx_float_clear_normal (5, rasterizer, dst, src, x0, coverage, count);
-}
+    ctx_rasterizer_finish_shape (rasterizer);
 
-static void
-ctx_CMYKAF_source_copy_normal_color (CTX_COMPOSITE_ARGUMENTS)
-{
-  ctx_float_source_copy_normal_color (5, rasterizer, dst, rasterizer->color, x0, coverage, count);
-}
-#endif
+    uint32_t hash = ctx_rasterizer_poly_to_edges (rasterizer);
+    if (hash){};
 
-static void
-ctx_setup_CMYKAF (CtxRasterizer *rasterizer)
-{
-  CtxGState *gstate = &rasterizer->state->gstate;
-  int components = 5;
-  rasterizer->fragment = ctx_rasterizer_get_fragment_CMYKAF (rasterizer);
-  rasterizer->comp = CTX_COV_PATH_FALLBACK;
-  if (gstate->source_fill.type == CTX_SOURCE_COLOR)
-    {
-      rasterizer->comp_op = ctx_CMYKAF_porter_duff_color;
-      rasterizer->comp_op = ctx_CMYKAF_porter_duff_generic;
- //     rasterizer->fragment = NULL;
-      ctx_color_get_cmyka (rasterizer->state, &gstate->source_fill.color, (float*)rasterizer->color);
-      if (gstate->global_alpha_u8 != 255)
-        ((float*)rasterizer->color)[components-1] *= gstate->global_alpha_f;
-    }
-  else
-  {
-    rasterizer->comp_op = ctx_CMYKAF_porter_duff_generic;
-  }
+#if CTX_SHAPE_CACHE
+    int width = (rasterizer->col_max + (CTX_SUBDIV-1) ) / CTX_SUBDIV - rasterizer->col_min/CTX_SUBDIV + 1;
+    int height = (rasterizer->scan_max + (CTX_FULL_AA-1) ) / CTX_FULL_AA - rasterizer->scan_min / 
CTX_FULL_AA + 1;
+    if (width * height < CTX_SHAPE_CACHE_DIM && width >=1 && height >= 1
+        && width < CTX_SHAPE_CACHE_MAX_DIM
+        && height < CTX_SHAPE_CACHE_MAX_DIM 
+#if CTX_ENABLE_SHADOW_BLUR
+        && !rasterizer->in_shadow
+#endif
+        )
+      {
+        int scan_min = rasterizer->scan_min;
+        int col_min = rasterizer->col_min;
+        scan_min -= (scan_min % CTX_FULL_AA);
+        int y0 = scan_min / CTX_FULL_AA;
+        int y1 = y0 + height;
+        int x0 = col_min / CTX_SUBDIV;
+        int ymin = y0;
+        int x1 = x0 + width;
+        int clip_x_min = blit_x;
+        int clip_x_max = blit_x + blit_width - 1;
+        int clip_y_min = blit_y;
+        int clip_y_max = blit_y + blit_height - 1;
 
-#if CTX_INLINED_NORMAL
-  if (gstate->compositing_mode == CTX_COMPOSITE_CLEAR)
-    rasterizer->comp_op = ctx_CMYKAF_clear_normal;
-#if 1
-  else
-    switch (gstate->blend_mode)
-    {
-      case CTX_BLEND_NORMAL:
-        if (gstate->compositing_mode == CTX_COMPOSITE_COPY)
+        int dont_cache = 0;
+        if (CTX_UNLIKELY(x1 >= clip_x_max))
+          { x1 = clip_x_max;
+            dont_cache = 1;
+          }
+        int xo = 0;
+        if (CTX_UNLIKELY(x0 < clip_x_min))
+          {
+            xo = clip_x_min - x0;
+            x0 = clip_x_min;
+            dont_cache = 1;
+          }
+        if (CTX_UNLIKELY(y0 < clip_y_min || y1 >= clip_y_max))
+          dont_cache = 1;
+        if (dont_cache || !_ctx_shape_cache_enabled)
         {
-          rasterizer->comp_op = ctx_CMYKAF_copy_normal;
+          rasterizer->scanline = scan_min;
+          ctx_rasterizer_rasterize_edges (rasterizer, rasterizer->state->gstate.fill_rule
+#if CTX_SHAPE_CACHE
+                                        , NULL
+#endif
+                                       );
         }
-        else if (gstate->global_alpha_u8 == 0)
-          rasterizer->comp_op = ctx_RGBA8_nop;
         else
-        switch (gstate->source_fill.type)
         {
-          case CTX_SOURCE_COLOR:
-            if (gstate->compositing_mode == CTX_COMPOSITE_SOURCE_OVER)
+        rasterizer->scanline = scan_min;
+        CtxShapeEntry *shape = ctx_shape_entry_find (rasterizer, hash, width, height); 
+
+        if (shape->uses == 0)
+          {
+            CtxBuffer *buffer_backup = rasterizer->clip_buffer;
+            rasterizer->clip_buffer = NULL;
+            ctx_rasterizer_rasterize_edges (rasterizer, rasterizer->state->gstate.fill_rule, shape);
+            rasterizer->clip_buffer = buffer_backup;
+          }
+
+        int ewidth = x1 - x0;
+        if (ewidth>0)
+        {
+          rasterizer->scanline = scan_min;
+          int bpp = rasterizer->format->bpp;
+          if (rasterizer->clip_buffer && !rasterizer->clip_rectangle)
+          {
+          uint8_t composite[ewidth];
+          uint8_t *clip_data = (uint8_t*)rasterizer->clip_buffer->data;
+          int shape_width = shape->width;
+          for (int y = y0; y < y1; y++)
             {
-              if (((float*)rasterizer->color)[components-1] == 0.0f)
-                rasterizer->comp_op = ctx_RGBA8_nop;
-#if 1
-              else //if (((float*)rasterizer->color)[components-1] == 1.0f)
-                rasterizer->comp_op = ctx_CMYKAF_source_copy_normal_color;
-   //           else
-   //             rasterizer->comp_op = ctx_CMYKAF_porter_duff_color_normal;
-              rasterizer->fragment = NULL;
-#endif
+              if ( (y >= clip_y_min) && (y <= clip_y_max) )
+                {
+                    for (int x = 0; x < ewidth; x++)
+                    {
+                      int val = shape->data[shape_width * (int)(y-ymin) + xo + x];
+                      // XXX : not valid for 1bit clip buffers
+                      val = (val*(clip_data) [
+                              ((y-blit_y) * blit_width) + x0 + x])/255;
+                      composite[x] = val;
+                    }
+                    rasterizer->apply_coverage (rasterizer,
+                                                 ( (uint8_t *) rasterizer->buf) + (y-blit_y) * blit_stride + 
((int) (x0) * bpp)/8,
+                                                 rasterizer->color,
+                                                 x0, // is 0
+                                                 composite,
+                                                 ewidth );
+                 }
+               rasterizer->scanline += CTX_FULL_AA;
             }
-            else
+          }
+          else
+          {
+          for (int y = y0; y < y1; y++)
             {
-              rasterizer->comp_op = ctx_CMYKAF_porter_duff_color_normal;
-   //         rasterizer->fragment = NULL;
+              if (CTX_LIKELY((y >= clip_y_min) && (y <= clip_y_max) ))
+                {
+                    rasterizer->apply_coverage (rasterizer,
+                                                 ( (uint8_t *) rasterizer->buf) + (y-blit_y) * blit_stride + 
(int) ((x0) * bpp)/8, rasterizer->color,
+                                                 x0,
+                                                 &shape->data[shape->width * (int) (y-ymin) + xo],
+                                                 ewidth );
+                }
+               rasterizer->scanline += CTX_FULL_AA;
             }
-            break;
-          default:
-            rasterizer->comp_op = ctx_CMYKAF_porter_duff_generic_normal;
-            break;
-        }
-        break;
-      default:
-        switch (gstate->source_fill.type)
-        {
-          case CTX_SOURCE_COLOR:
-            rasterizer->comp_op = ctx_CMYKAF_porter_duff_color;
-    //      rasterizer->fragment = NULL;
-            break;
-          default:
-            rasterizer->comp_op = ctx_CMYKAF_porter_duff_generic;
-            break;
+          }
+         }
         }
-        break;
-    }
+      }
+    else
 #endif
+    {
+            
+    ctx_rasterizer_rasterize_edges (rasterizer, rasterizer->state->gstate.fill_rule
+#if CTX_SHAPE_CACHE
+                                    , NULL
 #endif
-}
-
+                                   );
+    }
+  }
+#if CTX_FAST_FILL_RECT
+done:
 #endif
-#if CTX_ENABLE_CMYKA8
-
-static void
-ctx_CMYKA8_to_CMYKAF (CtxRasterizer *rasterizer, uint8_t *src, float *dst, int count)
-{
-  for (int i = 0; i < count; i ++)
+  if (CTX_UNLIKELY(rasterizer->preserve))
     {
-      for (int c = 0; c < 4; c ++)
-        { dst[c] = ctx_u8_to_float ( (255-src[c]) ); }
-      dst[4] = ctx_u8_to_float (src[4]);
-      for (int c = 0; c < 4; c++)
-        { dst[c] *= dst[4]; }
-      src += 5;
-      dst += 5;
+      memcpy (rasterizer->edge_list.entries, temp, sizeof (temp) );
+      rasterizer->edge_list.count = preserved_count;
     }
+#if CTX_ENABLE_SHADOW_BLUR
+  if (CTX_UNLIKELY(rasterizer->in_shadow))
+  {
+    rasterizer->scan_min -= rasterizer->shadow_y * CTX_FULL_AA;
+    rasterizer->scan_max -= rasterizer->shadow_y * CTX_FULL_AA;
+    rasterizer->col_min  -= (rasterizer->shadow_x - rasterizer->state->gstate.shadow_blur * 3 + 1) * 
CTX_SUBDIV;
+    rasterizer->col_max  -= (rasterizer->shadow_x + rasterizer->state->gstate.shadow_blur * 3 + 1) * 
CTX_SUBDIV;
+  }
+#endif
+  rasterizer->preserve = 0;
 }
+
+#if 0 /* compiled out: ctx_rasterizer_triangle is an empty stub with no body yet */
 static void
-ctx_CMYKAF_to_CMYKA8 (CtxRasterizer *rasterizer, float *src, uint8_t *dst, int count)
+ctx_rasterizer_triangle (CtxRasterizer *rasterizer,
+                         int x0, int y0,
+                         int x1, int y1,
+                         int x2, int y2,
+                         int r0, int g0, int b0, int a0,
+                         int r1, int g1, int b1, int a1,
+                         int r2, int g2, int b2, int a2,
+                         int u0, int v0,
+                         int u1, int v1)
 {
-  for (int i = 0; i < count; i ++)
-    {
-      int a = ctx_float_to_u8 (src[4]);
-      if (a != 0 && a != 255)
-      {
-        float recip = 1.0f/src[4];
-        for (int c = 0; c < 4; c++)
-        {
-          dst[c] = ctx_float_to_u8 (1.0f - src[c] * recip);
-        }
-      }
-      else
-      {
-        for (int c = 0; c < 4; c++)
-          dst[c] = 255 - ctx_float_to_u8 (src[c]);
-      }
-      dst[4]=a;
 
-      src += 5;
-      dst += 5;
-    }
 }
+#endif
+
+
+typedef struct _CtxTermGlyph CtxTermGlyph; /* one character queued on rasterizer->glyphs for terminal text output */
+
+struct _CtxTermGlyph
+{
+  uint32_t unichar;     /* character code to show */
+  int      col;         /* 1-based cell column: device x / cell width + 1 */
+  int      row;         /* 1-based cell row: device y / cell height + 1 */
+  uint8_t  rgba_bg[4];  /* presumably the background color; not written by the code that queues glyphs */
+  uint8_t  rgba_fg[4];  /* foreground color, taken from the current fill color */
+};
 
+static int _ctx_glyph (Ctx *ctx, uint32_t unichar, int stroke);
+
+/* Render one glyph at the current position.
+ *
+ * The glyph is skipped when a font_size-sized box around the current
+ * point falls outside the blit rectangle.  With CTX_BRAILLE_TEXT and
+ * rasterizer->term_glyphs set, a non-stroked glyph whose device-space
+ * font size is within 0.5 of the terminal cell height is queued as a
+ * CtxTermGlyph on rasterizer->glyphs; every other glyph is forwarded
+ * to _ctx_glyph().
+ */
 static void
-ctx_composite_CMYKA8 (CTX_COMPOSITE_ARGUMENTS)
+ctx_rasterizer_glyph (CtxRasterizer *rasterizer, uint32_t unichar, int stroke)
 {
-  float pixels[count * 5];
-  ctx_CMYKA8_to_CMYKAF (rasterizer, dst, &pixels[0], count);
-  rasterizer->comp_op (rasterizer, (uint8_t *) &pixels[0], rasterizer->color, x0, coverage, count);
-  ctx_CMYKAF_to_CMYKA8 (rasterizer, &pixels[0], dst, count);
-}
+  /* rough box: one font_size above the point, one to the right and below */
+  float left   = rasterizer->state->x;
+  float top    = rasterizer->state->y - rasterizer->state->gstate.font_size;
+  float right  = rasterizer->state->x + rasterizer->state->gstate.font_size;
+  float bottom = rasterizer->state->y + rasterizer->state->gstate.font_size;
+  _ctx_user_to_device (rasterizer->state, &left, &top);
+  _ctx_user_to_device (rasterizer->state, &right, &bottom);
+
+  /* nothing to draw when the box misses the blit rectangle */
+  if (right  < rasterizer->blit_x ||
+      bottom < rasterizer->blit_y ||
+      left   > rasterizer->blit_x + rasterizer->blit_width ||
+      top    > rasterizer->blit_y + rasterizer->blit_height)
+    return;
+
+#if CTX_BRAILLE_TEXT
+  float dev_font_size = 0;
+  int   cell_h = 1;
+  int   cell_w = 1;
+
+  if (rasterizer->term_glyphs)
+  {
+    float zero_dx = 0;
+    dev_font_size = rasterizer->state->gstate.font_size;
+    cell_h = ctx_term_get_cell_height (rasterizer->backend.ctx);
+    cell_w = ctx_term_get_cell_width (rasterizer->backend.ctx);
 
+    _ctx_user_to_device_distance (rasterizer->state, &zero_dx, &dev_font_size);
+  }
+  if (rasterizer->term_glyphs && !stroke &&
+      fabs (dev_font_size - cell_h) < 0.5)
+  {
+    float pen_x = rasterizer->x;
+    float pen_y = rasterizer->y;
+    _ctx_user_to_device (rasterizer->state, &pen_x, &pen_y);
+    CtxTermGlyph *glyph = ctx_calloc (sizeof (CtxTermGlyph), 1);
+    ctx_list_append (&rasterizer->glyphs, glyph);
+    glyph->unichar = unichar;
+    glyph->col = pen_x / cell_w + 1;
+    glyph->row = pen_y / cell_h + 1;
+    ctx_color_get_rgba8 (rasterizer->state, &rasterizer->state->gstate.source_fill.color,
+                         glyph->rgba_fg);
+  }
+  else
 #endif
-#if CTX_ENABLE_CMYK8
+  _ctx_glyph (rasterizer->backend.ctx, unichar, stroke);
+}
 
 static void
-ctx_CMYK8_to_CMYKAF (CtxRasterizer *rasterizer, uint8_t *src, float *dst, int count)
+_ctx_text (Ctx        *ctx,
+           const char *string,
+           int         stroke,
+           int         visible);
+
+/* Render a string at the current position.
+ *
+ * With CTX_BRAILLE_TEXT and rasterizer->term_glyphs set, non-stroked text
+ * whose device-space font size is within 0.5 of the terminal cell height
+ * is queued on rasterizer->glyphs, one CtxTermGlyph per char of string in
+ * consecutive columns (chars are stored as-is, not decoded as UTF-8).
+ * All other text is forwarded to _ctx_text() with visible = 1.
+ */
+static void
+ctx_rasterizer_text (CtxRasterizer *rasterizer, const char *string, int stroke)
 {
-  for (int i = 0; i < count; i ++)
+#if CTX_BRAILLE_TEXT
+  float font_size = 0;
+  /* cell metrics are only queried when terminal glyphs are in use, the
+   * same way ctx_rasterizer_glyph does it; 1 is an unused placeholder */
+  int   ch = 1;
+  int   cw = 1;
+  if (rasterizer->term_glyphs)
+  {
+    float tx = 0;
+    font_size = rasterizer->state->gstate.font_size;
+    _ctx_user_to_device_distance (rasterizer->state, &tx, &font_size);
+    ch = ctx_term_get_cell_height (rasterizer->backend.ctx);
+    cw = ctx_term_get_cell_width (rasterizer->backend.ctx);
+  }
+
+  if (rasterizer->term_glyphs && !stroke &&
+      fabs (font_size - ch) < 0.5)
+  {
+    float tx = rasterizer->x;
+    float ty = rasterizer->y;
+    _ctx_user_to_device (rasterizer->state, &tx, &ty);
+    int col = tx / cw + 1;
+    int row = ty / ch + 1;
+    for (int i = 0; string[i]; i++, col++)
     {
-      dst[0] = ctx_u8_to_float (255-src[0]);
-      dst[1] = ctx_u8_to_float (255-src[1]);
-      dst[2] = ctx_u8_to_float (255-src[2]);
-      dst[3] = ctx_u8_to_float (255-src[3]);
-      dst[4] = 1.0f;
-      src += 4;
-      dst += 5;
+      CtxTermGlyph *glyph = ctx_calloc (sizeof (CtxTermGlyph), 1);
+      ctx_list_prepend (&rasterizer->glyphs, glyph);
+      glyph->unichar = string[i];
+      glyph->col = col;
+      glyph->row = row;
+      ctx_color_get_rgba8 (rasterizer->state, &rasterizer->state->gstate.source_fill.color,
+                      glyph->rgba_fg);
     }
+  }
+  else
+#endif
+  {
+    _ctx_text (rasterizer->backend.ctx, string, stroke, 1);
+  }
 }
+
+void
+_ctx_font (Ctx *ctx, const char *name);
+
+/* Select the font called font_name by forwarding to _ctx_font() on the
+ * Ctx stored in rasterizer->backend.ctx. */
 static void
-ctx_CMYKAF_to_CMYK8 (CtxRasterizer *rasterizer, float *src, uint8_t *dst, int count)
+ctx_rasterizer_set_font (CtxRasterizer *rasterizer, const char *font_name)
 {
-  for (int i = 0; i < count; i ++)
-    {
-      float c = src[0];
-      float m = src[1];
-      float y = src[2];
-      float k = src[3];
-      float a = src[4];
-      if (a != 0.0f && a != 1.0f)
-        {
-          float recip = 1.0f/a;
-          c *= recip;
-          m *= recip;
-          y *= recip;
-          k *= recip;
-        }
-      c = 1.0 - c;
-      m = 1.0 - m;
-      y = 1.0 - y;
-      k = 1.0 - k;
-      dst[0] = ctx_float_to_u8 (c);
-      dst[1] = ctx_float_to_u8 (m);
-      dst[2] = ctx_float_to_u8 (y);
-      dst[3] = ctx_float_to_u8 (k);
-      src += 5;
-      dst += 4;
-    }
+  Ctx *owner = rasterizer->backend.ctx;
+  _ctx_font (owner, font_name);
 }
 
 static void
-ctx_composite_CMYK8 (CTX_COMPOSITE_ARGUMENTS)
+ctx_rasterizer_arc (CtxRasterizer *rasterizer,
+                    float          x,
+                    float          y,
+                    float          radius,
+                    float          start_angle,
+                    float          end_angle,
+                    int            anticlockwise) /*
+   * Adds a polyline approximation of a circular arc centred on (x, y)
+   * to the rasterizer's path through move_to/line_to calls. Angles are
+   * in radians and are clamped to [-30, 30]; a non-zero anticlockwise
+   * reverses the stepping direction. A radius of 0.0001 or less emits
+   * nothing. */
 {
-  float pixels[count * 5];
-  ctx_CMYK8_to_CMYKAF (rasterizer, dst, &pixels[0], count);
-  rasterizer->comp_op (rasterizer, (uint8_t *) &pixels[0], src, x0, coverage, count);
-  ctx_CMYKAF_to_CMYK8 (rasterizer, &pixels[0], dst, count);
-}
-#endif
+  int full_segments = CTX_RASTERIZER_MAX_CIRCLE_SEGMENTS; /* initial value is overwritten on the next line */
+  full_segments = radius * CTX_PI * 2 / 4.0; /* segments for a full circle: circumference / 4 */
+  if (full_segments > CTX_RASTERIZER_MAX_CIRCLE_SEGMENTS)
+    { full_segments = CTX_RASTERIZER_MAX_CIRCLE_SEGMENTS; }
+  if (full_segments < 24) full_segments = 24; /* never fewer than 24 segments per full circle */
+  float step = CTX_PI*2.0/full_segments; /* angular increment per segment */
+  int steps;
 
-#if CTX_ENABLE_RGB8
+  if (end_angle < -30.0) /* clamp both angles to [-30, 30] */
+    end_angle = -30.0;
+  if (start_angle < -30.0)
+    start_angle = -30.0;
+  if (end_angle > 30.0)
+    end_angle = 30.0;
+  if (start_angle > 30.0)
+    start_angle = 30.0;
 
-inline static void
-ctx_RGB8_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
-{
-  const uint8_t *pixel = (const uint8_t *) buf;
-  while (count--)
+  if (radius <= 0.0001) /* degenerate radius: emit nothing */
+          return;
+
+  if (end_angle == start_angle) /* zero-length arc: emit just the single point */
+          // XXX also detect arcs fully outside render view
     {
-      rgba[0] = pixel[0];
-      rgba[1] = pixel[1];
-      rgba[2] = pixel[2];
-      rgba[3] = 255;
-      pixel+=3;
-      rgba +=4;
+    if (rasterizer->has_prev!=0) /* connect to the existing path when there is a previous point, otherwise start a new one */
+      ctx_rasterizer_line_to (rasterizer, x + ctx_cosf (end_angle) * radius,
+                              y + ctx_sinf (end_angle) * radius);
+      else
+      ctx_rasterizer_move_to (rasterizer, x + ctx_cosf (end_angle) * radius,
+                            y + ctx_sinf (end_angle) * radius);
+      return;
     }
-}
-
-inline static void
-ctx_RGBA8_to_RGB8 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
-{
-  uint8_t *pixel = (uint8_t *) buf;
-  while (count--)
+#if 1
+  if ( (!anticlockwise && fabsf((end_angle - start_angle) - CTX_PI*2) < 0.01f)  ||
+       ( (anticlockwise && fabsf((start_angle - end_angle) - CTX_PI*2) < 0.01f ) ) 
+  ||   (anticlockwise && fabsf((end_angle - start_angle) - CTX_PI*2) < 0.01f)  ||  (!anticlockwise && 
fabsf((start_angle - end_angle) - CTX_PI*2) < 0.01f )  )
     {
-      pixel[0] = rgba[0];
-      pixel[1] = rgba[1];
-      pixel[2] = rgba[2];
-      pixel+=3;
-      rgba +=4;
+      steps = full_segments - 1; /* angles differ by (almost exactly) 2*pi in either order: treat as a full circle */
     }
-}
-
+  else
 #endif
-#if CTX_ENABLE_GRAY1
-
-#if CTX_NATIVE_GRAYA8
-inline static void
-ctx_GRAY1_to_GRAYA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
-{
-  const uint8_t *pixel = (uint8_t *) buf;
-  while (count--)
     {
-      rgba[0] = 255 * (*pixel & (1<< (x&7) ) );
-      rgba[1] = 255;
-      pixel+= ( (x&7) ==7);
-      x++;
-      rgba +=2;
+      steps = (end_angle - start_angle) / (CTX_PI*2) * full_segments; /* fraction of a full circle, expressed in segments */
+      if (anticlockwise)
+        { steps = full_segments - steps; };
+   // if (steps > full_segments)
+   //   steps = full_segments;
     }
-}
-
-inline static void
-ctx_GRAYA8_to_GRAY1 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
-{
-  uint8_t *pixel = (uint8_t *) buf;
-  *pixel = 0;
-  while (count--)
+  if (anticlockwise) { step = step * -1; } /* step through decreasing angles */
+  int first = 1;
+  if (steps == 0 /* || steps==full_segments -1  || (anticlockwise && steps == full_segments) */)
     {
-      int gray = rgba[0];
-      //gray += ctx_dither_mask_a (x, rasterizer->scanline/aa, 0, 127);
-      if (gray >= 127)
+      float xv = x + ctx_cosf (start_angle) * radius; /* no intermediate segment: only establish the start point */
+      float yv = y + ctx_sinf (start_angle) * radius;
+      if (!rasterizer->has_prev)
+        { ctx_rasterizer_move_to (rasterizer, xv, yv); }
+      first = 0;
+    }
+  else
+    {
+      for (float angle = start_angle, i = 0; i < steps; angle += step, i++) /* note: the counter i is declared as float here */
         {
-          *pixel = *pixel | ((1<< (x&7) ) * (gray >= 127));
-        }
-#if 0
-      else
-      {
-          *pixel = *pixel & (~ (1<< (x&7) ) );
-      }
-#endif
-      if ( (x&7) ==7)
-        { pixel+=1;
-          if(count>0)*pixel = 0;
+          float xv = x + ctx_cosf (angle) * radius;
+          float yv = y + ctx_sinf (angle) * radius;
+          if (first && !rasterizer->has_prev) /* only the very first point may start a new sub-path */
+            { ctx_rasterizer_move_to (rasterizer, xv, yv); }
+          else
+            { ctx_rasterizer_line_to (rasterizer, xv, yv); }
+          first = 0;
         }
-      x++;
-      rgba +=2;
     }
+  ctx_rasterizer_line_to (rasterizer, x + ctx_cosf (end_angle) * radius, /* always finish exactly on the end angle */
+                          y + ctx_sinf (end_angle) * radius);
 }
 
-#else
+static void
+ctx_rasterizer_quad_to (CtxRasterizer *rasterizer,
+                        float        cx,
+                        float        cy,
+                        float        x,
+                        float        y)
+{ /* Expresses a quadratic curve with control point (cx, cy) and end
+   * point (x, y) as a call to ctx_rasterizer_curve_to: the two cubic
+   * control points lie two thirds of the way from rasterizer->x/y and
+   * from (x, y) towards (cx, cy). */
+  ctx_rasterizer_curve_to (rasterizer,
+                           (cx * 2 + rasterizer->x) / 3.0f, (cy * 2 + rasterizer->y) / 3.0f,
+                           (cx * 2 + x) / 3.0f,           (cy * 2 + y) / 3.0f,
+                           x,                              y);
+}
 
-inline static void
-ctx_GRAY1_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
+static void
+ctx_rasterizer_rel_quad_to (CtxRasterizer *rasterizer,
+                            float cx, float cy,
+                            float x,  float y) /*
+   * Relative variant of ctx_rasterizer_quad_to: both the control point
+   * and the end point are given as offsets from rasterizer->x/y. */
 {
-  const uint8_t *pixel = (uint8_t *) buf;
-  while (count--)
-    {
-      *((uint32_t*)(rgba))=0xff000000 + 0x00ffffff * ((*pixel & (1<< (x&7) ) )!=0);
-      pixel+= ( (x&7) ==7);
-      x++;
-      rgba +=4;
-    }
+  ctx_rasterizer_quad_to (rasterizer, cx + rasterizer->x, cy + rasterizer->y,
+                          x  + rasterizer->x, y  + rasterizer->y);
 }
 
-inline static void
-ctx_RGBA8_to_GRAY1 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
+static void
+ctx_rasterizer_stroke (CtxRasterizer *rasterizer) /*
+   * Strokes the path currently held in rasterizer->edge_list: the edges
+   * are copied aside, an outline offset by half the line width is built
+   * from the copy, caps and round joins are appended, and the result is
+   * filled with the winding rule. While the stroke source is not
+   * CTX_SOURCE_INHERIT_FILL it temporarily replaces source_fill. */
 {
-  uint8_t *pixel = (uint8_t *) buf;
-  *pixel = 0;
-  while (count--)
+  CtxGState *gstate = &rasterizer->state->gstate;
+  CtxSource source_backup;
+  int count = rasterizer->edge_list.count;
+  if (count == 0) /* nothing to stroke */
+    return;
+  if (gstate->source_stroke.type != CTX_SOURCE_INHERIT_FILL)
+  {
+    source_backup = gstate->source_fill; /* restored at the end of the function */
+    gstate->source_fill = rasterizer->state->gstate.source_stroke;
+  }
+  int preserved = rasterizer->preserve;
+  float factor = ctx_matrix_get_scale (&gstate->transform);
+  float line_width = gstate->line_width * factor; /* line width multiplied by the scale reported for the current transform */
+
+  rasterizer->comp_op = NULL;
+  ctx_composite_setup (rasterizer);
+
+  CtxSegment temp[count]; /* copy of already built up path's poly line  */
+  memcpy (temp, rasterizer->edge_list.entries, sizeof (temp) );
+
+#if CTX_FAST_FILL_RECT
+  if (rasterizer->edge_list.count == 5) /* fast path: a five-edge path whose end points line up like an axis-aligned rectangle */
     {
-      int gray = ctx_u8_color_rgb_to_gray (rasterizer->state, rgba);
-      //gray += ctx_dither_mask_a (x, rasterizer->scanline/aa, 0, 127);
-      if (gray <= 127)
+      CtxSegment *entry0 = &((CtxSegment*)rasterizer->edge_list.entries)[0];
+      CtxSegment *entry1 = &((CtxSegment*)rasterizer->edge_list.entries)[1];
+      CtxSegment *entry2 = &((CtxSegment*)rasterizer->edge_list.entries)[2];
+      CtxSegment *entry3 = &((CtxSegment*)rasterizer->edge_list.entries)[3];
+
+      if (!rasterizer->state->gstate.clipped &&
+          (entry0->data.s16[2] == entry1->data.s16[2]) &&
+          (entry0->data.s16[3] == entry3->data.s16[3]) &&
+          (entry1->data.s16[3] == entry2->data.s16[3]) &&
+          (entry2->data.s16[2] == entry3->data.s16[2])
+#if CTX_ENABLE_SHADOW_BLUR
+           && !rasterizer->in_shadow
+#endif
+         )
+       {
+
+        float x0 = entry3->data.s16[2] * 1.0f / CTX_SUBDIV;
+        float y0 = entry3->data.s16[3] * 1.0f / CTX_FULL_AA;
+        float x1 = entry1->data.s16[2] * 1.0f / CTX_SUBDIV;
+        float y1 = entry1->data.s16[3] * 1.0f / CTX_FULL_AA;
+
+        ctx_composite_stroke_rect (rasterizer, x0, y0, x1, y1, line_width);
+
+        goto done; /* skips outline construction and the fill below */
+
+
+       }
+    }
+#endif
+  
+    {
+    {
+      if (line_width < 5.0f)
+      {
+      factor *= 0.89; /* this hack adjustment makes sharp 1px and 2px strokewidths
+      //                 end up sharp without erroneous AA; we seem to be off by
+      //                 one somewhere else, causing the need for this
+      //                 */
+      line_width *= 0.89f;
+      }
+      ctx_rasterizer_reset (rasterizer); /* then start afresh with our stroked shape  */
+      CtxMatrix transform_backup = gstate->transform;
+      _ctx_matrix_identity (&gstate->transform); /* identity while the outline is emitted; restored from transform_backup after the fill */
+      float prev_x = 0.0f;
+      float prev_y = 0.0f;
+      float half_width_x = line_width/2;
+      float half_width_y = half_width_x;
+
+      if (CTX_UNLIKELY(line_width <= 0.0f))
+        { // makes 0 width be hairline
+          half_width_x = .5f;
+          half_width_y = .5f;
+        }
+      int start = 0;
+      int end   = 0;
+      while (start < count) /* one iteration per run of edges that begins at a CTX_NEW_EDGE */
         {
-          //*pixel = *pixel & (~ (1<< (x&7) ) );
+          int started = 0;
+          int i;
+          for (i = start; i < count; i++) /* forward pass: one side of the outline */
+            {
+              CtxSegment *entry = &temp[i];
+              float x, y;
+              if (entry->code == CTX_NEW_EDGE)
+                {
+                  if (CTX_LIKELY(started)) /* a second CTX_NEW_EDGE ends the current run */
+                    {
+                      end = i - 1;
+                      goto foo;
+                    }
+                  prev_x = entry->data.s16[0] * 1.0f / CTX_SUBDIV;
+                  prev_y = entry->data.s16[1] * 1.0f / CTX_FULL_AA;
+                  started = 1;
+                  start = i;
+                }
+              x = entry->data.s16[2] * 1.0f / CTX_SUBDIV;
+              y = entry->data.s16[3] * 1.0f / CTX_FULL_AA;
+              float dx = x - prev_x;
+              float dy = y - prev_y;
+              float length = ctx_fast_hypotf (dx, dy);
+              if (length>0.001f) /* segments shorter than this are skipped */
+                {
+                  float recip_length = 1.0/length;
+                  dx = dx * recip_length * half_width_x; /* unit direction scaled to half the line width */
+                  dy = dy * recip_length * half_width_y;
+                  if (CTX_UNLIKELY(entry->code == CTX_NEW_EDGE))
+                    {
+                      ctx_rasterizer_finish_shape (rasterizer);
+                      ctx_rasterizer_move_to (rasterizer, prev_x+dy, prev_y-dx);
+                    }
+                  ctx_rasterizer_line_to (rasterizer, prev_x-dy, prev_y+dx); /* (-dy, dx) is perpendicular to the segment */
+                  
+                  // we need to know the slope of the other side
+
+                  // XXX possible miter line-to
+                  //ctx_rasterizer_line_to (rasterizer, prev_x-dy+4, prev_y+dx+10);
+                  //ctx_rasterizer_line_to (rasterizer, prev_x-dy+8, prev_y+dx+0);
+
+                  ctx_rasterizer_line_to (rasterizer, x-dy, y+dx);
+                }
+              prev_x = x;
+              prev_y = y;
+            }
+          end = i-1;
+foo: /* backward pass over the same run, from end down to start */
+          for (int i = end; i >= start; i--)
+            {
+              CtxSegment *entry = &temp[i];
+              float x, y, dx, dy;
+              x = entry->data.s16[2] * 1.0f / CTX_SUBDIV;
+              y = entry->data.s16[3] * 1.0f / CTX_FULL_AA;
+              dx = x - prev_x;
+              dy = y - prev_y;
+              float length = ctx_fast_hypotf (dx, dy);
+              float recip_length = 1.0f/length; /* NOTE(review): computed before the length check; dx/dy derived from it are only used when length > 0.001f */
+              dx = dx * recip_length * half_width_x;
+              dy = dy * recip_length * half_width_y;
+              if (CTX_LIKELY(length>0.001f))
+                {
+                  ctx_rasterizer_line_to (rasterizer, prev_x-dy, prev_y+dx);
+                  // XXX possible miter line-to
+             //   ctx_rasterizer_line_to (rasterizer, prev_x-dy+10, prev_y+dx+10);
+                  ctx_rasterizer_line_to (rasterizer, x-dy,      y+dx);
+                }
+              prev_x = x;
+              prev_y = y;
+              if (CTX_UNLIKELY(entry->code == CTX_NEW_EDGE)) /* first edge of the run: also walk back to its starting point (s16[0], s16[1]) */
+                {
+                  x = entry->data.s16[0] * 1.0f / CTX_SUBDIV;
+                  y = entry->data.s16[1] * 1.0f / CTX_FULL_AA;
+                  dx = x - prev_x;
+                  dy = y - prev_y;
+                  length = ctx_fast_hypotf (dx, dy);
+                  recip_length = 1.0f/length;
+                  if (CTX_LIKELY(length>0.001f))
+                    {
+                      dx = dx * recip_length * half_width_x;
+                      dy = dy * recip_length * half_width_y;
+                      ctx_rasterizer_line_to (rasterizer, prev_x-dy, prev_y+dx);
+                      ctx_rasterizer_line_to (rasterizer, x-dy, y+dx);
+                    }
+                }
+              if ( (prev_x != x) && (prev_y != y) )
+                {
+                  prev_x = x;
+                  prev_y = y;
+                }
+            }
+          start = end+1;
         }
-      else
+      ctx_rasterizer_finish_shape (rasterizer);
+      switch (gstate->line_cap)
         {
-          *pixel = *pixel | (1<< (x&7) );
+          case CTX_CAP_SQUARE: // XXX: incorrect - if rectangles were in
+                               //                  reverse order - rotation would be off
+                               //                  better implement correct here
+            {
+              float x = 0, y = 0;
+              int has_prev = 0;
+              for (int i = 0; i < count; i++)
+                {
+                  CtxSegment *entry = &temp[i];
+                  if (CTX_UNLIKELY(entry->code == CTX_NEW_EDGE))
+                    {
+                      if (has_prev) /* cap at the end point of the previous run */
+                        {
+                          ctx_rasterizer_rectangle (rasterizer, x - half_width_x, y - half_width_y, 
half_width_x, half_width_y);
+                          ctx_rasterizer_finish_shape (rasterizer); /* NOTE(review): the rectangle above is sized half_width, the two other cap rectangles use half_width * 2 - confirm this is intended */
+                        }
+                      x = entry->data.s16[0] * 1.0f / CTX_SUBDIV;
+                      y = entry->data.s16[1] * 1.0f / CTX_FULL_AA;
+                      ctx_rasterizer_rectangle (rasterizer, x - half_width_x, y - half_width_y, half_width_x 
* 2, half_width_y * 2);
+                      ctx_rasterizer_finish_shape (rasterizer);
+                    }
+                  x = entry->data.s16[2] * 1.0f / CTX_SUBDIV;
+                  y = entry->data.s16[3] * 1.0f / CTX_FULL_AA;
+                  has_prev = 1;
+                }
+              ctx_rasterizer_rectangle (rasterizer, x - half_width_x, y - half_width_y, half_width_x * 2, 
half_width_y * 2);
+              ctx_rasterizer_finish_shape (rasterizer);
+            }
+            break;
+          case CTX_CAP_NONE: /* nothing to do */
+            break;
+          case CTX_CAP_ROUND: /* same traversal as the square cap, with an arc of radius half_width_x at each run end point */
+            {
+              float x = 0, y = 0;
+              int has_prev = 0;
+              for (int i = 0; i < count; i++)
+                {
+                  CtxSegment *entry = &temp[i];
+                  if (CTX_UNLIKELY(entry->code == CTX_NEW_EDGE))
+                    {
+                      if (has_prev)
+                        {
+                          ctx_rasterizer_arc (rasterizer, x, y, half_width_x, CTX_PI*3, 0, 1);
+                          ctx_rasterizer_finish_shape (rasterizer);
+                        }
+                      x = entry->data.s16[0] * 1.0f / CTX_SUBDIV;
+                      y = entry->data.s16[1] * 1.0f / CTX_FULL_AA;
+                      ctx_rasterizer_arc (rasterizer, x, y, half_width_x, CTX_PI*3, 0, 1);
+                      ctx_rasterizer_finish_shape (rasterizer);
+                    }
+                  x = entry->data.s16[2] * 1.0f / CTX_SUBDIV;
+                  y = entry->data.s16[3] * 1.0f / CTX_FULL_AA;
+                  has_prev = 1;
+                }
+              ctx_rasterizer_move_to (rasterizer, x, y);
+              ctx_rasterizer_arc (rasterizer, x, y, half_width_x, CTX_PI*3, 0, 1);
+              ctx_rasterizer_finish_shape (rasterizer);
+              break;
+            }
         }
-      if ( (x&7) ==7)
-        { pixel+=1;
-          if(count>0)*pixel = 0;
+      switch (gstate->line_join)
+        {
+          case CTX_JOIN_BEVEL:
+          case CTX_JOIN_MITER: /* no extra geometry is added for bevel or miter joins here */
+            break;
+          case CTX_JOIN_ROUND:
+            {
+              float x = 0, y = 0;
+              for (int i = 0; i < count-1; i++)
+                {
+                  CtxSegment *entry = &temp[i];
+                  x = entry->data.s16[2] * 1.0f / CTX_SUBDIV;
+                  y = entry->data.s16[3] * 1.0f / CTX_FULL_AA;
+                  if (CTX_UNLIKELY(entry[1].code == CTX_EDGE)) /* the next entry continues this run: add an arc at the shared point */
+                    {
+                      ctx_rasterizer_arc (rasterizer, x, y, half_width_x, CTX_PI*2, 0, 1);
+                      ctx_rasterizer_finish_shape (rasterizer);
+                    }
+                }
+              break;
+            }
         }
-      x++;
-      rgba +=4;
+      CtxFillRule rule_backup = gstate->fill_rule;
+      gstate->fill_rule = CTX_FILL_RULE_WINDING; /* the outline is always filled with the winding rule; the caller's rule is restored below */
+      rasterizer->preserve = 0; // so fill isn't tripped
+      ctx_rasterizer_fill (rasterizer);
+      gstate->fill_rule = rule_backup;
+      gstate->transform = transform_backup;
     }
-}
-#endif
-
+  }
+#if CTX_FAST_FILL_RECT
+done:
 #endif
-#if CTX_ENABLE_GRAY2
-
-#if CTX_NATIVE_GRAYA8
-inline static void
-ctx_GRAY2_to_GRAYA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
-{
-  const uint8_t *pixel = (uint8_t *) buf;
-  while (count--)
+  if (preserved) /* preserve was set on entry: put the saved path back into the edge list */
     {
-      int val = (*pixel & (3 << ( (x & 3) <<1) ) ) >> ( (x&3) <<1);
-      val <<= 6;
-      rgba[0] = val;
-      rgba[1] = 255;
-      if ( (x&3) ==3)
-        { pixel+=1; }
-      x++;
-      rgba +=2;
+      memcpy (rasterizer->edge_list.entries, temp, sizeof (temp) );
+      rasterizer->edge_list.count = count;
+      rasterizer->preserve = 0;
     }
+  if (gstate->source_stroke.type != CTX_SOURCE_INHERIT_FILL) /* undo the source swap made at the top */
+    gstate->source_fill = source_backup;
 }
 
-inline static void
-ctx_GRAYA8_to_GRAY2 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
-{
-  uint8_t *pixel = (uint8_t *) buf;
-  while (count--)
-    {
-      int val = rgba[0];
-      val >>= 6;
-      *pixel = *pixel & (~ (3 << ( (x&3) <<1) ) );
-      *pixel = *pixel | ( (val << ( (x&3) <<1) ) );
-      if ( (x&3) ==3)
-        { pixel+=1; }
-      x++;
-      rgba +=2;
-    }
-}
+#if CTX_1BIT_CLIP /* selects the pixel format passed to ctx_buffer_new for clip buffers */
+#define CTX_CLIP_FORMAT CTX_FORMAT_GRAY1
 #else
+#define CTX_CLIP_FORMAT CTX_FORMAT_GRAY8
+#endif
 
-inline static void
-ctx_GRAY2_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
-{
-  const uint8_t *pixel = (uint8_t *) buf;
-  while (count--)
-    {
-      int val = (*pixel & (3 << ( (x & 3) <<1) ) ) >> ( (x&3) <<1);
-      val <<= 6;
-      rgba[0] = val;
-      rgba[1] = val;
-      rgba[2] = val;
-      rgba[3] = 255;
-      if ( (x&3) ==3)
-        { pixel+=1; }
-      x++;
-      rgba +=4;
-    }
-}
 
-inline static void
-ctx_RGBA8_to_GRAY2 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
+static void
+ctx_rasterizer_clip_reset (CtxRasterizer *rasterizer) /*
+   * Removes any clipping: frees the clip buffer (when CTX_ENABLE_CLIP is
+   * set) and resets the gstate clip bounds to the blit rectangle. */
 {
-  uint8_t *pixel = (uint8_t *) buf;
-  while (count--)
-    {
-      int val = ctx_u8_color_rgb_to_gray (rasterizer->state, rgba);
-      val >>= 6;
-      *pixel = *pixel & (~ (3 << ( (x&3) <<1) ) );
-      *pixel = *pixel | ( (val << ( (x&3) <<1) ) );
-      if ( (x&3) ==3)
-        { pixel+=1; }
-      x++;
-      rgba +=4;
-    }
-}
-#endif
-
+#if CTX_ENABLE_CLIP
+  if (rasterizer->clip_buffer)
+   ctx_buffer_free (rasterizer->clip_buffer);
+  rasterizer->clip_buffer = NULL; /* cleared so the freed buffer is not referenced again */
 #endif
-#if CTX_ENABLE_GRAY4
+  rasterizer->state->gstate.clip_min_x = rasterizer->blit_x; /* clip rectangle starts at the blit origin */
+  rasterizer->state->gstate.clip_min_y = rasterizer->blit_y;
 
-#if CTX_NATIVE_GRAYA8
-inline static void
-ctx_GRAY4_to_GRAYA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
-{
-  const uint8_t *pixel = (uint8_t *) buf;
-  while (count--)
-    {
-      int val = (*pixel & (15 << ( (x & 1) <<2) ) ) >> ( (x&1) <<2);
-      val <<= 4;
-      rgba[0] = val;
-      rgba[1] = 255;
-      if ( (x&1) ==1)
-        { pixel+=1; }
-      x++;
-      rgba +=2;
-    }
+  rasterizer->state->gstate.clip_max_x = rasterizer->blit_x + rasterizer->blit_width - 1; /* origin + size - 1, i.e. the last column/row of the blit area */
+  rasterizer->state->gstate.clip_max_y = rasterizer->blit_y + rasterizer->blit_height - 1;
 }
 
-inline static void
-ctx_GRAYA8_to_GRAY4 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
-{
-  uint8_t *pixel = (uint8_t *) buf;
-  while (count--)
-    {
-      int val = rgba[0];
-      val >>= 4;
-      *pixel = *pixel & (~ (15 << ( (x&1) <<2) ) );
-      *pixel = *pixel | ( (val << ( (x&1) <<2) ) );
-      if ( (x&1) ==1)
-        { pixel+=1; }
-      x++;
-      rgba +=2;
-    }
-}
-#else
-inline static void
-ctx_GRAY4_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
+static void
+ctx_rasterizer_clip_apply (CtxRasterizer *rasterizer,
+                           CtxSegment    *edges)
 {
-  const uint8_t *pixel = (uint8_t *) buf;
-  while (count--)
-    {
-      int val = (*pixel & (15 << ( (x & 1) <<2) ) ) >> ( (x&1) <<2);
-      val <<= 4;
-      rgba[0] = val;
-      rgba[1] = val;
-      rgba[2] = val;
-      rgba[3] = 255;
-      if ( (x&1) ==1)
-        { pixel+=1; }
-      x++;
-      rgba +=4;
-    }
-}
+  unsigned int count = edges[0].data.u32[0];
 
-inline static void
-ctx_RGBA8_to_GRAY4 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
-{
-  uint8_t *pixel = (uint8_t *) buf;
-  while (count--)
-    {
-      int val = ctx_u8_color_rgb_to_gray (rasterizer->state, rgba);
-      val >>= 4;
-      *pixel = *pixel & (~ (15 << ( (x&1) <<2) ) );
-      *pixel = *pixel | ( (val << ( (x&1) <<2) ) );
-      if ( (x&1) ==1)
-        { pixel+=1; }
-      x++;
-      rgba +=4;
-    }
-}
-#endif
+  int minx = 5000;
+  int miny = 5000;
+  int maxx = -5000;
+  int maxy = -5000;
+  int prev_x = 0;
+  int prev_y = 0;
+  int blit_width = rasterizer->blit_width;
+  int blit_height = rasterizer->blit_height;
 
-#endif
-#if CTX_ENABLE_GRAY8
+  float coords[6][2];
 
-#if CTX_NATIVE_GRAYA8
-inline static void
-ctx_GRAY8_to_GRAYA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
-{
-  const uint8_t *pixel = (uint8_t *) buf;
-  while (count--)
+  for (unsigned int i = 0; i < count; i++)
     {
-      rgba[0] = pixel[0];
-      rgba[1] = 255;
-      pixel+=1;
-      rgba +=2;
+      CtxSegment *entry = &edges[i+1];
+      float x, y;
+      if (entry->code == CTX_NEW_EDGE)
+        {
+          prev_x = entry->data.s16[0] * 1.0f / CTX_SUBDIV;
+          prev_y = entry->data.s16[1] * 1.0f / CTX_FULL_AA;
+          if (prev_x < minx) { minx = prev_x; }
+          if (prev_y < miny) { miny = prev_y; }
+          if (prev_x > maxx) { maxx = prev_x; }
+          if (prev_y > maxy) { maxy = prev_y; }
+        }
+      x = entry->data.s16[2] * 1.0f / CTX_SUBDIV;
+      y = entry->data.s16[3] * 1.0f / CTX_FULL_AA;
+      if (x < minx) { minx = x; }
+      if (y < miny) { miny = y; }
+      if (x > maxx) { maxx = x; }
+      if (y > maxy) { maxy = y; }
+
+      if (i < 6)
+      {
+        coords[i][0] = x;
+        coords[i][1] = y;
+      }
     }
-}
 
-inline static void
-ctx_GRAYA8_to_GRAY8 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
-{
-  uint8_t *pixel = (uint8_t *) buf;
-  while (count--)
+#if CTX_ENABLE_CLIP
+
+  if ((rasterizer->clip_rectangle==1
+       || !rasterizer->clip_buffer)
+      )
+  {
+    if (count == 5)
     {
-      pixel[0] = rgba[0];
-      pixel+=1;
-      rgba +=2;
+      if (coords[0][0] == coords[1][0] &&
+          coords[0][1] == coords[4][1] &&
+          coords[0][1] == coords[3][1] &&
+          coords[1][1] == coords[2][1] &&
+          coords[3][0] == coords[4][0]
+          )
+      {
+#if 0
+        printf ("%d,%d %dx%d\n", minx, miny,
+                                       maxx-minx+1, maxy-miny+1);
+#endif
+
+         rasterizer->state->gstate.clip_min_x =
+            ctx_maxi (minx, rasterizer->state->gstate.clip_min_x);
+         rasterizer->state->gstate.clip_min_y =
+            ctx_maxi (miny, rasterizer->state->gstate.clip_min_y);
+         rasterizer->state->gstate.clip_max_x =
+            ctx_mini (maxx, rasterizer->state->gstate.clip_max_x);
+         rasterizer->state->gstate.clip_max_y =
+            ctx_mini (maxy, rasterizer->state->gstate.clip_max_y);
+
+         rasterizer->clip_rectangle = 1;
+
+#if 0
+         if (!rasterizer->clip_buffer)
+           rasterizer->clip_buffer = ctx_buffer_new (blit_width,
+                                                     blit_height,
+                                                     CTX_CLIP_FORMAT);
+
+         memset (rasterizer->clip_buffer->data, 0, blit_width * blit_height);
+         int i = 0;
+         for (int y = rasterizer->state->gstate.clip_min_y;
+                  y <= rasterizer->state->gstate.clip_max_y;
+                  y++)
+         for (int x = rasterizer->state->gstate.clip_min_x;
+                  x <= rasterizer->state->gstate.clip_max_x;
+                  x++, i++)
+         {
+           ((uint8_t*)(rasterizer->clip_buffer->data))[i] = 255;
+         }
+#endif
+
+         return;
+      }
+#if 0
+      else
+      {
+        printf ("%d,%d %dx%d  0,0:%.2f 0,1:%.2f 1,0:%.2f 11:%.2f 20:%.2f 21:%2.f 30:%.2f 31:%.2f 40:%.2f 
41:%.2f\n", minx, miny,
+                                       maxx-minx+1, maxy-miny+1
+                                       
+         ,coords[0][0] ,  coords[0][1]
+         ,coords[1][0] ,  coords[1][1]
+         ,coords[2][0] ,  coords[2][1]
+         ,coords[3][0] ,  coords[3][1]
+         ,coords[4][0] ,  coords[4][1]
+         );
+      }
+#endif
     }
-}
-#else
-inline static void
-ctx_GRAY8_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
-{
-  const uint8_t *pixel = (uint8_t *) buf;
-  while (count--)
+  }
+  rasterizer->clip_rectangle = 0;
+
+  if ((minx == maxx) || (miny == maxy)) // XXX : reset hack
+  {
+    ctx_rasterizer_clip_reset (rasterizer);
+    return;//goto done;
+  }
+
+  int we_made_it = 0;
+  CtxBuffer *clip_buffer;
+
+  if (!rasterizer->clip_buffer)
+  {
+    rasterizer->clip_buffer = ctx_buffer_new (blit_width,
+                                              blit_height,
+                                              CTX_CLIP_FORMAT);
+    clip_buffer = rasterizer->clip_buffer;
+    we_made_it = 1;
+    if (CTX_CLIP_FORMAT == CTX_FORMAT_GRAY1)
+      memset (rasterizer->clip_buffer->data, 0, blit_width * blit_height/8);
+    else
+      memset (rasterizer->clip_buffer->data, 0, blit_width * blit_height);
+  }
+  else
+  {
+    clip_buffer = ctx_buffer_new (blit_width, blit_height,
+                                  CTX_CLIP_FORMAT);
+  }
+
+  {
+
+  int prev_x = 0;
+  int prev_y = 0;
+
+    Ctx *ctx = ctx_new_for_framebuffer (clip_buffer->data, blit_width, blit_height,
+       blit_width,
+       CTX_CLIP_FORMAT);
+
+  for (unsigned int i = 0; i < count; i++)
     {
-      rgba[0] = pixel[0];
-      rgba[1] = pixel[0];
-      rgba[2] = pixel[0];
-      rgba[3] = 255;
-      pixel+=1;
-      rgba +=4;
+      CtxSegment *entry = &edges[i+1];
+      float x, y;
+      if (entry->code == CTX_NEW_EDGE)
+        {
+          prev_x = entry->data.s16[0] * 1.0f / CTX_SUBDIV;
+          prev_y = entry->data.s16[1] * 1.0f / CTX_FULL_AA;
+          ctx_move_to (ctx, prev_x, prev_y);
+        }
+      x = entry->data.s16[2] * 1.0f / CTX_SUBDIV;
+      y = entry->data.s16[3] * 1.0f / CTX_FULL_AA;
+      ctx_line_to (ctx, x, y);
     }
-}
+    ctx_gray (ctx, 1.0f);
+    ctx_fill (ctx);
+    ctx_free (ctx);
+  }
 
-inline static void
-ctx_RGBA8_to_GRAY8 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
-{
-  uint8_t *pixel = (uint8_t *) buf;
-  for (int i = 0; i < count; i ++)
+  int maybe_rect = 1;
+  rasterizer->clip_rectangle = 0;
+
+  if (CTX_CLIP_FORMAT == CTX_FORMAT_GRAY1)
+  {
+    unsigned int count = blit_width * blit_height / 8;
+    for (unsigned int i = 0; i < count; i++)
     {
-      pixel[i] = ctx_u8_color_rgb_to_gray (rasterizer->state, rgba + i * 4);
+      ((uint8_t*)rasterizer->clip_buffer->data)[i] =
+      (((uint8_t*)rasterizer->clip_buffer->data)[i] &
+      ((uint8_t*)clip_buffer->data)[i]);
     }
-}
-#endif
+  }
+  else
+  {
+    int count = blit_width * blit_height;
 
-#endif
-#if CTX_ENABLE_GRAYA8
 
-inline static void
-ctx_GRAYA8_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
-{
-  const uint8_t *pixel = (const uint8_t *) buf;
-  while (count--)
+    int i;
+    int x0 = 0;
+    int y0 = 0;
+    int width = -1;
+    int next_stage = 0;
+    uint8_t *p_data = (uint8_t*)rasterizer->clip_buffer->data;
+    uint8_t *data = (uint8_t*)clip_buffer->data;
+
+    i=0;
+    /* find upper left */
+    for (; i < count && maybe_rect && !next_stage; i++)
     {
-      rgba[0] = pixel[0];
-      rgba[1] = pixel[0];
-      rgba[2] = pixel[0];
-      rgba[3] = pixel[1];
-      pixel+=2;
-      rgba +=4;
+      uint8_t val = (p_data[i] * data[i])/255;
+      data[i] = val;
+      switch (val)
+      {
+        case 255:
+          x0 = i % blit_width;
+          y0 = i / blit_width;
+          next_stage = 1;
+          break;
+        case 0: break;
+        default:
+          maybe_rect = 0;
+          break;
+      }
     }
-}
 
-inline static void
-ctx_RGBA8_to_GRAYA8 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
-{
-  uint8_t *pixel = (uint8_t *) buf;
-  while (count--)
+    next_stage = 0;
+    /* figure out with */
+    for (; i < count && !next_stage && maybe_rect; i++)
     {
-      pixel[0] = ctx_u8_color_rgb_to_gray (rasterizer->state, rgba);
-      pixel[1] = rgba[3];
-      pixel+=2;
-      rgba +=4;
+      int x = i % blit_width;
+      int y = i / blit_width;
+      uint8_t val = (p_data[i] * data[i])/255;
+      data[i] = val;
+
+      if (y == y0)
+      {
+        switch (val)
+        {
+          case 255:
+            width = x - x0 + 1;
+            break;
+          case 0:
+            next_stage = 1;
+            break;
+          default:
+            maybe_rect = 0;
+            break;
+        }
+        if (x % blit_width == blit_width - 1) next_stage = 1;
+      }
+      else next_stage = 1;
     }
-}
 
-#if CTX_NATIVE_GRAYA8
-CTX_INLINE static void ctx_rgba_to_graya_u8 (CtxState *state, uint8_t *in, uint8_t *out)
-{
-  out[0] = ctx_u8_color_rgb_to_gray (state, in);
-  out[1] = in[3];
-}
+    next_stage = 0;
+    /* body */
+    for (; i < count && maybe_rect && !next_stage; i++)
+    {
+      int x = i % blit_width;
+      uint8_t val = (p_data[i] * data[i])/255;
+      data[i] = val;
 
-#if CTX_GRADIENTS
-static void
-ctx_fragment_linear_gradient_GRAYA8 (CtxRasterizer *rasterizer, float x, float y, void *out, int count, float dx, float dy)
-{
-  CtxSource *g = &rasterizer->state->gstate.source_fill;
-        uint8_t *dst = (uint8_t*)out;
-  for (int i = 0; i < count;i ++)
-  {
-  float v = ( ( (g->linear_gradient.dx * x + g->linear_gradient.dy * y) /
-                g->linear_gradient.length) -
-              g->linear_gradient.start) * (g->linear_gradient.rdelta);
-  {
-    uint8_t rgba[4];
-    ctx_fragment_gradient_1d_RGBA8 (rasterizer, v, 1.0, rgba);
-    ctx_rgba_to_graya_u8 (rasterizer->state, rgba, dst);
-   
-  }
+      if (x < x0)
+      {
+        if (val != 0){ maybe_rect = 0; next_stage = 1; }
+      } else if (x < x0 + width)
+      {
+        if (val != 255){ if (val != 0) maybe_rect = 0; next_stage = 1; }
+      } else {
+        if (val != 0){ maybe_rect = 0; next_stage = 1; }
+      }
+    }
 
+    next_stage = 0;
+    /* foot */
+    for (; i < count && maybe_rect && !next_stage; i++)
+    {
+      uint8_t val = (p_data[i] * data[i])/255;
+      data[i] = val;
 
-#if CTX_DITHER
-  ctx_dither_graya_u8 ((uint8_t*)dst, x, y, rasterizer->format->dither_red_blue,
-                      rasterizer->format->dither_green);
-#endif
-  dst += 2;
-  x += dx;
-  y += dy;
-  }
-}
+      if (val != 0){ maybe_rect = 0; next_stage = 1; }
+    }
 
-#if 0
-static void
-ctx_fragment_radial_gradient_RGBA8 (CtxRasterizer *rasterizer, float x, float y, void *out)
-{
-  uint8_t *rgba = (uint8_t *) out;
-  CtxSource *g = &rasterizer->state->gstate.source_fill;
-  float v = (ctx_hypotf (g->radial_gradient.x0 - x, g->radial_gradient.y0 - y) -
-              g->radial_gradient.r0) * (g->radial_gradient.rdelta);
-  ctx_fragment_gradient_1d_RGBA8 (rasterizer, v, 0.0, rgba);
-#if CTX_DITHER
-  ctx_dither_rgba_u8 (rgba, x, y, rasterizer->format->dither_red_blue,
-                      rasterizer->format->dither_green);
-#endif
-}
-#endif
 
+    for (; i < count; i++)
+    {
+      uint8_t val = (p_data[i] * data[i])/255;
+      data[i] = val;
+    }
 
-static void
-ctx_fragment_radial_gradient_GRAYA8 (CtxRasterizer *rasterizer, float x, float y, void *out, int count, float dx, float dy)
-{
-        uint8_t *dst = (uint8_t*)out;
-  for (int i = 0; i < count;i ++)
-  {
-  CtxSource *g = &rasterizer->state->gstate.source_fill;
-  float v = (ctx_hypotf (g->radial_gradient.x0 - x, g->radial_gradient.y0 - y) -
-              g->radial_gradient.r0) * (g->radial_gradient.rdelta);
-  {
-    uint8_t rgba[4];
-    ctx_fragment_gradient_1d_RGBA8 (rasterizer, v, 1.0, rgba);
-    ctx_rgba_to_graya_u8 (rasterizer->state, rgba, dst);
+    if (maybe_rect)
+       rasterizer->clip_rectangle = 1;
   }
-#if CTX_DITHER
-  ctx_dither_graya_u8 ((uint8_t*)dst, x, y, rasterizer->format->dither_red_blue,
-                      rasterizer->format->dither_green);
+  if (!we_made_it)
+   ctx_buffer_free (clip_buffer);
+#else
+  if (coords[0][0]){};
 #endif
-  dst += 2;
-  x += dx;
-  y += dy;
-  }
+  
+  rasterizer->state->gstate.clip_min_x = ctx_maxi (minx,
+                                         rasterizer->state->gstate.clip_min_x);
+  rasterizer->state->gstate.clip_min_y = ctx_maxi (miny,
+                                         rasterizer->state->gstate.clip_min_y);
+  rasterizer->state->gstate.clip_max_x = ctx_mini (maxx,
+                                         rasterizer->state->gstate.clip_max_x);
+  rasterizer->state->gstate.clip_max_y = ctx_mini (maxy,
+                                         rasterizer->state->gstate.clip_max_y);
 }
-#endif
 
 static void
-ctx_fragment_color_GRAYA8 (CtxRasterizer *rasterizer, float x, float y, void *out, int count, float dx, float dy)
-{
-  CtxSource *g = &rasterizer->state->gstate.source_fill;
-  uint16_t *dst = (uint16_t*)out;
-  uint16_t pix;
-  ctx_color_get_graya_u8 (rasterizer->state, &g->color, (void*)&pix);
-  for (int i = 0; i <count; i++)
-  {
-    dst[i]=pix;
-  }
-}
-
-static void ctx_fragment_image_GRAYA8 (CtxRasterizer *rasterizer, float x, float y, void *out, int count, float dx, float dy)
+ctx_rasterizer_clip (CtxRasterizer *rasterizer)
 {
-  uint8_t rgba[4*count];
-  CtxGState *gstate = &rasterizer->state->gstate;
-  CtxBuffer *buffer = gstate->source_fill.texture.buffer;
-  switch (buffer->format->bpp)
+  int count = rasterizer->edge_list.count;
+  CtxSegment temp[count+1]; /* copy of already built up path's poly line  */
+  rasterizer->state->has_clipped=1;
+  rasterizer->state->gstate.clipped=1;
+  //if (rasterizer->preserve)
+    { memcpy (temp + 1, rasterizer->edge_list.entries, sizeof (temp) - sizeof (temp[0]));
+      temp[0].code = CTX_NOP;
+      temp[0].data.u32[0] = count;
+      ctx_state_set_blob (rasterizer->state, CTX_clip, (uint8_t*)temp, sizeof(temp));
+    }
+  ctx_rasterizer_clip_apply (rasterizer, temp);
+  ctx_rasterizer_reset (rasterizer);
+  if (rasterizer->preserve)
     {
-#if CTX_FRAGMENT_SPECIALIZE
-      case 1:  ctx_fragment_image_gray1_RGBA8 (rasterizer, x, y, rgba, count, dx, dy); break;
-      case 24: ctx_fragment_image_rgb8_RGBA8 (rasterizer, x, y, rgba, count, dx, dy);  break;
-      case 32: ctx_fragment_image_rgba8_RGBA8 (rasterizer, x, y, rgba, count, dx, dy); break;
-#endif
-      default: ctx_fragment_image_RGBA8 (rasterizer, x, y, rgba, count, dx, dy);       break;
+      memcpy (rasterizer->edge_list.entries, temp + 1, sizeof (temp) - sizeof(temp[0]));
+      rasterizer->edge_list.count = count;
+      rasterizer->preserve = 0;
     }
-  for (int i = 0; i < count; i++)
-    ctx_rgba_to_graya_u8 (rasterizer->state, &rgba[i*4], &((uint8_t*)out)[i*2]);
 }
 
-static CtxFragment ctx_rasterizer_get_fragment_GRAYA8 (CtxRasterizer *rasterizer)
+
+#if 0
+static void
+ctx_rasterizer_load_image (CtxRasterizer *rasterizer,
+                           const char  *path,
+                           float x,
+                           float y)
 {
-  CtxGState *gstate = &rasterizer->state->gstate;
-  switch (gstate->source_fill.type)
-    {
-      case CTX_SOURCE_TEXTURE:           return ctx_fragment_image_GRAYA8;
-      case CTX_SOURCE_COLOR:           return ctx_fragment_color_GRAYA8;
-#if CTX_GRADIENTS
-      case CTX_SOURCE_LINEAR_GRADIENT: return ctx_fragment_linear_gradient_GRAYA8;
-      case CTX_SOURCE_RADIAL_GRADIENT: return ctx_fragment_radial_gradient_GRAYA8;
-#endif
-    }
-  return ctx_fragment_color_GRAYA8;
+  // decode PNG, put it in image is slot 1,
+  // magic width height stride format data
+  ctx_buffer_load_png (&rasterizer->backend.ctx->texture[0], path);
+  ctx_rasterizer_set_texture (rasterizer, 0, x, y);
 }
+#endif
 
-//ctx_u8_porter_duff(GRAYA8, 2,color,   rasterizer->fragment, rasterizer->state->gstate.blend_mode)
-ctx_u8_porter_duff(GRAYA8, 2,generic, rasterizer->fragment, rasterizer->state->gstate.blend_mode)
-
-#if CTX_INLINED_NORMAL
-//ctx_u8_porter_duff(GRAYA8, 2,color_normal,   rasterizer->fragment, CTX_BLEND_NORMAL)
-ctx_u8_porter_duff(GRAYA8, 2,generic_normal, rasterizer->fragment, CTX_BLEND_NORMAL)
 
 static void
-ctx_GRAYA8_copy_normal (CTX_COMPOSITE_ARGUMENTS)
+ctx_rasterizer_rectangle (CtxRasterizer *rasterizer,
+                          float x,
+                          float y,
+                          float width,
+                          float height)
 {
-  ctx_u8_copy_normal (2, rasterizer, dst, src, x0, coverage, count);
+  ctx_rasterizer_move_to (rasterizer, x, y);
+  ctx_rasterizer_rel_line_to (rasterizer, width, 0);
+  ctx_rasterizer_rel_line_to (rasterizer, 0, height);
+  ctx_rasterizer_rel_line_to (rasterizer, -width, 0);
+  ctx_rasterizer_rel_line_to (rasterizer, 0, -height);
+  //ctx_rasterizer_rel_line_to (rasterizer, width/2, 0);
+  ctx_rasterizer_finish_shape (rasterizer);
 }
 
 static void
-ctx_GRAYA8_clear_normal (CTX_COMPOSITE_ARGUMENTS)
+ctx_rasterizer_set_pixel (CtxRasterizer *rasterizer,
+                          uint16_t x,
+                          uint16_t y,
+                          uint8_t r,
+                          uint8_t g,
+                          uint8_t b,
+                          uint8_t a)
 {
-  ctx_u8_clear_normal (2, rasterizer, dst, src, x0, coverage, count);
+  rasterizer->state->gstate.source_fill.type = CTX_SOURCE_COLOR;
+  ctx_color_set_RGBA8 (rasterizer->state, &rasterizer->state->gstate.source_fill.color, r, g, b, a);
+  rasterizer->comp_op = NULL;
+#if 0
+  // XXX : doesn't take transforms into account - and has
+  // received less testing than code paths part of protocol,
+  // using rectangle properly will trigger the fillrect fastpath
+  ctx_rasterizer_pset (rasterizer, x, y, 255);
+#else
+  ctx_rasterizer_rectangle (rasterizer, x, y, 1.0, 1.0);
+  ctx_rasterizer_fill (rasterizer);
+#endif
 }
 
-static void
-ctx_GRAYA8_source_over_normal_color (CTX_COMPOSITE_ARGUMENTS)
+#if CTX_ENABLE_SHADOW_BLUR
+static inline float
+ctx_gaussian (float x, float mu, float sigma)
 {
-  ctx_u8_source_over_normal_color (2, rasterizer, dst, rasterizer->color, x0, coverage, count);
+  float a = ( x- mu) / sigma;
+  return ctx_expf (-0.5 * a * a);
 }
 
-static void
-ctx_GRAYA8_source_copy_normal_color (CTX_COMPOSITE_ARGUMENTS)
+static inline void
+ctx_compute_gaussian_kernel (int dim, float radius, float *kernel)
 {
-  ctx_u8_source_copy_normal_color (2, rasterizer, dst, rasterizer->color, x0, coverage, count);
+  float sigma = radius / 2;
+  float sum = 0.0;
+  int i = 0;
+  //for (int row = 0; row < dim; row ++)
+    for (int col = 0; col < dim; col ++, i++)
+    {
+      float val = //ctx_gaussian (row, radius, sigma) *
+                            ctx_gaussian (col, radius, sigma);
+      kernel[i] = val;
+      sum += val;
+    }
+  i = 0;
+  //for (int row = 0; row < dim; row ++)
+    for (int col = 0; col < dim; col ++, i++)
+        kernel[i] /= sum;
 }
 #endif
 
-inline static int
-ctx_is_opaque_color (CtxRasterizer *rasterizer)
+static void
+ctx_rasterizer_round_rectangle (CtxRasterizer *rasterizer, float x, float y, float width, float height, float corner_radius)
 {
-  CtxGState *gstate = &rasterizer->state->gstate;
-  if (gstate->global_alpha_u8 != 255)
-    return 0;
-  if (gstate->source_fill.type == CTX_SOURCE_COLOR)
-  {
-    uint8_t ga[2];
-    ctx_color_get_graya_u8 (rasterizer->state, &gstate->source_fill.color, ga);
-    return ga[1] == 255;
-  }
-  return 0;
-}
+  float aspect  = 1.0f;
+  float radius  = corner_radius / aspect;
+  float degrees = CTX_PI / 180.0f;
 
-static void
-ctx_setup_GRAYA8 (CtxRasterizer *rasterizer)
-{
-  CtxGState *gstate = &rasterizer->state->gstate;
-  int components = 2;
-  rasterizer->fragment = ctx_rasterizer_get_fragment_GRAYA8 (rasterizer);
-  rasterizer->comp_op  = ctx_GRAYA8_porter_duff_generic;
-  rasterizer->comp = CTX_COV_PATH_FALLBACK;
-  if (gstate->source_fill.type == CTX_SOURCE_COLOR)
-    {
-      ctx_fragment_color_GRAYA8 (rasterizer, 0,0, rasterizer->color, 1, 0,0);
-      if (gstate->global_alpha_u8 != 255)
-        for (int c = 0; c < components; c ++)
-          rasterizer->color[c] = (rasterizer->color[c] * gstate->global_alpha_u8)/255;
-    }
+  if (radius > width*0.5f) radius = width/2;
+  if (radius > height*0.5f) radius = height/2;
 
-#if CTX_INLINED_NORMAL
-  if (gstate->compositing_mode == CTX_COMPOSITE_CLEAR)
-    rasterizer->comp_op = ctx_GRAYA8_clear_normal;
-  else
-    switch (gstate->blend_mode)
-    {
-      case CTX_BLEND_NORMAL:
-        if (gstate->compositing_mode == CTX_COMPOSITE_COPY)
-        {
-          rasterizer->comp_op = ctx_GRAYA8_copy_normal;
-        }
-        else if (gstate->global_alpha_u8 == 0)
-          rasterizer->comp_op = ctx_RGBA8_nop;
-        else
-        switch (gstate->source_fill.type)
-        {
-          case CTX_SOURCE_COLOR:
-            if (gstate->compositing_mode == CTX_COMPOSITE_SOURCE_OVER)
-            {
-              if (rasterizer->color[components-1] == 0)
-                rasterizer->comp_op = ctx_RGBA8_nop;
-              else if (rasterizer->color[components-1] == 255)
-                rasterizer->comp_op = ctx_GRAYA8_source_copy_normal_color;
-              else
-                rasterizer->comp_op = ctx_GRAYA8_source_over_normal_color;
-            }
-            else
-            {
-              rasterizer->comp_op = ctx_GRAYA8_porter_duff_generic_normal;
-            }
-            break;
-          default:
-            rasterizer->comp_op = ctx_GRAYA8_porter_duff_generic_normal;
-            break;
-        }
-        break;
-      default:
-        rasterizer->comp_op = ctx_GRAYA8_porter_duff_generic;
-        break;
-    }
-#endif
+  ctx_rasterizer_finish_shape (rasterizer);
+  ctx_rasterizer_arc (rasterizer, x + width - radius, y + radius, radius, -90 * degrees, 0 * degrees, 0);
+  ctx_rasterizer_arc (rasterizer, x + width - radius, y + height - radius, radius, 0 * degrees, 90 * degrees, 0);
+  ctx_rasterizer_arc (rasterizer, x + radius, y + height - radius, radius, 90 * degrees, 180 * degrees, 0);
+  ctx_rasterizer_arc (rasterizer, x + radius, y + radius, radius, 180 * degrees, 270 * degrees, 0);
+
+  ctx_rasterizer_finish_shape (rasterizer);
 }
-#endif
 
-#endif
-#if CTX_ENABLE_RGB332
+static void
+ctx_rasterizer_process (Ctx *ctx, CtxCommand *command);
 
-inline static void
-ctx_332_unpack (uint8_t pixel,
-                uint8_t *red,
-                uint8_t *green,
-                uint8_t *blue)
+#if CTX_COMPOSITING_GROUPS
+static void
+ctx_rasterizer_start_group (CtxRasterizer *rasterizer) /* add a radius? */
 {
-  *blue   = (pixel & 3) <<6;
-  *green = ( (pixel >> 2) & 7) <<5;
-  *red   = ( (pixel >> 5) & 7) <<5;
-  if (*blue > 223)  { *blue  = 255; }
-  if (*green > 223) { *green = 255; }
-  if (*red > 223)   { *red   = 255; }
-}
+  CtxEntry save_command = ctx_void(CTX_SAVE);
+  // allocate buffer, and set it as temporary target
+  int no;
+  if (rasterizer->group[0] == NULL) // first group
+  {
+    rasterizer->saved_buf = rasterizer->buf;
+  }
+  for (no = 0; rasterizer->group[no] && no < CTX_GROUP_MAX; no++);
 
-static inline uint8_t
-ctx_332_pack (uint8_t red,
-              uint8_t green,
-              uint8_t blue)
-{
-  uint8_t c  = (red >> 5) << 5;
-  c |= (green >> 5) << 2;
-  c |= (blue >> 6);
-  return c;
+  if (no >= CTX_GROUP_MAX)
+     return;
+  rasterizer->group[no] = ctx_buffer_new (rasterizer->blit_width,
+                                          rasterizer->blit_height,
+                                          rasterizer->format->composite_format);
+  rasterizer->buf = rasterizer->group[no]->data;
+  ctx_rasterizer_process (rasterizer->backend.ctx, (CtxCommand*)&save_command);
 }
 
-static inline void
-ctx_RGB332_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
+static void
+ctx_rasterizer_end_group (CtxRasterizer *rasterizer)
 {
-  const uint8_t *pixel = (uint8_t *) buf;
-  while (count--)
-    {
-      ctx_332_unpack (*pixel, &rgba[0], &rgba[1], &rgba[2]);
-#if CTX_RGB332_ALPHA
-      if (rgba[0]==255 && rgba[2] == 255 && rgba[1]==0)
-        { rgba[3] = 0; }
-      else
-#endif
-        { rgba[3] = 255; }
-      pixel+=1;
-      rgba +=4;
-    }
-}
+  CtxEntry restore_command = ctx_void(CTX_RESTORE);
+  CtxEntry save_command = ctx_void(CTX_SAVE);
+  int no = 0;
+  for (no = 0; rasterizer->group[no] && no < CTX_GROUP_MAX; no++);
+  no--;
 
-static inline void
-ctx_RGBA8_to_RGB332 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
-{
-  uint8_t *pixel = (uint8_t *) buf;
-  while (count--)
-    {
-#if CTX_RGB332_ALPHA
-      if (rgba[3]==0)
-        { pixel[0] = ctx_332_pack (255, 0, 255); }
-      else
-#endif
-        { pixel[0] = ctx_332_pack (rgba[0], rgba[1], rgba[2]); }
-      pixel+=1;
-      rgba +=4;
-    }
-}
+  if (no < 0)
+    return;
 
-#endif
-#if CTX_ENABLE_RGB565 | CTX_ENABLE_RGB565_BYTESWAPPED
+  Ctx *ctx = rasterizer->backend.ctx;
 
-static inline void
-ctx_565_unpack (const uint16_t pixel,
-                uint8_t *red,
-                uint8_t *green,
-                uint8_t *blue,
-                const int byteswap)
-{
-  uint16_t byteswapped;
-  if (byteswap)
-    { byteswapped = (pixel>>8) | (pixel<<8); }
+  CtxCompositingMode comp = rasterizer->state->gstate.compositing_mode;
+  CtxBlend blend = rasterizer->state->gstate.blend_mode;
+  float global_alpha = rasterizer->state->gstate.global_alpha_f;
+  // fetch compositing, blending, global alpha
+  ctx_rasterizer_process (ctx, (CtxCommand*)&restore_command);
+  ctx_rasterizer_process (ctx, (CtxCommand*)&save_command);
+  CtxEntry set_state[3]=
+  {
+    ctx_u32 (CTX_COMPOSITING_MODE, comp,  0),
+    ctx_u32 (CTX_BLEND_MODE,       blend, 0),
+    ctx_f  (CTX_GLOBAL_ALPHA,     global_alpha, 0.0)
+  };
+  ctx_rasterizer_process (ctx, (CtxCommand*)&set_state[0]);
+  ctx_rasterizer_process (ctx, (CtxCommand*)&set_state[1]);
+  ctx_rasterizer_process (ctx, (CtxCommand*)&set_state[2]);
+  if (no == 0)
+  {
+    rasterizer->buf = rasterizer->saved_buf;
+  }
   else
-    { byteswapped  = pixel; }
-  *blue   =  (byteswapped & 31) <<3;
-  *green = ( (byteswapped>>5) & 63) <<2;
-  *red   = ( (byteswapped>>11) & 31) <<3;
-#if 0
-  if (*blue > 248) { *blue = 255; }
-  if (*green > 248) { *green = 255; }
-  if (*red > 248) { *red = 255; }
-#endif
-}
+  {
+    rasterizer->buf = rasterizer->group[no-1]->data;
+  }
+  // XXX use texture_source ?
+   ctx_texture_init (ctx, ".ctx-group", 
+                  rasterizer->blit_width, 
+                  rasterizer->blit_height,
+                                         
+                  rasterizer->blit_width * rasterizer->format->bpp/8,
+                  rasterizer->format->pixel_format,
+                  NULL, // space
+                  (uint8_t*)rasterizer->group[no]->data,
+                  NULL, NULL);
+  {
+     const char *eid = ".ctx-group";
+     int   eid_len = strlen (eid);
 
-static inline uint32_t
-ctx_565_unpack_32 (const uint16_t pixel,
-                   const int byteswap)
-{
-  uint16_t byteswapped;
-  if (byteswap)
-    { byteswapped = (pixel>>8) | (pixel<<8); }
-  else
-    { byteswapped  = pixel; }
-  uint8_t blue   = (byteswapped & 31) <<3;
-  uint8_t green = ( (byteswapped>>5) & 63) <<2;
-  uint8_t red   = ( (byteswapped>>11) & 31) <<3;
-#if 0
-  if (*blue > 248) { *blue = 255; }
-  if (*green > 248) { *green = 255; }
-  if (*red > 248) { *red = 255; }
-#endif
-  return red +  (green << 8) + (blue << 16) + (0xff << 24);
-}
+     CtxEntry commands[4] =
+      {
+       ctx_f   (CTX_TEXTURE, rasterizer->blit_x, rasterizer->blit_y), 
+       ctx_u32 (CTX_DATA, eid_len, eid_len/9+1),
+       ctx_u32 (CTX_CONT, 0,0),
+       ctx_u32 (CTX_CONT, 0,0)
+      };
+     memcpy( (char *) &commands[2].data.u8[0], eid, eid_len);
+     ( (char *) (&commands[2].data.u8[0]) ) [eid_len]=0;
 
-static inline uint16_t
-ctx_565_pack (const uint8_t  red,
-              const uint8_t  green,
-              const uint8_t  blue,
-              const int      byteswap)
-{
-  uint32_t c = (red >> 3) << 11;
-  c |= (green >> 2) << 5;
-  c |= blue >> 3;
-  if (byteswap)
-    { return (c>>8) | (c<<8); } /* swap bytes */
-  return c;
+     ctx_rasterizer_process (ctx, (CtxCommand*)commands);
+  }
+  {
+    CtxEntry commands[2]=
+    {
+      ctx_f (CTX_RECTANGLE, rasterizer->blit_x, rasterizer->blit_y),
+      ctx_f (CTX_CONT,      rasterizer->blit_width, rasterizer->blit_height)
+    };
+    ctx_rasterizer_process (ctx, (CtxCommand*)commands);
+  }
+  {
+    CtxEntry commands[1] = { ctx_void (CTX_FILL) };
+    ctx_rasterizer_process (ctx, (CtxCommand*)commands);
+  }
+  //ctx_texture_release (rasterizer->backend.ctx, ".ctx-group");
+  ctx_buffer_free (rasterizer->group[no]);
+  rasterizer->group[no] = 0;
+  ctx_rasterizer_process (ctx, (CtxCommand*)&restore_command);
 }
+#endif
 
-static inline uint16_t
-ctx_888_to_565 (uint32_t in, int byteswap)
+#if CTX_ENABLE_SHADOW_BLUR
+static void
+ctx_rasterizer_shadow_stroke (CtxRasterizer *rasterizer)
 {
-  uint8_t *rgb=(uint8_t*)(&in);
-  return ctx_565_pack (rgb[0],rgb[1],rgb[2], byteswap);
-}
+  CtxColor color;
+  CtxEntry save_command = ctx_void(CTX_SAVE);
+  Ctx *ctx = rasterizer->backend.ctx;
 
-static inline uint32_t
-ctx_565_to_888 (uint16_t in, int byteswap)
-{
-  uint32_t ret = 0;
-  uint8_t *rgba=(uint8_t*)&ret;
-  ctx_565_unpack (in,
-                  &rgba[0],
-                  &rgba[1],
-                  &rgba[2],
-                  byteswap);
-  return ret;
-}
+  float rgba[4] = {0, 0, 0, 1.0};
+  if (ctx_get_color (rasterizer->backend.ctx, CTX_shadowColor, &color) == 0)
+    ctx_color_get_rgba (rasterizer->state, &color, rgba);
 
+  CtxEntry set_color_command [3]=
+  {
+    ctx_f (CTX_COLOR, CTX_RGBA, rgba[0]),
+    ctx_f (CTX_CONT, rgba[1], rgba[2]),
+    ctx_f (CTX_CONT, rgba[3], 0)
+  };
+  CtxEntry restore_command = ctx_void(CTX_RESTORE);
+  float radius = rasterizer->state->gstate.shadow_blur;
+  int dim = 2 * radius + 1;
+  if (dim > CTX_MAX_GAUSSIAN_KERNEL_DIM)
+    dim = CTX_MAX_GAUSSIAN_KERNEL_DIM;
+  ctx_compute_gaussian_kernel (dim, radius, rasterizer->kernel);
+  ctx_rasterizer_process (ctx, (CtxCommand*)&save_command);
+  {
+    int i = 0;
+    for (int v = 0; v < dim; v += 1, i++)
+      {
+        float dy = rasterizer->state->gstate.shadow_offset_y + v - dim/2;
+        set_color_command[2].data.f[0] = rasterizer->kernel[i] * rgba[3];
+        ctx_rasterizer_process (ctx, (CtxCommand*)&set_color_command[0]);
+#if CTX_ENABLE_SHADOW_BLUR
+        rasterizer->in_shadow = 1;
 #endif
-#if CTX_ENABLE_RGB565
-
-
-static inline void
-ctx_RGB565_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
-{
-  const uint16_t *pixel = (uint16_t *) buf;
-  while (count--)
-    {
-      ((uint32_t*)(rgba))[0] = ctx_565_unpack_32 (*pixel, 0);
-#if CTX_RGB565_ALPHA
-      if (rgba[0]==255 && rgba[2] == 255 && rgba[1]==0)
-        { rgba[3] = 0; }
+        rasterizer->shadow_x = rasterizer->state->gstate.shadow_offset_x;
+        rasterizer->shadow_y = dy;
+        rasterizer->preserve = 1;
+        ctx_rasterizer_stroke (rasterizer);
+#if CTX_ENABLE_SHADOW_BLUR
+        rasterizer->in_shadow = 0;
 #endif
-      pixel+=1;
-      rgba +=4;
-    }
+      }
+  }
+  //free (kernel);
+  ctx_rasterizer_process (ctx, (CtxCommand*)&restore_command);
 }
 
-static inline void
-ctx_RGBA8_to_RGB565 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
+static void
+ctx_rasterizer_shadow_text (CtxRasterizer *rasterizer, const char *str)
 {
-  uint16_t *pixel = (uint16_t *) buf;
-  while (count--)
-    {
-#if CTX_RGB565_ALPHA
-      if (rgba[3]==0)
-        { pixel[0] = ctx_565_pack (255, 0, 255, 0); }
-      else
-#endif
-        { pixel[0] = ctx_565_pack (rgba[0], rgba[1], rgba[2], 0); }
-      pixel+=1;
-      rgba +=4;
-    }
-}
+  float x = rasterizer->state->x;
+  float y = rasterizer->state->y;
+  CtxColor color;
+  CtxEntry save_command = ctx_void(CTX_SAVE);
+  Ctx *ctx = rasterizer->backend.ctx;
 
-static void
-ctx_RGBA8_source_over_normal_color (CTX_COMPOSITE_ARGUMENTS);
-static void
-ctx_RGBA8_source_copy_normal_color (CTX_COMPOSITE_ARGUMENTS);
+  float rgba[4] = {0, 0, 0, 1.0};
+  if (ctx_get_color (rasterizer->backend.ctx, CTX_shadowColor, &color) == 0)
+    ctx_color_get_rgba (rasterizer->state, &color, rgba);
+
+  CtxEntry set_color_command [3]=
+  {
+    ctx_f (CTX_COLOR, CTX_RGBA, rgba[0]),
+    ctx_f (CTX_CONT, rgba[1], rgba[2]),
+    ctx_f (CTX_CONT, rgba[3], 0)
+  };
+  CtxEntry move_to_command [1]=
+  {
+    ctx_f (CTX_MOVE_TO, x, y),
+  };
+  CtxEntry restore_command = ctx_void(CTX_RESTORE);
+  float radius = rasterizer->state->gstate.shadow_blur;
+  int dim = 2 * radius + 1;
+  if (dim > CTX_MAX_GAUSSIAN_KERNEL_DIM)
+    dim = CTX_MAX_GAUSSIAN_KERNEL_DIM;
+  ctx_compute_gaussian_kernel (dim, radius, rasterizer->kernel);
+  ctx_rasterizer_process (ctx, (CtxCommand*)&save_command);
+
+  {
+      {
+        move_to_command[0].data.f[0] = x;
+        move_to_command[0].data.f[1] = y;
+        set_color_command[2].data.f[0] = rgba[3];
+        ctx_rasterizer_process (ctx, (CtxCommand*)&set_color_command);
+        ctx_rasterizer_process (ctx, (CtxCommand*)&move_to_command);
+        rasterizer->in_shadow=1;
+        ctx_rasterizer_text (rasterizer, str, 0);
+        rasterizer->in_shadow=0;
+      }
+  }
+  ctx_rasterizer_process (ctx, (CtxCommand*)&restore_command);
+  move_to_command[0].data.f[0] = x;
+  move_to_command[0].data.f[1] = y;
+  ctx_rasterizer_process (ctx, (CtxCommand*)&move_to_command);
+}
 
 static void
-ctx_composite_RGB565 (CTX_COMPOSITE_ARGUMENTS)
+ctx_rasterizer_shadow_fill (CtxRasterizer *rasterizer)
 {
-#if 0
-  if (CTX_LIKELY(rasterizer->comp_op == ctx_RGBA8_source_over_normal_color))
+  CtxColor color;
+  Ctx *ctx = rasterizer->backend.ctx;
+  CtxEntry save_command = ctx_void(CTX_SAVE);
+
+  float rgba[4] = {0, 0, 0, 1.0};
+  if (ctx_get_color (rasterizer->backend.ctx, CTX_shadowColor, &color) == 0)
+    ctx_color_get_rgba (rasterizer->state, &color, rgba);
+
+  CtxEntry set_color_command [3]=
   {
-     int byteswap = 0;
-     uint32_t si    = *((uint32_t*)(src));
-     uint16_t si_16 = ctx_888_to_565 (si, byteswap);
-     uint32_t sval  = (si_16 & ( (31 << 11 ) | 31));
-     uint32_t sg = (si_16 & (63 << 5)) >> 5;
-     uint32_t si_a = si >> (24 + 3);
-     while (count--)
-     {
-        uint32_t di_16 = *((uint16_t*)(dst));
-        uint32_t cov = (*coverage) >> 3;
-        uint32_t racov = (32-((31+si_a*cov)>>5));
-        uint32_t dval = (di_16 & ( (31 << 11 ) | 31));
-        uint32_t dg = (di_16 >> 5) & 63; // faster outside than
-                                         // remerged as part of dval
-        *((uint16_t*)(dst)) =
-                ((               
-                  (((sval * cov) + (dval * racov)) >> 5)
-                 ) & ((31 << 11 )|31)) |
-                  ((((sg * cov) + (dg * racov)) & 63) );
+    ctx_f (CTX_COLOR, CTX_RGBA, rgba[0]),
+    ctx_f (CTX_CONT, rgba[1], rgba[2]),
+    ctx_f (CTX_CONT, rgba[3], 0)
+  };
+  CtxEntry restore_command = ctx_void(CTX_RESTORE);
+  float radius = rasterizer->state->gstate.shadow_blur;
+  int dim = 2 * radius + 1;
+  if (dim > CTX_MAX_GAUSSIAN_KERNEL_DIM)
+    dim = CTX_MAX_GAUSSIAN_KERNEL_DIM;
+  ctx_compute_gaussian_kernel (dim, radius, rasterizer->kernel);
+  ctx_rasterizer_process (ctx, (CtxCommand*)&save_command);
 
-     }
-     return;
+  {
+    for (int v = 0; v < dim; v ++)
+      {
+        int i = v;
+        float dy = rasterizer->state->gstate.shadow_offset_y + v - dim/2;
+        set_color_command[2].data.f[0] = rasterizer->kernel[i] * rgba[3];
+        ctx_rasterizer_process (ctx, (CtxCommand*)&set_color_command);
+        rasterizer->in_shadow = 1;
+        rasterizer->shadow_x = rasterizer->state->gstate.shadow_offset_x;
+        rasterizer->shadow_y = dy;
+        rasterizer->preserve = 1;
+        ctx_rasterizer_fill (rasterizer);
+        rasterizer->in_shadow = 0;
+      }
   }
-#endif
-  uint8_t pixels[count * 4];
-  ctx_RGB565_to_RGBA8 (rasterizer, x0, dst, &pixels[0], count);
-  rasterizer->comp_op (rasterizer, &pixels[0], rasterizer->color, x0, coverage, count);
-  ctx_RGBA8_to_RGB565 (rasterizer, x0, &pixels[0], dst, count);
+  ctx_rasterizer_process (ctx, (CtxCommand*)&restore_command);
 }
 #endif
-#if CTX_ENABLE_RGB565_BYTESWAPPED
 
-static inline void
-ctx_RGB565_BS_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
+static void
+ctx_rasterizer_line_dash (CtxRasterizer *rasterizer, unsigned int count, float *dashes)
 {
-  const uint16_t *pixel = (uint16_t *) buf;
-  while (count--)
-    {
-      //ctx_565_unpack (*pixel, &rgba[0], &rgba[1], &rgba[2], 1);
-      ((uint32_t*)(rgba))[0] = ctx_565_unpack_32 (*pixel, 1);
-#if CTX_RGB565_ALPHA
-      if (rgba[0]==255 && rgba[2] == 255 && rgba[1]==0)
-        { rgba[3] = 0; }
-      else
-        { rgba[3] = 255; }
-#endif
-      pixel+=1;
-      rgba +=4;
-    }
+  if (!dashes)
+  {
+    rasterizer->state->gstate.n_dashes = 0;
+    return;
+  }
+  count = CTX_MIN(count, CTX_PARSER_MAX_ARGS-1);
+  rasterizer->state->gstate.n_dashes = count;
+  memcpy(&rasterizer->state->gstate.dashes[0], dashes, count * sizeof(float));
+  for (unsigned int i = 0; i < count; i ++)
+  {
+    if (rasterizer->state->gstate.dashes[i] < 0.0001f)
+      rasterizer->state->gstate.dashes[i] = 0.0001f; // hang protection
+  }
 }
 
-static inline void
-ctx_RGBA8_to_RGB565_BS (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
-{
-  uint16_t *pixel = (uint16_t *) buf;
-  while (count--)
-    {
-#if CTX_RGB565_ALPHA
-      if (rgba[3]==0)
-        { pixel[0] = ctx_565_pack (255, 0, 255, 1); }
-      else
-#endif
-        { pixel[0] = ctx_565_pack (rgba[0], rgba[1], rgba[2], 1); }
-      pixel+=1;
-      rgba +=4;
-    }
-}
 
 static void
-ctx_composite_RGB565_BS (CTX_COMPOSITE_ARGUMENTS)
+ctx_rasterizer_process (Ctx *ctx, CtxCommand *command)
 {
-  uint8_t pixels[count * 4];
-  ctx_RGB565_BS_to_RGBA8 (rasterizer, x0, dst, &pixels[0], count);
-  rasterizer->comp_op (rasterizer, &pixels[0], rasterizer->color, x0, coverage, count);
-  ctx_RGBA8_to_RGB565_BS (rasterizer, x0, &pixels[0], dst, count);
-}
-#endif
+  CtxEntry      *entry      = &command->entry;
+  CtxRasterizer *rasterizer = (CtxRasterizer *) ctx->backend;
+  CtxState      *state      = rasterizer->state;
+  CtxCommand    *c          = (CtxCommand *) entry;
+  int            clear_clip = 0;
 
-static CtxPixelFormatInfo ctx_pixel_formats[]=
-{
-#if CTX_ENABLE_RGBA8
-  {
-    CTX_FORMAT_RGBA8, 4, 32, 4, 0, 0, CTX_FORMAT_RGBA8,
-    NULL, NULL, NULL, ctx_setup_RGBA8
-  },
-#endif
-#if CTX_ENABLE_BGRA8
-  {
-    CTX_FORMAT_BGRA8, 4, 32, 4, 0, 0, CTX_FORMAT_RGBA8,
-    ctx_BGRA8_to_RGBA8, ctx_RGBA8_to_BGRA8, ctx_composite_BGRA8, ctx_setup_RGBA8,
-  },
+  ctx_interpret_style (state, entry, NULL);
+  switch (c->code)
+    {
+#if CTX_ENABLE_SHADOW_BLUR
+      case CTX_SHADOW_COLOR:
+        {
+          CtxColor  col;
+          CtxColor *color = &col;
+          //state->gstate.source_fill.type = CTX_SOURCE_COLOR;
+          switch ((int)c->rgba.model)
+            {
+              case CTX_RGB:
+                ctx_color_set_rgba (state, color, c->rgba.r, c->rgba.g, c->rgba.b, 1.0f);
+                break;
+              case CTX_RGBA:
+                //ctx_color_set_rgba (state, color, c->rgba.r, c->rgba.g, c->rgba.b, c->rgba.a);
+                ctx_color_set_rgba (state, color, c->rgba.r, c->rgba.g, c->rgba.b, c->rgba.a);
+                break;
+              case CTX_DRGBA:
+                ctx_color_set_drgba (state, color, c->rgba.r, c->rgba.g, c->rgba.b, c->rgba.a);
+                break;
+#if CTX_ENABLE_CMYK
+              case CTX_CMYKA:
+                ctx_color_set_cmyka (state, color, c->cmyka.c, c->cmyka.m, c->cmyka.y, c->cmyka.k, c->cmyka.a);
+                break;
+              case CTX_CMYK:
+                ctx_color_set_cmyka (state, color, c->cmyka.c, c->cmyka.m, c->cmyka.y, c->cmyka.k, 1.0f);
+                break;
+              case CTX_DCMYKA:
+                ctx_color_set_dcmyka (state, color, c->cmyka.c, c->cmyka.m, c->cmyka.y, c->cmyka.k, c->cmyka.a);
+                break;
+              case CTX_DCMYK:
+                ctx_color_set_dcmyka (state, color, c->cmyka.c, c->cmyka.m, c->cmyka.y, c->cmyka.k, 1.0f);
+                break;
 #endif
-#if CTX_ENABLE_GRAYF
-  {
-    CTX_FORMAT_GRAYF, 1, 32, 4 * 2, 0, 0, CTX_FORMAT_GRAYAF,
-    NULL, NULL, ctx_composite_GRAYF, ctx_setup_GRAYAF,
-  },
-#endif
-#if CTX_ENABLE_GRAYAF
-  {
-    CTX_FORMAT_GRAYAF, 2, 64, 4 * 2, 0, 0, CTX_FORMAT_GRAYAF,
-    NULL, NULL, NULL, ctx_setup_GRAYAF,
-  },
-#endif
-#if CTX_ENABLE_RGBAF
-  {
-    CTX_FORMAT_RGBAF, 4, 128, 4 * 4, 0, 0, CTX_FORMAT_RGBAF,
-    NULL, NULL, NULL, ctx_setup_RGBAF,
-  },
-#endif
-#if CTX_ENABLE_RGB8
-  {
-    CTX_FORMAT_RGB8, 3, 24, 4, 0, 0, CTX_FORMAT_RGBA8,
-    ctx_RGB8_to_RGBA8, ctx_RGBA8_to_RGB8, ctx_composite_convert, ctx_setup_RGBA8,
-  },
-#endif
-#if CTX_ENABLE_GRAY1
-  {
-#if CTX_NATIVE_GRAYA8
-    CTX_FORMAT_GRAY1, 1, 1, 2, 1, 1, CTX_FORMAT_GRAYA8,
-    ctx_GRAY1_to_GRAYA8, ctx_GRAYA8_to_GRAY1, ctx_composite_convert, ctx_setup_GRAYA8,
-#else
-    CTX_FORMAT_GRAY1, 1, 1, 4, 1, 1, CTX_FORMAT_RGBA8,
-    ctx_GRAY1_to_RGBA8, ctx_RGBA8_to_GRAY1, ctx_composite_convert, ctx_setup_RGBA8,
-#endif
-  },
-#endif
-#if CTX_ENABLE_GRAY2
-  {
-#if CTX_NATIVE_GRAYA8
-    CTX_FORMAT_GRAY2, 1, 2, 2, 4, 4, CTX_FORMAT_GRAYA8,
-    ctx_GRAY2_to_GRAYA8, ctx_GRAYA8_to_GRAY2, ctx_composite_convert, ctx_setup_GRAYA8,
-#else
-    CTX_FORMAT_GRAY2, 1, 2, 4, 4, 4, CTX_FORMAT_RGBA8,
-    ctx_GRAY2_to_RGBA8, ctx_RGBA8_to_GRAY2, ctx_composite_convert, ctx_setup_RGBA8,
-#endif
-  },
-#endif
-#if CTX_ENABLE_GRAY4
-  {
-#if CTX_NATIVE_GRAYA8
-    CTX_FORMAT_GRAY4, 1, 4, 2, 16, 16, CTX_FORMAT_GRAYA8,
-    ctx_GRAY4_to_GRAYA8, ctx_GRAYA8_to_GRAY4, ctx_composite_convert, ctx_setup_GRAYA8,
-#else
-    CTX_FORMAT_GRAY4, 1, 4, 4, 16, 16, CTX_FORMAT_GRAYA8,
-    ctx_GRAY4_to_RGBA8, ctx_RGBA8_to_GRAY4, ctx_composite_convert, ctx_setup_RGBA8,
-#endif
-  },
-#endif
-#if CTX_ENABLE_GRAY8
-  {
-#if CTX_NATIVE_GRAYA8
-    CTX_FORMAT_GRAY8, 1, 8, 2, 0, 0, CTX_FORMAT_GRAYA8,
-    ctx_GRAY8_to_GRAYA8, ctx_GRAYA8_to_GRAY8, ctx_composite_convert, ctx_setup_GRAYA8,
-#else
-    CTX_FORMAT_GRAY8, 1, 8, 4, 0, 0, CTX_FORMAT_RGBA8,
-    ctx_GRAY8_to_RGBA8, ctx_RGBA8_to_GRAY8, ctx_composite_convert, ctx_setup_RGBA8,
-#endif
-  },
-#endif
-#if CTX_ENABLE_GRAYA8
-  {
-#if CTX_NATIVE_GRAYA8
-    CTX_FORMAT_GRAYA8, 2, 16, 2, 0, 0, CTX_FORMAT_GRAYA8,
-    ctx_GRAYA8_to_RGBA8, ctx_RGBA8_to_GRAYA8, NULL, ctx_setup_GRAYA8,
-#else
-    CTX_FORMAT_GRAYA8, 2, 16, 4, 0, 0, CTX_FORMAT_RGBA8,
-    ctx_GRAYA8_to_RGBA8, ctx_RGBA8_to_GRAYA8, ctx_composite_convert, ctx_setup_RGBA8,
-#endif
-  },
-#endif
-#if CTX_ENABLE_RGB332
-  {
-    CTX_FORMAT_RGB332, 3, 8, 4, 10, 12, CTX_FORMAT_RGBA8,
-    ctx_RGB332_to_RGBA8, ctx_RGBA8_to_RGB332,
-    ctx_composite_convert, ctx_setup_RGBA8,
-  },
-#endif
-#if CTX_ENABLE_RGB565
-  {
-    CTX_FORMAT_RGB565, 3, 16, 4, 32, 64, CTX_FORMAT_RGBA8,
-    ctx_RGB565_to_RGBA8, ctx_RGBA8_to_RGB565,
-    ctx_composite_RGB565, ctx_setup_RGBA8,
-  },
-#endif
-#if CTX_ENABLE_RGB565_BYTESWAPPED
-  {
-    CTX_FORMAT_RGB565_BYTESWAPPED, 3, 16, 4, 32, 64, CTX_FORMAT_RGBA8,
-    ctx_RGB565_BS_to_RGBA8,
-    ctx_RGBA8_to_RGB565_BS,
-    ctx_composite_RGB565_BS, ctx_setup_RGBA8,
-  },
-#endif
-#if CTX_ENABLE_CMYKAF
-  {
-    CTX_FORMAT_CMYKAF, 5, 160, 4 * 5, 0, 0, CTX_FORMAT_CMYKAF,
-    NULL, NULL, NULL, ctx_setup_CMYKAF,
-  },
+              case CTX_GRAYA:
+                ctx_color_set_graya (state, color, c->graya.g, c->graya.a);
+                break;
+              case CTX_GRAY:
+                ctx_color_set_graya (state, color, c->graya.g, 1.0f);
+                break;
+            }
+          ctx_set_color (rasterizer->backend.ctx, CTX_shadowColor, color);
+        }
+        break;
 #endif
-#if CTX_ENABLE_CMYKA8
-  {
-    CTX_FORMAT_CMYKA8, 5, 40, 4 * 5, 0, 0, CTX_FORMAT_CMYKAF,
-    NULL, NULL, ctx_composite_CMYKA8, ctx_setup_CMYKAF,
-  },
+      case CTX_LINE_DASH:
+        if (c->line_dash.count)
+          {
+            ctx_rasterizer_line_dash (rasterizer, c->line_dash.count, c->line_dash.data);
+          }
+        else
+        ctx_rasterizer_line_dash (rasterizer, 0, NULL);
+        break;
+
+      case CTX_LINE_TO:
+        ctx_rasterizer_line_to (rasterizer, c->c.x0, c->c.y0);
+        break;
+      case CTX_REL_LINE_TO:
+        ctx_rasterizer_rel_line_to (rasterizer, c->c.x0, c->c.y0);
+        break;
+      case CTX_MOVE_TO:
+        ctx_rasterizer_move_to (rasterizer, c->c.x0, c->c.y0);
+        break;
+      case CTX_REL_MOVE_TO:
+        ctx_rasterizer_rel_move_to (rasterizer, c->c.x0, c->c.y0);
+        break;
+      case CTX_CURVE_TO:
+        ctx_rasterizer_curve_to (rasterizer, c->c.x0, c->c.y0,
+                                 c->c.x1, c->c.y1,
+                                 c->c.x2, c->c.y2);
+        break;
+      case CTX_REL_CURVE_TO:
+        ctx_rasterizer_rel_curve_to (rasterizer, c->c.x0, c->c.y0,
+                                     c->c.x1, c->c.y1,
+                                     c->c.x2, c->c.y2);
+        break;
+      case CTX_QUAD_TO:
+        ctx_rasterizer_quad_to (rasterizer, c->c.x0, c->c.y0, c->c.x1, c->c.y1);
+        break;
+      case CTX_REL_QUAD_TO:
+        ctx_rasterizer_rel_quad_to (rasterizer, c->c.x0, c->c.y0, c->c.x1, c->c.y1);
+        break;
+      case CTX_ARC:
+        ctx_rasterizer_arc (rasterizer, c->arc.x, c->arc.y, c->arc.radius, c->arc.angle1, c->arc.angle2, 
c->arc.direction);
+        break;
+      case CTX_RECTANGLE:
+        ctx_rasterizer_rectangle (rasterizer, c->rectangle.x, c->rectangle.y,
+                                  c->rectangle.width, c->rectangle.height);
+        break;
+      case CTX_ROUND_RECTANGLE:
+        ctx_rasterizer_round_rectangle (rasterizer, c->rectangle.x, c->rectangle.y,
+                                        c->rectangle.width, c->rectangle.height,
+                                        c->rectangle.radius);
+        break;
+      case CTX_SET_PIXEL:
+        ctx_rasterizer_set_pixel (rasterizer, c->set_pixel.x, c->set_pixel.y,
+                                  c->set_pixel.rgba[0],
+                                  c->set_pixel.rgba[1],
+                                  c->set_pixel.rgba[2],
+                                  c->set_pixel.rgba[3]);
+        break;
+      case CTX_DEFINE_TEXTURE:
+        {
+          uint8_t *pixel_data = ctx_define_texture_pixel_data (entry);
+          ctx_rasterizer_define_texture (rasterizer, c->define_texture.eid,
+                                         c->define_texture.width, c->define_texture.height,
+                                         c->define_texture.format,
+                                         pixel_data);
+          rasterizer->comp_op = NULL;
+          rasterizer->fragment = NULL;
+        }
+        break;
+      case CTX_TEXTURE:
+        ctx_rasterizer_set_texture (rasterizer, c->texture.eid,
+                                    c->texture.x, c->texture.y);
+        rasterizer->comp_op = NULL;
+        rasterizer->fragment = NULL;
+        break;
+      case CTX_SOURCE_TRANSFORM:
+        ctx_matrix_set (&state->gstate.source_fill.set_transform,
+                        ctx_arg_float (0), ctx_arg_float (1),
+                        ctx_arg_float (2), ctx_arg_float (3),
+                        ctx_arg_float (4), ctx_arg_float (5));
+        rasterizer->comp_op = NULL;
+        break;
+#if 0
+      case CTX_LOAD_IMAGE:
+        ctx_rasterizer_load_image (rasterizer, ctx_arg_string(),
+                                   ctx_arg_float (0), ctx_arg_float (1) );
+        break;
 #endif
-#if CTX_ENABLE_CMYK8
-  {
-    CTX_FORMAT_CMYK8, 5, 32, 4 * 5, 0, 0, CTX_FORMAT_CMYKAF,
-    NULL, NULL, ctx_composite_CMYK8, ctx_setup_CMYKAF,
-  },
+#if CTX_GRADIENTS
+      case CTX_GRADIENT_STOP:
+        {
+          float rgba[4]= {ctx_u8_to_float (ctx_arg_u8 (4) ),
+                          ctx_u8_to_float (ctx_arg_u8 (4+1) ),
+                          ctx_u8_to_float (ctx_arg_u8 (4+2) ),
+                          ctx_u8_to_float (ctx_arg_u8 (4+3) )
+                         };
+          ctx_rasterizer_gradient_add_stop (rasterizer,
+                                            ctx_arg_float (0), rgba);
+          rasterizer->comp_op = NULL;
+        }
+        break;
+      case CTX_LINEAR_GRADIENT:
+        ctx_state_gradient_clear_stops (state);
+        rasterizer->comp_op = NULL;
+        break;
+      case CTX_RADIAL_GRADIENT:
+        ctx_state_gradient_clear_stops (state);
+        rasterizer->comp_op = NULL;
+        break;
 #endif
-#if CTX_ENABLE_YUV420
-  {
-    CTX_FORMAT_YUV420, 1, 8, 4, 0, 0, CTX_FORMAT_RGBA8,
-    NULL, NULL, ctx_composite_convert, ctx_setup_RGBA8,
-  },
+      case CTX_PRESERVE:
+        rasterizer->preserve = 1;
+        break;
+      case CTX_COLOR:
+      case CTX_COMPOSITING_MODE:
+      case CTX_BLEND_MODE:
+        rasterizer->comp_op = NULL;
+        break;
+#if CTX_COMPOSITING_GROUPS
+      case CTX_START_GROUP:
+        ctx_rasterizer_start_group (rasterizer);
+        break;
+      case CTX_END_GROUP:
+        ctx_rasterizer_end_group (rasterizer);
+        break;
 #endif
-  {
-    CTX_FORMAT_NONE
-  }
-};
-
 
-CtxPixelFormatInfo *
-ctx_pixel_format_info (CtxPixelFormat format)
-{
-  for (unsigned int i = 0; ctx_pixel_formats[i].pixel_format; i++)
-    {
-      if (ctx_pixel_formats[i].pixel_format == format)
+      case CTX_RESTORE:
+        for (int i = state->gstate_no?state->gstate_stack[state->gstate_no-1].keydb_pos:0;
+             i < state->gstate.keydb_pos; i++)
         {
-          return &ctx_pixel_formats[i];
+          if (state->keydb[i].key == CTX_clip)
+          {
+            clear_clip = 1;
+          }
         }
-    }
-  return NULL;
-}
-
+        /* FALLTHROUGH */
+      case CTX_ROTATE:
+      case CTX_SCALE:
+      case CTX_TRANSLATE:
+      case CTX_IDENTITY:
+        rasterizer->uses_transforms = 1;
+        /* FALLTHROUGH */
+      case CTX_SAVE:
+        rasterizer->comp_op = NULL;
+        ctx_interpret_transforms (state, entry, NULL);
+        if (clear_clip)
+        {
+          ctx_rasterizer_clip_reset (rasterizer);
+        for (int i = state->gstate_no?state->gstate_stack[state->gstate_no-1].keydb_pos:0;
+             i < state->gstate.keydb_pos; i++)
+        {
+          if (state->keydb[i].key == CTX_clip)
+          {
+            int idx = ctx_float_to_string_index (state->keydb[i].value);
+            if (idx >=0)
+            {
+              CtxSegment *edges = (CtxSegment*)&state->stringpool[idx];
+              ctx_rasterizer_clip_apply (rasterizer, edges);
+            }
+          }
+        }
+        }
+        break;
+      case CTX_STROKE:
+#if CTX_ENABLE_SHADOW_BLUR
+        if (state->gstate.shadow_blur > 0.0 &&
+            !rasterizer->in_text)
+          ctx_rasterizer_shadow_stroke (rasterizer);
 #endif
-#if CTX_RASTERIZER
-#define CTX_AA_HALFSTEP2   (CTX_FULL_AA/2)
-#define CTX_AA_HALFSTEP    ((CTX_FULL_AA/2)+1)
+        {
+        int count = rasterizer->edge_list.count;
+        if (state->gstate.n_dashes)
+        {
+          int n_dashes = state->gstate.n_dashes;
+          float *dashes = state->gstate.dashes;
+          float factor = ctx_matrix_get_scale (&state->gstate.transform);
 
-static void
-ctx_gradient_cache_prime (CtxRasterizer *rasterizer);
+          CtxSegment temp[count]; /* copy of already built up path's poly line  */
+          memcpy (temp, rasterizer->edge_list.entries, sizeof (temp));
+          int start = 0;
+          int end   = 0;
+      CtxMatrix transform_backup = state->gstate.transform;
+      _ctx_matrix_identity (&state->gstate.transform);
+      ctx_rasterizer_reset (rasterizer); /* for dashing we create
+                                            a dashed path to stroke */
+      float prev_x = 0.0f;
+      float prev_y = 0.0f;
+      float pos = 0.0;
 
-static inline void
-_ctx_setup_compositor (CtxRasterizer *rasterizer)
-{
-  if (CTX_UNLIKELY (rasterizer->comp_op==0))
-  {
-    rasterizer->format->setup (rasterizer);
-#if CTX_GRADIENTS
-#if CTX_GRADIENT_CACHE
-  switch (rasterizer->state->gstate.source_fill.type)
-  {
-    case CTX_SOURCE_LINEAR_GRADIENT:
-    case CTX_SOURCE_RADIAL_GRADIENT:
-      ctx_gradient_cache_prime (rasterizer);
-      break;
-    case CTX_SOURCE_TEXTURE:
+      int   dash_no  = 0.0;
+      float dash_lpos = state->gstate.line_dash_offset * factor;
+      int   is_down = 0;
 
-      _ctx_matrix_multiply (&rasterizer->state->gstate.source_fill.transform,
-                            &rasterizer->state->gstate.source_fill.set_transform,
-                            &rasterizer->state->gstate.transform);
+          while (start < count)
+          {
+            int started = 0;
+            int i;
+            is_down = 0;
 
-      ctx_matrix_invert (&rasterizer->state->gstate.source_fill.transform);
+            if (!is_down)
+            {
+              CtxSegment *entry = &temp[0];
+              prev_x = entry->data.s16[0] * 1.0f / CTX_SUBDIV;
+              prev_y = entry->data.s16[1] * 1.0f / CTX_FULL_AA;
+              ctx_rasterizer_move_to (rasterizer, prev_x, prev_y);
+              is_down = 1;
+            }
 
-      if (!rasterizer->state->gstate.source_fill.texture.buffer->color_managed)
-        _ctx_texture_prepare_color_management (rasterizer,
-        rasterizer->state->gstate.source_fill.texture.buffer);
-      break;
-  }
+            for (i = start; i < count; i++)
+            {
+              CtxSegment *entry = &temp[i];
+              float x, y;
+              if (entry->code == CTX_NEW_EDGE)
+                {
+                  if (started)
+                    {
+                      end = i - 1;
+                      dash_no = 0;
+                      dash_lpos = 0.0;
+                      goto foo;
+                    }
+                  prev_x = entry->data.s16[0] * 1.0f / CTX_SUBDIV;
+                  prev_y = entry->data.s16[1] * 1.0f / CTX_FULL_AA;
+                  started = 1;
+                  start = i;
+                  is_down = 1;
+                  ctx_rasterizer_move_to (rasterizer, prev_x, prev_y);
+                }
+
+again:
+
+              x = entry->data.s16[2] * 1.0f / CTX_SUBDIV;
+              y = entry->data.s16[3] * 1.0f / CTX_FULL_AA;
+              float dx = x - prev_x;
+              float dy = y - prev_y;
+              float length = ctx_fast_hypotf (dx, dy);
+
+              if (dash_lpos + length >= dashes[dash_no] * factor)
+              {
+                float p = (dashes[dash_no] * factor - dash_lpos) / length;
+                float splitx = x * p + (1.0f - p) * prev_x;
+                float splity = y * p + (1.0f - p) * prev_y;
+                if (is_down)
+                {
+                  ctx_rasterizer_line_to (rasterizer, splitx, splity);
+                  is_down = 0;
+                }
+                else
+                {
+                  ctx_rasterizer_move_to (rasterizer, splitx, splity);
+                  is_down = 1;
+                }
+                prev_x = splitx;
+                prev_y = splity;
+                dash_no++;
+                dash_lpos=0;
+                if (dash_no >= n_dashes) dash_no = 0;
+                goto again;
+              }
+              else
+              {
+                pos += length;
+                dash_lpos += length;
+                {
+                  if (is_down)
+                    ctx_rasterizer_line_to (rasterizer, x, y);
+                }
+              }
+              prev_x = x;
+              prev_y = y;
+            }
+          end = i-1;
+foo:
+          start = end+1;
+        }
+        state->gstate.transform = transform_backup;
+        }
+        ctx_rasterizer_stroke (rasterizer);
+        }
+        ctx_rasterizer_reset (rasterizer);
+
+        break;
+      case CTX_FONT:
+        ctx_rasterizer_set_font (rasterizer, ctx_arg_string() );
+        break;
+      case CTX_TEXT:
+        rasterizer->in_text++;
+#if CTX_ENABLE_SHADOW_BLUR
+        if (state->gstate.shadow_blur > 0.0)
+          ctx_rasterizer_shadow_text (rasterizer, ctx_arg_string ());
 #endif
+        ctx_rasterizer_text (rasterizer, ctx_arg_string(), 0);
+        rasterizer->in_text--;
+        ctx_rasterizer_reset (rasterizer);
+        break;
+      case CTX_STROKE_TEXT:
+        ctx_rasterizer_text (rasterizer, ctx_arg_string(), 1);
+        ctx_rasterizer_reset (rasterizer);
+        break;
+      case CTX_GLYPH:
+        ctx_rasterizer_glyph (rasterizer, entry[0].data.u32[0], entry[0].data.u8[4]);
+        break;
+      case CTX_FILL:
+#if CTX_ENABLE_SHADOW_BLUR
+        if (state->gstate.shadow_blur > 0.0 &&
+            !rasterizer->in_text)
+          ctx_rasterizer_shadow_fill (rasterizer);
 #endif
-  }
-}
-
-#define CTX_FULL_AA 15
-inline static void
-ctx_rasterizer_apply_coverage (CtxRasterizer *rasterizer,
-                               uint8_t * dst,
-                               int       x,
-                               uint8_t * coverage,
-                               int       count)
-{
-  if (CTX_UNLIKELY(rasterizer->format->apply_coverage))
-    rasterizer->format->apply_coverage(rasterizer, dst, rasterizer->color, x, coverage, count);
-  else
-    /* it is faster to dispatch in this condition, than using a shared
-     * direct trampoline
-     */
-    rasterizer->comp_op (rasterizer, dst, rasterizer->color, x, coverage, count);
+        ctx_rasterizer_fill (rasterizer);
+        ctx_rasterizer_reset (rasterizer);
+        break;
+      case CTX_RESET:
+      case CTX_BEGIN_PATH:
+        ctx_rasterizer_reset (rasterizer);
+        break;
+      case CTX_CLIP:
+        ctx_rasterizer_clip (rasterizer);
+        break;
+      case CTX_CLOSE_PATH:
+        ctx_rasterizer_finish_shape (rasterizer);
+        break;
+      case CTX_IMAGE_SMOOTHING:
+        rasterizer->comp_op = NULL;
+        break;
+    }
+  ctx_interpret_pos_bare (state, entry, NULL);
 }
 
-static void
-ctx_rasterizer_gradient_add_stop (CtxRasterizer *rasterizer, float pos, float *rgba)
+/* Tear down a rasterizer backend and release its memory.
+ *
+ * Deinitialises the edge list, frees the clip buffer when one exists
+ * (CTX_ENABLE_CLIP builds) and every allocated shape cache entry
+ * (CTX_SHAPE_CACHE builds), then free()s the rasterizer struct itself, so
+ * the pointer must not be used afterwards.  ctx_rasterizer_init installs
+ * this function as the backend's free callback.
+ */
+void
+ctx_rasterizer_deinit (CtxRasterizer *rasterizer)
 {
-  /* FIXME XXX we only have one gradient, but might need separate gradients
-   * for fill/stroke !
-   * 
-   */
-  CtxGradient *gradient = &rasterizer->state->gradient;
-  CtxGradientStop *stop = &gradient->stops[gradient->n_stops];
-  stop->pos = pos;
-  ctx_color_set_rgba (rasterizer->state, & (stop->color), rgba[0], rgba[1], rgba[2], rgba[3]);
-  if (gradient->n_stops < 15) //we'll keep overwriting the last when out of stops
-    { gradient->n_stops++; }
-}
+  ctx_drawlist_deinit (&rasterizer->edge_list);
+#if CTX_ENABLE_CLIP
+  if (rasterizer->clip_buffer)
+  {
+    ctx_buffer_free (rasterizer->clip_buffer);
+    rasterizer->clip_buffer = NULL;
+  }
+#endif
+#if CTX_SHAPE_CACHE
+  /* cache slots are allocated lazily; only the populated ones are freed */
+  for (int i = 0; i < CTX_SHAPE_CACHE_ENTRIES; i ++)
+    if (rasterizer->shape_cache.entries[i])
+    {
+      free (rasterizer->shape_cache.entries[i]);
+      rasterizer->shape_cache.entries[i] = NULL;
+    }
 
-static inline int ctx_rasterizer_add_point (CtxRasterizer *rasterizer, int x1, int y1)
-{
-  CtxSegment entry = {CTX_EDGE, {{0},}};
-  rasterizer->scan_min = ctx_mini (y1, rasterizer->scan_min);
-  rasterizer->scan_max = ctx_maxi (y1, rasterizer->scan_max);
+#endif
 
-  rasterizer->col_min = ctx_mini (x1, rasterizer->col_min);
-  rasterizer->col_max = ctx_maxi (x1, rasterizer->col_max);
 
-  entry.data.s16[0]=rasterizer->inner_x;
-  entry.data.s16[1]=rasterizer->inner_y;
+  free (rasterizer);
+}
 
-  entry.data.s16[2]=x1;
-  entry.data.s16[3]=y1;
 
-  rasterizer->inner_x = x1;
-  rasterizer->inner_y = y1;
-#if 0
-  if (entry.data.s16[3] < entry.data.s16[1])
+/* Report the antialiasing mode currently in effect for ctx.
+ *
+ * With CTX_EVENTS, a tiled backend answers with the mode stored in its
+ * CtxTiled struct.  A backend that is not the rasterizer always reports
+ * CTX_ANTIALIAS_DEFAULT.  For the rasterizer the numeric aa level is mapped
+ * back to the enum: 1 -> NONE, 3 -> FAST, everything else -> DEFAULT.
+ */
+CtxAntialias ctx_get_antialias (Ctx *ctx)
+{
+#if CTX_EVENTS
+  if (ctx_backend_is_tiled (ctx))
   {
-    entry = ctx_segment_s16 (CTX_EDGE_FLIPPED,
-                            entry.data.s16[2], entry.data.s16[3],
-                            entry.data.s16[0], entry.data.s16[1]);
+     CtxTiled *fb = (CtxTiled*)(ctx->backend);
+     return fb->antialias;
   }
 #endif
+  if (ctx_backend_type (ctx) != CTX_BACKEND_RASTERIZER) return CTX_ANTIALIAS_DEFAULT;
 
-  return ctx_edgelist_add_single (&rasterizer->edge_list, (CtxEntry*)&entry);
+  switch (((CtxRasterizer*)(ctx->backend))->aa)
+  {
+    case 1: return CTX_ANTIALIAS_NONE;
+    case 3: return CTX_ANTIALIAS_FAST;
+    /* NOTE(review): with the case below disabled, an aa level of 5 (what
+     * _ctx_antialias_to_aa yields for CTX_ANTIALIAS_GOOD) reads back as
+     * CTX_ANTIALIAS_DEFAULT - confirm this is intended. */
+    //case 5: return CTX_ANTIALIAS_GOOD;
+    default:
+    case 15: return CTX_ANTIALIAS_DEFAULT;
+  }
 }
 
-#if 0
-#define CTX_SHAPE_CACHE_PRIME1   7853
-#define CTX_SHAPE_CACHE_PRIME2   4129
-#define CTX_SHAPE_CACHE_PRIME3   3371
-#define CTX_SHAPE_CACHE_PRIME4   4221
-#else
-#define CTX_SHAPE_CACHE_PRIME1   283
-#define CTX_SHAPE_CACHE_PRIME2   599
-#define CTX_SHAPE_CACHE_PRIME3   101
-#define CTX_SHAPE_CACHE_PRIME4   661
+/* Translate a CtxAntialias setting into the rasterizer's numeric aa level:
+ * NONE -> 1, FAST -> 3, GOOD -> 5; DEFAULT and any unrecognised value
+ * yield CTX_RASTERIZER_AA.
+ */
+static int _ctx_antialias_to_aa (CtxAntialias antialias)
+{
+  if (antialias == CTX_ANTIALIAS_NONE)
+    return 1;
+  if (antialias == CTX_ANTIALIAS_FAST)
+    return 3;
+  if (antialias == CTX_ANTIALIAS_GOOD)
+    return 5;
+  return CTX_RASTERIZER_AA;
+}
+
+/* Select the antialiasing mode used when rendering through ctx.
+ *
+ * With CTX_EVENTS, a tiled backend records the mode and forwards it to each
+ * of its _ctx_max_threads host contexts.  For a rasterizer backend the mode
+ * is converted to a numeric aa level, and fast_aa is enabled only for
+ * CTX_ANTIALIAS_DEFAULT and CTX_ANTIALIAS_FAST.  Any other backend type is
+ * left untouched.
+ */
+void
+ctx_set_antialias (Ctx *ctx, CtxAntialias antialias)
+{
+#if CTX_EVENTS
+  if (ctx_backend_is_tiled (ctx))
+  {
+     CtxTiled *fb = (CtxTiled*)(ctx->backend);
+     fb->antialias = antialias;
+     for (int i = 0; i < _ctx_max_threads; i++)
+     {
+       ctx_set_antialias (fb->host[i], antialias);
+     }
+     return;
+  }
 #endif
+  if (ctx_backend_type (ctx) != CTX_BACKEND_RASTERIZER) return;
 
-float ctx_shape_cache_rate = 0.0;
+  ((CtxRasterizer*)(ctx->backend))->aa = 
+     _ctx_antialias_to_aa (antialias);
+  ((CtxRasterizer*)(ctx->backend))->fast_aa = 0;
+  if (antialias == CTX_ANTIALIAS_DEFAULT||
+      antialias == CTX_ANTIALIAS_FAST)
+    ((CtxRasterizer*)(ctx->backend))->fast_aa = 1;
+}
+
+/* (Re)initialise a rasterizer backend that renders into a pixel buffer.
+ *
+ * rasterizer:     storage to initialise.  Its clip_buffer and edge_list
+ *                 fields are inspected BEFORE the struct is cleared, so it
+ *                 must be either zero-filled memory or a rasterizer that
+ *                 was initialised earlier - never indeterminate memory.
+ * ctx:            context stored as the backend's owner.
+ * texture_source: context to use as texture source; NULL selects ctx.
+ * state:          drawing state to attach; ctx_state_init is called on it.
+ * data:           destination pixel memory.
+ * x, y:           origin of the blit rectangle; also the initial clip min.
+ * width, height:  size of the blit rectangle; the initial clip max is
+ *                 (x + width - 1, y + height - 1).
+ * stride:         stored as blit_stride.
+ * pixel_format:   CTX_FORMAT_BGRA8 is handled as CTX_FORMAT_RGBA8 with
+ *                 swap_red_green set; the format info is looked up with
+ *                 ctx_pixel_format_info.
+ * antialias:      initial antialiasing mode.
+ *
+ * Returns rasterizer.
+ */
+CtxRasterizer *
+ctx_rasterizer_init (CtxRasterizer *rasterizer, Ctx *ctx, Ctx *texture_source, CtxState *state, void *data, 
int x, int y, int width, int height, int stride, CtxPixelFormat pixel_format, CtxAntialias antialias)
+{
+#if CTX_ENABLE_CLIP
+  if (rasterizer->clip_buffer)
+    ctx_buffer_free (rasterizer->clip_buffer);
+#endif
+  if (rasterizer->edge_list.size)
+    ctx_drawlist_deinit (&rasterizer->edge_list);
 #if CTX_SHAPE_CACHE
-int   _ctx_shape_cache_enabled = 1;
+  /* NOTE(review): the clear stops sizeof (CtxShapeCache) bytes short of the
+   * end of the struct - presumably shape_cache is the last member so that
+   * cached shapes survive a re-init; confirm against the struct layout. */
+  memset (rasterizer, 0, sizeof (CtxRasterizer) - sizeof (CtxShapeCache));
+#else
+  memset (rasterizer, 0, sizeof (CtxRasterizer));
+#endif
+  CtxBackend *backend = (CtxBackend*)rasterizer;
+  backend->process = ctx_rasterizer_process;
+  backend->free    = (CtxDestroyNotify)ctx_rasterizer_deinit;
+  backend->ctx     = ctx;
+  rasterizer->edge_list.flags |= CTX_DRAWLIST_EDGE_LIST;
+  rasterizer->state       = state;
+  rasterizer->texture_source = texture_source?texture_source:ctx;
 
-//static CtxShapeCache ctx_cache = {{NULL,}, 0};
+  rasterizer->aa          = _ctx_antialias_to_aa (antialias);
+  rasterizer->fast_aa = (antialias == CTX_ANTIALIAS_DEFAULT||antialias == CTX_ANTIALIAS_FAST);
+  ctx_state_init (rasterizer->state);
+  rasterizer->buf         = data;
+  rasterizer->blit_x      = x;
+  rasterizer->blit_y      = y;
+  rasterizer->blit_width  = width;
+  rasterizer->blit_height = height;
+  rasterizer->state->gstate.clip_min_x  = x;
+  rasterizer->state->gstate.clip_min_y  = y;
+  rasterizer->state->gstate.clip_max_x  = x + width - 1;
+  rasterizer->state->gstate.clip_max_y  = y + height - 1;
+  rasterizer->blit_stride = stride;
+  /* start with an inverted (empty) scanline range */
+  rasterizer->scan_min    = 5000;
+  rasterizer->scan_max    = -5000;
 
-static long ctx_shape_cache_hits   = 0;
-static long ctx_shape_cache_misses = 0;
+  if (pixel_format == CTX_FORMAT_BGRA8)
+  {
+    pixel_format = CTX_FORMAT_RGBA8;
+    rasterizer->swap_red_green = 1;
+  }
 
+  rasterizer->format = ctx_pixel_format_info (pixel_format);
 
-/* this returns the buffer to use for rendering, it always
-   succeeds..
- */
-static inline CtxShapeEntry *ctx_shape_entry_find (CtxRasterizer *rasterizer, uint32_t hash, int width, int 
height)
+  return rasterizer;
+}
+
+/* Create a drawing context that rasterizes into an existing CtxBuffer.
+ *
+ * The context is sized from buffer->width / buffer->height and gets a
+ * rasterizer backend targeting buffer->data with the buffer's stride and
+ * pixel format, using CTX_ANTIALIAS_DEFAULT.
+ *
+ * Returns the new context.
+ */
+Ctx *
+ctx_new_for_buffer (CtxBuffer *buffer)
 {
-  /* use both some high and some low bits  */
-  int entry_no = ( (hash >> 10) ^ (hash & 1023) ) % CTX_SHAPE_CACHE_ENTRIES;
-  int i;
-  {
-    static int i = 0;
-    i++;
-    if (i>1000)
-      {
-        ctx_shape_cache_rate = ctx_shape_cache_hits * 100.0  / (ctx_shape_cache_hits+ctx_shape_cache_misses);
-        i = 0;
-        ctx_shape_cache_hits = 0;
-        ctx_shape_cache_misses = 0;
-      }
-  }
-// XXX : this 1 one is needed  to silence a false positive:
-// ==90718== Invalid write of size 1
-// ==90718==    at 0x1189EF: ctx_rasterizer_generate_coverage (ctx.h:4786)
-// ==90718==    by 0x118E57: ctx_rasterizer_rasterize_edges (ctx.h:4907)
-//
-  int size = sizeof (CtxShapeEntry) + width * height + 1;
+  Ctx *ctx = _ctx_new_drawlist (buffer->width, buffer->height);
+  /* The rasterizer storage must be zero-filled: ctx_rasterizer_init tests
+   * clip_buffer and edge_list.size before it clears the struct, so handing
+   * it indeterminate malloc() memory could make it free a garbage pointer.
+   * This matches the allocation done in ctx_new_for_framebuffer. */
+  ctx_set_backend (ctx,
+                    ctx_rasterizer_init ( (CtxRasterizer *) ctx_calloc (sizeof (CtxRasterizer), 1),
+                                          ctx, NULL, &ctx->state,
+                                          buffer->data, 0, 0, buffer->width, buffer->height,
+                                          buffer->stride, buffer->format->pixel_format,
+                                          CTX_ANTIALIAS_DEFAULT));
+  return ctx;
+}
 
-  i = entry_no;
-  if (rasterizer->shape_cache.entries[i])
-    {
-      CtxShapeEntry *entry = rasterizer->shape_cache.entries[i];
-      int old_size = sizeof (CtxShapeEntry) + width + height + 1;
-      if (entry->hash == hash &&
-          entry->width == width &&
-          entry->height == height)
-        {
-          if (entry->uses < 1<<30)
-            { entry->uses++; }
-          ctx_shape_cache_hits ++;
-          return entry;
-        }
+/* Create a drawing context that rasterizes straight into caller-owned
+ * pixel memory of the given size, stride and pixel format.  A zero-filled
+ * rasterizer is allocated, initialised with CTX_ANTIALIAS_DEFAULT and
+ * installed as the context's backend.
+ */
+Ctx *
+ctx_new_for_framebuffer (void *data, int width, int height,
+                         int stride,
+                         CtxPixelFormat pixel_format)
+{
+  Ctx           *ctx     = _ctx_new_drawlist (width, height);
+  CtxRasterizer *storage = (CtxRasterizer *) ctx_calloc (sizeof (CtxRasterizer), 1);
+
+  ctx_set_backend (ctx,
+                   ctx_rasterizer_init (storage, ctx, NULL, &ctx->state,
+                                        data, 0, 0, width, height,
+                                        stride, pixel_format,
+                                        CTX_ANTIALIAS_DEFAULT));
+
+  /* XXX 1-bit gray shows bugs unless antialiasing is switched off;
+   *     something is going amiss with offsets */
+  if (pixel_format == CTX_FORMAT_GRAY1)
+    ctx_set_antialias (ctx, CTX_ANTIALIAS_NONE);
+
+  return ctx;
+}
 
-      if (old_size >= size)
-      {
-      }
-      else
-      {
-        rasterizer->shape_cache.entries[i] = NULL;
-        rasterizer->shape_cache.size -= entry->width * entry->height;
-        rasterizer->shape_cache.size -= sizeof (CtxShapeEntry);
-        free (entry);
-        rasterizer->shape_cache.entries[i] = (CtxShapeEntry *) calloc (size, 1);
-      }
-    }
-  else
-    {
-        rasterizer->shape_cache.entries[i] = (CtxShapeEntry *) calloc (size, 1);
-    }
+// ctx_new_for_stream (FILE *stream);
 
-  ctx_shape_cache_misses ++;
-  rasterizer->shape_cache.size              += size;
-  rasterizer->shape_cache.entries[i]->hash   = hash;
-  rasterizer->shape_cache.entries[i]->width  = width;
-  rasterizer->shape_cache.entries[i]->height = height;
-  rasterizer->shape_cache.entries[i]->uses = 0;
-  return rasterizer->shape_cache.entries[i];
+#if 0
+/* Disabled legacy constructor, kept for reference only.  It does not match
+ * the current code: ctx_rasterizer_init now also takes ctx and
+ * texture_source arguments, the allocation is sized for CtxBackend rather
+ * than CtxRasterizer, and nothing is returned.  It needs rewriting before
+ * it can be re-enabled.
+ */
+CtxRasterizer *ctx_rasterizer_new (void *data, int x, int y, int width, int height,
+                                   int stride, CtxPixelFormat pixel_format)
+{
+  CtxState    *state    = (CtxState *) malloc (sizeof (CtxState) );
+  CtxRasterizer *rasterizer = (CtxRasterizer *) malloc (sizeof (CtxBackend) );
+  ctx_rasterizer_init (rasterizer, state, data, x, y, width, height,
+                       stride, pixel_format, CTX_ANTIALIAS_DEFAULT);
 }
+#endif
+
+#else
 
 #endif
 
-static uint32_t ctx_rasterizer_poly_to_hash (CtxRasterizer *rasterizer)
+
+/* Global flag, cleared whenever gradient stops are reset.
+ * NOTE(review): presumably set by the gradient cache once it has been
+ * primed - confirm at the place where the cache is filled. */
+int ctx_gradient_cache_valid = 0;
+
+/* Drop every gradient stop recorded in state and mark the gradient cache
+ * as invalid.
+ */
+void
+ctx_state_gradient_clear_stops (CtxState *state)
 {
-  int x = 0;
-  int y = 0;
+//#if CTX_GRADIENT_CACHE
+//  ctx_gradient_cache_reset ();
+//#endif
+  ctx_gradient_cache_valid = 0;
+  state->gradient.n_stops = 0;
+}
 
-  CtxSegment *entry = (CtxSegment*)&rasterizer->edge_list.entries[0];
 
-  int ox = entry->data.s16[2];
-  int oy = entry->data.s16[3];
-  uint32_t hash = rasterizer->edge_list.count;
-  hash = ox;//(ox % CTX_SUBDIV);
-  hash *= CTX_SHAPE_CACHE_PRIME1;
-  hash += oy; //(oy % CTX_RASTERIZER_AA);
-  for (int i = 0; i < rasterizer->edge_list.count; i++)
+/****  end of engine ****/
+/* atty - audio interface and driver for terminals
+ * Copyright (C) 2020 Øyvind Kolås <pippin gimp org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>. 
+ */
+
+/* The 64 base64 digits followed by the padding character '=' at index 64. */
+static const char *base64_map="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
+
+/* Encode one group of up to three input bytes as four base64 characters.
+ *
+ * in:        input bytes.  in[1] is read even when remaining is 1, and
+ *            in[2] is read whenever remaining is larger than 1, so the
+ *            caller must supply readable (zero) bytes after a short group.
+ * remaining: number of valid input bytes left, counted from in[0].
+ * out:       receives exactly four characters; no terminator is written.
+ *
+ * The third and fourth digits default to 64, i.e. '=' padding, and are
+ * only replaced when enough input bytes remain.
+ */
+static void bin2base64_group (const unsigned char *in, int remaining, char *out)
+{
+  unsigned char digit[4] = {0,0,64,64};
+  int i;
+  digit[0] = in[0] >> 2;
+  digit[1] = ((in[0] & 0x03) << 4) | ((in[1] & 0xf0) >> 4);
+  if (remaining > 1)
     {
-      CtxSegment *entry = &(((CtxSegment*)(rasterizer->edge_list.entries)))[i];
-      x = entry->data.s16[2];
-      y = entry->data.s16[3];
-      int dx = x-ox;
-      int dy = y-oy;
-      ox = x;
-      oy = y;
-      hash *= CTX_SHAPE_CACHE_PRIME3;
-      hash += dx;
-      hash *= CTX_SHAPE_CACHE_PRIME4;
-      hash += dy;
+      digit[2] = ((in[1] & 0x0f) << 2) | ((in[2] & 0xc0) >> 6);
+      if (remaining > 2)
+        digit[3] = ((in[2] & 0x3f));
     }
-  return hash;
+  for (i = 0; i < 4; i++)
+    out[i] = base64_map[digit[i]];
 }
 
-static uint32_t ctx_rasterizer_poly_to_edges (CtxRasterizer *rasterizer)
+/* Encode bin_length bytes from bin as '='-padded base64 text.
+ *
+ * bin:        input bytes.
+ * bin_length: number of input bytes; zero or a negative value produces an
+ *             empty string.
+ * ascii:      output buffer; receives 4 characters for every started group
+ *             of 3 input bytes plus a terminating NUL, so it must hold at
+ *             least 4 * ((bin_length + 2) / 3) + 1 bytes.
+ */
+void
+ctx_bin2base64 (const void *bin,
+                int         bin_length,
+                char       *ascii)
 {
-#if CTX_SHAPE_CACHE
-  int x = 0;
-  int y = 0;
-#endif
-  int count = rasterizer->edge_list.count;
-  if (CTX_UNLIKELY (count == 0))
-     return 0;
-  CtxSegment *entry = (CtxSegment*)&rasterizer->edge_list.entries[0];
-#if CTX_SHAPE_CACHE
-#if 1
-  int ox = entry->data.s16[2];
-  int oy = entry->data.s16[3];
-#endif
-  uint32_t hash = rasterizer->edge_list.count;
-  hash = (ox & CTX_SUBDIV);
-  hash *= CTX_SHAPE_CACHE_PRIME1;
-  hash += (oy & CTX_SUBDIV);
-#endif
-  //CtxSegment *entry = &(((CtxSegment*)(rasterizer->edge_list.entries)))[0];
-  for (int i = 0; i < count; i++)
-    {
-#if CTX_SHAPE_CACHE
-      x = entry->data.s16[2];
-      y = entry->data.s16[3];
-      int dx = x-ox;
-      int dy = y-oy;
-      ox = x;
-      oy = y;
-      hash *= CTX_SHAPE_CACHE_PRIME3;
-      hash += dx;
-      hash *= CTX_SHAPE_CACHE_PRIME4;
-      hash += dy;
-#endif
-#if 1
-      if (entry->data.s16[3] < entry->data.s16[1])
-        {
-          *entry = ctx_segment_s16 (CTX_EDGE_FLIPPED,
-                            entry->data.s16[2], entry->data.s16[3],
-                            entry->data.s16[0], entry->data.s16[1]);
-        }
-#endif
-      entry++;
-    }
-#if CTX_SHAPE_CACHE
-  return hash;
-#else
-  return 0;
-#endif
+  const unsigned char *p = (const unsigned char *) bin;
+  int i = 0;
+
+  /* complete 3-byte groups can be encoded straight from the input */
+  for (; bin_length - i*3 >= 3; i++)
+    bin2base64_group (&p[i*3], bin_length - i*3, &ascii[i*4]);
+
+  if (i*3 < bin_length)
+   {
+     /* The group encoder reads one byte beyond a short final group; copy
+      * the 1 or 2 leftover bytes into a zero padded buffer so the result
+      * never depends on whatever follows the input.  This replaces a heap
+      * copy of the whole input whose allocation was not checked.
+      */
+     unsigned char tail[3] = {0, 0, 0};
+     int remaining = bin_length - i*3;
+     memcpy (tail, &p[i*3], remaining);
+     bin2base64_group (tail, remaining, &ascii[i*4]);
+     i++;
+   }
+  ascii[i*4]=0;
 }
 
-static inline void ctx_rasterizer_finish_shape (CtxRasterizer *rasterizer)
+/* Reverse lookup table: input byte -> 6-bit base64 value, or 255 for bytes
+ * that are not base64 digits.  It has 256 entries because it is indexed
+ * with an unsigned char; with only 255 entries the byte 0xff would read
+ * past the end of the table.
+ */
+static unsigned char base64_revmap[256];
+
+/* Fill base64_revmap on first use; later calls return immediately.
+ * Besides the standard alphabet, the URI-safe variants '-' and '_' are
+ * accepted for the values 62 and 63.
+ */
+static void base64_revmap_init (void)
 {
-  if (rasterizer->has_shape && rasterizer->has_prev)
-    {
-      ctx_rasterizer_line_to (rasterizer, rasterizer->first_x, rasterizer->first_y);
-      rasterizer->has_prev = 0;
-    }
+  static int done = 0;
+  if (done)
+    return;
+
+  for (int i = 0; i < 256; i ++)
+    base64_revmap[i]=255;
+  for (int i = 0; i < 64; i ++)
+    base64_revmap[((const unsigned char*)base64_map)[i]]=i;
+  /* include variants used in URI encodings for decoder,
+   * even if that is not how we encode
+  */
+  base64_revmap['-']=62;
+  base64_revmap['_']=63;
+  base64_revmap['+']=62;
+  base64_revmap['/']=63;
+
+  done = 1;
 }
 
-static inline void ctx_rasterizer_move_to (CtxRasterizer *rasterizer, float x, float y)
+
+/* Decode the NUL terminated base64 text in ascii into bin.
+ *
+ * Input bytes that map to 255 in base64_revmap are skipped rather than
+ * treated as errors. A zero byte is stored after the decoded data.
+ *
+ * Returns the number of decoded bytes, which is also stored in *length when
+ * length is non-NULL. When length is non-NULL and the output count has grown
+ * past the value *length held on entry, *length is set to -1 and -1 is
+ * returned.
+ *
+ * NOTE(review): the limit is tested as outputno > *length before each input
+ * character, so up to *length + 1 decoded bytes plus the terminating zero
+ * can be written before the error is raised - confirm that callers size
+ * bin accordingly.
+ */
+int
+ctx_base642bin (const char    *ascii,
+                int           *length,
+                unsigned char *bin)
 {
-  float tx = x; float ty = y;
-  int aa = 15;//rasterizer->aa;
-  rasterizer->x        = x;
-  rasterizer->y        = y;
-  rasterizer->first_x  = x;
-  rasterizer->first_y  = y;
-  rasterizer->has_prev = -1;
-  if (rasterizer->uses_transforms)
+  int i;
+  int charno = 0;   /* number of valid base64 characters consumed so far */
+  int outputno = 0; /* number of bytes written to bin */
+  int carry = 0;    /* bits left over from the previous character */
+  base64_revmap_init ();
+  for (i = 0; ascii[i]; i++)
     {
-      _ctx_user_to_device (rasterizer->state, &tx, &ty);
+      int bits = base64_revmap[((const unsigned char*)ascii)[i]];
+      if (length && outputno > *length)
+        {
+          *length = -1;
+          return -1;
+        }
+      if (bits != 255)
+        {
+          /* every group of four characters yields three bytes */
+          switch (charno % 4)
+            {
+              case 0:
+                carry = bits;
+                break;
+              case 1:
+                bin[outputno] = (carry << 2) | (bits >> 4);
+                outputno++;
+                carry = bits & 15;
+                break;
+              case 2:
+                bin[outputno] = (carry << 4) | (bits >> 2);
+                outputno++;
+                carry = bits & 3;
+                break;
+              case 3:
+                bin[outputno] = (carry << 6) | bits;
+                outputno++;
+                carry = 0;
+                break;
+            }
+          charno++;
+        }
     }
+  bin[outputno]=0;
+  if (length)
+    *length= outputno;
+  return outputno;
+}
+#ifndef SQUOZE_H
+#define SQUOZE_H
 
-  tx = (tx - rasterizer->blit_x) * CTX_SUBDIV;
-  ty = ty * aa;
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <assert.h>
+
+uint32_t squoze6 (const char *utf8);
+uint64_t squoze10 (const char *utf8);
+uint64_t squoze12 (const char *utf8);
+const char *squoze6_decode (uint32_t hash);
+const char *squoze10_decode (uint64_t hash);
+const char *squoze12_decode (uint64_t hash);
+
+//#define SQUOZE_NO_INTERNING  // this disables the interning - providing only a hash (and decode for non-overflowed hashes)
+
+#define SQUOZE_ENTER_SQUEEZE    16
+
+#define SQUOZE_SPACE            0
+#define SQUOZE_DEC_OFFSET_A     27
+#define SQUOZE_INC_OFFSET_A     28
+#define SQUOZE_DEC_OFFSET_B     29
+#define SQUOZE_INC_OFFSET_B     30
+#define SQUOZE_ENTER_UTF5       31
+
+#define SQUOZE_JUMP_STRIDE      26
+#define SQUOZE_JUMP_OFFSET      19
 
-  rasterizer->inner_x = tx;
-  rasterizer->inner_y = ty;
+static inline uint32_t squoze_utf8_to_unichar (const char *input);
+static inline int      squoze_unichar_to_utf8 (uint32_t  ch, uint8_t  *dest);
+static inline int      squoze_utf8_len        (const unsigned char first_byte);
 
-  rasterizer->scan_min = ctx_mini (ty, rasterizer->scan_min);
-  rasterizer->scan_max = ctx_maxi (ty, rasterizer->scan_max);
 
-  rasterizer->col_min = ctx_mini (tx, rasterizer->col_min);
-  rasterizer->col_max = ctx_maxi (tx, rasterizer->col_max);
+/* Compute the base offset of the segment that unichar falls into.
+ *
+ * Segments are SQUOZE_JUMP_STRIDE (26) code points long and are shifted by
+ * SQUOZE_JUMP_OFFSET, which makes 'a'-'z' exactly one segment.
+ */
+static inline int squoze_new_offset (uint32_t unichar)
+{
+  const uint32_t aligned = unichar - (unichar % SQUOZE_JUMP_STRIDE);
+  const uint32_t base    = aligned + SQUOZE_JUMP_OFFSET;
+  /* the shifted base may land above unichar; step back one segment then */
+  return (base > unichar) ? base - SQUOZE_JUMP_STRIDE : base;
 }
 
-static inline void ctx_rasterizer_line_to (CtxRasterizer *rasterizer, float x, float y)
+static int squoze_needed_jump (uint32_t off, uint32_t unicha)
 {
-  float tx = x;
-  float ty = y;
-  //float ox = rasterizer->x;
-  //float oy = rasterizer->y;
-  if (rasterizer->uses_transforms)
-    {
-      _ctx_user_to_device (rasterizer->state, &tx, &ty);
-    }
-  tx -= rasterizer->blit_x;
-#define MIN_Y -1000
-#define MAX_Y 1400
+  int count = 0;
+  int unichar = unicha;
+  int offset = off;
 
-  ty = ctx_maxf (MIN_Y, ty);
-  ty = ctx_minf (MAX_Y, ty);
-  
-  ctx_rasterizer_add_point (rasterizer, tx * CTX_SUBDIV, ty * CTX_FULL_AA);//rasterizer->aa);
+  if (unichar == 32) // space is always in range
+    return 0;
 
-  if (CTX_UNLIKELY(rasterizer->has_prev<=0))
-    {
-#if 0
-      if (rasterizer->uses_transforms)
-      {
-        // storing transformed would save some processing for a tiny
-        // amount of runtime RAM XXX
-        _ctx_user_to_device (rasterizer->state, &ox, &oy);
-      }
-#endif
-      //ox -= rasterizer->blit_x;
-      //oy = ctx_maxf (oy, MIN_Y);
-      //oy = ctx_minf (oy, MAX_Y);
-      CtxSegment *entry = & ((CtxSegment*)rasterizer->edge_list.entries)[rasterizer->edge_list.count-1];
-      //entry->data.s16[0] = ox * CTX_SUBDIV;
-      //entry->data.s16[1] = oy * CTX_FULL_AA;
-      entry->code = CTX_NEW_EDGE;
-      rasterizer->has_prev = 1;
-    }
-  rasterizer->has_shape = 1;
-  rasterizer->y         = y;
-  rasterizer->x         = x;
-}
+  /* TODO: replace this with direct computation of values instead of loops */
 
+  while (unichar < offset)
+  {
+    offset -= SQUOZE_JUMP_STRIDE;
+    count ++;
+  }
+  if (count)
+  {
+    return -count;
+  }
+  while (unichar - offset >= SQUOZE_JUMP_STRIDE)
+  {
+    offset += SQUOZE_JUMP_STRIDE;
+    count ++;
+  }
+  return count;
+}
 
-CTX_INLINE static float
-ctx_bezier_sample_1d (float x0, float x1, float x2, float x3, float dt)
+/* Number of base-16 digits needed to write unichar; zero still takes one. */
+static inline int
+squoze_utf5_length (uint32_t unichar)
 {
-  float ab   = ctx_lerpf (x0, x1, dt);
-  float bc   = ctx_lerpf (x1, x2, dt);
-  float cd   = ctx_lerpf (x2, x3, dt);
-  float abbc = ctx_lerpf (ab, bc, dt);
-  float bccd = ctx_lerpf (bc, cd, dt);
-  return ctx_lerpf (abbc, bccd, dt);
+  int digits = 1;
+  /* every further non-zero group of four bits adds one digit */
+  for (unichar >>= 4; unichar; unichar >>= 4)
+    digits ++;
+  return digits;
 }
 
-CTX_INLINE static void
-ctx_bezier_sample (float x0, float y0,
-                   float x1, float y1,
-                   float x2, float y2,
-                   float x3, float y3,
-                   float dt, float *x, float *y)
+/* Encoder state record.
+ *
+ * NOTE(review): none of the functions visible in this part of the file
+ * reference this struct; the field meanings are presumably those suggested
+ * by their names (mode flag, segment offset, output length, output sink,
+ * current code point) - confirm against its users.
+ */
+typedef struct EncodeUtf5 {
+  int      is_utf5;
+  int      offset;
+  int      length;
+  void    *write_data;
+  uint32_t current;
+} EncodeUtf5;
+
+/* Single-bit mask marking a hash as overflowed for the given dimension:
+ * the bit just above squoze_dim five bit codes plus the one flag bit.
+ */
+static inline uint64_t
+squoze_overflow_mask_for_dim (int squoze_dim)
 {
-  *x = ctx_bezier_sample_1d (x0, x1, x2, x3, dt);
-  *y = ctx_bezier_sample_1d (y0, y1, y2, y3, dt);
+  const int overflow_bit = squoze_dim * 5 + 1;
+  return (uint64_t)1 << overflow_bit;
 }
 
-static inline void
-ctx_rasterizer_bezier_divide (CtxRasterizer *rasterizer,
-                              float ox, float oy,
-                              float x0, float y0,
-                              float x1, float y1,
-                              float x2, float y2,
-                              float sx, float sy,
-                              float ex, float ey,
-                              float s,
-                              float e,
-                              int   iteration,
-                              float tolerance)
+/* Estimate how many codes it takes to emit val, and next_val when it is
+ * non-zero, in utf5 mode.
+ *
+ * With the alternative estimate compiled out (#if 0) the result is simply
+ * the utf5 length of val plus the utf5 length of next_val, and offset is
+ * not used.
+ */
+static int squoze_compute_cost_utf5 (int offset, int val, int next_val)
 {
-  float t = (s + e) * 0.5f;
-  float x, y, lx, ly, dx, dy;
-  ctx_bezier_sample (ox, oy, x0, y0, x1, y1, x2, y2, t, &x, &y);
-  if (iteration)
-    {
-      lx = ctx_lerpf (sx, ex, t);
-      ly = ctx_lerpf (sy, ey, t);
-      dx = lx - x;
-      dy = ly - y;
-      if (CTX_UNLIKELY( (dx*dx+dy*dy) < tolerance))
-        /* bailing - because for the mid-point straight line difference is
-           tiny */
-        { return; }
-      dx = sx - ex;
-      dy = ey - ey;
-      if (CTX_UNLIKELY( (dx*dx+dy*dy) < tolerance))
-        /* bailing on tiny segments */
-        { return; }
-    }
-  if (iteration < 8)
+  int cost = 0;
+  cost += squoze_utf5_length (val);
+  if (next_val)
   {
-  ctx_rasterizer_bezier_divide (rasterizer, ox, oy, x0, y0, x1, y1, x2, y2,
-                                sx, sy, x, y, s, t, iteration + 1,
-                                tolerance);
-  ctx_rasterizer_line_to (rasterizer, x, y);
-  ctx_rasterizer_bezier_divide (rasterizer, ox, oy, x0, y0, x1, y1, x2, y2,
-                                x, y, ex, ey, t, e, iteration + 1,
-                                tolerance);
-  }
-}
+    int no_change_cost = squoze_utf5_length (next_val);
+#if 0 // not hit in test-corpus, it is easier to specify and
+      // port the hash consistently without it
+    offset = squoze_new_offset (val);
+    int change_cost = 1;
+    int needed_jump = squoze_needed_jump (offset, next_val);
 
-static void
-ctx_rasterizer_curve_to (CtxRasterizer *rasterizer,
-                         float x0, float y0,
-                         float x1, float y1,
-                         float x2, float y2)
-{
-  //float tolerance =
-  //  1.0f*(ctx_pow2 (rasterizer->state->gstate.transform.m[0][0]) +
-  //  ctx_pow2 (rasterizer->state->gstate.transform.m[1][1]));
-  float tolerance = ctx_matrix_get_scale (&rasterizer->state->gstate.transform);
-  float ox = rasterizer->x;
-  float oy = rasterizer->y;
-  //tolerance *= tolerance;
-  tolerance = 2.0/(tolerance*tolerance);
-  ox = rasterizer->state->x;
-  oy = rasterizer->state->y;
-  //tolerance = 10.0/(tolerance*tolerance);
-  //tolerance = 10.0f/tolerance;
-#if 0 // skipping this to preserve hash integrity
-  if (tolerance == 1.0f || 1)
-  {
-  float maxx = ctx_maxf (x1,x2);
-  maxx = ctx_maxf (maxx, ox);
-  maxx = ctx_maxf (maxx, x0);
-  float maxy = ctx_maxf (y1,y2);
-  maxy = ctx_maxf (maxy, oy);
-  maxy = ctx_maxf (maxy, y0);
-  float minx = ctx_minf (x1,x2);
-  minx = ctx_minf (minx, ox);
-  minx = ctx_minf (minx, x0);
-  float miny = ctx_minf (y1,y2);
-  miny = ctx_minf (miny, oy);
-  miny = ctx_minf (miny, y0);
-  
-  _ctx_user_to_device (rasterizer->state, &minx, &miny);
-  _ctx_user_to_device (rasterizer->state, &maxx, &maxy);
-#if 1
-    if(
-        (minx > rasterizer->blit_x + rasterizer->blit_width) ||
-        (miny > rasterizer->blit_y + rasterizer->blit_height) ||
-        (maxx < rasterizer->blit_x) ||
-        (maxy < rasterizer->blit_y) )
+    if (needed_jump == 0)
+    {
+      change_cost += 1;
+    } else if (needed_jump >= -2 && needed_jump <= 2)
+    {
+      change_cost += 2;
+    }
+    /* long jumps cover -10..10, the same range the squeezed estimate uses */
+    else if (needed_jump >= -10 && needed_jump <= 10)
     {
+      change_cost += 3;
     }
     else
-#endif
     {
-      ctx_rasterizer_bezier_divide (rasterizer,
-                                    ox, oy, x0, y0,
-                                    x1, y1, x2, y2,
-                                    ox, oy, x2, y2,
-                                    0.0f, 1.0f, 0.0f, tolerance);
+      change_cost += 100;
     }
-  }
-  else
+
+    if (change_cost < no_change_cost)
+    {
+      cost += change_cost;
+    }
+    else
 #endif
     {
-      ctx_rasterizer_bezier_divide (rasterizer,
-                                    ox, oy, x0, y0,
-                                    x1, y1, x2, y2,
-                                    ox, oy, x2, y2,
-                                    0.0f, 1.0f, 0.0f, tolerance);
+      cost += no_change_cost;
     }
-  ctx_rasterizer_line_to (rasterizer, x2, y2);
-}
 
-static void
-ctx_rasterizer_rel_move_to (CtxRasterizer *rasterizer, float x, float y)
-{
-  //if (CTX_UNLIKELY(x == 0.f && y == 0.f))
-  //{ return; }
-  x += rasterizer->x;
-  y += rasterizer->y;
-  ctx_rasterizer_move_to (rasterizer, x, y);
-}
+  }
 
-static void
-ctx_rasterizer_rel_line_to (CtxRasterizer *rasterizer, float x, float y)
-{
-  //if (CTX_UNLIKELY(x== 0.f && y==0.f))
-  //  { return; }
-  x += rasterizer->x;
-  y += rasterizer->y;
-  ctx_rasterizer_line_to (rasterizer, x, y);
-}
 
-static void
-ctx_rasterizer_rel_curve_to (CtxRasterizer *rasterizer,
-                             float x0, float y0, float x1, float y1, float x2, float y2)
-{
-  x0 += rasterizer->x;
-  y0 += rasterizer->y;
-  x1 += rasterizer->x;
-  y1 += rasterizer->y;
-  x2 += rasterizer->x;
-  y2 += rasterizer->y;
-  ctx_rasterizer_curve_to (rasterizer, x0, y0, x1, y1, x2, y2);
-}
 
+  return cost;
+}
 
-static int
-ctx_rasterizer_find_texture (CtxRasterizer *rasterizer,
-                             const char *eid)
+/* Estimate how many codes it takes to emit val, and next_val when it is
+ * non-zero, in squeezed mode starting from segment offset.
+ *
+ * A character inside the current segment costs 1, one needing a jump of at
+ * most two segments costs 2, one needing a jump of at most ten segments
+ * costs 3, and anything further away costs 100. For next_val the cheaper of
+ * staying squeezed and switching to utf5 (1 + its utf5 length) is added.
+ */
+static int squoze_compute_cost_squeezed (int offset, int val, int next_val)
 {
-  int no;
-  for (no = 0; no < CTX_MAX_TEXTURES; no++)
+  int needed_jump = squoze_needed_jump (offset, val);
+  int cost = 0;
+  if (needed_jump == 0)
   {
-    if (rasterizer->texture_source->texture[no].data &&
-        rasterizer->texture_source->texture[no].eid &&
-        !strcmp (rasterizer->texture_source->texture[no].eid, eid))
-      return no;
+    cost += 1;
+  }
+  else if (needed_jump >= -2 && needed_jump <= 2)
+  {
+    cost += 2;
+    offset += SQUOZE_JUMP_STRIDE * needed_jump;
+  }
+  else if (needed_jump >= -10 && needed_jump <= 10)
+  {
+    cost += 3;
+    offset += SQUOZE_JUMP_STRIDE * needed_jump;
+  }
+  else
+  {
+    cost += 100; // very expensive, makes the other choice win
   }
-  return -1;
-}
 
-static void
-ctx_rasterizer_set_texture (CtxRasterizer *rasterizer,
-                            const char *eid,
-                            float x,
-                            float y)
-{
-  int is_stroke = (rasterizer->state->source != 0);
-  CtxSource *source = is_stroke && (rasterizer->state->gstate.source_stroke.type != CTX_SOURCE_INHERIT_FILL)?
-                        &rasterizer->state->gstate.source_stroke:
-                        &rasterizer->state->gstate.source_fill;
-  rasterizer->state->source = 0;
+  if (next_val)
+  {
+    /* change: one code to enter utf5 plus the utf5 digits of next_val */
+    int change_cost = 1 + squoze_utf5_length (next_val);
+    int no_change_cost = 0;
+    needed_jump = squoze_needed_jump (offset, next_val);
 
-  int no = ctx_rasterizer_find_texture (rasterizer, eid);
-  if (no < 0 || no >= CTX_MAX_TEXTURES) { no = 0; }
-  if (rasterizer->texture_source->texture[no].data == NULL)
+    if (needed_jump == 0)
     {
-      fprintf (stderr, "ctx tex fail %p %s %i\n", rasterizer->texture_source, eid, no);
-      return;
+      no_change_cost += 1;
     }
-  else
-  {
-    rasterizer->texture_source->texture[no].frame = rasterizer->texture_source->frame;
+    else if (needed_jump >= -2 && needed_jump <= 2)
+    {
+      no_change_cost += 2;
+    }
+    else if (needed_jump >= -10 && needed_jump <= 10)
+    {
+      no_change_cost += 3;
+      offset += SQUOZE_JUMP_STRIDE * needed_jump;
+    }
+    else
+    {
+      /* out of jump range: staying squeezed is never cheaper */
+      no_change_cost = change_cost;
+    }
+    if (change_cost < no_change_cost)
+      cost += change_cost;
+    else
+      cost += no_change_cost;
   }
-  source->type = CTX_SOURCE_TEXTURE;
-  source->texture.buffer = &rasterizer->texture_source->texture[no];
-  ctx_matrix_identity (&source->set_transform);
-  ctx_matrix_translate (&source->set_transform, x, y);
+
+  return cost;
 }
 
 
-static void ctx_rasterizer_define_texture (CtxRasterizer *rasterizer,
-                                           const char *eid,
-                                           int width,
-                                           int height,
-                                           int format,
-                                           char unsigned *data)
+/* Encode inlen bytes of UTF-8 text from input as a sequence of five bit
+ * codes, one code per byte of output, followed by a terminating zero.
+ *
+ * Two modes are used and switched between based on the cost estimates:
+ *  - utf5: a code point is written as base-16 digits, most significant
+ *    first, with 16 added to the leading digit.
+ *  - squeezed: a code point is written as its 1-based position inside the
+ *    current 26 entry segment (space as SQUOZE_SPACE), preceded by offset
+ *    jump codes when it lies in another segment.
+ *
+ * permit_squeezed: when zero, squeezed mode is never entered.
+ * escape_endzero:  when non-zero and the last code is 0, one extra code is
+ *                  appended so the encoding does not end in a zero.
+ * r_outlen:        if non-NULL receives the number of codes written, not
+ *                  counting the terminating zero.
+ *
+ * No bound is applied to output; the caller has to provide a buffer large
+ * enough for the encoded text.
+ */
+static void squoze5_encode (const char *input, int inlen,
+                            char *output, int *r_outlen,
+                            int permit_squeezed,
+                            int escape_endzero)
 {
-  _ctx_texture_lock (); // we're using the same texture_source from all threads, keeping allocaitons down
-                        // need synchronizing (it could be better to do a pre-pass)
-  ctx_texture_init (rasterizer->texture_source,
-                    eid,
-                    width,
-                    height,
-                    ctx_pixel_format_get_stride ((CtxPixelFormat)format, width),
-                    (CtxPixelFormat)format,
-#if CTX_ENABLE_CM
-                    (void*)rasterizer->state->gstate.texture_space,
-#else
-                    NULL,
-#endif
-                    data,
-                    ctx_buffer_pixels_free, (void*)23);
-                    /*  when userdata for ctx_buffer_pixels_free is 23, texture_init dups the data on
-                     *  use
-                     */
+  int offset  = squoze_new_offset('a');
+  int is_utf5 = 1;
+  int len     = 0;
 
-  ctx_rasterizer_set_texture (rasterizer, eid, 0.0, 0.0);
-  _ctx_texture_unlock ();
-}
+  for (int i = 0; i < inlen; i+= squoze_utf8_len (input[i]))
+  {
+    int val = squoze_utf8_to_unichar (&input[i]);
+    int next_val = 0;
+    int first_len = squoze_utf8_len (input[i]);
+    if (i + first_len < inlen)
+      next_val = squoze_utf8_to_unichar (&input[i+first_len]);
 
+    /* decide whether to switch mode before emitting this code point */
+    if (is_utf5)
+    {
+      int change_cost    = squoze_compute_cost_squeezed (offset, val, next_val);
+      int no_change_cost = squoze_compute_cost_utf5 (offset, val, next_val);
+  
+      if (i != 0)          /* ignore cost of initial 'G' */
+        change_cost += 1;
 
-CTX_INLINE static int ctx_compare_edges (const void *ap, const void *bp)
-{
-  const CtxSegment *a = (const CtxSegment *) ap;
-  const CtxSegment *b = (const CtxSegment *) bp;
-  return a->data.s16[1] - b->data.s16[1];
-}
+      if (permit_squeezed && change_cost <= no_change_cost)
+      {
+        output[len++] = SQUOZE_ENTER_SQUEEZE;
+        is_utf5 = 0;
+      }
+    }
+    else
+    {
+      int change_cost    = 1 + squoze_compute_cost_utf5 (offset, val, next_val);
+      int no_change_cost = squoze_compute_cost_squeezed (offset, val, next_val);
 
-CTX_INLINE static int ctx_edge_qsort_partition (CtxSegment *A, int low, int high)
-{
-  CtxSegment pivot = A[ (high+low) /2];
-  int i = low;
-  int j = high;
-  while (i <= j)
+      if (change_cost < no_change_cost)
+      {
+        output[len++] = SQUOZE_ENTER_UTF5;
+        is_utf5 = 1;
+      }
+    }
+
+    /* squeezed mode: first move the segment offset if val is outside it */
+    if (!is_utf5)
     {
-      while (ctx_compare_edges (&A[i], &pivot) < 0) { i ++; }
-      while (ctx_compare_edges (&pivot, &A[j]) < 0) { j --; }
-      if (i <= j)
+      int needed_jump = squoze_needed_jump (offset, val);
+      if (needed_jump)
+      {
+        if (needed_jump >= -2 && needed_jump <= 2)
         {
-          CtxSegment tmp = A[i];
-          A[i] = A[j];
-          A[j] = tmp;
-          i++;
-          j--;
+          /* short jumps have a dedicated single code each */
+          switch (needed_jump)
+          {
+            case -1: output[len++] = SQUOZE_DEC_OFFSET_B; break;
+            case  1: output[len++] = SQUOZE_INC_OFFSET_B; break;
+            case -2: output[len++] = SQUOZE_DEC_OFFSET_A; break;
+            case  2: output[len++] = SQUOZE_INC_OFFSET_A; break;
+          }
+          offset += SQUOZE_JUMP_STRIDE * needed_jump;
         }
-    }
-  return i;
-}
-
-static inline void ctx_edge_qsort (CtxSegment *entries, int low, int high)
-{
-  {
-    int p = ctx_edge_qsort_partition (entries, low, high);
-    if (low < p -1 )
-      { ctx_edge_qsort (entries, low, p - 1); }
-    if (low < high)
-      { ctx_edge_qsort (entries, p, high); }
-  }
-}
+        else if (needed_jump >= -10 && needed_jump <= 10) {
+              /* longer jumps map to 0..15 (3..10 -> 0..7, -3..-10 -> 8..15)
+               * and are written as two base-4 digits using the offset codes */
+              int encoded_val;
+              if (needed_jump < -2)
+                encoded_val = 5 - needed_jump;
+              else
+                encoded_val = needed_jump - 3;
 
-static CTX_INLINE void ctx_rasterizer_sort_edges (CtxRasterizer *rasterizer)
-{
-  ctx_edge_qsort ((CtxSegment*)& (rasterizer->edge_list.entries[0]), 0, rasterizer->edge_list.count-1);
-}
+              output[len++] = (encoded_val / 4) + SQUOZE_DEC_OFFSET_A;
+              output[len++] = (encoded_val % 4) + SQUOZE_DEC_OFFSET_A;
 
-static inline void ctx_rasterizer_discard_edges (CtxRasterizer *rasterizer)
-{
-  int scanline = rasterizer->scanline;
-  int next_scanline = rasterizer->scanline + CTX_FULL_AA;
-  int limit3 = CTX_RASTERIZER_AA_SLOPE_LIMIT3;
-  //if (rasterizer->fast_aa)
-    limit3 = CTX_RASTERIZER_AA_SLOPE_LIMIT3_FAST_AA;
-  CtxSegment *segments = &((CtxSegment*)(rasterizer->edge_list.entries))[0];
-  int *edges = rasterizer->edges;
-  for (int i = 0; i < rasterizer->active_edges; i++)
-    {
-      CtxSegment *segment = segments + edges[i];
-      int edge_end = segment->data.s16[3]-1;
-      if (edge_end < scanline)
+              offset += SQUOZE_JUMP_STRIDE * needed_jump;
+        }
+        else
         {
-
-          int dx_dy = abs(segment->delta);
-          rasterizer->needs_aa3  -= (dx_dy > limit3);
-          rasterizer->needs_aa5  -= (dx_dy > CTX_RASTERIZER_AA_SLOPE_LIMIT5);
-          rasterizer->needs_aa15 -= (dx_dy > CTX_RASTERIZER_AA_SLOPE_LIMIT15);
-          rasterizer->edges[i] = rasterizer->edges[rasterizer->active_edges-1];
-          rasterizer->active_edges--;
-          i--;
+          assert(0); // should not be reached
+          output[len++] = SQUOZE_ENTER_UTF5;
+          is_utf5 = 1;
         }
-      else if (edge_end < next_scanline)
-        rasterizer->ending_edges++;
+      }
     }
-#if 0
-  // we should - but for 99% of the cases we do not need to, so we skip it
-  for (int i = 0; i < rasterizer->pending_edges; i++)
+
+    if (is_utf5)
     {
-      int edge_end = 
((CtxSegment*)(rasterizer->edge_list.entries))[rasterizer->edges[CTX_MAX_EDGES-1-i]].data.s16[3]-1;
-      if (edge_end < scanline + CTX_FULL_AA)
-        rasterizer->ending_edges++;
+      int octets = 0;
+      offset = squoze_new_offset (val);
+      /* digits come out least significant first; only the final, most
+       * significant, digit gets 16 added */
+      while (val)
+      {
+        int oval = val % 16;
+        int hi = 16;
+        if (val / 16) hi = 0;
+        output[len+ (octets++)] = oval + hi;
+        val /= 16;
+      }
+      for (int j = 0; j < octets/2; j++) // mirror in-place
+      {                                  // TODO refactor to be single pass
+        int tmp = output[len+j];
+        output[len+j] = output[len+octets-1-j];
+        output[len+octets-1-j] = tmp;
+      }
+      len += octets;
     }
-#endif
+    else 
+    {
+       if (val == ' ')
+       {
+         output[len++] = SQUOZE_SPACE;
+       }
+       else
+       {
+         output[len++] = val-offset+1;
+       }
+    }
+  }
+
+  /* keep the encoding from ending in a zero code when asked to */
+  if (escape_endzero && len && output[len-1]==0)
+  {
+    if (is_utf5)
+      output[len++] = 16;
+    else
+      output[len++] = SQUOZE_ENTER_UTF5;
+  }
+  output[len]=0;
+  if (r_outlen)
+    *r_outlen = len;
 }
 
-inline static void ctx_rasterizer_increment_edges (CtxRasterizer *rasterizer, int count)
+/* Compute the squoze hash of the NUL terminated UTF-8 string utf8.
+ *
+ * The string is first turned into five bit codes with squoze5_encode. When
+ * the codes (not counting a leading SQUOZE_ENTER_SQUEEZE) fit in squoze_dim
+ * slots they are packed directly, five bits each, above the lowest bit.
+ * Otherwise a multiplicative hash of all codes is computed and the overflow
+ * bit for the dimension is set. In both cases the lowest bit is 1 when the
+ * encoding starts in utf5 mode and 0 when it starts squeezed.
+ *
+ * Strings whose worst case encoding does not fit the on-stack buffer are
+ * encoded into a temporary heap buffer; 0 is returned if that allocation
+ * fails.
+ */
+static inline uint64_t _squoze (int squoze_dim, const char *utf8)
 {
-  rasterizer->scanline += count;
-  CtxSegment *segments = &((CtxSegment*)(rasterizer->edge_list.entries))[0];
-  for (int i = 0; i < rasterizer->active_edges; i++)
+  char   stack_buf[4096]="";
+  char  *encoded = stack_buf;
+  int    encoded_len=0;
+  size_t inlen = strlen (utf8);
+  /* each encoder iteration consumes at least one input byte and emits at
+   * most ten codes (mode switch, fallback switch and eight digits); add
+   * room for the end escape and the terminating zero */
+  size_t needed = inlen * 10 + 4;
+  if (needed > sizeof (stack_buf))
+  {
+    encoded = (char*)calloc (needed, 1);
+    if (!encoded)
+      return 0;
+  }
+  squoze5_encode (utf8, (int)inlen, encoded, &encoded_len, 1, 1);
+  uint64_t hash = 0;
+  int  utf5 = (encoded[0] != SQUOZE_ENTER_SQUEEZE);
+  uint64_t multiplier = ((squoze_dim == 6) ? 0x25bd1e975
+                                           : 0x98173415bd1e975);
+
+  uint64_t overflowed_mask = squoze_overflow_mask_for_dim (squoze_dim);
+  uint64_t all_bits        = overflowed_mask - 1;
+
+  int rshift = (squoze_dim == 6) ? 8 : 16;
+
+
+  if (encoded_len - (!utf5) <= squoze_dim)
+  {
+    /* short enough: store the codes themselves, skipping the mode marker */
+    for (int i = !utf5; i < encoded_len; i++)
     {
-      CtxSegment *segment = segments + rasterizer->edges[i];
-      segment->val += segment->delta * count;
+      uint64_t val = encoded[i];
+      hash = hash | (val << (5*(i-(!utf5))));
     }
-  for (int i = 0; i < rasterizer->pending_edges; i++)
+    hash <<= 1; // make room for the bit that encodes utf5 or squeeze
+  }
+  else
+  {
+    for (int i = 0; i < encoded_len; i++)
     {
-      CtxSegment *segment = segments + rasterizer->edges[CTX_MAX_EDGES-1-i];
-      segment->val += segment->delta * count;
+      uint64_t val = encoded[i];
+      hash = hash ^ val;
+      hash = hash * multiplier;
+      hash = hash & all_bits;
+      hash = hash ^ ((hash >> rshift));
     }
+    hash |= overflowed_mask;
+  }
+  if (encoded != stack_buf)
+    free (encoded);
+  return hash | utf5;
 }
 
-/* feeds up to rasterizer->scanline,
-   keeps a pending buffer of edges - that encompass
-   the full incoming scanline,
-   feed until the start of the scanline and check for need for aa
-   in all of pending + active edges, then
-   again feed_edges until middle of scanline if doing non-AA
-   or directly render when doing AA
-*/
-CTX_INLINE static void ctx_edge2_insertion_sort (CtxSegment *segments, int *entries, int count)
-{
-  for(int i=1; i<count; i++)
-   {
-     int temp = entries[i];
-     int j = i-1;
-     while (j >= 0 && segments[temp].val - segments[entries[j]].val < 0)
-     {
-       entries[j+1] = entries[j];
-       j--;
-     }
-     entries[j+1] = temp;
-   }
-}
+typedef struct _CashInterned CashInterned;
 
-CTX_INLINE static int ctx_edge2_compare2 (CtxSegment *segments, int a, int b)
-{
-  CtxSegment *seg_a = &segments[a];
-  CtxSegment *seg_b = &segments[b];
-  int minval_a = ctx_mini (seg_a->val - seg_a->delta * CTX_AA_HALFSTEP2, seg_a->val + seg_a->delta * 
CTX_AA_HALFSTEP);
-  int minval_b = ctx_mini (seg_b->val - seg_b->delta * CTX_AA_HALFSTEP2, seg_b->val + seg_b->delta * 
CTX_AA_HALFSTEP);
-  return minval_a - minval_b;
-}
+/* One entry of the intern table: an overflowed hash and a heap allocated
+ * copy (made with strdup) of the string it was computed from. */
+struct _CashInterned {
+    uint64_t   hash;
+    char      *string;
+};
 
-CTX_INLINE static void ctx_edge2_insertion_sort2 (CtxSegment *segments, int *entries, int count)
-{
-  for(int i=1; i<count; i++)
-   {
-     int temp = entries[i];
-     int j = i-1;
-     while (j >= 0 && ctx_edge2_compare2 (segments, temp, entries[j]) < 0)
-     {
-       entries[j+1] = entries[j];
-       j--;
-     }
-     entries[j+1] = temp;
-   }
-}
+/* Intern table for overflowed hashes: interned is the realloc-grown array,
+ * n_interned the number of entries in use and s_interned the number of
+ * entries allocated. */
+static CashInterned *interned = NULL;
+static int n_interned = 0;
+static int s_interned = 0;
 
-inline static void ctx_rasterizer_feed_edges (CtxRasterizer *rasterizer, int apply2_sort)
+/* Binary search the intern table for hash.
+ *
+ * Returns the index of an entry whose hash equals the argument when the
+ * search lands on one. Otherwise it returns the upper index of the final
+ * two entry search window, or 0 when the table has fewer than two entries.
+ * The caller has to compare interned[result].hash itself to tell a hit from
+ * a miss.
+ *
+ * NOTE(review): on a miss the returned index is not always the position
+ * that keeps the table sorted - for a hash larger than every entry it is
+ * n_interned - 1, and for a hash smaller than every entry it is 1 - so
+ * callers inserting at this index should verify ordering.
+ */
+static int squoze_interned_find (uint64_t hash)
 {
-  int miny;
-  CtxSegment *entries = (CtxSegment*)&rasterizer->edge_list.entries[0];
-  rasterizer->horizontal_edges = 0;
-  rasterizer->ending_edges = 0;
-  for (int i = 0; i < rasterizer->pending_edges; i++)
-    {
-      if (entries[rasterizer->edges[CTX_MAX_EDGES-1-i]].data.s16[1] - 1 <= rasterizer->scanline)
-        {
-          if (CTX_LIKELY(rasterizer->active_edges < CTX_MAX_EDGES-2))
-            {
-              int no = rasterizer->active_edges;
-              rasterizer->active_edges++;
-              rasterizer->edges[no] = rasterizer->edges[CTX_MAX_EDGES-1-i];
-              rasterizer->edges[CTX_MAX_EDGES-1-i] =
-                rasterizer->edges[CTX_MAX_EDGES-1-rasterizer->pending_edges + 1];
-              rasterizer->pending_edges--;
-              i--;
-            }
-        }
-    }
-  int limit3 = CTX_RASTERIZER_AA_SLOPE_LIMIT3;
-  //if (rasterizer->fast_aa)
-    limit3 = CTX_RASTERIZER_AA_SLOPE_LIMIT3_FAST_AA;
-  int scanline = rasterizer->scanline;
-  int next_scanline = scanline + CTX_FULL_AA;
-  int edge_pos = rasterizer->edge_pos;
-  int edge_count = rasterizer->edge_list.count;
-  int *edges = rasterizer->edges;
-  while ((edge_pos < edge_count &&
-         (miny=entries[edge_pos].data.s16[1]-1)  <= next_scanline))
-    {
-      int maxy=entries[edge_pos].data.s16[3]-1;
-      if (rasterizer->active_edges < CTX_MAX_EDGES-2 &&
-          maxy >= scanline)
-        {
-          int dy = (entries[edge_pos].data.s16[3] - 1 - miny);
-          if (dy)
-            {
-              int yd = scanline - miny;
-              int no = rasterizer->active_edges;
-              rasterizer->active_edges++;
-              int index = edges[no] = edge_pos;
-              int x0 = entries[index].data.s16[0];
-              int x1 = entries[index].data.s16[2];
-              int dx_dy = CTX_RASTERIZER_EDGE_MULTIPLIER * (x1 - x0) / dy;
-              entries[index].delta = dx_dy;
-              entries[index].val = x0 * CTX_RASTERIZER_EDGE_MULTIPLIER +
-                                         (yd * dx_dy);
-
-              {
-                int abs_dx_dy = abs(dx_dy);
-                rasterizer->needs_aa3  += (abs_dx_dy > limit3);
-                rasterizer->needs_aa5  += (abs_dx_dy > CTX_RASTERIZER_AA_SLOPE_LIMIT5);
-                rasterizer->needs_aa15 += (abs_dx_dy > CTX_RASTERIZER_AA_SLOPE_LIMIT15);
-              }
-
-              if (miny > scanline)
-                {
-                  /* it is a pending edge - we add it to the end of the array
-                     and keep a different count for items stored here, like
-                     a heap and stack growing against each other
-                  */
-                  if (rasterizer->pending_edges < CTX_MAX_PENDING-1)
-                  {
-                    edges[CTX_MAX_EDGES-1-rasterizer->pending_edges] =
-                    rasterizer->edges[no];
-                    rasterizer->pending_edges++;
-                    rasterizer->active_edges--;
-                  }
-                }
-            }
-          else
-            rasterizer->horizontal_edges ++;
-        }
-      edge_pos++;
-    }
-    rasterizer->edge_pos = edge_pos;
-    ctx_rasterizer_discard_edges (rasterizer);
-    if (apply2_sort)
-      ctx_edge2_insertion_sort2 ((CtxSegment*)rasterizer->edge_list.entries, rasterizer->edges, 
rasterizer->active_edges);
-    else
-      ctx_edge2_insertion_sort ((CtxSegment*)rasterizer->edge_list.entries, rasterizer->edges, 
rasterizer->active_edges);
+#if 1
+  int min = 0;
+  int max = n_interned - 1;
+  if (max <= 0)
+    return 0;
+  do
+  {
+     int pos = (min + max)/2;
+     if (interned[pos].hash == hash)
+       return pos;
+     else if (min == max - 1)
+       return max; /* window is two wide and its lower entry did not match */
+     else if (interned[pos].hash < hash)
+       min = pos;
+     else
+       max = pos;
+  } while (min != max);
+  return max;
+#else
+  for (int i = 0; i < n_interned; i++)
+    if (interned[i].hash > hash)
+      return i;
+  return 0;
+#endif
 }
 
-
-#undef CTX_CMPSWP
-
-static inline void ctx_coverage_post_process (CtxRasterizer *rasterizer, int minx, int maxx, uint8_t 
*coverage, int *first_col, int *last_col)
+/* Hash utf8 with _squoze and, unless SQUOZE_NO_INTERNING is defined,
+ * remember the string in the intern table when its hash is an overflowed
+ * one (too long to be stored in the hash itself).
+ *
+ * The intern table is kept sorted by hash. A hash that is already present
+ * is not added again. If the table cannot be grown the string is simply
+ * not interned; the hash is returned in every case.
+ */
+static inline uint64_t squoze (int squoze_dim, const char *utf8)
 {
-  int scanline     = rasterizer->scanline - CTX_FULL_AA; // we do the
-                                                 // post process after
-                                                 // coverage generation icnrement
-#if CTX_ENABLE_SHADOW_BLUR
-  if (CTX_UNLIKELY(rasterizer->in_shadow))
+  uint64_t hash = _squoze (squoze_dim, utf8);
+#ifdef SQUOZE_NO_INTERNING
+  return hash;
+#endif
+  uint64_t overflowed_mask = squoze_overflow_mask_for_dim (squoze_dim);
+  if (hash & overflowed_mask)
   {
-    float radius = rasterizer->state->gstate.shadow_blur;
-    int dim = 2 * radius + 1;
-    if (CTX_UNLIKELY (dim > CTX_MAX_GAUSSIAN_KERNEL_DIM))
-      dim = CTX_MAX_GAUSSIAN_KERNEL_DIM;
+    int pos = squoze_interned_find (hash);
+    if (interned)
+    {
+      /* the search result is only a hint: step to the first entry whose
+       * hash is not smaller than ours, so that inserting at pos keeps the
+       * table sorted */
+      while (pos < n_interned && interned[pos].hash < hash)
+        pos++;
+      while (pos > 0 && interned[pos-1].hash >= hash)
+        pos--;
+      if (pos < n_interned && interned[pos].hash == hash)
+        return hash;
+    }
+
+    if (n_interned + 1 >= s_interned)
     {
-      uint16_t temp[maxx-minx+1];
-      memset (temp, 0, sizeof (temp));
-      for (int x = dim/2; x < maxx-minx + 1 - dim/2; x ++)
-        for (int u = 0; u < dim; u ++)
-        {
-          temp[x] += coverage[minx+x+u-dim/2] * rasterizer->kernel[u] * 256;
-        }
-      for (int x = 0; x < maxx-minx + 1; x ++)
-        coverage[minx+x] = temp[x] >> 8;
+       int new_size = (s_interned + 128)*2;
+       CashInterned *grown = (CashInterned*)realloc (interned, new_size * sizeof (CashInterned));
+       if (!grown)
+         return hash; /* out of memory: keep the old table, skip interning */
+       interned   = grown;
+       s_interned = new_size;
     }
-  }
-#endif
 
-#if CTX_ENABLE_CLIP
-  if (CTX_UNLIKELY(rasterizer->clip_buffer &&  !rasterizer->clip_rectangle))
-  {
-    /* perhaps not working right for clear? */
-    int y = scanline / CTX_FULL_AA;//rasterizer->aa;
-    uint8_t *clip_line = &((uint8_t*)(rasterizer->clip_buffer->data))[rasterizer->blit_width*y];
-    // XXX SIMD candidate
-    for (int x = minx; x <= maxx; x ++)
-    {
-#if CTX_1BIT_CLIP
-       coverage[x] = (coverage[x] * ((clip_line[x/8]&(1<<(x&8)))?255:0))/255;
+#if 1
+    /* open a gap at pos by shifting the entries currently in use */
+    if (n_interned-pos)
+      memmove (&interned[pos+1], &interned[pos], (n_interned-pos) * sizeof (CashInterned));
+    // the memmove is the expensive part of testing for collisions
+    // insertions should be cheaper! at least looking up strings
+    // is cheap
 #else
-       coverage[x] = (255 + coverage[x] * clip_line[x-rasterizer->blit_x])>>8;
+    pos = n_interned;
 #endif
+    n_interned++;
+    {
+      CashInterned *entry = &interned[pos];
+      entry->hash = hash;
+      entry->string = strdup (utf8);
     }
+
   }
-  if (CTX_UNLIKELY(rasterizer->aa == 1))
-  {
-    for (int x = minx; x <= maxx; x ++)
-     coverage[x] = coverage[x] > 127?255:0;
-  }
-#endif
+  return hash;
 }
 
-#define CTX_EDGE(no)      entries[edges[no]]
-#define CTX_EDGE_YMIN     (segment->data.s16[1]-1)
+uint32_t squoze6 (const char *utf8)
+{
+  return squoze (6, utf8);
+}
 
-#define UPDATE_PARITY \
-        { \
-          if (scanline!=CTX_EDGE_YMIN)\
-            parity = (is_winding)? \
-             parity + -1+2*(segment->code == CTX_EDGE_FLIPPED) : \
-                        1 - parity;\
-        }
+uint64_t squoze10 (const char *utf8)
+{
+  return squoze (10, utf8);
+}
 
-inline static void
-ctx_rasterizer_generate_coverage (CtxRasterizer *rasterizer,
-                                  int            minx,
-                                  int            maxx,
-                                  uint8_t       *coverage,
-                                  int            is_winding,
-                                  const uint8_t  aa_factor)
+uint64_t squoze12 (const char *utf8)
 {
-  CtxSegment *entries      = (CtxSegment*)(&rasterizer->edge_list.entries[0]);
-  int        *edges        = rasterizer->edges;
-  int         scanline     = rasterizer->scanline;
-  int         active_edges = rasterizer->active_edges;
-  int         parity       = 0;
-  coverage -= minx;
-  uint8_t fraction = 255/aa_factor;
-  for (int t = 0; t < active_edges -1;t++)
-    {
-      CtxSegment *segment = &entries[edges[t]];
-      UPDATE_PARITY;
+  return squoze (12, utf8);
+}
 
-      if (parity)
-        {
-          CtxSegment *next_segment = &entries[edges[t+1]];
-          const int x0 = segment->val;
-          const int x1 = next_segment->val;
-          int graystart = x0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
-          int grayend   = x1 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
-          int first     = graystart >> 8;
-          int last      = grayend   >> 8;
+uint32_t ctx_strhash(const char *str) {
+  return squoze (6, str);
+}
 
-          if (first < minx)
-          { 
-            first = minx;
-            graystart=0;
-          }
-          if (last > maxx)
-          {
-            last = maxx;
-            grayend=255;
-          }
+typedef struct CashUtf5Dec {
+  int       is_utf5;
+  int       offset;
+  void     *write_data;
+  uint32_t  current;
+  void    (*append_unichar) (uint32_t unichar, void *write_data);
+  int       jumped_amount;
+  int       jump_mode;
+} CashUtf5Dec;
 
-          graystart = fraction- (graystart&0xff)/aa_factor;
-          grayend   = (grayend & 0xff) / aa_factor;
+typedef struct CashUtf5DecDefaultData {
+  uint8_t *buf;
+  int      length;
+} CashUtf5DecDefaultData;
 
-          if (first < last)
-          {
-              coverage[first] += graystart;
-              for (int x = first + 1; x < last; x++)
-                coverage[x]  += fraction;
-              coverage[last] += grayend;
-          }
-          else if (first == last)
-            coverage[first] += (graystart-(fraction-grayend));
-        }
-   }
+static void squoze_decode_utf5_append_unichar_as_utf8 (uint32_t unichar, void *write_data)
+{
+  CashUtf5DecDefaultData *data = (CashUtf5DecDefaultData*)write_data;
+  int length = squoze_unichar_to_utf8 (unichar, &data->buf[data->length]);
+  data->buf[data->length += length] = 0;
 }
 
-inline static void
-ctx_rasterizer_generate_coverage_set (CtxRasterizer *rasterizer,
-                                      int            minx,
-                                      int            maxx,
-                                      uint8_t       *coverage,
-                                      int            is_winding)
+static void squoze_decode_jump (CashUtf5Dec *dec, uint8_t in)
 {
-  CtxSegment *entries = (CtxSegment*)(&rasterizer->edge_list.entries[0]);
-  int      *edges = rasterizer->edges;
-  int scanline     = rasterizer->scanline;
-  int active_edges = rasterizer->active_edges;
-  int parity = 0;
-  coverage -= minx;
-  for (int t = 0; t < active_edges -1;t++)
-    {
-      CtxSegment *segment = &entries[edges[t]];
-      UPDATE_PARITY;
+  dec->offset -= SQUOZE_JUMP_STRIDE * dec->jumped_amount;
+  int jump_len = (dec->jump_mode - SQUOZE_DEC_OFFSET_A) * 4 +
+                 (in - SQUOZE_DEC_OFFSET_A);
+  if (jump_len > 7)
+    jump_len = 5 - jump_len;
+  else
+    jump_len += 3;
+  dec->offset += jump_len * SQUOZE_JUMP_STRIDE;
+  dec->jumped_amount = 0;
+}
 
-      if (parity)
-        {
-          CtxSegment *next_segment = &entries[edges[t+1]];
-          const int x0        = segment->val;
-          const int x1        = next_segment->val;
-          int graystart = x0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
-          int grayend   = x1 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
-          int first     = graystart >> 8;
-          int last      = grayend   >> 8;
+static void squoze_decode_utf5 (CashUtf5Dec *dec, uint8_t in)
+{
+  if (dec->is_utf5)
+  {
+    if (in >= 16)
+    {
+      if (dec->current)
+      {
+        dec->offset = squoze_new_offset (dec->current);
+        dec->append_unichar (dec->current, dec->write_data);
+        dec->current = 0;
+      }
+    }
+    if (in == SQUOZE_ENTER_SQUEEZE)
+    {
+      if (dec->current)
+      {
+        dec->offset = squoze_new_offset (dec->current);
+        dec->append_unichar (dec->current, dec->write_data);
+        dec->current = 0;
+      }
+      dec->is_utf5 = 0;
+    }
+    else
+    {
+      dec->current = dec->current * 16 + (in % 16);
+    }
+  }
+  else
+  {
+    if (dec->jumped_amount)
+    {
+      switch (in)
+      {
+        case SQUOZE_DEC_OFFSET_A:
+        case SQUOZE_DEC_OFFSET_B:
+        case SQUOZE_INC_OFFSET_A:
+        case SQUOZE_INC_OFFSET_B:
+          squoze_decode_jump (dec, in);
+          break;
+        default:
+          dec->append_unichar (dec->offset + (in - 1), dec->write_data);
+          dec->jumped_amount = 0;
+          dec->jump_mode = 0;
+          break;
+      }
+    }
+    else
+    {
+      switch (in)
+      {
+        case SQUOZE_ENTER_UTF5:
+          dec->is_utf5 = 1;
+          dec->jumped_amount = 0;
+          dec->jump_mode = 0;
+          break;
+        case SQUOZE_SPACE: 
+          dec->append_unichar (' ', dec->write_data);
+          dec->jumped_amount = 0;
+          dec->jump_mode = 0;
+          break;
+        case SQUOZE_DEC_OFFSET_A:
+          dec->jumped_amount = -2;
+          dec->jump_mode = in;
+          dec->offset += dec->jumped_amount * SQUOZE_JUMP_STRIDE;
+          break;
+        case SQUOZE_INC_OFFSET_A:
+          dec->jumped_amount = 2;
+          dec->jump_mode = in;
+          dec->offset += dec->jumped_amount * SQUOZE_JUMP_STRIDE;
+          break;
+        case SQUOZE_DEC_OFFSET_B:
+          dec->jumped_amount = -1;
+          dec->jump_mode = in;
+          dec->offset += dec->jumped_amount * SQUOZE_JUMP_STRIDE;
+          break;
+        case SQUOZE_INC_OFFSET_B:
+          dec->jumped_amount = 1;
+          dec->jump_mode = in;
+          dec->offset += dec->jumped_amount * SQUOZE_JUMP_STRIDE;
+          break;
+        default:
+          dec->append_unichar (dec->offset + (in - 1), dec->write_data);
+          dec->jumped_amount = 0;
+          dec->jump_mode = 0;
+      }
+    }
+  }
+}
 
-          if (first < minx)
-          { 
-            first = minx;
-            graystart=0;
-          }
-          if (last > maxx)
-          {
-            last = maxx;
-            grayend=255;
-          }
+static void squoze_decode_utf5_bytes (int is_utf5, 
+                        const unsigned char *input, int inlen,
+                        char *output, int *r_outlen)
+{
+  CashUtf5DecDefaultData append_data = {(unsigned char*)output, 0};
+  CashUtf5Dec dec = {is_utf5,
+                     squoze_new_offset('a'),
+                     &append_data,
+                     0,
+                     squoze_decode_utf5_append_unichar_as_utf8,
+                     0, 0
+                    };
+  for (int i = 0; i < inlen; i++)
+    squoze_decode_utf5 (&dec, input[i]);
+  if (dec.current)
+    dec.append_unichar (dec.current, dec.write_data);
+  if (r_outlen)
+    *r_outlen = append_data.length;
+}
 
-          graystart = 255 - (graystart&0xff);
-          grayend   = (grayend & 0xff);
+static const char *squoze_decode_r (int squoze_dim, uint64_t hash, char *ret, int retlen)
+{
+  uint64_t overflowed_mask = ((uint64_t)1<<(squoze_dim * 5 + 1));
 
-          if (first < last)
-          {
-              coverage[first] += graystart;
+  if (hash & overflowed_mask)
+  {
 #if 0
-              for (int x = first + 1; x < last; x++)
-                coverage[x] = 255;
+    for (int i = 0; i < n_interned; i++)
+    {
+      CashInterned *entry = &interned[i];
+      if (entry->hash == hash)
+        return entry->string;
+    }
 #else
-              memset(&coverage[first+1], 255, last-(first+1));
+    int pos = squoze_interned_find (hash);
+    if (!interned || (interned[pos].hash!=hash))
+      return NULL;
+    return interned[pos].string;
 #endif
-              coverage[last]  += grayend;
-          }
-          else if (first == last)
-            coverage[first] += (graystart-(255-grayend));
-        }
-   }
-}
+    return NULL;
+  }
 
-static inline uint32_t
-ctx_over_RGBA8 (uint32_t dst, uint32_t src, uint32_t cov)
-{
-  uint32_t si_ga = (src & 0xff00ff00) >> 8;
-  uint32_t si_rb = src & 0x00ff00ff;
-  uint32_t si_a  = si_ga >> 16;
-  uint32_t rcov  = ((255+si_a * cov)>>8)^255;
-  uint32_t di_ga = ( dst & 0xff00ff00) >> 8;
-  uint32_t di_rb = dst & 0x00ff00ff;
-  return
-     ((((si_rb * cov) + 0xff00ff + (di_rb * rcov)) & 0xff00ff00) >> 8)  |
-      (((si_ga * cov) + 0xff00ff + (di_ga * rcov)) & 0xff00ff00);
-}
+  uint8_t utf5[140]=""; // we newer go really high since there isnt room
+                        // in the integers
+  uint64_t tmp = hash & (overflowed_mask-1);
+  int len = 0;
+  int is_utf5 = tmp & 1;
+  tmp /= 2;
+  int in_utf5 = is_utf5;
+  while (tmp > 0)
+  {
+    uint64_t remnant = tmp % 32;
+    uint64_t val = remnant;
 
+    if      ( in_utf5 && val == SQUOZE_ENTER_SQUEEZE) in_utf5 = 0;
+    else if (!in_utf5 && val == SQUOZE_ENTER_UTF5) in_utf5 = 1;
 
-static inline uint32_t
-ctx_over_RGBA8_full (uint32_t dst, uint32_t src)
-{
-  uint32_t si_ga = (src & 0xff00ff00) >> 8;
-  uint32_t si_rb = src & 0x00ff00ff;
-  uint32_t si_a  = si_ga >> 16;
-  uint32_t rcov  = si_a^255;
-  uint32_t di_ga = (dst & 0xff00ff00) >> 8;
-  uint32_t di_rb = dst & 0x00ff00ff;
-  return
-     ((((si_rb * 255) + 0xff00ff + (di_rb * rcov)) & 0xff00ff00) >> 8)  |
-      (((si_ga * 255) + 0xff00ff + (di_ga * rcov)) & 0xff00ff00);
+    utf5[len++] = val;
+    tmp -= remnant;
+    tmp /= 32;
+  }
+  utf5[len]=0;
+  squoze_decode_utf5_bytes (is_utf5, utf5, len, ret, &retlen);
+  //ret[retlen]=0;
+  return ret;
 }
 
-static inline uint32_t
-ctx_over_RGBA8_2 (uint32_t dst, uint32_t si_ga, uint32_t si_rb, uint32_t si_a, uint32_t cov)
+/* copy the value as soon as possible, some mitigation is in place
+ * for more than one value in use and cross-thread interactions.
+ */
+static const char *squoze_decode (int squoze_dim, uint64_t hash)
 {
-  uint32_t rcov  = ((si_a * cov)/255)^255;
-  uint32_t di_ga = (dst & 0xff00ff00) >> 8;
-  uint32_t di_rb = dst & 0x00ff00ff;
-  return
-     ((((si_rb * cov) + 0xff00ff + (di_rb * rcov)) & 0xff00ff00) >> 8)  |
-      (((si_ga * cov) + 0xff00ff + (di_ga * rcov)) & 0xff00ff00);
+#define THREAD __thread  // use thread local storage
+  static THREAD int no = 0;
+  static THREAD char ret[8][256];
+  no ++;
+  if (no > 7) no = 0;
+  return squoze_decode_r (squoze_dim, hash, ret[no], 256);
+#undef THREAD
 }
 
-static inline uint32_t
-ctx_over_RGBA8_full_2 (uint32_t dst, uint32_t si_ga_full, uint32_t si_rb_full, uint32_t si_a)
+const char *squoze6_decode (uint32_t hash)
 {
-  uint32_t rcov = si_a^255;
-  uint32_t di_ga = ( dst & 0xff00ff00) >> 8;
-  uint32_t di_rb = dst & 0x00ff00ff;
-  return
-     ((((si_rb_full) + (di_rb * rcov)) & 0xff00ff00) >> 8)  |
-      (((si_ga_full) + (di_ga * rcov)) & 0xff00ff00);
+  return squoze_decode (6, hash);
 }
 
-static inline void ctx_span_set_color (uint32_t *dst_pix, uint32_t val, int count)
+const char *squoze10_decode (uint64_t hash)
 {
-  if (count>0)
-  while(count--)
-    *dst_pix++=val;
+  return squoze_decode (10, hash);
 }
 
-inline static void
-ctx_rasterizer_generate_coverage_apply (CtxRasterizer *rasterizer,
-                                        int            minx,
-                                        int            maxx,
-                                        uint8_t       *coverage,
-                                        int            is_winding,
-                                        CtxCovPath     comp)
+const char *squoze12_decode (uint64_t hash)
 {
-  CtxSegment *entries = (CtxSegment*)(&rasterizer->edge_list.entries[0]);
-  int *edges  = rasterizer->edges;
-  int scanline        = rasterizer->scanline;
-  const int bpp             = rasterizer->format->bpp/8;
-  int active_edges    = rasterizer->active_edges;
-  int parity        = 0;
-  const uint32_t src_pix    = ((uint32_t*)rasterizer->color)[0];
-  const uint32_t si_ga = ((uint32_t*)rasterizer->color)[1];
-  const uint32_t si_rb = ((uint32_t*)rasterizer->color)[2];
-  const uint32_t si_ga_full = ((uint32_t*)rasterizer->color)[3];
-  const uint32_t si_rb_full = ((uint32_t*)rasterizer->color)[4];
-  const uint32_t si_a  = si_ga >> 16;
+  return squoze_decode (12, hash);
+}
 
-  uint8_t *dst = ( (uint8_t *) rasterizer->buf) +
-         (rasterizer->blit_stride * (scanline / CTX_FULL_AA));
-  int accumulator_x=0;
-  uint8_t accumulated = 0;
-  for (int t = 0; t < active_edges -1;t++)
-    {
-      CtxSegment *segment = &entries[edges[t]];
-      UPDATE_PARITY;
-
-       if (parity)
-        {
-          CtxSegment   *next_segment = &entries[edges[t+1]];
-          const int x0        = segment->val;
-          const int x1        = next_segment->val;
-          int graystart = x0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
-          int grayend   = x1 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
-          int first     = graystart >> 8;
-          int last      = grayend   >> 8;
-
-          if (first < minx)
-          { 
-            first = minx;
-            graystart=0;
-          }
-          if (last > maxx)
-          {
-            last = maxx;
-            grayend=255;
-          }
-
-          graystart = 255 - (graystart&0xff);
-          grayend   = (grayend & 0xff);
-
-          if (accumulated)
-          {
-            if (accumulator_x == first)
-            {
-              graystart += accumulated;
-            }
-            else
-            {
-              uint32_t* dst_pix = (uint32_t*)(&dst[(accumulator_x*bpp)]);
-              switch (comp)
-              {
-                case CTX_COV_PATH_COPY:
-                  *dst_pix = ctx_lerp_RGBA8_2(*dst_pix, si_ga, si_rb, accumulated);
-                  break;
-                case CTX_COV_PATH_OVER:
-                  *dst_pix = ctx_over_RGBA8_2(*dst_pix, si_ga, si_rb, si_a, accumulated);
-                  break;
-                default:
-                  ctx_rasterizer_apply_coverage (rasterizer, (uint8_t*)dst_pix, accumulator_x, &accumulated, 
1);
-              }
-            }
-            accumulated = 0;
-          }
-
-          if (first < last)
-          {
-            switch (comp)
-            {
-              case CTX_COV_PATH_COPY:
-            {
-              uint32_t* dst_pix = (uint32_t*)(&dst[(first *bpp)]);
-              *dst_pix = ctx_lerp_RGBA8_2(*dst_pix, si_ga, si_rb, graystart);
-
-              dst_pix++;
-              ctx_span_set_color (dst_pix, src_pix, last - first - 1);
-#if 0
-              for (int i = first + 1; i < last; i++)
-              {
-                *dst_pix = src_pix;
-                dst_pix++;
-              }
-#endif
-            }
-            break;
-              case CTX_COV_PATH_OVER:
-            {
-              uint32_t* dst_pix = (uint32_t*)(&dst[(first *bpp)]);
-              *dst_pix = ctx_over_RGBA8_2(*dst_pix, si_ga, si_rb, si_a, graystart);
-              dst_pix++;
-              for (int i = first + 1; i < last; i++)
-              {
-                *dst_pix = ctx_over_RGBA8_full_2(*dst_pix, si_ga_full, si_rb_full, si_a);
-                dst_pix++;
-              }
-            }
-            break;
-              case CTX_COV_PATH_COPY_FRAGMENT:
-            {
-              float u0 = 0; float v0 = 0;
-              float ud = 0; float vd = 0;
-              uint8_t gs = graystart;
-              ctx_RGBA8_source_copy_normal_fragment (rasterizer, &dst[(first * bpp)], NULL, first, &gs, 1);
-              ctx_init_uv (rasterizer, first+1, last-first-1, &u0, &v0, &ud, &vd);
-              rasterizer->fragment (rasterizer, u0, v0, &dst[(first+1)*bpp], last-first-1, ud, vd);
-            }
-            break;
-              case CTX_COV_PATH_OVER_FRAGMENT:
-            {
-              uint8_t gs = graystart;
-              ctx_RGBA8_source_over_normal_fragment (rasterizer, &dst[(first * bpp)], NULL, first, &gs, 1);
-              ctx_RGBA8_source_over_normal_full_cov_fragment (rasterizer,
-                                                     &dst[((first+1)*bpp)], NULL, first + 1, NULL, 
last-first-1);
-            }
-            break;
-              default:
-            {
-              uint8_t opaque[last-first];
-              memset (opaque, 255, sizeof (opaque));
-              opaque[0] = graystart;
-              ctx_rasterizer_apply_coverage (rasterizer,
-                                             &dst[(first * bpp)], first, opaque, last-first);
-            }
-            }
-            accumulated = grayend;
-          }
-          else if (first == last)
-          {
-            accumulated = (graystart-(255-grayend));
-          }
-          accumulator_x = last;
-        }
-   }
-
-   if (accumulated)
-   {
-     uint32_t* dst_pix = (uint32_t*)(&dst[(accumulator_x*bpp)]);
-     switch (comp)
-     {
-       case CTX_COV_PATH_COPY:
-         *dst_pix = ctx_lerp_RGBA8_2(*dst_pix, si_ga, si_rb, accumulated);
-         break;
-       case CTX_COV_PATH_OVER:
-         *dst_pix = ctx_over_RGBA8_2(*dst_pix, si_ga, si_rb, si_a, accumulated);
-         break;
-       default:
-         ctx_rasterizer_apply_coverage (rasterizer, (uint8_t*)dst_pix, accumulator_x, &accumulated, 1);
-     }
-   }
+static inline uint32_t
+squoze_utf8_to_unichar (const char *input)
+{
+  const uint8_t *utf8 = (const uint8_t *) input;
+  uint8_t c = utf8[0];
+  if ( (c & 0x80) == 0)
+    { return c; }
+  else if ( (c & 0xE0) == 0xC0)
+    return ( (utf8[0] & 0x1F) << 6) |
+           (utf8[1] & 0x3F);
+  else if ( (c & 0xF0) == 0xE0)
+    return ( (utf8[0] & 0xF)  << 12) |
+           ( (utf8[1] & 0x3F) << 6) |
+           (utf8[2] & 0x3F);
+  else if ( (c & 0xF8) == 0xF0)
+    return ( (utf8[0] & 0x7)  << 18) |
+           ( (utf8[1] & 0x3F) << 12) |
+           ( (utf8[2] & 0x3F) << 6) |
+           (utf8[3] & 0x3F);
+  else if ( (c & 0xFC) == 0xF8)
+    return ( (utf8[0] & 0x3)  << 24) |
+           ( (utf8[1] & 0x3F) << 18) |
+           ( (utf8[2] & 0x3F) << 12) |
+           ( (utf8[3] & 0x3F) << 6) |
+           (utf8[4] & 0x3F);
+  else if ( (c & 0xFE) == 0xFC)
+    return ( (utf8[0] & 0x1)  << 30) |
+           ( (utf8[1] & 0x3F) << 24) |
+           ( (utf8[2] & 0x3F) << 18) |
+           ( (utf8[3] & 0x3F) << 12) |
+           ( (utf8[4] & 0x3F) << 6) |
+           (utf8[5] & 0x3F);
+  return 0;
 }
-
-inline static int ctx_rasterizer_is_simple (CtxRasterizer *rasterizer)
+static inline int
+squoze_unichar_to_utf8 (uint32_t  ch,
+                      uint8_t  *dest)
 {
-  if (rasterizer->fast_aa == 0 ||
-      rasterizer->ending_edges ||
-      rasterizer->pending_edges)
-   return 0;
-  int *edges  = rasterizer->edges;
-  CtxSegment *segments = &((CtxSegment*)(rasterizer->edge_list.entries))[0];
-
-  int active_edges = rasterizer->active_edges;
-  for (int t = 0; t < active_edges -1;t++)
+  /* http://www.cprogramming.com/tutorial/utf8.c  */
+  /*  Basic UTF-8 manipulation routines
+    by Jeff Bezanson
+    placed in the public domain Fall 2005 ... */
+  if (ch < 0x80)
     {
-      CtxSegment *segment0 = segments + edges[t];
-      CtxSegment *segment1 = segments + edges[t+1];
-      const int delta0    = segment0->delta;
-      const int delta1    = segment1->delta;
-      const int x0        = segment0->val;
-      const int x1        = segment1->val;
-      int x0_end   = x0 + delta0 * CTX_AA_HALFSTEP;
-      int x1_end   = x1 + delta1 * CTX_AA_HALFSTEP;
-      int x0_start = x0 - delta0 * CTX_AA_HALFSTEP2;
-      int x1_start = x1 - delta1 * CTX_AA_HALFSTEP2;
-      if (x1_end < x0_end   ||
-          x1_start < x0_end ||
-          x1_end < x0_start
-         )
-         return 0;
+      dest[0] = (char) ch;
+      return 1;
     }
-  return 1;
+  if (ch < 0x800)
+    {
+      dest[0] = (ch>>6) | 0xC0;
+      dest[1] = (ch & 0x3F) | 0x80;
+      return 2;
+    }
+  if (ch < 0x10000)
+    {
+      dest[0] = (ch>>12) | 0xE0;
+      dest[1] = ( (ch>>6) & 0x3F) | 0x80;
+      dest[2] = (ch & 0x3F) | 0x80;
+      return 3;
+    }
+  if (ch < 0x110000)
+    {
+      dest[0] = (ch>>18) | 0xF0;
+      dest[1] = ( (ch>>12) & 0x3F) | 0x80;
+      dest[2] = ( (ch>>6) & 0x3F) | 0x80;
+      dest[3] = (ch & 0x3F) | 0x80;
+      return 4;
+    }
+  return 0;
 }
 
-
-inline static void
-ctx_rasterizer_generate_coverage_set2 (CtxRasterizer *rasterizer,
-                                         int            minx,
-                                         int            maxx,
-                                         uint8_t       *coverage,
-                                         int            is_winding)
+static inline int
+squoze_utf8_len (const unsigned char first_byte)
 {
-  CtxSegment *entries = (CtxSegment*)(&rasterizer->edge_list.entries[0]);
-  int *edges  = rasterizer->edges;
-  int scanline        = rasterizer->scanline;
-  int active_edges    = rasterizer->active_edges;
-  int parity        = 0;
+  if      ( (first_byte & 0x80) == 0)
+    { return 1; } /* ASCII */
+  else if ( (first_byte & 0xE0) == 0xC0)
+    { return 2; }
+  else if ( (first_byte & 0xF0) == 0xE0)
+    { return 3; }
+  else if ( (first_byte & 0xF8) == 0xF0)
+    { return 4; }
+  return 1;
+}
 
-  coverage -= minx;
+#endif
 
-  const int minx_ = minx * CTX_RASTERIZER_EDGE_MULTIPLIER * CTX_SUBDIV;
-  const int maxx_ = maxx * CTX_RASTERIZER_EDGE_MULTIPLIER * CTX_SUBDIV;
 
-  for (int t = 0; t < active_edges -1;t++)
+static inline int
+ctx_conts_for_entry (CtxEntry *entry)
+{
+    switch (entry->code)
     {
-      CtxSegment   *segment = &entries[edges[t]];
-      UPDATE_PARITY;
+      case CTX_DATA:
+        return entry->data.u32[1];
+      case CTX_LINEAR_GRADIENT:
+      //case CTX_DEFINE_TEXTURE:
+        return 1;
+      case CTX_RADIAL_GRADIENT:
+      case CTX_ARC:
+      case CTX_ARC_TO:
+      case CTX_REL_ARC_TO:
+      case CTX_CURVE_TO:
+      case CTX_REL_CURVE_TO:
+      case CTX_APPLY_TRANSFORM:
+      case CTX_SOURCE_TRANSFORM:
+      case CTX_COLOR:
+      case CTX_ROUND_RECTANGLE:
+      case CTX_SHADOW_COLOR:
+        return 2;
+      case CTX_FILL_RECT:
+      case CTX_STROKE_RECT:
+      case CTX_RECTANGLE:
+      case CTX_VIEW_BOX:
+      case CTX_REL_QUAD_TO:
+      case CTX_QUAD_TO:
+        return 1;
 
-       if (parity)
+      case CTX_TEXT:
+      case CTX_LINE_DASH:
+      case CTX_COLOR_SPACE:
+      case CTX_STROKE_TEXT:
+      case CTX_FONT:
+      case CTX_TEXTURE:
         {
-          CtxSegment   *next_segment = &entries[edges[t+1]];
-          const int x0        = segment->val;
-          const int x1        = next_segment->val;
-          const int delta0    = segment->delta;
-          const int delta1    = next_segment->delta;
-
-          int x0_start = x0 - delta0 * CTX_AA_HALFSTEP2;
-          int x1_start = x1 - delta1 * CTX_AA_HALFSTEP2;
-          int x0_end   = x0 + delta0 * CTX_AA_HALFSTEP;
-          int x1_end   = x1 + delta1 * CTX_AA_HALFSTEP;
-
-          int graystart = x0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
-          int grayend   = x1 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
-          int first     = graystart >> 8;
-          int last      = grayend   >> 8;
-
-          first = ctx_maxi (first, minx);
-          last  = ctx_mini (last, maxx);
+          int eid_len = entry[1].data.u32[1];
+          return eid_len + 1;
+        }
+      case CTX_DEFINE_TEXTURE:
+        {
+          int eid_len = entry[2].data.u32[1];
+          int pix_len = entry[2 + eid_len + 1].data.u32[1];
+          return eid_len + pix_len + 2 + 1;
+        }
+      default:
+        return 0;
+    }
+}
 
-          if (first < last)
-          {
-            int pre = 1;
-            int post = 1;
+// expanding arc_to to arc can be the job
+// of a layer in front of backend?
+//   doing:
+//     rectangle
+//     arc
+//     ... etc reduction to beziers
+//     or even do the reduction to
+//     polylines directly here...
+//     making the rasterizer able to
+//     only do poly-lines? will that be faster?
 
-            if (abs(delta0) < CTX_RASTERIZER_AA_SLOPE_LIMIT3_FAST_AA)
-            {
-              graystart = 255 - (graystart&0xff);
-              coverage[first] += graystart;
-            }
-            else
-            {
-              int u0 = ctx_mini (x0_start, x0_end);
-              int u1 = ctx_maxi (x0_start, x0_end);
-              u0 = ctx_maxi (u0, minx_);
-              u1 = ctx_mini (u1, maxx_);
-              u1 = ctx_maxi (u1, minx_);
-              u0 = ctx_mini (u0, maxx_);
-  
-              int us = u0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV);
-              int count = 0;
+/* the iterator - should decode bitpacked data as well -
+ * making the rasterizers simpler, possibly do unpacking
+ * all the way to absolute coordinates.. unless mixed
+ * relative/not are wanted.
+ */
 
-              int mod = (255-(u0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256) % 256)) *
-                         (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/255);
-              int sum = ((u1-u0+CTX_RASTERIZER_EDGE_MULTIPLIER * CTX_SUBDIV)/255);
 
-              for (int u = u0; u < u1; u+= CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV)
-              {
-                coverage[us + count] = (u - u0 + mod) / sum;
-                count++;
-              }
-              pre = (us+count-1)-first+1;
-            }
-  
-            if (abs(delta1) < CTX_RASTERIZER_AA_SLOPE_LIMIT3_FAST_AA)
-            {
-               grayend   = (grayend & 0xff);
-               coverage[last] += grayend;
-            }
-            else
-            {
-              int u0 = ctx_mini (x1_start, x1_end);
-              int u1 = ctx_maxi (x1_start, x1_end);
-              u0 = ctx_maxi (u0, minx_);
-              u1 = ctx_mini (u1, maxx_);
-              u1 = ctx_maxi (u1, minx_);
-              u0 = ctx_mini (u0, maxx_);
-              int us = u0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV);
-              int count = 0;
-              int mod = ((255-(u0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256) % 256)+64) *
-                    (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/255));
-              int sum = ((u1-u0+CTX_RASTERIZER_EDGE_MULTIPLIER * CTX_SUBDIV * 1.25)/255);
-              for (int u = u0; u < u1; u+= CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV)
-              {
-                coverage[us + count] = 255-((u - u0 + mod)/ sum);
-                count++;
-              }
-              post = last-us+1;
-            }
-            for (int i = first + pre; i <= last - post; i++)
-              coverage[i] = 255;
-          }
-          else if (first == last)
-          {
-            graystart = 255 - (graystart&0xff);
-            grayend   = (grayend & 0xff);
-            coverage[last]+=(graystart-(255-grayend));
-          }
-        }
-   }
+static void
+ctx_iterator_init (CtxIterator      *iterator,
+                   CtxDrawlist  *drawlist,
+                   int               start_pos,
+                   int               flags)
+{
+  iterator->drawlist   = drawlist;
+  iterator->flags          = flags;
+  iterator->bitpack_pos    = 0;
+  iterator->bitpack_length = 0;
+  iterator->pos            = start_pos;
+  iterator->end_pos        = drawlist->count;
+  iterator->first_run      = 1; // -1 is a marker used for first run
+  ctx_memset (iterator->bitpack_command, 0, sizeof (iterator->bitpack_command) );
 }
 
-
-inline static void
-ctx_rasterizer_generate_coverage_apply2 (CtxRasterizer *rasterizer,
-                                         int            minx,
-                                         int            maxx,
-                                         uint8_t       *coverage,
-                                         int            is_winding,
-                                         CtxCovPath     comp)
+int ctx_iterator_pos (CtxIterator *iterator)
 {
-  CtxSegment *entries = (CtxSegment*)(&rasterizer->edge_list.entries[0]);
-  int *edges          = rasterizer->edges;
-  int  scanline       = rasterizer->scanline;
-  const int  bpp      = rasterizer->format->bpp/8;
-  int  active_edges   = rasterizer->active_edges;
-  int  parity         = 0;
+  return iterator->pos;
+}
 
-  const uint32_t src_pix    = ((uint32_t*)rasterizer->color)[0];
-  const uint32_t si_ga      = ((uint32_t*)rasterizer->color)[1];
-  const uint32_t si_rb      = ((uint32_t*)rasterizer->color)[2];
-  const uint32_t si_ga_full = ((uint32_t*)rasterizer->color)[3];
-  const uint32_t si_rb_full = ((uint32_t*)rasterizer->color)[4];
-  const uint32_t si_a  = src_pix >> 24;
+static inline CtxEntry *_ctx_iterator_next (CtxIterator *iterator)
+{
+  int ret = iterator->pos;
+  CtxEntry *entry = &iterator->drawlist->entries[ret];
+  if (CTX_UNLIKELY(ret >= iterator->end_pos))
+    { return NULL; }
 
-  uint8_t *dst = ( (uint8_t *) rasterizer->buf) +
-         (rasterizer->blit_stride * (scanline / CTX_FULL_AA));
+  if (CTX_UNLIKELY(iterator->first_run))
+      iterator->first_run = 0;
+  else
+     iterator->pos += (ctx_conts_for_entry (entry) + 1);
 
-  coverage -= minx;
+  if (CTX_UNLIKELY(iterator->pos >= iterator->end_pos))
+    { return NULL; }
+  return &iterator->drawlist->entries[iterator->pos];
+}
 
-  const int minx_ = minx * CTX_RASTERIZER_EDGE_MULTIPLIER * CTX_SUBDIV;
-  const int maxx_ = maxx * CTX_RASTERIZER_EDGE_MULTIPLIER * CTX_SUBDIV;
+// 6024x4008
+#if CTX_BITPACK
+static void
+ctx_iterator_expand_s8_args (CtxIterator *iterator, CtxEntry *entry)
+{
+  int no = 0;
+  for (int cno = 0; cno < 4; cno++)
+    for (int d = 0; d < 2; d++, no++)
+      iterator->bitpack_command[cno].data.f[d] =
+        entry->data.s8[no] * 1.0f / CTX_SUBDIV;
+  iterator->bitpack_command[0].code =
+    iterator->bitpack_command[1].code =
+      iterator->bitpack_command[2].code =
+        iterator->bitpack_command[3].code = CTX_CONT;
+  iterator->bitpack_length = 4;
+  iterator->bitpack_pos = 0;
+}
 
-  int accumulated_x0 = 65538;
-  int accumulated_x1 = 65536;
+/* Unpack the four signed 16-bit arguments of a bit-packed entry into the
+ * iterator's first two scratch commands: two floats per command, each
+ * scaled down by CTX_SUBDIV.  Both scratch commands are tagged CTX_CONT;
+ * callers overwrite the codes they need afterwards.
+ */
+static void
+ctx_iterator_expand_s16_args (CtxIterator *iterator, CtxEntry *entry)
+{
+  for (int arg = 0; arg < 4; arg++)
+    {
+      CtxEntry *target = &iterator->bitpack_command[arg / 2];
+      target->data.f[arg % 2] = entry->data.s16[arg] * 1.0f / CTX_SUBDIV;
+    }
+  for (int slot = 0; slot < 2; slot++)
+    iterator->bitpack_command[slot].code = CTX_CONT;
+  iterator->bitpack_pos    = 0;
+  iterator->bitpack_length = 2;
+}
+#endif
 
-  for (int t = 0; t < active_edges -1;t++)
+/* Return the next command from the iterator, or NULL when
+ * _ctx_iterator_next () has no more entries.
+ *
+ * When built with CTX_BITPACK and CTX_ITERATOR_EXPAND_BITPACK is set in
+ * iterator->flags, bit-packed entries are unpacked into
+ * iterator->bitpack_command[] and handed out piece by piece on successive
+ * calls; the returned pointer then refers to that scratch array instead of
+ * the drawlist.
+ */
+CtxCommand *
+ctx_iterator_next (CtxIterator *iterator)
+{
+  CtxEntry *ret;
+#if CTX_BITPACK
+  int expand_bitpack = iterator->flags & CTX_ITERATOR_EXPAND_BITPACK;
+again:
+  /* drain commands left in the scratch array by an earlier expansion */
+  if (CTX_UNLIKELY(iterator->bitpack_length))
     {
-      CtxSegment   *segment = &entries[edges[t]];
-      UPDATE_PARITY;
-
-       if (parity)
+      ret = &iterator->bitpack_command[iterator->bitpack_pos];
+      iterator->bitpack_pos += (ctx_conts_for_entry (ret) + 1);
+      if (iterator->bitpack_pos >= iterator->bitpack_length)
         {
-          CtxSegment   *next_segment = &entries[edges[t+1]];
-          const int x0        = segment->val;
-          const int x1        = next_segment->val;
-          const int delta0    = segment->delta;
-          const int delta1    = next_segment->delta;
-
-          int x0_start = x0 - delta0 * CTX_AA_HALFSTEP2;
-          int x1_start = x1 - delta1 * CTX_AA_HALFSTEP2;
-          int x0_end   = x0 + delta0 * CTX_AA_HALFSTEP;
-          int x1_end   = x1 + delta1 * CTX_AA_HALFSTEP;
-
-          int graystart = x0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
-          int grayend   = x1 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
-          int first     = graystart >> 8;
-          int last      = grayend   >> 8;
-
-          first = ctx_maxi (first, minx);
-          last = ctx_mini (last, maxx);
-          graystart = 255 - (graystart&0xff);
-          grayend   = (grayend & 0xff);
-
-          if (first < last)
-          {
-            int pre = 1;
-            int post = 1;
-
-          if (abs(delta0) < CTX_RASTERIZER_AA_SLOPE_LIMIT3_FAST_AA)
-          {
-             coverage[first] += graystart;
-
-            accumulated_x1 = first;
-            accumulated_x0 = ctx_mini (accumulated_x0, first);
-          }
+          iterator->bitpack_length = 0;
+        }
+      return (CtxCommand *) ret;
+    }
+#endif
+  ret = _ctx_iterator_next (iterator);
+#if CTX_BITPACK
+  /* each packed code fills the scratch array, patches the command codes
+   * and jumps back to "again" to return the first expanded command */
+  if (CTX_UNLIKELY(ret && expand_bitpack))
+    switch ((CtxCode)(ret->code))
+      {
+        case CTX_REL_CURVE_TO_REL_LINE_TO:
+          ctx_iterator_expand_s8_args (iterator, ret);
+          iterator->bitpack_command[0].code = CTX_REL_CURVE_TO;
+          iterator->bitpack_command[1].code =
+          iterator->bitpack_command[2].code = CTX_CONT;
+          iterator->bitpack_command[3].code = CTX_REL_LINE_TO;
+          // 0.0 here is a common optimization - so check for it
+          if (ret->data.s8[6]== 0 && ret->data.s8[7] == 0)
+            { iterator->bitpack_length = 3; }
           else
-          {
-            int u0 = ctx_mini (x0_start, x0_end);
-            int u1 = ctx_maxi (x0_start, x0_end);
-            u0 = ctx_maxi (u0, minx_);
-            u1 = ctx_mini (u1, maxx_);
-            u1 = ctx_maxi (u1, minx_);
-            u0 = ctx_mini (u0, maxx_);
-
-            int mod = (255-(u0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256) % 256)) *
-                    (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/255);
-            int sum = ((u1-u0+CTX_RASTERIZER_EDGE_MULTIPLIER * CTX_SUBDIV)/255);
-
-            int us = u0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV);
-            int count = 0;
-            for (int u = u0; u < u1; u+= CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV)
-            {
-              coverage[us + count] = (u - u0 + mod) / sum;
-              count++;
-            }
-            pre = (us+count-1)-first+1;
-
-#if 0 // the CTX_UNLIKELY helps - but this is a big constant overhead
-      // which ends up penalizing us in benchmarks, it needs to be
-      // a shape with really large interior emptiness for this
-      // to be worthwhile
-          if (CTX_UNLIKELY(
-              us - accumulated_x1 > 16 &&
-              accumulated_x1-accumulated_x0>=0
-                          ))
-          {
-             switch (comp)
-             {
-                case CTX_COV_PATH_OVER:
-                {
-                  uint32_t *dst_i = (uint32_t*)&dst[((accumulated_x0) * bpp)];
-                  for (int i = 0; i < accumulated_x1-accumulated_x0+1; i++)
-                    {
-                      *dst_i = ctx_over_RGBA8_2 (*dst_i, si_ga, si_rb, si_a, coverage[accumulated_x0+i]);
-                      dst_i++;
-                    }
-                }
-                break;
-                case CTX_COV_PATH_COPY:
-                {
-                  uint32_t *dst_i = (uint32_t*)&dst[((accumulated_x0) * bpp)];
-                  for (int i = 0; i < accumulated_x1-accumulated_x0+1; i++)
-                  {
-                    *dst_i = ctx_lerp_RGBA8_2 (*dst_i, si_ga, si_rb, coverage[accumulated_x0+i]);
-                    dst_i++;
-                  }
-                }
-                  break;
-                default:
-                ctx_rasterizer_apply_coverage (rasterizer,
-                          &dst[((accumulated_x0) * bpp)],
-                          accumulated_x0,
-                          &coverage[accumulated_x0],
-                          accumulated_x1-accumulated_x0+1);
-             }
-          }
+            iterator->bitpack_length          = 4;
+          goto again;
+        case CTX_REL_LINE_TO_REL_CURVE_TO:
+          ctx_iterator_expand_s8_args (iterator, ret);
+          iterator->bitpack_command[0].code = CTX_REL_LINE_TO;
+          iterator->bitpack_command[1].code = CTX_REL_CURVE_TO;
+          iterator->bitpack_length          = 2;
+          goto again;
+        case CTX_REL_CURVE_TO_REL_MOVE_TO:
+          ctx_iterator_expand_s8_args (iterator, ret);
+          iterator->bitpack_command[0].code = CTX_REL_CURVE_TO;
+          iterator->bitpack_command[3].code = CTX_REL_MOVE_TO;
+          iterator->bitpack_length          = 4;
+          goto again;
+        case CTX_REL_LINE_TO_X4:
+          ctx_iterator_expand_s8_args (iterator, ret);
+          iterator->bitpack_command[0].code =
+          iterator->bitpack_command[1].code =
+          iterator->bitpack_command[2].code =
+          iterator->bitpack_command[3].code = CTX_REL_LINE_TO;
+          iterator->bitpack_length          = 4;
+          goto again;
+        case CTX_REL_QUAD_TO_S16:
+          ctx_iterator_expand_s16_args (iterator, ret);
+          iterator->bitpack_command[0].code = CTX_REL_QUAD_TO;
+          iterator->bitpack_length          = 1;
+          goto again;
+        case CTX_REL_QUAD_TO_REL_QUAD_TO:
+          ctx_iterator_expand_s8_args (iterator, ret);
+          iterator->bitpack_command[0].code =
+          iterator->bitpack_command[2].code = CTX_REL_QUAD_TO;
+          iterator->bitpack_length          = 3;
+          goto again;
+        case CTX_REL_LINE_TO_X2:
+          ctx_iterator_expand_s16_args (iterator, ret);
+          iterator->bitpack_command[0].code =
+          iterator->bitpack_command[1].code = CTX_REL_LINE_TO;
+          iterator->bitpack_length          = 2;
+          goto again;
+        case CTX_REL_LINE_TO_REL_MOVE_TO:
+          ctx_iterator_expand_s16_args (iterator, ret);
+          iterator->bitpack_command[0].code = CTX_REL_LINE_TO;
+          iterator->bitpack_command[1].code = CTX_REL_MOVE_TO;
+          iterator->bitpack_length          = 2;
+          goto again;
+        case CTX_MOVE_TO_REL_LINE_TO:
+          ctx_iterator_expand_s16_args (iterator, ret);
+          iterator->bitpack_command[0].code = CTX_MOVE_TO;
+          // NOTE(review): the packed code is named ..._REL_LINE_TO but the
+          // second command is emitted as CTX_REL_MOVE_TO - confirm intent
+          iterator->bitpack_command[1].code = CTX_REL_MOVE_TO;
+          iterator->bitpack_length          = 2;
+          goto again;
+        case CTX_FILL_MOVE_TO:
+          // the packed entry is copied to slot 1 and retagged as MOVE_TO;
+          // slot 0 only has its code set to CTX_FILL
+          iterator->bitpack_command[1]      = *ret;
+          iterator->bitpack_command[0].code = CTX_FILL;
+          iterator->bitpack_command[1].code = CTX_MOVE_TO;
+          iterator->bitpack_pos             = 0;
+          iterator->bitpack_length          = 2;
+          goto again;
+        // all remaining codes are returned as stored in the drawlist
+        case CTX_LINEAR_GRADIENT:
+        case CTX_QUAD_TO:
+        case CTX_REL_QUAD_TO:
+        case CTX_TEXTURE:
+        case CTX_RECTANGLE:
+        case CTX_VIEW_BOX:
+        case CTX_ARC:
+        case CTX_ARC_TO:
+        case CTX_REL_ARC_TO:
+        case CTX_COLOR:
+        case CTX_SHADOW_COLOR:
+        case CTX_RADIAL_GRADIENT:
+        case CTX_CURVE_TO:
+        case CTX_REL_CURVE_TO:
+        case CTX_APPLY_TRANSFORM:
+        case CTX_SOURCE_TRANSFORM:
+        case CTX_ROUND_RECTANGLE:
+        case CTX_TEXT:
+        case CTX_STROKE_TEXT:
+        case CTX_FONT:
+        case CTX_LINE_DASH:
+        case CTX_FILL:
+        case CTX_NOP:
+        case CTX_MOVE_TO:
+        case CTX_LINE_TO:
+        case CTX_REL_MOVE_TO:
+        case CTX_REL_LINE_TO:
+        case CTX_VER_LINE_TO:
+        case CTX_REL_VER_LINE_TO:
+        case CTX_HOR_LINE_TO:
+        case CTX_REL_HOR_LINE_TO:
+        case CTX_ROTATE:
+        case CTX_FLUSH:
+        case CTX_TEXT_ALIGN:
+        case CTX_TEXT_BASELINE:
+        case CTX_TEXT_DIRECTION:
+        case CTX_MITER_LIMIT:
+        case CTX_GLOBAL_ALPHA:
+        case CTX_COMPOSITING_MODE:
+        case CTX_BLEND_MODE:
+        case CTX_SHADOW_BLUR:
+        case CTX_SHADOW_OFFSET_X:
+        case CTX_SHADOW_OFFSET_Y:
+        case CTX_RESET:
+        case CTX_EXIT:
+        case CTX_BEGIN_PATH:
+        case CTX_CLOSE_PATH:
+        case CTX_SAVE:
+        case CTX_CLIP:
+        case CTX_PRESERVE:
+        case CTX_DEFINE_GLYPH:
+        case CTX_IDENTITY:
+        case CTX_FONT_SIZE:
+        case CTX_START_GROUP:
+        case CTX_END_GROUP:
+        case CTX_RESTORE:
+        case CTX_LINE_WIDTH:
+        case CTX_LINE_DASH_OFFSET:
+        case CTX_STROKE:
+        case CTX_KERNING_PAIR:
+        case CTX_SCALE:
+        case CTX_GLYPH:
+        case CTX_SET_PIXEL:
+        case CTX_FILL_RULE:
+        case CTX_LINE_CAP:
+        case CTX_LINE_JOIN:
+        case CTX_NEW_PAGE:
+        case CTX_SET_KEY:
+        case CTX_TRANSLATE:
+        case CTX_DEFINE_TEXTURE:
+        case CTX_GRADIENT_STOP:
+        case CTX_DATA: // XXX : would be better if we hide the DATAs
+        case CTX_CONT: // shouldn't happen
+        default:
+          iterator->bitpack_length = 0;
+          return (CtxCommand *) ret;
+#if 0
+        default: // XXX remove - and get better warnings
+          iterator->bitpack_command[0] = ret[0];
+          iterator->bitpack_command[1] = ret[1];
+          iterator->bitpack_command[2] = ret[2];
+          iterator->bitpack_command[3] = ret[3];
+          iterator->bitpack_command[4] = ret[4];
+          iterator->bitpack_pos = 0;
+          iterator->bitpack_length = 1;
+          goto again;
 #endif
-
-            accumulated_x0 = ctx_mini (accumulated_x0, us);
-            accumulated_x1 = us + count - 1;
-          }
-
-   if (accumulated_x1-accumulated_x0>=0)
-          {
-             switch (comp)
-             {
-                case CTX_COV_PATH_OVER:
-                {
-                  uint32_t *dst_i = (uint32_t*)&dst[((accumulated_x0) * bpp)];
-                  for (int i = 0; i < accumulated_x1-accumulated_x0+1; i++)
-                    {
-                      *dst_i = ctx_over_RGBA8_2 (*dst_i, si_ga, si_rb, si_a, coverage[accumulated_x0+i]);
-                      dst_i++;
-                    }
-                }
-                break;
-                case CTX_COV_PATH_COPY:
-                {
-                  uint32_t *dst_i = (uint32_t*)&dst[((accumulated_x0) * bpp)];
-                  for (int i = 0; i < accumulated_x1-accumulated_x0+1; i++)
-                  {
-                    *dst_i = ctx_lerp_RGBA8_2 (*dst_i, si_ga, si_rb, coverage[accumulated_x0+i]);
-                    dst_i++;
-                  }
-                }
-                  break;
-                default:
-                ctx_rasterizer_apply_coverage (rasterizer,
-                          &dst[((accumulated_x0) * bpp)],
-                          accumulated_x0,
-                          &coverage[accumulated_x0],
-                          accumulated_x1-accumulated_x0+1);
-             }
-             accumulated_x0 = 65538;
-             accumulated_x1 = 65536;
-          }
-
-          if (abs(delta1) < CTX_RASTERIZER_AA_SLOPE_LIMIT3_FAST_AA)
-          {
-             coverage[last] += grayend;
-             accumulated_x1 = last;
-             accumulated_x0 = last;
-          }
-          else
-          {
-            int u0 = ctx_mini (x1_start, x1_end);
-            int u1 = ctx_maxi (x1_start, x1_end);
-            u0 = ctx_maxi (u0, minx_);
-            u1 = ctx_mini (u1, maxx_);
-            u1 = ctx_maxi (u1, minx_);
-            u0 = ctx_mini (u0, maxx_);
-            int us = u0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV);
-            int count = 0;
-
-            int mod = ((255-(u0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256) % 256)+64) *
-                    (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/255));
-            int sum = ((u1-u0+CTX_RASTERIZER_EDGE_MULTIPLIER * CTX_SUBDIV * 1.25)/255);
-
-            for (int u = u0; u < u1; u+= CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV)
-            {
-                coverage[us + count] = 255-((u - u0 + mod)/ sum);
-              count++;
-            }
-            post = last-us+1;
-
-            accumulated_x1 = us + count;
-            accumulated_x0 = us;
-          }
-            switch (comp)
-            {
-              case CTX_COV_PATH_COPY:
-            {
-              uint32_t* dst_pix = (uint32_t*)(&dst[(first *bpp)]);
-              dst_pix+=pre;
-              ctx_span_set_color (dst_pix, src_pix, last-first-pre-post + 1);
-            }
-            break;
-              case CTX_COV_PATH_OVER:
-            {
-              uint32_t* dst_pix = (uint32_t*)(&dst[(first *bpp)]);
-              dst_pix+=pre;
-              for (int i = first + pre; i <= last - post; i++)
-              {
-                *dst_pix = ctx_over_RGBA8_full_2(*dst_pix, si_ga_full, si_rb_full, si_a);
-                dst_pix++;
-              }
-            }
-            break;
-              case CTX_COV_PATH_COPY_FRAGMENT:
-              {
-              int width = last-first-pre-post+1;
-            if (width>0)
-            {
-              {
-                float u0 = 0; float v0 = 0;
-                float ud = 0; float vd = 0;
-                ctx_init_uv (rasterizer, first+pre, width, &u0, &v0, &ud, &vd);
-                rasterizer->fragment (rasterizer, u0, v0, &dst[(first+pre)*bpp],                             
        width, ud, vd);
-              }
-            }
-              }
-            break;
-              case CTX_COV_PATH_OVER_FRAGMENT:
-              {
-                int width = last-first-pre-post+1;
-                if (width>0)
-                ctx_RGBA8_source_over_normal_full_cov_fragment (rasterizer,
-                               &dst[((first+pre)*bpp)],
-                               NULL,
-                               first + pre,
-                               NULL,
-                               width);
-              }
-              break;
-              default:
-              {
-                int width = last-first-pre-post+1;
-                if (width > 0)
-                {
-                uint8_t opaque[width];
-                memset (opaque, 255, sizeof (opaque));
-                ctx_rasterizer_apply_coverage (rasterizer,
-                            &dst[((first + pre) * bpp)],
-                            first + pre,
-                            opaque,
-                            width);
-                }
-              }
-            }
-            //}
-          }
-          else if (first == last)
-          {
-            coverage[last]+=(graystart-(255-grayend));
-
-            accumulated_x1 = last;
-            accumulated_x0 = ctx_mini (accumulated_x0, last);
-          }
-        }
-   }
-
-   if (accumulated_x1-accumulated_x0>=0)
-   {
-             switch (comp)
-             {
-                case CTX_COV_PATH_OVER:
-                {
-                  uint32_t *dst_i = (uint32_t*)&dst[((accumulated_x0) * bpp)];
-                  for (int i = 0; i < accumulated_x1-accumulated_x0+1; i++)
-                    {
-                      *dst_i = ctx_over_RGBA8_2 (*dst_i, si_ga, si_rb, si_a, coverage[accumulated_x0+i]);
-                      dst_i++;
-                    }
-                }
-                break;
-                case CTX_COV_PATH_COPY:
-                {
-                  uint32_t *dst_i = (uint32_t*)&dst[((accumulated_x0) * bpp)];
-                  for (int i = 0; i < accumulated_x1-accumulated_x0+1; i++)
-                  {
-                    *dst_i = ctx_lerp_RGBA8_2 (*dst_i, si_ga, si_rb, coverage[accumulated_x0+i]);
-                    dst_i++;
-                  }
-                }
-                  break;
-                default:
-                ctx_rasterizer_apply_coverage (rasterizer,
-                          &dst[((accumulated_x0) * bpp)],
-                          accumulated_x0,
-                          &coverage[accumulated_x0],
-                          accumulated_x1-accumulated_x0+1);
-             }
-   }
-}
-
-#undef CTX_EDGE_Y0
-#undef CTX_EDGE
-
-static inline void
-ctx_rasterizer_reset (CtxRasterizer *rasterizer)
-{
-  rasterizer->pending_edges   = 0;
-  rasterizer->active_edges    = 0;
-  rasterizer->has_shape       = 0;
-  rasterizer->has_prev        = 0;
-  rasterizer->edge_list.count = 0; // ready for new edges
-  rasterizer->edge_pos        = 0;
-  rasterizer->scanline        = 0;
-  if (CTX_LIKELY(!rasterizer->preserve))
-  {
-    rasterizer->scan_min      = 5000;
-    rasterizer->scan_max      = -5000;
-    rasterizer->col_min       = 5000;
-    rasterizer->col_max       = -5000;
-  }
-  //rasterizer->comp_op       = NULL; // keep comp_op cached 
-  //     between rasterizations where rendering attributes are
-  //     nonchanging
+      }
+#endif
+  return (CtxCommand *) ret;
 }
 
+static void ctx_drawlist_compact (CtxDrawlist *drawlist);
+/* Provide storage for the entries of a drawlist.
+ *
+ * With CTX_DRAWLIST_STATIC the drawlist is pointed at a function-local
+ * static buffer selected by the drawlist flags and desired_size is not
+ * used.  Otherwise the heap buffer is grown - never shrunk - to
+ * desired_size clamped to the minimum and maximum for the drawlist kind,
+ * and the old contents are copied into the new buffer.  Edge-list
+ * drawlists are sized in CtxSegment items, all others in CtxEntry items.
+ */
 static void
-ctx_rasterizer_rasterize_edges (CtxRasterizer *rasterizer, const int fill_rule 
-#if CTX_SHAPE_CACHE
-                                ,CtxShapeEntry *shape
-#endif
-                               )
+ctx_drawlist_resize (CtxDrawlist *drawlist, int desired_size)
 {
-  int      is_winding = fill_rule == CTX_FILL_RULE_WINDING;
-  const CtxCovPath comp = rasterizer->comp;
-  const int real_aa = rasterizer->aa;
-  uint8_t *dst = ( (uint8_t *) rasterizer->buf);
-
-
-  int scan_start = rasterizer->blit_y * CTX_FULL_AA;
-  int scan_end   = scan_start + (rasterizer->blit_height - 1) * CTX_FULL_AA;
-  const int blit_width = rasterizer->blit_width;
-  const int blit_max_x = rasterizer->blit_x + blit_width;
-  int minx       = rasterizer->col_min / CTX_SUBDIV - rasterizer->blit_x;
-  int maxx       = (rasterizer->col_max + CTX_SUBDIV-1) / CTX_SUBDIV - rasterizer->blit_x;
-  const int blit_stride = rasterizer->blit_stride;
-
-  rasterizer->prev_active_edges = -1;
-  if (
-#if CTX_SHAPE_CACHE
-    !shape &&
-#endif
-    maxx > blit_max_x - 1)
-    { maxx = blit_max_x - 1; }
-
-  minx = ctx_maxi (rasterizer->state->gstate.clip_min_x, minx);
-  maxx = ctx_mini (rasterizer->state->gstate.clip_max_x, maxx);
-  minx = ctx_maxi (0, minx); // redundant?
-  if (minx >= maxx)
+  int flags=drawlist->flags;
+#if CTX_DRAWLIST_STATIC
+  /* one static buffer per drawlist kind */
+  if (flags & CTX_DRAWLIST_EDGE_LIST)
     {
-      ctx_rasterizer_reset (rasterizer);
-      return;
+      static CtxSegment sbuf[CTX_MAX_EDGE_LIST_SIZE];
+      drawlist->entries = (CtxEntry*)&sbuf[0];
+      drawlist->size = CTX_MAX_EDGE_LIST_SIZE;
     }
-#if CTX_SHAPE_CACHE
-  uint8_t _coverage[shape?2:maxx-minx+1];
-#else
-  uint8_t _coverage[maxx-minx+1];
-#endif
-  uint8_t *coverage = &_coverage[0];
-
-  int coverage_size = 
-#if CTX_SHAPE_CACHE
-                  shape?shape->width:
-#endif
-                  sizeof (_coverage);
-
-#if CTX_SHAPE_CACHE
-  if (shape)
+  else if (flags & CTX_DRAWLIST_CURRENT_PATH)
     {
-      coverage = &shape->data[0];
+      static CtxEntry sbuf[CTX_MAX_EDGE_LIST_SIZE];
+      drawlist->entries = &sbuf[0];
+      drawlist->size = CTX_MAX_EDGE_LIST_SIZE;
     }
-#endif
-  //ctx_assert (coverage);
-  rasterizer->scan_min -= (rasterizer->scan_min % CTX_FULL_AA);
-#if CTX_SHAPE_CACHE
-  if (shape)
+  else
     {
-      scan_start = rasterizer->scan_min;
-      scan_end   = rasterizer->scan_max;
+      static CtxEntry sbuf[CTX_MAX_JOURNAL_SIZE];
+      drawlist->entries = &sbuf[0];
+      drawlist->size = CTX_MAX_JOURNAL_SIZE;
+      if(0)ctx_drawlist_compact (drawlist);
     }
-  else
-#endif
+#else
+  int new_size = desired_size;
+  int min_size = CTX_MIN_JOURNAL_SIZE;
+  int max_size = CTX_MAX_JOURNAL_SIZE;
+  if ((flags & CTX_DRAWLIST_EDGE_LIST))
     {
-      if (rasterizer->scan_min > scan_start)
-        {
-          dst += (rasterizer->blit_stride * (rasterizer->scan_min-scan_start) / CTX_FULL_AA);
-          scan_start = rasterizer->scan_min;
-        }
-      scan_end = ctx_mini (rasterizer->scan_max, scan_end);
+      min_size = CTX_MIN_EDGE_LIST_SIZE;
+      max_size = CTX_MAX_EDGE_LIST_SIZE;
     }
-  if (CTX_UNLIKELY(rasterizer->state->gstate.clip_min_y * CTX_FULL_AA > scan_start ))
-    { 
-       dst += (rasterizer->blit_stride * (rasterizer->state->gstate.clip_min_y * CTX_FULL_AA -scan_start) / 
CTX_FULL_AA);
-       scan_start = rasterizer->state->gstate.clip_min_y * CTX_FULL_AA; 
+  else if (flags & CTX_DRAWLIST_CURRENT_PATH)
+    {
+      min_size = CTX_MIN_EDGE_LIST_SIZE;
+      max_size = CTX_MAX_EDGE_LIST_SIZE;
     }
-  scan_end = ctx_mini (rasterizer->state->gstate.clip_max_y * CTX_FULL_AA, scan_end);
-  if (CTX_UNLIKELY(scan_start > scan_end ||
-      (scan_start > (rasterizer->blit_y + (rasterizer->blit_height-1)) * CTX_FULL_AA) ||
-      (scan_end < (rasterizer->blit_y) * CTX_FULL_AA)))
-  { 
-    /* not affecting this rasterizers scanlines */
-    ctx_rasterizer_reset (rasterizer);
-    return;
-  }
-
-  rasterizer->horizontal_edges = 0;
-  {
-    rasterizer->needs_aa3  = 0;
-    rasterizer->needs_aa5  = 0;
-    rasterizer->needs_aa15 = 0;
-    ctx_rasterizer_sort_edges (rasterizer);
-    rasterizer->scanline = scan_start;
-    ctx_rasterizer_feed_edges (rasterizer, 0); 
-
-      int avoid_direct = (0 
-#if CTX_ENABLE_CLIP
-         || rasterizer->clip_buffer
-#endif
-#if CTX_ENABLE_SHADOW_BLUR
-         || rasterizer->in_shadow
-#endif
-#if CTX_SHAPE_CACHE
-         || shape != NULL
+  else
+    {
+#if 0
+      ctx_drawlist_compact (drawlist);
 #endif
-         );
+    }
 
-  for (; rasterizer->scanline <= scan_end;)
+  /* never shrink, and stop once the maximum size has been reached */
+  if (CTX_UNLIKELY(new_size < drawlist->size))
+    { return; }
+  if (CTX_UNLIKELY(drawlist->size == max_size))
+    { return; }
+  new_size = ctx_maxi (new_size, min_size);
+  //if (new_size < drawlist->count)
+  //  { new_size = drawlist->count + 4; }
+  new_size = ctx_mini (new_size, max_size);
+  if (new_size != drawlist->size)
     {
-
-      int needs_full_aa =
-          ( (rasterizer->horizontal_edges!=0) 
-          | (rasterizer->active_edges != rasterizer->prev_active_edges)
-          | (rasterizer->active_edges + rasterizer->pending_edges == rasterizer->ending_edges)
-          );
-
-    if (needs_full_aa)
+      int item_size = sizeof (CtxEntry);
+      if (flags & CTX_DRAWLIST_EDGE_LIST) item_size = sizeof (CtxSegment);
+      //fprintf (stderr, "growing drawlist %p %i to %d from %d\n", drawlist, flags, new_size, 
drawlist->size);
+  if (drawlist->entries)
     {
-        int increment = CTX_FULL_AA/real_aa;
-        memset (coverage, 0, coverage_size);
-        for (int i = 0; i < real_aa; i++)
-        {
-          ctx_rasterizer_feed_edges (rasterizer, 0);
-          ctx_rasterizer_generate_coverage (rasterizer, minx, maxx, coverage, is_winding, real_aa);
-          ctx_rasterizer_increment_edges (rasterizer, increment);
-        }
+      //printf ("grow %p to %d from %d\n", drawlist, new_size, drawlist->size);
+      // NOTE(review): the malloc result is used by memcpy without a
+      // NULL check - confirm that allocation failure cannot occur here
+      CtxEntry *ne =  (CtxEntry *) malloc (item_size * new_size);
+      memcpy (ne, drawlist->entries, drawlist->size * item_size );
+      free (drawlist->entries);
+      drawlist->entries = ne;
+      //drawlist->entries = (CtxEntry*)malloc (drawlist->entries, item_size * new_size);
     }
-    else if (! avoid_direct & (rasterizer->needs_aa3 == 0))
+  else
     {
-      ctx_rasterizer_increment_edges (rasterizer, CTX_AA_HALFSTEP2);
-      ctx_rasterizer_feed_edges (rasterizer, 0);
+      //fprintf (stderr, "allocating for %p %d\n", drawlist, new_size);
+      drawlist->entries = (CtxEntry *) malloc (item_size * new_size);
+    }
+  drawlist->size = new_size;
+    }
+  //fprintf (stderr, "drawlist %p is %d\n", drawlist, drawlist->size);
+#endif
+}
 
-      ctx_rasterizer_generate_coverage_apply (rasterizer, minx, maxx, coverage, is_winding,
-                      comp);
-      ctx_rasterizer_increment_edges (rasterizer, CTX_AA_HALFSTEP);
 
-      dst += blit_stride;
-      rasterizer->prev_active_edges = rasterizer->active_edges;
-      continue;
+/* Append one entry (continuation entries are not followed) to a drawlist.
+ *
+ * Returns the index the entry was stored at.  Nothing is stored when the
+ * drawlist does not own its entries (the current count is returned) or
+ * when the count is within 20 items of the maximum size (0 is returned).
+ * For CTX_DRAWLIST_EDGE_LIST drawlists the entry is copied as a
+ * CtxSegment.
+ */
+static inline int
+ctx_drawlist_add_single (CtxDrawlist *drawlist, CtxEntry *entry)
+{
+  unsigned int max_size = CTX_MAX_JOURNAL_SIZE;
+  int ret = drawlist->count;
+  int flags = drawlist->flags;
+  if (CTX_LIKELY((flags & CTX_DRAWLIST_EDGE_LIST ||
+       flags & CTX_DRAWLIST_CURRENT_PATH)))
+    {
+      max_size = CTX_MAX_EDGE_LIST_SIZE;
     }
-    else if (avoid_direct & (rasterizer->needs_aa3 == 0))
+  if (CTX_UNLIKELY(flags & CTX_DRAWLIST_DOESNT_OWN_ENTRIES))
     {
-      ctx_rasterizer_increment_edges (rasterizer, CTX_AA_HALFSTEP2);
-      ctx_rasterizer_feed_edges (rasterizer, 0);
-
-      memset (coverage, 0, coverage_size);
-      ctx_rasterizer_generate_coverage_set (rasterizer, minx, maxx, coverage, is_winding);
-      ctx_rasterizer_increment_edges (rasterizer, CTX_AA_HALFSTEP);
+      return ret;
     }
-    else if (ctx_rasterizer_is_simple (rasterizer))
+  /* ask for more room well before the buffer is full: at least double
+   * the current size, or 1024 items beyond the current count */
+  if (CTX_UNLIKELY(ret + 64 >= drawlist->size - 40))
     {
-      ctx_rasterizer_increment_edges (rasterizer, CTX_AA_HALFSTEP2);
-      ctx_rasterizer_feed_edges (rasterizer, 1);
-      memset (coverage, 0, coverage_size);
-      if (!avoid_direct)
-      {
+      int new_ = CTX_MAX (drawlist->size * 2, ret + 1024);
+      ctx_drawlist_resize (drawlist, new_);
+    }
 
-        ctx_rasterizer_generate_coverage_apply2 (rasterizer, minx, maxx, coverage, is_winding,
-                      comp);
-        ctx_rasterizer_increment_edges (rasterizer, CTX_AA_HALFSTEP);
-
-        dst += blit_stride;
-        rasterizer->prev_active_edges = rasterizer->active_edges;
-        continue;
-      }
-      ctx_rasterizer_generate_coverage_set2 (rasterizer, minx, maxx, coverage, is_winding);
-      ctx_rasterizer_increment_edges (rasterizer, CTX_AA_HALFSTEP);
-    }
-    else 
+  /* drop the entry rather than fill the last 20 items below the maximum */
+  if (CTX_UNLIKELY(drawlist->count >= max_size - 20))
     {
-      int aa = 3;
-      if (rasterizer->needs_aa5 && real_aa >=5)
-      {
-         aa = 5;
-         if (rasterizer->needs_aa15 && real_aa >=15)
-           aa = 15;
-      }
-      int scanline_increment = 15/aa;
+      return 0;
+    }
+  if ((flags & CTX_DRAWLIST_EDGE_LIST))
+    ((CtxSegment*)(drawlist->entries))[drawlist->count] = *(CtxSegment*)entry;
+  else
+    drawlist->entries[drawlist->count] = *entry;
+  ret = drawlist->count;
+  drawlist->count++;
+  return ret;
+}
 
-      memset (coverage, 0, coverage_size);
-      for (int i = 0; i < CTX_FULL_AA; i+= scanline_increment)
-      {
-        ctx_rasterizer_feed_edges (rasterizer, 0);
-        ctx_rasterizer_generate_coverage (rasterizer, minx, maxx, coverage, is_winding, aa);
-        ctx_rasterizer_increment_edges (rasterizer, scanline_increment);
-      }
+
+/* Append one raw entry to the drawlist of ctx; the result is whatever
+ * ctx_drawlist_add_single () returns for it.
+ */
+int
+ctx_add_single (Ctx *ctx, void *entry)
+{
+  CtxDrawlist *target = &ctx->drawlist;
+  CtxEntry    *single = (CtxEntry *) entry;
+  return ctx_drawlist_add_single (target, single);
+}
+
+/* Append a command together with its continuation entries to a drawlist.
+ *
+ * entry must be followed in memory by ctx_conts_for_entry (entry)
+ * continuation entries; each one is added with ctx_drawlist_add_single ().
+ * Returns the value returned for the last entry added.
+ */
+static inline int
+ctx_drawlist_add_entry (CtxDrawlist *drawlist, CtxEntry *entry)
+{
+  int length = ctx_conts_for_entry (entry) + 1;
+  int ret = 0;
+  for (int i = 0; i < length; i ++)
+    {
+      ret = ctx_drawlist_add_single (drawlist, &entry[i]);
     }
+  return ret;
+}
 
-  ctx_coverage_post_process (rasterizer, minx, maxx, coverage - minx,
-                  NULL, NULL);
-#if CTX_SHAPE_CACHE
-  if (shape == NULL)
+/* Compiled-out variant of ctx_drawlist_insert_entry (); the definition
+ * that is actually built follows the #endif.  Note that the inner loop
+ * here copies from entry[] rather than from drawlist->entries[].
+ */
+#if 0
+int
+ctx_drawlist_insert_entry (CtxDrawlist *drawlist, int pos, CtxEntry *entry)
+{
+  int length = ctx_conts_for_entry (entry) + 1;
+  int tmp_pos = ctx_drawlist_add_entry (drawlist, entry);
+  for (int i = 0; i < length; i++)
+  {
+    for (int j = pos + i + 1; j < tmp_pos; j++)
+      drawlist->entries[j] = entry[j-1];
+    drawlist->entries[pos + i] = entry[i];
+  }
+  return pos;
+}
 #endif
+/* Insert a command and its continuation entries at index pos.
+ *
+ * The entries are first appended with ctx_drawlist_add_entry (); then,
+ * for each of them, the items from pos + i up to the last appended index
+ * are shifted one slot towards the end and entry[i] is written at
+ * pos + i.  Returns pos.
+ *
+ * NOTE(review): the value from ctx_drawlist_add_entry () is used as the
+ * last appended index without checking whether the append was refused -
+ * confirm callers never hit a full or non-owned drawlist.
+ */
+int
+ctx_drawlist_insert_entry (CtxDrawlist *drawlist, int pos, CtxEntry *entry)
+{
+  int length = ctx_conts_for_entry (entry) + 1;
+  int tmp_pos = ctx_drawlist_add_entry (drawlist, entry);
+#if 1
+  for (int i = 0; i < length; i++)
   {
-    ctx_rasterizer_apply_coverage (rasterizer,
-                         &dst[(minx * rasterizer->format->bpp) /8],
-                         minx,
-                         coverage,
-                         maxx-minx+ 1);
+    for (int j = tmp_pos; j > pos + i; j--)
+      drawlist->entries[j] = drawlist->entries[j-1];
+    drawlist->entries[pos + i] = entry[i];
   }
-#if CTX_SHAPE_CACHE
-      if (shape)
-        {
-          coverage += shape->width;
-        }
+  return pos;
 #endif
-      dst += blit_stride;
-      rasterizer->prev_active_edges = rasterizer->active_edges;
+  /* only reached when the #if 1 section above is disabled */
+  return tmp_pos;
+}
+
+/* Append a serialized drawlist to the drawlist of ctx, entry by entry.
+ *
+ * data:   sizeof (CtxEntry)-sized packed entries.
+ * length: size of data in bytes.
+ *
+ * Returns 0 on success and -1 when length is negative or not a whole
+ * number of entries, or when data is NULL while length is non-zero.
+ * The results of the individual ctx_drawlist_add_single () calls are not
+ * inspected.
+ */
+int ctx_append_drawlist (Ctx *ctx, void *data, int length)
+{
+  CtxEntry *entries = (CtxEntry *) data;
+  /* a negative length would be converted to a huge unsigned value by the
+   * size arithmetic and could pass the remainder test */
+  if (length < 0 || length % sizeof (CtxEntry) )
+    {
+      ctx_log("drawlist not multiple of 9\n");
+      return -1;
     }
-  }
+  if (!data)
+    return length ? -1 : 0;
+  for (unsigned int i = 0; i < length / sizeof (CtxEntry); i++)
+    {
+      ctx_drawlist_add_single (&ctx->drawlist, &entries[i]);
+    }
+  return 0;
+}
 
-  if (CTX_UNLIKELY(rasterizer->state->gstate.compositing_mode == CTX_COMPOSITE_SOURCE_OUT ||
-      rasterizer->state->gstate.compositing_mode == CTX_COMPOSITE_SOURCE_IN ||
-      rasterizer->state->gstate.compositing_mode == CTX_COMPOSITE_DESTINATION_IN ||
-      rasterizer->state->gstate.compositing_mode == CTX_COMPOSITE_DESTINATION_ATOP ||
-      rasterizer->state->gstate.compositing_mode == CTX_COMPOSITE_CLEAR))
-  {
-     /* fill in the rest of the blitrect when compositing mode permits it */
-     uint8_t nocoverage[rasterizer->blit_width];
-     //int gscan_start = rasterizer->state->gstate.clip_min_y * CTX_FULL_AA;
-     int gscan_start = rasterizer->state->gstate.clip_min_y * CTX_FULL_AA;
-     int gscan_end = rasterizer->state->gstate.clip_max_y * CTX_FULL_AA;
-     memset (nocoverage, 0, sizeof(nocoverage));
-     int startx   = rasterizer->state->gstate.clip_min_x;
-     int endx     = rasterizer->state->gstate.clip_max_x;
-     int clipw    = endx-startx + 1;
-     uint8_t *dst = ( (uint8_t *) rasterizer->buf);
+/* Replace the contents of the drawlist of ctx with a copy of a
+ * serialized drawlist.
+ *
+ * data:   sizeof (CtxEntry)-sized packed entries; NULL only clears the
+ *         drawlist.
+ * length: size of data in bytes.
+ *
+ * Returns length on success, 0 when the drawlist was only cleared (no
+ * data or zero length) and -1 when the drawlist does not own its entries,
+ * when length is negative or not a whole number of entries, or when the
+ * drawlist could not be made large enough to hold the data.  Except for
+ * the not-owned case the drawlist is left empty on failure.
+ */
+int ctx_set_drawlist (Ctx *ctx, void *data, int length)
+{
+  CtxDrawlist *drawlist = &ctx->drawlist;
+  if (drawlist->flags & CTX_DRAWLIST_DOESNT_OWN_ENTRIES)
+    {
+      return -1;
+    }
+  drawlist->count = 0;
+  if (!data || length == 0)
+    return 0;
+  /* use the entry size itself, like the other drawlist loaders, instead
+   * of a hard-coded 9 */
+  if (CTX_UNLIKELY(length < 0 || length % (int) sizeof (CtxEntry)))
+    return -1;
+  int count = length / (int) sizeof (CtxEntry);
+  ctx_drawlist_resize (drawlist, count);
+  /* ctx_drawlist_resize () clamps to a maximum size and reports nothing,
+   * so verify the capacity before copying into the buffer */
+  if (CTX_UNLIKELY(!drawlist->entries || (int) drawlist->size < count))
+    return -1;
+  memcpy (drawlist->entries, data, length);
+  drawlist->count = count;
+  return length;
+}
 
-     dst = (uint8_t*)(rasterizer->buf) + rasterizer->blit_stride * (gscan_start / CTX_FULL_AA);
-     for (rasterizer->scanline = gscan_start; rasterizer->scanline < scan_start;)
-     {
-       ctx_rasterizer_apply_coverage (rasterizer,
-                                      &dst[ (startx * rasterizer->format->bpp) /8],
-                                      0,
-                                      nocoverage, clipw);
-       rasterizer->scanline += CTX_FULL_AA;
-       dst += rasterizer->blit_stride;
-     }
-     if (minx < startx)
-     {
-     dst = (uint8_t*)(rasterizer->buf) + rasterizer->blit_stride * (scan_start / CTX_FULL_AA);
-     for (rasterizer->scanline = scan_start; rasterizer->scanline < scan_end;)
-     {
-       ctx_rasterizer_apply_coverage (rasterizer,
-                                      &dst[ (startx * rasterizer->format->bpp) /8],
-                                      0,
-                                      nocoverage, minx-startx);
-       dst += blit_stride;
-     }
-     }
+int ctx_get_drawlist_count (Ctx *ctx)
+{
+  return ctx->drawlist.count;
+}
 
-     if (endx > maxx)
-     {
-     dst = (uint8_t*)(rasterizer->buf) + rasterizer->blit_stride * (scan_start / CTX_FULL_AA);
-     for (rasterizer->scanline = scan_start; rasterizer->scanline < scan_end;)
-     {
-       ctx_rasterizer_apply_coverage (rasterizer,
-                                      &dst[ (maxx * rasterizer->format->bpp) /8],
-                                      0,
-                                      nocoverage, endx-maxx);
+const CtxEntry *ctx_get_drawlist (Ctx *ctx)
+{
+  return ctx->drawlist.entries;
+}
 
-       rasterizer->scanline += CTX_FULL_AA;
-       dst += rasterizer->blit_stride;
-     }
-     }
-#if 1
-     dst = (uint8_t*)(rasterizer->buf) + rasterizer->blit_stride * (scan_end / CTX_FULL_AA);
-     // XXX valgrind/asan this
-     if(0)for (rasterizer->scanline = scan_end; rasterizer->scanline/CTX_FULL_AA < gscan_end-1;)
-     {
-       ctx_rasterizer_apply_coverage (rasterizer,
-                                      &dst[ (startx * rasterizer->format->bpp) /8],
-                                      0,
-                                      nocoverage, clipw-1);
+int
+ctx_add_data (Ctx *ctx, void *data, int length)
+{
+  if (CTX_UNLIKELY(length % sizeof (CtxEntry) ))
+    {
+      //ctx_log("err\n");
+      return -1;
+    }
+  /* some more input verification might be in order.. like
+   * verify that it is well-formed up to length?
+   *
+   * also - it would be very useful to stop processing
+   * upon flush - and do drawlist resizing.
+   */
+  return ctx_drawlist_add_entry (&ctx->drawlist, (CtxEntry *) data);
+}
 
-       rasterizer->scanline += CTX_FULL_AA;
-       dst += blit_stride;
-     }
-#endif
-  }
-  ctx_rasterizer_reset (rasterizer);
+int ctx_drawlist_add_u32 (CtxDrawlist *drawlist, CtxCode code, uint32_t u32[2])
+{
+  CtxEntry entry[3] = {{code, {{0},}},};
+  entry[0].data.u32[0] = u32[0];
+  entry[0].data.u32[1] = u32[1];
+  return ctx_drawlist_add_single (drawlist, &entry[0]);
 }
 
-inline static int
-ctx_is_transparent (CtxRasterizer *rasterizer, int stroke)
+int ctx_drawlist_add_data (CtxDrawlist *drawlist, const void *data, int length)
 {
-  CtxGState *gstate = &rasterizer->state->gstate;
-  if (gstate->global_alpha_u8 == 0)
-    return 1;
-  if (gstate->source_fill.type == CTX_SOURCE_COLOR)
+  CtxEntry entry[3] = {{CTX_DATA, {{0},}}};
+  entry[0].data.u32[0] = 0;
+  entry[0].data.u32[1] = 0;
+  int ret = ctx_drawlist_add_single (drawlist, &entry[0]);
+  if (CTX_UNLIKELY(!data)) { return -1; }
+  int length_in_blocks;
+  if (length <= 0) { length = strlen ( (char *) data) + 1; }
+  length_in_blocks = length / sizeof (CtxEntry);
+  length_in_blocks += (length % sizeof (CtxEntry) ) ?1:0;
+  if ((signed)drawlist->count + length_in_blocks + 4 > drawlist->size)
+    { ctx_drawlist_resize (drawlist, drawlist->count * 1.2 + length_in_blocks + 32); }
+  if (CTX_UNLIKELY((signed)drawlist->count >= drawlist->size))
+    { return -1; }
+  drawlist->count += length_in_blocks;
+  drawlist->entries[ret].data.u32[0] = length;
+  drawlist->entries[ret].data.u32[1] = length_in_blocks;
+  memcpy (&drawlist->entries[ret+1], data, length);
   {
-    uint8_t ga[2];
-    ctx_color_get_graya_u8 (rasterizer->state, &gstate->source_fill.color, ga);
-    if (ga[1] == 0)
-      return 1;
+    //int reverse = ctx_drawlist_add (drawlist, CTX_DATA_REV);
+    CtxEntry entry[3] = {{CTX_DATA_REV, {{0},}}};
+    entry[0].data.u32[0] = length;
+    entry[0].data.u32[1] = length_in_blocks;
+    ctx_drawlist_add_single (drawlist, &entry[0]);
+
+    /* this reverse marker exist to enable more efficient
+       front to back traversal, can be ignored in other
+       direction, is this needed after string setters as well?
+     */
   }
-  return 0;
+  return ret;
 }
 
-#define CTX_RECT_FILL 1
+static inline CtxEntry
+ctx_void (CtxCode code)
+{
+  CtxEntry command;
+  command.code = code;
+  return command;
+}
 
-#if CTX_RECT_FILL
-static void
-ctx_rasterizer_fill_rect (CtxRasterizer *rasterizer,
-                          int          x0,
-                          int          y0,
-                          int          x1,
-                          int          y1,
-                          uint8_t      cov)
+static inline CtxEntry
+ctx_f (CtxCode code, float x, float y)
 {
-  int blit_x = rasterizer->blit_x;
-  int blit_y = rasterizer->blit_y;
-  int blit_width = rasterizer->blit_width;
-  int blit_height = rasterizer->blit_height;
-  int blit_stride = rasterizer->blit_stride;
-  x0 = ctx_maxi (x0, blit_x);
-  x1 = ctx_mini (x1, blit_x + blit_width);
+  CtxEntry command;
+  command.code = code;
+  command.data.f[0] = x;
+  command.data.f[1] = y;
+  return command;
+}
 
-  int width = x1 - x0;
+static CtxEntry
+ctx_u32 (CtxCode code, uint32_t x, uint32_t y)
+{
+  CtxEntry command = ctx_void (code);
+  command.data.u32[0] = x;
+  command.data.u32[1] = y;
+  return command;
+}
 
-  if (CTX_UNLIKELY(width <=0))
-    return;
+#if 0
+static CtxEntry
+ctx_s32 (CtxCode code, int32_t x, int32_t y)
+{
+  CtxEntry command = ctx_void (code);
+  command.data.s32[0] = x;
+  command.data.s32[1] = y;
+  return command;
+}
+#endif
 
-  void (*comp_op)(CTX_COMPOSITE_ARGUMENTS) = rasterizer->comp_op;
+static inline CtxEntry
+ctx_s16 (CtxCode code, int x0, int y0, int x1, int y1)
+{
+  CtxEntry command;
+  command.code = code;
+  command.data.s16[0] = x0;
+  command.data.s16[1] = y0;
+  command.data.s16[2] = x1;
+  command.data.s16[3] = y1;
+  return command;
+}
 
-  y0 = ctx_maxi (y0, blit_y);
-  y1 = ctx_mini (y1, blit_y + blit_height);
-  rasterizer->scanline = y0 * CTX_FULL_AA;
-  _ctx_setup_compositor (rasterizer);
-  uint8_t *dst = ( (uint8_t *) rasterizer->buf);
 
-  dst += (y0 - blit_y) * blit_stride;
-  dst += (x0) * rasterizer->format->bpp/8;
+static CtxEntry
+ctx_u8 (CtxCode code,
+        uint8_t a, uint8_t b, uint8_t c, uint8_t d,
+        uint8_t e, uint8_t f, uint8_t g, uint8_t h)
+{
+  CtxEntry command;
+  command.code = code;
+  command.data.u8[0] = a;
+  command.data.u8[1] = b;
+  command.data.u8[2] = c;
+  command.data.u8[3] = d;
+  command.data.u8[4] = e;
+  command.data.u8[5] = f;
+  command.data.u8[6] = g;
+  command.data.u8[7] = h;
+  return command;
+}
 
-  if (cov == 255)
-  {
-    if (comp_op == ctx_RGBA8_source_copy_normal_color)
-    {
-      uint32_t color = *((uint32_t*)rasterizer->color);
-      if (width == 1)
-      {
-        for (int y = y0; y < y1; y++)
-        {
-          uint32_t *dst_i = (uint32_t*)&dst[0];
-          *dst_i = color;
-          dst += blit_stride;
-        }
-        return;
-      }
-      else
-      {
-        for (int y = y0; y < y1; y++)
-        {
-          ctx_span_set_color ((uint32_t*)&dst[0], color, width);
-          dst += blit_stride;
-        }
-        return;
-      }
-    }
-    else if (comp_op == ctx_RGBA8_source_over_normal_color)
-    {
-      uint32_t si_ga_full = ((uint32_t*)rasterizer->color)[3];
-      uint32_t si_rb_full = ((uint32_t*)rasterizer->color)[4];
-      uint32_t si_a  = rasterizer->color[3];
+static void
+ctx_process_cmd_str_with_len (Ctx *ctx, CtxCode code, const char *string, uint32_t arg0, uint32_t arg1, int len)
+{
+  CtxEntry commands[1 + 2 + (len+1+1)/9];
+  ctx_memset (commands, 0, sizeof (commands) );
+  commands[0] = ctx_u32 (code, arg0, arg1);
+  commands[1].code = CTX_DATA;
+  commands[1].data.u32[0] = len;
+  commands[1].data.u32[1] = (len+1+1)/9 + 1;
+  memcpy( (char *) &commands[2].data.u8[0], string, len);
+  ( (char *) (&commands[2].data.u8[0]) ) [len]=0;
+  ctx_process (ctx, commands);
+}
 
-      if (width == 1)
-      {
-        for (int y = y0; y < y1; y++)
-        {
-          ((uint32_t*)(dst))[0] = ctx_over_RGBA8_full_2 (
-             ((uint32_t*)(dst))[0], si_ga_full, si_rb_full, si_a);
-          dst += blit_stride;
-        }
-        return;
-      }
-      else
-      {
-        for (int y = y0; y < y1; y++)
-        {
-          uint32_t *dst_i = (uint32_t*)&dst[0];
-          for (int i = 0; i < width; i++)
-          {
-            dst_i[i] = ctx_over_RGBA8_full_2 (dst_i[i], si_ga_full, si_rb_full, si_a);
-          }
-          dst += blit_stride;
-        }
-        return;
-      }
-    }
-    else if (comp_op == ctx_RGBA8_source_copy_normal_fragment)
-    {
-      for (int y = y0; y < y1; y++)
-      {
-        float u0 = 0; float v0 = 0;
-        float ud = 0; float vd = 0;
-        ctx_init_uv (rasterizer, x0, width, &u0, &v0, &ud, &vd);
-        rasterizer->fragment (rasterizer, u0, v0, &dst[0], width, ud, vd);
-        rasterizer->scanline += CTX_FULL_AA;
-        dst += blit_stride;
-      }
-      return;
-    }
-    else if (comp_op == ctx_RGBA8_source_over_normal_fragment)
-    {
-      for (int y = y0; y < y1; y++)
-      {
-        ctx_RGBA8_source_over_normal_full_cov_fragment (rasterizer,
-                                &dst[0], NULL, x0, NULL, width);
-        rasterizer->scanline += CTX_FULL_AA;
-        dst += blit_stride;
-      }
-      return;
-    }
-  }
-  else
-  {
-  if (comp_op == ctx_RGBA8_source_copy_normal_color)
+static void
+ctx_process_cmd_str (Ctx *ctx, CtxCode code, const char *string, uint32_t arg0, uint32_t arg1)
+{
+  ctx_process_cmd_str_with_len (ctx, code, string, arg0, arg1, strlen (string));
+}
+
+static void
+ctx_process_cmd_str_float (Ctx *ctx, CtxCode code, const char *string, float arg0, float arg1)
+{
+  uint32_t iarg0;
+  uint32_t iarg1;
+  memcpy (&iarg0, &arg0, sizeof (iarg0));
+  memcpy (&iarg1, &arg1, sizeof (iarg1));
+  ctx_process_cmd_str_with_len (ctx, code, string, iarg0, iarg1, strlen (string));
+}
+
+#if CTX_BITPACK_PACKER
+static unsigned int
+ctx_last_history (CtxDrawlist *drawlist)
+{
+  unsigned int last_history = 0;
+  unsigned int i = 0;
+  while (i < drawlist->count)
     {
-      uint32_t color = *((uint32_t*)rasterizer->color);
-      if (width == 1)
-      {
-        for (int y = y0; y < y1; y++)
-        {
-          uint32_t *dst_i = (uint32_t*)&dst[0];
-          *dst_i = ctx_lerp_RGBA8 (*dst_i, color, cov);
-          dst += blit_stride;
-        }
-        return;
-      }
-      else
-      {
-        for (int y = y0; y < y1; y++)
-        {
-          uint32_t *dst_i = (uint32_t*)&dst[0];
-          for (int i = 0; i < width; i++)
-          {
-            dst_i[i] = ctx_lerp_RGBA8 (dst_i[i], color, cov);
-          }
-          dst += blit_stride;
-        }
-        return;
-      }
+      CtxEntry *entry = &drawlist->entries[i];
+      i += (ctx_conts_for_entry (entry) + 1);
     }
-    else if (comp_op == ctx_RGBA8_source_over_normal_color)
+  return last_history;
+}
+#endif
+
+#if CTX_BITPACK_PACKER
+
+static float
+find_max_dev (CtxEntry *entry, int nentrys)
+{
+  float max_dev = 0.0;
+  for (int c = 0; c < nentrys; c++)
     {
-      uint32_t color = *((uint32_t*)rasterizer->color);
-      if (width == 1)
-      {
-        for (int y = y0; y < y1; y++)
-        {
-          uint32_t *dst_i = (uint32_t*)&dst[0];
-          *dst_i = ctx_over_RGBA8 (*dst_i, color, cov);
-          dst += blit_stride;
-        }
-        return;
-      }
-      else
-      {
-        for (int y = y0; y < y1; y++)
+      for (int d = 0; d < 2; d++)
         {
-          uint32_t *dst_i = (uint32_t*)&dst[0];
-          for (int i = 0; i < width; i++)
-          {
-            dst_i[i] = ctx_over_RGBA8 (dst_i[i], color, cov);
-          }
-          dst += blit_stride;
+          if (entry[c].data.f[d] > max_dev)
+            { max_dev = entry[c].data.f[d]; }
+          if (entry[c].data.f[d] < -max_dev)
+            { max_dev = -entry[c].data.f[d]; }
         }
-        return;
-      }
-    }
-  }
-
-  {
-    uint8_t coverage[width];
-    memset (coverage, cov, sizeof (coverage) );
-    for (int y = y0; y < y1; y++)
-    {
-      ctx_rasterizer_apply_coverage (rasterizer, &dst[0], x0, coverage, width);
-      rasterizer->scanline += CTX_FULL_AA;
-      dst += blit_stride;
     }
-  }
+  return max_dev;
 }
-#endif
 
-static inline float ctx_fmod1f (float val)
+static void
+pack_s8_args (CtxEntry *entry, int npairs)
 {
-  int vali = val;
-  return val - vali;
+  for (int c = 0; c < npairs; c++)
+    for (int d = 0; d < 2; d++)
+      { entry[0].data.s8[c*2+d]=entry[c].data.f[d] * CTX_SUBDIV; }
 }
 
 static void
-ctx_rasterizer_fill (CtxRasterizer *rasterizer)
+pack_s16_args (CtxEntry *entry, int npairs)
 {
-  int preserved_count = rasterizer->preserve?rasterizer->edge_list.count:1;
-  int blit_x = rasterizer->blit_x;
-  int blit_y = rasterizer->blit_y;
-  int blit_width = rasterizer->blit_width;
-  int blit_height = rasterizer->blit_height;
-  int blit_stride = rasterizer->blit_stride;
-
-  CtxSegment temp[preserved_count]; /* copy of already built up path's poly line
-                          XXX - by building a large enough path
-                          the stack can be smashed!
-                         */
-  if (CTX_UNLIKELY(rasterizer->preserve))
-    { memcpy (temp, rasterizer->edge_list.entries, sizeof (temp) ); }
+  for (int c = 0; c < npairs; c++)
+    for (int d = 0; d < 2; d++)
+      { entry[0].data.s16[c*2+d]=entry[c].data.f[d] * CTX_SUBDIV; }
+}
+#endif
 
-#if CTX_ENABLE_SHADOW_BLUR
-  if (CTX_UNLIKELY(rasterizer->in_shadow))
-  {
-  for (int i = 0; i < rasterizer->edge_list.count; i++)
+#if CTX_BITPACK_PACKER
+static void
+ctx_drawlist_remove_tiny_curves (CtxDrawlist *drawlist, int start_pos)
+{
+  CtxIterator iterator;
+  if ( (drawlist->flags & CTX_TRANSFORMATION_BITPACK) == 0)
+    { return; }
+  ctx_iterator_init (&iterator, drawlist, start_pos, CTX_ITERATOR_FLAT);
+  iterator.end_pos = drawlist->count - 5;
+  CtxCommand *command = NULL;
+  while ( (command = ctx_iterator_next (&iterator) ) )
     {
-      CtxSegment *entry = &((CtxSegment*)rasterizer->edge_list.entries)[i];
-      entry->data.s16[2] += rasterizer->shadow_x * CTX_SUBDIV;
-      entry->data.s16[3] += rasterizer->shadow_y * CTX_FULL_AA;
+      CtxEntry *entry = &command->entry;
+      /* things smaller than this have probably been scaled down
+         beyond recognition, bailing for both better packing and less rasterization work
+       */
+      if (command[0].code == CTX_REL_CURVE_TO)
+        {
+          float max_dev = find_max_dev (entry, 3);
+          if (max_dev < 1.0)
+            {
+              entry[0].code = CTX_REL_LINE_TO;
+              entry[0].data.f[0] = entry[2].data.f[0];
+              entry[0].data.f[1] = entry[2].data.f[1];
+              entry[1].code = CTX_NOP;
+              entry[2].code = CTX_NOP;
+            }
+        }
     }
-    rasterizer->scan_min += rasterizer->shadow_y * CTX_FULL_AA;
-    rasterizer->scan_max += rasterizer->shadow_y * CTX_FULL_AA;
-    rasterizer->col_min  += (rasterizer->shadow_x - rasterizer->state->gstate.shadow_blur * 3 + 1) * CTX_SUBDIV;
-    rasterizer->col_max  += (rasterizer->shadow_x + rasterizer->state->gstate.shadow_blur * 3 + 1) * CTX_SUBDIV;
-  }
+}
 #endif
 
-  if (CTX_UNLIKELY(ctx_is_transparent (rasterizer, 0) ||
-      rasterizer->scan_min > CTX_FULL_AA * (blit_y + blit_height) ||
-      rasterizer->scan_max < CTX_FULL_AA * blit_y ||
-      rasterizer->col_min > CTX_SUBDIV * (blit_x + blit_width) ||
-      rasterizer->col_max < CTX_SUBDIV * blit_x))
+#if CTX_BITPACK_PACKER
+static void
+ctx_drawlist_bitpack (CtxDrawlist *drawlist, unsigned int start_pos)
+{
+#if CTX_BITPACK
+  unsigned int i = 0;
+  if ( (drawlist->flags & CTX_TRANSFORMATION_BITPACK) == 0)
+    { return; }
+  ctx_drawlist_remove_tiny_curves (drawlist, drawlist->bitpack_pos);
+  i = drawlist->bitpack_pos;
+  if (start_pos > i)
+    { i = start_pos; }
+  while (i < drawlist->count - 4) /* the -4 is to avoid looking past
+                                    initialized data we're not ready
+                                    to bitpack yet*/
     {
-      ctx_rasterizer_reset (rasterizer);
-    }
-  else
-  {
-    _ctx_setup_compositor (rasterizer);
-
+      CtxEntry *entry = &drawlist->entries[i];
+      if (entry[0].code == CTX_SET_RGBA_U8 &&
+          entry[1].code == CTX_MOVE_TO &&
+          entry[2].code == CTX_REL_LINE_TO &&
+          entry[3].code == CTX_REL_LINE_TO &&
+          entry[4].code == CTX_REL_LINE_TO &&
+          entry[5].code == CTX_REL_LINE_TO &&
+          entry[6].code == CTX_FILL &&
+          ctx_fabsf (entry[2].data.f[0] - 1.0f) < 0.02f &&
+          ctx_fabsf (entry[3].data.f[1] - 1.0f) < 0.02f)
+        {
+          entry[0].code = CTX_SET_PIXEL;
+          entry[0].data.u16[2] = entry[1].data.f[0];
+          entry[0].data.u16[3] = entry[1].data.f[1];
+          entry[1].code = CTX_NOP;
+          entry[2].code = CTX_NOP;
+          entry[3].code = CTX_NOP;
+          entry[4].code = CTX_NOP;
+          entry[5].code = CTX_NOP;
+          entry[6].code = CTX_NOP;
+        }
 #if 1
-    rasterizer->state->min_x = ctx_mini (rasterizer->state->min_x, rasterizer->col_min / CTX_SUBDIV);
-    rasterizer->state->max_x = ctx_maxi (rasterizer->state->min_x, rasterizer->col_max / CTX_SUBDIV);
-    rasterizer->state->min_y = ctx_mini (rasterizer->state->min_y, rasterizer->scan_min / CTX_FULL_AA);
-    rasterizer->state->max_y = ctx_maxi (rasterizer->state->max_y, rasterizer->scan_max / CTX_FULL_AA);
-#else
-    if (CTX_UNLIKELY ( rasterizer->col_min / CTX_SUBDIV < rasterizer->state->min_x))
-       rasterizer->state->min_x = rasterizer->col_min / CTX_SUBDIV;
-    if (CTX_UNLIKELY ( rasterizer->col_min / CTX_SUBDIV > rasterizer->state->max_x))
-       rasterizer->state->min_x = rasterizer->col_min / CTX_SUBDIV;
-
-    if (CTX_UNLIKELY ( rasterizer->scan_min / CTX_FULL_AA < rasterizer->state->min_y))
-       rasterizer->state->min_y = rasterizer->scan_min / CTX_FULL_AA;
-    if (CTX_UNLIKELY ( rasterizer->scan_min / CTX_FULL_AA > rasterizer->state->max_y))
-       rasterizer->state->max_y = rasterizer->scan_max / CTX_FULL_AA;
-#endif
-
-#if CTX_RECT_FILL
-  if (rasterizer->edge_list.count == 5)
-    {
-      CtxSegment *entry0 = &(((CtxSegment*)(rasterizer->edge_list.entries)))[0];
-      CtxSegment *entry1 = &(((CtxSegment*)(rasterizer->edge_list.entries)))[1];
-      CtxSegment *entry2 = &(((CtxSegment*)(rasterizer->edge_list.entries)))[2];
-      CtxSegment *entry3 = &(((CtxSegment*)(rasterizer->edge_list.entries)))[3];
-
-      if ((!(rasterizer->state->gstate.clipped != 0)) &
-          (entry0->data.s16[2] == entry1->data.s16[2]) &
-          (entry0->data.s16[3] == entry3->data.s16[3]) &
-          (entry1->data.s16[3] == entry2->data.s16[3]) &
-          (entry2->data.s16[2] == entry3->data.s16[2])
-#if CTX_ENABLE_SHADOW_BLUR
-           && !rasterizer->in_shadow
-#endif
-         )
-       {
-         if(((entry1->data.s16[2] % (CTX_SUBDIV))  == 0) &
-            ((entry1->data.s16[3] % (CTX_FULL_AA)) == 0) &
-            ((entry3->data.s16[2] % (CTX_SUBDIV))  == 0) &
-            ((entry3->data.s16[3] % (CTX_FULL_AA)) == 0))
-         {
-           /* best-case axis aligned rectangle */
-           int x0 = entry3->data.s16[2] / CTX_SUBDIV;
-           int y0 = entry3->data.s16[3] / CTX_FULL_AA;
-           int x1 = entry1->data.s16[2] / CTX_SUBDIV;
-           int y1 = entry1->data.s16[3] / CTX_FULL_AA;
-
-           ctx_rasterizer_fill_rect (rasterizer, x0, y0, x1, y1, 255);
-           ctx_rasterizer_reset (rasterizer);
-           goto done;
-         }
-        else
-         {
-           float x0 = entry3->data.s16[2] * (1.0f / CTX_SUBDIV);
-           float y0 = entry3->data.s16[3] * (1.0f / CTX_FULL_AA);
-           float x1 = entry1->data.s16[2] * (1.0f / CTX_SUBDIV);
-           float y1 = entry1->data.s16[3] * (1.0f / CTX_FULL_AA);
-
-           x0 = ctx_maxf (x0, blit_x);
-           y0 = ctx_maxf (y0, blit_y);
-           x1 = ctx_minf (x1, blit_x + blit_width);
-           y1 = ctx_minf (y1, blit_y + blit_height);
-
-           uint8_t left = 255-ctx_fmod1f (x0) * 255;
-           uint8_t top  = 255-ctx_fmod1f (y0) * 255;
-           uint8_t right  = ctx_fmod1f (x1) * 255;
-           uint8_t bottom = ctx_fmod1f (y1) * 255;
-
-           x0 = ctx_floorf (x0);
-           y0 = ctx_floorf (y0);
-           x1 = ctx_floorf (x1+7/8.0f);
-           y1 = ctx_floorf (y1+14/15.0f);
-
-           int has_top    = (top < 255);
-           int has_bottom = (bottom <255);
-           int has_right  = (right >0);
-           int has_left   = (left >0);
-
-           int width = x1 - x0;
-
-           if (CTX_LIKELY(width >0))
-           {
-              uint8_t *dst = ( (uint8_t *) rasterizer->buf);
-              uint8_t coverage[width+2];
-              dst += (((int)y0) - blit_y) * blit_stride;
-              dst += ((int)x0) * rasterizer->format->bpp/8;
-
-              if (has_top)
-              {
-                int i = 0;
-                if (has_left)
+      else if (entry[0].code == CTX_REL_LINE_TO)
+        {
+          if (entry[1].code == CTX_REL_LINE_TO &&
+              entry[2].code == CTX_REL_LINE_TO &&
+              entry[3].code == CTX_REL_LINE_TO)
+            {
+              float max_dev = find_max_dev (entry, 4);
+              if (max_dev < 114 / CTX_SUBDIV)
                 {
-                  coverage[i++] = top * left / 255;
+                  pack_s8_args (entry, 4);
+                  entry[0].code = CTX_REL_LINE_TO_X4;
+                  entry[1].code = CTX_NOP;
+                  entry[2].code = CTX_NOP;
+                  entry[3].code = CTX_NOP;
                 }
-                for (int x = x0 + has_left; x < x1 - has_right; x++)
-                  coverage[i++] = top;
-                coverage[i++]= top * right / 255;
-
-                  ctx_rasterizer_apply_coverage (rasterizer,dst,
-                                                 x0,
-                                                 coverage, width);
-                 dst += blit_stride;
-               }
-
-#if 0
-           if (!(
-            (rasterizer->state->gstate.compositing_mode == CTX_COMPOSITE_COPY||
-             rasterizer->state->gstate.compositing_mode == CTX_COMPOSITE_SOURCE_OVER) &&
-             rasterizer->state->gstate.blend_mode == CTX_BLEND_NORMAL &&
-             rasterizer->state->gstate.source_fill.type == CTX_SOURCE_COLOR
-             ))
-           {
-             int i = 0;
-             if (has_left)
-             {
-               coverage[i++] = left;
-             }
-             for (int x = x0 + has_left; x < x1 - has_right; x++)
-               coverage[i++] = 255;
-             coverage[i++] = right;
-
-             for (int ty = y0+has_top; ty < y1-has_bottom; ty++)
-             {
-               ctx_rasterizer_apply_coverage (rasterizer, dst, x0, coverage, width);
-               dst += blit_stride;
-             }
-           }
-           else
-#endif
-           {
-             if (has_left)
-               ctx_rasterizer_fill_rect (rasterizer, x0, y0 + has_top, x0+1, y1 - has_bottom, left);
-             if (has_right)
-               ctx_rasterizer_fill_rect (rasterizer, x1-1, y0 + has_top, x1, y1 - has_bottom, right);
-             x0 += has_left;
-             y0 += has_top;
-             y1 -= has_bottom;
-             x1 -= has_right;
-             ctx_rasterizer_fill_rect (rasterizer, x0,y0,x1,y1,255);
-
-             dst += blit_stride * ((((int)y1)-has_bottom) - (((int)y0)+has_top));
-           }
-
-           if (has_bottom)
-           {
-             int i = 0;
-             if (has_left)
-               coverage[i++] = bottom * left / 255;
-             for (int x = x0 + has_left; x < x1 - has_right; x++)
-               coverage[i++] = bottom;
-             coverage[i++]= bottom * right / 255;
-
-             ctx_rasterizer_apply_coverage (rasterizer,dst, x0, coverage, width);
-           }
-           }
-
-           ctx_rasterizer_reset (rasterizer);
-           goto done;
-         }
-
-       }
-    }
-#endif
-    ctx_rasterizer_finish_shape (rasterizer);
-
-    uint32_t hash = ctx_rasterizer_poly_to_edges (rasterizer);
-    if (hash){};
-
-#if CTX_SHAPE_CACHE
-    int width = (rasterizer->col_max + (CTX_SUBDIV-1) ) / CTX_SUBDIV - rasterizer->col_min/CTX_SUBDIV + 1;
-    int height = (rasterizer->scan_max + (CTX_FULL_AA-1) ) / CTX_FULL_AA - rasterizer->scan_min / CTX_FULL_AA + 1;
-    if (width * height < CTX_SHAPE_CACHE_DIM && width >=1 && height >= 1
-        && width < CTX_SHAPE_CACHE_MAX_DIM
-        && height < CTX_SHAPE_CACHE_MAX_DIM 
-#if CTX_ENABLE_SHADOW_BLUR
-        && !rasterizer->in_shadow
+            }
+          else if (entry[1].code == CTX_REL_CURVE_TO)
+            {
+              float max_dev = find_max_dev (entry, 4);
+              if (max_dev < 114 / CTX_SUBDIV)
+                {
+                  pack_s8_args (entry, 4);
+                  entry[0].code = CTX_REL_LINE_TO_REL_CURVE_TO;
+                  entry[1].code = CTX_NOP;
+                  entry[2].code = CTX_NOP;
+                  entry[3].code = CTX_NOP;
+                }
+            }
+          else if (entry[1].code == CTX_REL_LINE_TO &&
+                   entry[2].code == CTX_REL_LINE_TO &&
+                   entry[3].code == CTX_REL_LINE_TO)
+            {
+              float max_dev = find_max_dev (entry, 4);
+              if (max_dev < 114 / CTX_SUBDIV)
+                {
+                  pack_s8_args (entry, 4);
+                  entry[0].code = CTX_REL_LINE_TO_X4;
+                  entry[1].code = CTX_NOP;
+                  entry[2].code = CTX_NOP;
+                  entry[3].code = CTX_NOP;
+                }
+            }
+          else if (entry[1].code == CTX_REL_MOVE_TO)
+            {
+              float max_dev = find_max_dev (entry, 2);
+              if (max_dev < 31000 / CTX_SUBDIV)
+                {
+                  pack_s16_args (entry, 2);
+                  entry[0].code = CTX_REL_LINE_TO_REL_MOVE_TO;
+                  entry[1].code = CTX_NOP;
+                }
+            }
+          else if (entry[1].code == CTX_REL_LINE_TO)
+            {
+              float max_dev = find_max_dev (entry, 2);
+              if (max_dev < 31000 / CTX_SUBDIV)
+                {
+                  pack_s16_args (entry, 2);
+                  entry[0].code = CTX_REL_LINE_TO_X2;
+                  entry[1].code = CTX_NOP;
+                }
+            }
+        }
 #endif
-        )
-      {
-        int scan_min = rasterizer->scan_min;
-        int col_min = rasterizer->col_min;
-        scan_min -= (scan_min % CTX_FULL_AA);
-        int y0 = scan_min / CTX_FULL_AA;
-        int y1 = y0 + height;
-        int x0 = col_min / CTX_SUBDIV;
-        int ymin = y0;
-        int x1 = x0 + width;
-        int clip_x_min = blit_x;
-        int clip_x_max = blit_x + blit_width - 1;
-        int clip_y_min = blit_y;
-        int clip_y_max = blit_y + blit_height - 1;
-
-        int dont_cache = 0;
-        if (CTX_UNLIKELY(x1 >= clip_x_max))
-          { x1 = clip_x_max;
-            dont_cache = 1;
-          }
-        int xo = 0;
-        if (CTX_UNLIKELY(x0 < clip_x_min))
-          {
-            xo = clip_x_min - x0;
-            x0 = clip_x_min;
-            dont_cache = 1;
-          }
-        if (CTX_UNLIKELY(y0 < clip_y_min || y1 >= clip_y_max))
-          dont_cache = 1;
-        if (dont_cache || !_ctx_shape_cache_enabled)
+#if 1
+      else if (entry[0].code == CTX_REL_CURVE_TO)
         {
-          ctx_rasterizer_rasterize_edges (rasterizer, rasterizer->state->gstate.fill_rule
-#if CTX_SHAPE_CACHE
-                                        , NULL
-#endif
-                                       );
+          if (entry[3].code == CTX_REL_LINE_TO)
+            {
+              float max_dev = find_max_dev (entry, 4);
+              if (max_dev < 114 / CTX_SUBDIV)
+                {
+                  pack_s8_args (entry, 4);
+                  entry[0].code = CTX_REL_CURVE_TO_REL_LINE_TO;
+                  entry[1].code = CTX_NOP;
+                  entry[2].code = CTX_NOP;
+                  entry[3].code = CTX_NOP;
+                }
+            }
+          else if (entry[3].code == CTX_REL_MOVE_TO)
+            {
+              float max_dev = find_max_dev (entry, 4);
+              if (max_dev < 114 / CTX_SUBDIV)
+                {
+                  pack_s8_args (entry, 4);
+                  entry[0].code = CTX_REL_CURVE_TO_REL_MOVE_TO;
+                  entry[1].code = CTX_NOP;
+                  entry[2].code = CTX_NOP;
+                  entry[3].code = CTX_NOP;
+                }
+            }
+          else
+            {
+              float max_dev = find_max_dev (entry, 3);
+              if (max_dev < 114 / CTX_SUBDIV)
+                {
+                  pack_s8_args (entry, 3);
+                  ctx_arg_s8 (6) =
+                    ctx_arg_s8 (7) = 0;
+                  entry[0].code = CTX_REL_CURVE_TO_REL_LINE_TO;
+                  entry[1].code = CTX_NOP;
+                  entry[2].code = CTX_NOP;
+                }
+            }
         }
-        else
-        {
-        rasterizer->scanline = scan_min;
-        CtxShapeEntry *shape = ctx_shape_entry_find (rasterizer, hash, width, height); 
-
-        if (shape->uses == 0)
-          {
-            CtxBuffer *buffer_backup = rasterizer->clip_buffer;
-            rasterizer->clip_buffer = NULL;
-            ctx_rasterizer_rasterize_edges (rasterizer, rasterizer->state->gstate.fill_rule, shape);
-            rasterizer->clip_buffer = buffer_backup;
-          }
-
-        int ewidth = x1 - x0;
-        if (ewidth>0)
+#endif
+#if 1
+      else if (entry[0].code == CTX_REL_QUAD_TO)
         {
-          rasterizer->scanline = scan_min;
-          int bpp = rasterizer->format->bpp;
-          if (rasterizer->clip_buffer && !rasterizer->clip_rectangle)
-          {
-          uint8_t composite[ewidth];
-          uint8_t *clip_data = (uint8_t*)rasterizer->clip_buffer->data;
-          int shape_width = shape->width;
-          for (int y = y0; y < y1; y++)
+          if (entry[2].code == CTX_REL_QUAD_TO)
             {
-              if ( (y >= clip_y_min) && (y <= clip_y_max) )
+              float max_dev = find_max_dev (entry, 4);
+              if (max_dev < 114 / CTX_SUBDIV)
                 {
-                    for (int x = 0; x < ewidth; x++)
-                    {
-                      int val = shape->data[shape_width * (int)(y-ymin) + xo + x];
-                      // XXX : not valid for 1bit clip buffers
-                      val = (val*(clip_data) [
-                              ((y-blit_y) * blit_width) + x0 + x])/255;
-                      composite[x] = val;
-                    }
-                    ctx_rasterizer_apply_coverage (rasterizer,
-                                                 ( (uint8_t *) rasterizer->buf) + (y-blit_y) * blit_stride + (int) (x0) * bpp/8,
-                                                 x0, // is 0
-                                                 composite,
-                                                 ewidth );
-               rasterizer->scanline += CTX_FULL_AA;
+                  pack_s8_args (entry, 4);
+                  entry[0].code = CTX_REL_QUAD_TO_REL_QUAD_TO;
+                  entry[1].code = CTX_NOP;
+                  entry[2].code = CTX_NOP;
+                  entry[3].code = CTX_NOP;
+                }
             }
-          }
-          }
           else
-          for (int y = y0; y < y1; y++)
             {
-              if (CTX_LIKELY((y >= clip_y_min) && (y <= clip_y_max) ))
+              float max_dev = find_max_dev (entry, 2);
+              if (max_dev < 3100 / CTX_SUBDIV)
                 {
-                    ctx_rasterizer_apply_coverage (rasterizer,
-                                                 ( (uint8_t *) rasterizer->buf) + (y-blit_y) * blit_stride + 
(int) (x0) * bpp/8,
-                                                 x0,
-                                                 &shape->data[shape->width * (int) (y-ymin) + xo],
-                                                 ewidth );
+                  pack_s16_args (entry, 2);
+                  entry[0].code = CTX_REL_QUAD_TO_S16;
+                  entry[1].code = CTX_NOP;
                 }
-               rasterizer->scanline += CTX_FULL_AA;
             }
         }
-        if (shape->uses != 0)
-          {
-            ctx_rasterizer_reset (rasterizer);
-          }
+#endif
+#if 1
+      else if (entry[0].code == CTX_FILL &&
+               entry[1].code == CTX_MOVE_TO)
+        {
+          entry[0] = entry[1];
+          entry[0].code = CTX_FILL_MOVE_TO;
+          entry[1].code = CTX_NOP;
         }
-      }
-    else
 #endif
-    {
-            
-    ctx_rasterizer_rasterize_edges (rasterizer, rasterizer->state->gstate.fill_rule
-#if CTX_SHAPE_CACHE
-                                    , NULL
+#if 1
+      else if (entry[0].code == CTX_MOVE_TO &&
+               entry[1].code == CTX_MOVE_TO &&
+               entry[2].code == CTX_MOVE_TO)
+        {
+          entry[0]      = entry[2];
+          entry[0].code = CTX_MOVE_TO;
+          entry[1].code = CTX_NOP;
+          entry[2].code = CTX_NOP;
+        }
 #endif
-                                   );
+#if 1
+      else if ( (entry[0].code == CTX_MOVE_TO &&
+                 entry[1].code == CTX_MOVE_TO) ||
+                (entry[0].code == CTX_REL_MOVE_TO &&
+                 entry[1].code == CTX_MOVE_TO) )
+        {
+          entry[0]      = entry[1];
+          entry[0].code = CTX_MOVE_TO;
+          entry[1].code = CTX_NOP;
+        }
+#endif
+      i += (ctx_conts_for_entry (entry) + 1);
     }
-  }
-done:
-  if (CTX_UNLIKELY(rasterizer->preserve))
+
+  unsigned int source = drawlist->bitpack_pos;
+  unsigned int target = drawlist->bitpack_pos;
+  int removed = 0;
+  /* remove nops that have been inserted as part of shortenings
+   */
+  while (source < drawlist->count)
     {
-      memcpy (rasterizer->edge_list.entries, temp, sizeof (temp) );
-      rasterizer->edge_list.count = preserved_count;
+      CtxEntry *sentry = &drawlist->entries[source];
+      CtxEntry *tentry = &drawlist->entries[target];
+      while (sentry->code == CTX_NOP && source < drawlist->count)
+        {
+          source++;
+          sentry = &drawlist->entries[source];
+          removed++;
+        }
+      if (sentry != tentry)
+        { *tentry = *sentry; }
+      source ++;
+      target ++;
     }
-#if CTX_ENABLE_SHADOW_BLUR
-  if (CTX_UNLIKELY(rasterizer->in_shadow))
-  {
-    rasterizer->scan_min -= rasterizer->shadow_y * CTX_FULL_AA;
-    rasterizer->scan_max -= rasterizer->shadow_y * CTX_FULL_AA;
-    rasterizer->col_min  -= (rasterizer->shadow_x - rasterizer->state->gstate.shadow_blur * 3 + 1) * 
CTX_SUBDIV;
-    rasterizer->col_max  -= (rasterizer->shadow_x + rasterizer->state->gstate.shadow_blur * 3 + 1) * 
CTX_SUBDIV;
-  }
+  drawlist->count -= removed;
+  drawlist->bitpack_pos = drawlist->count;
 #endif
-  rasterizer->preserve = 0;
 }
 
-#if 0
-static void
-ctx_rasterizer_triangle (CtxRasterizer *rasterizer,
-                         int x0, int y0,
-                         int x1, int y1,
-                         int x2, int y2,
-                         int r0, int g0, int b0, int a0,
-                         int r1, int g1, int b1, int a1,
-                         int r2, int g2, int b2, int a2,
-                         int u0, int v0,
-                         int u1, int v1)
+#endif
+
+/* Compact a drawlist.
+ *
+ * With CTX_BITPACK_PACKER enabled this calls ctx_drawlist_bitpack(),
+ * handing it the value returned by ctx_last_history() for the same
+ * drawlist.  Without CTX_BITPACK_PACKER nothing is done.
+ */
+static inline void
+ctx_drawlist_compact (CtxDrawlist *drawlist)
 {
+#if CTX_BITPACK_PACKER
+  unsigned int history_start = ctx_last_history (drawlist);
+  ctx_drawlist_bitpack (drawlist, history_start);
+#else
+  /* empty test, only here so the parameter counts as used */
+  if (drawlist) {};
+#endif
+}
 
+/* Return the address of the first u8 data byte of the entry located
+ * 2 + 1 + 1 + ctx_conts_for_entry (&entry[2]) entries after entry.
+ *
+ * NOTE(review): presumably entry is the start of a define-texture
+ * command and the returned bytes are its pixel payload - confirm
+ * against the code that writes the command.
+ */
+uint8_t *ctx_define_texture_pixel_data (CtxEntry *entry)
+{
+  CtxEntry *target = entry + 2 + 1 + 1 + ctx_conts_for_entry (&entry[2]);
+  return &target->data.u8[0];
 }
-#endif
 
+#ifndef __CTX_TRANSFORM
+#define __CTX_TRANSFORM
 
-typedef struct _CtxTermGlyph CtxTermGlyph;
 
-struct _CtxTermGlyph
+/* Apply matrix m to a point but compute only the resulting x.
+ *
+ * m:    matrix to apply (not modified)
+ * x:    in: source x coordinate, out: transformed x coordinate
+ * y_in: source y coordinate, read only
+ *
+ * The result is x * m[0][0] + y_in * m[1][0] + m[2][0].
+ */
+static inline void
+_ctx_matrix_apply_transform_only_x (const CtxMatrix *m, float *x, float y_in)
 {
-  uint32_t unichar;
-  int      col;
-  int      row;
-  uint8_t  rgba_bg[4];
-  uint8_t  rgba_fg[4];
-};
+  float x_in = *x;
+  *x = ( (x_in * m->m[0][0]) + (y_in * m->m[1][0]) + m->m[2][0]);
+}
 
-static int _ctx_glyph (Ctx *ctx, uint32_t unichar, int stroke);
-static void
-ctx_rasterizer_glyph (CtxRasterizer *rasterizer, uint32_t unichar, int stroke)
+/* Public entry point that forwards to _ctx_matrix_apply_transform()
+ * with the same arguments.
+ * NOTE(review): presumably *x and *y are transformed in place by m -
+ * confirm against _ctx_matrix_apply_transform().
+ */
+void
+ctx_matrix_apply_transform (const CtxMatrix *m, float *x, float *y)
 {
-  float tx = rasterizer->state->x;
-  float ty = rasterizer->state->y - rasterizer->state->gstate.font_size;
-  float tx2 = rasterizer->state->x + rasterizer->state->gstate.font_size;
-  float ty2 = rasterizer->state->y + rasterizer->state->gstate.font_size;
-  _ctx_user_to_device (rasterizer->state, &tx, &ty);
-  _ctx_user_to_device (rasterizer->state, &tx2, &ty2);
-
-  if (tx2 < rasterizer->blit_x || ty2 < rasterizer->blit_y) return;
-  if (tx  > rasterizer->blit_x + rasterizer->blit_width ||
-      ty  > rasterizer->blit_y + rasterizer->blit_height)
-          return;
+  _ctx_matrix_apply_transform (m, x, y);
+}
 
-#if CTX_BRAILLE_TEXT
-  float font_size = 0;
-  int ch = 1;
-  int cw = 1;
+/* Run the point (*x, *y) through the transform stored in
+ * state->gstate.transform, using _ctx_matrix_apply_transform().
+ */
+static inline void
+_ctx_user_to_device (CtxState *state, float *x, float *y)
+{
+  _ctx_matrix_apply_transform (&state->gstate.transform, x, y);
+}
 
-  if (rasterizer->term_glyphs)
-  {
-    float tx = 0;
-    font_size = rasterizer->state->gstate.font_size;
+/* Transform the vector (*x, *y) with state->gstate.transform, leaving
+ * out the translation: the full transform is applied first and the
+ * matrix offsets m[2][0] / m[2][1] are then subtracted again.
+ */
+static void
+_ctx_user_to_device_distance (CtxState *state, float *x, float *y)
+{
+  const CtxMatrix *xform = &state->gstate.transform;
+
+  _ctx_matrix_apply_transform (xform, x, y);
+  /* take the translation part back out of the result */
+  *x = *x - xform->m[2][0];
+  *y = *y - xform->m[2][1];
+}
 
-    ch = ctx_term_get_cell_height (rasterizer->ctx);
-    cw = ctx_term_get_cell_width (rasterizer->ctx);
+/* Public wrapper: applies _ctx_user_to_device() to (*x, *y) using the
+ * state embedded in ctx.
+ */
+void ctx_user_to_device          (Ctx *ctx, float *x, float *y)
+{
+  _ctx_user_to_device (&ctx->state, x, y);
+}
+/* Public wrapper: applies _ctx_user_to_device_distance() to (*x, *y)
+ * using the state embedded in ctx.
+ */
+void ctx_user_to_device_distance (Ctx *ctx, float *x, float *y)
+{
+  _ctx_user_to_device_distance (&ctx->state, x, y);
+}
 
-    _ctx_user_to_device_distance (rasterizer->state, &tx, &font_size);
-  }
-  if (rasterizer->term_glyphs && !stroke &&
-      fabs (font_size - ch) < 0.5)
-  {
-    float tx = rasterizer->x;
-    float ty = rasterizer->y;
-    _ctx_user_to_device (rasterizer->state, &tx, &ty);
-    int col = tx / cw + 1;
-    int row = ty / ch + 1;
-    CtxTermGlyph *glyph = ctx_calloc (sizeof (CtxTermGlyph), 1);
-    ctx_list_append (&rasterizer->glyphs, glyph);
-    glyph->unichar = unichar;
-    glyph->col = col;
-    glyph->row = row;
-    ctx_color_get_rgba8 (rasterizer->state, &rasterizer->state->gstate.source_fill.color,
-                         &glyph->rgba_fg[0]);
-  }
-  else
-#endif
-  _ctx_glyph (rasterizer->ctx, unichar, stroke);
-}
-
-static void
-_ctx_text (Ctx        *ctx,
-           const char *string,
-           int         stroke,
-           int         visible);
+/* Store six coefficients in matrix:
+ *   a, b -> m[0][0], m[0][1]
+ *   c, d -> m[1][0], m[1][1]
+ *   e, f -> m[2][0], m[2][1]
+ */
 static void
-ctx_rasterizer_text (CtxRasterizer *rasterizer, const char *string, int stroke)
+ctx_matrix_set (CtxMatrix *matrix, float a, float b, float c, float d, float e, float f)
 {
-#if CTX_BRAILLE_TEXT
-  float font_size = 0;
-  if (rasterizer->term_glyphs)
-  {
-    float tx = 0;
-    font_size = rasterizer->state->gstate.font_size;
-    _ctx_user_to_device_distance (rasterizer->state, &tx, &font_size);
-  }
-  int   ch = ctx_term_get_cell_height (rasterizer->ctx);
-  int   cw = ctx_term_get_cell_width (rasterizer->ctx);
-
-  if (rasterizer->term_glyphs && !stroke &&
-      fabs (font_size - ch) < 0.5)
-  {
-    float tx = rasterizer->x;
-    float ty = rasterizer->y;
-    _ctx_user_to_device (rasterizer->state, &tx, &ty);
-    int col = tx / cw + 1;
-    int row = ty / ch + 1;
-    for (int i = 0; string[i]; i++, col++)
-    {
-      CtxTermGlyph *glyph = ctx_calloc (sizeof (CtxTermGlyph), 1);
-      ctx_list_prepend (&rasterizer->glyphs, glyph);
-      glyph->unichar = string[i];
-      glyph->col = col;
-      glyph->row = row;
-      ctx_color_get_rgba8 (rasterizer->state, &rasterizer->state->gstate.source_fill.color,
-                      glyph->rgba_fg);
-    }
-  }
-  else
-#endif
-  {
-    _ctx_text (rasterizer->ctx, string, stroke, 1);
-  }
+  matrix->m[0][0] = a;
+  matrix->m[0][1] = b;
+  matrix->m[1][0] = c;
+  matrix->m[1][1] = d;
+  matrix->m[2][0] = e;
+  matrix->m[2][1] = f;
 }
 
+
+/* Public wrapper that forwards matrix to _ctx_matrix_identity().
+ * NOTE(review): presumably this resets matrix to the identity
+ * transform - confirm against _ctx_matrix_identity().
+ */
 void
-_ctx_font (Ctx *ctx, const char *name);
-static void
-ctx_rasterizer_set_font (CtxRasterizer *rasterizer, const char *font_name)
+ctx_matrix_identity (CtxMatrix *matrix)
 {
-  _ctx_font (rasterizer->ctx, font_name);
+  _ctx_matrix_identity (matrix);
 }
 
-static void
-ctx_rasterizer_arc (CtxRasterizer *rasterizer,
-                    float          x,
-                    float          y,
-                    float          radius,
-                    float          start_angle,
-                    float          end_angle,
-                    int            anticlockwise)
+/* Public wrapper that forwards (result, t, s) unchanged to
+ * _ctx_matrix_multiply().  Other functions in this file call the
+ * internal variant with result and s being the same matrix.
+ */
+void
+ctx_matrix_multiply (CtxMatrix       *result,
+                     const CtxMatrix *t,
+                     const CtxMatrix *s)
 {
-  int full_segments = CTX_RASTERIZER_MAX_CIRCLE_SEGMENTS;
-  full_segments = radius * CTX_PI * 2 / 4.0;
-  if (full_segments > CTX_RASTERIZER_MAX_CIRCLE_SEGMENTS)
-    { full_segments = CTX_RASTERIZER_MAX_CIRCLE_SEGMENTS; }
-  if (full_segments < 24) full_segments = 24;
-  float step = CTX_PI*2.0/full_segments;
-  int steps;
-
-  if (end_angle < -30.0)
-    end_angle = -30.0;
-  if (start_angle < -30.0)
-    start_angle = -30.0;
-  if (end_angle > 30.0)
-    end_angle = 30.0;
-  if (start_angle > 30.0)
-    start_angle = 30.0;
+  _ctx_matrix_multiply (result, t, s);
+}
 
-  if (radius <= 0.0001)
-          return;
+/* Combine a translation by (x, y) with matrix, in place.
+ *
+ * A temporary matrix with a unit linear part and (x, y) in m[2] is
+ * built and passed to _ctx_matrix_multiply (matrix, &temporary, matrix).
+ */
+void
+ctx_matrix_translate (CtxMatrix *matrix, float x, float y)
+{
+  CtxMatrix shift;
+
+  /* linear part: ones on the diagonal, zeros off it */
+  shift.m[0][0] = shift.m[1][1] = 1.0f;
+  shift.m[0][1] = shift.m[1][0] = 0.0f;
+  /* offset */
+  shift.m[2][0] = x;
+  shift.m[2][1] = y;
+
+  _ctx_matrix_multiply (matrix, &shift, matrix);
+}
 
-  if (end_angle == start_angle)
-          // XXX also detect arcs fully outside render view
-    {
-    if (rasterizer->has_prev!=0)
-      ctx_rasterizer_line_to (rasterizer, x + ctx_cosf (end_angle) * radius,
-                              y + ctx_sinf (end_angle) * radius);
-      else
-      ctx_rasterizer_move_to (rasterizer, x + ctx_cosf (end_angle) * radius,
-                            y + ctx_sinf (end_angle) * radius);
-      return;
-    }
-#if 1
-  if ( (!anticlockwise && fabsf((end_angle - start_angle) - CTX_PI*2) < 0.01f)  ||
-       ( (anticlockwise && fabsf((start_angle - end_angle) - CTX_PI*2) < 0.01f ) ) 
-  ||   (anticlockwise && fabsf((end_angle - start_angle) - CTX_PI*2) < 0.01f)  ||  (!anticlockwise && 
fabsf((start_angle - end_angle) - CTX_PI*2) < 0.01f )  )
-    {
-      steps = full_segments - 1;
-    }
-  else
-#endif
-    {
-      steps = (end_angle - start_angle) / (CTX_PI*2) * full_segments;
-      if (anticlockwise)
-        { steps = full_segments - steps; };
-   // if (steps > full_segments)
-   //   steps = full_segments;
-    }
-  if (anticlockwise) { step = step * -1; }
-  int first = 1;
-  if (steps == 0 /* || steps==full_segments -1  || (anticlockwise && steps == full_segments) */)
-    {
-      float xv = x + ctx_cosf (start_angle) * radius;
-      float yv = y + ctx_sinf (start_angle) * radius;
-      if (!rasterizer->has_prev)
-        { ctx_rasterizer_move_to (rasterizer, xv, yv); }
-      first = 0;
-    }
-  else
-    {
-      for (float angle = start_angle, i = 0; i < steps; angle += step, i++)
-        {
-          float xv = x + ctx_cosf (angle) * radius;
-          float yv = y + ctx_sinf (angle) * radius;
-          if (first && !rasterizer->has_prev)
-            { ctx_rasterizer_move_to (rasterizer, xv, yv); }
-          else
-            { ctx_rasterizer_line_to (rasterizer, xv, yv); }
-          first = 0;
-        }
-    }
-  ctx_rasterizer_line_to (rasterizer, x + ctx_cosf (end_angle) * radius,
-                          y + ctx_sinf (end_angle) * radius);
+/* Combine a scale by (x, y) with matrix, in place.
+ *
+ * A temporary matrix with x and y on the diagonal and zeros elsewhere
+ * is built and passed to _ctx_matrix_multiply (matrix, &temporary, matrix).
+ */
+void
+ctx_matrix_scale (CtxMatrix *matrix, float x, float y)
+{
+  CtxMatrix zoom;
+
+  /* diagonal carries the two scale factors */
+  zoom.m[0][0] = x;
+  zoom.m[1][1] = y;
+  /* no skew, no offset */
+  zoom.m[0][1] = zoom.m[1][0] = 0.0f;
+  zoom.m[2][0] = zoom.m[2][1] = 0.0f;
+
+  _ctx_matrix_multiply (matrix, &zoom, matrix);
 }
 
-static void
-ctx_rasterizer_quad_to (CtxRasterizer *rasterizer,
-                        float        cx,
-                        float        cy,
-                        float        x,
-                        float        y)
+
+/* Returns 1 when both off-diagonal terms m[0][1] and m[1][0] are exactly
+ * zero, otherwise 0.  Any matrix with a non-zero off-diagonal term is
+ * reported as skewed/rotated; that includes a 90 degree rotation, whose
+ * off-diagonal terms are +-1.
+ */
+int
+ctx_matrix_no_skew_or_rotate (CtxMatrix *matrix)
 {
-  /* XXX : it is probably cheaper/faster to do quad interpolation directly -
-   *       though it will increase the code-size, an
-   *       alternative is to turn everything into cubic
-   *       and deal with cubics more directly during
-   *       rasterization
-   */
-  ctx_rasterizer_curve_to (rasterizer,
-                           (cx * 2 + rasterizer->x) / 3.0f, (cy * 2 + rasterizer->y) / 3.0f,
-                           (cx * 2 + x) / 3.0f,           (cy * 2 + y) / 3.0f,
-                           x,                              y);
+  if (matrix->m[0][1] != 0.0f) return 0;
+  if (matrix->m[1][0] != 0.0f) return 0;
+  return 1;
 }
 
-static void
-ctx_rasterizer_rel_quad_to (CtxRasterizer *rasterizer,
-                            float cx, float cy,
-                            float x,  float y)
+/* Combine a rotation by angle with matrix, in place.
+ *
+ * Builds a temporary matrix { cos, sin / -sin, cos / 0, 0 } from
+ * ctx_sinf() and ctx_cosf() and passes it to
+ * _ctx_matrix_multiply (matrix, &transform, matrix).
+ * NOTE(review): angle is presumably in radians - confirm against
+ * ctx_sinf() / ctx_cosf().
+ */
+void
+ctx_matrix_rotate (CtxMatrix *matrix, float angle)
 {
-  ctx_rasterizer_quad_to (rasterizer, cx + rasterizer->x, cy + rasterizer->y,
-                          x  + rasterizer->x, y  + rasterizer->y);
+  CtxMatrix transform;
+  float val_sin = ctx_sinf (angle);
+  float val_cos = ctx_cosf (angle);
+  transform.m[0][0] =  val_cos;
+  transform.m[0][1] = val_sin;
+  transform.m[1][0] = -val_sin;
+  transform.m[1][1] = val_cos;
+  transform.m[2][0] =     0.0f;
+  transform.m[2][1] = 0.0f;
+  _ctx_matrix_multiply (matrix, &transform, matrix);
 }
 
-#define LENGTH_OVERSAMPLE 1
 #if 0
 static void
-ctx_rasterizer_pset (CtxRasterizer *rasterizer, int x, int y, uint8_t cov)
+ctx_matrix_skew_x (CtxMatrix *matrix, float angle)
 {
-  // XXX - we avoid rendering here x==0 - to keep with
-  //  an off-by one elsewhere
-  //
-  //  XXX onlt works in rgba8 formats
-  if (x <= 0 || y < 0 || x >= rasterizer->blit_width ||
-      y >= rasterizer->blit_height)
-    { return; }
-  uint8_t fg_color[4];
-  ctx_color_get_rgba8 (rasterizer->state, &rasterizer->state->gstate.source_fill.color, fg_color);
-  uint8_t pixel[4];
-  uint8_t *dst = ( (uint8_t *) rasterizer->buf);
-  dst += y * rasterizer->blit_stride;
-  dst += x * rasterizer->format->bpp / 8;
-  if (!rasterizer->format->to_comp ||
-      !rasterizer->format->from_comp)
-    { return; }
-  if (cov == 255)
-    {
-      for (int c = 0; c < 4; c++)
-        {
-          pixel[c] = fg_color[c];
-        }
-    }
-  else
-    {
-      rasterizer->format->to_comp (rasterizer, x, dst, &pixel[0], 1);
-      for (int c = 0; c < 4; c++)
-        {
-          pixel[c] = ctx_lerp_u8 (pixel[c], fg_color[c], cov);
-        }
-    }
-  rasterizer->format->from_comp (rasterizer, x, &pixel[0], dst, 1);
+  CtxMatrix transform;
+  float val_tan = ctx_tanf (angle);
+  transform.m[0][0] =    1.0f;
+  transform.m[0][1] = 0.0f;
+  transform.m[1][0] = val_tan;
+  transform.m[1][1] = 1.0f;
+  transform.m[2][0] =    0.0f;
+  transform.m[2][1] = 0.0f;
+  _ctx_matrix_multiply (matrix, &transform, matrix);
 }
-#endif
 
-#if 0
 static void
-ctx_rasterizer_stroke_1px (CtxRasterizer *rasterizer)
+ctx_matrix_skew_y (CtxMatrix *matrix, float angle)
 {
-  int count = rasterizer->edge_list.count;
-  CtxSegment *temp = (CtxSegment*)rasterizer->edge_list.entries;
-  float prev_x = 0.0f;
-  float prev_y = 0.0f;
-  int aa = 15;//rasterizer->aa;
-  int start = 0;
-  int end = 0;
-#if 0
-  float factor = ctx_matrix_get_scale (&state->gstate.transform);
+  CtxMatrix transform;
+  float val_tan = ctx_tanf (angle);
+  transform.m[0][0] =    1.0f;
+  transform.m[0][1] = val_tan;
+  transform.m[1][0] =    0.0f;
+  transform.m[1][1] = 1.0f;
+  transform.m[2][0] =    0.0f;
+  transform.m[2][1] = 0.0f;
+  _ctx_matrix_multiply (matrix, &transform, matrix);
+}
 #endif
 
-  while (start < count)
-    {
-      int started = 0;
-      int i;
-      for (i = start; i < count; i++)
-        {
-          CtxSegment *entry = &temp[i];
-          float x, y;
-          if (entry->code == CTX_NEW_EDGE)
-            {
-              if (started)
-                {
-                  end = i - 1;
-                  goto foo;
-                }
-              prev_x = entry->data.s16[0] * 1.0f / CTX_SUBDIV;
-              prev_y = entry->data.s16[1] * 1.0f / aa;
-              started = 1;
-              start = i;
-            }
-          x = entry->data.s16[2] * 1.0f / CTX_SUBDIV;
-          y = entry->data.s16[3] * 1.0f / aa;
-          int dx = x - prev_x;
-          int dy = y - prev_y;
-          int length = ctx_maxf (abs (dx), abs (dy) );
-          if (length)
-            {
-              length *= LENGTH_OVERSAMPLE;
-              int len = length;
-              int tx = prev_x * 256;
-              int ty = prev_y * 256;
-              dx *= 256;
-              dy *= 256;
-              dx /= length;
-              dy /= length;
-              for (int i = 0; i < len; i++)
-                {
-                  ctx_rasterizer_pset (rasterizer, tx/256, ty/256, 255);
-                  tx += dx;
-                  ty += dy;
-                  ctx_rasterizer_pset (rasterizer, tx/256, ty/256, 255);
-                }
-            }
-          prev_x = x;
-          prev_y = y;
-        }
-      end = i-1;
-foo:
-      start = end+1;
-    }
-  ctx_rasterizer_reset (rasterizer);
+
+/* Issue a CTX_IDENTITY command through the CTX_PROCESS_VOID macro.
+ * NOTE(review): the macro presumably picks up the local name ctx -
+ * confirm against its definition.
+ */
+void
+ctx_identity (Ctx *ctx)
+{
+  CTX_PROCESS_VOID (CTX_IDENTITY);
 }
-#endif
 
-static void
-ctx_rasterizer_stroke (CtxRasterizer *rasterizer)
+
+
+/* Send a CTX_APPLY_TRANSFORM command carrying the six coefficients
+ * a..f to ctx_process().  The command is three entries long: the
+ * command entry holds (a, b), followed by two CTX_CONT entries holding
+ * (c, d) and (e, f).
+ */
+void
+ctx_apply_transform (Ctx *ctx, float a, float b,  // hscale, hskew
+                     float c, float d,  // vskew,  vscale
+                     float e, float f)  // htran,  vtran
 {
-  CtxGState *gstate = &rasterizer->state->gstate;
-  CtxSource source_backup;
-  int count = rasterizer->edge_list.count;
-  if (count <= 0)
-    return;
-  if (gstate->source_stroke.type != CTX_SOURCE_INHERIT_FILL)
+  CtxEntry command[3]=
   {
-    source_backup = gstate->source_fill;
-    gstate->source_fill = rasterizer->state->gstate.source_stroke;
-  }
-  int preserved = rasterizer->preserve;
-  float factor = ctx_matrix_get_scale (&gstate->transform);
-  float line_width = gstate->line_width * factor;
+    ctx_f (CTX_APPLY_TRANSFORM, a, b),
+    ctx_f (CTX_CONT,            c, d),
+    ctx_f (CTX_CONT,            e, f)
+  };
+  ctx_process (ctx, command);
+}
 
-  CtxSegment temp[count]; /* copy of already built up path's poly line  */
-  memcpy (temp, rasterizer->edge_list.entries, sizeof (temp) );
+/* Read the six coefficients of ctx->state.gstate.transform.
+ *
+ * a, b receive m[0][0], m[0][1]; c, d receive m[1][0], m[1][1];
+ * e, f receive m[2][0], m[2][1].  Any of the pointers may be NULL, in
+ * which case that coefficient is skipped.
+ */
+void
+ctx_get_transform  (Ctx *ctx, float *a, float *b,
+                    float *c, float *d,
+                    float *e, float *f)
+{
+  float *outputs[6] = { a, b, c, d, e, f };
+
+  for (int i = 0; i < 6; i++)
+    {
+      /* output i corresponds to row i / 2, column i % 2 */
+      if (outputs[i])
+        { *outputs[i] = ctx->state.gstate.transform.m[i / 2][i % 2]; }
+    }
+}
 
-#if CTX_RECT_FILL
-  if (rasterizer->edge_list.count == 5)
-    {
-      CtxSegment *entry0 = &((CtxSegment*)rasterizer->edge_list.entries)[0];
-      CtxSegment *entry1 = &((CtxSegment*)rasterizer->edge_list.entries)[1];
-      CtxSegment *entry2 = &((CtxSegment*)rasterizer->edge_list.entries)[2];
-      CtxSegment *entry3 = &((CtxSegment*)rasterizer->edge_list.entries)[3];
-      //fprintf (stderr, "{%i %.2f %.2f}", lw, lwmod, line_width);
+/* Send a CTX_SOURCE_TRANSFORM command carrying the six coefficients
+ * a..f to ctx_process().  The command is three entries long: the
+ * command entry holds (a, b), followed by two CTX_CONT entries holding
+ * (c, d) and (e, f).
+ */
+void
+ctx_source_transform (Ctx *ctx, float a, float b,  // hscale, hskew
+                      float c, float d,  // vskew,  vscale
+                      float e, float f)  // htran,  vtran
+{
+  CtxEntry command[3]=
+  {
+    ctx_f (CTX_SOURCE_TRANSFORM, a, b),
+    ctx_f (CTX_CONT,             c, d),
+    ctx_f (CTX_CONT,             e, f)
+  };
+  ctx_process (ctx, command);
+}
 
-      if (!rasterizer->state->gstate.clipped &&
-          (entry0->data.s16[2] == entry1->data.s16[2]) &&
-          (entry0->data.s16[3] == entry3->data.s16[3]) &&
-          (entry1->data.s16[3] == entry2->data.s16[3]) &&
-          (entry2->data.s16[2] == entry3->data.s16[2])
-#if CTX_ENABLE_SHADOW_BLUR
-           && !rasterizer->in_shadow
-#endif
-         )
-       {
-      float lwmod = ctx_fmod1f (line_width);
-      int lw = ctx_floorf (line_width + 0.5f);
-      int is_compat_even = (lw % 2 == 0) && (lwmod < 0.1); // only even linewidths implemented properly
-      int is_compat_odd = (lw % 2 == 1) && (lwmod < 0.1); // only even linewidths implemented properly
+/* Convenience form of ctx_source_transform() that takes its six
+ * coefficients from matrix: rows m[0], m[1] and m[2], in that order.
+ */
+void
+ctx_source_transform_matrix (Ctx *ctx, CtxMatrix *matrix)
+{
+  float hscale = matrix->m[0][0], hskew  = matrix->m[0][1];
+  float vskew  = matrix->m[1][0], vscale = matrix->m[1][1];
+  float htran  = matrix->m[2][0], vtran  = matrix->m[2][1];
+
+  ctx_source_transform (ctx, hscale, hskew, vskew, vscale, htran, vtran);
+}
 
-      int off_x = 0;
-      int off_y = 0;
+/* Convenience form of ctx_apply_transform() that takes its six
+ * coefficients from matrix: rows m[0], m[1] and m[2], in that order.
+ */
+void ctx_apply_matrix (Ctx *ctx, CtxMatrix *matrix)
+{
+  float hscale = matrix->m[0][0], hskew  = matrix->m[0][1];
+  float vskew  = matrix->m[1][0], vscale = matrix->m[1][1];
+  float htran  = matrix->m[2][0], vtran  = matrix->m[2][1];
+
+  ctx_apply_transform (ctx, hscale, hskew, vskew, vscale, htran, vtran);
+}
 
+/* Copy the transform held in ctx->state.gstate into *matrix. */
+void ctx_get_matrix (Ctx *ctx, CtxMatrix *matrix)
+{
+  *matrix = ctx->state.gstate.transform;
+}
 
-      if (is_compat_odd)
-      {
-        off_x = CTX_SUBDIV/2;
-        off_y = CTX_FULL_AA/2;
-      }
+/* Replace the transform of ctx with matrix: issues ctx_identity()
+ * first and then ctx_apply_matrix() with the given matrix.
+ */
+void ctx_set_matrix (Ctx *ctx, CtxMatrix *matrix)
+{
+  ctx_identity (ctx);
+  ctx_apply_matrix (ctx, matrix);
+}
 
-      if((is_compat_odd || is_compat_even) &&
-         (((entry1->data.s16[2]-off_x) % (CTX_SUBDIV))  == 0)  &&
-         (((entry1->data.s16[3]-off_y) % (CTX_FULL_AA)) == 0) &&
-         (((entry3->data.s16[2]-off_x) % (CTX_SUBDIV))  == 0)  &&
-         (((entry3->data.s16[3]-off_y) % (CTX_FULL_AA)) == 0))
-      {
-        float x0 = entry3->data.s16[2] * 1.0f / CTX_SUBDIV;
-        float y0 = entry3->data.s16[3] * 1.0f / CTX_FULL_AA;
-        float x1 = entry1->data.s16[2] * 1.0f / CTX_SUBDIV;
-        float y1 = entry1->data.s16[3] * 1.0f / CTX_FULL_AA;
+/* Issue a CTX_ROTATE command with argument x; a value of exactly 0 is
+ * ignored.  When ctx->transformation has
+ * CTX_TRANSFORMATION_SCREEN_SPACE set, ctx->drawlist.count is
+ * decremented after the command has been processed.
+ * NOTE(review): the decrement presumably drops the entry that was just
+ * appended to the drawlist - confirm against ctx_process().
+ */
+void ctx_rotate (Ctx *ctx, float x)
+{
+  if (x != 0.0f)
+    {
+      CTX_PROCESS_F1 (CTX_ROTATE, x);
+      if (ctx->transformation & CTX_TRANSFORMATION_SCREEN_SPACE)
+        { ctx->drawlist.count--; }
+    }
+}
 
-        int bw = lw/2+1;
-        int bwb = lw/2;
+/* Issue a CTX_SCALE command with factors (x, y); the call is ignored
+ * when both factors are exactly 1.  When ctx->transformation has
+ * CTX_TRANSFORMATION_SCREEN_SPACE set, ctx->drawlist.count is
+ * decremented after the command has been processed.
+ * NOTE(review): the decrement presumably drops the entry that was just
+ * appended to the drawlist - confirm against ctx_process().
+ */
+void ctx_scale (Ctx *ctx, float x, float y)
+{
+  if (x != 1.0f || y != 1.0f)
+    {
+      CTX_PROCESS_F (CTX_SCALE, x, y);
+      if (ctx->transformation & CTX_TRANSFORMATION_SCREEN_SPACE)
+        { ctx->drawlist.count--; }
+    }
+}
 
-        if (is_compat_even)
-        {
-          bw = lw/2;
-        }
-        ctx_rasterizer_fill_rect (rasterizer, x0-bwb, y0-bwb, x1+bw, y0+bw, 255);
-        ctx_rasterizer_fill_rect (rasterizer, x0-bwb, y1-bwb, x1-bwb, y1+bw, 255);
-        ctx_rasterizer_fill_rect (rasterizer, x0-bwb, y0, x0+bw, y1, 255);
-        ctx_rasterizer_fill_rect (rasterizer, x1-bwb, y0, x1+bw, y1+bw, 255);
-        ctx_rasterizer_reset (rasterizer);
-        goto done;
-      }
-       }
-    }
-#endif
-  
-    {
+/* Issue a CTX_TRANSLATE command with offset (x, y); the call is
+ * ignored when both offsets are exactly 0.  When ctx->transformation
+ * has CTX_TRANSFORMATION_SCREEN_SPACE set, ctx->drawlist.count is
+ * decremented after the command has been processed.
+ * NOTE(review): the decrement presumably drops the entry that was just
+ * appended to the drawlist - confirm against ctx_process().
+ */
+void ctx_translate (Ctx *ctx, float x, float y)
+{
+  if (x != 0.0f || y != 0.0f)
+    {
+      CTX_PROCESS_F (CTX_TRANSLATE, x, y);
+      if (ctx->transformation & CTX_TRANSFORMATION_SCREEN_SPACE)
+        { ctx->drawlist.count--; }
+    }
+}
 
-  int aa = CTX_FULL_AA;
-#if 0
-  if (CTX_UNLIKELY(gstate->line_width * factor <= 0.0f &&
-      gstate->line_width * factor > -10.0f))
+/* Invert the matrix m in place.
+ *
+ * The determinant is taken from the four m[0..1][0..1] terms.  When its
+ * magnitude is below 0.0000001 the matrix is treated as non-invertible:
+ * all six coefficients are set to 0 and the function returns.  Otherwise
+ * the inverse is computed from a copy of the original values.
+ */
+void
+ctx_matrix_invert (CtxMatrix *m)
+{
+  /* snapshot of the input; m is overwritten coefficient by coefficient */
+  CtxMatrix t = *m;
+  float invdet, det = m->m[0][0] * m->m[1][1] -
+                      m->m[1][0] * m->m[0][1];
+  if (det > -0.0000001f && det < 0.0000001f)
     {
-      ctx_rasterizer_stroke_1px (rasterizer);
+      m->m[0][0] = m->m[0][1] =
+                     m->m[1][0] = m->m[1][1] =
+                                    m->m[2][0] = m->m[2][1] = 0.0;
+      return;
     }
-  else
+  invdet = 1.0f / det;
+  m->m[0][0] = t.m[1][1] * invdet;
+  m->m[1][0] = -t.m[1][0] * invdet;
+  m->m[2][0] = (t.m[1][0] * t.m[2][1] - t.m[1][1] * t.m[2][0]) * invdet;
+  m->m[0][1] = -t.m[0][1] * invdet;
+  m->m[1][1] = t.m[0][0] * invdet;
+  m->m[2][1] = (t.m[0][1] * t.m[2][0] - t.m[0][0] * t.m[2][1]) * invdet ;
+}
+
+
+
 #endif
-    {
-      if (line_width < 5.0f)
-      {
-      factor *= 0.89; /* this hack adjustment makes sharp 1px and 2px strokewidths
-      //                 end up sharp without erronious AA; we seem to be off by
-      //                 one somewhere else, causing the need for this
-      //                 */
-      line_width *= 0.89f;
-      }
-      ctx_rasterizer_reset (rasterizer); /* then start afresh with our stroked shape  */
-      CtxMatrix transform_backup = gstate->transform;
-      _ctx_matrix_identity (&gstate->transform);
-      float prev_x = 0.0f;
-      float prev_y = 0.0f;
-      float half_width_x = line_width/2;
-      float half_width_y = line_width/2;
-      if (CTX_UNLIKELY(line_width <= 0.0f))
-        { // makes 0 width be hairline
-          half_width_x = .5f;
-          half_width_y = .5f;
-        }
-      int start = 0;
-      int end   = 0;
-      while (start < count)
-        {
-          int started = 0;
-          int i;
-          for (i = start; i < count; i++)
-            {
-              CtxSegment *entry = &temp[i];
-              float x, y;
-              if (CTX_UNLIKELY(entry->code == CTX_NEW_EDGE))
-                {
-                  if (CTX_LIKELY(started))
-                    {
-                      end = i - 1;
-                      goto foo;
-                    }
-                  prev_x = entry->data.s16[0] * 1.0f / CTX_SUBDIV;
-                  prev_y = entry->data.s16[1] * 1.0f / aa;
-                  started = 1;
-                  start = i;
-                }
-              x = entry->data.s16[2] * 1.0f / CTX_SUBDIV;
-              y = entry->data.s16[3] * 1.0f / aa;
-              float dx = x - prev_x;
-              float dy = y - prev_y;
-              float length = ctx_fast_hypotf (dx, dy);
-              if (CTX_LIKELY(length>0.001f))
-                {
-                  float recip_length = 1.0/length;
-                  dx = dx * recip_length * half_width_x;
-                  dy = dy * recip_length * half_width_y;
-                  if (CTX_UNLIKELY(entry->code == CTX_NEW_EDGE))
-                    {
-                      ctx_rasterizer_finish_shape (rasterizer);
-                      ctx_rasterizer_move_to (rasterizer, prev_x+dy, prev_y-dx);
-                    }
-                  ctx_rasterizer_line_to (rasterizer, prev_x-dy, prev_y+dx);
-                  
-                  // we need to know the slope of the other side
+#if CTX_AUDIO
 
-                  // XXX possible miter line-to
-                  //ctx_rasterizer_line_to (rasterizer, prev_x-dy+4, prev_y+dx+10);
-                  //ctx_rasterizer_line_to (rasterizer, prev_x-dy+8, prev_y+dx+0);
+//#include <string.h>
+//#include "ctx-internal.h"
+//#include "mmm.h"
 
+#if !__COSMOPOLITAN__
 
-                  ctx_rasterizer_line_to (rasterizer, x-dy, y+dx);
-                }
-              prev_x = x;
-              prev_y = y;
-            }
-          end = i-1;
-foo:
-          for (int i = end; i >= start; i--)
-            {
-              CtxSegment *entry = &temp[i];
-              float x, y, dx, dy;
-              x = entry->data.s16[2] * 1.0f / CTX_SUBDIV;
-              y = entry->data.s16[3] * 1.0f / aa;
-              dx = x - prev_x;
-              dy = y - prev_y;
-              float length = ctx_fast_hypotf (dx, dy);
-              float recip_length = 1.0f/length;
-              dx = dx * recip_length * half_width_x;
-              dy = dy * recip_length * half_width_y;
-              if (CTX_LIKELY(length>0.001f))
-                {
-                  ctx_rasterizer_line_to (rasterizer, prev_x-dy, prev_y+dx);
-                  // XXX possible miter line-to
-             //   ctx_rasterizer_line_to (rasterizer, prev_x-dy+10, prev_y+dx+10);
-                  ctx_rasterizer_line_to (rasterizer, x-dy,      y+dx);
-                }
-              prev_x = x;
-              prev_y = y;
-              if (CTX_UNLIKELY(entry->code == CTX_NEW_EDGE))
-                {
-                  x = entry->data.s16[0] * 1.0f / CTX_SUBDIV;
-                  y = entry->data.s16[1] * 1.0f / aa;
-                  dx = x - prev_x;
-                  dy = y - prev_y;
-                  length = ctx_fast_hypotf (dx, dy);
-                  recip_length = 1.0f/length;
-                  if (CTX_LIKELY(length>0.001f))
-                    {
-                      dx = dx * recip_length * half_width_x;
-                      dy = dy * recip_length * half_width_y;
-                      ctx_rasterizer_line_to (rasterizer, prev_x-dy, prev_y+dx);
-                      ctx_rasterizer_line_to (rasterizer, x-dy, y+dx);
-                    }
-                }
-              if ( (prev_x != x) && (prev_y != y) )
-                {
-                  prev_x = x;
-                  prev_y = y;
-                }
-            }
-          start = end+1;
-        }
-      ctx_rasterizer_finish_shape (rasterizer);
-      switch (gstate->line_cap)
-        {
-          case CTX_CAP_SQUARE: // XXX: incorrect - if rectangles were in
-                               //                  reverse order - rotation would be off
-                               //                  better implement correct here
-            {
-              float x = 0, y = 0;
-              int has_prev = 0;
-              for (int i = 0; i < count; i++)
-                {
-                  CtxSegment *entry = &temp[i];
-                  if (CTX_UNLIKELY(entry->code == CTX_NEW_EDGE))
-                    {
-                      if (has_prev)
-                        {
-                          ctx_rasterizer_rectangle (rasterizer, x - half_width_x, y - half_width_y, 
half_width_x, half_width_y);
-                          ctx_rasterizer_finish_shape (rasterizer);
-                        }
-                      x = entry->data.s16[0] * 1.0f / CTX_SUBDIV;
-                      y = entry->data.s16[1] * 1.0f / aa;
-                      ctx_rasterizer_rectangle (rasterizer, x - half_width_x, y - half_width_y, half_width_x 
* 2, half_width_y * 2);
-                      ctx_rasterizer_finish_shape (rasterizer);
-                    }
-                  x = entry->data.s16[2] * 1.0f / CTX_SUBDIV;
-                  y = entry->data.s16[3] * 1.0f / aa;
-                  has_prev = 1;
-                }
-              ctx_rasterizer_rectangle (rasterizer, x - half_width_x, y - half_width_y, half_width_x * 2, 
half_width_y * 2);
-              ctx_rasterizer_finish_shape (rasterizer);
-            }
-            break;
-          case CTX_CAP_NONE: /* nothing to do */
-            break;
-          case CTX_CAP_ROUND:
-            {
-              float x = 0, y = 0;
-              int has_prev = 0;
-              for (int i = 0; i < count; i++)
-                {
-                  CtxSegment *entry = &temp[i];
-                  if (CTX_UNLIKELY(entry->code == CTX_NEW_EDGE))
-                    {
-                      if (has_prev)
-                        {
-                          ctx_rasterizer_arc (rasterizer, x, y, half_width_x, CTX_PI*3, 0, 1);
-                          ctx_rasterizer_finish_shape (rasterizer);
-                        }
-                      x = entry->data.s16[0] * 1.0f / CTX_SUBDIV;
-                      y = entry->data.s16[1] * 1.0f / aa;
-                      ctx_rasterizer_arc (rasterizer, x, y, half_width_x, CTX_PI*3, 0, 1);
-                      ctx_rasterizer_finish_shape (rasterizer);
-                    }
-                  x = entry->data.s16[2] * 1.0f / CTX_SUBDIV;
-                  y = entry->data.s16[3] * 1.0f / aa;
-                  has_prev = 1;
-                }
-              ctx_rasterizer_move_to (rasterizer, x, y);
-              ctx_rasterizer_arc (rasterizer, x, y, half_width_x, CTX_PI*3, 0, 1);
-              ctx_rasterizer_finish_shape (rasterizer);
-              break;
-            }
-        }
-      switch (gstate->line_join)
-        {
-          case CTX_JOIN_BEVEL:
-          case CTX_JOIN_MITER:
-            break;
-          case CTX_JOIN_ROUND:
-            {
-              float x = 0, y = 0;
-              for (int i = 0; i < count-1; i++)
-                {
-                  CtxSegment *entry = &temp[i];
-                  x = entry->data.s16[2] * 1.0f / CTX_SUBDIV;
-                  y = entry->data.s16[3] * 1.0f / aa;
-                  if (CTX_UNLIKELY(entry[1].code == CTX_EDGE))
-                    {
-                      ctx_rasterizer_arc (rasterizer, x, y, half_width_x, CTX_PI*2, 0, 1);
-                      ctx_rasterizer_finish_shape (rasterizer);
-                    }
-                }
-              break;
-            }
-        }
-      CtxFillRule rule_backup = gstate->fill_rule;
-      gstate->fill_rule = CTX_FILL_RULE_WINDING;
-      rasterizer->preserve = 0; // so fill isn't tripped
-      ctx_rasterizer_fill (rasterizer);
-      gstate->fill_rule = rule_backup;
-      gstate->transform = transform_backup;
-    }
-  }
-done:
-  if (preserved)
-    {
-      memcpy (rasterizer->edge_list.entries, temp, sizeof (temp) );
-      rasterizer->edge_list.count = count;
-      rasterizer->preserve = 0;
-    }
-  if (gstate->source_stroke.type != CTX_SOURCE_INHERIT_FILL)
-    gstate->source_fill = source_backup;
-}
-
-#if CTX_1BIT_CLIP
-#define CTX_CLIP_FORMAT CTX_FORMAT_GRAY1
-#else
-#define CTX_CLIP_FORMAT CTX_FORMAT_GRAY8
+#include <pthread.h>
+#if CTX_ALSA_AUDIO
+#include <alsa/asoundlib.h>
 #endif
 
 
-static void
-ctx_rasterizer_clip_reset (CtxRasterizer *rasterizer)
-{
-#if CTX_ENABLE_CLIP
-  if (rasterizer->clip_buffer)
-   ctx_buffer_free (rasterizer->clip_buffer);
-  rasterizer->clip_buffer = NULL;
+
+//#include <alloca.h>
+
 #endif
-  rasterizer->state->gstate.clip_min_x = rasterizer->blit_x;
-  rasterizer->state->gstate.clip_min_y = rasterizer->blit_y;
 
-  rasterizer->state->gstate.clip_max_x = rasterizer->blit_x + rasterizer->blit_width - 1;
-  rasterizer->state->gstate.clip_max_y = rasterizer->blit_y + rasterizer->blit_height - 1;
-}
+#define DESIRED_PERIOD_SIZE 1000
 
-static void
-ctx_rasterizer_clip_apply (CtxRasterizer *rasterizer,
-                           CtxSegment    *edges)
+int ctx_pcm_bytes_per_frame (CtxPCM format) /* size in bytes of one frame (one sample per channel) of `format` */
 {
-  int count = edges[0].data.u32[0];
-
-  int minx = 5000;
-  int miny = 5000;
-  int maxx = -5000;
-  int maxy = -5000;
-  int prev_x = 0;
-  int prev_y = 0;
-  int blit_width = rasterizer->blit_width;
-  int blit_height = rasterizer->blit_height;
+  switch (format)
+  {
+    case CTX_f32:  return 4; /* one float sample */
+    case CTX_f32S: return 8; /* two float samples */
+    case CTX_s16:  return 2; /* one 16-bit sample */
+    case CTX_s16S: return 4; /* two 16-bit samples */
+    default: return 1; /* any other format value is reported as 1 byte per frame */
+  }
+}
 
-  int aa = 15;//rasterizer->aa;
-  float coords[6][2];
+static float    ctx_host_freq     = 48000; /* output sample rate; ctx_pcm_init lowers it to 8000 for the ctx backend */
+static CtxPCM   ctx_host_format   = CTX_s16S; /* output format; its channel count is passed to alsa_open */
+static float    client_freq   = 48000; /* sample rate of the data handed to ctx_pcm_queue */
+static CtxPCM   ctx_client_format = CTX_s16S; /* sample format of the queued packets */
+static int      ctx_pcm_queued    = 0; /* total frames queued and not yet consumed */
+static int      ctx_pcm_cur_left  = 0; /* frames not yet consumed from the packet at the head of ctx_pcm_list */
+static CtxList *ctx_pcm_list;                 /* each entry's data is a blob: a 16 byte header starting with a 32bit uint frame count, followed by pcm-data */
 
-  for (int i = 0; i < count; i++)
-    {
-      CtxSegment *entry = &edges[i+1];
-      float x, y;
-      if (entry->code == CTX_NEW_EDGE)
-        {
-          prev_x = entry->data.s16[0] * 1.0f / CTX_SUBDIV;
-          prev_y = entry->data.s16[1] * 1.0f / aa;
-          if (prev_x < minx) { minx = prev_x; }
-          if (prev_y < miny) { miny = prev_y; }
-          if (prev_x > maxx) { maxx = prev_x; }
-          if (prev_y > maxy) { maxy = prev_y; }
-        }
-      x = entry->data.s16[2] * 1.0f / CTX_SUBDIV;
-      y = entry->data.s16[3] * 1.0f / aa;
-      if (x < minx) { minx = x; }
-      if (y < miny) { miny = y; }
-      if (x > maxx) { maxx = x; }
-      if (y > maxy) { maxy = y; }
 
-      if (i < 6)
-      {
-        coords[i][0] = x;
-        coords[i][1] = y;
-      }
-    }
+//static long int ctx_pcm_queued_ticks = 0;  /*  the number of ticks into the future
+  //                                      *  we've queued audio for
+                                       
 
-#if CTX_ENABLE_CLIP
 
-  if ((rasterizer->clip_rectangle==1
-       || !rasterizer->clip_buffer)
-      )
+int
+ctx_pcm_channels (CtxPCM format) /* number of interleaved channels in `format`; 0 for an unrecognised value */
+{
+  switch (format)
   {
-    if (count == 5)
-    {
-      if (coords[0][0] == coords[1][0] &&
-          coords[0][1] == coords[4][1] &&
-          coords[0][1] == coords[3][1] &&
-          coords[1][1] == coords[2][1] &&
-          coords[3][0] == coords[4][0]
-          )
-      {
-#if 0
-        printf ("%d,%d %dx%d\n", minx, miny,
-                                       maxx-minx+1, maxy-miny+1);
-#endif
-
-         rasterizer->state->gstate.clip_min_x =
-            ctx_maxi (minx, rasterizer->state->gstate.clip_min_x);
-         rasterizer->state->gstate.clip_min_y =
-            ctx_maxi (miny, rasterizer->state->gstate.clip_min_y);
-         rasterizer->state->gstate.clip_max_x =
-            ctx_mini (maxx, rasterizer->state->gstate.clip_max_x);
-         rasterizer->state->gstate.clip_max_y =
-            ctx_mini (maxy, rasterizer->state->gstate.clip_max_y);
-
-         rasterizer->clip_rectangle = 1;
+    case CTX_s16:
+    case CTX_f32:
+      return 1; /* single-channel formats */
+    case CTX_s16S:
+    case CTX_f32S:
+      return 2; /* two-channel formats */
+  }
+  return 0; /* reached only when no case matched */
+}
 
-#if 0
-         if (!rasterizer->clip_buffer)
-           rasterizer->clip_buffer = ctx_buffer_new (blit_width,
-                                                     blit_height,
-                                                     CTX_CLIP_FORMAT);
+/* todo: only start audio thread on first write - enabling dynamic choice
+ * of sample-rate? or is it better to keep to opening 48000 as a standard
+ * and do better internal resampling for others?
+ */
 
-         memset (rasterizer->clip_buffer->data, 0, blit_width * blit_height);
-         int i = 0;
-         for (int y = rasterizer->state->gstate.clip_min_y;
-                  y <= rasterizer->state->gstate.clip_max_y;
-                  y++)
-         for (int x = rasterizer->state->gstate.clip_min_x;
-                  x <= rasterizer->state->gstate.clip_max_x;
-                  x++, i++)
-         {
-           ((uint8_t*)(rasterizer->clip_buffer->data))[i] = 255;
-         }
-#endif
+#if CTX_ALSA_AUDIO
+static snd_pcm_t *alsa_open (char *dev, int rate, int channels)
+{
+   snd_pcm_hw_params_t *hwp;
+   snd_pcm_sw_params_t *swp;
+   snd_pcm_t *h;
+   int r;
+   int dir;
+   snd_pcm_uframes_t period_size_min;
+   snd_pcm_uframes_t period_size_max;
+   snd_pcm_uframes_t period_size;
+   snd_pcm_uframes_t buffer_size;
 
-         return;
-      }
-#if 0
-      else
-      {
-        printf ("%d,%d %dx%d  0,0:%.2f 0,1:%.2f 1,0:%.2f 11:%.2f 20:%.2f 21:%2.f 30:%.2f 31:%.2f 40:%.2f 
41:%.2f\n", minx, miny,
-                                       maxx-minx+1, maxy-miny+1
-                                       
-         ,coords[0][0] ,  coords[0][1]
-         ,coords[1][0] ,  coords[1][1]
-         ,coords[2][0] ,  coords[2][1]
-         ,coords[3][0] ,  coords[3][1]
-         ,coords[4][0] ,  coords[4][1]
-         );
-      }
-#endif
-    }
-  }
-  rasterizer->clip_rectangle = 0;
+   /* Open the playback device; the assignment is parenthesised on its own
+    * so that r receives the ALSA return code rather than the result of
+    * the `< 0` comparison. */
+   if ((r = snd_pcm_open(&h, dev, SND_PCM_STREAM_PLAYBACK, 0)) < 0)
+           return NULL;
 
-  if ((minx == maxx) || (miny == maxy)) // XXX : reset hack
-  {
-    ctx_rasterizer_clip_reset (rasterizer);
-    return;//goto done;
-  }
+   hwp = alloca(snd_pcm_hw_params_sizeof());
+   memset(hwp, 0, snd_pcm_hw_params_sizeof());
+   snd_pcm_hw_params_any(h, hwp);
 
-  int we_made_it = 0;
-  CtxBuffer *clip_buffer;
+   snd_pcm_hw_params_set_access(h, hwp, SND_PCM_ACCESS_RW_INTERLEAVED);
+   snd_pcm_hw_params_set_format(h, hwp, SND_PCM_FORMAT_S16_LE);
+   snd_pcm_hw_params_set_rate(h, hwp, rate, 0);
+   snd_pcm_hw_params_set_channels(h, hwp, channels);
+   dir = 0;
+   snd_pcm_hw_params_get_period_size_min(hwp, &period_size_min, &dir);
+   dir = 0;
+   snd_pcm_hw_params_get_period_size_max(hwp, &period_size_max, &dir);
 
-  if (!rasterizer->clip_buffer)
-  {
-    rasterizer->clip_buffer = ctx_buffer_new (blit_width,
-                                              blit_height,
-                                              CTX_CLIP_FORMAT);
-    clip_buffer = rasterizer->clip_buffer;
-    we_made_it = 1;
-    if (CTX_CLIP_FORMAT == CTX_FORMAT_GRAY1)
-      memset (rasterizer->clip_buffer->data, 0, blit_width * blit_height/8);
-    else
-      memset (rasterizer->clip_buffer->data, 0, blit_width * blit_height);
-  }
-  else
-  {
-    clip_buffer = ctx_buffer_new (blit_width, blit_height,
-                                  CTX_CLIP_FORMAT);
-  }
+   period_size = DESIRED_PERIOD_SIZE;
 
-  {
+   dir = 0;
+   r = snd_pcm_hw_params_set_period_size_near(h, hwp, &period_size, &dir);
+   r = snd_pcm_hw_params_get_period_size(hwp, &period_size, &dir);
+   buffer_size = period_size * 4;
+   r = snd_pcm_hw_params_set_buffer_size_near(h, hwp, &buffer_size);
+   r = snd_pcm_hw_params(h, hwp);
+   swp = alloca(snd_pcm_sw_params_sizeof());
+   /* zero the software-parameter block that was just allocated; clearing
+    * hwp here with the sw-params size would touch the wrong buffer */
+   memset(swp, 0, snd_pcm_sw_params_sizeof());
+   snd_pcm_sw_params_current(h, swp);
+   r = snd_pcm_sw_params_set_avail_min(h, swp, period_size);
+   snd_pcm_sw_params_set_start_threshold(h, swp, 0);
+   r = snd_pcm_sw_params(h, swp);
+   r = snd_pcm_prepare(h);
 
-  int prev_x = 0;
-  int prev_y = 0;
+   return h;
+}
 
-    Ctx *ctx = ctx_new_for_framebuffer (clip_buffer->data, blit_width, blit_height,
-       blit_width,
-       CTX_CLIP_FORMAT);
+static  snd_pcm_t *h = NULL;
+static void *ctx_alsa_audio_start(Ctx *ctx)
+{
+//  Lyd *lyd = aux;
+  int c;
 
-  for (int i = 0; i < count; i++)
-    {
-      CtxSegment *entry = &edges[i+1];
-      float x, y;
-      if (entry->code == CTX_NEW_EDGE)
-        {
-          prev_x = entry->data.s16[0] * 1.0f / CTX_SUBDIV;
-          prev_y = entry->data.s16[1] * 1.0f / aa;
-          ctx_move_to (ctx, prev_x, prev_y);
-        }
-      x = entry->data.s16[2] * 1.0f / CTX_SUBDIV;
-      y = entry->data.s16[3] * 1.0f / aa;
-      ctx_line_to (ctx, x, y);
-    }
-    ctx_gray (ctx, 1.0f);
-    ctx_fill (ctx);
-    ctx_free (ctx);
-  }
+  /* The audio handler is implemented as a mixer that adds data on top
+   * of 0s, XXX: it should be ensured that minimal work is done when
+   * there is no data available.
+   */
+  for (;;)
+  {
+    int client_channels = ctx_pcm_channels (ctx_client_format);
+    int is_float = 0;
+    int16_t data[81920*8]={0,};
 
-  int maybe_rect = 1;
-  rasterizer->clip_rectangle = 0;
+    if (ctx_client_format == CTX_f32 ||
+        ctx_client_format == CTX_f32S)
+      is_float = 1;
 
-  if (CTX_CLIP_FORMAT == CTX_FORMAT_GRAY1)
-  {
-    int count = blit_width * blit_height / 8;
-    for (int i = 0; i < count; i++)
-    {
-      ((uint8_t*)rasterizer->clip_buffer->data)[i] =
-      (((uint8_t*)rasterizer->clip_buffer->data)[i] &
-      ((uint8_t*)clip_buffer->data)[i]);
-    }
-  }
-  else
-  {
-    int count = blit_width * blit_height;
+    c = snd_pcm_wait(h, 1000);
 
+    if (c >= 0)
+       c = snd_pcm_avail_update(h);
 
-    int i;
-    int x0 = 0;
-    int y0 = 0;
-    int width = -1;
-    int next_stage = 0;
-    uint8_t *p_data = (uint8_t*)rasterizer->clip_buffer->data;
-    uint8_t *data = (uint8_t*)clip_buffer->data;
+    if (c > 1000) c = 1000; // should use max mmm buffer sizes
 
-    i=0;
-    /* find upper left */
-    for (; i < count && maybe_rect && !next_stage; i++)
-    {
-      uint8_t val = (p_data[i] * data[i])/255;
-      data[i] = val;
-      switch (val)
-      {
-        case 255:
-          x0 = i % blit_width;
-          y0 = i / blit_width;
-          next_stage = 1;
-          break;
-        case 0: break;
-        default:
-          maybe_rect = 0;
-          break;
-      }
-    }
+    if (c == -EPIPE)
+      snd_pcm_prepare(h);
 
-    next_stage = 0;
-    /* figure out with */
-    for (; i < count && !next_stage && maybe_rect; i++)
+    if (c > 0)
     {
-      int x = i % blit_width;
-      int y = i / blit_width;
-      uint8_t val = (p_data[i] * data[i])/255;
-      data[i] = val;
-
-      if (y == y0)
+      int i;
+      for (i = 0; i < c && ctx_pcm_cur_left; i ++)
       {
-        switch (val)
+        if (ctx_pcm_cur_left)  //  XXX  this line can be removed
         {
-          case 255:
-            width = x - x0 + 1;
-            break;
-          case 0:
-            next_stage = 1;
-            break;
-          default:
-            maybe_rect = 0;
-            break;
-        }
-        if (x % blit_width == blit_width - 1) next_stage = 1;
-      }
-      else next_stage = 1;
-    }
+          uint32_t *packet_sizep = (ctx_pcm_list->data);
+          uint32_t packet_size = *packet_sizep;
+          uint16_t left = 0, right = 0;
 
-    next_stage = 0;
-    /* body */
-    for (; i < count && maybe_rect && !next_stage; i++)
-    {
-      int x = i % blit_width;
-      uint8_t val = (p_data[i] * data[i])/255;
-      data[i] = val;
+          if (is_float)
+          {
+            float *packet = (ctx_pcm_list->data);
+            packet += 4; /* skip the 16 byte packet header */
+            /* advance to the first frame of this packet not yet played */
+            packet += (packet_size - ctx_pcm_cur_left) * client_channels;
+            left = right = packet[0] * (1<<15);
+            if (client_channels > 1)
+              right = packet[1] * (1<<15); /* second interleaved sample is the right channel */
+          }
+          else // s16
+          {
+            uint16_t *packet = (ctx_pcm_list->data);
+            packet += 8;
+            packet += (packet_size - ctx_pcm_cur_left) * client_channels;
 
-      if (x < x0)
-      {
-        if (val != 0){ maybe_rect = 0; next_stage = 1; }
-      } else if (x < x0 + width)
-      {
-        if (val != 255){ if (val != 0) maybe_rect = 0; next_stage = 1; }
-      } else {
-        if (val != 0){ maybe_rect = 0; next_stage = 1; }
+            left = right = packet[0];
+            if (client_channels > 1)
+              right = packet[1];
+          }
+          data[i * 2 + 0] = left;
+          data[i * 2 + 1] = right;
+
+          ctx_pcm_cur_left--;
+          ctx_pcm_queued --;
+          if (ctx_pcm_cur_left == 0)
+          {
+            void *old = ctx_pcm_list->data;
+            ctx_list_remove (&ctx_pcm_list, ctx_pcm_list->data);
+            free (old);
+            ctx_pcm_cur_left = 0;
+            if (ctx_pcm_list)
+            {
+              uint32_t *packet_sizep = (ctx_pcm_list->data);
+              uint32_t packet_size = *packet_sizep;
+              ctx_pcm_cur_left = packet_size;
+            }
+          }
+        }
       }
+
+    c = snd_pcm_writei(h, data, c);
+    if (c < 0)
+      c = snd_pcm_recover (h, c, 0);
+     }else{
+      if (getenv("LYD_FATAL_UNDERRUNS"))
+        {
+          printf ("dying XXxx need to add API for this debug\n");
+          //printf ("%i", lyd->active);
+          exit(0);
+        }
+      fprintf (stderr, "ctx alsa underun\n");
+      //exit(0);
     }
+  }
+}
+#endif
 
-    next_stage = 0;
-    /* foot */
-    for (; i < count && maybe_rect && !next_stage; i++)
-    {
-      uint8_t val = (p_data[i] * data[i])/255;
-      data[i] = val;
+static char MuLawCompressTable[256] = /* exponent lookup used by LinearToMuLawSample: entry i is the position of the highest set bit of i (0 for i == 0) */
+{
+   0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,
+   4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+   5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+   5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+   6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+   6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+   6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+   6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
+};
 
-      if (val != 0){ maybe_rect = 0; next_stage = 1; }
-    }
+/* Encode one signed 16-bit linear PCM sample as an 8-bit mu-law byte.
+ *
+ * The magnitude is clipped to 32635, biased by 0x84 and split into an
+ * exponent (looked up in MuLawCompressTable) and a 4-bit mantissa; sign,
+ * exponent and mantissa are packed into one byte which is returned
+ * bit-inverted.
+ */
+static unsigned char LinearToMuLawSample(int16_t sample)
+{
+  const int cBias = 0x84;
+  const int cClip = 32635;
+  int sign = (sample >> 8) & 0x80;
+  /* work in int: the negation of -32768 does not fit in int16_t, which
+   * would let the most negative sample slip past the clip below */
+  int magnitude = sample;
 
+  if (sign)
+    magnitude = -magnitude;
 
-    for (; i < count; i++)
+  if (magnitude > cClip)
+    magnitude = cClip;
+
+  magnitude += cBias;
+
+  int exponent = (int)MuLawCompressTable[(magnitude>>7) & 0xFF];
+  int mantissa = (magnitude >> (exponent+3)) & 0x0F;
+
+  int compressedByte = ~ (sign | (exponent << 4) | mantissa);
+
+  return (unsigned char)compressedByte;
+}
+
+/* Drain up to 2000 queued client frames, mix each frame down to a single
+ * mu-law byte, ascii85-encode the bytes and write them to stdout wrapped
+ * in an "\033_Af=<count>;" ... "\033\\" escape sequence.
+ *
+ * ctx: not used by this function.
+ */
+void ctx_ctx_pcm (Ctx *ctx)
+{
+    int client_channels = ctx_pcm_channels (ctx_client_format);
+    int is_float = 0;
+    uint8_t data[81920*8]={0,};
+    int c;
+
+    if (ctx_client_format == CTX_f32 ||
+        ctx_client_format == CTX_f32S)
+      is_float = 1;
+
+    c = 2000; /* upper bound on frames consumed per call */
+
+    if (c > 0)
     {
-      uint8_t val = (p_data[i] * data[i])/255;
-      data[i] = val;
+      int i;
+      for (i = 0; i < c && ctx_pcm_cur_left; i ++)
+      {
+        if (ctx_pcm_cur_left)  //  XXX  this line can be removed
+        {
+          uint32_t *packet_sizep = (ctx_pcm_list->data);
+          uint32_t packet_size = *packet_sizep;
+          int left = 0, right = 0;
+          int mono;
+
+          if (is_float)
+          {
+            float *packet = (ctx_pcm_list->data);
+            packet += 4; /* skip the 16 byte packet header */
+            packet += (packet_size - ctx_pcm_cur_left) * client_channels;
+            left = right = packet[0] * (1<<15);
+            if (client_channels > 1)
+              right = packet[1] * (1<<15);
+          }
+          else // s16
+          {
+            /* samples are signed; reading them as unsigned breaks the
+             * stereo average whenever the two channels differ in sign */
+            int16_t *packet = (ctx_pcm_list->data);
+            packet += 8; /* skip the 16 byte packet header */
+            packet += (packet_size - ctx_pcm_cur_left) * client_channels;
+
+            left = right = packet[0];
+            if (client_channels > 1)
+              right = packet[1];
+          }
+          /* keep the mix inside the int16_t range the encoder expects;
+           * float input at or above full scale would otherwise wrap */
+          mono = (left+right)/2;
+          if (mono > 32767)
+            mono = 32767;
+          if (mono < -32768)
+            mono = -32768;
+          data[i] = LinearToMuLawSample((int16_t)mono);
+
+          ctx_pcm_cur_left--;
+          ctx_pcm_queued --;
+          if (ctx_pcm_cur_left == 0)
+          {
+            /* head packet exhausted: drop it and move on to the next */
+            void *old = ctx_pcm_list->data;
+            ctx_list_remove (&ctx_pcm_list, ctx_pcm_list->data);
+            free (old);
+            ctx_pcm_cur_left = 0;
+            if (ctx_pcm_list)
+            {
+              uint32_t *packet_sizep = (ctx_pcm_list->data);
+              uint32_t packet_size = *packet_sizep;
+              ctx_pcm_cur_left = packet_size;
+            }
+          }
+        }
+      }
+
+    char encoded[81920*8]="";
+
+    int encoded_len = ctx_a85enc (data, encoded, i);
+    fprintf (stdout, "\033_Af=%i;", i);
+    fwrite (encoded, 1, encoded_len, stdout);
+    /* "\033" is the portable spelling of ESC; "\e" is a compiler extension */
+    fwrite ("\033\\", 1, 2, stdout);
+    fflush (stdout);
     }
+}
 
-    if (maybe_rect)
-       rasterizer->clip_rectangle = 1;
+int ctx_pcm_init (Ctx *ctx) /* set up audio output for ctx; returns 0, or -1 when the ALSA device cannot be opened */
+{
+#if 0
+  if (!strcmp (ctx->backend->name, "mmm") ||
+      !strcmp (ctx->backend->name, "mmm-client"))
+  {
+    return 0;
   }
-  if (!we_made_it)
-   ctx_buffer_free (clip_buffer);
-#else
-  if (coords[0][0]){};
+  else
 #endif
-  
-  rasterizer->state->gstate.clip_min_x = ctx_maxi (minx,
-                                         rasterizer->state->gstate.clip_min_x);
-  rasterizer->state->gstate.clip_min_y = ctx_maxi (miny,
-                                         rasterizer->state->gstate.clip_min_y);
-  rasterizer->state->gstate.clip_max_x = ctx_mini (maxx,
-                                         rasterizer->state->gstate.clip_max_x);
-  rasterizer->state->gstate.clip_max_y = ctx_mini (maxy,
-                                         rasterizer->state->gstate.clip_max_y);
+  if (ctx_backend_type (ctx) == CTX_BACKEND_CTX)
+  {
+     ctx_host_freq = 8000; /* the ctx backend gets 8000 Hz single-channel output */
+     ctx_host_format = CTX_s16;
+#if 0
+     pthread_t tid;
+     pthread_create(&tid, NULL, (void*)ctx_audio_start, ctx);
+#endif
+  }
+  else
+  {
+#if CTX_ALSA_AUDIO
+     pthread_t tid;
+     h = alsa_open("default", ctx_host_freq, ctx_pcm_channels (ctx_host_format)); /* stored in the file-level handle h */
+  if (!h) {
+    fprintf(stderr, "ctx unable to open ALSA device (%d channels, %f Hz), dying\n",
+            ctx_pcm_channels (ctx_host_format), ctx_host_freq);
+    return -1;
+  }
+  pthread_create(&tid, NULL, (void*)ctx_alsa_audio_start, ctx); /* the playback loop runs on its own thread */
+#endif
+  }
+  return 0;
 }
 
-static void
-ctx_rasterizer_clip (CtxRasterizer *rasterizer)
+int ctx_pcm_queue (Ctx *ctx, const int8_t *data, int frames)
 {
-  int count = rasterizer->edge_list.count;
-  CtxSegment temp[count+1]; /* copy of already built up path's poly line  */
-  rasterizer->state->has_clipped=1;
-  rasterizer->state->gstate.clipped=1;
-  //if (rasterizer->preserve)
-    { memcpy (temp + 1, rasterizer->edge_list.entries, sizeof (temp) - sizeof (temp[0]));
-      temp[0].code = CTX_NOP;
-      temp[0].data.u32[0] = count;
-      ctx_state_set_blob (rasterizer->state, CTX_clip, (uint8_t*)temp, sizeof(temp));
-    }
-  ctx_rasterizer_clip_apply (rasterizer, temp);
-  ctx_rasterizer_reset (rasterizer);
-  if (rasterizer->preserve)
+  static int inited = 0;
+#if 0
+  if (!strcmp (ctx->backend->name, "mmm") ||
+      !strcmp (ctx->backend->name, "mmm-client"))
+  {
+    return mmm_pcm_queue (ctx->backend_data, data, frames);
+  }
+  else
+#endif
+  {
+    if (!inited)
     {
-      memcpy (rasterizer->edge_list.entries, temp + 1, sizeof (temp) - sizeof(temp[0]));
-      rasterizer->edge_list.count = count;
-      rasterizer->preserve = 0;
+      ctx_pcm_init (ctx);
+      inited = 1;
     }
-}
+    float factor = client_freq * 1.0 / ctx_host_freq;
+    int   scaled_frames = frames / factor;
+    int   bpf = ctx_pcm_bytes_per_frame (ctx_client_format);
 
+    /* a packet is a 16 byte header (frame count in the first 32 bits)
+     * followed by the PCM payload */
+    uint8_t *packet = malloc (scaled_frames * bpf + 16);
+    if (!packet)
+      return 0; /* out of memory: nothing was queued */
+    *((uint32_t *)packet) = scaled_frames;
 
-#if 0
-static void
-ctx_rasterizer_load_image (CtxRasterizer *rasterizer,
-                           const char  *path,
-                           float x,
-                           float y)
-{
-  // decode PNG, put it in image is slot 1,
-  // magic width height stride format data
-  ctx_buffer_load_png (&rasterizer->ctx->texture[0], path);
-  ctx_rasterizer_set_texture (rasterizer, 0, x, y);
-}
-#endif
+    if (factor > 0.999 && factor < 1.0001)
+    {
+       memcpy (packet + 16, data, frames * bpf);
+    }
+    else
+    {
+      /* a crude nearest / sample-and hold resampler */
+      int i;
+      for (i = 0; i < scaled_frames; i++)
+      {
+        int source_frame = i * factor;
+        memcpy (packet + 16 + bpf * i, data + source_frame * bpf, bpf);
+      }
+    }
+    if (ctx_pcm_list == NULL)     // otherwise it is another frame at front
+      ctx_pcm_cur_left = scaled_frames;  // and current cur_left is valid
 
+    ctx_list_append (&ctx_pcm_list, packet);
+    ctx_pcm_queued += scaled_frames;
 
-CTX_INLINE void
-ctx_rasterizer_rectangle (CtxRasterizer *rasterizer,
-                          float x,
-                          float y,
-                          float width,
-                          float height)
-{
-  ctx_rasterizer_move_to (rasterizer, x, y);
-  ctx_rasterizer_rel_line_to (rasterizer, width, 0);
-  ctx_rasterizer_rel_line_to (rasterizer, 0, height);
-  ctx_rasterizer_rel_line_to (rasterizer, -width, 0);
-  ctx_rasterizer_rel_line_to (rasterizer, 0, -height);
-  //ctx_rasterizer_rel_line_to (rasterizer, width/2, 0);
-  ctx_rasterizer_finish_shape (rasterizer);
+    return frames;
+  }
+  return 0;
 }
 
-static void
-ctx_rasterizer_set_pixel (CtxRasterizer *rasterizer,
-                          uint16_t x,
-                          uint16_t y,
-                          uint8_t r,
-                          uint8_t g,
-                          uint8_t b,
-                          uint8_t a)
+static int ctx_pcm_get_queued_frames (Ctx *ctx) /* number of frames queued and not yet consumed */
 {
-  rasterizer->state->gstate.source_fill.type = CTX_SOURCE_COLOR;
-  ctx_color_set_RGBA8 (rasterizer->state, &rasterizer->state->gstate.source_fill.color, r, g, b, a);
-  rasterizer->comp_op = NULL;
 #if 0
-  // XXX : doesn't take transforms into account - and has
-  // received less testing than code paths part of protocol,
-  // using rectangle properly will trigger the fillrect fastpath
-  ctx_rasterizer_pset (rasterizer, x, y, 255);
-#else
-  ctx_rasterizer_rectangle (rasterizer, x, y, 1.0, 1.0);
-  ctx_rasterizer_fill (rasterizer);
+  if (!strcmp (ctx->backend->name, "mmm") ||
+      !strcmp (ctx->backend->name, "mmm-client"))
+  {
+    return mmm_pcm_get_queued_frames (ctx->backend_data);
+  }
 #endif
+  return ctx_pcm_queued; /* the mmm branch above is compiled out, so ctx is unused */
 }
 
-#if CTX_ENABLE_SHADOW_BLUR
-static float
-ctx_gaussian (float x, float mu, float sigma)
+int ctx_pcm_get_queued (Ctx *ctx) /* public wrapper: number of frames currently queued */
 {
-  float a = ( x- mu) / sigma;
-  return ctx_expf (-0.5 * a * a);
+  return ctx_pcm_get_queued_frames (ctx);
 }
 
-static void
-ctx_compute_gaussian_kernel (int dim, float radius, float *kernel)
+float ctx_pcm_get_queued_length (Ctx *ctx) /* queued frame count divided by the host sample rate */
 {
-  float sigma = radius / 2;
-  float sum = 0.0;
-  int i = 0;
-  //for (int row = 0; row < dim; row ++)
-    for (int col = 0; col < dim; col ++, i++)
-    {
-      float val = //ctx_gaussian (row, radius, sigma) *
-                            ctx_gaussian (col, radius, sigma);
-      kernel[i] = val;
-      sum += val;
-    }
-  i = 0;
-  //for (int row = 0; row < dim; row ++)
-    for (int col = 0; col < dim; col ++, i++)
-        kernel[i] /= sum;
+  return 1.0 * ctx_pcm_get_queued_frames (ctx) / ctx_host_freq; /* 1.0 * forces floating-point division */
 }
-#endif
 
-static void
-ctx_rasterizer_round_rectangle (CtxRasterizer *rasterizer, float x, float y, float width, float height, 
float corner_radius)
+/* Returns how many more frames the caller may queue right now: the
+ * difference between a target queue depth and the current depth, or 0 when
+ * the queue is already above the target.  The target is 400 when
+ * ctx_backend_type () reports CTX_BACKEND_CTX (see the tuning notes below)
+ * and 1000 for every other backend.  The mmm path is compiled out.
+ */
+int ctx_pcm_get_frame_chunk (Ctx *ctx)
 {
-  float aspect  = 1.0f;
-  float radius  = corner_radius / aspect;
-  float degrees = CTX_PI / 180.0f;
-
-  if (radius > width*0.5f) radius = width/2;
-  if (radius > height*0.5f) radius = height/2;
-
-  ctx_rasterizer_finish_shape (rasterizer);
-  ctx_rasterizer_arc (rasterizer, x + width - radius, y + radius, radius, -90 * degrees, 0 * degrees, 0);
-  ctx_rasterizer_arc (rasterizer, x + width - radius, y + height - radius, radius, 0 * degrees, 90 * 
degrees, 0);
-  ctx_rasterizer_arc (rasterizer, x + radius, y + height - radius, radius, 90 * degrees, 180 * degrees, 0);
-  ctx_rasterizer_arc (rasterizer, x + radius, y + radius, radius, 180 * degrees, 270 * degrees, 0);
+#if 0
+  if (!strcmp (ctx->backend->name, "mmm") ||
+      !strcmp (ctx->backend->name, "mmm-client"))
+  {
+    return mmm_pcm_get_frame_chunk (ctx->backend_data);
+  }
+#endif
+  /* Read the queue depth once, so the comparison and the subtraction below
+   * always work on the same value and the result can never go negative. */
+  int queued = ctx_pcm_get_queued_frames (ctx);
+
+  if (ctx_backend_type (ctx) == CTX_BACKEND_CTX)
+  {
+    // 300 stuttering
+    // 350 nothing
+    // 380 slight buzz
+    // 390  buzzing
+    // 400 ok - but sometimes falling out
+    // 410 buzzing
+    // 420 ok - but odd latency
+    // 450 buzzing
 
-  ctx_rasterizer_finish_shape (rasterizer);
-}
+    if (queued > 400)
+      return 0;
+    else
+      return 400 - queued;
 
-static void
-ctx_rasterizer_process (void *user_data, CtxCommand *command);
+  }
 
-int
-_ctx_is_rasterizer (Ctx *ctx)
-{
-  if (ctx->renderer && ctx->renderer->process == ctx_rasterizer_process)
-    return 1;
-  return 0;
+  if (queued > 1000)
+    return 0;
+  else
+    return 1000 - queued;
 }
 
-#if CTX_COMPOSITING_GROUPS
-static void
-ctx_rasterizer_start_group (CtxRasterizer *rasterizer)
+/* Stores sample_rate in client_freq without validating it.  The mmm
+ * backend branch is compiled out (#if 0), so ctx is not inspected.
+ */
+void ctx_pcm_set_sample_rate (Ctx *ctx, int sample_rate)
 {
-  CtxEntry save_command = ctx_void(CTX_SAVE);
-  // allocate buffer, and set it as temporary target
-  int no;
-  if (rasterizer->group[0] == NULL) // first group
+#if 0
+  if (!strcmp (ctx->backend->name, "mmm") ||
+      !strcmp (ctx->backend->name, "mmm-client"))
   {
-    rasterizer->saved_buf = rasterizer->buf;
+    mmm_pcm_set_sample_rate (ctx->backend_data, sample_rate);
   }
-  for (no = 0; rasterizer->group[no] && no < CTX_GROUP_MAX; no++);
-
-  if (no >= CTX_GROUP_MAX)
-     return;
-  rasterizer->group[no] = ctx_buffer_new (rasterizer->blit_width,
-                                          rasterizer->blit_height,
-                                          rasterizer->format->composite_format);
-  rasterizer->buf = rasterizer->group[no]->data;
-  ctx_rasterizer_process (rasterizer, (CtxCommand*)&save_command);
+  else
+#endif
+    client_freq = sample_rate;
 }
 
-static void
-ctx_rasterizer_end_group (CtxRasterizer *rasterizer)
+/* Stores format in ctx_client_format.  The mmm backend branch is compiled
+ * out (#if 0), so ctx is not inspected.
+ */
+void ctx_pcm_set_format (Ctx *ctx, CtxPCM format)
 {
-  CtxEntry restore_command = ctx_void(CTX_RESTORE);
-  CtxEntry save_command = ctx_void(CTX_SAVE);
-  int no = 0;
-  for (no = 0; rasterizer->group[no] && no < CTX_GROUP_MAX; no++);
-  no--;
-
-  if (no < 0)
-    return;
-
-  CtxCompositingMode comp = rasterizer->state->gstate.compositing_mode;
-  CtxBlend blend = rasterizer->state->gstate.blend_mode;
-  float global_alpha = rasterizer->state->gstate.global_alpha_f;
-  // fetch compositing, blending, global alpha
-  ctx_rasterizer_process (rasterizer, (CtxCommand*)&restore_command);
-  ctx_rasterizer_process (rasterizer, (CtxCommand*)&save_command);
-  CtxEntry set_state[3]=
-  {
-    ctx_u32 (CTX_COMPOSITING_MODE, comp,  0),
-    ctx_u32 (CTX_BLEND_MODE,       blend, 0),
-    ctx_f  (CTX_GLOBAL_ALPHA,     global_alpha, 0.0)
-  };
-  ctx_rasterizer_process (rasterizer, (CtxCommand*)&set_state[0]);
-  ctx_rasterizer_process (rasterizer, (CtxCommand*)&set_state[1]);
-  ctx_rasterizer_process (rasterizer, (CtxCommand*)&set_state[2]);
-  if (no == 0)
+#if 0
+  if (!strcmp (ctx->backend->name, "mmm") ||
+      !strcmp (ctx->backend->name, "mmm-client"))
   {
-    rasterizer->buf = rasterizer->saved_buf;
+    mmm_pcm_set_format (ctx->backend_data, format);
   }
   else
-  {
-    rasterizer->buf = rasterizer->group[no-1]->data;
-  }
-  // XXX use texture_source ?
-   ctx_texture_init (rasterizer->ctx, ".ctx-group", // XXX ? count groups..
-                  rasterizer->blit_width,  // or have group based on thread-id?
-                  rasterizer->blit_height, // .. this would mean threadsafe
-                                           // allocation
-                  rasterizer->blit_width * rasterizer->format->bpp/8,
-                  rasterizer->format->pixel_format,
-                  NULL, // space
-                  (uint8_t*)rasterizer->group[no]->data,
-                  NULL, NULL);
-  {
-     const char *eid = ".ctx-group";
-     int   eid_len = strlen (eid);
-
-     CtxEntry commands[4] =
-      {
-       ctx_f  (CTX_TEXTURE, rasterizer->blit_x, rasterizer->blit_y), 
-       ctx_u32 (CTX_DATA, eid_len, eid_len/9+1),
-       ctx_u32 (CTX_CONT, 0,0),
-       ctx_u32 (CTX_CONT, 0,0)
-      };
-     memcpy( (char *) &commands[2].data.u8[0], eid, eid_len);
-     ( (char *) (&commands[2].data.u8[0]) ) [eid_len]=0;
+#endif
+    ctx_client_format = format;
+}
 
-     ctx_rasterizer_process (rasterizer, (CtxCommand*)commands);
-  }
+/* Returns the value held in ctx_client_format (the variable written by
+ * ctx_pcm_set_format).  The mmm backend branch is compiled out (#if 0),
+ * so ctx is not inspected.
+ */
+CtxPCM ctx_pcm_get_format (Ctx *ctx)
+{
+#if 0
+  if (!strcmp (ctx->backend->name, "mmm") ||
+      !strcmp (ctx->backend->name, "mmm-client"))
   {
-    CtxEntry commands[2]=
-    {
-      ctx_f (CTX_RECTANGLE, rasterizer->blit_x, rasterizer->blit_y),
-      ctx_f (CTX_CONT,      rasterizer->blit_width, rasterizer->blit_height)
-    };
-    ctx_rasterizer_process (rasterizer, (CtxCommand*)commands);
+    return mmm_pcm_get_format (ctx->backend_data);
   }
+#endif
+  return ctx_client_format;
+}
+
+/* Returns the value held in client_freq (the variable written by
+ * ctx_pcm_set_sample_rate).  The mmm backend branch is compiled out
+ * (#if 0), so ctx is not inspected.
+ */
+int ctx_pcm_get_sample_rate (Ctx *ctx)
+{
+#if 0
+  if (!strcmp (ctx->backend->name, "mmm") ||
+      !strcmp (ctx->backend->name, "mmm-client"))
   {
-    CtxEntry commands[1]= { ctx_void (CTX_FILL) };
-    ctx_rasterizer_process (rasterizer, (CtxCommand*)commands);
+    return mmm_pcm_get_sample_rate (ctx->backend_data);
   }
-  //ctx_texture_release (rasterizer->ctx, ".ctx-group");
-  ctx_buffer_free (rasterizer->group[no]);
-  rasterizer->group[no] = 0;
-  ctx_rasterizer_process (rasterizer, (CtxCommand*)&restore_command);
+#endif
+  return client_freq;
 }
+
+#else
+
+/* Fallback definitions for the #else branch: the setters do nothing, the
+ * getters return fixed values, and ctx_pcm_queue reports every frame as
+ * accepted without storing anything.
+ */
+void ctx_pcm_set_format (Ctx *ctx, CtxPCM format)
+{
+}
+
+void ctx_pcm_set_sample_rate (Ctx *ctx, int sample_rate)
+{
+}
+
+int ctx_pcm_get_sample_rate (Ctx *ctx)
+{
+  return 48000;
+}
+
+CtxPCM ctx_pcm_get_format (Ctx *ctx)
+{
+  return CTX_s16S;
+}
+
+int ctx_pcm_queue (Ctx *ctx, const int8_t *data, int frames)
+{
+  return frames;
+}
+
+float ctx_pcm_get_queued_length (Ctx *ctx)
+{
+  return 0.0;
+}
+
 #endif
+ /* Copyright (C) 2020 Øyvind Kolås <pippin gimp org>
+ */
 
-#if CTX_ENABLE_SHADOW_BLUR
-static void
-ctx_rasterizer_shadow_stroke (CtxRasterizer *rasterizer)
+#if CTX_FORMATTER
+
+/* Returns the maximum encoded string length for input_length bytes, with
+ * the terminating \0 included: five characters for every complete group of
+ * four input bytes, plus one extra group.
+ */
+int ctx_a85enc_len (int input_length)
 {
-  CtxColor color;
-  CtxEntry save_command = ctx_void(CTX_SAVE);
+  int groups = input_length / 4 + 1;
+  return groups * 5;
+}
 
-  float rgba[4] = {0, 0, 0, 1.0};
-  if (ctx_get_color (rasterizer->ctx, CTX_shadowColor, &color) == 0)
-    ctx_color_get_rgba (rasterizer->state, &color, rgba);
-
-  CtxEntry set_color_command [3]=
-  {
-    ctx_f (CTX_COLOR, CTX_RGBA, rgba[0]),
-    ctx_f (CTX_CONT, rgba[1], rgba[2]),
-    ctx_f (CTX_CONT, rgba[3], 0)
-  };
-  CtxEntry restore_command = ctx_void(CTX_RESTORE);
-  float radius = rasterizer->state->gstate.shadow_blur;
-  int dim = 2 * radius + 1;
-  if (dim > CTX_MAX_GAUSSIAN_KERNEL_DIM)
-    dim = CTX_MAX_GAUSSIAN_KERNEL_DIM;
-  ctx_compute_gaussian_kernel (dim, radius, rasterizer->kernel);
-  ctx_rasterizer_process (rasterizer, (CtxCommand*)&save_command);
-  {
-    int i = 0;
-    for (int v = 0; v < dim; v += 1, i++)
-      {
-        float dy = rasterizer->state->gstate.shadow_offset_y + v - dim/2;
-        set_color_command[2].data.f[0] = rasterizer->kernel[i] * rgba[3];
-        ctx_rasterizer_process (rasterizer, (CtxCommand*)&set_color_command[0]);
-#if CTX_ENABLE_SHADOW_BLUR
-        rasterizer->in_shadow = 1;
-#endif
-        rasterizer->shadow_x = rasterizer->state->gstate.shadow_offset_x;
-        rasterizer->shadow_y = dy;
-        rasterizer->preserve = 1;
-        ctx_rasterizer_stroke (rasterizer);
-#if CTX_ENABLE_SHADOW_BLUR
-        rasterizer->in_shadow = 0;
-#endif
-      }
-  }
-  //free (kernel);
-  ctx_rasterizer_process (rasterizer, (CtxCommand*)&restore_command);
-}
-
-static void
-ctx_rasterizer_shadow_text (CtxRasterizer *rasterizer, const char *str)
+/* Encodes count bytes from srcp as ascii85 text into dst.
+ *
+ * Every group of four input bytes becomes five characters in the range
+ * '!'..'u'.  A trailing partial group is zero padded, encoded, and the
+ * characters that only carry padding are dropped again.  The 'z' shortcut
+ * for all-zero groups is compiled out.
+ *
+ * dst must have room for ctx_a85enc_len (count) bytes; the result is
+ * \0 terminated.  Returns the number of characters written, excluding the
+ * terminator.  A negative count is treated as empty input.
+ */
+int ctx_a85enc (const void *srcp, char *dst, int count)
 {
-  float x = rasterizer->state->x;
-  float y = rasterizer->state->y;
-  CtxColor color;
-  CtxEntry save_command = ctx_void(CTX_SAVE);
+  const uint8_t *src = (const uint8_t*)srcp;
+  int out_len = 0;
+  if (count < 0)
+    count = 0;
 
-  float rgba[4] = {0, 0, 0, 1.0};
-  if (ctx_get_color (rasterizer->ctx, CTX_shadowColor, &color) == 0)
-    ctx_color_get_rgba (rasterizer->state, &color, rgba);
+  /* number of zero bytes needed to complete the last group */
+  int padding = 4-(count % 4);
+  if (padding == 4) padding = 0;
 
-  CtxEntry set_color_command [3]=
-  {
-    ctx_f (CTX_COLOR, CTX_RGBA, rgba[0]),
-    ctx_f (CTX_CONT, rgba[1], rgba[2]),
-    ctx_f (CTX_CONT, rgba[3], 0)
-  };
-  CtxEntry move_to_command [1]=
+  for (int i = 0; i < (count+3)/4; i ++)
   {
-    ctx_f (CTX_MOVE_TO, x, y),
-  };
-  CtxEntry restore_command = ctx_void(CTX_RESTORE);
-  float radius = rasterizer->state->gstate.shadow_blur;
-  int dim = 2 * radius + 1;
-  if (dim > CTX_MAX_GAUSSIAN_KERNEL_DIM)
-    dim = CTX_MAX_GAUSSIAN_KERNEL_DIM;
-  ctx_compute_gaussian_kernel (dim, radius, rasterizer->kernel);
-  ctx_rasterizer_process (rasterizer, (CtxCommand*)&save_command);
+    uint32_t input = 0;
+    for (int j = 0; j < 4; j++)
+    {
+      input = (input << 8);
+      /* strictly below count: src[count] is already past the input, and
+       * reading it would leak a stray byte into the encoded tail */
+      if (i*4+j < count)
+        input += src[i*4+j];
+    }
 
-  {
+    int divisor = 85 * 85 * 85 * 85;
+#if 0
+    if (input == 0)
+    {
+        dst[out_len++] = 'z';
+    }
+    /* todo: encode 4 spaces as 'y' */
+    else
+#endif
+    {
+      for (int j = 0; j < 5; j++)
       {
-        move_to_command[0].data.f[0] = x;
-        move_to_command[0].data.f[1] = y;
-        set_color_command[2].data.f[0] = rgba[3];
-        ctx_rasterizer_process (rasterizer, (CtxCommand*)&set_color_command);
-        ctx_rasterizer_process (rasterizer, (CtxCommand*)&move_to_command);
-        rasterizer->in_shadow=1;
-        ctx_rasterizer_text (rasterizer, str, 0);
-        rasterizer->in_shadow=0;
+        dst[out_len++] = ((input / divisor) % 85) + '!';
+        divisor /= 85;
       }
+    }
   }
-  ctx_rasterizer_process (rasterizer, (CtxCommand*)&restore_command);
-  move_to_command[0].data.f[0] = x;
-  move_to_command[0].data.f[1] = y;
-  ctx_rasterizer_process (rasterizer, (CtxCommand*)&move_to_command);
+  /* drop the characters that only encode the zero padding */
+  out_len -= padding;
+  dst[out_len]=0;
+  return out_len;
 }
+#endif
 
-static void
-ctx_rasterizer_shadow_fill (CtxRasterizer *rasterizer)
-{
-  CtxColor color;
-  CtxEntry save_command = ctx_void(CTX_SAVE);
-
-  float rgba[4] = {0, 0, 0, 1.0};
-  if (ctx_get_color (rasterizer->ctx, CTX_shadowColor, &color) == 0)
-    ctx_color_get_rgba (rasterizer->state, &color, rgba);
-
-  CtxEntry set_color_command [3]=
-  {
-    ctx_f (CTX_COLOR, CTX_RGBA, rgba[0]),
-    ctx_f (CTX_CONT, rgba[1], rgba[2]),
-    ctx_f (CTX_CONT, rgba[3], 0)
-  };
-  CtxEntry restore_command = ctx_void(CTX_RESTORE);
-  float radius = rasterizer->state->gstate.shadow_blur;
-  int dim = 2 * radius + 1;
-  if (dim > CTX_MAX_GAUSSIAN_KERNEL_DIM)
-    dim = CTX_MAX_GAUSSIAN_KERNEL_DIM;
-  ctx_compute_gaussian_kernel (dim, radius, rasterizer->kernel);
-  ctx_rasterizer_process (rasterizer, (CtxCommand*)&save_command);
+#if CTX_PARSER
 
+/* Decodes up to count characters of ascii85 text from src into dst.
+ *
+ * Characters in the range '!'..'u' are base-85 digits; every five digits
+ * yield four output bytes.  Decoding stops at the first '~'.  Any other
+ * character is skipped and does not disturb the group being assembled.
+ * A trailing partial group of k digits is padded with 'u' digits and
+ * yields k-1 bytes.  The 'z' and 'y' shortcuts are compiled out.
+ *
+ * dst must have room for the decoded bytes plus a terminating 0 byte.
+ * Returns the number of decoded bytes, excluding the terminator.
+ */
+int ctx_a85dec (const char *src, char *dst, int count)
+{
+  int out_len = 0;
+  uint32_t val = 0;
+  int k = 0;
+  for (int i = 0; i < count; i ++)
   {
-    for (int v = 0; v < dim; v ++)
+    int p = src[i];
+    if (p == '~')
+    {
+      break;
+    }
+#if 0
+    else if (p == 'z')
+    {
+      for (int j = 0; j < 4; j++)
+        dst[out_len++] = 0;
+      k = 0;
+    }
+    else if (p == 'y') /* lets support this extension */
+    {
+      for (int j = 0; j < 4; j++)
+        dst[out_len++] = 32;
+      k = 0;
+    }
+#endif
+    else if (p >= '!' && p <= 'u')
+    {
+      /* scale the accumulator only when a digit is consumed, so skipped
+       * characters cannot shift the group */
+      val = val * 85 + (p-'!');
+      if (k % 5 == 4)
       {
-        int i = v;
-        float dy = rasterizer->state->gstate.shadow_offset_y + v - dim/2;
-        set_color_command[2].data.f[0] = rasterizer->kernel[i] * rgba[3];
-        ctx_rasterizer_process (rasterizer, (CtxCommand*)&set_color_command);
-        rasterizer->in_shadow = 1;
-        rasterizer->shadow_x = rasterizer->state->gstate.shadow_offset_x;
-        rasterizer->shadow_y = dy;
-        rasterizer->preserve = 1;
-        ctx_rasterizer_fill (rasterizer);
-        rasterizer->in_shadow = 0;
+         for (int j = 0; j < 4; j++)
+         {
+           dst[out_len++] = (val & ((unsigned)0xff << 24)) >> 24;
+           val <<= 8;
+         }
+         val = 0;
       }
+      k++;
+    }
+    // we treat all other chars as whitespace
+  }
+  k = k % 5;
+  if (k)
+  {
+    /* complete the partial group to five digits with 'u' (84) */
+    for (int j = k; j < 5; j++)
+    {
+      val *= 85;
+      val += 84;
+    }
+
+    for (int j = 0; j < k-1; j++)
+    {
+      dst[out_len++] = (val & ((unsigned)0xff << 24)) >> 24;
+      val <<= 8;
+    }
+    val = 0;
   }
-  ctx_rasterizer_process (rasterizer, (CtxCommand*)&restore_command);
+  dst[out_len] = 0;
+  return out_len;
 }
-#endif
 
-static void
-ctx_rasterizer_line_dash (CtxRasterizer *rasterizer, int count, float *dashes)
+#if 1
+/* Returns the number of bytes that count characters of ascii85 text in src
+ * decode to, without decoding them.  Scanning stops at the first '~'.
+ * Every five digits ('!'..'u') count as four bytes, a 'z' counts as four
+ * bytes, and a trailing partial group of k digits counts as k-1 bytes.
+ *
+ * NOTE(review): ctx_a85dec has its 'z' handling compiled out, so for input
+ * containing 'z' this count may exceed what the decoder writes.
+ */
+int ctx_a85len (const char *src, int count)
 {
-  if (!dashes)
-  {
-    rasterizer->state->gstate.n_dashes = 0;
-    return;
-  }
-  count = CTX_MIN(count, CTX_PARSER_MAX_ARGS-1);
-  rasterizer->state->gstate.n_dashes = count;
-  memcpy(&rasterizer->state->gstate.dashes[0], dashes, count * sizeof(float));
+  int decoded = 0; /* bytes accounted for so far */
+  int digits  = 0; /* ascii85 digits seen so far */
   for (int i = 0; i < count; i ++)
   {
-    if (rasterizer->state->gstate.dashes[i] < 0.0001f)
-      rasterizer->state->gstate.dashes[i] = 0.0001f; // hang protection
+    const char c = src[i];
+    if (c == '~')
+      break;
+    if (c == 'z')
+    {
+      decoded += 4;
+      digits = 0;
+      continue;
+    }
+    if (c >= '!' && c <= 'u')
+    {
+      if (digits % 5 == 4)
+        decoded += 4;
+      digits++;
+    }
+    // we treat all other chars as whitespace
   }
+  digits %= 5;
+  if (digits)
+    decoded += digits - 1;
+  return decoded;
 }
+#endif
 
-
-static void
-ctx_rasterizer_process (void *user_data, CtxCommand *command)
-{
-  CtxEntry *entry = &command->entry;
-  CtxRasterizer *rasterizer = (CtxRasterizer *) user_data;
-  CtxState *state = rasterizer->state;
-  CtxCommand *c = (CtxCommand *) entry;
-  int clear_clip = 0;
-  ctx_interpret_style (state, entry, NULL);
-  switch (c->code)
-    {
-#if CTX_ENABLE_SHADOW_BLUR
-      case CTX_SHADOW_COLOR:
-        {
-          CtxColor  col;
-          CtxColor *color = &col;
-          //state->gstate.source_fill.type = CTX_SOURCE_COLOR;
-          switch ((int)c->rgba.model)
-            {
-              case CTX_RGB:
-                ctx_color_set_rgba (state, color, c->rgba.r, c->rgba.g, c->rgba.b, 1.0f);
-                break;
-              case CTX_RGBA:
-                //ctx_color_set_rgba (state, color, c->rgba.r, c->rgba.g, c->rgba.b, c->rgba.a);
-                ctx_color_set_rgba (state, color, c->rgba.r, c->rgba.g, c->rgba.b, c->rgba.a);
-                break;
-              case CTX_DRGBA:
-                ctx_color_set_drgba (state, color, c->rgba.r, c->rgba.g, c->rgba.b, c->rgba.a);
-                break;
-#if CTX_ENABLE_CMYK
-              case CTX_CMYKA:
-                ctx_color_set_cmyka (state, color, c->cmyka.c, c->cmyka.m, c->cmyka.y, c->cmyka.k, 
c->cmyka.a);
-                break;
-              case CTX_CMYK:
-                ctx_color_set_cmyka (state, color, c->cmyka.c, c->cmyka.m, c->cmyka.y, c->cmyka.k, 1.0f);
-                break;
-              case CTX_DCMYKA:
-                ctx_color_set_dcmyka (state, color, c->cmyka.c, c->cmyka.m, c->cmyka.y, c->cmyka.k, 
c->cmyka.a);
-                break;
-              case CTX_DCMYK:
-                ctx_color_set_dcmyka (state, color, c->cmyka.c, c->cmyka.m, c->cmyka.y, c->cmyka.k, 1.0f);
-                break;
 #endif
-              case CTX_GRAYA:
-                ctx_color_set_graya (state, color, c->graya.g, c->graya.a);
-                break;
-              case CTX_GRAY:
-                ctx_color_set_graya (state, color, c->graya.g, 1.0f);
-                break;
-            }
-          ctx_set_color (rasterizer->ctx, CTX_shadowColor, color);
-        }
-        break;
+
+#if CTX_IMPLEMENTATION
+
+#define SHA1_IMPLEMENTATION
+/* LibTomCrypt, modular cryptographic library -- Tom St Denis
+ *
+ * LibTomCrypt is a library that provides various cryptographic
+ * algorithms in a highly modular and flexible manner.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis gmail com, http://libtom.org
+ *
+ * The plain ANSIC sha1 functionality has been extracted from libtomcrypt,
+ * and is included directly in the sources. /Øyvind K. - since libtomcrypt
+ * is public domain the adaptations done here to make the sha1 self contained
+ * also is public domain.
+ */
+#ifndef __SHA1_H
+#define __SHA1_H
+#if !__COSMOPOLITAN__
+#include <inttypes.h>
 #endif
-      case CTX_LINE_DASH:
-        if (c->line_dash.count)
-          {
-            ctx_rasterizer_line_dash (rasterizer, c->line_dash.count, c->line_dash.data);
-          }
-        else
-        ctx_rasterizer_line_dash (rasterizer, 0, NULL);
-        break;
 
-      case CTX_LINE_TO:
-        ctx_rasterizer_line_to (rasterizer, c->c.x0, c->c.y0);
-        break;
-      case CTX_REL_LINE_TO:
-        ctx_rasterizer_rel_line_to (rasterizer, c->c.x0, c->c.y0);
-        break;
-      case CTX_MOVE_TO:
-        ctx_rasterizer_move_to (rasterizer, c->c.x0, c->c.y0);
-        break;
-      case CTX_REL_MOVE_TO:
-        ctx_rasterizer_rel_move_to (rasterizer, c->c.x0, c->c.y0);
-        break;
-      case CTX_CURVE_TO:
-        ctx_rasterizer_curve_to (rasterizer, c->c.x0, c->c.y0,
-                                 c->c.x1, c->c.y1,
-                                 c->c.x2, c->c.y2);
-        break;
-      case CTX_REL_CURVE_TO:
-        ctx_rasterizer_rel_curve_to (rasterizer, c->c.x0, c->c.y0,
-                                     c->c.x1, c->c.y1,
-                                     c->c.x2, c->c.y2);
-        break;
-      case CTX_QUAD_TO:
-        ctx_rasterizer_quad_to (rasterizer, c->c.x0, c->c.y0, c->c.x1, c->c.y1);
-        break;
-      case CTX_REL_QUAD_TO:
-        ctx_rasterizer_rel_quad_to (rasterizer, c->c.x0, c->c.y0, c->c.x1, c->c.y1);
-        break;
-      case CTX_ARC:
-        ctx_rasterizer_arc (rasterizer, c->arc.x, c->arc.y, c->arc.radius, c->arc.angle1, c->arc.angle2, 
c->arc.direction);
-        break;
-      case CTX_RECTANGLE:
-        ctx_rasterizer_rectangle (rasterizer, c->rectangle.x, c->rectangle.y,
-                                  c->rectangle.width, c->rectangle.height);
-        break;
-      case CTX_ROUND_RECTANGLE:
-        ctx_rasterizer_round_rectangle (rasterizer, c->rectangle.x, c->rectangle.y,
-                                        c->rectangle.width, c->rectangle.height,
-                                        c->rectangle.radius);
-        break;
-      case CTX_SET_PIXEL:
-        ctx_rasterizer_set_pixel (rasterizer, c->set_pixel.x, c->set_pixel.y,
-                                  c->set_pixel.rgba[0],
-                                  c->set_pixel.rgba[1],
-                                  c->set_pixel.rgba[2],
-                                  c->set_pixel.rgba[3]);
-        break;
-      case CTX_DEFINE_TEXTURE:
-        {
-          uint8_t *pixel_data = ctx_define_texture_pixel_data (entry);
-          ctx_rasterizer_define_texture (rasterizer, c->define_texture.eid,
-                                         c->define_texture.width, c->define_texture.height,
-                                         c->define_texture.format,
-                                         pixel_data);
-          rasterizer->comp_op = NULL;
-          rasterizer->fragment = NULL;
-        }
-        break;
-      case CTX_TEXTURE:
-        ctx_rasterizer_set_texture (rasterizer, c->texture.eid,
-                                    c->texture.x, c->texture.y);
-        rasterizer->comp_op = NULL;
-        rasterizer->fragment = NULL;
-        break;
-      case CTX_SOURCE_TRANSFORM:
-        ctx_matrix_set (&state->gstate.source_fill.set_transform,
-                        ctx_arg_float (0), ctx_arg_float (1),
-                        ctx_arg_float (2), ctx_arg_float (3),
-                        ctx_arg_float (4), ctx_arg_float (5));
-        rasterizer->comp_op = NULL;
-        break;
+
+int ctx_sha1_init(CtxSHA1 * sha1);
+
+/* Allocates a zeroed CtxSHA1 and initializes it with ctx_sha1_init ().
+ * Returns NULL when the allocation fails, instead of handing a NULL
+ * pointer to ctx_sha1_init ().  Release the result with ctx_sha1_free ().
+ */
+CtxSHA1 *ctx_sha1_new (void)
+{
+  CtxSHA1 *state = (CtxSHA1*)calloc (1, sizeof (CtxSHA1));
+  if (!state)
+    return NULL;
+  ctx_sha1_init (state);
+  return state;
+}
+
+/* Releases a context obtained from ctx_sha1_new (); the pointer is passed
+ * straight to free (), so NULL is accepted.
+ */
+void ctx_sha1_free (CtxSHA1 *sha1)
+{
+  free (sha1);
+}
+
 #if 0
-      case CTX_LOAD_IMAGE:
-        ctx_rasterizer_load_image (rasterizer, ctx_arg_string(),
-                                   ctx_arg_float (0), ctx_arg_float (1) );
-        break;
+          CtxSHA1 sha1;
+          ctx_sha1_init (&sha1);
+          ctx_sha1_process(&sha1, (unsigned char*)&shape_rect, sizeof (CtxIntRectangle));
+          ctx_sha1_done(&sha1, (unsigned char*)ctx_sha1_hash);
 #endif
-#if CTX_GRADIENTS
-      case CTX_GRADIENT_STOP:
-        {
-          float rgba[4]= {ctx_u8_to_float (ctx_arg_u8 (4) ),
-                          ctx_u8_to_float (ctx_arg_u8 (4+1) ),
-                          ctx_u8_to_float (ctx_arg_u8 (4+2) ),
-                          ctx_u8_to_float (ctx_arg_u8 (4+3) )
-                         };
-          ctx_rasterizer_gradient_add_stop (rasterizer,
-                                            ctx_arg_float (0), rgba);
-          rasterizer->comp_op = NULL;
-        }
-        break;
-      case CTX_LINEAR_GRADIENT:
-        ctx_state_gradient_clear_stops (state);
-        rasterizer->comp_op = NULL;
-        break;
-      case CTX_RADIAL_GRADIENT:
-        ctx_state_gradient_clear_stops (state);
-        rasterizer->comp_op = NULL;
-        break;
+
+#ifdef SHA1_FF0
+#undef SHA1_FF0
 #endif
-      case CTX_PRESERVE:
-        rasterizer->preserve = 1;
-        break;
-      case CTX_COLOR:
-      case CTX_COMPOSITING_MODE:
-      case CTX_BLEND_MODE:
-        rasterizer->comp_op = NULL;
-        //_ctx_setup_compositor (rasterizer);
-        break;
-#if CTX_COMPOSITING_GROUPS
-      case CTX_START_GROUP:
-        ctx_rasterizer_start_group (rasterizer);
-        break;
-      case CTX_END_GROUP:
-        ctx_rasterizer_end_group (rasterizer);
-        break;
+#ifdef SHA1_FF1
+#undef SHA1_FF1
 #endif
 
-      case CTX_RESTORE:
-        for (int i = state->gstate_no?state->gstate_stack[state->gstate_no-1].keydb_pos:0;
-             i < state->gstate.keydb_pos; i++)
-        {
-          if (state->keydb[i].key == CTX_clip)
-          {
-            clear_clip = 1;
-          }
-        }
-        /* FALLTHROUGH */
-      case CTX_ROTATE:
-      case CTX_SCALE:
-      case CTX_TRANSLATE:
-      case CTX_IDENTITY:
-      case CTX_SAVE:
-        rasterizer->comp_op = NULL;
-        rasterizer->uses_transforms = 1;
-        ctx_interpret_transforms (state, entry, NULL);
-        if (clear_clip)
-        {
-          ctx_rasterizer_clip_reset (rasterizer);
-        for (int i = state->gstate_no?state->gstate_stack[state->gstate_no-1].keydb_pos:0;
-             i < state->gstate.keydb_pos; i++)
-        {
-          if (state->keydb[i].key == CTX_clip)
-          {
-            int idx = ctx_float_to_string_index (state->keydb[i].value);
-            if (idx >=0)
-            {
-              CtxSegment *edges = (CtxSegment*)&state->stringpool[idx];
-              ctx_rasterizer_clip_apply (rasterizer, edges);
-            }
-          }
-        }
-        }
-        break;
-      case CTX_STROKE:
-#if CTX_ENABLE_SHADOW_BLUR
-        if (state->gstate.shadow_blur > 0.0 &&
-            !rasterizer->in_text)
-          ctx_rasterizer_shadow_stroke (rasterizer);
+#ifdef SHA1_IMPLEMENTATION
+#if !__COSMOPOLITAN__
+#include <stdlib.h>
+#include <string.h>
 #endif
-        {
-        int count = rasterizer->edge_list.count;
-        if (state->gstate.n_dashes)
-        {
-          int n_dashes = state->gstate.n_dashes;
-          float *dashes = state->gstate.dashes;
-          float factor = ctx_matrix_get_scale (&state->gstate.transform);
 
-          int aa = 15;//rasterizer->aa;
-          CtxSegment temp[count]; /* copy of already built up path's poly line  */
-          memcpy (temp, rasterizer->edge_list.entries, sizeof (temp));
-          int start = 0;
-          int end   = 0;
-      CtxMatrix transform_backup = state->gstate.transform;
-      _ctx_matrix_identity (&state->gstate.transform);
-      ctx_rasterizer_reset (rasterizer); /* for dashing we create
-                                            a dashed path to stroke */
-      float prev_x = 0.0f;
-      float prev_y = 0.0f;
-      float pos = 0.0;
+/* Stores the low 64 bits of x into y[0..7], most significant byte first.
+ * Both arguments are evaluated several times. */
+#define STORE64H(x, y)                                                                     \
+   { (y)[0] = (unsigned char)(((x)>>56)&255); (y)[1] = (unsigned char)(((x)>>48)&255);     \
+     (y)[2] = (unsigned char)(((x)>>40)&255); (y)[3] = (unsigned char)(((x)>>32)&255);     \
+     (y)[4] = (unsigned char)(((x)>>24)&255); (y)[5] = (unsigned char)(((x)>>16)&255);     \
+     (y)[6] = (unsigned char)(((x)>>8)&255); (y)[7] = (unsigned char)((x)&255); }
 
-      int   dash_no  = 0.0;
-      float dash_lpos = state->gstate.line_dash_offset * factor;
-      int   is_down = 0;
+/* Stores the low 32 bits of x into y[0..3], most significant byte first.
+ * Both arguments are evaluated several times. */
+#define STORE32H(x, y)                                                                     \
+     { (y)[0] = (unsigned char)(((x)>>24)&255); (y)[1] = (unsigned char)(((x)>>16)&255);   \
+       (y)[2] = (unsigned char)(((x)>>8)&255); (y)[3] = (unsigned char)((x)&255); }
 
-          while (start < count)
-          {
-            int started = 0;
-            int i;
-            is_down = 0;
+/* Assigns to x the 32 bit value formed from the four bytes y[0..3], with
+ * y[0] as the most significant byte. */
+#define LOAD32H(x, y)                            \
+     { x = ((unsigned long)((y)[0] & 255)<<24) | \
+           ((unsigned long)((y)[1] & 255)<<16) | \
+           ((unsigned long)((y)[2] & 255)<<8)  | \
+           ((unsigned long)((y)[3] & 255)); }
 
-            if (!is_down)
-            {
-              CtxSegment *entry = &temp[0];
-              prev_x = entry->data.s16[0] * 1.0f / CTX_SUBDIV;
-              prev_y = entry->data.s16[1] * 1.0f / aa;
-              ctx_rasterizer_move_to (rasterizer, prev_x, prev_y);
-              is_down = 1;
-            }
+/* rotates the hard way: 32 bit left rotation of x by (y & 31) bits, with
+ * the result masked to 32 bits */
+#define ROL(x, y)  ((((unsigned long)(x)<<(unsigned long)((y)&31)) | (((unsigned long)(x)&0xFFFFFFFFUL)>>(unsigned long)(32-((y)&31)))) & 0xFFFFFFFFUL)
+#define ROLc(x, y) ROL(x,y)
 
+/* Status codes for the sha1 routines; CRYPT_OK is what the functions
+ * generated by HASH_PROCESS return on success. */
+#define CRYPT_OK     0
+#define CRYPT_ERROR  1
+#define CRYPT_NOP    2
 
-            for (i = start; i < count; i++)
-            {
-              CtxSegment *entry = &temp[i];
-              float x, y;
-              if (entry->code == CTX_NEW_EDGE)
-                {
-                  if (started)
-                    {
-                      end = i - 1;
-                      dash_no = 0;
-                      dash_lpos = 0.0;
-                      goto foo;
-                    }
-                  prev_x = entry->data.s16[0] * 1.0f / CTX_SUBDIV;
-                  prev_y = entry->data.s16[1] * 1.0f / aa;
-                  started = 1;
-                  start = i;
-                  is_down = 1;
-                  ctx_rasterizer_move_to (rasterizer, prev_x, prev_y);
-                }
+/* Fallback MAX/MIN, defined only when no earlier definition exists.  Each
+ * argument is evaluated twice, so avoid arguments with side effects. */
+#ifndef MAX
+   #define MAX(x, y) ( ((x)>(y))?(x):(y) )
+#endif
+#ifndef MIN
+   #define MIN(x, y) ( ((x)<(y))?(x):(y) )
+#endif
 
-again:
+/* a simple macro for making hash "process" functions
+ *
+ * Expands to the definition of
+ *   int func_name (CtxSHA1 *sha1, const unsigned char *in, unsigned long inlen)
+ * which feeds inlen bytes from in into the hash state:
+ *  - whole blocks of block_size bytes are handed directly to compress_name
+ *    when nothing is buffered in sha1->buf;
+ *  - otherwise bytes are copied into sha1->buf, and compress_name is called
+ *    on the buffer once it holds block_size bytes;
+ *  - sha1->length grows by block_size * 8 for every compressed block.
+ * Returns CRYPT_OK on success, the error from compress_name if it fails,
+ * or -1 when sha1->curlen is already larger than sha1->buf.
+ * The state_var parameter is accepted but not referenced in the expansion.
+ */
+#define HASH_PROCESS(func_name, compress_name, state_var, block_size)               \
+int func_name (CtxSHA1 *sha1, const unsigned char *in, unsigned long inlen)      \
+{                                                                                   \
+    unsigned long n;                                                                \
+    int           err;                                                              \
+    assert (sha1 != NULL);                                                          \
+    assert (in != NULL);                                                            \
+    if (sha1->curlen > sizeof(sha1->buf)) {                                         \
+       return -1;                                                                   \
+    }                                                                               \
+    while (inlen > 0) {                                                             \
+        if (sha1->curlen == 0 && inlen >= block_size) {                             \
+           if ((err = compress_name (sha1, (unsigned char *)in)) != CRYPT_OK) {     \
+              return err;                                                           \
+           }                                                                        \
+           sha1->length += block_size * 8;                                          \
+           in             += block_size;                                            \
+           inlen          -= block_size;                                            \
+        } else {                                                                    \
+           n = MIN(inlen, (block_size - sha1->curlen));                             \
+           memcpy(sha1->buf + sha1->curlen, in, (size_t)n);                         \
+           sha1->curlen += n;                                                       \
+           in             += n;                                                     \
+           inlen          -= n;                                                     \
+           if (sha1->curlen == block_size) {                                        \
+              if ((err = compress_name (sha1, sha1->buf)) != CRYPT_OK) {            \
+                 return err;                                                        \
+              }                                                                     \
+              sha1->length += 8*block_size;                                         \
+              sha1->curlen = 0;                                                     \
+           }                                                                        \
+       }                                                                            \
+    }                                                                               \
+    return CRYPT_OK;                                                                \
+}
 
-              x = entry->data.s16[2] * 1.0f / CTX_SUBDIV;
-              y = entry->data.s16[3] * 1.0f / aa;
-              float dx = x - prev_x;
-              float dy = y - prev_y;
-              float length = ctx_fast_hypotf (dx, dy);
+/**********************/
 
-              if (dash_lpos + length >= dashes[dash_no] * factor)
-              {
-                float p = (dashes[dash_no] * factor - dash_lpos) / length;
-                float splitx = x * p + (1.0f - p) * prev_x;
-                float splity = y * p + (1.0f - p) * prev_y;
-                if (is_down)
-                {
-                  ctx_rasterizer_line_to (rasterizer, splitx, splity);
-                  is_down = 0;
-                }
-                else
-                {
-                  ctx_rasterizer_move_to (rasterizer, splitx, splity);
-                  is_down = 1;
-                }
-                prev_x = splitx;
-                prev_y = splity;
-                dash_no++;
-                dash_lpos=0;
-                if (dash_no >= n_dashes) dash_no = 0;
-                goto again;
-              }
-              else
-              {
-                pos += length;
-                dash_lpos += length;
-                {
-                  if (is_down)
-                    ctx_rasterizer_line_to (rasterizer, x, y);
-                }
-              }
-              prev_x = x;
-              prev_y = y;
-            }
-          end = i-1;
-foo:
-          start = end+1;
-        }
-        state->gstate.transform = transform_backup;
-        }
-        ctx_rasterizer_stroke (rasterizer);
+/* SHA-1 round functions used by the SHA1_FF0..SHA1_FF3 step macros:
+ * F0 selects bits of y or z depending on x, F2 is the bitwise majority
+ * of its three inputs, F1 and F3 are plain parity.  Every argument is
+ * parenthesised so the macros stay correct when they are invoked with
+ * expressions instead of bare identifiers. */
+#define F0(x,y,z)  ((z) ^ ((x) & ((y) ^ (z))))
+#define F1(x,y,z)  ((x) ^ (y) ^ (z))
+#define F2(x,y,z)  (((x) & (y)) | ((z) & ((x) | (y))))
+#define F3(x,y,z)  ((x) ^ (y) ^ (z))
+
+static int  ctx_sha1_compress(CtxSHA1 *sha1, unsigned char *buf) /* Runs the SHA-1 compression
+ * function over the 64 bytes at buf and adds the result into
+ * sha1->state[0..4].  Always returns CRYPT_OK. */
+{
+    uint32_t a,b,c,d,e,W[80],i;
+
+    /* load the 512-bit input block into W[0..15], one 32-bit word per LOAD32H */
+    for (i = 0; i < 16; i++) {
+        LOAD32H(W[i], buf + (4*i));
+    }
+
+    /* copy state into the working variables */
+    a = sha1->state[0];
+    b = sha1->state[1];
+    c = sha1->state[2];
+    d = sha1->state[3];
+    e = sha1->state[4];
+
+    /* expand the 16 loaded words into the full 80 word schedule */
+    for (i = 16; i < 80; i++) {
+        W[i] = ROL(W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16], 1); 
+    }
+
+    /* compress: every SHA1_FFn invocation performs one step and updates
+     * its fifth and its second argument; the calls below pass a..e
+     * rotated by one position each time, so after five steps the
+     * variables are back in their original roles */
+    /* round one (steps 0..19, F0) */
+    #define SHA1_FF0(a,b,c,d,e,i) e = (ROLc(a, 5) + F0(b,c,d) + e + W[i] + 0x5a827999UL); b = ROLc(b, 30);
+    #define SHA1_FF1(a,b,c,d,e,i) e = (ROLc(a, 5) + F1(b,c,d) + e + W[i] + 0x6ed9eba1UL); b = ROLc(b, 30);
+    #define SHA1_FF2(a,b,c,d,e,i) e = (ROLc(a, 5) + F2(b,c,d) + e + W[i] + 0x8f1bbcdcUL); b = ROLc(b, 30);
+    #define SHA1_FF3(a,b,c,d,e,i) e = (ROLc(a, 5) + F3(b,c,d) + e + W[i] + 0xca62c1d6UL); b = ROLc(b, 30);
+ 
+    for (i = 0; i < 20; ) {
+       SHA1_FF0(a,b,c,d,e,i++);
+       SHA1_FF0(e,a,b,c,d,i++);
+       SHA1_FF0(d,e,a,b,c,i++);
+       SHA1_FF0(c,d,e,a,b,i++);
+       SHA1_FF0(b,c,d,e,a,i++);
+    }
+
+    /* round two (steps 20..39, F1) */
+    for (; i < 40; )  { 
+       SHA1_FF1(a,b,c,d,e,i++);
+       SHA1_FF1(e,a,b,c,d,i++);
+       SHA1_FF1(d,e,a,b,c,i++);
+       SHA1_FF1(c,d,e,a,b,i++);
+       SHA1_FF1(b,c,d,e,a,i++);
+    }
+
+    /* round three (steps 40..59, F2) */
+    for (; i < 60; )  { 
+       SHA1_FF2(a,b,c,d,e,i++);
+       SHA1_FF2(e,a,b,c,d,i++);
+       SHA1_FF2(d,e,a,b,c,i++);
+       SHA1_FF2(c,d,e,a,b,i++);
+       SHA1_FF2(b,c,d,e,a,i++);
+    }
+
+    /* round four (steps 60..79, F3) */
+    for (; i < 80; )  { 
+       SHA1_FF3(a,b,c,d,e,i++);
+       SHA1_FF3(e,a,b,c,d,i++);
+       SHA1_FF3(d,e,a,b,c,i++);
+       SHA1_FF3(c,d,e,a,b,i++);
+       SHA1_FF3(b,c,d,e,a,i++);
+    }
+
+    #undef SHA1_FF0
+    #undef SHA1_FF1
+    #undef SHA1_FF2
+    #undef SHA1_FF3
+
+    /* store: add the working variables back into the running state */
+    sha1->state[0] = sha1->state[0] + a;
+    sha1->state[1] = sha1->state[1] + b;
+    sha1->state[2] = sha1->state[2] + c;
+    sha1->state[3] = sha1->state[3] + d;
+    sha1->state[4] = sha1->state[4] + e;
+
+    return CRYPT_OK;
+}
+
+/**
+   Initialize the hash state: loads the five SHA-1 initial state words
+   and resets the buffered byte count (curlen) and the message length.
+   @param sha1 The hash state you wish to initialize (asserted non-NULL)
+   @return CRYPT_OK (always)
+*/
+int ctx_sha1_init(CtxSHA1 * sha1)
+{
+   assert(sha1 != NULL);
+   sha1->state[0] = 0x67452301UL;
+   sha1->state[1] = 0xefcdab89UL;
+   sha1->state[2] = 0x98badcfeUL;
+   sha1->state[3] = 0x10325476UL;
+   sha1->state[4] = 0xc3d2e1f0UL;
+   sha1->curlen = 0;
+   sha1->length = 0;
+   return CRYPT_OK;
+}
+
+/**
+   Process a block of memory through the hash.  The function body is
+   generated by the HASH_PROCESS macro, which is instantiated here with
+   ctx_sha1_compress as compression function and a block size of 64.
+   @param sha1   The hash state
+   @param in     The data to hash
+   @param inlen  The length of the data (octets)
+   @return CRYPT_OK if successful
+*/
+HASH_PROCESS(ctx_sha1_process, ctx_sha1_compress, sha1, 64)
+
+/**
+   Terminate the hash to get the digest: appends the 0x80 marker, zero
+   padding and the 64-bit message length, runs the final compression(s)
+   and writes the five state words to out.
+   @param sha1 The hash state (asserted non-NULL)
+   @param out [out] The destination of the hash (20 bytes, asserted non-NULL)
+   @return CRYPT_OK if successful, -1 when curlen is not smaller than the block buffer
+*/
+int ctx_sha1_done(CtxSHA1 * sha1, unsigned char *out)
+{
+    int i;
+
+    assert(sha1 != NULL);
+    assert(out != NULL);
+
+    if (sha1->curlen >= sizeof(sha1->buf)) {
+       return -1;
+    }
+
+    /* increase the length of the message by the still buffered bytes (in bits) */
+    sha1->length += sha1->curlen * 8;
+
+    /* append the '1' bit */
+    sha1->buf[sha1->curlen++] = (unsigned char)0x80;
+
+    /* if the length is currently above 56 bytes there is no room left
+     * for the 8 length bytes: we append zeros and compress.  Then we
+     * can fall back to padding zeros and length encoding like normal.
+     */
+    if (sha1->curlen > 56) {
+        while (sha1->curlen < 64) {
+            sha1->buf[sha1->curlen++] = (unsigned char)0;
         }
+        ctx_sha1_compress(sha1, sha1->buf);
+        sha1->curlen = 0;
+    }
 
-        break;
-      case CTX_FONT:
-        ctx_rasterizer_set_font (rasterizer, ctx_arg_string() );
-        break;
-      case CTX_TEXT:
-        rasterizer->in_text++;
-#if CTX_ENABLE_SHADOW_BLUR
-        if (state->gstate.shadow_blur > 0.0)
-          ctx_rasterizer_shadow_text (rasterizer, ctx_arg_string ());
+    /* pad up to 56 bytes with zeroes */
+    while (sha1->curlen < 56) {
+        sha1->buf[sha1->curlen++] = (unsigned char)0;
+    }
+
+    /* store the message length in the last 8 bytes of the block */
+    STORE64H(sha1->length, sha1->buf+56);
+    ctx_sha1_compress(sha1, sha1->buf);
+
+    /* copy output: 5 state words, 4 bytes each */
+    for (i = 0; i < 5; i++) {
+        STORE32H(sha1->state[i], out+(4*i));
+    }
+    return CRYPT_OK;
+}
 #endif
-        ctx_rasterizer_text (rasterizer, ctx_arg_string(), 0);
-        rasterizer->in_text--;
-        ctx_rasterizer_reset (rasterizer);
-        break;
-      case CTX_STROKE_TEXT:
-        ctx_rasterizer_text (rasterizer, ctx_arg_string(), 1);
-        ctx_rasterizer_reset (rasterizer);
-        break;
-      case CTX_GLYPH:
-        ctx_rasterizer_glyph (rasterizer, entry[0].data.u32[0], entry[0].data.u8[4]);
-        break;
-      case CTX_FILL:
-#if CTX_ENABLE_SHADOW_BLUR
-        if (state->gstate.shadow_blur > 0.0 &&
-            !rasterizer->in_text)
-          ctx_rasterizer_shadow_fill (rasterizer);
+
 #endif
-        ctx_rasterizer_fill (rasterizer);
-        break;
-      case CTX_RESET:
-      case CTX_BEGIN_PATH:
-        ctx_rasterizer_reset (rasterizer);
+#endif
+#ifdef CTX_X86_64
+
+enum /* bit masks for EDX as returned by cpuid (1, 0, ...); see ctx_x86_64_level */
+{
+  ARCH_X86_INTEL_FEATURE_MMX      = 1 << 23,
+  ARCH_X86_INTEL_FEATURE_XMM      = 1 << 25,
+  ARCH_X86_INTEL_FEATURE_XMM2     = 1 << 26,
+};
+
+enum /* bit masks for ECX as returned by cpuid (1, 0, ...); see ctx_x86_64_level */
+{
+  ARCH_X86_INTEL_FEATURE_PNI      = 1 << 0,
+  ARCH_X86_INTEL_FEATURE_SSSE3    = 1 << 9,
+  ARCH_X86_INTEL_FEATURE_FMA      = 1 << 12,
+  ARCH_X86_INTEL_FEATURE_SSE4_1   = 1 << 19,
+  ARCH_X86_INTEL_FEATURE_SSE4_2   = 1 << 20,
+  ARCH_X86_INTEL_FEATURE_MOVBE    = 1 << 22,
+  ARCH_X86_INTEL_FEATURE_POPCNT   = 1 << 23,
+  ARCH_X86_INTEL_FEATURE_XSAVE    = 1 << 26,
+  ARCH_X86_INTEL_FEATURE_OSXSAVE  = 1 << 27,
+  ARCH_X86_INTEL_FEATURE_AVX      = 1 << 28,
+  ARCH_X86_INTEL_FEATURE_F16C     = 1 << 29
+};
+
+enum /* bit masks tested against EBX for the level 3 check in ctx_x86_64_level */
+{
+  ARCH_X86_INTEL_FEATURE_BMI1     = 1 << 3,
+  ARCH_X86_INTEL_FEATURE_BMI2     = 1 << 8,
+  ARCH_X86_INTEL_FEATURE_AVX2     = 1 << 5,
+};
+
+#define cpuid(a,b,eax,ebx,ecx,edx)                     \
+  __asm__("cpuid"                                           \
+           : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) \
+           : "0" (a), "2" (b)  ) /* executes CPUID: a is passed in the register of output 0 (EAX), b in the register of output 2 (ECX); the four result registers are stored in eax, ebx, ecx, edx */
+
+/* Returns the x86_64 microarchitecture level of the running CPU:
+ *   0  the MMX or XMM (SSE) feature bit is missing
+ *   1  MMX and XMM are present
+ *   2  additionally SSSE3, SSE4.1, SSE4.2 and POPCNT
+ *   3  additionally AVX, OSXSAVE, FMA, F16C, MOVBE, AVX2, BMI1 and BMI2
+ * The checks are cumulative; the first missing feature ends the probe
+ * and the level reached so far is returned.
+ */
+int
+ctx_x86_64_level (void)
+{
+  int level = 0;
+  uint32_t eax, ebx, ecx, edx;
+  cpuid (1, 0, eax, ebx, ecx, edx);
+
+  if ((edx & ARCH_X86_INTEL_FEATURE_MMX) == 0)   return level;
+  if ((edx & ARCH_X86_INTEL_FEATURE_XMM) == 0)   return level;
+  level = 1;
+
+  if ((ecx & ARCH_X86_INTEL_FEATURE_SSSE3)==0)   return level;
+  if ((ecx & ARCH_X86_INTEL_FEATURE_SSE4_1)==0)  return level;
+  if ((ecx & ARCH_X86_INTEL_FEATURE_SSE4_2)==0)  return level;
+  if ((ecx & ARCH_X86_INTEL_FEATURE_POPCNT)==0)  return level;
+  level = 2;
+
+  if ((ecx & ARCH_X86_INTEL_FEATURE_AVX)==0)     return level;
+  if ((ecx & ARCH_X86_INTEL_FEATURE_OSXSAVE)==0) return level;
+  if ((ecx & ARCH_X86_INTEL_FEATURE_FMA)==0)     return level;
+  if ((ecx & ARCH_X86_INTEL_FEATURE_F16C)==0)    return level;
+  if ((ecx & ARCH_X86_INTEL_FEATURE_MOVBE)==0)   return level;
+
+  /* leaf 0 reports the highest supported basic leaf in EAX; AVX2, BMI1
+   * and BMI2 live in EBX of the structured extended feature leaf
+   * (EAX=7, ECX=0), so that is the leaf that has to be queried once we
+   * know it exists.  Querying leaf 2 (cache descriptors) here would
+   * test unrelated bits. */
+  cpuid (0, 0, eax, ebx, ecx, edx);
+  if (eax >= 7)
+  {
+    cpuid (7, 0, eax, ebx, ecx, edx);
+    if ((ebx & ARCH_X86_INTEL_FEATURE_AVX2)==0)  return level;
+    if ((ebx & ARCH_X86_INTEL_FEATURE_BMI1)==0)  return level;
+    if ((ebx & ARCH_X86_INTEL_FEATURE_BMI2)==0)  return level;
+    level = 3;
+  }
+  return level;
+}
+
+#endif
+#include <stdio.h>
+#include <string.h>
+
+#if CTX_FORMATTER
+
+/* yEnc-style encoder: adds 42 (mod 256) to each of the count bytes at src
+ * and writes the result to dst.  Results that match one of the critical
+ * values listed below are emitted as '=' followed by the value plus 64
+ * (mod 256).  dst is NUL terminated; it needs room for 2 * count + 1
+ * bytes in the worst case.  Returns the number of bytes written, not
+ * counting the terminator.
+ */
+static int ctx_yenc (const char *src, char *dst, int count)
+{
+  int out_len = 0;
+  for (int i = 0; i < count; i ++)
+  {
+    /* go through unsigned char: where plain char is signed a byte >= 0x80
+     * would otherwise make o negative, so an encoded 0xff (o == -1) would
+     * miss its case label and be written unescaped */
+    int o = ((unsigned char) src[i] + 42) % 256;
+    switch (o)
+    {
+      case 0x00: //null
+      case 0x20: //space// but better safe
+      case 0x0A: //lf   // than sorry
+      case 0x0D: //cr
+      case 0x09: //tab  // not really needed
+      case 0x10: //datalink escape (used by ctx)
+      case 0x11: //xoff
+      case 0x13: //xon
+      case 0x1b: //
+      case 0xff: //
+      case 0x3D: //=
+        dst[out_len++] = '=';
+        o = (o + 64) % 256;
+        /* FALLTHROUGH */
+      default:
+        dst[out_len++] = o;
         break;
-      case CTX_CLIP:
-        ctx_rasterizer_clip (rasterizer);
+    }
+  }
+  dst[out_len]=0;
+  return out_len;
+}
+#endif
+
+#if CTX_PARSER
+/* Decodes count bytes of yEnc-style data from tmp_src into dst and NUL
+ * terminates the result; returns the number of decoded bytes.  An '='
+ * marks an escaped byte (subtract 42 + 64), "=y" ends decoding early,
+ * newline, carriage return, escape and NUL bytes in the input are
+ * skipped, everything else has 42 subtracted.
+ *
+ * dst may be the same buffer as tmp_src: a byte is only written after it
+ * has been read and the write position never passes the read position.
+ * dst needs room for count + 1 bytes.
+ */
+static int ctx_ydec (const char *tmp_src, char *dst, int count)
+{
+  const char *src = tmp_src;
+  int out_len = 0;
+  for (int i = 0; i < count; i ++)
+  {
+    int o = src[i];
+    switch (o)
+    {
+      case '=':
+        i++;
+        if (i >= count)
+          break; /* lone '=' at the end: no escaped byte inside count to decode */
+        o = src[i];
+        if (o == 'y')
+        {
+          dst[out_len]=0;
+          return out_len;
+        }
+        o = (o-42-64) % 256;
+        dst[out_len++] = o;
         break;
-      case CTX_CLOSE_PATH:
-        ctx_rasterizer_finish_shape (rasterizer);
+      case '\n':
+      case '\033': /* ESC, written in octal because '\e' is a compiler extension */
+      case '\r':
+      case '\0':
         break;
-      case CTX_IMAGE_SMOOTHING:
-        rasterizer->comp_op = NULL;
+      default:
+        o = (o-42) % 256;
+        dst[out_len++] = o;
         break;
     }
-  ctx_interpret_pos_bare (state, entry, NULL);
+  }
+  dst[out_len]=0;
+  return out_len;
+}
+#endif
+
+#if 0
+/* Disabled round-trip smoke test for ctx_yenc / ctx_ydec. */
+int main (){
+  char *input="this is a testæøåÅØ'''\"!:_asdac\n\r";
+  char  encoded[256]="";
+  char  decoded[256]="";
+  int   in_len = strlen (input);
+  int   out_len;
+  int   dec_len;
+
+  printf ("input: %s\n", input);
+
+  out_len = ctx_yenc (input, encoded, in_len);
+  printf ("encoded: %s\n", encoded);
+
+  dec_len = ctx_ydec (encoded, decoded, out_len);
+
+  printf ("decoded: %s (%i bytes)\n", decoded, dec_len);
+
+  return 0;
+}
+#endif
+#ifndef __CTX_UTIL_H
+#define __CTX_UTIL_H
+
+
+/* Returns 1 when str consists solely of ASCII digits with at most one
+ * '.' and contains at least one digit, otherwise 0.  Signs, exponents
+ * and whitespace are not accepted.  A second '.' disqualifies the
+ * string, so that text such as "1.2.3" is not mistaken for a number
+ * and truncated by a later strtod().
+ */
+static int ctx_str_is_number (const char *str)
+{
+  int got_digit = 0;
+  int got_dot   = 0;
+  for (int i = 0; str[i]; i++)
+  {
+    if (str[i] >= '0' && str[i] <= '9')
+    {
+       got_digit = 1;
+    }
+    else if (str[i] == '.' && !got_dot)
+    {
+       got_dot = 1;
+    }
+    else
+      return 0;
+  }
+  return got_digit;
+}
+
+#if CTX_GET_CONTENTS
+
+typedef struct CtxFileContent /* one entry of the registered_contents list */
+{
+  char *path; /* key that ctx_register_contents compares with strcmp() */
+  unsigned char *contents; /* data pointer handed to ctx_register_contents (stored, not copied) */
+  long  length; /* length value handed to ctx_register_contents */
+  int   free_data; /* non-zero: contents is free()d when the entry is overwritten */
+} CtxFileContent;
+
+CtxList *registered_contents = NULL; /* list of CtxFileContent entries, filled by ctx_register_contents */
+
+/* Registers the buffer contents (length bytes) under path, replacing any
+ * earlier registration for the same path.  The buffer is stored by
+ * pointer, not copied; when free_data is non-zero the registry free()s
+ * it once the entry is overwritten with a different buffer.  path itself
+ * is copied.  If memory for a new entry cannot be allocated the
+ * registration is dropped and contents is left untouched.
+ */
+void
+ctx_register_contents (const char *path,
+                       const unsigned char *contents,
+                       long length,
+                       int  free_data)
+{
+  // if (path[0] != '/') && strchr(path, ':')) 
+  //   with this check regular use is faster, but we lose
+  //   generic filesystem overrides..
+  for (CtxList *l = registered_contents; l; l = l->next)
+  {
+    CtxFileContent *c = (CtxFileContent*)l->data;
+    if (c->path && !strcmp (c->path, path))
+    {
+       /* do not free a buffer that is being registered again */
+       if (c->free_data && c->contents != contents)
+       {
+         free (c->contents);
+       }
+       c->free_data = free_data;
+       c->contents = (unsigned char*)contents;
+       c->length = length;
+       return;
+    }
+  }
+  CtxFileContent *c = (CtxFileContent*)calloc (sizeof (CtxFileContent), 1);
+  if (!c)
+    return;
+  /* the entry needs its own copy of the key: without it c->path stays
+   * NULL and the strcmp() of the next lookup dereferences NULL */
+  size_t path_size = strlen (path) + 1;
+  c->path = (char*)malloc (path_size);
+  if (!c->path)
+  {
+    free (c);
+    return;
+  }
+  memcpy (c->path, path, path_size);
+  c->free_data = free_data;
+  c->contents = (unsigned char*)contents;
+  c->length    = length;
+  ctx_list_append (&registered_contents, c);
+}
+
+/* Writes contents to the file at path, which is opened with mode "wb".
+ * A negative length means that contents is a NUL terminated string whose
+ * strlen() is used instead.  Nothing happens when the file cannot be
+ * opened; the result of the write is not reported.
+ */
+void
+_ctx_file_set_contents (const char     *path,
+                        const unsigned char  *contents,
+                        long            length)
+{
+  FILE *out = fopen (path, "wb");
+  if (out == NULL)
+    return;
+  if (length < 0)
+    length = strlen ((const char*)contents);
+  fwrite (contents, 1, length, out);
+  fclose (out);
+}
+
+/* Reads at most max_len bytes from the start of the file at path into a
+ * freshly malloc()ed buffer.
+ *   contents  receives the buffer on success; the caller frees it.  The
+ *             buffer is NUL terminated after the data.
+ *   length    when non-NULL receives the number of bytes (file size
+ *             capped at max_len); it is written before the data is read.
+ * Returns 0 on success and -1 when max_len is negative, the file cannot
+ * be opened, its size cannot be determined, memory runs out or the read
+ * comes up short.
+ */
+static int
+___ctx_file_get_contents (const char     *path,
+                          unsigned char **contents,
+                          long           *length,
+                          long            max_len)
+{
+  FILE *file;
+  long  size;
+  char *buffer;
+  if (max_len < 0)
+    { return -1; }
+  file = fopen (path, "rb");
+  if (!file)
+    { return -1; }
+  /* fseek/ftell fail on streams that cannot seek (pipes for instance); a
+   * size of -1 must never reach malloc() or fread(), where it would turn
+   * into a tiny allocation and a huge read count */
+  if (fseek (file, 0, SEEK_END) != 0 || (size = ftell (file)) < 0)
+    {
+      fclose (file);
+      return -1;
+    }
+
+  if (size > max_len)
+  {
+     size = max_len;
+  }
+
+  if (length)
+    { *length =size; }
+  rewind (file);
+  buffer = (char*)malloc (size + 8);
+  if (!buffer)
+    {
+      fclose (file);
+      return -1;
+    }
+  if (fread (buffer, 1, size, file) != (size_t) size)
+    {
+      fclose (file);
+      free (buffer);
+      return -1;
+    }
+  fclose (file);
+  *contents = (unsigned char*) buffer;
+  buffer[size] = 0;
+  return 0;
+}
+
+/* Same as ___ctx_file_get_contents with the size limit fixed to
+ * 1024*1024*1024 bytes. */
+static int
+__ctx_file_get_contents (const char     *path,
+                        unsigned char **contents,
+                        long           *length)
+{
+  const long size_limit = 1024*1024*1024;
+  return ___ctx_file_get_contents (path, contents, length, size_limit);
+}
+
+#if !__COSMOPOLITAN__
+#include <limits.h>
+#endif
+
+
+
+
+#endif
+
+
+#endif
+
+
+/* Looks hash up in the key database of state, scanning from the most
+ * recently added entry down to the first.  Returns the stored value of
+ * the first match, or -0.0 when there is none.
+ */
+static float ctx_state_get (CtxState *state, uint32_t hash)
+{
+  int pos = state->gstate.keydb_pos;
+  while (--pos >= 0)
+    {
+      if (state->keydb[pos].key == hash)
+        return state->keydb[pos].value;
+    }
+  return -0.0;
+}
+
+/* Stores value under key in the key database of state.
+ *
+ * For keys other than CTX_new_state: nothing is done when the value
+ * currently visible for key already equals value; an entry for key that
+ * lies above the most recent CTX_new_state marker is overwritten in
+ * place.  In all other cases a new entry is appended, unless the
+ * database already holds CTX_MAX_KEYDB entries, in which case the value
+ * is dropped.
+ */
+static void ctx_state_set (CtxState *state, uint32_t key, float value)
+{
+  int top = state->gstate.keydb_pos;
+
+  if (key != CTX_new_state)
+    {
+      if (ctx_state_get (state, key) == value)
+        return;
+      for (int i = top - 1; i >= 0; i--)
+        {
+          if (state->keydb[i].key == CTX_new_state)
+            break;
+          if (state->keydb[i].key == key)
+            {
+              state->keydb[i].value = value;
+              return;
+            }
+        }
+    }
+
+  if (top >= CTX_MAX_KEYDB)
+    return;
+  state->keydb[top].key   = key;
+  state->keydb[top].value = value;
+  state->gstate.keydb_pos = top + 1;
+}
+
+
+#define CTX_KEYDB_STRING_START (-90000.0) /* keydb values from here up to CTX_KEYDB_STRING_END are read as stringpool offsets rather than numbers, see ctx_float_is_string */
+#define CTX_KEYDB_STRING_END   (CTX_KEYDB_STRING_START + CTX_STRINGPOOL_SIZE) /* upper end (inclusive in ctx_float_is_string) of that range */
+
+/* Returns 1 when val lies inside the inclusive range
+ * CTX_KEYDB_STRING_START .. CTX_KEYDB_STRING_END, i.e. when it encodes a
+ * stringpool offset, and 0 otherwise (also for NaN). */
+static int ctx_float_is_string (float val)
+{
+  if (!(val >= CTX_KEYDB_STRING_START))
+    return 0;
+  return val <= CTX_KEYDB_STRING_END;
+}
+
+/* Converts a keydb value into a stringpool offset; returns -1 when val
+ * is not inside the string range. */
+static int ctx_float_to_string_index (float val)
+{
+  if (!ctx_float_is_string (val))
+    return -1;
+  return val - CTX_KEYDB_STRING_START;
+}
+
+/* Encodes a stringpool offset as a keydb value; inverse of
+ * ctx_float_to_string_index. */
+static float ctx_string_index_to_float (int index)
+{
+  float encoded = CTX_KEYDB_STRING_START + index;
+  return encoded;
+}
+
+/* Returns a pointer into the stringpool of state for the blob stored
+ * under key, or NULL when the value stored for key does not encode a
+ * stringpool offset.  The length of the blob is not available here.
+ */
+static void *ctx_state_get_blob (CtxState *state, uint32_t key)
+{
+  int offset = ctx_float_to_string_index (ctx_state_get (state, key));
+  if (offset < 0)
+    {
+      // format number as string?
+      return NULL;
+    }
+  return &state->stringpool[offset];
+}
+
+/* Returns the string stored under key, or NULL when there is no blob
+ * for key or the blob starts with the byte 127 (the marker that
+ * ctx_state_set_color puts in the magic field of stored colors). */
+static const char *ctx_state_get_string (CtxState *state, uint32_t key)
+{
+  const char *str = (char*)ctx_state_get_blob (state, key);
+  if (str == NULL)
+    return NULL;
+  return (str[0] == 127) ? NULL : str;
+}
+
+
+/* Appends len bytes from data plus a terminating NUL to the stringpool
+ * of state and stores the encoded start offset under key.  When the
+ * pool cannot take the blob the request is logged and dropped.
+ *
+ * The capacity check includes the terminator: len + 1 bytes are
+ * written, so checking only idx + len allows a write one byte past
+ * CTX_STRINGPOOL_SIZE (assumes stringpool holds CTX_STRINGPOOL_SIZE
+ * bytes - TODO confirm against the CtxState declaration).
+ */
+static void ctx_state_set_blob (CtxState *state, uint32_t key, uint8_t *data, int len)
+{
+  int idx = state->gstate.stringpool_pos;
+
+  if (len < 0 || idx + len + 1 > CTX_STRINGPOOL_SIZE)
+  {
+    ctx_log ("blowing varpool size [%c..]\n", data[0]);
+    return;
+  }
+
+  memcpy (&state->stringpool[idx], data, len);
+  state->gstate.stringpool_pos+=len;
+  state->stringpool[state->gstate.stringpool_pos++]=0;
+  ctx_state_set (state, key, ctx_string_index_to_float (idx));
+}
+
+/* Stores string under key.  Nothing is done when key already holds an
+ * identical string.  Strings accepted by ctx_str_is_number are stored
+ * as their strtod() value instead of as text; everything else is
+ * appended to the stringpool through ctx_state_set_blob.
+ */
+static void ctx_state_set_string (CtxState *state, uint32_t key, const char *string)
+{
+  if (ctx_float_to_string_index (ctx_state_get (state, key)) >= 0)
+  {
+    const char *current = ctx_state_get_string (state, key);
+    if (current != NULL && strcmp (current, string) == 0)
+      return;
+  }
+
+  // should do same with color
+  if (!ctx_str_is_number (string))
+  {
+    // XXX should special case when the string modified is at the
+    //     end of the stringpool.
+    //
+    //     for clips the behavior is however ideal, since
+    //     we can have more than one clip per save/restore level
+    ctx_state_set_blob (state, key, (uint8_t*)string, strlen(string));
+    return;
+  }
+  ctx_state_set (state, key, strtod (string, NULL));
+}
+
+/* Copies the color stored under key into *color.  Returns 0 on success
+ * and -1 when there is no blob for key or its magic field is not 127. */
+static int ctx_state_get_color (CtxState *state, uint32_t key, CtxColor *color)
+{
+  CtxColor *stored = (CtxColor*)ctx_state_get_blob (state, key);
+  if (stored == NULL || stored->magic != 127)
+    return -1;
+  *color = *stored;
+  return 0;
+}
+
+/* Stores a copy of *color, with its magic field set to 127, as a blob
+ * under key.  Nothing is stored when the color currently held for key
+ * is byte-for-byte identical to that copy. */
+static void ctx_state_set_color (CtxState *state, uint32_t key, CtxColor *color)
+{
+  CtxColor tagged;
+  CtxColor current;
+  tagged = *color;
+  tagged.magic = 127;
+  if (ctx_state_get_color (state, key, &current) == 0 &&
+      memcmp (&tagged, &current, sizeof (tagged)) == 0)
+    return;
+  ctx_state_set_blob (state, key, (uint8_t*)&tagged, sizeof (CtxColor));
+}
+
+const char *ctx_get_string (Ctx *ctx, uint32_t hash) /* string stored under hash in the state of ctx, NULL when ctx_state_get_string finds none */
+{
+  return ctx_state_get_string (&ctx->state, hash);
+}
+float ctx_get_float (Ctx *ctx, uint32_t hash) /* value stored under hash, -0.0 when ctx_state_get finds no entry */
+{
+  return ctx_state_get (&ctx->state, hash);
+}
+int ctx_get_int (Ctx *ctx, uint32_t hash) /* same lookup as ctx_get_float; the float result is converted to int by the return */
+{
+  return ctx_state_get (&ctx->state, hash);
+}
+void ctx_set_float (Ctx *ctx, uint32_t hash, float value) /* forwards to ctx_state_set */
+{
+  ctx_state_set (&ctx->state, hash, value);
+}
+void ctx_set_string (Ctx *ctx, uint32_t hash, const char *value) /* forwards to ctx_state_set_string */
+{
+  ctx_state_set_string (&ctx->state, hash, value);
+}
+void ctx_set_color (Ctx *ctx, uint32_t hash, CtxColor *color) /* forwards to ctx_state_set_color */
+{
+  ctx_state_set_color (&ctx->state, hash, color);
+}
+int  ctx_get_color (Ctx *ctx, uint32_t hash, CtxColor *color) /* 0 when a color was copied into *color, -1 otherwise */
+{
+  return ctx_state_get_color (&ctx->state, hash, color);
+}
+int ctx_is_set (Ctx *ctx, uint32_t hash) /* non-zero when the stored value differs from the -0.0 that ctx_state_get returns for missing keys; NOTE(review): a stored 0.0 compares equal to -0.0f, so it also reads as not set */
+{
+  return ctx_get_float (ctx, hash) != -0.0f;
+}
+int ctx_is_set_now (Ctx *ctx, uint32_t hash) /* currently identical to ctx_is_set */
+{
+  return ctx_is_set (ctx, hash);
+}
+#ifndef __CTX_COLOR
+#define __CTX_COLOR
+
+/* Returns the number of components a color of the given model carries,
+ * alpha included where the model has one: 1 for gray, 2 for gray+alpha,
+ * 3 for the RGB/LAB/LCH family, 4 for CMYK and the three-component
+ * models with alpha, 5 for CMYK with alpha.  Unknown models yield 0.
+ *
+ * Gray with alpha reports 2, in line with the other alpha models
+ * (RGBA 4, CMYKA 5) and with ctx_color_raw, which reads components[1]
+ * for CTX_GRAYA.
+ */
+int ctx_color_model_get_components (CtxColorModel model)
+{
+  switch (model)
+    {
+      case CTX_GRAY:
+        return 1;
+      case CTX_GRAYA:
+      case CTX_GRAYA_A:
+        return 2;
+      case CTX_RGB:
+      case CTX_LAB:
+      case CTX_LCH:
+      case CTX_DRGB:
+        return 3;
+      case CTX_CMYK:
+      case CTX_DCMYK:
+      case CTX_LABA:
+      case CTX_LCHA:
+      case CTX_RGBA:
+      case CTX_DRGBA:
+      case CTX_RGBA_A:
+      case CTX_RGBA_A_DEVICE:
+        return 4;
+      case CTX_DCMYKA:
+      case CTX_CMYKA:
+      case CTX_CMYKA_A:
+      case CTX_DCMYKA_A:
+        return 5;
+    }
+  return 0;
+}
+
+#if CTX_U8_TO_FLOAT_LUT
+float ctx_u8_float[256]; /* 256 entry float table, only defined with CTX_U8_TO_FLOAT_LUT; presumably indexed by a u8 value and filled elsewhere - TODO confirm */
+#endif
+
+/* Allocates one CtxColor through ctx_calloc and returns it. */
+CtxColor *ctx_color_new (void)
+{
+  return (CtxColor*)ctx_calloc (sizeof (CtxColor), 1);
+}
+
+/* Returns 1 when the alpha of color is at most 0.001, otherwise 0. */
+int ctx_color_is_transparent (CtxColor *color)
+{
+  const float threshold = 0.001f;
+  return (color->alpha <= threshold) ? 1 : 0;
+}
+
+
+void ctx_color_free (CtxColor *color) /* releases color with free() */
+{
+  free (color);
+}
+
+/* Sets color from four 8 bit components and marks the u8 RGBA form as
+ * both the original and the only valid representation.  With
+ * CTX_ENABLE_CM the color is tagged with the device space of state.
+ * (The signature is kept on one line: the hard-wrapped remainder had no
+ * diff marker and broke the patch.)
+ */
+static void ctx_color_set_RGBA8 (CtxState *state, CtxColor *color, uint8_t r, uint8_t g, uint8_t b, uint8_t a)
+{
+  color->original = color->valid = CTX_VALID_RGBA_U8;
+  color->rgba[0] = r;
+  color->rgba[1] = g;
+  color->rgba[2] = b;
+  color->rgba[3] = a;
+#if CTX_ENABLE_CM
+  color->space = state->gstate.device_space;
+#endif
+}
+
+#if 0 /* disabled array variant; the call below passes no CtxState argument, unlike the ctx_color_set_RGBA8 signature */
+static void ctx_color_set_RGBA8_ (CtxColor *color, const uint8_t *in)
+{
+  ctx_color_set_RGBA8 (color, in[0], in[1], in[2], in[3]);
+}
+#endif
+
+/* Sets color to a gray level with alpha and marks the gray+alpha form as
+ * the original and only valid representation.  state is not used. */
+static void ctx_color_set_graya (CtxState *state, CtxColor *color, float gray, float alpha)
+{
+  color->alpha = alpha;
+  color->l     = gray;
+  color->original = color->valid = CTX_VALID_GRAYA;
+}
+#if 0 /* disabled array variant; the call below passes no CtxState argument, unlike the ctx_color_set_graya signature */
+static void ctx_color_set_graya_ (CtxColor *color, const float *in)
+{
+  return ctx_color_set_graya (color, in[0], in[1]);
+}
+#endif
+
+/* Sets color from float RGBA components.  With CTX_ENABLE_CM the values
+ * are stored as user-space RGB tagged with the rgb_space of state;
+ * without it they are stored directly as device RGB.  The stored form
+ * becomes the original and only valid representation.
+ */
+void ctx_color_set_rgba (CtxState *state, CtxColor *color, float r, float g, float b, float a)
+{
+  color->alpha        = a;
+#if CTX_ENABLE_CM
+  color->space    = state->gstate.rgb_space;
+  color->blue     = b;
+  color->green    = g;
+  color->red      = r;
+  color->original = color->valid = CTX_VALID_RGBA;
+#else
+  color->device_blue  = b;
+  color->device_green = g;
+  color->device_red   = r;
+  color->original     = color->valid = CTX_VALID_RGBA_DEVICE;
+#endif
+}
+
+/* Sets color from device RGBA components.  With CTX_ENABLE_CM the values
+ * are stored as device RGB tagged with the device space of state;
+ * without it this is the same as ctx_color_set_rgba.
+ */
+static void ctx_color_set_drgba (CtxState *state, CtxColor *color, float r, float g, float b, float a)
+{
+#if !CTX_ENABLE_CM
+  ctx_color_set_rgba (state, color, r, g, b, a);
+#else
+  color->space        = state->gstate.device_space;
+  color->alpha        = a;
+  color->device_blue  = b;
+  color->device_green = g;
+  color->device_red   = r;
+  color->original     = color->valid = CTX_VALID_RGBA_DEVICE;
+#endif
+}
+
+#if 0 /* disabled array variant; the call below does not forward state, unlike the ctx_color_set_rgba signature */
+static void ctx_color_set_rgba_ (CtxState *state, CtxColor *color, const float *in)
+{
+  ctx_color_set_rgba (color, in[0], in[1], in[2], in[3]);
+}
+#endif
+
+/* Baseline CMYK to RGB conversion, available whether CMYK support is
+ * enabled or not, as a best effort at right rendering: every channel is
+ * the product of the complement of its ink and the complement of key.
+ */
+static void ctx_cmyk_to_rgb (float c, float m, float y, float k, float *r, float *g, float *b)
+{
+  const float not_key = 1.0f-k;
+  *r = (1.0f-c) * not_key;
+  *g = (1.0f-m) * not_key;
+  *b = (1.0f-y) * not_key;
+}
+
+/* Baseline RGB to CMYK conversion.  key is the smallest of the three
+ * complements 1-r, 1-g, 1-b; the inks are what remains of each
+ * complement after removing key, rescaled by 1/(1-key).  When key is
+ * not below 1 all three inks are 0.
+ */
+void ctx_rgb_to_cmyk (float r, float g, float b,
+                      float *c_out, float *m_out, float *y_out, float *k_out)
+{
+  float c = 1.0f - r;
+  float m = 1.0f - g;
+  float y = 1.0f - b;
+  float k = ctx_minf (c, ctx_minf (y, m) );
+  if (!(k < 1.0f))
+    {
+      c = m = y = 0.0f;
+    }
+  else
+    {
+      const float rest = 1.0f - k;
+      c = (c - k) / rest;
+      m = (m - k) / rest;
+      y = (y - k) / rest;
+    }
+  *c_out = c;
+  *m_out = m;
+  *y_out = y;
+  *k_out = k;
+}
+
+#if CTX_ENABLE_CMYK
+/* Sets color from CMYK components plus alpha and marks the CMYKA form as
+ * the original and only valid representation.  With CTX_ENABLE_CM the
+ * color is tagged with the cmyk_space of state.
+ * (The signature is kept on one line: the hard-wrapped remainder had no
+ * diff marker and broke the patch.)
+ */
+static void ctx_color_set_cmyka (CtxState *state, CtxColor *color, float c, float m, float y, float k, float a)
+{
+  color->original = color->valid = CTX_VALID_CMYKA;
+  color->cyan     = c;
+  color->magenta  = m;
+  color->yellow   = y;
+  color->key      = k;
+  color->alpha    = a;
+#if CTX_ENABLE_CM
+  color->space    = state->gstate.cmyk_space;
+#endif
+}
+
+/* Sets color from device CMYK components plus alpha and marks the
+ * DCMYKA form as the original and only valid representation.  With
+ * CTX_ENABLE_CM the color is tagged with the device space of state.
+ * (The signature is kept on one line: the hard-wrapped remainder had no
+ * diff marker and broke the patch.)
+ */
+static void ctx_color_set_dcmyka (CtxState *state, CtxColor *color, float c, float m, float y, float k, float a)
+{
+  color->original       = color->valid = CTX_VALID_DCMYKA;
+  color->device_cyan    = c;
+  color->device_magenta = m;
+  color->device_yellow  = y;
+  color->device_key     = k;
+  color->alpha          = a;
+#if CTX_ENABLE_CM
+  color->space = state->gstate.device_space;
+#endif
+}
+
+#endif
+
+#if CTX_ENABLE_CM
+
+static void ctx_rgb_user_to_device (CtxState *state, float rin, float gin, float bin,
+                                    float *rout, float *gout, float *bout) /* converts
+ * an RGB triplet with the fish_rgbaf_user_to_device conversion of state;
+ * when CTX_BABL is off or no such fish is set the input is copied to the
+ * outputs unchanged */
+{
+#if CTX_BABL
+#if 0
+  fprintf (stderr, "-[%p %p\n",
+    state->gstate.fish_rgbaf_user_to_device,
+    state->gstate.fish_rgbaf_device_to_user);
+#endif
+  if (state->gstate.fish_rgbaf_user_to_device)
+  {
+    float rgbaf[4]={rin,gin,bin,1.0}; /* one RGBA pixel, alpha fixed to 1 */
+    float rgbafo[4];
+    babl_process (state->gstate.fish_rgbaf_user_to_device,
+                  rgbaf, rgbafo, 1);
+
+    *rout = rgbafo[0];
+    *gout = rgbafo[1];
+    *bout = rgbafo[2];
+    return;
+  }
+#endif
+  *rout = rin;
+  *gout = gin;
+  *bout = bin;
+}
+
+static void ctx_rgb_device_to_user (CtxState *state, float rin, float gin, float bin,
+                                    float *rout, float *gout, float *bout) /* converts
+ * an RGB triplet with the fish_rgbaf_device_to_user conversion of state;
+ * when CTX_BABL is off or no such fish is set the input is copied to the
+ * outputs unchanged */
+{
+#if CTX_BABL
+#if 0
+  fprintf (stderr, "=[%p %p\n",
+    state->gstate.fish_rgbaf_user_to_device,
+    state->gstate.fish_rgbaf_device_to_user);
+#endif
+  if (state->gstate.fish_rgbaf_device_to_user)
+  {
+    float rgbaf[4]={rin,gin,bin,1.0}; /* one RGBA pixel, alpha fixed to 1 */
+    float rgbafo[4];
+    babl_process (state->gstate.fish_rgbaf_device_to_user,
+                  rgbaf, rgbafo, 1);
+
+    *rout = rgbafo[0];
+    *gout = rgbafo[1];
+    *bout = rgbafo[2];
+    return;
+  }
+#endif
+  *rout = rin;
+  *gout = gin;
+  *bout = bin;
+}
+#endif
+
+static void ctx_color_get_drgba (CtxState *state, CtxColor *color, float *out) /* writes
+ * the device RGB values and alpha of color to out[0..3].  When the device
+ * form is not cached yet it is derived from the first valid form among:
+ * user RGBA (only with CTX_ENABLE_CM), u8 RGBA, CMYKA (only with
+ * CTX_ENABLE_CMYK), gray+alpha; the result is cached in color and
+ * CTX_VALID_RGBA_DEVICE is set even if none of them matched. */
+{
+  if (! (color->valid & CTX_VALID_RGBA_DEVICE) )
+    {
+#if CTX_ENABLE_CM
+      if (color->valid & CTX_VALID_RGBA)
+        {
+          ctx_rgb_user_to_device (state, color->red, color->green, color->blue,
+                                  & (color->device_red), & (color->device_green), & (color->device_blue) );
+        }
+      else
+#endif
+        if (color->valid & CTX_VALID_RGBA_U8)
+          {
+            float red = ctx_u8_to_float (color->rgba[0]);
+            float green = ctx_u8_to_float (color->rgba[1]);
+            float blue = ctx_u8_to_float (color->rgba[2]);
+#if CTX_ENABLE_CM
+            ctx_rgb_user_to_device (state, red, green, blue,
+                                  & (color->device_red), & (color->device_green), & (color->device_blue) );
+#else
+            color->device_red = red;
+            color->device_green = green;
+            color->device_blue = blue;
+#endif
+            color->alpha        = ctx_u8_to_float (color->rgba[3]); /* only this branch also refreshes alpha */
+          }
+#if CTX_ENABLE_CMYK
+        else if (color->valid & CTX_VALID_CMYKA)
+          {
+            ctx_cmyk_to_rgb (color->cyan, color->magenta, color->yellow, color->key,
+                             &color->device_red,
+                             &color->device_green,
+                             &color->device_blue);
+          }
+#endif
+        else if (color->valid & CTX_VALID_GRAYA)
+          {
+            color->device_red   =
+              color->device_green =
+                color->device_blue  = color->l;
+          }
+      color->valid |= CTX_VALID_RGBA_DEVICE;
+    }
+  out[0] = color->device_red;
+  out[1] = color->device_green;
+  out[2] = color->device_blue;
+  out[3] = color->alpha;
+}
+
+
+static inline void
+_ctx_color_get_rgba (CtxState *state, CtxColor *color, float *out) /* writes user-space
+ * RGB and alpha of color to out[0..3].  With CTX_ENABLE_CM a missing
+ * user-space form is derived from the device form and cached; without
+ * it this is the same as ctx_color_get_drgba. */
+{
+#if CTX_ENABLE_CM
+  if (! (color->valid & CTX_VALID_RGBA) )
+    {
+      ctx_color_get_drgba (state, color, out); /* makes sure the device form is populated; out is overwritten below */
+      if (color->valid & CTX_VALID_RGBA_DEVICE)
+        {
+          ctx_rgb_device_to_user (state, color->device_red, color->device_green, color->device_blue,
+                                  & (color->red), & (color->green), & (color->blue) );
+        }
+      color->valid |= CTX_VALID_RGBA;
+    }
+  out[0] = color->red;
+  out[1] = color->green;
+  out[2] = color->blue;
+  out[3] = color->alpha;
+#else
+  ctx_color_get_drgba (state, color, out);
+#endif
+}
+
+void ctx_color_get_rgba (CtxState *state, CtxColor *color, float *out) /* non-inline entry point for _ctx_color_get_rgba; out receives 4 floats */
+{
+  _ctx_color_get_rgba (state, color, out);
+}
+
+
+
+float ctx_float_color_rgb_to_gray (CtxState *state, const float *rgb) /* gray value of a float RGB triplet as computed by CTX_CSS_RGB_TO_LUMINANCE; state is not used */
+{
+        // XXX todo replace with correct according to primaries
+  return CTX_CSS_RGB_TO_LUMINANCE(rgb);
+}
+uint8_t ctx_u8_color_rgb_to_gray (CtxState *state, const uint8_t *rgb) /* gray value of a u8 RGB triplet as computed by CTX_CSS_RGB_TO_LUMINANCE, converted to uint8_t by the return; state is not used */
+{
+        // XXX todo replace with correct according to primaries
+  return CTX_CSS_RGB_TO_LUMINANCE(rgb);
+}
+
+/* Writes gray level and alpha of color to out[0] and out[1].  A missing
+ * gray form is computed from the device RGB form with
+ * ctx_float_color_rgb_to_gray and cached in color.
+ */
+void ctx_color_get_graya (CtxState *state, CtxColor *color, float *out)
+{
+  if ((color->valid & CTX_VALID_GRAYA) == 0)
+    {
+      float drgba[4];
+      ctx_color_get_drgba (state, color, drgba);
+      color->l      = ctx_float_color_rgb_to_gray (state, drgba);
+      color->valid |= CTX_VALID_GRAYA;
+    }
+  out[0] = color->l;
+  out[1] = color->alpha;
+}
+
+#if CTX_ENABLE_CMYK
+/* Writes cyan, magenta, yellow, key and alpha of color to out[0..4].  A
+ * missing CMYKA form is derived from the gray form when that is valid
+ * and from user-space RGBA otherwise, and is cached in color.
+ */
+void ctx_color_get_cmyka (CtxState *state, CtxColor *color, float *out)
+{
+  if (! (color->valid & CTX_VALID_CMYKA) )
+    {
+      if (color->valid & CTX_VALID_GRAYA)
+        {
+          /* gray needs no chromatic ink; key is the complement of the
+           * gray level, which is what ctx_rgb_to_cmyk yields for
+           * r = g = b = l and what ctx_cmyk_to_rgb maps back to l */
+          color->cyan = color->magenta = color->yellow = 0.0;
+          color->key = 1.0f - color->l;
+        }
+      else
+        {
+          float rgba[4];
+          ctx_color_get_rgba (state, color, rgba);
+          ctx_rgb_to_cmyk (rgba[0], rgba[1], rgba[2],
+                           &color->cyan, &color->magenta, &color->yellow, &color->key);
+          color->alpha = rgba[3];
+        }
+      color->valid |= CTX_VALID_CMYKA;
+    }
+  out[0] = color->cyan;
+  out[1] = color->magenta;
+  out[2] = color->yellow;
+  out[3] = color->key;
+  out[4] = color->alpha;
+}
+
+#if 0 /* disabled u8 CMYKA getter; the ctx_color_get_cmyka call below passes no CtxState argument, and only four of the five converted components are written to out */
+static void ctx_color_get_cmyka_u8 (CtxState *state, CtxColor *color, uint8_t *out)
+{
+  if (! (color->valid & CTX_VALID_CMYKA_U8) )
+    {
+      float cmyka[5];
+      ctx_color_get_cmyka (color, cmyka);
+      for (int i = 0; i < 5; i ++)
+        { color->cmyka[i] = ctx_float_to_u8 (cmyka[i]); }
+      color->valid |= CTX_VALID_CMYKA_U8;
+    }
+  out[0] = color->cmyka[0];
+  out[1] = color->cmyka[1];
+  out[2] = color->cmyka[2];
+  out[3] = color->cmyka[3];
+}
+#endif
+#endif
+
+/* Writes the four 8 bit RGBA components of color to out[0..3].  A
+ * missing u8 form is computed from the device RGBA floats with
+ * ctx_float_to_u8 and cached in color.
+ */
+static inline void
+_ctx_color_get_rgba8 (CtxState *state, CtxColor *color, uint8_t *out)
+{
+  if ((color->valid & CTX_VALID_RGBA_U8) == 0)
+    {
+      float drgba[4];
+      ctx_color_get_drgba (state, color, drgba);
+      color->rgba[0] = ctx_float_to_u8 (drgba[0]);
+      color->rgba[1] = ctx_float_to_u8 (drgba[1]);
+      color->rgba[2] = ctx_float_to_u8 (drgba[2]);
+      color->rgba[3] = ctx_float_to_u8 (drgba[3]);
+      color->valid |= CTX_VALID_RGBA_U8;
+    }
+  for (int c = 0; c < 4; c ++)
+    { out[c] = color->rgba[c]; }
+}
+
+void
+ctx_color_get_rgba8 (CtxState *state, CtxColor *color, uint8_t *out) /* non-inline entry point for _ctx_color_get_rgba8; out receives 4 bytes */
+{
+  _ctx_color_get_rgba8 (state, color, out);
+}
+
+/* Writes the 8 bit gray level and alpha of color to out[0] and out[1].
+ * A missing u8 gray form is computed through ctx_color_get_graya and
+ * cached; the alpha byte shares its storage with rgba[3].
+ */
+void ctx_color_get_graya_u8 (CtxState *state, CtxColor *color, uint8_t *out)
+{
+  if ((color->valid & CTX_VALID_GRAYA_U8) == 0)
+    {
+      float gray_alpha[2];
+      ctx_color_get_graya (state, color, gray_alpha);
+      color->l_u8    = ctx_float_to_u8 (gray_alpha[0]);
+      color->rgba[3] = ctx_float_to_u8 (gray_alpha[1]);
+      color->valid  |= CTX_VALID_GRAYA_U8;
+    }
+  out[0] = color->l_u8;
+  out[1] = color->rgba[3];
+}
+
+#if 0 /* disabled getters that read the color of ctx->state.gstate.source */
+void
+ctx_get_rgba (Ctx *ctx, float *rgba)
+{
+  ctx_color_get_rgba (& (ctx->state), &ctx->state.gstate.source.color, rgba);
+}
+
+void
+ctx_get_drgba (Ctx *ctx, float *rgba)
+{
+  ctx_color_get_drgba (& (ctx->state), &ctx->state.gstate.source.color, rgba);
+}
+#endif
+
+/* Returns 1 when (x, y) lies inside the extents reported by
+ * ctx_path_extents for the current path (borders included), else 0.
+ * XXX - just bounding box for now
+ */
+int ctx_in_fill (Ctx *ctx, float x, float y)
+{
+  float left, top, right, bottom;
+  ctx_path_extents (ctx, &left, &top, &right, &bottom);
+
+  if (!(left <= x && x <= right))
+    return 0;
+  if (!(top <= y && y <= bottom))
+    return 0;
+  return 1;
+}
+
+
+#if CTX_ENABLE_CMYK
+#if 0 /* disabled getter that reads the color of ctx->state.gstate.source */
+void
+ctx_get_cmyka (Ctx *ctx, float *cmyka)
+{
+  ctx_color_get_cmyka (& (ctx->state), &ctx->state.gstate.source.color, cmyka);
+}
+#endif
+#endif
+#if 0 /* disabled getter that reads the color of ctx->state.gstate.source */
+void
+ctx_get_graya (Ctx *ctx, float *ya)
+{
+  ctx_color_get_graya (& (ctx->state), &ctx->state.gstate.source.color, ya);
+}
+#endif
+
+/* Emit a bare CTX_STROKE_SOURCE command. ctx_color_raw() issues this right
+ * before its CTX_COLOR command when called with a non-zero stroke flag. */
+void ctx_stroke_source (Ctx *ctx)
+{
+  CtxEntry set_stroke = ctx_void (CTX_STROKE_SOURCE);
+  ctx_process (ctx, &set_stroke);
+}
+
+
+/* Emit a CTX_COLOR command carrying `components`, interpreted per `model`.
+ *
+ * When `stroke` is non-zero a CTX_STROKE_SOURCE command is sent first (via
+ * ctx_stroke_source()).
+ *
+ * The command spans three entries; the first is built with
+ * ctx_f (CTX_COLOR, model, 0) and the components are packed into the float
+ * slots that follow, in order:
+ *   component i  ->  command[(i + 1) / 2].data.f[(i + 1) % 2]
+ * How many floats are read from `components` depends on the model:
+ *   gray 1, gray+alpha 2, rgb/lab/lch 3, the rgba-like models and cmyk 4,
+ *   cmyk+alpha 5.
+ * For a model not listed in the switch nothing is read from `components`.
+ */
+static void ctx_color_raw (Ctx *ctx, CtxColorModel model, float *components, int stroke)
+{
+#if 0
+  CtxSource *source = stroke?
+          &ctx->state.gstate.source_stroke:
+          &ctx->state.gstate.source_fill;
+
+  if (model == CTX_RGB || model == CTX_RGBA)
+  {
+    float rgba[4];
+  // XXX it should be possible to disable this, to get a more accurate record
+  // when it is intentional
+    float a = 1.0f;
+    if (model == CTX_RGBA) a = components[3];
+    ctx_color_get_rgba (&ctx->state, &source->color, rgba);
+    if (rgba[0] == components[0] && rgba[1] == components[1] && rgba[2] == components[2] && rgba[3] == a)
+     return;
+  }
+#endif
+
+  int n_components = 0;
+  switch (model)
+  {
+    case CTX_GRAY:
+      n_components = 1;
+      break;
+    case CTX_GRAYA:
+    case CTX_GRAYA_A:
+      n_components = 2;
+      break;
+    case CTX_RGB:
+    case CTX_LAB:
+    case CTX_LCH:
+    case CTX_DRGB:
+      n_components = 3;
+      break;
+    case CTX_RGBA:
+    case CTX_RGBA_A:
+    case CTX_RGBA_A_DEVICE:
+    case CTX_DRGBA:
+    case CTX_LABA:
+    case CTX_LCHA:
+    case CTX_CMYK:
+    case CTX_DCMYK:
+      n_components = 4;
+      break;
+    case CTX_DCMYKA:
+    case CTX_CMYKA:
+    case CTX_DCMYKA_A:
+    case CTX_CMYKA_A:
+      n_components = 5;
+      break;
+  }
+
+  if (stroke)
+    ctx_stroke_source (ctx);
+
+  CtxEntry command[3]= { ctx_f (CTX_COLOR, model, 0) };
+  for (int i = 0; i < n_components; i++)
+  {
+    int slot = i + 1; /* the first float slot is the one set up by ctx_f */
+    command[slot / 2].data.f[slot % 2] = components[i];
+  }
+  ctx_process (ctx, command);
+}
+
+/* Send a CTX_RGBA color command for (r, g, b, a) with the stroke flag off. */
+void ctx_rgba (Ctx *ctx, float r, float g, float b, float a)
+{
+  float rgba[4];
+  rgba[0] = r;
+  rgba[1] = g;
+  rgba[2] = b;
+  rgba[3] = a;
+  ctx_color_raw (ctx, CTX_RGBA, rgba, 0);
+}
+
+/* Send a CTX_RGBA color command for (r, g, b, a) with the stroke flag on,
+ * so ctx_color_raw() precedes it with CTX_STROKE_SOURCE. */
+void ctx_rgba_stroke (Ctx *ctx, float r, float g, float b, float a)
+{
+  float rgba[4];
+  rgba[0] = r;
+  rgba[1] = g;
+  rgba[2] = b;
+  rgba[3] = a;
+  ctx_color_raw (ctx, CTX_RGBA, rgba, 1);
+}
+
+/* RGB convenience form of ctx_rgba(): same call with alpha fixed at 1.0. */
+void ctx_rgb (Ctx *ctx, float   r, float   g, float   b)
+{
+  ctx_rgba (ctx, r, g, b, 1.0f);
+}
+
+/* RGB convenience form of ctx_rgba_stroke(): alpha fixed at 1.0. */
+void ctx_rgb_stroke (Ctx *ctx, float   r, float   g, float   b)
+{
+  ctx_rgba_stroke (ctx, r, g, b, 1.0f);
+}
+
+/* Send a single-component CTX_GRAY color command with the stroke flag on. */
+void ctx_gray_stroke   (Ctx *ctx, float gray)
+{
+  ctx_color_raw (ctx, CTX_GRAY, &gray, 1);
+}
+/* Send a single-component CTX_GRAY color command with the stroke flag off. */
+void ctx_gray (Ctx *ctx, float gray)
+{
+  ctx_color_raw (ctx, CTX_GRAY, &gray, 0);
+}
+
+/* Send a CTX_DRGBA color command for (r, g, b, a) with the stroke flag on. */
+void ctx_drgba_stroke (Ctx *ctx, float r, float g, float b, float a)
+{
+  float drgba[4];
+  drgba[0] = r;
+  drgba[1] = g;
+  drgba[2] = b;
+  drgba[3] = a;
+  ctx_color_raw (ctx, CTX_DRGBA, drgba, 1);
+}
+/* Send a CTX_DRGBA color command for (r, g, b, a) with the stroke flag off. */
+void ctx_drgba (Ctx *ctx, float r, float g, float b, float a)
+{
+  float drgba[4];
+  drgba[0] = r;
+  drgba[1] = g;
+  drgba[2] = b;
+  drgba[3] = a;
+  ctx_color_raw (ctx, CTX_DRGBA, drgba, 0);
+}
+
+#if CTX_ENABLE_CMYK
+
+/* Send a five-component CTX_CMYKA color command with the stroke flag on. */
+void ctx_cmyka_stroke (Ctx *ctx, float c, float m, float y, float k, float a)
+{
+  float cmyka[5];
+  cmyka[0] = c;
+  cmyka[1] = m;
+  cmyka[2] = y;
+  cmyka[3] = k;
+  cmyka[4] = a;
+  ctx_color_raw (ctx, CTX_CMYKA, cmyka, 1);
+}
+/* Send a five-component CTX_CMYKA color command with the stroke flag off. */
+void ctx_cmyka (Ctx *ctx, float c, float m, float y, float k, float a)
+{
+  float cmyka[5];
+  cmyka[0] = c;
+  cmyka[1] = m;
+  cmyka[2] = y;
+  cmyka[3] = k;
+  cmyka[4] = a;
+  ctx_color_raw (ctx, CTX_CMYKA, cmyka, 0);
+}
+/* Send a four-component CTX_CMYK color command with the stroke flag on. */
+void ctx_cmyk_stroke   (Ctx *ctx, float c, float m, float y, float k)
+{
+  float cmyk[4];
+  cmyk[0] = c;
+  cmyk[1] = m;
+  cmyk[2] = y;
+  cmyk[3] = k;
+  ctx_color_raw (ctx, CTX_CMYK, cmyk, 1);
+}
+/* Send a four-component CTX_CMYK color command with the stroke flag off. */
+void ctx_cmyk (Ctx *ctx, float c, float m, float y, float k)
+{
+  float cmyk[4];
+  cmyk[0] = c;
+  cmyk[1] = m;
+  cmyk[2] = y;
+  cmyk[3] = k;
+  ctx_color_raw (ctx, CTX_CMYK, cmyk, 0);
+}
+
+#if 0
+static void ctx_dcmyk_raw (Ctx *ctx, float c, float m, float y, float k, int stroke)
+{
+  float components[5]={c,m,y,k,1.0f};
+  ctx_color_raw (ctx, CTX_DCMYKA, components, stroke);
+}
+
+static void ctx_dcmyka_raw (Ctx *ctx, float c, float m, float y, float k, float a, int stroke)
+{
+  CtxEntry command[3]=
+  {
+    ctx_f (CTX_COLOR, CTX_DCMYKA + 512 * stroke, c),
+    ctx_f (CTX_CONT, m, y),
+    ctx_f (CTX_CONT, k, a)
+  };
+  ctx_process (ctx, command);
+}
+#endif
+
+/* Send a CTX_DCMYK color command with the stroke flag on. The array carries
+ * a fifth value of 1.0 after c, m, y, k, as the original did. */
+void ctx_dcmyk_stroke   (Ctx *ctx, float c, float m, float y, float k)
+{
+  float dcmyk[5];
+  dcmyk[0] = c;
+  dcmyk[1] = m;
+  dcmyk[2] = y;
+  dcmyk[3] = k;
+  dcmyk[4] = 1.0f;
+  ctx_color_raw (ctx, CTX_DCMYK, dcmyk, 1);
+}
+/* Send a CTX_DCMYK color command with the stroke flag off. The array carries
+ * a fifth value of 1.0 after c, m, y, k, as the original did. */
+void ctx_dcmyk (Ctx *ctx, float c, float m, float y, float k)
+{
+  float dcmyk[5];
+  dcmyk[0] = c;
+  dcmyk[1] = m;
+  dcmyk[2] = y;
+  dcmyk[3] = k;
+  dcmyk[4] = 1.0f;
+  ctx_color_raw (ctx, CTX_DCMYK, dcmyk, 0);
 }
 
-void
-ctx_rasterizer_deinit (CtxRasterizer *rasterizer)
+/* Send a five-component CTX_DCMYKA color command with the stroke flag on. */
+void ctx_dcmyka_stroke   (Ctx *ctx, float c, float m, float y, float k, float a)
 {
-  ctx_drawlist_deinit (&rasterizer->edge_list);
-#if CTX_ENABLE_CLIP
-  if (rasterizer->clip_buffer)
-  {
-    ctx_buffer_free (rasterizer->clip_buffer);
-    rasterizer->clip_buffer = NULL;
-  }
+  float components[5]={c,m,y,k,a};
+  ctx_color_raw (ctx, CTX_DCMYKA, components, 1);
+}
+/* Send a five-component CTX_DCMYKA color command with the stroke flag off. */
+void ctx_dcmyka (Ctx *ctx, float c, float m, float y, float k, float a)
+{
+  float dcmyka[5];
+  dcmyka[0] = c;
+  dcmyka[1] = m;
+  dcmyka[2] = y;
+  dcmyka[3] = k;
+  dcmyka[4] = a;
+  ctx_color_raw (ctx, CTX_DCMYKA, dcmyka, 0);
+}
+
 #endif
-#if CTX_SHAPE_CACHE
-  for (int i = 0; i < CTX_SHAPE_CACHE_ENTRIES; i ++)
-    if (rasterizer->shape_cache.entries[i])
-    {
-      free (rasterizer->shape_cache.entries[i]);
-      rasterizer->shape_cache.entries[i] = NULL;
-    }
 
+/* XXX: missing CSS1:
+ *
+ *   EM { color: rgb(110%, 0%, 0%) }  // clipped to 100% 
+ *
+ *
+ *   :first-letter
+ *   :first-line
+ *   :link :visited :active
+ *
+ */
+
+/* One entry of the named-color table: the hash of a color keyword and the
+ * RGBA value it stands for. */
+typedef struct ColorDef {
+  uint64_t name;        /* compared against the hash of the looked-up string */
+  float    r, g, b, a;  /* color value handed to ctx_color_set_rgba() */
+} ColorDef;
+
+#if 0
+#define CTX_silver     CTX_STRH('s','i','l','v','e','r',0,0,0,0,0,0,0,0)
+#define CTX_fuchsia    CTX_STRH('f','u','c','h','s','i','a',0,0,0,0,0,0,0)
+#define CTX_gray       CTX_STRH('g','r','a','y',0,0,0,0,0,0,0,0,0,0)
+#define CTX_yellow     CTX_STRH('y','e','l','l','o','w',0,0,0,0,0,0,0,0)
+#define CTX_white      CTX_STRH('w','h','i','t','e',0,0,0,0,0,0,0,0,0)
+#define CTX_maroon     CTX_STRH('m','a','r','o','o','n',0,0,0,0,0,0,0,0)
+#define CTX_magenta    CTX_STRH('m','a','g','e','n','t','a',0,0,0,0,0,0,0)
+#define CTX_blue       CTX_STRH('b','l','u','e',0,0,0,0,0,0,0,0,0,0)
+#define CTX_green      CTX_STRH('g','r','e','e','n',0,0,0,0,0,0,0,0,0)
+#define CTX_red        CTX_STRH('r','e','d',0,0,0,0,0,0,0,0,0,0,0)
+#define CTX_purple     CTX_STRH('p','u','r','p','l','e',0,0,0,0,0,0,0,0)
+#define CTX_olive      CTX_STRH('o','l','i','v','e',0,0,0,0,0,0,0,0,0)
+#define CTX_teal        CTX_STRH('t','e','a','l',0,0,0,0,0,0,0,0,0,0)
+#define CTX_black      CTX_STRH('b','l','a','c','k',0,0,0,0,0,0,0,0,0)
+#define CTX_cyan       CTX_STRH('c','y','a','n',0,0,0,0,0,0,0,0,0,0)
+#define CTX_navy       CTX_STRH('n','a','v','y',0,0,0,0,0,0,0,0,0,0)
+#define CTX_lime       CTX_STRH('l','i','m','e',0,0,0,0,0,0,0,0,0,0)
+#define CTX_aqua       CTX_STRH('a','q','u','a',0,0,0,0,0,0,0,0,0,0)
+#define CTX_transparent CTX_STRH('t','r','a','n','s','p','a','r','e','n','t',0,0,0)
 #endif
-  free (rasterizer);
-}
 
+/* Named colors recognised by ctx_color_set_from_string(), keyed by the hash
+ * constant of the color keyword.
+ *
+ * Each keyword appears exactly once. An earlier {CTX_green, 0, 1, 0, 1}
+ * entry was dropped: the lookup scans this table from the end, so it was
+ * shadowed by the CSS value of green (0, 0.50196, 0) further down; the
+ * bright green remains available as "lime".
+ *
+ * magenta is an alias of fuchsia (1, 0, 1); it previously carried purple's
+ * value by mistake.
+ */
+static ColorDef _ctx_colors[]={
+  {CTX_black,    0, 0, 0, 1},
+  {CTX_red,      1, 0, 0, 1},
+  {CTX_yellow,   1, 1, 0, 1},
+  {CTX_blue,     0, 0, 1, 1},
+  {CTX_fuchsia,  1, 0, 1, 1},
+  {CTX_cyan,     0, 1, 1, 1},
+  {CTX_white,    1, 1, 1, 1},
+  {CTX_silver,   0.75294, 0.75294, 0.75294, 1},
+  {CTX_gray,     0.50196, 0.50196, 0.50196, 1},
+  {CTX_magenta,  1, 0, 1, 1},
+  {CTX_maroon,   0.50196, 0, 0, 1},
+  {CTX_purple,   0.50196, 0, 0.50196, 1},
+  {CTX_green,    0, 0.50196, 0, 1},
+  {CTX_lime,     0, 1, 0, 1},
+  {CTX_olive,    0.50196, 0.50196, 0, 1},
+  {CTX_navy,     0, 0,      0.50196, 1},
+  {CTX_teal,     0, 0.50196, 0.50196, 1},
+  {CTX_aqua,     0, 1, 1, 1},
+  {CTX_transparent, 0, 0, 0, 0},
+  {CTX_none,     0, 0, 0, 0},
+};
 
-CtxAntialias ctx_get_antialias (Ctx *ctx)
+/* Numeric value (0..15) of a hexadecimal digit, accepting both letter cases.
+ * Any other character yields 0, so callers that need to reject bad input
+ * must validate first (mrg_color_parse_hex() does so with ctx_isxdigit()). */
+static int xdigit_value(const char xdigit)
 {
-#if CTX_EVENTS
-  if (ctx_renderer_is_tiled (ctx))
+  if (xdigit >= '0' && xdigit <= '9')
+   return xdigit - '0';
+  switch (xdigit)
   {
-     CtxTiled *fb = (CtxTiled*)(ctx->renderer);
-     return fb->antialias;
+    case 'A':case 'a': return 10;
+    case 'B':case 'b': return 11;
+    case 'C':case 'c': return 12;
+    case 'D':case 'd': return 13;
+    case 'E':case 'e': return 14;
+    case 'F':case 'f': return 15;
   }
-#endif
-  if (!_ctx_is_rasterizer (ctx)) return CTX_ANTIALIAS_DEFAULT;
+  return 0;
+}
+
+/* Parse the numeric arguments of an "rgb(...)"-style string into `color`.
+ *
+ * Everything up to and including the first '(' is skipped, then up to four
+ * numbers are read with _ctx_parse_float(), separated by spaces and/or
+ * commas. The first three are divided by 255, the fourth is stored as is.
+ * Values that are not present keep the defaults 0, 0, 0, 1.
+ *
+ * The result is stored with ctx_color_set_rgba(). Always returns 0.
+ *
+ * NOTE(review): percent values (see the CSS1 note earlier in this file) get
+ * no special treatment here - confirm how _ctx_parse_float() reacts to '%'.
+ */
+static int
+ctx_color_parse_rgb (CtxState *ctxstate, CtxColor *color, const char *color_string)
+{
+  float dcolor[4] = {0,0,0,1};
+  while (*color_string && *color_string != '(')
+    color_string++;
+  if (*color_string) color_string++;
 
-  switch (((CtxRasterizer*)(ctx->renderer))->aa)
   {
-    case 1: return CTX_ANTIALIAS_NONE;
-    case 3: return CTX_ANTIALIAS_FAST;
-    //case 5: return CTX_ANTIALIAS_GOOD;
-    default:
-    case 15: return CTX_ANTIALIAS_DEFAULT;
+    int n_floats = 0;
+    char *p =    (char*)color_string;
+    char *prev = (char*)NULL;
+    /* stops after four values, at end of string, or as soon as a parse
+     * attempt consumes nothing (p == prev) */
+    for (; p && n_floats < 4 && p != prev && *p; )
+    {
+      float val;
+      prev = p;
+      val = _ctx_parse_float (p, &p);
+      if (p != prev)
+      {
+        if (n_floats < 3)
+          dcolor[n_floats++] = val/255.0;
+        else
+          dcolor[n_floats++] = val;
+
+        /* prev moves in step with p, so the p != prev loop condition keeps
+         * reflecting that the last parse consumed input */
+        while (*p == ' ' || *p == ',')
+        {
+          p++;
+          prev++;
+        }
+      }
+    }
   }
+  ctx_color_set_rgba (ctxstate, color, dcolor[0], dcolor[1],dcolor[2],dcolor[3]);
+  return 0;
 }
 
-static int _ctx_antialias_to_aa (CtxAntialias antialias)
+/* Returns 1 when ch is an ASCII hexadecimal digit (0-9, a-f, A-F), else 0. */
+static int ctx_isxdigit (uint8_t ch)
 {
-  switch (antialias)
-  {
-    case CTX_ANTIALIAS_NONE: return 1;
-    case CTX_ANTIALIAS_FAST: return 3;
-    case CTX_ANTIALIAS_GOOD: return 5;
-    default:
-    case CTX_ANTIALIAS_DEFAULT: return CTX_RASTERIZER_AA;
-  }
+  if (ch >= '0' && ch <= '9') return 1;
+  if (ch >= 'a' && ch <= 'f') return 1;
+  if (ch >= 'A' && ch <= 'F') return 1;
+  return 0;
 }
 
-void
-ctx_set_antialias (Ctx *ctx, CtxAntialias antialias)
+/* Parse a hex color ("#rgb", "#rgba", "#rrggbb" or "#rrggbbaa") and store
+ * it in `color` with ctx_color_set_rgba().
+ *
+ * The form is selected from the string length alone; the first character is
+ * skipped without being inspected (ctx_color_set_from_string() only calls
+ * this for strings starting with '#'). Alpha defaults to 1 when the string
+ * has no alpha digits. In the short forms each digit is doubled, so "#f80"
+ * means "#ff8800".
+ *
+ * Returns 1 when a color was parsed and stored, 0 when the length is not a
+ * supported one or a non-hex character is found; `color` is left untouched
+ * on failure. (The short form and the unsupported-length path used to
+ * return the opposite values of the long form.)
+ */
+static int
+mrg_color_parse_hex (CtxState *ctxstate, CtxColor *color, const char *color_string)
 {
-#if CTX_EVENTS
-  if (ctx_renderer_is_tiled (ctx))
-  {
-     CtxTiled *fb = (CtxTiled*)(ctx->renderer);
-     fb->antialias = antialias;
-     for (int i = 0; i < _ctx_max_threads; i++)
-     {
-       ctx_set_antialias (fb->host[i], antialias);
-     }
-     return;
-  }
-#endif
-  if (!_ctx_is_rasterizer (ctx)) return;
+  float dcolor[4]={0,0,0,1};
+  int string_length = strlen (color_string);
+  int i;
+  dcolor[3] = 1.0;
 
-  ((CtxRasterizer*)(ctx->renderer))->aa = 
-     _ctx_antialias_to_aa (antialias);
-  ((CtxRasterizer*)(ctx->renderer))->fast_aa = 0;
-  if (antialias == CTX_ANTIALIAS_DEFAULT||
-      antialias == CTX_ANTIALIAS_FAST)
-    ((CtxRasterizer*)(ctx->renderer))->fast_aa = 1;
+  if (string_length == 7 ||  /* #rrggbb   */
+      string_length == 9)    /* #rrggbbaa */
+    {
+      int num_iterations = (string_length - 1) / 2;
+  
+      for (i = 0; i < num_iterations; ++i)
+        {
+          if (ctx_isxdigit (color_string[2 * i + 1]) &&
+              ctx_isxdigit (color_string[2 * i + 2]))
+            {
+              dcolor[i] = (xdigit_value (color_string[2 * i + 1]) << 4 |
+                           xdigit_value (color_string[2 * i + 2])) / 255.f;
+            }
+          else
+            {
+              return 0;
+            }
+        }
+      /* Successful #rrggbb(aa) parsing! */
+      ctx_color_set_rgba (ctxstate, color, dcolor[0], dcolor[1],dcolor[2],dcolor[3]);
+      return 1;
+    }
+  else if (string_length == 4 ||  /* #rgb  */
+           string_length == 5)    /* #rgba */
+    {
+      int num_iterations = string_length - 1;
+      for (i = 0; i < num_iterations; ++i)
+        {
+          if (ctx_isxdigit (color_string[i + 1]))
+            {
+              /* the single digit supplies both nibbles of the component */
+              dcolor[i] = (xdigit_value (color_string[i + 1]) << 4 |
+                           xdigit_value (color_string[i + 1])) / 255.f;
+            }
+          else
+            {
+              return 0;
+            }
+        }
+      ctx_color_set_rgba (ctxstate, color, dcolor[0], dcolor[1],dcolor[2],dcolor[3]);
+      /* Successful #rgb(a) parsing! */
+      return 1;
+    }
+  /* String was of unsupported length. */
+  return 0;
 }
 
-CtxRasterizer *
-ctx_rasterizer_init (CtxRasterizer *rasterizer, Ctx *ctx, Ctx *texture_source, CtxState *state, void *data, 
int x, int y, int width, int height, int stride, CtxPixelFormat pixel_format, CtxAntialias antialias)
-{
-#if CTX_ENABLE_CLIP
-  if (rasterizer->clip_buffer)
-    ctx_buffer_free (rasterizer->clip_buffer);
-#endif
-  if (rasterizer->edge_list.size)
-    ctx_drawlist_deinit (&rasterizer->edge_list);
+//#define CTX_currentColor     CTX_STRH('c','u','r','r','e','n','t','C','o','l','o','r',0,0)
 
-  memset (rasterizer, 0, sizeof (CtxRasterizer) );
-  rasterizer->vfuncs.process = ctx_rasterizer_process;
-  rasterizer->vfuncs.free    = (CtxDestroyNotify)ctx_rasterizer_deinit;
-  rasterizer->edge_list.flags |= CTX_DRAWLIST_EDGE_LIST;
-  rasterizer->state       = state;
-  rasterizer->ctx         = ctx;
-  rasterizer->texture_source = texture_source?texture_source:ctx;
+/* Resolve a color string into `color`.
+ *
+ * Tried in this order:
+ *   1. the hash of "currentColor": copies the RGBA of the color fetched with
+ *      ctx_get_color (ctx, CTX_color, ...);
+ *   2. the named colors in _ctx_colors, scanned from the last entry to the
+ *      first, compared by ctx_strhash() value;
+ *   3. strings starting with '#': mrg_color_parse_hex();
+ *   4. strings starting with "rgb": ctx_color_parse_rgb().
+ *
+ * Always returns 0; the parser results are not propagated. When nothing
+ * matches, `color` is not modified.
+ */
+int ctx_color_set_from_string (Ctx *ctx, CtxColor *color, const char *string)
+{
+  int i;
+  uint32_t hash = ctx_strhash (string);
+//  ctx_color_set_rgba (&(ctx->state), color, 0.4,0.1,0.9,1.0);
+//  return 0;
+    //rgba[0], rgba[1], rgba[2], rgba[3]);
 
-  rasterizer->aa          = _ctx_antialias_to_aa (antialias);
-  rasterizer->fast_aa = (antialias == CTX_ANTIALIAS_DEFAULT||antialias == CTX_ANTIALIAS_FAST);
-  ctx_state_init (rasterizer->state);
-  rasterizer->buf         = data;
-  rasterizer->blit_x      = x;
-  rasterizer->blit_y      = y;
-  rasterizer->blit_width  = width;
-  rasterizer->blit_height = height;
-  rasterizer->state->gstate.clip_min_x  = x;
-  rasterizer->state->gstate.clip_min_y  = y;
-  rasterizer->state->gstate.clip_max_x  = x + width - 1;
-  rasterizer->state->gstate.clip_max_y  = y + height - 1;
-  rasterizer->blit_stride = stride;
-  rasterizer->scan_min    = 5000;
-  rasterizer->scan_max    = -5000;
+  if (hash == CTX_currentColor)
+  {
+    float rgba[4];
+    CtxColor ccolor;
+    ctx_get_color (ctx, CTX_color, &ccolor);
+    ctx_color_get_rgba (&(ctx->state), &ccolor, rgba);
+    ctx_color_set_rgba (&(ctx->state), color, rgba[0], rgba[1], rgba[2], rgba[3]);
+    return 0;
+  }
 
-  if (pixel_format == CTX_FORMAT_BGRA8)
+  for (i = (sizeof(_ctx_colors)/sizeof(_ctx_colors[0]))-1; i>=0; i--)
   {
-    pixel_format = CTX_FORMAT_RGBA8;
-    rasterizer->swap_red_green = 1;
+    if (hash == _ctx_colors[i].name)
+    {
+      ctx_color_set_rgba (&(ctx->state), color,
+       _ctx_colors[i].r, _ctx_colors[i].g, _ctx_colors[i].b, _ctx_colors[i].a);
+      return 0;
+    }
   }
 
-  rasterizer->format = ctx_pixel_format_info (pixel_format);
+  if (string[0] == '#')
+    mrg_color_parse_hex (&(ctx->state), color, string);
+  else if (string[0] == 'r' &&
+      string[1] == 'g' &&
+      string[2] == 'b'
+      )
+    ctx_color_parse_rgb (&(ctx->state), color, string);
 
-  return rasterizer;
+  return 0;
 }
 
-Ctx *
-ctx_new_for_buffer (CtxBuffer *buffer)
+/* Set the color from a string: the string is resolved into a zero-initialised
+ * temporary CtxColor with ctx_color_set_from_string(), read back as RGBA
+ * floats and sent as a CTX_RGBA color command with the stroke flag off.
+ * Always returns 0. */
+int ctx_color (Ctx *ctx, const char *string)
 {
-  Ctx *ctx = ctx_new ();
-  ctx_set_renderer (ctx,
-                    ctx_rasterizer_init ( (CtxRasterizer *) malloc (sizeof (CtxRasterizer) ),
-                                          ctx, NULL, &ctx->state,
-                                          buffer->data, 0, 0, buffer->width, buffer->height,
-                                          buffer->stride, buffer->format->pixel_format,
-                                          CTX_ANTIALIAS_DEFAULT));
-  return ctx;
+  CtxColor color = {0,};
+  ctx_color_set_from_string (ctx, &color, string);
+  float rgba[4];
+  ctx_color_get_rgba (&(ctx->state), &color, rgba);
+  ctx_color_raw (ctx, CTX_RGBA, rgba, 0);
+  return 0;
 }
 
-Ctx *
-ctx_new_for_framebuffer (void *data, int width, int height,
-                         int stride,
-                         CtxPixelFormat pixel_format)
+/* 8-bit variant of ctx_rgba(): each component is divided by 255.0f and
+ * forwarded as a float. The dedicated CTX_SET_RGBA_U8 command path below is
+ * compiled out (#if 0). */
+void
+ctx_rgba8 (Ctx *ctx, uint8_t r, uint8_t g, uint8_t b, uint8_t a)
 {
-  Ctx *ctx = ctx_new ();
-  CtxRasterizer *r = ctx_rasterizer_init ( (CtxRasterizer *) ctx_calloc (sizeof (CtxRasterizer), 1),
-                                          ctx, NULL, &ctx->state, data, 0, 0, width, height,
-                                          stride, pixel_format, CTX_ANTIALIAS_DEFAULT);
-  ctx_set_renderer (ctx, r);
-  return ctx;
-}
+#if 0
+  CtxEntry command = ctx_u8 (CTX_SET_RGBA_U8, r, g, b, a, 0, 0, 0, 0);
 
-// ctx_new_for_stream (FILE *stream);
+  uint8_t rgba[4];
+  ctx_color_get_rgba8 (&ctx->state, &ctx->state.gstate.source.color, rgba);
+  if (rgba[0] == r && rgba[1] == g && rgba[2] == b && rgba[3] == a)
+     return;
 
-#if 0
-CtxRasterizer *ctx_rasterizer_new (void *data, int x, int y, int width, int height,
-                                   int stride, CtxPixelFormat pixel_format)
+  ctx_process (ctx, &command);
+#else
+  ctx_rgba (ctx, r/255.0f, g/255.0f, b/255.0f, a/255.0f);
+#endif
+}
+
+/* 8-bit variant of ctx_rgba_stroke(): each component is divided by 255.0f
+ * and forwarded as a float. */
+void ctx_rgba8_stroke (Ctx *ctx, uint8_t r, uint8_t g, uint8_t b, uint8_t a)
 {
-  CtxState    *state    = (CtxState *) malloc (sizeof (CtxState) );
-  CtxRasterizer *rasterizer = (CtxRasterizer *) malloc (sizeof (CtxRenderer) );
-  ctx_rasterizer_init (rasterizer, state, data, x, y, width, height,
-                       stride, pixel_format, CTX_ANTIALIAS_DEFAULT);
+  ctx_rgba_stroke (ctx, r/255.0f, g/255.0f, b/255.0f, a/255.0f);
 }
-#endif
 
-CtxPixelFormatInfo *
-ctx_pixel_format_info (CtxPixelFormat format);
 
-#else
+#endif 
 
-CtxPixelFormatInfo *
-ctx_pixel_format_info (CtxPixelFormat format)
+#if CTX_BABL
+/* Install the babl space `space` for `space_slot` and rebuild the color
+ * conversions that depend on it.
+ *
+ * After storing the space in the matching gstate field, any of the texture,
+ * device and rgb spaces that is still unset defaults to sRGB, and the three
+ * "R'G'B'A float" babl fishes (device->user, user->device, texture->device)
+ * are looked up again.
+ *
+ * NOTE(review): CTX_COLOR_SPACE_DEVICE_CMYK writes the same device_space
+ * field as CTX_COLOR_SPACE_DEVICE_RGB - confirm this is intended.
+ */
+void ctx_rasterizer_colorspace_babl (CtxState      *state,
+                                     CtxColorSpace  space_slot,
+                                     const Babl    *space)
 {
-  return NULL;
+  switch (space_slot)
+  {
+    case CTX_COLOR_SPACE_DEVICE_RGB:
+      state->gstate.device_space = space;
+      break;
+    case CTX_COLOR_SPACE_DEVICE_CMYK:
+      state->gstate.device_space = space;
+      break;
+    case CTX_COLOR_SPACE_USER_RGB:
+      state->gstate.rgb_space = space;
+      break;
+    case CTX_COLOR_SPACE_USER_CMYK:
+      state->gstate.cmyk_space = space;
+      break;
+    case CTX_COLOR_SPACE_TEXTURE:
+      state->gstate.texture_space = space;
+      break;
+  }
+
+  const Babl *srgb = babl_space ("sRGB");
+  if (!state->gstate.texture_space) 
+       state->gstate.texture_space = srgb;
+  if (!state->gstate.device_space) 
+       state->gstate.device_space = srgb;
+  if (!state->gstate.rgb_space) 
+       state->gstate.rgb_space = srgb;
+
+  //fprintf (stderr, "%s\n", babl_get_name (state->gstate.device_space));
+
+  state->gstate.fish_rgbaf_device_to_user = babl_fish (
+       babl_format_with_space ("R'G'B'A float", state->gstate.device_space),
+       babl_format_with_space ("R'G'B'A float", state->gstate.rgb_space));
+  state->gstate.fish_rgbaf_user_to_device = babl_fish (
+       babl_format_with_space ("R'G'B'A float", state->gstate.rgb_space),
+       babl_format_with_space ("R'G'B'A float", state->gstate.device_space));
+  state->gstate.fish_rgbaf_texture_to_device = babl_fish (
+       babl_format_with_space ("R'G'B'A float", state->gstate.texture_space),
+       babl_format_with_space ("R'G'B'A float", state->gstate.device_space));
 }
 #endif
 
-void
-ctx_current_point (Ctx *ctx, float *x, float *y)
+/* Pick a babl color space from `icc_data` and install it for `space_slot`
+ * through ctx_rasterizer_colorspace_babl(). Does nothing unless CTX_BABL is
+ * enabled.
+ *
+ * `icc_data` is interpreted as follows:
+ *   - NULL: sRGB;
+ *   - fewer than 32 bytes starting with "0x": a pointer printed as text,
+ *     read back with sscanf ("%p");
+ *   - fewer than 32 bytes otherwise: the case-insensitive name of one of the
+ *     built-in spaces listed below;
+ *   - anything else, or a short string that matched no name: handed to
+ *     babl_space_from_icc() as an ICC profile of `icc_length` bytes.
+ *
+ * If no space could be determined the state is left unchanged.
+ */
+void ctx_rasterizer_colorspace_icc (CtxState      *state,
+                                    CtxColorSpace  space_slot,
+                                    char          *icc_data,
+                                    int            icc_length)
 {
-  if (!ctx)
-    { 
-      if (x) { *x = 0.0f; }
-      if (y) { *y = 0.0f; }
-    }
-#if CTX_RASTERIZER
-  if (ctx->renderer)
-    {
-      if (x) { *x = ( (CtxRasterizer *) (ctx->renderer) )->x; }
-      if (y) { *y = ( (CtxRasterizer *) (ctx->renderer) )->y; }
-      return;
-    }
-#endif
-  if (x) { *x = ctx->state.x; }
-  if (y) { *y = ctx->state.y; }
-}
+#if CTX_BABL
+   const char *error = NULL;
+   const Babl *space = NULL;
 
-float ctx_x (Ctx *ctx)
-{
-  float x = 0, y = 0;
-  ctx_current_point (ctx, &x, &y);
-  return x;
-}
+   if (icc_data == NULL) space = babl_space ("sRGB");
+   /* a negative length must not reach the tmp[icc_length] store below */
+   else if (icc_length >= 0 && icc_length < 32)
+   {
+      if (icc_data[0] == '0' && icc_data[1] == 'x')
+        sscanf (icc_data, "%p", &space);
+      else
+      {
+        /* room for the longest accepted name (31 bytes) plus the NUL; the
+         * previous 24-byte buffer overflowed for lengths 24..31 */
+        char tmp[32];
+        int i;
+        for (i = 0; i < icc_length; i++)
+          tmp[i]= (icc_data[i]>='A' && icc_data[i]<='Z')?icc_data[i]+('a'-'A'):icc_data[i];
+        tmp[icc_length]=0;
+        if (!strcmp (tmp, "srgb"))            space = babl_space ("sRGB");
+        else if (!strcmp (tmp, "scrgb"))      space = babl_space ("scRGB");
+        else if (!strcmp (tmp, "acescg"))     space = babl_space ("ACEScg");
+        else if (!strcmp (tmp, "adobe"))      space = babl_space ("Adobe");
+        else if (!strcmp (tmp, "apple"))      space = babl_space ("Apple");
+        else if (!strcmp (tmp, "rec2020"))    space = babl_space ("Rec2020");
+        else if (!strcmp (tmp, "aces2065-1")) space = babl_space ("ACES2065-1");
+      }
+   }
 
-float ctx_y (Ctx *ctx)
-{
-  float x = 0, y = 0;
-  ctx_current_point (ctx, &x, &y);
-  return y;
+   if (!space)
+   {
+     space = babl_space_from_icc (icc_data, icc_length, BABL_ICC_INTENT_RELATIVE_COLORIMETRIC, &error);
+   }
+   if (space)
+   {
+     ctx_rasterizer_colorspace_babl (state, space_slot, space);
+   }
+#endif
 }
 
-static void
-ctx_process (Ctx *ctx, CtxEntry *entry)
+/* Queue a CTX_COLOR_SPACE command that selects the color space for
+ * `space_slot`.
+ *
+ * `data` is passed along as the command's string payload. When
+ * `data_length` is <= 0 the length is taken with strlen(), so `data` must
+ * then be NUL-terminated. A NULL `data` sends the four-byte string "sRGB".
+ */
+void ctx_colorspace (Ctx           *ctx,
+                     CtxColorSpace  space_slot,
+                     unsigned char *data,
+                     int            data_length)
 {
-#if CTX_CURRENT_PATH
-  switch (entry->code)
-    {
-      case CTX_TEXT:
-      case CTX_STROKE_TEXT:
-      case CTX_BEGIN_PATH:
-        ctx->current_path.count = 0;
-        break;
-      case CTX_CLIP:
-      case CTX_FILL:
-      case CTX_STROKE:
-              // XXX unless preserve
-        ctx->current_path.count = 0;
-        break;
-      case CTX_CLOSE_PATH:
-      case CTX_LINE_TO:
-      case CTX_MOVE_TO:
-      case CTX_QUAD_TO:
-      case CTX_SMOOTH_TO:
-      case CTX_SMOOTHQ_TO:
-      case CTX_REL_QUAD_TO:
-      case CTX_REL_SMOOTH_TO:
-      case CTX_REL_SMOOTHQ_TO:
-      case CTX_CURVE_TO:
-      case CTX_REL_CURVE_TO:
-      case CTX_ARC:
-      case CTX_ARC_TO:
-      case CTX_REL_ARC_TO:
-      case CTX_RECTANGLE:
-      case CTX_ROUND_RECTANGLE:
-        ctx_drawlist_add_entry (&ctx->current_path, entry);
-        break;
-      default:
-        break;
-    }
-#endif
-#if CTX_RASTERIZER
-  if (CTX_LIKELY(ctx->renderer && ctx->renderer->process == ctx_rasterizer_process))
-    {
-      ctx_rasterizer_process (ctx->renderer, (CtxCommand *) entry);
-    }
-  else
-#endif
-  if (CTX_LIKELY(ctx->renderer && ctx->renderer->process))
-    {
-      ctx->renderer->process (ctx->renderer, (CtxCommand *) entry);
-    }
+  if (data)
+  {
+    if (data_length <= 0) data_length = (int)strlen ((char*)data);
+    ctx_process_cmd_str_with_len (ctx, CTX_COLOR_SPACE, (char*)data, space_slot, 0, data_length);
+  }
   else
-    {
-      /* these functions might alter the code and coordinates of
-         command that in the end gets added to the drawlist
-       */
-      ctx_interpret_style (&ctx->state, entry, ctx);
-      ctx_interpret_transforms (&ctx->state, entry, ctx);
-      ctx_interpret_pos (&ctx->state, entry, ctx);
-      ctx_drawlist_add_entry (&ctx->drawlist, entry);
-    }
+  {
+    ctx_process_cmd_str_with_len (ctx, CTX_COLOR_SPACE, "sRGB", space_slot, 0, 4);
+  }
+}
+
+/* Emit a CTX_GRADIENT_STOP command at `pos` with an 8-bit RGBA color. The
+ * entry is built with ctx_f (CTX_GRADIENT_STOP, pos, 0) and the four color
+ * bytes are then written to data.u8[4..7]. */
+void ctx_gradient_add_stop_u8
+(Ctx *ctx, float pos, uint8_t r, uint8_t g, uint8_t b, uint8_t a)
+{
+  const uint8_t rgba[4] = {r, g, b, a};
+  CtxEntry stop = ctx_f (CTX_GRADIENT_STOP, pos, 0);
+  for (int c = 0; c < 4; c++)
+    stop.data.u8[4 + c] = rgba[c];
+  ctx_process (ctx, &stop);
 }
 
-
-int ctx_gradient_cache_valid = 0;
-
-void
-ctx_state_gradient_clear_stops (CtxState *state)
+/* Float front end of ctx_gradient_add_stop_u8(): each component is scaled
+ * by 255, converted to int (the fractional part is dropped rather than
+ * rounded), clamped to 0..255 and forwarded.
+ *
+ * NOTE(review): the clamp runs after the float->int conversion, so it does
+ * not protect against inputs far outside the int range.
+ */
+void ctx_gradient_add_stop
+(Ctx *ctx, float pos, float r, float g, float b, float a)
 {
-//#if CTX_GRADIENT_CACHE
-//  ctx_gradient_cache_reset ();
-//#endif
-  ctx_gradient_cache_valid = 0;
-  state->gradient.n_stops = 0;
+  int ir = r * 255;
+  int ig = g * 255;
+  int ib = b * 255;
+  int ia = a * 255;
+  ir = CTX_CLAMP (ir, 0,255);
+  ig = CTX_CLAMP (ig, 0,255);
+  ib = CTX_CLAMP (ib, 0,255);
+  ia = CTX_CLAMP (ia, 0,255);
+  ctx_gradient_add_stop_u8 (ctx, pos, ir, ig, ib, ia);
 }
 
+/* Add a gradient stop at `pos` whose color is given as a string. The string
+ * is resolved into a zero-initialised temporary CtxColor with
+ * ctx_color_set_from_string(), read back as RGBA floats and passed on to
+ * ctx_gradient_add_stop(). */
+void ctx_gradient_add_stop_string
+(Ctx *ctx, float pos, const char *string)
+{
+  float rgba[4];
+  CtxColor parsed = {0,};
+
+  ctx_color_set_from_string (ctx, &parsed, string);
+  ctx_color_get_rgba (&(ctx->state), &parsed, rgba);
+  ctx_gradient_add_stop (ctx, pos, rgba[0], rgba[1], rgba[2], rgba[3]);
+}
 
-/****  end of engine ****/
+//  deviceRGB .. settable when creating an RGB image surface..
+//               queryable when running in terminal - is it really needed?
+//               though it is settable ; and functional for changing this state at runtime..
+//
+//  userRGB - settable at any time, stored in save|restore 
+//  texture - set as the space of data on subsequent 
 
 CtxBuffer *ctx_buffer_new_bare (void)
 {
@@ -21313,710 +22672,254 @@ const char* ctx_texture_init (Ctx           *ctx,
                               int            stride,
                               CtxPixelFormat format,
                               void          *space,
-                              uint8_t       *pixels,
-                              void (*freefunc) (void *pixels, void *user_data),
-                              void *user_data)
-{
-  int id = 0;
-  if (eid)
-  {
-    for (int i = 0; i <  CTX_MAX_TEXTURES; i++)
-    {
-      if (ctx->texture[i].data &&
-          ctx->texture[i].eid &&
-          !strcmp (ctx->texture[i].eid, eid))
-      {
-        ctx->texture[i].frame = ctx->texture_cache->frame;
-        if (freefunc && user_data != (void*)23)
-          freefunc (pixels, user_data);
-        return ctx->texture[i].eid;
-      }
-      if (ctx->texture[i].data == NULL 
-          ||   (ctx->texture_cache->frame - ctx->texture[i].frame >= 2))
-        id = i;
-    }
-  } else
-  {
-    for (int i = 0; i <  CTX_MAX_TEXTURES; i++)
-    {
-      if (ctx->texture[i].data == NULL 
-          || (ctx->texture_cache->frame - ctx->texture[i].frame > 2))
-        id = i;
-    }
-  }
-  //int bpp = ctx_pixel_format_bits_per_pixel (format);
-  ctx_buffer_deinit (&ctx->texture[id]);
-
-  if (stride<=0)
-  {
-    stride = ctx_pixel_format_get_stride ((CtxPixelFormat)format, width);
-  }
-
-  int data_len = stride * height;
-  if (format == CTX_FORMAT_YUV420)
-          data_len = width * height +
-                  2 * ((width/2)*(height/2));
-
-  if (freefunc == ctx_buffer_pixels_free && user_data == (void*)23)
-  {
-     uint8_t *tmp = (uint8_t*)malloc (data_len);
-     memcpy (tmp, pixels, data_len);
-     pixels = tmp;
-  }
-
-  ctx_buffer_set_data (&ctx->texture[id],
-                       pixels, width, height,
-                       stride, format,
-                       freefunc, user_data);
-#if CTX_ENABLE_CM
-  ctx->texture[id].space = space;
-#endif
-  ctx->texture[id].frame = ctx->texture_cache->frame;
-  if (eid)
-  {
-    /* we got an eid, this is the fast path */
-    ctx->texture[id].eid = strdup (eid);
-  }
-  else
-  {
-    uint8_t hash[20];
-    char ascii[41];
-
-    CtxSHA1 *sha1 = ctx_sha1_new ();
-    ctx_sha1_process (sha1, pixels, stride * height);
-    ctx_sha1_done (sha1, hash);
-    ctx_sha1_free (sha1);
-    const char *hex="0123456789abcdef";
-    for (int i = 0; i < 20; i ++)
-    {
-       ascii[i*2]=hex[hash[i]/16];
-       ascii[i*2+1]=hex[hash[i]%16];
-    }
-    ascii[40]=0;
-    ctx->texture[id].eid = strdup (ascii);
-  }
-  return ctx->texture[id].eid;
-}
-
-static void
-_ctx_texture_prepare_color_management (CtxRasterizer *rasterizer,
-                                      CtxBuffer     *buffer)
-{
-   switch (buffer->format->pixel_format)
-   {
-#ifndef NO_BABL
-#if CTX_BABL
-     case CTX_FORMAT_RGBA8:
-       if (buffer->space == rasterizer->state->gstate.device_space)
-       {
-         buffer->color_managed = buffer;
-       }
-       else
-       {
-          buffer->color_managed = ctx_buffer_new (buffer->width, buffer->height,
-                                                  CTX_FORMAT_RGBA8);
-          babl_process (
-             babl_fish (babl_format_with_space ("R'G'B'A u8", buffer->space),
-                        babl_format_with_space ("R'G'B'A u8", rasterizer->state->gstate.device_space)),
-             buffer->data, buffer->color_managed->data,
-             buffer->width * buffer->height
-             );
-       }
-       break;
-     case CTX_FORMAT_RGB8:
-       if (buffer->space == rasterizer->state->gstate.device_space)
-       {
-         buffer->color_managed = buffer;
-       }
-       else
-       {
-         buffer->color_managed = ctx_buffer_new (buffer->width, buffer->height,
-                                               CTX_FORMAT_RGB8);
-         babl_process (
-            babl_fish (babl_format_with_space ("R'G'B' u8", buffer->space),
-                       babl_format_with_space ("R'G'B' u8", rasterizer->state->gstate.device_space)),
-            buffer->data, buffer->color_managed->data,
-            buffer->width * buffer->height
-          );
-       }
-       break;
-#endif
-#endif
-     default:
-       buffer->color_managed = buffer;
-   }
-}
-
-
-
-int ctx_utf8_len (const unsigned char first_byte)
-{
-  if      ( (first_byte & 0x80) == 0)
-    { return 1; } /* ASCII */
-  else if ( (first_byte & 0xE0) == 0xC0)
-    { return 2; }
-  else if ( (first_byte & 0xF0) == 0xE0)
-    { return 3; }
-  else if ( (first_byte & 0xF8) == 0xF0)
-    { return 4; }
-  return 1;
-}
-
-
-const char *ctx_utf8_skip (const char *s, int utf8_length)
-{
-  int count;
-  if (!s)
-    { return NULL; }
-  for (count = 0; *s; s++)
-    {
-      if ( (*s & 0xC0) != 0x80)
-        { count++; }
-      if (count == utf8_length + 1)
-        { return s; }
-    }
-  return s;
-}
-
-//  XXX  :  unused
-int ctx_utf8_strlen (const char *s)
-{
-  int count;
-  if (!s)
-    { return 0; }
-  for (count = 0; *s; s++)
-    if ( (*s & 0xC0) != 0x80)
-      { count++; }
-  return count;
-}
-
-int
-ctx_unichar_to_utf8 (uint32_t  ch,
-                     uint8_t  *dest)
-{
-  /* http://www.cprogramming.com/tutorial/utf8.c  */
-  /*  Basic UTF-8 manipulation routines
-    by Jeff Bezanson
-    placed in the public domain Fall 2005 ... */
-  if (ch < 0x80)
-    {
-      dest[0] = (char) ch;
-      return 1;
-    }
-  if (ch < 0x800)
-    {
-      dest[0] = (ch>>6) | 0xC0;
-      dest[1] = (ch & 0x3F) | 0x80;
-      return 2;
-    }
-  if (ch < 0x10000)
-    {
-      dest[0] = (ch>>12) | 0xE0;
-      dest[1] = ( (ch>>6) & 0x3F) | 0x80;
-      dest[2] = (ch & 0x3F) | 0x80;
-      return 3;
-    }
-  if (ch < 0x110000)
-    {
-      dest[0] = (ch>>18) | 0xF0;
-      dest[1] = ( (ch>>12) & 0x3F) | 0x80;
-      dest[2] = ( (ch>>6) & 0x3F) | 0x80;
-      dest[3] = (ch & 0x3F) | 0x80;
-      return 4;
-    }
-  return 0;
-}
-
-uint32_t
-ctx_utf8_to_unichar (const char *input)
-{
-  const uint8_t *utf8 = (const uint8_t *) input;
-  uint8_t c = utf8[0];
-  if ( (c & 0x80) == 0)
-    { return c; }
-  else if ( (c & 0xE0) == 0xC0)
-    return ( (utf8[0] & 0x1F) << 6) |
-           (utf8[1] & 0x3F);
-  else if ( (c & 0xF0) == 0xE0)
-    return ( (utf8[0] & 0xF)  << 12) |
-           ( (utf8[1] & 0x3F) << 6) |
-           (utf8[2] & 0x3F);
-  else if ( (c & 0xF8) == 0xF0)
-    return ( (utf8[0] & 0x7)  << 18) |
-           ( (utf8[1] & 0x3F) << 12) |
-           ( (utf8[2] & 0x3F) << 6) |
-           (utf8[3] & 0x3F);
-  else if ( (c & 0xFC) == 0xF8)
-    return ( (utf8[0] & 0x3)  << 24) |
-           ( (utf8[1] & 0x3F) << 18) |
-           ( (utf8[2] & 0x3F) << 12) |
-           ( (utf8[3] & 0x3F) << 6) |
-           (utf8[4] & 0x3F);
-  else if ( (c & 0xFE) == 0xFC)
-    return ( (utf8[0] & 0x1)  << 30) |
-           ( (utf8[1] & 0x3F) << 24) |
-           ( (utf8[2] & 0x3F) << 18) |
-           ( (utf8[3] & 0x3F) << 12) |
-           ( (utf8[4] & 0x3F) << 6) |
-           (utf8[5] & 0x3F);
-  return 0;
-}
-
-#if CTX_RASTERIZER
-
-
-static int
-ctx_rect_intersect (const CtxIntRectangle *a, const CtxIntRectangle *b)
-{
-  if (a->x >= b->x + b->width ||
-      b->x >= a->x + a->width ||
-      a->y >= b->y + b->height ||
-      b->y >= a->y + a->height) return 0;
-
-  return 1;
-}
-
-static void
-_ctx_add_hash (CtxHasher *hasher, CtxIntRectangle *shape_rect, char *hash)
-{
-  CtxIntRectangle rect = {0,0, hasher->rasterizer.blit_width/hasher->cols,
-                            hasher->rasterizer.blit_height/hasher->rows};
-  int hno = 0;
-  for (int row = 0; row < hasher->rows; row++)
-    for (int col = 0; col < hasher->cols; col++, hno++)
-     {
-      rect.x = col * rect.width;
-      rect.y = row * rect.height;
-      if (ctx_rect_intersect (shape_rect, &rect))
-      {
-        int temp = hasher->hashes[(row * hasher->cols + col)  *20 + 0];
-        for (int i = 0; i <19;i++)
-           hasher->hashes[(row * hasher->cols + col)  *20 + i] =
-             hasher->hashes[(row * hasher->cols + col)  *20 + i+1]^
-             hash[i];
-        hasher->hashes[(row * hasher->cols + col)  *20 + 19] =
-                temp ^ hash[19];
-      }
-    }
-}
-
-static int
-ctx_str_count_lines (const char *str)
-{
-  int count = 0;
-  for (const char *p = str; *p; p++)
-    if (*p == '\n') count ++;
-  return count;
-}
-
-static void
-ctx_hasher_process (void *user_data, CtxCommand *command)
-{
-  CtxEntry *entry = &command->entry;
-  CtxRasterizer *rasterizer = (CtxRasterizer *) user_data;
-  CtxHasher *hasher = (CtxHasher*) user_data;
-  CtxState *state = rasterizer->state;
-  CtxCommand *c = (CtxCommand *) entry;
-  int aa = 15;//rasterizer->aa;
-
-  ctx_interpret_pos_bare (rasterizer->state, entry, NULL);
-  ctx_interpret_style (rasterizer->state, entry, NULL);
-
-  switch (c->code)
-    {
-      case CTX_TEXT:
-        {
-          CtxSHA1 sha1;
-          memcpy (&sha1, &hasher->sha1_fill, sizeof (CtxSHA1));
-          char ctx_sha1_hash[20];
-          float width = ctx_text_width (rasterizer->ctx, ctx_arg_string());
-
-
-          float height = ctx_get_font_size (rasterizer->ctx);
-           CtxIntRectangle shape_rect;
-          
-           shape_rect.x=rasterizer->x;
-           shape_rect.y=rasterizer->y - height,
-           shape_rect.width = width;
-           shape_rect.height = height * (ctx_str_count_lines (ctx_arg_string()) + 1.5);
-          switch ((int)ctx_state_get (rasterizer->state, CTX_text_align))
-          {
-          case CTX_TEXT_ALIGN_LEFT:
-          case CTX_TEXT_ALIGN_START:
-                  break;
-          case CTX_TEXT_ALIGN_END:
-          case CTX_TEXT_ALIGN_RIGHT:
-           shape_rect.x -= shape_rect.width;
-           break;
-          case CTX_TEXT_ALIGN_CENTER:
-           shape_rect.x -= shape_rect.width/2;
-           break;
-                   // XXX : doesn't take all text-alignments into account
-          }
-
-#if 0
-          uint32_t color;
-          ctx_color_get_rgba8 (rasterizer->state, &rasterizer->state->gstate.source_fill.color, 
(uint8_t*)(&color));
-#endif
-          ctx_sha1_process(&sha1, (const unsigned char*)ctx_arg_string(), strlen  (ctx_arg_string()));
-#if 1
-        ctx_sha1_process(&sha1, (unsigned char*)(&rasterizer->state->gstate.transform), sizeof 
(rasterizer->state->gstate.transform));
-    //      ctx_sha1_process(&sha1, (unsigned char*)&color, 4);
-#endif
-          ctx_sha1_process(&sha1, (unsigned char*)&shape_rect, sizeof (CtxIntRectangle));
-          ctx_sha1_done(&sha1, (unsigned char*)ctx_sha1_hash);
-          _ctx_add_hash (hasher, &shape_rect, ctx_sha1_hash);
-
-          ctx_rasterizer_rel_move_to (rasterizer, width, 0);
-        }
-        ctx_rasterizer_reset (rasterizer);
-        break;
-      case CTX_STROKE_TEXT:
-        {
-          CtxSHA1 sha1;
-          memcpy (&sha1, &hasher->sha1_stroke, sizeof (CtxSHA1));
-          char ctx_sha1_hash[20];
-          float width = ctx_text_width (rasterizer->ctx, ctx_arg_string());
-          float height = ctx_get_font_size (rasterizer->ctx);
-
-           CtxIntRectangle shape_rect = {
-              (int)rasterizer->x, (int)(rasterizer->y - height),
-              (int)width, (int)(height * 2)
-           };
-
-#if 0
-          uint32_t color;
-          ctx_color_get_rgba8 (rasterizer->state, &rasterizer->state->gstate.source_stroke.color, 
(uint8_t*)(&color));
-#endif
-          ctx_sha1_process(&sha1, (unsigned char*)ctx_arg_string(), strlen  (ctx_arg_string()));
-#if 1
-          ctx_sha1_process(&sha1, (unsigned char*)(&rasterizer->state->gstate.transform), sizeof 
(rasterizer->state->gstate.transform));
-    //    ctx_sha1_process(&sha1, (unsigned char*)&color, 4);
-#endif
-          ctx_sha1_process(&sha1, (unsigned char*)&shape_rect, sizeof (CtxIntRectangle));
-          ctx_sha1_done(&sha1, (unsigned char*)ctx_sha1_hash);
-          _ctx_add_hash (hasher, &shape_rect, ctx_sha1_hash);
-
-          ctx_rasterizer_rel_move_to (rasterizer, width, 0);
-        }
-        ctx_rasterizer_reset (rasterizer);
-        break;
-      case CTX_GLYPH:
-         {
-          CtxSHA1 sha1;
-          memcpy (&sha1, &hasher->sha1_fill, sizeof (CtxSHA1));
-
-          char ctx_sha1_hash[20];
-          uint8_t string[8];
-          string[ctx_unichar_to_utf8 (c->u32.a0, string)]=0;
-          float width = ctx_text_width (rasterizer->ctx, (char*)string);
-          float height = ctx_get_font_size (rasterizer->ctx);
-
-          float tx = rasterizer->x;
-          float ty = rasterizer->y;
-          float tw = width;
-          float th = height * 2;
-
-          _ctx_user_to_device (rasterizer->state, &tx, &ty);
-          _ctx_user_to_device_distance (rasterizer->state, &tw, &th);
-          CtxIntRectangle shape_rect = {(int)tx,(int)(ty-th/2),(int)tw,(int)th};
-
-
-#if 0
-          uint32_t color;
-          ctx_color_get_rgba8 (rasterizer->state, &rasterizer->state->gstate.source_fill.color, 
(uint8_t*)(&color));
-#endif
-          ctx_sha1_process(&sha1, string, strlen ((const char*)string));
-          ctx_sha1_process(&sha1, (unsigned char*)(&rasterizer->state->gstate.transform), sizeof 
(rasterizer->state->gstate.transform));
-#if 0
-          ctx_sha1_process(&sha1, (unsigned char*)&color, 4);
-#endif
-          ctx_sha1_process(&sha1, (unsigned char*)&shape_rect, sizeof (CtxIntRectangle));
-          ctx_sha1_done(&sha1, (unsigned char*)ctx_sha1_hash);
-          _ctx_add_hash (hasher, &shape_rect, ctx_sha1_hash);
-
-          ctx_rasterizer_rel_move_to (rasterizer, width, 0);
-          ctx_rasterizer_reset (rasterizer);
-         }
-        break;
-
-      case CTX_FILL:
-        {
-          CtxSHA1 sha1;
-          memcpy (&sha1, &hasher->sha1_fill, sizeof (CtxSHA1));
-          char ctx_sha1_hash[20];
-
-          /* we eant this hasher to be as good as possible internally,
-           * since it is also used in the small shapes rasterization
-           * cache
-           */
-        uint64_t hash = ctx_rasterizer_poly_to_hash (rasterizer); // + hasher->salt;
-        CtxIntRectangle shape_rect = {
-          (int)(rasterizer->col_min / CTX_SUBDIV - 2),
-          (int)(rasterizer->scan_min / aa - 2),
-          (int)(3+(rasterizer->col_max - rasterizer->col_min + 1) / CTX_SUBDIV),
-          (int)(3+(rasterizer->scan_max - rasterizer->scan_min + 1) / aa)
-        };
-
-        hash ^= (rasterizer->state->gstate.fill_rule * 23);
-
-        ctx_sha1_process(&sha1, (unsigned char*)&hash, 8);
-
-        {
-          int is = rasterizer->state->gstate.image_smoothing;
-          ctx_sha1_process(&sha1, (uint8_t*)&is, sizeof(int));
-        }
-
-          ctx_sha1_done(&sha1, (unsigned char*)ctx_sha1_hash);
-          _ctx_add_hash (hasher, &shape_rect, ctx_sha1_hash);
-
-        if (!rasterizer->preserve)
-          ctx_rasterizer_reset (rasterizer);
-        rasterizer->preserve = 0;
-        }
-        break;
-      case CTX_STROKE:
-        {
-          CtxSHA1 sha1;
-          memcpy (&sha1, &hasher->sha1_stroke, sizeof (CtxSHA1));
-          char ctx_sha1_hash[20];
-        uint64_t hash = ctx_rasterizer_poly_to_hash (rasterizer);
-        CtxIntRectangle shape_rect = {
-          (int)(rasterizer->col_min / CTX_SUBDIV - rasterizer->state->gstate.line_width),
-          (int)(rasterizer->scan_min / aa - rasterizer->state->gstate.line_width),
-          (int)((rasterizer->col_max - rasterizer->col_min + 1) / CTX_SUBDIV + 
rasterizer->state->gstate.line_width),
-          (int)((rasterizer->scan_max - rasterizer->scan_min + 1) / aa + 
rasterizer->state->gstate.line_width)
-        };
-
-        shape_rect.width += rasterizer->state->gstate.line_width * 2;
-        shape_rect.height += rasterizer->state->gstate.line_width * 2;
-        shape_rect.x -= rasterizer->state->gstate.line_width;
-        shape_rect.y -= rasterizer->state->gstate.line_width;
-
-        hash ^= (int)(rasterizer->state->gstate.line_width * 110);
-        hash ^= (rasterizer->state->gstate.line_cap * 23);
-        hash ^= (rasterizer->state->gstate.source_stroke.type * 117);
-
-        ctx_sha1_process(&sha1, (unsigned char*)&hash, 8);
-
-        uint32_t color;
-        ctx_color_get_rgba8 (rasterizer->state, &rasterizer->state->gstate.source_stroke.color, 
(uint8_t*)(&color));
-
-          ctx_sha1_process(&sha1, (unsigned char*)&color, 4);
+                              uint8_t       *pixels,
+                              void (*freefunc) (void *pixels, void *user_data),
+                              void *user_data)
+{
+  int id = 0;
+  if (eid)
+  {
+    for (int i = 0; i <  CTX_MAX_TEXTURES; i++)
+    {
+      if (ctx->texture[i].data &&
+          ctx->texture[i].eid &&
+          !strcmp (ctx->texture[i].eid, eid))
+      {
+        ctx->texture[i].frame = ctx->texture_cache->frame;
+        if (freefunc && user_data != (void*)23)
+          freefunc (pixels, user_data);
+        return ctx->texture[i].eid;
+      }
+      if (ctx->texture[i].data == NULL 
+          ||   (ctx->texture_cache->frame - ctx->texture[i].frame >= 2))
+        id = i;
+    }
+  } else
+  {
+    for (int i = 0; i <  CTX_MAX_TEXTURES; i++)
+    {
+      if (ctx->texture[i].data == NULL 
+          || (ctx->texture_cache->frame - ctx->texture[i].frame > 2))
+        id = i;
+    }
+  }
+  //int bpp = ctx_pixel_format_bits_per_pixel (format);
+  ctx_buffer_deinit (&ctx->texture[id]);
 
-          ctx_sha1_done(&sha1, (unsigned char*)ctx_sha1_hash);
-          _ctx_add_hash (hasher, &shape_rect, ctx_sha1_hash);
-        }
-        if (!rasterizer->preserve)
-          ctx_rasterizer_reset (rasterizer);
-        rasterizer->preserve = 0;
-        break;
-        /* the above cases are the painting cases and 
-         * the only ones differing from the rasterizer's process switch
-         */
+  if (stride<=0)
+  {
+    stride = ctx_pixel_format_get_stride ((CtxPixelFormat)format, width);
+  }
 
-      case CTX_LINE_TO:
-        ctx_rasterizer_line_to (rasterizer, c->c.x0, c->c.y0);
-        break;
-      case CTX_REL_LINE_TO:
-        ctx_rasterizer_rel_line_to (rasterizer, c->c.x0, c->c.y0);
-        break;
-      case CTX_MOVE_TO:
-        ctx_rasterizer_move_to (rasterizer, c->c.x0, c->c.y0);
-        break;
-      case CTX_REL_MOVE_TO:
-        ctx_rasterizer_rel_move_to (rasterizer, c->c.x0, c->c.y0);
-        break;
-      case CTX_CURVE_TO:
-        ctx_rasterizer_curve_to (rasterizer, c->c.x0, c->c.y0,
-                                 c->c.x1, c->c.y1,
-                                 c->c.x2, c->c.y2);
-        break;
-      case CTX_REL_CURVE_TO:
-        ctx_rasterizer_rel_curve_to (rasterizer, c->c.x0, c->c.y0,
-                                     c->c.x1, c->c.y1,
-                                     c->c.x2, c->c.y2);
-        break;
-      case CTX_QUAD_TO:
-        ctx_rasterizer_quad_to (rasterizer, c->c.x0, c->c.y0, c->c.x1, c->c.y1);
-        break;
-      case CTX_REL_QUAD_TO:
-        ctx_rasterizer_rel_quad_to (rasterizer, c->c.x0, c->c.y0, c->c.x1, c->c.y1);
-        break;
-      case CTX_ARC:
-        ctx_rasterizer_arc (rasterizer, c->arc.x, c->arc.y, c->arc.radius, c->arc.angle1, c->arc.angle2, 
c->arc.direction);
-        break;
-      case CTX_RECTANGLE:
-        ctx_rasterizer_rectangle (rasterizer, c->rectangle.x, c->rectangle.y,
-                                  c->rectangle.width, c->rectangle.height);
-        break;
-      case CTX_ROUND_RECTANGLE:
-        ctx_rasterizer_round_rectangle (rasterizer, c->rectangle.x, c->rectangle.y,
-                                        c->rectangle.width, c->rectangle.height,
-                                        c->rectangle.radius);
-        break;
-      case CTX_SET_PIXEL:
-        ctx_rasterizer_set_pixel (rasterizer, c->set_pixel.x, c->set_pixel.y,
-                                  c->set_pixel.rgba[0],
-                                  c->set_pixel.rgba[1],
-                                  c->set_pixel.rgba[2],
-                                  c->set_pixel.rgba[3]);
-        break;
-      case CTX_PRESERVE:
-        rasterizer->preserve = 1;
-        break;
-      case CTX_ROTATE:
-      case CTX_SCALE:
-      case CTX_TRANSLATE:
-      case CTX_SAVE:
-      case CTX_RESTORE:
-        rasterizer->uses_transforms = 1;
-        ctx_interpret_transforms (rasterizer->state, entry, NULL);
+  int data_len = stride * height;
+  if (format == CTX_FORMAT_YUV420)
+          data_len = width * height +
+                  2 * ((width/2)*(height/2));
 
-        
-        break;
-      case CTX_FONT:
-        ctx_rasterizer_set_font (rasterizer, ctx_arg_string() );
-        break;
-      case CTX_BEGIN_PATH:
-        ctx_rasterizer_reset (rasterizer);
-        break;
-      case CTX_CLIP:
-        // should perhaps modify a global state to include
-        // in hash?
-        ctx_rasterizer_clip (rasterizer);
-        break;
-      case CTX_CLOSE_PATH:
-        ctx_rasterizer_finish_shape (rasterizer);
-        break;
-      case CTX_DEFINE_TEXTURE:
-        {
-        ctx_sha1_init (&hasher->sha1_fill);
-        ctx_sha1_process (&hasher->sha1_fill, (uint8_t*)c->define_texture.eid, strlen 
(c->define_texture.eid));
-        ctx_sha1_process(&hasher->sha1_fill, (unsigned char*)(&rasterizer->state->gstate.transform), sizeof 
(rasterizer->state->gstate.transform));
+  if (freefunc == ctx_buffer_pixels_free && user_data == (void*)23)
+  {
+     uint8_t *tmp = (uint8_t*)malloc (data_len);
+     memcpy (tmp, pixels, data_len);
+     pixels = tmp;
+  }
 
-        rasterizer->comp_op = NULL; // why?
-        }
-        break;
-      case CTX_TEXTURE:
-        ctx_sha1_init (&hasher->sha1_fill);
-        ctx_sha1_process (&hasher->sha1_fill, (uint8_t*)c->texture.eid, strlen (c->texture.eid));
-        ctx_sha1_process (&hasher->sha1_fill, (uint8_t*)(&rasterizer->state->gstate.transform), sizeof 
(rasterizer->state->gstate.transform));
-        rasterizer->comp_op = NULL; // why?
-        break;
-      case CTX_COLOR:
-        {
-          uint32_t color;
-          if (((int)(ctx_arg_float(0))&512))
-          {
-            ctx_color_get_rgba8 (rasterizer->state, &rasterizer->state->gstate.source_stroke.color, 
(uint8_t*)(&color));
-            ctx_sha1_init (&hasher->sha1_stroke);
-            ctx_sha1_process(&hasher->sha1_stroke, (unsigned char*)&color, 4);
-          }
-          else
-          {
-            ctx_color_get_rgba8 (rasterizer->state, &rasterizer->state->gstate.source_fill.color, 
(uint8_t*)(&color));
-            ctx_sha1_init (&hasher->sha1_fill);
-            ctx_sha1_process(&hasher->sha1_fill, (unsigned char*)&color, 4);
-          }
-        }
-        break;
-      case CTX_LINEAR_GRADIENT:
-          ctx_sha1_init (&hasher->sha1_fill);
-          ctx_sha1_process(&hasher->sha1_fill, 
-                           (uint8_t*)c, sizeof (c->linear_gradient));
-          ctx_sha1_process (&hasher->sha1_fill, (unsigned char*)(&rasterizer->state->gstate.transform), 
sizeof (rasterizer->state->gstate.transform));
-        break;
-      case CTX_RADIAL_GRADIENT:
-          ctx_sha1_init (&hasher->sha1_fill);
-          ctx_sha1_process(&hasher->sha1_fill, 
-                           (uint8_t*)c, sizeof (c->radial_gradient));
-          ctx_sha1_process (&hasher->sha1_fill, (unsigned char*)(&rasterizer->state->gstate.transform), 
sizeof (rasterizer->state->gstate.transform));
-        //ctx_state_gradient_clear_stops (rasterizer->state);
-        break;
-#if CTX_GRADIENTS
-      case CTX_GRADIENT_STOP:
-        {
-          float rgba[4]= {ctx_u8_to_float (ctx_arg_u8 (4) ),
-                          ctx_u8_to_float (ctx_arg_u8 (4+1) ),
-                          ctx_u8_to_float (ctx_arg_u8 (4+2) ),
-                          ctx_u8_to_float (ctx_arg_u8 (4+3) )
-                         };
-          ctx_sha1_process(&hasher->sha1_fill, 
-                           (uint8_t*) &rgba[0], sizeof(rgba));
-        }
-        break;
+  ctx_buffer_set_data (&ctx->texture[id],
+                       pixels, width, height,
+                       stride, format,
+                       freefunc, user_data);
+#if CTX_ENABLE_CM
+  ctx->texture[id].space = space;
 #endif
-    }
-  if (command->code == CTX_LINE_WIDTH)
+  ctx->texture[id].frame = ctx->texture_cache->frame;
+  if (eid)
+  {
+    /* we got an eid, this is the fast path */
+    ctx->texture[id].eid = strdup (eid);
+  }
+  else
+  {
+    uint8_t hash[20];
+    char ascii[41];
+
+    CtxSHA1 *sha1 = ctx_sha1_new ();
+    ctx_sha1_process (sha1, pixels, stride * height);
+    ctx_sha1_done (sha1, hash);
+    ctx_sha1_free (sha1);
+    const char *hex="0123456789abcdef";
+    for (int i = 0; i < 20; i ++)
     {
-      float x = state->gstate.line_width;
-      /* normalize line width according to scaling factor
-       */
-      x = x * ctx_maxf (ctx_maxf (ctx_fabsf (state->gstate.transform.m[0][0]),
-                                  ctx_fabsf (state->gstate.transform.m[0][1]) ),
-                        ctx_maxf (ctx_fabsf (state->gstate.transform.m[1][0]),
-                                  ctx_fabsf (state->gstate.transform.m[1][1]) ) );
-      state->gstate.line_width = x;
+       ascii[i*2]=hex[hash[i]/16];
+       ascii[i*2+1]=hex[hash[i]%16];
     }
+    ascii[40]=0;
+    ctx->texture[id].eid = strdup (ascii);
+  }
+  return ctx->texture[id].eid;
 }
 
-static CtxRasterizer *
-ctx_hasher_init (CtxRasterizer *rasterizer, Ctx *ctx, CtxState *state, int width, int height, int cols, int 
rows)
+void
+_ctx_texture_prepare_color_management (CtxRasterizer *rasterizer,
+                                      CtxBuffer     *buffer)
 {
-  CtxHasher *hasher = (CtxHasher*)rasterizer;
-  ctx_memset (rasterizer, 0, sizeof (CtxHasher) );
-  rasterizer->vfuncs.process = ctx_hasher_process;
-  rasterizer->vfuncs.free    = (CtxDestroyNotify)ctx_rasterizer_deinit;
-  // XXX need own destructor to not leak ->hashes
-  rasterizer->edge_list.flags |= CTX_DRAWLIST_EDGE_LIST;
-  rasterizer->state       = state;
-  rasterizer->ctx         = ctx;
-  ctx_state_init (rasterizer->state);
-  rasterizer->blit_x      = 0;
-  rasterizer->blit_y      = 0;
-  rasterizer->blit_width  = width;
-  rasterizer->blit_height = height;
-  rasterizer->state->gstate.clip_min_x  = 0;
-  rasterizer->state->gstate.clip_min_y  = 0;
-  rasterizer->state->gstate.clip_max_x  = width - 1;
-  rasterizer->state->gstate.clip_max_y  = height - 1;
-  rasterizer->scan_min    = 5000;
-  rasterizer->scan_max    = -5000;
-  //rasterizer->aa          = 15;
+   switch (buffer->format->pixel_format)
+   {
+#ifndef NO_BABL
+#if CTX_BABL
+     case CTX_FORMAT_RGBA8:
+       if (buffer->space == rasterizer->state->gstate.device_space)
+       {
+         buffer->color_managed = buffer;
+       }
+       else
+       {
+          buffer->color_managed = ctx_buffer_new (buffer->width, buffer->height,
+                                                  CTX_FORMAT_RGBA8);
+          babl_process (
+             babl_fish (babl_format_with_space ("R'G'B'A u8", buffer->space),
+                        babl_format_with_space ("R'G'B'A u8", rasterizer->state->gstate.device_space)),
+             buffer->data, buffer->color_managed->data,
+             buffer->width * buffer->height
+             );
+       }
+       break;
+     case CTX_FORMAT_RGB8:
+       if (buffer->space == rasterizer->state->gstate.device_space)
+       {
+         buffer->color_managed = buffer;
+       }
+       else
+       {
+         buffer->color_managed = ctx_buffer_new (buffer->width, buffer->height,
+                                               CTX_FORMAT_RGB8);
+         babl_process (
+            babl_fish (babl_format_with_space ("R'G'B' u8", buffer->space),
+                       babl_format_with_space ("R'G'B' u8", rasterizer->state->gstate.device_space)),
+            buffer->data, buffer->color_managed->data,
+            buffer->width * buffer->height
+          );
+       }
+       break;
+#endif
+#endif
+     default:
+       buffer->color_managed = buffer;
+   }
+}
 
-  hasher->rows = rows;
-  hasher->cols = cols;
 
-  hasher->hashes = (uint8_t*)ctx_calloc (20, rows * cols);
-  ctx_sha1_init (&hasher->sha1_fill);
-  ctx_sha1_init (&hasher->sha1_stroke);
 
-  return rasterizer;
+int ctx_utf8_len (const unsigned char first_byte)
+{
+  if      ( (first_byte & 0x80) == 0)
+    { return 1; } /* ASCII */
+  else if ( (first_byte & 0xE0) == 0xC0)
+    { return 2; }
+  else if ( (first_byte & 0xF0) == 0xE0)
+    { return 3; }
+  else if ( (first_byte & 0xF8) == 0xF0)
+    { return 4; }
+  return 1;
 }
 
-Ctx *ctx_hasher_new (int width, int height, int cols, int rows)
+
+const char *ctx_utf8_skip (const char *s, int utf8_length)
 {
-  Ctx *ctx           = ctx_new ();
-  CtxState    *state = &ctx->state;
-  CtxRasterizer *rasterizer = (CtxRasterizer *) ctx_calloc (sizeof (CtxHasher), 1);
-  ctx_hasher_init (rasterizer, ctx, state, width, height, cols, rows);
-  ctx_set_renderer (ctx, (void*)rasterizer);
-  return ctx;
+  int count;
+  if (!s)
+    { return NULL; }
+  for (count = 0; *s; s++)
+    {
+      if ( (*s & 0xC0) != 0x80)
+        { count++; }
+      if (count == utf8_length + 1)
+        { return s; }
+    }
+  return s;
 }
 
-uint8_t *ctx_hasher_get_hash (Ctx *ctx, int col, int row)
+//  XXX  :  unused
+int ctx_utf8_strlen (const char *s)
 {
-  CtxHasher *hasher = (CtxHasher*)ctx->renderer;
-  if (row < 0) row =0;
-  if (col < 0) col =0;
-  if (row >= hasher->rows) row = hasher->rows-1;
-  if (col >= hasher->cols) col = hasher->cols-1;
+  int count;
+  if (!s)
+    { return 0; }
+  for (count = 0; *s; s++)
+    if ( (*s & 0xC0) != 0x80)
+      { count++; }
+  return count;
+}
 
-  return &hasher->hashes[(row*hasher->cols+col)*20];
+int
+ctx_unichar_to_utf8 (uint32_t  ch,
+                     uint8_t  *dest)
+{
+  /* http://www.cprogramming.com/tutorial/utf8.c  */
+  /*  Basic UTF-8 manipulation routines
+    by Jeff Bezanson
+    placed in the public domain Fall 2005 ... */
+  if (ch < 0x80)
+    {
+      dest[0] = (char) ch;
+      return 1;
+    }
+  if (ch < 0x800)
+    {
+      dest[0] = (ch>>6) | 0xC0;
+      dest[1] = (ch & 0x3F) | 0x80;
+      return 2;
+    }
+  if (ch < 0x10000)
+    {
+      dest[0] = (ch>>12) | 0xE0;
+      dest[1] = ( (ch>>6) & 0x3F) | 0x80;
+      dest[2] = (ch & 0x3F) | 0x80;
+      return 3;
+    }
+  if (ch < 0x110000)
+    {
+      dest[0] = (ch>>18) | 0xF0;
+      dest[1] = ( (ch>>12) & 0x3F) | 0x80;
+      dest[2] = ( (ch>>6) & 0x3F) | 0x80;
+      dest[3] = (ch & 0x3F) | 0x80;
+      return 4;
+    }
+  return 0;
 }
 
-#endif
+uint32_t
+ctx_utf8_to_unichar (const char *input)
+{
+  const uint8_t *utf8 = (const uint8_t *) input;
+  uint8_t c = utf8[0];
+  if ( (c & 0x80) == 0)
+    { return c; }
+  else if ( (c & 0xE0) == 0xC0)
+    return ( (utf8[0] & 0x1F) << 6) |
+           (utf8[1] & 0x3F);
+  else if ( (c & 0xF0) == 0xE0)
+    return ( (utf8[0] & 0xF)  << 12) |
+           ( (utf8[1] & 0x3F) << 6) |
+           (utf8[2] & 0x3F);
+  else if ( (c & 0xF8) == 0xF0)
+    return ( (utf8[0] & 0x7)  << 18) |
+           ( (utf8[1] & 0x3F) << 12) |
+           ( (utf8[2] & 0x3F) << 6) |
+           (utf8[3] & 0x3F);
+  else if ( (c & 0xFC) == 0xF8)
+    return ( (utf8[0] & 0x3)  << 24) |
+           ( (utf8[1] & 0x3F) << 18) |
+           ( (utf8[2] & 0x3F) << 12) |
+           ( (utf8[3] & 0x3F) << 6) |
+           (utf8[4] & 0x3F);
+  else if ( (c & 0xFE) == 0xFC)
+    return ( (utf8[0] & 0x1)  << 30) |
+           ( (utf8[1] & 0x3F) << 24) |
+           ( (utf8[2] & 0x3F) << 18) |
+           ( (utf8[3] & 0x3F) << 12) |
+           ( (utf8[4] & 0x3F) << 6) |
+           (utf8[5] & 0x3F);
+  return 0;
+}
 #if CTX_EVENTS
 
 #if !__COSMOPOLITAN__
@@ -22399,7 +23302,7 @@ nc_at_exit (void)
 static const char *mouse_get_event_int (Ctx *n, int *x, int *y)
 {
   static int prev_state = 0;
-  const char *ret = "mouse-motion";
+  const char *ret = "pm";
   float relx, rely;
   signed char buf[3];
   read (n->mouse_fd, buf, 3);
@@ -22411,18 +23314,18 @@ static const char *mouse_get_event_int (Ctx *n, int *x, int *y)
 
   if (n->mouse_x < 1) n->mouse_x = 1;
   if (n->mouse_y < 1) n->mouse_y = 1;
-  if (n->mouse_x >= n->events.width)  n->mouse_x = n->events.width;
-  if (n->mouse_y >= n->events.height) n->mouse_y = n->events.height;
+  if (n->mouse_x >= n->width)  n->mouse_x = n->width;
+  if (n->mouse_y >= n->height) n->mouse_y = n->height;
 
   if (x) *x = n->mouse_x;
   if (y) *y = n->mouse_y;
 
   if ((prev_state & 1) != (buf[0] & 1))
     {
-      if (buf[0] & 1) ret = "mouse-press";
+      if (buf[0] & 1) ret = "pp";
     }
   else if (buf[0] & 1)
-    ret = "mouse-drag";
+    ret = "pd";
 
   if ((prev_state & 2) != (buf[0] & 2))
     {
@@ -22492,7 +23395,7 @@ static int mouse_has_event (Ctx *n)
           return mouse_has_event (n);
         }
 
-      if ((mev_type && !strcmp (type, mev_type) && !strcmp (type, "mouse-motion")) ||
+      if ((mev_type && !strcmp (type, mev_type) && !strcmp (type, "pm")) ||
          (mev_type && !strcmp (type, mev_type) && !strcmp (type, "mouse1-drag")) ||
          (mev_type && !strcmp (type, mev_type) && !strcmp (type, "mouse2-drag")))
         {
@@ -22518,7 +23421,7 @@ static int _nc_raw (void)
     return -1;
   if (!atexit_registered)
     {
-      atexit (nc_at_exit);
+      //atexit (nc_at_exit);
       atexit_registered = 1;
     }
   if (tcgetattr (STDIN_FILENO, &orig_attr) == -1)
@@ -22670,32 +23573,32 @@ const char *ctx_nct_get_event (Ctx *n, int timeoutms, int *x, int *y)
               switch (buf[3])
                 {
                         /* XXX : todo reduce this to less string constants */
-                  case 32:  return "mouse-press";
+                  case 32:  return "pp";
                   case 33:  return "mouse1-press";
                   case 34:  return "mouse2-press";
-                  case 40:  return "alt-mouse-press";
+                  case 40:  return "alt-pp";
                   case 41:  return "alt-mouse1-press";
                   case 42:  return "alt-mouse2-press";
-                  case 48:  return "control-mouse-press";
+                  case 48:  return "control-pp";
                   case 49:  return "control-mouse1-press";
                   case 50:  return "control-mouse2-press";
-                  case 56:  return "alt-control-mouse-press";
+                  case 56:  return "alt-control-pp";
                   case 57:  return "alt-control-mouse1-press";
                   case 58:  return "alt-control-mouse2-press";
-                  case 64:  return "mouse-drag";
+                  case 64:  return "pd";
                   case 65:  return "mouse1-drag";
                   case 66:  return "mouse2-drag";
-                  case 71:  return "mouse-motion"; /* shift+motion */
-                  case 72:  return "alt-mouse-drag";
+                  case 71:  return "pm"; /* shift+motion */
+                  case 72:  return "alt-pd";
                   case 73:  return "alt-mouse1-drag";
                   case 74:  return "alt-mouse2-drag";
-                  case 75:  return "mouse-motion"; /* alt+motion */
-                  case 80:  return "control-mouse-drag";
+                  case 75:  return "pm"; /* alt+motion */
+                  case 80:  return "control-pd";
                   case 81:  return "control-mouse1-drag";
                   case 82:  return "control-mouse2-drag";
-                  case 83:  return "mouse-motion"; /* ctrl+motion */
-                  case 91:  return "mouse-motion"; /* ctrl+alt+motion */
-                  case 95:  return "mouse-motion"; /* ctrl+alt+shift+motion */
+                  case 83:  return "pm"; /* ctrl+motion */
+                  case 91:  return "pm"; /* ctrl+alt+motion */
+                  case 95:  return "pm"; /* ctrl+alt+shift+motion */
                   case 96:  return "scroll-up";
                   case 97:  return "scroll-down";
                   case 100: return "shift-scroll-up";
@@ -22709,8 +23612,8 @@ const char *ctx_nct_get_event (Ctx *n, int timeoutms, int *x, int *y)
                   case 35: /* (or release) */
                   case 51: /* (or ctrl-release) */
                   case 43: /* (or alt-release) */
-                  case 67: return "mouse-motion";
-                           /* have a separate mouse-drag ? */
+                  case 67: return "pm";
+                           /* have a separate pd ? */
                   default: {
                              static char rbuf[100];
                              sprintf (rbuf, "mouse (unhandled state: %i)", buf[3]);
@@ -22758,28 +23661,28 @@ const char *ctx_nct_get_event (Ctx *n, int timeoutms, int *x, int *y)
   return "fail";
 }
 
-int ctx_nct_consume_events (Ctx *ctx)
+void ctx_nct_consume_events (Ctx *ctx)
 {
   int ix, iy;
-  CtxCtx *ctxctx = (CtxCtx*)ctx->renderer;
+  CtxCtx *ctxctx = (CtxCtx*)ctx->backend;
   const char *event = NULL;
 
-  {
+  do {
     float x, y;
     event = ctx_nct_get_event (ctx, 50, &ix, &iy);
 
-    x = (ix - 1.0 + 0.5) / ctxctx->cols * ctx->events.width;
-    y = (iy - 1.0)       / ctxctx->rows * ctx->events.height;
+    x = (ix - 1.0 + 0.5) / ctxctx->cols * ctx->width;
+    y = (iy - 1.0)       / ctxctx->rows * ctx->height;
 
-    if (!strcmp (event, "mouse-press"))
+    if (!strcmp (event, "pp"))
     {
       ctx_pointer_press (ctx, x, y, 0, 0);
       ctxctx->was_down = 1;
-    } else if (!strcmp (event, "mouse-release"))
+    } else if (!strcmp (event, "pr"))
     {
       ctx_pointer_release (ctx, x, y, 0, 0);
       ctxctx->was_down = 0;
-    } else if (!strcmp (event, "mouse-motion"))
+    } else if (!strcmp (event, "pm"))
     {
       //nct_set_cursor_pos (backend->term, ix, iy);
       //nct_flush (backend->term);
@@ -22789,7 +23692,7 @@ int ctx_nct_consume_events (Ctx *ctx)
         ctxctx->was_down = 0;
       }
       ctx_pointer_motion (ctx, x, y, 0, 0);
-    } else if (!strcmp (event, "mouse-drag"))
+    } else if (!strcmp (event, "pd"))
     {
       ctx_pointer_motion (ctx, x, y, 0, 0);
     } else if (!strcmp (event, "size-changed"))
@@ -22809,7 +23712,7 @@ int ctx_nct_consume_events (Ctx *ctx)
       mrg_set_size (mrg, width, height);
       mrg_queue_draw (mrg, NULL);
 #endif
-      //if (ctx_renderer_is_ctx (ctx))
+      //if (ctx_backend_is_ctx (ctx))
 #if 0
       {
         int width = ctx_terminal_width ();
@@ -22831,13 +23734,12 @@ int ctx_nct_consume_events (Ctx *ctx)
         ctx_key_press (ctx, 0, "return", 0);
       else if (!strcmp (event, "idle"))
       {
+        event = NULL;
       }
       else
       ctx_key_press (ctx, 0, event, 0);
     }
-  }
-
-  return 1;
+  }  while (event);
 }
 
 const char *ctx_native_get_event (Ctx *n, int timeoutms)
@@ -22994,29 +23896,28 @@ void _ctx_texture_unlock (void)
 #endif
 }
 
-
 void
 ctx_init (int *argc, char ***argv)
 {
 #if 0
-  if (!getenv ("CTX_VERSION"))
+  const char *backend = getenv ("CTX_BACKEND");
+  if (!backend || strcmp (backend, "ctx"))
   {
     int i;
-    char *new_argv[*argc+3];
+    char *new_argv[*argc+5];
     new_argv[0] = "ctx";
+    new_argv[1] = "-e";
+    new_argv[2] = "--";
     for (i = 0; i < *argc; i++)
     {
-      new_argv[i+1] = *argv[i];
+      new_argv[i+3] = *argv[i];
     }
-    new_argv[i+1] = NULL;
+    new_argv[i+3] = NULL;
     execvp (new_argv[0], new_argv);
-    // if this fails .. we continue normal startup
-    // and end up in self-hosted braille
   }
 #endif
 }
 
-
 #if 0
 int ctx_count (Ctx *ctx)
 {
@@ -23052,9 +23953,8 @@ static uint32_t ctx_ms (Ctx *ctx)
   return _ctx_ticks () / 1000;
 }
 
-
 static int is_in_ctx (void);
-Ctx *ctx_new_ui (int width, int height)
+static Ctx *ctx_new_ui (int width, int height, const char *backend)
 {
 #if CTX_TILED
   if (getenv ("CTX_DAMAGE_CONTROL"))
@@ -23108,7 +24008,8 @@ Ctx *ctx_new_ui (int width, int height)
   if (_ctx_max_threads > CTX_MAX_THREADS) _ctx_max_threads = CTX_MAX_THREADS;
 
   //fprintf (stderr, "ctx using %i threads\n", _ctx_max_threads);
-  const char *backend = getenv ("CTX_BACKEND");
+  if (!backend)
+    backend = getenv ("CTX_BACKEND");
 
   if (backend && !strcmp (backend, ""))
     backend = NULL;
@@ -23135,6 +24036,14 @@ Ctx *ctx_new_ui (int width, int height)
     }
   }
 
+#if CTX_HEADLESS
+  if (!ret)
+    {
+      if (backend && !strcmp (backend, "headless"))
+        ret = ctx_new_headless (width, height);
+    }
+#endif
+
 #if CTX_SDL
   if (!ret && getenv ("DISPLAY"))
   {
@@ -23151,6 +24060,7 @@ Ctx *ctx_new_ui (int width, int height)
   }
 #endif
 
+
 #if CTX_FB
   if (!ret && !getenv ("DISPLAY"))
     {
@@ -23195,22 +24105,14 @@ void _ctx_resized (Ctx *ctx, int width, int height, long time);
 
 void ctx_set_size (Ctx *ctx, int width, int height)
 {
-#if CTX_EVENTS
-  if (ctx->events.width != width || ctx->events.height != height)
+  if (ctx->width != width || ctx->height != height)
   {
-    ctx->events.width = width;
-    ctx->events.height = height;
+    ctx->width = width;
+    ctx->height = height;
+#if CTX_EVENTS
     _ctx_resized (ctx, width, height, 0);
-#if 1
-    if (ctx_renderer_is_ctx (ctx))
-    {
-      CtxCtx *ctxctx = (CtxCtx*)ctx->renderer;
-      ctxctx->width = width;
-      ctxctx->height= height;
-    }
 #endif
   }
-#endif
 }
 
 #if CTX_EVENTS
@@ -23369,10 +24271,10 @@ void _ctx_idle_iteration (Ctx *ctx)
   while (ctx->events.idles_to_remove)
   {
     CtxIdleCb *item = ctx->events.idles_to_remove->data;
-    if (item->destroy_notify)
-      item->destroy_notify (item->destroy_data);
     ctx_list_remove (&ctx->events.idles, item);
     ctx_list_remove (&ctx->events.idles_to_remove, item);
+    if (item->destroy_notify)
+      item->destroy_notify (item->destroy_data);
   }
   ctx->events.in_idle_dispatch=0;
 }
@@ -23499,10 +24401,10 @@ void ctx_remove_idle (Ctx *ctx, int handle)
   while (ctx->events.idles_to_remove)
   {
     CtxIdleCb *item = ctx->events.idles_to_remove->data;
-    if (item->destroy_notify)
-      item->destroy_notify (item->destroy_data);
     ctx_list_remove (&ctx->events.idles, item);
     ctx_list_remove (&ctx->events.idles_to_remove, item);
+    if (item->destroy_notify)
+      item->destroy_notify (item->destroy_data);
   }
 }
 
@@ -23684,8 +24586,8 @@ void ctx_listen_full (Ctx     *ctx,
       float th = height;
       _ctx_user_to_device (&ctx->state, &tx, &ty);
       _ctx_user_to_device_distance (&ctx->state, &tw, &th);
-      if (ty > ctx->events.height * 2 ||
-          tx > ctx->events.width * 2 ||
+      if (ty > ctx->height * 2 ||
+          tx > ctx->width * 2 ||
           tx + tw < 0 ||
           ty + th < 0)
       {
@@ -24048,122 +24950,25 @@ _ctx_emit_cb_item (Ctx *ctx, CtxItem *item, CtxEvent *event, CtxEventType type,
 #include <stdatomic.h>
 #endif
 
-int ctx_native_events = 0;
-#if CTX_SDL
-int ctx_sdl_events = 0;
-int ctx_sdl_consume_events (Ctx *ctx);
-#endif
-
-#if CTX_FB
-int ctx_fb_events = 0;
-int ctx_fb_consume_events (Ctx *ctx);
-#endif
-
-#if CTX_KMS
-int ctx_kms_events = 0;
-int ctx_kms_consume_events (Ctx *ctx);
-#endif
-
-int ctx_nct_consume_events (Ctx *ctx);
-int ctx_nct_has_event (Ctx  *n, int delay_ms);
-int ctx_ctx_consume_events (Ctx *ctx);
-
-
-
 void ctx_consume_events (Ctx *ctx)
 {
-#if CTX_SDL
-  if (ctx_sdl_events)
-    ctx_sdl_consume_events (ctx);
-  else
-#endif
-#if CTX_FB
-  if (ctx_fb_events)
-    ctx_fb_consume_events (ctx);
-  else
-#endif
-#if CTX_KMS
-  if (ctx_kms_events)
-    ctx_kms_consume_events (ctx);
-  else
-#endif
-  if (ctx_native_events)
-    ctx_ctx_consume_events (ctx);
-  else
-    ctx_nct_consume_events (ctx);
+  CtxBackend *backend = ctx->backend;
+  if (backend && backend->consume_events)
+    backend->consume_events (ctx);
 }
 
-int ctx_has_event (Ctx *ctx, int timeout)
+void ctx_stdin_get_event_fds (Ctx *ctx, int *fd, int *count)
 {
-#if CTX_SDL
-  if (ctx_sdl_events)
-  {
-    return SDL_WaitEventTimeout (NULL, timeout);
-  }
-  else
-#endif
-#if CTX_FB
-  if (ctx_fb_events)
-  {
-    return ctx_nct_has_event (ctx, timeout);
-  }
-  else
-#endif
-  if (ctx_native_events)
-  {
-    return ctx_nct_has_event (ctx, timeout);
-  }
-  else
-  {
-    return ctx_nct_has_event (ctx, timeout);
-  }
-
-  ctx_consume_events (ctx);
-  if (ctx->events.events)
-    return 1;
-  return 0;
+  fd[0] = STDIN_FILENO;
+  *count = 1;
 }
 
-#if CTX_FB
-static int ctx_fb_get_mice_fd (Ctx *ctx);
-#endif
-
 void ctx_get_event_fds (Ctx *ctx, int *fd, int *count)
 {
-#if CTX_SDL
-  if (ctx_sdl_events)
-  {
-    *count = 0;
-  }
-  else
-#endif
-#if CTX_FB
-  if (ctx_fb_events)
-  {
-    int mice_fd = ctx_fb_get_mice_fd (ctx);
-    fd[0] = STDIN_FILENO;
-    if (mice_fd)
-    {
-      fd[1] = mice_fd;
-      *count = 2;
-    }
-    else
-    {
-      *count = 1;
-    }
-  }
-  else
-#endif
-  if (ctx_native_events)
-  {
-    fd[0] = STDIN_FILENO;
-    *count = 1;
-  }
-  else
-  {
-    fd[0] = STDIN_FILENO;
-    *count = 1;
-  }
+  CtxBackend *backend = ctx->backend; /* the backend decides which fds carry input events */
+  *count = 0; /* default when there is no backend or no hook; set before the call so the hook's answer survives */
+  if (backend && backend->get_event_fds)
+    backend->get_event_fds (ctx, fd, count); /* hook fills fd[] and *count */
 }
 
 CtxEvent *ctx_get_event (Ctx *ctx)
@@ -24178,7 +24983,10 @@ CtxEvent *ctx_get_event (Ctx *ctx)
 
   _ctx_idle_iteration (ctx);
   if (!ctx->events.ctx_get_event_enabled)
+  {
     ctx->events.ctx_get_event_enabled = 1;
+    ctx_queue_draw (ctx);
+  }
 
   ctx_consume_events (ctx);
 
@@ -24609,7 +25417,7 @@ int ctx_pointer_motion (Ctx *ctx, float x, float y, int device_no, uint32_t time
     static CtxItem *prev_hovered_item = NULL;
     if (prev_hovered_item != hovered_item)
     {
-      ctx_set_dirty (ctx, 1);
+      ctx_queue_draw (ctx);
     }
     prev_hovered_item = hovered_item;
   }
@@ -24755,12 +25563,12 @@ int ctx_key_press (Ctx *ctx, unsigned int keyval,
   char event_type[128]="";
   float x, y; int b;
   sscanf (string, "%s %f %f %i", event_type, &x, &y, &b);
-  if (!strcmp (event_type, "mouse-motion") ||
-      !strcmp (event_type, "mouse-drag"))
+  if (!strcmp (event_type, "pm") ||
+      !strcmp (event_type, "pd"))
     return ctx_pointer_motion (ctx, x, y, b, 0);
-  else if (!strcmp (event_type, "mouse-press"))
+  else if (!strcmp (event_type, "pp"))
     return ctx_pointer_press (ctx, x, y, b, 0);
-  else if (!strcmp (event_type, "mouse-release"))
+  else if (!strcmp (event_type, "pr"))
     return ctx_pointer_release (ctx, x, y, b, 0);
   //else if (!strcmp (event_type, "keydown"))
   //  return ctx_key_down (ctx, keyval, string + 8, time);
@@ -24893,11 +25701,11 @@ void ctx_events_clear_items (Ctx *ctx)
 }
 int ctx_events_width (Ctx *ctx)
 {
-  return ctx->events.width;
+  return ctx->width;
 }
 int ctx_events_height (Ctx *ctx)
 {
-  return ctx->events.height;
+  return ctx->height;
 }
 
 float ctx_pointer_x (Ctx *ctx)
@@ -24961,13 +25769,18 @@ int ctx_get_hash_cache (Ctx *ctx)
   return _ctx_enable_hash_cache;
 }
 
-int ctx_is_dirty (Ctx *ctx)
+int ctx_need_redraw (Ctx *ctx)
 {
-  return ctx->dirty;
+  return (ctx->dirty != 0)
+#if CTX_CLIENTS
+    || ctx_clients_need_redraw (ctx)
+#endif
+    ;
 }
-void ctx_set_dirty (Ctx *ctx, int dirty)
+
+void ctx_queue_draw (Ctx *ctx)
 {
-  ctx->dirty = dirty;
+  ctx->dirty ++;
 }
 
 /*
@@ -25017,7 +25830,6 @@ int ctx_input_pending (Ctx *ctx, int timeout)
   {
     FD_SET (input_fds[i], &fdset);
   }
-
   tv.tv_sec = 0;
   tv.tv_usec = timeout;
   tv.tv_sec = timeout / 1000000;
@@ -25031,14 +25843,28 @@ int ctx_input_pending (Ctx *ctx, int timeout)
   return retval;
 }
 
-void ctx_sdl_set_title (void *self, const char *new_title);
-void ctx_set_title (Ctx *ctx, const char *title)
+
+void ctx_handle_events (Ctx *ctx)
 {
-#if CTX_SDL
-     // XXX also check we're first/only client?
-   if (ctx_renderer_is_sdl (ctx))
-     ctx_sdl_set_title (ctx_get_renderer (ctx), title);
+#if CTX_CLIENTS
+  ctx_clients_handle_events (ctx);
 #endif
+  while (ctx_get_event (ctx)){}
+}
+
+
+static void ctx_events_deinit (Ctx *ctx) /* frees the events.items list and drains the events.idles list */
+{
+  ctx_list_free (&ctx->events.items);
+  ctx->events.last_item = NULL; /* do not keep a pointer into the list that was just freed */
+
+  while (ctx->events.idles) /* always take the head until the list is empty */
+  {
+    CtxIdleCb *item = ctx->events.idles->data;
+    ctx_list_remove (&ctx->events.idles, item); /* unlink first, then notify - same order as in _ctx_idle_iteration */
+    if (item->destroy_notify)
+      item->destroy_notify (item->destroy_data);
+  }
 }
 
 #endif
@@ -26221,7 +27047,7 @@ static void ctx_parser_dispatch_command (CtxParser *parser)
           }
         break;
       case CTX_FLUSH:
-        //ctx_flush (ctx);
+        //ctx_flush (ctx); // XXX  XXX  flush only does things inside backends
         break;
       case CTX_RESET:
         ctx_reset (ctx);
@@ -26906,3261 +27732,3866 @@ void ctx_parse (Ctx *ctx, const char *string)
 
 #endif
 
-static CtxFont ctx_fonts[CTX_MAX_FONTS];
-static int     ctx_font_count = 0;
+#if !__COSMOPOLITAN__
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
 
-#if CTX_FONT_ENGINE_STB
-static float
-ctx_glyph_width_stb (CtxFont *font, Ctx *ctx, uint32_t unichar);
-static float
-ctx_glyph_kern_stb (CtxFont *font, Ctx *ctx, uint32_t unicharA, uint32_t unicharB);
-static int
-ctx_glyph_stb (CtxFont *font, Ctx *ctx, uint32_t unichar, int stroke);
+//#include "ctx.h"
+/* instead of including ctx.h we declare the few utf8
+ * functions we use
+ */
+uint32_t ctx_utf8_to_unichar (const char *input);
+int ctx_unichar_to_utf8 (uint32_t  ch, uint8_t  *dest);
+int ctx_utf8_strlen (const char *s);
 
-CtxFontEngine ctx_font_engine_stb =
+static void ctx_string_init (CtxString *string, int initial_size)
 {
-#if CTX_FONTS_FROM_FILE
-  ctx_load_font_ttf_file,
-#endif
-  ctx_load_font_ttf,
-  ctx_glyph_stb,
-  ctx_glyph_width_stb,
-  ctx_glyph_kern_stb,
-};
+  string->allocated_length = initial_size;
+  string->length = 0;
+  string->utf8_length = 0;
+  string->str = (char*)malloc (string->allocated_length + 1);
+  string->str[0]='\0';
+}
 
-int
-ctx_load_font_ttf (const char *name, const void *ttf_contents, int length)
+static void ctx_string_destroy (CtxString *string)
 {
-  if (ctx_font_count >= CTX_MAX_FONTS)
-    { return -1; }
-  ctx_fonts[ctx_font_count].type = 1;
-  ctx_fonts[ctx_font_count].name = (char *) malloc (strlen (name) + 1);
-  ctx_strcpy ( (char *) ctx_fonts[ctx_font_count].name, name);
-  if (!stbtt_InitFont (&ctx_fonts[ctx_font_count].stb.ttf_info, ttf_contents, 0) )
+  if (string->str)
     {
-      ctx_log ( "Font init failed\n");
-      return -1;
+      free (string->str);
+      string->str = NULL;
     }
-  ctx_fonts[ctx_font_count].engine = &ctx_font_engine_stb;
-  ctx_font_count ++;
-  return ctx_font_count-1;
 }
 
-#if CTX_FONTS_FROM_FILE
-int
-ctx_load_font_ttf_file (const char *name, const char *path)
+void ctx_string_clear (CtxString *string)
 {
-  uint8_t *contents = NULL;
-  long length = 0;
-  ctx_get_contents (path, &contents, &length);
-  if (!contents)
+  string->length = 0;
+  string->utf8_length = 0;
+  string->str[string->length]=0;
+}
+
+
+void ctx_string_pre_alloc (CtxString *string, int size)
+{ /* Resize the buffer to size + 2 bytes, but never below what the current contents need. */
+  int   wanted = CTX_MAX (size + 2, string->length + 2);
+  char *grown  = (char*)realloc (string->str, wanted);
+  if (grown) { string->str = grown; string->allocated_length = wanted; } /* on failure the old buffer and its recorded size stay valid */
+}
+
+
+static inline void _ctx_string_append_byte (CtxString *string, char  val)
+{
+  if (CTX_LIKELY((val & 0xC0) != 0x80))
+    { string->utf8_length++; }
+  if (CTX_UNLIKELY(string->length + 2 >= string->allocated_length))
     {
-      ctx_log ( "File load failed\n");
-      return -1;
+      char *old = string->str;
+      string->allocated_length = CTX_MAX (string->allocated_length * 2, string->length + 2);
+      string->str = (char*)realloc (old, string->allocated_length);
     }
-  return ctx_load_font_ttf (name, contents, length);
+  string->str[string->length++] = val;
+  string->str[string->length] = '\0';
 }
-#endif
 
-static int
-ctx_glyph_stb_find (CtxFont *font, uint32_t unichar)
+void ctx_string_append_byte (CtxString *string, char  val)
 {
-  stbtt_fontinfo *ttf_info = &font->stb.ttf_info;
-  int index = font->stb.cache_index;
-  if (font->stb.cache_unichar == unichar)
+  _ctx_string_append_byte (string, val);
+}
+
+void ctx_string_append_unichar (CtxString *string, unsigned int unichar)
+{
+  char *str;
+  char utf8[5];
+  utf8[ctx_unichar_to_utf8 (unichar, (unsigned char *) utf8)]=0;
+  str = utf8;
+  while (str && *str)
     {
-      return index;
+      _ctx_string_append_byte (string, *str);
+      str++;
     }
-  font->stb.cache_unichar = 0;
-  index = font->stb.cache_index = stbtt_FindGlyphIndex (ttf_info, unichar);
-  font->stb.cache_unichar = unichar;
-  return index;
 }
 
-static float
-ctx_glyph_width_stb (CtxFont *font, Ctx *ctx, uint32_t unichar)
+static inline void _ctx_string_append_str (CtxString *string, const char *str)
 {
-  stbtt_fontinfo *ttf_info = &font->stb.ttf_info;
-  float font_size          = ctx->state.gstate.font_size;
-  float scale              = stbtt_ScaleForPixelHeight (ttf_info, font_size);
-  int advance, lsb;
-  int glyph = ctx_glyph_stb_find (font, unichar);
+  if (!str) { return; }
+  while (*str)
+    {
+      _ctx_string_append_byte (string, *str);
+      str++;
+    }
+}
 
-#if CTX_EVENTS
-  if (ctx_renderer_is_term (ctx))
-    return 2;
+void ctx_string_append_utf8char (CtxString *string, const char *str) /* append only the first utf8 character of str */
+{
+  if (!str) { return; } /* NULL is accepted and ignored */
+  int len = ctx_utf8_len (*str); /* presumably the byte length announced by the lead byte - ctx_utf8_len is defined elsewhere */
+  for (int i = 0; i < len && *str; i++) /* "&& *str" stops at the terminator if the sequence is truncated */
+    {
+      _ctx_string_append_byte (string, *str);
+      str++;
+    }
+}
+
+void ctx_string_append_str (CtxString *string, const char *str) /* exported wrapper around the static inline _ctx_string_append_str */
+{
+  _ctx_string_append_str (string, str); /* a NULL str is ignored there */
+}
+
+CtxString *ctx_string_new_with_size (const char *initial, int initial_size) /* new string with initial_size bytes reserved */
+{
+  CtxString *string = (CtxString*)ctx_calloc (sizeof (CtxString), 1); /* NOTE(review): result is not checked for NULL */
+  ctx_string_init (string, initial_size); /* allocates the character buffer and makes it the empty string */
+  if (initial) /* initial may be NULL, the string then starts out empty */
+    { _ctx_string_append_str (string, initial); }
+  return string; /* release with ctx_string_free */
+}
+
+CtxString *ctx_string_new (const char *initial) /* new string holding a copy of initial (may be NULL) */
+{
+  return ctx_string_new_with_size (initial, 8); /* 8 is the starting capacity; appends grow the buffer as needed */
+}
+
+void ctx_string_append_data (CtxString *string, const char *str, int len) /* append exactly len bytes from str */
+{
+  int i;
+  for (i = 0; i<len; i++) /* counted copy: unlike the _str variants a 0 byte does not end it */
+    { _ctx_string_append_byte (string, str[i]); }
+}
+
+void ctx_string_append_string (CtxString *string, CtxString *string2) /* append the contents of string2 to string */
+{
+  const char *str = ctx_string_get (string2);
+  while (str && *str) /* copies up to the first 0 byte; a NULL buffer appends nothing */
+    {
+      _ctx_string_append_byte (string, *str);
+      str++;
+    }
+}
+
+const char *ctx_string_get (CtxString *string) /* borrow the internal buffer */
+{
+  return string->str; /* not a copy: appends may realloc the buffer, so the pointer can go stale */
+}
+
+int ctx_string_get_utf8length (CtxString *string) /* length in utf8 characters */
+{
+  return string->utf8_length; /* maintained by _ctx_string_append_byte, which skips continuation bytes when counting */
+}
+
+int ctx_string_get_length (CtxString *string) /* length in bytes */
+{
+  return string->length; /* excludes the terminating 0 that the append functions keep after the data */
+}
+
+void
+ctx_string_free (CtxString *string, int freealloc) /* release string; freealloc selects whether the characters go too */
+{
+  if (freealloc) /* 0 keeps the character buffer alive for the caller, as ctx_string_dissolve relies on */
+    {
+      ctx_string_destroy (string);
+    }
+#if 0
+  if (string->is_line)
+  {
+    VtLine *line = (VtLine*)string;
+    if (line->style)
+      { free (line->style); }
+    if (line->ctx)
+      { ctx_free (line->ctx); }
+    if (line->ctx_copy)
+      { ctx_free (line->ctx_copy); }
+  }
 #endif
+  free (string); /* the CtxString struct itself is always released */
+}
 
-  if (glyph==0)
-    { return 0.0f; }
-  stbtt_GetGlyphHMetrics (ttf_info, glyph, &advance, &lsb);
-  return (advance * scale);
+char       *ctx_string_dissolve       (CtxString *string)
+{
+  char *ret = string->str;
+  ctx_string_free (string, 0);
+  return ret;
 }
 
-static float
-ctx_glyph_kern_stb (CtxFont *font, Ctx *ctx, uint32_t unicharA, uint32_t unicharB)
+void
+ctx_string_set (CtxString *string, const char *new_string)
 {
-  stbtt_fontinfo *ttf_info = &font->stb.ttf_info;
-  float font_size = ctx->state.gstate.font_size;
-  float scale = stbtt_ScaleForPixelHeight (ttf_info, font_size);
-  int glyphA = ctx_glyph_stb_find (font, unicharA);
-  int glyphB = ctx_glyph_stb_find (font, unicharB);
-  return stbtt_GetGlyphKernAdvance (ttf_info, glyphA, glyphB) * scale;
+  ctx_string_clear (string);
+  _ctx_string_append_str (string, new_string);
 }
 
-static int
-ctx_glyph_stb (CtxFont *font, Ctx *ctx, uint32_t unichar, int stroke)
+static char *ctx_strdup (const char *str)
 {
-  stbtt_fontinfo *ttf_info = &font->stb.ttf_info;
-  int glyph = ctx_glyph_stb_find (font, unichar);
-  if (glyph==0)
-    { return -1; }
-  float font_size = ctx->state.gstate.font_size;
-  int   baseline = ctx->state.y;
-  float origin_x = ctx->state.x;
-  float origin_y = baseline;
-  float scale    = stbtt_ScaleForPixelHeight (ttf_info, font_size);;
-  stbtt_vertex *vertices = NULL;
-  ctx_begin_path (ctx);
-  int num_verts = stbtt_GetGlyphShape (ttf_info, glyph, &vertices);
-  for (int i = 0; i < num_verts; i++)
+  int len = strlen (str);
+  char *ret = (char*)malloc (len + 1);
+  memcpy (ret, str, len);
+  ret[len]=0;
+  return ret;
+}
+
+void ctx_string_replace_utf8 (CtxString *string, int pos, const char *new_glyph)
+{
+#if 1
+  int old_len = string->utf8_length;
+#else
+  int old_len = ctx_utf8_strlen (string->str);// string->utf8_length;
+#endif
+  if (CTX_LIKELY(pos == old_len))
     {
-      stbtt_vertex *vertex = &vertices[i];
-      switch (vertex->type)
-        {
-          case STBTT_vmove:
-            ctx_move_to (ctx,
-                         origin_x + vertex->x * scale, origin_y - vertex->y * scale);
-            break;
-          case STBTT_vline:
-            ctx_line_to (ctx,
-                         origin_x + vertex->x * scale, origin_y - vertex->y * scale);
-            break;
-          case STBTT_vcubic:
-            ctx_curve_to (ctx,
-                          origin_x + vertex->cx  * scale, origin_y - vertex->cy  * scale,
-                          origin_x + vertex->cx1 * scale, origin_y - vertex->cy1 * scale,
-                          origin_x + vertex->x   * scale, origin_y - vertex->y   * scale);
-            break;
-          case STBTT_vcurve:
-            ctx_quad_to (ctx,
-                         origin_x + vertex->cx  * scale, origin_y - vertex->cy  * scale,
-                         origin_x + vertex->x   * scale, origin_y - vertex->y   * scale);
-            break;
-        }
+      _ctx_string_append_str (string, new_glyph);
+      return;
     }
-  stbtt_FreeShape (ttf_info, vertices);
-  if (stroke)
+
+  char tmpg[3]=" ";
+  int new_len = ctx_utf8_len (*new_glyph);
+  if (new_len <= 1 && new_glyph[0] < 32)
     {
-      ctx_stroke (ctx);
+      new_len = 1;
+      tmpg[0]=new_glyph[0]+64;
+      new_glyph = tmpg;
+    }
+  {
+    for (int i = old_len; i <= pos + 2; i++)
+      {
+        _ctx_string_append_byte (string, ' ');
+        old_len++;
+      }
+  }
+  if (string->length + new_len  >= string->allocated_length - 2)
+    {
+      char *tmp;
+      char *defer;
+      string->allocated_length = string->length + new_len + 2;
+      tmp = (char*) ctx_calloc (string->allocated_length + 1 + 8, 1);
+      strcpy (tmp, string->str);
+      defer = string->str;
+      string->str = tmp;
+      free (defer);
+    }
+  char *p = (char *) ctx_utf8_skip (string->str, pos);
+  int prev_len = ctx_utf8_len (*p);
+  char *rest;
+  if (*p == 0 || * (p+prev_len) == 0)
+    {
+      rest = ctx_strdup ("");
     }
   else
-    { ctx_fill (ctx); }
-  return 0;
+    {
+      if (p + prev_len >= string->length  + string->str)
+        { rest = ctx_strdup (""); }
+      else
+        { rest = ctx_strdup (p + prev_len); }
+    }
+  memcpy (p, new_glyph, new_len);
+  memcpy (p + new_len, rest, strlen (rest) + 1);
+  string->length += new_len;
+  string->length -= prev_len;
+  free (rest);
+  //string->length = strlen (string->str);
+  //string->utf8_length = ctx_utf8_strlen (string->str);
+}
+
+void ctx_string_replace_unichar (CtxString *string, int pos, uint32_t unichar)
+{ /* Encode unichar as utf8 and store it at glyph index pos through ctx_string_replace_utf8. */
+  uint8_t utf8[8];
+  utf8[ctx_unichar_to_utf8 (unichar, utf8)] = 0; /* terminate: replace_utf8 appends with a NUL-scanning copy when pos is the end */
+  ctx_string_replace_utf8 (string, pos, (char *) utf8);
+}
+
+uint32_t ctx_string_get_unichar (CtxString *string, int pos) /* decode the character at utf8 index pos */
+{
+  char *p = (char *) ctx_utf8_skip (string->str, pos); /* presumably advances pos characters - ctx_utf8_skip is defined elsewhere */
+  if (!p)
+    { return 0; } /* 0 is returned when the skip yields no position */
+  return ctx_utf8_to_unichar (p);
+}
+
+void ctx_string_insert_utf8 (CtxString *string, int pos, const char *new_glyph) /* insert the first utf8 character of new_glyph at glyph index pos */
+{
+  int new_len = ctx_utf8_len (*new_glyph);
+  int old_len = string->utf8_length;
+  char tmpg[3]=" ";
+  if (old_len == pos && 0) /* the "&& 0" keeps this append shortcut switched off */
+    {
+      ctx_string_append_str (string, new_glyph);
+      return;
+    }
+  if (new_len <= 1 && new_glyph[0] < 32) /* a single byte below 32 is stored shifted up by 64, so 1 becomes 'A' */
+    {
+      tmpg[0]=new_glyph[0]+64;
+      new_glyph = tmpg;
+    }
+  {
+    for (int i = old_len; i <= pos; i++) /* pad with spaces until the string has at least pos + 1 glyphs */
+      {
+        _ctx_string_append_byte (string, ' ');
+        old_len++;
+      }
+  }
+  if (string->length + new_len + 1  > string->allocated_length) /* no room for the glyph plus terminator */
+    {
+      char *tmp;
+      char *defer;
+      string->allocated_length = string->length + new_len + 1;
+      tmp = (char*) ctx_calloc (string->allocated_length + 1, 1); /* fresh zeroed buffer, one spare byte beyond the recorded size */
+      strcpy (tmp, string->str);
+      defer = string->str;
+      string->str = tmp;
+      free (defer);
+    }
+  char *p = (char *) ctx_utf8_skip (string->str, pos); /* presumably the start of glyph pos - ctx_utf8_skip is defined elsewhere */
+  int prev_len = ctx_utf8_len (*p);
+  char *rest;
+  if ( (*p == 0 || * (p+prev_len) == 0) && pos != 0) /* p is at or on the last glyph: nothing is carried over */
+    {
+      rest = ctx_strdup (""); /* NOTE(review): the glyph at p is overwritten here; fine for a padding space, confirm for a real last glyph */
+    }
+  else
+    {
+      rest = ctx_strdup (p); /* tail starting at p is saved so it can be written back after the new glyph */
+    }
+  memcpy (p, new_glyph, new_len);
+  memcpy (p + new_len, rest, strlen (rest) + 1); /* + 1 copies the terminator as well */
+  free (rest);
+  string->length = strlen (string->str); /* both lengths are recomputed from the buffer instead of adjusted */
+  string->utf8_length = ctx_utf8_strlen (string->str);
+}
+
+void ctx_string_insert_unichar (CtxString *string, int pos, uint32_t unichar) /* insert a code point at glyph index pos */
+{
+  uint8_t utf8[5]="";
+  utf8[ctx_unichar_to_utf8(unichar, utf8)]=0; /* the return value is used as the encoded length to place the terminator */
+  ctx_string_insert_utf8 (string, pos, (char*)utf8);
+}
+
+void ctx_string_remove (CtxString *string, int pos)
+{
+  /* Remove the glyph at utf8 index pos.  A string with fewer than
+   * pos + 1 glyphs is first padded with spaces, the same way
+   * ctx_string_insert_utf8 pads before inserting.
+   */
+  for (int i = string->utf8_length; i <= pos; i++)
+    {
+      _ctx_string_append_byte (string, ' ');
+    }
+  char *p = (char *) ctx_utf8_skip (string->str, pos);
+  /* p has to be tested before it is dereferenced; the glyph length
+   * lookup used to run ahead of this check */
+  if (!p || *p == 0)
+    {
+      return;
+    }
+  int prev_len = ctx_utf8_len (*p);
+  char *rest;
+  if (* (p+prev_len) == 0)
+    {
+      rest = ctx_strdup ("");
+    }
+  else
+    {
+      rest = ctx_strdup (p + prev_len);
+    }
+  strcpy (p, rest);
+  string->str[string->length - prev_len] = 0;
+  free (rest);
+  string->length = strlen (string->str);
+  string->utf8_length = ctx_utf8_strlen (string->str);
+}
+
+char *ctx_strdup_printf (const char *format, ...) /* printf into a newly malloc'ed string */
+{
+  va_list ap;
+  size_t needed;
+  char *buffer;
+  va_start (ap, format);
+  needed = vsnprintf (NULL, 0, format, ap) + 1; /* measuring pass: formatted length plus the terminator */
+  buffer = (char*)malloc (needed); /* NOTE(review): result is not checked for NULL */
+  va_end (ap);
+  va_start (ap, format); /* the measuring pass consumed ap, so it is restarted for the real pass */
+  vsnprintf (buffer, needed, format, ap);
+  va_end (ap);
+  return buffer; /* comes from malloc */
+}
+
+void ctx_string_append_printf (CtxString *string, const char *format, ...) /* append printf-formatted text to string */
+{
+  va_list ap;
+  size_t needed;
+  char *buffer;
+  va_start (ap, format);
+  needed = vsnprintf (NULL, 0, format, ap) + 1; /* measuring pass: formatted length plus the terminator */
+  buffer = (char*)malloc (needed); /* NOTE(review): result is not checked for NULL */
+  va_end (ap);
+  va_start (ap, format); /* the measuring pass consumed ap, so it is restarted for the real pass */
+  vsnprintf (buffer, needed, format, ap);
+  va_end (ap);
+  ctx_string_append_str (string, buffer);
+  free (buffer); /* only a scratch copy, the bytes now live in string */
+}
+
+CtxString *ctx_string_new_printf (const char *format, ...) /* new CtxString initialised from a printf format */
+{
+  CtxString *string = ctx_string_new (""); /* starts empty, the formatted text is appended below */
+  va_list ap;
+  size_t needed;
+  char *buffer;
+  va_start (ap, format);
+  needed = vsnprintf (NULL, 0, format, ap) + 1; /* measuring pass: formatted length plus the terminator */
+  buffer = (char*)malloc (needed); /* NOTE(review): result is not checked for NULL */
+  va_end (ap);
+  va_start (ap, format); /* the measuring pass consumed ap, so it is restarted for the real pass */
+  vsnprintf (buffer, needed, format, ap);
+  va_end (ap);
+  ctx_string_append_str (string, buffer);
+  free (buffer); /* only a scratch copy, the bytes now live in string */
+  return string; /* release with ctx_string_free */
 }
-#endif
-
-#if CTX_FONT_ENGINE_CTX
 
-static int ctx_font_find_glyph_cached (CtxFont *font, uint32_t glyph)
+void ctx_drawlist_clear (Ctx *ctx)
 {
-#if 1
-  int min       = 0;
-  int max       = font->ctx.glyphs-1;
-  uint32_t found;
-
-  do {
-    int pos = (min + max)/2;
-    found = font->ctx.index[pos*2];
-    if (found == glyph)
-    {
-      return font->ctx.index[pos*2+1];
-    } else if (min == max)
-      return -1;
-    else if (min == max-1)
-      return -1;
-    else if (found < glyph)
-    {
-      min = pos;
-    } else {
-      max = pos;
-    }
-
-  } while (min != max);
-
-  return -1;
-#else
-  for (int i = 0; i < font->ctx.glyphs; i++)
-    {
-      if (font->ctx.index[i * 2] == glyph)
-        { return font->ctx.index[i * 2 + 1]; }
-    }
-  return -1;
-#endif
+  ctx->drawlist.count = 0;
+  ctx->drawlist.bitpack_pos = 0;
 }
 
-static int ctx_glyph_find_ctx (CtxFont *font, Ctx *ctx, uint32_t unichar)
+static void ctx_drawlist_backend_free (CtxBackend *backend)
 {
-  int ret = ctx_font_find_glyph_cached (font, unichar);
-  if (ret >= 0) return ret;
+  free (backend);
+}
 
-  for (int i = 0; i < font->ctx.length; i++)
-  {
-    CtxEntry *entry = (CtxEntry *) &font->ctx.data[i];
-    if (entry->code == CTX_DEFINE_GLYPH &&
-        entry->data.u32[0] == unichar)
+static void ctx_update_current_path (Ctx *ctx, CtxEntry *entry)
+{
+#if CTX_CURRENT_PATH
+  switch (entry->code)
     {
-       return i;
-       // XXX this could be prone to insertion of valid header
-       // data in included bitmaps.. is that an issue?
-       //   
+      case CTX_TEXT:
+      case CTX_STROKE_TEXT:
+      case CTX_BEGIN_PATH:
+        ctx->current_path.count = 0;
+        break;
+      case CTX_CLIP:
+      case CTX_FILL:
+      case CTX_STROKE:
+              // XXX unless preserve
+        ctx->current_path.count = 0;
+        break;
+      case CTX_CLOSE_PATH:
+      case CTX_LINE_TO:
+      case CTX_MOVE_TO:
+      case CTX_QUAD_TO:
+      case CTX_SMOOTH_TO:
+      case CTX_SMOOTHQ_TO:
+      case CTX_REL_QUAD_TO:
+      case CTX_REL_SMOOTH_TO:
+      case CTX_REL_SMOOTHQ_TO:
+      case CTX_CURVE_TO:
+      case CTX_REL_CURVE_TO:
+      case CTX_ARC:
+      case CTX_ARC_TO:
+      case CTX_REL_ARC_TO:
+      case CTX_RECTANGLE:
+      case CTX_ROUND_RECTANGLE:
+        ctx_drawlist_add_entry (&ctx->current_path, entry);
+        break;
+      default:
+        break;
     }
-  }
-  return -1;
+#endif
 }
 
-
-static float
-ctx_glyph_kern_ctx (CtxFont *font, Ctx *ctx, uint32_t unicharA, uint32_t unicharB)
+static void
+ctx_drawlist_process (Ctx *ctx, CtxEntry *entry)
 {
-  float font_size = ctx->state.gstate.font_size;
-  int first_kern = ctx_glyph_find_ctx (font, ctx, unicharA);
-  if (first_kern < 0) return 0.0;
-
-#if CTX_EVENTS
-  if (ctx_renderer_is_term (ctx) && (3.02 - font_size) < 0.03)
-    return 0.0f;
+#if CTX_CURRENT_PATH
+  ctx_update_current_path (ctx, entry);
 #endif
-
-  for (int i = first_kern + 1; i < font->ctx.length; i++)
-    {
-      CtxEntry *entry = (CtxEntry *) &font->ctx.data[i];
-      if (entry->code == CTX_KERNING_PAIR)
-        {
-          if (entry->data.u16[0] == unicharA && entry->data.u16[1] == unicharB)
-            { return entry->data.s32[1] / 255.0 * font_size / CTX_BAKE_FONT_SIZE; }
-        }
-      if (entry->code == CTX_DEFINE_GLYPH)
-        return 0.0;
-    }
-  return 0.0;
+  /* these functions can alter the code and coordinates of
+     command that in the end gets added to the drawlist
+   */
+  ctx_interpret_style (&ctx->state, entry, ctx);
+  ctx_interpret_transforms (&ctx->state, entry, ctx);
+  ctx_interpret_pos (&ctx->state, entry, ctx);
+  ctx_drawlist_add_entry (&ctx->drawlist, entry);
 }
-#if 0
-static int ctx_glyph_find (Ctx *ctx, CtxFont *font, uint32_t unichar)
+
+static CtxBackend *ctx_drawlist_backend_new (void)
 {
-  for (int i = 0; i < font->ctx.length; i++)
-    {
-      CtxEntry *entry = (CtxEntry *) &font->ctx.data[i];
-      if (entry->code == CTX_DEFINE_GLYPH && entry->data.u32[0] == unichar)
-        { return i; }
-    }
-  return 0;
+  CtxBackend *backend = (CtxBackend*)calloc (sizeof (CtxBackend), 1);
+  backend->process = (void(*)(Ctx *a, CtxCommand *c))ctx_drawlist_process;
+  backend->free    = (void(*)(void *a))ctx_drawlist_backend_free;
+  return backend;
 }
-#endif
 
+#if CTX_RASTERIZER
 
-static float
-ctx_glyph_width_ctx (CtxFont *font, Ctx *ctx, uint32_t unichar)
+
+static int
+ctx_rect_intersect (const CtxIntRectangle *a, const CtxIntRectangle *b)
 {
-  CtxState *state = &ctx->state;
-  float font_size = state->gstate.font_size;
-  int   start     = ctx_glyph_find_ctx (font, ctx, unichar);
-  if (start < 0)
-    { return 0.0; }  // XXX : fallback
+  if (a->x >= b->x + b->width ||
+      b->x >= a->x + a->width ||
+      a->y >= b->y + b->height ||
+      b->y >= a->y + a->height) return 0;
 
-#if CTX_EVENTS
-  if (ctx_renderer_is_term (ctx) && (3.02 - font_size) < 0.03)
-    return 2.0f;
-#endif
+  return 1;
+}
 
-  for (int i = start; i < font->ctx.length; i++)
-    {
-      CtxEntry *entry = (CtxEntry *) &font->ctx.data[i];
-      if (entry->code == CTX_DEFINE_GLYPH)
-        if (entry->data.u32[0] == (unsigned) unichar)
-          { return (entry->data.u32[1] / 255.0 * font_size / CTX_BAKE_FONT_SIZE); }
+static void
+_ctx_add_hash (CtxHasher *hasher, CtxIntRectangle *shape_rect, char *hash)
+{
+  CtxIntRectangle rect = {0,0, hasher->rasterizer.blit_width/hasher->cols,
+                            hasher->rasterizer.blit_height/hasher->rows};
+  int hno = 0;
+  for (int row = 0; row < hasher->rows; row++)
+    for (int col = 0; col < hasher->cols; col++, hno++)
+     {
+      rect.x = col * rect.width;
+      rect.y = row * rect.height;
+      if (ctx_rect_intersect (shape_rect, &rect))
+      {
+        int temp = hasher->hashes[(row * hasher->cols + col)  *20 + 0];
+        for (int i = 0; i <19;i++)
+           hasher->hashes[(row * hasher->cols + col)  *20 + i] =
+             hasher->hashes[(row * hasher->cols + col)  *20 + i+1]^
+             hash[i];
+        hasher->hashes[(row * hasher->cols + col)  *20 + 19] =
+                temp ^ hash[19];
+      }
     }
-  return 0.0;
 }
 
 static int
-ctx_glyph_drawlist (CtxFont *font, Ctx *ctx, CtxDrawlist *drawlist, uint32_t unichar, int stroke)
+ctx_str_count_lines (const char *str)
 {
-  CtxState *state = &ctx->state;
-  CtxIterator iterator;
-  float origin_x = state->x;
-  float origin_y = state->y;
-  ctx_current_point (ctx, &origin_x, &origin_y);
-  int in_glyph = 0;
-  float font_size = state->gstate.font_size;
-  int start = 0;
-  if (font->type == 0)
-  {
-  start = ctx_glyph_find_ctx (font, ctx, unichar);
-  if (start < 0)
-    { return -1; }  // XXX : fallback glyph
-  }
-  ctx_iterator_init (&iterator, drawlist, start, CTX_ITERATOR_EXPAND_BITPACK);
-  CtxCommand *command;
+  int count = 0;
+  for (const char *p = str; *p; p++)
+    if (*p == '\n') count ++;
+  return count;
+}
 
-  /* XXX :  do a binary search instead of a linear search */
-  while ( (command= ctx_iterator_next (&iterator) ) )
+static void
+ctx_hasher_process (Ctx *ctx, CtxCommand *command)
+{ /* Backend process callback: path and state commands are forwarded to the rasterizer helpers; fill, stroke and text commands are hashed into the tile grid. */
+  CtxEntry *entry = &command->entry;
+  CtxRasterizer *rasterizer = (CtxRasterizer *) ctx->backend;
+  CtxHasher *hasher = (CtxHasher*) ctx->backend; /* same backend pointer, viewed as hasher */
+  CtxState *state = rasterizer->state;
+  CtxCommand *c = (CtxCommand *) entry;
+  int aa = 15;//rasterizer->aa;
+
+  ctx_interpret_pos_bare (rasterizer->state, entry, NULL);
+  ctx_interpret_style (rasterizer->state, entry, NULL);
+
+  switch (c->code)
     {
-      CtxEntry *entry = &command->entry;
-      if (in_glyph)
+      case CTX_TEXT:
         {
-          if (entry->code == CTX_DEFINE_GLYPH)
-            {
-              if (stroke)
-                { ctx_stroke (ctx); }
-              else
-                {
-#if CTX_RASTERIZER
-#if CTX_ENABLE_SHADOW_BLUR
-      if (ctx->renderer && ((CtxRasterizer*)(ctx->renderer))->in_shadow)
-      {
-        ctx_rasterizer_shadow_fill ((CtxRasterizer*)ctx->renderer);
-        ((CtxRasterizer*)(ctx->renderer))->in_shadow = 1;
-      }
-      else
+          const char *str = ctx_arg_string();
+          CtxSHA1 sha1;
+          memcpy (&sha1, &hasher->sha1_fill[hasher->source_level], sizeof (CtxSHA1)); /* start from the fill-source hash state of the current save level */
+          char ctx_sha1_hash[20];
+          float width = ctx_text_width (rasterizer->backend.ctx, str);
+
+
+          float height = ctx_get_font_size (rasterizer->backend.ctx);
+           CtxIntRectangle shape_rect;
+
+           float tx = rasterizer->x;
+           float ty = rasterizer->y - height * 1.2; /* box starts 1.2 * font size before the current y */
+           float tw = width;
+           float th = height * (ctx_str_count_lines (str) + 1.5); /* 1.5 font sizes plus one per '\n' */
+
+           _ctx_user_to_device (rasterizer->state, &tx, &ty);
+           _ctx_user_to_device_distance (rasterizer->state, &tw, &th);
+          
+           shape_rect.x=tx;
+           shape_rect.y=ty;
+           shape_rect.width = tw;
+           shape_rect.height = th;
+          switch ((int)ctx_state_get (rasterizer->state, CTX_text_align))
+          {
+          case CTX_TEXT_ALIGN_LEFT:
+          case CTX_TEXT_ALIGN_START:
+                  break;
+          case CTX_TEXT_ALIGN_END:
+          case CTX_TEXT_ALIGN_RIGHT:
+           shape_rect.x -= shape_rect.width;
+           break;
+          case CTX_TEXT_ALIGN_CENTER:
+           shape_rect.x -= shape_rect.width/2;
+           break;
+                   // XXX : doesn't take all text-alignments into account
+          }
+
+#if 0
+          uint32_t color;
+          ctx_color_get_rgba8 (rasterizer->state, &rasterizer->state->gstate.source_fill.color, 
(uint8_t*)(&color));
 #endif
+          ctx_sha1_process(&sha1, (const unsigned char*)ctx_arg_string(), strlen  (ctx_arg_string()));
+#if 1
+        ctx_sha1_process(&sha1, (unsigned char*)(&rasterizer->state->gstate.transform), sizeof 
(rasterizer->state->gstate.transform));
+    //      ctx_sha1_process(&sha1, (unsigned char*)&color, 4);
 #endif
-         ctx_fill (ctx); 
-               
-                }
-              ctx_restore (ctx);
-              return 0;
-            }
-          ctx_process (ctx, entry);
+          ctx_sha1_process(&sha1, (unsigned char*)&shape_rect, sizeof (CtxIntRectangle));
+          ctx_sha1_done(&sha1, (unsigned char*)ctx_sha1_hash);
+          _ctx_add_hash (hasher, &shape_rect, ctx_sha1_hash);
+
+          ctx_rasterizer_rel_move_to (rasterizer, width, 0); /* advance the current point by the text width */
         }
-      else if (entry->code == CTX_DEFINE_GLYPH && entry->data.u32[0] == unichar)
+        ctx_rasterizer_reset (rasterizer);
+        break;
+      case CTX_STROKE_TEXT:
         {
-          in_glyph = 1;
-          ctx_save (ctx);
-          ctx_translate (ctx, origin_x, origin_y);
-          ctx_move_to (ctx, 0, 0);
-          ctx_begin_path (ctx);
-          ctx_scale (ctx, font_size / CTX_BAKE_FONT_SIZE,
-                     font_size / CTX_BAKE_FONT_SIZE);
-        }
-    }
-  if (stroke)
-    { ctx_stroke (ctx);
-    }
-  else
-    { 
-    
-#if CTX_RASTERIZER
-#if CTX_ENABLE_SHADOW_BLUR
-      if (ctx->renderer && ((CtxRasterizer*)(ctx->renderer))->in_shadow)
-      {
-        ctx_rasterizer_shadow_fill ((CtxRasterizer*)ctx->renderer);
-        ((CtxRasterizer*)(ctx->renderer))->in_shadow = 1;
-      }
-      else
+          CtxSHA1 sha1;
+          const char *str = ctx_arg_string();
+          memcpy (&sha1, &hasher->sha1_stroke[hasher->source_level], sizeof (CtxSHA1)); /* start from the stroke-source hash state of the current save level */
+          char ctx_sha1_hash[20];
+          float width = ctx_text_width (rasterizer->backend.ctx, str);
+          float height = ctx_get_font_size (rasterizer->backend.ctx);
+
+           CtxIntRectangle shape_rect;
+
+           float tx = rasterizer->x;
+           float ty = rasterizer->y;
+           float tw = width;
+           float th = height * (ctx_str_count_lines (str)); /* NOTE(review): 0 for text without '\n', unlike the + 1.5 used for CTX_TEXT - confirm this is intended */
+
+           _ctx_user_to_device (rasterizer->state, &tx, &ty);
+           _ctx_user_to_device_distance (rasterizer->state, &tw, &th);
+          
+           shape_rect.x=tx;
+           shape_rect.y=ty;
+           shape_rect.width = tw;
+           shape_rect.height = th;
+
+#if 0
+          uint32_t color;
+          ctx_color_get_rgba8 (rasterizer->state, &rasterizer->state->gstate.source_stroke.color, 
(uint8_t*)(&color));
 #endif
+          ctx_sha1_process(&sha1, (unsigned char*)ctx_arg_string(), strlen  (ctx_arg_string()));
+#if 1
+          ctx_sha1_process(&sha1, (unsigned char*)(&rasterizer->state->gstate.transform), sizeof 
(rasterizer->state->gstate.transform));
+    //    ctx_sha1_process(&sha1, (unsigned char*)&color, 4);
 #endif
-      {
-         ctx_fill (ctx); 
-      }
-    }
-  ctx_restore (ctx);
-  return -1;
-}
+          ctx_sha1_process(&sha1, (unsigned char*)&shape_rect, sizeof (CtxIntRectangle));
+          ctx_sha1_done(&sha1, (unsigned char*)ctx_sha1_hash);
+          _ctx_add_hash (hasher, &shape_rect, ctx_sha1_hash);
+
+          ctx_rasterizer_rel_move_to (rasterizer, width, 0);
+        }
+        ctx_rasterizer_reset (rasterizer);
+        break;
+      case CTX_GLYPH:
+         {
+          CtxSHA1 sha1;
+          memcpy (&sha1, &hasher->sha1_fill[hasher->source_level], sizeof (CtxSHA1));
+
+          char ctx_sha1_hash[20];
+          uint8_t string[8];
+          string[ctx_unichar_to_utf8 (c->u32.a0, string)]=0; /* encode the glyph's code point as a NUL-terminated UTF-8 string */
+          float width = ctx_text_width (rasterizer->backend.ctx, (char*)string);
+          float height = ctx_get_font_size (rasterizer->backend.ctx);
+
+          float tx = rasterizer->x;
+          float ty = rasterizer->y;
+          float tw = width;
+          float th = height * 2;
+
+          _ctx_user_to_device (rasterizer->state, &tx, &ty);
+          _ctx_user_to_device_distance (rasterizer->state, &tw, &th);
+          CtxIntRectangle shape_rect = {(int)tx,(int)(ty-th/2),(int)tw,(int)th}; /* box of two font sizes, centred on the current y */
 
-static int
-ctx_glyph_ctx (CtxFont *font, Ctx *ctx, uint32_t unichar, int stroke)
-{
-  CtxDrawlist drawlist = { (CtxEntry *) font->ctx.data,
-                           font->ctx.length,
-                           font->ctx.length, 0, 0
-                         };
-  return ctx_glyph_drawlist (font, ctx, &drawlist, unichar, stroke);
-}
 
 #if 0
-uint32_t ctx_glyph_no (Ctx *ctx, int no)
-{
-  CtxFont *font = &ctx_fonts[ctx->state.gstate.font];
-  if (no < 0 || no >= font->ctx.glyphs)
-    { return 0; }
-  return font->ctx.index[no*2];
-}
+          uint32_t color;
+          ctx_color_get_rgba8 (rasterizer->state, &rasterizer->state->gstate.source_fill.color, 
(uint8_t*)(&color));
 #endif
-
-static void ctx_font_init_ctx (CtxFont *font)
-{
-  int glyph_count = 0;
-  for (int i = 0; i < font->ctx.length; i++)
-    {
-      CtxEntry *entry = &font->ctx.data[i];
-      if (entry->code == CTX_DEFINE_GLYPH)
-        { glyph_count ++; }
-    }
-  font->ctx.glyphs = glyph_count;
-#if CTX_DRAWLIST_STATIC
-  static uint32_t idx[512]; // one might have to adjust this for
-  // larger fonts XXX
-  // should probably be made a #define
-  font->ctx.index = &idx[0];
-#else
-  font->ctx.index = (uint32_t *) malloc (sizeof (uint32_t) * 2 * glyph_count);
+          ctx_sha1_process(&sha1, string, strlen ((const char*)string));
+          ctx_sha1_process(&sha1, (unsigned char*)(&rasterizer->state->gstate.transform), sizeof 
(rasterizer->state->gstate.transform));
+#if 0
+          ctx_sha1_process(&sha1, (unsigned char*)&color, 4);
 #endif
-  int no = 0;
-  for (int i = 0; i < font->ctx.length; i++)
-    {
-      CtxEntry *entry = &font->ctx.data[i];
-      if (entry->code == CTX_DEFINE_GLYPH)
+          ctx_sha1_process(&sha1, (unsigned char*)&shape_rect, sizeof (CtxIntRectangle));
+          ctx_sha1_done(&sha1, (unsigned char*)ctx_sha1_hash);
+          _ctx_add_hash (hasher, &shape_rect, ctx_sha1_hash);
+
+          ctx_rasterizer_rel_move_to (rasterizer, width, 0);
+          ctx_rasterizer_reset (rasterizer);
+         }
+        break;
+
+      case CTX_FILL:
+        {
+          CtxSHA1 sha1;
+          memcpy (&sha1, &hasher->sha1_fill[hasher->source_level], sizeof (CtxSHA1));
+          char ctx_sha1_hash[20];
+
+          /* we want this hasher to be as good as possible internally,
+           * since it is also used in the small shapes rasterization
+           * cache
+           */
+        uint64_t hash = ctx_rasterizer_poly_to_hash (rasterizer); // + hasher->salt;
+        CtxIntRectangle shape_rect = { /* path extents divided by CTX_SUBDIV and aa, widened by a few units */
+          (int)(rasterizer->col_min / CTX_SUBDIV - 2),
+          (int)(rasterizer->scan_min / aa - 2),
+          (int)(3+(rasterizer->col_max - rasterizer->col_min + 1) / CTX_SUBDIV),
+          (int)(3+(rasterizer->scan_max - rasterizer->scan_min + 1) / aa)
+        };
+
+        hash ^= (rasterizer->state->gstate.fill_rule * 23);
+
+        ctx_sha1_process(&sha1, (unsigned char*)&hash, 8);
+
+        {
+          int is = rasterizer->state->gstate.image_smoothing;
+          ctx_sha1_process(&sha1, (uint8_t*)&is, sizeof(int));
+        }
+
+          ctx_sha1_done(&sha1, (unsigned char*)ctx_sha1_hash);
+          _ctx_add_hash (hasher, &shape_rect, ctx_sha1_hash);
+
+        if (!rasterizer->preserve) /* a preceding CTX_PRESERVE keeps the path for the next paint command */
+          ctx_rasterizer_reset (rasterizer);
+        rasterizer->preserve = 0;
+        }
+        break;
+      case CTX_STROKE:
+        {
+          CtxSHA1 sha1;
+          memcpy (&sha1, &hasher->sha1_stroke[hasher->source_level], sizeof (CtxSHA1));
+          char ctx_sha1_hash[20];
+        uint64_t hash = ctx_rasterizer_poly_to_hash (rasterizer);
+        CtxIntRectangle shape_rect = {
+          (int)(rasterizer->col_min / CTX_SUBDIV - rasterizer->state->gstate.line_width),
+          (int)(rasterizer->scan_min / aa - rasterizer->state->gstate.line_width),
+          (int)((rasterizer->col_max - rasterizer->col_min + 1) / CTX_SUBDIV + 
rasterizer->state->gstate.line_width),
+          (int)((rasterizer->scan_max - rasterizer->scan_min + 1) / aa + 
rasterizer->state->gstate.line_width)
+        };
+
+        shape_rect.width += rasterizer->state->gstate.line_width * 2; /* pad by another line_width per side, on top of the margin in the initializer */
+        shape_rect.height += rasterizer->state->gstate.line_width * 2;
+        shape_rect.x -= rasterizer->state->gstate.line_width;
+        shape_rect.y -= rasterizer->state->gstate.line_width;
+
+        hash ^= (int)(rasterizer->state->gstate.line_width * 110);
+        hash ^= (rasterizer->state->gstate.line_cap * 23);
+        hash ^= (rasterizer->state->gstate.source_stroke.type * 117);
+
+        ctx_sha1_process(&sha1, (unsigned char*)&hash, 8);
+
+        uint32_t color;
+        ctx_color_get_rgba8 (rasterizer->state, &rasterizer->state->gstate.source_stroke.color, 
(uint8_t*)(&color));
+
+          ctx_sha1_process(&sha1, (unsigned char*)&color, 4);
+
+          ctx_sha1_done(&sha1, (unsigned char*)ctx_sha1_hash);
+          _ctx_add_hash (hasher, &shape_rect, ctx_sha1_hash);
+        }
+        if (!rasterizer->preserve)
+          ctx_rasterizer_reset (rasterizer);
+        rasterizer->preserve = 0;
+        break;
+        /* the above cases are the painting cases and 
+         * the only ones differing from the rasterizer's process switch
+         */
+
+      case CTX_LINE_TO:
+        ctx_rasterizer_line_to (rasterizer, c->c.x0, c->c.y0);
+        break;
+      case CTX_REL_LINE_TO:
+        ctx_rasterizer_rel_line_to (rasterizer, c->c.x0, c->c.y0);
+        break;
+      case CTX_MOVE_TO:
+        ctx_rasterizer_move_to (rasterizer, c->c.x0, c->c.y0);
+        break;
+      case CTX_REL_MOVE_TO:
+        ctx_rasterizer_rel_move_to (rasterizer, c->c.x0, c->c.y0);
+        break;
+      case CTX_CURVE_TO:
+        ctx_rasterizer_curve_to (rasterizer, c->c.x0, c->c.y0,
+                                 c->c.x1, c->c.y1,
+                                 c->c.x2, c->c.y2);
+        break;
+      case CTX_REL_CURVE_TO:
+        ctx_rasterizer_rel_curve_to (rasterizer, c->c.x0, c->c.y0,
+                                     c->c.x1, c->c.y1,
+                                     c->c.x2, c->c.y2);
+        break;
+      case CTX_QUAD_TO:
+        ctx_rasterizer_quad_to (rasterizer, c->c.x0, c->c.y0, c->c.x1, c->c.y1);
+        break;
+      case CTX_REL_QUAD_TO:
+        ctx_rasterizer_rel_quad_to (rasterizer, c->c.x0, c->c.y0, c->c.x1, c->c.y1);
+        break;
+      case CTX_ARC:
+        ctx_rasterizer_arc (rasterizer, c->arc.x, c->arc.y, c->arc.radius, c->arc.angle1, c->arc.angle2, 
c->arc.direction);
+        break;
+      case CTX_RECTANGLE:
+        ctx_rasterizer_rectangle (rasterizer, c->rectangle.x, c->rectangle.y,
+                                  c->rectangle.width, c->rectangle.height);
+        break;
+      case CTX_ROUND_RECTANGLE:
+        ctx_rasterizer_round_rectangle (rasterizer, c->rectangle.x, c->rectangle.y,
+                                        c->rectangle.width, c->rectangle.height,
+                                        c->rectangle.radius);
+        break;
+      case CTX_SET_PIXEL:
+        ctx_rasterizer_set_pixel (rasterizer, c->set_pixel.x, c->set_pixel.y,
+                                  c->set_pixel.rgba[0],
+                                  c->set_pixel.rgba[1],
+                                  c->set_pixel.rgba[2],
+                                  c->set_pixel.rgba[3]);
+        break;
+      case CTX_PRESERVE:
+        rasterizer->preserve = 1;
+        break;
+      case CTX_SAVE:
+      case CTX_RESTORE:
+
+        if (c->code == CTX_SAVE)
         {
-          font->ctx.index[no*2]   = entry->data.u32[0];
-          font->ctx.index[no*2+1] = i;
-          no++;
+           if (hasher->source_level + 1 < CTX_MAX_STATES)
+           {
+             hasher->source_level++; /* the new level inherits both source hash states from the level below */
+             hasher->sha1_fill[hasher->source_level] =
+               hasher->sha1_fill[hasher->source_level-1];
+             hasher->sha1_stroke[hasher->source_level] =
+               hasher->sha1_stroke[hasher->source_level-1];
+           }
+        }
+        else
+        {
+           if (hasher->source_level - 1 >= 0)
+           {
+             hasher->source_level--; /* NOTE(review): the copies below overwrite the outer level with the inner level's hash state - confirm restore is not meant to bring back the saved one */
+             hasher->sha1_fill[hasher->source_level] =
+               hasher->sha1_fill[hasher->source_level+1];
+             hasher->sha1_stroke[hasher->source_level] =
+               hasher->sha1_stroke[hasher->source_level+1];
+           }
         }
-    }
-}
 
-int
-ctx_load_font_ctx (const char *name, const void *data, int length);
-#if CTX_FONTS_FROM_FILE
-int
-ctx_load_font_ctx_file (const char *name, const char *path);
-#endif
+        /* FALLTHROUGH */
+      case CTX_ROTATE:
+      case CTX_SCALE:
+      case CTX_TRANSLATE:
 
-static CtxFontEngine ctx_font_engine_ctx =
-{
-#if CTX_FONTS_FROM_FILE
-  ctx_load_font_ctx_file,
-#endif
-  ctx_load_font_ctx,
-  ctx_glyph_ctx,
-  ctx_glyph_width_ctx,
-  ctx_glyph_kern_ctx,
-};
 
-int
-ctx_load_font_ctx (const char *name, const void *data, int length)
-{
-  if (length % sizeof (CtxEntry) )
-    { return -1; }
-  if (ctx_font_count >= CTX_MAX_FONTS)
-    { return -1; }
-  ctx_fonts[ctx_font_count].type = 0;
-  ctx_fonts[ctx_font_count].name = name;
-  ctx_fonts[ctx_font_count].ctx.data = (CtxEntry *) data;
-  ctx_fonts[ctx_font_count].ctx.length = length / sizeof (CtxEntry);
-  ctx_font_init_ctx (&ctx_fonts[ctx_font_count]);
-  ctx_fonts[ctx_font_count].engine = &ctx_font_engine_ctx;
-  ctx_font_count++;
-  return ctx_font_count-1;
-}
 
-#if CTX_FONTS_FROM_FILE
-int
-ctx_load_font_ctx_file (const char *name, const char *path)
-{
-  uint8_t *contents = NULL;
-  long length = 0;
-  ctx_get_contents (path, &contents, &length);
-  if (!contents)
-    {
-      ctx_log ( "File load failed\n");
-      return -1;
-    }
-  return ctx_load_font_ctx (name, contents, length);
-}
-#endif
-#endif
+        rasterizer->uses_transforms = 1;
+        ctx_interpret_transforms (rasterizer->state, entry, NULL);
 
-#if CTX_FONT_ENGINE_CTX_FS
+        
+        break;
+      case CTX_FONT:
+        ctx_rasterizer_set_font (rasterizer, ctx_arg_string() );
+        break;
+      case CTX_BEGIN_PATH:
+        ctx_rasterizer_reset (rasterizer);
+        break;
+      case CTX_CLIP:
+        // should perhaps modify a global state to include
+        // in hash?
+        ctx_rasterizer_clip (rasterizer);
+        break;
+      case CTX_CLOSE_PATH:
+        ctx_rasterizer_finish_shape (rasterizer);
+        break;
+      case CTX_DEFINE_TEXTURE:
+        {
+        ctx_sha1_init (&hasher->sha1_fill[hasher->source_level]); /* a new source restarts the fill hash state of this save level */
+        ctx_sha1_process(&hasher->sha1_fill[hasher->source_level], 
&rasterizer->state->gstate.global_alpha_u8, 1);
+        ctx_sha1_process (&hasher->sha1_fill[hasher->source_level], (uint8_t*)c->define_texture.eid, strlen 
(c->define_texture.eid));
+        ctx_sha1_process(&hasher->sha1_fill[hasher->source_level], (unsigned 
char*)(&rasterizer->state->gstate.transform), sizeof (rasterizer->state->gstate.transform));
 
-static float
-ctx_glyph_kern_ctx_fs (CtxFont *font, Ctx *ctx, uint32_t unicharA, uint32_t unicharB)
-{
-#if 0
-  float font_size = ctx->state.gstate.font_size;
-  int first_kern = ctx_glyph_find_ctx (font, ctx, unicharA);
-  if (first_kern < 0) return 0.0;
-  for (int i = first_kern + 1; i < font->ctx.length; i++)
-    {
-      CtxEntry *entry = (CtxEntry *) &font->ctx.data[i];
-      if (entry->code == CTX_KERNING_PAIR)
+        rasterizer->comp_op = NULL; // why?
+        }
+        break;
+      case CTX_TEXTURE:
+        ctx_sha1_init (&hasher->sha1_fill[hasher->source_level]);
+        ctx_sha1_process(&hasher->sha1_fill[hasher->source_level], 
&rasterizer->state->gstate.global_alpha_u8, 1);
+        ctx_sha1_process (&hasher->sha1_fill[hasher->source_level], (uint8_t*)c->texture.eid, strlen 
(c->texture.eid));
+        ctx_sha1_process (&hasher->sha1_fill[hasher->source_level], 
(uint8_t*)(&rasterizer->state->gstate.transform), sizeof (rasterizer->state->gstate.transform));
+        rasterizer->comp_op = NULL; // why?
+        break;
+      case CTX_COLOR:
         {
-          if (entry->data.u16[0] == unicharA && entry->data.u16[1] == unicharB)
-            { return entry->data.s32[1] / 255.0 * font_size / CTX_BAKE_FONT_SIZE; }
+          uint32_t color;
+          if (((int)(ctx_arg_float(0))&512)) /* bit 512 of the first argument selects the stroke source, otherwise the fill source */
+          {
+            ctx_color_get_rgba8 (rasterizer->state, &rasterizer->state->gstate.source_stroke.color, 
(uint8_t*)(&color));
+            ctx_sha1_init (&hasher->sha1_stroke[hasher->source_level]);
+            ctx_sha1_process(&hasher->sha1_stroke[hasher->source_level], 
&rasterizer->state->gstate.global_alpha_u8, 1);
+            ctx_sha1_process(&hasher->sha1_stroke[hasher->source_level], (unsigned char*)&color, 4);
+          }
+          else
+          {
+            ctx_color_get_rgba8 (rasterizer->state, &rasterizer->state->gstate.source_fill.color, 
(uint8_t*)(&color));
+            ctx_sha1_init (&hasher->sha1_fill[hasher->source_level]);
+            ctx_sha1_process(&hasher->sha1_fill[hasher->source_level], 
&rasterizer->state->gstate.global_alpha_u8, 1);
+            ctx_sha1_process(&hasher->sha1_fill[hasher->source_level], (unsigned char*)&color, 4);
+          }
         }
-      if (entry->code == CTX_DEFINE_GLYPH)
-        return 0.0;
-    }
+        break;
+      case CTX_LINEAR_GRADIENT:
+          ctx_sha1_init (&hasher->sha1_fill[hasher->source_level]);
+          ctx_sha1_process(&hasher->sha1_fill[hasher->source_level], 
&rasterizer->state->gstate.global_alpha_u8, 1);
+          ctx_sha1_process(&hasher->sha1_fill[hasher->source_level], 
+                           (uint8_t*)c, sizeof (c->linear_gradient));
+          ctx_sha1_process (&hasher->sha1_fill[hasher->source_level], (unsigned 
char*)(&rasterizer->state->gstate.transform), sizeof (rasterizer->state->gstate.transform));
+        break;
+      case CTX_RADIAL_GRADIENT:
+          ctx_sha1_init (&hasher->sha1_fill[hasher->source_level]);
+          ctx_sha1_process(&hasher->sha1_fill[hasher->source_level], 
&rasterizer->state->gstate.global_alpha_u8, 1);
+          ctx_sha1_process(&hasher->sha1_fill[hasher->source_level], 
+                           (uint8_t*)c, sizeof (c->radial_gradient));
+          ctx_sha1_process (&hasher->sha1_fill[hasher->source_level], (unsigned 
char*)(&rasterizer->state->gstate.transform), sizeof (rasterizer->state->gstate.transform));
+        //ctx_state_gradient_clear_stops (rasterizer->state);
+        break;
+#if CTX_GRADIENTS
+      case CTX_GRADIENT_STOP:
+        {
+          float rgba[4]= {ctx_u8_to_float (ctx_arg_u8 (4) ),
+                          ctx_u8_to_float (ctx_arg_u8 (4+1) ),
+                          ctx_u8_to_float (ctx_arg_u8 (4+2) ),
+                          ctx_u8_to_float (ctx_arg_u8 (4+3) )
+                         };
+          ctx_sha1_process(&hasher->sha1_fill[hasher->source_level], /* appended to the fill hash state without restarting it */
+                           (uint8_t*) &rgba[0], sizeof(rgba));
+        }
+        break;
 #endif
-  return 0.0;
-}
-
-static float
-ctx_glyph_width_ctx_fs (CtxFont *font, Ctx *ctx, uint32_t unichar)
-{
-  CtxState *state = &ctx->state;
-  char path[1024];
-  sprintf (path, "%s/%010p", font->ctx_fs.path, unichar);
-  uint8_t *data = NULL;
-  long int len_bytes = 0;
-  ctx_get_contents (path, &data, &len_bytes);
-  float ret = 0.0;
-  float font_size = state->gstate.font_size;
-  if (data){
-    Ctx *glyph_ctx = ctx_new ();
-    ctx_parse (glyph_ctx, data);
-    for (int i = 0; i < glyph_ctx->drawlist.count; i++)
+    }
+  if (command->code == CTX_LINE_WIDTH)
     {
-      CtxEntry *e = &glyph_ctx->drawlist.entries[i];
-      if (e->code == CTX_DEFINE_GLYPH)
-        ret = e->data.u32[1] / 255.0 * font_size / CTX_BAKE_FONT_SIZE;
+      float x = state->gstate.line_width;
+      /* normalize line width according to scaling factor
+       */
+      x = x * ctx_maxf (ctx_maxf (ctx_fabsf (state->gstate.transform.m[0][0]),
+                                  ctx_fabsf (state->gstate.transform.m[0][1]) ),
+                        ctx_maxf (ctx_fabsf (state->gstate.transform.m[1][0]),
+                                  ctx_fabsf (state->gstate.transform.m[1][1]) ) );
+      state->gstate.line_width = x; /* the stored width is scaled by the largest absolute entry of the 2x2 part of the transform */
     }
-    free (data);
-    ctx_free (glyph_ctx);
-  }
-  return ret;
 }
 
-static int
-ctx_glyph_ctx_fs (CtxFont *font, Ctx *ctx, uint32_t unichar, int stroke)
+static CtxRasterizer *
+ctx_hasher_init (CtxRasterizer *rasterizer, Ctx *ctx, CtxState *state, int width, int height, int cols, int 
rows)
 {
-  char path[1024];
-  sprintf (path, "file://%s/%010p", font->ctx_fs.path, unichar);
-  uint8_t *data = NULL;
-  long int len_bytes = 0;
-  ctx_get_contents (path, &data, &len_bytes);
-
-  if (data){
-    Ctx *glyph_ctx = ctx_new ();
-    ctx_parse (glyph_ctx, data);
-    int ret = ctx_glyph_drawlist (font, ctx, &(glyph_ctx->drawlist),
-                                  unichar, stroke);
-    free (data);
-    ctx_free (glyph_ctx);
-    return ret;
-  }
-  return -1;
-}
-
-int
-ctx_load_font_ctx_fs (const char *name, const void *data, int length);
+  CtxHasher *hasher = (CtxHasher*)rasterizer; /* sets up the CtxHasher stored at `rasterizer` for a width x height area split into cols x rows tiles; returns `rasterizer` */
+  ctx_memset (rasterizer, 0, sizeof (CtxHasher) ); /* `rasterizer` must point at storage of at least sizeof (CtxHasher) */
+  CtxBackend *backend = (CtxBackend*)hasher;
+  backend->ctx         = ctx;
+  backend->process = ctx_hasher_process;
+  backend->free    = (CtxDestroyNotify)ctx_rasterizer_deinit;
+  // XXX need own destructor to not leak ->hashes
+  rasterizer->edge_list.flags |= CTX_DRAWLIST_EDGE_LIST;
+  rasterizer->state       = state;
+  ctx_state_init (rasterizer->state);
+  rasterizer->blit_x      = 0;
+  rasterizer->blit_y      = 0;
+  rasterizer->blit_width  = width;
+  rasterizer->blit_height = height;
+  rasterizer->state->gstate.clip_min_x  = 0;
+  rasterizer->state->gstate.clip_min_y  = 0;
+  rasterizer->state->gstate.clip_max_x  = width - 1;
+  rasterizer->state->gstate.clip_max_y  = height - 1;
+  rasterizer->scan_min    = 5000; /* min starts above max; presumably means no scanlines touched yet - TODO confirm */
+  rasterizer->scan_max    = -5000;
+  //rasterizer->aa          = 15;
 
-static CtxFontEngine ctx_font_engine_ctx_fs =
-{
-#if CTX_FONTS_FROM_FILE
-  NULL,
-#endif
-  ctx_load_font_ctx_fs,
-  ctx_glyph_ctx_fs,
-  ctx_glyph_width_ctx_fs,
-  ctx_glyph_kern_ctx_fs,
-};
+  hasher->rows = rows;
+  hasher->cols = cols;
 
-int
-ctx_load_font_ctx_fs (const char *name, const void *path, int length) // length is ignored
-{
-  if (ctx_font_count >= CTX_MAX_FONTS)
-    { return -1; }
+  hasher->hashes = (uint8_t*)ctx_calloc (20, rows * cols); /* 20 bytes per tile; the result is not checked here */
+  ctx_sha1_init (&hasher->sha1_fill[hasher->source_level]); /* source_level is 0 here after the memset */
+  ctx_sha1_init (&hasher->sha1_stroke[hasher->source_level]);
 
-  ctx_fonts[ctx_font_count].type = 42;
-  ctx_fonts[ctx_font_count].name = name;
-  ctx_fonts[ctx_font_count].ctx_fs.path = strdup (path);
-  int path_len = strlen (path);
-  if (ctx_fonts[ctx_font_count].ctx_fs.path[path_len-1] == '/')
-   ctx_fonts[ctx_font_count].ctx_fs.path[path_len-1] = 0;
-  ctx_fonts[ctx_font_count].engine = &ctx_font_engine_ctx_fs;
-  ctx_font_count++;
-  return ctx_font_count-1;
+  return rasterizer;
 }
 
-#endif
-
-int
-_ctx_glyph (Ctx *ctx, uint32_t unichar, int stroke)
+Ctx *ctx_hasher_new (int width, int height, int cols, int rows) /* creates a Ctx whose backend is a hasher over a cols x rows tile grid */
 {
-  CtxFont *font = &ctx_fonts[ctx->state.gstate.font];
-  // a begin-path here did not remove stray spikes in terminal
-  return font->engine->glyph (font, ctx, unichar, stroke);
+  Ctx *ctx           = _ctx_new_drawlist (width, height);
+  CtxState    *state = &ctx->state;
+  CtxRasterizer *rasterizer = (CtxRasterizer *) ctx_calloc (sizeof (CtxHasher), 1); /* allocated as a CtxHasher, handled through a CtxRasterizer pointer; the result is not checked */
+  ctx_hasher_init (rasterizer, ctx, state, width, height, cols, rows);
+  ctx_set_backend (ctx, (void*)rasterizer); /* after this, ctx->backend is what ctx_hasher_process and ctx_hasher_get_hash cast back to CtxHasher */
+  return ctx;
 }
 
-int
-ctx_glyph (Ctx *ctx, uint32_t unichar, int stroke)
+uint8_t *ctx_hasher_get_hash (Ctx *ctx, int col, int row) /* returns a pointer to the 20-byte hash of tile (col, row); out-of-range indices are clamped */
 {
-#if CTX_BACKEND_TEXT
-  CtxEntry commands[3]; // 3 to silence incorrect warning from static analysis
-  ctx_memset (commands, 0, sizeof (commands) );
-  commands[0] = ctx_u32 (CTX_GLYPH, unichar, 0);
-  commands[0].data.u8[4] = stroke;
-  ctx_process (ctx, commands);
-  return 0; // XXX is return value used?
-#else
-  return _ctx_glyph (ctx, unichar, stroke);
-#endif
+  CtxHasher *hasher = (CtxHasher*)ctx->backend; /* the backend is cast without any check that it is a hasher */
+  if (row < 0) row =0;
+  if (col < 0) col =0;
+  if (row >= hasher->rows) row = hasher->rows-1; /* would yield -1 if rows were 0 */
+  if (col >= hasher->cols) col = hasher->cols-1;
+
+  return &hasher->hashes[(row*hasher->cols+col)*20]; /* points into hasher->hashes, not a copy */
 }
 
-float
-ctx_glyph_width (Ctx *ctx, int unichar)
-{
-  CtxFont *font = &ctx_fonts[ctx->state.gstate.font];
+#endif
 
-  return font->engine->glyph_width (font, ctx, unichar);
-}
+#if CTX_CAIRO
 
-static float
-ctx_glyph_kern (Ctx *ctx, int unicharA, int unicharB)
+typedef struct _CtxCairo CtxCairo;
+struct
+  _CtxCairo
 {
-  CtxFont *font = &ctx_fonts[ctx->state.gstate.font];
-  return font->engine->glyph_kern (font, ctx, unicharA, unicharB);
-}
+  CtxBackend        backend; /* first member; ctx_cairo_process reads ctx->backend as a CtxCairo */
+  cairo_t          *cr; /* cairo context that ctx_cairo_process issues its drawing calls on */
+  cairo_pattern_t  *pat;
+  cairo_surface_t  *image;
+  int               preserve; /* set by CTX_PRESERVE; CTX_FILL then uses cairo_fill_preserve and clears it */
 
-float
-ctx_text_width (Ctx        *ctx,
-                const char *string)
-{
-  float sum = 0.0;
-  if (!string)
-    return 0.0f;
-  for (const char *utf8 = string; *utf8; utf8 = ctx_utf8_skip (utf8, 1) )
-    {
-      sum += ctx_glyph_width (ctx, ctx_utf8_to_unichar (utf8) );
-    }
-  return sum;
-}
+  // maintain separate fill and stroke state - even though the more limited use of ctx
+  // then suffers?
+  //
+};
 
 static void
-_ctx_glyphs (Ctx     *ctx,
-             CtxGlyph *glyphs,
-             int       n_glyphs,
-             int       stroke)
+ctx_cairo_process (Ctx *ctx, CtxCommand *c)
 {
-  for (int i = 0; i < n_glyphs; i++)
-    {
-      {
-        uint32_t unichar = glyphs[i].index;
-        ctx_move_to (ctx, glyphs[i].x, glyphs[i].y);
-        ctx_glyph (ctx, unichar, stroke);
-      }
-    }
-}
+  CtxCairo *ctx_cairo = (void*)ctx->backend;
+  CtxEntry *entry = (CtxEntry *) &c->entry;
 
-static void
-_ctx_text (Ctx        *ctx,
-           const char *string,
-           int         stroke,
-           int         visible)
-{
-  CtxState *state = &ctx->state;
-  float x = ctx->state.x;
-  switch ( (int) ctx_state_get (state, CTX_text_align) )
-    //switch (state->gstate.text_align)
+#if CTX_CURRENT_PATH
+  ctx_update_current_path (ctx, entry);
+#endif
+
+  cairo_t *cr = ctx_cairo->cr;
+  switch (entry->code)
     {
-      case CTX_TEXT_ALIGN_START:
-      case CTX_TEXT_ALIGN_LEFT:
+      case CTX_LINE_TO:
+        cairo_line_to (cr, c->line_to.x, c->line_to.y);
         break;
-      case CTX_TEXT_ALIGN_CENTER:
-        x -= ctx_text_width (ctx, string) /2;
+      case CTX_REL_LINE_TO:
+        cairo_rel_line_to (cr, c->rel_line_to.x, c->rel_line_to.y);
+        break;
+      case CTX_MOVE_TO:
+        cairo_move_to (cr, c->move_to.x, c->move_to.y);
+        break;
+      case CTX_REL_MOVE_TO:
+        cairo_rel_move_to (cr, ctx_arg_float (0), ctx_arg_float (1) );
+        break;
+      case CTX_CURVE_TO:
+        cairo_curve_to (cr, ctx_arg_float (0), ctx_arg_float (1),
+                        ctx_arg_float (2), ctx_arg_float (3),
+                        ctx_arg_float (4), ctx_arg_float (5) );
+        break;
+      case CTX_REL_CURVE_TO:
+        cairo_rel_curve_to (cr,ctx_arg_float (0), ctx_arg_float (1),
+                            ctx_arg_float (2), ctx_arg_float (3),
+                            ctx_arg_float (4), ctx_arg_float (5) );
+        break;
+      case CTX_PRESERVE:
+        ctx_cairo->preserve = 1;
+        break;
+      case CTX_QUAD_TO:
+        {
+          double x0, y0;
+          cairo_get_current_point (cr, &x0, &y0);
+          float cx = ctx_arg_float (0);
+          float cy = ctx_arg_float (1);
+          float  x = ctx_arg_float (2);
+          float  y = ctx_arg_float (3);
+          cairo_curve_to (cr,
+                          (cx * 2 + x0) / 3.0f, (cy * 2 + y0) / 3.0f,
+                          (cx * 2 + x) / 3.0f,           (cy * 2 + y) / 3.0f,
+                          x,                              y);
+        }
+        break;
+      case CTX_REL_QUAD_TO:
+        {
+          double x0, y0;
+          cairo_get_current_point (cr, &x0, &y0);
+          float cx = ctx_arg_float (0) + x0;
+          float cy = ctx_arg_float (1) + y0;
+          float  x = ctx_arg_float (2) + x0;
+          float  y = ctx_arg_float (3) + y0;
+          cairo_curve_to (cr,
+                          (cx * 2 + x0) / 3.0f, (cy * 2 + y0) / 3.0f,
+                          (cx * 2 + x) / 3.0f,           (cy * 2 + y) / 3.0f,
+                          x,                              y);
+        }
+        break;
+      /* rotate/scale/translate does not occur in fully minified data stream */
+      case CTX_ROTATE:
+        cairo_rotate (cr, ctx_arg_float (0) );
+        break;
+      case CTX_SCALE:
+        cairo_scale (cr, ctx_arg_float (0), ctx_arg_float (1) );
+        break;
+      case CTX_TRANSLATE:
+        cairo_translate (cr, ctx_arg_float (0), ctx_arg_float (1) );
+        break;
+      case CTX_LINE_WIDTH:
+        cairo_set_line_width (cr, ctx_arg_float (0) );
+        break;
+      case CTX_ARC:
+#if 0
+        fprintf (stderr, "F %2.1f %2.1f %2.1f %2.1f %2.1f %2.1f\n",
+                        ctx_arg_float(0),
+                        ctx_arg_float(1),
+                        ctx_arg_float(2),
+                        ctx_arg_float(3),
+                        ctx_arg_float(4),
+                        ctx_arg_float(5),
+                        ctx_arg_float(6));
+#endif
+        if (ctx_arg_float (5) == 1)
+          cairo_arc (cr, ctx_arg_float (0), ctx_arg_float (1),
+                     ctx_arg_float (2), ctx_arg_float (3),
+                     ctx_arg_float (4) );
+        else
+          cairo_arc_negative (cr, ctx_arg_float (0), ctx_arg_float (1),
+                              ctx_arg_float (2), ctx_arg_float (3),
+                              ctx_arg_float (4) );
+        break;
+      case CTX_SET_RGBA_U8:
+        cairo_set_source_rgba (cr, ctx_u8_to_float (ctx_arg_u8 (0) ),
+                               ctx_u8_to_float (ctx_arg_u8 (1) ),
+                               ctx_u8_to_float (ctx_arg_u8 (2) ),
+                               ctx_u8_to_float (ctx_arg_u8 (3) ) );
+        break;
+#if 0
+      case CTX_SET_RGBA_STROKE: // XXX : we need to maintain
+        //       state for the two kinds
+        cairo_set_source_rgba (cr, ctx_arg_u8 (0) /255.0,
+                               ctx_arg_u8 (1) /255.0,
+                               ctx_arg_u8 (2) /255.0,
+                               ctx_arg_u8 (3) /255.0);
+        break;
+#endif
+      case CTX_RECTANGLE:
+      case CTX_ROUND_RECTANGLE: // XXX - arcs
+        cairo_rectangle (cr, c->rectangle.x, c->rectangle.y,
+                         c->rectangle.width, c->rectangle.height);
+        break;
+      case CTX_SET_PIXEL:
+        cairo_set_source_rgba (cr, ctx_u8_to_float (ctx_arg_u8 (0) ),
+                               ctx_u8_to_float (ctx_arg_u8 (1) ),
+                               ctx_u8_to_float (ctx_arg_u8 (2) ),
+                               ctx_u8_to_float (ctx_arg_u8 (3) ) );
+        cairo_rectangle (cr, ctx_arg_u16 (2), ctx_arg_u16 (3), 1, 1);
+        cairo_fill (cr);
+        break;
+      case CTX_FILL:
+        if (ctx_cairo->preserve)
+        {
+          cairo_fill_preserve (cr);
+          ctx_cairo->preserve = 0;
+        }
+        else
+        {
+          cairo_fill (cr);
+        }
+        break;
+      case CTX_STROKE:
+        if (ctx_cairo->preserve)
+        {
+          cairo_stroke_preserve (cr);
+          ctx_cairo->preserve = 0;
+        }
+        else
+        {
+          cairo_stroke (cr);
+        }
+        break;
+      case CTX_IDENTITY:
+        cairo_identity_matrix (cr);
+        break;
+      case CTX_CLIP:
+        if (ctx_cairo->preserve)
+        {
+          cairo_clip_preserve (cr);
+          ctx_cairo->preserve = 0;
+        }
+        else
+        {
+          cairo_clip (cr);
+        }
+        break;
+        break;
+      case CTX_BEGIN_PATH:
+        cairo_new_path (cr);
+        break;
+      case CTX_CLOSE_PATH:
+        cairo_close_path (cr);
+        break;
+      case CTX_SAVE:
+        cairo_save (cr);
+        break;
+      case CTX_RESTORE:
+        cairo_restore (cr);
+        break;
+      case CTX_FONT_SIZE:
+        cairo_set_font_size (cr, ctx_arg_float (0) );
         break;
-      case CTX_TEXT_ALIGN_END:
-      case CTX_TEXT_ALIGN_RIGHT:
-        x -= ctx_text_width (ctx, string);
+      case CTX_MITER_LIMIT:
+        cairo_set_miter_limit (cr, ctx_arg_float (0) );
         break;
-    }
-  float y = ctx->state.y;
-  float baseline_offset = 0.0f;
-  switch ( (int) ctx_state_get (state, CTX_text_baseline) )
-    {
-      case CTX_TEXT_BASELINE_HANGING:
-        /* XXX : crude */
-        baseline_offset = ctx->state.gstate.font_size  * 0.55;
+      case CTX_LINE_CAP:
+        {
+          int cairo_val = CAIRO_LINE_CAP_SQUARE;
+          switch (ctx_arg_u8 (0) )
+            {
+              case CTX_CAP_ROUND:
+                cairo_val = CAIRO_LINE_CAP_ROUND;
+                break;
+              case CTX_CAP_SQUARE:
+                cairo_val = CAIRO_LINE_CAP_SQUARE;
+                break;
+              case CTX_CAP_NONE:
+                cairo_val = CAIRO_LINE_CAP_BUTT;
+                break;
+            }
+          cairo_set_line_cap (cr, cairo_val);
+        }
         break;
-      case CTX_TEXT_BASELINE_TOP:
-        /* XXX : crude */
-        baseline_offset = ctx->state.gstate.font_size  * 0.7;
+      case CTX_BLEND_MODE:
+        {
+          // does not map to cairo
+        }
         break;
-      case CTX_TEXT_BASELINE_BOTTOM:
-        baseline_offset = -ctx->state.gstate.font_size * 0.1;
+      case CTX_COMPOSITING_MODE:
+        {
+          int cairo_val = CAIRO_OPERATOR_OVER;
+          switch (ctx_arg_u8 (0) )
+            {
+              case CTX_COMPOSITE_SOURCE_OVER:
+                cairo_val = CAIRO_OPERATOR_OVER;
+                break;
+              case CTX_COMPOSITE_COPY:
+                cairo_val = CAIRO_OPERATOR_SOURCE;
+                break;
+            }
+          cairo_set_operator (cr, cairo_val);
+        }
         break;
-      case CTX_TEXT_BASELINE_ALPHABETIC:
-      case CTX_TEXT_BASELINE_IDEOGRAPHIC:
-        baseline_offset = 0.0f;
+      case CTX_LINE_JOIN:
+        {
+          int cairo_val = CAIRO_LINE_JOIN_ROUND;
+          switch (ctx_arg_u8 (0) )
+            {
+              case CTX_JOIN_ROUND:
+                cairo_val = CAIRO_LINE_JOIN_ROUND;
+                break;
+              case CTX_JOIN_BEVEL:
+                cairo_val = CAIRO_LINE_JOIN_BEVEL;
+                break;
+              case CTX_JOIN_MITER:
+                cairo_val = CAIRO_LINE_JOIN_MITER;
+                break;
+            }
+          cairo_set_line_join (cr, cairo_val);
+        }
         break;
-      case CTX_TEXT_BASELINE_MIDDLE:
-        baseline_offset = ctx->state.gstate.font_size * 0.25;
+      case CTX_LINEAR_GRADIENT:
+        {
+          if (ctx_cairo->pat)
+            {
+              cairo_pattern_destroy (ctx_cairo->pat);
+              ctx_cairo->pat = NULL;
+            }
+          ctx_cairo->pat = cairo_pattern_create_linear (ctx_arg_float (0), ctx_arg_float (1),
+                           ctx_arg_float (2), ctx_arg_float (3) );
+          cairo_pattern_add_color_stop_rgba (ctx_cairo->pat, 0, 0, 0, 0, 1);
+          cairo_pattern_add_color_stop_rgba (ctx_cairo->pat, 1, 1, 1, 1, 1);
+          cairo_set_source (cr, ctx_cairo->pat);
+        }
         break;
-    }
-  float x0 = x;
-  for (const char *utf8 = string; *utf8; utf8 = ctx_utf8_skip (utf8, 1) )
-    {
-      if (*utf8 == '\n')
+      case CTX_RADIAL_GRADIENT:
         {
-          y += ctx->state.gstate.font_size * ctx_state_get (state, CTX_line_spacing);
-          x = x0;
-          if (visible)
-            { ctx_move_to (ctx, x, y); }
+          if (ctx_cairo->pat)
+            {
+              cairo_pattern_destroy (ctx_cairo->pat);
+              ctx_cairo->pat = NULL;
+            }
+          ctx_cairo->pat = cairo_pattern_create_radial (ctx_arg_float (0), ctx_arg_float (1),
+                           ctx_arg_float (2), ctx_arg_float (3),
+                           ctx_arg_float (4), ctx_arg_float (5) );
+          cairo_set_source (cr, ctx_cairo->pat);
         }
-      else
+        break;
+      case CTX_GRADIENT_STOP:
+        cairo_pattern_add_color_stop_rgba (ctx_cairo->pat,
+                                           ctx_arg_float (0),
+                                           ctx_u8_to_float (ctx_arg_u8 (4) ),
+                                           ctx_u8_to_float (ctx_arg_u8 (5) ),
+                                           ctx_u8_to_float (ctx_arg_u8 (6) ),
+                                           ctx_u8_to_float (ctx_arg_u8 (7) ) );
+        break;
+        // XXX  implement TEXTURE
+#if 0
+      case CTX_LOAD_IMAGE:
         {
-          uint32_t unichar = ctx_utf8_to_unichar (utf8);
-          if (visible)
+          if (image)
             {
-              ctx_move_to (ctx, x, y + baseline_offset);
-              _ctx_glyph (ctx, unichar, stroke);
+              cairo_surface_destroy (image);
+              image = NULL;
             }
-          const char *next_utf8 = ctx_utf8_skip (utf8, 1);
-          if (next_utf8)
+          if (pat)
             {
-              x += ctx_glyph_width (ctx, unichar);
-              x += ctx_glyph_kern (ctx, unichar, ctx_utf8_to_unichar (next_utf8) );
+              cairo_pattern_destroy (pat);
+              pat = NULL;
             }
-          if (visible)
-            { ctx_move_to (ctx, x, y); }
+          image = cairo_image_surface_create_from_png (ctx_arg_string() );
+          cairo_set_source_surface (cr, image, ctx_arg_float (0), ctx_arg_float (1) );
         }
+        break;
+#endif
+      case CTX_TEXT:
+        /* XXX: implement some linebreaking/wrap, positioning
+         *      behavior here?
+         */
+        cairo_show_text (cr, ctx_arg_string () );
+        break;
+      case CTX_CONT:
+      case CTX_EDGE:
+      case CTX_DATA:
+      case CTX_DATA_REV:
+      case CTX_FLUSH:
+        break;
     }
-  if (!visible)
-    { ctx_move_to (ctx, x, y); }
+  ctx_process (ctx_cairo->backend.ctx, entry);
 }
 
-
-CtxGlyph *
-ctx_glyph_allocate (int n_glyphs)
-{
-  return (CtxGlyph *) malloc (sizeof (CtxGlyph) * n_glyphs);
-}
-void
-gtx_glyph_free     (CtxGlyph *glyphs)
+void ctx_cairo_free (CtxCairo *ctx_cairo)
 {
-  free (glyphs);
+  if (ctx_cairo->pat)
+    { cairo_pattern_destroy (ctx_cairo->pat); }
+  if (ctx_cairo->image)
+    { cairo_surface_destroy (ctx_cairo->image); }
+  free (ctx_cairo);
 }
 
 void
-ctx_glyphs (Ctx        *ctx,
-            CtxGlyph   *glyphs,
-            int         n_glyphs)
+ctx_render_cairo (Ctx *ctx, cairo_t *cr)
 {
-  _ctx_glyphs (ctx, glyphs, n_glyphs, 0);
+  CtxCairo    ctx_cairo; /* on-stack backend */
+  CtxBackend *backend = (CtxBackend*)&ctx_cairo;
+  CtxIterator iterator;
+  CtxCommand *command;
+  ctx_cairo.cr = cr;
+  backend->process = ctx_cairo_process;
+  backend->ctx = ctx;
+  ctx_iterator_init (&iterator, &ctx->drawlist, 0,
+                     CTX_ITERATOR_EXPAND_BITPACK);
+  while ( (command = ctx_iterator_next (&iterator) ) )
+    { ctx_cairo_process (ctx, command); }
 }
 
-void
-ctx_glyphs_stroke (Ctx        *ctx,
-                   CtxGlyph   *glyphs,
-                   int         n_glyphs)
+Ctx *
+ctx_new_for_cairo (cairo_t *cr)
 {
-  _ctx_glyphs (ctx, glyphs, n_glyphs, 1);
+  Ctx *ctx = _ctx_new_drawlist (640, 480);
+  CtxCairo *ctx_cairo = calloc(sizeof(CtxCairo),1);
+  CtxBackend *backend  = (CtxBackend*)ctx_cairo;
+  backend->free    = (void*)ctx_cairo_free;
+  backend->process = ctx_cairo_process;
+  backend->ctx = ctx;
+  ctx_cairo->cr = cr;
+  ctx_set_backend (ctx, (void*)ctx_cairo);
+  return ctx;
 }
 
-void
-ctx_text (Ctx        *ctx,
-          const char *string)
-{
-  if (!string)
-    return;
-#if CTX_BACKEND_TEXT
-  ctx_process_cmd_str (ctx, CTX_TEXT, string, 0, 0);
-  _ctx_text (ctx, string, 0, 0);
-#else
-  _ctx_text (ctx, string, 0, 1);
 #endif
-}
 
+#if CTX_EVENTS
 
-void
-ctx_fill_text (Ctx *ctx, const char *string,
-               float x, float y)
-{
-  ctx_move_to (ctx, x, y);
-  ctx_text (ctx, string);
-}
+static int ctx_find_largest_matching_substring
+ (const char *X, const char *Y, int m, int n, int *offsetY, int *offsetX) 
+{ 
+  int longest_common_suffix[2][n+1];
+  int best_length = 0;
+  for (int i=0; i<=m; i++)
+  {
+    for (int j=0; j<=n; j++)
+    {
+      if (i == 0 || j == 0 || !(X[i-1] == Y[j-1]))
+      {
+        longest_common_suffix[i%2][j] = 0;
+      }
+      else
+      {
+          longest_common_suffix[i%2][j] = longest_common_suffix[(i-1)%2][j-1] + 1;
+          if (best_length < longest_common_suffix[i%2][j])
+          {
+            best_length = longest_common_suffix[i%2][j];
+            if (offsetY) *offsetY = j - best_length;
+            if (offsetX) *offsetX = i - best_length;
+          }
+      }
+    }
+  }
+  return best_length;
+} 
 
-void
-ctx_text_stroke (Ctx        *ctx,
-                 const char *string)
-{
-  if (!string)
-    return;
-#if CTX_BACKEND_TEXT
-  ctx_process_cmd_str (ctx, CTX_STROKE_TEXT, string, 0, 0);
-  _ctx_text (ctx, string, 1, 0);
-#else
-  _ctx_text (ctx, string, 1, 1);
-#endif
-}
+typedef struct CtxSpan {
+  int from_prev;
+  int start;
+  int length;
+} CtxSpan;
 
-void
-ctx_stroke_text (Ctx *ctx, const char *string,
-               float x, float y)
-{
-  ctx_move_to (ctx, x, y);
-  ctx_text_stroke (ctx, string);
-}
+#define CHUNK_SIZE 32
+#define MIN_MATCH  7        // minimum match length to be encoded
+#define WINDOW_PADDING 16   // look-aside amount
 
-static int _ctx_resolve_font (const char *name)
+#if 0
+static void _dassert(int line, int condition, const char *str, int foo, int bar, int baz)
 {
-  for (int i = 0; i < ctx_font_count; i ++)
-    {
-      if (!ctx_strcmp (ctx_fonts[i].name, name) )
-        { return i; }
-    }
-  for (int i = 0; i < ctx_font_count; i ++)
-    {
-      if (ctx_strstr (ctx_fonts[i].name, name) )
-        { return i; }
-    }
-  return -1;
+  if (!condition)
+  {
+    FILE *f = fopen ("/tmp/cdebug", "a");
+    fprintf (f, "%i: %s    %i %i %i\n", line, str, foo, bar, baz);
+    fclose (f);
+  }
 }
+#define dassert(cond, foo, bar, baz) _dassert(__LINE__, cond, #cond, foo, bar ,baz)
+#endif
+#define dassert(cond, foo, bar, baz)
 
-int ctx_resolve_font (const char *name)
+/* XXX repeated substring matching is slow, we'll be
+ * better off with a hash-table with linked lists of
+ * matching 3-4 characters in previous.. or even
+ * a naive approach that expects rough alignment..
+ */
+static char *encode_in_terms_of_previous (
+                const char *src,  int src_len,
+                const char *prev, int prev_len,
+                int *out_len,
+                int max_ticks)
 {
-  int ret = _ctx_resolve_font (name);
-  if (ret >= 0)
-    { return ret; }
-  if (!ctx_strcmp (name, "regular") )
-    {
-      int ret = _ctx_resolve_font ("sans");
-      if (ret >= 0) { return ret; }
-      ret = _ctx_resolve_font ("serif");
-      if (ret >= 0) { return ret; }
-    }
-  return 0;
-}
+  CtxString *string = ctx_string_new ("");
+  CtxList *encoded_list = NULL;
 
-static void ctx_font_setup (void)
-{
-  static int initialized = 0;
-  if (initialized) { return; }
-  initialized = 1;
-#if CTX_FONT_ENGINE_CTX
-  ctx_font_count = 0; // oddly - this is needed in arduino
+  /* TODO : make expected position offset in prev slide based on
+   * matches and not be constant */
 
-#if CTX_FONT_ENGINE_CTX_FS
-  ctx_load_font_ctx_fs ("sans-ctx", "/tmp/ctx-regular", 0);
+  long ticks_start = ctx_ticks ();
+  int start = 0;
+  int length = CHUNK_SIZE;
+  for (start = 0; start < src_len; start += length)
+  {
+    CtxSpan *span = calloc (sizeof (CtxSpan), 1);
+    span->start = start;
+    if (start + length > src_len)
+      span->length = src_len - start;
+    else
+      span->length = length;
+    span->from_prev = 0;
+    ctx_list_append (&encoded_list, span);
+  }
+
+  for (CtxList *l = encoded_list; l; l = l->next)
+  {
+    CtxSpan *span = l->data;
+    if (!span->from_prev)
+    {
+      if (span->length >= MIN_MATCH)
+      {
+         int prev_pos = 0;
+         int curr_pos = 0;
+         assert(1);
+#if 0
+         int prev_start =  0;
+         int prev_window_length = prev_len;
 #else
-#if CTX_FONT_ascii
-  ctx_load_font_ctx ("sans-ctx", ctx_font_ascii, sizeof (ctx_font_ascii) );
-#endif
-#if CTX_FONT_regular
-  ctx_load_font_ctx ("sans-ctx", ctx_font_regular, sizeof (ctx_font_regular) );
-#endif
-#endif
+         int window_padding = WINDOW_PADDING;
+         int prev_start = span->start - window_padding;
+         if (prev_start < 0)
+           prev_start = 0;
 
-#if CTX_FONT_mono
-  ctx_load_font_ctx ("mono-ctx", ctx_font_mono, sizeof (ctx_font_mono) );
-#endif
-#if CTX_FONT_bold
-  ctx_load_font_ctx ("bold-ctx", ctx_font_bold, sizeof (ctx_font_bold) );
-#endif
-#if CTX_FONT_italic
-  ctx_load_font_ctx ("italic-ctx", ctx_font_italic, sizeof (ctx_font_italic) );
-#endif
-#if CTX_FONT_sans
-  ctx_load_font_ctx ("sans-ctx", ctx_font_sans, sizeof (ctx_font_sans) );
-#endif
-#if CTX_FONT_serif
-  ctx_load_font_ctx ("serif-ctx", ctx_font_serif, sizeof (ctx_font_serif) );
-#endif
-#if CTX_FONT_symbol
-  ctx_load_font_ctx ("symbol-ctx", ctx_font_symbol, sizeof (ctx_font_symbol) );
-#endif
-#if CTX_FONT_emoji
-  ctx_load_font_ctx ("emoji-ctx", ctx_font_emoji, sizeof (ctx_font_emoji) );
-#endif
-#endif
+         dassert(span->start>=0 , 0,0,0);
 
-#if NOTO_EMOJI_REGULAR
-  ctx_load_font_ttf ("sans-NotoEmoji_Regular", ttf_NotoEmoji_Regular_ttf, ttf_NotoEmoji_Regular_ttf_len);
-#endif
-#if ROBOTO_LIGHT
-  ctx_load_font_ttf ("sans-light-Roboto_Light", ttf_Roboto_Light_ttf, ttf_Roboto_Light_ttf_len);
-#endif
-#if ROBOTO_REGULAR
-  ctx_load_font_ttf ("sans-Roboto_Regular", ttf_Roboto_Regular_ttf, ttf_Roboto_Regular_ttf_len);
-#endif
-#if ROBOTO_BOLD
-  ctx_load_font_ttf ("sans-bold-Roboto_Bold", ttf_Roboto_Bold_ttf, ttf_Roboto_Bold_ttf_len);
-#endif
-#if DEJAVU_SANS
-  ctx_load_font_ttf ("sans-DejaVuSans", ttf_DejaVuSans_ttf, ttf_DejaVuSans_ttf_len);
-#endif
-#if VERA
-  ctx_load_font_ttf ("sans-Vera", ttf_Vera_ttf, ttf_Vera_ttf_len);
-#endif
-#if UNSCII_16
-  ctx_load_font_ttf ("mono-unscii16", ttf_unscii_16_ttf, ttf_unscii_16_ttf_len);
-#endif
-#if XA000_MONO
-  ctx_load_font_ttf ("mono-0xA000", ttf_0xA000_Mono_ttf, ttf_0xA000_Mono_ttf_len);
-#endif
-#if DEJAVU_SANS_MONO
-  ctx_load_font_ttf ("mono-DejaVuSansMono", ttf_DejaVuSansMono_ttf, ttf_DejaVuSansMono_ttf_len);
-#endif
-#if NOTO_MONO_REGULAR
-  ctx_load_font_ttf ("mono-NotoMono_Regular", ttf_NotoMono_Regular_ttf, ttf_NotoMono_Regular_ttf_len);
+         int prev_window_length = prev_len - prev_start;
+         if (prev_window_length > span->length + window_padding * 2 + span->start)
+           prev_window_length = span->length + window_padding * 2 + span->start;
 #endif
-}
-
-
-
-#if !__COSMOPOLITAN__
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
+         int match_len = 0;
+         if (prev_window_length > 0)
+           match_len = ctx_find_largest_matching_substring(prev + prev_start, src + span->start, prev_window_length, span->length, &curr_pos, &prev_pos);
+#if 1
+         prev_pos += prev_start;
 #endif
 
-//#include "ctx.h"
-/* instead of including ctx.h we declare the few utf8
- * functions we use
- */
-uint32_t ctx_utf8_to_unichar (const char *input);
-int ctx_unichar_to_utf8 (uint32_t  ch, uint8_t  *dest);
-int ctx_utf8_strlen (const char *s);
+         if (match_len >= MIN_MATCH)
+         {
+            int start  = span->start;
+            int length = span->length;
 
-static void ctx_string_init (CtxString *string, int initial_size)
-{
-  string->allocated_length = initial_size;
-  string->length = 0;
-  string->utf8_length = 0;
-  string->str = (char*)malloc (string->allocated_length + 1);
-  string->str[0]='\0';
-}
+            span->from_prev = 1;
+            span->start     = prev_pos;
+            span->length    = match_len;
+            dassert (span->start >= 0, prev_pos, prev_start, span->start);
+            dassert (span->length > 0, prev_pos, prev_start, span->length);
 
-static void ctx_string_destroy (CtxString *string)
-{
-  if (string->str)
-    {
-      free (string->str);
-      string->str = NULL;
-    }
-}
+            if (curr_pos)
+            {
+              CtxSpan *prev = calloc (sizeof (CtxSpan), 1);
+              prev->start = start;
+              prev->length =  curr_pos;
+            dassert (prev->start >= 0, prev_pos, prev_start, prev->start);
+            dassert (prev->length > 0, prev_pos, prev_start, prev->length);
+              prev->from_prev = 0;
+              ctx_list_insert_before (&encoded_list, l, prev);
+            }
 
-void ctx_string_clear (CtxString *string)
-{
-  string->length = 0;
-  string->utf8_length = 0;
-  string->str[string->length]=0;
-}
 
+            if (match_len + curr_pos < start + length)
+            {
+              CtxSpan *next = calloc (sizeof (CtxSpan), 1);
+              next->start = start + curr_pos + match_len;
+              next->length = (start + length) - next->start;
+            dassert (next->start >= 0, prev_pos, prev_start, next->start);
+      //    dassert (next->length > 0, prev_pos, prev_start, next->length);
+              next->from_prev = 0;
+              if (next->length)
+              {
+                if (l->next)
+                  ctx_list_insert_before (&encoded_list, l->next, next);
+                else
+                  ctx_list_append (&encoded_list, next);
+              }
+              else
+                free (next);
+            }
 
-void ctx_string_pre_alloc (CtxString *string, int size)
-{
-  char *old = string->str;
-  string->allocated_length = CTX_MAX (size + 2, string->length + 2);
-  string->str = (char*)realloc (old, string->allocated_length);
-}
+            if (curr_pos) // step one item back for forloop
+            {
+              CtxList *tmp = encoded_list;
+              int found = 0;
+              while (!found && tmp && tmp->next)
+              {
+                if (tmp->next == l)
+                {
+                  l = tmp;
+                  break;
+                }
+                tmp = tmp->next;
+              }
+            }
+         }
+      }
+    }
 
+    if (ctx_ticks ()-ticks_start > (unsigned long)max_ticks)
+      break;
+  }
 
-static inline void _ctx_string_append_byte (CtxString *string, char  val)
-{
-  if (CTX_LIKELY((val & 0xC0) != 0x80))
-    { string->utf8_length++; }
-  if (CTX_UNLIKELY(string->length + 2 >= string->allocated_length))
+  /* merge adjacent prev span references  */
+  {
+    for (CtxList *l = encoded_list; l; l = l->next)
     {
-      char *old = string->str;
-      string->allocated_length = CTX_MAX (string->allocated_length * 2, string->length + 2);
-      string->str = (char*)realloc (old, string->allocated_length);
+      CtxSpan *span = l->data;
+again:
+      if (l->next)
+      {
+        CtxSpan *next_span = l->next->data;
+        if (span->from_prev && next_span->from_prev &&
+            span->start + span->length == 
+            next_span->start)
+        {
+           span->length += next_span->length;
+           ctx_list_remove (&encoded_list, next_span);
+           goto again;
+        }
+      }
     }
-  string->str[string->length++] = val;
-  string->str[string->length] = '\0';
-}
-
-void ctx_string_append_byte (CtxString *string, char  val)
-{
-  _ctx_string_append_byte (string, val);
-}
+  }
 
-void ctx_string_append_unichar (CtxString *string, unsigned int unichar)
-{
-  char *str;
-  char utf8[5];
-  utf8[ctx_unichar_to_utf8 (unichar, (unsigned char *) utf8)]=0;
-  str = utf8;
-  while (str && *str)
+  while (encoded_list)
+  {
+    CtxSpan *span = encoded_list->data;
+    if (span->from_prev)
     {
-      _ctx_string_append_byte (string, *str);
-      str++;
+      char ref[128];
+      sprintf (ref, "%c%i %i%c", CTX_CODEC_CHAR, span->start, span->length, CTX_CODEC_CHAR);
+      ctx_string_append_data (string, ref, strlen(ref));
     }
-}
-
-static inline void _ctx_string_append_str (CtxString *string, const char *str)
-{
-  if (!str) { return; }
-  while (*str)
+    else
     {
-      _ctx_string_append_byte (string, *str);
-      str++;
+      for (int i = span->start; i< span->start+span->length; i++)
+      {
+        if (src[i] == CTX_CODEC_CHAR)
+        {
+          char bytes[2]={CTX_CODEC_CHAR, CTX_CODEC_CHAR};
+          ctx_string_append_data (string, bytes, 2);
+        }
+        else
+        {
+          ctx_string_append_data (string, &src[i], 1);
+        }
+      }
     }
+    free (span);
+    ctx_list_remove (&encoded_list, span);
+  }
+
+  char *ret = string->str;
+  if (out_len) *out_len = string->length;
+  ctx_string_free (string, 0);
+  return ret;
 }
 
-void ctx_string_append_utf8char (CtxString *string, const char *str)
+#if 0 // for documentation/reference purposes
+static char *decode_ctx (const char *encoded, int enc_len, const char *prev, int prev_len, int *out_len)
 {
-  if (!str) { return; }
-  int len = ctx_utf8_len (*str);
-  for (int i = 0; i < len && *str; i++)
+  CtxString *string = ctx_string_new ("");
+  char reference[32]="";
+  int ref_len = 0;
+  int in_ref = 0;
+  for (int i = 0; i < enc_len; i++)
+  {
+    if (encoded[i] == CTX_CODEC_CHAR)
     {
-      _ctx_string_append_byte (string, *str);
-      str++;
+      if (!in_ref)
+      {
+        in_ref = 1;
+      }
+      else
+      {
+        int start = atoi (reference);
+        int len = 0;
+        if (strchr (reference, ' '))
+          len = atoi (strchr (reference, ' ')+1);
+
+        if (start < 0)start = 0;
+        if (start >= prev_len)start = prev_len-1;
+        if (len + start > prev_len)
+          len = prev_len - start;
+
+        if (start == 0 && len == 0)
+          ctx_string_append_byte (string, CTX_CODEC_CHAR);
+        else
+          ctx_string_append_data (string, prev + start, len);
+        ref_len = 0;
+        in_ref = 0;
+      }
+    }
+    else
+    {
+      if (in_ref)
+      {
+        if (ref_len < 16)
+        {
+          reference[ref_len++] = encoded[i];
+          reference[ref_len] = 0;
+        }
+      }
+      else
+      ctx_string_append_data (string, &encoded[i], 1);
     }
+  }
+  char *ret = string->str;
+  if (out_len) *out_len = string->length;
+  ctx_string_free (string, 0);
+  return ret;
 }
+#endif
 
-void ctx_string_append_str (CtxString *string, const char *str)
-{
-  _ctx_string_append_str (string, str);
-}
+#define CTX_START_STRING "U\n"  // or " reset "
+#define CTX_END_STRING   "\nX"  // or "\ndone"
+#define CTX_END_STRING2  "\n"
 
-CtxString *ctx_string_new_with_size (const char *initial, int initial_size)
-{
-  CtxString *string = (CtxString*)ctx_calloc (sizeof (CtxString), 1);
-  ctx_string_init (string, initial_size);
-  if (initial)
-    { _ctx_string_append_str (string, initial); }
-  return string;
-}
+int ctx_frame_ack = -1;
+static char *prev_frame_contents = NULL;
+static int   prev_frame_len = 0;
 
-CtxString *ctx_string_new (const char *initial)
-{
-  return ctx_string_new_with_size (initial, 8);
-}
+static int ctx_native_events = 1;
 
-void ctx_string_append_data (CtxString *string, const char *str, int len)
+static void ctx_ctx_flush (Ctx *ctx)
 {
-  int i;
-  for (i = 0; i<len; i++)
-    { _ctx_string_append_byte (string, str[i]); }
-}
+  CtxCtx *ctxctx = (CtxCtx*)ctx->backend;
+#if 0
+  FILE *debug = fopen ("/tmp/ctx-debug", "a");
+  fprintf (debug, "------\n");
+#endif
 
-void ctx_string_append_string (CtxString *string, CtxString *string2)
-{
-  const char *str = ctx_string_get (string2);
-  while (str && *str)
-    {
-      _ctx_string_append_byte (string, *str);
-      str++;
-    }
-}
+  if (ctx_native_events)
+    fprintf (stdout, "\e[?201h");
+  fprintf (stdout, "\e[H\e[?25l\e[?200h");
+#if 1
+  fprintf (stdout, CTX_START_STRING);
+  ctx_render_stream (ctxctx->backend.ctx, stdout, 0);
+  fprintf (stdout, CTX_END_STRING);
+#else
+  {
+    int cur_frame_len = 0;
+    char *rest = ctx_render_string (ctxctx->ctx, 0, &cur_frame_len);
+    char *cur_frame_contents = malloc (cur_frame_len + strlen(CTX_START_STRING) + strlen (CTX_END_STRING) + 1);
 
-const char *ctx_string_get (CtxString *string)
-{
-  return string->str;
-}
+    cur_frame_contents[0]=0;
+    strcat (cur_frame_contents, CTX_START_STRING);
+    strcat (cur_frame_contents, rest);
+    strcat (cur_frame_contents, CTX_END_STRING);
+    free (rest);
+    cur_frame_len += strlen (CTX_START_STRING) + strlen (CTX_END_STRING);
 
-int ctx_string_get_utf8length (CtxString *string)
-{
-  return string->utf8_length;
-}
+    if (prev_frame_contents && 0)  // XXX : 
+    {
+      char *encoded;
+      int encoded_len = 0;
+      //uint64_t ticks_start = ctx_ticks ();
 
-int ctx_string_get_length (CtxString *string)
-{
-  return string->length;
-}
+      encoded = encode_in_terms_of_previous (cur_frame_contents, cur_frame_len, prev_frame_contents, prev_frame_len, &encoded_len, 1000 * 10);
+//    encoded = strdup (cur_frame_contents);
+//    encoded_len = strlen (encoded);
+      //uint64_t ticks_end = ctx_ticks ();
 
-void
-ctx_string_free (CtxString *string, int freealloc)
-{
-  if (freealloc)
+      fwrite (encoded, encoded_len, 1, stdout);
+//    fwrite (encoded, cur_frame_len, 1, stdout);
+#if 0
+      fprintf (debug, "---prev-frame(%i)\n%s", (int)strlen(prev_frame_contents), prev_frame_contents);
+      fprintf (debug, "---cur-frame(%i)\n%s", (int)strlen(cur_frame_contents), cur_frame_contents);
+      fprintf (debug, "---encoded(%.4f %i)---\n%s--------\n",
+                      (ticks_end-ticks_start)/1000.0,
+                      (int)strlen(encoded), encoded);
+#endif
+      free (encoded);
+    }
+    else
     {
-      ctx_string_destroy (string);
+      fwrite (cur_frame_contents, cur_frame_len, 1, stdout);
     }
-#if 0
-  if (string->is_line)
-  {
-    VtLine *line = (VtLine*)string;
-    if (line->style)
-      { free (line->style); }
-    if (line->ctx)
-      { ctx_free (line->ctx); }
-    if (line->ctx_copy)
-      { ctx_free (line->ctx_copy); }
+
+    if (prev_frame_contents)
+      free (prev_frame_contents);
+    prev_frame_contents = cur_frame_contents;
+    prev_frame_len = cur_frame_len;
   }
 #endif
-  free (string);
-}
+  fprintf (stdout, CTX_END_STRING2);
+#if 0
+    fclose (debug);
+#endif
 
-char       *ctx_string_dissolve       (CtxString *string)
-{
-  char *ret = string->str;
-  ctx_string_free (string, 0);
-  return ret;
-}
+#if CTX_SYNC_FRAMES
+  fprintf (stdout, "\e[5n");
+  fflush (stdout);
 
-void
-ctx_string_set (CtxString *string, const char *new_string)
-{
-  ctx_string_clear (string);
-  _ctx_string_append_str (string, new_string);
+  ctx_frame_ack = 0;
+  do {
+     ctx_consume_events (ctxctx->backend.ctx);
+  } while (ctx_frame_ack != 1);
+#else
+  fflush (stdout);
+#endif
 }
 
-static char *ctx_strdup (const char *str)
+void ctx_ctx_free (CtxCtx *ctx)
 {
-  int len = strlen (str);
-  char *ret = (char*)malloc (len + 1);
-  memcpy (ret, str, len);
-  ret[len]=0;
-  return ret;
+  nc_at_exit ();
+  free (ctx);
+  /* we're not destroying the ctx member; this function is called during ctx's teardown */
 }
 
-void ctx_string_replace_utf8 (CtxString *string, int pos, const char *new_glyph)
+void ctx_ctx_consume_events (Ctx *ctx)
 {
-#if 1
-  int old_len = string->utf8_length;
-#else
-  int old_len = ctx_utf8_strlen (string->str);// string->utf8_length;
+  //int ix, iy;
+  CtxCtx *ctxctx = (CtxCtx*)ctx->backend;
+  const char *event = NULL;
+#if CTX_AUDIO
+  ctx_ctx_pcm (ctx);
 #endif
-  if (CTX_LIKELY(pos == old_len))
-    {
-      _ctx_string_append_str (string, new_glyph);
-      return;
-    }
+  assert (ctx_native_events);
 
-  char tmpg[3]=" ";
-  int new_len = ctx_utf8_len (*new_glyph);
-  if (new_len <= 1 && new_glyph[0] < 32)
-    {
-      new_len = 1;
-      tmpg[0]=new_glyph[0]+64;
-      new_glyph = tmpg;
+#if 1
+    { /* XXX : this is a work-around for signals not working properly, we are polling the
+         size with an ioctl per consume-events
+         */
+      struct winsize ws;
+      ioctl(0,TIOCGWINSZ,&ws);
+      ctxctx->cols = ws.ws_col;
+      ctxctx->rows = ws.ws_row;
+      ctx_set_size (ctx, ws.ws_xpixel, ws.ws_ypixel);
     }
-  {
-    for (int i = old_len; i <= pos + 2; i++)
+#endif
+    //char *cmd = ctx_strdup_printf ("touch /tmp/ctx-%ix%i", ctxctx->width, ctxctx->height);
+    //system (cmd);
+    //free (cmd);
+
+  if (ctx_native_events)
+    do {
+
+      float x = 0, y = 0;
+      int b = 0;
+      char event_type[128]="";
+      event = ctx_native_get_event (ctx, 1000/120);
+
+      if (event)
       {
-        _ctx_string_append_byte (string, ' ');
-        old_len++;
+      sscanf (event, "%s %f %f %i", event_type, &x, &y, &b);
+      if (!strcmp (event_type, "idle"))
+      {
+              event = NULL;
       }
-  }
-  if (string->length + new_len  >= string->allocated_length - 2)
-    {
-      char *tmp;
-      char *defer;
-      string->allocated_length = string->length + new_len + 2;
-      tmp = (char*) ctx_calloc (string->allocated_length + 1 + 8, 1);
-      strcpy (tmp, string->str);
-      defer = string->str;
-      string->str = tmp;
-      free (defer);
-    }
-  char *p = (char *) ctx_utf8_skip (string->str, pos);
-  int prev_len = ctx_utf8_len (*p);
-  char *rest;
-  if (*p == 0 || * (p+prev_len) == 0)
-    {
-      rest = ctx_strdup ("");
-    }
-  else
-    {
-      if (p + prev_len >= string->length  + string->str)
-        { rest = ctx_strdup (""); }
-      else
-        { rest = ctx_strdup (p + prev_len); }
-    }
-  memcpy (p, new_glyph, new_len);
-  memcpy (p + new_len, rest, strlen (rest) + 1);
-  string->length += new_len;
-  string->length -= prev_len;
-  free (rest);
-  //string->length = strlen (string->str);
-  //string->utf8_length = ctx_utf8_strlen (string->str);
-}
+      else if (!strcmp (event_type, "pp"))
+      {
+        ctx_pointer_press (ctx, x, y, b, 0);
+      }
+      else if (!strcmp (event_type, "pd")||
+               !strcmp (event_type, "pm"))
+      {
+        ctx_pointer_motion (ctx, x, y, b, 0);
+      }
+      else if (!strcmp (event_type, "pr"))
+      {
+        ctx_pointer_release (ctx, x, y, b, 0);
+      }
+      else if (!strcmp (event_type, "message"))
+      {
+        ctx_incoming_message (ctx, event + strlen ("message"), 0);
+      } else if (!strcmp (event, "size-changed"))
+      {
+        fprintf (stdout, "\e[H\e[2J\e[?25l");
+        ctxctx->cols = ctx_terminal_cols ();
+        ctxctx->rows = ctx_terminal_rows ();
 
-void ctx_string_replace_unichar (CtxString *string, int pos, uint32_t unichar)
-{
-  uint8_t utf8[8];
-  ctx_unichar_to_utf8 (unichar, utf8);
-  ctx_string_replace_utf8 (string, pos, (char *) utf8);
-}
+        //system ("touch /tmp/ctx-abc");
 
-uint32_t ctx_string_get_unichar (CtxString *string, int pos)
-{
-  char *p = (char *) ctx_utf8_skip (string->str, pos);
-  if (!p)
-    { return 0; }
-  return ctx_utf8_to_unichar (p);
-}
+        ctx_set_size (ctx, ctx_terminal_width(), ctx_terminal_height());
 
-void ctx_string_insert_utf8 (CtxString *string, int pos, const char *new_glyph)
-{
-  int new_len = ctx_utf8_len (*new_glyph);
-  int old_len = string->utf8_length;
-  char tmpg[3]=" ";
-  if (old_len == pos && 0)
-    {
-      ctx_string_append_str (string, new_glyph);
-      return;
-    }
-  if (new_len <= 1 && new_glyph[0] < 32)
-    {
-      tmpg[0]=new_glyph[0]+64;
-      new_glyph = tmpg;
-    }
-  {
-    for (int i = old_len; i <= pos; i++)
+        if (prev_frame_contents)
+          free (prev_frame_contents);
+        prev_frame_contents = NULL;
+        prev_frame_len = 0;
+        ctx_queue_draw (ctx);
+
+      //   ctx_key_press(ctx,0,"size-changed",0);
+      }
+      else if (!strcmp (event_type, "keyup"))
       {
-        _ctx_string_append_byte (string, ' ');
-        old_len++;
+        char buf[4]={ x, 0 };
+        ctx_key_up (ctx, (int)x, buf, 0);
       }
-  }
-  if (string->length + new_len + 1  > string->allocated_length)
-    {
-      char *tmp;
-      char *defer;
-      string->allocated_length = string->length + new_len + 1;
-      tmp = (char*) ctx_calloc (string->allocated_length + 1, 1);
-      strcpy (tmp, string->str);
-      defer = string->str;
-      string->str = tmp;
-      free (defer);
-    }
-  char *p = (char *) ctx_utf8_skip (string->str, pos);
-  int prev_len = ctx_utf8_len (*p);
-  char *rest;
-  if ( (*p == 0 || * (p+prev_len) == 0) && pos != 0)
-    {
-      rest = ctx_strdup ("");
-    }
-  else
-    {
-      rest = ctx_strdup (p);
-    }
-  memcpy (p, new_glyph, new_len);
-  memcpy (p + new_len, rest, strlen (rest) + 1);
-  free (rest);
-  string->length = strlen (string->str);
-  string->utf8_length = ctx_utf8_strlen (string->str);
-}
-
-void ctx_string_insert_unichar (CtxString *string, int pos, uint32_t unichar)
-{
-  uint8_t utf8[5]="";
-  utf8[ctx_unichar_to_utf8(unichar, utf8)]=0;
-  ctx_string_insert_utf8 (string, pos, (char*)utf8);
+      else if (!strcmp (event_type, "keydown"))
+      {
+        char buf[4]={ x, 0 };
+        ctx_key_down (ctx, (int)x, buf, 0);
+      }
+      else
+      {
+        ctx_key_press (ctx, 0, event, 0);
+      }
+      }
+    } while (event);
 }
 
-void ctx_string_remove (CtxString *string, int pos)
+Ctx *ctx_new_ctx (int width, int height)
 {
-  int old_len = string->utf8_length;
-  {
-    for (int i = old_len; i <= pos; i++)
-      {
-        _ctx_string_append_byte (string, ' ');
-        old_len++;
-      }
-  }
-  char *p = (char *) ctx_utf8_skip (string->str, pos);
-  int prev_len = ctx_utf8_len (*p);
-  char *rest;
-  if (!p || *p == 0)
-    {
-      return;
-      rest = ctx_strdup ("");
-      prev_len = 0;
-    }
-  else if (* (p+prev_len) == 0)
+  float font_size = 12.0;
+  Ctx *ctx = _ctx_new_drawlist (width, height);
+  CtxCtx *ctxctx = (CtxCtx*)calloc (sizeof (CtxCtx), 1);
+  CtxBackend *backend = (CtxBackend*)ctxctx;
+  fprintf (stdout, "\e[?1049h");
+  fflush (stdout);
+  //fprintf (stderr, "\e[H");
+  //fprintf (stderr, "\e[2J");
+  ctx_native_events = 1;
+  if (width <= 0 || height <= 0)
   {
-      rest = ctx_strdup ("");
+    ctxctx->cols = ctx_terminal_cols ();
+    ctxctx->rows = ctx_terminal_rows ();
+    width  = ctx->width  = ctx_terminal_width ();
+    height = ctx->height = ctx_terminal_height ();
+    font_size = height / ctxctx->rows;
+    ctx_font_size (ctx, font_size);
   }
   else
-    {
-      rest = ctx_strdup (p + prev_len);
-    }
-  strcpy (p, rest);
-  string->str[string->length - prev_len] = 0;
-  free (rest);
-  string->length = strlen (string->str);
-  string->utf8_length = ctx_utf8_strlen (string->str);
+  {
+    ctx->width  = width;
+    ctx->height = height;
+    ctxctx->cols   = width / 80;
+    ctxctx->rows   = height / 24;
+  }
+  backend->ctx = ctx;
+  if (!ctx_native_events)
+    _ctx_mouse (ctx, NC_MOUSE_DRAG);
+  backend->flush = ctx_ctx_flush;
+  backend->free  = (void(*)(void *))ctx_ctx_free;
+  backend->process = (void*)ctx_drawlist_process;
+  backend->consume_events = ctx_ctx_consume_events;
+  ctx_set_backend (ctx, ctxctx);
+  ctx_set_size (ctx, width, height);
+  return ctx;
 }
 
-char *ctx_strdup_printf (const char *format, ...)
-{
-  va_list ap;
-  size_t needed;
-  char *buffer;
-  va_start (ap, format);
-  needed = vsnprintf (NULL, 0, format, ap) + 1;
-  buffer = (char*)malloc (needed);
-  va_end (ap);
-  va_start (ap, format);
-  vsnprintf (buffer, needed, format, ap);
-  va_end (ap);
-  return buffer;
-}
+void ctx_ctx_pcm (Ctx *ctx);
 
-void ctx_string_append_printf (CtxString *string, const char *format, ...)
-{
-  va_list ap;
-  size_t needed;
-  char *buffer;
-  va_start (ap, format);
-  needed = vsnprintf (NULL, 0, format, ap) + 1;
-  buffer = (char*)malloc (needed);
-  va_end (ap);
-  va_start (ap, format);
-  vsnprintf (buffer, needed, format, ap);
-  va_end (ap);
-  ctx_string_append_str (string, buffer);
-  free (buffer);
-}
 
-CtxString *ctx_string_new_printf (const char *format, ...)
+#endif
+
+#if CTX_TILED
+static inline int
+ctx_tiled_threads_done (CtxTiled *tiled)
 {
-  CtxString *string = ctx_string_new ("");
-  va_list ap;
-  size_t needed;
-  char *buffer;
-  va_start (ap, format);
-  needed = vsnprintf (NULL, 0, format, ap) + 1;
-  buffer = (char*)malloc (needed);
-  va_end (ap);
-  va_start (ap, format);
-  vsnprintf (buffer, needed, format, ap);
-  va_end (ap);
-  ctx_string_append_str (string, buffer);
-  free (buffer);
-  return string;
+  int sum = 0;
+  for (int i = 0; i < _ctx_max_threads; i++)
+  {
+     if (tiled->rendered_frame[i] == tiled->render_frame)
+       sum ++;
+  }
+  return sum;
 }
 
-#if CTX_CAIRO
+int _ctx_damage_control = 0;
 
-typedef struct _CtxCairo CtxCairo;
-struct
-  _CtxCairo
+void ctx_tiled_free (CtxTiled *tiled)
 {
-  CtxImplementation vfuncs;
-  Ctx              *ctx;
-  cairo_t          *cr;
-  cairo_pattern_t  *pat;
-  cairo_surface_t  *image;
-  int               preserve;
-};
+  tiled->quit = 1;
+  mtx_lock (&tiled->mtx);
+  cnd_broadcast (&tiled->cond);
+  mtx_unlock (&tiled->mtx);
 
-static void
-ctx_cairo_process (CtxCairo *ctx_cairo, CtxCommand *c)
+  while (tiled->thread_quit < _ctx_max_threads)
+    usleep (1000);
+
+  if (tiled->pixels)
+  {
+    free (tiled->pixels);
+    tiled->pixels = NULL;
+    for (int i = 0 ; i < _ctx_max_threads; i++)
+    {
+      if (tiled->host[i])
+        ctx_free (tiled->host[i]);
+      tiled->host[i]=NULL;
+    }
+    ctx_free (tiled->ctx_copy);
+  }
+  // leak?
+}
+static unsigned char *sdl_icc = NULL;
+static long sdl_icc_length = 0;
+
+static void ctx_tiled_flush (Ctx *ctx)
 {
-  CtxEntry *entry = (CtxEntry *) &c->entry;
-  cairo_t *cr = ctx_cairo->cr;
-  switch (entry->code)
+  CtxTiled *tiled = (CtxTiled*)ctx->backend;
+  mtx_lock (&tiled->mtx);
+  if (tiled->shown_frame == tiled->render_frame)
+  {
+    int dirty_tiles = 0;
+    ctx_set_drawlist (tiled->ctx_copy, &tiled->backend.ctx->drawlist.entries[0],
+                                           tiled->backend.ctx->drawlist.count * 9);
+    if (_ctx_enable_hash_cache)
     {
-      case CTX_LINE_TO:
-        cairo_line_to (cr, c->line_to.x, c->line_to.y);
-        break;
-      case CTX_REL_LINE_TO:
-        cairo_rel_line_to (cr, c->rel_line_to.x, c->rel_line_to.y);
-        break;
-      case CTX_MOVE_TO:
-        cairo_move_to (cr, c->move_to.x, c->move_to.y);
-        break;
-      case CTX_REL_MOVE_TO:
-        cairo_rel_move_to (cr, ctx_arg_float (0), ctx_arg_float (1) );
-        break;
-      case CTX_CURVE_TO:
-        cairo_curve_to (cr, ctx_arg_float (0), ctx_arg_float (1),
-                        ctx_arg_float (2), ctx_arg_float (3),
-                        ctx_arg_float (4), ctx_arg_float (5) );
-        break;
-      case CTX_REL_CURVE_TO:
-        cairo_rel_curve_to (cr,ctx_arg_float (0), ctx_arg_float (1),
-                            ctx_arg_float (2), ctx_arg_float (3),
-                            ctx_arg_float (4), ctx_arg_float (5) );
-        break;
-      case CTX_PRESERVE:
-        ctx_cairo->preserve = 1;
-        break;
-      case CTX_QUAD_TO:
+      Ctx *hasher = ctx_hasher_new (tiled->width, tiled->height,
+                        CTX_HASH_COLS, CTX_HASH_ROWS);
+      ctx_render_ctx (tiled->ctx_copy, hasher);
+
+      for (int row = 0; row < CTX_HASH_ROWS; row++)
+        for (int col = 0; col < CTX_HASH_COLS; col++)
         {
-          double x0, y0;
-          cairo_get_current_point (cr, &x0, &y0);
-          float cx = ctx_arg_float (0);
-          float cy = ctx_arg_float (1);
-          float  x = ctx_arg_float (2);
-          float  y = ctx_arg_float (3);
-          cairo_curve_to (cr,
-                          (cx * 2 + x0) / 3.0f, (cy * 2 + y0) / 3.0f,
-                          (cx * 2 + x) / 3.0f,           (cy * 2 + y) / 3.0f,
-                          x,                              y);
+          uint8_t *new_hash = ctx_hasher_get_hash (hasher, col, row);
+          if (new_hash && memcmp (new_hash, &tiled->hashes[(row * CTX_HASH_COLS + col) *  20], 20))
+          {
+            memcpy (&tiled->hashes[(row * CTX_HASH_COLS +  col)*20], new_hash, 20);
+            tiled->tile_affinity[row * CTX_HASH_COLS + col] = 1;
+            dirty_tiles++;
+          }
+          else
+          {
+            tiled->tile_affinity[row * CTX_HASH_COLS + col] = -1;
+          }
         }
-        break;
-      case CTX_REL_QUAD_TO:
+      free (((CtxHasher*)(hasher->backend))->hashes);
+      ctx_free (hasher);
+    }
+    else
+    {
+      for (int row = 0; row < CTX_HASH_ROWS; row++)
+        for (int col = 0; col < CTX_HASH_COLS; col++)
+          {
+            tiled->tile_affinity[row * CTX_HASH_COLS + col] = 1;
+            dirty_tiles++;
+          }
+    }
+    int dirty_no = 0;
+    if (dirty_tiles)
+    for (int row = 0; row < CTX_HASH_ROWS; row++)
+      for (int col = 0; col < CTX_HASH_COLS; col++)
+      {
+        if (tiled->tile_affinity[row * CTX_HASH_COLS + col] != -1)
         {
-          double x0, y0;
-          cairo_get_current_point (cr, &x0, &y0);
-          float cx = ctx_arg_float (0) + x0;
-          float cy = ctx_arg_float (1) + y0;
-          float  x = ctx_arg_float (2) + x0;
-          float  y = ctx_arg_float (3) + y0;
-          cairo_curve_to (cr,
-                          (cx * 2 + x0) / 3.0f, (cy * 2 + y0) / 3.0f,
-                          (cx * 2 + x) / 3.0f,           (cy * 2 + y) / 3.0f,
-                          x,                              y);
+          tiled->tile_affinity[row * CTX_HASH_COLS + col] = dirty_no * (_ctx_max_threads) / dirty_tiles;
+          dirty_no++;
+          if (col > tiled->max_col) tiled->max_col = col;
+          if (col < tiled->min_col) tiled->min_col = col;
+          if (row > tiled->max_row) tiled->max_row = row;
+          if (row < tiled->min_row) tiled->min_row = row;
         }
-        break;
-      /* rotate/scale/translate does not occur in fully minified data stream */
-      case CTX_ROTATE:
-        cairo_rotate (cr, ctx_arg_float (0) );
-        break;
-      case CTX_SCALE:
-        cairo_scale (cr, ctx_arg_float (0), ctx_arg_float (1) );
-        break;
-      case CTX_TRANSLATE:
-        cairo_translate (cr, ctx_arg_float (0), ctx_arg_float (1) );
-        break;
-      case CTX_LINE_WIDTH:
-        cairo_set_line_width (cr, ctx_arg_float (0) );
-        break;
-      case CTX_ARC:
-#if 0
-        fprintf (stderr, "F %2.1f %2.1f %2.1f %2.1f %2.1f %2.1f\n",
-                        ctx_arg_float(0),
-                        ctx_arg_float(1),
-                        ctx_arg_float(2),
-                        ctx_arg_float(3),
-                        ctx_arg_float(4),
-                        ctx_arg_float(5),
-                        ctx_arg_float(6));
-#endif
-        if (ctx_arg_float (5) == 1)
-          cairo_arc (cr, ctx_arg_float (0), ctx_arg_float (1),
-                     ctx_arg_float (2), ctx_arg_float (3),
-                     ctx_arg_float (4) );
-        else
-          cairo_arc_negative (cr, ctx_arg_float (0), ctx_arg_float (1),
-                              ctx_arg_float (2), ctx_arg_float (3),
-                              ctx_arg_float (4) );
-        break;
-      case CTX_SET_RGBA_U8:
-        cairo_set_source_rgba (cr, ctx_u8_to_float (ctx_arg_u8 (0) ),
-                               ctx_u8_to_float (ctx_arg_u8 (1) ),
-                               ctx_u8_to_float (ctx_arg_u8 (2) ),
-                               ctx_u8_to_float (ctx_arg_u8 (3) ) );
-        break;
+      }
+
+    if (_ctx_damage_control)
+    {
+      for (int i = 0; i < tiled->width * tiled->height; i++)
+      {
+        tiled->pixels[i*4+2]  = (tiled->pixels[i*4+2] + 255)/2;
+      }
+    }
+
+    tiled->render_frame = ++tiled->frame;
+
 #if 0
-      case CTX_SET_RGBA_STROKE: // XXX : we need to maintain
-        //       state for the two kinds
-        cairo_set_source_rgba (cr, ctx_arg_u8 (0) /255.0,
-                               ctx_arg_u8 (1) /255.0,
-                               ctx_arg_u8 (2) /255.0,
-                               ctx_arg_u8 (3) /255.0);
-        break;
+
+          //if (tiled->tile_affinity[hno]==no)
+          {
+            int x0 = ((tiled->width)/CTX_HASH_COLS) * 0;
+            int y0 = ((tiled->height)/CTX_HASH_ROWS) * 0;
+            int width = tiled->width / CTX_HASH_COLS;
+            int height = tiled->height / CTX_HASH_ROWS;
+            Ctx *host = tiled->host[0];
+
+            CtxRasterizer *rasterizer = (CtxRasterizer*)host->backend;
+            int swap_red_green = ((CtxRasterizer*)(host->backend))->swap_red_green;
+            ctx_rasterizer_init (rasterizer,
+                                 host, tiled->backend.ctx, &host->state,
+                                 &tiled->pixels[tiled->width * 4 * y0 + x0 * 4],
+                                 0, 0, 1, 1,
+                                 tiled->width*4, CTX_FORMAT_BGRA8,
+                                 tiled->antialias);
+            ((CtxRasterizer*)(host->backend))->swap_red_green = swap_red_green;
+            if (sdl_icc_length)
+              ctx_colorspace (host, CTX_COLOR_SPACE_DEVICE_RGB, sdl_icc, sdl_icc_length);
+
+            ctx_translate (host, -x0, -y0);
+            ctx_render_ctx (tiled->ctx_copy, host);
+          }
 #endif
-      case CTX_RECTANGLE:
-      case CTX_ROUND_RECTANGLE: // XXX - arcs
-        cairo_rectangle (cr, c->rectangle.x, c->rectangle.y,
-                         c->rectangle.width, c->rectangle.height);
-        break;
-      case CTX_SET_PIXEL:
-        cairo_set_source_rgba (cr, ctx_u8_to_float (ctx_arg_u8 (0) ),
-                               ctx_u8_to_float (ctx_arg_u8 (1) ),
-                               ctx_u8_to_float (ctx_arg_u8 (2) ),
-                               ctx_u8_to_float (ctx_arg_u8 (3) ) );
-        cairo_rectangle (cr, ctx_arg_u16 (2), ctx_arg_u16 (3), 1, 1);
-        cairo_fill (cr);
-        break;
-      case CTX_FILL:
-        if (ctx_cairo->preserve)
-        {
-          cairo_fill_preserve (cr);
-          ctx_cairo->preserve = 0;
-        }
-        else
-        {
-          cairo_fill (cr);
-        }
-        break;
-      case CTX_STROKE:
-        if (ctx_cairo->preserve)
-        {
-          cairo_stroke_preserve (cr);
-          ctx_cairo->preserve = 0;
-        }
-        else
-        {
-          cairo_stroke (cr);
-        }
-        break;
-      case CTX_IDENTITY:
-        cairo_identity_matrix (cr);
-        break;
-      case CTX_CLIP:
-        if (ctx_cairo->preserve)
-        {
-          cairo_clip_preserve (cr);
-          ctx_cairo->preserve = 0;
-        }
-        else
-        {
-          cairo_clip (cr);
-        }
-        break;
-        break;
-      case CTX_BEGIN_PATH:
-        cairo_new_path (cr);
-        break;
-      case CTX_CLOSE_PATH:
-        cairo_close_path (cr);
-        break;
-      case CTX_SAVE:
-        cairo_save (cr);
-        break;
-      case CTX_RESTORE:
-        cairo_restore (cr);
-        break;
-      case CTX_FONT_SIZE:
-        cairo_set_font_size (cr, ctx_arg_float (0) );
-        break;
-      case CTX_MITER_LIMIT:
-        cairo_set_miter_limit (cr, ctx_arg_float (0) );
-        break;
-      case CTX_LINE_CAP:
-        {
-          int cairo_val = CAIRO_LINE_CAP_SQUARE;
-          switch (ctx_arg_u8 (0) )
-            {
-              case CTX_CAP_ROUND:
-                cairo_val = CAIRO_LINE_CAP_ROUND;
-                break;
-              case CTX_CAP_SQUARE:
-                cairo_val = CAIRO_LINE_CAP_SQUARE;
-                break;
-              case CTX_CAP_NONE:
-                cairo_val = CAIRO_LINE_CAP_BUTT;
-                break;
-            }
-          cairo_set_line_cap (cr, cairo_val);
-        }
-        break;
-      case CTX_BLEND_MODE:
-        {
-          // does not map to cairo
-        }
-        break;
-      case CTX_COMPOSITING_MODE:
+    cnd_broadcast (&tiled->cond);
+  }
+  else
+  {
+    fprintf (stderr, "{drip}");
+  }
+  mtx_unlock (&tiled->mtx);
+  ctx_drawlist_clear (ctx);
+}
+
+static
+void ctx_tiled_render_fun (void **data)
+{
+  int      no = (size_t)data[0];
+  CtxTiled *tiled = data[1];
+
+  while (!tiled->quit)
+  {
+    Ctx *host = tiled->host[no];
+
+    mtx_lock (&tiled->mtx);
+    cnd_wait(&tiled->cond, &tiled->mtx);
+    mtx_unlock (&tiled->mtx);
+
+    if (tiled->render_frame != tiled->rendered_frame[no])
+    {
+      int hno = 0;
+      for (int row = 0; row < CTX_HASH_ROWS; row++)
+        for (int col = 0; col < CTX_HASH_COLS; col++, hno++)
         {
-          int cairo_val = CAIRO_OPERATOR_OVER;
-          switch (ctx_arg_u8 (0) )
+          if (tiled->tile_affinity[hno]==no)
+          {
+            int x0 = ((tiled->width)/CTX_HASH_COLS) * col;
+            int y0 = ((tiled->height)/CTX_HASH_ROWS) * row;
+            int width = tiled->width / CTX_HASH_COLS;
+            int height = tiled->height / CTX_HASH_ROWS;
+
+            CtxRasterizer *rasterizer = (CtxRasterizer*)host->backend;
+#if 1 // merge horizontally adjacent tiles of same affinity into one job
+            while (col + 1 < CTX_HASH_COLS &&
+                   tiled->tile_affinity[hno+1] == no)
             {
-              case CTX_COMPOSITE_SOURCE_OVER:
-                cairo_val = CAIRO_OPERATOR_OVER;
-                break;
-              case CTX_COMPOSITE_COPY:
-                cairo_val = CAIRO_OPERATOR_SOURCE;
-                break;
+              width += tiled->width / CTX_HASH_COLS;
+              col++;
+              hno++;
             }
-          cairo_set_operator (cr, cairo_val);
+#endif
+            int swap_red_green = ((CtxRasterizer*)(host->backend))->swap_red_green;
+            ctx_rasterizer_init (rasterizer,
+                                 host, tiled->backend.ctx, &host->state,
+                                 &tiled->pixels[tiled->width * 4 * y0 + x0 * 4],
+                                 0, 0, width, height,
+                                 tiled->width*4, CTX_FORMAT_BGRA8,
+                                 tiled->antialias);
+            ((CtxRasterizer*)(host->backend))->swap_red_green = swap_red_green;
+            if (sdl_icc_length)
+              ctx_colorspace (host, CTX_COLOR_SPACE_DEVICE_RGB, sdl_icc, sdl_icc_length);
+
+            ctx_translate (host, -x0, -y0);
+            ctx_render_ctx (tiled->ctx_copy, host);
+          }
         }
-        break;
-      case CTX_LINE_JOIN:
+      tiled->rendered_frame[no] = tiled->render_frame;
+    }
+  }
+  tiled->thread_quit++; // need atomic?
+}
+
+
+static int       ctx_tiled_cursor_drawn   = 0;
+static int       ctx_tiled_cursor_drawn_x = 0;
+static int       ctx_tiled_cursor_drawn_y = 0;
+static CtxCursor ctx_tiled_cursor_drawn_shape = 0;
+
+
+#define CTX_FB_HIDE_CURSOR_FRAMES 200
+
+static int ctx_tiled_cursor_same_pos = CTX_FB_HIDE_CURSOR_FRAMES;
+
+static inline int ctx_is_in_cursor (int x, int y, int size, CtxCursor shape)
+{
+  switch (shape)
+  {
+    case CTX_CURSOR_ARROW:
+      if (x > ((size * 4)-y*4)) return 0;
+      if (x < y && x > y / 16)
+        return 1;
+      return 0;
+
+    case CTX_CURSOR_RESIZE_SE:
+    case CTX_CURSOR_RESIZE_NW:
+    case CTX_CURSOR_RESIZE_SW:
+    case CTX_CURSOR_RESIZE_NE:
+      {
+        float theta = -45.0/180 * M_PI;
+        float cos_theta;
+        float sin_theta;
+
+        if ((shape == CTX_CURSOR_RESIZE_SW) ||
+            (shape == CTX_CURSOR_RESIZE_NE))
         {
-          int cairo_val = CAIRO_LINE_JOIN_ROUND;
-          switch (ctx_arg_u8 (0) )
-            {
-              case CTX_JOIN_ROUND:
-                cairo_val = CAIRO_LINE_JOIN_ROUND;
-                break;
-              case CTX_JOIN_BEVEL:
-                cairo_val = CAIRO_LINE_JOIN_BEVEL;
-                break;
-              case CTX_JOIN_MITER:
-                cairo_val = CAIRO_LINE_JOIN_MITER;
-                break;
-            }
-          cairo_set_line_join (cr, cairo_val);
+          theta = -theta;
+          cos_theta = ctx_cosf (theta);
+          sin_theta = ctx_sinf (theta);
         }
-        break;
-      case CTX_LINEAR_GRADIENT:
+        else
         {
-          if (ctx_cairo->pat)
-            {
-              cairo_pattern_destroy (ctx_cairo->pat);
-              ctx_cairo->pat = NULL;
-            }
-          ctx_cairo->pat = cairo_pattern_create_linear (ctx_arg_float (0), ctx_arg_float (1),
-                           ctx_arg_float (2), ctx_arg_float (3) );
-          cairo_pattern_add_color_stop_rgba (ctx_cairo->pat, 0, 0, 0, 0, 1);
-          cairo_pattern_add_color_stop_rgba (ctx_cairo->pat, 1, 1, 1, 1, 1);
-          cairo_set_source (cr, ctx_cairo->pat);
+          cos_theta = ctx_cosf (theta);
+          sin_theta = ctx_sinf (theta);
         }
-        break;
-      case CTX_RADIAL_GRADIENT:
+        int rot_x = x * cos_theta - y * sin_theta;
+        int rot_y = y * cos_theta + x * sin_theta;
+        x = rot_x;
+        y = rot_y;
+      }
+      /*FALLTHROUGH*/
+    case CTX_CURSOR_RESIZE_W:
+    case CTX_CURSOR_RESIZE_E:
+    case CTX_CURSOR_RESIZE_ALL:
+      if (abs (x) < size/2 && abs (y) < size/2)
+      {
+        if (abs(y) < size/10)
         {
-          if (ctx_cairo->pat)
-            {
-              cairo_pattern_destroy (ctx_cairo->pat);
-              ctx_cairo->pat = NULL;
-            }
-          ctx_cairo->pat = cairo_pattern_create_radial (ctx_arg_float (0), ctx_arg_float (1),
-                           ctx_arg_float (2), ctx_arg_float (3),
-                           ctx_arg_float (4), ctx_arg_float (5) );
-          cairo_set_source (cr, ctx_cairo->pat);
+          return 1;
         }
+      }
+      if ((abs (x) - size/ (shape == CTX_CURSOR_RESIZE_ALL?2:2.7)) >= 0)
+      {
+        if (abs(y) < (size/2.8)-(abs(x) - (size/2)))
+          return 1;
+      }
+      if (shape != CTX_CURSOR_RESIZE_ALL)
         break;
-      case CTX_GRADIENT_STOP:
-        cairo_pattern_add_color_stop_rgba (ctx_cairo->pat,
-                                           ctx_arg_float (0),
-                                           ctx_u8_to_float (ctx_arg_u8 (4) ),
-                                           ctx_u8_to_float (ctx_arg_u8 (5) ),
-                                           ctx_u8_to_float (ctx_arg_u8 (6) ),
-                                           ctx_u8_to_float (ctx_arg_u8 (7) ) );
-        break;
-        // XXX  implement TEXTURE
-#if 0
-      case CTX_LOAD_IMAGE:
+      /* FALLTHROUGH */
+    case CTX_CURSOR_RESIZE_S:
+    case CTX_CURSOR_RESIZE_N:
+      if (abs (y) < size/2 && abs (x) < size/2)
+      {
+        if (abs(x) < size/10)
         {
-          if (image)
-            {
-              cairo_surface_destroy (image);
-              image = NULL;
-            }
-          if (pat)
-            {
-              cairo_pattern_destroy (pat);
-              pat = NULL;
-            }
-          image = cairo_image_surface_create_from_png (ctx_arg_string() );
-          cairo_set_source_surface (cr, image, ctx_arg_float (0), ctx_arg_float (1) );
+          return 1;
         }
-        break;
+      }
+      if ((abs (y) - size/ (shape == CTX_CURSOR_RESIZE_ALL?2:2.7)) >= 0)
+      {
+        if (abs(x) < (size/2.8)-(abs(y) - (size/2)))
+          return 1;
+      }
+      break;
+#if 0
+    case CTX_CURSOR_RESIZE_ALL:
+      if (abs (x) < size/2 && abs (y) < size/2)
+      {
+        if (abs (x) < size/10 || abs(y) < size/10)
+          return 1;
+      }
+      break;
 #endif
-      case CTX_TEXT:
-        /* XXX: implement some linebreaking/wrap, positioning
-         *      behavior here?
-         */
-        cairo_show_text (cr, ctx_arg_string () );
-        break;
-      case CTX_CONT:
-      case CTX_EDGE:
-      case CTX_DATA:
-      case CTX_DATA_REV:
-      case CTX_FLUSH:
-        break;
-    }
-  ctx_process (ctx_cairo->ctx, entry);
+    default:
+      return (x ^ y) & 1;
+  }
+  return 0;
 }
 
-void ctx_cairo_free (CtxCairo *ctx_cairo)
+static void ctx_tiled_undraw_cursor (CtxTiled *tiled)
 {
-  if (ctx_cairo->pat)
-    { cairo_pattern_destroy (ctx_cairo->pat); }
-  if (ctx_cairo->image)
-    { cairo_surface_destroy (ctx_cairo->image); }
-  free (ctx_cairo);
-}
+    int cursor_size = ctx_height (tiled->backend.ctx) / 28;
 
-void
-ctx_render_cairo (Ctx *ctx, cairo_t *cr)
-{
-  CtxIterator iterator;
-  CtxCommand *command;
-  CtxCairo    ctx_cairo = {{(void*)ctx_cairo_process, NULL, NULL}, ctx, cr, NULL, NULL};
-  ctx_iterator_init (&iterator, &ctx->drawlist, 0,
-                     CTX_ITERATOR_EXPAND_BITPACK);
-  while ( (command = ctx_iterator_next (&iterator) ) )
-    { ctx_cairo_process (&ctx_cairo, command); }
-}
+    if (ctx_tiled_cursor_drawn)
+    {
+      int no = 0;
+      int startx = -cursor_size;
+      int starty = -cursor_size;
+      if (ctx_tiled_cursor_drawn_shape == CTX_CURSOR_ARROW)
+        startx = starty = 0;
 
-Ctx *
-ctx_new_for_cairo (cairo_t *cr)
-{
-  Ctx *ctx = ctx_new ();
-  CtxCairo *ctx_cairo = calloc(sizeof(CtxCairo),1);
-  ctx_cairo->vfuncs.free = (void*)ctx_cairo_free;
-  ctx_cairo->vfuncs.process = (void*)ctx_cairo_process;
-  ctx_cairo->ctx = ctx;
-  ctx_cairo->cr = cr;
+      for (int y = starty; y < cursor_size; y++)
+      for (int x = startx; x < cursor_size; x++, no+=4)
+      {
+        if (x + ctx_tiled_cursor_drawn_x < tiled->width && y + ctx_tiled_cursor_drawn_y < tiled->height)
+        {
+          if (ctx_is_in_cursor (x, y, cursor_size, ctx_tiled_cursor_drawn_shape))
+          {
+            int o = ((ctx_tiled_cursor_drawn_y + y) * tiled->width + (ctx_tiled_cursor_drawn_x + x)) * 4;
+            tiled->fb[o+0]^=0x88;
+            tiled->fb[o+1]^=0x88;
+            tiled->fb[o+2]^=0x88;
+          }
+        }
+      }
 
-  ctx_set_renderer (ctx, (void*)ctx_cairo);
-  return ctx;
+    ctx_tiled_cursor_drawn = 0;
+    }
 }
 
-#endif
+static void ctx_tiled_draw_cursor (CtxTiled *tiled)
+{
+    int cursor_x    = ctx_pointer_x (tiled->backend.ctx);
+    int cursor_y    = ctx_pointer_y (tiled->backend.ctx);
+    int cursor_size = ctx_height (tiled->backend.ctx) / 28;
+    CtxCursor cursor_shape = tiled->backend.ctx->cursor;
+    int no = 0;
 
-#if CTX_EVENTS
+    if (cursor_x == ctx_tiled_cursor_drawn_x &&
+        cursor_y == ctx_tiled_cursor_drawn_y &&
+        cursor_shape == ctx_tiled_cursor_drawn_shape)
+      ctx_tiled_cursor_same_pos ++;
+    else
+      ctx_tiled_cursor_same_pos = 0;
 
-static int ctx_find_largest_matching_substring
- (const char *X, const char *Y, int m, int n, int *offsetY, int *offsetX) 
-{ 
-  int longest_common_suffix[2][n+1];
-  int best_length = 0;
-  for (int i=0; i<=m; i++)
-  {
-    for (int j=0; j<=n; j++)
+    if (ctx_tiled_cursor_same_pos >= CTX_FB_HIDE_CURSOR_FRAMES)
     {
-      if (i == 0 || j == 0 || !(X[i-1] == Y[j-1]))
-      {
-        longest_common_suffix[i%2][j] = 0;
-      }
-      else
+      if (ctx_tiled_cursor_drawn)
+        ctx_tiled_undraw_cursor (tiled);
+      return;
+    }
+
+    /* no need to flicker when stationary, motion flicker can also be removed
+     * by combining the previous and next position masks when a motion has
+     * occurred.
+     */
+    if (ctx_tiled_cursor_same_pos && ctx_tiled_cursor_drawn)
+      return;
+
+    ctx_tiled_undraw_cursor (tiled);
+
+    no = 0;
+
+    int startx = -cursor_size;
+    int starty = -cursor_size;
+
+    if (cursor_shape == CTX_CURSOR_ARROW)
+      startx = starty = 0;
+
+    for (int y = starty; y < cursor_size; y++)
+      for (int x = startx; x < cursor_size; x++, no+=4)
       {
-          longest_common_suffix[i%2][j] = longest_common_suffix[(i-1)%2][j-1] + 1;
-          if (best_length < longest_common_suffix[i%2][j])
+        if (x + cursor_x < tiled->width && y + cursor_y < tiled->height)
+        {
+          if (ctx_is_in_cursor (x, y, cursor_size, cursor_shape))
           {
-            best_length = longest_common_suffix[i%2][j];
-            if (offsetY) *offsetY = j - best_length;
-            if (offsetX) *offsetX = i - best_length;
+            int o = ((cursor_y + y) * tiled->width + (cursor_x + x)) * 4;
+            tiled->fb[o+0]^=0x88;
+            tiled->fb[o+1]^=0x88;
+            tiled->fb[o+2]^=0x88;
           }
+        }
       }
-    }
-  }
-  return best_length;
-} 
+    ctx_tiled_cursor_drawn = 1;
+    ctx_tiled_cursor_drawn_x = cursor_x;
+    ctx_tiled_cursor_drawn_y = cursor_y;
+    ctx_tiled_cursor_drawn_shape = cursor_shape;
+}
 
-typedef struct CtxSpan {
-  int from_prev;
-  int start;
-  int length;
-} CtxSpan;
+#endif
+#if CTX_EVENTS
 
-#define CHUNK_SIZE 32
-#define MIN_MATCH  7        // minimum match length to be encoded
-#define WINDOW_PADDING 16   // look-aside amount
 
-#if 0
-static void _dassert(int line, int condition, const char *str, int foo, int bar, int baz)
-{
-  if (!condition)
-  {
-    FILE *f = fopen ("/tmp/cdebug", "a");
-    fprintf (f, "%i: %s    %i %i %i\n", line, str, foo, bar, baz);
-    fclose (f);
-  }
-}
-#define dassert(cond, foo, bar, baz) _dassert(__LINE__, cond, #cond, foo, bar ,baz)
-#endif
-#define dassert(cond, foo, bar, baz)
+#define evsource_has_event(es)   (es)->has_event((es))
+#define evsource_get_event(es)   (es)->get_event((es))
+#define evsource_destroy(es)     do{if((es)->destroy)(es)->destroy((es));}while(0)
+#define evsource_set_coord(es,x,y) do{if((es)->set_coord)(es)->set_coord((es),(x),(y));}while(0)
+#define evsource_get_fd(es)      ((es)->get_fd?(es)->get_fd((es)):0)
 
-/* XXX repeated substring matching is slow, we'll be
- * better off with a hash-table with linked lists of
- * matching 3-4 characters in previous.. or even
- * a naive approach that expects rough alignment..
- */
-static char *encode_in_terms_of_previous (
-                const char *src,  int src_len,
-                const char *prev, int prev_len,
-                int *out_len,
-                int max_ticks)
+static int mice_has_event ();
+static char *mice_get_event ();
+static void mice_destroy ();
+static int mice_get_fd (EvSource *ev_source);
+static void mice_set_coord (EvSource *ev_source, double x, double y);
+
+static EvSource ctx_ev_src_mice = {
+  NULL,
+  (void*)mice_has_event,
+  (void*)mice_get_event,
+  (void*)mice_destroy,
+  mice_get_fd,
+  mice_set_coord
+};
+
+typedef struct Mice
 {
-  CtxString *string = ctx_string_new ("");
-  CtxList *encoded_list = NULL;
+  int     fd;
+  double  x;
+  double  y;
+  int     button;
+  int     prev_state;
+} Mice;
 
-  /* TODO : make expected position offset in prev slide based on
-   * matches and not be constant */
+Mice *_mrg_evsrc_coord = NULL;
+static int _ctx_mice_fd = 0;
 
-  long ticks_start = ctx_ticks ();
-  int start = 0;
-  int length = CHUNK_SIZE;
-  for (start = 0; start < src_len; start += length)
+static Mice  mice;
+static Mice* mrg_mice_this = &mice;
+
+static int mmm_evsource_mice_init ()
+{
+  unsigned char reset[]={0xff};
+  /* need to detect which event */
+
+  mrg_mice_this->prev_state = 0;
+  mrg_mice_this->fd = open ("/dev/input/mice", O_RDONLY | O_NONBLOCK);
+  if (mrg_mice_this->fd == -1)
   {
-    CtxSpan *span = calloc (sizeof (CtxSpan), 1);
-    span->start = start;
-    if (start + length > src_len)
-      span->length = src_len - start;
-    else
-      span->length = length;
-    span->from_prev = 0;
-    ctx_list_append (&encoded_list, span);
+    fprintf (stderr, "error opening /dev/input/mice device, maybe add user to input group if such group 
exist, or otherwise make the rights be satisfied.\n");
+    return -1;
   }
-
-  for (CtxList *l = encoded_list; l; l = l->next)
+  if (write (mrg_mice_this->fd, reset, 1) == -1)
   {
-    CtxSpan *span = l->data;
-    if (!span->from_prev)
-    {
-      if (span->length >= MIN_MATCH)
-      {
-         int prev_pos = 0;
-         int curr_pos = 0;
-         assert(1);
-#if 0
-         int prev_start =  0;
-         int prev_window_length = prev_len;
-#else
-         int window_padding = WINDOW_PADDING;
-         int prev_start = span->start - window_padding;
-         if (prev_start < 0)
-           prev_start = 0;
+    // might happen if we're a regular user with only read permission
+  }
+  _ctx_mice_fd = mrg_mice_this->fd;
+  _mrg_evsrc_coord = mrg_mice_this;
+  return 0;
+}
 
-         dassert(span->start>=0 , 0,0,0);
+static void mice_destroy ()  /* destroy hook of ctx_ev_src_mice: closes the mouse device fd */
+{
+  if (mrg_mice_this->fd != -1)  /* fd stays -1 when opening /dev/input/mice failed; nothing to close then */
+    close (mrg_mice_this->fd);
+}
 
-         int prev_window_length = prev_len - prev_start;
-         if (prev_window_length > span->length + window_padding * 2 + span->start)
-           prev_window_length = span->length + window_padding * 2 + span->start;
-#endif
-         int match_len = 0;
-         if (prev_window_length > 0)
-           match_len = ctx_find_largest_matching_substring(prev + prev_start, src + span->start, 
prev_window_length, span->length, &curr_pos, &prev_pos);
-#if 1
-         prev_pos += prev_start;
-#endif
+static int mice_has_event ()
+{
+  struct timeval tv;
+  int retval;
 
-         if (match_len >= MIN_MATCH)
-         {
-            int start  = span->start;
-            int length = span->length;
+  if (mrg_mice_this->fd == -1)
+    return 0;
 
-            span->from_prev = 1;
-            span->start     = prev_pos;
-            span->length    = match_len;
-            dassert (span->start >= 0, prev_pos, prev_start, span->start);
-            dassert (span->length > 0, prev_pos, prev_start, span->length);
+  fd_set rfds;
+  FD_ZERO (&rfds);
+  FD_SET(mrg_mice_this->fd, &rfds);
+  tv.tv_sec = 0; tv.tv_usec = 0;
+  retval = select (mrg_mice_this->fd+1, &rfds, NULL, NULL, &tv);
+  if (retval == 1)
+    return FD_ISSET (mrg_mice_this->fd, &rfds);
+  return 0;
+}
 
-            if (curr_pos)
-            {
-              CtxSpan *prev = calloc (sizeof (CtxSpan), 1);
-              prev->start = start;
-              prev->length =  curr_pos;
-            dassert (prev->start >= 0, prev_pos, prev_start, prev->start);
-            dassert (prev->length > 0, prev_pos, prev_start, prev->length);
-              prev->from_prev = 0;
-              ctx_list_insert_before (&encoded_list, l, prev);
-            }
+static char *mice_get_event ()
+{
+  const char *ret = "pm";
+  double relx, rely;
+  signed char buf[3];
+  int n_read = 0;
+  CtxTiled *tiled = (void*)ctx_ev_src_mice.priv;
+  n_read = read (mrg_mice_this->fd, buf, 3);
+  if (n_read == 0)
+     return strdup ("");
+  relx = buf[1];
+  rely = -buf[2];
+
+  if (relx < 0)
+  {
+    if (relx > -6)
+    relx = - relx*relx;
+    else
+    relx = -36;
+  }
+  else
+  {
+    if (relx < 6)
+    relx = relx*relx;
+    else
+    relx = 36;
+  }
 
+  if (rely < 0)
+  {
+    if (rely > -6)
+    rely = - rely*rely;
+    else
+    rely = -36;
+  }
+  else
+  {
+    if (rely < 6)
+    rely = rely*rely;
+    else
+    rely = 36;
+  }
 
-            if (match_len + curr_pos < start + length)
-            {
-              CtxSpan *next = calloc (sizeof (CtxSpan), 1);
-              next->start = start + curr_pos + match_len;
-              next->length = (start + length) - next->start;
-            dassert (next->start >= 0, prev_pos, prev_start, next->start);
-      //    dassert (next->length > 0, prev_pos, prev_start, next->length);
-              next->from_prev = 0;
-              if (next->length)
-              {
-                if (l->next)
-                  ctx_list_insert_before (&encoded_list, l->next, next);
-                else
-                  ctx_list_append (&encoded_list, next);
-              }
-              else
-                free (next);
-            }
+  mrg_mice_this->x += relx;
+  mrg_mice_this->y += rely;
 
-            if (curr_pos) // step one item back for forloop
-            {
-              CtxList *tmp = encoded_list;
-              int found = 0;
-              while (!found && tmp && tmp->next)
-              {
-                if (tmp->next == l)
-                {
-                  l = tmp;
-                  break;
-                }
-                tmp = tmp->next;
-              }
-            }
-         }
-      }
+  if (mrg_mice_this->x < 0)
+    mrg_mice_this->x = 0;
+  if (mrg_mice_this->y < 0)
+    mrg_mice_this->y = 0;
+  if (mrg_mice_this->x >= tiled->width)
+    mrg_mice_this->x = tiled->width -1;
+  if (mrg_mice_this->y >= tiled->height)
+    mrg_mice_this->y = tiled->height -1;
+  int button = 0;
+  
+  if ((mrg_mice_this->prev_state & 1) != (buf[0] & 1))
+    {
+      if (buf[0] & 1)
+        {
+          ret = "pp";
+        }
+      else
+        {
+          ret = "pr";
+        }
+      button = 1;
     }
-
-    if (ctx_ticks ()-ticks_start > (unsigned long)max_ticks)
-      break;
+  else if (buf[0] & 1)
+  {
+    ret = "pd";
+    button = 1;
   }
 
-  /* merge adjecant prev span references  */
+  if (!button)
   {
-    for (CtxList *l = encoded_list; l; l = l->next)
+    if ((mrg_mice_this->prev_state & 2) != (buf[0] & 2))
     {
-      CtxSpan *span = l->data;
-again:
-      if (l->next)
-      {
-        CtxSpan *next_span = l->next->data;
-        if (span->from_prev && next_span->from_prev &&
-            span->start + span->length == 
-            next_span->start)
+      if (buf[0] & 2)
         {
-           span->length += next_span->length;
-           ctx_list_remove (&encoded_list, next_span);
-           goto again;
+          ret = "pp";
         }
-      }
+      else
+        {
+          ret = "pr";
+        }
+      button = 3;
+    }
+    else if (buf[0] & 2)
+    {
+      ret = "pd";
+      button = 3;
     }
   }
 
-  while (encoded_list)
+  if (!button)
   {
-    CtxSpan *span = encoded_list->data;
-    if (span->from_prev)
-    {
-      char ref[128];
-      sprintf (ref, "%c%i %i%c", CTX_CODEC_CHAR, span->start, span->length, CTX_CODEC_CHAR);
-      ctx_string_append_data (string, ref, strlen(ref));
-    }
-    else
+    if ((mrg_mice_this->prev_state & 4) != (buf[0] & 4))
     {
-      for (int i = span->start; i< span->start+span->length; i++)
-      {
-        if (src[i] == CTX_CODEC_CHAR)
+      if (buf[0] & 4)
         {
-          char bytes[2]={CTX_CODEC_CHAR, CTX_CODEC_CHAR};
-          ctx_string_append_data (string, bytes, 2);
+          ret = "pp";
         }
-        else
+      else
         {
-          ctx_string_append_data (string, &src[i], 1);
+          ret = "pr";
         }
-      }
+      button = 2;
+    }
+    else if (buf[0] & 4)
+    {
+      ret = "pd";
+      button = 2;
     }
-    free (span);
-    ctx_list_remove (&encoded_list, span);
   }
 
-  char *ret = string->str;
-  if (out_len) *out_len = string->length;
-  ctx_string_free (string, 0);
-  return ret;
+  mrg_mice_this->prev_state = buf[0];
+
+  {
+    char *r = malloc (64);
+    sprintf (r, "%s %.0f %.0f %i", ret, mrg_mice_this->x, mrg_mice_this->y, button);
+    return r;
+  }
+
+  return NULL;
 }
 
-#if 0 // for documentation/reference purposes
-static char *decode_ctx (const char *encoded, int enc_len, const char *prev, int prev_len, int *out_len)
+static int mice_get_fd (EvSource *ev_source)
 {
-  CtxString *string = ctx_string_new ("");
-  char reference[32]="";
-  int ref_len = 0;
-  int in_ref = 0;
-  for (int i = 0; i < enc_len; i++)
-  {
-    if (encoded[i] == CTX_CODEC_CHAR)
-    {
-      if (!in_ref)
-      {
-        in_ref = 1;
-      }
-      else
-      {
-        int start = atoi (reference);
-        int len = 0;
-        if (strchr (reference, ' '))
-          len = atoi (strchr (reference, ' ')+1);
+  return mrg_mice_this->fd;
+}
 
-        if (start < 0)start = 0;
-        if (start >= prev_len)start = prev_len-1;
-        if (len + start > prev_len)
-          len = prev_len - start;
+static void mice_set_coord (EvSource *ev_source, double x, double y)
+{
+  mrg_mice_this->x = x;
+  mrg_mice_this->y = y;
+}
 
-        if (start == 0 && len == 0)
-          ctx_string_append_byte (string, CTX_CODEC_CHAR);
-        else
-          ctx_string_append_data (string, prev + start, len);
-        ref_len = 0;
-        in_ref = 0;
-      }
-    }
-    else
+static EvSource *evsource_mice_new (void)
+{
+  if (mmm_evsource_mice_init () == 0)
     {
-      if (in_ref)
-      {
-        if (ref_len < 16)
-        {
-          reference[ref_len++] = encoded[i];
-          reference[ref_len] = 0;
-        }
-      }
-      else
-      ctx_string_append_data (string, &encoded[i], 1);
+      mrg_mice_this->x = 0;
+      mrg_mice_this->y = 0;
+      return &ctx_ev_src_mice;
     }
-  }
-  char *ret = string->str;
-  if (out_len) *out_len = string->length;
-  ctx_string_free (string, 0);
-  return ret;
+  return NULL;
 }
-#endif
 
-#define CTX_START_STRING "U\n"  // or " reset "
-#define CTX_END_STRING   "\nX"  // or "\ndone"
-#define CTX_END_STRING2  "\n\e"
+static int evsource_kb_has_event (void);
+static char *evsource_kb_get_event (void);
+static void evsource_kb_destroy (int sign);
+static int evsource_kb_get_fd (void);
 
-int ctx_frame_ack = -1;
-static char *prev_frame_contents = NULL;
-static int   prev_frame_len = 0;
+/* kept out of struct to be reachable by atexit */
+static EvSource ctx_ev_src_kb = {
+  NULL,
+  (void*)evsource_kb_has_event,
+  (void*)evsource_kb_get_event,
+  (void*)evsource_kb_destroy,
+  (void*)evsource_kb_get_fd,
+  NULL
+};
+
+static struct termios orig_attr;
 
-static void ctx_ctx_flush (CtxCtx *ctxctx)
+static void real_evsource_kb_destroy (int sign)
 {
-#if 0
-  FILE *debug = fopen ("/tmp/ctx-debug", "a");
-  fprintf (debug, "------\n");
-#endif
+  static int done = 0;
 
-  if (ctx_native_events)
-    fprintf (stdout, "\e[?201h");
-  fprintf (stdout, "\e[H\e[?25l\e[?200h");
-#if 1
-  fprintf (stdout, CTX_START_STRING);
-  ctx_render_stream (ctxctx->ctx, stdout, 0);
-  fprintf (stdout, CTX_END_STRING);
-#else
+  if (sign == 0)
+    return;
+
+  if (done)
+    return;
+  done = 1;
+
+  switch (sign)
   {
-    int cur_frame_len = 0;
-    char *rest = ctx_render_string (ctxctx->ctx, 0, &cur_frame_len);
-    char *cur_frame_contents = malloc (cur_frame_len + strlen(CTX_START_STRING) + strlen (CTX_END_STRING) + 
1);
+    case  -11:break; /* will be called from atexit with sign==-11 */
+    case   SIGSEGV: break;//fprintf (stderr, " SIGSEGV\n");break;
+    case   SIGABRT: fprintf (stderr, " SIGABRT\n");break;
+    case   SIGBUS:  fprintf (stderr, " SIGBUS\n");break;
+    case   SIGKILL: fprintf (stderr, " SIGKILL\n");break;
+    case   SIGINT:  fprintf (stderr, " SIGINT\n");break;
+    case   SIGTERM: fprintf (stderr, " SIGTERM\n");break;
+    case   SIGQUIT: fprintf (stderr, " SIGQUIT\n");break;
+    default: fprintf (stderr, "sign: %i\n", sign);
+             fprintf (stderr, "%i %i %i %i %i %i %i\n", SIGSEGV, SIGABRT, SIGBUS, SIGKILL, SIGINT, SIGTERM, 
SIGQUIT);
+  }
+  tcsetattr (STDIN_FILENO, TCSAFLUSH, &orig_attr);
+  //fprintf (stderr, "evsource kb destroy\n");
+}
 
-    cur_frame_contents[0]=0;
-    strcat (cur_frame_contents, CTX_START_STRING);
-    strcat (cur_frame_contents, rest);
-    strcat (cur_frame_contents, CTX_END_STRING);
-    free (rest);
-    cur_frame_len += strlen (CTX_START_STRING) + strlen (CTX_END_STRING);
+static void evsource_kb_destroy (int sign)  /* destroy hook of ctx_ev_src_kb; the sign argument is ignored */
+{
+  real_evsource_kb_destroy (-11);  /* -11 selects the silent case there; terminal attributes are then restored */
+}
 
-    if (prev_frame_contents && 0)  // XXX : 
+static int evsource_kb_init ()
+{
+//  ioctl(STDIN_FILENO, KDSKBMODE, K_RAW);
+  //atexit ((void*) real_evsource_kb_destroy);
+  signal (SIGSEGV, (void*) real_evsource_kb_destroy);
+  signal (SIGABRT, (void*) real_evsource_kb_destroy);
+  signal (SIGBUS,  (void*) real_evsource_kb_destroy);
+  signal (SIGKILL, (void*) real_evsource_kb_destroy);
+  signal (SIGINT,  (void*) real_evsource_kb_destroy);
+  signal (SIGTERM, (void*) real_evsource_kb_destroy);
+  signal (SIGQUIT, (void*) real_evsource_kb_destroy);
+
+  struct termios raw;
+  if (tcgetattr (STDIN_FILENO, &orig_attr) == -1)
     {
-      char *encoded;
-      int encoded_len = 0;
-      //uint64_t ticks_start = ctx_ticks ();
+      fprintf (stderr, "error initializing keyboard\n");
+      return -1;
+    }
+  raw = orig_attr;
 
-      encoded = encode_in_terms_of_previous (cur_frame_contents, cur_frame_len, prev_frame_contents, 
prev_frame_len, &encoded_len, 1000 * 10);
-//    encoded = strdup (cur_frame_contents);
-//    encoded_len = strlen (encoded);
-      //uint64_t ticks_end = ctx_ticks ();
+  cfmakeraw (&raw);
 
-      fwrite (encoded, encoded_len, 1, stdout);
-//    fwrite (encoded, cur_frame_len, 1, stdout);
-#if 0
-      fprintf (debug, "---prev-frame(%i)\n%s", (int)strlen(prev_frame_contents), prev_frame_contents);
-      fprintf (debug, "---cur-frame(%i)\n%s", (int)strlen(cur_frame_contents), cur_frame_contents);
-      fprintf (debug, "---encoded(%.4f %i)---\n%s--------\n",
-                      (ticks_end-ticks_start)/1000.0,
-                      (int)strlen(encoded), encoded);
-#endif
-      free (encoded);
-    }
-    else
+  raw.c_cc[VMIN] = 1; raw.c_cc[VTIME] = 0; /* 1 byte, no timer */
+  if (tcsetattr (STDIN_FILENO, TCSAFLUSH, &raw) < 0)
+    return 0; // XXX? return other value?
+
+  return 0;
+}
+static int evsource_kb_has_event (void)  /* returns 1 when stdin has input waiting, 0 otherwise */
+{
+  struct timeval tv;
+  int retval;
+
+  fd_set rfds;
+  FD_ZERO (&rfds);
+  FD_SET(STDIN_FILENO, &rfds);  /* stdin is the only descriptor being watched */
+  tv.tv_sec = 0; tv.tv_usec = 0;  /* zero timeout: select() polls and returns at once */
+  retval = select (STDIN_FILENO+1, &rfds, NULL, NULL, &tv);
+  return retval == 1;  /* a select() error (-1) or timeout (0) both report "no event" */
+}
+
+/* note that a nick can have multiple occurrences, the labels
+ * should be kept the same for all occurrences of a combination.
+ *
+ * this table is taken from nchanterm.
+ */
+typedef struct MmmKeyCode {
+  char *nick;          /* programmers name for key */
+  char  sequence[10];  /* terminal sequence */
+} MmmKeyCode;
+static const MmmKeyCode ufb_keycodes[]={ /* nick <-> terminal byte sequence; scanned in order by fb_keyboard_match_keycode, first exact match wins; ends with a NULL nick */
+  {"up",                  "\e[A"},
+  {"down",                "\e[B"},
+  {"right",               "\e[C"},
+  {"left",                "\e[D"},
+
+  {"shift-up",            "\e[1;2A"},
+  {"shift-down",          "\e[1;2B"},
+  {"shift-right",         "\e[1;2C"},
+  {"shift-left",          "\e[1;2D"},
+
+  {"alt-up",              "\e[1;3A"},
+  {"alt-down",            "\e[1;3B"},
+  {"alt-right",           "\e[1;3C"},
+  {"alt-left",            "\e[1;3D"},
+  {"alt-shift-up",         "\e[1;4A"},
+  {"alt-shift-down",       "\e[1;4B"},
+  {"alt-shift-right",      "\e[1;4C"},
+  {"alt-shift-left",       "\e[1;4D"},
+
+  {"control-up",          "\e[1;5A"},
+  {"control-down",        "\e[1;5B"},
+  {"control-right",       "\e[1;5C"},
+  {"control-left",        "\e[1;5D"},
+
+  /* putty */
+  {"control-up",          "\eOA"},
+  {"control-down",        "\eOB"},
+  {"control-right",       "\eOC"},
+  {"control-left",        "\eOD"},
+
+  {"control-shift-up",    "\e[1;6A"},
+  {"control-shift-down",  "\e[1;6B"},
+  {"control-shift-right", "\e[1;6C"},
+  {"control-shift-left",  "\e[1;6D"},
+
+  {"control-up",          "\eOa"},
+  {"control-down",        "\eOb"},
+  {"control-right",       "\eOc"},
+  {"control-left",        "\eOd"},
+
+  {"shift-up",            "\e[a"},
+  {"shift-down",          "\e[b"},
+  {"shift-right",         "\e[c"},
+  {"shift-left",          "\e[d"},
+
+  {"insert",              "\e[2~"},
+  {"delete",              "\e[3~"},
+  {"page-up",             "\e[5~"},
+  {"page-down",           "\e[6~"},
+  {"home",                "\eOH"},
+  {"end",                 "\eOF"},
+  {"home",                "\e[H"},
+  {"end",                 "\e[F"},
+ {"control-delete",       "\e[3;5~"},
+  {"shift-delete",        "\e[3;2~"},
+  {"control-shift-delete","\e[3;6~"},
+
+  {"F1",         "\e[25~"},
+  {"F2",         "\e[26~"},
+  {"F3",         "\e[27~"},
+  {"F4",         "\e[26~"}, /* NOTE(review): same sequence as the F2 row above, so this row is never returned - confirm the intended sequence */
+
+
+  {"F1",         "\e[11~"},
+  {"F2",         "\e[12~"},
+  {"F3",         "\e[13~"},
+  {"F4",         "\e[14~"},
+  {"F1",         "\eOP"},
+  {"F2",         "\eOQ"},
+  {"F3",         "\eOR"},
+  {"F4",         "\eOS"},
+  {"F5",         "\e[15~"},
+  {"F6",         "\e[16~"},
+  {"F7",         "\e[17~"},
+  {"F8",         "\e[18~"},
+  {"F9",         "\e[19~"},
+  {"F9",         "\e[20~"},
+  {"F10",        "\e[21~"},
+  {"F11",        "\e[22~"},
+  {"F12",        "\e[23~"},
+  {"tab",         {9, '\0'}},
+  {"shift-tab",   {27, 9, '\0'}}, // also generated by alt-tab in linux console
+  {"alt-space",   {27, ' ', '\0'}},
+  {"shift-tab",   "\e[Z"},
+  {"backspace",   {127, '\0'}},
+  {"space",       " "},
+  {"\e",          "\e"},
+  {"return",      {10,0}},
+  {"return",      {13,0}},
+  /* this section could be autogenerated by code */
+  {"control-a",   {1,0}},
+  {"control-b",   {2,0}},
+  {"control-c",   {3,0}},
+  {"control-d",   {4,0}},
+  {"control-e",   {5,0}},
+  {"control-f",   {6,0}},
+  {"control-g",   {7,0}},
+  {"control-h",   {8,0}}, /* backspace? */
+  {"control-i",   {9,0}},
+  {"control-j",   {10,0}},
+  {"control-k",   {11,0}},
+  {"control-l",   {12,0}},
+  {"control-n",   {14,0}},
+  {"control-o",   {15,0}},
+  {"control-p",   {16,0}},
+  {"control-q",   {17,0}},
+  {"control-r",   {18,0}},
+  {"control-s",   {19,0}},
+  {"control-t",   {20,0}},
+  {"control-u",   {21,0}},
+  {"control-v",   {22,0}},
+  {"control-w",   {23,0}},
+  {"control-x",   {24,0}},
+  {"control-y",   {25,0}},
+  {"control-z",   {26,0}},
+  {"alt-`",       "\e`"},
+  {"alt-0",       "\e0"},
+  {"alt-1",       "\e1"},
+  {"alt-2",       "\e2"},
+  {"alt-3",       "\e3"},
+  {"alt-4",       "\e4"},
+  {"alt-5",       "\e5"},
+  {"alt-6",       "\e6"},
+  {"alt-7",       "\e7"}, /* backspace? */
+  {"alt-8",       "\e8"},
+  {"alt-9",       "\e9"},
+  {"alt-+",       "\e+"},
+  {"alt--",       "\e-"},
+  {"alt-/",       "\e/"},
+  {"alt-a",       "\ea"},
+  {"alt-b",       "\eb"},
+  {"alt-c",       "\ec"},
+  {"alt-d",       "\ed"},
+  {"alt-e",       "\ee"},
+  {"alt-f",       "\ef"},
+  {"alt-g",       "\eg"},
+  {"alt-h",       "\eh"}, /* backspace? */
+  {"alt-i",       "\ei"},
+  {"alt-j",       "\ej"},
+  {"alt-k",       "\ek"},
+  {"alt-l",       "\el"},
+  {"alt-m",       "\em"},
+  {"alt-n",       "\en"},
+  {"alt-o",       "\eo"},
+  {"alt-p",       "\ep"},
+  {"alt-q",       "\eq"},
+  {"alt-r",       "\er"},
+  {"alt-s",       "\es"},
+  {"alt-t",       "\et"},
+  {"alt-u",       "\eu"},
+  {"alt-v",       "\ev"},
+  {"alt-w",       "\ew"},
+  {"alt-x",       "\ex"},
+  {"alt-y",       "\ey"},
+  {"alt-z",       "\ez"},
+  /* Linux Console  */
+  {"home",       "\e[1~"},
+  {"end",        "\e[4~"},
+  {"F1",         "\e[[A"},
+  {"F2",         "\e[[B"},
+  {"F3",         "\e[[C"},
+  {"F4",         "\e[[D"},
+  {"F5",         "\e[[E"},
+  {"F6",         "\e[[F"},
+  {"F7",         "\e[[G"},
+  {"F8",         "\e[[H"},
+  {"F9",         "\e[[I"},
+  {"F10",        "\e[[J"},
+  {"F11",        "\e[[K"},
+  {"F12",        "\e[[L"},
+  {NULL, }
+};
+static int fb_keyboard_match_keycode (const char *buf, int length, const MmmKeyCode **ret)
+{
+  int i;
+  int matches = 0;
+
+  if (!strncmp (buf, "\e[M", MIN(length,3)))
     {
-      fwrite (cur_frame_contents, cur_frame_len, 1, stdout);
+      if (length >= 6)
+        return 9001;
+      return 2342;
     }
+  for (i = 0; ufb_keycodes[i].nick; i++)
+    if (!strncmp (buf, ufb_keycodes[i].sequence, length))
+      {
+        matches ++;
+        if ((int)strlen (ufb_keycodes[i].sequence) == length && ret)
+          {
+            *ret = &ufb_keycodes[i];
+            return 1;
+          }
+      }
+  if (matches != 1 && ret)
+    *ret = NULL;
+  return matches==1?2:matches;
+}
 
-    if (prev_frame_contents)
-      free (prev_frame_contents);
-    prev_frame_contents = cur_frame_contents;
-    prev_frame_len = cur_frame_len;
-  }
-#endif
-#if 0
-    fclose (debug);
-#endif
-  fprintf (stdout, CTX_END_STRING2);
+//int is_active (void *host)
+//{
+//        return 1;
+//}
 
-  fprintf (stdout, "\e[5n");
-  fflush (stdout);
+static char *evsource_kb_get_event (void)
+{
+  unsigned char buf[20];
+  int length;
 
-  ctx_frame_ack = 0;
-  do {
-     ctx_consume_events (ctxctx->ctx);
-  } while (ctx_frame_ack != 1);
+
+  for (length = 0; length < 10; length ++)
+    if (read (STDIN_FILENO, &buf[length], 1) != -1)
+      {
+        const MmmKeyCode *match = NULL;
+
+        //if (!is_active (ctx_ev_src_kb.priv))
+        //  return NULL;
+
+        /* special case ESC, so that we can use it alone in keybindings */
+        if (length == 0 && buf[0] == 27)
+          {
+            struct timeval tv;
+            fd_set rfds;
+            FD_ZERO (&rfds);
+            FD_SET (STDIN_FILENO, &rfds);
+            tv.tv_sec = 0;
+            tv.tv_usec = 1000 * 120;
+            if (select (STDIN_FILENO+1, &rfds, NULL, NULL, &tv) == 0)
+              return strdup ("escape");
+          }
+
+        switch (fb_keyboard_match_keycode ((void*)buf, length + 1, &match))
+          {
+            case 1: /* unique match */
+              if (!match)
+                return NULL;
+              return strdup (match->nick);
+              break;
+            case 0: /* no matches, bail*/
+             {
+                static char ret[256]="";
+                if (length == 0 && ctx_utf8_len (buf[0])>1) /* read a
+                                                             * single unicode
+                                                             * utf8 character
+                                                             */
+                  {
+                    int bytes = read (STDIN_FILENO, &buf[length+1], ctx_utf8_len(buf[0])-1);
+                    if (bytes)
+                    {
+                      buf[ctx_utf8_len(buf[0])]=0;
+                      strcpy (ret, (void*)buf);
+                    }
+                    return strdup(ret); //XXX: simplify
+                  }
+                if (length == 0) /* ascii */
+                  {
+                    buf[1]=0;
+                    strcpy (ret, (void*)buf);
+                    return strdup(ret);
+                  }
+                sprintf (ret, "unhandled %i:'%c' %i:'%c' %i:'%c' %i:'%c' %i:'%c' %i:'%c' %i:'%c'",
+                    length >=0 ? buf[0] : 0,
+                    length >=0 ? buf[0]>31?buf[0]:'?' : ' ',
+                    length >=1 ? buf[1] : 0,
+                    length >=1 ? buf[1]>31?buf[1]:'?' : ' ',
+                    length >=2 ? buf[2] : 0,
+                    length >=2 ? buf[2]>31?buf[2]:'?' : ' ',
+                    length >=3 ? buf[3] : 0,
+                    length >=3 ? buf[3]>31?buf[3]:'?' : ' ',
+                    length >=4 ? buf[4] : 0,
+                    length >=4 ? buf[4]>31?buf[4]:'?' : ' ',
+                    length >=5 ? buf[5] : 0,
+                    length >=5 ? buf[5]>31?buf[5]:'?' : ' ',
+                    length >=6 ? buf[6] : 0,
+                    length >=6 ? buf[6]>31?buf[6]:'?' : ' '
+                    );
+                return strdup(ret);
+            }
+              return NULL;
+            default: /* continue */
+              break;
+          }
+      }
+    else
+      return strdup("key read eek");
+  return strdup("fail");
 }
 
-void ctx_ctx_free (CtxCtx *ctx)
+static int evsource_kb_get_fd (void)
 {
-  nc_at_exit ();
-  free (ctx);
-  /* we're not destoring the ctx member, this is function is called in ctx' teardown */
+  return STDIN_FILENO;
 }
 
 
-Ctx *ctx_new_ctx (int width, int height)
+static EvSource *evsource_kb_new (void)
 {
-  float font_size = 12.0;
-  Ctx *ctx = ctx_new ();
-  CtxCtx *ctxctx = (CtxCtx*)calloc (sizeof (CtxCtx), 1);
-  fprintf (stdout, "\e[?1049h");
-  fflush (stdout);
-  //fprintf (stderr, "\e[H");
-  //fprintf (stderr, "\e[2J");
-  ctx_native_events = 1;
-  if (width <= 0 || height <= 0)
-  {
-    ctxctx->cols = ctx_terminal_cols ();
-    ctxctx->rows = ctx_terminal_rows ();
-    width  = ctxctx->width  = ctx_terminal_width ();
-    height = ctxctx->height = ctx_terminal_height ();
-    font_size = height / ctxctx->rows;
-    ctx_font_size (ctx, font_size);
-  }
-  else
+  if (evsource_kb_init() == 0)
   {
-    ctxctx->width  = width;
-    ctxctx->height = height;
-    ctxctx->cols   = width / 80;
-    ctxctx->rows   = height / 24;
+    return &ctx_ev_src_kb;
   }
-  ctxctx->ctx = ctx;
-  if (!ctx_native_events)
-    _ctx_mouse (ctx, NC_MOUSE_DRAG);
-  ctx_set_renderer (ctx, ctxctx);
-  ctx_set_size (ctx, width, height);
-  ctxctx->flush = (void(*)(void *))ctx_ctx_flush;
-  ctxctx->free  = (void(*)(void *))ctx_ctx_free;
-  return ctx;
+  return NULL;
 }
 
-void ctx_ctx_pcm (Ctx *ctx);
-
-int ctx_ctx_consume_events (Ctx *ctx)
+#if CTX_BABL
+static int _ctx_babl_inits = 0;
+#endif
+static void ctx_babl_init (void)
 {
-  //int ix, iy;
-  CtxCtx *ctxctx = (CtxCtx*)ctx->renderer;
-  const char *event = NULL;
-#if CTX_AUDIO
-  ctx_ctx_pcm (ctx);
+#if CTX_BABL
+  _ctx_babl_inits ++;
+  if (_ctx_babl_inits == 1)
+  {
+    babl_init ();
+  }
 #endif
-  assert (ctx_native_events);
-
-#if 1
-    { /* XXX : this is a work-around for signals not working properly, we are polling the
-         size with an ioctl per consume-events
-         */
-      struct winsize ws;
-      ioctl(0,TIOCGWINSZ,&ws);
-      ctxctx->cols = ws.ws_col;
-      ctxctx->rows = ws.ws_row;
-      ctxctx->width = ws.ws_xpixel;
-      ctxctx->height = ws.ws_ypixel;
-      ctx_set_size (ctx, ctxctx->width, ctxctx->height);
-    }
+}
+static void ctx_babl_exit (void)
+{
+#if CTX_BABL
+  _ctx_babl_inits --;
+  if (_ctx_babl_inits == 0)
+  {
+    babl_exit ();
+  }
 #endif
-    //char *cmd = ctx_strdup_printf ("touch /tmp/ctx-%ix%i", ctxctx->width, ctxctx->height);
-    //system (cmd);
-    //free (cmd);
+}
 
-  if (ctx_native_events)
+static int event_check_pending (CtxTiled *tiled)
+{
+  int events = 0;
+  for (int i = 0; i < tiled->evsource_count; i++)
+  {
+    while (evsource_has_event (tiled->evsource[i]))
     {
-
-      float x = 0, y = 0;
-      int b = 0;
-      char event_type[128]="";
-      event = ctx_native_get_event (ctx, 1000/120);
-
+      char *event = evsource_get_event (tiled->evsource[i]);
       if (event)
       {
-      sscanf (event, "%s %f %f %i", event_type, &x, &y, &b);
-      if (!strcmp (event_type, "idle"))
-      {
-      }
-      else if (!strcmp (event_type, "mouse-press"))
-      {
-        ctx_pointer_press (ctx, x, y, b, 0);
-      }
-      else if (!strcmp (event_type, "mouse-drag")||
-               !strcmp (event_type, "mouse-motion"))
-      {
-        ctx_pointer_motion (ctx, x, y, b, 0);
-      }
-      else if (!strcmp (event_type, "mouse-release"))
-      {
-        ctx_pointer_release (ctx, x, y, b, 0);
+        if (tiled->vt_active)
+        {
+          ctx_key_press (tiled->backend.ctx, 0, event, 0); // we deliver all events as key-press, the 
key_press handler disambiguates
+          events++;
+        }
+        free (event);
       }
-      else if (!strcmp (event_type, "message"))
-      {
-        ctx_incoming_message (ctx, event + strlen ("message"), 0);
-      } else if (!strcmp (event, "size-changed"))
-      {
-        fprintf (stdout, "\e[H\e[2J\e[?25l");
-        ctxctx->cols = ctx_terminal_cols ();
-        ctxctx->rows = ctx_terminal_rows ();
-        ctxctx->width  = ctx_terminal_width ();
-        ctxctx->height = ctx_terminal_height ();
-
-        //system ("touch /tmp/ctx-abc");
+    }
+  }
+  return events;
+}
 
-        ctx_set_size (ctx, ctxctx->width, ctxctx->height);
+#endif
 
-        if (prev_frame_contents)
-          free (prev_frame_contents);
-        prev_frame_contents = NULL;
-        prev_frame_len = 0;
-        ctx_set_dirty (ctx, 1);
+#if CTX_EVENTS
+#if CTX_HEADLESS
 
-      //   ctx_key_press(ctx,0,"size-changed",0);
-      }
-      else if (!strcmp (event_type, "keyup"))
-      {
-        char buf[4]={ x, 0 };
-        ctx_key_up (ctx, (int)x, buf, 0);
-      }
-      else if (!strcmp (event_type, "keydown"))
-      {
-        char buf[4]={ x, 0 };
-        ctx_key_down (ctx, (int)x, buf, 0);
-      }
-      else
-      {
-        ctx_key_press (ctx, 0, event, 0);
-      }
-      }
-    }
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <signal.h>
 
-  return 1;
+static char *ctx_fb_clipboard = NULL;
+static void ctx_headless_set_clipboard (Ctx *ctx, const char *text)  /* replaces the stored clipboard text; ctx is not used */
+{
+  if (ctx_fb_clipboard)
+    free (ctx_fb_clipboard);  /* drop the previously stored copy */
+  ctx_fb_clipboard = NULL;
+  if (text)  /* a NULL text just leaves the clipboard cleared */
+  {
+    ctx_fb_clipboard = strdup (text);  /* keep a private copy, the caller retains ownership of text */
+  }
 }
 
-int ctx_renderer_is_ctx (Ctx *ctx)
+static char *ctx_headless_get_clipboard (Ctx *ctx)
 {
-  if (ctx->renderer &&
-      ctx->renderer->free == (void*)ctx_ctx_free)
-          return 1;
-  return 0;
+  if (ctx_fb_clipboard) return strdup (ctx_fb_clipboard);
+  return strdup ("");
 }
 
-#endif
-
-#if CTX_TILED
-static inline int
-ctx_tiled_threads_done (CtxTiled *tiled)
+static int ctx_headless_get_mice_fd (Ctx *ctx)
 {
-  int sum = 0;
-  for (int i = 0; i < _ctx_max_threads; i++)
-  {
-     if (tiled->rendered_frame[i] == tiled->render_frame)
-       sum ++;
-  }
-  return sum;
+  //CtxHeadless *fb = (void*)ctx->backend;
+  return _ctx_mice_fd;
 }
 
-int _ctx_damage_control = 0;
-
-void ctx_tiled_free (CtxTiled *tiled)
+typedef struct _CtxHeadless CtxHeadless;
+struct _CtxHeadless
 {
-  tiled->quit = 1;
-  mtx_lock (&tiled->mtx);
-  cnd_broadcast (&tiled->cond);
-  mtx_unlock (&tiled->mtx);
+   CtxTiled tiled;
+   int           key_balance;
+   int           key_repeat;
+   int           lctrl;
+   int           lalt;
+   int           rctrl;
 
-  while (tiled->thread_quit < _ctx_max_threads)
-    usleep (1000);
 
-  if (tiled->pixels)
-  {
-    free (tiled->pixels);
-  tiled->pixels = NULL;
-  for (int i = 0 ; i < _ctx_max_threads; i++)
-  {
-    ctx_free (tiled->host[i]);
-    tiled->host[i]=NULL;
-  }
+   int          fb_fd;
+   char        *fb_path;
+   int          fb_bits;
+   int          fb_bpp;
+   int          fb_mapped_size;
+   int          vt;
+   cnd_t        cond;
+   mtx_t        mtx;
+   int          tty;
+};
 
-  ctx_free (tiled->ctx_copy);
-  }
-  // leak?
-}
-static unsigned char *sdl_icc = NULL;
-static long sdl_icc_length = 0;
+#if UINTPTR_MAX == 0xffFFffFF
+  #define fbdrmuint_t uint32_t
+#elif UINTPTR_MAX == 0xffFFffFFffFFffFF
+  #define fbdrmuint_t uint64_t
+#endif
 
-inline static void ctx_tiled_flush (CtxTiled *tiled)
+static void ctx_headless_show_frame (CtxHeadless *fb, int block)
 {
+  CtxTiled *tiled = (void*)fb;
   if (tiled->shown_frame == tiled->render_frame)
   {
-    int dirty_tiles = 0;
-    ctx_set_drawlist (tiled->ctx_copy, &tiled->ctx->drawlist.entries[0],
-                                           tiled->ctx->drawlist.count * 9);
-    if (_ctx_enable_hash_cache)
-    {
-      Ctx *hasher = ctx_hasher_new (tiled->width, tiled->height,
-                        CTX_HASH_COLS, CTX_HASH_ROWS);
-      ctx_render_ctx (tiled->ctx_copy, hasher);
-
-      for (int row = 0; row < CTX_HASH_ROWS; row++)
-        for (int col = 0; col < CTX_HASH_COLS; col++)
-        {
-          uint8_t *new_hash = ctx_hasher_get_hash (hasher, col, row);
-          if (new_hash && memcmp (new_hash, &tiled->hashes[(row * CTX_HASH_COLS + col) *  20], 20))
-          {
-            memcpy (&tiled->hashes[(row * CTX_HASH_COLS +  col)*20], new_hash, 20);
-            tiled->tile_affinity[row * CTX_HASH_COLS + col] = 1;
-            dirty_tiles++;
-          }
-          else
-          {
-            tiled->tile_affinity[row * CTX_HASH_COLS + col] = -1;
-          }
-        }
-      free (((CtxHasher*)(hasher->renderer))->hashes);
-      ctx_free (hasher);
-    }
-    else
-    {
-    for (int row = 0; row < CTX_HASH_ROWS; row++)
-      for (int col = 0; col < CTX_HASH_COLS; col++)
-        {
-          tiled->tile_affinity[row * CTX_HASH_COLS + col] = 1;
-          dirty_tiles++;
-        }
-    }
-    int dirty_no = 0;
-    if (dirty_tiles)
-    for (int row = 0; row < CTX_HASH_ROWS; row++)
-      for (int col = 0; col < CTX_HASH_COLS; col++)
-      {
-        if (tiled->tile_affinity[row * CTX_HASH_COLS + col] != -1)
-        {
-          tiled->tile_affinity[row * CTX_HASH_COLS + col] = dirty_no * (_ctx_max_threads) / dirty_tiles;
-          dirty_no++;
-          if (col > tiled->max_col) tiled->max_col = col;
-          if (col < tiled->min_col) tiled->min_col = col;
-          if (row > tiled->max_row) tiled->max_row = row;
-          if (row < tiled->min_row) tiled->min_row = row;
-        }
-      }
+    return;
+  }
 
-    if (_ctx_damage_control)
+  if (block)
+  {
+    int count = 0;
+    while (ctx_tiled_threads_done (tiled) != _ctx_max_threads)
     {
-      for (int i = 0; i < tiled->width * tiled->height; i++)
+      usleep (500);
+      count ++;
+      if (count > 2000)
       {
-        tiled->pixels[i*4+2]  = (tiled->pixels[i*4+2] + 255)/2;
+        tiled->shown_frame = tiled->render_frame;
+        return;
       }
     }
-
-    tiled->render_frame = ++tiled->frame;
-
-#if 0
-
-          //if (tiled->tile_affinity[hno]==no)
-          {
-            int x0 = ((tiled->width)/CTX_HASH_COLS) * 0;
-            int y0 = ((tiled->height)/CTX_HASH_ROWS) * 0;
-            int width = tiled->width / CTX_HASH_COLS;
-            int height = tiled->height / CTX_HASH_ROWS;
-            Ctx *host = tiled->host[0];
-
-            CtxRasterizer *rasterizer = (CtxRasterizer*)host->renderer;
-            int swap_red_green = ((CtxRasterizer*)(host->renderer))->swap_red_green;
-            ctx_rasterizer_init (rasterizer,
-                                 host, tiled->ctx, &host->state,
-                                 &tiled->pixels[tiled->width * 4 * y0 + x0 * 4],
-                                 0, 0, 1, 1,
-                                 tiled->width*4, CTX_FORMAT_BGRA8,
-                                 tiled->antialias);
-            ((CtxRasterizer*)(host->renderer))->swap_red_green = swap_red_green;
-            if (sdl_icc_length)
-              ctx_colorspace (host, CTX_COLOR_SPACE_DEVICE_RGB, sdl_icc, sdl_icc_length);
-
-            ctx_translate (host, -x0, -y0);
-            ctx_render_ctx (tiled->ctx_copy, host);
-          }
-#endif
-
-
-    mtx_lock (&tiled->mtx);
-    cnd_broadcast (&tiled->cond);
-    mtx_unlock (&tiled->mtx);
   }
-}
-
-static
-void ctx_tiled_render_fun (void **data)
-{
-  int      no = (size_t)data[0];
-  CtxTiled *tiled = data[1];
-
-  while (!tiled->quit)
+  else
   {
-    Ctx *host = tiled->host[no];
+    if (ctx_tiled_threads_done (tiled) != _ctx_max_threads)
+      return;
+  }
+    if (tiled->vt_active)
+    {
+       int pre_skip = tiled->min_row * tiled->height/CTX_HASH_ROWS * tiled->width;
+       int post_skip = (CTX_HASH_ROWS-tiled->max_row-1) * tiled->height/CTX_HASH_ROWS * tiled->width;
 
-    mtx_lock (&tiled->mtx);
-    cnd_wait(&tiled->cond, &tiled->mtx);
-    mtx_unlock (&tiled->mtx);
+       int rows = ((tiled->width * tiled->height) - pre_skip - post_skip)/tiled->width;
 
-    if (tiled->render_frame != tiled->rendered_frame[no])
-    {
-      int hno = 0;
-      for (int row = 0; row < CTX_HASH_ROWS; row++)
-        for (int col = 0; col < CTX_HASH_COLS; col++, hno++)
-        {
-          if (tiled->tile_affinity[hno]==no)
-          {
-            int x0 = ((tiled->width)/CTX_HASH_COLS) * col;
-            int y0 = ((tiled->height)/CTX_HASH_ROWS) * row;
-            int width = tiled->width / CTX_HASH_COLS;
-            int height = tiled->height / CTX_HASH_ROWS;
+       int col_pre_skip = tiled->min_col * tiled->width/CTX_HASH_COLS;
+       int col_post_skip = (CTX_HASH_COLS-tiled->max_col-1) * tiled->width/CTX_HASH_COLS;
+       if (_ctx_damage_control)
+       {
+         pre_skip = post_skip = col_pre_skip = col_post_skip = 0;
+       }
 
-            CtxRasterizer *rasterizer = (CtxRasterizer*)host->renderer;
-#if 1 // merge horizontally adjecant tiles of same affinity into one job
-            while (col + 1 < CTX_HASH_COLS &&
-                   tiled->tile_affinity[hno+1] == no)
-            {
-              width += tiled->width / CTX_HASH_COLS;
-              col++;
-              hno++;
-            }
-#endif
-            int swap_red_green = ((CtxRasterizer*)(host->renderer))->swap_red_green;
-            ctx_rasterizer_init (rasterizer,
-                                 host, tiled->ctx, &host->state,
-                                 &tiled->pixels[tiled->width * 4 * y0 + x0 * 4],
-                                 0, 0, width, height,
-                                 tiled->width*4, CTX_FORMAT_BGRA8,
-                                 tiled->antialias);
-            ((CtxRasterizer*)(host->renderer))->swap_red_green = swap_red_green;
-            if (sdl_icc_length)
-              ctx_colorspace (host, CTX_COLOR_SPACE_DEVICE_RGB, sdl_icc, sdl_icc_length);
+       if (pre_skip < 0) pre_skip = 0;
+       if (post_skip < 0) post_skip = 0;
 
-            ctx_translate (host, -x0, -y0);
-            ctx_render_ctx (tiled->ctx_copy, host);
-          }
-        }
-      tiled->rendered_frame[no] = tiled->render_frame;
+
+       if (tiled->min_row == 100){
+          pre_skip = 0;
+          post_skip = 0;
+       }
+       else
+       {
+         tiled->min_row = 100;
+         tiled->max_row = 0;
+         tiled->min_col = 100;
+         tiled->max_col = 0;
+         {
+           uint8_t *dst = tiled->fb + pre_skip * 4;
+           uint8_t *src = tiled->pixels + pre_skip * 4;
+           int pre = col_pre_skip * 4;
+           int post = col_post_skip * 4;
+           int core = tiled->width * 4 - pre - post;
+           for (int i = 0; i < rows; i++)
+           {
+             dst  += pre;
+             src  += pre;
+             memcpy (dst, src, core);
+             src  += core;
+             dst  += core;
+             dst  += post;
+             src  += post;
+           }
+         }
     }
+    tiled->shown_frame = tiled->render_frame;
   }
-  tiled->thread_quit++; // need atomic?
 }
 
-
-static int       ctx_tiled_cursor_drawn   = 0;
-static int       ctx_tiled_cursor_drawn_x = 0;
-static int       ctx_tiled_cursor_drawn_y = 0;
-static CtxCursor ctx_tiled_cursor_drawn_shape = 0;
-
-
-#define CTX_FB_HIDE_CURSOR_FRAMES 200
-
-static int ctx_tiled_cursor_same_pos = CTX_FB_HIDE_CURSOR_FRAMES;
-
-static inline int ctx_is_in_cursor (int x, int y, int size, CtxCursor shape)
+void ctx_headless_consume_events (Ctx *ctx)
 {
-  switch (shape)
-  {
-    case CTX_CURSOR_ARROW:
-      if (x > ((size * 4)-y*4)) return 0;
-      if (x < y && x > y / 16)
-        return 1;
-      return 0;
-
-    case CTX_CURSOR_RESIZE_SE:
-    case CTX_CURSOR_RESIZE_NW:
-    case CTX_CURSOR_RESIZE_SW:
-    case CTX_CURSOR_RESIZE_NE:
-      {
-        float theta = -45.0/180 * M_PI;
-        float cos_theta;
-        float sin_theta;
+  CtxHeadless *fb = (void*)ctx->backend;
+  ctx_headless_show_frame (fb, 0);
+  event_check_pending (&fb->tiled);
+}
 
-        if ((shape == CTX_CURSOR_RESIZE_SW) ||
-            (shape == CTX_CURSOR_RESIZE_NE))
-        {
-          theta = -theta;
-          cos_theta = cos (theta);
-          sin_theta = sin (theta);
-        }
-        else
-        {
-          cos_theta = cos (theta);
-          sin_theta = sin (theta);
-        }
-        int rot_x = x * cos_theta - y * sin_theta;
-        int rot_y = y * cos_theta + x * sin_theta;
-        x = rot_x;
-        y = rot_y;
-      }
-      /*FALLTHROUGH*/
-    case CTX_CURSOR_RESIZE_W:
-    case CTX_CURSOR_RESIZE_E:
-    case CTX_CURSOR_RESIZE_ALL:
-      if (abs (x) < size/2 && abs (y) < size/2)
-      {
-        if (abs(y) < size/10)
-        {
-          return 1;
-        }
-      }
-      if ((abs (x) - size/ (shape == CTX_CURSOR_RESIZE_ALL?2:2.7)) >= 0)
-      {
-        if (abs(y) < (size/2.8)-(abs(x) - (size/2)))
-          return 1;
-      }
-      if (shape != CTX_CURSOR_RESIZE_ALL)
-        break;
-      /* FALLTHROUGH */
-    case CTX_CURSOR_RESIZE_S:
-    case CTX_CURSOR_RESIZE_N:
-      if (abs (y) < size/2 && abs (x) < size/2)
-      {
-        if (abs(x) < size/10)
-        {
-          return 1;
-        }
-      }
-      if ((abs (y) - size/ (shape == CTX_CURSOR_RESIZE_ALL?2:2.7)) >= 0)
-      {
-        if (abs(x) < (size/2.8)-(abs(y) - (size/2)))
-          return 1;
-      }
-      break;
-#if 0
-    case CTX_CURSOR_RESIZE_ALL:
-      if (abs (x) < size/2 && abs (y) < size/2)
-      {
-        if (abs (x) < size/10 || abs(y) < size/10)
-          return 1;
-      }
-      break;
-#endif
-    default:
-      return (x ^ y) & 1;
-  }
-  return 0;
+inline static void ctx_headless_reset (Ctx *ctx)
+{
+  ctx_headless_show_frame ((CtxHeadless*)ctx->backend, 1);
 }
 
-static void ctx_tiled_undraw_cursor (CtxTiled *tiled)
+void ctx_headless_free (CtxHeadless *fb)
 {
-    int cursor_size = ctx_height (tiled->ctx) / 28;
+  CtxTiled *tiled=(CtxTiled*)fb;
 
-    if (ctx_tiled_cursor_drawn)
-    {
-      int no = 0;
-      int startx = -cursor_size;
-      int starty = -cursor_size;
-      if (ctx_tiled_cursor_drawn_shape == CTX_CURSOR_ARROW)
-        startx = starty = 0;
+  if (tiled->fb)
+  {
+  free (tiled->fb); // it is not the tiled renderers responsibilty,
+                    // since it might not be allocated this way
+  tiled->fb = NULL;
+  ctx_babl_exit (); // we do this together with the fb,
+                    // which makes it happen only once
+                    // even if the headless_free is called
+                    // twice
+  }
+  //munmap (tiled->fb, fb->fb_mapped_size);
+  //close (fb->fb_fd);
+  //if (system("stty sane")){};
+  ctx_tiled_free ((CtxTiled*)fb);
+  //free (fb);
+}
 
-      for (int y = starty; y < cursor_size; y++)
-      for (int x = startx; x < cursor_size; x++, no+=4)
-      {
-        if (x + ctx_tiled_cursor_drawn_x < tiled->width && y + ctx_tiled_cursor_drawn_y < tiled->height)
-        {
-          if (ctx_is_in_cursor (x, y, cursor_size, ctx_tiled_cursor_drawn_shape))
-          {
-            int o = ((ctx_tiled_cursor_drawn_y + y) * tiled->width + (ctx_tiled_cursor_drawn_x + x)) * 4;
-            tiled->fb[o+0]^=0x88;
-            tiled->fb[o+1]^=0x88;
-            tiled->fb[o+2]^=0x88;
-          }
-        }
-      }
+//static unsigned char *fb_icc = NULL;
+//static long fb_icc_length = 0;
 
-    ctx_tiled_cursor_drawn = 0;
-    }
-}
+static CtxHeadless *ctx_headless = NULL;
 
-static void ctx_tiled_draw_cursor (CtxTiled *tiled)
+
+Ctx *ctx_new_headless (int width, int height)
 {
-    int cursor_x    = ctx_pointer_x (tiled->ctx);
-    int cursor_y    = ctx_pointer_y (tiled->ctx);
-    int cursor_size = ctx_height (tiled->ctx) / 28;
-    CtxCursor cursor_shape = tiled->ctx->cursor;
-    int no = 0;
+  if (width < 0 || height < 0)
+  {
+    width = 1920;
+    height = 780;
+  }
+#if CTX_RASTERIZER
+  CtxHeadless *fb = calloc (sizeof (CtxHeadless), 1);
+  CtxBackend *backend = (CtxBackend*)fb;
+  CtxTiled *tiled     = (CtxTiled*)fb;
+  ctx_headless = fb;
 
-    if (cursor_x == ctx_tiled_cursor_drawn_x &&
-        cursor_y == ctx_tiled_cursor_drawn_y &&
-        cursor_shape == ctx_tiled_cursor_drawn_shape)
-      ctx_tiled_cursor_same_pos ++;
-    else
-      ctx_tiled_cursor_same_pos = 0;
+  tiled->width = width;
+  tiled->height = height;
 
-    if (ctx_tiled_cursor_same_pos >= CTX_FB_HIDE_CURSOR_FRAMES)
-    {
-      if (ctx_tiled_cursor_drawn)
-        ctx_tiled_undraw_cursor (tiled);
-      return;
-    }
+  fb->fb_bits        = 32;
+  fb->fb_bpp         = 4;
+  fb->fb_mapped_size = width * height * 4;
+#endif
 
-    /* no need to flicker when stationary, motion flicker can also be removed
-     * by combining the previous and next position masks when a motion has
-     * occured..
-     */
-    if (ctx_tiled_cursor_same_pos && ctx_tiled_cursor_drawn)
-      return;
+  tiled->fb = calloc (fb->fb_mapped_size, 1);
+  if (!tiled->fb)
+    return NULL;
+  tiled->pixels = calloc (fb->fb_mapped_size, 1);
+  tiled->show_frame = (void*)ctx_headless_show_frame;
 
-    ctx_tiled_undraw_cursor (tiled);
+  ctx_babl_init ();
 
-    no = 0;
+ // ctx_get_contents ("file:///tmp/ctx.icc", &sdl_icc, &sdl_icc_length);
+ //
+ // not to be done for headless, we want sRGB thumbs - at least not device specific
+ // perhaps rec2020 or similar?
 
-    int startx = -cursor_size;
-    int starty = -cursor_size;
+  backend->ctx = _ctx_new_drawlist (width, height);
+  backend->flush = ctx_tiled_flush;
+  backend->process = (void*)ctx_drawlist_process;
+  backend->reset = ctx_headless_reset;
+  backend->free  = (void*)ctx_headless_free;
+  backend->set_clipboard = ctx_headless_set_clipboard;
+  backend->get_clipboard = ctx_headless_get_clipboard;
+  backend->consume_events = ctx_headless_consume_events;
 
-    if (cursor_shape == CTX_CURSOR_ARROW)
-      startx = starty = 0;
+  tiled->ctx_copy = ctx_new (width, height, "drawlist");
+  tiled->width    = width;
+  tiled->height   = height;
 
-    for (int y = starty; y < cursor_size; y++)
-      for (int x = startx; x < cursor_size; x++, no+=4)
-      {
-        if (x + cursor_x < tiled->width && y + cursor_y < tiled->height)
-        {
-          if (ctx_is_in_cursor (x, y, cursor_size, cursor_shape))
-          {
-            int o = ((cursor_y + y) * tiled->width + (cursor_x + x)) * 4;
-            tiled->fb[o+0]^=0x88;
-            tiled->fb[o+1]^=0x88;
-            tiled->fb[o+2]^=0x88;
-          }
-        }
-      }
-    ctx_tiled_cursor_drawn = 1;
-    ctx_tiled_cursor_drawn_x = cursor_x;
-    ctx_tiled_cursor_drawn_y = cursor_y;
-    ctx_tiled_cursor_drawn_shape = cursor_shape;
-}
+  ctx_set_backend (backend->ctx, fb);
+  ctx_set_backend (tiled->ctx_copy, fb);
+  ctx_set_texture_cache (tiled->ctx_copy, backend->ctx);
 
-#endif
-#if CTX_EVENTS
+  for (int i = 0; i < _ctx_max_threads; i++)
+  {
+    tiled->host[i] = ctx_new_for_framebuffer (tiled->pixels,
+                   tiled->width/CTX_HASH_COLS, tiled->height/CTX_HASH_ROWS,
+                   tiled->width * 4, CTX_FORMAT_BGRA8); // this format
+                                  // is overriden in  thread
+    ((CtxRasterizer*)(tiled->host[i]->backend))->swap_red_green = 1;
+    ctx_set_texture_source (tiled->host[i], backend->ctx);
+  }
 
+  mtx_init (&tiled->mtx, mtx_plain);
+  cnd_init (&tiled->cond);
 
-#define evsource_has_event(es)   (es)->has_event((es))
-#define evsource_get_event(es)   (es)->get_event((es))
-#define evsource_destroy(es)     do{if((es)->destroy)(es)->destroy((es));}while(0)
-#define evsource_set_coord(es,x,y) do{if((es)->set_coord)(es)->set_coord((es),(x),(y));}while(0)
-#define evsource_get_fd(es)      ((es)->get_fd?(es)->get_fd((es)):0)
+#define start_thread(no)\
+  if(_ctx_max_threads>no){ \
+    static void *args[2]={(void*)no, };\
+    thrd_t tid;\
+    args[1]=fb;\
+    thrd_create (&tid, (void*)ctx_tiled_render_fun, args);\
+  }
+  start_thread(0);
+  start_thread(1);
+  start_thread(2);
+  start_thread(3);
+  start_thread(4);
+  start_thread(5);
+  start_thread(6);
+  start_thread(7);
+  start_thread(8);
+  start_thread(9);
+  start_thread(10);
+  start_thread(11);
+  start_thread(12);
+  start_thread(13);
+  start_thread(14);
+  start_thread(15);
+#undef start_thread
 
+  tiled->vt_active = 1;
 
+  return backend->ctx;
+}
+#endif
+#endif
 
-static int mice_has_event ();
-static char *mice_get_event ();
-static void mice_destroy ();
-static int mice_get_fd (EvSource *ev_source);
-static void mice_set_coord (EvSource *ev_source, double x, double y);
+#if CTX_EVENTS
 
-static EvSource ctx_ev_src_mice = {
-  NULL,
-  (void*)mice_has_event,
-  (void*)mice_get_event,
-  (void*)mice_destroy,
-  mice_get_fd,
-  mice_set_coord
-};
+#if !__COSMOPOLITAN__
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <signal.h>
+#endif
 
-typedef struct Mice
-{
-  int     fd;
-  double  x;
-  double  y;
-  int     button;
-  int     prev_state;
-} Mice;
 
-Mice *_mrg_evsrc_coord = NULL;
-static int _ctx_mice_fd = 0;
+#if CTX_KMS || CTX_FB
 
-void _mmm_get_coords (Ctx *ctx, double *x, double *y)
+static int ctx_fb_get_mice_fd (Ctx *ctx)
 {
-  if (!_mrg_evsrc_coord)
-    return;
-  if (x)
-    *x = _mrg_evsrc_coord->x;
-  if (y)
-    *y = _mrg_evsrc_coord->y;
+  //CtxFb *fb = (void*)ctx->backend;
+  return _ctx_mice_fd;
 }
 
-static Mice  mice;
-static Mice* mrg_mice_this = &mice;
-
-static int mmm_evsource_mice_init ()
+static void ctx_fb_get_event_fds (Ctx *ctx, int *fd, int *count)
 {
-  unsigned char reset[]={0xff};
-  /* need to detect which event */
-
-  mrg_mice_this->prev_state = 0;
-  mrg_mice_this->fd = open ("/dev/input/mice", O_RDONLY | O_NONBLOCK);
-  if (mrg_mice_this->fd == -1)
+  int mice_fd = ctx_fb_get_mice_fd (ctx);
+  fd[0] = STDIN_FILENO;
+  if (mice_fd)
   {
-    fprintf (stderr, "error opening /dev/input/mice device, maybe add user to input group if such group exist, or otherwise make the rights be satisfied.\n");
-    return -1;
+    fd[1] = mice_fd;
+    *count = 2;
   }
-  if (write (mrg_mice_this->fd, reset, 1) == -1)
+  else
   {
-    // might happen if we're a regular user with only read permission
+    *count = 1;
   }
-  _ctx_mice_fd = mrg_mice_this->fd;
-  _mrg_evsrc_coord = mrg_mice_this;
-  return 0;
 }
+#endif
 
-static void mice_destroy ()
-{
-  if (mrg_mice_this->fd != -1)
-    close (mrg_mice_this->fd);
-}
+#if CTX_FB
 
-static int mice_has_event ()
-{
-  struct timeval tv;
-  int retval;
+#ifdef __linux__
+  #include <linux/fb.h>
+  #include <linux/vt.h>
+  #include <linux/kd.h>
+#endif
 
-  if (mrg_mice_this->fd == -1)
-    return 0;
+#ifdef __NetBSD__
+  typedef uint8_t unchar;
+  typedef uint8_t u_char;
+  typedef uint16_t ushort;
+  typedef uint32_t u_int;
+  typedef uint64_t u_long;
+  #include <sys/param.h>
+  #include <dev/wscons/wsdisplay_usl_io.h>
+  #include <dev/wscons/wsconsio.h>
+  #include <dev/wscons/wsksymdef.h>
+#endif
 
-  fd_set rfds;
-  FD_ZERO (&rfds);
-  FD_SET(mrg_mice_this->fd, &rfds);
-  tv.tv_sec = 0; tv.tv_usec = 0;
-  retval = select (mrg_mice_this->fd+1, &rfds, NULL, NULL, &tv);
-  if (retval == 1)
-    return FD_ISSET (mrg_mice_this->fd, &rfds);
-  return 0;
-}
+  #include <sys/mman.h>
 
-static char *mice_get_event ()
+typedef struct _CtxFb CtxFb;
+struct _CtxFb
 {
-  const char *ret = "mouse-motion";
-  double relx, rely;
-  signed char buf[3];
-  int n_read = 0;
-  CtxTiled *tiled = (void*)ctx_ev_src_mice.priv;
-  n_read = read (mrg_mice_this->fd, buf, 3);
-  if (n_read == 0)
-     return strdup ("");
-  relx = buf[1];
-  rely = -buf[2];
+   CtxTiled tiled;
+   int           key_balance;
+   int           key_repeat;
+   int           lctrl;
+   int           lalt;
+   int           rctrl;
 
-  if (relx < 0)
-  {
-    if (relx > -6)
-    relx = - relx*relx;
-    else
-    relx = -36;
-  }
-  else
-  {
-    if (relx < 6)
-    relx = relx*relx;
-    else
-    relx = 36;
-  }
 
-  if (rely < 0)
-  {
-    if (rely > -6)
-    rely = - rely*rely;
-    else
-    rely = -36;
-  }
-  else
-  {
-    if (rely < 6)
-    rely = rely*rely;
-    else
-    rely = 36;
-  }
+   int          fb_fd;
+   char        *fb_path;
+   int          fb_bits;
+   int          fb_bpp;
+   int          fb_mapped_size;
+   int          vt;
+   int          tty;
+   cnd_t        cond;
+   mtx_t        mtx;
+#if __linux__
+   struct       fb_var_screeninfo vinfo;
+   struct       fb_fix_screeninfo finfo;
+#endif
+};
 
-  mrg_mice_this->x += relx;
-  mrg_mice_this->y += rely;
+#if UINTPTR_MAX == 0xffFFffFF
+  #define fbdrmuint_t uint32_t
+#elif UINTPTR_MAX == 0xffFFffFFffFFffFF
+  #define fbdrmuint_t uint64_t
+#endif
 
-  if (mrg_mice_this->x < 0)
-    mrg_mice_this->x = 0;
-  if (mrg_mice_this->y < 0)
-    mrg_mice_this->y = 0;
-  if (mrg_mice_this->x >= tiled->width)
-    mrg_mice_this->x = tiled->width -1;
-  if (mrg_mice_this->y >= tiled->height)
-    mrg_mice_this->y = tiled->height -1;
-  int button = 0;
-  
-  if ((mrg_mice_this->prev_state & 1) != (buf[0] & 1))
-    {
-      if (buf[0] & 1)
-        {
-          ret = "mouse-press";
-        }
-      else
-        {
-          ret = "mouse-release";
-        }
-      button = 1;
-    }
-  else if (buf[0] & 1)
-  {
-    ret = "mouse-drag";
-    button = 1;
-  }
 
-  if (!button)
+static void ctx_fb_flip (CtxFb *fb)
+{
+#ifdef __linux__
+  ioctl (fb->fb_fd, FBIOPAN_DISPLAY, &fb->vinfo);
+#endif
+}
+
+static void ctx_fb_show_frame (CtxFb *fb, int block)
+{
+  CtxTiled *tiled = (void*)fb;
+  if (tiled->shown_frame == tiled->render_frame)
   {
-    if ((mrg_mice_this->prev_state & 2) != (buf[0] & 2))
-    {
-      if (buf[0] & 2)
-        {
-          ret = "mouse-press";
-        }
-      else
-        {
-          ret = "mouse-release";
-        }
-      button = 3;
-    }
-    else if (buf[0] & 2)
+    if (block == 0) // consume event call
     {
-      ret = "mouse-drag";
-      button = 3;
+      ctx_tiled_draw_cursor (tiled);
+      ctx_fb_flip (fb);
     }
+    return;
   }
 
-  if (!button)
+  if (block)
   {
-    if ((mrg_mice_this->prev_state & 4) != (buf[0] & 4))
-    {
-      if (buf[0] & 4)
-        {
-          ret = "mouse-press";
-        }
-      else
-        {
-          ret = "mouse-release";
-        }
-      button = 2;
-    }
-    else if (buf[0] & 4)
+    int count = 0;
+    while (ctx_tiled_threads_done (tiled) != _ctx_max_threads)
     {
-      ret = "mouse-drag";
-      button = 2;
+      usleep (500);
+      count ++;
+      if (count > 2000)
+      {
+        tiled->shown_frame = tiled->render_frame;
+        return;
+      }
     }
   }
-
-  mrg_mice_this->prev_state = buf[0];
-
+  else
   {
-    char *r = malloc (64);
-    sprintf (r, "%s %.0f %.0f %i", ret, mrg_mice_this->x, mrg_mice_this->y, button);
-    return r;
+    if (ctx_tiled_threads_done (tiled) != _ctx_max_threads)
+      return;
   }
 
-  return NULL;
-}
+    if (tiled->vt_active)
+    {
+       int pre_skip = tiled->min_row * tiled->height/CTX_HASH_ROWS * tiled->width;
+       int post_skip = (CTX_HASH_ROWS-tiled->max_row-1) * tiled->height/CTX_HASH_ROWS * tiled->width;
 
-static int mice_get_fd (EvSource *ev_source)
-{
-  return mrg_mice_this->fd;
-}
+       int rows = ((tiled->width * tiled->height) - pre_skip - post_skip)/tiled->width;
 
-static void mice_set_coord (EvSource *ev_source, double x, double y)
-{
-  mrg_mice_this->x = x;
-  mrg_mice_this->y = y;
-}
+       int col_pre_skip = tiled->min_col * tiled->width/CTX_HASH_COLS;
+       int col_post_skip = (CTX_HASH_COLS-tiled->max_col-1) * tiled->width/CTX_HASH_COLS;
+       if (_ctx_damage_control)
+       {
+         pre_skip = post_skip = col_pre_skip = col_post_skip = 0;
+       }
 
-static EvSource *evsource_mice_new (void)
-{
-  if (mmm_evsource_mice_init () == 0)
+       if (pre_skip < 0) pre_skip = 0;
+       if (post_skip < 0) post_skip = 0;
+
+
+       if (tiled->min_row == 100){
+          pre_skip = 0;
+          post_skip = 0;
+#ifdef __linux__
+           __u32 dummy = 0;
+          ioctl (fb->fb_fd, FBIO_WAITFORVSYNC, &dummy);
+#endif
+          ctx_tiled_undraw_cursor (tiled);
+       }
+       else
+       {
+
+      tiled->min_row = 100;
+      tiled->max_row = 0;
+      tiled->min_col = 100;
+      tiled->max_col = 0;
+#ifdef __linux__
     {
-      mrg_mice_this->x = 0;
-      mrg_mice_this->y = 0;
-      return &ctx_ev_src_mice;
+     __u32 dummy = 0;
+     ioctl (fb->fb_fd, FBIO_WAITFORVSYNC, &dummy);
+    }
+#endif
+     ctx_tiled_undraw_cursor (tiled);
+     switch (fb->fb_bits)
+     {
+       case 32:
+#if 1
+         {
+           uint8_t *dst = tiled->fb + pre_skip * 4;
+           uint8_t *src = tiled->pixels + pre_skip * 4;
+           int pre = col_pre_skip * 4;
+           int post = col_post_skip * 4;
+           int core = tiled->width * 4 - pre - post;
+           for (int i = 0; i < rows; i++)
+           {
+             dst  += pre;
+             src  += pre;
+             memcpy (dst, src, core);
+             src  += core;
+             dst  += core;
+             dst  += post;
+             src  += post;
+           }
+         }
+#else
+         { int count = tiled->width * tiled->height;
+           const uint32_t *src = (void*)tiled->pixels;
+           uint32_t *dst = (void*)tiled->fb;
+           count-= pre_skip;
+           src+= pre_skip;
+           dst+= pre_skip;
+           count-= post_skip;
+           while (count -- > 0)
+           {
+             dst[0] = ctx_swap_red_green2 (src[0]);
+             src++;
+             dst++;
+           }
+         }
+#endif
+         break;
+         /* XXX  :  note: converting a scanline (or all) to target and
+          * then doing a bulk memcpy be faster (at least with som /dev/fbs)  */
+       case 24:
+         { int count = tiled->width * tiled->height;
+           const uint8_t *src = tiled->pixels;
+           uint8_t *dst = tiled->fb;
+           count-= pre_skip;
+           src+= pre_skip * 4;
+           dst+= pre_skip * 3;
+           count-= post_skip;
+           while (count -- > 0)
+           {
+             dst[0] = src[0];
+             dst[1] = src[1];
+             dst[2] = src[2];
+             dst+=3;
+             src+=4;
+           }
+         }
+         break;
+       case 16:
+         { int count = tiled->width * tiled->height;
+           const uint8_t *src = tiled->pixels;
+           uint8_t *dst = tiled->fb;
+           count-= post_skip;
+           count-= pre_skip;
+           src+= pre_skip * 4;
+           dst+= pre_skip * 2;
+           while (count -- > 0)
+           {
+             int big = ((src[0] >> 3)) +
+                ((src[1] >> 2)<<5) +
+                ((src[2] >> 3)<<11);
+             dst[0] = big & 255;
+             dst[1] = big >>  8;
+             dst+=2;
+             src+=4;
+           }
+         }
+         break;
+       case 15:
+         { int count = tiled->width * tiled->height;
+           const uint8_t *src = tiled->pixels;
+           uint8_t *dst = tiled->fb;
+           count-= post_skip;
+           count-= pre_skip;
+           src+= pre_skip * 4;
+           dst+= pre_skip * 2;
+           while (count -- > 0)
+           {
+             int big = ((src[2] >> 3)) +
+                       ((src[1] >> 2)<<5) +
+                       ((src[0] >> 3)<<10);
+             dst[0] = big & 255;
+             dst[1] = big >>  8;
+             dst+=2;
+             src+=4;
+           }
+         }
+         break;
+       case 8:
+         { int count = tiled->width * tiled->height;
+           const uint8_t *src = tiled->pixels;
+           uint8_t *dst = tiled->fb;
+           count-= post_skip;
+           count-= pre_skip;
+           src+= pre_skip * 4;
+           dst+= pre_skip;
+           while (count -- > 0)
+           {
+             dst[0] = ((src[0] >> 5)) +
+                      ((src[1] >> 5)<<3) +
+                      ((src[2] >> 6)<<6);
+             dst+=1;
+             src+=4;
+           }
+         }
+         break;
+     }
     }
-  return NULL;
+    ctx_tiled_cursor_drawn = 0;
+    ctx_tiled_draw_cursor (tiled);
+    ctx_fb_flip (fb);
+    tiled->shown_frame = tiled->render_frame;
+  }
 }
 
-static int evsource_kb_has_event (void);
-static char *evsource_kb_get_event (void);
-static void evsource_kb_destroy (int sign);
-static int evsource_kb_get_fd (void);
-
-/* kept out of struct to be reachable by atexit */
-static EvSource ctx_ev_src_kb = {
-  NULL,
-  (void*)evsource_kb_has_event,
-  (void*)evsource_kb_get_event,
-  (void*)evsource_kb_destroy,
-  (void*)evsource_kb_get_fd,
-  NULL
-};
-
-static struct termios orig_attr;
-
-static void real_evsource_kb_destroy (int sign)
+void ctx_fb_consume_events (Ctx *ctx)
 {
-  static int done = 0;
-
-  if (sign == 0)
-    return;
-
-  if (done)
-    return;
-  done = 1;
-
-  switch (sign)
-  {
-    case  -11:break; /* will be called from atexit with sign==-11 */
-    case   SIGSEGV: break;//fprintf (stderr, " SIGSEGV\n");break;
-    case   SIGABRT: fprintf (stderr, " SIGABRT\n");break;
-    case   SIGBUS:  fprintf (stderr, " SIGBUS\n");break;
-    case   SIGKILL: fprintf (stderr, " SIGKILL\n");break;
-    case   SIGINT:  fprintf (stderr, " SIGINT\n");break;
-    case   SIGTERM: fprintf (stderr, " SIGTERM\n");break;
-    case   SIGQUIT: fprintf (stderr, " SIGQUIT\n");break;
-    default: fprintf (stderr, "sign: %i\n", sign);
-             fprintf (stderr, "%i %i %i %i %i %i %i\n", SIGSEGV, SIGABRT, SIGBUS, SIGKILL, SIGINT, SIGTERM, SIGQUIT);
-  }
-  tcsetattr (STDIN_FILENO, TCSAFLUSH, &orig_attr);
-  //fprintf (stderr, "evsource kb destroy\n");
+  CtxFb *fb = (void*)ctx->backend;
+  ctx_fb_show_frame (fb, 0);
+  event_check_pending (&fb->tiled);
 }
 
-static void evsource_kb_destroy (int sign)
+inline static void ctx_fb_reset (Ctx *ctx)
 {
-  real_evsource_kb_destroy (-11);
+  ctx_fb_show_frame ((CtxFb*)ctx->backend, 1);
 }
 
-static int evsource_kb_init ()
+void ctx_fb_free (CtxFb *fb) /* free hook of the fb backend: restore the console, unmap and close the framebuffer device, then tear down the shared tiled state */
 {
-//  ioctl(STDIN_FILENO, KDSKBMODE, K_RAW);
-  atexit ((void*) real_evsource_kb_destroy);
-  signal (SIGSEGV, (void*) real_evsource_kb_destroy);
-  signal (SIGABRT, (void*) real_evsource_kb_destroy);
-  signal (SIGBUS,  (void*) real_evsource_kb_destroy);
-  signal (SIGKILL, (void*) real_evsource_kb_destroy);
-  signal (SIGINT,  (void*) real_evsource_kb_destroy);
-  signal (SIGTERM, (void*) real_evsource_kb_destroy);
-  signal (SIGQUIT, (void*) real_evsource_kb_destroy);
-
-  struct termios raw;
-  if (tcgetattr (STDIN_FILENO, &orig_attr) == -1)
-    {
-      fprintf (stderr, "error initializing keyboard\n");
-      return -1;
-    }
-  raw = orig_attr;
+  CtxTiled*tiled=(CtxTiled*)fb; // same object viewed as its CtxTiled part; needed for the mapped pointer below
 
-  cfmakeraw (&raw);
+//#ifdef __linux__
+  ioctl (0, KDSETMODE, KD_TEXT); // undo the KD_GRAPHICS mode that ctx_new_fb requested on fd 0
+//#endif
+#ifdef __NetBSD__
+  {
+   int mode = WSDISPLAYIO_MODE_EMUL; // counterpart of the WSDISPLAYIO_MODE_DUMBFB set in ctx_new_fb
+   ioctl (fb->fb_fd, WSDISPLAYIO_SMODE, &mode);
+  }
+#endif
+  munmap (tiled->fb, fb->fb_mapped_size); // release the mapping created by mmap in ctx_new_fb
+  close (fb->fb_fd);
+  if (system("stty sane")){}; // result deliberately ignored; the empty if presumably only silences an unused-result warning
+  ctx_tiled_free ((CtxTiled*)fb);
+  //free (fb);
+  ctx_babl_exit ();
+}
 
-  raw.c_cc[VMIN] = 1; raw.c_cc[VTIME] = 0; /* 1 byte, no timer */
-  if (tcsetattr (STDIN_FILENO, TCSAFLUSH, &raw) < 0)
-    return 0; // XXX? return other value?
+//static unsigned char *fb_icc = NULL;
+//static long fb_icc_length = 0;
 
-  return 0;
-}
-static int evsource_kb_has_event (void)
+static CtxFb *ctx_fb = NULL; // fb backend instance used by fb_vt_switch_cb; assigned in ctx_new_fb
+#ifdef __linux__
+static void fb_vt_switch_cb (int sig) /* handler for SIGUSR1 / SIGUSR2, which ctx_new_fb registers as the VT release / acquire signals */
 {
-  struct timeval tv;
-  int retval;
+  CtxTiled *tiled = (void*)ctx_fb;
+  CtxBackend *backend = (void*)ctx_fb;
+  if (sig == SIGUSR1) // release signal (mode.relsig): the console is being switched away from us
+  {
+    ioctl (0, VT_RELDISP, 1);
+    tiled->vt_active = 0;
+    ioctl (0, KDSETMODE, KD_TEXT);
+  }
+  else // any other signal routed here; ctx_new_fb only registers SIGUSR2 (mode.acqsig) besides SIGUSR1
+  {
+    ioctl (0, VT_RELDISP, VT_ACKACQ);
+    tiled->vt_active = 1;
+    // queue draw
+    tiled->render_frame = ++tiled->frame;
+    ioctl (0, KDSETMODE, KD_GRAPHICS);
+    {
+      backend->ctx->dirty=1;
 
-  fd_set rfds;
-  FD_ZERO (&rfds);
-  FD_SET(STDIN_FILENO, &rfds);
-  tv.tv_sec = 0; tv.tv_usec = 0;
-  retval = select (STDIN_FILENO+1, &rfds, NULL, NULL, &tv);
-  return retval == 1;
+      for (int row = 0; row < CTX_HASH_ROWS; row++)
+      for (int col = 0; col < CTX_HASH_COLS; col++)
+      {
+        tiled->hashes[(row * CTX_HASH_COLS + col) *  20] += 1; // alter the first byte of every tile's 20-byte hash slot; presumably forces each tile to be seen as changed and redrawn
+      }
+    }
+  }
 }
+#endif
 
-/* note that a nick can have multiple occurences, the labels
- * should be kept the same for all occurences of a combination.
- *
- * this table is taken from nchanterm.
- */
-typedef struct MmmKeyCode {
-  char *nick;          /* programmers name for key */
-  char  sequence[10];  /* terminal sequence */
-} MmmKeyCode;
-static const MmmKeyCode ufb_keycodes[]={
-  {"up",                  "\e[A"},
-  {"down",                "\e[B"},
-  {"right",               "\e[C"},
-  {"left",                "\e[D"},
-
-  {"shift-up",            "\e[1;2A"},
-  {"shift-down",          "\e[1;2B"},
-  {"shift-right",         "\e[1;2C"},
-  {"shift-left",          "\e[1;2D"},
-
-  {"alt-up",              "\e[1;3A"},
-  {"alt-down",            "\e[1;3B"},
-  {"alt-right",           "\e[1;3C"},
-  {"alt-left",            "\e[1;3D"},
-  {"alt-shift-up",         "\e[1;4A"},
-  {"alt-shift-down",       "\e[1;4B"},
-  {"alt-shift-right",      "\e[1;4C"},
-  {"alt-shift-left",       "\e[1;4D"},
-
-  {"control-up",          "\e[1;5A"},
-  {"control-down",        "\e[1;5B"},
-  {"control-right",       "\e[1;5C"},
-  {"control-left",        "\e[1;5D"},
-
-  /* putty */
-  {"control-up",          "\eOA"},
-  {"control-down",        "\eOB"},
-  {"control-right",       "\eOC"},
-  {"control-left",        "\eOD"},
 
-  {"control-shift-up",    "\e[1;6A"},
-  {"control-shift-down",  "\e[1;6B"},
-  {"control-shift-right", "\e[1;6C"},
-  {"control-shift-left",  "\e[1;6D"},
+Ctx *ctx_new_fb (int width, int height) /* Create a Ctx that renders to the console framebuffer device. The width/height arguments are overwritten with the device resolution on Linux and NetBSD. Returns the new context, or NULL on failure or when CTX_RASTERIZER is disabled. */
+{
+#if CTX_RASTERIZER
+  CtxFb *fb = calloc (sizeof (CtxFb), 1);
+  CtxTiled *tiled = (void*)fb;
+  CtxBackend *backend = (void*)fb;
+  ctx_fb = fb; // publish the instance for fb_vt_switch_cb
+  {
+#ifdef __linux__
+  const char *dev_path = "/dev/fb0";
+#endif
+#ifdef __NetBSD__
+  const char *dev_path = "/dev/ttyE0";
+#endif
+#ifdef __OpenBSD__
+  const char *dev_path = "/dev/ttyC0";
+#endif
+  fb->fb_fd = open (dev_path, O_RDWR);
+  if (fb->fb_fd > 0) // NOTE(review): a returned descriptor 0 is treated as failure here
+    fb->fb_path = strdup (dev_path);
+  else
+  {
+#ifdef __linux__
+    fb->fb_fd = open ("/dev/graphics/fb0", O_RDWR); // second device path tried on Linux
+    if (fb->fb_fd > 0)
+    {
+      fb->fb_path = strdup ("/dev/graphics/fb0");
+    }
+    else
+#endif
+    {
+      free (fb);
+      return NULL;
+    }
+  }
 
-  {"control-up",          "\eOa"},
-  {"control-down",        "\eOb"},
-  {"control-right",       "\eOc"},
-  {"control-left",        "\eOd"},
+#ifdef __linux__
+  if (ioctl(fb->fb_fd, FBIOGET_FSCREENINFO, &fb->finfo))
+    {
+      fprintf (stderr, "error getting fbinfo\n");
+      close (fb->fb_fd);
+      free (fb->fb_path);
+      free (fb);
+      return NULL;
+    }
 
-  {"shift-up",            "\e[a"},
-  {"shift-down",          "\e[b"},
-  {"shift-right",         "\e[c"},
-  {"shift-left",          "\e[d"},
+   if (ioctl(fb->fb_fd, FBIOGET_VSCREENINFO, &fb->vinfo))
+     {
+       fprintf (stderr, "error getting fbinfo\n");
+      close (fb->fb_fd);
+      free (fb->fb_path);
+      free (fb);
+      return NULL;
+     }
+  ioctl (0, KDSETMODE, KD_GRAPHICS); // switch the console on fd 0 to graphics mode; ctx_fb_free sets KD_TEXT again
 
-  {"insert",              "\e[2~"},
-  {"delete",              "\e[3~"},
-  {"page-up",             "\e[5~"},
-  {"page-down",           "\e[6~"},
-  {"home",                "\eOH"},
-  {"end",                 "\eOF"},
-  {"home",                "\e[H"},
-  {"end",                 "\e[F"},
- {"control-delete",       "\e[3;5~"},
-  {"shift-delete",        "\e[3;2~"},
-  {"control-shift-delete","\e[3;6~"},
+//fprintf (stderr, "%s\n", fb->fb_path);
+  width = tiled->width = fb->vinfo.xres; // the requested size is replaced by the device resolution
+  height = tiled->height = fb->vinfo.yres;
 
-  {"F1",         "\e[25~"},
-  {"F2",         "\e[26~"},
-  {"F3",         "\e[27~"},
-  {"F4",         "\e[26~"},
+  fb->fb_bits = fb->vinfo.bits_per_pixel;
+//fprintf (stderr, "fb bits: %i\n", fb->fb_bits);
 
+  if (fb->fb_bits == 16) // for 16 bpp modes use the summed channel widths instead; presumably to tell 15-bit from 16-bit layouts
+    fb->fb_bits =
+      fb->vinfo.red.length +
+      fb->vinfo.green.length +
+      fb->vinfo.blue.length;
+   else if (fb->fb_bits == 8)
+  {
+    unsigned short red[256],  green[256],  blue[256];
+  //  unsigned short original_red[256];
+  //  unsigned short original_green[256];
+  //  unsigned short original_blue[256];
+    struct fb_cmap cmap = {0, 256, red, green, blue, NULL};
+  //  struct fb_cmap original_cmap = {0, 256, original_red, original_green, original_blue, NULL};
+    int i;
 
-  {"F1",         "\e[11~"},
-  {"F2",         "\e[12~"},
-  {"F3",         "\e[13~"},
-  {"F4",         "\e[14~"},
-  {"F1",         "\eOP"},
-  {"F2",         "\eOQ"},
-  {"F3",         "\eOR"},
-  {"F4",         "\eOS"},
-  {"F5",         "\e[15~"},
-  {"F6",         "\e[16~"},
-  {"F7",         "\e[17~"},
-  {"F8",         "\e[18~"},
-  {"F9",         "\e[19~"},
-  {"F9",         "\e[20~"},
-  {"F10",        "\e[21~"},
-  {"F11",        "\e[22~"},
-  {"F12",        "\e[23~"},
-  {"tab",         {9, '\0'}},
-  {"shift-tab",   {27, 9, '\0'}}, // also generated by alt-tab in linux console
-  {"alt-space",   {27, ' ', '\0'}},
-  {"shift-tab",   "\e[Z"},
-  {"backspace",   {127, '\0'}},
-  {"space",       " "},
-  {"\e",          "\e"},
-  {"return",      {10,0}},
-  {"return",      {13,0}},
-  /* this section could be autogenerated by code */
-  {"control-a",   {1,0}},
-  {"control-b",   {2,0}},
-  {"control-c",   {3,0}},
-  {"control-d",   {4,0}},
-  {"control-e",   {5,0}},
-  {"control-f",   {6,0}},
-  {"control-g",   {7,0}},
-  {"control-h",   {8,0}}, /* backspace? */
-  {"control-i",   {9,0}},
-  {"control-j",   {10,0}},
-  {"control-k",   {11,0}},
-  {"control-l",   {12,0}},
-  {"control-n",   {14,0}},
-  {"control-o",   {15,0}},
-  {"control-p",   {16,0}},
-  {"control-q",   {17,0}},
-  {"control-r",   {18,0}},
-  {"control-s",   {19,0}},
-  {"control-t",   {20,0}},
-  {"control-u",   {21,0}},
-  {"control-v",   {22,0}},
-  {"control-w",   {23,0}},
-  {"control-x",   {24,0}},
-  {"control-y",   {25,0}},
-  {"control-z",   {26,0}},
-  {"alt-`",       "\e`"},
-  {"alt-0",       "\e0"},
-  {"alt-1",       "\e1"},
-  {"alt-2",       "\e2"},
-  {"alt-3",       "\e3"},
-  {"alt-4",       "\e4"},
-  {"alt-5",       "\e5"},
-  {"alt-6",       "\e6"},
-  {"alt-7",       "\e7"}, /* backspace? */
-  {"alt-8",       "\e8"},
-  {"alt-9",       "\e9"},
-  {"alt-+",       "\e+"},
-  {"alt--",       "\e-"},
-  {"alt-/",       "\e/"},
-  {"alt-a",       "\ea"},
-  {"alt-b",       "\eb"},
-  {"alt-c",       "\ec"},
-  {"alt-d",       "\ed"},
-  {"alt-e",       "\ee"},
-  {"alt-f",       "\ef"},
-  {"alt-g",       "\eg"},
-  {"alt-h",       "\eh"}, /* backspace? */
-  {"alt-i",       "\ei"},
-  {"alt-j",       "\ej"},
-  {"alt-k",       "\ek"},
-  {"alt-l",       "\el"},
-  {"alt-n",       "\em"},
-  {"alt-n",       "\en"},
-  {"alt-o",       "\eo"},
-  {"alt-p",       "\ep"},
-  {"alt-q",       "\eq"},
-  {"alt-r",       "\er"},
-  {"alt-s",       "\es"},
-  {"alt-t",       "\et"},
-  {"alt-u",       "\eu"},
-  {"alt-v",       "\ev"},
-  {"alt-w",       "\ew"},
-  {"alt-x",       "\ex"},
-  {"alt-y",       "\ey"},
-  {"alt-z",       "\ez"},
-  /* Linux Console  */
-  {"home",       "\e[1~"},
-  {"end",        "\e[4~"},
-  {"F1",         "\e[[A"},
-  {"F2",         "\e[[B"},
-  {"F3",         "\e[[C"},
-  {"F4",         "\e[[D"},
-  {"F5",         "\e[[E"},
-  {"F6",         "\e[[F"},
-  {"F7",         "\e[[G"},
-  {"F8",         "\e[[H"},
-  {"F9",         "\e[[I"},
-  {"F10",        "\e[[J"},
-  {"F11",        "\e[[K"},
-  {"F12",        "\e[[L"},
-  {NULL, }
-};
-static int fb_keyboard_match_keycode (const char *buf, int length, const MmmKeyCode **ret)
-{
-  int i;
-  int matches = 0;
+    /* do we really need to restore it ? */
+   // if (ioctl (fb->fb_fd, FBIOPUTCMAP, &original_cmap) == -1)
+   // {
+   //   fprintf (stderr, "palette initialization problem %i\n", __LINE__);
+   // }
 
-  if (!strncmp (buf, "\e[M", MIN(length,3)))
+    for (i = 0; i < 256; i++) // palette index bits: 3 red (bits 7-5), 3 green (bits 4-2), 2 blue (bits 1-0)
     {
-      if (length >= 6)
-        return 9001;
-      return 2342;
+      red[i]   = ((( i >> 5) & 0x7) << 5) << 8;
+      green[i] = ((( i >> 2) & 0x7) << 5) << 8;
+      blue[i]  = ((( i >> 0) & 0x3) << 6) << 8;
     }
-  for (i = 0; ufb_keycodes[i].nick; i++)
-    if (!strncmp (buf, ufb_keycodes[i].sequence, length))
-      {
-        matches ++;
-        if ((int)strlen (ufb_keycodes[i].sequence) == length && ret)
-          {
-            *ret = &ufb_keycodes[i];
-            return 1;
-          }
-      }
-  if (matches != 1 && ret)
-    *ret = NULL;
-  return matches==1?2:matches;
-}
 
-//int is_active (void *host)
-//{
-//        return 1;
-//}
+    if (ioctl (fb->fb_fd, FBIOPUTCMAP, &cmap) == -1)
+    {
+      fprintf (stderr, "palette initialization problem %i\n", __LINE__);
+    }
+  }
 
-static char *evsource_kb_get_event (void)
-{
-  unsigned char buf[20];
-  int length;
+  fb->fb_bpp = fb->vinfo.bits_per_pixel / 8; // bytes per pixel
+  fb->fb_mapped_size = fb->finfo.smem_len;
+#endif
 
+#ifdef __NetBSD__
+  struct wsdisplay_fbinfo finfo;
 
-  for (length = 0; length < 10; length ++)
-    if (read (STDIN_FILENO, &buf[length], 1) != -1)
-      {
-        const MmmKeyCode *match = NULL;
+  int mode = WSDISPLAYIO_MODE_DUMBFB;
+  //int mode = WSDISPLAYIO_MODE_MAPPED;
+  if (ioctl (fb->fb_fd, WSDISPLAYIO_SMODE, &mode)) {
+    return NULL; // NOTE(review): returns without closing fb_fd or freeing fb_path / fb
+  }
+  if (ioctl (fb->fb_fd, WSDISPLAYIO_GINFO, &finfo)) {
+    fprintf (stderr, "ioctl: WSIDSPLAYIO_GINFO failed\n");
+    return NULL; // NOTE(review): same unreleased resources as above
+  }
 
-        //if (!is_active (ctx_ev_src_kb.priv))
-        //  return NULL;
+  width = tiled->width = finfo.width;
+  height = tiled->height = finfo.height;
+  fb->fb_bits = finfo.depth;
+  fb->fb_bpp = (fb->fb_bits + 1) / 8;
+  fb->fb_mapped_size = width * height * fb->fb_bpp;
 
-        /* special case ESC, so that we can use it alone in keybindings */
-        if (length == 0 && buf[0] == 27)
-          {
-            struct timeval tv;
-            fd_set rfds;
-            FD_ZERO (&rfds);
-            FD_SET (STDIN_FILENO, &rfds);
-            tv.tv_sec = 0;
-            tv.tv_usec = 1000 * 120;
-            if (select (STDIN_FILENO+1, &rfds, NULL, NULL, &tv) == 0)
-              return strdup ("escape");
-          }
 
-        switch (fb_keyboard_match_keycode ((void*)buf, length + 1, &match))
-          {
-            case 1: /* unique match */
-              if (!match)
-                return NULL;
-              return strdup (match->nick);
-              break;
-            case 0: /* no matches, bail*/
-             {
-                static char ret[256]="";
-                if (length == 0 && ctx_utf8_len (buf[0])>1) /* read a
-                                                             * single unicode
-                                                             * utf8 character
-                                                             */
-                  {
-                    int bytes = read (STDIN_FILENO, &buf[length+1], ctx_utf8_len(buf[0])-1);
-                    if (bytes)
-                    {
-                      buf[ctx_utf8_len(buf[0])]=0;
-                      strcpy (ret, (void*)buf);
-                    }
-                    return strdup(ret); //XXX: simplify
-                  }
-                if (length == 0) /* ascii */
-                  {
-                    buf[1]=0;
-                    strcpy (ret, (void*)buf);
-                    return strdup(ret);
-                  }
-                sprintf (ret, "unhandled %i:'%c' %i:'%c' %i:'%c' %i:'%c' %i:'%c' %i:'%c' %i:'%c'",
-                    length >=0 ? buf[0] : 0,
-                    length >=0 ? buf[0]>31?buf[0]:'?' : ' ',
-                    length >=1 ? buf[1] : 0,
-                    length >=1 ? buf[1]>31?buf[1]:'?' : ' ',
-                    length >=2 ? buf[2] : 0,
-                    length >=2 ? buf[2]>31?buf[2]:'?' : ' ',
-                    length >=3 ? buf[3] : 0,
-                    length >=3 ? buf[3]>31?buf[3]:'?' : ' ',
-                    length >=4 ? buf[4] : 0,
-                    length >=4 ? buf[4]>31?buf[4]:'?' : ' ',
-                    length >=5 ? buf[5] : 0,
-                    length >=5 ? buf[5]>31?buf[5]:'?' : ' ',
-                    length >=6 ? buf[6] : 0,
-                    length >=6 ? buf[6]>31?buf[6]:'?' : ' '
-                    );
-                return strdup(ret);
-            }
-              return NULL;
-            default: /* continue */
-              break;
-          }
-      }
-    else
-      return strdup("key read eek");
-  return strdup("fail");
-}
+  if (fb->fb_bits == 8)
+  {
+    uint8_t red[256],  green[256],  blue[256];
+    struct wsdisplay_cmap cmap;
+    cmap.red = red;
+    cmap.green = green;
+    cmap.blue = blue;
+    cmap.count = 256;
+    cmap.index = 0;
+    for (int i = 0; i < 256; i++) // same 3-3-2 bit split of the palette index as the Linux branch, 8-bit entries
+    {
+      red[i]   = ((( i >> 5) & 0x7) << 5);
+      green[i] = ((( i >> 2) & 0x7) << 5);
+      blue[i]  = ((( i >> 0) & 0x3) << 6);
+    }
 
-static int evsource_kb_get_fd (void)
-{
-  return STDIN_FILENO;
-}
+    ioctl (fb->fb_fd, WSDISPLAYIO_PUTCMAP, &cmap);
+  }
+#endif
+
+                                              
+  tiled->fb = mmap (NULL, fb->fb_mapped_size, PROT_READ|PROT_WRITE, MAP_SHARED, fb->fb_fd, 0); // map the device memory; unmapped again in ctx_fb_free
+  }
+  if (!tiled->fb || tiled->fb == MAP_FAILED) // mmap signals failure with MAP_FAILED, not NULL
+    return NULL;
+  tiled->pixels = calloc (fb->fb_mapped_size, 1); // separate zeroed buffer the per-thread rasterizers draw into (passed to ctx_new_for_framebuffer below)
+  tiled->show_frame = (void*)ctx_fb_show_frame;
 
+  ctx_babl_init ();
 
-static EvSource *evsource_kb_new (void)
-{
-  if (evsource_kb_init() == 0)
+  ctx_get_contents ("file:///tmp/ctx.icc", &sdl_icc, &sdl_icc_length);
+
+  backend->ctx    = _ctx_new_drawlist (width, height);
+  tiled->ctx_copy = _ctx_new_drawlist (width, height);
+  tiled->width    = width;
+  tiled->height   = height;
+
+  ctx_set_backend (backend->ctx, fb);
+  ctx_set_backend (tiled->ctx_copy, fb);
+  ctx_set_texture_cache (tiled->ctx_copy, backend->ctx);
+
+
+  backend->flush = ctx_tiled_flush;
+  backend->process = (void*)ctx_drawlist_process;
+
+  backend->reset = ctx_fb_reset;
+  backend->free  = (void*)ctx_fb_free;
+  backend->set_clipboard = ctx_headless_set_clipboard;
+  backend->get_clipboard = ctx_headless_get_clipboard;
+  backend->consume_events = ctx_fb_consume_events;
+  backend->get_event_fds = ctx_fb_get_event_fds;
+
+  ctx_set_size (backend->ctx, width, height);
+  ctx_set_size (tiled->ctx_copy, width, height);
+
+  for (int i = 0; i < _ctx_max_threads; i++) // one tile-sized rasterizer context per render thread
   {
-    return &ctx_ev_src_kb;
+    tiled->host[i] = ctx_new_for_framebuffer (tiled->pixels,
+                   tiled->width/CTX_HASH_COLS, tiled->height/CTX_HASH_ROWS,
+                   tiled->width * 4, CTX_FORMAT_BGRA8); // this format
+                                  // is overridden in thread
+    ((CtxRasterizer*)(tiled->host[i]->backend))->swap_red_green = 1;
+    ctx_set_texture_source (tiled->host[i], backend->ctx);
   }
-  return NULL;
-}
 
-static int event_check_pending (CtxTiled *tiled)
-{
-  int events = 0;
-  for (int i = 0; i < tiled->evsource_count; i++)
+  mtx_init (&tiled->mtx, mtx_plain);
+  cnd_init (&tiled->cond);
+
+#define start_thread(no)\
+  if(_ctx_max_threads>no){ \
+    static void *args[2]={(void*)no, };\
+    thrd_t tid;\
+    args[1]=fb;\
+    thrd_create (&tid, (void*)ctx_tiled_render_fun, args);\
+  }
+  start_thread(0);
+  start_thread(1);
+  start_thread(2);
+  start_thread(3);
+  start_thread(4);
+  start_thread(5);
+  start_thread(6);
+  start_thread(7);
+  start_thread(8);
+  start_thread(9);
+  start_thread(10);
+  start_thread(11);
+  start_thread(12);
+  start_thread(13);
+  start_thread(14);
+  start_thread(15);
+#undef start_thread
+
+  EvSource *kb = evsource_kb_new ();
+  if (kb)
   {
-    while (evsource_has_event (tiled->evsource[i]))
-    {
-      char *event = evsource_get_event (tiled->evsource[i]);
-      if (event)
-      {
-        if (tiled->vt_active)
-        {
-          ctx_key_press (tiled->ctx, 0, event, 0); // we deliver all events as key-press, the key_press handler disambiguates
-          events++;
-        }
-        free (event);
-      }
-    }
+    tiled->evsource[tiled->evsource_count++] = kb;
+    kb->priv = fb;
+  }
+  EvSource *mice  = evsource_mice_new ();
+  if (mice)
+  {
+    tiled->evsource[tiled->evsource_count++] = mice;
+    mice->priv = fb;
   }
-  return events;
-}
 
-int ctx_renderer_is_tiled (Ctx *ctx)
-{
-  return ctx_renderer_is_fb (ctx)
-          || ctx_renderer_is_sdl (ctx)
-       || ctx_renderer_is_kms (ctx)
-     ;
-}
+  tiled->vt_active = 1;
+#ifdef __linux__
+  ioctl(0, KDSETMODE, KD_GRAPHICS);
+  signal (SIGUSR1, fb_vt_switch_cb); // release signal, see mode.relsig below
+  signal (SIGUSR2, fb_vt_switch_cb); // acquire signal, see mode.acqsig below
+
+  struct vt_stat st;
+  if (ioctl (0, VT_GETSTATE, &st) == -1)
+  {
+    ctx_log ("VT_GET_MODE on vt %i failed\n", fb->vt); // NOTE(review): fb->vt is still the calloc'ed 0 here; it is assigned only after this check
+    return NULL; // NOTE(review): returns with threads started and the device still mapped
+  }
+
+  fb->vt = st.v_active;
+
+  struct vt_mode mode;
+  mode.mode   = VT_PROCESS;
+  mode.relsig = SIGUSR1;
+  mode.acqsig = SIGUSR2;
+  if (ioctl (0, VT_SETMODE, &mode) < 0)
+  {
+    ctx_log ("VT_SET_MODE on vt %i failed\n", fb->vt);
+    return NULL; // NOTE(review): same as the VT_GETSTATE failure path
+  }
+#endif
 
+  return backend->ctx;
+#else
+  return NULL;
+#endif
+}
+#endif
 #endif
 
 #if CTX_EVENTS
@@ -30171,25 +31602,6 @@ int ctx_renderer_is_tiled (Ctx *ctx)
 #include <signal.h>
 #endif
 
-#if CTX_KMS || CTX_FB
-static char *ctx_fb_clipboard = NULL;
-static void ctx_fb_set_clipboard (void *fb, const char *text)
-{
-  if (ctx_fb_clipboard)
-    free (ctx_fb_clipboard);
-  ctx_fb_clipboard = NULL;
-  if (text)
-  {
-    ctx_fb_clipboard = strdup (text);
-  }
-}
-
-static char *ctx_fb_get_clipboard (void *sdl)
-{
-  if (ctx_fb_clipboard) return strdup (ctx_fb_clipboard);
-  return strdup ("");
-}
-#endif
 
 
 #if CTX_KMS
@@ -30208,41 +31620,6 @@ typedef struct _CtxKMS CtxKMS;
 struct _CtxKMS
 {
    CtxTiled tiled;
-#if 0
-   void (*render) (void *fb, CtxCommand *command);
-   void (*reset)  (void *fb);
-   void (*flush)  (void *fb);
-   char *(*get_clipboard) (void *ctxctx);
-   void (*set_clipboard) (void *ctxctx, const char *text);
-   void (*free)   (void *fb);
-   Ctx          *ctx;
-   int           width;
-   int           height;
-   int           cols; // unused
-   int           rows; // unused
-   int           was_down;
-   uint8_t      *pixels;
-   Ctx          *ctx_copy;
-   Ctx          *host[CTX_MAX_THREADS];
-   CtxAntialias  antialias;
-   int           quit;
-   _Atomic int   thread_quit;
-   int           shown_frame;
-   int           render_frame;
-   int           rendered_frame[CTX_MAX_THREADS];
-   int           frame;
-   int           min_col; // hasher cols and rows
-   int           min_row;
-   int           max_col;
-   int           max_row;
-   uint8_t       hashes[CTX_HASH_ROWS * CTX_HASH_COLS *  20];
-   int8_t        tile_affinity[CTX_HASH_ROWS * CTX_HASH_COLS]; // which render thread no is
-                                                           // responsible for a tile
-                                                           //
-
-
-   int           pointer_down[3];
-#endif
    int           key_balance;
    int           key_repeat;
    int           lctrl;
@@ -30623,22 +32000,16 @@ static void ctx_kms_show_frame (CtxKMS *fb, int block)
   }
 }
 
-int ctx_kms_consume_events (Ctx *ctx)
+void ctx_kms_consume_events (Ctx *ctx) /* KMS counterpart of ctx_fb_consume_events: show a frame without blocking, then dispatch pending input; no longer returns a value */
 {
-  CtxKMS *fb = (void*)ctx->renderer;
+  CtxKMS *fb = (void*)ctx->backend; // field renamed from `renderer` to `backend` by this commit
   ctx_kms_show_frame (fb, 0);
   event_check_pending (&fb->tiled);
-  return 0;
-}
-
-inline static void ctx_kms_reset (CtxKMS *fb)
-{
-  ctx_kms_show_frame (fb, 1);
 }
 
-inline static void ctx_kms_flush (CtxKMS *fb)
+inline static void ctx_kms_reset (Ctx *ctx) /* reset hook of the KMS backend; now takes the Ctx and reaches the CtxKMS through ctx->backend */
 {
-  ctx_tiled_flush ((CtxTiled*)fb);
+  ctx_kms_show_frame ((CtxKMS*)ctx->backend, 1); // second argument 1 is the `block` flag of ctx_kms_show_frame
 }
 
 void ctx_kms_free (CtxKMS *fb)
@@ -30653,22 +32024,12 @@ void ctx_kms_free (CtxKMS *fb)
   if (system("stty sane")){};
   ctx_tiled_free ((CtxTiled*)fb);
   //free (fb);
-#if CTX_BABL
-  babl_exit ();
-#endif
+  ctx_babl_exit ();
 }
 
 //static unsigned char *fb_icc = NULL;
 //static long fb_icc_length = 0;
 
-int ctx_renderer_is_kms (Ctx *ctx)
-{
-  if (ctx->renderer &&
-      ctx->renderer->free == (void*)ctx_kms_free)
-          return 1;
-  return 0;
-}
-
 #if 0
 static CtxKMS *ctx_fb = NULL;
 static void vt_switch_cb (int sig)
@@ -30714,7 +32075,7 @@ static void vt_switch_cb (int sig)
 
 static int ctx_kms_get_mice_fd (Ctx *ctx)
 {
-  //CtxKMS *fb = (void*)ctx->renderer;
+  //CtxKMS *fb = (void*)ctx->backend;
   return _ctx_mice_fd;
 }
 
@@ -30722,6 +32083,7 @@ Ctx *ctx_new_kms (int width, int height)
 {
 #if CTX_RASTERIZER
   CtxKMS *fb = calloc (sizeof (CtxKMS), 1);
+  CtxBackend *backend = (CtxBackend*)fb;
 
   CtxTiled *tiled = (void*)fb;
   tiled->fb = ctx_fbkms_new (fb, &tiled->width, &tiled->height);
@@ -30742,31 +32104,30 @@ Ctx *ctx_new_kms (int width, int height)
   if (!tiled->fb)
     return NULL;
   tiled->pixels = calloc (fb->fb_mapped_size, 1);
-  ctx_kms_events = 1;
 
-#if CTX_BABL
-  babl_init ();
-#endif
+  ctx_babl_init ();
 
   ctx_get_contents ("file:///tmp/ctx.icc", &sdl_icc, &sdl_icc_length);
 
-  tiled->ctx      = ctx_new ();
-  tiled->ctx_copy = ctx_new ();
+  backend->ctx = _ctx_new_drawlist (width, height);
+  tiled->ctx_copy = _ctx_new_drawlist (width, height);
+
   tiled->width    = width;
   tiled->height   = height;
+  tiled->show_frame = (void*)ctx_kms_show_frame;
 
-  ctx_set_renderer (tiled->ctx, fb);
-  ctx_set_renderer (tiled->ctx_copy, fb);
-  ctx_set_texture_cache (tiled->ctx_copy, tiled->ctx);
-
-  ctx_set_size (tiled->ctx, width, height);
-  ctx_set_size (tiled->ctx_copy, width, height);
+  ctx_set_backend (backend->ctx, fb);
+  ctx_set_backend (tiled->ctx_copy, fb);
+  ctx_set_texture_cache (tiled->ctx_copy, backend->ctx);
 
-  tiled->flush = (void*)ctx_kms_flush;
-  tiled->reset = (void*)ctx_kms_reset;
-  tiled->free  = (void*)ctx_kms_free;
-  tiled->set_clipboard = (void*)ctx_fb_set_clipboard;
-  tiled->get_clipboard = (void*)ctx_fb_get_clipboard;
+  backend->flush = ctx_tiled_flush;
+  backend->reset = ctx_kms_reset;
+  backend->free  = (void*)ctx_kms_free;
+  backend->process = (void*)ctx_drawlist_process;
+  backend->consume_events = ctx_kms_consume_events;
+  backend->get_event_fds = (void*) ctx_fb_get_event_fds;
+  backend->set_clipboard = ctx_headless_set_clipboard;
+  backend->get_clipboard = ctx_headless_get_clipboard;
 
   for (int i = 0; i < _ctx_max_threads; i++)
   {
@@ -30774,8 +32135,8 @@ Ctx *ctx_new_kms (int width, int height)
                    tiled->width/CTX_HASH_COLS, tiled->height/CTX_HASH_ROWS,
                    tiled->width * 4, CTX_FORMAT_BGRA8); // this format
                                   // is overriden in  thread
-    ((CtxRasterizer*)(tiled->host[i]->renderer))->swap_red_green = 1;
-    ctx_set_texture_source (tiled->host[i], tiled->ctx);
+    ((CtxRasterizer*)(tiled->host[i]->backend))->swap_red_green = 1;
+    ctx_set_texture_source (tiled->host[i], backend->ctx);
   }
 
   mtx_init (&tiled->mtx, mtx_plain);
@@ -30825,7 +32186,6 @@ Ctx *ctx_new_kms (int width, int height)
   ioctl(0, KDSETMODE, KD_GRAPHICS);
 #endif
   tiled->shown_frame = tiled->render_frame;
-  //ctx_flush (tiled->ctx);
 #if 0
   signal (SIGUSR1, vt_switch_cb);
   signal (SIGUSR2, vt_switch_cb);
@@ -30850,595 +32210,647 @@ Ctx *ctx_new_kms (int width, int height)
   }
 #endif
 
-  return tiled->ctx;
+  return backend->ctx;
 #else
   return NULL;
 #endif
 }
-#else
-
-int ctx_renderer_is_kms (Ctx *ctx)
-{
-  return 0;
-}
-
-#endif
-#endif
-
-#if CTX_EVENTS
-
-#if !__COSMOPOLITAN__
-#include <fcntl.h>
-#include <sys/ioctl.h>
-#include <signal.h>
 #endif
-
-#if CTX_FB
-static int ctx_fb_get_mice_fd (Ctx *ctx)
-{
-  //CtxFb *fb = (void*)ctx->renderer;
-  return _ctx_mice_fd;
-}
-
-#ifdef __linux__
-  #include <linux/fb.h>
-  #include <linux/vt.h>
-  #include <linux/kd.h>
 #endif
 
-#ifdef __NetBSD__
-  typedef uint8_t unchar;
-  typedef uint8_t u_char;
-  typedef uint16_t ushort;
-  typedef uint32_t u_int;
-  typedef uint64_t u_long;
-  #include <sys/param.h>
-  #include <dev/wscons/wsdisplay_usl_io.h>
-  #include <dev/wscons/wsconsio.h>
-  #include <dev/wscons/wsksymdef.h>
-#endif
+#if CTX_SDL
 
-  #include <sys/mman.h>
+/**/
 
-typedef struct _CtxFb CtxFb;
-struct _CtxFb
+typedef struct _CtxSDL CtxSDL;
+struct _CtxSDL /* state of the SDL backend: the shared CtxTiled part followed by SDL-specific fields */
 {
-   CtxTiled tiled;
-#if 0
-   void (*render) (void *fb, CtxCommand *command);
-   void (*reset)  (void *fb);
-   void (*flush)  (void *fb);
-   char *(*get_clipboard) (void *ctxctx);
-   void (*set_clipboard) (void *ctxctx, const char *text);
-   void (*free)   (void *fb);
-   Ctx          *ctx;
-   int           width;
-   int           height;
-   int           cols; // unused
-   int           rows; // unused
-   int           was_down;
-   uint8_t      *pixels;
-   Ctx          *ctx_copy;
-   Ctx          *host[CTX_MAX_THREADS];
-   CtxAntialias  antialias;
-   int           quit;
-   _Atomic int   thread_quit;
-   int           shown_frame;
-   int           render_frame;
-   int           rendered_frame[CTX_MAX_THREADS];
-   int           frame;
-   int           min_col; // hasher cols and rows
-   int           min_row;
-   int           max_col;
-   int           max_row;
-   uint8_t       hashes[CTX_HASH_ROWS * CTX_HASH_COLS *  20];
-   int8_t        tile_affinity[CTX_HASH_ROWS * CTX_HASH_COLS]; // which render thread no is
-                                                           // responsible for a tile
-                                                           //
-
-
-   int           pointer_down[3];
-#endif
+   CtxTiled  tiled; // first member; presumably so a CtxSDL* can be used as a CtxTiled*, as the fb/kms code does with its own structs
+   /* where we diverge from fb*/
    int           key_balance;
    int           key_repeat;
    int           lctrl;
    int           lalt;
    int           rctrl;
+   int           lshift; // presumably left shift key state, alongside lctrl/lalt/rctrl - TODO confirm
+   int           rshift; // presumably right shift key state - TODO confirm
 
+   SDL_Window   *window;
+   SDL_Renderer *backend; // an SDL_Renderer handle, distinct from the CtxBackend concept despite the field name
+   SDL_Texture  *texture;
 
-   int          fb_fd;
-   char        *fb_path;
-   int          fb_bits;
-   int          fb_bpp;
-   int          fb_mapped_size;
-   int          vt;
-   int          tty;
-   cnd_t        cond;
-   mtx_t        mtx;
-#if __linux__
-   struct       fb_var_screeninfo vinfo;
-   struct       fb_fix_screeninfo finfo;
-#endif
+   int           fullscreen;
 };
 
-#if UINTPTR_MAX == 0xffFFffFF
-  #define fbdrmuint_t uint32_t
-#elif UINTPTR_MAX == 0xffFFffFFffFFffFF
-  #define fbdrmuint_t uint64_t
-#endif
-
-
-static void ctx_fb_flip (CtxFb *fb)
-{
-#ifdef __linux__
-  ioctl (fb->fb_fd, FBIOPAN_DISPLAY, &fb->vinfo);
-#endif
-}
 
-static void ctx_fb_show_frame (CtxFb *fb, int block)
+void ctx_screenshot (Ctx *ctx, const char *output_path)
 {
-  CtxTiled *tiled = (void*)fb;
-  if (tiled->shown_frame == tiled->render_frame)
-  {
-    if (block == 0) // consume event call
-    {
-      ctx_tiled_draw_cursor (tiled);
-      ctx_fb_flip (fb);
-    }
-    return;
-  }
+#if CTX_SCREENSHOT
+  CtxTiled *tiled = (CtxTiled*)ctx->backend;
 
-  if (block)
-  {
-    int count = 0;
-    while (ctx_tiled_threads_done (tiled) != _ctx_max_threads)
-    {
-      usleep (500);
-      count ++;
-      if (count > 2000)
+   if (ctx_backend_type (ctx) == CTX_BACKEND_RASTERIZER)
+   {
+      CtxRasterizer *rasterizer = (CtxRasterizer*)ctx->backend;
+      // XXX  only valid for RGBA8
+      if (rasterizer->format->pixel_format == CTX_FORMAT_RGBA8)
       {
-        tiled->shown_frame = tiled->render_frame;
+#ifdef INCLUDE_STB_IMAGE_WRITE_H
+        stbi_write_png (output_path, rasterizer->blit_width, rasterizer->blit_height, 4, rasterizer->buf, rasterizer->blit_stride);
+#endif
         return;
       }
-    }
-  }
-  else
-  {
-    if (ctx_tiled_threads_done (tiled) != _ctx_max_threads)
-      return;
-  }
-
-    if (tiled->vt_active)
-    {
-       int pre_skip = tiled->min_row * tiled->height/CTX_HASH_ROWS * tiled->width;
-       int post_skip = (CTX_HASH_ROWS-tiled->max_row-1) * tiled->height/CTX_HASH_ROWS * tiled->width;
-
-       int rows = ((tiled->width * tiled->height) - pre_skip - post_skip)/tiled->width;
-
-       int col_pre_skip = tiled->min_col * tiled->width/CTX_HASH_COLS;
-       int col_post_skip = (CTX_HASH_COLS-tiled->max_col-1) * tiled->width/CTX_HASH_COLS;
-       if (_ctx_damage_control)
-       {
-         pre_skip = post_skip = col_pre_skip = col_post_skip = 0;
-       }
-
-       if (pre_skip < 0) pre_skip = 0;
-       if (post_skip < 0) post_skip = 0;
-
+   }
 
-       if (tiled->min_row == 100){
-          pre_skip = 0;
-          post_skip = 0;
-#ifdef __linux__
-           __u32 dummy = 0;
-          ioctl (fb->fb_fd, FBIO_WAITFORVSYNC, &dummy);
-#endif
-          ctx_tiled_undraw_cursor (tiled);
-       }
-       else
-       {
+  if (!ctx_backend_is_tiled (ctx))
+    return;
 
-      tiled->min_row = 100;
-      tiled->max_row = 0;
-      tiled->min_col = 100;
-      tiled->max_col = 0;
-#ifdef __linux__
-    {
-     __u32 dummy = 0;
-     ioctl (fb->fb_fd, FBIO_WAITFORVSYNC, &dummy);
-    }
-#endif
-     ctx_tiled_undraw_cursor (tiled);
-     switch (fb->fb_bits)
-     {
-       case 32:
-#if 1
-         {
-           uint8_t *dst = tiled->fb + pre_skip * 4;
-           uint8_t *src = tiled->pixels + pre_skip * 4;
-           int pre = col_pre_skip * 4;
-           int post = col_post_skip * 4;
-           int core = tiled->width * 4 - pre - post;
-           for (int i = 0; i < rows; i++)
-           {
-             dst  += pre;
-             src  += pre;
-             memcpy (dst, src, core);
-             src  += core;
-             dst  += core;
-             dst  += post;
-             src  += post;
-           }
-         }
-#else
-         { int count = tiled->width * tiled->height;
-           const uint32_t *src = (void*)tiled->pixels;
-           uint32_t *dst = (void*)tiled->fb;
-           count-= pre_skip;
-           src+= pre_skip;
-           dst+= pre_skip;
-           count-= post_skip;
-           while (count -- > 0)
-           {
-             dst[0] = ctx_swap_red_green2 (src[0]);
-             src++;
-             dst++;
-           }
-         }
-#endif
-         break;
-         /* XXX  :  note: converting a scanline (or all) to target and
-          * then doing a bulk memcpy be faster (at least with som /dev/fbs)  */
-       case 24:
-         { int count = tiled->width * tiled->height;
-           const uint8_t *src = tiled->pixels;
-           uint8_t *dst = tiled->fb;
-           count-= pre_skip;
-           src+= pre_skip * 4;
-           dst+= pre_skip * 3;
-           count-= post_skip;
-           while (count -- > 0)
-           {
-             dst[0] = src[0];
-             dst[1] = src[1];
-             dst[2] = src[2];
-             dst+=3;
-             src+=4;
-           }
-         }
-         break;
-       case 16:
-         { int count = tiled->width * tiled->height;
-           const uint8_t *src = tiled->pixels;
-           uint8_t *dst = tiled->fb;
-           count-= post_skip;
-           count-= pre_skip;
-           src+= pre_skip * 4;
-           dst+= pre_skip * 2;
-           while (count -- > 0)
-           {
-             int big = ((src[0] >> 3)) +
-                ((src[1] >> 2)<<5) +
-                ((src[2] >> 3)<<11);
-             dst[0] = big & 255;
-             dst[1] = big >>  8;
-             dst+=2;
-             src+=4;
-           }
-         }
-         break;
-       case 15:
-         { int count = tiled->width * tiled->height;
-           const uint8_t *src = tiled->pixels;
-           uint8_t *dst = tiled->fb;
-           count-= post_skip;
-           count-= pre_skip;
-           src+= pre_skip * 4;
-           dst+= pre_skip * 2;
-           while (count -- > 0)
-           {
-             int big = ((src[2] >> 3)) +
-                       ((src[1] >> 2)<<5) +
-                       ((src[0] >> 3)<<10);
-             dst[0] = big & 255;
-             dst[1] = big >>  8;
-             dst+=2;
-             src+=4;
-           }
-         }
-         break;
-       case 8:
-         { int count = tiled->width * tiled->height;
-           const uint8_t *src = tiled->pixels;
-           uint8_t *dst = tiled->fb;
-           count-= post_skip;
-           count-= pre_skip;
-           src+= pre_skip * 4;
-           dst+= pre_skip;
-           while (count -- > 0)
-           {
-             dst[0] = ((src[0] >> 5)) +
-                      ((src[1] >> 5)<<3) +
-                      ((src[2] >> 6)<<6);
-             dst+=1;
-             src+=4;
-           }
-         }
-         break;
-     }
-    }
-    ctx_tiled_cursor_drawn = 0;
-    ctx_tiled_draw_cursor (tiled);
-    ctx_fb_flip (fb);
-    tiled->shown_frame = tiled->render_frame;
+  // we rely on the same struct layout XXX !
+  for (int i = 0; i < tiled->width * tiled->height; i++)
+  {
+    int tmp = tiled->pixels[i*4];
+    tiled->pixels[i*4] = tiled->pixels[i*4 + 2];
+    tiled->pixels[i*4 + 2] = tmp;
   }
-}
-
-int ctx_fb_consume_events (Ctx *ctx)
-{
-  CtxFb *fb = (void*)ctx->renderer;
-  ctx_fb_show_frame (fb, 0);
-  event_check_pending (&fb->tiled);
-  return 0;
-}
-
-inline static void ctx_fb_reset (CtxFb *fb)
-{
-  ctx_fb_show_frame (fb, 1);
-}
-
-inline static void ctx_fb_flush (CtxFb *fb)
-{
-  ctx_tiled_flush ((CtxTiled*)fb);
-}
-
-void ctx_fb_free (CtxFb *fb)
-{
-  CtxTiled*tiled=(CtxTiled*)fb;
 
-//#ifdef __linux__
-  ioctl (0, KDSETMODE, KD_TEXT);
-//#endif
-#ifdef __NetBSD__
-  {
-   int mode = WSDISPLAYIO_MODE_EMUL;
-   ioctl (fb->fb_fd, WSDISPLAYIO_SMODE, &mode);
+#if 1
+  if (ctx_backend_type (ctx) != CTX_BACKEND_HEADLESS)
+  for (int i = 0; i < tiled->width * tiled->height; i++)
+  {
+    int tmp = tiled->pixels[i*4];
+    tiled->pixels[i*4] = tiled->pixels[i*4 + 2];
+    tiled->pixels[i*4 + 2] = tmp;
   }
 #endif
-  munmap (tiled->fb, fb->fb_mapped_size);
-  close (fb->fb_fd);
-  if (system("stty sane")){};
-  ctx_tiled_free ((CtxTiled*)fb);
-  //free (fb);
-#if CTX_BABL
-  babl_exit ();
+#ifdef INCLUDE_STB_IMAGE_WRITE_H
+  stbi_write_png (output_path, tiled->width, tiled->height, 4, tiled->pixels, tiled->width*4);
+#endif
 #endif
 }
 
-//static unsigned char *fb_icc = NULL;
-//static long fb_icc_length = 0;
-
-int ctx_renderer_is_fb (Ctx *ctx)
+int ctx_show_fps = 1;
+void ctx_sdl_set_title (void *self, const char *new_title)
 {
-  if (ctx->renderer &&
-      ctx->renderer->free == (void*)ctx_fb_free)
-          return 1;
-  return 0;
+   Ctx *ctx = (Ctx*)self;
+   CtxSDL *sdl = (CtxSDL*)ctx->backend;
+   if (!ctx_show_fps)
+   SDL_SetWindowTitle (sdl->window, new_title);
 }
 
-static CtxFb *ctx_fb = NULL;
-#ifdef __linux__
-static void fb_vt_switch_cb (int sig)
+static long ctx_sdl_start_time = 0;
+
+static void ctx_sdl_show_frame (CtxSDL *sdl, int block)
 {
-  CtxTiled *tiled = (void*)ctx_fb;
-  if (sig == SIGUSR1)
+  CtxTiled *tiled = &sdl->tiled;
+  CtxBackend *backend = (CtxBackend*)tiled;
+  if (tiled->shown_cursor != backend->ctx->cursor)
   {
-    ioctl (0, VT_RELDISP, 1);
-    tiled->vt_active = 0;
-    ioctl (0, KDSETMODE, KD_TEXT);
+    tiled->shown_cursor = backend->ctx->cursor;
+    SDL_Cursor *new_cursor =  NULL;
+    switch (tiled->shown_cursor)
+    {
+      case CTX_CURSOR_UNSET: // XXX: document how this differs from none
+                             //      perhaps falling back to arrow?
+        break;
+      case CTX_CURSOR_NONE:
+        new_cursor = NULL;
+        break;
+      case CTX_CURSOR_ARROW:
+        new_cursor = SDL_CreateSystemCursor(SDL_SYSTEM_CURSOR_ARROW);
+        break;
+      case CTX_CURSOR_CROSSHAIR:
+        new_cursor = SDL_CreateSystemCursor(SDL_SYSTEM_CURSOR_CROSSHAIR);
+        break;
+      case CTX_CURSOR_WAIT:
+        new_cursor = SDL_CreateSystemCursor(SDL_SYSTEM_CURSOR_WAIT);
+        break;
+      case CTX_CURSOR_HAND:
+        new_cursor = SDL_CreateSystemCursor(SDL_SYSTEM_CURSOR_HAND);
+        break;
+      case CTX_CURSOR_IBEAM:
+        new_cursor = SDL_CreateSystemCursor(SDL_SYSTEM_CURSOR_IBEAM);
+        break;
+      case CTX_CURSOR_MOVE:
+      case CTX_CURSOR_RESIZE_ALL:
+        new_cursor = SDL_CreateSystemCursor(SDL_SYSTEM_CURSOR_SIZEALL);
+        break;
+      case CTX_CURSOR_RESIZE_N:
+      case CTX_CURSOR_RESIZE_S:
+        new_cursor = SDL_CreateSystemCursor(SDL_SYSTEM_CURSOR_SIZENS);
+        break;
+      case CTX_CURSOR_RESIZE_E:
+      case CTX_CURSOR_RESIZE_W:
+        new_cursor = SDL_CreateSystemCursor(SDL_SYSTEM_CURSOR_SIZEWE);
+        break;
+      case CTX_CURSOR_RESIZE_NE:
+      case CTX_CURSOR_RESIZE_SW:
+        new_cursor = SDL_CreateSystemCursor(SDL_SYSTEM_CURSOR_SIZENESW);
+        break;
+      case CTX_CURSOR_RESIZE_NW:
+      case CTX_CURSOR_RESIZE_SE:
+        new_cursor = SDL_CreateSystemCursor(SDL_SYSTEM_CURSOR_SIZENWSE);
+        break;
+    }
+    if (new_cursor)
+    {
+      SDL_Cursor *old_cursor = SDL_GetCursor();
+      SDL_SetCursor (new_cursor);
+      SDL_ShowCursor (1);
+      if (old_cursor)
+        SDL_FreeCursor (old_cursor);
+    }
+    else
+    {
+      SDL_ShowCursor (0);
+    }
   }
-  else
+
+  if (tiled->shown_frame == tiled->render_frame)
   {
-    ioctl (0, VT_RELDISP, VT_ACKACQ);
-    tiled->vt_active = 1;
-    // queue draw
-    tiled->render_frame = ++tiled->frame;
-    ioctl (0, KDSETMODE, KD_GRAPHICS);
-    {
-      tiled->ctx->dirty=1;
+    return;
+  }
 
-      for (int row = 0; row < CTX_HASH_ROWS; row++)
-      for (int col = 0; col < CTX_HASH_COLS; col++)
+  if (block)
+  {
+    int count = 0;
+    while (ctx_tiled_threads_done (tiled) != _ctx_max_threads)
+    {
+      usleep (500);
+      count ++;
+      if (count > 300)
       {
-        tiled->hashes[(row * CTX_HASH_COLS + col) *  20] += 1;
+        tiled->shown_frame = tiled->render_frame;
+        fprintf (stderr, "[drop]");
+        return;
       }
     }
   }
-}
+  else
+  {
+    if (ctx_tiled_threads_done (tiled) != _ctx_max_threads)
+      return;
+  }
+
+
+  if (tiled->min_row == 100)
+  {
+  }
+  else
+  {
+#if 1
+    int x = tiled->min_col * tiled->width/CTX_HASH_COLS;
+    int y = tiled->min_row * tiled->height/CTX_HASH_ROWS;
+    int x1 = (tiled->max_col+1) * tiled->width/CTX_HASH_COLS;
+    int y1 = (tiled->max_row+1) * tiled->height/CTX_HASH_ROWS;
+    int width = x1 - x;
+    int height = y1 - y;
 #endif
+    tiled->min_row = 100;
+    tiled->max_row = 0;
+    tiled->min_col = 100;
+    tiled->max_col = 0;
 
+    SDL_Rect r = {x, y, width, height};
+    SDL_UpdateTexture (sdl->texture, &r,
+                      //(void*)sdl->pixels,
+                      (void*)(tiled->pixels + y * tiled->width * 4 + x * 4),
+                      
+                      tiled->width * 4);
+    SDL_RenderClear (sdl->backend);
+    SDL_RenderCopy (sdl->backend, sdl->texture, NULL, NULL);
+    SDL_RenderPresent (sdl->backend);
 
-Ctx *ctx_new_fb (int width, int height)
-{
-#if CTX_RASTERIZER
-  CtxFb *fb = calloc (sizeof (CtxFb), 1);
 
-  CtxTiled *tiled = (void*)fb;
-  ctx_fb = fb;
+  if (ctx_show_fps)
   {
-#ifdef __linux__
-  const char *dev_path = "/dev/fb0";
-#endif
-#ifdef __NetBSD__
-  const char *dev_path = "/dev/ttyE0";
-#endif
-#ifdef __OpenBSD__
-  const char *dev_path = "/dev/ttyC0";
-#endif
-  fb->fb_fd = open (dev_path, O_RDWR);
-  if (fb->fb_fd > 0)
-    fb->fb_path = strdup (dev_path);
-  else
+    static char tmp_title[1024];
+    static uint64_t prev_time = 0;
+    uint64_t time = ctx_ticks ();
+    float fps = 1000000.0/  (time - ctx_sdl_start_time);
+    float fps2 = 1000000.0/  (time - prev_time);
+    prev_time = time;
+    static float fps_avg = 0.0f;
+
+    if (time - prev_time < 1000 * 1000 * 0.05)
+    fps_avg = (fps_avg * 0.9f + fps2 *  0.1f);
+
+    sprintf (tmp_title, "FPS: %.1f %.1f %.1f", (fps2*0.75+fps_avg*0.25), fps2, fps);
+    
+    sprintf (&tmp_title[strlen(tmp_title)], " shape hit rate: %.2f", ctx_shape_cache_rate);
+
+    SDL_SetWindowTitle (sdl->window, tmp_title);
+  }
+  }
+  tiled->shown_frame = tiled->render_frame;
+}
+
+static const char *ctx_sdl_keysym_to_name (unsigned int sym, int *r_keycode)
+{
+  static char buf[16]="";
+  buf[ctx_unichar_to_utf8 (sym, (void*)buf)]=0;
+  int scan_code = sym;
+  const char *name = &buf[0];
+   switch (sym)
+   {
+     case SDLK_RSHIFT: scan_code = 16 ; break;
+     case SDLK_LSHIFT: scan_code = 16 ; break;
+     case SDLK_LCTRL: scan_code = 17 ; break;
+     case SDLK_RCTRL: scan_code = 17 ; break;
+     case SDLK_LALT:  scan_code = 18 ; break;
+     case SDLK_RALT:  scan_code = 18 ; break;
+     case SDLK_CAPSLOCK: name = "capslock"; scan_code = 20 ; break;
+     //case SDLK_NUMLOCK: name = "numlock"; scan_code = 144 ; break;
+     //case SDLK_SCROLLLOCK: name = "scrollock"; scan_code = 145 ; break;
+
+     case SDLK_F1:     name = "F1"; scan_code = 112; break;
+     case SDLK_F2:     name = "F2"; scan_code = 113; break;
+     case SDLK_F3:     name = "F3"; scan_code = 114; break;
+     case SDLK_F4:     name = "F4"; scan_code = 115; break;
+     case SDLK_F5:     name = "F5"; scan_code = 116; break;
+     case SDLK_F6:     name = "F6"; scan_code = 117; break;
+     case SDLK_F7:     name = "F7"; scan_code = 118; break;
+     case SDLK_F8:     name = "F8"; scan_code = 119; break;
+     case SDLK_F9:     name = "F9"; scan_code = 120; break;
+     case SDLK_F10:    name = "F10"; scan_code = 121; break;
+     case SDLK_F11:    name = "F11"; scan_code = 122; break;
+     case SDLK_F12:    name = "F12"; scan_code = 123; break;
+     case SDLK_ESCAPE: name = "escape"; break;
+     case SDLK_DOWN:   name = "down"; scan_code = 40; break;
+     case SDLK_LEFT:   name = "left"; scan_code = 37; break;
+     case SDLK_UP:     name = "up"; scan_code = 38;  break;
+     case SDLK_RIGHT:  name = "right"; scan_code = 39; break;
+     case SDLK_BACKSPACE: name = "backspace"; break;
+     case SDLK_SPACE:  name = "space"; break;
+     case SDLK_TAB:    name = "tab"; break;
+     case SDLK_DELETE: name = "delete"; scan_code = 46; break;
+     case SDLK_INSERT: name = "insert"; scan_code = 45; break;
+     case SDLK_RETURN:
+       //if (key_repeat == 0) // return never should repeat
+       name = "return";   // on a DEC like terminal
+       break;
+     case SDLK_HOME:     name = "home"; scan_code = 36; break;
+     case SDLK_END:      name = "end"; scan_code = 35; break;
+     case SDLK_PAGEDOWN: name = "page-down"; scan_code = 34; break;
+     case SDLK_PAGEUP:   name = "page-up"; scan_code = 33; break;
+     case ',': scan_code = 188; break;
+     case '.': scan_code = 190; break;
+     case '/': scan_code = 191; break;
+     case '`': scan_code = 192; break;
+     case '[': scan_code = 219; break;
+     case '\\': scan_code = 220; break;
+     case ']':  scan_code = 221; break;
+     case '\'': scan_code = 222; break;
+     default:
+       ;
+   }
+   if (sym >= 'a' && sym <='z') scan_code -= 32;
+   if (r_keycode)
+   {
+     *r_keycode = scan_code;
+   }
+   return name;
+}
+
+void ctx_sdl_consume_events (Ctx *ctx)
+{
+  CtxBackend *backend = (void*)ctx->backend;
+  CtxTiled    *tiled = (void*)backend;
+  CtxSDL      *sdl = (void*)backend;
+  SDL_Event event;
+  int got_events = 0;
+
+  ctx_sdl_show_frame (sdl, 0);
+
+  while (SDL_PollEvent (&event))
   {
-#ifdef __linux__
-    fb->fb_fd = open ("/dev/graphics/fb0", O_RDWR);
-    if (fb->fb_fd > 0)
+    got_events ++;
+    switch (event.type)
     {
-      fb->fb_path = strdup ("/dev/graphics/fb0");
-    }
-    else
+      case SDL_MOUSEBUTTONDOWN:
+        SDL_CaptureMouse (1);
+        ctx_pointer_press (ctx, event.button.x, event.button.y, event.button.button, 0);
+        break;
+      case SDL_MOUSEBUTTONUP:
+        SDL_CaptureMouse (0);
+        ctx_pointer_release (ctx, event.button.x, event.button.y, event.button.button, 0);
+        break;
+      case SDL_MOUSEMOTION:
+        //  XXX : look at mask and generate motion for each pressed
+        //        button
+        ctx_pointer_motion (ctx, event.motion.x, event.motion.y, 1, 0);
+        break;
+      case SDL_FINGERMOTION:
+        ctx_pointer_motion (ctx, event.tfinger.x * tiled->width, event.tfinger.y * tiled->height,
+            (event.tfinger.fingerId%10) + 4, 0);
+        break;
+      case SDL_FINGERDOWN:
+        {
+        static int fdowns = 0;
+        fdowns ++;
+        if (fdowns > 1) // the very first finger down from SDL seems to be
+                        // mirrored as mouse events, later ones not - at
+                        // least under wayland
+        {
+          ctx_pointer_press (ctx, event.tfinger.x * tiled->width, event.tfinger.y * tiled->height, 
+          (event.tfinger.fingerId%10) + 4, 0);
+        }
+        }
+        break;
+      case SDL_FINGERUP:
+        ctx_pointer_release (ctx, event.tfinger.x * tiled->width, event.tfinger.y * tiled->height,
+          (event.tfinger.fingerId%10) + 4, 0);
+        break;
+#if 1
+      case SDL_TEXTINPUT:
+    //  if (!active)
+    //    break;
+        if (!sdl->lctrl && !sdl->rctrl && !sdl->lalt 
+           //&& ( (vt && vt_keyrepeat (vt) ) || (key_repeat==0) )
+           )
+          {
+            const char *name = event.text.text;
+            int keycode = 0;
+            if (!strcmp (name, " ") ) { name = "space"; }
+            if (name[0] && name[1] == 0)
+            {
+              keycode = name[0];
+              keycode = toupper (keycode);
+              switch (keycode)
+              {
+                case '.':  keycode = 190; break;
+                case ';':  keycode = 59; break;
+                case ',':  keycode = 188; break;
+                case '/':  keycode = 191; break;
+                case '\'': keycode = 222; break;
+                case '`':  keycode = 192; break;
+                case '[':  keycode = 219; break;
+                case ']':  keycode = 221; break;
+                case '\\': keycode = 220; break;
+              }
+            }
+            ctx_key_press (ctx, keycode, name, 0);
+          }
+        break;
 #endif
-    {
-      free (fb);
-      return NULL;
+      case SDL_KEYDOWN:
+        {
+          char buf[32] = "";
+          const char *name = buf;
+          if (!event.key.repeat)
+          {
+            sdl->key_balance ++;
+            sdl->key_repeat = 0;
+          }
+          else
+          {
+            sdl->key_repeat ++;
+          }
+          switch (event.key.keysym.sym)
+          {
+            case SDLK_LSHIFT: sdl->lshift = 1; break;
+            case SDLK_RSHIFT: sdl->rshift = 1; break;
+            case SDLK_LCTRL:  sdl->lctrl = 1; break;
+            case SDLK_LALT:   sdl->lalt = 1; break;
+            case SDLK_RCTRL:  sdl->rctrl = 1; break;
+          }
+          if (sdl->lshift | sdl->rshift | sdl->lctrl | sdl->lalt | sdl->rctrl)
+          {
+            ctx->events.modifier_state ^= ~(CTX_MODIFIER_STATE_CONTROL|
+                                            CTX_MODIFIER_STATE_ALT|
+                                            CTX_MODIFIER_STATE_SHIFT);
+            if (sdl->lshift | sdl->rshift)
+              ctx->events.modifier_state |= CTX_MODIFIER_STATE_SHIFT;
+            if (sdl->lctrl | sdl->rctrl)
+              ctx->events.modifier_state |= CTX_MODIFIER_STATE_CONTROL;
+            if (sdl->lalt)
+              ctx->events.modifier_state |= CTX_MODIFIER_STATE_ALT;
+          }
+          int keycode;
+          name = ctx_sdl_keysym_to_name (event.key.keysym.sym, &keycode);
+          ctx_key_down (ctx, keycode, name, 0);
+
+          if (strlen (name)
+              &&(event.key.keysym.mod & (KMOD_CTRL) ||
+                 event.key.keysym.mod & (KMOD_ALT) ||
+                 ctx_utf8_strlen (name) >= 2))
+          {
+            if (event.key.keysym.mod & (KMOD_CTRL) )
+              {
+                static char buf[64] = "";
+                sprintf (buf, "control-%s", name);
+                name = buf;
+              }
+            if (event.key.keysym.mod & (KMOD_ALT) )
+              {
+                static char buf[128] = "";
+                sprintf (buf, "alt-%s", name);
+                name = buf;
+              }
+            if (event.key.keysym.mod & (KMOD_SHIFT) )
+              {
+                static char buf[196] = "";
+                sprintf (buf, "shift-%s", name);
+                name = buf;
+              }
+            if (strcmp (name, "space"))
+              {
+               ctx_key_press (ctx, keycode, name, 0);
+              }
+          }
+          else
+          {
+#if 0
+             ctx_key_press (ctx, 0, buf, 0);
+#endif
+          }
+        }
+        break;
+      case SDL_KEYUP:
+        {
+           sdl->key_balance --;
+           switch (event.key.keysym.sym)
+           {
+             case SDLK_LSHIFT: sdl->lshift = 0; break;
+             case SDLK_RSHIFT: sdl->rshift = 0; break;
+             case SDLK_LCTRL: sdl->lctrl = 0; break;
+             case SDLK_RCTRL: sdl->rctrl = 0; break;
+             case SDLK_LALT:  sdl->lalt  = 0; break;
+           }
+
+          {
+            ctx->events.modifier_state ^= ~(CTX_MODIFIER_STATE_CONTROL|
+                                            CTX_MODIFIER_STATE_ALT|
+                                            CTX_MODIFIER_STATE_SHIFT);
+            if (sdl->lshift | sdl->rshift)
+              ctx->events.modifier_state |= CTX_MODIFIER_STATE_SHIFT;
+            if (sdl->lctrl | sdl->rctrl)
+              ctx->events.modifier_state |= CTX_MODIFIER_STATE_CONTROL;
+            if (sdl->lalt)
+              ctx->events.modifier_state |= CTX_MODIFIER_STATE_ALT;
+          }
+
+           int keycode;
+           const char *name = ctx_sdl_keysym_to_name (event.key.keysym.sym, &keycode);
+           ctx_key_up (ctx, keycode, name, 0);
+        }
+        break;
+      case SDL_QUIT:
+        ctx_quit (ctx);
+        break;
+      case SDL_WINDOWEVENT:
+        if (event.window.event == SDL_WINDOWEVENT_RESIZED)
+        {
+          ctx_sdl_show_frame (sdl, 1);
+          int width = event.window.data1;
+          int height = event.window.data2;
+          SDL_DestroyTexture (sdl->texture);
+          sdl->texture = SDL_CreateTexture (sdl->backend, SDL_PIXELFORMAT_ABGR8888,
+                          SDL_TEXTUREACCESS_STREAMING, width, height);
+          free (tiled->pixels);
+          tiled->pixels = calloc (4, width * height);
+
+          tiled->width  = width;
+          tiled->height = height;
+          ctx_set_size (backend->ctx, width, height);
+          ctx_set_size (tiled->ctx_copy, width, height);
+        }
+        break;
     }
   }
+}
+#else
+void ctx_screenshot (Ctx *ctx, const char *path)
+{
+}
+#endif
 
-#ifdef __linux__
-  if (ioctl(fb->fb_fd, FBIOGET_FSCREENINFO, &fb->finfo))
-    {
-      fprintf (stderr, "error getting fbinfo\n");
-      close (fb->fb_fd);
-      free (fb->fb_path);
-      free (fb);
-      return NULL;
-    }
+#if CTX_SDL
 
-   if (ioctl(fb->fb_fd, FBIOGET_VSCREENINFO, &fb->vinfo))
-     {
-       fprintf (stderr, "error getting fbinfo\n");
-      close (fb->fb_fd);
-      free (fb->fb_path);
-      free (fb);
-      return NULL;
-     }
-  ioctl (0, KDSETMODE, KD_GRAPHICS);
+static void ctx_sdl_set_clipboard (Ctx *ctx, const char *text)
+{
+  if (text)
+    SDL_SetClipboardText (text);
+}
 
-//fprintf (stderr, "%s\n", fb->fb_path);
-  width = tiled->width = fb->vinfo.xres;
-  height = tiled->height = fb->vinfo.yres;
+static char *ctx_sdl_get_clipboard (Ctx *ctx)
+{
+  return SDL_GetClipboardText ();
+}
 
-  fb->fb_bits = fb->vinfo.bits_per_pixel;
-//fprintf (stderr, "fb bits: %i\n", fb->fb_bits);
 
-  if (fb->fb_bits == 16)
-    fb->fb_bits =
-      fb->vinfo.red.length +
-      fb->vinfo.green.length +
-      fb->vinfo.blue.length;
-   else if (fb->fb_bits == 8)
-  {
-    unsigned short red[256],  green[256],  blue[256];
-  //  unsigned short original_red[256];
-  //  unsigned short original_green[256];
-  //  unsigned short original_blue[256];
-    struct fb_cmap cmap = {0, 256, red, green, blue, NULL};
-  //  struct fb_cmap original_cmap = {0, 256, original_red, original_green, original_blue, NULL};
-    int i;
+inline static void ctx_sdl_reset (Ctx *ctx)
+{
+  CtxSDL  *sdl = (CtxSDL*)ctx->backend;
+  ctx_sdl_show_frame (sdl, 1);
+  ctx_sdl_start_time = ctx_ticks ();
+}
 
-    /* do we really need to restore it ? */
-   // if (ioctl (fb->fb_fd, FBIOPUTCMAP, &original_cmap) == -1)
-   // {
-   //   fprintf (stderr, "palette initialization problem %i\n", __LINE__);
-   // }
+void ctx_sdl_free (CtxSDL *sdl)
+{
+  if (sdl->texture)
+    SDL_DestroyTexture (sdl->texture);
+  if (sdl->backend)
+    SDL_DestroyRenderer (sdl->backend);
+  if (sdl->window)
+  {
+    SDL_DestroyWindow (sdl->window);
+    ctx_babl_exit ();
+  }
+  sdl->texture = NULL;sdl->backend = NULL;sdl->window = NULL;
 
-    for (i = 0; i < 256; i++)
-    {
-      red[i]   = ((( i >> 5) & 0x7) << 5) << 8;
-      green[i] = ((( i >> 2) & 0x7) << 5) << 8;
-      blue[i]  = ((( i >> 0) & 0x3) << 6) << 8;
-    }
+  ctx_tiled_free ((CtxTiled*)sdl);
+}
 
-    if (ioctl (fb->fb_fd, FBIOPUTCMAP, &cmap) == -1)
-    {
-      fprintf (stderr, "palette initialization problem %i\n", __LINE__);
-    }
-  }
 
-  fb->fb_bpp = fb->vinfo.bits_per_pixel / 8;
-  fb->fb_mapped_size = fb->finfo.smem_len;
-#endif
 
-#ifdef __NetBSD__
-  struct wsdisplay_fbinfo finfo;
+void ctx_sdl_set_fullscreen (Ctx *ctx, int val)
+{
+  CtxSDL *sdl = (void*)ctx->backend;
 
-  int mode = WSDISPLAYIO_MODE_DUMBFB;
-  //int mode = WSDISPLAYIO_MODE_MAPPED;
-  if (ioctl (fb->fb_fd, WSDISPLAYIO_SMODE, &mode)) {
-    return NULL;
+  if (val)
+  {
+    SDL_SetWindowFullscreen (sdl->window, SDL_WINDOW_FULLSCREEN_DESKTOP);
   }
-  if (ioctl (fb->fb_fd, WSDISPLAYIO_GINFO, &finfo)) {
-    fprintf (stderr, "ioctl: WSIDSPLAYIO_GINFO failed\n");
-    return NULL;
+  else
+  {
+    SDL_SetWindowFullscreen (sdl->window, 0);
   }
+  // XXX we're presuming success
+  sdl->fullscreen = val;
+}
+int ctx_sdl_get_fullscreen (Ctx *ctx)
+{
+  CtxSDL *sdl = (void*)ctx->backend;
+  return sdl->fullscreen;
+}
 
-  width = tiled->width = finfo.width;
-  height = tiled->height = finfo.height;
-  fb->fb_bits = finfo.depth;
-  fb->fb_bpp = (fb->fb_bits + 1) / 8;
-  fb->fb_mapped_size = width * height * fb->fb_bpp;
-
+Ctx *ctx_new_sdl (int width, int height)
+{
+#if CTX_RASTERIZER
 
-  if (fb->fb_bits == 8)
-  {
-    uint8_t red[256],  green[256],  blue[256];
-    struct wsdisplay_cmap cmap;
-    cmap.red = red;
-    cmap.green = green;
-    cmap.blue = blue;
-    cmap.count = 256;
-    cmap.index = 0;
-    for (int i = 0; i < 256; i++)
-    {
-      red[i]   = ((( i >> 5) & 0x7) << 5);
-      green[i] = ((( i >> 2) & 0x7) << 5);
-      blue[i]  = ((( i >> 0) & 0x3) << 6);
-    }
+  CtxSDL *sdl = (CtxSDL*)calloc (sizeof (CtxSDL), 1);
+  CtxTiled *tiled = (void*)sdl;
+  CtxBackend *backend = (CtxBackend*)sdl;
 
-    ioctl (fb->fb_fd, WSDISPLAYIO_PUTCMAP, &cmap);
+  ctx_get_contents ("file:///tmp/ctx.icc", &sdl_icc, &sdl_icc_length);
+  if (width <= 0 || height <= 0)
+  {
+    width  = 1920;
+    height = 1080;
   }
-#endif
-
-                                              
-  tiled->fb = mmap (NULL, fb->fb_mapped_size, PROT_READ|PROT_WRITE, MAP_SHARED, fb->fb_fd, 0);
+  sdl->window = SDL_CreateWindow("ctx", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, width, height, 
SDL_WINDOW_SHOWN|SDL_WINDOW_RESIZABLE);
+  //sdl->backend = SDL_CreateRenderer (sdl->window, -1, SDL_RENDERER_SOFTWARE);
+  sdl->backend = SDL_CreateRenderer (sdl->window, -1, 0);
+  if (!sdl->backend)
+  {
+     ctx_free (backend->ctx);
+     free (sdl);
+     return NULL;
   }
-  if (!tiled->fb)
-    return NULL;
-  tiled->pixels = calloc (fb->fb_mapped_size, 1);
-  ctx_fb_events = 1;
+  ctx_babl_init ();
+  sdl->fullscreen = 0;
 
-#if CTX_BABL
-  babl_init ();
-#endif
 
-  ctx_get_contents ("file:///tmp/ctx.icc", &sdl_icc, &sdl_icc_length);
+  ctx_show_fps = getenv ("CTX_SHOW_FPS")!=NULL;
+
+  sdl->texture = SDL_CreateTexture (sdl->backend,
+        SDL_PIXELFORMAT_ABGR8888,
+        SDL_TEXTUREACCESS_STREAMING,
+        width, height);
+
+  SDL_StartTextInput ();
+  SDL_EnableScreenSaver ();
+  SDL_GL_SetSwapInterval (1);
 
-  tiled->ctx      = ctx_new ();
-  tiled->ctx_copy = ctx_new ();
+  backend->ctx      = _ctx_new_drawlist (width, height);
+  tiled->ctx_copy = _ctx_new_drawlist (width, height);
   tiled->width    = width;
   tiled->height   = height;
+  tiled->cols     = 80;
+  tiled->rows     = 20;
+  ctx_set_backend (backend->ctx, sdl);
+  ctx_set_backend (tiled->ctx_copy, sdl);
+  ctx_set_texture_cache (tiled->ctx_copy, backend->ctx);
+
+  tiled->pixels = (uint8_t*)malloc (width * height * 4);
+  tiled->show_frame = (void*)ctx_sdl_show_frame;
 
-  ctx_set_renderer (tiled->ctx, fb);
-  ctx_set_renderer (tiled->ctx_copy, fb);
-  ctx_set_texture_cache (tiled->ctx_copy, tiled->ctx);
 
-  ctx_set_size (tiled->ctx, width, height);
-  ctx_set_size (tiled->ctx_copy, width, height);
+  backend->set_windowtitle = (void*)ctx_sdl_set_title;
+  backend->flush = ctx_tiled_flush;
+  backend->process = (void*)ctx_drawlist_process;
+  backend->reset = ctx_sdl_reset;
+  backend->free  = (void*)ctx_sdl_free;
+  backend->consume_events = ctx_sdl_consume_events;
 
-  tiled->flush = (void*)ctx_fb_flush;
-  tiled->reset = (void*)ctx_fb_reset;
-  tiled->free  = (void*)ctx_fb_free;
-  tiled->set_clipboard = (void*)ctx_fb_set_clipboard;
-  tiled->get_clipboard = (void*)ctx_fb_get_clipboard;
+  backend->set_clipboard = ctx_sdl_set_clipboard;
+  backend->get_clipboard = ctx_sdl_get_clipboard;
 
   for (int i = 0; i < _ctx_max_threads; i++)
   {
     tiled->host[i] = ctx_new_for_framebuffer (tiled->pixels,
-                   tiled->width/CTX_HASH_COLS, tiled->height/CTX_HASH_ROWS,
-                   tiled->width * 4, CTX_FORMAT_BGRA8); // this format
-                                  // is overriden in  thread
-    ((CtxRasterizer*)(tiled->host[i]->renderer))->swap_red_green = 1;
-    ctx_set_texture_source (tiled->host[i], tiled->ctx);
+                     tiled->width/CTX_HASH_COLS, tiled->height/CTX_HASH_ROWS,
+                     tiled->width * 4, CTX_FORMAT_RGBA8);
+    ctx_set_texture_source (tiled->host[i], backend->ctx);
   }
 
   mtx_init (&tiled->mtx, mtx_plain);
@@ -31448,7 +32860,7 @@ Ctx *ctx_new_fb (int width, int height)
   if(_ctx_max_threads>no){ \
     static void *args[2]={(void*)no, };\
     thrd_t tid;\
-    args[1]=fb;\
+    args[1]=sdl;\
     thrd_create (&tid, (void*)ctx_tiled_render_fun, args);\
   }
   start_thread(0);
@@ -31469,728 +32881,972 @@ Ctx *ctx_new_fb (int width, int height)
   start_thread(15);
 #undef start_thread
 
-  //ctx_flush (tiled->ctx);
+  return backend->ctx;
+#else
+  return NULL;
+#endif
+}
+#endif
+
+#if CTX_EVENTS
 
-  EvSource *kb = evsource_kb_new ();
-  if (kb)
+#if !__COSMOPOLITAN__
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#endif
+
+typedef struct CtxTermCell /* one character cell of the terminal grid */
+{
+  char    utf8[5]; /* glyph to print; ctx_term_set copies at most 4 bytes into it */
+  uint8_t fg[4]; /* foreground colour; ctx_term_scanout reads only the first three bytes */
+  uint8_t bg[4]; /* background colour; ctx_term_scanout reads only the first three bytes */
+
+  char    prev_utf8[5]; /* glyph as of the previous ctx_term_scanout, used to skip unchanged cells */
+  uint8_t prev_fg[4]; /* foreground as of the previous scanout */
+  uint8_t prev_bg[4]; /* background as of the previous scanout */
+} CtxTermCell;
+
+typedef struct CtxTermLine /* one row of cells, grown on demand by ctx_term_set */
+{
+  CtxTermCell *cells; /* heap array holding `size` cells */
+  int maxcol; /* highest 1-based column written so far; ctx_term_scanout stops there */
+  int size; /* number of cells currently allocated in `cells` */
+} CtxTermLine;
+
+typedef enum /* rendering style of the terminal backend; stored in CtxTerm.mode */
+{
+  CTX_TERM_ASCII, /* NOTE(review): presumably served by ctx_term_output_buf_ascii with mono=0 - the dispatch is not visible here */
+  CTX_TERM_ASCII_MONO, /* presumably the same with mono=1 - confirm */
+  CTX_TERM_SEXTANT, /* presumably ctx_term_output_buf_sextant - confirm */
+  CTX_TERM_BRAILLE_MONO, /* presumably ctx_term_output_buf_braille with mono=1 - confirm */
+  CTX_TERM_BRAILLE, /* presumably ctx_term_output_buf_braille with mono=0 - confirm */
+  CTX_TERM_QUARTER, /* presumably ctx_term_output_buf_quarter - confirm */
+} CtxTermMode;
+
+typedef struct _CtxTerm CtxTerm;
+struct _CtxTerm /* state of the character-cell terminal backend */
+{
+   CtxBackend  backender; /* first member; NOTE(review): presumably so a CtxTerm* can be used where a CtxBackend* is expected - confirm */
+   int         width; /* pixel buffer width; ctx_term_find_color_pair uses width * 4 as the row stride */
+   int         height;
+   int         cols; /* grid width in cells; ctx_term_set ignores col > cols */
+   int         rows; /* grid height in cells; ctx_term_set ignores row > rows */
+   int         was_down;
+
+   uint8_t    *pixels; /* 4 bytes per pixel, sampled by ctx_term_find_color_pair */
+
+   Ctx        *host;
+   CtxList    *lines; /* list of CtxTermLine*, one per row, appended lazily by ctx_term_set */
+   CtxTermMode mode;
+};
+
+static int ctx_term_ch = 8; /* height in pixels of the pixel area sampled for one character cell */
+static int ctx_term_cw = 8; /* width in pixels of the pixel area sampled for one character cell */
+
+void ctx_term_set (CtxTerm *term, /* store glyph `utf8` with colours fg/bg in cell (col,row); both coordinates are 1-based */
+                      int col, int row, const char *utf8,
+                      uint8_t *fg, uint8_t *bg)
+{
+  if (col < 1 || row < 1 || col > term->cols  || row > term->rows) return; /* outside the grid: silently ignored */
+  while (ctx_list_length (term->lines) < row) /* lazily append zeroed lines until `row` exists */
   {
-    tiled->evsource[tiled->evsource_count++] = kb;
-    kb->priv = fb;
+    ctx_list_append (&term->lines, calloc (sizeof (CtxTermLine), 1));
   }
-  EvSource *mice  = evsource_mice_new ();
-  if (mice)
+  CtxTermLine *line = ctx_list_nth_data (term->lines, row-1);
+  assert (line);
+  if (line->size < col)
   {
-    tiled->evsource[tiled->evsource_count++] = mice;
-    mice->priv = fb;
+     int new_size = ((col + 128)/128)*128; /* smallest multiple of 128 that is greater than col */
+     line->cells = realloc (line->cells, sizeof (CtxTermCell) * new_size); /* NOTE(review): the result is not checked before the memset below */
+     memset (&line->cells[line->size], 0, sizeof (CtxTermCell) * (new_size - line->size) ); /* zero only the newly added cells */
+     line->size = new_size;
   }
+  if (col > line->maxcol) line->maxcol = col;
+  strncpy (line->cells[col-1].utf8, (char*)utf8, 4); /* at most 4 bytes; byte 5 of utf8[] is not written here and keeps the 0 from the memset above */
+  memcpy  (line->cells[col-1].fg, fg, 4); /* fg and bg must each point at 4 readable bytes */
+  memcpy  (line->cells[col-1].bg, bg, 4);
+}
 
-  tiled->vt_active = 1;
-#ifdef __linux__
-  ioctl(0, KDSETMODE, KD_GRAPHICS);
-  signal (SIGUSR1, fb_vt_switch_cb);
-  signal (SIGUSR2, fb_vt_switch_cb);
+static int _ctx_term256 = 0; // 0: ctx_term_set_fg/bg print 24-bit "38;2"/"48;2" escapes, otherwise "38;5"/"48;5"; XXX TODO implement autodetect for this
+static long _ctx_curfg = -1; // last foreground seen by ctx_term_set_fg, packed with ctx_rgb_to_long; -1 until first use
+static long _ctx_curbg = -1; // last background seen by ctx_term_set_bg, packed with ctx_rgb_to_long; -1 until first use
 
-  struct vt_stat st;
-  if (ioctl (0, VT_GETSTATE, &st) == -1)
+static long ctx_rgb_to_long (int r,int g, int b) /* pack three channel values into one number: r*65536 + g*256 + b */
+{
+  return r * 256 * 256 + g * 256 + b; /* distinct for distinct triples as long as each channel is within 0..255 */
+}
+
+
+static void ctx_term_set_fg (int red, int green, int blue) /* print the escape selecting this foreground colour, unless it equals the last one requested */
+{
+  long lc = ctx_rgb_to_long (red, green, blue);
+  if (lc == _ctx_curfg) /* NOTE(review): this cache is not cleared when other code prints "\e[0m" - confirm that is intended */
+    return;
+  _ctx_curfg=lc;
+  if (_ctx_term256 == 0)
   {
-    ctx_log ("VT_GET_MODE on vt %i failed\n", fb->vt);
-    return NULL;
+    printf("\e[38;2;%i;%i;%im", red,green,blue); /* 24-bit form: the channel values are printed as given */
   }
+  else
+  {
+    int gray = (green /255.0) * 24 + 0.5; /* gray step derived from the green channel only, clamped to 23 below */
+    int r    = (red/255.0)    * 6 + 0.5; /* each channel rounded to 0..6, clamped to 5 below */
+    int g    = (green/255.0)  * 6 + 0.5;
+    int b    = (blue/255.0)   * 6 + 0.5;
+    if (gray > 23) gray = 23;
 
-  fb->vt = st.v_active;
+    if (r > 5) r = 5;
+    if (g > 5) g = 5;
+    if (b > 5) b = 5;
 
-  struct vt_mode mode;
-  mode.mode   = VT_PROCESS;
-  mode.relsig = SIGUSR1;
-  mode.acqsig = SIGUSR2;
-  if (ioctl (0, VT_SETMODE, &mode) < 0)
+    if (((int)(r/1.66)== (int)(g/1.66)) && ((int)(g/1.66) == ((int)(b/1.66)))) /* all three channels land in the same coarse bucket */
+    {
+      printf("\e[38;5;%im", 16 + 216 + gray); /* palette index 232 + gray */
+    }
+    else
+      printf("\e[38;5;%im", 16 + r * 6 * 6 + g * 6  + b); /* palette index 16 + 36*r + 6*g + b */
+  }
+}
+
+static void ctx_term_set_bg(int red, int green, int blue) /* print the escape selecting this background colour; always prints, the equality shortcut below is disabled */
+{
+  long lc = ctx_rgb_to_long (red, green, blue);
+//if (lc == _ctx_curbg)
+//  return;
+  _ctx_curbg=lc; /* recorded, but nothing in this function reads it back */
+  if (_ctx_term256 == 0)
   {
-    ctx_log ("VT_SET_MODE on vt %i failed\n", fb->vt);
-    return NULL;
+    printf("\e[48;2;%i;%i;%im", red,green,blue); /* 24-bit form: the channel values are printed as given */
   }
-#endif
+  else
+  {
+    int gray = (green /255.0) * 24 + 0.5; /* gray step derived from the green channel only, clamped to 23 below */
+    int r    = (red/255.0)    * 6 + 0.5; /* each channel rounded to 0..6, clamped to 5 below */
+    int g    = (green/255.0)  * 6 + 0.5;
+    int b    = (blue/255.0)   * 6 + 0.5;
+    if (gray > 23) gray = 23;
 
-  return tiled->ctx;
-#else
-  return NULL;
-#endif
+    if (r > 5) r = 5;
+    if (g > 5) g = 5;
+    if (b > 5) b = 5;
+
+    if (((int)(r/1.66)== (int)(g/1.66)) && ((int)(g/1.66) == ((int)(b/1.66)))) /* all three channels land in the same coarse bucket */
+    {
+      printf("\e[48;5;%im", 16 + 216 + gray); /* palette index 232 + gray */
+    }
+    else
+      printf("\e[48;5;%im", 16 + r * 6 * 6 + g * 6  + b); /* palette index 16 + 36*r + 6*g + b */
+  }
 }
-#else
 
-int ctx_renderer_is_fb (Ctx *ctx)
+static int _ctx_term_force_full = 0; /* when nonzero ctx_term_scanout reprints every cell, changed or not */
+
+void ctx_term_scanout (CtxTerm *term) /* print the cell grid to stdout, reprinting only cells that changed since the previous call */
 {
-  return 0;
+  int row = 1;
+  printf ("\e[H"); /* cursor to the top-left corner */
+//  printf ("\e[?25l");
+  printf ("\e[0m"); /* reset all attributes, colours included */
+  _ctx_curfg = _ctx_curbg = -1; /* the reset above dropped the terminal's colours, so the cached ones must not suppress the next escape */
+  int cur_fg[3]={-1,-1,-1}; /* colours selected so far during this scanout; -1 matches no byte value */
+  int cur_bg[3]={-1,-1,-1};
+
+  for (CtxList *l = term->lines; l; l = l->next)
+  {
+    CtxTermLine *line = l->data;
+    for (int col = 1; col <= line->maxcol; col++)
+    {
+      CtxTermCell *cell = &line->cells[col-1];
+
+      if (strcmp(cell->utf8, cell->prev_utf8) ||
+          memcmp(cell->fg, cell->prev_fg, 3) ||
+          memcmp(cell->bg, cell->prev_bg, 3) || _ctx_term_force_full)
+      {
+        if (cell->fg[0] != cur_fg[0] ||
+            cell->fg[1] != cur_fg[1] ||
+            cell->fg[2] != cur_fg[2])
+        {
+          ctx_term_set_fg (cell->fg[0], cell->fg[1], cell->fg[2]);
+          cur_fg[0]=cell->fg[0];
+          cur_fg[1]=cell->fg[1];
+          cur_fg[2]=cell->fg[2];
+        }
+        if (cell->bg[0] != cur_bg[0] ||
+            cell->bg[1] != cur_bg[1] ||
+            cell->bg[2] != cur_bg[2])
+        {
+          ctx_term_set_bg (cell->bg[0], cell->bg[1], cell->bg[2]);
+          cur_bg[0]=cell->bg[0];
+          cur_bg[1]=cell->bg[1];
+          cur_bg[2]=cell->bg[2];
+        }
+        printf ("%s", cell->utf8);
+      }
+      else
+      {
+        // TODO: accumulate successive such to be ignored items,
+        // and compress them into one, making us compress largely
+        // reused screens well
+        printf ("\e[C"); /* unchanged cell: just step the cursor one column right */
+      }
+      strcpy (cell->prev_utf8, cell->utf8); /* remember what is now on screen for the next comparison */
+      memcpy (cell->prev_fg, cell->fg, 3);
+      memcpy (cell->prev_bg, cell->bg, 3);
+    }
+    if (row != term->rows) /* no line break after the last grid row */
+      printf ("\n\r");
+    row ++;
+  }
+  printf ("\e[0m");
+  //printf ("\e[?25h");
+  //
 }
-#endif
-#endif
 
-#if CTX_SDL
+// xx
+// xx
+// xx
+//
 
-/**/
+static inline int _ctx_rgba8_manhattan_diff (const uint8_t *a, const uint8_t *b)
+{
+  /* Colour distance between a and b over their first three bytes; byte 3 is not read. */
+  /* Despite the name, the value returned is ctx_sqrtf of the summed ctx_pow2 terms (presumably squares, i.e. Euclidean), converted to int. */
+  int diff = 0;
+  for (int c = 0; c<3;c++)
+    diff += ctx_pow2(a[c]-b[c]);
+  return ctx_sqrtf(diff); /* the former second `return diff;` was unreachable and has been dropped */
+}
 
-typedef struct _CtxSDL CtxSDL;
-struct _CtxSDL
+static void ctx_term_output_buf_half (uint8_t *pixels, /* turn a 4-bytes-per-pixel buffer into block-glyph cells stored through ctx_term_set */
+                          int width,
+                          int height,
+                          CtxTerm *term)
 {
-   CtxTiled  tiled;
-   /* where we diverge from fb*/
-   int           key_balance;
-   int           key_repeat;
-   int           lctrl;
-   int           lalt;
-   int           rctrl;
-   int           lshift;
-   int           rshift;
+  int stride = width * 4;
+  const char *sextants[]={
+   " ","▘","▝","▀","▖","▌", "▞", "▛", "▗", "▚", "▐", "▜","▄","▙","▟","█",
+
+  };
+  for (int row = 0; row < height/2; row++) /* NOTE(review): bounds are height/2 and width-3 here, while the sibling functions use height/ctx_term_ch and width/ctx_term_cw - confirm */
+    {
+      for (int col = 0; col < width-3; col++)
+        {
+          int     unicode = 0; /* bit mask of pixels nearer to rgba[0]; used as index into sextants[] */
+          int     bitno = 0;
+          uint8_t rgba[2][4] = {
+                             {255,255,255,0},
+                             {0,0,0,0}};
+          int i = 0;
+
+          int  rgbasum[2][4] = {0,};
+          int  sumcount[2];
+
+          int curdiff = 0;
+          /* first find starting point colors */
+          for (int yi = 0; yi < ctx_term_ch; yi++)
+            for (int xi = 0; xi < ctx_term_cw; xi++, i++)
+                {
+                  int noi = (row * ctx_term_ch + yi) * stride + (col*ctx_term_cw+xi) * 4;
+
+                  if (rgba[0][3] == 0)
+                  {
+                    for (int c = 0; c < 3; c++)
+                      rgba[0][c] = pixels[noi + c];
+                    rgba[0][3] = 255; // used only as mark of in-use
+                  }
+                  else
+                  {
+                    int diff = _ctx_rgba8_manhattan_diff (&pixels[noi], rgba[0]);
+                    if (diff > curdiff) /* keep the pixel farthest from the first one as the second colour */
+                    {
+                      curdiff = diff;
+                      for (int c = 0; c < 3; c++)
+                        rgba[1][c] = pixels[noi + c];
+                    }
+                  }
+
+                }
+
+          for (int iters = 0; iters < 1; iters++) /* a single refinement pass: assign pixels to the nearer colour, then average each group */
+          {
+                  i= 0;
+          for (int i = 0; i < 4; i ++)
+             rgbasum[0][i] = rgbasum[1][i]=0;
+          sumcount[0] = sumcount[1] = 0;
+
+          for (int yi = 0; yi < ctx_term_ch; yi++)
+            for (int xi = 0; xi < ctx_term_cw; xi++, i++)
+                {
+                  int noi = (row * ctx_term_ch + yi) * stride + (col*ctx_term_cw+xi) * 4;
+
+                  int diff1 = _ctx_rgba8_manhattan_diff (&pixels[noi], rgba[0]);
+                  int diff2 = _ctx_rgba8_manhattan_diff (&pixels[noi], rgba[1]);
+                  int cluster = 0;
+                  if (diff1 <= diff2)
+                    cluster = 0;
+                  else
+                    cluster = 1;
+                  sumcount[cluster]++;
+                  for (int c = 0; c < 3; c++)
+                    rgbasum[cluster][c] += pixels[noi+c];
+                }
+
+
+          if (sumcount[0])
+          for (int c = 0; c < 3; c++)
+          {
+            rgba[0][c] = rgbasum[0][c] / sumcount[0];
+          }
+          if (sumcount[1])
+          for (int c = 0; c < 3; c++)
+          {
+            rgba[1][c] = rgbasum[1][c] / sumcount[1];
+          }
+          }
+
+          int pixels_set = 0;
+          for (int y = 0; y < ctx_term_ch; y++) /* NOTE(review): one bit per sampled pixel; sextants[] has 16 entries, so this assumes ctx_term_ch * ctx_term_cw <= 4 - confirm */
+            for (int x = 0; x < ctx_term_cw; x++)
+              {
+                int no = (row * ctx_term_ch + y) * stride + (col*ctx_term_cw+x) * 4;
+#define CHECK_IS_SET \
+      (_ctx_rgba8_manhattan_diff (&pixels[no], rgba[0])< \
+       _ctx_rgba8_manhattan_diff (&pixels[no], rgba[1]))
+
+                int set = CHECK_IS_SET;
+#undef CHECK_IS_SET
+                if (set)
+                  { unicode |=  (1<< (bitno) ); 
+                    pixels_set ++; 
+                  }
+                bitno++;
+              }
+           if (pixels_set == 4) /* every pixel set: store a space with the two colours swapped */
+             ctx_term_set (term, col +1, row + 1, " ",
+                           rgba[1], rgba[0]);
+           else
+             ctx_term_set (term, col +1, row + 1, sextants[unicode],
+                           rgba[0], rgba[1]);
+        }
+    }
+}
+
+void ctx_term_find_color_pair (CtxTerm *term, int x0, int y0, int w, int h, /* choose two representative colours for the w*h pixel rectangle at (x0,y0) of term->pixels */
+                uint8_t rgba[2][4]) /* in/out: rgba[0][3] == 0 on entry marks slot 0 as not yet seeded */
+        //uint8_t *rgba0, uint8_t *rgba1)
+{
+int curdiff = 0;
+int stride = term->width * 4;
+uint8_t *pixels = term->pixels;
+/* first find starting point colors */
+for (int y = y0; y < y0 + h; y++)
+  for (int x = x0; x < x0 + w; x++)
+      {
+        int noi = (y) * stride + (x) * 4;
+
+        if (rgba[0][3] == 0)
+        {
+          for (int c = 0; c < 3; c++)
+            rgba[0][c] = pixels[noi + c];
+          rgba[0][3] = 255; // used only as mark of in-use
+        }
+        else
+        {
+          int diff = _ctx_rgba8_manhattan_diff (&pixels[noi], &rgba[0][0]);
+          if (diff > curdiff) /* rgba[1] becomes the pixel farthest from rgba[0]; it keeps the caller's value if no pixel differs */
+          {
+            curdiff = diff;
+            for (int c = 0; c < 3; c++)
+              rgba[1][c] = pixels[noi + c];
+          }
+        }
+      }
+          int  rgbasum[2][4] = {0,};
+          int  sumcount[2];
+
+          for (int iters = 0; iters < 1; iters++) /* a single refinement pass */
+          {
+          for (int i = 0; i < 4; i ++)
+             rgbasum[0][i] = rgbasum[1][i]=0;
+          sumcount[0] = sumcount[1] = 0;
+
+          for (int y = y0; y < y0 + h; y++)
+            for (int x = x0; x < x0 + w; x++)
+                {
+                  int noi = (y) * stride + (x) * 4;
+
+                  int diff1 = _ctx_rgba8_manhattan_diff (&pixels[noi], rgba[0]);
+                  int diff2 = _ctx_rgba8_manhattan_diff (&pixels[noi], rgba[1]);
+                  int cluster = 0;
+                  if (diff1 <= diff2) /* ties go to colour 0 */
+                    cluster = 0;
+                  else
+                    cluster = 1;
+                  sumcount[cluster]++;
+                  for (int c = 0; c < 3; c++)
+                    rgbasum[cluster][c] += pixels[noi+c];
+                }
+
+
+          if (sumcount[0]) /* replace each colour by the mean of the pixels assigned to it; an empty group keeps its colour */
+          for (int c = 0; c < 3; c++)
+          {
+            rgba[0][c] = rgbasum[0][c] / sumcount[0];
+          }
+          if (sumcount[1])
+          for (int c = 0; c < 3; c++)
+          {
+            rgba[1][c] = rgbasum[1][c] / sumcount[1];
+          }
+          }
+
+}
+
+
+
+static void ctx_term_output_buf_quarter (uint8_t *pixels, /* turn a 4-bytes-per-pixel buffer into quadrant-block cells stored through ctx_term_set */
+                          int width,
+                          int height,
+                          CtxTerm *term)
+{
+  int stride = width * 4;
+  const char *sextants[]={
+   " ","▘","▝","▀","▖","▌", "▞", "▛", "▗", "▚", "▐", "▜","▄","▙","▟","█"
+
+  };
+  for (int row = 0; row < height/ctx_term_ch; row++)
+    {
+      for (int col = 0; col < width /ctx_term_cw; col++)
+        {
+          int     unicode = 0; /* bit mask of pixels nearer to rgba[0]; used as index into sextants[] */
+          int     bitno = 0;
+          uint8_t rgba[2][4] = {
+                             {255,255,255,0},
+                             {0,0,0,0}};
+          ctx_term_find_color_pair (term, col * ctx_term_cw, /* colours come from term->pixels, while the bits below are read from the `pixels` argument */
+                                    row * ctx_term_ch,
+                                    ctx_term_cw,
+                                    ctx_term_ch, rgba);
 
-   SDL_Window   *window;
-   SDL_Renderer *renderer;
-   SDL_Texture  *texture;
+          int pixels_set = 0;
+          for (int y = 0; y < 2; y++) /* NOTE(review): two rows of ctx_term_cw pixels; the 16-entry table assumes ctx_term_cw is 2 here - confirm */
+            for (int x = 0; x < ctx_term_cw; x++)
+              {
+                int no = (row * ctx_term_ch + y) * stride + (col*ctx_term_cw+x) * 4;
+#define CHECK_IS_SET \
+      (_ctx_rgba8_manhattan_diff (&pixels[no], rgba[0])< \
+       _ctx_rgba8_manhattan_diff (&pixels[no], rgba[1]))
 
-   int           fullscreen;
-};
+                int set = CHECK_IS_SET;
+#undef CHECK_IS_SET
+                if (set)
+                  { unicode |=  (1<< (bitno) ); 
+                    pixels_set ++; 
+                  }
+                bitno++;
+              }
+           if (pixels_set == 4) /* every pixel set: store a space with the two colours swapped */
+             ctx_term_set (term, col +1, row + 1, " ",
+                           rgba[1], rgba[0]);
+           else
+             ctx_term_set (term, col +1, row + 1, sextants[unicode],
+                           rgba[0], rgba[1]);
+        }
+    }
+}
 
-#include "stb_image_write.h"
 
-void ctx_screenshot (Ctx *ctx, const char *output_path)
+static void ctx_term_output_buf_sextant (uint8_t *pixels, /* turn a 4-bytes-per-pixel buffer into sextant-glyph cells stored through ctx_term_set */
+                          int width,
+                          int height,
+                          CtxTerm *term)
 {
-#if CTX_SCREENSHOT
-  int valid = 0;
-  CtxSDL *sdl = (void*)ctx->renderer;
+  int stride = width * 4;
 
-  if (ctx_renderer_is_sdl (ctx)) valid = 1;
-#if CTX_FB
-  if (ctx_renderer_is_fb  (ctx)) valid = 1;
-#endif
-#if CTX_KMS
-  if (ctx_renderer_is_kms (ctx)) valid = 1;
-#endif
+  const char *sextants[]={
+   " ","🬀","🬁","🬂","🬃","🬄","🬅","🬆","🬇","🬈","🬉","🬊","🬋","🬌","🬍","🬎","🬏","🬐","🬑","🬒","🬓","▌","🬔","🬕","🬖","🬗","🬘","🬙","🬚","🬛","🬜","🬝","🬞","🬟","🬠","🬡","🬢","🬣","🬤","🬥","🬦","🬧","▐","🬨","🬩","🬪","🬫","🬬","🬭","🬮","🬯","🬰","🬱","🬲","🬳","🬴","🬵","🬶","🬷","🬸","🬹","🬺","🬻","█"
+  };
 
-  if (!valid)
-    return;
+  for (int row = 0; row < height/ctx_term_ch; row++)
+    {
+      for (int col = 0; col < width /ctx_term_cw; col++)
+        {
+          int     unicode = 0; /* bit mask of pixels nearer to rgba[0]; used as index into the 64-entry sextants[] */
+          int     bitno = 0;
+          uint8_t rgba[2][4] = {
+                             {255,255,255,0},
+                             {0,0,0,0}};
 
-  // we rely on the same struxt layout XXX !
-  for (int i = 0; i < sdl->width * sdl->height; i++)
-  {
-    int tmp = sdl->pixels[i*4];
-    sdl->pixels[i*4] = sdl->pixels[i*4 + 2];
-    sdl->pixels[i*4 + 2] = tmp;
-  }
+          ctx_term_find_color_pair (term, col * ctx_term_cw, /* colours come from term->pixels, while the bits below are read from the `pixels` argument */
+                                    row * ctx_term_ch,
+                                    ctx_term_cw,
+                                    ctx_term_ch, rgba);
 
-  stbi_write_png (output_path, sdl->width, sdl->height, 4, sdl->pixels, sdl->width*4);
+          int pixels_set = 0;
+          for (int y = 0; y < ctx_term_ch; y++) /* NOTE(review): one bit per sampled pixel; the 64-entry table assumes ctx_term_cw * ctx_term_ch is 6 here - confirm */
+            for (int x = 0; x < ctx_term_cw; x++)
+              {
+                int no = (row * ctx_term_ch + y) * stride + (col*ctx_term_cw+x) * 4;
+#define CHECK_IS_SET \
+      (_ctx_rgba8_manhattan_diff (&pixels[no], rgba[0])< \
+       _ctx_rgba8_manhattan_diff (&pixels[no], rgba[1]))
 
-#if 0
-#if CTX_FB || CTX_KMS
-  for (int i = 0; i < sdl->width * sdl->height; i++)
-  {
-    int tmp = sdl->pixels[i*4];
-    sdl->pixels[i*4] = sdl->pixels[i*4 + 2];
-    sdl->pixels[i*4 + 2] = tmp;
-  }
-#endif
-#endif
-#endif
-}
+                int set = CHECK_IS_SET;
+#undef CHECK_IS_SET
+                if (set)
+                  { unicode |=  (1<< (bitno) ); 
+                    pixels_set ++; 
+                  }
+                bitno++;
+              }
 
-int ctx_show_fps = 1;
-void ctx_sdl_set_title (void *self, const char *new_title)
-{
-   CtxSDL *sdl = self;
-   if (!ctx_show_fps)
-   SDL_SetWindowTitle (sdl->window, new_title);
+          if (pixels_set == 6) /* every pixel set: store a space with the two colours swapped */
+            ctx_term_set (term, col +1, row + 1, " ",
+                          rgba[1], rgba[0]);
+          else
+            ctx_term_set (term, col +1, row + 1, sextants[unicode], rgba[0], rgba[1]);
+        }
+    }
 }
 
-static void ctx_sdl_show_frame (CtxSDL *sdl, int block)
+static void ctx_term_output_buf_ascii (uint8_t *pixels, /* turn a 4-bytes-per-pixel buffer into ASCII-character cells stored through ctx_term_set */
+                          int width,
+                          int height,
+                          CtxTerm *term,
+                          int mono) /* nonzero: the darker colour of each cell is forced to black */
 {
-  CtxTiled *tiled = &sdl->tiled;
-  if (tiled->shown_cursor != tiled->ctx->cursor)
-  {
-    tiled->shown_cursor = tiled->ctx->cursor;
-    SDL_Cursor *new_cursor =  NULL;
-    switch (tiled->shown_cursor)
-    {
-      case CTX_CURSOR_UNSET: // XXX: document how this differs from none
-                             //      perhaps falling back to arrow?
-        break;
-      case CTX_CURSOR_NONE:
-        new_cursor = NULL;
-        break;
-      case CTX_CURSOR_ARROW:
-        new_cursor = SDL_CreateSystemCursor(SDL_SYSTEM_CURSOR_ARROW);
-        break;
-      case CTX_CURSOR_CROSSHAIR:
-        new_cursor = SDL_CreateSystemCursor(SDL_SYSTEM_CURSOR_CROSSHAIR);
-        break;
-      case CTX_CURSOR_WAIT:
-        new_cursor = SDL_CreateSystemCursor(SDL_SYSTEM_CURSOR_WAIT);
-        break;
-      case CTX_CURSOR_HAND:
-        new_cursor = SDL_CreateSystemCursor(SDL_SYSTEM_CURSOR_HAND);
-        break;
-      case CTX_CURSOR_IBEAM:
-        new_cursor = SDL_CreateSystemCursor(SDL_SYSTEM_CURSOR_IBEAM);
-        break;
-      case CTX_CURSOR_MOVE:
-      case CTX_CURSOR_RESIZE_ALL:
-        new_cursor = SDL_CreateSystemCursor(SDL_SYSTEM_CURSOR_SIZEALL);
-        break;
-      case CTX_CURSOR_RESIZE_N:
-      case CTX_CURSOR_RESIZE_S:
-        new_cursor = SDL_CreateSystemCursor(SDL_SYSTEM_CURSOR_SIZENS);
-        break;
-      case CTX_CURSOR_RESIZE_E:
-      case CTX_CURSOR_RESIZE_W:
-        new_cursor = SDL_CreateSystemCursor(SDL_SYSTEM_CURSOR_SIZEWE);
-        break;
-      case CTX_CURSOR_RESIZE_NE:
-      case CTX_CURSOR_RESIZE_SW:
-        new_cursor = SDL_CreateSystemCursor(SDL_SYSTEM_CURSOR_SIZENESW);
-        break;
-      case CTX_CURSOR_RESIZE_NW:
-      case CTX_CURSOR_RESIZE_SE:
-        new_cursor = SDL_CreateSystemCursor(SDL_SYSTEM_CURSOR_SIZENWSE);
-        break;
-    }
-    if (new_cursor)
-    {
-      SDL_Cursor *old_cursor = SDL_GetCursor();
-      SDL_SetCursor (new_cursor);
-      SDL_ShowCursor (1);
-      if (old_cursor)
-        SDL_FreeCursor (old_cursor);
-    }
-    else
+  /* this is a crude ascii-mode built on a quick mapping of sexels to ascii */
+  int stride = width * 4;
+  const char *sextants[]={
+   " ","`","'","^","-","`","~","\"","-","\"","'","\"","-","\"","~","^",",",";",
+   "=","/","i","[","p","P","z",")","/","7","f",">","/","F",",","\\",":",":",
+   "\\","\\","(","T","j","T","]","?","s","\\","<","q","_","=","=","=","c","L",
+   "Q","C","a","b","J","]","m","b","d","@"
+  };
+  uint8_t black[4] = {0,0,0,255};
+  for (int row = 0; row < height/ctx_term_ch; row++)
     {
-      SDL_ShowCursor (0);
-    }
-  }
+      for (int col = 0; col < width /ctx_term_cw; col++)
+        {
+          int     unicode = 0; /* bit mask of pixels nearer to rgba[0]; used as index into the 64-entry sextants[] */
+          int     bitno = 0;
+          uint8_t rgba[2][4] = {
+                             {255,255,255,0},
+                             {0,0,0,0}};
 
-  if (tiled->shown_frame == tiled->render_frame)
-  {
-    return;
-  }
+          ctx_term_find_color_pair (term, col * ctx_term_cw, /* colours come from term->pixels, while the bits below are read from the `pixels` argument */
+                                    row * ctx_term_ch,
+                                    ctx_term_cw,
+                                    ctx_term_ch, rgba);
 
-  if (block)
-  {
-    int count = 0;
-    while (ctx_tiled_threads_done (tiled) != _ctx_max_threads)
-    {
-      usleep (500);
-      count ++;
-      if (count > 100)
-      {
-        tiled->shown_frame = tiled->render_frame;
-        return;
-      }
-    }
-  }
-  else
-  {
-    if (ctx_tiled_threads_done (tiled) != _ctx_max_threads)
-      return;
-  }
+
+          if (_ctx_rgba8_manhattan_diff (black, rgba[1]) > /* swap so that rgba[0] is the colour farther from black */
+              _ctx_rgba8_manhattan_diff (black, rgba[0]))
+          {
+            for (int c = 0; c < 4; c ++)
+            {
+              int tmp = rgba[0][c];
+              rgba[0][c] = rgba[1][c];
+              rgba[1][c] = tmp;
+            }
+          }
+          if (mono)
+          {
+            rgba[1][0] = 0;
+            rgba[1][1] = 0;
+            rgba[1][2] = 0;
+          }
 
 
-  if (tiled->min_row == 100)
-  {
-  }
-  else
-  {
-#if 1
-    int x = tiled->min_col * tiled->width/CTX_HASH_COLS;
-    int y = tiled->min_row * tiled->height/CTX_HASH_ROWS;
-    int x1 = (tiled->max_col+1) * tiled->width/CTX_HASH_COLS;
-    int y1 = (tiled->max_row+1) * tiled->height/CTX_HASH_ROWS;
-    int width = x1 - x;
-    int height = y1 - y;
-#endif
-    tiled->min_row = 100;
-    tiled->max_row = 0;
-    tiled->min_col = 100;
-    tiled->max_col = 0;
+          int brightest_dark_diff = _ctx_rgba8_manhattan_diff (black, rgba[0]); /* distance from black of the brighter colour */
 
-    SDL_Rect r = {x, y, width, height};
-    SDL_UpdateTexture (sdl->texture, &r,
-                      //(void*)sdl->pixels,
-                      (void*)(tiled->pixels + y * tiled->width * 4 + x * 4),
-                      
-                      tiled->width * 4);
-    SDL_RenderClear (sdl->renderer);
-    SDL_RenderCopy (sdl->renderer, sdl->texture, NULL, NULL);
-    SDL_RenderPresent (sdl->renderer);
+          int pixels_set = 0;
+          for (int y = 0; y < ctx_term_ch; y++) /* NOTE(review): one bit per sampled pixel; the 64-entry table assumes ctx_term_cw * ctx_term_ch is 6 here - confirm */
+            for (int x = 0; x < ctx_term_cw; x++)
+              {
+                int no = (row * ctx_term_ch + y) * stride + (col*ctx_term_cw+x) * 4;
+#define CHECK_IS_SET \
+      (_ctx_rgba8_manhattan_diff (&pixels[no], rgba[0])< \
+       _ctx_rgba8_manhattan_diff (&pixels[no], rgba[1]))
 
+                int set = CHECK_IS_SET;
+#undef CHECK_IS_SET
+                if (set)
+                  { unicode |=  (1<< (bitno) ); 
+                    pixels_set ++; 
+                  }
+                bitno++;
+              }
 
-  if (ctx_show_fps)
-  {
-    static uint64_t prev_time = 0;
-    static char tmp_title[1024];
-    uint64_t time = ctx_ticks ();
-    sprintf (tmp_title, "FPS: %.1f", 1000000.0/  (time - prev_time));
-    prev_time = time;
-    SDL_SetWindowTitle (sdl->window, tmp_title);
-  }
-  }
-  tiled->shown_frame = tiled->render_frame;
-}
 
-static const char *ctx_sdl_keysym_to_name (unsigned int sym, int *r_keycode)
-{
-  static char buf[16]="";
-  buf[ctx_unichar_to_utf8 (sym, (void*)buf)]=0;
-  int code = sym;
-  const char *name = &buf[0];
-   switch (sym)
-   {
-     case SDLK_RSHIFT: code = 16 ; break;
-     case SDLK_LSHIFT: code = 16 ; break;
-     case SDLK_LCTRL: code = 17 ; break;
-     case SDLK_RCTRL: code = 17 ; break;
-     case SDLK_LALT:  code = 18 ; break;
-     case SDLK_RALT:  code = 18 ; break;
-     case SDLK_CAPSLOCK: name = "capslock"; code = 20 ; break;
-     //case SDLK_NUMLOCK: name = "numlock"; code = 144 ; break;
-     //case SDLK_SCROLLLOCK: name = "scrollock"; code = 145 ; break;
-
-     case SDLK_F1:     name = "F1"; code = 112; break;
-     case SDLK_F2:     name = "F2"; code = 113; break;
-     case SDLK_F3:     name = "F3"; code = 114; break;
-     case SDLK_F4:     name = "F4"; code = 115; break;
-     case SDLK_F5:     name = "F5"; code = 116; break;
-     case SDLK_F6:     name = "F6"; code = 117; break;
-     case SDLK_F7:     name = "F7"; code = 118; break;
-     case SDLK_F8:     name = "F8"; code = 119; break;
-     case SDLK_F9:     name = "F9"; code = 120; break;
-     case SDLK_F10:    name = "F10"; code = 121; break;
-     case SDLK_F11:    name = "F11"; code = 122; break;
-     case SDLK_F12:    name = "F12"; code = 123; break;
-     case SDLK_ESCAPE: name = "escape"; break;
-     case SDLK_DOWN:   name = "down"; code = 40; break;
-     case SDLK_LEFT:   name = "left"; code = 37; break;
-     case SDLK_UP:     name = "up"; code = 38;  break;
-     case SDLK_RIGHT:  name = "right"; code = 39; break;
-     case SDLK_BACKSPACE: name = "backspace"; break;
-     case SDLK_SPACE:  name = "space"; break;
-     case SDLK_TAB:    name = "tab"; break;
-     case SDLK_DELETE: name = "delete"; code = 46; break;
-     case SDLK_INSERT: name = "insert"; code = 45; break;
-     case SDLK_RETURN:
-       //if (key_repeat == 0) // return never should repeat
-       name = "return";   // on a DEC like terminal
-       break;
-     case SDLK_HOME:     name = "home"; code = 36; break;
-     case SDLK_END:      name = "end"; code = 35; break;
-     case SDLK_PAGEDOWN: name = "page-down"; code = 34; break;
-     case SDLK_PAGEUP:   name = "page-up"; code = 33; break;
-     case ',': code = 188; break;
-     case '.': code = 190; break;
-     case '/': code = 191; break;
-     case '`': code = 192; break;
-     case '[': code = 219; break;
-     case '\\': code = 220; break;
-     case ']':  code = 221; break;
-     case '\'': code = 222; break;
-     default:
-       ;
-   }
-   if (sym >= 'a' && sym <='z') code -= 32;
-   if (r_keycode)
-   {
-     *r_keycode = code;
-   }
-   return name;
+           if (pixels_set == 6 && brightest_dark_diff < 40) /* all pixels set and the cell is nearly black: store a space with the colours swapped */
+             ctx_term_set (term, col +1, row + 1, " ",
+                           rgba[1], rgba[0]);
+           else
+             ctx_term_set (term, col +1, row + 1, sextants[unicode],
+                           rgba[0], rgba[1]);
+        }
+    }
 }
 
-int ctx_sdl_consume_events (Ctx *ctx)
+static void ctx_term_output_buf_braille (uint8_t *pixels,
+                          int width,
+                          int height,
+                          CtxTerm *term,
+                          int mono)
 {
-  CtxTiled *tiled = (void*)ctx->renderer;
-  CtxSDL *sdl = (void*)ctx->renderer;
-  SDL_Event event;
-  int got_events = 0;
-
-  ctx_sdl_show_frame (sdl, 0);
-
-  while (SDL_PollEvent (&event))
-  {
-    got_events ++;
-    switch (event.type)
+  int reverse = 0;
+  int stride = width * 4;
+  uint8_t black[4] = {0,0,0,255};
+  for (int row = 0; row < height/ctx_term_ch; row++)
     {
-      case SDL_MOUSEBUTTONDOWN:
-        SDL_CaptureMouse (1);
-        ctx_pointer_press (ctx, event.button.x, event.button.y, event.button.button, 0);
-        break;
-      case SDL_MOUSEBUTTONUP:
-        SDL_CaptureMouse (0);
-        ctx_pointer_release (ctx, event.button.x, event.button.y, event.button.button, 0);
-        break;
-      case SDL_MOUSEMOTION:
-        //  XXX : look at mask and generate motion for each pressed
-        //        button
-        ctx_pointer_motion (ctx, event.motion.x, event.motion.y, 1, 0);
-        break;
-      case SDL_FINGERMOTION:
-        ctx_pointer_motion (ctx, event.tfinger.x * tiled->width, event.tfinger.y * tiled->height,
-            (event.tfinger.fingerId%10) + 4, 0);
-        break;
-      case SDL_FINGERDOWN:
-        {
-        static int fdowns = 0;
-        fdowns ++;
-        if (fdowns > 1) // the very first finger down from SDL seems to be
-                        // mirrored as mouse events, later ones not - at
-                        // least under wayland
+      for (int col = 0; col < width /ctx_term_cw; col++)
         {
-          ctx_pointer_press (ctx, event.tfinger.x * tiled->width, event.tfinger.y * tiled->height, 
-          (event.tfinger.fingerId%10) + 4, 0);
-        }
-        }
-        break;
-      case SDL_FINGERUP:
-        ctx_pointer_release (ctx, event.tfinger.x * tiled->width, event.tfinger.y * tiled->height,
-          (event.tfinger.fingerId%10) + 4, 0);
-        break;
-#if 1
-      case SDL_TEXTINPUT:
-    //  if (!active)
-    //    break;
-        if (!sdl->lctrl && !sdl->rctrl && !sdl->lalt 
-           //&& ( (vt && vt_keyrepeat (vt) ) || (key_repeat==0) )
-           )
+          int     unicode = 0;
+          int     bitno = 0;
+          uint8_t rgba[2][4] = {
+                             {255,255,255,0},
+                             {0,0,0,0}};
+
+          ctx_term_find_color_pair (term, col * ctx_term_cw,
+                                    row * ctx_term_ch,
+                                    ctx_term_cw,
+                                    ctx_term_ch, rgba);
+
+
+          /* make darkest consistently be background  */
+          if (_ctx_rgba8_manhattan_diff (black, rgba[1]) >
+              _ctx_rgba8_manhattan_diff (black, rgba[0]))
           {
-            const char *name = event.text.text;
-            int keycode = 0;
-            if (!strcmp (name, " ") ) { name = "space"; }
-            if (name[0] && name[1] == 0)
+            for (int c = 0; c < 4; c ++)
             {
-              keycode = name[0];
-              keycode = toupper (keycode);
-              switch (keycode)
-              {
-                case '.':  keycode = 190; break;
-                case ';':  keycode = 59; break;
-                case ',':  keycode = 188; break;
-                case '/':  keycode = 191; break;
-                case '\'': keycode = 222; break;
-                case '`':  keycode = 192; break;
-                case '[':  keycode = 219; break;
-                case ']':  keycode = 221; break;
-                case '\\': keycode = 220; break;
-              }
+              int tmp = rgba[0][c];
+              rgba[0][c] = rgba[1][c];
+              rgba[1][c] = tmp;
             }
-            ctx_key_press (ctx, keycode, name, 0);
-            //got_event = 1;
-          }
-        break;
-#endif
-      case SDL_KEYDOWN:
-        {
-          char buf[32] = "";
-          const char *name = buf;
-          if (!event.key.repeat)
-          {
-            sdl->key_balance ++;
-            sdl->key_repeat = 0;
-          }
-          else
-          {
-            sdl->key_repeat ++;
-          }
-          switch (event.key.keysym.sym)
-          {
-            case SDLK_LSHIFT: sdl->lshift = 1; break;
-            case SDLK_RSHIFT: sdl->rshift = 1; break;
-            case SDLK_LCTRL:  sdl->lctrl = 1; break;
-            case SDLK_LALT:   sdl->lalt = 1; break;
-            case SDLK_RCTRL:  sdl->rctrl = 1; break;
           }
-          if (sdl->lshift | sdl->rshift | sdl->lctrl | sdl->lalt | sdl->rctrl)
+          if (mono)
           {
-            ctx->events.modifier_state ^= ~(CTX_MODIFIER_STATE_CONTROL|
-                                            CTX_MODIFIER_STATE_ALT|
-                                            CTX_MODIFIER_STATE_SHIFT);
-            if (sdl->lshift | sdl->rshift)
-              ctx->events.modifier_state |= CTX_MODIFIER_STATE_SHIFT;
-            if (sdl->lctrl | sdl->rctrl)
-              ctx->events.modifier_state |= CTX_MODIFIER_STATE_CONTROL;
-            if (sdl->lalt)
-              ctx->events.modifier_state |= CTX_MODIFIER_STATE_ALT;
+            rgba[1][0] = 0;
+            rgba[1][1] = 0;
+            rgba[1][2] = 0;
           }
-          int keycode;
-          name = ctx_sdl_keysym_to_name (event.key.keysym.sym, &keycode);
-          ctx_key_down (ctx, keycode, name, 0);
 
-          if (strlen (name)
-              &&(event.key.keysym.mod & (KMOD_CTRL) ||
-                 event.key.keysym.mod & (KMOD_ALT) ||
-                 ctx_utf8_strlen (name) >= 2))
-          {
-            if (event.key.keysym.mod & (KMOD_CTRL) )
-              {
-                static char buf[64] = "";
-                sprintf (buf, "control-%s", name);
-                name = buf;
-              }
-            if (event.key.keysym.mod & (KMOD_ALT) )
-              {
-                static char buf[128] = "";
-                sprintf (buf, "alt-%s", name);
-                name = buf;
-              }
-            if (event.key.keysym.mod & (KMOD_SHIFT) )
+          int pixels_set = 0;
+          for (int x = 0; x < 2; x++)
+            for (int y = 0; y < 3; y++)
               {
-                static char buf[196] = "";
-                sprintf (buf, "shift-%s", name);
-                name = buf;
+                int no = (row * 4 + y) * stride + (col*2+x) * 4;
+#define CHECK_IS_SET \
+      (_ctx_rgba8_manhattan_diff (&pixels[no], rgba[0])< \
+       _ctx_rgba8_manhattan_diff (&pixels[no], rgba[1]))
+
+                int set = CHECK_IS_SET;
+                if (reverse) { set = !set; }
+                if (set)
+                  { unicode |=  (1<< (bitno) ); 
+                    pixels_set ++; 
+                  }
+                bitno++;
               }
-            if (strcmp (name, "space"))
+          {
+            int x = 0;
+            int y = 3;
+            int no = (row * 4 + y) * stride + (col*2+x) * 4;
+            int setA = CHECK_IS_SET;
+            no = (row * 4 + y) * stride + (col*2+x+1) * 4;
+            int setB = CHECK_IS_SET;
+
+            pixels_set += setA;
+            pixels_set += setB;
+#undef CHECK_IS_SET
+            if (reverse) { setA = !setA; }
+            if (reverse) { setB = !setB; }
+            if (setA != 0 && setB==0)
+              { unicode += 0x2840; }
+            else if (setA == 0 && setB)
+              { unicode += 0x2880; }
+            else if ( (setA != 0) && (setB != 0) )
+              { unicode += 0x28C0; }
+            else
+              { unicode += 0x2800; }
+            char utf8[5];
+            utf8[ctx_unichar_to_utf8 (unicode, (uint8_t*)utf8)]=0;
+
+#if 0
+            if (pixels_set == 8)
+            {
+              if (rgba[0][0] < 32 && rgba[0][1] < 32 && rgba[0][2] < 32)
               {
-               ctx_key_press (ctx, keycode, name, 0);
+                ctx_term_set (term, col +1, row + 1, " ",
+                                 rgba[1], rgba[0]);
+                continue;
               }
-          }
-          else
-          {
-#if 0
-             ctx_key_press (ctx, 0, buf, 0);
+            }
 #endif
+            {
+              ctx_term_set (term, col +1, row + 1, utf8,
+                               rgba[0], rgba[1]);
+            }
           }
         }
-        break;
-      case SDL_KEYUP:
-        {
-           sdl->key_balance --;
-           switch (event.key.keysym.sym)
-           {
-             case SDLK_LSHIFT: sdl->lshift = 0; break;
-             case SDLK_RSHIFT: sdl->rshift = 0; break;
-             case SDLK_LCTRL: sdl->lctrl = 0; break;
-             case SDLK_RCTRL: sdl->rctrl = 0; break;
-             case SDLK_LALT:  sdl->lalt  = 0; break;
-           }
-
-          {
-            ctx->events.modifier_state ^= ~(CTX_MODIFIER_STATE_CONTROL|
-                                            CTX_MODIFIER_STATE_ALT|
-                                            CTX_MODIFIER_STATE_SHIFT);
-            if (sdl->lshift | sdl->rshift)
-              ctx->events.modifier_state |= CTX_MODIFIER_STATE_SHIFT;
-            if (sdl->lctrl | sdl->rctrl)
-              ctx->events.modifier_state |= CTX_MODIFIER_STATE_CONTROL;
-            if (sdl->lalt)
-              ctx->events.modifier_state |= CTX_MODIFIER_STATE_ALT;
-          }
-
-           int keycode;
-           const char *name = ctx_sdl_keysym_to_name (event.key.keysym.sym, &keycode);
-           ctx_key_up (ctx, keycode, name, 0);
-        }
-        break;
-      case SDL_QUIT:
-        ctx_quit (ctx);
-        break;
-      case SDL_WINDOWEVENT:
-        if (event.window.event == SDL_WINDOWEVENT_RESIZED)
-        {
-          ctx_sdl_show_frame (sdl, 1);
-          int width = event.window.data1;
-          int height = event.window.data2;
-          SDL_DestroyTexture (sdl->texture);
-          sdl->texture = SDL_CreateTexture (sdl->renderer, SDL_PIXELFORMAT_ABGR8888,
-                          SDL_TEXTUREACCESS_STREAMING, width, height);
-          free (tiled->pixels);
-          tiled->pixels = calloc (4, width * height);
-
-          tiled->width  = width;
-          tiled->height = height;
-          ctx_set_size (tiled->ctx, width, height);
-          ctx_set_size (tiled->ctx_copy, width, height);
-        }
-        break;
     }
-  }
-  return 1;
-}
-#else
-void ctx_screenshot (Ctx *ctx, const char *path)
-{
 }
-#endif
 
-#if CTX_SDL
 
-static void ctx_sdl_set_clipboard (CtxSDL *sdl, const char *text)
+inline static int
+ctx_is_half_opaque (CtxRasterizer *rasterizer)
 {
-  if (text)
-    SDL_SetClipboardText (text);
+  CtxGState *gstate = &rasterizer->state->gstate;
+  if (gstate->source_fill.type == CTX_SOURCE_COLOR)
+  {
+    uint8_t ga[2];
+    ctx_color_get_graya_u8 (rasterizer->state, &gstate->source_fill.color, ga);
+    if ( (ga[1] * gstate->global_alpha_f) >= 127)
+      return 1;
+    return 0;
+  }
+  return gstate->global_alpha_f > 0.5f;
 }
 
-static char *ctx_sdl_get_clipboard (CtxSDL *sdl)
+inline static void ctx_term_process (Ctx *ctx,
+                                     CtxCommand *command)
 {
-  return SDL_GetClipboardText ();
-}
+  CtxTerm *term = (void*)ctx->backend;
 
-inline static void ctx_sdl_reset (CtxSDL *sdl)
-{
-  ctx_sdl_show_frame (sdl, 1);
-}
+#if CTX_CURRENT_PATH
+  ctx_update_current_path (ctx, &command->entry);
+#endif
 
-inline static void ctx_sdl_flush (CtxSDL *sdl)
-{
-  ctx_tiled_flush ((void*)sdl);
-  //CtxTiled *tiled = (void*)sdl;
-}
 
-void ctx_sdl_free (CtxSDL *sdl)
-{
+#if CTX_BRAILLE_TEXT
+  if (command->code == CTX_FILL)
+  {
+     CtxRasterizer *rasterizer = (CtxRasterizer*)term->host->backend;
 
-  if (sdl->texture)
-  SDL_DestroyTexture (sdl->texture);
-  if (sdl->renderer)
-  SDL_DestroyRenderer (sdl->renderer);
-  if (sdl->window)
-  SDL_DestroyWindow (sdl->window);
+     if (ctx_is_half_opaque (rasterizer))
+     {
+        CtxIntRectangle shape_rect = {
+          ((int)(rasterizer->col_min / CTX_SUBDIV - 2))/2,
+          ((int)(rasterizer->scan_min / 15 - 2))/3,
+          ((int)(3+((int)rasterizer->col_max - rasterizer->col_min + 1) / CTX_SUBDIV))/2,
+          ((int)(3+((int)rasterizer->scan_max - rasterizer->scan_min + 1) / 15))/3
+        };
+#if 0
+  CtxGState *gstate = &rasterizer->state->gstate;
+       fprintf (stderr, "{%i,%i %ix%i %.2f %i}",
+                       shape_rect.x, shape_rect.y,
+                       shape_rect.width, shape_rect.height,
 
-  ctx_tiled_free ((CtxTiled*)sdl);
-#if CTX_BABL
-  babl_exit ();
+                       gstate->global_alpha_f,
+                       ga[1]
+                       
+                       );
+   //  sleep(1);
 #endif
-}
 
+       if (shape_rect.y > 0) // XXX : workaround 
+       for (int row = shape_rect.y;
+            row < (shape_rect.y+(int)shape_rect.height);
+            row++)
+       for (int col = shape_rect.x;
+            col < (shape_rect.x+(int)shape_rect.width);
+            col++)
 
-int ctx_renderer_is_sdl (Ctx *ctx)
-{
-  if (ctx->renderer &&
-      ctx->renderer->free == (void*)ctx_sdl_free)
-          return 1;
-  return 0;
+       {
+         for (CtxList *l = rasterizer->glyphs; l; l=l?l->next:NULL)
+         {
+           CtxTermGlyph *glyph = l->data;
+           if ((glyph->row == row+1) &&
+               (glyph->col == col+1))
+           {
+              ctx_list_remove (&rasterizer->glyphs, glyph);
+              free (glyph);
+              l = NULL;
+           }
+         }
+       }
+     }
+  }
+#endif
+
+  /* directly forward */
+  ctx_process (term->host, &command->entry);
 }
 
-void ctx_sdl_set_fullscreen (Ctx *ctx, int val)
+inline static void ctx_term_flush (Ctx *ctx)
 {
-  CtxSDL *sdl = (void*)ctx->renderer;
+  CtxTerm *term = (CtxTerm*)ctx->backend;
+  int width =  term->width;
+  int height = term->height;
+  switch (term->mode)
+  {
+    case CTX_TERM_QUARTER:
+       ctx_term_output_buf_quarter (term->pixels,
+                                width, height, term);
+       break;
+    case CTX_TERM_ASCII:
+       ctx_term_output_buf_ascii (term->pixels,
+                                width, height, term, 0);
+       break;
+    case CTX_TERM_ASCII_MONO:
+       ctx_term_output_buf_ascii (term->pixels,
+                                width, height, term, 1);
+       break;
+    case CTX_TERM_SEXTANT:
+       ctx_term_output_buf_sextant (term->pixels,
+                                width, height, term);
+       break;
+    case CTX_TERM_BRAILLE:
+       ctx_term_output_buf_braille (term->pixels,
+                                width, height, term, 0);
+       break;
+    case CTX_TERM_BRAILLE_MONO:
+       ctx_term_output_buf_braille (term->pixels,
+                                width, height, term, 1);
+       break;
+  }
+#if CTX_BRAILLE_TEXT
+  CtxRasterizer *rasterizer = (CtxRasterizer*)(term->host->backend);
+  // XXX instead sort and inject along with braille
+  //
 
-  if (val)
+  //uint8_t rgba_bg[4]={0,0,0,0};
+  //uint8_t rgba_fg[4]={255,0,255,255};
+
+  for (CtxList *l = rasterizer->glyphs; l; l = l->next)
   {
-    SDL_SetWindowFullscreen (sdl->window, SDL_WINDOW_FULLSCREEN_DESKTOP);
+    CtxTermGlyph *glyph = l->data;
+
+    uint8_t *pixels = term->pixels;
+    long rgb_sum[4]={0,0,0};
+    for (int v = 0; v <  ctx_term_ch; v ++)
+    for (int u = 0; u <  ctx_term_cw; u ++)
+    {
+      int i = ((glyph->row-1) * ctx_term_ch + v) * rasterizer->blit_width + 
+              ((glyph->col-1) * ctx_term_cw + u);
+      for (int c = 0; c < 3; c ++)
+        rgb_sum[c] += pixels[i*4+c];
+    }
+    for (int c = 0; c < 3; c ++)
+      glyph->rgba_bg[c] = rgb_sum[c] / (ctx_term_ch * ctx_term_cw);
+    char utf8[8];
+    utf8[ctx_unichar_to_utf8(glyph->unichar, (uint8_t*)utf8)]=0;
+    ctx_term_set (term, glyph->col, glyph->row, 
+                     utf8, glyph->rgba_fg, glyph->rgba_bg);
+    free (glyph);
   }
-  else
+
+  printf ("\e[H");
+  printf ("\e[0m");
+  ctx_term_scanout (term);
+  printf ("\e[0m");
+  fflush(NULL);
+  while (rasterizer->glyphs)
+    ctx_list_remove (&rasterizer->glyphs, rasterizer->glyphs->data);
+#endif
+}
+
+void ctx_term_free (CtxTerm *term)
+{
+  while (term->lines)
   {
-    SDL_SetWindowFullscreen (sdl->window, 0);
+    free (term->lines->data);
+    ctx_list_remove (&term->lines, term->lines->data);
   }
-  // XXX we're presuming success
-  sdl->fullscreen = val;
+  printf ("\e[?25h"); // cursor on
+  nc_at_exit ();
+  free (term->pixels);
+  ctx_free (term->host);
+  free (term);
+  /* we're not destroying the ctx member; this function is called in ctx's teardown */
 }
-int ctx_sdl_get_fullscreen (Ctx *ctx)
+
+float ctx_term_get_cell_width (Ctx *ctx)
 {
-  CtxSDL *sdl = (void*)ctx->renderer;
-  return sdl->fullscreen;
+  return ctx_term_cw;
 }
 
+float ctx_term_get_cell_height (Ctx *ctx)
+{
+  return ctx_term_ch;
+}
 
-Ctx *ctx_new_sdl (int width, int height)
+Ctx *ctx_new_term (int width, int height)
 {
+  Ctx *ctx = _ctx_new_drawlist (width, height);
 #if CTX_RASTERIZER
+  CtxTerm *term = (CtxTerm*)calloc (sizeof (CtxTerm), 1);
+  CtxBackend *backend = (void*)term;
+ 
+  const char *mode = getenv ("CTX_TERM_MODE");
+  ctx_term_cw = 2;
+  ctx_term_ch = 3;
 
-  CtxSDL *sdl = (CtxSDL*)calloc (sizeof (CtxSDL), 1);
-  CtxTiled *tiled = (void*)sdl;
-
-  ctx_get_contents ("file:///tmp/ctx.icc", &sdl_icc, &sdl_icc_length);
-  if (width <= 0 || height <= 0)
-  {
-    width  = 1920;
-    height = 1080;
+  if (!mode) term->mode = CTX_TERM_SEXTANT;
+  else if (!strcmp (mode, "sextant")) term->mode = CTX_TERM_SEXTANT;
+  else if (!strcmp (mode, "ascii")) term->mode = CTX_TERM_ASCII_MONO;
+  //else if (!strcmp (mode, "ascii-mono")) term->mode = CTX_TERM_ASCII_MONO;
+  else if (!strcmp (mode, "quarter")) term->mode = CTX_TERM_QUARTER;
+  //else if (!strcmp (mode, "braille")){
+  //  term->mode = CTX_TERM_BRAILLE;
+  //  ctx_term_ch = 4;
+  //}
+  else if (!strcmp (mode, "braille")){
+    term->mode = CTX_TERM_BRAILLE_MONO;
+    ctx_term_ch = 4;
   }
-  sdl->window = SDL_CreateWindow("ctx", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, width, height, SDL_WINDOW_SHOWN|SDL_WINDOW_RESIZABLE);
-  //sdl->renderer = SDL_CreateRenderer (sdl->window, -1, SDL_RENDERER_SOFTWARE);
-  sdl->renderer = SDL_CreateRenderer (sdl->window, -1, 0);
-  if (!sdl->renderer)
-  {
-     ctx_free (tiled->ctx);
-     free (sdl);
-     return NULL;
+  else {
+    fprintf (stderr, "recognized values for CTX_TERM_MODE:\n"
+                    " sextant ascii quarter braille\n");
+    exit (1);
   }
-#if CTX_BABL
-  babl_init ();
-#endif
-  sdl->fullscreen = 0;
-
-
-  ctx_show_fps = getenv ("CTX_SHOW_FPS")!=NULL;
-
-  ctx_sdl_events = 1;
-  sdl->texture = SDL_CreateTexture (sdl->renderer,
-        SDL_PIXELFORMAT_ABGR8888,
-        SDL_TEXTUREACCESS_STREAMING,
-        width, height);
-
-  SDL_StartTextInput ();
-  SDL_EnableScreenSaver ();
-
-  tiled->ctx      = ctx_new ();
-  tiled->ctx_copy = ctx_new ();
-  tiled->width    = width;
-  tiled->height   = height;
-  tiled->cols     = 80;
-  tiled->rows     = 20;
-  ctx_set_renderer (tiled->ctx, sdl);
-  ctx_set_renderer (tiled->ctx_copy, sdl);
-  ctx_set_texture_cache (tiled->ctx_copy, tiled->ctx);
-
-  tiled->pixels = (uint8_t*)malloc (width * height * 4);
 
-  ctx_set_size (tiled->ctx,      width, height);
-  ctx_set_size (tiled->ctx_copy, width, height);
+  mode = getenv ("CTX_TERM_FORCE_FULL");
+  if (mode && strcmp (mode, "0") && strcmp (mode, "no"))
+    _ctx_term_force_full = 1;
 
-  tiled->flush = (void*)ctx_sdl_flush;
-  tiled->reset = (void*)ctx_sdl_reset;
-  tiled->free  = (void*)ctx_sdl_free;
-  tiled->set_clipboard = (void*)ctx_sdl_set_clipboard;
-  tiled->get_clipboard = (void*)ctx_sdl_get_clipboard;
+  fprintf (stdout, "\e[?1049h");
+  fprintf (stdout, "\e[?25l"); // cursor off
 
-  for (int i = 0; i < _ctx_max_threads; i++)
+  int maxwidth = ctx_terminal_cols  () * ctx_term_cw;
+  int maxheight = (ctx_terminal_rows ()) * ctx_term_ch;
+  if (width <= 0 || height <= 0)
   {
-    tiled->host[i] = ctx_new_for_framebuffer (tiled->pixels,
-                     tiled->width/CTX_HASH_COLS, tiled->height/CTX_HASH_ROWS,
-                     tiled->width * 4, CTX_FORMAT_RGBA8);
-    ctx_set_texture_source (tiled->host[i], tiled->ctx);
+    width = maxwidth;
+    height = maxheight;
   }
+  if (width > maxwidth) width = maxwidth;
+  if (height > maxheight) height = maxheight;
+  backend->ctx = ctx;
+  term->width  = width;
+  term->height = height;
 
-  mtx_init (&tiled->mtx, mtx_plain);
-  cnd_init (&tiled->cond);
+  term->cols = (width + 1) / ctx_term_cw;
+  term->rows = (height + 2) / ctx_term_ch;
+  term->lines = 0;
+  term->pixels = (uint8_t*)malloc (width * height * 4);
+  term->host = ctx_new_for_framebuffer (term->pixels,
+                                           width, height,
+                                           width * 4, CTX_FORMAT_RGBA8);
+#if CTX_BRAILLE_TEXT
+  ((CtxRasterizer*)term->host->backend)->term_glyphs=1;
+#endif
+  _ctx_mouse (ctx, NC_MOUSE_DRAG);
+  ctx_set_backend (ctx, term);
+  backend->process = ctx_term_process;
+  backend->flush   = ctx_term_flush;
+  backend->free    = (void(*)(void*))ctx_term_free;
+  backend->consume_events = ctx_nct_consume_events;
+  backend->get_event_fds = (void*) ctx_stdin_get_event_fds;
+  ctx_set_size (ctx, width, height);
+  ctx_font_size (ctx, ctx_term_ch); 
+#endif
 
-#define start_thread(no)\
-  if(_ctx_max_threads>no){ \
-    static void *args[2]={(void*)no, };\
-    thrd_t tid;\
-    args[1]=sdl;\
-    thrd_create (&tid, (void*)ctx_tiled_render_fun, args);\
-  }
-  start_thread(0);
-  start_thread(1);
-  start_thread(2);
-  start_thread(3);
-  start_thread(4);
-  start_thread(5);
-  start_thread(6);
-  start_thread(7);
-  start_thread(8);
-  start_thread(9);
-  start_thread(10);
-  start_thread(11);
-  start_thread(12);
-  start_thread(13);
-  start_thread(14);
-  start_thread(15);
-#undef start_thread
 
-  //ctx_flush (tiled->ctx);
-  return tiled->ctx;
-#else
-  return NULL;
-#endif
+  return ctx;
 }
-#else
 
-int ctx_renderer_is_sdl (Ctx *ctx)
-{
-  return 0;
-}
 #endif
 
 #if CTX_EVENTS
@@ -32200,1036 +33856,1298 @@ int ctx_renderer_is_sdl (Ctx *ctx)
 #include <sys/ioctl.h>
 #endif
 
-typedef struct CtxTermCell
+typedef struct _CtxTermImg CtxTermImg;
+struct _CtxTermImg
 {
-  char    utf8[5];
-  uint8_t fg[4];
-  uint8_t bg[4];
-
-  char    prev_utf8[5];
-  uint8_t prev_fg[4];
-  uint8_t prev_bg[4];
-} CtxTermCell;
+   CtxBackend backend;
+   int         width;
+   int         height;
+   int         cols;
+   int         rows;
+   int         was_down;
+   // we need to have the above members in that order up to here
+   uint8_t    *pixels;
+   Ctx        *host;
+   CtxList    *lines;
+};
 
-typedef struct CtxTermLine
+inline static void ctx_termimg_process (Ctx        *ctx,
+                                        CtxCommand *command)
 {
-  CtxTermCell *cells;
-  int maxcol;
-  int size;
-} CtxTermLine;
+  CtxTermImg *termimg = (void*)ctx->backend;
+#if CTX_CURRENT_PATH
+  ctx_update_current_path (ctx, &command->entry);
+#endif
 
-typedef enum
-{
-  CTX_TERM_ASCII,
-  CTX_TERM_ASCII_MONO,
-  CTX_TERM_SEXTANT,
-  CTX_TERM_BRAILLE_MONO,
-  CTX_TERM_BRAILLE,
-  CTX_TERM_QUARTER,
-} CtxTermMode;
+  /* directly forward */
+  ctx_process (termimg->host, &command->entry);
+}
 
-typedef struct _CtxTerm CtxTerm;
-struct _CtxTerm
+inline static void ctx_termimg_flush (Ctx *ctx) /* write the termimg backend's 24-bit pixel buffer to stdout as base64 inside chunked "\e_G" escapes (key set matches the kitty terminal graphics protocol) */
 {
-   void (*render) (void *term, CtxCommand *command);
-   void (*reset)  (void *term);
-   void (*flush)  (void *term);
-   char *(*get_clipboard) (void *ctxctx);
-   void (*set_clipboard) (void *ctxctx, const char *text);
-   void (*free)   (void *term);
-   Ctx      *ctx;
-   int       width;
-   int       height;
-   int       cols;
-   int       rows;
-   int       was_down;
-
-   uint8_t  *pixels;
-
-   Ctx      *host;
-   CtxList  *lines;
-   CtxTermMode mode;
-};
+  CtxTermImg *termimg = (CtxTermImg*)ctx->backend;
+  int width =  termimg->width;
+  int height = termimg->height;
+  if (!termimg->pixels) return; /* no framebuffer, nothing to send */
+  char *encoded = malloc (width * height * 3 * 3); /* NOTE(review): allocation result is not NULL-checked */
+  ctx_bin2base64 (termimg->pixels, width * height * 3,
+                  encoded);
+  int encoded_len = strlen (encoded); /* assumes ctx_bin2base64 NUL-terminates its output - TODO confirm */
 
-static int ctx_term_ch = 8;
-static int ctx_term_cw = 8;
+  int i = 0;
 
-void ctx_term_set (CtxTerm *term,
-                      int col, int row, const char *utf8,
-                      uint8_t *fg, uint8_t *bg)
-{
-  if (col < 1 || row < 1 || col > term->cols  || row > term->rows) return;
-  while (ctx_list_length (term->lines) < row)
+  printf ("\e[H"); /* cursor to home position */
+  printf ("\e_Gf=24,s=%i,v=%i,t=d,a=T,m=1;\e\\", width, height); /* header escape: control keys only, empty payload, m=1 announces that chunks follow */
+  while (i <  encoded_len)
   {
-    ctx_list_append (&term->lines, calloc (sizeof (CtxTermLine), 1));
+     if (i + 4000 <  encoded_len) /* must use the same chunk size as the copy loop below; testing against 4096 flagged a chunk as final while up to 96 characters were still pending */
+     {
+       printf  ("\e_Gm=1;");
+     }
+     else
+     {
+       printf  ("\e_Gm=0;");
+     }
+     for (int n = 0; n < 4000 && i < encoded_len; n++) /* at most 4000 payload characters per escape */
+     {
+       printf ("%c", encoded[i]);
+       i++;
+     }
+     printf ("\e\\");
   }
-  CtxTermLine *line = ctx_list_nth_data (term->lines, row-1);
-  assert (line);
-  if (line->size < col)
+  free (encoded);
+  
+  fflush (NULL);
+}
+
+void ctx_termimg_free (CtxTermImg *termimg)
+{
+  while (termimg->lines)
   {
-     int new_size = ((col + 128)/128)*128;
-     line->cells = realloc (line->cells, sizeof (CtxTermCell) * new_size);
-     memset (&line->cells[line->size], 0, sizeof (CtxTermCell) * (new_size - line->size) );
-     line->size = new_size;
+    free (termimg->lines->data);
+    ctx_list_remove (&termimg->lines, termimg->lines->data);
   }
-  if (col > line->maxcol) line->maxcol = col;
-  strncpy (line->cells[col-1].utf8, (char*)utf8, 4);
-  memcpy  (line->cells[col-1].fg, fg, 4);
-  memcpy  (line->cells[col-1].bg, bg, 4);
+  printf ("\e[?25h"); // cursor on
+  nc_at_exit ();
+  free (termimg->pixels);
+  ctx_free (termimg->host);
+  free (termimg);
+  /* we're not destroying the ctx member; this function is called in ctx's teardown */
 }
 
-static int _ctx_term256 = 0; // XXX TODO implement autodetect for this
-static long _ctx_curfg = -1;
-static long _ctx_curbg = -1;
-
-static long ctx_rgb_to_long (int r,int g, int b)
+Ctx *ctx_new_termimg (int width, int height)
 {
-  return r * 256 * 256 + g * 256 + b;
-}
+  Ctx *ctx = _ctx_new_drawlist (width, height);
+#if CTX_RASTERIZER
+  fprintf (stdout, "\e[?1049h");
+  fprintf (stdout, "\e[?25l"); // cursor off
+  CtxTermImg *termimg = (CtxTermImg*)calloc (sizeof (CtxTermImg), 1);
+  CtxBackend *backend = (void*)termimg;
 
 
-static void ctx_term_set_fg (int red, int green, int blue)
-{
-  long lc = ctx_rgb_to_long (red, green, blue);
-  if (lc == _ctx_curfg)
-    return;
-  _ctx_curfg=lc;
-  if (_ctx_term256 == 0)
-  {
-    printf("\e[38;2;%i;%i;%im", red,green,blue);
-  }
-  else
-  {
-    int gray = (green /255.0) * 24 + 0.5;
-    int r    = (red/255.0)    * 6 + 0.5;
-    int g    = (green/255.0)  * 6 + 0.5;
-    int b    = (blue/255.0)   * 6 + 0.5;
-    if (gray > 23) gray = 23;
+  int maxwidth = ctx_terminal_width ();
 
-    if (r > 5) r = 5;
-    if (g > 5) g = 5;
-    if (b > 5) b = 5;
+  int colwidth = maxwidth/ctx_terminal_cols ();
+  maxwidth-=colwidth;
 
-    if (((int)(r/1.66)== (int)(g/1.66)) && ((int)(g/1.66) == ((int)(b/1.66))))
-    {
-      printf("\e[38;5;%im", 16 + 216 + gray);
-    }
-    else
-      printf("\e[38;5;%im", 16 + r * 6 * 6 + g * 6  + b);
+  int maxheight = ctx_terminal_height ();
+  if (width <= 0 || height <= 0)
+  {
+    width  = maxwidth;
+    height = maxheight;
   }
+  if (width > maxwidth) width = maxwidth;
+  if (height > maxheight) height = maxheight;
+  termimg->width  = width;
+  termimg->height = height;
+  termimg->lines = 0;
+  termimg->pixels = (uint8_t*)malloc (width * height * 3);
+  termimg->host = ctx_new_for_framebuffer (termimg->pixels,
+                                           width, height,
+                                           width * 3, CTX_FORMAT_RGB8);
+  _ctx_mouse (ctx, NC_MOUSE_DRAG);
+  ctx_set_backend (ctx, termimg);
+
+  backend->ctx = ctx;
+  backend->process = ctx_termimg_process;
+  backend->flush = ctx_termimg_flush;
+  backend->free  = (void(*)(void*))ctx_termimg_free;
+  backend->consume_events = ctx_nct_consume_events;
+  backend->get_event_fds = (void*) ctx_stdin_get_event_fds;
+  ctx_set_size (ctx, width, height);
+  ctx_font_size (ctx, 14.0f);
+#endif
+
+  return ctx;
 }
 
-static void ctx_term_set_bg(int red, int green, int blue)
+#endif
+
+#if CTX_TFT_ESPI
+
+#ifndef CTX_TFT_ESPI_MEMORY_BUDGET
+#define CTX_TFT_ESPI_MEMORY_BUDGET  (320*280)
+#endif
+
+
+typedef struct CtxTftBackend
 {
-  long lc = ctx_rgb_to_long (red, green, blue);
-//if (lc == _ctx_curbg)
-//  return;
-  _ctx_curbg=lc;
-  if (_ctx_term256 == 0)
-  {
-    printf("\e[48;2;%i;%i;%im", red,green,blue);
-  }
-  else
+  CtxBackend   backend;
+  TFT_eSPI    *tft;
+  int          flags;
+  uint16_t    *fb;
+
+  int     min_col; // hasher cols and rows
+  int     min_row; // hasher cols and rows
+  int     max_col; // hasher cols and rows
+  int     max_row; // hasher cols and rows
+  uint8_t hashes[CTX_HASH_ROWS * CTX_HASH_COLS * 20];
+  uint8_t state[CTX_HASH_ROWS * CTX_HASH_COLS];
+} CtxTftBackend;
+
+static void ctx_render_tft (Ctx *ctx, 
+                            int x0, int y0, int x1, int y1)
+{
+  CtxTftBackend *backend_tft = (CtxTftBackend*)ctx->backend;
+  TFT_eSPI *tft = backend_tft->tft;
+  int flags = backend_tft->flags;
+  int memory_budget = CTX_TFT_ESPI_MEMORY_BUDGET;
+  int width  = x1 - x0 + 1;
+  int height = y1 - y0 + 1;
+  uint16_t *fb;
+
+  int chunk_size = 16; /* wanting chunks of 16 scanlines at a
+                          time to go out seems to give good
+                          spi bandwidth use */
+  while (chunk_size * width * 2 > memory_budget/2)
   {
-    int gray = (green /255.0) * 24 + 0.5;
-    int r    = (red/255.0)    * 6 + 0.5;
-    int g    = (green/255.0)  * 6 + 0.5;
-    int b    = (blue/255.0)   * 6 + 0.5;
-    if (gray > 23) gray = 23;
+    chunk_size/=2;
+  }
+ 
+  if (!backend_tft->fb)
+    backend_tft->fb = (uint16_t*)malloc (memory_budget);
+  fb = backend_tft->fb;
 
-    if (r > 5) r = 5;
-    if (g > 5) g = 5;
-    if (b > 5) b = 5;
+  if (flags & CTX_TFT_332)
+  {
+    int render_height = height;
+    memory_budget -= chunk_size * width * 2;
 
-    if (((int)(r/1.66)== (int)(g/1.66)) && ((int)(g/1.66) == ((int)(b/1.66))))
+    if (width * render_height > memory_budget)
     {
-      printf("\e[48;5;%im", 16 + 216 + gray);
+       render_height = memory_budget / width;
     }
-    else
-      printf("\e[48;5;%im", 16 + r * 6 * 6 + g * 6  + b);
-  }
-}
+    do
+    {
 
-static int _ctx_term_force_full = 0;
+    render_height = ctx_mini (render_height, y1-y0);
+    memset (fb, 0, width * render_height);
+    Ctx *renderer = ctx_new_for_framebuffer (fb,
+       width, render_height, width,
+       CTX_FORMAT_RGB332);
 
-void ctx_term_scanout (CtxTerm *term)
-{
-  int row = 1;
-  printf ("\e[H");
-//  printf ("\e[?25l");
-  printf ("\e[0m");
+    ctx_translate (renderer, -1.0 * x0, -1.0 * y0);
+    ctx_render_ctx (ctx, renderer);
+    ctx_free (renderer);
 
-  int cur_fg[3]={-1,-1,-1};
-  int cur_bg[3]={-1,-1,-1};
+    uint8_t *temp = ((uint8_t*)fb)+memory_budget;
+    uint8_t *src = (uint8_t*)fb;
 
-  for (CtxList *l = term->lines; l; l = l->next)
-  {
-    CtxTermLine *line = l->data;
-    for (int col = 1; col <= line->maxcol; col++)
+    for (int y = y0; y < y0 + render_height; y+=chunk_size)
     {
-      CtxTermCell *cell = &line->cells[col-1];
-
-      if (strcmp(cell->utf8, cell->prev_utf8) ||
-          memcmp(cell->fg, cell->prev_fg, 3) ||
-          memcmp(cell->bg, cell->prev_bg, 3) || _ctx_term_force_full)
-      {
-        if (cell->fg[0] != cur_fg[0] ||
-            cell->fg[1] != cur_fg[1] ||
-            cell->fg[2] != cur_fg[2])
-        {
-          ctx_term_set_fg (cell->fg[0], cell->fg[1], cell->fg[2]);
-          cur_fg[0]=cell->fg[0];
-          cur_fg[1]=cell->fg[1];
-          cur_fg[2]=cell->fg[2];
-        }
-        if (cell->bg[0] != cur_bg[0] ||
-            cell->bg[1] != cur_bg[1] ||
-            cell->bg[2] != cur_bg[2])
-        {
-          ctx_term_set_bg (cell->bg[0], cell->bg[1], cell->bg[2]);
-          cur_bg[0]=cell->bg[0];
-          cur_bg[1]=cell->bg[1];
-          cur_bg[2]=cell->bg[2];
-        }
-        printf ("%s", cell->utf8);
-      }
-      else
+      uint16_t *dst = (uint16_t*)temp;
+      float h = ctx_mini (chunk_size, y1-y);
+      for (int i = 0; i < width * h; i++)
       {
-        // TODO: accumulate succesive such to be ignored items,
-        // and compress them into one, making us compress largely
-        // reused screens well
-        printf ("\e[C");
+        int val = *src++;
+        uint8_t r, g, b;
+        ctx_332_unpack (val, &r, &g, &b);
+        *dst++ = ctx_565_pack (r, g, b, 1);
       }
-      strcpy (cell->prev_utf8, cell->utf8);
-      memcpy (cell->prev_fg, cell->fg, 3);
-      memcpy (cell->prev_bg, cell->bg, 3);
+      tft->pushRect(x0, y, width, h, (uint16_t*)temp);
     }
-    if (row != term->rows)
-      printf ("\n\r");
-    row ++;
+      y0 += render_height;
+    } while (y0 < y1);
   }
-  printf ("\e[0m");
-  //printf ("\e[?25h");
-  //
-}
+  else if (flags & CTX_TFT_GRAY)
+  {
+     int render_height = height;
+     memory_budget -= chunk_size * width * 2;
+     if (width * render_height > memory_budget)
+     {
+       render_height = memory_budget / width;
+     }
+    do
+    {
 
-// xx
-// xx
-// xx
-//
+    render_height = ctx_mini (render_height, y1-y0);
+    memset (fb, 0, width * render_height);
+    Ctx *renderer = ctx_new_for_framebuffer (fb,
+       width, render_height, width,
+       CTX_FORMAT_GRAY8);
 
-static inline int _ctx_rgba8_manhattan_diff (const uint8_t *a, const uint8_t *b)
-{
-  int c;
-  int diff = 0;
-  for (c = 0; c<3;c++)
-    diff += ctx_pow2(a[c]-b[c]);
-  return ctx_sqrtf(diff);
-  return diff;
-}
+    ctx_translate (renderer, -1.0 * x0, -1.0 * y0);
+    ctx_render_ctx (ctx, renderer);
+    ctx_free (renderer);
 
-static void ctx_term_output_buf_half (uint8_t *pixels,
-                          int width,
-                          int height,
-                          CtxTerm *term)
-{
-  int stride = width * 4;
-  const char *sextants[]={
-   " ","▘","▝","▀","▖","▌", "▞", "▛", "▗", "▚", "▐", "▜","▄","▙","▟","█",
+    uint8_t *temp = ((uint8_t*)fb)+memory_budget;
+    uint8_t *src = (uint8_t*)fb;
 
-  };
-  for (int row = 0; row < height/2; row++)
+    for (int y = y0; y < y0 + render_height; y+=chunk_size)
     {
-      for (int col = 0; col < width-3; col++)
-        {
-          int     unicode = 0;
-          int     bitno = 0;
-          uint8_t rgba[2][4] = {
-                             {255,255,255,0},
-                             {0,0,0,0}};
-          int i = 0;
+      uint16_t *dst = (uint16_t*)temp;
+      float h = ctx_mini (chunk_size, y1-y);
+      for (int i = 0; i < width * h; i++)
+      {
+        int val = *src++;
+        *dst++ = ctx_565_pack (val, val, val, 1);
+      }
+      tft->pushRect(x0, y, width, h, (uint16_t*)temp);
+    }
+      y0 += render_height;
+    } while (y0 < y1);
+  }
+  else
+  {
+    int render_height = height;
+    if (width * render_height > memory_budget / 2)
+    {
+       render_height = memory_budget / width / 2;
+    }
 
-          int  rgbasum[2][4] = {0,};
-          int  sumcount[2];
+    do
+    {
+      render_height = ctx_mini (render_height, y1-y0);
+      memset (fb, 0, width * 2 * render_height);
+      Ctx *renderer = ctx_new_for_framebuffer (fb, width, render_height, width * 2,
+            CTX_FORMAT_RGB565_BYTESWAPPED);
+      ctx_translate (renderer, -1.0 * x0, -1.0 * y0);
+      ctx_render_ctx (ctx, renderer);
+      tft->pushRect(x0, y0, width, render_height, fb);
+      ctx_free (renderer);    
 
-          int curdiff = 0;
-          /* first find starting point colors */
-          for (int yi = 0; yi < ctx_term_ch; yi++)
-            for (int xi = 0; xi < ctx_term_cw; xi++, i++)
-                {
-                  int noi = (row * ctx_term_ch + yi) * stride + (col*ctx_term_cw+xi) * 4;
+      y0 += render_height;
+    } while (y0 < y1);
+  }
+  if (flags & CTX_TFT_CYCLE_BUF)
+  {
+    free (fb);
+    backend_tft->fb = NULL;
+  }
+}
 
-                  if (rgba[0][3] == 0)
-                  {
-                    for (int c = 0; c < 3; c++)
-                      rgba[0][c] = pixels[noi + c];
-                    rgba[0][3] = 255; // used only as mark of in-use
-                  }
-                  else
-                  {
-                    int diff = _ctx_rgba8_manhattan_diff (&pixels[noi], rgba[0]);
-                    if (diff > curdiff)
-                    {
-                      curdiff = diff;
-                      for (int c = 0; c < 3; c++)
-                        rgba[1][c] = pixels[noi + c];
-                    }
-                  }
 
-                }
+static void
+ctx_tft_flush (Ctx *ctx)
+{
+  CtxTftBackend *tft_backend = (CtxTftBackend*)ctx->backend;
+  static int64_t prev_time = 0;
+  int64_t cur_time = ctx_ticks () / 1000;
 
-          for (int iters = 0; iters < 1; iters++)
-          {
-                  i= 0;
-          for (int i = 0; i < 4; i ++)
-             rgbasum[0][i] = rgbasum[1][i]=0;
-          sumcount[0] = sumcount[1] = 0;
+  if (tft_backend->flags & CTX_TFT_SHOW_FPS)
+  {
+   
+  float em = ctx_height (ctx) * 0.08;
+  float y = em;
+  ctx_font_size (ctx, em);
+  ctx_rectangle (ctx, ctx_width(ctx)-(em*4), 0, em *4, em * 1.1);
+  ctx_rgba (ctx, 0, 0, 0, 0.7);
+  ctx_fill (ctx);
 
-          for (int yi = 0; yi < ctx_term_ch; yi++)
-            for (int xi = 0; xi < ctx_term_cw; xi++, i++)
-                {
-                  int noi = (row * ctx_term_ch + yi) * stride + (col*ctx_term_cw+xi) * 4;
+  ctx_rgba (ctx, 1, 1, 0, 1);
 
-                  int diff1 = _ctx_rgba8_manhattan_diff (&pixels[noi], rgba[0]);
-                  int diff2 = _ctx_rgba8_manhattan_diff (&pixels[noi], rgba[1]);
-                  int cluster = 0;
-                  if (diff1 <= diff2)
-                    cluster = 0;
-                  else
-                    cluster = 1;
-                  sumcount[cluster]++;
-                  for (int c = 0; c < 3; c++)
-                    rgbasum[cluster][c] += pixels[noi+c];
-                }
+  if (prev_time)
+  {
+    char buf[22];
+    float fps = 1.0f/((cur_time-prev_time)/1000.0f);
+    ctx_move_to (ctx, ctx_width (ctx) - (em * 3.8), y);
+    sprintf (buf, "%2.1f fps", fps);
+    ctx_text (ctx, buf);
+    ctx_begin_path (ctx);
+  }
+  prev_time = cur_time;
+  }
 
 
-          if (sumcount[0])
-          for (int c = 0; c < 3; c++)
-          {
-            rgba[0][c] = rgbasum[0][c] / sumcount[0];
-          }
-          if (sumcount[1])
-          for (int c = 0; c < 3; c++)
+  if (tft_backend->flags & CTX_TFT_HASH_CACHE)
+  {
+    Ctx *hasher = ctx_hasher_new (ctx_width (ctx), ctx_height (ctx),
+                                  CTX_HASH_COLS, CTX_HASH_ROWS);
+    int dirty_tiles = 0;
+    ctx_render_ctx (ctx, hasher);
+
+    tft_backend->max_col = -100;
+    tft_backend->min_col = 100;
+    tft_backend->max_row = -100;
+    tft_backend->min_row = 100;
+
+      for (int row = 0; row < CTX_HASH_ROWS; row++)
+        for (int col = 0; col < CTX_HASH_COLS; col++)
+        {
+          uint8_t *new_hash = ctx_hasher_get_hash (hasher, col, row);
+          if (new_hash && memcmp (new_hash, &tft_backend->hashes[(row * CTX_HASH_COLS + col) *  20], 20))
           {
-            rgba[1][c] = rgbasum[1][c] / sumcount[1];
-          }
+            memcpy (&tft_backend->hashes[(row * CTX_HASH_COLS +  col)*20], new_hash, 20);
+            dirty_tiles++;
+
+            tft_backend->max_col = ctx_maxi (tft_backend->max_col, col);
+            tft_backend->max_row = ctx_maxi (tft_backend->max_row, row);
+            tft_backend->min_col = ctx_mini (tft_backend->min_col, col);
+            tft_backend->min_row = ctx_mini (tft_backend->min_row, row);
           }
+        }
+      free (((CtxHasher*)(hasher->backend))->hashes);
+      ctx_free (hasher);
 
-          int pixels_set = 0;
-          for (int y = 0; y < ctx_term_ch; y++)
-            for (int x = 0; x < ctx_term_cw; x++)
-              {
-                int no = (row * ctx_term_ch + y) * stride + (col*ctx_term_cw+x) * 4;
-#define CHECK_IS_SET \
-      (_ctx_rgba8_manhattan_diff (&pixels[no], rgba[0])< \
-       _ctx_rgba8_manhattan_diff (&pixels[no], rgba[1]))
 
-                int set = CHECK_IS_SET;
-#undef CHECK_IS_SET
-                if (set)
-                  { unicode |=  (1<< (bitno) ); 
-                    pixels_set ++; 
-                  }
-                bitno++;
-              }
-           if (pixels_set == 4)
-             ctx_term_set (term, col +1, row + 1, " ",
-                           rgba[1], rgba[0]);
-           else
-             ctx_term_set (term, col +1, row + 1, sextants[unicode],
-                           rgba[0], rgba[1]);
-        }
-    }
+      if (dirty_tiles)
+      {
+         int x0 = tft_backend->min_col * (ctx_width (ctx)/CTX_HASH_COLS);
+         int x1 = (tft_backend->max_col+1) * (ctx_width (ctx)/CTX_HASH_COLS)-1;
+         int y0 = tft_backend->min_row * (ctx_height (ctx)/CTX_HASH_ROWS);
+         int y1 = (tft_backend->max_row+1) * (ctx_height (ctx)/CTX_HASH_ROWS)-1;
+#if 0
+         ctx_rectangle (ctx, x0, y0, x1-x0+1, y1-y0+1);
+         ctx_rgba (ctx, 1,0,0,0.5);
+         ctx_line_width (ctx, 4.0);
+         ctx_stroke (ctx);
+
+         //ctx_move_to (ctx, (x0+x1)/2, (y0+y1)/2);
+         //char buf[44];
+         //sprintf (buf, "%ix%i", ctx_width(ctx), ctx_height(ctx));
+         //ctx_text (ctx, buf);
+
+         //ctx_rgba (ctx, 0,1,0,0.5);
+         //ctx_rectangle (ctx, 0, 0, ctx_width(ctx)/2, ctx_height(ctx)/2);
+         //ctx_fill (ctx);
+#endif
+         int width = x1 - x0 + 1;
+         int height = y1 - y0 + 1;
+         if ( (tft_backend->flags & CTX_TFT_AUTO_332) &&
+              ((width) * height * 2 > CTX_TFT_ESPI_MEMORY_BUDGET))
+         {
+           tft_backend->flags |= CTX_TFT_332;
+           ctx_render_tft (ctx, x0, y0, x1, y1);
+           tft_backend->flags -= CTX_TFT_332;
+         }
+         else
+         {
+           ctx_render_tft (ctx, x0, y0, x1, y1);
+         }
+      }
+  }
+  else
+  {
+    ctx_render_tft (ctx, 0, 0, ctx_width(ctx)-1, ctx_height(ctx)-1);
+  }
 }
 
-void ctx_term_find_color_pair (CtxTerm *term, int x0, int y0, int w, int h,
-                uint8_t rgba[2][4])
-        //uint8_t *rgba0, uint8_t *rgba1)
+Ctx *ctx_new_tft (TFT_eSPI *tft, int flags)
 {
-int curdiff = 0;
-int stride = term->width * 4;
-uint8_t *pixels = term->pixels;
-/* first find starting point colors */
-for (int y = y0; y < y0 + h; y++)
-  for (int x = x0; x < x0 + w; x++)
-      {
-        int noi = (y) * stride + (x) * 4;
+  Ctx *ctx = ctx_new_drawlist (tft->width(), tft->height());
+  CtxBackend *backend = (CtxBackend*)calloc (sizeof (CtxTftBackend), 1);
+  CtxTftBackend *tft_backend = (CtxTftBackend*)backend;
+  tft_backend->tft = tft;
+  tft_backend->flags = flags;
+  backend->flush = ctx_tft_flush;
+  ctx_set_backend (ctx, backend);
+  return ctx;
+}
 
-        if (rgba[0][3] == 0)
-        {
-          for (int c = 0; c < 3; c++)
-            rgba[0][c] = pixels[noi + c];
-          rgba[0][3] = 255; // used only as mark of in-use
-        }
-        else
-        {
-          int diff = _ctx_rgba8_manhattan_diff (&pixels[noi], &rgba[0][0]);
-          if (diff > curdiff)
-          {
-            curdiff = diff;
-            for (int c = 0; c < 3; c++)
-              rgba[1][c] = pixels[noi + c];
-          }
-        }
-      }
-          int  rgbasum[2][4] = {0,};
-          int  sumcount[2];
+#endif
 
-          for (int iters = 0; iters < 1; iters++)
-          {
-          for (int i = 0; i < 4; i ++)
-             rgbasum[0][i] = rgbasum[1][i]=0;
-          sumcount[0] = sumcount[1] = 0;
+static CtxFont ctx_fonts[CTX_MAX_FONTS];
+static int     ctx_font_count = 0;
 
-          for (int y = y0; y < y0 + h; y++)
-            for (int x = x0; x < x0 + w; x++)
-                {
-                  int noi = (y) * stride + (x) * 4;
+#if CTX_FONT_ENGINE_STB
+static float
+ctx_glyph_width_stb (CtxFont *font, Ctx *ctx, uint32_t unichar);
+static float
+ctx_glyph_kern_stb (CtxFont *font, Ctx *ctx, uint32_t unicharA, uint32_t unicharB);
+static int
+ctx_glyph_stb (CtxFont *font, Ctx *ctx, uint32_t unichar, int stroke);
 
-                  int diff1 = _ctx_rgba8_manhattan_diff (&pixels[noi], rgba[0]);
-                  int diff2 = _ctx_rgba8_manhattan_diff (&pixels[noi], rgba[1]);
-                  int cluster = 0;
-                  if (diff1 <= diff2)
-                    cluster = 0;
-                  else
-                    cluster = 1;
-                  sumcount[cluster]++;
-                  for (int c = 0; c < 3; c++)
-                    rgbasum[cluster][c] += pixels[noi+c];
-                }
+CtxFontEngine ctx_font_engine_stb =
+{
+#if CTX_FONTS_FROM_FILE
+  ctx_load_font_ttf_file,
+#endif
+  ctx_load_font_ttf,
+  ctx_glyph_stb,
+  ctx_glyph_width_stb,
+  ctx_glyph_kern_stb,
+};
 
+int
+ctx_load_font_ttf (const char *name, const void *ttf_contents, int length)
+{
+  if (ctx_font_count >= CTX_MAX_FONTS)
+    { return -1; }
+  ctx_fonts[ctx_font_count].type = 1;
+  ctx_fonts[ctx_font_count].name = (char *) malloc (strlen (name) + 1);
+  ctx_strcpy ( (char *) ctx_fonts[ctx_font_count].name, name);
+  if (!stbtt_InitFont (&ctx_fonts[ctx_font_count].stb.ttf_info, ttf_contents, 0) )
+    {
+      ctx_log ( "Font init failed\n");
+      return -1;
+    }
+  ctx_fonts[ctx_font_count].engine = &ctx_font_engine_stb;
+  ctx_font_count ++;
+  return ctx_font_count-1;
+}
 
-          if (sumcount[0])
-          for (int c = 0; c < 3; c++)
-          {
-            rgba[0][c] = rgbasum[0][c] / sumcount[0];
-          }
-          if (sumcount[1])
-          for (int c = 0; c < 3; c++)
-          {
-            rgba[1][c] = rgbasum[1][c] / sumcount[1];
-          }
-          }
+#if CTX_FONTS_FROM_FILE
+int
+ctx_load_font_ttf_file (const char *name, const char *path)
+{
+  uint8_t *contents = NULL;
+  long length = 0;
+  ctx_get_contents (path, &contents, &length);
+  if (!contents)
+    {
+      ctx_log ( "File load failed\n");
+      return -1;
+    }
+  return ctx_load_font_ttf (name, contents, length);
+}
+#endif
 
+static int
+ctx_glyph_stb_find (CtxFont *font, uint32_t unichar)
+{
+  stbtt_fontinfo *ttf_info = &font->stb.ttf_info;
+  int index = font->stb.cache_index;
+  if (font->stb.cache_unichar == unichar)
+    {
+      return index;
+    }
+  font->stb.cache_unichar = 0;
+  index = font->stb.cache_index = stbtt_FindGlyphIndex (ttf_info, unichar);
+  font->stb.cache_unichar = unichar;
+  return index;
 }
 
+static float
+ctx_glyph_width_stb (CtxFont *font, Ctx *ctx, uint32_t unichar)
+{
+  stbtt_fontinfo *ttf_info = &font->stb.ttf_info;
+  float font_size          = ctx->state.gstate.font_size;
+  float scale              = stbtt_ScaleForPixelHeight (ttf_info, font_size);
+  int advance, lsb;
+  int glyph = ctx_glyph_stb_find (font, unichar);
+
+#if CTX_EVENTS
+  if (ctx_backend_type (ctx) == CTX_BACKEND_TERM && (3.02 - font_size) < 0.03)
+    return 2;
+#endif
 
+  if (glyph==0)
+    { return 0.0f; }
+  stbtt_GetGlyphHMetrics (ttf_info, glyph, &advance, &lsb);
+  return (advance * scale);
+}
 
-static void ctx_term_output_buf_quarter (uint8_t *pixels,
-                          int width,
-                          int height,
-                          CtxTerm *term)
+static float
+ctx_glyph_kern_stb (CtxFont *font, Ctx *ctx, uint32_t unicharA, uint32_t unicharB)
 {
-  int stride = width * 4;
-  const char *sextants[]={
-   " ","▘","▝","▀","▖","▌", "▞", "▛", "▗", "▚", "▐", "▜","▄","▙","▟","█"
+  stbtt_fontinfo *ttf_info = &font->stb.ttf_info;
+  float font_size = ctx->state.gstate.font_size;
+  float scale = stbtt_ScaleForPixelHeight (ttf_info, font_size);
+  int glyphA = ctx_glyph_stb_find (font, unicharA);
+  int glyphB = ctx_glyph_stb_find (font, unicharB);
+  return stbtt_GetGlyphKernAdvance (ttf_info, glyphA, glyphB) * scale;
+}
 
-  };
-  for (int row = 0; row < height/ctx_term_ch; row++)
+static int
+ctx_glyph_stb (CtxFont *font, Ctx *ctx, uint32_t unichar, int stroke)
+{
+  stbtt_fontinfo *ttf_info = &font->stb.ttf_info;
+  int glyph = ctx_glyph_stb_find (font, unichar);
+  if (glyph==0)
+    { return -1; }
+  float font_size = ctx->state.gstate.font_size;
+  int   baseline = ctx->state.y;
+  float origin_x = ctx->state.x;
+  float origin_y = baseline;
+  float scale    = stbtt_ScaleForPixelHeight (ttf_info, font_size);;
+  stbtt_vertex *vertices = NULL;
+  ctx_begin_path (ctx);
+  int num_verts = stbtt_GetGlyphShape (ttf_info, glyph, &vertices);
+  for (int i = 0; i < num_verts; i++)
     {
-      for (int col = 0; col < width /ctx_term_cw; col++)
+      stbtt_vertex *vertex = &vertices[i];
+      switch (vertex->type)
         {
-          int     unicode = 0;
-          int     bitno = 0;
-          uint8_t rgba[2][4] = {
-                             {255,255,255,0},
-                             {0,0,0,0}};
-          ctx_term_find_color_pair (term, col * ctx_term_cw,
-                                    row * ctx_term_ch,
-                                    ctx_term_cw,
-                                    ctx_term_ch, rgba);
+          case STBTT_vmove:
+            ctx_move_to (ctx,
+                         origin_x + vertex->x * scale, origin_y - vertex->y * scale);
+            break;
+          case STBTT_vline:
+            ctx_line_to (ctx,
+                         origin_x + vertex->x * scale, origin_y - vertex->y * scale);
+            break;
+          case STBTT_vcubic:
+            ctx_curve_to (ctx,
+                          origin_x + vertex->cx  * scale, origin_y - vertex->cy  * scale,
+                          origin_x + vertex->cx1 * scale, origin_y - vertex->cy1 * scale,
+                          origin_x + vertex->x   * scale, origin_y - vertex->y   * scale);
+            break;
+          case STBTT_vcurve:
+            ctx_quad_to (ctx,
+                         origin_x + vertex->cx  * scale, origin_y - vertex->cy  * scale,
+                         origin_x + vertex->x   * scale, origin_y - vertex->y   * scale);
+            break;
+        }
+    }
+  stbtt_FreeShape (ttf_info, vertices);
+  if (stroke)
+    {
+      ctx_stroke (ctx);
+    }
+  else
+    { ctx_fill (ctx); }
+  return 0;
+}
+#endif
 
-          int pixels_set = 0;
-          for (int y = 0; y < 2; y++)
-            for (int x = 0; x < ctx_term_cw; x++)
-              {
-                int no = (row * ctx_term_ch + y) * stride + (col*ctx_term_cw+x) * 4;
-#define CHECK_IS_SET \
-      (_ctx_rgba8_manhattan_diff (&pixels[no], rgba[0])< \
-       _ctx_rgba8_manhattan_diff (&pixels[no], rgba[1]))
+#if CTX_FONT_ENGINE_CTX
 
-                int set = CHECK_IS_SET;
-#undef CHECK_IS_SET
-                if (set)
-                  { unicode |=  (1<< (bitno) ); 
-                    pixels_set ++; 
-                  }
-                bitno++;
-              }
-           if (pixels_set == 4)
-             ctx_term_set (term, col +1, row + 1, " ",
-                           rgba[1], rgba[0]);
-           else
-             ctx_term_set (term, col +1, row + 1, sextants[unicode],
-                           rgba[0], rgba[1]);
-        }
+static int ctx_font_find_glyph_cached (CtxFont *font, uint32_t glyph)
+{
+#if 1
+  int min       = 0;
+  int max       = font->ctx.glyphs-1;
+  uint32_t found;
+
+  do {
+    int pos = (min + max)/2;
+    found = font->ctx.index[pos*2];
+    if (found == glyph)
+    {
+      return font->ctx.index[pos*2+1];
+    } else if (min == max)
+      return -1;
+    else if (min == max-1)
+      return -1;
+    else if (found < glyph)
+    {
+      min = pos;
+    } else {
+      max = pos;
+    }
+
+  } while (min != max);
+
+  return -1;
+#else
+  for (int i = 0; i < font->ctx.glyphs; i++)
+    {
+      if (font->ctx.index[i * 2] == glyph)
+        { return font->ctx.index[i * 2 + 1]; }
     }
+  return -1;
+#endif
 }
 
-
-static void ctx_term_output_buf_sextant (uint8_t *pixels,
-                          int width,
-                          int height,
-                          CtxTerm *term)
+static int ctx_glyph_find_ctx (CtxFont *font, Ctx *ctx, uint32_t unichar)
 {
-  int stride = width * 4;
-
-  const char *sextants[]={
-   " 
","🬀","🬁","🬂","🬃","🬄","🬅","🬆","🬇","🬈","🬉","🬊","🬋","🬌","🬍","🬎","🬏","🬐","🬑","🬒","🬓","▌","🬔","🬕","🬖","🬗","🬘","🬙","🬚","🬛","🬜","🬝","🬞","🬟","🬠","🬡","🬢","🬣","🬤","🬥","🬦","🬧","▐","🬨","🬩","🬪","🬫","🬬","🬭","🬮","🬯","🬰","🬱","🬲","🬳","🬴","🬵","🬶","🬷","🬸","🬹","🬺","🬻","█"
-  };
+  int ret = ctx_font_find_glyph_cached (font, unichar);
+  if (ret >= 0) return ret;
 
-  for (int row = 0; row < height/ctx_term_ch; row++)
+  for (int i = 0; i < font->ctx.length; i++)
+  {
+    CtxEntry *entry = (CtxEntry *) &font->ctx.data[i];
+    if (entry->code == CTX_DEFINE_GLYPH &&
+        entry->data.u32[0] == unichar)
     {
-      for (int col = 0; col < width /ctx_term_cw; col++)
-        {
-          int     unicode = 0;
-          int     bitno = 0;
-          uint8_t rgba[2][4] = {
-                             {255,255,255,0},
-                             {0,0,0,0}};
+       return i;
+       // XXX this could be prone to insertion of valid header
+       // data in included bitmaps.. is that an issue?
+       //   
+    }
+  }
+  return -1;
+}
 
-          ctx_term_find_color_pair (term, col * ctx_term_cw,
-                                    row * ctx_term_ch,
-                                    ctx_term_cw,
-                                    ctx_term_ch, rgba);
 
-          int pixels_set = 0;
-          for (int y = 0; y < ctx_term_ch; y++)
-            for (int x = 0; x < ctx_term_cw; x++)
-              {
-                int no = (row * ctx_term_ch + y) * stride + (col*ctx_term_cw+x) * 4;
-#define CHECK_IS_SET \
-      (_ctx_rgba8_manhattan_diff (&pixels[no], rgba[0])< \
-       _ctx_rgba8_manhattan_diff (&pixels[no], rgba[1]))
+static float
+ctx_glyph_kern_ctx (CtxFont *font, Ctx *ctx, uint32_t unicharA, uint32_t unicharB)
+{
+  float font_size = ctx->state.gstate.font_size;
+  int first_kern = ctx_glyph_find_ctx (font, ctx, unicharA);
+  if (first_kern < 0) return 0.0;
 
-                int set = CHECK_IS_SET;
-#undef CHECK_IS_SET
-                if (set)
-                  { unicode |=  (1<< (bitno) ); 
-                    pixels_set ++; 
-                  }
-                bitno++;
-              }
+#if CTX_EVENTS
+  if (ctx_backend_type (ctx) == CTX_BACKEND_TERM && (3.02 - font_size) < 0.03)
+    return 0.0f;
+#endif
 
-          if (pixels_set == 6)
-            ctx_term_set (term, col +1, row + 1, " ",
-                          rgba[1], rgba[0]);
-          else
-            ctx_term_set (term, col +1, row + 1, sextants[unicode], rgba[0], rgba[1]);
+  for (int i = first_kern + 1; i < font->ctx.length; i++)
+    {
+      CtxEntry *entry = (CtxEntry *) &font->ctx.data[i];
+      if (entry->code == CTX_KERNING_PAIR)
+        {
+          if (entry->data.u16[0] == unicharA && entry->data.u16[1] == unicharB)
+            { return entry->data.s32[1] / 255.0 * font_size / CTX_BAKE_FONT_SIZE; }
         }
+      if (entry->code == CTX_DEFINE_GLYPH)
+        return 0.0;
     }
+  return 0.0;
 }
-
-static void ctx_term_output_buf_ascii (uint8_t *pixels,
-                          int width,
-                          int height,
-                          CtxTerm *term,
-                          int mono)
+#if 0
+static int ctx_glyph_find (Ctx *ctx, CtxFont *font, uint32_t unichar)
 {
-  /* this is a crude ascii-mode built on a quick mapping of sexels to ascii */
-  int stride = width * 4;
-  const char *sextants[]={
-   " ","`","'","^","🬃","`","~","\"","-","\"","'","\"","-","\"","~","^",",",";",
-   "=","/","i","[","p","P","z",")","/","7","f",">","/","F",",","\\",":",":",
-   "\\","\\","(","T","j","T","]","?","s","\\","<","q","_","=","=","=","c","L",
-   "Q","C","a","b","J","]","m","b","d","@"
-  };
-  uint8_t black[4] = {0,0,0,255};
-  for (int row = 0; row < height/ctx_term_ch; row++)
+  for (int i = 0; i < font->ctx.length; i++)
     {
-      for (int col = 0; col < width /ctx_term_cw; col++)
-        {
-          int     unicode = 0;
-          int     bitno = 0;
-          uint8_t rgba[2][4] = {
-                             {255,255,255,0},
-                             {0,0,0,0}};
-
-          ctx_term_find_color_pair (term, col * ctx_term_cw,
-                                    row * ctx_term_ch,
-                                    ctx_term_cw,
-                                    ctx_term_ch, rgba);
-
-
-          if (_ctx_rgba8_manhattan_diff (black, rgba[1]) >
-              _ctx_rgba8_manhattan_diff (black, rgba[0]))
-          {
-            for (int c = 0; c < 4; c ++)
-            {
-              int tmp = rgba[0][c];
-              rgba[0][c] = rgba[1][c];
-              rgba[1][c] = tmp;
-            }
-          }
-          if (mono)
-          {
-            rgba[1][0] = 0;
-            rgba[1][1] = 0;
-            rgba[1][2] = 0;
-          }
-
-
-          int brightest_dark_diff = _ctx_rgba8_manhattan_diff (black, rgba[0]);
+      CtxEntry *entry = (CtxEntry *) &font->ctx.data[i];
+      if (entry->code == CTX_DEFINE_GLYPH && entry->data.u32[0] == unichar)
+        { return i; }
+    }
+  return 0;
+}
+#endif
 
-          int pixels_set = 0;
-          for (int y = 0; y < ctx_term_ch; y++)
-            for (int x = 0; x < ctx_term_cw; x++)
-              {
-                int no = (row * ctx_term_ch + y) * stride + (col*ctx_term_cw+x) * 4;
-#define CHECK_IS_SET \
-      (_ctx_rgba8_manhattan_diff (&pixels[no], rgba[0])< \
-       _ctx_rgba8_manhattan_diff (&pixels[no], rgba[1]))
 
-                int set = CHECK_IS_SET;
-#undef CHECK_IS_SET
-                if (set)
-                  { unicode |=  (1<< (bitno) ); 
-                    pixels_set ++; 
-                  }
-                bitno++;
-              }
+static float
+ctx_glyph_width_ctx (CtxFont *font, Ctx *ctx, uint32_t unichar)
+{
+  CtxState *state = &ctx->state;
+  float font_size = state->gstate.font_size;
+  int   start     = ctx_glyph_find_ctx (font, ctx, unichar);
+  if (start < 0)
+    { return 0.0; }  // XXX : fallback
 
+#if CTX_EVENTS
+  if (ctx_backend_type (ctx) == CTX_BACKEND_TERM && (3.02 - font_size) < 0.03)
+    return 2.0f;
+#endif
 
-           if (pixels_set == 6 && brightest_dark_diff < 40)
-             ctx_term_set (term, col +1, row + 1, " ",
-                           rgba[1], rgba[0]);
-           else
-             ctx_term_set (term, col +1, row + 1, sextants[unicode],
-                           rgba[0], rgba[1]);
-        }
+  for (int i = start; i < font->ctx.length; i++)
+    {
+      CtxEntry *entry = (CtxEntry *) &font->ctx.data[i];
+      if (entry->code == CTX_DEFINE_GLYPH)
+        if (entry->data.u32[0] == (unsigned) unichar)
+          { return (entry->data.u32[1] / 255.0 * font_size / CTX_BAKE_FONT_SIZE); }
     }
+  return 0.0;
 }
 
-static void ctx_term_output_buf_braille (uint8_t *pixels,
-                          int width,
-                          int height,
-                          CtxTerm *term,
-                          int mono)
+static int
+ctx_glyph_drawlist (CtxFont *font, Ctx *ctx, CtxDrawlist *drawlist, uint32_t unichar, int stroke)
 {
-  int reverse = 0;
-  int stride = width * 4;
-  uint8_t black[4] = {0,0,0,255};
-  for (int row = 0; row < height/ctx_term_ch; row++)
+  CtxState *state = &ctx->state;
+  CtxIterator iterator;
+  float origin_x = state->x;
+  float origin_y = state->y;
+  ctx_current_point (ctx, &origin_x, &origin_y);
+  int in_glyph = 0;
+  float font_size = state->gstate.font_size;
+  int start = 0;
+  if (font->type == 0)
+  {
+  start = ctx_glyph_find_ctx (font, ctx, unichar);
+  if (start < 0)
+    { return -1; }  // XXX : fallback glyph
+  }
+  ctx_iterator_init (&iterator, drawlist, start, CTX_ITERATOR_EXPAND_BITPACK);
+  CtxCommand *command;
+
+  /* XXX :  do a binary search instead of a linear search */
+  while ( (command= ctx_iterator_next (&iterator) ) )
     {
-      for (int col = 0; col < width /ctx_term_cw; col++)
+      CtxEntry *entry = &command->entry;
+      if (in_glyph)
         {
-          int     unicode = 0;
-          int     bitno = 0;
-          uint8_t rgba[2][4] = {
-                             {255,255,255,0},
-                             {0,0,0,0}};
-
-          ctx_term_find_color_pair (term, col * ctx_term_cw,
-                                    row * ctx_term_ch,
-                                    ctx_term_cw,
-                                    ctx_term_ch, rgba);
-
-
-          /* make darkest consistently be background  */
-          if (_ctx_rgba8_manhattan_diff (black, rgba[1]) >
-              _ctx_rgba8_manhattan_diff (black, rgba[0]))
-          {
-            for (int c = 0; c < 4; c ++)
+          if (entry->code == CTX_DEFINE_GLYPH)
             {
-              int tmp = rgba[0][c];
-              rgba[0][c] = rgba[1][c];
-              rgba[1][c] = tmp;
+              if (stroke)
+                { ctx_stroke (ctx); }
+              else
+                {
+#if CTX_RASTERIZER
+#if CTX_ENABLE_SHADOW_BLUR
+      if (ctx->backend && ((CtxRasterizer*)(ctx->backend))->in_shadow)
+      {
+        ctx_rasterizer_shadow_fill ((CtxRasterizer*)ctx->backend);
+        ((CtxRasterizer*)(ctx->backend))->in_shadow = 1;
+      }
+      else
+#endif
+#endif
+         ctx_fill (ctx); 
+               
+                }
+              ctx_restore (ctx);
+              return 0;
             }
-          }
-          if (mono)
-          {
-            rgba[1][0] = 0;
-            rgba[1][1] = 0;
-            rgba[1][2] = 0;
-          }
-
-          int pixels_set = 0;
-          for (int x = 0; x < 2; x++)
-            for (int y = 0; y < 3; y++)
-              {
-                int no = (row * 4 + y) * stride + (col*2+x) * 4;
-#define CHECK_IS_SET \
-      (_ctx_rgba8_manhattan_diff (&pixels[no], rgba[0])< \
-       _ctx_rgba8_manhattan_diff (&pixels[no], rgba[1]))
+          ctx_process (ctx, entry);
+        }
+      else if (entry->code == CTX_DEFINE_GLYPH && entry->data.u32[0] == unichar)
+        {
+          in_glyph = 1;
+          ctx_save (ctx);
+          ctx_translate (ctx, origin_x, origin_y);
+          ctx_move_to (ctx, 0, 0);
+          ctx_begin_path (ctx);
+          ctx_scale (ctx, font_size / CTX_BAKE_FONT_SIZE,
+                     font_size / CTX_BAKE_FONT_SIZE);
+        }
+    }
+  if (stroke)
+    { ctx_stroke (ctx);
+    }
+  else
+    { 
+    
+#if CTX_RASTERIZER
+#if CTX_ENABLE_SHADOW_BLUR
+      if (ctx->backend && ((CtxRasterizer*)(ctx->backend))->in_shadow)
+      {
+        ctx_rasterizer_shadow_fill ((CtxRasterizer*)ctx->backend);
+        ((CtxRasterizer*)(ctx->backend))->in_shadow = 1;
+      }
+      else
+#endif
+#endif
+      {
+         ctx_fill (ctx); 
+      }
+    }
+  ctx_restore (ctx);
+  return -1;
+}
 
-                int set = CHECK_IS_SET;
-                if (reverse) { set = !set; }
-                if (set)
-                  { unicode |=  (1<< (bitno) ); 
-                    pixels_set ++; 
-                  }
-                bitno++;
-              }
-          {
-            int x = 0;
-            int y = 3;
-            int no = (row * 4 + y) * stride + (col*2+x) * 4;
-            int setA = CHECK_IS_SET;
-            no = (row * 4 + y) * stride + (col*2+x+1) * 4;
-            int setB = CHECK_IS_SET;
+static int
+ctx_glyph_ctx (CtxFont *font, Ctx *ctx, uint32_t unichar, int stroke)
+{
+  CtxDrawlist drawlist = { (CtxEntry *) font->ctx.data,
+                           font->ctx.length,
+                           font->ctx.length, 0, 0
+                         };
+  return ctx_glyph_drawlist (font, ctx, &drawlist, unichar, stroke);
+}
 
-            pixels_set += setA;
-            pixels_set += setB;
-#undef CHECK_IS_SET
-            if (reverse) { setA = !setA; }
-            if (reverse) { setB = !setB; }
-            if (setA != 0 && setB==0)
-              { unicode += 0x2840; }
-            else if (setA == 0 && setB)
-              { unicode += 0x2880; }
-            else if ( (setA != 0) && (setB != 0) )
-              { unicode += 0x28C0; }
-            else
-              { unicode += 0x2800; }
-            char utf8[5];
-            utf8[ctx_unichar_to_utf8 (unicode, (uint8_t*)utf8)]=0;
+#if 1
+uint32_t ctx_glyph_no (Ctx *ctx, int no)
+{
+  CtxFont *font = &ctx_fonts[ctx->state.gstate.font];
+  if (no < 0 || no >= font->ctx.glyphs)
+    { return 0; }
+  return font->ctx.index[no*2];
+}
+#endif
 
-#if 0
-            if (pixels_set == 8)
-            {
-              if (rgba[0][0] < 32 && rgba[0][1] < 32 && rgba[0][2] < 32)
-              {
-                ctx_term_set (term, col +1, row + 1, " ",
-                                 rgba[1], rgba[0]);
-                continue;
-              }
-            }
+static void ctx_font_init_ctx (CtxFont *font)
+{
+  int glyph_count = 0;
+  for (int i = 0; i < font->ctx.length; i++)
+    {
+      CtxEntry *entry = &font->ctx.data[i];
+      if (entry->code == CTX_DEFINE_GLYPH)
+        { glyph_count ++; }
+    }
+  font->ctx.glyphs = glyph_count;
+#if CTX_DRAWLIST_STATIC
+  static uint32_t idx[512]; // one might have to adjust this for
+  // larger fonts XXX
+  // should probably be made a #define
+  font->ctx.index = &idx[0];
+#else
+  font->ctx.index = (uint32_t *) malloc (sizeof (uint32_t) * 2 * glyph_count);
 #endif
-            {
-              ctx_term_set (term, col +1, row + 1, utf8,
-                               rgba[0], rgba[1]);
-            }
-          }
+  int no = 0;
+  for (int i = 0; i < font->ctx.length; i++)
+    {
+      CtxEntry *entry = &font->ctx.data[i];
+      if (entry->code == CTX_DEFINE_GLYPH)
+        {
+          font->ctx.index[no*2]   = entry->data.u32[0];
+          font->ctx.index[no*2+1] = i;
+          no++;
         }
     }
 }
 
+int
+ctx_load_font_ctx (const char *name, const void *data, int length);
+#if CTX_FONTS_FROM_FILE
+int
+ctx_load_font_ctx_file (const char *name, const char *path);
+#endif
 
-inline static void ctx_term_render (void *ctx,
-                                       CtxCommand *command)
+static CtxFontEngine ctx_font_engine_ctx =
 {
-  CtxTerm *term = (void*)ctx;
-  /* directly forward */
-  ctx_process (term->host, &command->entry);
+#if CTX_FONTS_FROM_FILE
+  ctx_load_font_ctx_file,
+#endif
+  ctx_load_font_ctx,
+  ctx_glyph_ctx,
+  ctx_glyph_width_ctx,
+  ctx_glyph_kern_ctx,
+};
+
+int
+ctx_load_font_ctx (const char *name, const void *data, int length)
+{
+  if (length % sizeof (CtxEntry) )
+    { return -1; }
+  if (ctx_font_count >= CTX_MAX_FONTS)
+    { return -1; }
+  ctx_fonts[ctx_font_count].type = 0;
+  ctx_fonts[ctx_font_count].name = name;
+  ctx_fonts[ctx_font_count].ctx.data = (CtxEntry *) data;
+  ctx_fonts[ctx_font_count].ctx.length = length / sizeof (CtxEntry);
+  ctx_font_init_ctx (&ctx_fonts[ctx_font_count]);
+  ctx_fonts[ctx_font_count].engine = &ctx_font_engine_ctx;
+  ctx_font_count++;
+  return ctx_font_count-1;
 }
 
-inline static void ctx_term_flush (CtxTerm *term)
+#if CTX_FONTS_FROM_FILE
+/* Load a ctx-format font from the file at path and register it as name.
+ *
+ * The file contents are handed to ctx_load_font_ctx (), which stores the
+ * pointer in the font slot, so on success the buffer has to stay
+ * allocated.  When registration is refused the buffer is released here
+ * instead of being leaked.
+ *
+ * Returns the font slot index, or -1 when the file could not be read or
+ * the data was rejected.
+ */
+int
+ctx_load_font_ctx_file (const char *name, const char *path)
 {
-  int width =  term->width;
-  int height = term->height;
-  switch (term->mode)
-  {
-    case CTX_TERM_QUARTER:
-       ctx_term_output_buf_quarter (term->pixels,
-                                width, height, term);
-       break;
-    case CTX_TERM_ASCII:
-       ctx_term_output_buf_ascii (term->pixels,
-                                width, height, term, 0);
-       break;
-    case CTX_TERM_ASCII_MONO:
-       ctx_term_output_buf_ascii (term->pixels,
-                                width, height, term, 1);
-       break;
-    case CTX_TERM_SEXTANT:
-       ctx_term_output_buf_sextant (term->pixels,
-                                width, height, term);
-       break;
-    case CTX_TERM_BRAILLE:
-       ctx_term_output_buf_braille (term->pixels,
-                                width, height, term, 0);
-       break;
-    case CTX_TERM_BRAILLE_MONO:
-       ctx_term_output_buf_braille (term->pixels,
-                                width, height, term, 1);
-       break;
-  }
-#if CTX_BRAILLE_TEXT
-  CtxRasterizer *rasterizer = (CtxRasterizer*)(term->host->renderer);
-  // XXX instead sort and inject along with braille
-  //
+  uint8_t *contents = NULL;
+  long length = 0;
+  ctx_get_contents (path, &contents, &length);
+  if (!contents)
+    {
+      ctx_log ( "File load failed\n");
+      return -1;
+    }
+  int font_no = ctx_load_font_ctx (name, contents, length);
+  if (font_no < 0)
+    free (contents); /* nobody took ownership of the buffer */
+  return font_no;
+}
+#endif
+#endif
 
-  //uint8_t rgba_bg[4]={0,0,0,0};
-  //uint8_t rgba_fg[4]={255,0,255,255};
+#if CTX_FONT_ENGINE_CTX_FS
 
-  for (CtxList *l = rasterizer->glyphs; l; l = l->next)
-  {
-    CtxTermGlyph *glyph = l->data;
+/* Kerning callback of the ctx_fs font engine.
+ *
+ * Kerning is not implemented for this engine: the lookup below is compiled
+ * out with #if 0, so the function always returns 0.0 regardless of the
+ * code point pair.
+ */
+static float
+ctx_glyph_kern_ctx_fs (CtxFont *font, Ctx *ctx, uint32_t unicharA, uint32_t unicharB)
+{
+#if 0
+  float font_size = ctx->state.gstate.font_size;
+  int first_kern = ctx_glyph_find_ctx (font, ctx, unicharA);
+  if (first_kern < 0) return 0.0;
+  for (int i = first_kern + 1; i < font->ctx.length; i++)
+    {
+      CtxEntry *entry = (CtxEntry *) &font->ctx.data[i];
+      if (entry->code == CTX_KERNING_PAIR)
+        {
+          if (entry->data.u16[0] == unicharA && entry->data.u16[1] == unicharB)
+            { return entry->data.s32[1] / 255.0 * font_size / CTX_BAKE_FONT_SIZE; }
+        }
+      if (entry->code == CTX_DEFINE_GLYPH)
+        return 0.0;
+    }
+#endif
+  return 0.0;
+}
 
-    uint8_t *pixels = term->pixels;
-    long rgb_sum[4]={0,0,0};
-    for (int v = 0; v <  ctx_term_ch; v ++)
-    for (int u = 0; u <  ctx_term_cw; u ++)
+/* Glyph-width callback of the ctx_fs font engine.
+ *
+ * Reads the per-glyph file "<font path>/<code point>", parses it into a
+ * temporary drawlist context and scales the advance stored in its
+ * CTX_DEFINE_GLYPH entry by font_size / CTX_BAKE_FONT_SIZE.
+ *
+ * Returns 0.0 when the glyph file cannot be read, the path does not fit in
+ * the local buffer, or no CTX_DEFINE_GLYPH entry is found.
+ */
+static float
+ctx_glyph_width_ctx_fs (CtxFont *font, Ctx *ctx, uint32_t unichar)
+{
+  CtxState *state = &ctx->state;
+  char path[1024];
+  /* "%010p" was handed a uint32_t, which is undefined for %p; "0x%08x" is
+   * meant to produce the same 10 character name glibc printed for non-zero
+   * code points - NOTE(review): confirm against the font exporter. */
+  int path_len = snprintf (path, sizeof (path), "%s/0x%08x",
+                           font->ctx_fs.path, (unsigned int) unichar);
+  if (path_len < 0 || (size_t) path_len >= sizeof (path))
+    return 0.0f; /* glyph path does not fit in the buffer */
+  uint8_t *data = NULL;
+  long int len_bytes = 0;
+  ctx_get_contents (path, &data, &len_bytes);
+  float ret = 0.0;
+  float font_size = state->gstate.font_size;
+  if (data){
+    /* ctx_new () takes (width, height, backend); this context only holds
+     * the parsed commands, so the 100x100 size is an arbitrary placeholder */
+    Ctx *glyph_ctx = ctx_new (100, 100, "drawlist");
+    ctx_parse (glyph_ctx, data);
+    for (int i = 0; i < glyph_ctx->drawlist.count; i++)
     {
-      int i = ((glyph->row-1) * ctx_term_ch + v) * rasterizer->blit_width + 
-              ((glyph->col-1) * ctx_term_cw + u);
-      for (int c = 0; c < 3; c ++)
-        rgb_sum[c] += pixels[i*4+c];
+      CtxEntry *e = &glyph_ctx->drawlist.entries[i];
+      if (e->code == CTX_DEFINE_GLYPH)
+        ret = e->data.u32[1] / 255.0 * font_size / CTX_BAKE_FONT_SIZE;
     }
-    for (int c = 0; c < 3; c ++)
-      glyph->rgba_bg[c] = rgb_sum[c] / (ctx_term_ch * ctx_term_cw);
-    char utf8[8];
-    utf8[ctx_unichar_to_utf8(glyph->unichar, (uint8_t*)utf8)]=0;
-    ctx_term_set (term, glyph->col, glyph->row, 
-                     utf8, glyph->rgba_fg, glyph->rgba_bg);
-    free (glyph);
+    free (data);
+    ctx_free (glyph_ctx);
   }
-
-  printf ("\e[H");
-  printf ("\e[0m");
-  ctx_term_scanout (term);
-  printf ("\e[0m");
-  fflush(NULL);
-  while (rasterizer->glyphs)
-    ctx_list_remove (&rasterizer->glyphs, rasterizer->glyphs->data);
-#endif
+  return ret;
 }
 
-void ctx_term_free (CtxTerm *term)
+/* Glyph-drawing callback of the ctx_fs font engine.
+ *
+ * Reads the per-glyph file "file://<font path>/<code point>", parses it
+ * into a temporary drawlist context and forwards that drawlist to
+ * ctx_glyph_drawlist ().
+ *
+ * Returns the result of ctx_glyph_drawlist (), or -1 when the glyph file
+ * cannot be read or its path does not fit in the local buffer.
+ */
+static int
+ctx_glyph_ctx_fs (CtxFont *font, Ctx *ctx, uint32_t unichar, int stroke)
 {
-  while (term->lines)
-  {
-    free (term->lines->data);
-    ctx_list_remove (&term->lines, term->lines->data);
+  char path[1024];
+  /* "%010p" was handed a uint32_t, which is undefined for %p; "0x%08x" is
+   * meant to produce the same 10 character name glibc printed for non-zero
+   * code points - NOTE(review): confirm against the font exporter. */
+  int path_len = snprintf (path, sizeof (path), "file://%s/0x%08x",
+                           font->ctx_fs.path, (unsigned int) unichar);
+  if (path_len < 0 || (size_t) path_len >= sizeof (path))
+    return -1; /* glyph path does not fit in the buffer */
+  uint8_t *data = NULL;
+  long int len_bytes = 0;
+  ctx_get_contents (path, &data, &len_bytes);
+
+  if (data){
+    /* ctx_new () takes (width, height, backend); this context only holds
+     * the parsed commands, so the 100x100 size is an arbitrary placeholder */
+    Ctx *glyph_ctx = ctx_new (100, 100, "drawlist");
+    ctx_parse (glyph_ctx, data);
+    int ret = ctx_glyph_drawlist (font, ctx, &(glyph_ctx->drawlist),
+                                  unichar, stroke);
+    free (data);
+    ctx_free (glyph_ctx);
+    return ret;
   }
-  printf ("\e[?25h"); // cursor on
-  nc_at_exit ();
-  free (term->pixels);
-  ctx_free (term->host);
-  free (term);
-  /* we're not destoring the ctx member, this is function is called in ctx' teardown */
+  return -1;
 }
 
-int ctx_renderer_is_term (Ctx *ctx)
+/* Forward declaration: the engine table below refers to the loader, which
+ * is defined after the table. */
+int
+ctx_load_font_ctx_fs (const char *name, const void *data, int length);
+
+/* Function table of the ctx_fs font engine.
+ * NOTE(review): the slot order has to match the CtxFontEngine declaration,
+ * which is not visible here; the leading NULL under CTX_FONTS_FROM_FILE is
+ * presumably the load-from-file hook, which this engine does not provide. */
+static CtxFontEngine ctx_font_engine_ctx_fs =
 {
-  if (ctx->renderer &&
-      ctx->renderer->free == (void*)ctx_term_free)
-          return 1;
-  return 0;
+#if CTX_FONTS_FROM_FILE
+  NULL,
+#endif
+  ctx_load_font_ctx_fs,
+  ctx_glyph_ctx_fs,
+  ctx_glyph_width_ctx_fs,
+  ctx_glyph_kern_ctx_fs,
+};
+
+/* Register a ctx_fs font: a directory holding one ctx file per glyph.
+ *
+ * name:   kept by pointer in the font slot, not copied
+ * path:   directory path (a C string); it is duplicated, and one trailing
+ *         '/' is stripped from the copy
+ * length: unused, present only to match the font engine load signature
+ *
+ * Returns the index of the newly used font slot, or -1 when path is NULL,
+ * the copy cannot be allocated or all CTX_MAX_FONTS slots are in use.
+ */
+int
+ctx_load_font_ctx_fs (const char *name, const void *path, int length) // length is ignored
+{
+  if (!path || ctx_font_count >= CTX_MAX_FONTS)
+    { return -1; }
+
+  char *dir = strdup ((const char *) path);
+  if (!dir)
+    { return -1; }
+  /* guard the empty string: indexing [len-1] on it would touch dir[-1] */
+  size_t dir_len = strlen (dir);
+  if (dir_len > 0 && dir[dir_len-1] == '/')
+    dir[dir_len-1] = 0;
+
+  ctx_fonts[ctx_font_count].type = 42;
+  ctx_fonts[ctx_font_count].name = name;
+  ctx_fonts[ctx_font_count].ctx_fs.path = dir;
+  ctx_fonts[ctx_font_count].engine = &ctx_font_engine_ctx_fs;
+  ctx_font_count++;
+  return ctx_font_count-1;
 }
 
-float ctx_term_get_cell_width (Ctx *ctx)
+#endif
+
+/* Draw one glyph with the font currently selected in ctx's graphics state,
+ * by calling that font engine's glyph callback directly.  Returns whatever
+ * the engine callback returns. */
+int
+_ctx_glyph (Ctx *ctx, uint32_t unichar, int stroke)
 {
-  return ctx_term_cw;
+  CtxFont *font = &ctx_fonts[ctx->state.gstate.font];
+  // a begin-path here did not remove stray spikes in terminal
+  return font->engine->glyph (font, ctx, unichar, stroke);
 }
 
-float ctx_term_get_cell_height (Ctx *ctx)
+/* Public entry point for drawing a single glyph.
+ *
+ * With CTX_BACKEND_TEXT a CTX_GLYPH command carrying unichar is built, the
+ * stroke flag is stored in data.u8[4] and the command is passed to
+ * ctx_process (); the return value is then always 0.  Without it the glyph
+ * is drawn immediately through _ctx_glyph () and its result is returned.
+ */
+int
+ctx_glyph (Ctx *ctx, uint32_t unichar, int stroke)
 {
-  return ctx_term_ch;
+#if CTX_BACKEND_TEXT
+  CtxEntry commands[3]; // 3 to silence incorrect warning from static analysis
+  ctx_memset (commands, 0, sizeof (commands) );
+  commands[0] = ctx_u32 (CTX_GLYPH, unichar, 0);
+  commands[0].data.u8[4] = stroke;
+  ctx_process (ctx, commands);
+  return 0; // XXX is return value used?
+#else
+  return _ctx_glyph (ctx, unichar, stroke);
+#endif
 }
 
-Ctx *ctx_new_term (int width, int height)
+/* Width of the glyph for unichar, as reported by the glyph_width callback
+ * of the font currently selected in ctx's graphics state. */
+float
+ctx_glyph_width (Ctx *ctx, int unichar)
 {
-  Ctx *ctx = ctx_new ();
-#if CTX_RASTERIZER
-  CtxTerm *term = (CtxTerm*)calloc (sizeof (CtxTerm), 1);
- 
-  const char *mode = getenv ("CTX_TERM_MODE");
-  ctx_term_cw = 2;
-  ctx_term_ch = 3;
+  CtxFont *font = &ctx_fonts[ctx->state.gstate.font];
 
-  if (!mode) term->mode = CTX_TERM_SEXTANT;
-  else if (!strcmp (mode, "sextant")) term->mode = CTX_TERM_SEXTANT;
-  else if (!strcmp (mode, "ascii")) term->mode = CTX_TERM_ASCII_MONO;
-  //else if (!strcmp (mode, "ascii-mono")) term->mode = CTX_TERM_ASCII_MONO;
-  else if (!strcmp (mode, "quarter")) term->mode = CTX_TERM_QUARTER;
-  //else if (!strcmp (mode, "braille")){
-  //  term->mode = CTX_TERM_BRAILLE;
-  //  ctx_term_ch = 4;
-  //}
-  else if (!strcmp (mode, "braille")){
-    term->mode = CTX_TERM_BRAILLE_MONO;
-    ctx_term_ch = 4;
-  }
-  else {
-    fprintf (stderr, "recognized values for CTX_TERM_MODE:\n"
-                    " sextant ascii quarter braille\n");
-    exit (1);
-  }
+  return font->engine->glyph_width (font, ctx, unichar);
+}
 
-  mode = getenv ("CTX_TERM_FORCE_FULL");
-  if (mode && strcmp (mode, "0") && strcmp (mode, "no"))
-    _ctx_term_force_full = 1;
+/* Kerning adjustment between two consecutive code points, as reported by
+ * the glyph_kern callback of the font selected in ctx's graphics state. */
+static float
+ctx_glyph_kern (Ctx *ctx, int unicharA, int unicharB)
+{
+  CtxFont *active = &ctx_fonts[ctx->state.gstate.font];
+  float    kern   = active->engine->glyph_kern (active, ctx, unicharA, unicharB);
+
+  return kern;
+}
 
-  fprintf (stdout, "\e[?1049h");
-  fprintf (stdout, "\e[?25l"); // cursor off
+/* Sum of the glyph widths of every code point in the UTF-8 string, using
+ * the currently selected font.  Kerning is not included.  A NULL string
+ * measures as 0. */
+float
+ctx_text_width (Ctx        *ctx,
+                const char *string)
+{
+  if (string == NULL)
+    return 0.0f;
+
+  float       total  = 0.0;
+  const char *cursor = string;
+  while (*cursor)
+    {
+      total += ctx_glyph_width (ctx, ctx_utf8_to_unichar (cursor) );
+      cursor = ctx_utf8_skip (cursor, 1);
+    }
+  return total;
+}
 
-  int maxwidth = ctx_terminal_cols  () * ctx_term_cw;
-  int maxheight = (ctx_terminal_rows ()) * ctx_term_ch;
-  if (width <= 0 || height <= 0)
-  {
-    width = maxwidth;
-    height = maxheight;
-  }
-  if (width > maxwidth) width = maxwidth;
-  if (height > maxheight) height = maxheight;
-  term->ctx = ctx;
-  term->width  = width;
-  term->height = height;
+/* Draw n_glyphs pre-positioned glyphs: for each one the pen is moved to
+ * the glyph's x,y and ctx_glyph () is called with its index and the given
+ * stroke flag. */
+static void
+_ctx_glyphs (Ctx     *ctx,
+             CtxGlyph *glyphs,
+             int       n_glyphs,
+             int       stroke)
+{
+  int no = 0;
+  while (no < n_glyphs)
+    {
+      uint32_t code = glyphs[no].index;
+      ctx_move_to (ctx, glyphs[no].x, glyphs[no].y);
+      ctx_glyph (ctx, code, stroke);
+      no++;
+    }
+}
 
-  term->cols = (width + 1) / ctx_term_cw;
-  term->rows = (height + 2) / ctx_term_ch;
-  term->lines = 0;
-  term->pixels = (uint8_t*)malloc (width * height * 4);
-  term->host = ctx_new_for_framebuffer (term->pixels,
-                                           width, height,
-                                           width * 4, CTX_FORMAT_RGBA8);
-#if CTX_BRAILLE_TEXT
-  ((CtxRasterizer*)term->host->renderer)->term_glyphs=1;
-#endif
-  _ctx_mouse (ctx, NC_MOUSE_DRAG);
-  ctx_set_renderer (ctx, term);
-  ctx_set_size (ctx, width, height);
-  ctx_font_size (ctx, ctx_term_ch); 
-  term->render = ctx_term_render;
-  term->flush = (void(*)(void*))ctx_term_flush;
-  term->free  = (void(*)(void*))ctx_term_free;
-#endif
+/* Lay out, and optionally draw, a UTF-8 string starting at the current pen
+ * position.
+ *
+ * string:  UTF-8 text; '\n' starts a new line.  Dereferenced without a
+ *          NULL check.
+ * stroke:  passed through to _ctx_glyph () for every glyph.
+ * visible: when non-zero each glyph is drawn and the pen is moved after
+ *          every glyph; when zero nothing is drawn and only one final
+ *          ctx_move_to () to the end position is issued.
+ *
+ * The horizontal start is shifted according to CTX_text_align, using
+ * ctx_text_width () of the whole string.  The CTX_text_baseline offset is
+ * applied only to the position glyphs are drawn at, not to the pen
+ * position left behind.
+ */
+static void
+_ctx_text (Ctx        *ctx,
+           const char *string,
+           int         stroke,
+           int         visible)
+{
+  CtxState *state = &ctx->state;
+  float x = ctx->state.x;
+  switch ( (int) ctx_state_get (state, CTX_text_align) )
+    //switch (state->gstate.text_align)
+    {
+      case CTX_TEXT_ALIGN_START:
+      case CTX_TEXT_ALIGN_LEFT:
+        break;
+      case CTX_TEXT_ALIGN_CENTER:
+        x -= ctx_text_width (ctx, string) /2;
+        break;
+      case CTX_TEXT_ALIGN_END:
+      case CTX_TEXT_ALIGN_RIGHT:
+        x -= ctx_text_width (ctx, string);
+        break;
+    }
+  float y = ctx->state.y;
+  float baseline_offset = 0.0f;
+  switch ( (int) ctx_state_get (state, CTX_text_baseline) )
+    {
+      case CTX_TEXT_BASELINE_HANGING:
+        /* XXX : crude */
+        baseline_offset = ctx->state.gstate.font_size  * 0.55;
+        break;
+      case CTX_TEXT_BASELINE_TOP:
+        /* XXX : crude */
+        baseline_offset = ctx->state.gstate.font_size  * 0.7;
+        break;
+      case CTX_TEXT_BASELINE_BOTTOM:
+        baseline_offset = -ctx->state.gstate.font_size * 0.1;
+        break;
+      case CTX_TEXT_BASELINE_ALPHABETIC:
+      case CTX_TEXT_BASELINE_IDEOGRAPHIC:
+        baseline_offset = 0.0f;
+        break;
+      case CTX_TEXT_BASELINE_MIDDLE:
+        baseline_offset = ctx->state.gstate.font_size * 0.25;
+        break;
+    }
+  /* x0 remembers the aligned line start so that '\n' can return to it */
+  float x0 = x;
+  for (const char *utf8 = string; *utf8; utf8 = ctx_utf8_skip (utf8, 1) )
+    {
+      if (*utf8 == '\n')
+        {
+          /* line feed: advance by font_size * line_spacing, reset x */
+          y += ctx->state.gstate.font_size * ctx_state_get (state, CTX_line_spacing);
+          x = x0;
+          if (visible)
+            { ctx_move_to (ctx, x, y); }
+        }
+      else
+        {
+          uint32_t unichar = ctx_utf8_to_unichar (utf8);
+          if (visible)
+            {
+              ctx_move_to (ctx, x, y + baseline_offset);
+              _ctx_glyph (ctx, unichar, stroke);
+            }
+          /* advance by the glyph width plus kerning against the code point
+           * that follows (the terminating NUL for the last glyph) */
+          const char *next_utf8 = ctx_utf8_skip (utf8, 1);
+          if (next_utf8)
+            {
+              x += ctx_glyph_width (ctx, unichar);
+              x += ctx_glyph_kern (ctx, unichar, ctx_utf8_to_unichar (next_utf8) );
+            }
+          if (visible)
+            { ctx_move_to (ctx, x, y); }
+        }
+    }
+  if (!visible)
+    { ctx_move_to (ctx, x, y); }
+}
 
 
-  return ctx;
+/* Allocate an uninitialized array of n_glyphs CtxGlyph records with
+ * malloc (); the result of malloc () is returned unchecked. */
+CtxGlyph *
+ctx_glyph_allocate (int n_glyphs)
+{
+  void *storage = malloc (sizeof (CtxGlyph) * n_glyphs);
+  return (CtxGlyph *) storage;
+}
+/* Release a glyph array with free ().
+ * NOTE(review): the "gtx_" prefix looks like a typo for "ctx_"; the name is
+ * left untouched here because callers may depend on it. */
+void
+gtx_glyph_free     (CtxGlyph *glyphs)
+{
+  free (glyphs);
 }
 
-#endif
+/* Draw the positioned glyphs with the stroke flag cleared. */
+void
+ctx_glyphs (Ctx        *ctx,
+            CtxGlyph   *glyphs,
+            int         n_glyphs)
+{
+  const int stroke = 0;
+  _ctx_glyphs (ctx, glyphs, n_glyphs, stroke);
+}
 
-#if CTX_EVENTS
+/* Draw the positioned glyphs with the stroke flag set. */
+void
+ctx_glyphs_stroke (Ctx        *ctx,
+                   CtxGlyph   *glyphs,
+                   int         n_glyphs)
+{
+  const int stroke = 1;
+  _ctx_glyphs (ctx, glyphs, n_glyphs, stroke);
+}
 
-#if !__COSMOPOLITAN__
-#include <fcntl.h>
-#include <sys/ioctl.h>
+/* Draw a UTF-8 string at the current pen position, without stroking.
+ *
+ * A NULL string is ignored.  With CTX_BACKEND_TEXT the string is emitted
+ * as a CTX_TEXT command and _ctx_text () runs with visible == 0, which only
+ * moves the pen to the end of the text; otherwise _ctx_text () draws the
+ * glyphs directly.
+ */
+void
+ctx_text (Ctx        *ctx,
+          const char *string)
+{
+  if (!string)
+    return;
+#if CTX_BACKEND_TEXT
+  ctx_process_cmd_str (ctx, CTX_TEXT, string, 0, 0);
+  _ctx_text (ctx, string, 0, 0);
+#else
+  _ctx_text (ctx, string, 0, 1);
 #endif
+}
 
-typedef struct _CtxTermImg CtxTermImg;
-struct _CtxTermImg
-{
-   void (*render)         (void *termimg, CtxCommand *command);
-   void (*reset)          (void *termimg);
-   void (*flush)          (void *termimg);
-   char *(*get_clipboard) (void *ctxctx);
-   void (*set_clipboard)  (void *ctxctx, const char *text);
-   void (*free)           (void *termimg);
-   Ctx      *ctx;
-   int       width;
-   int       height;
-   int       cols;
-   int       rows;
-   int       was_down;
-   // we need to have the above members in that order up to here
-   uint8_t  *pixels;
-   Ctx      *host;
-   CtxList  *lines;
-};
 
-inline static void ctx_termimg_render (void       *ctx,
-                                       CtxCommand *command)
+/* Move the pen to x,y and draw string there with ctx_text (). */
+void
+ctx_fill_text (Ctx *ctx, const char *string,
+               float x, float y)
 {
-  CtxTermImg *termimg = (void*)ctx;
-  /* directly forward */
-  ctx_process (termimg->host, &command->entry);
+  ctx_move_to (ctx, x, y);
+  ctx_text (ctx, string);
 }
 
-inline static void ctx_termimg_flush (CtxTermImg *termimg)
+/* Stroked counterpart of ctx_text (): draw a UTF-8 string at the current
+ * pen position with the stroke flag set.
+ *
+ * A NULL string is ignored.  With CTX_BACKEND_TEXT the string is emitted
+ * as a CTX_STROKE_TEXT command and _ctx_text () runs with visible == 0,
+ * which only moves the pen; otherwise the glyphs are drawn directly.
+ */
+void
+ctx_text_stroke (Ctx        *ctx,
+                 const char *string)
 {
-  int width =  termimg->width;
-  int height = termimg->height;
-  if (!termimg->pixels) return;
-  char *encoded = malloc (width * height * 3 * 3);
-  ctx_bin2base64 (termimg->pixels, width * height * 3,
-                  encoded);
-  int encoded_len = strlen (encoded);
-
-  int i = 0;
+  if (!string)
+    return;
+#if CTX_BACKEND_TEXT
+  ctx_process_cmd_str (ctx, CTX_STROKE_TEXT, string, 0, 0);
+  _ctx_text (ctx, string, 1, 0);
+#else
+  _ctx_text (ctx, string, 1, 1);
+#endif
+}
 
-  printf ("\e[H");
-  printf ("\e_Gf=24,s=%i,v=%i,t=d,a=T,m=1;\e\\", width, height);
-  while (i <  encoded_len)
-  {
-     if (i + 4096 <  encoded_len)
-     {
-       printf  ("\e_Gm=1;");
-     }
-     else
-     {
-       printf  ("\e_Gm=0;");
-     }
-     for (int n = 0; n < 4000 && i < encoded_len; n++)
-     {
-       printf ("%c", encoded[i]);
-       i++;
-     }
-     printf ("\e\\");
-  }
-  free (encoded);
-  
-  fflush (NULL);
+/* Move the pen to x,y and draw string there with ctx_text_stroke (). */
+void
+ctx_stroke_text (Ctx *ctx, const char *string,
+               float x, float y)
+{
+  ctx_move_to (ctx, x, y);
+  ctx_text_stroke (ctx, string);
 }
 
-void ctx_termimg_free (CtxTermImg *termimg)
+/* Look up a loaded font by name.
+ *
+ * The first pass looks for a font whose name compares equal under
+ * ctx_strcmp (); if none is found, the second pass accepts the first font
+ * for which ctx_strstr (font name, name) matches.
+ *
+ * Returns the font index, or -1 when neither pass finds a font.
+ */
+static int _ctx_resolve_font (const char *name)
 {
-  while (termimg->lines)
-  {
-    free (termimg->lines->data);
-    ctx_list_remove (&termimg->lines, termimg->lines->data);
-  }
-  printf ("\e[?25h"); // cursor on
-  nc_at_exit ();
-  free (termimg->pixels);
-  ctx_free (termimg->host);
-  free (termimg);
-  /* we're not destoring the ctx member, this is function is called in ctx' teardown */
+  for (int i = 0; i < ctx_font_count; i ++)
+    {
+      if (!ctx_strcmp (ctx_fonts[i].name, name) )
+        { return i; }
+    }
+  for (int i = 0; i < ctx_font_count; i ++)
+    {
+      if (ctx_strstr (ctx_fonts[i].name, name) )
+        { return i; }
+    }
+  return -1;
 }
 
-int ctx_renderer_is_termimg (Ctx *ctx)
+/* Resolve a font name to a font index that is always usable.
+ *
+ * The name is first looked up with _ctx_resolve_font ().  If that fails
+ * and the name is "regular", "sans" and then "serif" are tried.  When
+ * nothing matches, index 0 (the first font slot) is returned as fallback.
+ */
+int ctx_resolve_font (const char *name)
 {
-  if (ctx->renderer &&
-      ctx->renderer->free == (void*)ctx_termimg_free)
-          return 1;
+  int ret = _ctx_resolve_font (name);
+  if (ret >= 0)
+    { return ret; }
+  if (!ctx_strcmp (name, "regular") )
+    {
+      /* reuse ret rather than declaring a second, shadowing variable */
+      ret = _ctx_resolve_font ("sans");
+      if (ret >= 0) { return ret; }
+      ret = _ctx_resolve_font ("serif");
+      if (ret >= 0) { return ret; }
+    }
   return 0;
 }
 
-Ctx *ctx_new_termimg (int width, int height)
+/* One-time registration of the fonts selected at compile time.
+ *
+ * A static flag makes every call after the first a no-op.  With
+ * CTX_FONT_ENGINE_CTX the font count is reset and the ctx-format fonts
+ * enabled through the CTX_FONT_* macros are loaded (or, with
+ * CTX_FONT_ENGINE_CTX_FS, the directory font at /tmp/ctx-regular).  After
+ * that the TTF fonts enabled through their own macros are loaded with
+ * ctx_load_font_ttf ().  Return values of the loaders are not checked.
+ */
+static void ctx_font_setup (void)
 {
-  Ctx *ctx = ctx_new ();
-#if CTX_RASTERIZER
-  fprintf (stdout, "\e[?1049h");
-  fprintf (stdout, "\e[?25l"); // cursor off
-  CtxTermImg *termimg = (CtxTermImg*)calloc (sizeof (CtxTermImg), 1);
-
-
-  int maxwidth = ctx_terminal_width ();
+  static int initialized = 0;
+  if (initialized) { return; }
+  initialized = 1;
+#if CTX_FONT_ENGINE_CTX
+  ctx_font_count = 0; // oddly - this is needed in arduino
 
-  int colwidth = maxwidth/ctx_terminal_cols ();
-  maxwidth-=colwidth;
+#if CTX_FONT_ENGINE_CTX_FS
+  ctx_load_font_ctx_fs ("sans-ctx", "/tmp/ctx-regular", 0);
+#else
+#if CTX_FONT_ascii
+  ctx_load_font_ctx ("sans-ctx", ctx_font_ascii, sizeof (ctx_font_ascii) );
+#endif
+#if CTX_FONT_regular
+  ctx_load_font_ctx ("sans-ctx", ctx_font_regular, sizeof (ctx_font_regular) );
+#endif
+#endif
 
-  int maxheight = ctx_terminal_height ();
-  if (width <= 0 || height <= 0)
-  {
-    width  = maxwidth;
-    height = maxheight;
-  }
-  if (width > maxwidth) width = maxwidth;
-  if (height > maxheight) height = maxheight;
-  termimg->ctx = ctx;
-  termimg->width  = width;
-  termimg->height = height;
-  termimg->lines = 0;
-  termimg->pixels = (uint8_t*)malloc (width * height * 3);
-  termimg->host = ctx_new_for_framebuffer (termimg->pixels,
-                                           width, height,
-                                           width * 3, CTX_FORMAT_RGB8);
-  _ctx_mouse (ctx, NC_MOUSE_DRAG);
-  ctx_set_renderer (ctx, termimg);
-  ctx_set_size (ctx, width, height);
-  ctx_font_size (ctx, 14.0f);
-  termimg->render = ctx_termimg_render;
-  termimg->flush = (void(*)(void*))ctx_termimg_flush;
-  termimg->free  = (void(*)(void*))ctx_termimg_free;
+#if CTX_FONT_mono
+  ctx_load_font_ctx ("mono-ctx", ctx_font_mono, sizeof (ctx_font_mono) );
+#endif
+#if CTX_FONT_bold
+  ctx_load_font_ctx ("bold-ctx", ctx_font_bold, sizeof (ctx_font_bold) );
+#endif
+#if CTX_FONT_italic
+  ctx_load_font_ctx ("italic-ctx", ctx_font_italic, sizeof (ctx_font_italic) );
+#endif
+#if CTX_FONT_sans
+  ctx_load_font_ctx ("sans-ctx", ctx_font_sans, sizeof (ctx_font_sans) );
+#endif
+#if CTX_FONT_serif
+  ctx_load_font_ctx ("serif-ctx", ctx_font_serif, sizeof (ctx_font_serif) );
+#endif
+#if CTX_FONT_symbol
+  ctx_load_font_ctx ("symbol-ctx", ctx_font_symbol, sizeof (ctx_font_symbol) );
+#endif
+#if CTX_FONT_emoji
+  ctx_load_font_ctx ("emoji-ctx", ctx_font_emoji, sizeof (ctx_font_emoji) );
+#endif
 #endif
 
-  return ctx;
+#if NOTO_EMOJI_REGULAR
+  ctx_load_font_ttf ("sans-NotoEmoji_Regular", ttf_NotoEmoji_Regular_ttf, ttf_NotoEmoji_Regular_ttf_len);
+#endif
+#if ROBOTO_LIGHT
+  ctx_load_font_ttf ("sans-light-Roboto_Light", ttf_Roboto_Light_ttf, ttf_Roboto_Light_ttf_len);
+#endif
+#if ROBOTO_REGULAR
+  ctx_load_font_ttf ("sans-Roboto_Regular", ttf_Roboto_Regular_ttf, ttf_Roboto_Regular_ttf_len);
+#endif
+#if ROBOTO_BOLD
+  ctx_load_font_ttf ("sans-bold-Roboto_Bold", ttf_Roboto_Bold_ttf, ttf_Roboto_Bold_ttf_len);
+#endif
+#if DEJAVU_SANS
+  ctx_load_font_ttf ("sans-DejaVuSans", ttf_DejaVuSans_ttf, ttf_DejaVuSans_ttf_len);
+#endif
+#if VERA
+  ctx_load_font_ttf ("sans-Vera", ttf_Vera_ttf, ttf_Vera_ttf_len);
+#endif
+#if UNSCII_16
+  ctx_load_font_ttf ("mono-unscii16", ttf_unscii_16_ttf, ttf_unscii_16_ttf_len);
+#endif
+#if XA000_MONO
+  ctx_load_font_ttf ("mono-0xA000", ttf_0xA000_Mono_ttf, ttf_0xA000_Mono_ttf_len);
+#endif
+#if DEJAVU_SANS_MONO
+  ctx_load_font_ttf ("mono-DejaVuSansMono", ttf_DejaVuSansMono_ttf, ttf_DejaVuSansMono_ttf_len);
+#endif
+#if NOTO_MONO_REGULAR
+  ctx_load_font_ttf ("mono-NotoMono_Regular", ttf_NotoMono_Regular_ttf, ttf_NotoMono_Regular_ttf_len);
+#endif
 }
 
-#endif
+
 
 #if CTX_FORMATTER
 
@@ -34085,29 +36003,13 @@ ctx_render_string (Ctx *ctx, int longform, int *retlen)
 #include <ctype.h>
 #include <sys/stat.h>
 
-#if CTX_EVENTS
+/* Width stored in the context itself (ctx->width). */
 int ctx_width (Ctx *ctx)
 {
-  return ctx->events.width;
+  return ctx->width;
 }
+/* Height stored in the context itself (ctx->height). */
 int ctx_height (Ctx *ctx)
 {
-  return ctx->events.height;
-}
-#else
-int ctx_width (Ctx *ctx)
-{
-  return 512;
-}
-int ctx_height (Ctx *ctx)
-{
-  return 384;
-}
-#endif
-
-int ctx_rev (Ctx *ctx)
-{
-  return ctx->rev;
+  return ctx->height;
 }
 
 CtxState *ctx_get_state (Ctx *ctx)
@@ -34266,9 +36168,9 @@ ctx_get_image_data (Ctx *ctx, int sx, int sy, int sw, int sh,
    {
    }
 #if CTX_RASTERIZER
-   else if (_ctx_is_rasterizer (ctx))
+   else if (ctx_backend_type (ctx) == CTX_BACKEND_RASTERIZER)
    {
-     CtxRasterizer *rasterizer = (CtxRasterizer*)ctx->renderer;
+     CtxRasterizer *rasterizer = (CtxRasterizer*)ctx->backend;
      if (rasterizer->format->pixel_format == format)
      {
        if (dst_stride <= 0) dst_stride = ctx_pixel_format_get_stride (format, sw);
@@ -34287,20 +36189,10 @@ ctx_get_image_data (Ctx *ctx, int sx, int sy, int sw, int sh,
      }
    }
 #endif
-   else if (format == CTX_FORMAT_RGBA8 &&
-                   ( 1
-#if CTX_FB
-                   || ctx_renderer_is_fb (ctx)
-#endif
-#if CTX_KMS
-                   || ctx_renderer_is_kms (ctx)
-#endif
-#if CTX_SDL
-                   || ctx_renderer_is_sdl (ctx)
-#endif
-                   ))
+   else if (format == CTX_FORMAT_RGBA8 && ctx_backend_is_tiled (ctx))
    {
-     CtxTiled *tiled = (CtxTiled*)ctx->renderer;
+     /* synchronize */
+     CtxTiled *tiled = (CtxTiled*)ctx->backend;
      {
        if (dst_stride <= 0) dst_stride = ctx_pixel_format_get_stride (format, sw);
        int bytes_per_pix = 4;
@@ -34317,6 +36209,15 @@ ctx_get_image_data (Ctx *ctx, int sx, int sy, int sw, int sh,
        return;
      }
    }
+#if CTX_RASTERIZER
+   else
+   {
+     Ctx *rasterizer = ctx_new_for_framebuffer (dst_data, sw, sh, dst_stride, format);
+     ctx_translate (rasterizer, sx, sy);
+     ctx_render_ctx (ctx, rasterizer);
+     ctx_free (rasterizer);
+   }
+#endif
 }
 
 void
@@ -34500,7 +36401,7 @@ void ctx_define_texture (Ctx *ctx,
   {
     CtxEntry *commands;
     int command_size = 1 + (data_len+1+1)/9 + 1 + (eid_len+1+1)/9 + 1 +   8;
-    if (ctx->renderer && ctx->renderer->process)
+    if (ctx->backend && (void*)ctx->backend->process != (void*)ctx_drawlist_process)
     {
        commands = (CtxEntry*)calloc (sizeof (CtxEntry), command_size);
     }
@@ -34546,7 +36447,7 @@ void ctx_define_texture (Ctx *ctx,
     }
     ((char *) &commands[pos+1].data.u8[0])[data_len]=0;
 
-    if (ctx->renderer && ctx->renderer->process)
+    if (ctx->backend && (void*)ctx->backend->process != (void*)ctx_drawlist_process)
     {
       ctx_process (ctx, commands);
       free (commands);
@@ -34767,12 +36668,9 @@ void ctx_stroke (Ctx *ctx)
+/* Clear the context's drawlist.  When CTX_RASTERIZER is enabled the clear
+ * only happens if no backend is attached; without CTX_RASTERIZER it is
+ * unconditional. */
 static void ctx_empty (Ctx *ctx)
 {
 #if CTX_RASTERIZER
-  if (ctx->renderer == NULL)
+  if (ctx->backend == NULL)
 #endif
-    {
-      ctx->drawlist.count = 0;
-      ctx->drawlist.bitpack_pos = 0;
-    }
+    ctx_drawlist_clear (ctx);
 }
 
 void _ctx_set_store_clear (Ctx *ctx)
@@ -34808,18 +36706,18 @@ static void _ctx_bindings_key_press (CtxEvent *event, void *data1, void *data2);
 
 void ctx_reset (Ctx *ctx)
 {
+  ctx_drawlist_clear (ctx);
         /* we do the callback reset first - maybe we need two cbs,
          * one for before and one after default impl?
          *
          * tiled fb and sdl needs to sync
          */
-  if (ctx->renderer && ctx->renderer->reset)
-    ctx->renderer->reset (ctx->renderer);
+  if (ctx->backend && ctx->backend->reset)
+    ctx->backend->reset (ctx);
 
   //CTX_PROCESS_VOID (CTX_RESET);
   //if (ctx->transformation & CTX_TRANSFORMATION_STORE_CLEAR)
   //  { return; }
-  ctx_empty (ctx);
   ctx_state_init (&ctx->state);
 #if CTX_EVENTS
   ctx_list_free (&ctx->events.items);
@@ -34839,11 +36737,12 @@ void ctx_reset (Ctx *ctx)
                      CTX_KEY_DOWN, ctx_collect_events, ctx, ctx,
                      NULL, NULL);
 
-    ctx_listen_full (ctx, 0, 0, ctx->events.width, ctx->events.height,
+    ctx_listen_full (ctx, 0, 0, ctx->width, ctx->height,
                      (CtxEventType)(CTX_PRESS|CTX_RELEASE|CTX_MOTION),
                      ctx_collect_events, ctx, ctx,
                      NULL, NULL);
   }
+  ctx->dirty = 0;
 #endif
 }
 
@@ -35373,34 +37272,12 @@ ctx_exit (Ctx *ctx)
+/* Finish the current frame: call the backend's flush hook (if any) with
+ * the context, bump the frame counter of the context and of a separate
+ * texture cache context, then clear the drawlist and reset the state. */
 void
 ctx_flush (Ctx *ctx)
 {
-  /* XXX: should be fully moved into the renderers
-   *      to permit different behavior and get rid
-   *      of the extranous flush() vfunc.
-   */
-  ctx->rev++;
-//  CTX_PROCESS_VOID (CTX_FLUSH);
-#if 0
-  //printf (" \e[?2222h");
-  ctx_drawlist_compact (&ctx->drawlist);
-  for (int i = 0; i < ctx->drawlist.count - 1; i++)
-    {
-      CtxEntry *entry = &ctx->drawlist.entries[i];
-      fwrite (entry, 9, 1, stdout);
-#if 0
-      uint8_t  *buf = (void *) entry;
-      for (int j = 0; j < 9; j++)
-        { printf ("%c", buf[j]); }
-#endif
-    }
-  printf ("Xx.Xx.Xx.");
-  fflush (NULL);
-#endif
-  if (ctx->renderer && ctx->renderer->flush)
-    ctx->renderer->flush (ctx->renderer);
+  if (ctx->backend && ctx->backend->flush)
+    ctx->backend->flush (ctx);
   ctx->frame++;
   if (ctx->texture_cache != ctx)
     ctx->texture_cache->frame++;
-  ctx->drawlist.count = 0;
+  ctx_drawlist_clear (ctx);
   ctx_state_init (&ctx->state);
 }
 
@@ -35903,7 +37780,7 @@ ctx_state_init (CtxState *state)
   ctx_memset (state, 0, sizeof (CtxState) );
   state->gstate.global_alpha_u8 = 255;
   state->gstate.global_alpha_f  = 1.0;
-  state->gstate.font_size       = 23; // default HTML canvas is 10px sans
+  state->gstate.font_size       = 32; // default HTML canvas is 10px sans
   state->gstate.line_width      = 2.0;
   state->gstate.image_smoothing = 1;
   state->gstate.source_stroke.type = CTX_SOURCE_INHERIT_FILL;
@@ -35926,15 +37803,33 @@ void _ctx_set_transformation (Ctx *ctx, int transformation)
   ctx->transformation = transformation;
 }
 
+#if CTX_SIMD
+void ctx_simd_setup (void);
+#endif
 static void
 _ctx_init (Ctx *ctx)
 {
+#if CTX_SIMD
+   {
+     static int simd_inited = 0;
+     if (!simd_inited)
+     {
+       simd_inited = 1;
+       ctx_simd_setup ();
+     }
+   }
+#endif
+#if CTX_U8_TO_FLOAT_LUT
+  static int lut_inited = 0;
+  if (!lut_inited){
   for (int i = 0; i <256;i++)
     ctx_u8_float[i] = i/255.0f;
+  lut_inited = 1;
+  }
+#endif
 
   ctx_state_init (&ctx->state);
 
-  ctx->renderer = NULL;
 #if CTX_CURRENT_PATH
   ctx->current_path.flags |= CTX_DRAWLIST_CURRENT_PATH;
 #endif
@@ -35952,21 +37847,24 @@ static void ctx_setup (void);
 static Ctx ctx_state;
 #endif
 
-void ctx_set_renderer (Ctx  *ctx,
-                       void *renderer)
+/* Replace the backend of ctx.
+ *
+ * A previously attached backend is released through its free hook first.
+ * If the new backend has no process hook, ctx_drawlist_process is
+ * installed as default.  backend may be NULL to detach the backend; the
+ * default hook is then not installed, since there is nothing to store it
+ * in.
+ */
+void ctx_set_backend (Ctx  *ctx,
+                       void *backend)
 {
-  if (ctx->renderer && ctx->renderer->free)
-    ctx->renderer->free (ctx->renderer);
-  ctx->renderer = (CtxImplementation*)renderer;
+  if (ctx->backend && ctx->backend->free)
+    ctx->backend->free (ctx->backend);
+  ctx->backend = (CtxBackend*)backend;
+  if (ctx->backend && ctx->backend->process == NULL)
+    ctx->backend->process = (void(*)(Ctx*,CtxCommand*))ctx_drawlist_process;
 }
 
-void *ctx_get_renderer (Ctx *ctx)
+/* Return the backend pointer currently stored in ctx (may be NULL). */
+void *ctx_get_backend (Ctx *ctx)
 {
-  return ctx->renderer;
+  return ctx->backend;
 }
 
-Ctx *
-ctx_new (void)
+
+static Ctx *
+_ctx_new_drawlist (int width, int height)
 {
   ctx_setup ();
 #if CTX_DRAWLIST_STATIC
@@ -35976,9 +37874,33 @@ ctx_new (void)
 #endif
   ctx_memset (ctx, 0, sizeof (Ctx) );
   _ctx_init (ctx);
+
+  ctx_set_backend (ctx, ctx_drawlist_backend_new ());
+  ctx_set_size (ctx, width, height);
   return ctx;
 }
 
+/* Public wrapper: create a context of the given size through
+ * _ctx_new_drawlist (). */
+Ctx *
+ctx_new_drawlist (int width, int height)
+{
+  Ctx *created = _ctx_new_drawlist (width, height);
+  return created;
+}
+
+/* Create a new context of the given size.
+ *
+ * Without CTX_EVENTS the backend argument is ignored and a drawlist
+ * context is always returned.  With CTX_EVENTS only the exact string
+ * "drawlist" selects the drawlist context; every other value, including
+ * NULL, is passed on to ctx_new_ui ().
+ */
+Ctx *
+ctx_new (int width, int height, const char *backend)
+{
+#if CTX_EVENTS
+  if (backend && !strcmp (backend, "drawlist"))
+#endif
+  {
+    return _ctx_new_drawlist (width, height);
+  }
+#if CTX_EVENTS
+  else
+    return ctx_new_ui (width, height, backend);
+#endif
+}
+
 static inline void
 ctx_drawlist_deinit (CtxDrawlist *drawlist)
 {
@@ -35992,24 +37914,39 @@ ctx_drawlist_deinit (CtxDrawlist *drawlist)
   drawlist->size = 0;
 }
 
+
+/* Release everything owned by ctx without freeing the Ctx struct itself:
+ * event state (with CTX_EVENTS), the backend through its free hook, the
+ * drawlist, the current path (with CTX_CURRENT_PATH) and all
+ * CTX_MAX_TEXTURES texture buffers. */
 static void ctx_deinit (Ctx *ctx)
 {
-  if (ctx->renderer)
+#if CTX_EVENTS
+  ctx_events_deinit (ctx);
+#endif
+
+
+  if (ctx->backend)
     {
-      if (ctx->renderer->free)
-        ctx->renderer->free (ctx->renderer);
-      ctx->renderer    = NULL;
+      if (ctx->backend->free)
+        ctx->backend->free (ctx->backend);
+      ctx->backend    = NULL;
     }
   ctx_drawlist_deinit (&ctx->drawlist);
 #if CTX_CURRENT_PATH
   ctx_drawlist_deinit (&ctx->current_path);
 #endif
+
+  for (int no = 0; no < CTX_MAX_TEXTURES; no++)
+    ctx_buffer_deinit (&ctx->texture[no]);
 }
 
 void ctx_free (Ctx *ctx)
 {
   if (!ctx)
     { return; }
+
+#if CTX_CLIENTS
+  while (ctx_clients (ctx))
+    ctx_client_remove (ctx, ctx_clients(ctx)->data);
+#endif
+
 #if CTX_EVENTS
   ctx_clear_bindings (ctx);
 #endif
@@ -36019,15 +37956,18 @@ void ctx_free (Ctx *ctx)
 #endif
 }
 
-Ctx *ctx_new_for_drawlist (void *data, size_t length)
+
+/* Create a context that reads an existing array of CtxEntry records.
+ *
+ * data is used in place (the drawlist is flagged
+ * CTX_DRAWLIST_DOESNT_OWN_ENTRIES) and length is its size in bytes, from
+ * which the entry count is derived.
+ */
+Ctx *
+ctx_new_for_drawlist (int width, int height, void *data, size_t length)
 {
-  Ctx *ctx = ctx_new ();
+  Ctx *ctx = _ctx_new_drawlist (width, height);
   ctx->drawlist.flags   |= CTX_DRAWLIST_DOESNT_OWN_ENTRIES;
   ctx->drawlist.entries  = (CtxEntry *) data;
   ctx->drawlist.count    = length / sizeof (CtxEntry);
   return ctx;
 }
 
+
 static void ctx_setup (void)
 {
   ctx_font_setup ();
@@ -36074,6 +38014,11 @@ ctx_render_ctx_textures (Ctx *ctx, Ctx *d_ctx)
 
 void ctx_quit (Ctx *ctx)
 {
+#if CTX_CLIENTS
+  while (ctx_clients (ctx))
+    ctx_client_remove (ctx, ctx_clients(ctx)->data);
+#endif
+
 #if CTX_EVENTS
   ctx->quit ++;
 #endif
@@ -36138,7 +38083,7 @@ void         ctx_set_cursor (Ctx *ctx, CtxCursor cursor)
 {
   if (ctx->cursor != cursor)
   {
-    ctx_set_dirty (ctx, 1);
+    ctx_queue_draw (ctx);
     ctx->cursor = cursor;
   }
 }
@@ -36149,25 +38094,34 @@ CtxCursor    ctx_get_cursor (Ctx *ctx)
 
+/* Hand text to the backend's set_clipboard hook; does nothing when no
+ * backend is attached or the backend has no such hook. */
 void ctx_set_clipboard (Ctx *ctx, const char *text)
 {
-  if (ctx->renderer && ctx->renderer->set_clipboard)
+  if (ctx->backend && ctx->backend->set_clipboard)
   {
-    ctx->renderer->set_clipboard (ctx->renderer, text);
+    ctx->backend->set_clipboard (ctx, text);
+    return;
+  }
+}
+
+/* Hand text to the backend's set_windowtitle hook; does nothing when no
+ * backend is attached or the backend has no such hook. */
+void ctx_windowtitle (Ctx *ctx, const char *text)
+{
+  if (ctx->backend && ctx->backend->set_windowtitle)
+  {
+    ctx->backend->set_windowtitle (ctx, text);
     return;
   }
 }
 
+/* Return the clipboard text from the backend's get_clipboard hook, or a
+ * freshly strdup ()ed empty string when there is no such hook.
+ * NOTE(review): the caller presumably owns and frees the result - the
+ * fallback is heap allocated; confirm the backends behave the same. */
 char *ctx_get_clipboard (Ctx *ctx)
 {
-  if (ctx->renderer && ctx->renderer->get_clipboard)
+  if (ctx->backend && ctx->backend->get_clipboard)
   {
-    return ctx->renderer->get_clipboard (ctx->renderer);
+    return ctx->backend->get_clipboard (ctx);
   }
   return strdup ("");
 }
 
+/* Store texture_source in the backend of ctx.
+ * NOTE(review): the backend is cast to CtxRasterizer without any check, so
+ * this assumes ctx has a rasterizer backend attached - verify at callers. */
 void ctx_set_texture_source (Ctx *ctx, Ctx *texture_source)
 {
-  ((CtxRasterizer*)ctx->renderer)->texture_source = texture_source;
+  ((CtxRasterizer*)ctx->backend)->texture_source = texture_source;
 }
 
 void ctx_set_texture_cache (Ctx *ctx, Ctx *texture_cache)
@@ -36297,8 +38251,14 @@ typedef struct CtxMagicEntry {
 static CtxMagicEntry ctx_magics[]={
   {"image/bmp",  ".bmp", 0, {0}},
   {"image/png",  ".png", 8, {0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a}},
-  {"image/jpeg", ".jpg", 8, {0xff, 0xd8, 0xff, 0xdb, 0xff, 0xd8, 0xff, 0xe0}},
+  {"image/jpeg", ".jpg", 8,  {0xff, 0xd8, 0xff, 0xdb, 0xff, 0xd8, 0xff, 0xe0}},
+  {"image/jpeg", ".jpg", 4,  {0xff, 0xd8, 0xff, 0xe0}},
+  {"image/jpeg", ".jpg", 4,  {0xff, 0xd8, 0xff, 0xee}},
+  {"image/jpeg", ".jpg", 4,  {0xff, 0xd8, 0xff, 0xe1}},
   {"image/jpeg", ".jpeg", 8, {0xff, 0xd8, 0xff, 0xdb, 0xff, 0xd8, 0xff, 0xe0}},
+
+  {"image/psd", ".psd", 4,  {0x38, 0x42, 0x50, 0x53}},
+
   {"image/gif",  ".gif", 6, {0x47, 0x49, 0x46, 0x38, 0x37, 0x61}},
   {"image/gif",  ".gif", 6, {0x47, 0x49, 0x46, 0x38, 0x39, 0x61}},
   {"image/exr",  ".exr", 4, {0x76, 0x2f, 0x31, 0x01}},
@@ -36306,10 +38266,19 @@ static CtxMagicEntry ctx_magics[]={
   {"application/blender", ".blend", 8, {0x42, 0x4c,0x45,0x4e,0x44,0x45,0x52}},
   {"image/xcf",  ".xcf", 8, {0x67, 0x69,0x6d,0x70,0x20,0x78,0x63,0x66}},
   {"application/bzip2", ".bz2", 3, {0x42, 0x5a, 0x68}},
-  {"application/gzip", ".gz", 0, {0x0}},
+  {"application/gzip", ".gz", 2, {0x1f, 0x8b}},
+  {"application/zip", ".zip", 4, {0x50, 0x4b, 0x03, 0x04}},
+  {"application/zip", ".zip", 4, {0x50, 0x4b, 0x05, 0x06}},
+  {"application/rar", ".rar", 6, {0x52, 0x61, 0x72, 0x1a, 0x07, 0x00}},
+  {"application/rar", ".rar", 7, {0x52, 0x61, 0x72, 0x1a, 0x07, 0x01, 0x00}},
   {"text/x-csrc", ".c", 0, {0,}},
   {"text/x-chdr", ".h", 0, {0,}},
   {"text/css", ".css", 0, {0x0}},
+
+  {"application/gzip", ".z", 2, {0x1f, 0x9d}},
+
+  {"application/dos-mz", ".exe", 2, {0x4d, 0x5a}},
+
   {"text/csv", ".csv", 0, {0x0}},
   {"text/html", ".htm", 0, {0x0}},
   {"text/html", ".html", 0, {0x0}},
@@ -36320,18 +38289,23 @@ static CtxMagicEntry ctx_magics[]={
   {"application/json", ".json", 0, {0x0}},
   {"application/octet-stream", ".bin", 0, {0x0}},
   {"application/x-object", ".o", 0, {0x0}},
+  {"text/utf-8", ".txt", 0, {0xef, 0xbb, 0xbf}}, // utf8 bom
   {"text/x-sh", ".sh", 0, {0x0}},
   {"text/x-python", ".py", 0, {0x0}},
   {"text/x-perl", ".pl", 0, {0x0}},
   {"text/x-perl", ".pm", 0, {0x0}},
+  {"application/x-shellscript", ".sh", 2, {0x23, 0x21}}, // #!
   {"application/pdf", ".pdf", 0, {0x0}},
   {"application/ctx", ".ctx", 0, {0x0}},
+  {"application/wasm", ".wasm", 0, {0x00, 0x61, 0x73, 0x6d}},
   {"text/xml", ".xml",     0, {0x0}},
-  {"video/mp4", ".mp4",    0, {0x0}},
+  {"video/mp4", ".mp4",    7, {0x66, 0x74, 0x79, 0x70, 0x69, 0x73, 0x6f}},
+  {"video/matroska", ".mkv", 4, {0x1a, 0x45, 0xdf, 0xa3}},
   {"video/ogg", ".ogv",    0, {0x0}},
-  {"audio/sp-midi", ".mid",  0, {0x0}},
-  {"audio/x-wav", ".wav",  0, {0x0}},
-  {"audio/ogg", ".ogg",    0, {0x0}},
+  {"audio/flac", ".flac",  0, {0x66, 0x4c, 0x61, 0x43}},
+  {"audio/sp-midi", ".mid",  4, {0x4d, 0x54, 0x68, 0x64}},
+  {"audio/x-wav", ".wav",  4, {0x52, 0x49, 0x46, 0x46}},
+  {"audio/ogg", ".ogg",    4, {0x4f, 0x67, 0x67, 0x53}},
   {"audio/ogg", ".opus",   0, {0x0}},
   {"audio/ogg", ".oga",    0, {0x0}},
   {"audio/mpeg", ".mp1",   0, {0x0}},
@@ -36342,11 +38316,11 @@ static CtxMagicEntry ctx_magics[]={
   {"audio/mpeg", ".mpga",  0, {0x0}},
   {"audio/mpeg", ".mpega", 0, {0x0}},
   {"font/otf", ".otf", 0,{0x0}},
-  {"font/ttf", ".ttf", 0,{0x0}},
+  {"font/ttf", ".ttf", 5,{0x0, 0x01, 0x00, 0x00, 0x00}},
   // inode-directory
 };
 
-static int ctx_path_is_dir (const char *path)
+int ctx_path_is_dir (const char *path)
 {
   struct stat stat_buf;
   if (!path || path[0]==0) return 0;
@@ -36362,9 +38336,11 @@ static int ctx_path_is_exec (const char *path)
   return stat_buf.st_mode & 0x1;
 }
 
+int ctx_media_matched_content = 0;
 const char *ctx_guess_media_type (const char *path, const char *content, int len)
 {
   const char *extension_match = NULL;
+  ctx_media_matched_content = 0;
   if (path && strrchr (path, '.'))
   {
     char *pathdup = strdup (strrchr(path, '.'));
@@ -36386,6 +38362,7 @@ const char *ctx_guess_media_type (const char *path, const char *content, int len
        if (ctx_magics[i].len) // skip extension only matches
        if (!memcmp (content, ctx_magics[i].magic, ctx_magics[i].len))
        {
+         ctx_media_matched_content = 1;
          return ctx_magics[i].mime_type;
        }
     }
@@ -36406,6 +38383,7 @@ const char *ctx_guess_media_type (const char *path, const char *content, int len
   {
     int p = content[i];
     if (p > 127) non_ascii = 1;
+    if (p < ' ' && (p!='\n')) non_ascii = 1;
     if (p == 0) non_ascii = 1;
   }
   if (non_ascii)
@@ -36419,6 +38397,14 @@ const char *ctx_path_get_media_type (const char *path)
   char *content = NULL;
   long length = 0;
 
+  if (strchr(path, ':'))
+  {
+    path = strchr (path, ':') + 1;
+    if (path[0]=='/')path++;
+    if (path[0]=='/')path++;
+  }
+
+#if 0
   /* XXX : code duplication, factor out in separate fun */
   if (path && strrchr (path, '.'))
   {
@@ -36434,10 +38420,11 @@ const char *ctx_path_get_media_type (const char *path)
     }
     free (pathdup);
   }
+#endif
   if (ctx_path_is_dir (path))
     return "inode/directory";
 
-  ctx_get_contents2 (path, (uint8_t**)&content, &length, 32);
+  ctx_get_contents2 (path, (uint8_t**)&content, &length, 128);
   if (content)
   {
   const char *guess = ctx_guess_media_type (path, content, length);
@@ -36508,127 +38495,159 @@ CtxMediaTypeClass ctx_media_type_class (const char *media_type)
 
 #endif
 
-#endif // CTX_IMPLEMENTATION
-#ifndef __CTX_CLIENTS_H
-#define __CTX_CLIENTS_H
 
-typedef enum CtxClientFlags {
-  ITK_CLIENT_UI_RESIZABLE = 1<<0,
-  ITK_CLIENT_CAN_LAUNCH   = 1<<1,
-  ITK_CLIENT_MAXIMIZED    = 1<<2,
-  ITK_CLIENT_ICONIFIED    = 1<<3,
-  ITK_CLIENT_SHADED       = 1<<4,
-  ITK_CLIENT_TITLEBAR     = 1<<5,
-  ITK_CLIENT_LAYER2       = 1<<6,  // used for having a second set
-                                   // to draw - useful for splitting
-                                   // scrolled and HUD items
-                                   // with HUD being LAYER2
-                                  
-  ITK_CLIENT_KEEP_ALIVE   = 1<<7,  // do not automatically
-  ITK_CLIENT_FINISHED     = 1<<8,  // do not automatically
-                                   // remove after process quits
-  ITK_CLIENT_PRELOAD      = 1<<9
-} CtxClientFlags;
+/**
+ * ctx_current_point:
+ * ctx: context to query, may be NULL
+ * x: optional location receiving the current x coordinate
+ * y: optional location receiving the current y coordinate
+ *
+ * Reports the current point. A NULL ctx yields 0,0. When the backend
+ * processes commands with ctx_rasterizer_process the coordinates are read
+ * from the rasterizer, otherwise from the state of the context.
+ */
+void
+ctx_current_point (Ctx *ctx, float *x, float *y)
+{
+  if (!ctx)
+    {
+      if (x) { *x = 0.0f; }
+      if (y) { *y = 0.0f; }
+      return; /* there is nothing further that can be dereferenced */
+    }
+#if CTX_RASTERIZER
+  if (ctx->backend && ctx->backend->process == ctx_rasterizer_process)
+    {
+      if (x) { *x = ( (CtxRasterizer *) (ctx->backend) )->x; }
+      if (y) { *y = ( (CtxRasterizer *) (ctx->backend) )->y; }
+      return;
+    }
+#endif
+  if (x) { *x = ctx->state.x; }
+  if (y) { *y = ctx->state.y; }
+}
 
-typedef struct _CtxClient CtxClient;
-typedef void (*CtxClientFinalize)(CtxClient *client, void *user_data);
 
-struct _CtxClient {
-  VT    *vt;
-  Ctx   *ctx;
-  char  *title;
-  int    x;
-  int    y;
-  int    width;
-  int    height;
-  CtxClientFlags flags;
-#if 0
-  int    shaded;
-  int    iconified;
-  int    maximized;
-  int    resizable;
-#endif
-  int    unmaximized_x;
-  int    unmaximized_y;
-  int    unmaximized_width;
-  int    unmaximized_height;
-  int    do_quit;
-  long   drawn_rev;
-  int    id;
-  int    internal; // render a settings window rather than a vt
-  void  *user_data;
-  CtxClientFinalize finalize;
-#if CTX_THREADS
-  mtx_t  mtx;
-#endif
-#if VT_RECORD
-  Ctx   *recording;
-#endif
-};
 
+/* Convenience accessor: the x component of ctx_current_point(). */
+float ctx_x (Ctx *ctx)
+{
+  float cur_x = 0.0f;
+  float cur_y = 0.0f;
+
+  ctx_current_point (ctx, &cur_x, &cur_y);
+  return cur_x;
+}
 
+/* Convenience accessor: the y component of ctx_current_point(). */
+float ctx_y (Ctx *ctx)
+{
+  float cur_x = 0.0f;
+  float cur_y = 0.0f;
+
+  ctx_current_point (ctx, &cur_x, &cur_y);
+  return cur_y;
+}
 
-extern CtxList *clients;
-extern CtxClient *active;
-extern CtxClient *active_tab;
+/* ctx_backend_type:
+ *
+ * Identifies the backend attached to ctx by comparing its free callback
+ * (or, for the hasher, its process callback) against the implementations
+ * that were compiled in. Returns CTX_BACKEND_NONE when no backend is set
+ * or when none of the known callbacks match.
+ *
+ * ctx itself is dereferenced unconditionally, callers must pass a valid
+ * context.
+ */
+CtxBackendType ctx_backend_type (Ctx *ctx)
+{
+  CtxBackend *backend = ctx->backend;
+  if (backend == NULL)
+    return CTX_BACKEND_NONE;
+#if CTX_EVENTS
+  else if (backend->free == (void*) ctx_ctx_free) return CTX_BACKEND_CTX;
+#endif
+#if CTX_TERM
+  else if (backend->free == (void*) ctx_term_free) return CTX_BACKEND_TERM;
+#endif
+#if CTX_HEADLESS
+  else if (backend->free == (void*) ctx_headless_free) return CTX_BACKEND_HEADLESS;
+#endif
+#if CTX_RASTERIZER
+  /* the hasher is recognised by its process callback and is tested before
+   * the rasterizer - NOTE(review): presumably because both share the same
+   * free callback, confirm against the hasher constructor */
+  else if (backend->process == (void*) ctx_hasher_process) return CTX_BACKEND_HASHER;
+#endif
+#if CTX_RASTERIZER
+  else if (backend->free == (void*) ctx_rasterizer_deinit) return CTX_BACKEND_RASTERIZER;
+#endif
+#if CTX_KMS
+  else if (backend->free == (void*) ctx_kms_free) return CTX_BACKEND_KMS;
+#endif
+#if CTX_FB
+  else if (backend->free == (void*) ctx_fb_free) return CTX_BACKEND_FB;
+#endif
+#if CTX_SDL
+  else if (backend->free == (void*) ctx_sdl_free) return CTX_BACKEND_SDL;
+#endif
+#if CTX_CAIRO
+  else if (backend->free == (void*) ctx_cairo_free) return CTX_BACKEND_CAIRO;
+#endif
+#if CTX_TERMIMG
+  else if (backend->free == (void*) ctx_termimg_free) return CTX_BACKEND_TERMIMG;
+#endif
+  return CTX_BACKEND_NONE;
+}
 
 
-int ctx_client_resize (int id, int width, int height);
-void ctx_client_set_font_size (int id, float font_size);
-float ctx_client_get_font_size (int id);
-void ctx_client_maximize (int id);
+/* Forwards the fullscreen request to the SDL backend; for every other
+ * backend (or when SDL support is compiled out) the call does nothing. */
+void ctx_set_fullscreen (Ctx *ctx, int val)
+{
+#if CTX_SDL
+  if (ctx_backend_type (ctx) != CTX_BACKEND_SDL)
+    return;
+  ctx_sdl_set_fullscreen (ctx, val);
+#endif
+}
 
+/* Returns what the SDL backend reports as fullscreen state, 0 for every
+ * other backend or when SDL support is compiled out. */
+int ctx_get_fullscreen (Ctx *ctx)
+{
+  int fullscreen = 0;
+#if CTX_SDL
+  if (ctx_backend_type (ctx) == CTX_BACKEND_SDL)
+    fullscreen = ctx_sdl_get_fullscreen (ctx);
+#endif
+  return fullscreen;
+}
 
-CtxClient *vt_get_client (VT *vt);
-CtxClient *ctx_client_new (Ctx *ctx,
-                           const char *commandline,
-                           int x, int y, int width, int height,
-                           float font_size,
-                           CtxClientFlags flags,
-                           void *user_data,
-                           CtxClientFinalize client_finalize);
-CtxClient *ctx_client_new_argv (Ctx *ctx, const char **argv, int x, int y, int width, int height, float 
font_size, CtxClientFlags flags, void *user_data,
-                CtxClientFinalize client_finalize);
-int ctx_clients_need_redraw (Ctx *ctx);
+/* Table of pixel format descriptors searched by ctx_pixel_format_info();
+ * it points at the generic table when compositing is compiled in and is
+ * NULL otherwise. */
+CtxPixelFormatInfo *ctx_pixel_formats =
+#if CTX_COMPOSITE
+ctx_pixel_formats_generic;
+#else
+NULL;
+#endif
 
-extern float ctx_shape_cache_rate;
-extern int _ctx_max_threads;
+/* ctx_pixel_format_info:
+ *
+ * Looks up the descriptor for format in ctx_pixel_formats. The table is
+ * walked until the first entry whose pixel_format is 0. A missing table
+ * and an unknown format both trip assert (0); with assertions disabled
+ * NULL is returned.
+ */
+CtxPixelFormatInfo *
+ctx_pixel_format_info (CtxPixelFormat format)
+{
+  CtxPixelFormatInfo *info = ctx_pixel_formats;
+
+  if (info)
+    {
+      for (; info->pixel_format; info++)
+        if (info->pixel_format == format)
+          return info;
+    }
+  /* reached without a table as well as for a format that is not listed */
+  assert (0);
+  return NULL;
+}
 
-void ctx_client_move (int id, int x, int y);
-int ctx_client_resize (int id, int w, int h);
-void ctx_client_shade_toggle (int id);
-float ctx_client_min_y_pos (Ctx *ctx);
-float ctx_client_max_y_pos (Ctx *ctx);
 
-CtxClient *client_by_id (int id);
+#if CTX_RASTERIZER
 
-int ctx_clients_draw (Ctx *ctx, int layer2);
 
-void ctx_client_remove (Ctx *ctx, CtxClient *client);
+/* Function pointer through which edge rasterization is dispatched; it is
+ * initialised to the generic implementation. The shape argument is only
+ * part of the signature when CTX_SHAPE_CACHE is enabled. */
+void (*ctx_rasterizer_rasterize_edges) (CtxRasterizer *rasterizer, const int fill_rule
+#if CTX_SHAPE_CACHE
+                ,CtxShapeEntry *shape
+#endif
+                ) =
+      ctx_rasterizer_rasterize_edges_generic;
 
-int ctx_client_height (int id);
-
-int ctx_client_x (int id);
-int ctx_client_y (int id);
-void ctx_client_raise_top (int id);
-void ctx_client_lower_bottom (int id);
-void ctx_client_iconify (int id);
-int ctx_client_is_iconified (int id);
-void ctx_client_uniconify (int id);
-void ctx_client_maximize (int id);
-int ctx_client_is_maximized (int id);
-void ctx_client_unmaximize (int id);
-void ctx_client_maximized_toggle (int id);
-void ctx_client_shade (int id);
-int ctx_client_is_shaded (int id);
-void ctx_client_unshade (int id);
-void ctx_client_toggle_maximized (int id);
-void ctx_client_shade_toggle (int id);
-void ctx_client_move (int id, int x, int y);
-int ctx_client_resize (int id, int width, int height);
+/* Function pointers for the compositing entry points, each initialised to
+ * its generic implementation. The two rectangle helpers only exist when
+ * CTX_FAST_FILL_RECT is enabled. */
+void (*ctx_composite_setup) (CtxRasterizer *rasterizer) =
+      ctx_composite_setup_generic;
+#if CTX_FAST_FILL_RECT
+void (*ctx_composite_stroke_rect) (CtxRasterizer *rasterizer,
+                           float          x0,
+                           float          y0,
+                           float          x1,
+                           float          y1,
+                           float          line_width) =
+      ctx_composite_stroke_rect_generic;
+
+void (*ctx_composite_fill_rect) (CtxRasterizer *rasterizer,
+                           float        x0,
+                           float        y0,
+                           float        x1,
+                           float        y1,
+                           uint8_t      cov) =
+      ctx_composite_fill_rect_generic;
+#endif
 
 #endif
+
+
 #ifndef MRG_UTF8_H
 #define MRG_UTF8_H
 
@@ -36707,7 +38726,7 @@ utf8_decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
 }
 
 #endif
-#if CTX_VT
+#if CTX_CLIENTS
 
 /* mrg - MicroRaptor Gui
  * Copyright (c) 2014 Øyvind Kolås <pippin hodefoting com>
@@ -36747,14 +38766,13 @@ struct _VtLine
   /* line extends string, permitting string ops to operate on it  */
 
   uint64_t *style;
-  int       style_size;
 
   void     *ctx; // each line can have an attached ctx context;
   char     *prev;
+  int       style_size;
   int       prev_length;
   CtxString *frame;
 
-  int       wrapped;
 
   void     *ctx_copy; // each line can have an attached ctx context;
   // clearing should be brutal enough to unset the context of the current
@@ -36767,6 +38785,7 @@ struct _VtLine
   float     yscale;
   float     y_offset;
   int       in_scrolling_region;
+  int       wrapped;
 
   /*  XXX:  needs refactoring to a CtxList of links/images */
   void     *images[4];
@@ -37015,16 +39034,23 @@ VtLine *vt_line_new (const char *initial)
 
+/* Per terminal bookkeeping for the process (or thread) feeding the vt:
+ * the pty master descriptor, the child pid, and a done flag that the
+ * SIGCHLD handler sets once the child with that pid has been reaped. */
 typedef struct VtPty
 {
-  int        pty;
-  pid_t      pid;
+  int        pty; //    0 if thread
+  pid_t      pid; //    0 if thread
   int        done;
+
+  void      *userdata;
+
+  uint8_t   *shm;
+  int        shm_size;
 } VtPty;
+
+
+
 ssize_t vtpty_read     (void *vtpty, void *buf, size_t count);
 ssize_t vtpty_write    (void *vtpty, const void *buf, size_t count);
 void    vtpty_resize   (void *vtpty, int cols, int rows,
                         int px_width, int px_height);
 int     vtpty_waitdata (void  *vtpty, int timeout);
-extern  CtxList *vts;
 #define MAX_COLS 2048 // used for tabstops
 
 
@@ -37089,14 +39115,13 @@ struct _VT
   int       lastx;
   int       lasty;
   int        result;
-  long       rev;
   //SDL_Rect   dirty;
   float  dirtpad;
   float  dirtpad1;
   float  dirtpad2;
   float  dirtpad3;
 
-  void  *client;
+  CtxClient *client;
 
   ssize_t (*write)   (void *serial_obj, const void *buf, size_t count);
   ssize_t (*read)    (void *serial_obj, void *buf, size_t count);
@@ -37259,6 +39284,10 @@ struct _VT
 
 
 VT *vt_new (const char *command, int width, int height, float font_size, float line_spacing, int id, int 
can_launch);
+VT *vt_new_argv (char **argv, int width, int height, float font_size, float line_spacing, int id, int 
can_launch);
+VT *vt_new_thread (void (*start_routine)(void *userdata), void *userdata,
+                   int width, int height, float font_size, float line_spacing, int id, int can_launch);
+
 
 void vt_open_log (VT *vt, const char *path);
 
@@ -37272,7 +39301,7 @@ void        vt_set_font_size      (VT *vt, float font_size);
 float       vt_get_font_size      (VT *vt);
 void        vt_set_line_spacing   (VT *vt, float line_spacing);
 
-const char *vt_find_shell_command (void);
+const char *ctx_find_shell_command (void);
 
 int         vt_keyrepeat          (VT *vt);
 
@@ -37320,7 +39349,9 @@ int         vt_get_cursor_x         (VT *vt);
 int         vt_get_cursor_y         (VT *vt);
 
 void        vt_draw                 (VT *vt, Ctx *ctx, double x, double y);
+#if 0
 void        vt_register_events      (VT *vt, Ctx *ctx, double x0, double y0);
+#endif
 
 void        vt_rev_inc              (VT *vt);
 
@@ -38942,13 +40973,11 @@ void vt_audio (VT *vt, const char *command)
  *
  */
 
-#if !__COSMOPOLITAN__
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <errno.h>
 #include <assert.h>
-
 #include <string.h>
 #include <signal.h>
 #include <stdlib.h>
@@ -38962,7 +40991,6 @@ void vt_audio (VT *vt, const char *command)
 #include <sys/ioctl.h>
 #include <termios.h>
 #include <zlib.h>
-#endif
 
 #include "ctx.h"
 
@@ -39179,6 +41207,7 @@ static Image *image_query (int id)
 
 static int image_eid_no = 0;
 
+static CtxList *ctx_vts;
 static Image *image_add (int width,
                          int height,
                          int id,
@@ -39264,7 +41293,7 @@ static int vt_col_to_pos (VT *vt, int col)
   int pos = col;
   if (vt->current_line->contains_proportional)
     {
-      Ctx *ctx = ctx_new ();
+      Ctx *ctx = _ctx_new_drawlist (vt->width, vt->height);
       ctx_font (ctx, "regular");
       ctx_font_size (ctx, vt->font_size);
       int x = 0;
@@ -39316,18 +41345,6 @@ static int vt_margin_right (VT *vt)
 
 #define VT_MARGIN_RIGHT vt_margin_right(vt)
 
-
-void vt_rev_inc (VT *vt)
-{
-  if (vt)
-    vt->rev++;
-}
-
-long vt_rev (VT *vt)
-{
-  return vt?vt->rev:0;
-}
-
 static void vtcmd_reset_to_initial_state (VT *vt, const char *sequence);
 int vt_set_prop (VT *vt, uint32_t key_hash, const char *val);
 uint32_t ctx_strhash (const char *utf8);
@@ -39335,7 +41352,6 @@ uint32_t ctx_strhash (const char *utf8);
 static void vt_set_title (VT *vt, const char *new_title)
 {
   if (vt->inert) return;
-
   if (vt->title)
     { free (vt->title); }
   vt->title = strdup (new_title);
@@ -39524,9 +41540,36 @@ void vt_set_line_spacing (VT *vt, float line_spacing)
   _vt_compute_cw_ch (vt);
 }
 
-VT *vt_new (const char *command, int width, int height, float font_size, float line_spacing, int id, int 
can_launch)
+
+/* SIGCHLD handler: reaps exited children and flags the matching entries
+ * of ctx_vts as done.
+ *
+ * Several children can exit while a single SIGCHLD is pending, so
+ * waitpid is called in a loop until nothing more is immediately
+ * reapable. errno is restored on exit because the handler may interrupt
+ * code that is about to inspect it.
+ *
+ * NOTE(review): list entries are read through a VtPty pointer, which
+ * presumably relies on the VtPty being the first member of VT - confirm.
+ */
+static void ctx_clients_signal_child (int signum)
 {
-  VT *vt                 = calloc (sizeof (VT), 1);
+  pid_t pid;
+  int   status;
+  int   saved_errno = errno;
+  (void) signum;
+  while ( (pid = waitpid (-1, &status, WNOHANG) ) > 0)
+    {
+      for (CtxList *l = ctx_vts; l; l=l->next)
+        {
+          VtPty *vt = l->data;
+          if (vt->pid == pid)
+            {
+              vt->done = 1;
+              //vt->result = status;
+            }
+        }
+    }
+  errno = saved_errno;
+}
+
+static void vt_init (VT *vt, int width, int height, float font_size, float line_spacing, int id, int 
can_launch)
+{
+  static int signal_installed = 0;
+  if (!signal_installed)
+  {
+    signal (SIGCHLD,ctx_clients_signal_child);
+    signal_installed = 1;
+  }
   vt->id                 = id;
   vt->lastx              = -1;
   vt->lasty              = -1;
@@ -39556,28 +41599,227 @@ VT *vt_new (const char *command, int width, int height, float font_size, float l
   vt->line_spacing       = 1.0;
   vt->scale_x            = 1.0;
   vt->scale_y            = 1.0;
+  vt->fg_color[0] = 216;
+  vt->fg_color[1] = 216;
+  vt->fg_color[2] = 216;
+  vt->bg_color[0] = 0;
+  vt->bg_color[1] = 0;
+  vt->bg_color[2] = 0;
+}
+
+/* vt_forkpty:
+ * amaster: receives the master side descriptor in the parent
+ * aname:   unused
+ * termp:   optional terminal attributes applied to the slave
+ * winsize: optional window size applied to the slave
+ *
+ * Opens a pseudo terminal pair and forks. In the child the slave becomes
+ * stdin, stdout and stderr of a new session and 0 is returned; in the
+ * parent the pid of the child is returned and *amaster is filled in.
+ * On failure a negative value is returned and no descriptor opened here
+ * is left open.
+ */
+static pid_t
+vt_forkpty (int  *amaster,
+            char *aname,
+            const struct termios *termp,
+            const struct winsize *winsize)
+{
+  pid_t pid;
+  int master = posix_openpt (O_RDWR|O_NOCTTY);
+  int slave;
+  char *name = NULL;
+
+  (void) aname;
+
+  if (master < 0)
+    return -1;
+  if (grantpt (master) != 0 ||
+      unlockpt (master) != 0 ||
+      (name = ptsname (master)) == NULL)
+    {
+      close (master);
+      return -1;
+    }
+
+  slave = open(name, O_RDWR|O_NOCTTY);
+  if (slave < 0)
+    {
+      close (master);
+      return -1;
+    }
+
+  if (termp)   tcsetattr(slave, TCSAFLUSH, termp);
+  if (winsize) ioctl(slave, TIOCSWINSZ, winsize);
+
+  pid = fork();
+  if (pid < 0)
+  {
+    close (slave);
+    close (master);
+    return pid;
+  } else if (pid == 0)
+  {
+    close (master);
+    setsid ();
+    dup2 (slave, STDIN_FILENO);
+    dup2 (slave, STDOUT_FILENO);
+    dup2 (slave, STDERR_FILENO);
+
+    close (slave);
+    return 0;
+  }
+  /* NOTE(review): TIOCSCTTY is issued in the parent, as before; acquiring
+   * the controlling terminal is normally done by the new session leader
+   * (the child) - confirm whether this call has the intended effect */
+  ioctl (slave, TIOCSCTTY, NULL);
+  close (slave);
+  *amaster = master;
+  return pid;
+}
+
+/* ctx_child_prepare_env:
+ * was_pidone: non-zero to switch to uid 1000 instead of closing inherited
+ *             descriptors
+ * term:       value for TERM, NULL selects xterm-256color
+ *
+ * Run in the freshly forked child before exec: clears the terminal related
+ * variables inherited from the parent and installs the ones the child
+ * should see.
+ */
+static void
+ctx_child_prepare_env (int was_pidone, const char *term)
+{
+  static const char *const inherited[] = {
+    "TERM", "COLUMNS", "LINES", "TERMCAP",
+    "COLOR_TERM", "COLORTERM", "VTE_VERSION", "CTX_BACKEND"
+  };
+
+  if (!was_pidone)
+  {
+    /* hack, trying to close xcb */
+    for (int fd = 3; fd < 768; fd++)
+      close (fd);
+  }
+  else if (setuid (1000))
+  {
+    fprintf (stderr, "setuid failed\n");
+  }
+
+  for (unsigned int i = 0; i < sizeof (inherited) / sizeof (inherited[0]); i++)
+    unsetenv (inherited[i]);
+
+  setenv ("TERM", term ? term : "xterm-256color", 1);
+  setenv ("COLORTERM", "truecolor", 1);
+  setenv ("CTX_BACKEND", "ctx", 1); // speeds up launching of clients
+}
+
+void _ctx_add_listen_fd (int fd);
+void _ctx_remove_listen_fd (int fd);
+
+/* vt_run_argv:
+ * vt:   terminal that gets the child attached
+ * argv: NULL terminated argument vector, argv[0] is looked up with execvp
+ * term: value for TERM in the child, NULL for the default
+ *
+ * Forks a child on a new pty sized from the rows/cols and cell size of vt
+ * and executes argv in it. In the parent the pty is made non-blocking and
+ * registered with _ctx_add_listen_fd.
+ */
+static void vt_run_argv (VT *vt, char **argv, const char *term)
+{
+  struct winsize ws;
+  //signal (SIGCHLD,signal_child);
+#if 0
+  int was_pidone = (getpid () == 1);
+#else
+  int was_pidone = 0; // do no special treatment, all child processes belong
+                      // to root
+#endif
+  signal (SIGINT,SIG_DFL);
+  ws.ws_row = vt->rows;
+  ws.ws_col = vt->cols;
+  ws.ws_xpixel = ws.ws_col * vt->cw;
+  ws.ws_ypixel = ws.ws_row * vt->ch;
+  vt->vtpty.pid = vt_forkpty (&vt->vtpty.pty, NULL, NULL, &ws);
+  if (vt->vtpty.pid == 0)
+    {
+      /* child: only reached past execvp when the exec itself failed */
+      ctx_child_prepare_env (was_pidone, term);
+
+      execvp (argv[0], (char**)argv);
+      /* NOTE(review): exit (0) reports success and runs the atexit handlers
+       * inherited from the parent; _exit (127) is the usual choice here */
+      exit (0);
+    }
+  else if (vt->vtpty.pid < 0)
+    {
+      VT_error ("forkpty failed (%s)", argv[0]);
+      return;
+    }
+  fcntl(vt->vtpty.pty, F_SETFL, O_NONBLOCK|O_NOCTTY);
+  _ctx_add_listen_fd (vt->vtpty.pty);
+}
+
+
+/* vt_new_argv:
+ * argv:         NULL terminated argument vector to run in the terminal,
+ *               or NULL to create a terminal without a child process
+ * width/height: size in pixels, values <= 0 select 640 and 480
+ * font_size:    font size passed to vt_set_font_size
+ * line_spacing: line spacing passed to vt_set_line_spacing
+ * id:           identifier stored in the terminal
+ * can_launch:   forwarded to vt_init
+ *
+ * Allocates and initialises a terminal, optionally starts argv in it and
+ * registers it in ctx_vts. Returns NULL if the allocation fails.
+ */
+VT *vt_new_argv (char **argv, int width, int height, float font_size, float line_spacing, int id, int can_launch)
+{
+  VT *vt                 = calloc (sizeof (VT), 1);
+  if (!vt) return NULL;
+  vt_init (vt, width, height, font_size, line_spacing, id, can_launch);
   vt_set_font_size (vt, font_size);
   vt_set_line_spacing (vt, line_spacing);
-  if (command)
+  if (argv)
     {
-      vt_run_command (vt, command, NULL);
+      vt_run_argv (vt, argv, NULL);
     }
   if (width <= 0) width = 640;
-  if (height <= 0) width = 480;
+  if (height <= 0) height = 480; /* used to overwrite width instead */
   vt_set_px_size (vt, width, height);
 
-  vt->fg_color[0] = 216;
-  vt->fg_color[1] = 216;
-  vt->fg_color[2] = 216;
-  vt->bg_color[0] = 0;
-  vt->bg_color[1] = 0;
-  vt->bg_color[2] = 0;
   vtcmd_reset_to_initial_state (vt, NULL);
   //vt->ctx = ctx_new ();
-  ctx_list_prepend (&vts, vt);
+  ctx_list_prepend (&ctx_vts, vt);
   return vt;
 }
 
+/* string_chop_head:
+ * orig: writable, NUL terminated command line, may be NULL
+ *
+ * Isolates the first argument of orig in place: leading spaces and an
+ * enclosing pair of single or double quotes are removed and the argument
+ * is moved to the start of the buffer and NUL terminated. An unquoted
+ * argument ends at a space, a ';' or the end of the string.
+ *
+ * Returns a pointer to the text following the argument, or NULL when the
+ * argument was the last one (end of string or ';' reached).
+ */
+static char *string_chop_head (char *orig)
+{
+  int j=0;
+  int eat=0; /* number of chars to eat at start */
+  int got_more;
+
+  if (!orig)
+    return NULL;
+
+  while (orig[j] == ' ')
+    {j++;eat++;}
+
+  if (orig[j] == '"' || orig[j] == '\'')
+    {
+      char quote = orig[j];
+      eat++;j++;
+      while (orig[j] != quote &&
+             orig[j] != 0)
+        j++;
+      /* only step over a closing quote that is really there, an
+       * unterminated quote must not advance past the terminator */
+      if (orig[j] == quote)
+        {
+          orig[j]='\0';
+          j++;
+        }
+    }
+  else
+    {
+      while (orig[j] != ' ' &&
+             orig[j] != 0 &&
+             orig[j] != ';')
+        j++;
+    }
+  got_more = !(orig[j] == 0 || orig[j] == ';');
+  orig[j]=0; /* XXX: this is where foo;bar won't work but foo ;bar works*/
+
+  /* shift the argument to the front together with its terminator,
+   * without the terminator "  foo" used to end up as "foooo" */
+  if (eat)
+    memmove (orig, orig + eat, (size_t) (j - eat) + 1);
+
+  if (got_more)
+    return &orig[j+1];
+  return NULL;
+}
+
+
+/* vt_new:
+ * command: command line to run, split into at most 30 arguments with
+ *          string_chop_head; NULL creates a terminal without a child
+ *
+ * The remaining arguments are passed unchanged to vt_new_argv, whose
+ * result is returned. The argument vector is only needed while
+ * vt_new_argv runs, so the scratch copy of command is released afterwards.
+ * Returns NULL if the scratch copy cannot be allocated.
+ */
+VT *vt_new (const char *command, int width, int height, float font_size, float line_spacing, int id, int can_launch)
+{
+  char *cargv[32];
+  int   cargc = 0;
+  char *rest, *copy;
+  VT   *vt;
+
+  if (!command) /* accepted by earlier versions, means: no child */
+    return vt_new_argv (NULL, width, height, font_size, line_spacing, id, can_launch);
+
+  /* one spare zero byte beyond the terminator for string_chop_head */
+  copy = calloc (strlen (command)+2, 1);
+  if (!copy)
+    return NULL;
+  strcpy (copy, command);
+  rest = copy;
+  while (rest && cargc < 30 && rest[0] != ';')
+  {
+    cargv[cargc++] = rest;
+    rest = string_chop_head (rest);
+  }
+  cargv[cargc] = NULL;
+  vt = vt_new_argv ((char**)cargv, width, height, font_size, line_spacing, id, can_launch);
+  free (copy);
+  return vt;
+}
+
+
 int vt_cw (VT *vt)
 {
   return vt->cw;
@@ -39781,7 +42023,7 @@ void vt_set_term_size (VT *vt, int icols, int irows)
   vt->margin_bottom  = vt->rows;
   vt->margin_right   = vt->cols;
   _vt_move_to (vt, vt->cursor_y, vt->cursor_x);
-  vt->rev++;
+  ctx_client_rev_inc (vt->client);
   VT_info ("resize %i %i", irows, icols);
   if (vt->ctxp)
     ctx_parser_free (vt->ctxp);
@@ -39849,7 +42091,7 @@ _vt_move_to (VT *vt, int y, int x)
         }
     }
   VT_cursor ("%i,%i (_vt_move_to)", y, x);
-  vt->rev++;
+  ctx_client_rev_inc (vt->client);
 }
 
 static void vt_scroll (VT *vt, int amount);
@@ -39919,7 +42161,7 @@ static void _vt_add_str (VT *vt, const char *str)
   vt_line_set_style (vt->current_line, vt->cursor_x-1, vt->cstyle);
   vt->cursor_x += 1;
   vt->at_line_home = 0;
-  vt->rev++;
+  ctx_client_rev_inc (vt->client);
 }
 
 static void _vt_backspace (VT *vt)
@@ -39935,7 +42177,7 @@ static void _vt_backspace (VT *vt)
         }
       VT_cursor ("backspace");
     }
-  vt->rev++;
+  ctx_client_rev_inc (vt->client);
 }
 
 static void vtcmd_set_top_and_bottom_margins (VT *vt, const char *sequence)
@@ -40908,7 +43150,7 @@ static void vt_ctx_exit (void *data)
 {
   VT *vt = data;
   vt->state = vt_state_neutral;
-  vt->rev ++;
+  ctx_client_rev_inc (vt->client);
   if (!vt->current_line)
     return;
 #if 0
@@ -41156,8 +43398,8 @@ qagain:
               {
                 if (!vt->current_line->ctx)
                   {
-                    vt->current_line->ctx = ctx_new ();
-                    vt->current_line->ctx_copy = ctx_new ();
+                    vt->current_line->ctx = ctx_new (vt->width, vt->height, "drawlist");
+                    vt->current_line->ctx_copy = ctx_new (vt->width, vt->height, "drawlist");
                     ctx_set_texture_cache (vt->current_line->ctx_copy, vt->current_line->ctx);
                     _ctx_set_transformation (vt->current_line->ctx, 0);
                     _ctx_set_transformation (vt->current_line->ctx_copy, 0);
@@ -41396,12 +43638,12 @@ static void vtcmd_request_mode (VT *vt, const char *sequence)
 static void vtcmd_set_t (VT *vt, const char *sequence)
 {
   /* \e[21y is request title - allows inserting keychars */
-  if      (!strcmp (sequence,  "[1t")) { ctx_client_unshade (vt->id); }
-  else if (!strcmp (sequence,  "[2t")) { ctx_client_shade (vt->id); } 
+  if      (!strcmp (sequence,  "[1t")) { ctx_client_unshade (vt->root_ctx, vt->id); }
+  else if (!strcmp (sequence,  "[2t")) { ctx_client_shade (vt->root_ctx, vt->id); } 
   else if (!strncmp (sequence, "[3;", 3)) {
     int x=0,y=0;
     sscanf (sequence, "[3;%i;%ir", &y, &x);
-    ctx_client_move (vt->id, x, y);
+    ctx_client_move (vt->root_ctx, vt->id, x, y);
   }
   else if (!strncmp (sequence, "[4;", 3))
   {
@@ -41411,11 +43653,13 @@ static void vtcmd_set_t (VT *vt, const char *sequence)
     if (height < 0) height = vt->rows * vt->ch;
     if (width == 0) width = ctx_width (vt->root_ctx);
     if (height == 0) height = ctx_height (vt->root_ctx);
-    ctx_client_resize (vt->id, width, height);
+    ctx_client_resize (vt->root_ctx, vt->id, width, height);
   }
-  else if (!strcmp (sequence, "[5t") ) { ctx_client_raise_top (vt->id); } 
-  else if (!strcmp (sequence, "[6t") ) { ctx_client_lower_bottom (vt->id); } 
-  else if (!strcmp (sequence, "[7t") ) { vt->rev++; /* refresh */ }
+  else if (!strcmp (sequence, "[5t") ) { ctx_client_raise_top (vt->root_ctx, vt->id); } 
+  else if (!strcmp (sequence, "[6t") ) { ctx_client_lower_bottom (vt->root_ctx, vt->id); } 
+  else if (!strcmp (sequence, "[7t") ) { 
+          ctx_client_rev_inc (vt->client);
+          /* refresh */ }
   else if (!strncmp (sequence, "[8;", 3) )
   {
     int cols = 0, rows = 0;
@@ -41424,20 +43668,20 @@ static void vtcmd_set_t (VT *vt, const char *sequence)
     if (rows < 0) rows = vt->rows;
     if (cols == 0) cols = ctx_width (vt->root_ctx) / vt->cw;
     if (rows == 0) rows = ctx_height (vt->root_ctx) / vt->ch;
-    ctx_client_resize (vt->id, cols * vt->cw, rows * vt->ch);
+    ctx_client_resize (vt->root_ctx, vt->id, cols * vt->cw, rows * vt->ch);
   }
-  else if (!strcmp (sequence, "[9;0t") ) { ctx_client_unmaximize (vt->id); } 
-  else if (!strcmp (sequence, "[9;1t") ) { ctx_client_maximize (vt->id);} 
+  else if (!strcmp (sequence, "[9;0t") ) { ctx_client_unmaximize (vt->root_ctx, vt->id); } 
+  else if (!strcmp (sequence, "[9;1t") ) { ctx_client_maximize (vt->root_ctx, vt->id);} 
 
   /* should actually be full-screen */
-  else if (!strcmp (sequence, "[10;0t") ) { ctx_client_unmaximize (vt->id); } 
-  else if (!strcmp (sequence, "[10;1t") ) { ctx_client_maximize (vt->id);} 
-  else if (!strcmp (sequence, "[10;2t") ) { ctx_client_toggle_maximized (vt->id);} 
+  else if (!strcmp (sequence, "[10;0t") ) { ctx_client_unmaximize (vt->root_ctx, vt->id); } 
+  else if (!strcmp (sequence, "[10;1t") ) { ctx_client_maximize (vt->root_ctx, vt->id);} 
+  else if (!strcmp (sequence, "[10;2t") ) { ctx_client_toggle_maximized (vt->root_ctx, vt->id);} 
 
   else if (!strcmp (sequence, "[11t") )  /* report window state  */
     {
       char buf[128];
-      if (ctx_client_is_iconified (vt->id))
+      if (ctx_client_is_iconified (vt->root_ctx, vt->id))
         sprintf (buf, "\033[2t");
       else
         sprintf (buf, "\033[1t");
@@ -41446,7 +43690,7 @@ static void vtcmd_set_t (VT *vt, const char *sequence)
   else if (!strcmp (sequence, "[13t") ) /* request terminal position */
     {
       char buf[128];
-      sprintf (buf, "\033[3;%i;%it", ctx_client_y (vt->id), ctx_client_x (vt->id));
+      sprintf (buf, "\033[3;%i;%it", ctx_client_y (vt->root_ctx, vt->id), ctx_client_x (vt->root_ctx, 
vt->id));
       vt_write (vt, buf, strlen (buf) );
     }
   else if (!strcmp (sequence, "[14t") ) /* request terminal dimensions in px */
@@ -41615,12 +43859,54 @@ static void vtcmd_graphics (VT *vt, const char *sequence)
 {
   fprintf (stderr, "gfx intro [%s]\n",sequence); // maybe implement such as well?
 }
+void vt_audio_task (VT *vt, int click);
+
+#if CTX_TILED
+/* Invokes the show_frame callback of the tiled backend of ctx; block is
+ * passed through unchanged. The backend is cast without a check, so the
+ * caller has to make sure it really is a tiled one. */
+static void ctx_show_frame (Ctx *ctx, int block)
+{
+  CtxTiled *tiled = (CtxTiled*)(ctx->backend);
+  tiled->show_frame (tiled, block);
+}
+#endif
+
+/* ctx_wait_frame:
+ *
+ * For tiled backends: polls until the frame that was being rendered on
+ * entry has been shown, giving up after 500 rounds of a 5 microsecond
+ * sleep each. When audio is compiled in, the audio task of vt is serviced
+ * on every round. Running out of rounds is reported on stderr. Other
+ * backends return immediately.
+ */
+static void ctx_wait_frame (Ctx *ctx, VT *vt)
+{
+#if CTX_TILED
+  if (!ctx_backend_is_tiled (ctx))
+    return;
+
+  CtxTiled *tiled     = (CtxTiled*)(ctx->backend);
+  int       remaining = 500;
+  /* tiled->frame and tiled->render_frame are expected to be equal, unless
+   * something else has timed out */
+  int       target    = tiled->render_frame;
+
+  ctx_show_frame (ctx, 0);
+  while (target > tiled->shown_frame && remaining-- > 0)
+  {
+    usleep (5);
+#if CTX_AUDIO
+    vt_audio_task (vt, 0);
+#endif
+    ctx_show_frame (ctx, 0);
+  }
+  if (remaining <= 0)
+    fprintf (stderr, "[wait-drop]");
+#endif
+}
 
 static void vtcmd_report (VT *vt, const char *sequence)
 {
   char buf[64]="";
   if (!strcmp (sequence, "[5n") ) // DSR device status report
     {
+      ctx_wait_frame (vt->root_ctx, vt);
       sprintf (buf, "\033[0n"); // we're always OK :)
     }
   else if (!strcmp (sequence, "[?15n") ) // printer status
@@ -41668,7 +43954,8 @@ static void vtcmd_report (VT *vt, const char *sequence)
         { sprintf (buf, "\033[2;1;1;120;120;1;0x"); }
     }
   if (buf[0])
-    { vt_write (vt, buf, strlen (buf) ); }
+    { vt_write (vt, buf, strlen (buf) );
+    }
 }
 
 static char *charmap_cp437[]=
@@ -41950,7 +44237,7 @@ ESC [ 2 0 0 ~,
 {
   int i0 = strlen (sequence)-1;
   int i;
-  vt->rev ++;
+  ctx_client_rev_inc (vt->client);
   for (i = 0; sequences[i].prefix; i++)
     {
       if (!strncmp (sequence, sequences[i].prefix, strlen (sequences[i].prefix) ) )
@@ -42366,6 +44653,7 @@ void vt_gfx (VT *vt, const char *command)
           vt->gfx.data_size = actual_uncompressed_size;
           vt->gfx.compression = 0;
         }
+#ifdef STBI_INCLUDE_STB_IMAGE_H
       if (vt->gfx.format == 100)
         {
           int channels;
@@ -42381,6 +44669,7 @@ void vt_gfx (VT *vt, const char *command)
           vt->gfx.data = new_data;
           vt->gfx.data_size= vt->gfx.buf_width * vt->gfx.buf_height * 4;
         }
+#endif
       Image *image = NULL;
       switch (vt->gfx.action)
         {
@@ -42782,7 +45071,7 @@ static void vt_sixels (VT *vt, const char *sixels)
       vt_line_feed (vt);
       vt_carriage_return (vt);
     }
-  vt->rev++;
+  ctx_client_rev_inc (vt->client);
 }
 
 static inline void vt_ctx_unrled (VT *vt, char byte)
@@ -43202,6 +45491,7 @@ static void vt_state_osc (VT *vt, int byte)
 #if 0
     {"]1337;key=value:base64data\b\",  0, vtcmd_erase_in_line, VT100}, /* args:keyvalue id: iterm2 graphics 
*/ "
 #endif
+#ifdef STBI_INCLUDE_STB_IMAGE_H
           case 1337:
             if (!strncmp (&vt->argument_buf[6], "File=", 5) )
               {
@@ -43375,6 +45665,7 @@ static void vt_state_osc (VT *vt, int byte)
                 }
               }
             break;
+#endif
           case 104:
             break;
           case 8:
@@ -43956,28 +46247,28 @@ void vt_feed_keystring (VT *vt, CtxEvent *event, const char *str)
   else if (!strcmp (str, "shift-control-home"))
     {
       vt_set_scroll (vt, vt->scrollback_count);
-      vt_rev_inc (vt);
+      ctx_client_rev_inc (vt->client);
       return;
     }
   else if (!strcmp (str, "shift-control-end"))
     {
       int new_scroll = 0;
       vt_set_scroll (vt, new_scroll);
-      vt_rev_inc (vt);
+      ctx_client_rev_inc (vt->client);
       return;
     }
   else if (!strcmp (str, "shift-control-down"))
     {
       int new_scroll = vt_get_scroll (vt) - 1;
       vt_set_scroll (vt, new_scroll);
-      vt_rev_inc (vt);
+      ctx_client_rev_inc (vt->client);
       return;
     }
   else if (!strcmp (str, "shift-control-up"))
     {
       int new_scroll = vt_get_scroll (vt) + 1;
       vt_set_scroll (vt, new_scroll);
-      vt_rev_inc (vt);
+      ctx_client_rev_inc (vt->client);
       return;
     }
   else if (!strcmp (str, "shift-page-up") ||
@@ -43985,7 +46276,7 @@ void vt_feed_keystring (VT *vt, CtxEvent *event, const char *str)
     {
       int new_scroll = vt_get_scroll (vt) + vt_get_rows (vt) /2;
       vt_set_scroll (vt, new_scroll);
-      vt_rev_inc (vt);
+      ctx_client_rev_inc (vt->client);
       return;
     }
   else if (!strcmp (str, "shift-page-down") ||
@@ -43994,7 +46285,7 @@ void vt_feed_keystring (VT *vt, CtxEvent *event, const char *str)
       int new_scroll = vt_get_scroll (vt) - vt_get_rows (vt) /2;
       if (new_scroll < 0) { new_scroll = 0; }
       vt_set_scroll (vt, new_scroll);
-      vt_rev_inc (vt);
+      ctx_client_rev_inc (vt->client);
       return;
     }
   else if (!strcmp (str, "shift-control--") ||
@@ -44035,11 +46326,11 @@ void vt_feed_keystring (VT *vt, CtxEvent *event, const char *str)
       vt_set_local (vt, !vt_get_local (vt) );
       return;
     }
-  else if (!strncmp (str, "mouse-", 5) )
+  else if (str[0]=='p' && str[1] != 0 && str[2] == ' ')
     {
       int cw = vt_cw (vt);
       int ch = vt_ch (vt);
-      if (!strncmp (str + 6, "motion", 6) )
+      if (!strncmp (str, "pm", 2))
         {
           int x = 0, y = 0;
           char *s = strchr (str, ' ');
@@ -44054,7 +46345,7 @@ void vt_feed_keystring (VT *vt, CtxEvent *event, const char *str)
                 }
             }
         }
-      else if (!strncmp (str + 6, "press", 5) )
+      else if (!strncmp (str, "pp", 2))
         {
           int x = 0, y = 0, b = 0;
           char *s = strchr (str, ' ');
@@ -44075,7 +46366,7 @@ void vt_feed_keystring (VT *vt, CtxEvent *event, const char *str)
             }
           //clients[active].drawn_rev = 0;
         }
-      else if (!strncmp (str + 6, "drag", 4) )
+      else if (!strncmp (str, "pd", 2))
         {
           int x = 0, y = 0, b = 0; // XXX initialize B
           char *s = strchr (str, ' ');
@@ -44095,7 +46386,7 @@ void vt_feed_keystring (VT *vt, CtxEvent *event, const char *str)
             }
           //clients[active].drawn_rev = 0;
         }
-      else if (!strncmp (str + 6, "release", 7) )
+      else if (!strncmp (str, "pr", 2))
         {
           int x = 0, y = 0, b = 0;
           char *s = strchr (str, ' ');
@@ -44185,6 +46476,7 @@ done:
     }
 }
 
+
 void vt_paste (VT *vt, const char *str)
 {
   if (vt->bracket_paste)
@@ -44198,7 +46490,7 @@ void vt_paste (VT *vt, const char *str)
     }
 }
 
-const char *vt_find_shell_command (void)
+const char *ctx_find_shell_command (void)
 {
   if (access ("/.flatpak-info", F_OK) != -1)
   {
@@ -44242,116 +46534,8 @@ const char *vt_find_shell_command (void)
   return command;
 }
 
-static char *string_chop_head (char *orig) /* return pointer to reset after arg */
-{
-  int j=0;
-  int eat=0; /* number of chars to eat at start */
 
-  if(orig)
-    {
-      int got_more;
-      char *o = orig;
-      while(o[j] == ' ')
-        {j++;eat++;}
 
-      if (o[j]=='"')
-        {
-          eat++;j++;
-          while(o[j] != '"' &&
-                o[j] != 0)
-            j++;
-          o[j]='\0';
-          j++;
-        }
-      else if (o[j]=='\'')
-        {
-          eat++;j++;
-          while(o[j] != '\'' &&
-                o[j] != 0)
-            j++;
-          o[j]='\0';
-          j++;
-        }
-      else
-        {
-          while(o[j] != ' ' &&
-                o[j] != 0 &&
-                o[j] != ';')
-            j++;
-        }
-      if (o[j] == 0 ||
-          o[j] == ';')
-        got_more = 0;
-      else
-        got_more = 1;
-      o[j]=0; /* XXX: this is where foo;bar won't work but foo ;bar works*/
-
-      if(eat)
-       {
-         int k;
-         for (k=0; k<j-eat; k++)
-           orig[k] = orig[k+eat];
-       }
-      if (got_more)
-        return &orig[j+1];
-    }
-  return NULL;
-}
-
-void _ctx_add_listen_fd (int fd);
-void _ctx_remove_listen_fd (int fd);
-
-static pid_t
-vt_forkpty (int  *amaster,
-            char *aname,
-            const struct termios *termp,
-            const struct winsize *winsize)
-{
-  pid_t pid;
-  int master = posix_openpt (O_RDWR|O_NOCTTY);
-  int slave;
-
-  if (master < 0)
-    return -1;
-  if (grantpt (master) != 0)
-    return -1;
-  if (unlockpt (master) != 0)
-    return -1;
-#if 0
-  char name[1024];
-  if (ptsname_r (master, name, sizeof(name)-1))
-    return -1;
-#else
-  char *name = NULL;
-  if ((name = ptsname (master)) == NULL)
-    return -1;
-#endif
-
-  slave = open(name, O_RDWR|O_NOCTTY);
-
-  if (termp)   tcsetattr(slave, TCSAFLUSH, termp);
-  if (winsize) ioctl(slave, TIOCSWINSZ, winsize);
-
-  pid = fork();
-  if (pid < 0)
-  {
-    return pid;
-  } else if (pid == 0)
-  {
-    close (master);
-    setsid ();
-    dup2 (slave, STDIN_FILENO);
-    dup2 (slave, STDOUT_FILENO);
-    dup2 (slave, STDERR_FILENO);
-
-    close (slave);
-    return 0;
-  }
-  ioctl (slave, TIOCSCTTY, NULL);
-  close (slave);
-  *amaster = master;
-  return pid;
-}
 
 static void vt_run_command (VT *vt, const char *command, const char *term)
 {
@@ -44371,48 +46555,7 @@ static void vt_run_command (VT *vt, const char *command, const char *term)
   vt->vtpty.pid = vt_forkpty (&vt->vtpty.pty, NULL, NULL, &ws);
   if (vt->vtpty.pid == 0)
     {
-      int i;
-      if (was_pidone)
-      {
-        if (setuid(1000)) fprintf (stderr, "setuid failed\n");
-      }
-      else
-      {
-        for (i = 3; i<768; i++) { close (i); } /*hack, trying to close xcb */
-      }
-      unsetenv ("TERM");
-      unsetenv ("COLUMNS");
-      unsetenv ("LINES");
-      unsetenv ("TERMCAP");
-      unsetenv ("COLOR_TERM");
-      unsetenv ("COLORTERM");
-      unsetenv ("VTE_VERSION");
-      unsetenv ("CTX_BACKEND");
-      //setenv ("TERM", "ansi", 1);
-      //setenv ("TERM", "vt102", 1);
-      //setenv ("TERM", "vt100", 1);
-      // setenv ("TERM", term?term:"xterm", 1);
-      setenv ("TERM", term?term:"xterm-256color", 1);
-      setenv ("COLORTERM", "truecolor", 1);
-      //setenv ("CTX_VERSION", "0", 1);
-      setenv ("CTX_BACKEND", "ctx", 1); // speeds up launching of clients
-
-      {
-        char *cargv[32];
-        int   cargc;
-        char *rest, *copy;
-        copy = calloc (strlen (command)+2, 1);
-        strcpy (copy, command);
-        rest = copy;
-        cargc = 0;
-        while (rest && cargc < 30 && rest[0] != ';')
-        {
-          cargv[cargc++] = rest;
-          rest = string_chop_head (rest);
-        }
-        cargv[cargc] = NULL;
-        execvp (cargv[0], cargv);
-      }
+      ctx_child_prepare_env (was_pidone, term);
       exit (0);
     }
   else if (vt->vtpty.pid < 0)
@@ -44424,6 +46567,7 @@ static void vt_run_command (VT *vt, const char *command, const char *term)
   _ctx_add_listen_fd (vt->vtpty.pty);
 }
 
+
 void vt_destroy (VT *vt)
 {
   while (vt->lines)
@@ -44442,7 +46586,7 @@ void vt_destroy (VT *vt)
   //if (vt->ctx)
   //  { ctx_free (vt->ctx); }
   free (vt->argument_buf);
-  ctx_list_remove (&vts, vt);
+  ctx_list_remove (&ctx_vts, vt);
   kill (vt->vtpty.pid, 9);
   _ctx_remove_listen_fd (vt->vtpty.pty);
   close (vt->vtpty.pty);
@@ -46135,7 +48279,9 @@ void vt_ctx_glyph (Ctx *ctx, VT *vt, float x, float y, int unichar, int bold, fl
   scale_x *= vt->scale_x;
   scale_y *= vt->scale_y;
 
-  if (!ctx_renderer_is_term (ctx))
+  CtxBackendType backend_type = ctx_backend_type (ctx);
+
+  if (backend_type != CTX_BACKEND_TERM)
   {
     // TODO : use our own special glyphs when glyphs are not passed through
     if (!vt_special_glyph (ctx, vt, x, y + offset_y * vt->ch, vt->cw * scale_x, vt->ch * scale_y, unichar) )
@@ -46165,8 +48311,7 @@ void vt_ctx_glyph (Ctx *ctx, VT *vt, float x, float y, int unichar, int bold, fl
   }
   y -= vt->font_size * 0.22;
   if (bold
-      && !ctx_renderer_is_term (ctx)
-     )
+      && backend_type != CTX_BACKEND_TERM)
     {
       ctx_move_to (ctx, x - vt->font_size/30.0, y);
       //ctx_line_width (ctx, vt->font_size/30.0);
@@ -46676,10 +48821,101 @@ static char *primary = NULL;
 static void scrollbar_drag (CtxEvent *event, void *data, void *data2);
 static int scrollbar_down = 0;
 
+void ctx_client_mouse_event (CtxEvent *event, void *data, void *data2)
+{ /* Pointer callback for a client: data is the CtxClient, data2 is only forwarded to scrollbar_drag. */
+  CtxClient *client = data;
+  if (!client)
+  {
+    event->stop_propagate = 1; /* no client to deliver to: consume the event */
+    return;
+  }
+  VT *vt = client->vt;
+  /* Copy out the event fields that are formatted into the key strings below. */
+  float  x = event->x;
+  float  y = event->y;
+  int device_no = event->device_no;
+  char buf[128]="";
+  /* With a VT the event becomes a key string; without one a copy of the event is queued on the client. */
+  if (vt)
+  {
+  if ((!vt->in_alt_screen) && /* scrollbar handling only applies outside the alt screen */
+      (event->x > vt->width - vt->cw * 1.5 || scrollbar_down) && /* x past vt->width - 1.5 * vt->cw, or scrollbar_down set */
+      (event->type == CTX_DRAG_MOTION ||
+      event->type == CTX_DRAG_PRESS ||
+      event->type == CTX_DRAG_RELEASE))
+    return scrollbar_drag (event, data, data2); /* NOTE(review): data here is the CtxClient, not vt - confirm scrollbar_drag expects that */
+  switch (event->type)
+  {
+    case CTX_MOTION:
+    case CTX_DRAG_MOTION:
+      //if (event->device_no==1)
+      {
+        sprintf (buf, "pm %.0f %.0f %i", x, y, device_no); /* "pm" is the prefix vt_feed_keystring matches for motion */
+//      ctx_queue_draw (event->ctx);
+        ctx_client_lock (client);
+        vt_feed_keystring (vt, event, buf);
+        ctx_client_unlock (client);
+//      vt->rev++;
+      }
+      break;
+    case CTX_DRAG_PRESS:
+      if (event->device_no==2) /* device 2: paste the primary string, if one is set */
+      {
+        if (primary)
+        {
+          if (vt) /* always true here: this code is inside the outer if (vt) */
+            vt_paste (vt, primary);
+        }
+      }
+      else if (event->device_no==3 && !vt->in_alt_screen) /* device 3 outside the alt screen: only sets the popped flag */
+      {
+        vt->popped = 1;
+      }
+      else
+      {
+        sprintf (buf, "pp %.0f %.0f %i", x, y, device_no); /* "pp" is the prefix vt_feed_keystring matches for press */
+        ctx_client_lock (client);
+        vt_feed_keystring (vt, event, buf);
+        ctx_client_unlock (client);
+//      ctx_queue_draw (event->ctx);
+//      vt->rev++;
+      }
+      break;
+    case CTX_DRAG_RELEASE:
+      if (event->device_no==3 && !vt->in_alt_screen) /* device 3 outside the alt screen: clear the popped flag */
+      {
+        vt->popped = 0;
+      }
+        ctx_queue_draw (event->ctx); /* not part of the if above: every release queues a draw and is sent as "pr" */
+        sprintf (buf, "pr %.0f %.0f %i", x, y, device_no);
+        ctx_client_lock (client);
+        vt_feed_keystring (vt, event, buf);
+        ctx_client_unlock (client);
+      break;
+    default:
+      // any other event type: return without setting stop_propagate
+      return;
+      break; /* unreachable after the return above */
+  }
+  }
+  else
+  {
+     CtxEvent *copy = ctx_event_copy (event); /* no VT: keep a copy of the event on client->ctx_events */
+     ctx_list_append (&client->ctx_events, copy);
+  }
+  event->stop_propagate = 1; /* reached by every path that did not return early */
+//vt->rev++;
+}
+
 void vt_mouse_event (CtxEvent *event, void *data, void *data2)
 {
   VT   *vt = data;
   CtxClient *client = vt_get_client (vt);
+  if (!client)
+  {
+    event->stop_propagate = 1;
+    return;
+  }
   float  x = event->x;
   float  y = event->y;
   int device_no = event->device_no;
@@ -46696,8 +48932,8 @@ void vt_mouse_event (CtxEvent *event, void *data, void *data2)
     case CTX_DRAG_MOTION:
       //if (event->device_no==1)
       {
-        sprintf (buf, "mouse-motion %.0f %.0f %i", x, y, device_no);
-//      ctx_set_dirty (event->ctx, 1);
+        sprintf (buf, "pm %.0f %.0f %i", x, y, device_no);
+//      ctx_queue_draw (event->ctx);
         ctx_client_lock (client);
         vt_feed_keystring (vt, event, buf);
         ctx_client_unlock (client);
@@ -46719,11 +48955,11 @@ void vt_mouse_event (CtxEvent *event, void *data, void *data2)
       }
       else
       {
-        sprintf (buf, "mouse-press %.0f %.0f %i", x, y, device_no);
+        sprintf (buf, "pp %.0f %.0f %i", x, y, device_no);
         ctx_client_lock (client);
         vt_feed_keystring (vt, event, buf);
         ctx_client_unlock (client);
-//      ctx_set_dirty (event->ctx, 1);
+//      ctx_queue_draw (event->ctx);
 //      vt->rev++;
       }
       break;
@@ -46732,8 +48968,8 @@ void vt_mouse_event (CtxEvent *event, void *data, void *data2)
       {
         vt->popped = 0;
       }
-        ctx_set_dirty (event->ctx, 1);
-        sprintf (buf, "mouse-release %.0f %.0f %i", x, y, device_no);
+        ctx_queue_draw (event->ctx);
+        sprintf (buf, "pr %.0f %.0f %i", x, y, device_no);
         ctx_client_lock (client);
         vt_feed_keystring (vt, event, buf);
         ctx_client_unlock (client);
@@ -46774,11 +49010,11 @@ static void scrollbar_drag (CtxEvent *event, void *data, void *data2)
   float disp_lines = vt->rows;
   float tot_lines = vt->line_count + vt->scrollback_count;
 
-  vt->scroll = tot_lines - disp_lines - (event->y*1.0/ ctx_client_height (vt->id)) * tot_lines + 
disp_lines/2;
+  vt->scroll = tot_lines - disp_lines - (event->y*1.0/ ctx_client_height (vt->root_ctx, vt->id)) * tot_lines 
+ disp_lines/2;
   if (vt->scroll < 0) { vt->scroll = 0.0; }
   if (vt->scroll > vt->scrollback_count) { vt->scroll = vt->scrollback_count; }
-  vt->rev++;
-  ctx_set_dirty (event->ctx, 1);
+  ctx_client_rev_inc (vt->client);
+  ctx_queue_draw (event->ctx);
   event->stop_propagate = 1;
 
   switch (event->type)
@@ -46815,8 +49051,8 @@ static void test_popup (Ctx *ctx, void *data)
 {
   VT *vt = data;
 
-  float x = ctx_client_x (vt->id);
-  float y = ctx_client_y (vt->id);
+  float x = ctx_client_x (vt->root_ctx, vt->id);
+  float y = ctx_client_y (vt->root_ctx, vt->id);
   ctx_rectangle (ctx, x, y, 100, 100);
   ctx_rgb (ctx, 1,0,0);
   ctx_fill (ctx);
@@ -46866,6 +49102,19 @@ void vt_use_images (VT *vt, Ctx *ctx)
 }
 
 
+void ctx_client_register_events (CtxClient *client, Ctx *ctx, double x0, double y0)
+{ /* Registers ctx_client_mouse_event for CTX_DRAG and CTX_MOTION, with a client-sized rectangle at (x0, y0) as the path. */
+  ctx_begin_path (ctx);
+  ctx_save (ctx); /* paired with ctx_restore below; presumably keeps the translate from persisting - confirm */
+  ctx_translate (ctx, x0, y0);
+  ctx_rectangle (ctx, 0, 0, client->width, client->height);
+  ctx_listen (ctx, CTX_DRAG,   ctx_client_mouse_event, client, NULL); /* client arrives as the callback's data; data2 is NULL */
+  ctx_listen (ctx, CTX_MOTION, ctx_client_mouse_event, client, NULL);
+  ctx_begin_path (ctx); /* presumably discards the rectangle so it is not drawn - confirm */
+  ctx_restore (ctx);
+}
+
+#if 0
 void vt_register_events (VT *vt, Ctx *ctx, double x0, double y0)
 {
   ctx_begin_path (ctx);
@@ -46877,6 +49126,7 @@ void vt_register_events (VT *vt, Ctx *ctx, double x0, double y0)
   ctx_begin_path (ctx);
   ctx_restore (ctx);
 }
+#endif
 
 void vt_draw (VT *vt, Ctx *ctx, double x0, double y0)
 {
@@ -47139,7 +49389,7 @@ void vt_draw (VT *vt, Ctx *ctx, double x0, double y0)
            {
              vt->scroll_offset = 0;
              vt->in_smooth_scroll = 0;
-             vt->rev++;
+             ctx_client_rev_inc (vt->client);
            }
        }
      else
@@ -47149,7 +49399,7 @@ void vt_draw (VT *vt, Ctx *ctx, double x0, double y0)
            {
              vt->scroll_offset = 0;
              vt->in_smooth_scroll = 0;
-             vt->rev++;
+             ctx_client_rev_inc (vt->client);
            }
        }
    }
@@ -47308,7 +49558,7 @@ void vt_mouse (VT *vt, CtxEvent *event, VtMouseEvent type, int button, int x, in
 {
  char buf[64]="";
  int button_state = 0;
- vt->rev++;
+ ctx_client_rev_inc (vt->client);
  ctx_ticks();
  if ((! (vt->mouse | vt->mouse_all | vt->mouse_drag)) ||
      (event && (event->state & CTX_MODIFIER_STATE_SHIFT)))
@@ -47413,7 +49663,7 @@ void vt_mouse (VT *vt, CtxEvent *event, VtMouseEvent type, int button, int x, in
          vt->select_begin_x = px_x;
          vt->select_begin_y = px_y;
          prev_press_time = ctx_ticks ();
-         vt->rev++;
+         ctx_client_rev_inc (vt->client);
        }
      else if (type == VT_MOUSE_RELEASE)
        {
@@ -47472,7 +49722,7 @@ void vt_mouse (VT *vt, CtxEvent *event, VtMouseEvent type, int button, int x, in
              vt->scroll = 0.0f;
          }
 
-         vt->rev++;
+         ctx_client_rev_inc (vt->client);
        }
      return;
    }
@@ -47533,6 +49783,15 @@ void vt_set_ctx (VT *vt, Ctx *ctx)
 {
   vt->root_ctx = ctx;
 }
+#endif
+
+float ctx_target_fps = 100.0; /* this might end up being the resolution of our
+                                 idle callback firing
+                               */
+
+#if CTX_CLIENTS
+
+
 #ifndef _DEFAULT_SOURCE
 #define _DEFAULT_SOURCE
 #endif
@@ -47569,48 +49828,34 @@ extern Ctx *ctx;
 #define CTX_maximize     CTX_STRH('m','a','x','i','m','i','z','e',0,0,0,0,0,0)
 #define CTX_unmaximize   CTX_STRH('u','n','m','a','x','i','m','i','z','e',0,0,0,0)
 //#define CTX_width        CTX_STRH('w','i','d','t','h',0,0,0,0,0,0,0,0,0)
-//#define CTX_title        CTX_STRH('t','i','t','l','e',0,0,0,0,0,0,0,0,0)
-#define CTX_title        15643372
 #define CTX_action       CTX_STRH('a','c','t','i','o','n',0,0,0,0,0,0,0,0)
 //#define CTX_height       CTX_STRH('h','e','i','g','h','t',0,0,0,0,0,0,0,0)
 
-void terminal_update_title  (const char *title);
-int  ctx_renderer_is_sdl    (Ctx *ctx);
-int  ctx_renderer_is_fb     (Ctx *ctx);
-int  ctx_renderer_is_kms    (Ctx *ctx);
-int  ctx_renderer_is_tiled  (Ctx *ctx);
-int  ctx_renderer_is_term   (Ctx *ctx);
-void ctx_sdl_set_fullscreen (Ctx *ctx, int val);
-int  ctx_sdl_get_fullscreen (Ctx *ctx);
-float ctx_target_fps = 25.0;
+void terminal_update_title    (const char *title);
+void ctx_sdl_set_fullscreen   (Ctx *ctx, int val);
+int  ctx_sdl_get_fullscreen   (Ctx *ctx);
 static int ctx_fetched_bytes = 1;
 
 CtxClient *vt_get_client (VT *vt);
 
-CtxList *vts = NULL;
-
-void ctx_clients_signal_child (int signum)
+void ctx_client_set_title        (Ctx *ctx, int id, const char *title)
 {
-  pid_t pid;
-  int   status;
-  if ( (pid = waitpid (-1, &status, WNOHANG) ) != -1)
-    {
-      if (pid)
-        {
-          for (CtxList *l = vts; l; l=l->next)
-            {
-              VtPty *vt = l->data;
-              if (vt->pid == pid)
-                {
-                  vt->done = 1;
-                  //vt->result = status;
-                }
-            }
-        }
-    }
+   CtxClient *client = ctx_client_by_id (ctx, id);
+   if (!client)
+     return;
+   if (client->title)
+     free (client->title);
+   client->title = NULL;
+   if (title)
+     client->title = strdup (title);
+}
+const char *ctx_client_get_title (Ctx *ctx, int id)
+{
+   CtxClient *client = ctx_client_by_id (ctx, id);
+   if (!client)
+     return NULL;
+   return client->title;
 }
-
-
 
 int vt_set_prop (VT *vt, uint32_t key_hash, const char *val)
 {
@@ -47622,11 +49867,11 @@ int vt_set_prop (VT *vt, uint32_t key_hash, const char *val)
        CtxClient *client = vt_get_client (vt);
        if (client)
        {
-         if (client->title) free (client->title);
-         client->title = strdup (val);
+         ctx_client_set_title (vt->root_ctx, client->id, val);
+         //if (client->title) free (client->title);
+         //client->title = strdup (val);
        }
      }
-
      break;
   }
 #else
@@ -47680,18 +49925,13 @@ int vt_set_prop (VT *vt, uint32_t key_hash, const char *val)
 
 static float _ctx_font_size = 10.0;
 
-CtxList *clients = NULL;
-CtxClient *active = NULL;
-CtxClient *active_tab = NULL;
-
-static CtxClient *ctx_client_by_id (int id);
 
-int ctx_client_resize (int id, int width, int height);
-void ctx_client_maximize (int id);
+int ctx_client_resize (Ctx *ctx, int id, int width, int height);
+void ctx_client_maximize (Ctx *ctx, int id);
 
 CtxClient *vt_get_client (VT *vt)
 {
-  for (CtxList *l = clients; l; l =l->next)
+  for (CtxList *l = ctx_clients (vt->root_ctx); l; l =l->next)
   {
     CtxClient *client = l->data;
     if (client->vt == vt)
@@ -47700,19 +49940,18 @@ CtxClient *vt_get_client (VT *vt)
   return NULL;
 }
 
-CtxClient *ctx_client_new (Ctx *ctx,
-                           const char *commandline,
-                           int x, int y, int width, int height,
-                           float font_size,
-                           CtxClientFlags flags,
-                           void *user_data,
-                           CtxClientFinalize finalize)
+static void ctx_client_init (Ctx *ctx, CtxClient *client, int x, int y, int width, int height, float 
font_size,
+                             CtxClientFlags flags, void *user_data, CtxClientFinalize finalize)
 {
   static int global_id = 0;
+
+
   if (font_size <= 0.0) font_size = ctx_get_font_size (ctx);
-  CtxClient *client = calloc (sizeof (CtxClient), 1);
-  ctx_list_append (&clients, client);
-  client->id = global_id++;
+  if (ctx_backend_type (ctx) == CTX_BACKEND_TERM)
+  {
+    font_size = 3;
+  }
+  client->id = ++global_id; // starting at 1 is nicer, then we can use 0 for none
   client->x = x;
   client->y = y;
   client->flags = flags;
@@ -47721,43 +49960,77 @@ CtxClient *ctx_client_new (Ctx *ctx,
   client->height = height;
   client->user_data = user_data;
   client->finalize = finalize;
-
-  if (ctx_renderer_is_term (ctx))
-  {
-    font_size = 3;
-  }
+  client->opacity = 1.0f;
 
       //fprintf (stderr, "client new:%f\n", font_size);
 #if CTX_THREADS
   mtx_init (&client->mtx, mtx_plain);
 #endif
+}
+
+CtxClient *ctx_client_new (Ctx *ctx, /* context whose events.clients list receives the new client */
+                           const char *commandline, /* passed through to vt_new */
+                           int x, int y, int width, int height,
+                           float font_size,
+                           CtxClientFlags flags,
+                           void *user_data,
+                           CtxClientFinalize finalize)
+{ /* Allocates a client, lists it on ctx, initializes it and attaches a VT created from commandline. */
+  CtxClient *client = calloc (sizeof (CtxClient), 1); /* NOTE(review): the calloc result is used unchecked */
+  ctx_list_append (&ctx->events.clients, client); /* appended before init; ctx_client_new_argv does these two steps in the other order */
+  ctx_client_init (ctx, client, x, y, width, height, font_size, flags, user_data, finalize);
   float line_spacing = 2.0f;
   client->vt = vt_new (commandline, width, height, font_size,line_spacing, client->id, (flags & 
ITK_CLIENT_CAN_LAUNCH)!=0);
+  client->vt->client = client; /* back-pointer read by the ctx_client_rev_inc (vt->client) calls */
   vt_set_ctx (client->vt, ctx);
   return client;
 }
 
-CtxClient *ctx_client_new_argv (Ctx *ctx, const char **argv, int x, int y, int width, int height, float 
font_size, CtxClientFlags flags, void *user_data, CtxClientFinalize finalize)
+CtxClient *ctx_client_new_argv (Ctx *ctx, char **argv, int x, int y, int width, int height, float font_size, 
CtxClientFlags flags, void *user_data, CtxClientFinalize finalize)
 {
-  CtxString *string = ctx_string_new ("");
-  for (int i = 0; argv[i]; i++)
-  {
-    char space = ' ';
-    if (i > 0)
-      ctx_string_append_data (string, &space, 1);
-    for (int c = 0; argv[i][c]; c++)
-    {
-       switch (argv[i][c])
-       {
-         case '"':ctx_string_append_str (string, "\\\"");break;
-         case '\'':ctx_string_append_str (string, "\\\'");break;
-         default:ctx_string_append_data (string, &argv[i][c], 1);break;
-       }
-    }
-  }
-  CtxClient *ret = ctx_client_new (ctx, string->str, x, y, width, height, font_size, flags, user_data, 
finalize);
-  ctx_string_free (string, 1);
-  return ret;
+
+  CtxClient *client = calloc (sizeof (CtxClient), 1);
+  ctx_client_init (ctx, client, x, y, width, height, font_size, flags, user_data, finalize);
+  ctx_list_append (&ctx->events.clients, client);
+
+  float line_spacing = 2.0f;
+  client->vt = vt_new_argv (argv, width, height, font_size,line_spacing, client->id, (flags & 
ITK_CLIENT_CAN_LAUNCH)!=0);
+  client->vt->client = client;
+  vt_set_ctx (client->vt, ctx);
+  return client;
+}
+
+static void *launch_client_thread (void *data)
+{ /* Thread entry passed to thrd_create by ctx_client_new_thread; data is the CtxClient. NOTE(review): returns void *, so thrd_create is presumably pthread-backed - confirm. */
+  CtxClient *client = data;
+  /* NOTE(review): "headless" is not among the backend names listed in the ctx_new doc comment - confirm it is accepted. */
+  client->sub_ctx = ctx_new (client->width, client->height,
+                                "headless");
+  /* Run the caller-supplied routine with the new context; the cleanup below runs once it returns. */
+  client->start_routine (client->sub_ctx, client->user_data);
+  /* Log to stderr, then hand the context created above to ctx_free. */
+  fprintf (stderr, "%s: cleanup\n", __FUNCTION__);
+  ctx_free (client->sub_ctx); /* NOTE(review): client->sub_ctx is not reset afterwards */
+  return NULL;
+}
+
+CtxClient *ctx_client_new_thread (Ctx *ctx, void (*start_routine)(Ctx *ctx, void *user_data),
+                                  int x, int y, int width, int height, float font_size, CtxClientFlags 
flags, void *user_data, CtxClientFinalize finalize)
+{ /* Creates a client, lists it on ctx and starts a thread that runs start_routine via launch_client_thread. */
+  CtxClient *client = calloc (sizeof (CtxClient), 1); /* NOTE(review): the calloc result is used unchecked */
+  ctx_client_init (ctx, client, x, y, width, height, font_size, flags, user_data, finalize);
+  /* The client is put on ctx's client list before its thread is started. */
+  ctx_list_append (&ctx->events.clients, client);
+  /* start_routine is stored before thrd_create because launch_client_thread reads it from the client. */
+  /* NOTE(review): the thrd_create result is ignored. */
+  client->start_routine = start_routine;
+  thrd_create (&client->tid, launch_client_thread, client);
+  //float line_spacing = 2.0f;
+  //client->vt = vt_new_thread (start_routine, userdata, width, height, font_size,line_spacing, client->id, 
(flags & ITK_CLIENT_CAN_LAUNCH)!=0);
+  //vt_set_ctx (client->vt, ctx);
+  if (client->vt) /* NOTE(review): nothing in this function assigns vt, so this looks like dead code - confirm */
+    client->vt->client = client;
+  return client;
 }
 
 extern float ctx_shape_cache_rate;
@@ -47765,16 +50038,16 @@ extern int _ctx_max_threads;
 
 static int focus_follows_mouse = 0;
 
-static CtxClient *find_active (int x, int y)
+static CtxClient *find_active (Ctx *ctx, int x, int y)
 {
   CtxClient *ret = NULL;
   float titlebar_height = _ctx_font_size;
   int resize_border = titlebar_height/2;
 
-  for (CtxList *l = clients; l; l = l->next)
+  for (CtxList *l = ctx_clients (ctx); l; l = l->next)
   {
      CtxClient *c = l->data;
-     if ((c->flags & ITK_CLIENT_MAXIMIZED) && c == active_tab)
+     if ((c->flags & ITK_CLIENT_MAXIMIZED) && c == ctx->events.active_tab)
      if (x > c->x - resize_border && x < c->x+c->width + resize_border &&
          y > c->y - titlebar_height && y < c->y+c->height + resize_border)
      {
@@ -47782,7 +50055,7 @@ static CtxClient *find_active (int x, int y)
      }
   }
 
-  for (CtxList *l = clients; l; l = l->next)
+  for (CtxList *l = ctx_clients (ctx); l; l = l->next)
   {
      CtxClient *c = l->data;
      if (!(c->flags &  ITK_CLIENT_MAXIMIZED))
@@ -47795,12 +50068,12 @@ static CtxClient *find_active (int x, int y)
   return ret;
 }
 
-int id_to_no (int id)
+int id_to_no (Ctx *ctx, int id)
 {
   CtxList *l;
   int no = 0;
 
-  for (l = clients; l; l = l->next)
+  for (l = ctx_clients (ctx); l; l = l->next)
   {
     CtxClient *client = l->data;
     if (client->id == id)
@@ -47810,25 +50083,25 @@ int id_to_no (int id)
   return -1;
 }
 
-void ctx_client_move (int id, int x, int y);
-int ctx_client_resize (int id, int w, int h);
-void ctx_client_shade_toggle (int id);
+void ctx_client_move (Ctx *ctx, int id, int x, int y);
+int ctx_client_resize (Ctx *ctx, int id, int w, int h);
+void ctx_client_shade_toggle (Ctx *ctx, int id);
 float ctx_client_min_y_pos (Ctx *ctx);
 float ctx_client_max_y_pos (Ctx *ctx);
 
-#if 0
-void ensure_layout ()
+static void ctx_clients_ensure_layout (Ctx *ctx)
 {
+  CtxList *clients = ctx_clients (ctx);
   int n_clients = ctx_list_length (clients);
   if (n_clients == 1)
   {
     CtxClient *client = clients->data;
     if (client->flags & ITK_CLIENT_MAXIMIZED)
     {
-      ctx_client_move (client->id, 0, 0);
-      ctx_client_resize (client->id, ctx_width (ctx), ctx_height(ctx));
-      if (active_tab == NULL)
-        active_tab = client;
+      ctx_client_move (ctx, client->id, 0, 0);
+      ctx_client_resize (ctx, client->id, ctx_width (ctx), ctx_height(ctx));
+      if (ctx->events.active_tab == NULL)
+        ctx->events.active_tab = client;
     }
   }
   else
@@ -47837,19 +50110,18 @@ void ensure_layout ()
     CtxClient *client = l->data;
     if (client->flags & ITK_CLIENT_MAXIMIZED)
     {
-      ctx_client_move   (client->id, 0, client_min_y_pos (ctx));
-      ctx_client_resize (client->id, ctx_width (ctx), ctx_height(ctx) -
+      ctx_client_move (ctx, client->id, 0, ctx_client_min_y_pos (ctx));
+      ctx_client_resize (ctx, client->id, ctx_width (ctx), ctx_height(ctx) -
                       ctx_client_min_y_pos (ctx) / 2);   // /2 to counter the double titlebar of 
non-maximized
-      if (active_tab == NULL)
-        active_tab = client;
+      if (ctx->events.active_tab == NULL)
+        ctx->events.active_tab = client;
     }
   }
 }
-#endif
 
-static CtxClient *ctx_client_by_id (int id)
+CtxClient *ctx_client_by_id (Ctx *ctx, int id)
 {
-  for (CtxList *l = clients; l; l = l->next)
+  for (CtxList *l = ctx_clients (ctx); l; l = l->next)
   {
     CtxClient *client = l->data;
     if (client->id == id)
@@ -47878,212 +50150,259 @@ void ctx_client_remove (Ctx *ctx, CtxClient *client)
   if (client->finalize)
      client->finalize (client, client->user_data);
 
-  ctx_list_remove (&clients, client);
+  ctx_list_remove (&ctx->events.clients, client);
 
-  if (client == active_tab)
+  if (client == ctx->events.active_tab)
   {
-    active_tab = NULL;
+    ctx->events.active_tab = NULL;
   }
 
   if (ctx)
-  if (client == active)
+  if (client == ctx->events.active)
   {
-    active = find_active (ctx_pointer_x (ctx), ctx_pointer_y (ctx));
-    if (!active) active = clients?clients->data:NULL;
+    ctx->events.active = find_active (ctx, ctx_pointer_x (ctx), ctx_pointer_y (ctx));
+    if (!ctx->events.active)
+    {
+      if (ctx->events.clients)
+        ctx->events.active = ctx->events.clients->data;
+    }
   }
 
   ctx_client_unlock (client);
   free (client);
-  //ensure_layout();
 }
 
-#if 0
-void ctx_client_remove_by_id (int id)
+void ctx_client_remove_by_id (Ctx *ctx, int id)
 {
-  int no = id_to_no (id);
-  if (no>=0)
-    ctx_client_remove (no);
+  CtxClient *client = ctx_client_by_id (ctx, id);
+  if (client)
+    ctx_client_remove (ctx, client);
 }
-#endif
 
-int ctx_client_height (int id)
+int ctx_client_height (Ctx *ctx, int id)
 {
-  CtxClient *client = ctx_client_by_id (id);
+  CtxClient *client = ctx_client_by_id (ctx, id);
   if (!client) return 0;
   return client->height;
 }
 
-int ctx_client_x (int id)
+int ctx_client_x (Ctx *ctx, int id)
 {
-  CtxClient *client = ctx_client_by_id (id);
+  CtxClient *client = ctx_client_by_id (ctx, id);
   if (!client) return 0;
   return client->x;
 }
 
-int ctx_client_y (int id)
+int ctx_client_y (Ctx *ctx, int id)
 {
-  CtxClient *client = ctx_client_by_id (id);
+  CtxClient *client = ctx_client_by_id (ctx, id);
   if (!client) return 0;
   return client->y;
 }
 
-void ctx_client_raise_top (int id)
+void ctx_client_raise_top (Ctx *ctx, int id)
 {
-  CtxClient *client = ctx_client_by_id (id);
+  CtxClient *client = ctx_client_by_id (ctx, id);
   if (!client) return;
-  ctx_list_remove (&clients, client);
-  ctx_list_append (&clients, client);
+  ctx_list_remove (&ctx->events.clients, client);
+  ctx_list_append (&ctx->events.clients, client);
+  ctx_queue_draw (ctx);
 }
 
-void ctx_client_lower_bottom (int id)
+void ctx_client_lower_bottom (Ctx *ctx, int id)
 {
-  CtxClient *client = ctx_client_by_id (id);
+  CtxClient *client = ctx_client_by_id (ctx, id);
   if (!client) return;
-  ctx_list_remove (&clients, client);
-  ctx_list_prepend (&clients, client);
+  ctx_list_remove (&ctx->events.clients, client);
+  ctx_list_prepend (&ctx->events.clients, client);
+  ctx_queue_draw (ctx);
 }
 
 
-void ctx_client_iconify (int id)
+void ctx_client_iconify (Ctx *ctx, int id)
 {
-   CtxClient *client = ctx_client_by_id (id);
+   CtxClient *client = ctx_client_by_id (ctx, id);
    if (!client) return;
    client->flags |= ITK_CLIENT_ICONIFIED;
+   ctx_queue_draw (ctx);
 }
 
-int ctx_client_is_iconified (int id)
+int ctx_client_is_iconified (Ctx *ctx, int id)
 {
-   CtxClient *client = ctx_client_by_id (id);
+   CtxClient *client = ctx_client_by_id (ctx, id);
    if (!client) return -1;
    return (client->flags & ITK_CLIENT_ICONIFIED) != 0;
 }
 
-void ctx_client_uniconify (int id)
+void ctx_client_uniconify (Ctx *ctx, int id)
 {
-   CtxClient *client = ctx_client_by_id (id);
+   CtxClient *client = ctx_client_by_id (ctx, id);
    if (!client) return;
    client->flags &= ~ITK_CLIENT_ICONIFIED;
+   ctx_queue_draw (ctx);
 }
 
-void ctx_client_maximize (int id)
+void ctx_client_maximize (Ctx *ctx, int id)
 {
-   CtxClient *client = ctx_client_by_id (id);
+   CtxClient *client = ctx_client_by_id (ctx, id);
    if (!client) return;
-   if (client->flags &  ITK_CLIENT_MAXIMIZED)
-     return;
-   client->flags |= ITK_CLIENT_MAXIMIZED;
-   client->unmaximized_x = client->x;
-   client->unmaximized_y = client->y;
-   client->unmaximized_width  = client->width;
-   client->unmaximized_height = client->height;
+   if (!(client->flags &  ITK_CLIENT_MAXIMIZED))
+   {
+     client->flags |= ITK_CLIENT_MAXIMIZED;
+     client->unmaximized_x = client->x;
+     client->unmaximized_y = client->y;
+     client->unmaximized_width  = client->width;
+     client->unmaximized_height = client->height;
+     ctx_client_move (ctx, id, 0, ctx_client_min_y_pos (client->ctx));
+   }
 
    // enforce_layout does the size
-   //client_resize (id, ctx_width (ctx), ctx_height(ctx) - ctx_client_min_y_pos (ctx));
+   //client_resize (ctx, id, ctx_width (ctx), ctx_height(ctx) - ctx_client_min_y_pos (ctx));
    
-   ctx_client_move (id, 0, ctx_client_min_y_pos (client->ctx));
-   active_tab = client;
+   ctx->events.active = ctx->events.active_tab = client;
+   ctx_queue_draw (ctx);
 }
 
-int ctx_client_is_maximized (int id)
+int ctx_client_is_maximized (Ctx *ctx, int id)
 {
-   CtxClient *client = ctx_client_by_id (id);
+   CtxClient *client = ctx_client_by_id (ctx, id);
    if (!client) return -1;
    return (client->flags & ITK_CLIENT_MAXIMIZED) != 0;
 }
 
-void ctx_client_unmaximize (int id)
+void ctx_client_unmaximize (Ctx *ctx, int id)
 {
-   CtxClient *client = ctx_client_by_id (id);
+   CtxClient *client = ctx_client_by_id (ctx, id);
    if (!client) return;
    if ((client->flags & ITK_CLIENT_MAXIMIZED) == 0)
      return;
    client->flags &= ~ITK_CLIENT_MAXIMIZED;
-   ctx_client_resize (id, client->unmaximized_width, client->unmaximized_height);
-   ctx_client_move (id, client->unmaximized_x, client->unmaximized_y);
-   active_tab = NULL;
+   ctx_client_resize (ctx, id, client->unmaximized_width, client->unmaximized_height);
+   ctx_client_move (ctx, id, client->unmaximized_x, client->unmaximized_y);
+   ctx->events.active_tab = NULL;
+   ctx_queue_draw (ctx);
 }
 
-void ctx_client_maximized_toggle (int id)
+void ctx_client_maximized_toggle (Ctx *ctx, int id)
 {
-  if (ctx_client_is_maximized (id))
-    ctx_client_unmaximize (id);
+  if (ctx_client_is_maximized (ctx, id))
+    ctx_client_unmaximize (ctx, id);
   else
-    ctx_client_maximize (id);
+    ctx_client_maximize (ctx, id);
 }
 
 
-void ctx_client_shade (int id)
+void ctx_client_shade (Ctx *ctx, int id)
 {
-   CtxClient *client = ctx_client_by_id (id);
+   CtxClient *client = ctx_client_by_id (ctx, id);
    if (!client) return;
    client->flags |= ITK_CLIENT_SHADED;
+   ctx_queue_draw (ctx);
 }
 
-int ctx_client_is_shaded (int id)
+int ctx_client_is_shaded (Ctx *ctx, int id)
 {
-   CtxClient *client = ctx_client_by_id (id);
+   CtxClient *client = ctx_client_by_id (ctx, id);
    if (!client) return -1;
    return (client->flags & ITK_CLIENT_SHADED) != 0;
 }
 
-void ctx_client_unshade (int id)
+void ctx_client_unshade (Ctx *ctx, int id)
 {
-   CtxClient *client = ctx_client_by_id (id);
+   CtxClient *client = ctx_client_by_id (ctx, id);
    if (!client) return;
    client->flags &= ~ITK_CLIENT_SHADED;
+   ctx_queue_draw (ctx);
 }
 
-void ctx_client_toggle_maximized (int id)
+void ctx_client_toggle_maximized (Ctx *ctx, int id)
 {
-   CtxClient *client = ctx_client_by_id (id);
+   CtxClient *client = ctx_client_by_id (ctx, id);
    if (!client) return;
-   if (ctx_client_is_maximized (id))
-     ctx_client_unmaximize (id);
+   if (ctx_client_is_maximized (ctx, id))
+     ctx_client_unmaximize (ctx, id);
    else
-     ctx_client_maximize (id);
+     ctx_client_maximize (ctx, id);
 }
 
-void ctx_client_shade_toggle (int id)
+void ctx_client_shade_toggle (Ctx *ctx, int id)
 {
-   CtxClient *client = ctx_client_by_id (id);
+   CtxClient *client = ctx_client_by_id (ctx, id);
    if (!client) return;
-   if (ctx_client_is_shaded (id))
-    ctx_client_shade (id);
+   if (ctx_client_is_shaded (ctx, id))
+    ctx_client_shade (ctx, id);
    else
-    ctx_client_unshade (id);
+    ctx_client_unshade (ctx, id);
+}
+
+
+void ctx_client_paste (Ctx *ctx, int id, const char *str)
+{
+   CtxClient *client = ctx_client_by_id (ctx, id);
+   if (client && client->vt)
+     vt_paste (client->vt, str);
 }
 
-void ctx_client_move (int id, int x, int y)
+char  *ctx_client_get_selection (Ctx *ctx, int id)
 {
-   CtxClient *client = ctx_client_by_id (id);
+   CtxClient *client = ctx_client_by_id (ctx, id);
+   if (client && client->vt)
+     return vt_get_selection (client->vt);
+   return strdup ("");
+}
+
+void ctx_client_move (Ctx *ctx, int id, int x, int y)
+{
+   CtxClient *client = ctx_client_by_id (ctx, id);
    if (client && (client->x != x || client->y != y))
    {
      client->x = x;
      client->y = y;
-     vt_rev_inc (client->vt);
+     ctx_client_rev_inc (client);
+     ctx_queue_draw (ctx);
    }
 }
 
-void ctx_client_set_font_size (int id, float font_size)
+void ctx_client_set_font_size (Ctx *ctx, int id, float font_size)
 {
-   CtxClient *client = ctx_client_by_id (id);
+   CtxClient *client = ctx_client_by_id (ctx, id);
    if (client->vt)
    {
      if (vt_get_font_size (client->vt) != font_size)
        vt_set_font_size (client->vt, font_size);
+     ctx_queue_draw (ctx);
    }
 }
-float ctx_client_get_font_size (int id)
+
+float ctx_client_get_font_size (Ctx *ctx, int id)
 {
-   CtxClient *client = ctx_client_by_id (id);
+   CtxClient *client = ctx_client_by_id (ctx, id);
    if (client->vt)
      return vt_get_font_size (client->vt);
    return 14.0;
 }
 
-int ctx_client_resize (int id, int width, int height)
+void ctx_client_set_opacity (Ctx *ctx, int id, float opacity)
+{
+   CtxClient *client = ctx_client_by_id (ctx, id);
+   if (!client)
+     return;
+   if (opacity > 0.98) opacity = 1.0f;
+   client->opacity = opacity;
+   ctx_queue_draw (ctx);
+}
+
+float ctx_client_get_opacity (Ctx *ctx, int id)
+{
+   CtxClient *client = ctx_client_by_id (ctx, id);
+   if (!client)
+     return 1.0f;
+   return client->opacity;
+}
+
+int ctx_client_resize (Ctx *ctx, int id, int width, int height)
 {
-   CtxClient *client = ctx_client_by_id (id);
+   CtxClient *client = ctx_client_by_id (ctx, id);
 
    if (client && ((height != client->height) || (width != client->width) ))
    {
@@ -48091,6 +50410,7 @@ int ctx_client_resize (int id, int width, int height)
      client->height = height;
      if (client->vt)
        vt_set_px_size (client->vt, width, height);
+     ctx_queue_draw (ctx);
      return 1;
    }
    return 0;
@@ -48105,8 +50425,8 @@ static void ctx_client_titlebar_drag (CtxEvent *event, void *data, void *data2)
     static int prev_drag_end_time = 0;
     if (event->time - prev_drag_end_time < 500)
     {
-      //client_shade_toggle (client->id);
-      ctx_client_maximized_toggle (client->id);
+      //client_shade_toggle (ctx, client->id);
+      ctx_client_maximized_toggle (event->ctx, client->id);
     }
     prev_drag_end_time = event->time;
   }
@@ -48116,19 +50436,17 @@ static void ctx_client_titlebar_drag (CtxEvent *event, void *data, void *data2)
 
   float snap_threshold = 8;
 
-  if (ctx_renderer_is_term (event->ctx))
+  if (ctx_backend_type (event->ctx) == CTX_BACKEND_TERM)
      snap_threshold = 1;
 
   if (new_y < ctx_client_min_y_pos (event->ctx)) new_y = ctx_client_min_y_pos (event->ctx);
   if (new_y > ctx_client_max_y_pos (event->ctx)) new_y = ctx_client_max_y_pos (event->ctx);
 
   if (fabs (new_x - 0) < snap_threshold) new_x = 0.0;
-  if (fabs (ctx_width (event->ctx) - (new_x + client->width)) < snap_threshold) new_x = ctx_width (event->ctx) - client->width;
-
-  ctx_client_move (client->id, new_x, new_y);
+  if (fabs (ctx_width (event->ctx) - (new_x + client->width)) < snap_threshold)
+       new_x = ctx_width (event->ctx) - client->width;
 
-  //vt_rev_inc (client->vt);
-  ctx_set_dirty (event->ctx, 1);
+  ctx_client_move (event->ctx, client->id, new_x, new_y);
 
   event->stop_propagate = 1;
 }
@@ -48142,10 +50460,9 @@ static void ctx_client_resize_se (CtxEvent *event, void *data, void *data2)
   int new_h = client->height + event->delta_y;
   if (new_w <= min_win_dim) new_w = min_win_dim;
   if (new_h <= min_win_dim) new_h = min_win_dim;
-  ctx_client_resize (client->id, new_w, new_h);
-  if (client->vt) // force redraw
-    vt_rev_inc (client->vt);
-  ctx_set_dirty (event->ctx, 1);
+  ctx_client_resize (event->ctx, client->id, new_w, new_h);
+  ctx_client_rev_inc (client);
+  ctx_queue_draw (event->ctx);
   event->stop_propagate = 1;
 }
 
@@ -48154,10 +50471,9 @@ static void ctx_client_resize_e (CtxEvent *event, void *data, void *data2)
   CtxClient *client = data;
   int new_w = client->width + event->delta_x;
   if (new_w <= min_win_dim) new_w = min_win_dim;
-  ctx_client_resize (client->id, new_w, client->height);
-  if (client->vt) // force redraw
-    vt_rev_inc (client->vt);
-  ctx_set_dirty (event->ctx, 1);
+  ctx_client_resize (event->ctx, client->id, new_w, client->height);
+  ctx_client_rev_inc (client);
+  ctx_queue_draw (event->ctx);
   event->stop_propagate = 1;
 }
 
@@ -48166,10 +50482,9 @@ static void ctx_client_resize_s (CtxEvent *event, void *data, void *data2)
   CtxClient *client = data;
   int new_h = client->height + event->delta_y;
   if (new_h <= min_win_dim) new_h = min_win_dim;
-  ctx_client_resize (client->id, client->width, new_h);
-  if (client->vt) // force redraw
-    vt_rev_inc (client->vt);
-  ctx_set_dirty (event->ctx, 1);
+  ctx_client_resize (event->ctx, client->id, client->width, new_h);
+  ctx_client_rev_inc (client);
+  ctx_queue_draw (event->ctx);
   event->stop_propagate = 1;
 }
 
@@ -48179,11 +50494,10 @@ static void ctx_client_resize_n (CtxEvent *event, void *data, void *data2)
   float new_y = client->y +  event->delta_y;
   int new_h = client->height - event->delta_y;
   if (new_h <= min_win_dim) new_h = min_win_dim;
-  ctx_client_resize (client->id, client->width, new_h);
-  ctx_client_move (client->id, client->x, new_y);
-  if (client->vt) // force redraw
-    vt_rev_inc (client->vt);
-  ctx_set_dirty (event->ctx, 1);
+  ctx_client_resize (event->ctx, client->id, client->width, new_h);
+  ctx_client_move (event->ctx, client->id, client->x, new_y);
+  ctx_client_rev_inc (client);
+  ctx_queue_draw (event->ctx);
   event->stop_propagate = 1;
 }
 
@@ -48195,11 +50509,10 @@ static void ctx_client_resize_ne (CtxEvent *event, void *data, void *data2)
   int new_w = client->width + event->delta_x;
   if (new_h <= min_win_dim) new_h = min_win_dim;
   if (new_w <= min_win_dim) new_w = min_win_dim;
-  ctx_client_resize (client->id, new_w, new_h);
-  ctx_client_move (client->id, client->x, new_y);
-  if (client->vt) // force redraw
-    vt_rev_inc (client->vt);
-  ctx_set_dirty (event->ctx, 1);
+  ctx_client_resize (event->ctx, client->id, new_w, new_h);
+  ctx_client_move (event->ctx, client->id, client->x, new_y);
+  ctx_client_rev_inc (client);
+  ctx_queue_draw (event->ctx);
   event->stop_propagate = 1;
 }
 
@@ -48213,11 +50526,10 @@ static void ctx_client_resize_sw (CtxEvent *event, void *data, void *data2)
 
   if (new_h <= min_win_dim) new_h = min_win_dim;
   if (new_w <= min_win_dim) new_w = min_win_dim;
-  ctx_client_resize (client->id, new_w, new_h);
-  ctx_client_move (client->id, new_x, client->y);
-  if (client->vt) // force redraw
-    vt_rev_inc (client->vt);
-  ctx_set_dirty (event->ctx, 1);
+  ctx_client_resize (event->ctx, client->id, new_w, new_h);
+  ctx_client_move (event->ctx, client->id, new_x, client->y);
+  ctx_client_rev_inc (client);
+  ctx_queue_draw (event->ctx);
   event->stop_propagate = 1;
 }
 
@@ -48230,11 +50542,10 @@ static void ctx_client_resize_nw (CtxEvent *event, void *data, void *data2)
   int new_h = client->height - event->delta_y;
   if (new_h <= min_win_dim) new_h = min_win_dim;
   if (new_w <= min_win_dim) new_w = min_win_dim;
-  ctx_client_resize (client->id, new_w, new_h);
-  ctx_client_move (client->id, new_x, new_y);
-  if (client->vt) // force redraw
-    vt_rev_inc (client->vt);
-  ctx_set_dirty (event->ctx, 1);
+  ctx_client_resize (event->ctx, client->id, new_w, new_h);
+  ctx_client_move (event->ctx, client->id, new_x, new_y);
+  ctx_client_rev_inc (client);
+  ctx_queue_draw (event->ctx);
   event->stop_propagate = 1;
 }
 
@@ -48245,11 +50556,10 @@ static void ctx_client_resize_w (CtxEvent *event, void *data, void *data2)
   float new_x = client->x +  event->delta_x;
   int new_w = client->width - event->delta_x;
   if (new_w <= min_win_dim) new_w = min_win_dim;
-  ctx_client_resize (client->id, new_w, client->height);
-  ctx_client_move (client->id, new_x, client->y);
-  if (client->vt) // force redraw
-    vt_rev_inc (client->vt);
-  ctx_set_dirty (event->ctx, 1);
+  ctx_client_resize (event->ctx, client->id, new_w, client->height);
+  ctx_client_move (event->ctx, client->id, new_x, client->y);
+  ctx_client_rev_inc (client);
+  ctx_queue_draw (event->ctx);
 
   event->stop_propagate = 1;
 }
@@ -48263,7 +50573,7 @@ static void ctx_client_close (CtxEvent *event, void *data, void *data2)
   
   ctx_client_remove (event->ctx, client);
 
-  ctx_set_dirty (event->ctx, 1);
+  ctx_queue_draw (event->ctx);
   event->stop_propagate = 1;
 }
 
@@ -48273,21 +50583,19 @@ float _ctx_green = 0.5;
 
 static void ctx_client_draw (Ctx *ctx, CtxClient *client, float x, float y)
 {
-    if (client->internal)
+    if (client->tid)
     {
 #if 0
       ctx_save (ctx);
-
       ctx_translate (ctx, x, y);
       int width = client->width;
       int height = client->height;
-
       itk_panel_start (itk, "", 0, 0, width, height);
       //itk_seperator (itk);
 #if 0
       if (itk_button (itk, "add tab"))
       {
-        add_tab (vt_find_shell_command(), 1);
+        add_tab (ctx_find_shell_command(), 1);
       }
 #endif
       //itk_sameline (itk);
@@ -48296,11 +50604,9 @@ static void ctx_client_draw (Ctx *ctx, CtxClient *client, float x, float y)
       itk_slider_float (itk, "CTX_GREEN", &_ctx_green, 0.0, 1.0, 0.5);
       itk_ctx_settings (itk);
       itk_itk_settings (itk);
-
       itk_panel_end (itk);
       itk_done (itk);
       //itk_key_bindings (itk);
-
       ctx_restore (ctx);
 #endif
     }
@@ -48308,18 +50614,18 @@ static void ctx_client_draw (Ctx *ctx, CtxClient *client, float x, float y)
     {
        ctx_client_lock (client);
 
-          int found = 0;
-          for (CtxList *l2 = clients; l2; l2 = l2->next)
-            if (l2->data == client) found = 1;
-          if (found)
-          {
+       int found = 0;
+       for (CtxList *l2 = ctx_clients (ctx); l2; l2 = l2->next)
+         if (l2->data == client) found = 1;
+       if (found)
+       {
 
-      int rev = vt_rev (client->vt);
+      int rev = ctx_client_rev (client);
 #if VT_RECORD
       if (client->drawn_rev != rev)
       {
         if (!client->recording)
-          client->recording = ctx_new ();
+          client->recording = _ctx_new_drawlist (client->width, client->height);
         else
           ctx_reset (client->recording);
         vt_draw (client->vt, client->recording, 0.0, 0.0);
@@ -48329,18 +50635,30 @@ static void ctx_client_draw (Ctx *ctx, CtxClient *client, float x, float y)
       {
         ctx_save (ctx);
         ctx_translate (ctx, x, y);
+        if (client->opacity != 1.0f)
+        {
+          ctx_global_alpha (ctx, client->opacity);
+        }
         ctx_render_ctx (client->recording, ctx);
         vt_register_events (client->vt, ctx, 0.0, 0.0);
         ctx_restore (ctx);
       }
 #else
-
+      if (client->opacity != 1.0)
+      {
+        ctx_save (ctx);
+        ctx_global_alpha (ctx, client->opacity);
+      }
       vt_draw (client->vt, ctx, x, y);
-      vt_register_events (client->vt, ctx, x, y);
+      if (client->opacity != 1.0)
+      {
+        ctx_restore (ctx);
+      }
+      ctx_client_register_events (client, ctx, x, y);
 #endif
       client->drawn_rev = rev;
       ctx_client_unlock (client);
-          }
+      }
     }
 }
 
@@ -48348,12 +50666,12 @@ static void ctx_client_use_images (Ctx *ctx, CtxClient *client)
 {
   if (!client->internal)
   {
-      uint32_t rev = vt_rev (client->vt);
+      uint32_t rev = ctx_client_rev (client);
 #if VT_RECORD
       if (client->drawn_rev != rev)
       {
         if (!client->recording)
-          client->recording = ctx_new ();
+          client->recording = _ctx_new_drawlist (client->width, client->height);
         else
           ctx_reset (client->recording);
         vt_draw (client->vt, client->recording, 0.0, 0.0);
@@ -48362,11 +50680,15 @@ static void ctx_client_use_images (Ctx *ctx, CtxClient *client)
       if (client->recording)
       {
         ctx_save (ctx);
+        if (client->opacity != 1.0f)
+        {
+          ctx_global_alpha (ctx, client->opacity);
+        }
         ctx_render_ctx_textures (client->recording, ctx);
         ctx_restore (ctx);
       }
 #else
-    vt_use_images (client->vt, ctx);
+    if (client->vt)vt_use_images (client->vt, ctx);
 #endif
     client->drawn_rev = rev;
   }
@@ -48386,14 +50708,26 @@ void ctx_client_unlock (CtxClient *client)
 #endif
 }
 
+
+CtxEvent *ctx_event_copy (CtxEvent *event)
+{
+  CtxEvent *copy = calloc (1, sizeof (CtxEvent));
+  *copy = *event;
+  if (copy->string) {
+    copy->string = strdup (copy->string);
+    copy->owns_string = 1;
+  }
+  return copy;
+}
+
 #if 0
 void ctx_client_handle_event (Ctx *ctx, CtxEvent *ctx_event, const char *event)
 {
-  if (!active)
+  if (!ctx->events.active)
     return;
-  if (active->internal)
+  if (ctx->events.active->internal)
     return;
-  VT *vt = active->vt;
+  VT *vt = ctx->events.active->vt;
   CtxClient *client = vt_get_client (vt);
 
   ctx_client_lock (client);
@@ -48401,7 +50735,7 @@ void ctx_client_handle_event (Ctx *ctx, CtxEvent *ctx_event, const char *event)
   if (!strcmp (event, "F11"))
   {
 #if CTX_SDL
-    if (ctx_renderer_is_sdl (ctx))
+    if (ctx_backend_is_sdl (ctx))
     {
       ctx_sdl_set_fullscreen (ctx, !ctx_sdl_get_fullscreen (ctx));
     }
@@ -48431,10 +50765,10 @@ void ctx_client_handle_event (Ctx *ctx, CtxEvent *ctx_event, const char *event)
         }
     }
   else if (!strcmp (event, "shift-control-t") ||
-           ((ctx_renderer_is_fb (ctx) || ctx_renderer_is_term (ctx) || ctx_renderer_is_kms (ctx))
+           ((ctx_backend_is_fb (ctx) || ctx_backend_is_term (ctx) || ctx_backend_is_kms (ctx))
            &&   !strcmp (event, "control-t") ))
   {
-    //XXX add_tab (vt_find_shell_command(), 1);
+    //XXX add_tab (ctx_find_shell_command(), 1);
   }
   else if (!strcmp (event, "shift-control-n") )
     {
@@ -48465,7 +50799,7 @@ void ctx_client_handle_event (Ctx *ctx, CtxEvent *ctx_event, const char *event)
     }
   else if (!strcmp (event, "shift-control-w") )
     {
-      active->do_quit = 1;
+      ctx->events.active->do_quit = 1;
     }
   else if (!strcmp (event, "shift-control-s") )
     {
@@ -48488,13 +50822,13 @@ void ctx_client_handle_event (Ctx *ctx, CtxEvent *ctx_event, const char *event)
 }
 #endif
 
-static int ctx_clients_dirty_count (void)
+static int ctx_clients_dirty_count (Ctx *ctx)
 {
   int changes = 0;
-  for (CtxList *l = clients; l; l = l->next)
+  for (CtxList *l = ctx_clients (ctx); l; l = l->next)
   {
     CtxClient *client = l->data;
-    if ((client->drawn_rev != vt_rev (client->vt) ) ||
+    if ((client->drawn_rev != ctx_client_rev (client) ) ||
         vt_has_blink (client->vt))
       changes++;
   }
@@ -48504,22 +50838,22 @@ static int ctx_clients_dirty_count (void)
 static void ctx_client_titlebar_drag_maximized (CtxEvent *event, void *data, void *data2)
 {
   CtxClient *client = data;
-
-  active = active_tab = client;
+  Ctx *ctx = event->ctx;
+  ctx->events.active = ctx->events.active_tab = client;
   if (event->type == CTX_DRAG_RELEASE)
   {
     static int prev_drag_end_time = 0;
     if (event->time - prev_drag_end_time < 500)
     {
-      //client_shade_toggle (client->id);
-      ctx_client_unmaximize (client->id);
-      ctx_client_raise_top (client->id);
-      active_tab = NULL;
+      //client_shade_toggle (ctx, client->id);
+      ctx_client_unmaximize (ctx, client->id);
+      ctx_client_raise_top (ctx, client->id);
+      ctx->events.active_tab = NULL;
     }
     prev_drag_end_time = event->time;
   }
-  ctx_set_dirty (event->ctx, 1);
-  vt_rev_inc (client->vt);
+  ctx_queue_draw (event->ctx);
+  ctx_client_rev_inc (client);
   event->stop_propagate = 1;
 }
 
@@ -48538,7 +50872,7 @@ void ctx_client_titlebar_draw (Ctx *ctx, CtxClient *client,
 {
 #if 0
   ctx_move_to (ctx, x, y + height * 0.8);
-  if (client == active)
+  if (client == ctx->events.active)
     ctx_rgba (ctx, 1, 1,0.4, 1.0);
   else
     ctx_rgba (ctx, 1, 1,1, 0.8);
@@ -48546,7 +50880,7 @@ void ctx_client_titlebar_draw (Ctx *ctx, CtxClient *client,
 #else
   ctx_rectangle (ctx, x, y - titlebar_height,
                  width, titlebar_height);
-  if (client == active)
+  if (client == ctx->events.active)
      itk_style_color (ctx, "titlebar-focused-bg");
   else
      itk_style_color (ctx, "titlebar-bg");
@@ -48564,7 +50898,7 @@ void ctx_client_titlebar_draw (Ctx *ctx, CtxClient *client,
   ctx_fill (ctx);
   //ctx_font_size (ctx, itk->font_size);//titlebar_height);// * 0.85);
 
-  if (client == active &&
+  if (client == ctx->events.active &&
       (flag_is_set(client->flags, ITK_CLIENT_MAXIMIZED) || y != titlebar_height))
 #if 1
   ctx_rectangle (ctx, x + width - titlebar_height,
@@ -48577,14 +50911,14 @@ void ctx_client_titlebar_draw (Ctx *ctx, CtxClient *client,
   //ctx_fill (ctx);
   ctx_begin_path (ctx);
   ctx_move_to (ctx, x + width - titlebar_height * 0.8, y - titlebar_height * 0.22);
-  if (client == active)
+  if (client == ctx->events.active)
     itk_style_color (ctx, "titlebar-focused-close");
   else
     itk_style_color (ctx, "titlebar-close");
   ctx_text (ctx, "X");
 
   ctx_move_to (ctx, x +  width/2, y - titlebar_height * 0.22);
-  if (client == active)
+  if (client == ctx->events.active)
     itk_style_color (ctx, "titlebar-focused-fg");
   else
     itk_style_color (ctx, "titlebar-fg");
@@ -48616,22 +50950,22 @@ static void key_press (CtxEvent *event, void *data1, void *data2)
 
 int ctx_clients_draw (Ctx *ctx, int layer2)
 {
+  CtxList *clients = ctx_clients (ctx);
   _ctx_font_size = ctx_get_font_size (ctx);
   float titlebar_height = _ctx_font_size;
   int n_clients         = ctx_list_length (clients);
 
-  if (active && flag_is_set(active->flags, ITK_CLIENT_MAXIMIZED) && n_clients == 1)
+  if (ctx->events.active && flag_is_set(ctx->events.active->flags, ITK_CLIENT_MAXIMIZED) && n_clients == 1)
   {
-    ctx_client_draw (ctx, active, 0, 0);
+    ctx_client_draw (ctx, ctx->events.active, 0, 0);
     return 0;
   }
-
   for (CtxList *l = clients; l; l = l->next)
   {
     CtxClient *client = l->data;
     if (flag_is_set(client->flags, ITK_CLIENT_MAXIMIZED))
     {
-      if (client == active_tab)
+      if (client == ctx->events.active_tab)
       {
         ctx_client_draw (ctx, client, 0, titlebar_height);
       }
@@ -48670,7 +51004,7 @@ int ctx_clients_draw (Ctx *ctx, int layer2)
         ctx_client_draw (ctx, client, client->x, client->y);
 
       // resize regions
-      if (client == active &&
+      if (client == ctx->events.active &&
          !flag_is_set(client->flags, ITK_CLIENT_SHADED) &&
          !flag_is_set(client->flags, ITK_CLIENT_MAXIMIZED) &&
          flag_is_set(client->flags, ITK_CLIENT_UI_RESIZABLE))
@@ -48758,28 +51092,33 @@ extern int _ctx_enable_hash_cache;
 void vt_audio_task (VT *vt, int click);
 
 int ctx_input_pending (Ctx *ctx, int timeout);
+int ctx_clients_active (Ctx *ctx)
+{
+  if (ctx->events.active) return ctx->events.active->id;
+  return -1;
+}
 
 int ctx_clients_need_redraw (Ctx *ctx)
 {
   int changes = 0;
   int follow_mouse = focus_follows_mouse;
       CtxList *to_remove = NULL;
-  //ensure_layout ();
+  ctx_clients_ensure_layout (ctx);
 
 //  if (print_shape_cache_rate)
 //    fprintf (stderr, "\r%f ", ctx_shape_cache_rate);
 
-   CtxClient *client = find_active (ctx_pointer_x (ctx),
+   CtxClient *client = find_active (ctx, ctx_pointer_x (ctx),
                                     ctx_pointer_y (ctx));
 
    if (follow_mouse || ctx_pointer_is_down (ctx, 0) ||
-       ctx_pointer_is_down (ctx, 1) || (active==NULL))
+       ctx_pointer_is_down (ctx, 1) || (ctx->events.active==NULL))
    {
         if (client)
         {
-          if (active != client)
+          if (ctx->events.active != client)
           {
-            active = client;
+            ctx->events.active = client;
             if (follow_mouse == 0 ||
                 (ctx_pointer_is_down (ctx, 0) ||
                  ctx_pointer_is_down (ctx, 1)))
@@ -48788,8 +51127,8 @@ int ctx_clients_need_redraw (Ctx *ctx)
        #if 1
               if ((client->flags & ITK_CLIENT_MAXIMIZED)==0)
               {
-                ctx_list_remove (&clients, client);
-                ctx_list_append (&clients, client);
+                ctx_list_remove (&ctx->events.clients, client);
+                ctx_list_append (&ctx->events.clients, client);
               }
 #endif
             }
@@ -48798,7 +51137,7 @@ int ctx_clients_need_redraw (Ctx *ctx)
         }
    }
 
-   for (CtxList *l = clients; l; l = l->next)
+   for (CtxList *l = ctx_clients (ctx); l; l = l->next)
    {
      CtxClient *client = l->data;
      if (client->vt)
@@ -48823,132 +51162,143 @@ int ctx_clients_need_redraw (Ctx *ctx)
      ctx_list_remove (&to_remove, to_remove->data);
    }
 
-   changes += ctx_clients_dirty_count ();
+   changes += ctx_clients_dirty_count (ctx);
    return changes != 0;
 }
-
 float ctx_avg_bytespeed = 0.0;
 
-static void ctx_client_handle_events_iteration (Ctx *ctx)
+int ctx_clients_tab_to_id (Ctx *ctx, int tab_no)
 {
-  static int fail_safe = 0;
-  //int n_clients = ctx_list_length (clients);
-      int pending_data = 0;
-      long time_start = ctx_ticks ();
-      int sleep_time = 1000000/ctx_target_fps;
+  /* Only maximized clients are counted: return the id of the tab_no'th
+   * one (0-based) in list order, or -1 when there is no such client. */
+  int tab_index = 0;
+  for (CtxList *iter = ctx_clients (ctx); iter; iter = iter->next)
+  {
+    CtxClient *candidate = iter->data;
+    if (!(flag_is_set(candidate->flags, ITK_CLIENT_MAXIMIZED)))
+      continue;
+    if (tab_index == tab_no)
+      return candidate->id;
+    tab_index++;
+  }
+  return -1;
+}
 
-      pending_data = ctx_input_pending (ctx, sleep_time);
+CtxList *ctx_clients (Ctx *ctx)
+{ /* head of the client list kept in ctx->events; NULL for a NULL ctx */
+  return (ctx != NULL) ? ctx->events.clients : NULL;
+}
 
-      ctx_fetched_bytes = 0;
-      if (pending_data || fail_safe>100)
-      {
-        if (!pending_data)pending_data = 1;
-        /* record amount of time spent - and adjust time of reading for
-         * vts?
-         */
-        long int fractional_sleep = sleep_time / pending_data;
-        for (CtxList *l = clients; l; l = l->next)
-        {
-          CtxClient *client = l->data;
-          ctx_client_lock (client);
-          int found = 0;
-          for (CtxList *l2 = clients; l2; l2 = l2->next)
-            if (l2->data == client) found = 1;
-          if (!found)
-            goto done;
-          
-          ctx_fetched_bytes += vt_poll (client->vt, fractional_sleep);
-          //ctx_fetched_bytes += vt_poll (client->vt, sleep_time); //fractional_sleep);
-          ctx_client_unlock (client);
-        }
+#endif /* CTX_CLIENTS */
+
+int ctx_clients_handle_events (Ctx *ctx)
+{
+  //int n_clients = ctx_list_length (clients);
+#if CTX_CLIENTS
+  int pending_data = 0;
+  long time_start = ctx_ticks ();
+  int sleep_time = 1000000/ctx_target_fps;
+  pending_data += ctx_input_pending (ctx, sleep_time);
+
+  CtxList *clients = ctx_clients (ctx);
+  if (!clients)
+    return pending_data != 0;
+  ctx_fetched_bytes = 0;
+  if (pending_data)
+  {
+    if (!pending_data)pending_data = 1;
+    /* record amount of time spent - and adjust time of reading for
+     * vts?
+     */
+    //long int fractional_sleep = sleep_time / pending_data;
+    long int fractional_sleep = sleep_time * 0.75;
+    for (CtxList *l = clients; l; l = l->next)
+    {
+      CtxClient *client = l->data;
+      ctx_client_lock (client);
+      int found = 0;
+      for (CtxList *l2 = clients; l2; l2 = l2->next)
+        if (l2->data == client) found = 1;
+      if (!found)
+        goto done;
+      
+      ctx_fetched_bytes += vt_poll (client->vt, fractional_sleep);
+      //ctx_fetched_bytes += vt_poll (client->vt, sleep_time); //fractional_sleep);
+      ctx_client_unlock (client);
+    }
 done:
-        fail_safe = 0;
-      }
-      else
-      {
-        fail_safe ++;
-        for (CtxList *l = clients; l; l = l->next)
-        {
-          CtxClient *client = l->data;
-          vt_audio_task (client->vt, 0);
-        }
-      }
+    if(0){
+    }
+  }
+  else
+  {
+    for (CtxList *l = clients; l; l = l->next)
+    {
+      CtxClient *client = l->data;
+      vt_audio_task (client->vt, 0);
+    }
+  }
 
-      //int got_events = 0;
+  //int got_events = 0;
 
-      //while (ctx_get_event (ctx)) { }
+  //while (ctx_get_event (ctx)) { }
 #if 0
-      if (changes /*|| pending_data */)
-      {
-        ctx_target_fps *= 1.6;
-        if (ctx_target_fps > 60) ctx_target_fps = 60;
-      }
-      else
-      {
-        ctx_target_fps = ctx_target_fps * 0.95 + 30.0 * 0.05;
+  if (changes /*|| pending_data */)
+  {
+    ctx_target_fps *= 1.6;
+    if (ctx_target_fps > 60) ctx_target_fps = 60;
+  }
+  else
+  {
+    ctx_target_fps = ctx_target_fps * 0.95 + 30.0 * 0.05;
 
-        // 20fps is the lowest where sun 8bit ulaw 8khz works reliably
-      }
+    // 20fps is the lowest where sun 8bit ulaw 8khz works reliably
+  }
 
-      if (ctx_avg_bytespeed > 1024 * 1024) ctx_target_fps = 10.0;
+  if (ctx_avg_bytespeed > 1024 * 1024) ctx_target_fps = 10.0;
 
-      if (_ctx_green < 0.4)
-        ctx_target_fps = 120.0;
-      else if (_ctx_green > 0.6)
-        ctx_target_fps = 25.0;
+  if (_ctx_green < 0.4)
+    ctx_target_fps = 120.0;
+  else if (_ctx_green > 0.6)
+    ctx_target_fps = 25.0;
 
-      //ctx_target_fps = 30.0;
+  //ctx_target_fps = 30.0;
 #else
-      ctx_target_fps = 30.0;
+  ctx_target_fps = 100.0; // need to be higher than vsync rate to hit vsync
 #endif
 
-      long time_end = ctx_ticks ();
+  long time_end = ctx_ticks ();
 
-      int timed = (time_end-time_start);
-      float bytespeed = ctx_fetched_bytes / ((timed)/ (1000.0f * 1000.0f));
+  int timed = (time_end-time_start);
+  float bytespeed = ctx_fetched_bytes / ((timed)/ (1000.0f * 1000.0f));
 
-      ctx_avg_bytespeed = bytespeed * 0.2 + ctx_avg_bytespeed * 0.8;
+  ctx_avg_bytespeed = bytespeed * 0.2 + ctx_avg_bytespeed * 0.8;
 #if 0
-      fprintf (stderr, "%.2fmb/s %i/%i  %.2f                    \r", ctx_avg_bytespeed/1024/1024, ctx_fetched_bytes, timed, ctx_target_fps);
+  fprintf (stderr, "%.2fmb/s %i/%i  %.2f                    \r", ctx_avg_bytespeed/1024/1024, ctx_fetched_bytes, timed, ctx_target_fps);
 #endif
-}
 
+#endif
+  return 0;
+}
 
-static int ctx_clients_handle_events_fun (void *data)
+void ctx_client_rev_inc (CtxClient *client)
 {
-  Ctx *ctx = data;
-  while (!ctx->quit)
-  {
-    int n_clients = ctx_list_length (clients);
-    ctx_client_handle_events_iteration (data);
-    switch (n_clients)
-    {
-      case 0:
-        usleep (1000 * 10);
-        break;
-      case 1:
-        usleep (1); // letting quit work - and also makes framerate for dump
-        break;
-      default:
-        usleep (0); // the switching between clients should be enough
-        break;
-    }
-  }
-  return 0;
+  if (client != NULL) client->rev += 1; /* bump rev; NULL client is ignored */
+}
+long ctx_client_rev (CtxClient *client)
+{ /* current value of client->rev; 0 is returned for a NULL client */
+  return (client != NULL) ? client->rev : 0;
 }
 
-void ctx_clients_handle_events (Ctx *ctx)
+void
+ctx_client_feed_keystring (CtxClient *client, CtxEvent *event, const char *str)
 {
-#if 1 //#if CTX_THREADS==0
-    ctx_client_handle_events_iteration (ctx);
-#else
-    static thrd_t tid = 0;
-    if (tid == 0)
-    {
-      thrd_create (&tid, (void*)ctx_clients_handle_events_fun, ctx);
-    }
+#if CTX_CLIENTS /* when CTX_CLIENTS is off the body is compiled out */
+  if (client && client->vt) /* a NULL client or one without a vt is ignored */
+    vt_feed_keystring (client->vt, event, str);
 #endif
 }
 
-#endif /* CTX_VT */
+
+#endif // CTX_IMPLEMENTATION
 #endif //  __CTX_H__
diff --git a/operations/common/vector-fill.c b/operations/common/vector-fill.c
index 17e888bfd..8a1c5706e 100644
--- a/operations/common/vector-fill.c
+++ b/operations/common/vector-fill.c
@@ -264,7 +264,7 @@ static GeglNode *detect (GeglOperation *operation,
                          gint           y)
 {
   GeglProperties *o = GEGL_PROPERTIES (operation);
-  Ctx     *ctx = ctx_new ();
+  Ctx     *ctx = ctx_new_drawlist (-1, -1);
   gboolean result = FALSE;
 
   gegl_path_ctx_play (o->d, ctx);


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]