/* ctx git commit: 9332e7c2 */
/* 
 * ctx.h is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3 of the License, or (at your option) any later version.
 *
 * ctx.h is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with ctx; if not, see <https://www.gnu.org/licenses/>.
 *
 * 2012, 2015, 2019, 2020 Øyvind Kolås <pippin@gimp.org>
 *
 * ctx is a single header 2d vector graphics processing framework.
 *
 * To use ctx in a project, do the following:
 *
 * #define CTX_IMPLEMENTATION
 * #include "ctx.h"
 *
 * Ctx contains a minimal default fallback font with only ascii, so
 * you probably want to also include a font, and perhaps enable
 * the cairo or SDL2 optional backends, a more complete example
 * could be:
 *
 * #include <cairo.h>
 * #include <SDL.h>
 * #include "ctx-font-regular.h"
 * #define CTX_IMPLEMENTATION
 * #include "ctx.h"
 *
 * The behavior of ctx can be tweaked, and features can be configured, enabled
 * or disabled with other #defines, see further down in the start of this file
 * for details.
 */

#ifndef CTX_H
#define CTX_H

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* Opaque handle for a drawing context; only used by pointer in the
 * declarations of this header. */
typedef struct _Ctx Ctx;

/**
 * ctx_new:
 * @width: width in device units
 * @height: height in device units
 * @backend: backend to use; valid values are NULL or "auto", "drawlist",
 *   "sdl", "term" and "ctx".  These are the same strings that are valid
 *   for the CTX_BACKEND environment variable.
 *
 * Creates a new drawing context.
 *
 * The original description continues: the context has no pixels but
 * accumulates commands that can be played back on other ctx render
 * contexts, i.e. it is a ctx context using the drawlist backend.
 * NOTE(review): that wording presumably only holds when the drawlist
 * backend ends up being selected - confirm against the implementation.
 */
Ctx *
ctx_new (int         width,
         int         height,
         const char *backend);


/**
 * ctx_new_drawlist:
 *
 * Creates a drawing context that records drawing commands.  A context of
 * this kind is also the basis for building more complex contexts in which
 * the backend has been swapped out.
 */
Ctx *
ctx_new_drawlist (int width, int height);

/* Forward declaration of the drawlist entry type. */
typedef struct _CtxEntry CtxEntry;

/**
 * ctx_get_drawlist:
 * @ctx: a ctx context.
 * @count: return location for the length of the drawlist
 *
 * Returns: a read-only pointer to the first entry of the context's
 * drawlist.
 */
const CtxEntry *
ctx_get_drawlist (Ctx *ctx, int *count);

/**
 * ctx_new_for_drawlist:
 *
 * Creates a new drawing context for an already existing raw drawlist.
 */
Ctx *
ctx_new_for_drawlist (int width, int height, void *data, size_t length);

/**
 * ctx_set_drawlist:
 *
 * Replaces the drawlist of a ctx context with a new one.  The data is
 * expected to be @length * 9 long.
 */
int
ctx_set_drawlist (Ctx *ctx, void *data, int length);

/**
 * ctx_append_drawlist:
 *
 * Appends the commands of a binary drawlist.  The data is expected to be
 * @length * 9 long.
 */
int
ctx_append_drawlist (Ctx *ctx, void *data, int length);

/**
 * ctx_drawlist_clear:
 *
 * Clears the drawlist associated with the context.
 */
void
ctx_drawlist_clear (Ctx *ctx);


/**
 * ctx_free:
 * @ctx: a ctx context
 */
void
ctx_free (Ctx *ctx);

/**
 * ctx_reset:
 *
 * Prepares for rendering a new frame: the drawlist is cleared and the
 * state is initialized.
 */
void
ctx_reset (Ctx *ctx);

/**
 * ctx_flush:
 *
 * Signals that rendering of a frame is done.  On a context created for a
 * framebuffer this does nothing, since drawing commands are immediate
 * there.
 */
void
ctx_flush (Ctx *ctx);

/**
 * ctx_begin_path:
 *
 * Clears the current path, if there is one.
 */
void
ctx_begin_path (Ctx *ctx);

/**
 * ctx_save:
 *
 * Stores the transform, the clipping state, the fill and stroke sources,
 * the font size as well as the stroking and dashing options.
 */
void
ctx_save (Ctx *ctx);

/**
 * ctx_restore:
 *
 * Restores the state previously stored with ctx_save.  Calls to ctx_save
 * and ctx_restore should be balanced.
 */
void
ctx_restore (Ctx *ctx);

/**
 * ctx_start_group:
 *
 * Starts a compositing group.
 */
void
ctx_start_group (Ctx *ctx);

/**
 * ctx_end_group:
 *
 * Ends a compositing group.  The global alpha, compositing mode and blend
 * mode that were set before this call are used to apply the group.
 */
void
ctx_end_group (Ctx *ctx);

/**
 * ctx_clip:
 *
 * Uses the current path as a clipping mask; subsequent draw calls are
 * limited by the path.  The only way to enlarge the visible area again is
 * to call ctx_save first and undo the clip later with ctx_restore.
 */
void
ctx_clip (Ctx *ctx);

/**
 * ctx_image_smoothing:
 *
 * Sets or unsets bilinear / box filtering for textures.  Turning it off
 * uses the faster nearest neighbor sampling for all cases.
 */
void
ctx_image_smoothing (Ctx *ctx, int enabled);

/* Special negative line width values.  The replacement lists are
 * parenthesized so that the leading unary minus can never combine with
 * tokens surrounding the macro at its point of use. */
#define CTX_LINE_WIDTH_HAIRLINE (-1000.0)
#define CTX_LINE_WIDTH_ALIASED  (-1.0)
#define CTX_LINE_WIDTH_FAST     (-1.0)  /* aliased 1px wide line */



/* Path construction calls taking absolute coordinates (plus the arc
 * variants).  This header documents none of them beyond their names.
 * NOTE(review): the parameter names suggest control points (cx*, cy*) and
 * end points (x, y) - confirm against the implementation.
 */

/**
 * ctx_line_to:
 */
void
ctx_line_to (Ctx *ctx, float x, float y);

/**
 * ctx_move_to:
 */
void
ctx_move_to (Ctx *ctx, float x, float y);

/**
 * ctx_curve_to:
 */
void
ctx_curve_to (Ctx  *ctx,
              float cx0,
              float cy0,
              float cx1,
              float cy1,
              float x,
              float y);

/**
 * ctx_quad_to:
 */
void
ctx_quad_to (Ctx  *ctx,
             float cx,
             float cy,
             float x,
             float y);

/**
 * ctx_arc:
 */
void
ctx_arc (Ctx  *ctx,
         float x,
         float y,
         float radius,
         float angle1,
         float angle2,
         int   direction);

/**
 * ctx_arc_to:
 */
void
ctx_arc_to (Ctx  *ctx,
            float x1,
            float y1,
            float x2,
            float y2,
            float radius);

/**
 * ctx_rel_arc_to:
 */
void
ctx_rel_arc_to (Ctx  *ctx,
                float x1,
                float y1,
                float x2,
                float y2,
                float radius);

/* Bit flags with a TVG prefix; presumably the values for the @flags
 * argument of the ctx_tinyvg_* drawing calls (confirm against the
 * implementation).  The defaults consist of CTX_TVG_FLAG_LOAD_PAL only. */
enum
{
  CTX_TVG_FLAG_NONE       = 0x0,
  CTX_TVG_FLAG_LOAD_PAL   = 0x1,  /* bit 0 */
  CTX_TVG_FLAG_BBOX_CHECK = 0x2,  /* bit 1 */
  CTX_TVG_FLAG_DEFAULTS   = CTX_TVG_FLAG_LOAD_PAL
};


/* TinyVG entry points operating on an in-memory buffer (@data, @length).
 * The *_get_size variant has @width / @height out-parameters. */
int
ctx_tinyvg_get_size (uint8_t *data,
                     int      length,
                     int     *width,
                     int     *height);
int
ctx_tinyvg_draw (Ctx     *ctx,
                 uint8_t *data,
                 int      length,
                 int      flags);

/* The same pair of calls, taking a file descriptor instead of a buffer. */
int
ctx_tinyvg_fd_get_size (int  fd,
                        int *width,
                        int *height);
int
ctx_tinyvg_fd_draw (Ctx *ctx,
                    int  fd,
                    int  flags);

/* Rectangle helpers, the relative ("rel") path construction calls and
 * ctx_close_path.  This header carries no documentation for them beyond
 * their names.
 */

/**
 * ctx_rectangle:
 */
void
ctx_rectangle (Ctx *ctx, float x0, float y0, float w, float h);

/**
 * ctx_round_rectangle:
 */
void
ctx_round_rectangle (Ctx *ctx, float x0, float y0, float w, float h, float radius);

/**
 * ctx_rel_line_to:
 */
void
ctx_rel_line_to (Ctx *ctx, float x, float y);

/**
 * ctx_rel_move_to:
 */
void
ctx_rel_move_to (Ctx *ctx, float x, float y);

/**
 * ctx_rel_curve_to:
 */
void
ctx_rel_curve_to (Ctx *ctx, float x0, float y0, float x1, float y1, float x2, float y2);

/**
 * ctx_rel_quad_to:
 */
void
ctx_rel_quad_to (Ctx *ctx, float cx, float cy, float x, float y);

/**
 * ctx_close_path:
 */
void
ctx_close_path (Ctx *ctx);


/* Drawing operations; this header documents them by name only. */

/**
 * ctx_fill:
 */
void
ctx_fill (Ctx *ctx);

/**
 * ctx_stroke:
 */
void
ctx_stroke (Ctx *ctx);

/**
 * ctx_paint:
 */
void
ctx_paint (Ctx *ctx);

/**
 * ctx_preserve:
 */
void
ctx_preserve (Ctx *ctx);

/**
 * ctx_identity:
 *
 * Restores the context to the identity transform.
 *
 * NOTE: a bug currently makes this call break multi-threaded rendering
 * when it is used, since the rendering threads expect an initial
 * transform on top of the base identity.
 */
void
ctx_identity (Ctx *ctx);

/**
 * ctx_scale:
 *
 * Scales the user to device transform.
 */
void
ctx_scale (Ctx *ctx, float x, float y);

/**
 * ctx_translate:
 *
 * Adds a translation to the user to device transform.
 */
void
ctx_translate (Ctx *ctx, float x, float y);

/**
 * ctx_rotate:
 *
 * Adds a rotation to the user to device space transform.
 */
void
ctx_rotate (Ctx *ctx, float x);

/**
 * ctx_apply_transform:
 *
 * Adds a 3x3 matrix, given as the nine values @a .. @i, on top of the
 * existing user to device space transform.
 */
void
ctx_apply_transform (Ctx  *ctx,
                     float a,
                     float b,
                     float c,
                     float d,
                     float e,
                     float f,
                     float g,
                     float h,
                     float i);

/**
 * ctx_set_transform:
 *
 * Open question carried over from the original comment: is this
 * redundant with identity + apply?
 */
void
ctx_set_transform (Ctx  *ctx,
                   float a,
                   float b,
                   float c,
                   float d,
                   float e,
                   float f,
                   float g,
                   float h,
                   float i);

/**
 * ctx_miter_limit:
 *
 * Specifies the miter limit used when stroking.
 */
void
ctx_miter_limit (Ctx *ctx, float limit);

/**
 * ctx_line_width:
 *
 * Sets the line width used when stroking.
 */
void
ctx_line_width (Ctx *ctx, float x);

/**
 * ctx_line_dash_offset:
 *
 * Specifies the phase offset of the line dash pattern.
 */
void
ctx_line_dash_offset (Ctx *ctx, float line_dash);

/**
 * ctx_line_dash:
 *
 * Specifies the line dash pattern, passed as an array (@dashes) together
 * with an integer @count.
 */
void
ctx_line_dash (Ctx *ctx, float *dashes, int count);

/**
 * ctx_font_size:
 */
void
ctx_font_size (Ctx *ctx, float x);

/**
 * ctx_font:
 */
void
ctx_font (Ctx *ctx, const char *font);

/**
 * ctx_font_family:
 */
void
ctx_font_family (Ctx *ctx, const char *font_family);

/**
 * ctx_parse:
 *
 * Parses a string containing text ctx protocol data.
 */
void
ctx_parse (Ctx *ctx, const char *string);

/**
 * Low level glyph drawing calls.  Unless you are integrating harfbuzz you
 * probably want to use ctx_text instead.
 */
typedef struct _CtxGlyph CtxGlyph;

/**
 * ctx_glyph_allocate:
 */
CtxGlyph *
ctx_glyph_allocate (int n_glyphs);

/**
 * gtx_glyph_free:
 *
 * NOTE(review): the "gtx_" prefix looks like a typo for "ctx_".  The name
 * is left untouched here because it is the symbol this header declares;
 * renaming it would have to happen together with its definition.
 */
void
gtx_glyph_free (CtxGlyph *glyphs);

/**
 * ctx_glyph:
 */
int
ctx_glyph (Ctx *ctx, uint32_t unichar, int stroke);

/**
 * ctx_glyphs:
 */
void
ctx_glyphs (Ctx *ctx, CtxGlyph *glyphs, int n_glyphs);

/**
 * ctx_glyphs_stroke:
 */
void
ctx_glyphs_stroke (Ctx *ctx, CtxGlyph *glyphs, int n_glyphs);

/* Shadow parameters: color, blur and x / y offset. */
void
ctx_shadow_rgba (Ctx *ctx, float r, float g, float b, float a);
void
ctx_shadow_blur (Ctx *ctx, float x);
void
ctx_shadow_offset_x (Ctx *ctx, float x);
void
ctx_shadow_offset_y (Ctx *ctx, float y);

/**
 * ctx_view_box:
 *
 * Specifies the view box for the current page.
 */
void
ctx_view_box (Ctx *ctx, float x0, float y0, float w, float h);

/**
 * ctx_set_pixel_u8:
 *
 * Sets a single pixel to the nearest possible match of the specified
 * r,g,b,a value.  Fast for a few individual pixels, slow for doing
 * textures.
 */
void
ctx_set_pixel_u8 (Ctx     *ctx,
                  uint16_t x,
                  uint16_t y,
                  uint8_t  r,
                  uint8_t  g,
                  uint8_t  b,
                  uint8_t  a);

/**
 * ctx_global_alpha:
 *
 * Sets a global alpha value that the colors, textures and gradients are
 * modulated by.
 */
void
ctx_global_alpha (Ctx *ctx, float global_alpha);


/**
 * ctx_stroke_source:
 *
 * Makes the next source definition apply to stroking rather than filling.
 * When a stroke source is not explicitly set, the value of filling is
 * inherited.
 */
void
ctx_stroke_source (Ctx *ctx);

/* Color setters carrying a _stroke suffix. */
void
ctx_rgba_stroke (Ctx *ctx, float r, float g, float b, float a);
void
ctx_rgb_stroke (Ctx *ctx, float r, float g, float b);
void
ctx_rgba8_stroke (Ctx *ctx, uint8_t r, uint8_t g, uint8_t b, uint8_t a);

void
ctx_gray_stroke (Ctx *ctx, float gray);
void
ctx_drgba_stroke (Ctx *ctx, float r, float g, float b, float a);
void
ctx_cmyka_stroke (Ctx *ctx, float c, float m, float y, float k, float a);
void
ctx_cmyk_stroke (Ctx *ctx, float c, float m, float y, float k);
void
ctx_dcmyka_stroke (Ctx *ctx, float c, float m, float y, float k, float a);
void
ctx_dcmyk_stroke (Ctx *ctx, float c, float m, float y, float k);

/* The same set of color setters without the _stroke suffix. */
void
ctx_rgba (Ctx *ctx, float r, float g, float b, float a);
void
ctx_rgb (Ctx *ctx, float r, float g, float b);
void
ctx_rgba8 (Ctx *ctx, uint8_t r, uint8_t g, uint8_t b, uint8_t a);

void
ctx_gray (Ctx *ctx, float gray);
void
ctx_drgba (Ctx *ctx, float r, float g, float b, float a);
void
ctx_cmyka (Ctx *ctx, float c, float m, float y, float k, float a);
void
ctx_cmyk (Ctx *ctx, float c, float m, float y, float k);
void
ctx_dcmyka (Ctx *ctx, float c, float m, float y, float k, float a);
void
ctx_dcmyk (Ctx *ctx, float c, float m, float y, float k);

/* There are also getters for colors.  Color conversions can be done by
 * first setting a color in one format and then getting it in another.
 */
void
ctx_get_rgba (Ctx *ctx, float *rgba);
void
ctx_get_graya (Ctx *ctx, float *ya);
void
ctx_get_drgba (Ctx *ctx, float *drgba);
void
ctx_get_cmyka (Ctx *ctx, float *cmyka);
void
ctx_get_dcmyka (Ctx *ctx, float *dcmyka);

/* Queries taking a point (@x, @y); undocumented in this header. */
int
ctx_in_fill (Ctx *ctx, float x, float y);
int
ctx_in_stroke (Ctx *ctx, float x, float y);

/**
 * ctx_linear_gradient:
 *
 * Changes the source to a linear gradient from x0,y0 to x1,y1.  By default
 * an empty gradient from black to white exists; add stops with
 * ctx_gradient_add_stop to specify a custom gradient.
 */
void
ctx_linear_gradient (Ctx *ctx, float x0, float y0, float x1, float y1);

/**
 * ctx_radial_gradient:
 *
 * Changes the source to a radial gradient from a circle at x0,y0 with
 * radius r0 to an outer circle at x1,y1 with radius r1.
 *
 * NOTE: currently ctx only uses the origin of the second circle; both
 * radii are in use.
 */
void
ctx_radial_gradient (Ctx *ctx, float x0, float y0, float r0, float x1, float y1, float r1);

/* ctx_gradient_add_stop:
 *
 * Adds an RGBA gradient stop to the current gradient at position pos.
 *
 * XXX should be ctx_gradient_add_stop_rgba
 */
void
ctx_gradient_add_stop (Ctx *ctx, float pos, float r, float g, float b, float a);

/* ctx_gradient_add_stop_u8:
 *
 * Adds an RGBA gradient stop to the current gradient at position pos; the
 * color components are passed as uint8_t values.
 *
 * (The original comment carried an "XXX should be
 * ctx_gradient_add_stop_u8" remark, which matches the declared name.)
 */
void
ctx_gradient_add_stop_u8 (Ctx *ctx, float pos, uint8_t r, uint8_t g, uint8_t b, uint8_t a);

/* ctx_define_texture:
 */
void
ctx_define_texture (Ctx *ctx, const char *eid, int width, int height, int stride, int format, void *data, char *ret_eid);

/* ctx_source_transform:
 */
void
ctx_source_transform (Ctx  *ctx,
                      float a,
                      float b,
                      float c,
                      float d,
                      float e,
                      float f,
                      float g,
                      float h,
                      float i);

/* Forward declaration of the matrix type taken by
 * ctx_source_transform_matrix. */
typedef struct _CtxMatrix CtxMatrix;

/* ctx_source_transform_matrix:
 */
void
ctx_source_transform_matrix (Ctx       *ctx,
                             CtxMatrix *matrix);



/* Getters for context state; undocumented in this header apart from
 * their names. */
int
ctx_width (Ctx *ctx);
int
ctx_height (Ctx *ctx);
float
ctx_x (Ctx *ctx);
float
ctx_y (Ctx *ctx);
float
ctx_get_global_alpha (Ctx *ctx);
float
ctx_get_font_size (Ctx *ctx);
float
ctx_get_miter_limit (Ctx *ctx);
int
ctx_get_image_smoothing (Ctx *ctx);
float
ctx_get_line_dash_offset (Ctx *ctx);
const char *
ctx_get_font (Ctx *ctx);
float
ctx_get_line_width (Ctx *ctx);

/* Getters writing through out-parameters. */
void
ctx_current_point (Ctx *ctx, float *x, float *y);
void
ctx_get_transform (Ctx *ctx, float *a, float *b, float *c, float *d, float *e, float *f, float *g, float *h, float *i);


/* The pixel formats supported as render targets.
 *
 * Every enumerator is spelled out with its numeric value.  The original
 * notes that the values 1..4 are "not coincidence"; they line up with the
 * number of 8 bit components in the corresponding format name.
 */
enum _CtxPixelFormat
{
  CTX_FORMAT_NONE                 = 0,
  CTX_FORMAT_GRAY8                = 1,
  CTX_FORMAT_GRAYA8               = 2,
  CTX_FORMAT_RGB8                 = 3,
  CTX_FORMAT_RGBA8                = 4,
  CTX_FORMAT_BGRA8                = 5,
  CTX_FORMAT_RGB565               = 6,
  CTX_FORMAT_RGB565_BYTESWAPPED   = 7,
  CTX_FORMAT_RGB332               = 8,
  CTX_FORMAT_RGBAF                = 9,
  CTX_FORMAT_GRAYF                = 10,
  CTX_FORMAT_GRAYAF               = 11,
  CTX_FORMAT_GRAY1                = 12, /* MONO */
  CTX_FORMAT_GRAY2                = 13, /* DUO */
  CTX_FORMAT_GRAY4                = 14,
  CTX_FORMAT_CMYK8                = 15,
  CTX_FORMAT_CMYKA8               = 16,
  CTX_FORMAT_CMYKAF               = 17,
  CTX_FORMAT_YUV420               = 18,
  CTX_FORMAT_RGBA8_SEPARATE_ALPHA = 19
};
typedef enum _CtxPixelFormat CtxPixelFormat;

/**
 * ctx_new_for_framebuffer:
 *
 * Creates a new drawing context for a framebuffer; rendering happens
 * immediately.
 */
Ctx *
ctx_new_for_framebuffer (void *data, int width, int height, int stride, CtxPixelFormat pixel_format);

/* ctx_get_image_data: undocumented in this header.  Takes a source
 * rectangle (@sx, @sy, @sw, @sh), a pixel format and a destination
 * buffer with its stride. */
void
ctx_get_image_data (Ctx *ctx, int sx, int sy, int sw, int sh, CtxPixelFormat format, int dst_stride, uint8_t *dst_data);

/* ctx_put_image_data: undocumented in this header.  Note that @format is
 * a plain int here, unlike in ctx_get_image_data. */
void
ctx_put_image_data (Ctx *ctx, int w, int h, int stride, int format, uint8_t *data, int ox, int oy, int dirtyX, int dirtyY, int dirtyWidth, int dirtyHeight);


/* Loads an image file from disk into a texture.  The eid is based on the
 * path, not on the contents, which avoids computing a sha1 checksum of
 * the contents.  The pixel width and height of the image are returned
 * along with the eid that was used; @width, @height or @eid can be NULL
 * if the caller does not care about their values.
 */
void
ctx_texture_load (Ctx *ctx, const char *path, int *width, int *height, char *eid);

/* Sets the paint source to be a texture, identified by its eid. */
void
ctx_texture (Ctx *ctx, const char *eid, float x, float y);

/* Drawing helpers taking a texture eid; the _clipped variant additionally
 * takes @sx, @sy, @swidth and @sheight. */
void
ctx_draw_texture (Ctx *ctx, const char *eid, float x, float y, float w, float h);

void
ctx_draw_texture_clipped (Ctx        *ctx,
                          const char *eid,
                          float       x,
                          float       y,
                          float       w,
                          float       h,
                          float       sx,
                          float       sy,
                          float       swidth,
                          float       sheight);

/* The same pair of helpers, taking a file path instead of an eid. */
void
ctx_draw_image (Ctx *ctx, const char *path, float x, float y, float w, float h);

void
ctx_draw_image_clipped (Ctx        *ctx,
                        const char *path,
                        float       x,
                        float       y,
                        float       w,
                        float       h,
                        float       sx,
                        float       sy,
                        float       swidth,
                        float       sheight);

/* Used by the render threads of the fb and sdl backends. */
void
ctx_set_texture_source (Ctx *ctx, Ctx *texture_source);

/* Used when sharing the cache state of eids between clients. */
void
ctx_set_texture_cache (Ctx *ctx, Ctx *texture_cache);

/* Forward declaration of the drawlist type and a callback type that
 * receives a drawlist together with a user data pointer. */
typedef struct _CtxDrawlist CtxDrawlist;
typedef void (*CtxFullCb) (CtxDrawlist *drawlist,
                           void        *data);

/* Pixel format queries. */
int
ctx_pixel_format_bits_per_pixel (CtxPixelFormat format); /* bits per pixel */
int
ctx_pixel_format_get_stride (CtxPixelFormat format, int width);
int
ctx_pixel_format_components (CtxPixelFormat format);

/* Underscore-prefixed entry points; undocumented in this header. */
void
_ctx_set_store_clear (Ctx *ctx);
void
_ctx_set_transformation (Ctx *ctx, int transformation);

/* Hasher context: created for a size plus a @cols x @rows grid, queried
 * per @col / @row. */
Ctx *
ctx_hasher_new (int width, int height, int cols, int rows);
uint32_t
ctx_hasher_get_hash (Ctx *ctx, int col, int row);

int
ctx_utf8_strlen (const char *s);

/* Optional backend / integration switches.  Each switch keeps a value
 * supplied by the includer; otherwise it is derived from the include
 * guard of the corresponding third-party header, or defaults to 0.
 */

/* babl: enabled when _BABL_H has been defined before this point. */
#if !defined(CTX_BABL)
#  if defined(_BABL_H)
#    define CTX_BABL 1
#  else
#    define CTX_BABL 0
#  endif
#endif

/* If cairo.h is included before ctx.h, add the cairo integration code.
 * Unlike the other switches, CTX_CAIRO is left undefined otherwise.
 */
#if defined(CAIRO_H) && !defined(CTX_CAIRO)
#  define CTX_CAIRO 1
#endif

/* TFT_eSPI: enabled when _TFT_eSPIH_ has been defined. */
#if !defined(CTX_TFT_ESPI)
#  if defined(_TFT_eSPIH_)
#    define CTX_TFT_ESPI 1
#  else
#    define CTX_TFT_ESPI 0
#  endif
#endif

/* SDL: enabled when SDL_h_ has been defined. */
#if !defined(CTX_SDL)
#  if defined(SDL_h_)
#    define CTX_SDL 1
#  else
#    define CTX_SDL 0
#  endif
#endif

/* CTX_FB and CTX_KMS are off unless the includer defines them. */
#if !defined(CTX_FB)
#  define CTX_FB 0
#endif

#if !defined(CTX_KMS)
#  define CTX_KMS 0
#endif

/* Mutex abstraction.  Without SDL the mutex type is a plain int, creation
 * yields NULL, and lock / unlock expand to nothing; with SDL the macros
 * map onto the SDL mutex API.
 */
#if !CTX_SDL
#  define ctx_mutex_t           int
#  define ctx_create_mutex()    NULL
#  define ctx_lock_mutex(a)
#  define ctx_unlock_mutex(a)
#else
#  define ctx_mutex_t            SDL_mutex
#  define ctx_create_mutex()     SDL_CreateMutex()
#  define ctx_lock_mutex(a)      SDL_LockMutex(a)
#  define ctx_unlock_mutex(a)    SDL_UnlockMutex(a)
#endif


/* Bit flags with a CTX_CB prefix; ctx_new_cb, ctx_cb_set_flags and
 * ctx_cb_get_flags carry an int "flags" value (presumably built from
 * these - confirm against the implementation).  Bit 3 is not assigned.
 */
typedef enum CtxCbFlags
{
  CTX_CB_DEFAULTS       = 0,
  CTX_CB_GRAY           = 1 << 0,
  CTX_CB_HASH_CACHE     = 1 << 1,

  /* Might do a 332 render that is tear-free but slower, before queueing
   * slotted redraws of higher quality tiles; this is a pre-amble to eink
   * modes. */
  CTX_CB_332            = 1 << 2,

  /* If set, buffers are freed after each use: higher risk of memory
   * fragmentation, but making each frame blit a memory use peak. */
  CTX_CB_CYCLE_BUF      = 1 << 4,

  CTX_CB_DAMAGE_CONTROL = 1 << 5,
  CTX_CB_SHOW_FPS       = 1 << 6,
  CTX_CB_AUTO_332       = 1 << 7
} CtxCbFlags;


/* ctx_new_cb: creates a context driven by two caller supplied callbacks,
 * @set_pixels and @update_fb, both of which receive @user_data.
 * @set_pixels additionally receives a rectangle (x, y, w, h) and a
 * buffer pointer.
 * NOTE(review): the roles of @memory_budget and @scratch_fb are not
 * described in this header - confirm against the implementation.
 */
Ctx *
ctx_new_cb (int              width,
            int              height,
            CtxPixelFormat   format,
            void           (*set_pixels) (Ctx *ctx, void *user_data,
                                          int x, int y, int w, int h,
                                          void *buf),
            void           (*update_fb) (Ctx *ctx, void *user_data),
            void            *user_data,
            int              memory_budget,
            void            *scratch_fb,
            int              flags);

/* Accessors for the flags of a context. */
void
ctx_cb_set_flags (Ctx *ctx, int flags);
int
ctx_cb_get_flags (Ctx *ctx);

#if CTX_TFT_ESPI
/* Only declared when the TFT_eSPI integration is enabled; creates a
 * context for a TFT_eSPI object. */
Ctx *
ctx_new_tft (TFT_eSPI *tft,
             int       memory_budget,
             void     *scratch_fb,
             int       flags);

#endif


#if CTX_CAIRO
/* When CTX_CAIRO was enabled without cairo.h having been included, a
 * forward declaration of cairo_t is provided here. */
#if !defined(CAIRO_H)
typedef struct _cairo_t cairo_t;
#endif

/* Renders the deferred commands of a ctx context to a cairo context.
 */
void
ctx_render_cairo (Ctx *ctx, cairo_t *cr);

/* Creates a ctx context that renders directly to the specified cairo
 * context.
 */
Ctx *
ctx_new_for_cairo (cairo_t *cr);
#endif

/* The returned string is to be freed with free(). */
char *
ctx_render_string (Ctx *ctx, int longform, int *retlen);

void
ctx_render_stream (Ctx *ctx, FILE *stream, int formatter);

void
ctx_render_ctx (Ctx *ctx, Ctx *d_ctx);

/* Cycles through all used texture eids. */
void
ctx_render_ctx_textures (Ctx *ctx, Ctx *d_ctx);

void
ctx_start_move (Ctx *ctx);

int
ctx_add_single (Ctx *ctx, void *entry);

/* Conversions between UTF-8 and unicode code points, going by the
 * function names. */
uint32_t
ctx_utf8_to_unichar (const char *input);
int
ctx_unichar_to_utf8 (uint32_t ch, uint8_t *dest);


/* Fill rules; see ctx_fill_rule / ctx_get_fill_rule. */
typedef enum
{
  CTX_FILL_RULE_WINDING  = 0,
  CTX_FILL_RULE_EVEN_ODD = 1
} CtxFillRule;

/* Compositing modes.  The live branch numbers the modes consecutively
 * from 0; the disabled branch keeps an alternative numbering in steps
 * of 32.
 */
typedef enum
{
#if 0
  CTX_COMPOSITE_SOURCE_OVER      = 0,
  CTX_COMPOSITE_COPY             = 32,
  CTX_COMPOSITE_SOURCE_IN        = 64,
  CTX_COMPOSITE_SOURCE_OUT       = 96,
  CTX_COMPOSITE_SOURCE_ATOP      = 128,
  CTX_COMPOSITE_CLEAR            = 160,

  CTX_COMPOSITE_DESTINATION_OVER = 192,
  CTX_COMPOSITE_DESTINATION      = 224,
  CTX_COMPOSITE_DESTINATION_IN   = 256,
  CTX_COMPOSITE_DESTINATION_OUT  = 288,
  CTX_COMPOSITE_DESTINATION_ATOP = 320,
  CTX_COMPOSITE_XOR              = 352,

  CTX_COMPOSITE_ALL              = (32+64+128+256)
#else
  CTX_COMPOSITE_SOURCE_OVER      = 0,
  CTX_COMPOSITE_COPY             = 1,
  CTX_COMPOSITE_SOURCE_IN        = 2,
  CTX_COMPOSITE_SOURCE_OUT       = 3,
  CTX_COMPOSITE_SOURCE_ATOP      = 4,
  CTX_COMPOSITE_CLEAR            = 5,

  CTX_COMPOSITE_DESTINATION_OVER = 6,
  CTX_COMPOSITE_DESTINATION      = 7,
  CTX_COMPOSITE_DESTINATION_IN   = 8,
  CTX_COMPOSITE_DESTINATION_OUT  = 9,
  CTX_COMPOSITE_DESTINATION_ATOP = 10,
  CTX_COMPOSITE_XOR              = 11
#endif
} CtxCompositingMode;

/* Blend modes, numbered consecutively from 0; see ctx_blend_mode /
 * ctx_get_blend_mode. */
typedef enum
{
  CTX_BLEND_NORMAL      = 0,
  CTX_BLEND_MULTIPLY    = 1,
  CTX_BLEND_SCREEN      = 2,
  CTX_BLEND_OVERLAY     = 3,
  CTX_BLEND_DARKEN      = 4,
  CTX_BLEND_LIGHTEN     = 5,
  CTX_BLEND_COLOR_DODGE = 6,
  CTX_BLEND_COLOR_BURN  = 7,
  CTX_BLEND_HARD_LIGHT  = 8,
  CTX_BLEND_SOFT_LIGHT  = 9,
  CTX_BLEND_DIFFERENCE  = 10,
  CTX_BLEND_EXCLUSION   = 11,
  CTX_BLEND_HUE         = 12,
  CTX_BLEND_SATURATION  = 13,
  CTX_BLEND_COLOR       = 14,
  CTX_BLEND_LUMINOSITY  = 15,
  CTX_BLEND_DIVIDE      = 16,
  CTX_BLEND_ADDITION    = 17,
  CTX_BLEND_SUBTRACT    = 18
} CtxBlend;

/* Setter counterpart of ctx_get_blend_mode. */
void
ctx_blend_mode (Ctx     *ctx,
                CtxBlend mode);

/* Line join styles; see ctx_line_join / ctx_get_line_join. */
typedef enum
{
  CTX_JOIN_BEVEL,      /* 0 */
  CTX_JOIN_ROUND,      /* 1 */
  CTX_JOIN_MITER       /* 2 */
} CtxLineJoin;

/* Line cap styles; see ctx_line_cap / ctx_get_line_cap. */
typedef enum
{
  CTX_CAP_NONE,        /* 0 */
  CTX_CAP_ROUND,       /* 1 */
  CTX_CAP_SQUARE       /* 2 */
} CtxLineCap;

/* Extend modes; see ctx_extend / ctx_get_extend. */
typedef enum
{
  CTX_EXTEND_NONE,     /* 0 */
  CTX_EXTEND_REPEAT,   /* 1 */
  CTX_EXTEND_REFLECT,  /* 2 */
  CTX_EXTEND_PAD       /* 3 */
} CtxExtend;

/* Setter counterpart of ctx_get_extend. */
void
ctx_extend (Ctx      *ctx,
            CtxExtend extend);

/* Text baselines; see ctx_text_baseline / ctx_get_text_baseline. */
typedef enum
{
  CTX_TEXT_BASELINE_ALPHABETIC  = 0,
  CTX_TEXT_BASELINE_TOP         = 1,
  CTX_TEXT_BASELINE_HANGING     = 2,
  CTX_TEXT_BASELINE_MIDDLE      = 3,
  CTX_TEXT_BASELINE_IDEOGRAPHIC = 4,
  CTX_TEXT_BASELINE_BOTTOM      = 5
} CtxTextBaseline;

/* Text alignment; see ctx_text_align / ctx_get_text_align. */
typedef enum
{
  CTX_TEXT_ALIGN_START   = 0, /* in mrg START and END did not exist, */
  CTX_TEXT_ALIGN_END     = 1, /* but LEFT and RIGHT did              */
  CTX_TEXT_ALIGN_JUSTIFY = 2, /* not handled in ctx */
  CTX_TEXT_ALIGN_CENTER  = 3,
  CTX_TEXT_ALIGN_LEFT    = 4,
  CTX_TEXT_ALIGN_RIGHT   = 5
} CtxTextAlign;

/* Text direction; see ctx_text_direction / ctx_get_text_direction. */
typedef enum
{
  CTX_TEXT_DIRECTION_INHERIT = 0,
  CTX_TEXT_DIRECTION_LTR     = 1,
  CTX_TEXT_DIRECTION_RTL     = 2
} CtxTextDirection;

/* One glyph as consumed by ctx_glyphs / ctx_glyphs_stroke: an index
 * together with an x / y pair. */
struct _CtxGlyph
{
  uint32_t index;
  float    x;
  float    y;
};

/* Getters for the enum valued parts of the drawing state. */
CtxTextAlign
ctx_get_text_align (Ctx *ctx);
CtxTextBaseline
ctx_get_text_baseline (Ctx *ctx);
CtxTextDirection
ctx_get_text_direction (Ctx *ctx);
CtxFillRule
ctx_get_fill_rule (Ctx *ctx);
CtxLineCap
ctx_get_line_cap (Ctx *ctx);
CtxLineJoin
ctx_get_line_join (Ctx *ctx);
CtxCompositingMode
ctx_get_compositing_mode (Ctx *ctx);
CtxBlend
ctx_get_blend_mode (Ctx *ctx);
CtxExtend
ctx_get_extend (Ctx *ctx);

/* Variant of the gradient stop calls that takes the color as a string. */
void
ctx_gradient_add_stop_string (Ctx *ctx, float pos, const char *color);

/* Setters matching the getters above. */
void
ctx_text_align (Ctx *ctx, CtxTextAlign align);
void
ctx_text_baseline (Ctx *ctx, CtxTextBaseline baseline);
void
ctx_text_direction (Ctx *ctx, CtxTextDirection direction);
void
ctx_fill_rule (Ctx *ctx, CtxFillRule fill_rule);
void
ctx_line_cap (Ctx *ctx, CtxLineCap cap);
void
ctx_line_join (Ctx *ctx, CtxLineJoin join);
void
ctx_compositing_mode (Ctx *ctx, CtxCompositingMode mode);
/* Tight packing only matters for this specific struct: indexing is done
 * across members in arrays of it, so its size has to come out as 9 bytes.
 * The pack pragma is also sufficient on recent gcc versions.
 *
 * Layout: one command byte (code) followed by 8 bytes of payload (data)
 * that can be viewed as floats or as integers of various widths.  At 9
 * bytes, compactness and correctness are favored over performance; by
 * sacrificing float precision (zeroing the first 8 bits of f[0]) an 8 byte
 * entry with better alignment and cacheline behavior would be possible.
 */
#pragma pack(push,1)
struct _CtxEntry
{
  uint8_t code;
  union
  {
    float    f[2];
    uint8_t  u8[8];
    int8_t   s8[8];
    uint16_t u16[4];
    int16_t  s16[4];
    uint32_t u32[2];
    int32_t  s32[2];
    uint64_t u64[1]; /* unused */
  } data;
};
#pragma pack(pop)


/* Text calls taking only the string. */
void
ctx_text (Ctx *ctx, const char *string);
void
ctx_text_stroke (Ctx *ctx, const char *string);

/* Text calls that additionally take a position (@x, @y). */
void
ctx_fill_text (Ctx *ctx, const char *string, float x, float y);

void
ctx_stroke_text (Ctx *ctx, const char *string, float x, float y);

/* Returns the total horizontal advance the string would have had if it
 * had been rendered. */
float
ctx_text_width (Ctx *ctx, const char *string);

float
ctx_glyph_width (Ctx *ctx, int unichar);

/* Takes a font name plus the TTF contents as a buffer with its length. */
int
ctx_load_font_ttf (const char *name, const void *ttf_contents, int length);


/**
 * ctx_dirty_rect:
 *
 * Queries the dirtied bounding box of the drawing commands issued thus
 * far; the result is delivered through @x, @y, @width and @height.
 */
void
ctx_dirty_rect (Ctx *ctx,
                int *x,
                int *y,
                int *width,
                int *height);


#if defined(CTX_X86_64)
/* Only declared when CTX_X86_64 is defined. */
int
ctx_x86_64_level (void);
#endif


/* Modifier / button state bits; struct _CtxEvent has a state field of
 * this type. */
enum _CtxModifierState
{
  CTX_MODIFIER_STATE_SHIFT   = 0x01,
  CTX_MODIFIER_STATE_CONTROL = 0x02,
  CTX_MODIFIER_STATE_ALT     = 0x04,
  CTX_MODIFIER_STATE_BUTTON1 = 0x08,
  CTX_MODIFIER_STATE_BUTTON2 = 0x10,
  CTX_MODIFIER_STATE_BUTTON3 = 0x20,
  CTX_MODIFIER_STATE_DRAG    = 0x40  /* pointer button is down (0 or any) */
};
typedef enum _CtxModifierState CtxModifierState;

/* Scroll directions; struct _CtxEvent has a scroll_direction field of
 * this type. */
enum _CtxScrollDirection
{
  CTX_SCROLL_DIRECTION_UP    = 0,
  CTX_SCROLL_DIRECTION_DOWN  = 1,
  CTX_SCROLL_DIRECTION_LEFT  = 2,
  CTX_SCROLL_DIRECTION_RIGHT = 3
};
typedef enum _CtxScrollDirection CtxScrollDirection;

/* Forward declaration of the event type. */
typedef struct _CtxEvent CtxEvent;

/* Accessors for the backend of a context, passed as an untyped pointer. */
void
ctx_set_backend (Ctx  *ctx,
                 void *backend);
void *
ctx_get_backend (Ctx *ctx);

/* The following API is only available when CTX_EVENTS is defined to 1.
 *
 * It provides the ability to register callbacks with the current path
 * that get delivered with transformed coordinates.
 */
int ctx_need_redraw (Ctx *ctx);
void ctx_queue_draw (Ctx *ctx);

/* Float values stored on the context, addressed by a 32 bit hash. */
float ctx_get_float (Ctx *ctx, uint32_t hash);
void ctx_set_float (Ctx *ctx, uint32_t hash, float value);

/* ctx_flush is declared and documented earlier in this header; the
 * redundant second declaration that used to follow ctx_ticks has been
 * dropped. */
unsigned long ctx_ticks (void);

void ctx_set_clipboard (Ctx *ctx, const char *text);
char *ctx_get_clipboard (Ctx *ctx);

void _ctx_events_init     (Ctx *ctx);
/* Rectangle with integer position and size. */
typedef struct _CtxIntRectangle CtxIntRectangle;
struct _CtxIntRectangle
{
  int x;
  int y;
  int width;
  int height;
};

/* Quit request and the query for it. */
void
ctx_quit (Ctx *ctx);
int
ctx_has_quit (Ctx *ctx);

/* Event callback: receives the event and two user data pointers. */
typedef void (*CtxCb) (CtxEvent *event, void *data, void *data2);

/* Destroy notification callback: receives a single user data pointer. */
typedef void (*CtxDestroyNotify) (void *data);

/* Event types.  Every basic type occupies one bit so that types can be
 * combined into masks; the second half of the enum defines such masks.
 * Note that CTX_SCROLL and CTX_SET_CURSOR are part of none of the masks,
 * CTX_ANY included.
 */
enum _CtxEventType
{
  CTX_PRESS        = 1 << 0,
  CTX_MOTION       = 1 << 1,
  CTX_RELEASE      = 1 << 2,
  CTX_ENTER        = 1 << 3,
  CTX_LEAVE        = 1 << 4,
  CTX_TAP          = 1 << 5,
  CTX_TAP_AND_HOLD = 1 << 6,

  /* NYI: SWIPE, ZOOM ROT_ZOOM, */

  CTX_DRAG_PRESS   = 1 << 7,
  CTX_DRAG_MOTION  = 1 << 8,
  CTX_DRAG_RELEASE = 1 << 9,
  CTX_KEY_PRESS    = 1 << 10,
  CTX_KEY_DOWN     = 1 << 11,
  CTX_KEY_UP       = 1 << 12,
  CTX_SCROLL       = 1 << 13,
  CTX_MESSAGE      = 1 << 14,
  CTX_DROP         = 1 << 15,

  CTX_SET_CURSOR   = 1 << 16, /* used internally */

  /* NOTE(review): the original has a free-standing remark at this spot,
   * "client should store state - preparing for restart", which does not
   * belong to any enumerator - apparently a leftover from a removed
   * event type. */

  /* masks */
  CTX_POINTER  = (CTX_PRESS | CTX_MOTION | CTX_RELEASE | CTX_DROP),
  CTX_TAPS     = (CTX_TAP | CTX_TAP_AND_HOLD),
  CTX_CROSSING = (CTX_ENTER | CTX_LEAVE),
  CTX_DRAG     = (CTX_DRAG_PRESS | CTX_DRAG_MOTION | CTX_DRAG_RELEASE),
  CTX_KEY      = (CTX_KEY_DOWN | CTX_KEY_UP | CTX_KEY_PRESS),
  CTX_MISC     = (CTX_MESSAGE),
  CTX_ANY      = (CTX_POINTER | CTX_DRAG | CTX_CROSSING | CTX_KEY | CTX_MISC | CTX_TAPS)
};
typedef enum _CtxEventType CtxEventType;

/* CTX_CLICK is currently just an alias of CTX_PRESS. */
#define CTX_CLICK   CTX_PRESS   // SHOULD HAVE MORE LOGIC

/* Forward declaration of the client type. */
typedef struct _CtxClient CtxClient;

/* An event as handed to CtxCb callbacks. */
struct _CtxEvent
{
  CtxEventType type;
  uint32_t     time;
  Ctx         *ctx;

  /* when set - propagation is stopped */
  int          stop_propagate;

  CtxModifierState state;

  /* 0 = left mouse button / virtual focus
   * 1 = middle mouse button
   * 2 = right mouse button
   * 3 = first multi-touch .. (NYI)
   */
  int          device_no;

  /* untransformed (device) coordinates */
  float        device_x;
  float        device_y;

  /* coordinates, and deltas for motion/drag events, in user-coordinates: */
  float        x;
  float        y;
  /* start-coordinates (press) event for drag, untransformed coordinates */
  float        start_x;
  float        start_y;
  /* coordinates of the previous event */
  float        prev_x;
  float        prev_y;
  /* x - prev_x and y - prev_y; redundant, but often useful */
  float        delta_x;
  float        delta_y;

  /* only valid for key-events, re-use as keycode? */
  unsigned int unicode;

  /* As key this can be "up" "down" "space" "backspace" "a" "b" "ø" etc.
   * This is also where the message is delivered for MESSAGE events, and
   * where the data for drop events is delivered.
   *
   * XXX the lifetime of this string should be longer than that of the
   * events, preferably interned.  XXX maybe add a flag for this?
   */
  const char  *string;

  /* if 1 call free.. */
  int          owns_string;
  CtxScrollDirection scroll_direction;

  /* It would be nice to add the bounding box of the hit-area causing the
   * event, making for instance scissored enter/leave repaint easier. */
};

// layer-event "layer"  motion x y device_no 

/* Key-binding registration prototypes (the bodies are not in this part of
 * the file).  Both take a key string, an action string, a label string, a
 * callback and the callback's user data; the _full variant additionally
 * takes a destroy_notify callback with its own destroy_data.
 *
 * NOTE(review): when destroy_notify fires is not visible here - presumably
 * when the binding is cleared; confirm against the implementation.
 */
void ctx_add_key_binding_full (Ctx *ctx,
                               const char *key,
                               const char *action,
                               const char *label,
                               CtxCb       cb,
                               void       *cb_data,
                               CtxDestroyNotify destroy_notify,
                               void       *destroy_data);
void ctx_add_key_binding (Ctx *ctx,
                          const char *key,
                          const char *action,
                          const char *label,
                          CtxCb cb,
                          void  *cb_data);
/* One key-binding record, as handed out by ctx_get_bindings().
 *
 * The callback / destroy members have the same types as the matching
 * arguments of ctx_add_key_binding_full().
 * NOTE(review): nick, command and label presumably hold the key, action
 * and label strings passed at registration - confirm in the implementation.
 */
typedef struct CtxBinding CtxBinding;

struct CtxBinding
{
  char             *nick;
  char             *command;
  char             *label;
  CtxCb             cb;
  void             *cb_data;
  CtxDestroyNotify  destroy_notify;
  void             *destroy_data;
};
/* Binding inspection/reset, and idle/timeout callback registration.
 *
 * The ctx_add_* functions take a callback `idle_cb (ctx, idle_data)` that
 * returns int; the timeout variants additionally take an interval `ms`, and
 * the _full variants a destroy_notify with its own destroy_data.
 *
 * NOTE(review): the int each ctx_add_* returns is presumably the handle
 * accepted by ctx_remove_idle(), and the meaning of idle_cb's return value
 * (keep/remove) is not visible here - confirm both in the implementation.
 */
CtxBinding *ctx_get_bindings (Ctx *ctx);
void  ctx_clear_bindings     (Ctx *ctx);
void  ctx_remove_idle        (Ctx *ctx, int handle);
int   ctx_add_timeout_full   (Ctx *ctx, int ms, int (*idle_cb)(Ctx *ctx, void *idle_data), void *idle_data,
                              void (*destroy_notify)(void *destroy_data), void *destroy_data);
int   ctx_add_timeout        (Ctx *ctx, int ms, int (*idle_cb)(Ctx *ctx, void *idle_data), void *idle_data);
int   ctx_add_idle_full      (Ctx *ctx, int (*idle_cb)(Ctx *ctx, void *idle_data), void *idle_data,
                              void (*destroy_notify)(void *destroy_data), void *destroy_data);
int   ctx_add_idle           (Ctx *ctx, int (*idle_cb)(Ctx *ctx, void *idle_data), void *idle_data);


/* Hit-region and event-listener registration prototypes.
 *
 * ctx_listen_full takes an explicit rectangle (x, y, width, height), a mask
 * of event types, a callback, two user-data pointers and a finalize
 * callback that receives both user-data pointers plus finalize_data.
 * ctx_listen and ctx_listen_with_finalize take no rectangle.
 *
 * NOTE(review): which area the rectangle-less variants listen on is not
 * visible here - presumably the current path; confirm in the
 * implementation.
 */
void ctx_add_hit_region (Ctx *ctx, const char *id);

void ctx_listen_full (Ctx     *ctx,
                      float    x,
                      float    y,
                      float    width,
                      float    height,
                      CtxEventType  types,
                      CtxCb    cb,
                      void    *data1,
                      void    *data2,
                      void   (*finalize)(void *listen_data, void *listen_data2,
                                         void *finalize_data),
                      void    *finalize_data);
/* Sets the stop_propagate flag semantics on an event; see the
 * stop_propagate member of struct _CtxEvent. */
void  ctx_event_stop_propagate (CtxEvent *event);
void  ctx_listen               (Ctx          *ctx,
                                CtxEventType  types,
                                CtxCb         cb,
                                void*         data1,
                                void*         data2);
void  ctx_listen_with_finalize (Ctx          *ctx,
                                CtxEventType  types,
                                CtxCb         cb,
                                void*         data1,
                                void*         data2,
                      void   (*finalize)(void *listen_data, void *listen_data2,
                                         void *finalize_data),
                      void    *finalize_data);

/* Initialisation, event retrieval and pointer-state query prototypes. */
void ctx_init (int *argc, char ***argv); // is a no-op but could launch
                                         // terminal
CtxEvent *ctx_get_event (Ctx *ctx);
/* NOTE(review): `fd` / `count` look like an output array and its length -
 * confirm which of them the caller must initialise. */
void      ctx_get_event_fds (Ctx *ctx, int *fd, int *count);


/* NOTE(review): `no` is presumably a device number as in the device_no
 * member of struct _CtxEvent - confirm. */
int   ctx_pointer_is_down (Ctx *ctx, int no);
float ctx_pointer_x (Ctx *ctx);
float ctx_pointer_y (Ctx *ctx);
void  ctx_freeze (Ctx *ctx);
void  ctx_thaw   (Ctx *ctx);
int   ctx_events_frozen (Ctx *ctx);
void  ctx_events_clear_items (Ctx *ctx);

/* The following functions drive the event delivery, registered callbacks
 * are called in response to these being called.
 *
 * All of them except ctx_incoming_message take the event time as uint32_t
 * and return int; ctx_incoming_message takes `long time` and returns void.
 * NOTE(review): the meaning of the int return values is not visible in
 * this part of the file - confirm in the implementation.
 */

int ctx_key_down  (Ctx *ctx, unsigned int keyval,
                   const char *string, uint32_t time);
int ctx_key_up    (Ctx *ctx, unsigned int keyval,
                   const char *string, uint32_t time);
int ctx_key_press (Ctx *ctx, unsigned int keyval,
                   const char *string, uint32_t time);


int ctx_scrolled  (Ctx *ctx, float x, float y, CtxScrollDirection scroll_direction, uint32_t time);
void ctx_incoming_message (Ctx *ctx, const char *message, long time);
int ctx_pointer_motion    (Ctx *ctx, float x, float y, int device_no, uint32_t time);
int ctx_pointer_release   (Ctx *ctx, float x, float y, int device_no, uint32_t time);
int ctx_pointer_press     (Ctx *ctx, float x, float y, int device_no, uint32_t time);
/* Note that `string` is a non-const char* here, unlike the other string
 * arguments in this group. */
int ctx_pointer_drop      (Ctx *ctx, float x, float y, int device_no, uint32_t time,
                           char *string);

/* Command codes.  Each enumerator below 128 is a printable ASCII character
 * (or NUL for CTX_CONT); the trailing comment on each line lists the
 * arguments the command carries.  Characters that are not used as codes
 * are listed in place, in comment-only lines, so the table reads in ASCII
 * order.  Codes from 128 upwards are the dedicated setter commands
 * described in the comment preceding CTX_FILL_RULE.
 */
typedef enum
{
  CTX_CONT             = '\0', // - contains args from preceding entry
  CTX_NOP              = ' ', //
                   //     !    UNUSED
                   //     "    start/end string
                   //     #    comment in parser
                   //     $    UNUSED
                   //     %    percent of viewport width or height
  CTX_EDGE             = '&', // not occurring in commandstream
                   //     '    start/end string
  CTX_DATA             = '(', // size size-in-entries - u32
  CTX_DATA_REV         = ')', // reverse traversal data marker
  CTX_SET_RGBA_U8      = '*', // r g b a - u8
  CTX_NEW_EDGE         = '+', // x0 y0 x1 y1 - s16
                   //     ,    UNUSED/RESERVED
  CTX_SET_PIXEL        = '-', // 8bit "fast-path" r g b a x y - u8 for rgba, and u16 for x,y
  // set pixel might want a shorter ascii form with hex-color? or keep it an embedded
  // only option?
                   //     .    decimal separator
                   //     /    UNUSED

  /* optimizations that reduce the number of entries used,
   * not visible outside the drawlist compression, thus
   * using entries that cannot be used directly as commands
   * since they would be interpreted as numbers - if values>127
   * then the embedded font data is harder to escape.
   */
  CTX_REL_LINE_TO_X4            = '0', // x1 y1 x2 y2 x3 y3 x4 y4   -- s8
  CTX_REL_LINE_TO_REL_CURVE_TO  = '1', // x1 y1 cx1 cy1 cx2 cy2 x y -- s8
  CTX_REL_CURVE_TO_REL_LINE_TO  = '2', // cx1 cy1 cx2 cy2 x y x1 y1 -- s8
  CTX_REL_CURVE_TO_REL_MOVE_TO  = '3', // cx1 cy1 cx2 cy2 x y x1 y1 -- s8
  CTX_REL_LINE_TO_X2            = '4', // x1 y1 x2 y2 -- s16
  CTX_MOVE_TO_REL_LINE_TO       = '5', // x1 y1 x2 y2 -- s16
  CTX_REL_LINE_TO_REL_MOVE_TO   = '6', // x1 y1 x2 y2 -- s16
  CTX_FILL_MOVE_TO              = '7', // x y
  CTX_REL_QUAD_TO_REL_QUAD_TO   = '8', // cx1 x1 cy1 y1 cx1 x2 cy1 y1 -- s8
  CTX_REL_QUAD_TO_S16           = '9', // cx1 cy1 x y                 - s16
                   //     :    UNUSED
  CTX_FLUSH            = ';',
                   //     <    UNUSED
                   //     =    UNUSED/RESERVED
                   //     >    UNUSED
                   //     ?    UNUSED

  CTX_DEFINE_GLYPH     = '@', // unichar width - u32
  CTX_ARC_TO           = 'A', // x1 y1 x2 y2 radius
  CTX_ARC              = 'B', // x y radius angle1 angle2 direction
  CTX_CURVE_TO         = 'C', // cx1 cy1 cx2 cy2 x y
  CTX_PAINT            = 'D', // 
  CTX_STROKE           = 'E', //
  CTX_FILL             = 'F', //
  CTX_RESTORE          = 'G', //
  CTX_HOR_LINE_TO      = 'H', // x
  CTX_DEFINE_TEXTURE   = 'I', // "eid" width height format "data"
  CTX_ROTATE           = 'J', // radians
  CTX_COLOR            = 'K', // model, c1 c2 c3 ca - variable arg count
  CTX_LINE_TO          = 'L', // x y
  CTX_MOVE_TO          = 'M', // x y
  CTX_BEGIN_PATH       = 'N', //
  CTX_SCALE            = 'O', // xscale yscale
  CTX_NEW_PAGE         = 'P', // - NYI - optional page-size
  CTX_QUAD_TO          = 'Q', // cx cy x y
  CTX_VIEW_BOX         = 'R', // x y width height
  CTX_SMOOTH_TO        = 'S', // cx cy x y
  CTX_SMOOTHQ_TO       = 'T', // x y
  CTX_RESET            = 'U', //
  CTX_VER_LINE_TO      = 'V', // y
  CTX_APPLY_TRANSFORM  = 'W', // a b c d e f g h i j - for set_transform combine with identity
  CTX_EXIT             = 'X', //
  CTX_ROUND_RECTANGLE  = 'Y', // x y width height radius

  CTX_CLOSE_PATH2      = 'Z', //
  CTX_KERNING_PAIR     = '[', // glA glB kerning, glA and glB in u16 kerning in s32
                       // \   UNUSED
                       // ^   PARSER - vh unit
  CTX_COLOR_SPACE      = ']', // IccSlot  data  data_len,
                         //    data can be a string with a name,
                         //    icc data or perhaps our own serialization
                         //    of profile data
  CTX_EDGE_FLIPPED     = '^', // x0 y0 x1 y1 - s16  | also unit
  CTX_STROKE_SOURCE    = '_', // next source definition applies to strokes
  CTX_SOURCE_TRANSFORM = '`',
  CTX_REL_ARC_TO       = 'a', // x1 y1 x2 y2 radius
  CTX_CLIP             = 'b',
  CTX_REL_CURVE_TO     = 'c', // cx1 cy1 cx2 cy2 x y
  CTX_LINE_DASH        = 'd', // dashlen0 [dashlen1 ...]
  CTX_TRANSLATE        = 'e', // x y
  CTX_LINEAR_GRADIENT  = 'f', // x1 y1 x2 y2
  CTX_SAVE             = 'g',
  CTX_REL_HOR_LINE_TO  = 'h', // x
  CTX_TEXTURE          = 'i',
  CTX_PRESERVE         = 'j', // XXX - fix!
  CTX_SET_KEY          = 'k', // - used together with another char to identify
                              //   a key to set
  CTX_REL_LINE_TO      = 'l', // x y
  CTX_REL_MOVE_TO      = 'm', // x y
  CTX_FONT             = 'n', // as used by text parser XXX: move to keyvals?
  CTX_RADIAL_GRADIENT  = 'o', // x1 y1 radius1 x2 y2 radius2
  CTX_GRADIENT_STOP    = 'p', // argument count depends on current color model
  CTX_REL_QUAD_TO      = 'q', // cx cy x y
  CTX_RECTANGLE        = 'r', // x y width height
  CTX_REL_SMOOTH_TO    = 's', // cx cy x y
  CTX_REL_SMOOTHQ_TO   = 't', // x y
  CTX_STROKE_TEXT      = 'u', // string - utf8 string
  CTX_REL_VER_LINE_TO  = 'v', // y
  CTX_GLYPH            = 'w', // unichar fontsize
  CTX_TEXT             = 'x', // string | kern - utf8 data to shape or horizontal kerning amount
  CTX_IDENTITY         = 'y', // XXX remove?
  CTX_CLOSE_PATH       = 'z', //
  CTX_START_GROUP      = '{',
                       // |    UNUSED
  CTX_END_GROUP        = '}',
                       // ~    UNUSED/textenc


  /* though expressed as two chars in serialization we have
   * dedicated byte commands for the setters to keep the dispatch
   * simpler. There is no need for these to be human readable thus we go >128
   * they also should not be emitted when outputting, even compact mode ctx.
   *
   * rasterizer:    &^+
   * font:          @[
   *
   * unused:        !&<=>?: =/\`,
   * reserved:      '"&   #. %^@
   */

  CTX_FILL_RULE        = 128, // kr rule - u8, default = CTX_FILL_RULE_EVEN_ODD
  CTX_BLEND_MODE       = 129, // kB mode - u32 , default=0

  CTX_MITER_LIMIT      = 130, // km limit - float, default = 0.0

  CTX_LINE_JOIN        = 131, // kj join - u8 , default=0
  CTX_LINE_CAP         = 132, // kc cap - u8, default = 0
  CTX_LINE_WIDTH       = 133, // kw width, default = 2.0
  CTX_GLOBAL_ALPHA     = 134, // ka alpha - default=1.0
  CTX_COMPOSITING_MODE = 135, // kc mode - u32 , default=0
                              // NOTE(review): "kc" is also listed for
                              // CTX_LINE_CAP above - confirm which two-char
                              // key each of them really uses

  CTX_FONT_SIZE        = 136, // kf size - float, default=?
  CTX_TEXT_ALIGN       = 137, // kt align - u8, default = CTX_TEXT_ALIGN_START
  CTX_TEXT_BASELINE    = 138, // kb baseline - u8, default = CTX_TEXT_ALIGN_ALPHABETIC
  CTX_TEXT_DIRECTION   = 139, // kd

  CTX_SHADOW_BLUR      = 140, // ks
  CTX_SHADOW_COLOR     = 141, // kC
  CTX_SHADOW_OFFSET_X  = 142, // kx
  CTX_SHADOW_OFFSET_Y  = 143, // ky
  CTX_IMAGE_SMOOTHING  = 144, // kS
  CTX_LINE_DASH_OFFSET = 145, // kD lineDashOffset

  CTX_EXTEND           = 146, // ke u32 extend mode, default=0
                              //
  CTX_STROKE_RECT      = 200, // strokeRect - only exist in long form
  CTX_FILL_RECT        = 201, // fillRect   - only exist in long form
} CtxCode;


#pragma pack(push,1)


typedef struct _CtxCommand CtxCommand;

/* CTX_ASSERT selects whether ctx_assert() does anything.  It defaults to 0
 * (disabled); define it to 1 before including this header to get
 * diagnostics.  The #ifndef guard is what makes that override possible
 * without a macro redefinition.
 */
#ifndef CTX_ASSERT
#define CTX_ASSERT               0
#endif

/* ctx_assert(a): when enabled and `a` is false, prints
 * "<function>:<line> assertion failed" to stderr and continues - it never
 * aborts.  When disabled it expands to nothing, so `a` is not evaluated.
 *
 * The enabled form is wrapped in do { } while (0) so that it behaves as a
 * single statement, e.g. `if (x) ctx_assert (y); else ...` keeps the else
 * bound to the outer if.  __func__ is the standard spelling of the
 * function-name identifier.
 */
#if CTX_ASSERT==1
#define ctx_assert(a) \
  do { \
    if (!(a)) { fprintf (stderr, "%s:%i assertion failed\n", __func__, __LINE__); } \
  } while (0)
#else
#define ctx_assert(a)
#endif


/* Typed views onto one command in a command stream.
 *
 * This struct is declared inside the #pragma pack(push,1) region, so no
 * padding is inserted between members.  Every view in the anonymous union
 * starts with the one-byte command code; the multi-entry views then repeat
 * the pattern "1 byte (pad / continuation code) + 8 bytes of payload".
 * NOTE(review): each such 9-byte group presumably overlays one CtxEntry
 * (code byte + 8 data bytes) - CtxEntry is declared outside this part of
 * the file, confirm there.
 *
 * Views whose last member is an 8-byte array commented "and continues"
 * carry variable-length data that extends past the declared array.
 */
struct
  _CtxCommand
{
  union
  {
    uint8_t  code;
    CtxEntry entry;
    struct
    {
      uint8_t code;
      float scalex;
      float scaley;
    } scale;
    struct
    {
      uint8_t code;
      uint32_t stringlen;
      uint32_t blocklen;
      uint8_t cont;
      uint8_t data[8]; /* ... and continues */
    } data;
    struct
    {
      uint8_t code;
      uint32_t stringlen;
      uint32_t blocklen;
    } data_rev;
    struct
    {
      uint8_t code;
      float pad;
      float pad2;
      uint8_t code_data;
      uint32_t stringlen;
      uint32_t blocklen;
      uint8_t code_cont;
      uint8_t utf8[8]; /* .. and continues */
    } text;
    struct
    {
      uint8_t  code;
      uint32_t key_hash;
      float    pad;
      uint8_t  code_data;
      uint32_t stringlen;
      uint32_t blocklen;
      uint8_t  code_cont;
      uint8_t  utf8[8]; /* .. and continues */
    } set;
    struct
    {
      uint8_t  code;
      uint32_t pad0;
      float    pad1;
      uint8_t  code_data;
      uint32_t stringlen;
      uint32_t blocklen;
      uint8_t  code_cont;
      uint8_t  utf8[8]; /* .. and continues */
    } get;
    struct {
      uint8_t  code;
      uint32_t count; /* better than byte_len in code, but needs to then be set   */
      float    pad1;
      uint8_t  code_data;
      uint32_t byte_len;
      uint32_t blocklen;
      uint8_t  code_cont;
      float    data[2]; /* .. and - possibly continues */
    } line_dash;
    struct {
      uint8_t  code;
      uint32_t space_slot;
      float    pad1;
      uint8_t  code_data;
      uint32_t data_len;
      uint32_t blocklen;
      uint8_t  code_cont;
      uint8_t  data[8]; /* .. and continues */
    } colorspace;
    struct
    {
      uint8_t  code;
      float    x;
      float    y;
      uint8_t  code_data;
      uint32_t stringlen;
      uint32_t blocklen;
      uint8_t  code_cont;
      char     eid[8]; /* .. and continues */
    } texture;
    struct
    {
      uint8_t  code;
      uint32_t width;
      uint32_t height;
      uint8_t  code_cont0;
      uint16_t format;
      uint16_t pad0;
      uint32_t pad1;
      uint8_t  code_data;
      uint32_t stringlen;
      uint32_t blocklen;
      uint8_t  code_cont1;
      char     eid[8]; /* .. and continues */
      // followed by - in variable offset code_Data, data_len, datablock_len, cont, pixeldata
    } define_texture;
    struct
    {
      uint8_t  code;
      float    pad;
      float    pad2;
      uint8_t  code_data;
      uint32_t stringlen;
      uint32_t blocklen;
      uint8_t  code_cont;
      uint8_t  utf8[8]; /* .. and continues */
    } text_stroke;
    struct
    {
      uint8_t  code;
      float    pad;
      float    pad2;
      uint8_t  code_data;
      uint32_t stringlen;
      uint32_t blocklen;
      uint8_t  code_cont;
      uint8_t  utf8[8]; /* .. and continues */
    } set_font;
    struct
    {
      uint8_t code;
      float model;
      float r;
      uint8_t pad1;
      float g;
      float b;
      uint8_t pad2;
      float a;
    } rgba;
    struct
    {
      uint8_t code;
      float model;
      float c;
      uint8_t pad1;
      float m;
      float y;
      uint8_t pad2;
      float k;
      float a;
    } cmyka;
    struct
    {
      uint8_t code;
      float model;
      float g;
      uint8_t pad1;
      float a;
    } graya;

    struct
    {
      uint8_t code;
      float model;
      float c0;
      uint8_t pad1;
      float c1;
      float c2;
      uint8_t pad2;
      float c3;
      float c4;
      uint8_t pad3;
      float c5;
      float c6;
      uint8_t pad4;
      float c7;
      float c8;
      uint8_t pad5;
      float c9;
      float c10;
    } set_color;
    struct
    {
      uint8_t code;
      float x;
      float y;
    } rel_move_to;
    struct
    {
      uint8_t code;
      float x;
      float y;
    } rel_line_to;
    struct
    {
      uint8_t code;
      float x;
      float y;
    } line_to;
    struct
    {
      uint8_t code;
      float cx1;
      float cy1;
      uint8_t pad0;
      float cx2;
      float cy2;
      uint8_t pad1;
      float x;
      float y;
    } rel_curve_to;
    struct
    {
      uint8_t code;
      float x;
      float y;
    } move_to;
    struct
    {
      uint8_t code;
      float cx1;
      float cy1;
      uint8_t pad0;
      float cx2;
      float cy2;
      uint8_t pad1;
      float x;
      float y;
    } curve_to;
    struct
    {
      uint8_t code;
      float x1;
      float y1;
      uint8_t pad0;
      float r1;
      float x2;
      uint8_t pad1;
      float y2;
      float r2;
    } radial_gradient;
    struct
    {
      uint8_t code;
      float x1;
      float y1;
      uint8_t pad0;
      float x2;
      float y2;
    } linear_gradient;
    struct
    {
      uint8_t code;
      float x;
      float y;
      uint8_t pad0;
      float width;
      float height;
      uint8_t pad1;
      float radius;
    } rectangle;
    struct {
      uint8_t code;
      float x;
      float y;
      uint8_t pad0;
      float width;
      float height;
    } view_box;

    struct
    {
      uint8_t code;
      uint16_t glyph_before;
      uint16_t glyph_after;
       int32_t amount;
    } kern;

    struct
    {
      uint8_t code;
      uint32_t glyph;
      uint32_t advance; // * 256
    } define_glyph;

    struct
    {
      uint8_t code;
      uint8_t rgba[4];
      uint16_t x;
      uint16_t y;
    } set_pixel;
    struct
    {
      uint8_t code;
      float cx;
      float cy;
      uint8_t pad0;
      float x;
      float y;
    } quad_to;
    struct
    {
      uint8_t code;
      float cx;
      float cy;
      uint8_t pad0;
      float x;
      float y;
    } rel_quad_to;
    struct
    {
      uint8_t code;
      float x;
      float y;
      uint8_t pad0;
      float radius;
      float angle1;
      uint8_t pad1;
      float angle2;
      float direction;
    }
    arc;
    struct
    {
      uint8_t code;
      float x1;
      float y1;
      uint8_t pad0;
      float x2;
      float y2;
      uint8_t pad1;
      float radius;
    }
    arc_to;
    /* some format specific generic accessors:  */
    struct
    {
      uint8_t code;
      float   x0;
      float   y0;
      uint8_t pad0;
      float   x1;
      float   y1;
      uint8_t pad1;
      float   x2;
      float   y2;
      uint8_t pad2;
      float   x3;
      float   y3;
      uint8_t pad3;
      float   x4;
      float   y4;
    } c;
    struct
    {
      uint8_t code;
      float   a0;
      float   a1;
      uint8_t pad0;
      float   a2;
      float   a3;
      uint8_t pad1;
      float   a4;
      float   a5;
      uint8_t pad2;
      float   a6;
      float   a7;
      uint8_t pad3;
      float   a8;
      float   a9;
    } f;
    struct
    {
      uint8_t  code;
      uint32_t a0;
      uint32_t a1;
      uint8_t  pad0;
      uint32_t a2;
      uint32_t a3;
      uint8_t  pad1;
      uint32_t a4;
      uint32_t a5;
      uint8_t  pad2;
      uint32_t a6;
      uint32_t a7;
      uint8_t  pad3;
      uint32_t a8;
      uint32_t a9;
    } u32;
    struct
    {
      uint8_t  code;
      uint64_t a0;
      uint8_t  pad0;
      uint64_t a1;
      uint8_t  pad1;
      uint64_t a2;
      uint8_t  pad2;
      uint64_t a3;
      uint8_t  pad3;
      uint64_t a4;
    } u64;
    struct
    {
      uint8_t code;
      int32_t a0;
      int32_t a1;
      uint8_t pad0;
      int32_t a2;
      int32_t a3;
      uint8_t pad1;
      int32_t a4;
      int32_t a5;
      uint8_t pad2;
      int32_t a6;
      int32_t a7;
      uint8_t pad3;
      int32_t a8;
      int32_t a9;
    } s32;
    struct
    {
      uint8_t code;
      int16_t a0;
      int16_t a1;
      int16_t a2;
      int16_t a3;
      uint8_t pad0;
      int16_t a4;
      int16_t a5;
      int16_t a6;
      int16_t a7;
      uint8_t pad1;
      int16_t a8;
      int16_t a9;
      int16_t a10;
      int16_t a11;
      uint8_t pad2;
      int16_t a12;
      int16_t a13;
      int16_t a14;
      int16_t a15;
      uint8_t pad3;
      int16_t a16;
      int16_t a17;
      int16_t a18;
      int16_t a19;
    } s16;
    struct
    {
      uint8_t code;
      uint16_t a0;
      uint16_t a1;
      uint16_t a2;
      uint16_t a3;
      uint8_t pad0;
      uint16_t a4;
      uint16_t a5;
      uint16_t a6;
      uint16_t a7;
      uint8_t pad1;
      uint16_t a8;
      uint16_t a9;
      uint16_t a10;
      uint16_t a11;
      uint8_t pad2;
      uint16_t a12;
      uint16_t a13;
      uint16_t a14;
      uint16_t a15;
      uint8_t pad3;
      uint16_t a16;
      uint16_t a17;
      uint16_t a18;
      uint16_t a19;
    } u16;
    struct
    {
      uint8_t code;
      uint8_t a0;
      uint8_t a1;
      uint8_t a2;
      uint8_t a3;
      uint8_t a4;
      uint8_t a5;
      uint8_t a6;
      uint8_t a7;
      uint8_t pad0;
      uint8_t a8;
      uint8_t a9;
      uint8_t a10;
      uint8_t a11;
      uint8_t a12;
      uint8_t a13;
      uint8_t a14;
      uint8_t a15;
      uint8_t pad1;
      uint8_t a16;
      uint8_t a17;
      uint8_t a18;
      uint8_t a19;
      uint8_t a20;
      uint8_t a21;
      uint8_t a22;
      uint8_t a23;
    } u8;
    struct
    {
      uint8_t code;
      int8_t a0;
      int8_t a1;
      int8_t a2;
      int8_t a3;
      int8_t a4;
      int8_t a5;
      int8_t a6;
      int8_t a7;
      uint8_t pad0;
      int8_t a8;
      int8_t a9;
      int8_t a10;
      int8_t a11;
      int8_t a12;
      int8_t a13;
      int8_t a14;
      int8_t a15;
      uint8_t pad1;
      int8_t a16;
      int8_t a17;
      int8_t a18;
      int8_t a19;
      int8_t a20;
      int8_t a21;
      int8_t a22;
      int8_t a23;
    } s8;
  };
  CtxEntry next_entry; // also pads size of CtxCommand slightly.
};

typedef struct _CtxBackend CtxBackend;
void ctx_windowtitle (Ctx *ctx, const char *text);
/* Table of function pointers (plus two data pointers) describing a
 * backend.  Every callback except `free` takes the Ctx as its first
 * argument; `free` takes the backend pointer itself.
 *
 * Note: this struct is declared inside the #pragma pack(push,1) region
 * that is closed further down, so it is laid out without padding.
 * NOTE(review): which callbacks may be left NULL is not visible here -
 * confirm in the code that invokes them.
 */
struct _CtxBackend
{
  Ctx                      *ctx;
  void  (*process)         (Ctx *ctx, CtxCommand *entry);
  void  (*reset)           (Ctx *ctx);
  void  (*flush)           (Ctx *ctx);

  void  (*set_windowtitle) (Ctx *ctx, const char *text);

  char *(*get_event)       (Ctx *ctx, int timout_ms);

  void  (*consume_events)  (Ctx *ctx);
  void  (*get_event_fds)   (Ctx *ctx, int *fd, int *count);
  char *(*get_clipboard)   (Ctx *ctx);
  void  (*set_clipboard)   (Ctx *ctx, const char *text);
  void (*free)             (void *backend); /* the free pointers are abused as the differentiator
                                               between different backends   */
  void                     *user_data; // not used by ctx core
};

/* Opaque iterator type and the prototypes that operate on it.  Going by
 * the signatures, ctx_iterator_init() sets an iterator up over a drawlist
 * starting at start_pos, and ctx_iterator_next() hands out CtxCommand
 * pointers.
 * NOTE(review): the end-of-iteration value of ctx_iterator_next()
 * (presumably NULL) and the accepted `flags` are not visible here -
 * confirm in the implementation.
 */
typedef struct _CtxIterator CtxIterator;

CtxIterator *
ctx_current_path (Ctx *ctx);
/* ex1/ey1/ex2/ey2 are output pointers for the extents of the path. */
void
ctx_path_extents (Ctx *ctx, float *ex1, float *ey1, float *ex2, float *ey2);
CtxCommand *ctx_iterator_next (CtxIterator *iterator);
void
ctx_iterator_init (CtxIterator  *iterator,
                   CtxDrawlist  *drawlist,  // replace with Ctx*  ?
                   int           start_pos,
                   int           flags);    // need exposing for font bits
int ctx_iterator_pos (CtxIterator *iterator);

void ctx_handle_events (Ctx *ctx);
/* Expands to a char* at the first data byte of entry[2]; requires a
 * variable named `entry` in scope at the point of use.  The same macro is
 * defined again, identically, with the other ctx_arg_* macros below. */
#define ctx_arg_string()  ((char*)&entry[2].data.u8[0])


/* The above should be public API
 */

#pragma pack(pop)

/* access macros for nth argument of a given type when packed into
 * an CtxEntry pointer in current code context
 *
 * All of these expand to an expression on a variable named `entry`, which
 * must be in scope where the macro is used.  Argument number `no` is split
 * into an entry index and a slot within that entry's data member:
 * one 64-bit, two 32-bit, four 16-bit or eight 8-bit values per entry.
 * The expansions are lvalues, so they can be read or assigned.
 *
 * ctx_arg_string() points at the first data byte of entry[2]; it is an
 * identical repeat of a definition made earlier in this header.
 */
#define ctx_arg_float(no) entry[(no)>>1].data.f[(no)&1]
#define ctx_arg_u64(no)   entry[(no)].data.u64[0]
#define ctx_arg_u32(no)   entry[(no)>>1].data.u32[(no)&1]
#define ctx_arg_s32(no)   entry[(no)>>1].data.s32[(no)&1]
#define ctx_arg_u16(no)   entry[(no)>>2].data.u16[(no)&3]
#define ctx_arg_s16(no)   entry[(no)>>2].data.s16[(no)&3]
#define ctx_arg_u8(no)    entry[(no)>>3].data.u8[(no)&7]
#define ctx_arg_s8(no)    entry[(no)>>3].data.s8[(no)&7]
#define ctx_arg_string()  ((char*)&entry[2].data.u8[0])

/* Color model identifiers.
 *
 * The numbering is regular: a base model N has its "...A" variant at
 * N + 100 and its "..._A" variant at N + 200.  The variants are written
 * relative to their base below so that relationship is explicit; the
 * resulting values are unchanged.
 */
typedef enum
{
  /* base models */
  CTX_GRAY           = 1,
  CTX_RGB            = 3,
  CTX_DRGB           = 4,
  CTX_CMYK           = 5,
  CTX_DCMYK          = 6,
  CTX_LAB            = 7,
  CTX_LCH            = 8,

  /* base + 100 */
  CTX_GRAYA          = CTX_GRAY  + 100,
  CTX_RGBA           = CTX_RGB   + 100,
  CTX_DRGBA          = CTX_DRGB  + 100,
  CTX_CMYKA          = CTX_CMYK  + 100,
  CTX_DCMYKA         = CTX_DCMYK + 100,
  CTX_LABA           = CTX_LAB   + 100,
  CTX_LCHA           = CTX_LCH   + 100,

  /* base + 200 */
  CTX_GRAYA_A        = CTX_GRAY  + 200,
  CTX_RGBA_A         = CTX_RGB   + 200,
  CTX_RGBA_A_DEVICE  = CTX_DRGB  + 200,
  CTX_CMYKA_A        = CTX_CMYK  + 200,
  CTX_DCMYKA_A       = CTX_DCMYK + 200,
  // RGB  device and  RGB  ?
} CtxColorModel;

/* Antialiasing settings, as passed to ctx_set_antialias() and returned by
 * ctx_get_antialias().  Values are spelled out; they match the implicit
 * numbering (0..3).
 */
enum _CtxAntialias
{
  CTX_ANTIALIAS_DEFAULT = 0, //
  CTX_ANTIALIAS_NONE    = 1, // non-antialiased
  CTX_ANTIALIAS_FAST    = 2, // aa 3    // deprecated or is default equal to this now?
  CTX_ANTIALIAS_GOOD    = 3, // aa 5    // this should perhaps still be 5?
};
typedef enum _CtxAntialias CtxAntialias;

/* Mouse cursor shapes, as used by ctx_set_cursor(), ctx_get_cursor() and
 * ctx_listen_set_cursor().  Values are spelled out; they match the
 * implicit numbering (0..16).
 */
enum _CtxCursor
{
  CTX_CURSOR_UNSET      = 0,
  CTX_CURSOR_NONE       = 1,
  CTX_CURSOR_ARROW      = 2,
  CTX_CURSOR_IBEAM      = 3,
  CTX_CURSOR_WAIT       = 4,
  CTX_CURSOR_HAND       = 5,
  CTX_CURSOR_CROSSHAIR  = 6,
  CTX_CURSOR_RESIZE_ALL = 7,
  CTX_CURSOR_RESIZE_N   = 8,
  CTX_CURSOR_RESIZE_S   = 9,
  CTX_CURSOR_RESIZE_E   = 10,
  CTX_CURSOR_RESIZE_NE  = 11,
  CTX_CURSOR_RESIZE_SE  = 12,
  CTX_CURSOR_RESIZE_W   = 13,
  CTX_CURSOR_RESIZE_NW  = 14,
  CTX_CURSOR_RESIZE_SW  = 15,
  CTX_CURSOR_MOVE       = 16
};
typedef enum _CtxCursor CtxCursor;

/* to be used immediately after a ctx_listen or ctx_listen_full causing the
 * cursor to change when hovering the listen area.
 */
void ctx_listen_set_cursor (Ctx      *ctx,
                            CtxCursor cursor);

/* lower level cursor setting that is independent of ctx event handling
 */
void         ctx_set_cursor (Ctx *ctx, CtxCursor cursor);
CtxCursor    ctx_get_cursor (Ctx *ctx);
/* Getter/setter pairs for the antialiasing mode, the number of render
 * threads and the hash cache flag.
 * NOTE(review): valid ranges for n_threads and whether these settings
 * apply to every backend are not visible here - confirm in the
 * implementation. */
void         ctx_set_antialias (Ctx *ctx, CtxAntialias antialias);
CtxAntialias ctx_get_antialias (Ctx *ctx);
void         ctx_set_render_threads   (Ctx *ctx, int n_threads);
int          ctx_get_render_threads   (Ctx *ctx);

void         ctx_set_hash_cache (Ctx *ctx, int enable_hash_cache);
int          ctx_get_hash_cache (Ctx *ctx);


/* Opaque parser type and its constructor prototype.
 *
 * Besides the target ctx, the constructor takes a width/height, a cell
 * size, a cursor position, and three callbacks with their user data:
 *   set_prop (prop_data, key, data, len)  - key is a uint32_t
 *   get_prop (prop_data, key, &data, &len) - key is a string
 *   exit     (exit_data)
 * A parser is fed with ctx_parser_feed_bytes() and released with
 * ctx_parser_free(), both declared further down.
 * NOTE(review): units of width/height vs. cell_width/cell_height, and
 * when the callbacks are invoked, are not visible here - confirm in the
 * implementation.
 */
typedef struct _CtxParser CtxParser;
  CtxParser *ctx_parser_new (
  Ctx       *ctx,
  int        width,
  int        height,
  float      cell_width,
  float      cell_height,
  int        cursor_x,
  int        cursor_y,
  int   (*set_prop)(void *prop_data, uint32_t key, const char *data,  int len),
  int   (*get_prop)(void *prop_Data, const char *key, char **data, int *len),
  void  *prop_data,
  void (*exit) (void *exit_data),
  void *exit_data);


/* Color space slots, used as the space_slot argument of ctx_colorspace().
 * Values are spelled out; they match the implicit numbering (0..4).
 */
enum _CtxColorSpace
{
  CTX_COLOR_SPACE_DEVICE_RGB  = 0,
  CTX_COLOR_SPACE_DEVICE_CMYK = 1,
  CTX_COLOR_SPACE_USER_RGB    = 2,
  CTX_COLOR_SPACE_USER_CMYK   = 3,
  CTX_COLOR_SPACE_TEXTURE     = 4
};
typedef enum _CtxColorSpace CtxColorSpace;


/* Sets the color space for a slot; the space is given either as a string
 * naming it ("sRGB", "rec2020", etc.) or as an ICC profile.
 *
 * The device_rgb and device_cmyk slots are mostly meant to be handled
 * outside drawing code; drawing code should use user_rgb and user_cmyk.
 * With no user_cmyk set, user_cmyk == device_cmyk.
 *
 * The profiles that are set follow the graphics state.
 *
 * data / data_length: the name string or profile bytes and their length.
 */
void ctx_colorspace (Ctx           *ctx,
                     CtxColorSpace  space_slot,
                     unsigned char *data,
                     int            data_length);







/* Updates the size and cell size of an existing parser; the parameters
 * have the same names and types as the matching ones of ctx_parser_new(). */
void
ctx_parser_set_size (CtxParser *parser,
                     int        width,
                     int        height,
                     float      cell_width,
                     float      cell_height);

/* Hands `count` bytes starting at `data` to the parser. */
void ctx_parser_feed_bytes (CtxParser *parser, const char *data, int count);

/* File-loading helpers: `contents` and `length` are output pointers; the
 * second variant additionally takes a max_len.
 * NOTE(review): the int return convention (presumably 0 on success), who
 * frees *contents, and what happens when the data exceeds max_len are not
 * visible here - confirm in the implementation. */
int
ctx_get_contents (const char     *path,
                   unsigned char **contents,
                   long           *length);
int
ctx_get_contents2 (const char     *path,
                   unsigned char **contents,
                   long           *length,
                   long            max_len);

void ctx_parser_free (CtxParser *parser);
typedef struct _CtxSHA1 CtxSHA1;

/* Base64 conversion in both directions.  The caller supplies the output
 * buffer (`ascii` resp. `bin`).
 * NOTE(review): required output buffer sizes, NUL-termination of `ascii`,
 * and whether *length is in/out for ctx_base642bin are not visible here -
 * confirm in the implementation. */
void
ctx_bin2base64 (const void *bin,
                int         bin_length,
                char       *ascii);
int
ctx_base642bin (const char    *ascii,
                int           *length,
                unsigned char *bin);


/* 3x3 matrix of floats; this is the type the ctx_matrix_* functions
 * operate on. */
struct _CtxMatrix {
  float m[3][3];
};

/* Matrix helper prototypes.  Functions taking a non-const CtxMatrix*
 * modify that matrix in place (they return void); ctx_matrix_apply_transform
 * leaves the matrix untouched and rewrites *x and *y; ctx_matrix_multiply
 * writes into `result`.
 * NOTE(review): the multiplication order of `t` and `s`, the angle unit
 * of ctx_matrix_rotate (presumably radians), and whether `result` may
 * alias an input are not visible here - confirm in the implementation.
 */
void ctx_apply_matrix (Ctx *ctx, CtxMatrix *matrix);
void ctx_matrix_apply_transform (const CtxMatrix *m, float *x, float *y);
void ctx_matrix_invert (CtxMatrix *m);
void ctx_matrix_identity (CtxMatrix *matrix);
void ctx_matrix_scale (CtxMatrix *matrix, float x, float y);
void ctx_matrix_rotate (CtxMatrix *matrix, float angle);
void ctx_matrix_multiply (CtxMatrix       *result,
                          const CtxMatrix *t,
                          const CtxMatrix *s);


/* Guesses a media type when the start of the file is already available in
 * `content` (`len` bytes), which disambiguates some of the important
 * supported formats.  Preference is given to magic, then to the extension
 * of `path`, then to plain text vs binary.
 */
const char *ctx_guess_media_type (const char *path, const char *content, int len);

/* get media-type, with preference towards using extension of path and
 * not reading the data at all.
 */
const char *ctx_path_get_media_type (const char *path);

/* Coarse media-type classes, as returned by ctx_media_type_class().
 * Values are spelled out; they match the original numbering (0..6).
 */
typedef enum {
  CTX_MEDIA_TYPE_NONE        = 0,
  CTX_MEDIA_TYPE_TEXT        = 1,
  CTX_MEDIA_TYPE_IMAGE       = 2,
  CTX_MEDIA_TYPE_VIDEO       = 3,
  CTX_MEDIA_TYPE_AUDIO       = 4,
  CTX_MEDIA_TYPE_INODE       = 5,
  CTX_MEDIA_TYPE_APPLICATION = 6,
} CtxMediaTypeClass;

/* Maps a media-type string to one of the CtxMediaTypeClass values. */
CtxMediaTypeClass ctx_media_type_class (const char *media_type);


/* NOTE(review): presumably the size of one terminal character cell for
 * the terminal backend; units and behaviour on other backends are not
 * visible here - confirm. */
float ctx_term_get_cell_width (Ctx *ctx);
float ctx_term_get_cell_height (Ctx *ctx);


/* NOTE(review): presumably draws the ctx logo at (x, y) with size `dim` -
 * confirm what `dim` measures. */
void ctx_logo (Ctx *ctx, float x, float y, float dim);



#if 1 // CTX_VT

/* Opaque terminal ("VT") type and prototypes operating on it.  This whole
 * section is compiled unconditionally (`#if 1`), the CTX_VT condition
 * being commented out.
 * NOTE(review): ownership of the string returned by vt_get_selection()
 * is not visible here - confirm whether the caller frees it. */
typedef struct _VT VT;
void vt_feed_keystring    (VT *vt, CtxEvent *event, const char *str);
void vt_paste             (VT *vt, const char *str);
char *vt_get_selection    (VT *vt);
long vt_rev               (VT *vt);
int  vt_has_blink         (VT *vt);
int ctx_vt_had_alt_screen (VT *vt);

int ctx_clients_handle_events (Ctx *ctx);

typedef struct _CtxList CtxList;
CtxList *ctx_clients (Ctx *ctx);

void ctx_set_fullscreen (Ctx *ctx, int val);
int ctx_get_fullscreen (Ctx *ctx);

/* Wraps caller-provided pixel data in a CtxBuffer.  `freefunc`, if the
 * implementation calls it, receives the pixel pointer and `user_data`.
 * NOTE(review): the unit of `stride` (presumably bytes per row) and when
 * freefunc is invoked are not visible here - confirm. */
typedef struct _CtxBuffer CtxBuffer;
CtxBuffer *ctx_buffer_new_for_data (void *data, int width, int height,
                                    int stride,
                                    CtxPixelFormat pixel_format,
                                    void (*freefunc) (void *pixels, void *user_data),
                                    void *user_data);

/* Identifies the kind of backend a Ctx is using; see ctx_backend_type().
 * Values are spelled out; they match the implicit numbering (0..11).
 */
typedef enum CtxBackendType {
  CTX_BACKEND_NONE       = 0,
  CTX_BACKEND_CTX        = 1,
  CTX_BACKEND_RASTERIZER = 2,
  CTX_BACKEND_HASHER     = 3,
  CTX_BACKEND_HEADLESS   = 4,
  CTX_BACKEND_TERM       = 5,
  CTX_BACKEND_FB         = 6,
  CTX_BACKEND_KMS        = 7,
  CTX_BACKEND_TERMIMG    = 8,
  CTX_BACKEND_CAIRO      = 9,
  CTX_BACKEND_SDL        = 10,
  CTX_BACKEND_DRAWLIST   = 11,
} CtxBackendType;

/* Reports which kind of backend `ctx` is using. */
CtxBackendType ctx_backend_type (Ctx *ctx);

/* Returns 1 when the backend of `ctx` is one of FB, SDL, KMS or HEADLESS,
 * and 0 for every other backend type. */
static inline int ctx_backend_is_tiled (Ctx *ctx)
{
  const CtxBackendType kind = ctx_backend_type (ctx);

  if (kind == CTX_BACKEND_FB  ||
      kind == CTX_BACKEND_SDL ||
      kind == CTX_BACKEND_KMS ||
      kind == CTX_BACKEND_HEADLESS)
    {
      return 1;
    }
  return 0;
}

#endif


/* CTX_CODEC_CHAR can be overridden by defining it before this point; the
 * default is the control character with octal code 020 (datalink escape).
 * The commented-out lines are alternative values kept for reference. */
#ifndef CTX_CODEC_CHAR
//#define CTX_CODEC_CHAR '\035'
//#define CTX_CODEC_CHAR 'a'
#define CTX_CODEC_CHAR '\020' // datalink escape
//#define CTX_CODEC_CHAR '^'
#endif

/* Fallback: if no `assert` macro is defined at this point (for instance
 * because <assert.h> has not been included), define assert(a) to expand
 * to nothing - its argument is then not evaluated. */
#ifndef assert
#define assert(a)
#endif

#ifdef __cplusplus
}
#endif
#endif
#ifndef __CTX_H__
#define __CTX_H__
#ifndef _DEFAULT_SOURCE
#define _DEFAULT_SOURCE
#endif
#ifndef _XOPEN_SOURCE
#define _XOPEN_SOURCE 600
#endif

#ifndef CTX_STRING_H
#define CTX_STRING_H

/* Growable string used by the ctx_string_* functions.
 * NOTE(review): judging by the member names, `length` is presumably the
 * size in bytes, `utf8_length` the number of UTF-8 characters and
 * `allocated_length` the capacity of `str`; the role of `is_line` is not
 * visible here - confirm in the implementation.
 */
typedef struct _CtxString
{
  char *str;
  int   length;
  int   utf8_length;
  int   allocated_length;
  int   is_line;
} CtxString;

/* CtxString API. Only prototypes are given here; the definitions are not part
 * of this section. The *_printf functions take a format string followed by
 * variadic arguments (presumably printf-style).
 * NOTE(review): ownership rules are not visible from these declarations -
 * check the implementation for who frees the char * returned by
 * ctx_string_dissolve / ctx_strdup_printf and for what the freealloc
 * argument of ctx_string_free selects.
 */
CtxString   *ctx_string_new_with_size  (const char *initial, int initial_size);
CtxString   *ctx_string_new            (const char *initial);
CtxString   *ctx_string_new_printf (const char *format, ...);
char       *ctx_string_dissolve       (CtxString *string);
void        ctx_string_free           (CtxString *string, int freealloc);
const char *ctx_string_get            (CtxString *string);
uint32_t    ctx_string_get_unichar    (CtxString *string, int pos);
int         ctx_string_get_length     (CtxString *string);
int         ctx_string_get_utf8length (CtxString *string);
void        ctx_string_set            (CtxString *string, const char *new_string);
void        ctx_string_clear          (CtxString *string);
void        ctx_string_append_str     (CtxString *string, const char *str);
void        ctx_string_append_byte    (CtxString *string, char  val);
void        ctx_string_append_string  (CtxString *string, CtxString *string2);
void        ctx_string_append_unichar (CtxString *string, unsigned int unichar);
void        ctx_string_append_data    (CtxString *string, const char *data, int len);

void        ctx_string_pre_alloc       (CtxString *string, int size);
void        ctx_string_append_utf8char (CtxString *string, const char *str);
void        ctx_string_append_printf  (CtxString *string, const char *format, ...);
void        ctx_string_replace_utf8   (CtxString *string, int pos, const char *new_glyph);
void        ctx_string_insert_utf8    (CtxString *string, int pos, const char *new_glyph);

void        ctx_string_insert_unichar (CtxString *string, int pos, uint32_t unichar);
void        ctx_string_replace_unichar (CtxString *string, int pos, uint32_t unichar);
void        ctx_string_remove         (CtxString *string, int pos);
char       *ctx_strdup_printf         (const char *format, ...);

/* Boolean constants, each defined only if it has not already been provided. */
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif

#endif
#ifndef _CTX_INTERNAL_FONT_
#define _CTX_INTERNAL_FONT_

#ifndef CTX_FONT_ascii
/* this is a ctx encoded font based on DejaVuSans.ttf */
/* CTX_SUBDIV:8  CTX_BAKE_FONT_SIZE:160 */
/* glyphs covered: 

 !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghi
  jklmnopqrstuvwxyz{|}~  */
static const struct __attribute__ ((packed)) {uint8_t code; uint32_t a; uint32_t b;}
ctx_font_ascii[]={
{'@', 0x00000020, 0x00002bb0},/*                 x-advance: 43.687500 */
{'@', 0x00000021, 0x00003719},/*        !        x-advance: 55.097656 */
{'M', 0x41a5e7f2, 0xc1886037},
{'l', 0x4159fc90, 0x00000000},
{'4', 0x00880000, 0x0000ff94},
{'6', 0xff780000, 0xfd670000},
{'l', 0x4159fc90, 0x00000000},
{'l', 0x00000000, 0x422fd6c4},
{'l', 0xbfabcfe0, 0x41bfad86},
{'l', 0xc12df5b2, 0x00000000},
{'l', 0xbfb46710, 0xc1bfad86},
{'l', 0x00000000, 0xc22fd6c4},
{'@', 0x00000022, 0x00003f38},/*        "        x-advance: 63.218750 */
{'M', 0x41c50c07, 0xc2c86716},
{'l', 0x00000000, 0x4214fe48},
{'4', 0x0000ffa5, 0xfed70000},
{'6', 0x0000005b, 0x000000ca},
{'l', 0x00000000, 0x4214fe48},
{'l', 0xc1368ce4, 0x00000000},
{'l', 0x00000000, 0xc214fe48},
{'l', 0x41368ce4, 0x00000000},
{'@', 0x00000023, 0x0000732a},/*        #        x-advance: 115.164062 */
{'M', 0x428c8973, 0xc271e113},
{'l', 0xc19c3dda, 0x00000000},
{'4', 0x00b3ffd3, 0x0000009d},
{'6', 0xff4d002c, 0xfecfffb0},
{'l', 0xc0df5b10, 0x41ded19c},
{'l', 0x419cc74c, 0x00000000},
{'l', 0x40e180e0, 0xc1ded19c},
{'l', 0x412bcfe8, 0x00000000},
{'l', 0xc0dd3540, 0x41ded19c},
{'l', 0x41a78448, 0x00000000},
{'l', 0x00000000, 0x41255e7c},
{'l', 0xc1bc74d4, 0x00000000},
{'l', 0xc0b01b80, 0x41b35430},
{'l', 0x41aabd00, 0x00000000},
{'l', 0x00000000, 0x41244b9a},
{'l', 0xc1bfad88, 0x00000000},
{'l', 0xc0df5b10, 0x41de4829},
{'l', 0xc12bcfe4, 0x00000000},
{'l', 0x40dd3540, 0xc1de4829},
{'l', 0xc19d50c0, 0x00000000},
{'l', 0xc0dd3540, 0x41de4829},
{'l', 0xc12ce2ca, 0x00000000},
{'l', 0x40df5b10, 0xc1de4829},
{'l', 0xc1a920a5, 0x00000000},
{'l', 0x00000000, 0xc1244b9a},
{'l', 0x41bcfe48, 0x00000000},
{'l', 0x40b46718, 0xc1b35430},
{'l', 0xc1ace2cb, 0x00000000},
{'l', 0x00000000, 0xc1255e7c},
{'l', 0x41c1d353, 0x00000000},
{'l', 0x40db0f78, 0xc1ded19c},
{'l', 0x412df5b0, 0x00000000},
{'@', 0x00000024, 0x00005773},/*        $        x-advance: 87.449219 */
{'M', 0x4239c595, 0x41a19c59},
{'4', 0x0000ffcb, 0xff5f0000},
{'q', 0xc0e180d8, 0xbe09731d},
{0, 0xc16180dc, 0xbfce2cac},
{'9', 0xfff4ffc8, 0xffdcff8f},
{'l', 0x00000000, 0xc14149e1},
{'q', 0x40db0f76, 0x4089731e},
{0, 0x415d3543, 0x40d05278},
{'9', 0x00110038, 0x00110073},
{'l', 0x00000000, 0xc1f4d50c},
{'q', 0xc16d50c2, 0xc01aa180},
{0, 0xc1ace2cb, 0xc10301b8},
{'q', 0xc0d6c3de, 0xc0b8b2b0},
{0, 0xc0d6c3de, 0xc17d6c3c},
{'q', 0x00000000, 0xc12f0898},
{0, 0x40ea180e, 0xc189fc90},
{'9', 0xffce003a, 0xffc700a8},
{'4', 0xff820000, 0x00000035},
{'l', 0x00000000, 0x417920a8},
{'8', 0x0a600231, 0x165b082e},
{'l', 0x00000000, 0x413beb60},
{'8', 0xdea5ead4, 0xf2a0f4d2},
{'l', 0x00000000, 0x41e54302},
{'q', 0x4173c228, 0x401655f0},
{0, 0x41b35432, 0x41063a6c},
{'q', 0x40e5cc70, 0x40c149e0},
{0, 0x40e5cc70, 0x4184149e},
{'q', 0x00000000, 0x413579fc},
{0, 0xc0f4d510, 0x418f5b0f},
{'9', 0x0034ffc4, 0x003cff51},
{'6', 0x00a20000, 0xfdc1ffcb},
{'l', 0x00000000, 0xc1dc2258},
{'8', 0x23a106c2, 0x4be01ce0},
{'8', 0x471e2e00, 0x2561191e},
{'m', 0x40d6c3d8, 0x414e2cac},
{'l', 0x00000000, 0x41e87bb4},
{'8', 0xda66f744, 0xb322e322},
{'8', 0xb5dfd100, 0xd898e5e0},
{'@', 0x00000025, 0x0000829a},/*        %        x-advance: 130.601562 */
{'M', 0x42c7dda3, 0xc2306037},
{'8', 0x27b700d2, 0x6ee627e6},
{'8', 0x6e1a4500, 0x2749271a},
{'8', 0xd947002d, 0x921ad81a},
{'8', 0x92e6ba00, 0xd8b9d8e6},
{'m', 0x00000000, 0xc1086034},
{'q', 0x4129aa18, 0x00000000},
{0, 0x4186c3dc, 0x40ec3dd8},
{'q', 0x40c7bb50, 0x40ec3dd8},
{0, 0x40c7bb50, 0x419f768c},
{'q', 0x00000000, 0x4148ce2d},
{0, 0xc0c9e120, 0x419f768d},
{'q', 0xc0c7bb40, 0x40ea180d},
{0, 0xc1863a68, 0x40ea180d},
{'q', 0xc12bcfe8, 0x34000000},
{0, 0xc187d6c4, 0xc0ea180d},
{'q', 0xc0c7bb40, 0xc0ec3dda},
{0, 0xc0c7bb40, 0xc19f768d},
{'q', 0x00000000, 0xc149e114},
{0, 0x40c7bb40, 0xc19f768c},
{'9', 0xffc50032, 0xffc50087},
{'m', 0xc28a8603, 0xc2237d6c},
{'8', 0x28b700d2, 0x6de627e6},
{'8', 0x6e1a4600, 0x2749271a},
{'8', 0xd949002e, 0x921ad91a},
{'8', 0x93e6bb00, 0xd8b7d8e6},
{'m', 0x42726a86, 0xc1086038},
{'l', 0x412bcfe0, 0x00000000},
{'4', 0x033ffe0b, 0x0000ffab},
{'6', 0xfcc101f5, 0x0000fe1c},
{'q', 0x4129aa14, 0x00000000},
{0, 0x41874d50, 0x40ec3de0},
{'q', 0x40c9e110, 0x40ea1800},
{0, 0x40c9e110, 0x419eed18},
{'q', 0x00000000, 0x414af3f8},
{0, 0xc0c9e110, 0x41a00000},
{'q', 0xc0c7bb48, 0x40ea1808},
{0, 0xc1874d51, 0x40ea1808},
{'q', 0xc12abcfe, 0x00000000},
{0, 0xc1874d51, 0xc0ea1808},
{'q', 0xc0c59579, 0xc0ec3dd8},
{0, 0xc0c59579, 0xc1a00000},
{'q', 0x00000000, 0xc147bb48},
{0, 0x40c7bb47, 0xc19eed18},
{'q', 0x40c7bb46, 0xc0ec3de0},
{0, 0x4186c3de, 0xc0ec3de0},
{'@', 0x00000026, 0x00006b2e},/*        &        x-advance: 107.179688 */
{'M', 0x4205b0f7, 0xc257920a},
{'8', 0x56b92bd0, 0x5aea2aea},
{'q', 0x00000000, 0x411cc74e},
{0, 0x40e3a6a8, 0x41827845},
{'q', 0x40e3a6a8, 0x40d05278},
{0, 0x418ed19c, 0x40d05278},
{'8', 0xf05f0033, 0xcd53ef2c},
{'6', 0xfeddfee4, 0xffc4004b},
{'l', 0x42086037, 0x420b98e9},
{'q', 0x407d6c40, 0xc0bf2410},
{0, 0x40c59570, 0xc14c06e0},
{'9', 0xffca0011, 0xff8d0014},
{'l', 0x4147bb40, 0x00000000},
{'q', 0xbf4e2c80, 0x410dbeb8},
{0, 0xc0897310, 0x418c225c},
{'9', 0x0045ffe5, 0x0088ffb3},
{'4', 0x00990095, 0x0000ff79},
{'l', 0xc1198e98, 0xc11dda33},
{'q', 0xc0df5b10, 0x40bf2414},
{0, 0xc16a1810, 0x410ed19c},
{'q', 0xc0f4d510, 0x4038b2ae},
{0, 0xc1838b2c, 0x4038b2ae},
{'q', 0xc181655e, 0x34000000},
{0, 0xc1d38b2a, 0xc1131d35},
{'q', 0xc1244b99, 0xc114301c},
{0, 0xc1244b99, 0xc1bd87bb},
{'q', 0x00000000, 0xc109731e},
{0, 0x408fe482, 0xc180dbeb},
{'q', 0x408fe484, 0xc0f2af40},
{0, 0x4157d6c4, 0xc163a6a8},
{'8', 0xbdd9dfe7, 0xbef3dff3},
{'q', 0x00000000, 0xc12df5b0},
{0, 0x40ee63a4, 0xc18b98e8},
{'q', 0x40ee63a8, 0xc0d49e10},
{0, 0x419e63a6, 0xc0d49e10},
{'8', 0x0958002c, 0x1c5a092c},
{'l', 0x00000000, 0x41436fb0},
{'8', 0xdaa7e7d2, 0xf3b2f3d6},
{'8', 0x1ea500c8, 0x4cde1dde},
{'8', 0x370f1b00, 0x4d401b10},
{'@', 0x00000027, 0x000025c9},/*        '        x-advance: 37.785156 */
{'M', 0x41c50c07, 0xc2c86716},
{'l', 0x00000000, 0x4214fe48},
{'l', 0xc1368ce3, 0x00000000},
{'l', 0x00000000, 0xc214fe48},
{'l', 0x41368ce3, 0x00000000},
{'@', 0x00000028, 0x0000359f},/*        (        x-advance: 53.621094 */
{'M', 0x422a7844, 0xc2d09732},
{'q', 0xc10fe480, 0x4176fae0},
{0, 0xc155b0f6, 0x41f44b9c},
{'q', 0xc08b98e8, 0x41719c54},
{0, 0xc08b98e8, 0x41f4d50a},
{'q', 0x00000000, 0x41780dbe},
{0, 0x408b98e8, 0x41f5e7f2},
{'9', 0x00790023, 0x00f4006a},
{'l', 0xc12bcfe2, 0x00000000},
{'q', 0xc12112e6, 0xc17c5958},
{0, 0xc1719c5a, 0xc1f80dbf},
{'q', 0xc09eed18, 0xc173c224},
{0, 0xc09eed18, 0xc1f225cc},
{'q', 0x00000000, 0xc16f768c},
{0, 0x409eed18, 0xc1f112e6},
{'q', 0x409eed1c, 0xc172af40},
{0, 0x41719c5a, 0xc1f80dc0},
{'l', 0x412bcfe2, 0x00000000},
{'@', 0x00000029, 0x0000359f},/*        )        x-advance: 53.621094 */
{'M', 0x41301b7d, 0xc2d09732},
{'l', 0x412bcfe5, 0x00000000},
{'q', 0x412112e6, 0x417d6c40},
{0, 0x41708972, 0x41f80dc0},
{'q', 0x40a112e8, 0x4172af40},
{0, 0x40a112e8, 0x41f112e6},
{'q', 0x00000000, 0x41708974},
{0, 0xc0a112e8, 0x41f225cc},
{'9', 0x0079ffd9, 0x00f8ff88},
{'l', 0xc12bcfe5, 0x00000000},
{'q', 0x410ed19d, 0xc175e7f3},
{0, 0x41549e11, 0xc1f44b99},
{'q', 0x408dbeb4, 0xc173c226},
{0, 0x408dbeb4, 0xc1f5e7f2},
{'q', 0x00000000, 0xc1780dc0},
{0, 0xc08dbeb4, 0xc1f4d50a},
{'q', 0xc08b98e8, 0xc1719c58},
{0, 0xc1549e11, 0xc1f44b9c},
{'@', 0x0000002a, 0x000044b9},/*        *        x-advance: 68.722656 */
{'M', 0x42814302, 0xc2a761ef},
{'l', 0xc1c0c070, 0x41505278},
{'l', 0x41c0c070, 0x41516560},
{'l', 0xc07920b0, 0x40d27848},
{'l', 0xc1b46716, 0xc159fc94},
{'l', 0x00000000, 0x41ca6a88},
{'l', 0xc0f4d50c, 0x00000000},
{'l', 0x00000000, 0xc1ca6a88},
{'l', 0xc1b46716, 0x4159fc94},
{'l', 0xc07920a8, 0xc0d27848},
{'l', 0x41c0c06e, 0xc1516560},
{'l', 0xc1c0c06e, 0xc1505278},
{'l', 0x407920a4, 0xc0d49e10},
{'l', 0x41b46716, 0x4159fc90},
{'l', 0x36000000, 0xc1ca6a84},
{'l', 0x40f4d50c, 0x00000000},
{'l', 0x00000000, 0x41ca6a84},
{'l', 0x41b46716, 0xc159fc90},
{'l', 0x407920b0, 0x40d49e10},
{'@', 0x0000002b, 0x0000732a},/*        +        x-advance: 115.164062 */
{'M', 0x427ce2ca, 0xc2ac5957},
{'l', 0x00000000, 0x421587ba},
{'l', 0x421587bc, 0x00000000},
{'l', 0x00000000, 0x41368ce4},
{'l', 0xc21587bc, 0x00000000},
{'l', 0x00000000, 0x421587bb},
{'l', 0xc1346714, 0x00000000},
{'l', 0x00000000, 0xc21587bb},
{'l', 0xc21587bb, 0x00000000},
{'l', 0xb5800000, 0xc1368ce4},
{'l', 0x421587bb, 0x00000000},
{'l', 0x00000000, 0xc21587ba},
{'l', 0x41346714, 0x00000000},
{'@', 0x0000002c, 0x00002bb0},/*        ,        x-advance: 43.687500 */
{'M', 0x4180dbeb, 0xc1886037},
{'l', 0x416293c2, 0x00000000},
{'l', 0x00000000, 0x4138b2b0},
{'l', 0xc1301b7c, 0x41abcfe4},
{'l', 0xc10a8604, 0x00000000},
{'l', 0x40b01b7c, 0xc1abcfe4},
{'l', 0x00000000, 0xc138b2b0},
{'@', 0x0000002d, 0x00003198},/*        -        x-advance: 49.593750 */
{'M', 0x40d6c3dd, 0xc22c9e11},
{'l', 0x4210b2af, 0x00000000},
{'l', 0x00000000, 0x41301b7c},
{'l', 0xc210b2af, 0x00000000},
{'l', 0xb5c00000, 0xc1301b7c},
{'[', 0x0047002d, 0x00000508},
{'[', 0x004a002d, 0x000007a6},
{'[', 0x004f002d, 0x000003d3},
{'[', 0x0051002d, 0x00000508},
{'[', 0x006f002d, 0x0000028c},
{'@', 0x0000002e, 0x00002bb0},/*        .        x-advance: 43.687500 */
{'M', 0x416b2af4, 0xc1886037},
{'l', 0x416293c2, 0x00000000},
{'l', 0x00000000, 0x41886037},
{'l', 0xc16293c2, 0x00000000},
{'l', 0x00000000, 0xc1886037},
{'@', 0x0000002f, 0x00002e4f},/*        /        x-advance: 46.308594 */
{'M', 0x420b98e9, 0xc2c86716},
{'l', 0x41368ce4, 0x00000000},
{'l', 0xc20b98e9, 0x42e1e7f2},
{'l', 0xc1368ce4, 0xb5800000},
{'l', 0x420b98e9, 0xc2e1e7f2},
{'@', 0x00000030, 0x00005773},/*        0        x-advance: 87.449219 */
{'M', 0x422ec3dd, 0xc2b68ce3},
{'q', 0xc1278448, 0x00000000},
{0, 0xc17c5956, 0x41255e80},
{'q', 0xc0a7844c, 0x41244b98},
{0, 0xc0a7844c, 0x41f7844c},
{'q', 0x00000000, 0x41a4d50c},
{0, 0x40a7844c, 0x41f7844c},
{'q', 0x40a9aa1c, 0x41244b98},
{0, 0x417c5956, 0x41244b98},
{'q', 0x41289734, 0x00000000},
{0, 0x417c5958, 0xc1244b98},
{'q', 0x40a9aa18, 0xc1255e80},
{0, 0x40a9aa18, 0xc1f7844c},
{'q', 0x00000000, 0xc1a55e80},
{0, 0xc0a9aa18, 0xc1f7844c},
{'9', 0xffaeffd7, 0xffaeff82},
{'m', 0x00000000, 0xc12bcfe0},
{'q', 0x4186c3de, 0x00000000},
{0, 0x41cda33a, 0x4155b0f8},
{'q', 0x410ed198, 0x41549e10},
{0, 0x410ed198, 0x421aa180},
{'q', 0x00000000, 0x41ca6a86},
{0, 0xc10ed198, 0x421aa181},
{'q', 0xc10dbeb8, 0x41549e11},
{0, 0xc1cda33a, 0x41549e11},
{'q', 0xc186c3dd, 0xb4c00000},
{0, 0xc1ce2cab, 0xc1549e11},
{'q', 0xc10dbeb5, 0xc155b0f8},
{0, 0xc10dbeb5, 0xc21aa181},
{'q', 0x00000000, 0xc1caf3f8},
{0, 0x410dbeb5, 0xc21aa180},
{'q', 0x410ed19c, 0xc155b0f8},
{0, 0x41ce2cab, 0xc155b0f8},
{'@', 0x00000031, 0x00005773},/*        1        x-advance: 87.449219 */
{'M', 0x41886037, 0xc1368ce3},
{'l', 0x41b12e63, 0x00000000},
{'l', 0x00000000, 0xc298e2cb},
{'l', 0xc1c0c06e, 0x409aa180},
{'l', 0x35800000, 0xc1459578},
{'l', 0x41bfad88, 0xc09aa180},
{'l', 0x4158e9a8, 0x00000000},
{'l', 0x00000000, 0x42b1957a},
{'l', 0x41b12e64, 0xb6400000},
{'l', 0x00000000, 0x41368ce3},
{'l', 0xc266df5a, 0x00000000},
{'l', 0xb6000000, 0xc1368ce3},
{'@', 0x00000032, 0x00005773},/*        2        x-advance: 87.449219 */
{'M', 0x41d301b8, 0xc1368ce3},
{'l', 0x423d4302, 0x00000000},
{'4', 0x005b0000, 0x0000fe04},
{'l', 0xb6000000, 0xc1368ce3},
{'q', 0x40f6fad8, 0xc0ff920a},
{0, 0x41a80dbe, 0xc1ab4670},
{'q', 0x4155b0f6, 0xc157d6c4},
{0, 0x41863a6b, 0xc18b0f76},
{'8', 0x9e48c634, 0xb114d814},
{'q', 0x00000000, 0xc0ff9210},
{0, 0xc0b46718, 0xc1505278},
{'q', 0xc0b24148, 0xc0a112f0},
{0, 0xc1690528, 0xc0a112f0},
{'q', 0xc0cc06e0, 0x00000000},
{0, 0xc157d6c2, 0x400dbec0},
{'9', 0x0011ffc8, 0x0035ff88},
{'l', 0x00000000, 0xc15b0f78},
{'q', 0x410301b8, 0xc0527840},
{0, 0x4174d50c, 0xc09eed20},
{'q', 0x40e3a6a8, 0xbfd6c3c0},
{0, 0x41505278, 0xbfd6c3c0},
{'q', 0x417920a4, 0x00000000},
{0, 0x41c6a860, 0x40f920a0},
{'q', 0x41143018, 0x40f920b0},
{0, 0x41143018, 0x41a67168},
{'8', 0x5dee3100, 0x68bd2cee},
{'q', 0xbfd6c3e0, 0x3ff920c0},
{0, 0xc12abd00, 0x41346718},
{'q', 0xc10fe480, 0x4114301c},
{0, 0xc1caf3f8, 0x41cfc904},
{'@', 0x00000033, 0x00005773},/*        3        x-advance: 87.449219 */
{'M', 0x425f1656, 0xc2581b7d},
{'q', 0x411bb468, 0x40052780},
{0, 0x4172af40, 0x410a8604},
{'q', 0x40b01b80, 0x40d27840},
{0, 0x40b01b80, 0x4181eed1},
{'q', 0x00000000, 0x416d50c0},
{0, 0xc12338b8, 0x41b79fc8},
{'q', 0xc12338b0, 0x4101eed3},
{0, 0xc1e7f240, 0x4101eed3},
{'8', 0xf79800ce, 0xe292f6cb},
{'l', 0x00000000, 0xc151655e},
{'q', 0x40b46716, 0x40527844},
{0, 0x4145957b, 0x409eed18},
{'q', 0x40d6c3dc, 0x3fd6c3e0},
{0, 0x41606df6, 0x3fd6c3e0},
{'q', 0x414c06dc, 0x00000000},
{0, 0x419b2af2, 0xc0a112e6},
{'q', 0x40d6c3e0, 0xc0a112e6},
{0, 0x40d6c3e0, 0xc16a180d},
{'q', 0x00000000, 0xc10dbeb6},
{0, 0xc0c7bb48, 0xc15d3542},
{'9', 0xffd8ffcf, 0xffd8ff77},
{'4', 0x0000ffa3, 0xffa70000},
{'l', 0x41436fae, 0x00000000},
{'q', 0x41200000, 0x00000000},
{0, 0x4174d50c, 0xc07d6c40},
{'8', 0xa42ae02a, 0xa2d4c300},
{'q', 0xc0adf5b0, 0xc0852790},
{0, 0xc17a338c, 0xc0852790},
{'q', 0xc0b24148, 0x00000000},
{0, 0xc13f2414, 0x3f9aa180},
{'9', 0x0009ffcd, 0x001eff90},
{'l', 0x00000000, 0xc14149e0},
{'q', 0x40f6fad8, 0xc0097320},
{0, 0x4166df5a, 0xc04e2cc0},
{'q', 0x40d8e9ac, 0xbf897300},
{0, 0x414c06de, 0xbf897300},
{'q', 0x4176fad8, 0x00000000},
{0, 0x41c36fae, 0x40e180e0},
{'q', 0x410fe480, 0x40df5b10},
{0, 0x410fe480, 0x419768cc},
{'q', 0x00000000, 0x41052788},
{0, 0xc0987bb0, 0x416180dc},
{'q', 0xc0987bb0, 0x40b68ce8},
{0, 0xc158e9a8, 0x40fd6c40},
{'@', 0x00000034, 0x00005773},/*        4        x-advance: 87.449219 */
{'M', 0x424fc905, 0xc2b0c74d},
{'4', 0x01abfeef, 0x00000111},
{'6', 0xfe550000, 0xffa2ffe4},
{'l', 0x41886038, 0x00000000},
{'l', 0x00000000, 0x42829aa2},
{'l', 0x4164b990, 0xb6800000},
{'l', 0x00000000, 0x41346714},
{'l', 0xc164b990, 0x00000000},
{'l', 0x00000000, 0x41bcfe48},
{'l', 0xc157d6c4, 0x00000000},
{'l', 0x00000000, 0xc1bcfe48},
{'l', 0xc234f089, 0x00000000},
{'l', 0xb5c00000, 0xc151655c},
{'l', 0x4226b61e, 0xc27df5b1},
{'@', 0x00000035, 0x00005773},/*        5        x-advance: 87.449219 */
{'M', 0x416d50c0, 0xc2c86716},
{'l', 0x4254e2ca, 0x00000000},
{'4', 0x005b0000, 0x0000feba},
{'l', 0x00000000, 0x41c48294},
{'8', 0xf52ff817, 0xfc2ffc17},
{'q', 0x41863a6a, 0x00000000},
{0, 0x41d49e10, 0x41131d34},
{'q', 0x411cc750, 0x41131d34},
{0, 0x411cc750, 0x41c731d2},
{'q', 0x00000000, 0x4181655f},
{0, 0xc12112e8, 0x41c957a0},
{'q', 0xc12112e4, 0x410ed19d},
{0, 0xc1e31d34, 0x410ed19d},
{'8', 0xf89900ce, 0xe795f8cc},
{'l', 0x00000000, 0xc159fc90},
{'8', 0x27631a30, 0x0c6c0c33},
{'q', 0x4139c598, 0x00000000},
{0, 0x41931d36, 0xc0c36fae},
{'q', 0x40d8e9a8, 0xc0c36fae},
{0, 0x40d8e9a8, 0xc1849e12},
{'q', 0x00000000, 0xc1278448},
{0, 0xc0d8e9a8, 0xc1849e10},
{'q', 0xc0d8e9a8, 0xc0c36fb0},
{0, 0xc1931d36, 0xc0c36fb0},
{'8', 0x09aa00d5, 0x1ea809d6},
{'l', 0x00000000, 0xc249579f},
{'@', 0x00000036, 0x00005773},/*        6        x-advance: 87.449219 */
{'M', 0x423579fc, 0xc25e036f},
{'q', 0xc1120a4c, 0x00000000},
{0, 0xc167f240, 0x40c7bb40},
{'q', 0xc0a9aa18, 0x40c7bb48},
{0, 0xc0a9aa18, 0x4188e9aa},
{'q', 0x00000000, 0x412ce2cc},
{0, 0x40a9aa18, 0x4188e9aa},
{'q', 0x40abcfe8, 0x40c7bb48},
{0, 0x4167f240, 0x40c7bb48},
{'q', 0x41120a50, 0x00000000},
{0, 0x4166df5c, 0xc0c7bb46},
{'q', 0x40abcfe8, 0xc0c9e112},
{0, 0x40abcfe8, 0xc188e9aa},
{'q', 0x00000000, 0xc12df5b0},
{0, 0xc0abcfe8, 0xc188e9aa},
{'9', 0xffcfffd6, 0xffcfff8d},
{'m', 0x41d74d50, 0xc229eed1},
{'l', 0x00000000, 0x41459578},
{'8', 0xe3aeedd8, 0xf6aef6d7},
{'q', 0xc156c3e0, 0x00000000},
{0, 0xc1a44b99, 0x4110f768},
{'q', 0xc0e180dc, 0x4110f768},
{0, 0xc100dbec, 0x41db0f78},
{'8', 0xb94fd21f, 0xe769e72f},
{'q', 0x41719c58, 0x00000000},
{0, 0x41be9aa2, 0x41131d34},
{'q', 0x410cabd0, 0x41120a50},
{0, 0x410cabd0, 0x41c731d2},
{'q', 0x00000000, 0x4176fada},
{0, 0xc1120a50, 0x41c61eee},
{'q', 0xc1120a50, 0x41154301},
{0, 0xc1c25cc8, 0x41154301},
{'q', 0xc18b0f76, 0xb4c00000},
{0, 0xc1d49e10, 0xc1549e11},
{'q', 0xc1131d36, 0xc155b0f8},
{0, 0xc1131d36, 0xc21aa181},
{'q', 0x00000000, 0xc1be112c},
{0, 0x41346716, 0xc21768ce},
{'q', 0x41346718, 0xc16293c0},
{0, 0x41f225cc, 0xc16293c0},
{'8', 0x08520028, 0x18560829},
{'@', 0x00000037, 0x00005773},/*        7        x-advance: 87.449219 */
{'M', 0x41346716, 0xc2c86716},
{'l', 0x4280dbeb, 0x00000000},
{'l', 0x00000000, 0x40b8b2b0},
{'l', 0xc21180dc, 0x42bcdbeb},
{'l', 0xc16293c2, 0x00000000},
{'l', 0x4208e9aa, 0xc2b1957a},
{'l', 0xc2407bb4, 0x00000000},
{'l', 0xb6000000, 0xc1368ce0},
{'@', 0x00000038, 0x00005773},/*        8        x-advance: 87.449219 */
{'M', 0x422ec3dd, 0xc23e55e8},
{'q', 0xc11aa180, 0x00000000},
{0, 0xc173c224, 0x40a55e80},
{'q', 0xc0b01b7c, 0x40a55e80},
{0, 0xc0b01b7c, 0x4163a6a8},
{'q', 0x00000000, 0x4110f76a},
{0, 0x40b01b7c, 0x4163a6a9},
{'q', 0x40b24148, 0x40a55e7e},
{0, 0x4173c224, 0x40a55e7e},
{'q', 0x411aa184, 0x00000000},
{0, 0x4173c228, 0xc0a55e7e},
{'q', 0x40b24148, 0xc0a7844a},
{0, 0x40b24148, 0xc163a6a9},
{'q', 0x00000000, 0xc110f768},
{0, 0xc0b24148, 0xc163a6a8},
{'9', 0xffd7ffd4, 0xffd7ff87},
{'m', 0xc158e9a8, 0xc0b8b2b0},
{'q', 0xc10b98ea, 0xc0097310},
{0, 0xc159fc90, 0xc101eed0},
{'q', 0xc09aa182, 0xc0bf2410},
{0, 0xc09aa182, 0xc1690528},
{'q', 0x00000000, 0xc14036f8},
{0, 0x41086037, 0xc197f240},
{'q', 0x4109731e, 0xc0df5b10},
{0, 0x41bbeb61, 0xc0df5b10},
{'q', 0x416f7690, 0x00000000},
{0, 0x41bbeb62, 0x40df5b10},
{'q', 0x41086038, 0x40df5b10},
{0, 0x41086038, 0x4197f240},
{'q', 0x00000000, 0x41097320},
{0, 0xc09cc750, 0x41690528},
{'q', 0xc09aa180, 0x40bf2418},
{0, 0xc157d6c4, 0x4101eed0},
{'q', 0x411cc74c, 0x40120a50},
{0, 0x4173c224, 0x410ed1a0},
{'q', 0x40b01b80, 0x40d49e10},
{0, 0x40b01b80, 0x4181eed0},
{'q', 0x00000000, 0x41690528},
{0, 0xc10ed198, 0x41b2cabd},
{'q', 0xc10dbeb8, 0x40f920a5},
{0, 0xc1cb7d6e, 0x40f920a5},
{'q', 0xc1849e10, 0x34000000},
{0, 0xc1cc06de, 0xc0f920a4},
{'q', 0xc10dbeb7, 0xc0f920a5},
{0, 0xc10dbeb7, 0xc1b2cabd},
{'q', 0x00000000, 0xc1198e98},
{0, 0x40b01b7e, 0xc181eed0},
{'9', 0xffcb002c, 0xffb9007a},
{'m', 0xc09eed1c, 0xc1ab4670},
{'8', 0x61263e00, 0x226d2227},
{'8', 0xde6c0045, 0x9f27de27},
{'8', 0x9fd9c200, 0xde94ded9},
{'8', 0x229300ba, 0x61da22da},
{'@', 0x00000039, 0x00005773},/*        9        x-advance: 87.449219 */
{'M', 0x41719c59, 0xc0052784},
{'l', 0x00000000, 0xc145957a},
{'8', 0x1d521328, 0x0a520a29},
{'q', 0x4156c3dc, 0x00000000},
{0, 0x41a3c226, 0xc10fe482},
{'q', 0x40e3a6a8, 0xc110f768},
{0, 0x4101eed4, 0xc1db98ea},
{'8', 0x46b22ee1, 0x189718d1},
{'q', 0xc1708974, 0x00000000},
{0, 0xc1be9aa2, 0xc110f768},
{'q', 0xc10b98e9, 0xc1120a50},
{0, 0xc10b98e9, 0xc1c731d4},
{'q', 0x00000000, 0xc176fad8},
{0, 0x41120a4f, 0xc1c61eec},
{'q', 0x41120a4e, 0xc1154300},
{0, 0x41c25cc7, 0xc1154300},
{'q', 0x418b0f76, 0x00000000},
{0, 0x41d4149c, 0x4155b0f8},
{'q', 0x41131d38, 0x41549e10},
{0, 0x41131d38, 0x421aa180},
{'q', 0x00000000, 0x41bd87bc},
{0, 0xc1346718, 0x421768ce},
{'q', 0xc133542c, 0x416180dd},
{0, 0xc1f19c58, 0x416180dd},
{'8', 0xf8ae00d8, 0xe8aaf8d7},
{'m', 0x41d7d6c4, 0xc229eed2},
{'q', 0x41120a50, 0x00000000},
{0, 0x4166df5c, 0xc0c7bb40},
{'q', 0x40abcfe0, 0xc0c7bb48},
{0, 0x40abcfe0, 0xc188e9ac},
{'q', 0x00000000, 0xc12ce2c8},
{0, 0xc0abcfe0, 0xc1886034},
{'q', 0xc0a9aa18, 0xc0c9e120},
{0, 0xc166df5c, 0xc0c9e120},
{'q', 0xc1120a50, 0x00000000},
{0, 0xc167f240, 0x40c9e120},
{'q', 0xc0a9aa18, 0x40c7bb40},
{0, 0xc0a9aa18, 0x41886034},
{'q', 0x00000000, 0x412df5b4},
{0, 0x40a9aa18, 0x4188e9ac},
{'q', 0x40abcfe0, 0x40c7bb40},
{0, 0x4167f240, 0x40c7bb40},
{'@', 0x0000003a, 0x00002e4f},/*        :        x-advance: 46.308594 */
{'M', 0x4180dbeb, 0xc1886037},
{'l', 0x416293c2, 0x00000000},
{'4', 0x00880000, 0x0000ff8f},
{'6', 0xff780000, 0xfe500000},
{'l', 0x416293c2, 0x00000000},
{'l', 0x00000000, 0x41886036},
{'l', 0xc16293c2, 0x00000000},
{'l', 0x00000000, 0xc1886036},
{'@', 0x0000003b, 0x00002e4f},/*        ;        x-advance: 46.308594 */
{'M', 0x4180dbeb, 0xc28e25cc},
{'l', 0x416293c2, 0x00000000},
{'4', 0x00880000, 0x0000ff8f},
{'6', 0xff780000, 0x01b00000},
{'l', 0x416293c2, 0x36000000},
{'l', 0x00000000, 0x4138b2b0},
{'l', 0xc1301b7c, 0x41abcfe4},
{'l', 0xc10a8604, 0x00000000},
{'l', 0x40b01b7c, 0xc1abcfe4},
{'l', 0x00000000, 0xc138b2b0},
{'@', 0x0000003c, 0x0000732a},/*        <        x-advance: 115.164062 */
{'M', 0x42c93543, 0xc2874d51},
{'l', 0xc28a8604, 0x41c50c08},
{'l', 0x428a8604, 0x41c3f921},
{'l', 0x00000000, 0x41436fac},
{'l', 0xc2ac149e, 0xc1f9aa17},
{'l', 0xb5800000, 0xc132414c},
{'l', 0x42ac149e, 0xc1f9aa16},
{'l', 0x00000000, 0x41436fa8},
{'@', 0x0000003d, 0x0000732a},/*        =        x-advance: 115.164062 */
{'M', 0x41690527, 0xc279aa18},
{'l', 0x42ac149e, 0x00000000},
{'4', 0x005a0000, 0x0000fd50},
{'6', 0xffa60000, 0x00db0000},
{'l', 0x42ac149e, 0x00000000},
{'l', 0x00000000, 0x41368ce4},
{'l', 0xc2ac149e, 0x00000000},
{'l', 0xb5800000, 0xc1368ce4},
{'@', 0x0000003e, 0x0000732a},/*        >        x-advance: 115.164062 */
{'M', 0x41690527, 0xc2874d51},
{'l', 0x00000000, 0xc1436fa8},
{'l', 0x42ac149e, 0x41f9aa16},
{'l', 0x00000000, 0x4132414c},
{'l', 0xc2ac149e, 0x41f9aa17},
{'l', 0xb5800000, 0xc1436fac},
{'l', 0x428a414a, 0xc1c3f921},
{'l', 0xc28a414a, 0xc1c50c08},
{'@', 0x0000003f, 0x000048f3},/*        ?        x-advance: 72.949219 */
{'M', 0x41d1eed1, 0xc1886037},
{'l', 0x4159fc92, 0x00000000},
{'4', 0x00880000, 0x0000ff94},
{'6', 0xff780000, 0xffb20069},
{'4', 0x0000ff9a, 0xffae0000},
{'8', 0xa70fca00, 0xaf3fde0f},
{'l', 0x40c149e0, 0xc0bf2418},
{'8', 0xcb2ce41e, 0xcd0de70d},
{'8', 0xb3ddd100, 0xe3a4e3de},
{'8', 0x12a600d6, 0x369d12d1},
{'l', 0x00000000, 0xc149e110},
{'8', 0xd366e232, 0xf16bf134},
{'q', 0x41459578, 0x00000000},
{0, 0x419e63a6, 0x40d05270},
{'q', 0x40f08970, 0x40d05280},
{0, 0x40f08970, 0x41897320},
{'8', 0x4ded2800, 0x52bd24ed},
{'l', 0xc0bcfe48, 0x40b8b2b0},
{'8', 0x27dd19e7, 0x1bf20df6},
{'8', 0x1bfc0bfd, 0x2cff10ff},
{'l', 0x00000000, 0x410414a0},
{'@', 0x00000040, 0x00008973},/*        @        x-advance: 137.449219 */
{'M', 0x424c9052, 0xc210293c},
{'q', 0x00000000, 0x41198e9a},
{0, 0x40987bb8, 0x41719c5a},
{'8', 0x2b682b26, 0xd4670042},
{'q', 0x40987bc0, 0xc0b01b80},
{0, 0x40987bc0, 0xc1708974},
{'q', 0x00000000, 0xc11655e8},
{0, 0xc09aa180, 0xc16e63a4},
{'8', 0xd498d4da, 0x2c9900c0},
{'9', 0x002cffda, 0x0077ffda},
{'m', 0x42124f08, 0x41a08973},
{'8', 0x3db629e0, 0x13a013d7},
{'q', 0xc138b2b0, 0x00000000},
{0, 0xc19655e8, 0xc1052784},
{'q', 0xc0e5cc78, 0xc1063a6a},
{0, 0xc0e5cc78, 0xc1ae7f24},
{'q', 0x00000000, 0xc156c3dc},
{0, 0x40e7f240, 0xc1ae7f24},
{'q', 0x40e7f240, 0xc1063a68},
{0, 0x4195cc76, 0xc1063a68},
{'8', 0x14610037, 0x3c491329},
{'4', 0xffba0000, 0x0000004c},
{'l', 0x00000000, 0x4245957a},
{'q', 0x411cc748, 0xbfbcfe40},
{0, 0x4174d508, 0xc10ed19a},
{'q', 0x40b24150, 0xc0f08974},
{0, 0x40b24150, 0xc19b2af3},
{'8', 0x95efc700, 0xa3cdcef0},
{'q', 0xc0df5b10, 0xc10cabd0},
{0, 0xc1886038, 0xc156c3e0},
{'q', 0xc1200000, 0xc09655e0},
{0, 0xc1ae7f24, 0xc09655e0},
{'q', 0xc104149c, 0x00000000},
{0, 0xc17d6c3c, 0x400dbea0},
{'q', 0xc0f2af40, 0x40097320},
{0, 0xc1606df4, 0x40ce2cb0},
{'q', 0xc1289734, 0x40db0f80},
{0, 0xc184149f, 0x418fe482},
{'q', 0xc0bcfe48, 0x41312e64},
{0, 0xc0bcfe48, 0x41c036fc},
{'q', 0x00000000, 0x412abcfc},
{0, 0x4074d510, 0x419fffff},
{'q', 0x407920a0, 0x41154302},
{0, 0x4133542e, 0x41838b2b},
{'q', 0x40e180e0, 0x40df5b0f},
{0, 0x41827846, 0x412abcfe},
{'q', 0x41143018, 0x4067f240},
{0, 0x419e63a6, 0x4067f240},
{'q', 0x410a8600, 0x00000000},
{0, 0x4187d6c4, 0xc038b2ac},
{'9', 0xffe90043, 0xffbd007a},
{'l', 0x40c149e0, 0x40ee63a6},
{'q', 0xc1063a68, 0x40d0527a},
{0, 0xc19293c0, 0x41200001},
{'q', 0xc11dda38, 0x405b0f70},
{0, 0xc1a08974, 0x405b0f70},
{'q', 0xc146a860, 0x00000000},
{0, 0xc1bb61ee, 0xc08b98e8},
{'q', 0xc1301b80, 0xc08dbeb6},
{0, 0xc19cc74e, 0xc14d19c6},
{'q', 0xc109731e, 0xc1063a6a},
{0, 0xc151655e, 0xc19b2af4},
{'q', 0xc08fe482, 0xc1312e62},
{0, 0xc08fe482, 0xc1be112d},
{'q', 0x00000000, 0xc1436fb0},
{0, 0x40920a4e, 0xc1ba4f08},
{'q', 0x40920a50, 0xc1312e64},
{0, 0x41505278, 0xc19bb466},
{'q', 0x410a8604, 0xc1086038},
{0, 0x41a00000, 0xc1505278},
{'q', 0x413579fc, 0xc0920a50},
{0, 0x41c036fc, 0xc0920a50},
{'q', 0x4163a6a8, 0x00000000},
{0, 0x41d301b4, 0x40bad870},
{'q', 0x41436fb0, 0x40bad880},
{0, 0x41a3c228, 0x41849e14},
{'q', 0x40a112e0, 0x40d27840},
{0, 0x40f4d510, 0x4164b98c},
{'q', 0x402bcfe0, 0x40f6fad8},
{0, 0x402bcfe0, 0x417f9208},
{'q', 0x00000000, 0x418d3543},
{0, 0xc12abd00, 0x41ded19d},
{'q', 0xc12abd00, 0x412338b2},
{0, 0xc1ebb468, 0x4129aa17},
{'l', 0x00000000, 0xc1255e7f},
{'@', 0x00000041, 0x00005e06},/*        A        x-advance: 94.023438 */
{'M', 0x423beb62, 0xc2adb0f7},
{'4', 0x018eff6d, 0x00000126},
{'6', 0xfe72ff6d, 0xff96ffc3},
{'l', 0x4175e7f4, 0x00000000},
{'l', 0x4218c06d, 0x42c86716},
{'l', 0xc16180d8, 0x00000000},
{'l', 0xc1120a50, 0xc1cda338},
{'l', 0xc234abd0, 0x00000000},
{'l', 0xc1120a4e, 0x41cda338},
{'l', 0xc164b98e, 0x00000000},
{'l', 0x42190527, 0xc2c86716},
{'[', 0x00410041, 0x000003d3},
{'@', 0x00000042, 0x00005e4b},/*        B        x-advance: 94.292969 */
{'M', 0x41d86037, 0xc23f68ce},
{'4', 0x01250000, 0x000000ad},
{'q', 0x412f0894, 0x00000000},
{0, 0x4181655c, 0xc08fe482},
{'8', 0x912adc2a, 0x92d6b500},
{'9', 0xffddffd7, 0xffddff7f},
{'6', 0x0000ff53, 0xfeb70000},
{'4', 0x00f10000, 0x000000a0},
{'q', 0x411eed18, 0x00000000},
{0, 0x416c3dd8, 0xc06c3de0},
{'8', 0xa527e227, 0xa6d9c400},
{'9', 0xffe2ffda, 0xffe2ff8a},
{'6', 0x0000ff60, 0xffa7ff94},
{'l', 0x420a8603, 0x00000000},
{'q', 0x41780dc0, 0x00000000},
{0, 0x41bf2414, 0x40ce2cb0},
{'q', 0x41063a68, 0x40ce2ca0},
{0, 0x41063a68, 0x419293c0},
{'q', 0x00000000, 0x41131d38},
{0, 0xc0897310, 0x416a1810},
{'q', 0xc0897320, 0x40adf5b0},
{0, 0xc149e114, 0x40d8e9a8},
{'q', 0x411ffffc, 0x40097320},
{0, 0x41780dbc, 0x410fe480},
{'q', 0x40b24150, 0x40d8e9b0},
{0, 0x40b24150, 0x4187d6c5},
{'q', 0x00000000, 0x4156c3dd},
{0, 0xc1120a50, 0x41a5e7f2},
{'9', 0x003affb7, 0x003aff31},
{'l', 0xc20fe482, 0x00000000},
{'l', 0x00000000, 0xc2c86716},
{'@', 0x00000043, 0x00005ff9},/*        C        x-advance: 95.972656 */
{'M', 0x42b10c07, 0xc2b8f769},
{'l', 0x00000000, 0x4164b990},
{'q', 0xc0db0f80, 0xc0cc06e0},
{0, 0xc16a1810, 0xc1187bb0},
{'q', 0xc0f6fae0, 0xc049e120},
{0, 0xc1838b2c, 0xc049e120},
{'q', 0xc189731c, 0x00000000},
{0, 0xc1d27843, 0x41289730},
{'q', 0xc1120a50, 0x41278450},
{0, 0xc1120a50, 0x41f2af40},
{'q', 0x00000000, 0x419e63a7},
{0, 0x41120a50, 0x41f2af40},
{'q', 0x41120a4e, 0x4127844b},
{0, 0x41d27843, 0x4127844b},
{'q', 0x410b98e8, 0x00000000},
{0, 0x41838b2c, 0xc049e114},
{'9', 0xffe7003e, 0xffb40075},
{'l', 0x00000000, 0x416293c2},
{'q', 0xc0e3a6b0, 0x409aa180},
{0, 0xc1719c60, 0x40e7f241},
{'q', 0xc0fd6c30, 0x401aa180},
{0, 0xc1863a68, 0x401aa180},
{'q', 0xc1b60370, 0x34000000},
{0, 0xc20f5b10, 0xc15e4828},
{'q', 0xc151655c, 0xc15f5b10},
{0, 0xc151655c, 0xc21836fb},
{'q', 0x00000000, 0xc1c149e0},
{0, 0x4151655e, 0xc21836fa},
{'q', 0x4151655e, 0xc15f5b10},
{0, 0x420f5b10, 0xc15f5b10},
{'q', 0x410fe480, 0x00000000},
{0, 0x41874d50, 0x401aa180},
{'q', 0x40ff9210, 0x401655e0},
{0, 0x416f7690, 0x40e3a6a0},
{'@', 0x00000044, 0x000069d6},/*        D        x-advance: 105.835938 */
{'M', 0x41d86037, 0xc2b21eed},
{'4', 0x026f0000, 0x00000083},
{'q', 0x41a5e7f2, 0x00000000},
{0, 0x41f2af3e, 0xc11655e8},
{'q', 0x411aa180, 0xc11655e8},
{0, 0x411aa180, 0xc1ed50bf},
{'q', 0x00000000, 0xc1a112e8},
{0, 0xc11aa180, 0xc1ebb468},
{'9', 0xffb5ffb4, 0xffb5ff0e},
{'6', 0x0000ff7d, 0xffa7ff94},
{'l', 0x41ded19c, 0x00000000},
{'q', 0x41e90526, 0x00000000},
{0, 0x422b01b7, 0x41425cc8},
{'q', 0x4159fc90, 0x414149e0},
{0, 0x4159fc90, 0x421768ce},
{'q', 0x00000000, 0x41cf3f91},
{0, 0xc15b0f78, 0x421836fa},
{'9', 0x0061ff93, 0x0061feab},
{'l', 0xc1ded19c, 0x00000000},
{'l', 0x00000000, 0xc2c86716},
{'@', 0x00000045, 0x000056d8},/*        E        x-advance: 86.843750 */
{'M', 0x4157d6c4, 0xc2c86716},
{'l', 0x427d6c3d, 0x00000000},
{'l', 0x00000000, 0x41368ce0},
{'l', 0xc24731d2, 0x00000000},
{'l', 0xb6000000, 0x41ed50c2},
{'l', 0x423edf5a, 0x00000000},
{'l', 0x00000000, 0x41368ce0},
{'l', 0xc23edf5a, 0x00000000},
{'l', 0xb6000000, 0x42113c22},
{'l', 0x424c06de, 0x35800000},
{'l', 0x00000000, 0x41368ce3},
{'l', 0xc28120a4, 0x00000000},
{'l', 0xb6800000, 0xc2c86716},
{'@', 0x00000046, 0x00004f0f},/*        F        x-advance: 79.058594 */
{'M', 0x4157d6c4, 0xc2c86716},
{'l', 0x426655e7, 0x00000000},
{'l', 0x00000000, 0x41368ce0},
{'l', 0xc2301b7c, 0x00000000},
{'l', 0xb6000000, 0x41ec3dda},
{'l', 0x421eed18, 0x00000000},
{'l', 0x00000000, 0x41368ce4},
{'l', 0xc21eed18, 0x00000000},
{'l', 0xb6000000, 0x423f68ce},
{'l', 0xc158e9aa, 0x00000000},
{'l', 0x00000000, 0xc2c86716},
{'@', 0x00000047, 0x00006a82},/*        G        x-advance: 106.507812 */
{'M', 0x42a39fc9, 0xc164b98e},
{'l', 0x00000000, 0xc1d74d51},
{'l', 0xc1b12e64, 0x00000000},
{'4', 0xffa70000, 0x0000011c},
{'l', 0x00000000, 0x422c149e},
{'q', 0xc0fb4670, 0x40b24146},
{0, 0xc18a8600, 0x41074d4f},
{'q', 0xc11768d0, 0x40346716},
{0, 0xc1a19c5a, 0x40346716},
{'q', 0xc1bbeb62, 0x34000000},
{0, 0xc2131d36, 0xc15b0f76},
{'q', 0xc1538b28, 0xc15c225c},
{0, 0xc1538b28, 0xc2190528},
{'q', 0x00000000, 0xc1c48294},
{0, 0x41538b2a, 0xc2190526},
{'q', 0x41549e12, 0xc15c2260},
{0, 0x42131d36, 0xc15c2260},
{'q', 0x411cc748, 0x00000000},
{0, 0x4194b98c, 0x401aa180},
{'9', 0x00130046, 0x00380082},
{'l', 0x00000000, 0x4166df60},
{'q', 0xc0f08970, 0xc0cc06e0},
{0, 0xc17f9208, 0xc1198e98},
{'q', 0xc1074d50, 0xc04e2cc0},
{0, 0xc18e482a, 0xc04e2cc0},
{'q', 0xc1931d34, 0x00000000},
{0, 0xc1dd3542, 0x41244b98},
{'q', 0xc1131d36, 0x41244b98},
{0, 0xc1131d36, 0x41f4d50c},
{'q', 0x00000000, 0x41a225cd},
{0, 0x41131d36, 0x41f44b99},
{'q', 0x4114301c, 0x41244b99},
{0, 0x41dd3542, 0x41244b99},
{'8', 0xf7660039, 0xe151f62d},
{'@', 0x00000048, 0x0000675b},/*        H        x-advance: 103.355469 */
{'M', 0x4157d6c4, 0xc2c86716},
{'l', 0x4158e9aa, 0x00000000},
{'l', 0x00000000, 0x42244b99},
{'l', 0x42450c06, 0x00000000},
{'l', 0x00000000, 0xc2244b99},
{'l', 0x4158e9a8, 0x00000000},
{'l', 0x00000000, 0x42c86716},
{'l', 0xc158e9a8, 0x00000000},
{'l', 0x00000000, 0xc23edf5b},
{'l', 0xc2450c06, 0x00000000},
{'l', 0xb6000000, 0x423edf5b},
{'l', 0xc158e9aa, 0x00000000},
{'l', 0x00000000, 0xc2c86716},
{'@', 0x00000049, 0x00002889},/*        I        x-advance: 40.535156 */
{'M', 0x4157d6c4, 0xc2c86716},
{'l', 0x4158e9aa, 0x00000000},
{'l', 0x00000000, 0x42c86716},
{'l', 0xc158e9aa, 0x00000000},
{'l', 0x00000000, 0xc2c86716},
{'@', 0x0000004a, 0x00002889},/*        J        x-advance: 40.535156 */
{'M', 0x4157d6c4, 0xc2c86716},
{'4', 0x0000006c, 0x02e90000},
{'q', 0x00000000, 0x4190f769},
{0, 0xc0dd3544, 0x41d27845},
{'9', 0x0041ffca, 0x0041ff50},
{'4', 0x0000ffd7, 0xffa50000},
{'l', 0x40874d50, 0x00000000},
{'q', 0x410fe482, 0x00000000},
{0, 0x414af3f9, 0xc0a112e6},
{'q', 0x406c3ddc, 0xc0a112e5},
{0, 0x406c3ddc, 0xc1906df5},
{'l', 0x00000000, 0xc2ba7165},
{'@', 0x0000004b, 0x00005a22},/*        K        x-advance: 90.132812 */
{'M', 0x4157d6c4, 0xc2c86716},
{'l', 0x4158e9aa, 0x00000000},
{'l', 0x00000000, 0x4229655e},
{'l', 0x4233dda2, 0xc229655e},
{'l', 0x418b98ec, 0x00000000},
{'l', 0xc246ed1a, 0x423ad87b},
{'l', 0x42552784, 0x4255f5b1},
{'l', 0xc18ed19c, 0x00000000},
{'l', 0xc2407bb4, 0xc2410527},
{'l', 0xb6000000, 0x42410527},
{'l', 0xc158e9aa, 0x00000000},
{'l', 0x00000000, 0xc2c86716},
{'@', 0x0000004c, 0x00004c93},/*        L        x-advance: 76.574219 */
{'M', 0x4157d6c4, 0xc2c86716},
{'l', 0x4158e9aa, 0x00000000},
{'l', 0x00000000, 0x42b1957a},
{'l', 0x42432af4, 0xb6400000},
{'l', 0x00000000, 0x41368ce3},
{'l', 0xc279655f, 0x00000000},
{'l', 0x00000000, 0xc2c86716},
{'[', 0x0041004c, 0x00000327},
{'@', 0x0000004d, 0x00007697},/*        M        x-advance: 118.589844 */
{'M', 0x4157d6c4, 0xc2c86716},
{'l', 0x41a19c58, 0x00000000},
{'l', 0x41cc9054, 0x42886036},
{'l', 0x41cda336, 0xc2886036},
{'l', 0x41a19c5c, 0x00000000},
{'l', 0x00000000, 0x42c86716},
{'l', 0xc1538b30, 0x00000000},
{'l', 0x00000000, 0xc2aff920},
{'l', 0xc1ceb61c, 0x4289731c},
{'l', 0xc159fc94, 0x36800000},
{'l', 0xc1ceb61e, 0xc289731c},
{'l', 0x00000000, 0x42aff920},
{'l', 0xc1527844, 0x00000000},
{'l', 0x00000000, 0xc2c86716},
{'@', 0x0000004e, 0x000066d1},/*        N        x-advance: 102.816406 */
{'M', 0x4157d6c4, 0xc2c86716},
{'l', 0x41920a4f, 0x00000000},
{'l', 0x4231b7d6, 0x42a7a6a8},
{'l', 0x00000000, 0xc2a7a6a8},
{'l', 0x41527848, 0x00000000},
{'l', 0x00000000, 0x42c86716},
{'l', 0xc1920a50, 0x00000000},
{'l', 0xc231b7d6, 0xc2a7a6a8},
{'l', 0x00000000, 0x42a7a6a8},
{'l', 0xc1527844, 0x00000000},
{'l', 0x00000000, 0xc2c86716},
{'@', 0x0000004f, 0x00006c30},/*        O        x-advance: 108.187500 */
{'M', 0x4258a4f0, 0xc2b6036f},
{'q', 0xc16c3dd8, 0x00000000},
{0, 0xc1bbeb61, 0x41301b78},
{'q', 0xc10a8604, 0x41301b80},
{0, 0xc10a8604, 0x41f00000},
{'q', 0x00000000, 0x419768cf},
{0, 0x410a8604, 0x41ef768d},
{'q', 0x410b98ea, 0x41301b7d},
{0, 0x41bbeb61, 0x41301b7d},
{'q', 0x416c3dd8, 0x00000000},
{0, 0x41bad87c, 0xc1301b7d},
{'q', 0x410a8600, 0xc1301b7c},
{0, 0x410a8600, 0xc1ef768d},
{'q', 0x00000000, 0xc197f240},
{0, 0xc10a8600, 0xc1f00000},
{'9', 0xffa8ffbc, 0xffa8ff46},
{'m', 0x00000000, 0xc1301b80},
{'q', 0x41a89730, 0x00000000},
{0, 0x4206c3de, 0x416293c0},
{'q', 0x4149e110, 0x416180e0},
{0, 0x4149e110, 0x421768ce},
{'q', 0x00000000, 0x41bd87bc},
{0, 0xc149e110, 0x421768ce},
{'q', 0xc149e118, 0x416180dd},
{0, 0xc206c3de, 0x416180dd},
{'q', 0xc1a920a4, 0xb4c00000},
{0, 0xc2074d50, 0xc16180dc},
{'q', 0xc149e114, 0xc16180db},
{0, 0xc149e114, 0xc21768ce},
{'q', 0x00000000, 0xc1be112c},
{0, 0x4149e112, 0xc21768ce},
{'q', 0x414af3fa, 0xc16293c0},
{0, 0x42074d50, 0xc16293c0},
{'[', 0x002d004f, 0x000003d3},
{'@', 0x00000050, 0x000052e2},/*        P        x-advance: 82.882812 */
{'M', 0x41d86037, 0xc2b21eed},
{'4', 0x012d0000, 0x00000088},
{'q', 0x411768cc, 0x00000000},
{0, 0x416a180c, 0xc09cc750},
{'8', 0x9129d929, 0x91d7b900},
{'9', 0xffd9ffd7, 0xffd9ff8b},
{'6', 0x0000ff78, 0xffa7ff94},
{'l', 0x41f4d50c, 0x00000000},
{'q', 0x4186c3dc, 0x00000000},
{0, 0x41cb7d6a, 0x40f4d510},
{'q', 0x410a8608, 0x40f2af40},
{0, 0x410a8608, 0x41b24148},
{'q', 0x00000000, 0x416d50c0},
{0, 0xc10a8608, 0x41b35430},
{'9', 0x003cffbc, 0x003cff35},
{'l', 0xc1886037, 0x00000000},
{'l', 0x00000000, 0x422112e6},
{'l', 0xc158e9aa, 0x00000000},
{'l', 0x00000000, 0xc2c86716},
{'@', 0x00000051, 0x00006c30},/*        Q        x-advance: 108.187500 */
{'M', 0x4258a4f0, 0xc2b6036f},
{'q', 0xc16c3dd8, 0x00000000},
{0, 0xc1bbeb61, 0x41301b78},
{'q', 0xc10a8604, 0x41301b80},
{0, 0xc10a8604, 0x41f00000},
{'q', 0x00000000, 0x419768cf},
{0, 0x410a8604, 0x41ef768d},
{'q', 0x410b98ea, 0x41301b7d},
{0, 0x41bbeb61, 0x41301b7d},
{'q', 0x416c3dd8, 0x00000000},
{0, 0x41bad87c, 0xc1301b7d},
{'q', 0x410a8600, 0xc1301b7c},
{0, 0x410a8600, 0xc1ef768d},
{'q', 0x00000000, 0xc197f240},
{0, 0xc10a8600, 0xc1f00000},
{'9', 0xffa8ffbc, 0xffa8ff46},
{'m', 0x4197f240, 0x42b263a6},
{'4', 0x009c008e, 0x0000ff7d},
{'l', 0xc16d50bc, 0xc1805278},
{'8', 0x01e501ef, 0x00ef00f7},
{'q', 0xc1a920a4, 0x00000000},
{0, 0xc2074d50, 0xc16180dc},
{'q', 0xc149e114, 0xc16293c1},
{0, 0xc149e114, 0xc21768ce},
{'q', 0x00000000, 0xc1be112c},
{0, 0x4149e112, 0xc21768ce},
{'q', 0x414af3fa, 0xc16293c0},
{0, 0x42074d50, 0xc16293c0},
{'q', 0x41a89730, 0x00000000},
{0, 0x4206c3de, 0x416293c0},
{'q', 0x4149e110, 0x416180e0},
{0, 0x4149e110, 0x421768ce},
{'q', 0x00000000, 0x418b98ea},
{0, 0xc0e180e0, 0x41eeed1a},
{'q', 0xc0df5b10, 0x4146a860},
{0, 0xc1a225cc, 0x419293c2},
{'[', 0x002d0051, 0x000003d3},
{'@', 0x00000052, 0x00005f80},/*        R        x-advance: 95.500000 */
{'M', 0x427406df, 0xc23beb62},
{'8', 0x32430b22, 0x6a422621},
{'4', 0x00db006e, 0x0000ff8c},
{'l', 0xc14d19c8, 0xc1cda338},
{'8', 0x96b3b0d9, 0xe69be6db},
{'l', 0xc16c3dda, 0x00000000},
{'l', 0x00000000, 0x4229655e},
{'4', 0x0000ff94, 0xfcdf0000},
{'l', 0x41f4d50c, 0x00000000},
{'q', 0x4189731c, 0x00000000},
{0, 0x41cd19c6, 0x40e5cc70},
{'q', 0x41074d50, 0x40e5cc80},
{0, 0x41074d50, 0x41ad6c40},
{'q', 0x00000000, 0x411768cc},
{0, 0xc08dbec0, 0x417b4670},
{'9', 0x0031ffde, 0x0045ff9a},
{'m', 0xc207d6c4, 0xc2285278},
{'4', 0x011c0000, 0x00000088},
{'q', 0x411cc74c, 0x00000000},
{0, 0x416c3dd8, 0xc08fe480},
{'8', 0x9628dc28, 0x97d8ba00},
{'q', 0xc09eed18, 0xc08fe480},
{0, 0xc16c3dd8, 0xc08fe480},
{'l', 0xc1886037, 0x00000000},
{'@', 0x00000053, 0x0000573f},/*        S        x-advance: 87.246094 */
{'M', 0x42931d35, 0xc2c1d354},
{'l', 0x00000000, 0x41538b28},
{'q', 0xc0f6fad0, 0xc06c3dc0},
{0, 0xc1690528, 0xc0b01b70},
{'q', 0xc0db0f70, 0xbfe7f240},
{0, 0xc1538b28, 0xbfe7f240},
{'q', 0xc1312e64, 0x00000000},
{0, 0xc188e9ab, 0x40897310},
{'8', 0x61d122d1, 0x501f3500},
{'9', 0x001a0020, 0x002b0079},
{'l', 0x410301b8, 0x3fd6c3e0},
{'q', 0x4172af40, 0x4038b2b0},
{0, 0x41b2cabc, 0x412338b0},
{'q', 0x40e7f240, 0x40e7f240},
{0, 0x40e7f240, 0x419bb467},
{'q', 0x00000000, 0x41690528},
{0, 0xc11cc750, 0x41b0a4f0},
{'q', 0xc11bb464, 0x40f08975},
{0, 0xc1e4b98c, 0x40f08975},
{'q', 0xc0e3a6a8, 0x34000000},
{0, 0xc172af40, 0xbfce2cab},
{'9', 0xfff4ffc1, 0xffdaff7c},
{'l', 0x00000000, 0xc15f5b0f},
{'q', 0x4104149e, 0x4094301c},
{0, 0x4181655e, 0x40df5b0e},
{'q', 0x40fd6c3c, 0x401655e8},
{0, 0x417920a6, 0x401655e8},
{'q', 0x4139c594, 0x00000000},
{0, 0x418f5b0e, 0xc0920a4e},
{'8', 0x9832dc32, 0xa4dcc500},
{'9', 0xffdfffdd, 0xffcfff8a},
{'l', 0xc10414a0, 0xbfce2cc0},
{'q', 0xc172af3e, 0xc04149e0},
{0, 0xc1af920a, 0xc11768cc},
{'q', 0xc0d8e9a6, 0xc0ce2cb0},
{0, 0xc0d8e9a6, 0xc18f5b0e},
{'q', 0x00000000, 0xc1549e18},
{0, 0x41154301, 0xc1a7844c},
{'q', 0x411655e8, 0xc0f4d510},
{0, 0x41ceb61f, 0xc0f4d510},
{'q', 0x40e180d8, 0x00000000},
{0, 0x4165cc74, 0x3fa338c0},
{'q', 0x40ea1808, 0x3fa338c0},
{0, 0x416f768c, 0x4074d500},
{'[', 0x00410053, 0x0000028c},
{'@', 0x00000054, 0x000053f5},/*        T        x-advance: 83.957031 */
{'M', 0xbece2cac, 0xc2c86716},
{'l', 0x42a987bb, 0x00000000},
{'l', 0x00000000, 0x41368ce0},
{'l', 0xc20e4828, 0x00000000},
{'l', 0x00000000, 0x42b1957a},
{'l', 0xc159fc90, 0x00000000},
{'l', 0x00000000, 0xc2b1957a},
{'l', 0xc20e4829, 0x00000000},
{'l', 0xb5b00000, 0xc1368ce0},
{'@', 0x00000055, 0x0000649a},/*        U        x-advance: 100.601562 */
{'M', 0x413f2414, 0xc2c86716},
{'4', 0x0000006c, 0x01e60000},
{'q', 0x00000000, 0x4180dbeb},
{0, 0x40bad87c, 0x41b9c595},
{'q', 0x40bad87c, 0x40e180da},
{0, 0x419768cd, 0x40e180da},
{'q', 0x41505278, 0x00000000},
{0, 0x4196df5a, 0xc0e180da},
{'9', 0xffc8002e, 0xff47002e},
{'4', 0xfe1a0000, 0x0000006c},
{'l', 0x00000000, 0x427a338b},
{'q', 0x00000000, 0x419cc74d},
{0, 0xc11bb468, 0x41ecc74c},
{'q', 0xc11aa180, 0x41200001},
{0, 0xc1e4b98e, 0x41200001},
{'q', 0xc197f240, 0xb4c00000},
{0, 0xc1e5cc74, 0xc1200000},
{'q', 0xc11aa180, 0xc11fffff},
{0, 0xc11aa180, 0xc1ecc74c},
{'l', 0x00000000, 0xc27a338b},
{'@', 0x00000056, 0x00005e06},/*        V        x-advance: 94.023438 */
{'M', 0x421d50c0, 0x00000000},
{'l', 0xc2190527, 0xc2c86716},
{'l', 0x416293c1, 0x00000000},
{'l', 0x41fdf5b2, 0x42a8b98e},
{'l', 0x41fe7f24, 0xc2a8b98e},
{'l', 0x416180d8, 0x00000000},
{'l', 0xc218c06d, 0x42c86716},
{'l', 0xc175e7f4, 0x00000000},
{'@', 0x00000057, 0x000087e7},/*        W        x-advance: 135.902344 */
{'M', 0x40920a4f, 0xc2c86716},
{'l', 0x415b0f76, 0x00000000},
{'l', 0x41a89731, 0x42a9655e},
{'l', 0x41a80dbe, 0xc2a9655e},
{'l', 0x4173c224, 0x00000000},
{'l', 0x41a89734, 0x42a9655e},
{'l', 0x41a80dbc, 0xc2a9655e},
{'l', 0x415c2260, 0x00000000},
{'l', 0xc1c957a0, 0x42c86716},
{'l', 0xc1886038, 0x00000000},
{'l', 0xc1a920a4, 0xc2adf5b1},
{'l', 0xc1aabcfe, 0x42adf5b1},
{'l', 0xc1886036, 0x00000000},
{'l', 0xc1c8ce2c, 0xc2c86716},
{'@', 0x00000058, 0x00005e29},/*        X        x-advance: 94.160156 */
{'M', 0x410a8603, 0xc2c86716},
{'l', 0x41690527, 0x00000000},
{'l', 0x41c731d3, 0x4214fe48},
{'l', 0x41c844b8, 0xc214fe48},
{'l', 0x41690528, 0x00000000},
{'l', 0xc200dbeb, 0x42407bb4},
{'l', 0x4209731d, 0x42505278},
{'l', 0xc1690528, 0x00000000},
{'l', 0xc1e180da, 0xc22a7844},
{'l', 0xc1e31d35, 0x422a7844},
{'l', 0xc16a180e, 0x00000000},
{'l', 0x420f1656, 0xc255f5b1},
{'l', 0xc1f9aa18, 0xc23ad87b},
{'@', 0x00000059, 0x000053f5},/*        Y        x-advance: 83.957031 */
{'M', 0xbe89731d, 0xc2c86716},
{'l', 0x41690527, 0x00000000},
{'l', 0x41de4829, 0x4224d50c},
{'l', 0x41dcabd0, 0xc224d50c},
{'l', 0x41690528, 0x00000000},
{'l', 0xc20dbeb6, 0x4251eed1},
{'l', 0x00000000, 0x423edf5b},
{'l', 0xc159fc90, 0x00000000},
{'l', 0x00000000, 0xc23edf5b},
{'l', 0xc20dbeb6, 0xc251eed1},
{'@', 0x0000005a, 0x00005e29},/*        Z        x-advance: 94.160156 */
{'M', 0x40f6fad8, 0xc2c86716},
{'l', 0x429d731c, 0x00000000},
{'l', 0x00000000, 0x41255e80},
{'l', 0xc27d6c3c, 0x429ce9aa},
{'l', 0x4281cc74, 0xb6400000},
{'l', 0x00000000, 0x41368ce3},
{'l', 0xc2a39fc8, 0x00000000},
{'l', 0xb6400000, 0xc1255e7f},
{'l', 0x427d6c3d, 0xc29ce9aa},
{'l', 0xc2773f91, 0x00000000},
{'l', 0x00000000, 0xc1368ce0},
{'@', 0x0000005b, 0x0000359f},/*        [        x-advance: 53.621094 */
{'M', 0x413cfe48, 0xc2d0dbeb},
{'l', 0x41e3a6a8, 0x00000000},
{'l', 0x00000000, 0x41198e98},
{'l', 0xc180dbeb, 0x00000000},
{'l', 0x00000000, 0x42ceb61f},
{'l', 0x4180dbeb, 0xb5800000},
{'l', 0x00000000, 0x41198e9b},
{'l', 0xc1e3a6a8, 0x00000000},
{'l', 0x00000000, 0xc2f519c5},
{'@', 0x0000005c, 0x00002e4f},/*       \         x-advance: 46.308594 */
{'M', 0x41368ce3, 0xc2c86716},
{'l', 0x420b98e9, 0x42e1e7f2},
{'l', 0xc1368ce4, 0xb5800000},
{'l', 0xc20b98e9, 0xc2e1e7f2},
{'l', 0x41368ce3, 0x00000000},
{'@', 0x0000005d, 0x0000359f},/*        ]        x-advance: 53.621094 */
{'M', 0x42273f92, 0xc2d0dbeb},
{'l', 0x00000000, 0x42f519c5},
{'l', 0xc1e3a6a8, 0x36000000},
{'l', 0xb5800000, 0xc1198e9b},
{'l', 0x41805278, 0x00000000},
{'l', 0x00000000, 0xc2ceb61f},
{'l', 0xc1805278, 0x00000000},
{'l', 0xb5800000, 0xc1198e98},
{'l', 0x41e3a6a8, 0x00000000},
{'@', 0x0000005e, 0x0000732a},/*        ^        x-advance: 115.164062 */
{'M', 0x42805278, 0xc2c86716},
{'l', 0x4211c596, 0x421587bb},
{'l', 0xc157d6c8, 0x00000000},
{'l', 0xc1ec3dd8, 0xc1d4149e},
{'l', 0xc1ec3ddb, 0x41d4149e},
{'l', 0xc157d6c3, 0x00000000},
{'l', 0x4211c595, 0xc21587bb},
{'l', 0x41527844, 0x00000000},
{'@', 0x0000005f, 0x000044b9},/*        _        x-advance: 68.722656 */
{'M', 0x428c225d, 0x41b68ce3},
{'l', 0x00000000, 0x41198e9a},
{'l', 0xc28ed19d, 0x00000000},
{'l', 0x36600000, 0xc1198e9a},
{'l', 0x428ed19d, 0x00000000},
{'@', 0x00000060, 0x000044b9},/*        `        x-advance: 68.722656 */
{'M', 0x41c50c07, 0xc2dbdda3},
{'l', 0x419768cd, 0x41c8ce2c},
{'l', 0xc1244b98, 0x00000000},
{'l', 0xc1af0896, 0xc1c8ce2c},
{'l', 0x41538b2a, 0x00000000},
{'@', 0x00000061, 0x0000543a},/*        a        x-advance: 84.226562 */
{'M', 0x423c74d5, 0xc2172414},
{'q', 0xc16f768c, 0x00000000},
{0, 0xc1a5e7f2, 0x405b0f70},
{'8', 0x5dd21bd2, 0x53223400},
{'q', 0x408b98e8, 0x4074d50c},
{0, 0x413cfe46, 0x4074d50c},
{'q', 0x41244b9c, 0x00000000},
{0, 0x41838b2c, 0xc0e7f242},
{'9', 0xffc60031, 0xff650031},
{'4', 0xffea0000, 0x0000ff9e},
{'m', 0x41c50c06, 0xc0a338b8},
{'4', 0x01570000, 0x0000ff9e},
{'l', 0x00000000, 0xc1368ce3},
{'q', 0xc0874d50, 0x40db0f77},
{0, 0xc1289734, 0x412225cd},
{'q', 0xc0c9e110, 0x404e2caa},
{0, 0xc176fad8, 0x404e2caa},
{'q', 0xc138b2ae, 0x34000000},
{0, 0xc1931d34, 0xc0ce2cab},
{'q', 0xc0d8e9ac, 0xc0d05278},
{0, 0xc0d8e9ac, 0xc18b0f76},
{'q', 0x00000000, 0xc14af3fc},
{0, 0x41074d50, 0xc1990528},
{'9', 0xffcd0044, 0xffcd00ca},
{'4', 0x0000008a, 0xfff70000},
{'q', 0x00000000, 0xc1086034},
{0, 0xc0b46718, 0xc1527844},
{'q', 0xc0b24148, 0xc09655e0},
{0, 0xc17b4670, 0xc09655e0},
{'8', 0x0c9c00cd, 0x25a30cd0},
{'l', 0x00000000, 0xc1368ce4},
{'8', 0xe169ec36, 0xf663f633},
{'q', 0x41827844, 0x00000000},
{0, 0x41c2e63a, 0x41074d50},
{'q', 0x4100dbec, 0x41074d54},
{0, 0x4100dbec, 0x41cd19c6},
{'@', 0x00000062, 0x0000573f},/*        b        x-advance: 87.246094 */
{'M', 0x4285d354, 0xc216112e},
{'q', 0x00000000, 0xc159fc90},
{0, 0xc0b46718, 0xc1aabcfe},
{'q', 0xc0b24148, 0xc0f920a8},
{0, 0xc175e7f0, 0xc0f920a8},
{'q', 0xc11cc750, 0x00000000},
{0, 0xc176fada, 0x40f920a8},
{'q', 0xc0b24148, 0x40f6fad8},
{0, 0xc0b24148, 0x41aabcfe},
{'q', 0x00000000, 0x4159fc90},
{0, 0x40b24148, 0x41ab4671},
{'q', 0x40b46714, 0x40f6fad8},
{0, 0x4176fada, 0x40f6fad8},
{'q', 0x411cc74c, 0x00000000},
{0, 0x4175e7f0, 0xc0f6fad8},
{'9', 0xffc2002d, 0xff55002d},
{'m', 0xc2280dbe, 0xc1d1eed2},
{'q', 0x407920a0, 0xc0d6c3d8},
{0, 0x411cc74c, 0xc11eed1c},
{'q', 0x40bf2410, 0xc0527840},
{0, 0x4163a6a8, 0xc0527840},
{'q', 0x415b0f74, 0x00000000},
{0, 0x41b1b7d6, 0x412df5b0},
{'q', 0x41097320, 0x412df5b4},
{0, 0x41097320, 0x41e4b990},
{'q', 0x00000000, 0x418dbeb6},
{0, 0xc1097320, 0x41e4b98e},
{'q', 0xc1086038, 0x412df5b1},
{0, 0xc1b1b7d6, 0x412df5b1},
{'q', 0xc10414a0, 0xb4c00000},
{0, 0xc163a6a8, 0xc04e2cad},
{'9', 0xffe6ffd1, 0xffb0ffb2},
{'l', 0x00000000, 0x41346716},
{'l', 0xc146a860, 0x00000000},
{'l', 0x00000000, 0xc2d0dbeb},
{'l', 0x4146a860, 0x00000000},
{'l', 0x00000000, 0x4222af3f},
{'@', 0x00000063, 0x00004b92},/*        c        x-advance: 75.570312 */
{'M', 0x4286180e, 0xc2909052},
{'l', 0x00000000, 0x4138b2ac},
{'8', 0xdeace9d7, 0xf5acf5d7},
{'q', 0xc14036fc, 0x00000000},
{0, 0xc1954302, 0x40f4d508},
{'q', 0xc0d49e10, 0x40f2af40},
{0, 0xc0d49e10, 0x41aabcfe},
{'q', 0x00000000, 0x415c225c},
{0, 0x40d49e10, 0x41ab4671},
{'q', 0x40d49e10, 0x40f2af3e},
{0, 0x41954302, 0x40f2af3e},
{'8', 0xf554002a, 0xde54f52a},
{'l', 0x00000000, 0x41368ce2},
{'8', 0x1cab13d7, 0x09a309d4},
{'q', 0xc187d6c4, 0x00000000},
{0, 0xc1d7d6c4, 0xc12abcfe},
{'q', 0xc1200000, 0xc12abcff},
{0, 0xc1200000, 0xc1e655e8},
{'q', 0xb5000000, 0xc1931d36},
{0, 0x412112e6, 0xc1e768d0},
{'q', 0x412225cc, 0xc1289730},
{0, 0x41ddbeb5, 0xc1289730},
{'8', 0x0959002d, 0x1b54092b},
{'@', 0x00000064, 0x0000573f},/*        d        x-advance: 87.246094 */
{'M', 0x4279aa18, 0xc27f0897},
{'l', 0x00000000, 0xc222af3f},
{'l', 0x41459578, 0x00000000},
{'4', 0x03430000, 0x0000ff9e},
{'l', 0x00000000, 0xc1346716},
{'q', 0xc07920b0, 0x40d6c3dd},
{0, 0xc11dda34, 0x41200000},
{'q', 0xc0bcfe48, 0x404e2caa},
{0, 0xc163a6a8, 0x404e2caa},
{'q', 0xc159fc90, 0x34000000},
{0, 0xc1b1b7d7, 0xc12df5b0},
{'q', 0xc1086036, 0xc12df5b0},
{0, 0xc1086036, 0xc1e4b98e},
{'q', 0xb5000000, 0xc18dbeb6},
{0, 0x41086036, 0xc1e4b990},
{'q', 0x4109731e, 0xc12df5b0},
{0, 0x41b1b7d7, 0xc12df5b0},
{'q', 0x41052784, 0x00000000},
{0, 0x4163a6a8, 0x40527840},
{'9', 0x0019002f, 0x004f004e},
{'m', 0xc2285278, 0x41d1eed2},
{'q', 0xb6000000, 0x4159fc90},
{0, 0x40b24148, 0x41ab4671},
{'q', 0x40b46714, 0x40f6fad8},
{0, 0x4176fad8, 0x40f6fad8},
{'q', 0x411cc74c, 0x00000000},
{0, 0x4176fad8, 0xc0f6fad8},
{'q', 0x40b46718, 0xc0f920a4},
{0, 0x40b46718, 0xc1ab4671},
{'q', 0x00000000, 0xc159fc90},
{0, 0xc0b46718, 0xc1aabcfe},
{'q', 0xc0b46718, 0xc0f920a8},
{0, 0xc176fad8, 0xc0f920a8},
{'q', 0xc11cc74e, 0x00000000},
{0, 0xc176fad8, 0x40f920a8},
{'q', 0xc0b2414c, 0x40f6fad8},
{0, 0xc0b2414c, 0x41aabcfe},
{'@', 0x00000065, 0x00005490},/*        e        x-advance: 84.562500 */
{'M', 0x429a7f24, 0xc222af3f},
{'4', 0x00300000, 0x0000fe3a},
{'q', 0x3f4e2ca0, 0x414c06de},
{0, 0x40f4d508, 0x419bb466},
{'q', 0x40dd3548, 0x40d49e12},
{0, 0x41998e9a, 0x40d49e12},
{'8', 0xf36e0038, 0xd76af335},
{'l', 0x00000000, 0x413ad87b},
{'q', 0xc0d49e10, 0x40346716},
{0, 0xc159fc8c, 0x4089731d},
{'q', 0xc0df5b10, 0x3fbcfe49},
{0, 0xc16293c4, 0x3fbcfe49},
{'q', 0xc18fe482, 0x00000000},
{0, 0xc1e4301b, 0xc127844c},
{'q', 0xc127844a, 0xc127844b},
{0, 0xc127844a, 0xc1e293c2},
{'q', 0xb5000000, 0xc193a6a8},
{0, 0x411eed1a, 0xc1ea180e},
{'q', 0x411ffffe, 0xc12df5b0},
{0, 0x41d74d4f, 0xc12df5b0},
{'q', 0x4172af40, 0x00000000},
{0, 0x41bfad88, 0x411cc750},
{'9', 0x004d0046, 0x00d40046},
{'m', 0xc1459578, 0xc067f240},
{'q', 0xbe097400, 0xc12225cc},
{0, 0xc0b68ce8, 0xc1816560},
{'q', 0xc0b01b80, 0xc0c149d8},
{0, 0xc16a180c, 0xc0c149d8},
{'q', 0xc1255e80, 0x00000000},
{0, 0xc1849e12, 0x40bad878},
{'q', 0xc0c59578, 0x40bad878},
{0, 0xc0e3a6a8, 0x41838b2a},
{'l', 0x42301b7e, 0xbd897200},
{'@', 0x00000066, 0x00003063},/*        f        x-advance: 48.386719 */
{'M', 0x424c06df, 0xc2d0dbeb},
{'4', 0x00520000, 0x0000ffa2},
{'8', 0x15b600cb, 0x4dec15ec},
{'l', 0x00000000, 0x40d49e10},
{'l', 0x41a2af40, 0x00000000},
{'l', 0x00000000, 0x41198ea0},
{'l', 0xc1a2af40, 0x00000000},
{'l', 0x00000000, 0x42832414},
{'l', 0xc146a85f, 0x00000000},
{'l', 0x00000000, 0xc2832414},
{'0', 0xb40000a2, 0xd700005e},
{'q', 0x00000000, 0xc148ce30},
{0, 0x40bad87a, 0xc1920a50},
{'q', 0x40bad87c, 0xc0b8b2b0},
{0, 0x4194301b, 0xc0b8b2b0},
{'l', 0x413ad87c, 0x00000000},
{'@', 0x00000067, 0x0000573f},/*        g        x-advance: 87.246094 */
{'M', 0x4279aa18, 0xc219d354},
{'q', 0x00000000, 0xc156c3dc},
{0, 0xc0b24150, 0xc1a67166},
{'q', 0xc0b01b78, 0xc0ec3dd8},
{0, 0xc1780dbc, 0xc0ec3dd8},
{'q', 0xc11eed1a, 0x00000000},
{0, 0xc1780dbe, 0x40ec3dd8},
{'q', 0xc0b01b80, 0x40ec3de0},
{0, 0xc0b01b80, 0x41a67166},
{'q', 0x00000000, 0x4155b0f8},
{0, 0x40b01b80, 0x41a5e7f2},
{'q', 0x40b24148, 0x40ec3dda},
{0, 0x41780dbe, 0x40ec3dda},
{'q', 0x41200000, 0x00000000},
{0, 0x41780dbc, 0xc0ec3dda},
{'9', 0xffc5002c, 0xff5b002c},
{'m', 0x41459578, 0x41e90528},
{'q', 0x00000000, 0x41998e9a},
{0, 0xc1086038, 0x41e4b98e},
{'q', 0xc1086034, 0x41154300},
{0, 0xc1d0dbea, 0x41154300},
{'8', 0xf99e00cc, 0xe8a7f8d2},
{'l', 0x00000000, 0xc14036fb},
{'8', 0x2255172b, 0x0b560b2a},
{'q', 0x41425cc4, 0x00000000},
{0, 0x419180dc, 0xc0c9e112},
{'9', 0xffcd0030, 0xff670030},
{'l', 0x00000000, 0xc0c36fb0},
{'q', 0xc074d510, 0x40d49e12},
{0, 0xc11cc750, 0x411eed1a},
{'q', 0xc0bf2410, 0x40527844},
{0, 0xc164b98c, 0x40527844},
{'q', 0xc15d3544, 0x00000000},
{0, 0xc1b2414a, 0xc1289732},
{'q', 0xc1074d50, 0xc1289732},
{0, 0xc1074d50, 0xc1df5b0f},
{'q', 0xb5000000, 0xc18b98ea},
{0, 0x41074d50, 0xc1dfe484},
{'q', 0x41074d50, 0xc1289730},
{0, 0x41b2414a, 0xc1289730},
{'q', 0x41052784, 0x00000000},
{0, 0x4164b98c, 0x40527840},
{'9', 0x001a002f, 0x004f004e},
{'l', 0x00000000, 0xc1368ce4},
{'l', 0x41459578, 0x00000000},
{'l', 0x00000000, 0x4283ad88},
{'@', 0x00000068, 0x0000571d},/*        h        x-advance: 87.113281 */
{'M', 0x4296df5b, 0xc23579fc},
{'4', 0x016a0000, 0x0000ff9e},
{'l', 0x00000000, 0xc233dda3},
{'q', 0x00000000, 0xc12abcfc},
{0, 0xc0852780, 0xc17f9208},
{'q', 0xc0852788, 0xc0a9aa18},
{0, 0xc147bb48, 0xc0a9aa18},
{'q', 0xc1200000, 0x00000000},
{0, 0xc17c5956, 0x40cc06d8},
{'9', 0x0033ffd2, 0x008bffd2},
{'l', 0x00000000, 0x4229eed1},
{'l', 0xc146a860, 0x00000000},
{'4', 0xfcbd0000, 0x00000063},
{'l', 0x00000000, 0x4223c225},
{'8', 0xaf53ca23, 0xe66fe630},
{'q', 0x414f3f90, 0x00000000},
{0, 0x419cc74e, 0x4100dbf0},
{'q', 0x40d49e10, 0x40ff9208},
{0, 0x40d49e10, 0x41bc74d4},
{'@', 0x00000069, 0x00002630},/*        i        x-advance: 38.187500 */
{'M', 0x414f3f92, 0xc29655e8},
{'l', 0x4145957a, 0x00000000},
{'4', 0x02590000, 0x0000ff9e},
{'6', 0xfda70000, 0xff160000},
{'l', 0x4145957a, 0x00000000},
{'l', 0x00000000, 0x417a3388},
{'l', 0xc145957a, 0x00000000},
{'l', 0x00000000, 0xc17a3388},
{'@', 0x0000006a, 0x00002630},/*        j        x-advance: 38.187500 */
{'M', 0x414f3f92, 0xc29655e8},
{'4', 0x00000062, 0x02640000},
{'q', 0x00000000, 0x4165cc71},
{0, 0xc0b01b80, 0x41a67163},
{'9', 0x0033ffd5, 0x0033ff74},
{'4', 0x0000ffdb, 0xffad0000},
{'l', 0x40527845, 0x00000000},
{'8', 0xe74c0038, 0x9414e614},
{'6', 0xfd9c0000, 0xff160000},
{'l', 0x4145957a, 0x00000000},
{'l', 0x00000000, 0x417a3388},
{'l', 0xc145957a, 0x00000000},
{'l', 0x00000000, 0xc17a3388},
{'@', 0x0000006b, 0x00004f98},/*        k        x-advance: 79.593750 */
{'M', 0x4147bb46, 0xc2d0dbeb},
{'l', 0x4146a860, 0x00000000},
{'l', 0x00000000, 0x4276b61e},
{'l', 0x421361ee, 0xc201aa18},
{'l', 0x417c5958, 0x00000000},
{'l', 0xc21f768d, 0x420cabd0},
{'l', 0x42262cab, 0x42200000},
{'l', 0xc180dbea, 0x00000000},
{'l', 0xc218c06e, 0xc212d87b},
{'l', 0x36000000, 0x4212d87b},
{'l', 0xc146a860, 0x00000000},
{'l', 0x00000000, 0xc2d0dbeb},
{'@', 0x0000006c, 0x00002630},/*        l        x-advance: 38.187500 */
{'M', 0x414f3f92, 0xc2d0dbeb},
{'l', 0x4145957a, 0x00000000},
{'l', 0x00000000, 0x42d0dbeb},
{'l', 0xc145957a, 0x00000000},
{'l', 0x00000000, 0xc2d0dbeb},
{'@', 0x0000006d, 0x000085e4},/*        m        x-advance: 133.890625 */
{'M', 0x428ef3f9, 0xc272f3f9},
{'q', 0x40943020, 0xc1052784},
{0, 0x41312e60, 0xc1448294},
{'q', 0x40ce2cb0, 0xc07d6c40},
{0, 0x4172af40, 0xc07d6c40},
{'q', 0x413beb60, 0x00000000},
{0, 0x4190f768, 0x410414a0},
{'9', 0x00410033, 0x00ba0033},
{'4', 0x016a0000, 0x0000ff9d},
{'l', 0x00000000, 0xc233dda3},
{'q', 0x00000000, 0xc12ce2cc},
{0, 0xc074d500, 0xc1805278},
{'q', 0xc074d500, 0xc0a78448},
{0, 0xc13ad878, 0xc0a78448},
{'q', 0xc1198ea0, 0x00000000},
{0, 0xc172af40, 0x40cc06d8},
{'9', 0x0033ffd4, 0x008bffd4},
{'4', 0x01530000, 0x0000ff9d},
{'l', 0x00000000, 0xc233dda3},
{'q', 0x00000000, 0xc12df5b0},
{0, 0xc074d510, 0xc1805278},
{'q', 0xc074d500, 0xc0a78448},
{0, 0xc13cfe48, 0xc0a78448},
{'q', 0xc11768cc, 0x00000000},
{0, 0xc1708972, 0x40ce2ca8},
{'9', 0x0033ffd4, 0x008affd4},
{'l', 0x00000000, 0x4229eed1},
{'l', 0xc146a860, 0x00000000},
{'4', 0xfda70000, 0x00000063},
{'l', 0x00000000, 0x413ad87c},
{'q', 0x40874d50, 0xc0dd3538},
{0, 0x412225ce, 0xc12338b4},
{'q', 0x40bcfe48, 0xc0527840},
{0, 0x41606df4, 0xc0527840},
{'8', 0x216f0041, 0x6044212e},
{'@', 0x0000006e, 0x0000571d},/*        n        x-advance: 87.113281 */
{'M', 0x4296df5b, 0xc23579fc},
{'4', 0x016a0000, 0x0000ff9e},
{'l', 0x00000000, 0xc233dda3},
{'q', 0x00000000, 0xc12abcfc},
{0, 0xc0852780, 0xc17f9208},
{'q', 0xc0852788, 0xc0a9aa18},
{0, 0xc147bb48, 0xc0a9aa18},
{'q', 0xc1200000, 0x00000000},
{0, 0xc17c5956, 0x40cc06d8},
{'9', 0x0033ffd2, 0x008bffd2},
{'l', 0x00000000, 0x4229eed1},
{'l', 0xc146a860, 0x00000000},
{'4', 0xfda70000, 0x00000063},
{'l', 0x00000000, 0x413ad87c},
{'8', 0xaf53ca23, 0xe66fe630},
{'q', 0x414f3f90, 0x00000000},
{0, 0x419cc74e, 0x4100dbf0},
{'q', 0x40d49e10, 0x40ff9208},
{0, 0x40d49e10, 0x41bc74d4},
{'@', 0x0000006f, 0x00005418},/*        o        x-advance: 84.093750 */
{'M', 0x42285278, 0xc2850527},
{'q', 0xc11eed18, 0x00000000},
{0, 0xc17b4670, 0x40f920a0},
{'q', 0xc0b8b2b0, 0x40f6fad8},
{0, 0xc0b8b2b0, 0x41a9aa18},
{'q', 0x00000000, 0x4157d6c2},
{0, 0x40b68ce0, 0x41aa338b},
{'q', 0x40b8b2b0, 0x40f6fad6},
{0, 0x417c5958, 0x40f6fad6},
{'q', 0x411dda34, 0x00000000},
{0, 0x417a338c, 0xc0f920a6},
{'q', 0x40b8b2b0, 0xc0f920a4},
{0, 0x40b8b2b0, 0xc1a9aa17},
{'q', 0x00000000, 0xc155b0f8},
{0, 0xc0b8b2b0, 0xc1a920a6},
{'9', 0xffc2ffd2, 0xffc2ff83},
{'m', 0x00000000, 0xc1278450},
{'q', 0x4180dbec, 0x00000000},
{0, 0x41ca6a84, 0x41278450},
{'q', 0x41131d38, 0x41278448},
{0, 0x41131d38, 0x41e7f240},
{'q', 0x00000000, 0x4193a6a8},
{0, 0xc1131d38, 0x41e7f240},
{'q', 0xc1131d30, 0x4127844d},
{0, 0xc1ca6a84, 0x4127844d},
{'q', 0xc181655e, 0xb4c00000},
{0, 0xc1caf3f9, 0xc127844c},
{'q', 0xc1120a4e, 0xc1289731},
{0, 0xc1120a4e, 0xc1e7f240},
{'q', 0xb5000000, 0xc194301c},
{0, 0x41120a4e, 0xc1e7f240},
{'q', 0x41131d36, 0xc1278450},
{0, 0x41caf3f9, 0xc1278450},
{'[', 0x002d006f, 0x0000028c},
{'@', 0x00000070, 0x0000573f},/*        p        x-advance: 87.246094 */
{'M', 0x41c731d3, 0xc1346716},
{'l', 0x00000000, 0x421f768c},
{'l', 0xc146a860, 0x36000000},
{'4', 0xfcc20000, 0x00000063},
{'l', 0x00000000, 0x41368ce4},
{'q', 0x407920a8, 0xc0d6c3d8},
{0, 0x411cc74e, 0xc11eed1c},
{'q', 0x40bf2410, 0xc0527840},
{0, 0x4163a6a8, 0xc0527840},
{'q', 0x415b0f74, 0x00000000},
{0, 0x41b1b7d6, 0x412df5b0},
{'q', 0x41097320, 0x412df5b4},
{0, 0x41097320, 0x41e4b990},
{'q', 0x00000000, 0x418dbeb6},
{0, 0xc1097320, 0x41e4b98e},
{'q', 0xc1086038, 0x412df5b1},
{0, 0xc1b1b7d6, 0x412df5b1},
{'q', 0xc10414a0, 0xb4c00000},
{0, 0xc163a6a8, 0xc04e2cad},
{'9', 0xffe6ffd1, 0xffb0ffb2},
{'m', 0x42280dbe, 0xc1d1eed1},
{'q', 0x00000000, 0xc159fc90},
{0, 0xc0b46718, 0xc1aabcfe},
{'q', 0xc0b24148, 0xc0f920a8},
{0, 0xc175e7f0, 0xc0f920a8},
{'q', 0xc11cc750, 0x00000000},
{0, 0xc176fada, 0x40f920a8},
{'q', 0xc0b24148, 0x40f6fad8},
{0, 0xc0b24148, 0x41aabcfe},
{'q', 0x00000000, 0x4159fc90},
{0, 0x40b24148, 0x41ab4671},
{'q', 0x40b46714, 0x40f6fad8},
{0, 0x4176fada, 0x40f6fad8},
{'q', 0x411cc74c, 0x00000000},
{0, 0x4175e7f0, 0xc0f6fad8},
{'q', 0x40b46718, 0xc0f920a4},
{0, 0x40b46718, 0xc1ab4671},
{'@', 0x00000071, 0x0000573f},/*        q        x-advance: 87.246094 */
{'M', 0x41a2af3f, 0xc216112e},
{'q', 0x00000000, 0x4159fc90},
{0, 0x40b2414c, 0x41ab4671},
{'q', 0x40b46714, 0x40f6fad8},
{0, 0x4176fad8, 0x40f6fad8},
{'q', 0x411cc74c, 0x00000000},
{0, 0x4176fad8, 0xc0f6fad8},
{'q', 0x40b46718, 0xc0f920a4},
{0, 0x40b46718, 0xc1ab4671},
{'q', 0x00000000, 0xc159fc90},
{0, 0xc0b46718, 0xc1aabcfe},
{'q', 0xc0b46718, 0xc0f920a8},
{0, 0xc176fad8, 0xc0f920a8},
{'q', 0xc11cc74e, 0x00000000},
{0, 0xc176fad8, 0x40f920a8},
{'9', 0x003dffd4, 0x00aaffd4},
{'m', 0x42285278, 0x41d1eed1},
{'q', 0xc07920b0, 0x40d6c3dd},
{0, 0xc11dda34, 0x41200000},
{'q', 0xc0bcfe48, 0x404e2caa},
{0, 0xc163a6a8, 0x404e2caa},
{'q', 0xc159fc90, 0x34000000},
{0, 0xc1b1b7d7, 0xc12df5b0},
{'q', 0xc1086036, 0xc12df5b0},
{0, 0xc1086036, 0xc1e4b98e},
{'q', 0xb5000000, 0xc18dbeb6},
{0, 0x41086036, 0xc1e4b990},
{'q', 0x4109731e, 0xc12df5b0},
{0, 0x41b1b7d7, 0xc12df5b0},
{'q', 0x41052784, 0x00000000},
{0, 0x4163a6a8, 0x40527840},
{'9', 0x0019002f, 0x004f004e},
{'l', 0x00000000, 0xc1368ce4},
{'l', 0x41459578, 0x00000000},
{'l', 0x00000000, 0x42cf844c},
{'l', 0xc1459578, 0xb6800000},
{'l', 0x00000000, 0xc21f768c},
{'@', 0x00000072, 0x00003882},/*        r        x-advance: 56.507812 */
{'M', 0x42620a4f, 0xc27e7f24},
{'8', 0xf3dcf7f0, 0xfcd6fced},
{'q', 0xc127844c, 0x00000000},
{0, 0xc180dbeb, 0x40db0f78},
{'9', 0x0036ffd4, 0x009cffd4},
{'l', 0x00000000, 0x421e63a6},
{'l', 0xc146a860, 0x00000000},
{'4', 0xfda70000, 0x00000063},
{'l', 0x00000000, 0x413ad87c},
{'q', 0x407920a8, 0xc0db0f78},
{0, 0x412225ce, 0xc12225cc},
{'q', 0x40c7bb40, 0xc056c3e0},
{0, 0x4172af3c, 0xc056c3e0},
{'8', 0x0116000a, 0x031b010c},
{'l', 0x3d897400, 0x414af3f8},
{'@', 0x00000073, 0x0000479c},/*        s        x-advance: 71.609375 */
{'M', 0x42737d6c, 0xc291e7f2},
{'l', 0x00000000, 0x413ad87c},
{'8', 0xe0aaebd7, 0xf6a3f6d3},
{'8', 0x169200b7, 0x43dc16dc},
{'8', 0x361a2200, 0x2569131a},
{'l', 0x40874d50, 0x3f708980},
{'q', 0x41527844, 0x40346720},
{0, 0x41954302, 0x40ff9208},
{'q', 0x40b24150, 0x40a338b4},
{0, 0x40b24150, 0x4164b990},
{'q', 0x00000000, 0x4127844b},
{0, 0xc1052788, 0x41849e11},
{'q', 0xc104149c, 0x40c36fad},
{0, 0xc1b6036e, 0x40c36fad},
{'8', 0xf79c00d0, 0xe492f7cc},
{'l', 0x00000000, 0xc14c06df},
{'8', 0x2a6b1c36, 0x0d690d35},
{'8', 0xe96b0045, 0xbd25e825},
{'q', 0x00000000, 0xc0a112e8},
{0, 0xc05b0f70, 0xc0f6fad8},
{'9', 0xffebffe6, 0xffd7ff8a},
{'l', 0xc0897320, 0xbf80dbe0},
{'q', 0xc1379fc8, 0xc01aa180},
{0, 0xc1849e11, 0xc0ec3de0},
{'q', 0xc0a338b2, 0xc0a112e0},
{0, 0xc0a338b2, 0xc15c225c},
{'q', 0x00000000, 0xc129aa18},
{0, 0x40f08972, 0xc18301b8},
{'q', 0x40f08974, 0xc0b8b2b0},
{0, 0x41aabcff, 0xc0b8b2b0},
{'8', 0x08670036, 0x18590830},
{'@', 0x00000074, 0x000035e4},/*        t        x-advance: 53.890625 */
{'M', 0x41c9579f, 0xc2c10527},
{'l', 0x00000000, 0x41aabcfc},
{'l', 0x41cb7d6d, 0x00000000},
{'4', 0x004c0000, 0x0000ff35},
{'l', 0x00000000, 0x422338b2},
{'8', 0x5e134900, 0x14521414},
{'4', 0x00000065, 0x00520000},
{'l', 0xc14af3f8, 0x00000000},
{'q', 0xc164b990, 0x00000000},
{0, 0xc19dda34, 0xc0a9aa18},
{'9', 0xffd6ffd5, 0xff65ffd5},
{'l', 0x00000000, 0xc22338b2},
{'l', 0xc110f768, 0x00000000},
{'l', 0xb5000000, 0xc1198ea0},
{'l', 0x4110f768, 0x00000000},
{'l', 0x35800000, 0xc1aabcfc},
{'l', 0x4146a85f, 0x00000000},
{'@', 0x00000075, 0x0000571d},/*        u        x-advance: 87.113281 */
{'M', 0x413ad87b, 0xc1ed50c0},
{'4', 0xfe940000, 0x00000062},
{'l', 0x00000000, 0x4234225d},
{'q', 0x00000000, 0x412abcfe},
{0, 0x40852784, 0x41805278},
{'q', 0x40852780, 0x40a9aa18},
{0, 0x4147bb44, 0x40a9aa18},
{'q', 0x41200000, 0x00000000},
{0, 0x417c5958, 0xc0cc06de},
{'9', 0xffcd002e, 0xff75002e},
{'l', 0x00000000, 0xc22a7845},
{'l', 0x41459574, 0x00000000},
{'4', 0x02590000, 0x0000ff9e},
{'l', 0x00000000, 0xc138b2af},
{'8', 0x51ad36dd, 0x1a921ad1},
{'q', 0xc14f3f94, 0x34000000},
{0, 0xc19d50c1, 0xc100dbeb},
{'9', 0xffc0ffcb, 0xff44ffcb},
{'m', 0x41f89732, 0xc23d4302},
{'l', 0x00000000, 0x00000000},
{'@', 0x00000076, 0x00005157},/*        v        x-advance: 81.339844 */
{'M', 0x408301b8, 0xc29655e8},
{'l', 0x4151655e, 0x00000000},
{'l', 0x41bbeb61, 0x427c5958},
{'l', 0x41bbeb62, 0xc27c5958},
{'l', 0x41516560, 0x00000000},
{'l', 0xc1e180dc, 0x429655e8},
{'l', 0xc1863a6a, 0x00000000},
{'l', 0xc1e180dc, 0xc29655e8},
{'@', 0x00000077, 0x0000706a},/*        w        x-advance: 112.414062 */
{'M', 0x40b8b2af, 0xc29655e8},
{'l', 0x4145957a, 0x00000000},
{'l', 0x4176fad6, 0x426aa181},
{'l', 0x4175e7f4, 0xc26aa181},
{'l', 0x41690528, 0x00000000},
{'l', 0x4176fad4, 0x426aa181},
{'l', 0x4175e7f8, 0xc26aa181},
{'l', 0x41459578, 0x00000000},
{'l', 0xc19d50c0, 0x429655e8},
{'l', 0xc1690528, 0x00000000},
{'l', 0xc1816560, 0xc2767165},
{'l', 0xc181eed0, 0x42767165},
{'l', 0xc1690528, 0x00000000},
{'l', 0xc19d50c0, 0xc29655e8},
{'@', 0x00000078, 0x00005157},/*        x        x-advance: 81.339844 */
{'M', 0x4296df5b, 0xc29655e8},
{'l', 0xc1d9731e, 0x42124f09},
{'l', 0x41e4b98e, 0x421a5cc7},
{'l', 0xc1690524, 0x00000000},
{'l', 0xc1af0898, 0xc1ec3dda},
{'l', 0xc1af0897, 0x41ec3dda},
{'l', 0xc1690527, 0x00000000},
{'l', 0x41e98e9a, 0xc21d50c0},
{'l', 0xc1d5b0f7, 0xc20f5b10},
{'l', 0x41690526, 0x00000000},
{'l', 0x419f768e, 0x41d63a6c},
{'l', 0x419f768c, 0xc1d63a6c},
{'l', 0x41690528, 0x00000000},
{'@', 0x00000079, 0x00005157},/*        y        x-advance: 81.339844 */
{'M', 0x4230e9aa, 0x40df5b0f},
{'q', 0xc0a78450, 0x4156c3dc},
{0, 0xc12338b4, 0x418c225c},
{'9', 0x0020ffd9, 0x0020ff96},
{'4', 0x0000ffb2, 0xffae0000},
{'l', 0x40e7f242, 0x00000000},
{'8', 0xed3f0028, 0xa531ed16},
{'l', 0x400dbeb0, 0xc0b46716},
{'l', 0xc1f338b2, 0xc293eb62},
{'l', 0x4151655e, 0x00000000},
{'l', 0x41bbeb61, 0x426b2af4},
{'l', 0x41bbeb62, 0xc26b2af4},
{'l', 0x41516560, 0x00000000},
{'l', 0xc204149e, 0x42a44b99},
{'@', 0x0000007a, 0x00004825},/*        z        x-advance: 72.144531 */
{'M', 0x40f2af3f, 0xc29655e8},
{'l', 0x426aa180, 0x00000000},
{'l', 0x00000000, 0x41346718},
{'l', 0xc239c595, 0x42581b7d},
{'l', 0x4239c595, 0x35800000},
{'l', 0x00000000, 0x411dda33},
{'l', 0xc271579f, 0x00000000},
{'l', 0x00000000, 0xc1346716},
{'l', 0x4239c595, 0xc2581b7c},
{'l', 0xc2330f76, 0x00000000},
{'l', 0xb5000000, 0xc11dda38},
{'@', 0x0000007b, 0x00005773},/*        {        x-advance: 87.449219 */
{'M', 0x428c8973, 0x414c06df},
{'4', 0x004d0000, 0x0000ffdf},
{'q', 0xc185b0f8, 0x00000000},
{0, 0xc1b35432, 0xc09eed1c},
{'9', 0xffd9ffd3, 0xff62ffd3},
{'l', 0x00000000, 0xc1805278},
{'q', 0x00000000, 0xc12225cc},
{0, 0xc067f240, 0xc1606df6},
{'9', 0xffe1ffe4, 0xffe1ff97},
{'4', 0x0000ffe0, 0xffb40000},
{'l', 0x408301b8, 0x00000000},
{'8', 0xe269004c, 0x911ce11c},
{'l', 0x00000000, 0xc180dbec},
{'q', 0x00000000, 0xc16d50c0},
{0, 0x40b46710, 0xc19dda30},
{'9', 0xffd9002d, 0xffd900b3},
{'4', 0x00000021, 0x004c0000},
{'l', 0xc0920a50, 0x00000000},
{'8', 0x179e00b5, 0x63e917e9},
{'l', 0x00000000, 0x41852786},
{'q', 0x00000000, 0x41289730},
{0, 0xc0459580, 0x4174d50c},
{'8', 0x33ad26e8, 0x34530e3b},
{'9', 0x00260018, 0x00790018},
{'l', 0x00000000, 0x41852785},
{'8', 0x63174b00, 0x17621717},
{'l', 0x40920a50, 0x00000000},
{'@', 0x0000007c, 0x00002e4f},/*        |        x-advance: 46.308594 */
{'M', 0x41e6df5b, 0xc2d2112e},
{'l', 0x00000000, 0x4309731d},
{'l', 0xc1368ce4, 0x00000000},
{'l', 0x00000000, 0xc309731d},
{'l', 0x41368ce4, 0x00000000},
{'@', 0x0000007d, 0x00005773},/*        }        x-advance: 87.449219 */
{'M', 0x4189731d, 0x414c06df},
{'l', 0x409655e8, 0x00000000},
{'8', 0xe961004b, 0x9d17e917},
{'l', 0x00000000, 0xc1852784},
{'q', 0x00000000, 0xc127844a},
{0, 0x404149e0, 0xc173c224},
{'8', 0xcc53da18, 0xcdadf3c5},
{'9', 0xffdaffe8, 0xff86ffe8},
{'l', 0x00000000, 0xc1852786},
{'8', 0x9de9b400, 0xe99fe9ea},
{'4', 0x0000ffdb, 0xffb40000},
{'l', 0x40874d50, 0x00000000},
{'q', 0x4185b0f7, 0x00000000},
{0, 0x41b24149, 0x409eed20},
{'9', 0x0027002d, 0x009d002d},
{'l', 0x00000000, 0x4180dbec},
{'8', 0x6f1c5000, 0x1e691e1c},
{'4', 0x00000021, 0x004c0000},
{'l', 0xc0852780, 0x00000000},
{'q', 0xc1187bb8, 0x00000000},
{0, 0xc1527848, 0x407920a0},
{'9', 0x001fffe4, 0x0070ffe4},
{'l', 0x00000000, 0x41805278},
{'q', 0x00000000, 0x416d50c0},
{0, 0xc0b46718, 0x419e63a6},
{'9', 0x0027ffd4, 0x0027ff4e},
{'l', 0xc0874d50, 0x00000000},
{'l', 0x00000000, 0xc11aa181},
{'@', 0x0000007e, 0x0000732a},/*        ~        x-advance: 115.164062 */
{'M', 0x42c93543, 0xc25b5430},
{'l', 0x00000000, 0x413f2414},
{'8', 0x3c982ac8, 0x129d12d1},
{'q', 0xc0ec3dd0, 0x00000000},
{0, 0xc189731c, 0xc07d6c40},
{'8', 0xfdf8fefb, 0xfcf5fffd},
{'q', 0xc1267168, 0xc0852780},
{0, 0xc185b0f8, 0xc0852780},
{'8', 0x14a300d1, 0x409e14d2},
{'l', 0x00000000, 0xc13f2414},
{'8', 0xc468d638, 0xee64ee30},
{'q', 0x40ec3dd8, 0x00000000},
{0, 0x4189fc90, 0x4080dbe8},
{'8', 0x03080205, 0x040b0104},
{'q', 0x41267164, 0x40852780},
{0, 0x4185b0f6, 0x40852780},
{'8', 0xec5b002e, 0xbf64ec2d},
};
#define CTX_FONT_ascii 1
#endif
#endif //_CTX_INTERNAL_FONT_
#ifndef __CTX_LIST__
#define __CTX_LIST__

#if !__COSMOPOLITAN__
#include <stdlib.h>
#endif

/* The whole ctx_list implementation is in the header and will be inlined
 * wherever it is used.
 */

/* Allocate a zero-filled buffer of size * count bytes.
 *
 * Note that the argument order is (size, count), the reverse of the C
 * library's calloc().  Returns NULL when the allocation fails or when
 * size * count does not fit in size_t; release the result with free().
 */
static inline void *ctx_calloc (size_t size, size_t count)
{
  /* calloc() rejects products that overflow and returns zeroed memory, so
   * neither a manual multiplication nor a manual fill loop is needed, and
   * a failed allocation is no longer written through. */
  return calloc (count, size);
}

/* Node of a singly linked list carrying an opaque payload pointer. */
struct _CtxList {
  void *data;          /* payload stored in this node */
  CtxList *next;       /* following node, NULL at the end of the list */
  /* optional callback; ctx_list_remove() calls it with (data, freefunc_data)
   * right before the node itself is freed */
  void (*freefunc)(void *data, void *freefunc_data);
  void *freefunc_data; /* second argument handed to freefunc */
};

/* Push a new node holding `data` onto the front of *list and remember the
 * freefunc / freefunc_data pair in that node.
 */
static inline void ctx_list_prepend_full (CtxList **list, void *data,
    void (*freefunc)(void *data, void *freefunc_data),
    void *freefunc_data)
{
  CtxList *node = (CtxList*) ctx_calloc (sizeof (CtxList), 1);

  node->data          = data;
  node->freefunc      = freefunc;
  node->freefunc_data = freefunc_data;
  node->next          = *list;   /* old head follows the new node */
  *list = node;
}

/* Count the nodes reachable from `list`; an empty (NULL) list has length 0. */
static inline int ctx_list_length (CtxList *list)
{
  int count = 0;
  while (list)
    {
      count++;
      list = list->next;
    }
  return count;
}

/* Push a new node holding `data` onto the front of *list; the node gets no
 * free callback (its remaining fields stay zeroed by ctx_calloc).
 */
static inline void ctx_list_prepend (CtxList **list, void *data)
{
  CtxList *node = (CtxList*) ctx_calloc (sizeof (CtxList), 1);

  node->data = data;
  node->next = *list;
  *list = node;
}

/* Return the node `no` steps after `list` (0 is `list` itself), or NULL when
 * the list ends before that many steps have been taken.
 */
static inline CtxList *ctx_list_nth (CtxList *list, int no)
{
  for (; no != 0 && list != NULL; no--)
    list = list->next;
  return list;
}

/* Return the payload of the node at position `no`, or NULL when there is no
 * such node.
 */
static inline void *ctx_list_nth_data (CtxList *list, int no)
{
  CtxList *node = ctx_list_nth (list, no);
  return node ? node->data : NULL;
}


/* Insert a new node holding `data` in front of `sibling`.
 *
 * When the list is empty or `sibling` is the head, the new node becomes the
 * head.  Otherwise the list is walked to the node preceding `sibling`; if
 * `sibling` is never met the walk stops at the last node and the new node
 * is linked after it, with its next pointer set to `sibling`.
 */
static inline void
ctx_list_insert_before (CtxList **list, CtxList *sibling,
                       void *data)
{
  CtxList *before;
  CtxList *node;

  if (*list == NULL || *list == sibling)
    {
      ctx_list_prepend (list, data);
      return;
    }

  /* advance until `before` directly precedes sibling, or is the last node */
  before = *list;
  while (before->next && before->next != sibling)
    before = before->next;

  node = (CtxList*) ctx_calloc (sizeof (CtxList), 1);
  node->data = data;
  node->next = sibling;
  before->next = node;
}

/* Unlink the node `link` from *list without freeing it or touching its
 * payload; the unlinked node's next pointer is reset to NULL.  Nothing
 * happens when `link` is not part of the list.
 */
static inline void ctx_list_remove_link (CtxList **list, CtxList *link)
{
  CtxList *before;

  /* the head is replaced by its successor */
  if (*list == link)
    {
      *list = link->next;
      link->next = NULL;
      return;
    }

  /* otherwise look for the node whose successor is `link` */
  for (before = *list; before && before->next; before = before->next)
    {
      if (before->next == link)
        {
          before->next = link->next;
          link->next = NULL;
          return;
        }
    }
}

/* Remove the first node whose payload pointer equals `data`.
 *
 * If the node has a freefunc it is called with (data, freefunc_data) before
 * the node is freed.  An empty list, or a list that does not contain `data`,
 * is left untouched (the previous code dereferenced *list unconditionally
 * and crashed on an empty list).
 */
static inline void ctx_list_remove (CtxList **list, void *data)
{
  CtxList **slot;

  /* `slot` is the pointer that refers to the current node - either *list
   * itself or the previous node's next field - so head and interior
   * removals share one code path. */
  for (slot = list; *slot; slot = &(*slot)->next)
    {
      CtxList *node = *slot;
      if (node->data != data)
        continue;
      if (node->freefunc)
        node->freefunc (node->data, node->freefunc_data);
      *slot = node->next;
      free (node);
      return;
    }
}

/* Remove every node of *list, head first, through ctx_list_remove(); *list is
 * NULL afterwards.
 */
static inline void ctx_list_free (CtxList **list)
{
  for (;;)
    {
      CtxList *head = *list;
      if (head == NULL)
        break;
      ctx_list_remove (list, head->data);
    }
}

/* Reverse the order of the nodes in *list, in place.
 *
 * The nodes are relinked rather than copied: no memory is allocated, every
 * node keeps its freefunc / freefunc_data, and no freefunc is invoked.  (The
 * earlier copy-then-free approach ran each node's freefunc on payloads that
 * the reversed list still pointed to.)
 */
static inline void
ctx_list_reverse (CtxList **list)
{
  CtxList *reversed = NULL;
  CtxList *node = *list;

  while (node)
    {
      CtxList *following = node->next;
      node->next = reversed;   /* node becomes the new front */
      reversed = node;
      node = following;
    }
  *list = reversed;
}

/* Return the payload of the final node, or NULL for an empty list. */
static inline void *ctx_list_last (CtxList *list)
{
  if (list == NULL)
    return NULL;
  while (list->next)
    list = list->next;
  return list->data;
}

/* Attach `list_b` after the last node of *list; when *list is empty it simply
 * becomes `list_b`.
 */
static inline void ctx_list_concat (CtxList **list, CtxList *list_b)
{
  CtxList **tail = list;

  /* find the first NULL link: *list itself or the last node's next field */
  while (*tail)
    tail = &(*tail)->next;
  *tail = list_b;
}

/* Add a new node holding `data` at the end of *list, recording the
 * freefunc / freefunc_data pair in the node.
 */
static inline void ctx_list_append_full (CtxList **list, void *data,
    void (*freefunc)(void *data, void *freefunc_data),
    void *freefunc_data)
{
  CtxList *node = (CtxList*) ctx_calloc (sizeof (CtxList), 1);

  node->freefunc_data = freefunc_data;
  node->freefunc      = freefunc;
  node->data          = data;
  /* node->next is already NULL from ctx_calloc, so it is a one-node list */
  ctx_list_concat (list, node);
}

/* Add a new node holding `data` at the end of *list, without a free
 * callback.
 */
static inline void ctx_list_append (CtxList **list, void *data)
{
  void (*no_freefunc)(void *, void *) = NULL;

  ctx_list_append_full (list, data, no_freefunc, NULL);
}

/* Insert a new node holding `data` so that it ends up at index `no`
 * (0 is the head).
 *
 *  - an empty list or no == 0 prepends,
 *  - no >= the list length appends,
 *  - a negative `no` appends, as it always has.
 *
 * The previous implementation was off by one: no == 1 appended at the end of
 * the list and larger values inserted at index no - 1.
 */
static inline void
ctx_list_insert_at (CtxList **list,
                    int       no,
                    void     *data)
{
  CtxList *prev;
  CtxList *new_;
  int      pos;

  if (*list == NULL || no == 0)
    {
      ctx_list_prepend (list, data);
      return;
    }
  if (no < 0)
    {
      ctx_list_append (list, data);
      return;
    }

  /* walk to the node at index no - 1, stopping early at the last node */
  prev = *list;
  for (pos = 1; pos < no && prev->next; pos++)
    prev = prev->next;

  new_ = (CtxList*) ctx_calloc (sizeof (CtxList), 1);
  new_->data = data;
  new_->next = prev->next;
  prev->next = new_;
}

/* Merge two already ordered lists into one by relinking their nodes and
 * return the head of the result.
 *
 * At every step the head of list1 is taken when
 * compare (list1->data, list2->data, userdata) >= 0, otherwise the head of
 * list2; once one list runs out the remainder of the other is attached
 * unchanged.  The merge is iterative, so its stack use no longer grows with
 * the length of the lists; the comparisons happen in the same order as in
 * the recursive formulation.
 */
static CtxList*
ctx_list_merge_sorted (CtxList* list1,
                       CtxList* list2,
    int(*compare)(const void *a, const void *b, void *userdata), void *userdata
)
{
  CtxList  *merged = NULL;
  CtxList **tail   = &merged;   /* link that receives the next node */

  while (list1 && list2)
    {
      if (compare (list1->data, list2->data, userdata) >= 0)
        {
          *tail = list1;
          list1 = list1->next;
        }
      else
        {
          *tail = list2;
          list2 = list2->next;
        }
      tail = &(*tail)->next;
    }
  *tail = list1 ? list1 : list2;
  return merged;
}

/* Cut the list starting at `head` into two lists.
 *
 * *list1 receives `head`; *list2 receives the second part (NULL when the
 * list has fewer than two nodes).  For longer lists the first part gets the
 * extra node when the length is odd.
 */
static void
ctx_list_split_half (CtxList*  head,
                     CtxList** list1,
                     CtxList** list2)
{
  CtxList *mid;
  CtxList *probe;

  if (head == NULL || head->next == NULL)
    {
      *list1 = head;
      *list2 = NULL;
      return;
    }

  /* `probe` moves two nodes for every node `mid` moves, so `mid` is the
   * last node of the first half when `probe` runs off the end */
  mid = head;
  for (probe = head->next; probe && probe->next; probe = probe->next->next)
    mid = mid->next;

  *list1 = head;
  *list2 = mid->next;
  mid->next = NULL;
}

/* Merge sort of *head: the list is split in two, each part is sorted
 * recursively and the parts are combined with ctx_list_merge_sorted(), which
 * defines the resulting order in terms of `compare`.
 */
static inline void ctx_list_sort (CtxList **head,
    int(*compare)(const void *a, const void *b, void *userdata),
    void *userdata)
{
  CtxList *front;
  CtxList *back;

  /* lists of length 0 or 1 are already sorted */
  if (!(*head && (*head)->next))
    return;

  ctx_list_split_half (*head, &front, &back);
  ctx_list_sort (&front, compare, userdata);
  ctx_list_sort (&back, compare, userdata);
  *head = ctx_list_merge_sorted (front, back, compare, userdata);
}

/* Add `item` to *list and re-sort the whole list with ctx_list_sort(). */
static inline void ctx_list_insert_sorted (CtxList **list,
                                           void     *item,
    int(*compare)(const void *a, const void *b, void *userdata),
                                           void     *userdata)
{
  /* the position of the new node does not matter, the sort places it */
  ctx_list_prepend (list, item);

  ctx_list_sort (list, compare, userdata);
}


/* Return the first node for which compare (node->data, needle) yields 0, or
 * NULL when there is none.  `userdata` is accepted but not passed on to
 * `compare`.
 */
static inline CtxList *ctx_list_find_custom (CtxList *list,
                                         void    *needle,
                                         int(*compare)(const void *a, const void *b),
                                         void *userdata)
{
  while (list)
    {
      if (!compare (list->data, needle))
        return list;
      list = list->next;
    }
  return NULL;
}

#endif
/* definitions that determine which features are included and their settings,
 * for particular platforms - in particular microcontrollers ctx might need
 * tuning for different quality/performance/resource constraints.
 *
 * the way to configure ctx is to set these defines, before both including it
 * as a header and in the file where CTX_IMPLEMENTATION is set to include the
 * implementation for different featureset and runtime settings.
 *
 */

/* whether the font rendering happens in the backend or the front-end of the
 * API; the option is set to 0 by the tool that converts ttf fonts to the ctx
 * internal representation - both should be possible so that this tool can be
 * made into a TTF/OTF font import at runtime (perhaps even with live
 * subsetting).
 */
#ifndef CTX_BACKEND_TEXT
#define CTX_BACKEND_TEXT 1
#endif


#define CTX_RASTERIZER_AA_SLOPE_LIMIT3           (65536/CTX_SUBDIV/15)
#define CTX_RASTERIZER_AA_SLOPE_LIMIT3_FAST_AA   (65536/CTX_SUBDIV/15)
//#define CTX_RASTERIZER_AA_SLOPE_LIMIT3_FAST_AA (120536/CTX_SUBDIV/15)
//#define CTX_RASTERIZER_AA_SLOPE_LIMIT3_FAST_AA (105000/CTX_SUBDIV/15)
#define CTX_RASTERIZER_AA_SLOPE_LIMIT5           (140425/CTX_SUBDIV/15)
#define CTX_RASTERIZER_AA_SLOPE_LIMIT15          (260425/CTX_SUBDIV/15)

/* subpixel-aa coordinates used in BITPACKing of drawlist
 *
 * powers of 2 is faster
 */
#ifndef CTX_SUBDIV
#define CTX_SUBDIV   8  //  max framebufer width 4095
//#define CTX_SUBDIV  10  //  max framebufer width 3250
//#define CTX_SUBDIV  16  //  max framebufer width 2047
//#define CTX_SUBDIV  24  //  max framebufer width 1350
//#define CTX_SUBDIV  32  //  max framebufer width 1023
#endif


// 8    12 68 40 24
// 16   12 68 40 24
/* scale-factor for font outlines prior to bit quantization by CTX_SUBDIV
 *
 * changing this also changes font file format - the value should be baked
 * into the ctxf files making them less dependent on the ctx used to
 * generate them
 */
#define CTX_BAKE_FONT_SIZE    160

/* pack some linetos/curvetos/movetos into denser drawlist instructions,
 * permitting more vectors to be stored in the same space, experimental
 * feature with added overhead.
 */
#ifndef CTX_BITPACK
#define CTX_BITPACK           1
#endif

/* whether we have a shape-cache where we keep pre-rasterized bitmaps of
 * commonly occurring small shapes, disabled by default since it has some
 * glitches (and potential hangs with multi threading).
 */
#ifndef CTX_SHAPE_CACHE
#define CTX_SHAPE_CACHE        0
#endif


#ifndef CTX_SHAPE_CACHE_DEFAULT // the default set runtime value
                                // when enabled
#define CTX_SHAPE_CACHE_DEFAULT   0
#endif

/* size (in pixels, w*h) that we cache rasterization for
 */
#ifndef CTX_SHAPE_CACHE_DIM
#define CTX_SHAPE_CACHE_DIM      (64*64)
#endif

#ifndef CTX_SHAPE_CACHE_MAX_DIM
#define CTX_SHAPE_CACHE_MAX_DIM  256
#endif

/* maximum number of entries in shape cache
 */
#ifndef CTX_SHAPE_CACHE_ENTRIES
#define CTX_SHAPE_CACHE_ENTRIES  1024
#endif


#ifndef CTX_PARSER_FIXED_TEMP
#define CTX_PARSER_FIXED_TEMP 0
         // when 1  CTX_PARSER_MAXLEN is the fixed max stringlen
#endif   // and no allocations happens beyond creating the parser,
         // when 0 the scratchbuf for parsing is a separate dynamically
         // growing buffer, that maxes out at CTX_PARSER_MAXLEN
         //
/* Maximum length of the parser's string scratch buffer.  The products are
 * parenthesized so the macro behaves as a single operand when it is used
 * inside a larger expression (for instance next to %, / or a cast).
 */
#ifndef CTX_PARSER_MAXLEN
#if CTX_PARSER_FIXED_TEMP
#define CTX_PARSER_MAXLEN  (1024*128)      // This is the maximum texture/string size supported
#else
#define CTX_PARSER_MAXLEN  (1024*1024*16)  // 16mb
#endif
#endif


#ifndef CTX_FAST_FILL_RECT
#define CTX_FAST_FILL_RECT 1    /*  matters most for tiny rectangles where it shaves overhead, for larger rectangles
                                    a ~15-20% performance win can be seen. */
#endif


#ifndef CTX_COMPOSITING_GROUPS
#define CTX_COMPOSITING_GROUPS   1
#endif

/* maximum nesting level of compositing groups
 */
#ifndef CTX_GROUP_MAX
#define CTX_GROUP_MAX             8
#endif

#ifndef CTX_ENABLE_CLIP
#define CTX_ENABLE_CLIP           1
#endif

/* use a 1bit clip buffer, saving RAM on microcontrollers, other rendering
 * will still be antialiased.
 */
#ifndef CTX_1BIT_CLIP
#define CTX_1BIT_CLIP             0
#endif


#ifndef CTX_ENABLE_SHADOW_BLUR
#define CTX_ENABLE_SHADOW_BLUR    0
#endif

#ifndef CTX_GRADIENTS
#define CTX_GRADIENTS             1
#endif

#ifndef CTX_ALIGNED_STRUCTS
#define CTX_ALIGNED_STRUCTS       1
#endif

#ifndef CTX_GRADIENT_CACHE
#define CTX_GRADIENT_CACHE        1
#endif

#ifndef CTX_FONTS_FROM_FILE
#define CTX_FONTS_FROM_FILE  0
#endif

#ifndef CTX_GET_CONTENTS
#if CTX_FONTS_FROM_FILE
#define CTX_GET_CONTENTS    1
#else
#define CTX_GET_CONTENTS    0
#endif
#endif

#ifndef CTX_FORMATTER
#define CTX_FORMATTER       1
#endif

#ifndef CTX_PARSER
#define CTX_PARSER          1
#endif

#ifndef CTX_CURRENT_PATH
#define CTX_CURRENT_PATH    1
#endif

#ifndef CTX_XML
#define CTX_XML             1
#endif

#ifndef CTX_CLIENTS
#define CTX_CLIENTS              0
#endif

/* when ctx_math is defined, which it is by default, we use ctx' own
 * implementations of math functions, instead of relying on math.h
 * the possible inlining gives us a slight speed-gain, and on
 * embedded platforms guarantees that we do not do double precision
 * math.
 */
#ifndef CTX_MATH
#define CTX_MATH           1  // use internal fast math for sqrt,sin,cos,atan2f etc.
#endif

#define ctx_log(fmt, ...)
//#define ctx_log(str, a...) fprintf(stderr, str, ##a)

/* the initial journal size - for both rasterizer
 * edgelist and drawlist.
 */
#ifndef CTX_MIN_JOURNAL_SIZE
#define CTX_MIN_JOURNAL_SIZE      512
#endif

/* The maximum size we permit the drawlist to grow to,
 * the memory used is this number * 9, where 9 is sizeof(CtxEntry)
 */
#ifndef CTX_MAX_JOURNAL_SIZE
//#define CTX_MAX_JOURNAL_SIZE   CTX_MIN_JOURNAL_SIZE
/* parenthesized so the product stays one operand inside larger expressions */
#define CTX_MAX_JOURNAL_SIZE (1024*1024*8)
#endif

#ifndef CTX_DRAWLIST_STATIC
#define CTX_DRAWLIST_STATIC  0
#endif

/* Default edge list size; parenthesized so the product stays one operand
 * inside larger expressions.
 */
#ifndef CTX_MIN_EDGE_LIST_SIZE
#define CTX_MIN_EDGE_LIST_SIZE   (2048*2)
#endif

#ifndef CTX_RASTERIZER_AA
#define CTX_RASTERIZER_AA 15   // vertical-AA of CTX_ANTIALIAS_DEFAULT
#endif

/* The maximum complexity of a single path
 */
#ifndef CTX_MAX_EDGE_LIST_SIZE
#define CTX_MAX_EDGE_LIST_SIZE  CTX_MIN_EDGE_LIST_SIZE
#endif

#ifndef CTX_STRINGPOOL_SIZE
  // XXX should be possible to make zero and disappear when codepaths not in use
  //     to save size, for card10 this is defined as a low number (some text
  //     properties still make use of it)
  //     
  //     for desktop-use this should be fully dynamic, possibly
  //     with chained pools, gradients are stored here.
#define CTX_STRINGPOOL_SIZE     1000 //
#endif

#ifndef CTX_32BIT_SEGMENTS
#define CTX_32BIT_SEGMENTS 1  // without this clipping problems might
                              // occur when drawing far outside the viewport
                              // on micro controllers you most often will
                              // want this set to 0
#endif

/* whether we dither or not for gradients
 */
#ifndef CTX_DITHER
#define CTX_DITHER 0
#endif

/*  only source-over, clear and copy will work; the API is still
 *  accepted - but the backend is limited, for use to measure
 *  size and possibly in severely constrained ROMs.
 */
#ifndef CTX_BLENDING_AND_COMPOSITING
#define CTX_BLENDING_AND_COMPOSITING 1
#endif

/*  this forces the inlining of some performance
 *  critical paths.
 */
#ifndef CTX_FORCE_INLINES
#define CTX_FORCE_INLINES               1
#endif

/* create one-off inlined inner loop for normal blend mode (for floating point
 * and grayscale; for RGBA8 manual loops override). Disabling this should speed
 * up compiles at a penalty for the given formats.
 */
#ifndef CTX_INLINED_NORMAL     
#define CTX_INLINED_NORMAL      0
#endif

/*
 *  do not use manual RGBA8 code but rely on ctx inline templating
 */
/* Guard on the macro that is actually being defined: the previous guard
 * tested CTX_INLINED_NORMAL, which always has a value by this point, so this
 * default was never applied.
 */
#ifndef CTX_INLINED_NORMAL_RGBA8
#define CTX_INLINED_NORMAL_RGBA8  0
#endif

#ifndef CTX_RASTERIZER_SWITCH_DISPATCH
#define CTX_RASTERIZER_SWITCH_DISPATCH  1 // marginal improvement for some
                                          // modes, maybe get rid of this?
#endif

#ifndef CTX_U8_TO_FLOAT_LUT
#define CTX_U8_TO_FLOAT_LUT  0
#endif

#ifndef CTX_INLINED_GRADIENTS
#define CTX_INLINED_GRADIENTS   1
#endif

#ifndef CTX_BRAILLE_TEXT
#define CTX_BRAILLE_TEXT        0
#endif

/* Build code paths for grayscale rasterization, this makes clipping
 * faster.
 */
#ifndef CTX_NATIVE_GRAYA8
#define CTX_NATIVE_GRAYA8       1
#endif

/* enable CMYK rasterization targets
 */
#ifndef CTX_ENABLE_CMYK
#define CTX_ENABLE_CMYK         1
#endif

/* enable color management, slightly increases CtxColor struct size, can
 * be disabled for microcontrollers.
 */
#ifndef CTX_ENABLE_CM
#define CTX_ENABLE_CM           1
#endif

#ifndef CTX_EVENTS
#define CTX_EVENTS              1
#endif


#ifndef CTX_LIMIT_FORMATS
#define CTX_LIMIT_FORMATS       0
#endif

#ifndef CTX_ENABLE_FLOAT
#define CTX_ENABLE_FLOAT        0
#endif

/* by default ctx includes all pixel formats, on microcontrollers
 * it can be useful to slim down code and runtime size by only
 * defining the used formats, set CTX_LIMIT_FORMATS to 1, and
 * manually add CTX_ENABLE_ flags for each of them.
 */
#if CTX_LIMIT_FORMATS
#if CTX_NATIVE_GRAYA8
#define CTX_ENABLE_GRAYA8               1
#define CTX_ENABLE_GRAY8                1
#endif
#else

#define CTX_ENABLE_GRAY1                1
#define CTX_ENABLE_GRAY2                1
#define CTX_ENABLE_GRAY4                1
#define CTX_ENABLE_GRAY8                1
#define CTX_ENABLE_GRAYA8               1
#define CTX_ENABLE_GRAYF                1
#define CTX_ENABLE_GRAYAF               1

#define CTX_ENABLE_RGB8                 1
#define CTX_ENABLE_RGBA8                1
#define CTX_ENABLE_BGRA8                1
#define CTX_ENABLE_RGB332               1
#define CTX_ENABLE_RGB565               1
#define CTX_ENABLE_RGB565_BYTESWAPPED   1
#define CTX_ENABLE_RGBAF                1
#ifdef CTX_ENABLE_FLOAT
#undef CTX_ENABLE_FLOAT
#endif
#define CTX_ENABLE_FLOAT                1
#define CTX_ENABLE_YUV420               1

#if CTX_ENABLE_CMYK
#define CTX_ENABLE_CMYK8                1
#define CTX_ENABLE_CMYKA8               1
#define CTX_ENABLE_CMYKAF               1
#endif
#endif

#ifndef CTX_RGB565_ALPHA
#define CTX_RGB565_ALPHA                0   // when enabled pure purple is transparent,
                                            // for a ~15% overall performance hit
#endif

#ifndef CTX_RGB332_ALPHA
#define CTX_RGB332_ALPHA                0   // when enabled pure purple is transparent,
                                            // for a ~15% overall performance hit
#endif

/* by including ctx-font-regular.h, or ctx-font-mono.h the
 * built-in fonts using ctx drawlist encoding is enabled
 */
#if CTX_FONT_regular || CTX_FONT_mono || CTX_FONT_bold \
  || CTX_FONT_italic || CTX_FONT_sans || CTX_FONT_serif \
  || CTX_FONT_ascii
#ifndef CTX_FONT_ENGINE_CTX
#define CTX_FONT_ENGINE_CTX        1
#endif
#endif

#ifndef CTX_FONT_ENGINE_CTX_FS
#define CTX_FONT_ENGINE_CTX_FS 0
#endif

/* If stb_truetype.h is included before ctx.h add integration code for runtime loading
 * of opentype fonts.
 */
/* With stb_truetype available the engine defaults to on (a user-supplied
 * value wins); without it the engine is forced off.  The #undef keeps a
 * user-supplied definition from being redefined with a different value.
 */
#ifdef __STB_INCLUDE_STB_TRUETYPE_H__
#ifndef CTX_FONT_ENGINE_STB
#define CTX_FONT_ENGINE_STB        1
#endif
#else
#ifdef CTX_FONT_ENGINE_STB
#undef CTX_FONT_ENGINE_STB
#endif
#define CTX_FONT_ENGINE_STB        0
#endif

#ifndef CTX_BABL
#ifdef _BABL_H
#define CTX_BABL 1
#else
#define CTX_BABL 0
#endif
#endif

#ifndef _BABL_H
#undef CTX_BABL
#define CTX_BABL 0
#endif

#ifndef CTX_ALWAYS_USE_NEAREST_FOR_SCALE1
#define CTX_ALWAYS_USE_NEAREST_FOR_SCALE1 0
#endif

/* force add format if we have shape cache */
#if CTX_SHAPE_CACHE
#ifdef CTX_ENABLE_GRAY8
#undef CTX_ENABLE_GRAY8
#endif
#define CTX_ENABLE_GRAY8  1
#endif

/* include the bitpack packer, can be opted out of to decrease code size
 */
#ifndef CTX_BITPACK_PACKER
#define CTX_BITPACK_PACKER 0
#endif

/* enable RGBA8 intermediate format for
 *the indirectly implemented pixel-formats.
 */
#if CTX_ENABLE_GRAY1 | CTX_ENABLE_GRAY2 | CTX_ENABLE_GRAY4 | CTX_ENABLE_RGB565 | CTX_ENABLE_RGB565_BYTESWAPPED | CTX_ENABLE_RGB8 | CTX_ENABLE_RGB332

  #ifdef CTX_ENABLE_RGBA8
    #undef CTX_ENABLE_RGBA8
  #endif
  #define CTX_ENABLE_RGBA8  1
#endif

#ifdef CTX_ENABLE_CMYKF
#ifdef CTX_ENABLE_FLOAT
#undef CTX_ENABLE_FLOAT
#endif
#define CTX_ENABLE_FLOAT 1
#endif

#ifdef CTX_ENABLE_GRAYF
#ifdef CTX_ENABLE_FLOAT
#undef CTX_ENABLE_FLOAT
#endif
#define CTX_ENABLE_FLOAT 1
#endif

#ifdef CTX_ENABLE_GRAYAF
#ifdef CTX_ENABLE_FLOAT
#undef CTX_ENABLE_FLOAT
#endif
#define CTX_ENABLE_FLOAT 1
#endif

#ifdef CTX_ENABLE_RGBAF
#ifdef CTX_ENABLE_FLOAT
#undef CTX_ENABLE_FLOAT
#endif
#define CTX_ENABLE_FLOAT 1
#endif

#ifdef CTX_ENABLE_CMYKAF
#ifdef CTX_ENABLE_FLOAT
#undef CTX_ENABLE_FLOAT
#endif
#define CTX_ENABLE_FLOAT 1
#endif

#ifdef CTX_ENABLE_CMYKF
#ifdef CTX_ENABLE_FLOAT
#undef CTX_ENABLE_FLOAT
#endif
#define CTX_ENABLE_FLOAT 1
#endif


/* enable cmykf which is cmyk intermediate format
 */
#ifdef CTX_ENABLE_CMYK8
#ifdef CTX_ENABLE_CMYKF
#undef CTX_ENABLE_CMYKF
#endif
#define CTX_ENABLE_CMYKF  1
#endif
#ifdef CTX_ENABLE_CMYKA8
#ifdef CTX_ENABLE_CMYKF
#undef CTX_ENABLE_CMYKF
#endif
#define CTX_ENABLE_CMYKF  1
#endif

#ifdef CTX_ENABLE_CMYKF8
#ifdef CTX_ENABLE_CMYK
#undef CTX_ENABLE_CMYK
#endif
#define CTX_ENABLE_CMYK   1
#endif

#define CTX_PI                              3.141592653589793f
#ifndef CTX_RASTERIZER_MAX_CIRCLE_SEGMENTS
#define CTX_RASTERIZER_MAX_CIRCLE_SEGMENTS  (180)
#endif

#ifndef CTX_MAX_FRAMEBUFFER_WIDTH
#define CTX_MAX_FRAMEBUFFER_WIDTH 2560
#endif

#ifndef CTX_MAX_FONTS
#define CTX_MAX_FONTS            3
#endif

#ifndef CTX_MAX_STATES
#define CTX_MAX_STATES           10
#endif

#ifndef CTX_MAX_EDGES
#define CTX_MAX_EDGES            257
#endif

#ifndef CTX_MAX_LINGERING_EDGES
#define CTX_MAX_LINGERING_EDGES  64
#endif


#ifndef CTX_MAX_PENDING
#define CTX_MAX_PENDING          128
#endif

#ifndef CTX_MAX_TEXTURES
#define CTX_MAX_TEXTURES         32
#endif

#ifndef CTX_HASH_ROWS
#define CTX_HASH_ROWS            4
#endif
#ifndef CTX_HASH_COLS
#define CTX_HASH_COLS            8
#endif

#ifndef CTX_INLINE_FILL_RULE
#define CTX_INLINE_FILL_RULE 1
#endif

#ifndef CTX_MAX_THREADS
#define CTX_MAX_THREADS          8 // runtime is max of cores/2 and this
#endif

#ifndef CTX_FRAGMENT_SPECIALIZE
#define CTX_FRAGMENT_SPECIALIZE 1
#endif

#define CTX_RASTERIZER_EDGE_MULTIPLIER  1024
                                        // increasing this to 2048
                                        // removes artifacts in top half of res-diagram -
                                        // but reduces maximum available buffer width
#ifndef CTX_IMPLEMENTATION
#define CTX_IMPLEMENTATION 0
#else
#undef CTX_IMPLEMENTATION
#define CTX_IMPLEMENTATION 1
#endif


#ifndef static_OPAQUE
#define static_OPAQUE 1
#endif

#ifndef CTX_SYNC_FRAMES
#define CTX_SYNC_FRAMES  1
#endif

#ifdef CTX_RASTERIZER
#if CTX_RASTERIZER==0
#if CTX_SDL || CTX_FB || CTX_HEADLESS
#undef CTX_RASTERIZER
#define CTX_RASTERIZER 1
#endif
#else
#undef CTX_RASTERIZER
#define CTX_RASTERIZER 1
#endif
#endif

#if CTX_SDL || CTX_FB || CTX_HEADLESS
#if CTX_EVENTS
#undef CTX_EVENTS
#endif
#define CTX_EVENTS 1
#endif

#if CTX_EVENTS
#ifndef CTX_HEADLESS
#define CTX_HEADLESS 1
#endif
#endif


#ifndef CTX_GRADIENT_CACHE_ELEMENTS
#define CTX_GRADIENT_CACHE_ELEMENTS 256
#endif

#ifndef CTX_PARSER_MAX_ARGS
#define CTX_PARSER_MAX_ARGS 20
#endif


#ifndef CTX_SCREENSHOT
#define CTX_SCREENSHOT 0
#endif

#ifndef CTX_ALSA
#define CTX_ALSA 0
#endif

#ifndef CTX_AUDIO
#define CTX_AUDIO 0
#endif

#if CTX_AUDIO==0
#if CTX_ALSA
#undef CTX_ALSA
#define CTX_ALSA 0
#endif
#endif

#ifndef CTX_CURL
#define CTX_CURL 0
#endif

#ifndef CTX_TILED
#if CTX_SDL || CTX_FB || CTX_KMS || CTX_HEADLESS
#define CTX_TILED 1
#else
#define CTX_TILED 0
#endif
#if !CTX_RASTERIZER
#undef CTX_RASTERIZER
#define CTX_RASTERIZER 1
#endif
#endif


#ifndef CTX_TILED_MERGE_HORIZONTAL_NEIGHBORS
#define CTX_TILED_MERGE_HORIZONTAL_NEIGHBORS 1
#endif


#ifndef CTX_THREADS
#if CTX_TILED
#define CTX_THREADS 1
#else
#define CTX_THREADS 0
#endif
#endif

#if CTX_THREADS
#include <pthread.h>
#define mtx_lock pthread_mutex_lock
#define mtx_unlock pthread_mutex_unlock
#define mtx_t pthread_mutex_t
#define cnd_t pthread_cond_t
#define mtx_plain NULL
#define mtx_init pthread_mutex_init
#define cnd_init(a) pthread_cond_init(a,NULL)
#define cnd_wait pthread_cond_wait
#define cnd_broadcast pthread_cond_broadcast
#define thrd_create(tid, tiled_render_fun, args) pthread_create(tid, NULL, tiled_render_fun, args)
#define thrd_t pthread_t
#endif

#ifndef CTX_SIMD_SUFFIX
#define CTX_SIMD_SUFFIX(symbol) symbol##_generic
#define CTX_SIMD_BUILD 0
#else


#define CTX_SIMD_BUILD 1
#ifdef CTX_COMPOSITE
#undef CTX_COMPOSITE
#define CTX_COMPOSITE 1
#endif

#endif


#if CTX_RASTERIZER
#ifndef CTX_COMPOSITE
#define CTX_COMPOSITE 1
#endif
#else
#ifndef CTX_COMPOSITE
#define CTX_COMPOSITE 0
#endif
#endif

#ifndef CTX_COMPOSITE
#define CTX_COMPOSITE 0
#endif


#ifndef CTX_BRANCH_HINTS
#define CTX_BRANCH_HINTS  1
#endif

#ifdef EMSCRIPTEN
#define CTX_WASM 1
#else
#define CTX_WASM 0
#endif


#if CTX_WASM
#undef CTX_THREADS
#define CTX_THREADS 0
#undef CTX_HEADLESS
#define CTX_HEADLESS 0
#undef CTX_TILED
#define CTX_TILED 0
#undef CTX_EVENTS
#define CTX_EVENTS 1
#undef CTX_PARSER
#define CTX_PARSER 1
#undef CTX_RASTERIZER
#define CTX_RASTERIZER 1
#undef CTX_SHAPE_CACHE
#define CTX_SHAPE_CACHE 0
#endif

#ifndef CTX_TINYVG
#define CTX_TINYVG 0
#endif

#define uncompress tinf_uncompress
#define Z_OK TINF_OK
#define Z_BUF_ERROR TINF_BUF_ERROR
#define Z_DATA_ERROR TINF_DATA_ERROR
 /* Copyright (C) 2020 Øyvind Kolås <pippin@gimp.org>
 */

#if CTX_FORMATTER

/* returns the maximum string length including terminating \0 */
int ctx_a85enc_len (int input_length);
int ctx_a85enc (const void *srcp, char *dst, int count);

#if CTX_PARSER

int ctx_a85dec (const char *src, char *dst, int count);
int ctx_a85len (const char *src, int count);
#endif

#endif
#ifndef __CTX_EXTRA_H
#define __CTX_EXTRA_H

/* CTX_INLINE is the inline qualifier used for small hot helpers; with
 * CTX_FORCE_INLINES set it additionally requests always_inline.
 */
#if CTX_FORCE_INLINES
#define CTX_INLINE inline __attribute__((always_inline))
#else
#define CTX_INLINE inline
#endif


/* Clamp val to [min, max].  This is a macro: its arguments can be evaluated
 * more than once, so do not pass expressions with side effects.
 */
#define CTX_CLAMP(val,min,max) ((val)<(min)?(min):(val)>(max)?(max):(val))

/* Minimum / maximum of two values.
 *
 * These select one operand instead of blending both with
 * (cond) * a + (!cond) * b: for floats that blend multiplies the discarded
 * operand by zero, which turns an infinite operand into NaN
 * (e.g. the minimum of 1 and +inf came out as NaN).  If either float
 * argument is NaN the second argument is returned.
 */
static CTX_INLINE int   ctx_mini (int a, int b)     { return a < b ? a : b; }
static CTX_INLINE float ctx_minf (float a, float b) { return a < b ? a : b; }
static CTX_INLINE int   ctx_maxi (int a, int b)     { return a > b ? a : b; }
static CTX_INLINE float ctx_maxf (float a, float b) { return a > b ? a : b; }

/* Clamp v to [min, max]. */
static CTX_INLINE float ctx_clampf (float v, float min, float max) {
       return CTX_CLAMP(v,min,max);
}


/* Selectable output modes, numbered consecutively from 0 in declaration
 * order.
 * NOTE(review): the code that interprets these values is not part of this
 * section; the names suggest terminal rendering styles plus ctx-stream and
 * UI output - confirm against the consumers before relying on that.
 */
typedef enum CtxOutputmode
{
  CTX_OUTPUT_MODE_QUARTER,
  CTX_OUTPUT_MODE_BRAILLE,
  CTX_OUTPUT_MODE_SIXELS,
  CTX_OUTPUT_MODE_GRAYS,
  CTX_OUTPUT_MODE_CTX,
  CTX_OUTPUT_MODE_CTX_COMPACT,
  CTX_OUTPUT_MODE_UI
} CtxOutputmode;






/* Square of a. */
static inline float ctx_pow2 (float a)
{
  const float squared = a * a;
  return squared;
}
#if CTX_MATH

/* Absolute value of x, computed by clearing the top (sign) bit of its 32-bit
 * representation.
 */
static CTX_INLINE float
ctx_fabsf (float x)
{
  union
  {
    float    value;
    uint32_t bits;
  } pun;

  pun.value = x;
  pun.bits &= 0x7fffffff;   /* keep everything except the sign bit */
  return pun.value;
}

/* Approximate 1/sqrt(x): a bit-level initial estimate (magic constant
 * 0x5f3759df) refined by two Newton-Raphson steps.
 */
static CTX_INLINE float
ctx_invsqrtf (float x)
{
  union
  {
    float    f;
    uint32_t i;
  } est;
  const float half_x = 0.5f * x;
  int step;

  est.f = x;
  est.i = 0x5f3759df - (est.i >> 1);
  /* the second pass repeats the refinement for higher precision */
  for (step = 0; step < 2; step++)
    est.f *= (1.5f - half_x * est.f * est.f);
  return est.f;
}

/* Coarse 1/sqrt(x): only the bit-level estimate (magic constant 0x5f3759df),
 * without any refinement step.
 */
static CTX_INLINE float
ctx_invsqrtf_fast (float x)
{
  union
  {
    float    f;
    uint32_t i;
  } est;

  est.f = x;
  est.i = 0x5f3759df - (est.i >> 1);
  return est.f;
}

/* Approximate square root, taken as the reciprocal of ctx_invsqrtf(). */
CTX_INLINE static float ctx_sqrtf (float a)
{
  const float inverse_root = ctx_invsqrtf (a);
  return 1.0f / inverse_root;
}

/* Coarse square root, taken as the reciprocal of ctx_invsqrtf_fast(). */
CTX_INLINE static float ctx_sqrtf_fast (float a)
{
  const float inverse_root = ctx_invsqrtf_fast (a);
  return 1.0f / inverse_root;
}

/* Length of the vector (a, b): ctx_sqrtf() of the sum of squares. */
CTX_INLINE static float ctx_hypotf (float a, float b)
{
  const float sum_of_squares = ctx_pow2 (a) + ctx_pow2 (b);
  return ctx_sqrtf (sum_of_squares);
}

/* Coarse length of the vector (a, b): ctx_sqrtf_fast() of the sum of
 * squares.
 */
CTX_INLINE static float ctx_hypotf_fast (float a, float b)
{
  const float sum_of_squares = ctx_pow2 (a) + ctx_pow2 (b);
  return ctx_sqrtf_fast (sum_of_squares);
}

/* Approximate sine of x (radians).
 *
 * The argument is first brought into [-pi, pi] and then fed to an odd
 * polynomial approximation.  Arguments with a magnitude above 1000*pi are
 * not reduced; they are replaced by +-0.5 so that the range reduction below
 * never has to convert an out-of-range float to an integer.  Both signs are
 * guarded before any conversion takes place - previously the negative side
 * converted first, which is undefined for huge negative or -infinite input.
 */
CTX_INLINE static float
ctx_sinf (float x)
{
  if (x < -CTX_PI * 1000)
  {
    x = -0.5f;
  }
  if (x > CTX_PI * 1000)
  {
          // really large numbers tend to cause practically infinite
          // loops since the > CTX_PI * 2 seemingly fails
    x = 0.5f;
  }
  if (x < -CTX_PI * 2)
    {
      /* reduce the mirrored positive value, then mirror back */
      x = -x;
      long ix = x / (CTX_PI * 2);
      x = x - ix * CTX_PI * 2;
      x = -x;
    }
  if (x > CTX_PI * 2)
    { 
      long ix = x / (CTX_PI * 2);
      x = x - (ix * CTX_PI * 2);
    }
  /* at most one iteration each is left after the reductions above */
  while (x < -CTX_PI)
    { x += CTX_PI * 2; }
  while (x > CTX_PI)
    { x -= CTX_PI * 2; }

  /* source : http://mooooo.ooo/chebyshev-sine-approximation/ */
  float coeffs[]=
  {
    -0.10132118f,           // x
      0.0066208798f,         // x^3
      -0.00017350505f,        // x^5
      0.0000025222919f,      // x^7
      -0.000000023317787f,    // x^9
      0.00000000013291342f
    }; // x^11
  /* Horner evaluation in x^2 */
  float x2 = x*x;
  float p11 = coeffs[5];
  float p9  = p11*x2 + coeffs[4];
  float p7  = p9*x2  + coeffs[3];
  float p5  = p7*x2  + coeffs[2];
  float p3  = p5*x2  + coeffs[1];
  float p1  = p3*x2  + coeffs[0];
  return (x - CTX_PI + 0.00000008742278f) *
         (x + CTX_PI - 0.00000008742278f) * p1 * x;
}

/* Approximate atan2 (y, x).
 *
 * x == 0 is answered directly (+pi/2, 0 or -pi/2 depending on y).  Otherwise
 * the ratio y/x goes through one of two rational approximations, chosen by
 * whether its magnitude is below 1, and the result is shifted by +-pi to
 * land in the quadrant given by the signs of x and y.
 */
static CTX_INLINE float ctx_atan2f (float y, float x)
{
  float ratio;
  float angle;

  if ( x == 0.0f )
    {
      if ( y > 0.0f )
        { return CTX_PI/2; }
      if ( y == 0.0f )
        { return 0.0f; }
      return -CTX_PI/2;
    }

  ratio = y/x;

  /* steep case: |y/x| is not below 1 */
  if ( ! ( ctx_fabsf ( ratio ) < 1.0f ) )
    {
      angle = CTX_PI/2 - ratio/ (ratio*ratio + 0.28f);
      return ( y < 0.0f ) ? angle - CTX_PI : angle;
    }

  /* shallow case */
  angle = ratio/ (1.0f + 0.28f*ratio*ratio);
  if ( x < 0.0f )
    {
      return ( y < 0.0f ) ? angle - CTX_PI : angle + CTX_PI;
    }
  return angle;
}


/* Approximate arctangent of a, evaluated as ctx_atan2f (a, 1). */
static CTX_INLINE float ctx_atanf (float a)
{
  const float unit_x = 1.0f;
  return ctx_atan2f (a, unit_x);
}

/* Approximate arcsine of x, evaluated as atan (x / sqrt (1 - x^2)) with the
 * reciprocal square root coming from ctx_invsqrtf().
 */
static CTX_INLINE float ctx_asinf (float x)
{
  const float inv_cosine = ctx_invsqrtf (1.0f-ctx_pow2 (x) );
  return ctx_atanf (x * inv_cosine);
}

/* Approximate arccosine of x for x in [-1, 1].
 *
 * Evaluated as atan2 (sqrt (1 - x^2), x).  Using the two-argument form keeps
 * the result in [0, pi] for negative x and avoids dividing by x, so x == 0
 * yields pi/2; the earlier atan (sqrt (1 - x^2) / x) produced negative
 * angles for x < 0 and a division by zero at x == 0.
 */
static CTX_INLINE float ctx_acosf (float x)
{
  return ctx_atan2f (ctx_sqrtf (1.0f-ctx_pow2 (x) ), x);
}

/* Approximate cosine of a, evaluated as the sine of a shifted by pi/2. */
CTX_INLINE static float ctx_cosf (float a)
{
  const float shifted = (a) + CTX_PI/2.0f;
  return ctx_sinf (shifted);
}

/* Approximate tangent of a: sine divided by cosine.  (The operands used to
 * be swapped, which produced the cotangent.)
 */
static CTX_INLINE float ctx_tanf (float a)
{
  return (ctx_sinf (a) /ctx_cosf (a) );
}
/* Largest integral value not greater than x.
 *
 * A plain (int) cast rounds toward zero, which is off by one for negative
 * non-integers (-1.5 became -1 instead of -2); the result is therefore
 * stepped down when truncation rounded up.  Values whose magnitude does not
 * fit in an int, as well as NaN and infinities, are returned unchanged -
 * floats that large have no fractional part - so the cast is never applied
 * to an out-of-range value.
 */
static CTX_INLINE float
ctx_floorf (float x)
{
  if (! (x > -2147483648.0f && x < 2147483648.0f) )
    return x;
  int truncated = (int) x;
  return (float) (truncated - (x < (float) truncated) );
}
/* Coarse approximation of e^x.
 *
 * e^x equals 2^p with p = x * log2(e).  Writing (p + bias) * 2^23 into the
 * bit pattern of a float places p in the exponent field, which yields a
 * piecewise linear approximation of 2^p.  Previously x was added to
 * log2(e) * bias without being scaled itself, which gave values around
 * 2^(x + 56) instead of e^x.
 *
 * p is limited to [-126, 128] so the bit pattern stays a finite positive
 * float and the conversion to uint32_t is always in range; a NaN argument
 * is treated like the lower limit.
 */
static CTX_INLINE float
ctx_expf (float x)
{
  float p = 1.442695040f * x;   /* log2(e) * x */
  if (! (p > -126.0f) )
    p = -126.0f;
  if (p > 128.0f)
    p = 128.0f;
  union { uint32_t i; float f; } v =
    {  (uint32_t)( (1 << 23) * (p + 126.94269504f)) };
  return v.f;
}

/* define more trig based on having sqrt, sin and atan2 */

#else
#if !__COSMOPOLITAN__
#include <math.h>
#endif
/* Thin wrappers that forward to the C library's single precision math
 * functions. */
static inline float ctx_fabsf (float x)
{
  return fabsf (x);
}

static inline float ctx_floorf (float x)
{
  return floorf (x);
}

static inline float ctx_sinf (float x)
{
  return sinf (x);
}

static inline float ctx_atan2f (float y, float x)
{
  return atan2f (y, x);
}

static inline float ctx_hypotf (float a, float b)
{
  return hypotf (a, b);
}

static inline float ctx_acosf (float a)
{
  return acosf (a);
}

static inline float ctx_cosf (float a)
{
  return cosf (a);
}

static inline float ctx_tanf (float a)
{
  return tanf (a);
}

static inline float ctx_expf (float p)
{
  return expf (p);
}

static inline float ctx_sqrtf (float a)
{
  return sqrtf (a);
}
#endif

/* Parse a floating point number from the start of `str` with strtod() and
 * return it narrowed to float; `endptr` is passed straight to strtod().
 */
static inline float _ctx_parse_float (const char *str, char **endptr)
{
  /* XXX: , vs . problem in some locales */
  const double parsed = strtod (str, endptr);
  return (float) parsed;
}

const char *ctx_get_string (Ctx *ctx, uint32_t hash);
void ctx_set_string (Ctx *ctx, uint32_t hash, const char *value);
typedef struct _CtxColor CtxColor;

void
ctx_matrix_translate (CtxMatrix *matrix, float x, float y);


void ctx_get_matrix (Ctx *ctx, CtxMatrix *matrix);
void ctx_set_matrix (Ctx *ctx, CtxMatrix *matrix);
int _ctx_is_rasterizer (Ctx *ctx);

int ctx_color (Ctx *ctx, const char *string);
typedef struct _CtxState CtxState;
CtxColor *ctx_color_new ();
CtxState *ctx_get_state (Ctx *ctx);
void ctx_color_get_rgba (CtxState *state, CtxColor *color, float *out);
void ctx_color_set_rgba (CtxState *state, CtxColor *color, float r, float g, float b, float a);
void ctx_color_free (CtxColor *color);
void ctx_set_color (Ctx *ctx, uint32_t hash, CtxColor *color);
int  ctx_get_color (Ctx *ctx, uint32_t hash, CtxColor *color);
int  ctx_color_set_from_string (Ctx *ctx, CtxColor *color, const char *string);

int ctx_color_is_transparent (CtxColor *color);
/* NOTE(review): presumably the byte length of a utf8 sequence given
 * its first byte - confirm with the definition. */
int ctx_utf8_len (const unsigned char first_byte);

/* Coordinate conversion, the values pointed to by x and y are updated
 * in place.  The _distance variants presumably ignore translation -
 * TODO confirm. */
void ctx_user_to_device          (Ctx *ctx, float *x, float *y);
void ctx_user_to_device_distance (Ctx *ctx, float *x, float *y);


void ctx_device_to_user          (Ctx *ctx, float *x, float *y);
void ctx_device_to_user_distance (Ctx *ctx, float *x, float *y);

const char *ctx_utf8_skip (const char *s, int utf8_length);
int ctx_is_set_now (Ctx *ctx, uint32_t hash);
void ctx_set_size (Ctx *ctx, int width, int height);

/* Returns a single scale estimate for @matrix: the result of ctx_maxf
 * applied over the absolute values of the four coefficients
 * m[0][0], m[0][1], m[1][0] and m[1][1].
 */
static inline float ctx_matrix_get_scale (CtxMatrix *matrix)
{
   float row0 = ctx_maxf (ctx_fabsf (matrix->m[0][0]),
                          ctx_fabsf (matrix->m[0][1]) );
   float row1 = ctx_maxf (ctx_fabsf (matrix->m[1][0]),
                          ctx_fabsf (matrix->m[1][1]) );
   return ctx_maxf (row0, row1);
}

/* Prototypes that are only available when the corresponding feature
 * macro is enabled, followed by unconditional buffer/frame helpers. */
#if CTX_GET_CONTENTS
/* NOTE(review): presumably loads the file at path, storing a pointer
 * to the data in *contents and its size in *length - confirm. */
int
_ctx_file_get_contents (const char     *path,
                        unsigned char **contents,
                        long           *length);
#endif

#if CTX_FONTS_FROM_FILE
int   ctx_load_font_ttf_file (const char *name, const char *path);
#endif

#if CTX_BABL
void ctx_rasterizer_colorspace_babl (CtxState      *state,
                                     CtxColorSpace  space_slot,
                                     const Babl    *space);
#endif
/* takes an ICC profile as a block of icc_length bytes at icc_data */
void ctx_rasterizer_colorspace_icc (CtxState      *state,
                                    CtxColorSpace  space_slot,
                                    char          *icc_data,
                                    int            icc_length);


CtxBuffer *ctx_buffer_new_bare (void);

/* Attach pixel data to a buffer; freefunc/user_data are a callback
 * pair, presumably invoked when the pixels are released - confirm. */
void ctx_buffer_set_data (CtxBuffer *buffer,
                          void *data, int width, int height,
                          int stride,
                          CtxPixelFormat pixel_format,
                          void (*freefunc) (void *pixels, void *user_data),
                          void *user_data);

int _ctx_set_frame (Ctx *ctx, int frame);
int _ctx_frame (Ctx *ctx);


void ctx_exit (Ctx *ctx);
void ctx_list_backends(void);
/* NOTE(review): "ebpp" is not expanded anywhere in view - check the
 * definition for its exact meaning. */
int ctx_pixel_format_ebpp (CtxPixelFormat format);


#endif
#ifndef __CTX_CONSTANTS
#define __CTX_CONSTANTS

/* Casts a precomputed token hash to uint32_t.  The argument is
 * parenthesized so that the cast applies to the whole expression
 * passed in, not just to its first operand. */
#define TOKENHASH(a)    ((uint32_t)(a))

/* Precomputed 32bit identifiers for command names, property names,
 * enum keywords and named colors.  Each constant is named after the
 * string it stands for; presumably the numbers equal ctx_strhash() of
 * that string - TODO confirm before editing any value by hand.
 *
 * A few names are defined twice with identical values (CTX_color,
 * CTX_text_baseline, CTX_gray); identical redefinition is harmless.
 */
#define CTX_strokeSource TOKENHASH(3387288669)
#define CTX_add_stop TOKENHASH(3572486242)
#define CTX_addStop TOKENHASH(3805374936)
#define CTX_alphabetic TOKENHASH(2558771929)
#define CTX_arc TOKENHASH(7298)
#define CTX_arc_to TOKENHASH(4010563993)
#define CTX_arcTo TOKENHASH(4138935887)
#define CTX_begin_path TOKENHASH(3275811535)
#define CTX_beginPath TOKENHASH(2384638508)
#define CTX_bevel TOKENHASH(25538884)
#define CTX_bottom TOKENHASH(905225156)
#define CTX_cap TOKENHASH(32838)
#define CTX_center TOKENHASH(1219785030)
#define CTX_clear TOKENHASH(37825286)
#define CTX_color TOKENHASH(38757318)
#define CTX_copy TOKENHASH(1672134)
#define CTX_clip TOKENHASH(1067782)
#define CTX_close_path TOKENHASH(3215881683)
#define CTX_closePath TOKENHASH(3625577848)
#define CTX_cmyka TOKENHASH(2870086)
#define CTX_cmyk TOKENHASH(772934)
#define CTX_cmykaS TOKENHASH(3116500921)
#define CTX_cmykS TOKENHASH(934005574)
/* repeated definition of CTX_color, same value as above */
#define CTX_color TOKENHASH(38757318)
#define CTX_blending TOKENHASH(3402343403)
#define CTX_blend TOKENHASH(9317124)
#define CTX_blending_mode TOKENHASH(4000829592)
#define CTX_blendingMode TOKENHASH(2577020122)
#define CTX_blend_mode TOKENHASH(2229422236)
#define CTX_blendMode TOKENHASH(3450578624)
#define CTX_composite TOKENHASH(2191186513)
#define CTX_compositing_mode TOKENHASH(3415700633)
#define CTX_compositingMode TOKENHASH(3625102151)
#define CTX_curve_to TOKENHASH(3569729066)
#define CTX_curveTo TOKENHASH(3536162037)
#define CTX_darken TOKENHASH(950767688)
#define CTX_defineGlyph TOKENHASH(3698027829)
#define CTX_defineTexture TOKENHASH(4201008335)
#define CTX_kerningPair TOKENHASH(3655936472)
#define CTX_destinationIn TOKENHASH(2718725020)
#define CTX_destination_in TOKENHASH(3351938654)
#define CTX_destinationAtop TOKENHASH(3609906960)
#define CTX_destination_atop TOKENHASH(2783515582)
#define CTX_destinationOver TOKENHASH(2378926016)
#define CTX_destination_over TOKENHASH(2856771196)
#define CTX_destinationOut TOKENHASH(3944490553)
#define CTX_destination_out TOKENHASH(3021444620)
#define CTX_difference TOKENHASH(2530251746)
#define CTX_done TOKENHASH(357320)
#define CTX_drgba TOKENHASH(2243720)
#define CTX_drgb TOKENHASH(146568)
#define CTX_drgbaS TOKENHASH(2541895879)
#define CTX_drgbS TOKENHASH(933379208)
#define CTX_end TOKENHASH(9098)
#define CTX_endfun TOKENHASH(983966602)
#define CTX_end_group TOKENHASH(2564160724)
#define CTX_endGroup TOKENHASH(3639210663)
#define CTX_even_odd TOKENHASH(2587574889)
#define CTX_evenOdd TOKENHASH(4065502508)
#define CTX_exit TOKENHASH(1330698)
#define CTX_extend TOKENHASH(298165770)
#define CTX_fill TOKENHASH(811596)
#define CTX_fill_rule TOKENHASH(3026141741)
#define CTX_fillRule TOKENHASH(2727819936)
#define CTX_flush TOKENHASH(18066188)
#define CTX_font TOKENHASH(1340364)
#define CTX_font_size TOKENHASH(3138232552)
#define CTX_setFontSize TOKENHASH(2794810212)
#define CTX_fontSize TOKENHASH(2516141542)
#define CTX_function TOKENHASH(2157387644)
#define CTX_getkey TOKENHASH(1688969550)
#define CTX_global_alpha TOKENHASH(4195339170)
#define CTX_globalAlpha TOKENHASH(3503999095)
#define CTX_glyph TOKENHASH(17877774)
#define CTX_gradient_add_stop TOKENHASH(2527862800)
#define CTX_gradientAddStop TOKENHASH(2707733066)
#define CTX_graya TOKENHASH(3738766)
#define CTX_gray TOKENHASH(1641614)
#define CTX_grayaS TOKENHASH(3152913809)
#define CTX_grayS TOKENHASH(934874254)
#define CTX_hanging TOKENHASH(3379012612)
#define CTX_height TOKENHASH(1359432016)
#define CTX_hor_line_to TOKENHASH(3576305368)
#define CTX_horLineTo TOKENHASH(2768557894)
#define CTX_hue TOKENHASH(11600)
#define CTX_identity TOKENHASH(4244560551)
#define CTX_ideographic TOKENHASH(4062138887)
#define CTX_imageSmoothing TOKENHASH(3391439578)
#define CTX_join TOKENHASH(936916)
#define CTX_laba TOKENHASH(69720)
#define CTX_lab TOKENHASH(4184)
#define CTX_lcha TOKENHASH(82136)
#define CTX_lch TOKENHASH(16600)
#define CTX_labaS TOKENHASH(933302360)
#define CTX_labS TOKENHASH(29167704)
#define CTX_lchaS TOKENHASH(933314776)
#define CTX_lchS TOKENHASH(29180120)
#define CTX_left TOKENHASH(1323352)
#define CTX_lighter TOKENHASH(3085731552)
#define CTX_lighten TOKENHASH(2243427702)
#define CTX_linear_gradient TOKENHASH(2750495200)
#define CTX_linearGradient TOKENHASH(2530643087)
#define CTX_line_cap TOKENHASH(3442398380)
#define CTX_lineCap TOKENHASH(4099906770)
#define CTX_setLineCap TOKENHASH(3062640202)
#define CTX_line_height TOKENHASH(2825006065)
#define CTX_line_join TOKENHASH(2796226529)
#define CTX_lineJoin TOKENHASH(3149521206)
#define CTX_setLineJoin TOKENHASH(3876390174)
#define CTX_line_spacing TOKENHASH(3474024390)
#define CTX_line_to TOKENHASH(2950597468)
#define CTX_lineTo TOKENHASH(3995194545)
#define CTX_lineDash TOKENHASH(2275747153)
#define CTX_lineDashOffset TOKENHASH(2164798257)
#define CTX_line_width TOKENHASH(2644675969)
#define CTX_lineWidth TOKENHASH(4067116285)
#define CTX_setLineWidth TOKENHASH(3835759450)
#define CTX_view_box TOKENHASH(3076034236)
#define CTX_viewBox TOKENHASH(3661895848)
#define CTX_middle TOKENHASH(360981082)
#define CTX_miter TOKENHASH(38117978)
#define CTX_miter_limit TOKENHASH(2692682139)
#define CTX_miterLimit TOKENHASH(3784823268)
#define CTX_move_to TOKENHASH(3482077014)
#define CTX_moveTo TOKENHASH(3135948887)
#define CTX_multiply TOKENHASH(2379318058)
#define CTX_new_page TOKENHASH(3781461413)
#define CTX_newPage TOKENHASH(3875814849)
#define CTX_new_path TOKENHASH(4253517559)
#define CTX_newPath TOKENHASH(2442450175)
#define CTX_new_state TOKENHASH(3282144098)
#define CTX_none TOKENHASH(357340)
#define CTX_nonzero TOKENHASH(2230085415)
#define CTX_non_zero TOKENHASH(3127422280)
#define CTX_normal TOKENHASH(808293340)
#define CTX_paint TOKENHASH(42879072)
#define CTX_quad_to TOKENHASH(3896875982)
#define CTX_quadTo TOKENHASH(3916306495)
#define CTX_radial_gradient TOKENHASH(4226017763)
#define CTX_radialGradient TOKENHASH(3218566169)
#define CTX_rectangle TOKENHASH(4111149391)
#define CTX_rect TOKENHASH(1317220)
#define CTX_rel_arc_to TOKENHASH(2653353243)
#define CTX_relArcTo TOKENHASH(2940381656)
#define CTX_rel_curve_to TOKENHASH(2413603721)
#define CTX_relCurveTo TOKENHASH(3745640049)
#define CTX_rel_hor_line_to TOKENHASH(3292310681)
#define CTX_relHorLineTo TOKENHASH(2661057467)
#define CTX_relVerLineTo TOKENHASH(3868849192)
#define CTX_rel_line_to TOKENHASH(2865414393)
#define CTX_relLineTo TOKENHASH(2437091951)
#define CTX_rel_move_to TOKENHASH(4169997481)
#define CTX_relMoveTo TOKENHASH(2527491593)
#define CTX_rel_quad_to TOKENHASH(4209276505)
#define CTX_relQuadTo TOKENHASH(3961311908)
#define CTX_rel_smoothq_to TOKENHASH(3923163705)
#define CTX_relSmoothqTo TOKENHASH(2913202089)
#define CTX_rel_smooth_to TOKENHASH(4229528839)
#define CTX_relSmoothTo TOKENHASH(3458671695)
#define CTX_rel_ver_line_to TOKENHASH(2484242991)
#define CTX_restore TOKENHASH(2936409475)
#define CTX_reset TOKENHASH(42309988)
#define CTX_rgba TOKENHASH(70116)
#define CTX_rgb TOKENHASH(4580)
#define CTX_rgbaS TOKENHASH(933302756)
#define CTX_rgbS TOKENHASH(29168100)
#define CTX_right TOKENHASH(42482276)
#define CTX_rotate TOKENHASH(377594852)
#define CTX_round TOKENHASH(9350116)
#define CTX_round_rectangle TOKENHASH(2766896494)
#define CTX_roundRectangle TOKENHASH(3688082153)
#define CTX_save TOKENHASH(372838)
#define CTX_scale TOKENHASH(11274470)
#define CTX_screen TOKENHASH(950374630)
#define CTX_setkey TOKENHASH(1688969574)
#define CTX_shadowBlur TOKENHASH(3119062524)
#define CTX_shadowColor TOKENHASH(3795289804)
#define CTX_shadowOffsetX TOKENHASH(4134163333)
#define CTX_shadowOffsetY TOKENHASH(3519010566)
#define CTX_smooth_quad_to TOKENHASH(3789701842)
#define CTX_smoothQuadTo TOKENHASH(4024936051)
#define CTX_smooth_to TOKENHASH(2307159288)
#define CTX_smoothTo TOKENHASH(3997790061)
#define CTX_sourceIn TOKENHASH(3513756343)
#define CTX_source_in TOKENHASH(3936775584)
#define CTX_sourceAtop TOKENHASH(3201391080)
#define CTX_source_atop TOKENHASH(3568635572)
#define CTX_sourceOut TOKENHASH(4217691207)
#define CTX_source_out TOKENHASH(2998974401)
#define CTX_sourceOver TOKENHASH(4071274055)
#define CTX_sourceTransform TOKENHASH(3608891648)
#define CTX_source_over TOKENHASH(2221728393)
#define CTX_square TOKENHASH(373402726)
#define CTX_start TOKENHASH(43126054)
#define CTX_start_move TOKENHASH(2528525896)
#define CTX_start_group TOKENHASH(2643259216)
#define CTX_startGroup TOKENHASH(4199711715)
#define CTX_stroke TOKENHASH(359634214)
#define CTX_text_align TOKENHASH(2641259250)
#define CTX_textAlign TOKENHASH(4087119491)
#define CTX_texture TOKENHASH(2603404275)
/* CTX_text_baseline appears twice in a row with the same value */
#define CTX_text_baseline TOKENHASH(2666328946)
#define CTX_text_baseline TOKENHASH(2666328946)
#define CTX_textBaseline TOKENHASH(3671121506)
#define CTX_fillRect TOKENHASH(2617922007)
#define CTX_text TOKENHASH(1360232)
#define CTX_text_direction TOKENHASH(2683352974)
#define CTX_textDirection TOKENHASH(2303324726)
#define CTX_text_stroke TOKENHASH(2394879415)
#define CTX_strokeText TOKENHASH(4077103477)
#define CTX_strokeRect TOKENHASH(3918462693)
#define CTX_top TOKENHASH(33768)
#define CTX_transform TOKENHASH(3717307466)
#define CTX_translate TOKENHASH(2746303805)
#define CTX_verLineTo TOKENHASH(2881865279)
#define CTX_ver_line_to TOKENHASH(3445689061)
#define CTX_width TOKENHASH(18096750)
#define CTX_winding TOKENHASH(3743938776)
#define CTX_x TOKENHASH(48)
#define CTX_xor TOKENHASH(37872)
#define CTX_y TOKENHASH(50)
#define CTX_colorSpace TOKENHASH(2624117287)
#define CTX_userRGB TOKENHASH(2839509677)
#define CTX_userCMYK TOKENHASH(4240023559)
#define CTX_deviceRGB TOKENHASH(3975717407)
#define CTX_deviceCMYK TOKENHASH(4096729420)
/* named colors */
#define CTX_silver TOKENHASH(1219912294)
#define CTX_fuchsia TOKENHASH(3356500405)
/* repeated definition of CTX_gray, same value as earlier in the list */
#define CTX_gray TOKENHASH(1641614)
#define CTX_yellow TOKENHASH(1575772530)
#define CTX_white TOKENHASH(11815470)
#define CTX_maroon TOKENHASH(972001370)
#define CTX_magenta TOKENHASH(2383173845)
#define CTX_blue TOKENHASH(371460)
#define CTX_green TOKENHASH(29699214)
#define CTX_red TOKENHASH(8548)
#define CTX_purple TOKENHASH(361796960)
#define CTX_olive TOKENHASH(11946782)
#define CTX_teal TOKENHASH(788840)
#define CTX_black TOKENHASH(23268100)
#define CTX_cyan TOKENHASH(921158)
#define CTX_navy TOKENHASH(1683548)
#define CTX_lime TOKENHASH(354904)
#define CTX_aqua TOKENHASH(109634)
#define CTX_transparent TOKENHASH(3143361910)
#define CTX_currentColor TOKENHASH(2944012414)
#define CTX_title TOKENHASH(11313768)

#endif



#ifndef __CTX_LIBC_H
#define __CTX_LIBC_H

#if !__COSMOPOLITAN__
#include <stddef.h>
#endif

/* ctx_memset: currently just an alias for the C library memset().
 * The #if 0 branch keeps a disabled byte-by-byte fallback
 * implementation around; it is never compiled. */
#if 0
static inline void
ctx_memset (void *ptr, uint8_t val, int length)
{
  uint8_t *p = (uint8_t *) ptr;
  for (int i = 0; i < length; i ++)
    { p[i] = val; }
}
#else
#define ctx_memset memset
#endif


/* Copy the NUL terminated string @src, including its terminator,
 * into @dst.  No bounds checking is done; @dst must be large enough.
 */
static inline void ctx_strcpy (char *dst, const char *src)
{
  char       *out = dst;
  const char *in  = src;
  while (*in)
    { *out++ = *in++; }
  *out = 0;
}

/* Find the first occurrence of @needle in @haystack.
 *
 * Returns a pointer to the matching character, or NULL when it does
 * not occur.  When @needle is 0 the string terminator itself is
 * returned.
 */
static inline char *_ctx_strchr (const char *haystack, char needle)
{
  const char *cursor;
  for (cursor = haystack; *cursor; cursor++)
    {
      if (*cursor == needle)
        { return (char *) cursor; }
    }
  /* cursor is at the terminator, which only matches a NUL needle */
  if (needle == 0)
    { return (char *) cursor; }
  return NULL;
}
/* Public name for _ctx_strchr: returns whatever _ctx_strchr returns
 * for the same arguments. */
static inline char *ctx_strchr (const char *haystack, char needle)
{
  char *hit = _ctx_strchr (haystack, needle);
  return hit;
}

/* Equality test for two NUL terminated strings.
 *
 * Returns 0 when @a and @b are identical and 1 otherwise; unlike the
 * C library strcmp() the result carries no ordering information.
 */
static inline int ctx_strcmp (const char *a, const char *b)
{
  while (*a && *b)
    {
      if (*a != *b)
        { return 1; }
      a++;
      b++;
    }
  /* equal only if both strings ended at the same position */
  return (*a == 0 && *b == 0) ? 0 : 1;
}

/* Equality test for at most the first @n characters of @a and @b.
 *
 * Returns 0 when the strings agree over their first @n characters (or
 * up to and including a shared terminator that occurs earlier), and 1
 * otherwise.  As with ctx_strcmp the result carries no ordering.
 *
 * The position counter advances on every iteration so that @n really
 * bounds the comparison, and a string that ends before @n characters
 * while the other continues is reported as different.
 */
static inline int ctx_strncmp (const char *a, const char *b, size_t n)
{
  size_t i;
  for (i = 0; i < n; i++)
    {
      if (a[i] != b[i])
        { return 1; }
      if (a[i] == 0) /* both ended here, nothing more to compare */
        { return 0; }
    }
  return 0;
}

/* Number of characters in @s before its NUL terminator, as an int. */
static inline int ctx_strlen (const char *s)
{
  const char *end = s;
  while (*end)
    { end++; }
  return (int) (end - s);
}

/* Find the first occurrence of the string @n (needle) inside @h
 * (haystack).
 *
 * Returns a pointer to the start of the match inside @h, @h itself
 * when @n is the empty string, or NULL when there is no match.
 *
 * The candidate comparison is done inline and stops at the end of the
 * haystack, so a haystack tail that is merely a prefix of the needle
 * (for instance "ab" searched for "abc") is not reported as a match.
 */
static inline char *ctx_strstr (const char *h, const char *n)
{
  if (n[0]==0)
    { return (char *) h; }
  for (; *h; h++)
    {
      int i = 0;
      if (*h != n[0])
        { continue; }
      /* h[i] == 0 with n[i] != 0 fails the comparison, so this never
         reads past the haystack terminator */
      while (n[i] && h[i] == n[i])
        { i++; }
      if (n[i] == 0)
        { return (char *) h; }
    }
  return NULL;
}

#endif

/* Prototypes for functions defined elsewhere in this file.
 * NOTE(review): ctx_strhash presumably yields the 32bit hashes used
 * as keys by ctx_get_int / ctx_get_is_set - confirm. */
uint32_t    ctx_strhash        (const char *str);
CtxColor   *ctx_color_new      (void);
int         ctx_get_int        (Ctx *ctx, uint32_t hash);
int         ctx_get_is_set     (Ctx *ctx, uint32_t hash);
Ctx        *ctx_new_for_buffer (CtxBuffer *buffer);
#ifndef CTX_AUDIO_H
#define CTX_AUDIO_H

#if !__COSMOPOLITAN__
#include <stdint.h>
#endif

/* PCM sample formats.
 *
 * This enum should be kept in sync with the corresponding mmm enum.
 *
 * NOTE(review): by naming, f32 / s16 look like 32bit float and signed
 * 16bit samples and the S suffix like stereo - confirm with the audio
 * implementation.
 */
typedef enum {
  CTX_f32,
  CTX_f32S,
  CTX_s16,
  CTX_s16S
} CtxPCM;

/* Audio API: format and sample rate accessors, plus queueing of PCM
 * data.  ctx_pcm_queue takes a byte pointer and a count of frames;
 * the layout of one frame presumably follows the current CtxPCM
 * format - TODO confirm. */
void   ctx_pcm_set_format        (Ctx *ctx, CtxPCM format);
CtxPCM ctx_pcm_get_format        (Ctx *ctx);
int    ctx_pcm_get_sample_rate   (Ctx *ctx);
void   ctx_pcm_set_sample_rate   (Ctx *ctx, int sample_rate);
int    ctx_pcm_get_frame_chunk   (Ctx *ctx);
int    ctx_pcm_get_queued        (Ctx *ctx);
float  ctx_pcm_get_queued_length (Ctx *ctx);
int    ctx_pcm_queue             (Ctx *ctx, const int8_t *data, int frames);

#endif
#ifndef __CTX_CLIENTS_H
#define __CTX_CLIENTS_H

/* Bit flags describing a client; each value is a distinct bit so the
 * flags can be ored together. */
typedef enum CtxClientFlags {
  ITK_CLIENT_UI_RESIZABLE = 1<<0,
  ITK_CLIENT_CAN_LAUNCH   = 1<<1,
  ITK_CLIENT_MAXIMIZED    = 1<<2,
  ITK_CLIENT_ICONIFIED    = 1<<3,
  ITK_CLIENT_SHADED       = 1<<4,
  ITK_CLIENT_TITLEBAR     = 1<<5,
  ITK_CLIENT_LAYER2       = 1<<6,  // used for having a second set
                                   // to draw - useful for splitting
                                   // scrolled and HUD items
                                   // with HUD being LAYER2

  ITK_CLIENT_KEEP_ALIVE   = 1<<7,  // do not automatically
                                   // remove after process quits
  ITK_CLIENT_FINISHED     = 1<<8,  // NOTE(review): undocumented; the
                                   // comment previously on this line
                                   // read like a fragment of the
                                   // KEEP_ALIVE comment - confirm
  ITK_CLIENT_PRELOAD      = 1<<9
} CtxClientFlags;

/* Callback type stored in CtxClient.finalize; receives the client and
 * the user_data pointer. */
typedef void (*CtxClientFinalize)(CtxClient *client, void *user_data);

/* State for one client: either backed by a VT or, when vt is NULL, by
 * a thread (see the comment on vt). */
struct _CtxClient {
  VT    *vt;        // or NULL when thread

  long       rev;   // revision counter, see ctx_client_rev / _rev_inc

  CtxList *events;  // we could use this queue also for vt

  Ctx     *ctx;
  char    *title;
  /* geometry of the client */
  int      x;
  int      y;
  int      width;
  int      height;
  float    opacity;
  CtxClientFlags flags;
#if 0
  /* disabled: these states are carried as bits in flags instead */
  int      shaded;
  int      iconified;
  int      maximized;
  int      resizable;
#endif
  /* presumably the geometry to return to when leaving the maximized
   * state - TODO confirm */
  int      unmaximized_x;
  int      unmaximized_y;
  int      unmaximized_width;
  int      unmaximized_height;
  int      do_quit;
  long     drawn_rev;
  int      id;
  int      internal; // render a settings window rather than a vt

#if CTX_THREADS
  thrd_t tid;     // and only split code path in processing?
                    // -- why?
#endif
  void (*start_routine)(Ctx *ctx, void *user_data);
  void    *user_data;
  CtxClientFinalize finalize;
  Ctx     *sub_ctx;
  CtxList *ctx_events;


  /* we want to keep variation at the end */
#if CTX_THREADS
  mtx_t    mtx;
#endif
#if VT_RECORD
  Ctx     *recording;
#endif
};

/* Client management API.  Functions taking an int id address a client
 * by its id (see the id member of struct _CtxClient). */
int   ctx_client_resize        (Ctx *ctx, int id, int width, int height);
void  ctx_client_set_font_size (Ctx *ctx, int id, float font_size);
float ctx_client_get_font_size (Ctx *ctx, int id);
void  ctx_client_maximize      (Ctx *ctx, int id);


CtxClient *vt_get_client (VT *vt);
/* Create a client from a command line string. */
CtxClient *ctx_client_new (Ctx *ctx,
                           const char *commandline,
                           int x, int y, int width, int height,
                           float font_size,
                           CtxClientFlags flags,
                           void *user_data,
                           CtxClientFinalize client_finalize);

/* Same as above but taking an argument vector instead of a string. */
CtxClient *ctx_client_new_argv (Ctx *ctx, char **argv, int x, int y, int width, int height, float font_size, CtxClientFlags flags, void *user_data,
                CtxClientFinalize client_finalize);
int ctx_clients_need_redraw (Ctx *ctx);

/* Create a client that is driven by start_routine instead of a
 * command; presumably run in its own thread - TODO confirm. */
CtxClient *ctx_client_new_thread (Ctx *ctx, void (*start_routine)(Ctx *ctx, void *user_data),
                                  int x, int y, int width, int height, float font_size, CtxClientFlags flags, void *user_data, CtxClientFinalize finalize);

/* Globals defined in the implementation part of this file. */
extern float ctx_shape_cache_rate;
extern int _ctx_max_threads;

CtxEvent *ctx_event_copy (CtxEvent *event);

/* Note: ctx_client_move and ctx_client_shade_toggle are declared a
 * second time further down in this header, with identical
 * signatures. */
void  ctx_client_move         (Ctx *ctx, int id, int x, int y);
void  ctx_client_shade_toggle (Ctx *ctx, int id);
float ctx_client_min_y_pos    (Ctx *ctx);
float ctx_client_max_y_pos    (Ctx *ctx);
void ctx_client_paste (Ctx *ctx, int id, const char *str);
/* NOTE(review): returns a non-const char *; ownership of the returned
 * string is not visible here - check the definition before freeing. */
char  *ctx_client_get_selection        (Ctx *ctx, int id);

void  ctx_client_rev_inc      (CtxClient *client);
long  ctx_client_rev          (CtxClient *client);

int   ctx_clients_active      (Ctx *ctx);

CtxClient *ctx_client_by_id (Ctx *ctx, int id);

int ctx_clients_draw (Ctx *ctx, int layer2);

void ctx_client_feed_keystring (CtxClient *client, CtxEvent *event, const char *str);
// need not be public?
void ctx_client_register_events (CtxClient *client, Ctx *ctx, double x0, double y0);

void ctx_client_remove (Ctx *ctx, CtxClient *client);

/* Per-client accessors and window state operations, all addressed by
 * client id.  Several of these (maximize, shade_toggle, move, resize)
 * repeat declarations made earlier in this header with identical
 * signatures. */
int  ctx_client_height           (Ctx *ctx, int id);
int  ctx_client_x                (Ctx *ctx, int id);
int  ctx_client_y                (Ctx *ctx, int id);
void ctx_client_raise_top        (Ctx *ctx, int id);
void ctx_client_lower_bottom     (Ctx *ctx, int id);
void ctx_client_iconify          (Ctx *ctx, int id);
int  ctx_client_is_iconified     (Ctx *ctx, int id);
void ctx_client_uniconify        (Ctx *ctx, int id);
void ctx_client_maximize         (Ctx *ctx, int id);
int  ctx_client_is_maximized     (Ctx *ctx, int id);
void ctx_client_unmaximize       (Ctx *ctx, int id);
void ctx_client_maximized_toggle (Ctx *ctx, int id);
void ctx_client_shade            (Ctx *ctx, int id);
int  ctx_client_is_shaded        (Ctx *ctx, int id);
void ctx_client_unshade          (Ctx *ctx, int id);
/* NOTE(review): both ctx_client_maximized_toggle and
 * ctx_client_toggle_maximized are declared - check which is defined */
void ctx_client_toggle_maximized (Ctx *ctx, int id);
void ctx_client_shade_toggle     (Ctx *ctx, int id);
void ctx_client_move             (Ctx *ctx, int id, int x, int y);
int  ctx_client_resize           (Ctx *ctx, int id, int width, int height);
void ctx_client_set_opacity      (Ctx *ctx, int id, float opacity);
float ctx_client_get_opacity     (Ctx *ctx, int id);
void ctx_client_set_title        (Ctx *ctx, int id, const char *title);
const char *ctx_client_get_title (Ctx *ctx, int id);


#endif

#if CTX_IMPLEMENTATION || CTX_SIMD_BUILD

#if CTX_IMPLEMENTATION|CTX_COMPOSITE

#ifndef __CTX_INTERNAL_H
#define __CTX_INTERNAL_H

#if !__COSMOPOLITAN__
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <math.h>
#endif


/* Branch prediction hints.  With CTX_BRANCH_HINTS enabled the
 * condition is normalized with !! and passed to __builtin_expect;
 * otherwise the macros expand to the bare condition. */
#if CTX_BRANCH_HINTS
#define CTX_LIKELY(x)      __builtin_expect(!!(x), 1)
#define CTX_UNLIKELY(x)    __builtin_expect(!!(x), 0)
#else
#define CTX_LIKELY(x)      (x)
#define CTX_UNLIKELY(x)    (x)
#endif

/* Forward declarations of internal structs. */
typedef struct _CtxRasterizer CtxRasterizer;
typedef struct _CtxGState     CtxGState;
typedef struct _CtxState      CtxState;

typedef struct _CtxSource CtxSource;


/* Bits for the valid / original bitmasks of struct _CtxColor; each
 * marks one representation of the color.  Some bits are only defined
 * when the matching feature (CTX_ENABLE_CM, CTX_ENABLE_CMYK) is on. */
#define CTX_VALID_RGBA_U8     (1<<0)
#define CTX_VALID_RGBA_DEVICE (1<<1)
#if CTX_ENABLE_CM
#define CTX_VALID_RGBA        (1<<2)
#endif
#if CTX_ENABLE_CMYK
#define CTX_VALID_CMYKA       (1<<3)
#define CTX_VALID_DCMYKA      (1<<4)
#endif
#define CTX_VALID_GRAYA       (1<<5)
#define CTX_VALID_GRAYA_U8    (1<<6)
/* LABA includes the GRAYA bit in addition to its own bit 7 */
#define CTX_VALID_LABA        ((1<<7) | CTX_VALID_GRAYA)

/* A color that can hold several representations of the same value at
 * once; the valid bitmask (CTX_VALID_* bits) records which of the
 * members currently hold usable data. */
struct _CtxColor
{
  uint8_t magic; // for colors used in keydb, set to a non valid start of
                 // string value.
  uint8_t rgba[4];
  uint8_t l_u8;
  uint8_t original; // the bitmask of the originally set color
  uint8_t valid;    // bitmask of which members contain valid
  // values, gets denser populated as more
  // formats are requested from a set color.
  float   device_red;
  float   device_green;
  float   device_blue;
  float   alpha;
  float   l;        // luminance and gray
#if CTX_ENABLE_LAB  // NYI
  float   a;
  float   b;
#endif
#if CTX_ENABLE_CMYK
  float   device_cyan;
  float   device_magenta;
  float   device_yellow;
  float   device_key;
  float   cyan;
  float   magenta;
  float   yellow;
  float   key;
#endif

#if CTX_ENABLE_CM
  float   red;
  float   green;
  float   blue;
#if CTX_BABL
  const Babl *space; // gets copied from state when color is declared
#else
  void   *space; // gets copied from state when color is declared, 
#endif
#endif
};

/* One gradient stop: a color together with its position.
 * NOTE(review): the value range of pos is not visible here
 * (presumably 0..1) - confirm. */
typedef struct _CtxGradientStop CtxGradientStop;

struct _CtxGradientStop
{
  CtxColor color;
  float   pos;
};


/* Kinds of paint source; the names correspond to the members of the
 * union in struct _CtxSource (color, texture, linear_gradient,
 * radial_gradient).  CTX_SOURCE_INHERIT_FILL has no union member of
 * its own. */
enum _CtxSourceType
{
  CTX_SOURCE_COLOR = 0,
  CTX_SOURCE_TEXTURE,
  CTX_SOURCE_LINEAR_GRADIENT,
  CTX_SOURCE_RADIAL_GRADIENT,
  CTX_SOURCE_INHERIT_FILL
};

typedef enum _CtxSourceType CtxSourceType;

typedef struct _CtxPixelFormatInfo CtxPixelFormatInfo;

/* A block of pixel data with its dimensions, format description and
 * an optional release callback. */
struct _CtxBuffer
{
  void               *data;
  int                 width;
  int                 height;
  int                 stride;     // presumably bytes per row - TODO confirm
  int                 frame;      // last frame used in, everything > 3 can be removed,
                                  // as clients wont rely on it.
  char               *eid;        // might be NULL, when not - should be unique for pixel contents
  CtxPixelFormatInfo *format;
  /* callback + argument pair, same shape as the freefunc/user_data
   * parameters of ctx_buffer_set_data */
  void (*free_func) (void *pixels, void *user_data);
  void               *user_data;

#if CTX_ENABLE_CM
#if CTX_BABL
  const Babl *space;
#else
  void       *space; 
#endif
#endif
#if 1
  CtxBuffer          *color_managed; /* only valid for one render target, cache
                                        for a specific space
                                        */
#endif
};


//void _ctx_user_to_device          (CtxState *state, float *x, float *y);
//void _ctx_user_to_device_distance (CtxState *state, float *x, float *y);

/* A gradient: a fixed capacity array of 16 stops, of which n_stops
 * are in use. */
typedef struct _CtxGradient CtxGradient;
struct _CtxGradient
{
  CtxGradientStop stops[16];
  int n_stops;
};

/* A paint source.  type selects which member of the anonymous union
 * is meaningful (presumably a CtxSourceType value - stored as int). */
struct _CtxSource
{
  int type;
  CtxMatrix  set_transform;
  CtxMatrix  transform;
  CtxMatrix  transform_inv; // presumably the inverse of transform - TODO confirm
  int pad; // to align next properly
  union
  {
    CtxColor color;
    struct
    {
      uint8_t rgba[4]; // shares data with set color
      uint8_t pad;
      CtxBuffer *buffer;
    } texture;
    struct
    {
      float x0;
      float y0;
      float x1;
      float y1;
      float dx;
      float dy;
      float start;
      float end;
      float length;
      float rdelta;
    } linear_gradient;
    struct
    {
      float x0;
      float y0;
      float r0;
      float x1;
      float y1;
      float r1;
      float rdelta;
    } radial_gradient;
  };
};

/* Graphics state: the set of drawing parameters that struct _CtxState
 * keeps both as a current value (gstate) and as a stack
 * (gstate_stack). */
struct _CtxGState
{
  /* positions into the keydb / stringpool arrays of CtxState */
  int           keydb_pos;
  int           stringpool_pos;

  CtxMatrix     transform;
  CtxSource     source_stroke;
  CtxSource     source_fill;
  float         global_alpha_f;

  float         line_width;
  float         line_dash_offset;
  float         miter_limit;
  float         font_size;
#if CTX_ENABLE_SHADOW_BLUR
  float         shadow_blur;
  float         shadow_offset_x;
  float         shadow_offset_y;
#endif
  unsigned int        clipped:1;
  CtxColorModel    color_model:8;
  /* bitfield-pack small state-parts */
  CtxLineCap          line_cap:2;
  CtxLineJoin        line_join:2;
  CtxFillRule        fill_rule:1;
  unsigned int image_smoothing:1;
  unsigned int            font:6;
  unsigned int            bold:1;
  unsigned int          italic:1;

  /* 8bit counterpart of global_alpha_f */
  uint8_t       global_alpha_u8;
  int16_t       clip_min_x;
  int16_t       clip_min_y;
  int16_t       clip_max_x;
  int16_t       clip_max_y;
  int           n_dashes;   // presumably the used length of dashes[] - TODO confirm

#if CTX_ENABLE_CM
#if CTX_BABL
  const Babl   *device_space;
  const Babl   *texture_space;
  const Babl   *rgb_space;       
  const Babl   *cmyk_space;

  const Babl   *fish_rgbaf_user_to_device;
  const Babl   *fish_rgbaf_texture_to_device;
  const Babl   *fish_rgbaf_device_to_user;

#else
  /* without babl the same members exist as untyped pointers, keeping
   * the member list the same in both configurations */
  void         *device_space;
  void         *texture_space;
  void         *rgb_space;       
  void         *cmyk_space;
  void         *fish_rgbaf_user_to_device; // dummy padding
  void         *fish_rgbaf_texture_to_device; // dummy padding
  void         *fish_rgbaf_device_to_user; // dummy padding
#endif
#endif
  CtxCompositingMode  compositing_mode; // bitfield refs lead to
  CtxBlend                  blend_mode; // non-vectorization
  CtxExtend                 extend;

  float dashes[CTX_PARSER_MAX_ARGS];

};

/* Transformation flags; the values are distinct bits.  Presumably
 * these are combined in the transformation member of struct _Ctx -
 * TODO confirm.  The BITPACK value only exists when CTX_BITPACK is
 * enabled. */
typedef enum
{
  CTX_TRANSFORMATION_NONE         = 0,
  CTX_TRANSFORMATION_SCREEN_SPACE = 1,
  CTX_TRANSFORMATION_RELATIVE     = 2,
#if CTX_BITPACK
  CTX_TRANSFORMATION_BITPACK      = 4,
#endif
  CTX_TRANSFORMATION_STORE_CLEAR  = 16,
} CtxTransformation;

/* Flag bits, presumably for the flags member of struct _CtxDrawlist -
 * TODO confirm. */
#define CTX_DRAWLIST_DOESNT_OWN_ENTRIES   64
#define CTX_DRAWLIST_EDGE_LIST            128
#define CTX_DRAWLIST_CURRENT_PATH         512
// BITPACK

/* A list of drawing command entries. */
struct _CtxDrawlist
{
  CtxEntry *entries;
  unsigned int count;     // presumably number of entries in use - TODO confirm
  int size;               // presumably allocated capacity - TODO confirm
  uint32_t  flags;
  int       bitpack_pos;  // stream is bitpacked up to this offset
};

#define CTX_MAX_KEYDB 64 // number of entries in keydb
                         // entries are "copy-on-change" between states

// the keydb consists of keys set to floating point values,
// that might also be interpreted as integers for enums.
//
// the hash
/* One keydb entry: a 32bit key paired with a float value. */
typedef struct _CtxKeyDbEntry CtxKeyDbEntry;
struct _CtxKeyDbEntry
{
  uint32_t key;
  float value;
  //union { float f[1]; uint8_t u8[4]; }value;
};

/* Interpreter state of a context: current position, ink bounds, the
 * current graphics state with its save/restore stack, and the keydb
 * with its string pool. */
struct _CtxState
{
  /* single bit flags; declared unsigned so that they hold 0 or 1 -
   * the signedness of a plain int bit-field is implementation-defined
   * and a signed 1-bit field cannot represent 1 */
  unsigned int  has_moved:1;
  unsigned int  has_clipped:1;
  int16_t       gstate_no;
  int8_t        source; // used for the single-shifting to stroking
                // 0  = fill
                // 1  = start_stroke
                // 2  = in_stroke
                //
                //   if we're at in_stroke at start of a source definition
                //   we do filling

  float         x;
  float         y;
  int           ink_min_x;
  int           ink_min_y;
  int           ink_max_x;
  int           ink_max_y;
  CtxGState     gstate;
  CtxGState     gstate_stack[CTX_MAX_STATES];//at end, so can be made dynamic
#if CTX_GRADIENTS
  CtxGradient   gradient; /* we keep only one gradient,
                             this goes icky with multiple
                             restores - it should really be part of
                             graphics state..
                             XXX, with the stringpool gradients
                             can be stored there.
                           */
#endif
  CtxKeyDbEntry keydb[CTX_MAX_KEYDB];
  char          stringpool[CTX_STRINGPOOL_SIZE];
};


typedef struct _CtxFont       CtxFont;
typedef struct _CtxFontEngine CtxFontEngine;

/* Table of function pointers implementing one font backend.
 * load_file is only part of the table when CTX_FONTS_FROM_FILE is
 * enabled. */
struct _CtxFontEngine
{
#if CTX_FONTS_FROM_FILE
  int   (*load_file)   (const char *name, const char *path);
#endif
  int   (*load_memory) (const char *name, const void *data, int length);
  /* NOTE(review): presumably emits/draws the glyph for unichar, with
   * stroke selecting stroking over filling - confirm */
  int   (*glyph)       (CtxFont *font, Ctx *ctx, uint32_t unichar, int stroke);
  float (*glyph_width) (CtxFont *font, Ctx *ctx, uint32_t unichar);
  float (*glyph_kern)  (CtxFont *font, Ctx *ctx, uint32_t unicharA, uint32_t unicharB);
};

/* A loaded font: the engine that handles it, its name, and engine
 * specific data in the anonymous union. */
struct _CtxFont
{
  CtxFontEngine *engine;
  const char *name;
  int type; // 0 ctx    1 stb    2 monobitmap
  union
  {
    struct
    {
      CtxEntry *data;
      int length;
      /* we've got ~110 bytes to fill to cover as
         much data as stbtt_fontinfo */
      //int16_t glyph_pos[26]; // for a..z
      int       glyphs; // number of glyphs
      uint32_t *index;
    } ctx;
    struct
    {
      char *path;
    } ctx_fs;
#if CTX_FONT_ENGINE_STB
    struct
    {
      stbtt_fontinfo ttf_info;
      int cache_index;
      uint32_t cache_unichar;
    } stb;
#endif
    /* NOTE(review): start/end look like a code point range and gw/gh
     * like glyph width/height - confirm with the monobitmap engine */
    struct { int start; int end; int gw; int gh; const uint8_t *data;} monobitmap;
  };
};


/* Iterator flags; the default is to expand bitpacked entries. */
enum _CtxIteratorFlag
{
  CTX_ITERATOR_FLAT           = 0,
  CTX_ITERATOR_EXPAND_BITPACK = 2,
  CTX_ITERATOR_DEFAULTS       = CTX_ITERATOR_EXPAND_BITPACK
};
typedef enum _CtxIteratorFlag CtxIteratorFlag;


/* Iteration state over a drawlist, including scratch space used when
 * bitpacked entries are unpacked. */
struct
  _CtxIterator
{
  int              pos;
  int              first_run;
  CtxDrawlist *drawlist;
  int              end_pos;
  int              flags;          // presumably CtxIteratorFlag values - TODO confirm

  int              bitpack_pos;
  int              bitpack_length;     // if non 0 bitpack is active
  CtxEntry         bitpack_command[6]; // the command returned to the
  // user if unpacking is needed.
};
/* array sizes used by struct _CtxEvents: per-device arrays and the
 * bindings table respectively */
#define CTX_MAX_DEVICES 16
#define CTX_MAX_KEYBINDINGS         256

#if CTX_EVENTS 

// include list implementation - since it already is a header+inline online
// implementation?

/* One registered callback of an item: the event types it applies to,
 * the callback with its two data pointers, and an optional finalize
 * hook with its own data pointer. */
typedef struct CtxItemCb {
  CtxEventType types;
  CtxCb        cb;
  void*        data1;
  void*        data2;

  void (*finalize) (void *data1, void *data2, void *finalize_data);
  void  *finalize_data;

} CtxItemCb;


/* capacity of the cb array in CtxItem */
#define CTX_MAX_CBS              128

/* An event target: bounding box, inverse matrix for coordinate
 * transforms and the callbacks registered on it. */
typedef struct CtxItem {
  CtxMatrix inv_matrix;  /* for event coordinate transforms */

  /* bounding box */
  float          x0;
  float          y0;
  float          x1;
  float          y1;

  void *path;
  double          path_hash;

  CtxCursor       cursor; /* if 0 then UNSET and no cursor change is requested
                           */

  CtxEventType   types;   /* all cb's ored together */
  CtxItemCb cb[CTX_MAX_CBS];
  int       cb_count;     /* presumably the used length of cb[] - TODO confirm */
  int       ref_count;
} CtxItem;


/* Event handling state of a context.  Arrays sized CTX_MAX_DEVICES
 * hold one slot per input device. */
typedef struct _CtxEvents CtxEvents;
struct _CtxEvents
{
  int             frozen;
  int             fullscreen;
  CtxList        *grabs; /* could split the grabs per device in the same way,
                            to make dispatch overhead smaller,. probably
                            not much to win though. */
  CtxItem         *prev[CTX_MAX_DEVICES];
  float            pointer_x[CTX_MAX_DEVICES];
  float            pointer_y[CTX_MAX_DEVICES];
  unsigned char    pointer_down[CTX_MAX_DEVICES];
  CtxEvent         drag_event[CTX_MAX_DEVICES];
  CtxList         *idles;
  CtxList         *idles_to_remove;
  CtxList         *idles_to_add;
  CtxList         *events; // for ctx_get_event
  CtxBinding       bindings[CTX_MAX_KEYBINDINGS]; /*< better as list, uses no mem if unused */
  int              n_bindings;
  int              in_idle_dispatch;
  int              ctx_get_event_enabled;
  int              idle_id;
  CtxList         *items;
  CtxItem         *last_item;
  CtxModifierState modifier_state;
  double           tap_hysteresis;
#if CTX_CLIENTS
  CtxList         *clients;
  CtxClient *active;
  CtxClient *active_tab;
#endif
  /* NOTE(review): units of the tap delays are not visible here */
  int              tap_delay_min;
  int              tap_delay_max;
  int              tap_delay_hold;
};


#endif

/* Record describing one eid string together with a frame number and a
 * width/height pair.  NOTE(review): looks like the element type of
 * Ctx.eid_db - confirm against the texture code. */
typedef struct _CtxEidInfo
{
  char *eid;     /* identifier string */
  int   frame;   /* frame number associated with the eid */
  int   width;
  int   height;
} CtxEidInfo;

/* The drawing context.  Commands are handed to backend->process (see
 * ctx_process); the remaining members hold the drawlist, the current state,
 * texture slots and - when enabled at compile time - event and current-path
 * data. */
struct _Ctx
{
  CtxBackend       *backend;       /* command sink; its process() receives each entry */
  CtxDrawlist       drawlist;
  int               transformation;
  int               width;
  int               height;
  Ctx              *texture_cache; /* another Ctx consulted for textures; may be this one - TODO confirm */
  CtxList          *eid_db;
  CtxState          state;        /**/
  int               frame; /* used for texture lifetime */
  CtxBuffer         texture[CTX_MAX_TEXTURES];
  int               dirty;
#if CTX_EVENTS 
  CtxCursor         cursor;
  int               quit;
  CtxEvents         events;
  int               mouse_fd;
  int               mouse_x;
  int               mouse_y;
#endif
#if CTX_CURRENT_PATH
  CtxDrawlist       current_path; // possibly transformed coordinates !
  CtxIterator       current_path_iterator;
#endif

  uint32_t          bail;
};

/* Forward a single drawlist entry to the context's backend.
 *
 * @ctx:   context whose backend->process hook is invoked
 * @entry: entry to dispatch; it is reinterpreted as a CtxCommand pointer
 *
 * Neither ctx nor ctx->backend is checked for NULL.
 */
static inline void
ctx_process (Ctx *ctx, CtxEntry *entry)
{
  CtxBackend *sink    = ctx->backend;
  CtxCommand *command = (CtxCommand *) entry;

  sink->process (ctx, command);
}

/* Create / release a CtxBuffer of the given dimensions and pixel format. */
CtxBuffer *ctx_buffer_new (int width, int height,
                           CtxPixelFormat pixel_format);
void ctx_buffer_free (CtxBuffer *buffer);

/* Forward declaration: clears the gradient stops held in a state. */
void
ctx_state_gradient_clear_stops (CtxState *state);

/* Forward declarations of the entry interpreters; each takes the state to
 * update, the entry to interpret and an opaque data pointer. */
static inline void ctx_interpret_style         (CtxState *state, CtxEntry *entry, void *data);
static inline void ctx_interpret_transforms    (CtxState *state, CtxEntry *entry, void *data);
static inline void ctx_interpret_pos           (CtxState *state, CtxEntry *entry, void *data);
static inline void ctx_interpret_pos_transform (CtxState *state, CtxEntry *entry, void *data);

/* One (path, length, data) record.  NOTE(review): the name suggests an entry
 * of a built-in file table - confirm where the table is defined. */
struct _CtxInternalFsEntry
{
  char *path;    /* name of the entry */
  int   length;  /* presumably the number of bytes at data - confirm */
  char *data;    /* contents */
};

/* Description of one pixel format: storage parameters plus the function
 * pointers used to convert to/from the compositing representation and to
 * apply coverage. */
struct _CtxPixelFormatInfo
{
  CtxPixelFormat pixel_format:8;
  uint8_t        components; /* number of components */
  uint8_t        bpp; /* bits  per pixel - for doing offset computations
                         along with rowstride found elsewhere, if 0 it indicates
                         1/8  */
  uint8_t        ebpp; /*effective bytes per pixel - for doing offset
                         computations, for formats that get converted, the
                         ebpp of the working space applied */
  uint8_t        dither_red_blue;
  uint8_t        dither_green;
  CtxPixelFormat composite_format:8; /* format used while compositing */

  /* convert count pixels starting at column x from src into comp */
  void         (*to_comp) (CtxRasterizer *r,
                           int x, const void * __restrict__ src, uint8_t * __restrict__ comp, int count);
  /* convert count pixels starting at column x from comp into dst */
  void         (*from_comp) (CtxRasterizer *r,
                             int x, const uint8_t * __restrict__ comp, void *__restrict__ dst, int count);
  /* same signature as CtxRasterizer.apply_coverage */
  void         (*apply_coverage) (CtxRasterizer *r, uint8_t * __restrict__ dst, uint8_t * __restrict__ src, int x, uint8_t *coverage,
                          unsigned int count);
  /* per-rasterizer setup hook for this format */
  void         (*setup) (CtxRasterizer *r);
};


/* Forward declarations of state / drawlist helpers used before their
 * definitions. */
static inline void
_ctx_user_to_device (CtxState *state, float *x, float *y);
static void
_ctx_user_to_device_distance (CtxState *state, float *x, float *y);
static void ctx_state_init (CtxState *state);
static inline void
ctx_interpret_pos_bare (CtxState *state, CtxEntry *entry, void *data);
static inline void
ctx_drawlist_deinit (CtxDrawlist *drawlist);

/* Look up the CtxPixelFormatInfo for a pixel format (a plain function; the
 * function-pointer variant is kept commented out). */
//extern CtxPixelFormatInfo *(*ctx_pixel_format_info) (CtxPixelFormat format);
CtxPixelFormatInfo *ctx_pixel_format_info (CtxPixelFormat format);



/* Compositing entry points exposed as function pointers that are defined
 * elsewhere.  NOTE(review): presumably so an implementation can be selected
 * at runtime - confirm. */
extern void (*ctx_composite_stroke_rect) (CtxRasterizer *rasterizer,
                           float          x0,
                           float          y0,
                           float          x1,
                           float          y1,
                           float          line_width);

extern void (*ctx_composite_setup) (CtxRasterizer *rasterizer);


/* Header of one shape-cache entry; the payload follows the header in the
 * same allocation through the flexible array member data[]. */
struct _CtxShapeEntry
{
  uint32_t hash;
  uint16_t width;
  uint16_t height;
  int      last_frame; // xxx
  uint32_t uses;  // instrumented for longer keep-alive
  uint8_t  data[]; /* payload; NOTE(review): presumably width*height coverage bytes - confirm */
};

typedef struct _CtxShapeEntry CtxShapeEntry;

/* Edge rasterization hook, defined elsewhere.  The trailing CtxShapeEntry
 * parameter only exists when CTX_SHAPE_CACHE is enabled, so callers need the
 * same conditional. */
extern void (*ctx_rasterizer_rasterize_edges) (CtxRasterizer *rasterizer, const int fill_rule
#if CTX_SHAPE_CACHE
                ,CtxShapeEntry *shape
#endif
                );



/* Rectangle fill hook, defined elsewhere; takes the rectangle corners and a
 * coverage value. */
extern void (*ctx_composite_fill_rect) (CtxRasterizer *rasterizer,
                           float        x0,
                           float        y0,
                           float        x1,
                           float        y1,
                           uint8_t      cov);


/* UTF-8 helper prototypes (implemented elsewhere). */
int ctx_utf8_len (const unsigned char first_byte);           /* length derived from a sequence's first byte */
const char *ctx_utf8_skip (const char *s, int utf8_length);  /* pointer into s advanced by utf8_length */
int ctx_utf8_strlen (const char *s);                         /* length of s as counted by the utf8 helpers */
/* encode ch into dest; the int result is presumably the byte count - confirm */
int
ctx_unichar_to_utf8 (uint32_t  ch,
                     uint8_t  *dest);

/* decode the code point found at the start of input */
uint32_t
ctx_utf8_to_unichar (const char *input);


typedef struct _CtxHasher CtxHasher;

/* Source-sampling callback stored in CtxRasterizer.fragment: called with a
 * start position (x, y, z), an output pointer, a count and a per-step delta
 * (dx, dy, dz).  NOTE(review): presumably writes count samples to out -
 * confirm the sample format against the implementations. */
typedef void (*CtxFragment) (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz);

/* Number of floats in CtxRasterizer.kernel (present with CTX_ENABLE_SHADOW_BLUR). */
#define CTX_MAX_GAUSSIAN_KERNEL_DIM    512



/* Fixed-size table of shape-cache entry pointers plus a size counter. */
struct _CtxShapeCache
{
  CtxShapeEntry *entries[CTX_SHAPE_CACHE_ENTRIES];
  long size;   /* NOTE(review): units (bytes vs entries) not visible here - confirm */
};

typedef struct _CtxShapeCache CtxShapeCache;

/* Identifies which coverage-application path a rasterizer uses (stored in
 * CtxRasterizer.comp).  CTX_COV_PATH_FALLBACK is 0, so a zeroed rasterizer
 * selects the fallback.  The numeric values follow declaration order. */
typedef enum {
   CTX_COV_PATH_FALLBACK =0,
   CTX_COV_PATH_RGBA8_OVER,
   CTX_COV_PATH_RGBA8_COPY,
   CTX_COV_PATH_RGBA8_COPY_FRAGMENT,
   CTX_COV_PATH_RGBA8_OVER_FRAGMENT,
   CTX_COV_PATH_GRAYA8_COPY,
   CTX_COV_PATH_GRAY1_COPY,


   CTX_COV_PATH_RGB565_COPY,
   CTX_COV_PATH_RGB332_COPY,
   CTX_COV_PATH_GRAY8_COPY,
   CTX_COV_PATH_RGBAF_COPY,
   CTX_COV_PATH_RGB8_COPY,
   CTX_COV_PATH_CMYK8_COPY,
   CTX_COV_PATH_CMYKA8_COPY,
   CTX_COV_PATH_CMYKAF_COPY,
   CTX_COV_PATH_GRAYAF_COPY 




} CtxCovPath;

/* State of the scanline rasterizer.  It begins with a CtxBackend member and
 * carries the compositing hooks, scan-conversion counters, target ("blit")
 * geometry, the edge list and several compile-time optional caches.
 * Member order matters: shape_cache must stay last (see its comment). */
struct _CtxRasterizer
{
  CtxBackend backend;
  /* these should be initialized and used as the bounds for rendering into the
     buffer as well XXX: not yet in use, and when in use will only be
     correct for axis aligned clips - proper rasterization of a clipping path
     would be yet another refinement on top.
   */


/* parameter list shared by comp_op style compositing functions */
#define CTX_COMPOSITE_ARGUMENTS CtxRasterizer *rasterizer, uint8_t * __restrict__ dst, uint8_t * __restrict__ src, int x0, uint8_t * __restrict__ coverage, unsigned int count
  void (*comp_op)(CTX_COMPOSITE_ARGUMENTS);
  CtxFragment fragment;     /* source sampling callback */
  //Ctx       *ctx;
  CtxState  *state;
  void      *buf;           /* target pixel memory */
  int fast_aa;
  CtxCovPath  comp;         /* selected coverage path */
  void       (*apply_coverage) (CtxRasterizer *r, uint8_t * __restrict__ dst, uint8_t * __restrict__ src, int x, uint8_t *coverage, unsigned int count);

  unsigned int aa;          // level of vertical aa
  int        uses_transforms;
  unsigned int prev_active_edges;
  unsigned int active_edges;
  unsigned int pending_edges;
  unsigned int ending_edges;
  unsigned int edge_pos;         // where we're at in iterating all edges
  unsigned int needs_aa3; // count of how many edges implies antialiasing
  unsigned int needs_aa5; // count of how many edges implies antialiasing
  unsigned int needs_aa15; // count of how many edges implies antialiasing
  unsigned int horizontal_edges;

  int        scanline;
  int        scan_min;
  int        scan_max;
  int        col_min;
  int        col_max;

  int        inner_x;
  int        inner_y;

  float      x;
  float      y;

  float      first_x;
  float      first_y;

  /* target rectangle and stride; 16 bit, so values are limited to 65535 */
  uint16_t    blit_x;
  uint16_t    blit_y;
  uint16_t    blit_width;
  uint16_t    blit_height;
  uint16_t    blit_stride;

  unsigned int  clip_rectangle:1;
  unsigned int  has_shape:2;
  int  has_prev:2;          /* signed 2-bit field: representable range is -2..1 */
  unsigned int  preserve:1;
#if CTX_ENABLE_SHADOW_BLUR
  unsigned int  in_shadow:1;
#endif
  unsigned int  in_text:1;
  unsigned int  swap_red_green:1;

#if CTX_BRAILLE_TEXT
  unsigned int  term_glyphs:1; // store appropriate glyphs for redisplay
#endif
  int        shadow_x;
#if CTX_BRAILLE_TEXT
  CtxList   *glyphs;
#endif
  CtxPixelFormatInfo *format;
  Ctx       *texture_source; /* normally same as ctx */
  int        shadow_y;

  uint8_t    color[4*5];   // in compositing format
  uint16_t   color_native;  //
  uint16_t   color_nativeB[5];

  int edges[CTX_MAX_EDGES]; // integer position in edge array
  CtxDrawlist edge_list;

#if CTX_GRADIENTS
#if CTX_GRADIENT_CACHE
  int gradient_cache_valid;
  uint8_t gradient_cache_u8[CTX_GRADIENT_CACHE_ELEMENTS][4];
  int gradient_cache_elements;
#endif
#endif

#if CTX_ENABLE_CLIP
  CtxBuffer *clip_buffer;
#endif

#if CTX_COMPOSITING_GROUPS
  void      *saved_buf; // when group redirected
  CtxBuffer *group[CTX_GROUP_MAX];
#endif
#if CTX_ENABLE_SHADOW_BLUR
  float      kernel[CTX_MAX_GAUSSIAN_KERNEL_DIM];
#endif

  /* NOTE(review): static_OPAQUE is an unusual macro name for this file and is
   * not defined anywhere in view - verify it is intentional */
#if static_OPAQUE
  uint8_t opaque[4096];
#endif

#if CTX_SHAPE_CACHE
  CtxShapeCache shape_cache; /* needs to be at end of struct, it
                                is excluded from clearing */
#endif
};

/* SHA-1 working state used by the ctx_sha1_* functions: a 64-bit length
 * counter, five 32-bit state words and a 64-byte block buffer with its fill
 * level (curlen). */
struct _CtxSHA1 {
    uint64_t length;
    uint32_t state[5], curlen;
    unsigned char buf[64];
};
/* Two-word hash state; used per state level by CtxHasher. */
typedef struct _CtxMurmur CtxMurmur;
struct _CtxMurmur {
    uint32_t state[2];
};


/* A (pos, active) pair; arrays of these are produced by
 * ctx_hasher_get_active_info and consumed by ctx_render_ctx_masked.
 * NOTE(review): pos presumably indexes a drawlist and active is a mask -
 * confirm against the hasher implementation. */
typedef struct CtxCommandState
{
  uint32_t pos;
  uint32_t active;
} CtxCommandState;

/* Hashing backend state.  It embeds a complete CtxRasterizer as its first
 * member and adds a cols x rows grid description, per-state-level fill and
 * stroke hash states, and a growable CtxCommandState array. */
struct _CtxHasher
{
  CtxRasterizer rasterizer;
  int           cols;
  int           rows;
  uint32_t     *hashes;          /* NOTE(review): presumably cols*rows entries - confirm */
  CtxMurmur     murmur_fill[CTX_MAX_STATES]; 
  CtxMurmur     murmur_stroke[CTX_MAX_STATES];
  int           source_level;
  int           pos; 
  //CtxList *active_info;

  CtxCommandState *active_info;       /* array of command states */
  int              active_info_size;  /* presumably allocated capacity - confirm */
  int              active_info_count; /* presumably entries in use - confirm */
};

#if CTX_RASTERIZER
/* release resources held by a rasterizer */
void ctx_rasterizer_deinit (CtxRasterizer *rasterizer);
#endif

/* Mouse reporting levels accepted as the mode argument of _ctx_mouse; each
 * level adds events to the previous one. */
enum {
  NC_MOUSE_NONE  = 0,
  NC_MOUSE_PRESS = 1,  /* "mouse-pressed", "mouse-released" */
  NC_MOUSE_DRAG  = 2,  /* + "mouse-drag"   (motion with pressed button) */
  NC_MOUSE_ALL   = 3   /* + "mouse-motion" (also delivered for release) */
};
void _ctx_mouse (Ctx *term, int mode);
void nc_at_exit (void);

/* Terminal geometry queries; width/height versus cols/rows units are not
 * visible here (NOTE(review): presumably pixels versus character cells). */
int ctx_terminal_width  (void);
int ctx_terminal_height (void);
int ctx_terminal_cols   (void);
int ctx_terminal_rows   (void);
extern int ctx_frame_ack;

/* Backend record that starts with a CtxBackend member followed by a
 * cols/rows pair and a was_down flag. */
typedef struct _CtxCtx CtxCtx;
struct _CtxCtx
{
   CtxBackend backend;
   int  cols;
   int  rows;
   int  was_down;   /* NOTE(review): presumably the previous pointer-button state - confirm */
};


/* Global tunables, defined elsewhere. */
extern int _ctx_max_threads;
extern int _ctx_enable_hash_cache;
/* Store / fetch a string value on a context.  Note the asymmetry: ctx_set
 * takes a pre-computed key hash and an explicit length, ctx_get takes the
 * key as a string. */
void
ctx_set (Ctx *ctx, uint32_t key_hash, const char *string, int len);
const char *
ctx_get (Ctx *ctx, const char *key);

/* Per-backend constructors; all take the requested width and height. */
Ctx *ctx_new_ctx (int width, int height);
Ctx *ctx_new_fb (int width, int height);
Ctx *ctx_new_headless (int width, int height);
Ctx *ctx_new_kms (int width, int height);
Ctx *ctx_new_sdl (int width, int height);
Ctx *ctx_new_term (int width, int height);
Ctx *ctx_new_termimg (int width, int height);

/* Map a font name to an int; NOTE(review): presumably a font index, with the
 * not-found value to be confirmed against the implementation. */
int ctx_resolve_font (const char *name);

/* ctx_u8_to_float(val_u8): convert an 8-bit channel value (0..255) to a float
 * in 0.0..1.0, either through the ctx_u8_float lookup table or by dividing by
 * 255.  The argument and the whole expansion are parenthesized so that
 * expressions such as ctx_u8_to_float(a + b) convert the sum rather than
 * dividing only the last operand. */
#if CTX_U8_TO_FLOAT_LUT
extern float ctx_u8_float[256];
#define ctx_u8_to_float(val_u8) ctx_u8_float[((uint8_t)(val_u8))]
#else
#define ctx_u8_to_float(val_u8) ((val_u8)/255.0f)
#endif

/* Convert a float channel value to 8 bits without an explicit round or clamp.
 *
 * val_f is scaled by 255/256 and added to 32768.0f; in that binade a float
 * has 8 fractional mantissa bits, so the lowest byte of the bit pattern ends
 * up holding val_f * 255 rounded by the FPU.  That byte is returned.
 *
 * No clamping takes place, unlike the straightforward reference form
 *   val_f < 0.0f ? 0 : val_f > 1.0f ? 0xff : 0xff * val_f + 0.5f
 * so inputs are expected to lie in 0.0 .. 1.0.
 */
static inline uint8_t ctx_float_to_u8 (float val_f)
{
  union { float as_float; uint32_t as_bits; } pun;

  pun.as_float = 32768.0f + val_f * (255.0f / 256.0f);
  return (uint8_t) pun.as_bits;
}


/* Per-channel luminance weights; they sum to 1.0. */
#define CTX_CSS_LUMINANCE_RED   0.3f
#define CTX_CSS_LUMINANCE_GREEN 0.59f
#define CTX_CSS_LUMINANCE_BLUE  0.11f

/* Weighted sum of the first three elements of rgb.
 * works on both float and uint8_t; the result is a float expression either
 * way.  rgb is evaluated three times, so it must be free of side effects;
 * it is parenthesized so pointer expressions like (pixels + 3) index
 * correctly. */
#define CTX_CSS_RGB_TO_LUMINANCE(rgb)  (\
  ((rgb)[0]) * CTX_CSS_LUMINANCE_RED + \
  ((rgb)[1]) * CTX_CSS_LUMINANCE_GREEN +\
  ((rgb)[2]) * CTX_CSS_LUMINANCE_BLUE)

/* Event retrieval for the terminal backends; timeoutms is the wait limit in
 * milliseconds, and ctx_nct_get_event additionally reports a position
 * through x and y. */
const char *ctx_nct_get_event (Ctx *n, int timeoutms, int *x, int *y);
const char *ctx_native_get_event (Ctx *n, int timeoutms);
/* Colour getters: read color (interpreted with state) into out. */
void
ctx_color_get_rgba8 (CtxState *state, CtxColor *color, uint8_t *out);
void ctx_color_get_graya_u8 (CtxState *state, CtxColor *color, uint8_t *out);
float ctx_float_color_rgb_to_gray (CtxState *state, const float *rgb);
void ctx_color_get_graya (CtxState *state, CtxColor *color, float *out);
void ctx_rgb_to_cmyk (float r, float g, float b,
              float *c_out, float *m_out, float *y_out, float *k_out);
uint8_t ctx_u8_color_rgb_to_gray (CtxState *state, const uint8_t *rgb);
#if CTX_ENABLE_CMYK
void ctx_color_get_cmyka (CtxState *state, CtxColor *color, float *out);
#endif
/* Colour setters.  The "d" variants (drgba, dcmyka) are separate entry
 * points; NOTE(review): presumably the device-space forms - confirm. */
static void ctx_color_set_RGBA8 (CtxState *state, CtxColor *color, uint8_t r, uint8_t g, uint8_t b, uint8_t a);
void ctx_color_set_rgba (CtxState *state, CtxColor *color, float r, float g, float b, float a);
static void ctx_color_set_drgba (CtxState *state, CtxColor *color, float r, float g, float b, float a);
/* ctx_color_get_cmyka is declared a second time here, regardless of
 * CTX_ENABLE_CMYK */
void ctx_color_get_cmyka (CtxState *state, CtxColor *color, float *out);
static void ctx_color_set_cmyka (CtxState *state, CtxColor *color, float c, float m, float y, float k, float a);
static void ctx_color_set_dcmyka (CtxState *state, CtxColor *color, float c, float m, float y, float k, float a);
static void ctx_color_set_graya (CtxState *state, CtxColor *color, float gray, float alpha);

int ctx_color_model_get_components (CtxColorModel model);

/* keyed float storage on a state (see also ctx_state_get below) */
static void ctx_state_set (CtxState *state, uint32_t key, float value);

/* set all nine coefficients of a matrix */
static void
ctx_matrix_set (CtxMatrix *matrix, float a, float b, float c, float d, float e, float f, float g, float h, float i);


/* NOTE(review): declared with an empty parameter list, which before C23 is
 * not a prototype; consider (void) if the definition takes no arguments */
static void ctx_font_setup ();
static float ctx_state_get (CtxState *state, uint32_t hash);

#if CTX_RASTERIZER

/* Forward declarations of the rasterizer's path construction and drawing
 * helpers, so they can be referenced before their definitions.  The rel_
 * variants take their coordinates relative to the current point. */

static void
ctx_rasterizer_rel_move_to (CtxRasterizer *rasterizer, float x, float y);
static void
ctx_rasterizer_rel_line_to (CtxRasterizer *rasterizer, float x, float y);

static void
ctx_rasterizer_move_to (CtxRasterizer *rasterizer, float x, float y);
static void
ctx_rasterizer_line_to (CtxRasterizer *rasterizer, float x, float y);
/* cubic curve: two control points (x0,y0), (x1,y1) and the end point (x2,y2) */
static void
ctx_rasterizer_curve_to (CtxRasterizer *rasterizer,
                         float x0, float y0,
                         float x1, float y1,
                         float x2, float y2);
static void
ctx_rasterizer_rel_curve_to (CtxRasterizer *rasterizer,
                         float x0, float y0,
                         float x1, float y1,
                         float x2, float y2);

static void
ctx_rasterizer_reset (CtxRasterizer *rasterizer);
static uint32_t ctx_rasterizer_poly_to_hash (CtxRasterizer *rasterizer);
/* arc around (x, y); angle units are not visible here (NOTE(review):
 * presumably radians - confirm) */
static void
ctx_rasterizer_arc (CtxRasterizer *rasterizer,
                    float        x,
                    float        y,
                    float        radius,
                    float        start_angle,
                    float        end_angle,
                    int          anticlockwise);

/* quadratic curve: one control point (cx, cy) and the end point (x, y) */
static void
ctx_rasterizer_quad_to (CtxRasterizer *rasterizer,
                        float        cx,
                        float        cy,
                        float        x,
                        float        y);

static void
ctx_rasterizer_rel_quad_to (CtxRasterizer *rasterizer,
                        float        cx,
                        float        cy,
                        float        x,
                        float        y);

static void
ctx_rasterizer_rectangle (CtxRasterizer *rasterizer,
                          float x,
                          float y,
                          float width,
                          float height);

static void ctx_rasterizer_finish_shape (CtxRasterizer *rasterizer);
static void ctx_rasterizer_clip (CtxRasterizer *rasterizer);
static void
ctx_rasterizer_set_font (CtxRasterizer *rasterizer, const char *font_name);

/* rgba points at the stop colour; NOTE(review): presumably four floats - confirm */
static void
ctx_rasterizer_gradient_add_stop (CtxRasterizer *rasterizer, float pos, float *rgba);
static void
ctx_rasterizer_set_pixel (CtxRasterizer *rasterizer,
                          uint16_t x,
                          uint16_t y,
                          uint8_t r,
                          uint8_t g,
                          uint8_t b,
                          uint8_t a);
static void
ctx_rasterizer_round_rectangle (CtxRasterizer *rasterizer, float x, float y, float width, float height, float corner_radius);

#endif

/* Colour-space selectors, only declared when colour management is compiled
 * in.  Naming is not uniform: three use the ctx_set_ prefix while
 * ctx_rgb_space does not. */
#if CTX_ENABLE_CM // XXX to be moved to ctx.h
void
ctx_set_drgb_space (Ctx *ctx, int device_space);
void
ctx_set_dcmyk_space (Ctx *ctx, int device_space);
void
ctx_rgb_space (Ctx *ctx, int device_space);
void
ctx_set_cmyk_space (Ctx *ctx, int device_space);
#endif

#endif

/* Initialise a caller-provided rasterizer for a target buffer.
 *
 * data is the pixel memory, (x, y, width, height) the target rectangle,
 * stride the row pitch (NOTE(review): presumably in bytes - confirm) and
 * pixel_format / antialias select format and quality.  Returns a
 * CtxRasterizer pointer (presumably the one passed in - confirm). */
CtxRasterizer *
ctx_rasterizer_init (CtxRasterizer *rasterizer, Ctx *ctx, Ctx *texture_source, CtxState *state, void *data, int x, int y, int width, int height, int stride, CtxPixelFormat pixel_format, CtxAntialias antialias);

/* Interpolate between two 8-bit values.
 *
 * dx is the weight of v1 on a 0..255 scale; the division uses a shift by 8
 * (i.e. /256) instead of the exact /255, so dx == 255 does not quite reach
 * v1.  The exact form would be  v0 + ((v1-v0) * dx)/255.
 */
CTX_INLINE static uint8_t ctx_lerp_u8 (uint8_t v0, uint8_t v1, uint8_t dx)
{
  const int start_fixed = v0 << 8;   /* v0 in 8.8 fixed point */
  const int span        = v1 - v0;   /* may be negative */

  return (start_fixed + dx * span) >> 8;
}

/* Interpolate two packed 4x8-bit pixels, all four channels at once.
 *
 * The even bytes (mask 0x00ff00ff, "rb") and the odd bytes (mask 0xff00ff00,
 * "ga") are processed as two pairs of 16-bit lanes inside 32-bit words; the
 * arithmetic relies on unsigned wrap-around so that negative per-lane deltas
 * cancel out again in the final sum.  dx is the weight of v1 (0..255).
 *
 * Equivalent in intent to running ctx_lerp_u8 on each of the four bytes.
 */
CTX_INLINE static uint32_t ctx_lerp_RGBA8 (const uint32_t v0, const uint32_t v1, const uint8_t dx)
{
  const uint32_t weight  = dx;
  const uint32_t from_rb = v0 & 0x00ff00ff;
  const uint32_t from_ga = v0 & 0xff00ff00;
  const uint32_t to_rb   = v1 & 0x00ff00ff;
  const uint32_t to_ga   = (v1 & 0xff00ff00);
  const uint32_t step_rb = to_rb - from_rb;
  const uint32_t step_ga = (to_ga >> 8) - (from_ga >> 8);

  const uint32_t out_rb = (from_rb + ((0xff00ff + step_rb * weight) >> 8)) & 0x00ff00ff;
  const uint32_t out_ga = (from_ga + (0xff00ff + step_ga * weight))        & 0xff00ff00;

  return out_rb | out_ga;
}

/* Same interpolation as ctx_lerp_RGBA8, but the result is delivered as two
 * separate words instead of being OR-ed together:
 *   *dest_rb receives the even bytes (mask 0x00ff00ff),
 *   *dest_ga receives the odd bytes  (mask 0xff00ff00).
 * dx is the weight of v1 (0..255).  *dest_rb is written before *dest_ga.
 */
CTX_INLINE static void ctx_lerp_RGBA8_split (const uint32_t v0, const uint32_t v1, const uint8_t dx,
                                             uint32_t *dest_ga, uint32_t *dest_rb)
{
  const uint32_t weight  = dx;
  const uint32_t from_rb = v0 & 0x00ff00ff;
  const uint32_t from_ga = v0 & 0xff00ff00;
  const uint32_t to_rb   = v1 & 0x00ff00ff;
  const uint32_t to_ga   = v1 & 0xff00ff00;
  const uint32_t step_rb = to_rb - from_rb;
  const uint32_t step_ga = (to_ga >> 8) - (from_ga >> 8);

  *dest_rb = (from_rb + ((0xff00ff + step_rb * weight) >> 8)) & 0x00ff00ff;
  *dest_ga = (from_ga + (0xff00ff + step_ga * weight))        & 0xff00ff00;
}

/* Interpolate between two pixels that are already split into lane pairs (as
 * produced by ctx_lerp_RGBA8_split) and return the recombined packed pixel.
 *
 * @di_ga, @di_rb: start pixel, odd bytes (0xff00ff00) / even bytes (0x00ff00ff)
 * @si_ga, @si_rb: end pixel, same layout
 * @dx:            weight of the end pixel, 0..255
 *
 * The 0xff00ff00 mask has to be applied to the complete sum
 * di_ga + bias + delta * weight: a negative green delta is stored with a
 * borrow that only cancels once di_ga has been added.  Masking the delta
 * term first (as this function used to do) let that carry escape into bit 16,
 * e.g. di_ga=0x0000ff00, si_ga=0, dx=128 yielded 0x00018000 - a spurious
 * low bit in the neighbouring even-byte lane - and made the result differ
 * from ctx_lerp_RGBA8 on the same inputs.
 */
CTX_INLINE static uint32_t ctx_lerp_RGBA8_merge (uint32_t di_ga, uint32_t di_rb, uint32_t si_ga, uint32_t si_rb, const uint8_t dx)
{
  const uint32_t cov = dx;
  const uint32_t d_rb = si_rb - di_rb;
  const uint32_t d_ga = (si_ga >> 8) - (di_ga >> 8);
  return
     (((di_rb + ((0xff00ff + d_rb * cov)>>8)) & 0x00ff00ff)) |
     (((di_ga + (0xff00ff + d_ga * cov))      & 0xff00ff00));
}

/* Variant of ctx_lerp_RGBA8 for a source that the caller has already split.
 *
 * @v0:    packed start pixel
 * @si_ga: odd bytes of the end pixel, used as-is in the delta - unlike
 *         ctx_lerp_RGBA8 no ">> 8" is applied to it here, so the caller is
 *         expected to pass it pre-shifted into the low byte of each lane
 * @si_rb: even bytes of the end pixel (mask 0x00ff00ff)
 * @dx:    weight of the end pixel, 0..255
 */
CTX_INLINE static uint32_t ctx_lerp_RGBA8_2 (const uint32_t v0, uint32_t si_ga, uint32_t si_rb, const uint8_t dx)
{
  const uint32_t weight  = dx;
  const uint32_t from_rb = v0 & 0x00ff00ff;
  const uint32_t from_ga = (v0 & 0xff00ff00);
  const uint32_t step_rb = si_rb - from_rb;
  const uint32_t step_ga = si_ga - (from_ga >> 8);

  const uint32_t out_rb = (from_rb + ((0xff00ff + step_rb * weight) >> 8)) & 0x00ff00ff;
  const uint32_t out_ga = (from_ga + (0xff00ff + step_ga * weight))        & 0xff00ff00;

  return out_rb | out_ga;
}

/* Linear interpolation: returns v0 for dx == 0 and moves towards v1 as dx
 * approaches 1.  dx is not clamped, so values outside 0..1 extrapolate. */
CTX_INLINE static float
ctx_lerpf (float v0, float v1, float dx)
{
  const float span = v1 - v0;
  return v0 + span * dx;
}

/* Evaluate a Catmull-Rom cubic through four samples.
 *
 * The curve passes through v1 at t == 0 (the constant term is v1); v0 and v3
 * only shape the tangents.  t is not clamped.
 */
CTX_INLINE static float
ctx_catmull_rom (float v0, float v1, float v2, float v3, float t)
{
   const float cubic     = 0.5f * (-v0 + 3 * v1 - 3 * v2 + v3);
   const float quadratic = 0.5f * (2 * v0 - 5 * v1 + 4 * v2 - v3);
   const float linear    = 0.5f * (-v0 + v2);
   const float constant  = v1;

   return cubic * t * t * t +
          quadratic * t * t +
          linear * t +
          constant;
}

/* Three-sample quadratic for the left end of a sample run, where no sample
 * precedes v0: the curve starts at v0 for t == 0 (the constant term is v0).
 * t is not clamped. */
CTX_INLINE static float
ctx_catmull_rom_left (float v0, float v1, float v2, float t)
{
   const float quadratic = 0.5f * (v0 - 2 * v1 + v2);
   const float linear    = 0.5f * (-3 * v0 + 4 * v1 - v2);
   const float constant  = v0;

   return quadratic * t * t +
          linear * t +
          constant;
}

/* Three-sample quadratic for the right end of a sample run, where no sample
 * follows v2: the curve starts at v1 for t == 0 (the constant term is v1).
 * t is not clamped. */
CTX_INLINE static float
ctx_catmull_rom_right (float v0, float v1, float v2, float t)
{
   const float quadratic = 0.5f * (v0 - 2 * v1 + v2);
   const float linear    = 0.5f * (-v0 + v2);
   const float constant  = v1;

   return quadratic * t * t +
          linear * t +
          constant;
}


/* Generic min/max, only defined when the includer has not provided its own.
 * Both arguments are expanded twice, so they must not have side effects
 * (CTX_MIN(i++, j) may increment i twice). */
#ifndef CTX_MIN
#define CTX_MIN(a,b)  (((a)<(b))?(a):(b))
#endif
#ifndef CTX_MAX
#define CTX_MAX(a,b)  (((a)>(b))?(a):(b))
#endif

/* NOTE: the parameter order is (size, count), the reverse of calloc()'s
 * (count, size). */
static inline void *ctx_calloc (size_t size, size_t count);

void ctx_screenshot (Ctx *ctx, const char *output_path);


/* SHA-1 object API: new/free manage a CtxSHA1, process feeds len bytes of
 * msg, done writes the digest to out. */
CtxSHA1 *ctx_sha1_new (void);
void ctx_sha1_free (CtxSHA1 *sha1);
int ctx_sha1_process(CtxSHA1 *sha1, const unsigned char * msg, unsigned long len);
int ctx_sha1_done(CtxSHA1 * sha1, unsigned char *out);

void _ctx_texture_lock (void);
void _ctx_texture_unlock (void);
uint8_t *ctx_define_texture_pixel_data (CtxEntry *entry);
/* matches the freefunc signature expected by ctx_texture_init */
void ctx_buffer_pixels_free (void *pixels, void *userdata);

/*ctx_texture_init:
 * return value: eid, as passed in or if NULL generated by hashing pixels and width/height
 * XXX  this is low-level and not to be used directly use define_texture instead.  XXX
 *
 * freefunc, when given, is called with (pixels, user_data); NOTE(review):
 * presumably when the texture is released - confirm.
 */
const char *ctx_texture_init (
                      Ctx        *ctx,
                      const char *eid,
                      int         width,
                      int         height,
                      int         stride,
                      CtxPixelFormat format,
                      void       *space,
                      uint8_t    *pixels,
                      void (*freefunc) (void *pixels, void *user_data),
                      void *user_data);

#if CTX_TILED
#if !__COSMOPOLITAN__
//#include <threads.h>
#endif
#endif
typedef struct _CtxTiled CtxTiled;


/* Interface of an input event source: a private pointer plus a table of
 * function pointers, each receiving the source itself as first argument. */
typedef struct _EvSource EvSource;
struct _EvSource
{
  void   *priv; /* private storage  */

  /* returns non 0 if there is events waiting */
  int   (*has_event) (EvSource *ev_source);

  /* get an event, the returned event should be freed by the caller  */
  char *(*get_event) (EvSource *ev_source);

  /* destroy/unref this instance */
  void  (*destroy)   (EvSource *ev_source);

  /* get the underlying fd, useful for using select on  */
  int   (*get_fd)    (EvSource *ev_source);


  void  (*set_coord) (EvSource *ev_source, double x, double y);
  /* set_coord is needed to warp relative cursors into normalized range,
   * like normal mice/trackpads/nipples - to obey edges and more.
   */

  /* if this returns non-0 select can be used for non-blocking.. */
};

/* State of a tiled backend.  It starts with a CtxBackend member, followed by
 * frame bookkeeping, per-thread host contexts, a per-tile hash table and
 * thread-affinity table (CTX_HASH_ROWS * CTX_HASH_COLS entries each), pointer
 * state and up to four event sources. */
struct _CtxTiled
{
   CtxBackend backend;
   void (*show_frame) (void *backend, int block);
   int           width;
   int           height;
   int           cols;
   int           rows;
   int           was_down;
   uint8_t      *pixels;
   Ctx          *ctx_copy;
   Ctx          *host[CTX_MAX_THREADS];          /* one context per render thread slot */
   CtxAntialias  antialias;
   int           quit;
#if CTX_TILED
   //_Atomic 
           int   thread_quit;                    /* plain int: the _Atomic qualifier is commented out */
#endif
   int           shown_frame;
   int           render_frame;
   int           rendered_frame[CTX_MAX_THREADS];
   int           frame;
   int       min_col; // hasher cols and rows
   int       min_row;
   int       max_col;
   int       max_row;
  // CtxList  *active_info;
   CtxCommandState *active_info;
  // int              active_info_size;
   int              active_info_count;
   uint32_t  hashes[CTX_HASH_ROWS * CTX_HASH_COLS];
   int8_t    tile_affinity[CTX_HASH_ROWS * CTX_HASH_COLS]; // which render thread no is
                                                           // responsible for a tile
                                                           //

   int           pointer_down[3];

   CtxCursor     shown_cursor;
   int          vt_active;
   EvSource    *evsource[4];
   int          evsource_count;                  /* number of evsource[] slots in use */
   uint8_t      *fb;
#if CTX_THREADS
#if CTX_TILED
   cnd_t  cond;
   mtx_t  mtx;
#endif
#endif
};

/* Return the Ctx a backend belongs to.
 *
 * @backend: pointer to a CtxBackend, or to a structure that begins with one;
 *           may be NULL
 *
 * Returns the backend's ctx member, or NULL when backend is NULL.
 */
static inline Ctx *ctx_backend_get_ctx (void *backend)
{
  if (!backend)
    return NULL;
  return ((CtxBackend*) backend)->ctx;
}

/* Forward declaration; takes the state and the texture buffer to prepare. */
void
_ctx_texture_prepare_color_management (CtxState  *state,
                                       CtxBuffer *buffer);

/* Keyed lookup on a context by hash; NOTE(review): presumably returns
 * non-zero when a value is stored under hash - confirm. */
int ctx_is_set (Ctx *ctx, uint32_t hash);

/* Internal constructor for a drawlist-backed context of the given size. */
static Ctx *_ctx_new_drawlist (int width, int height);

/**
 * ctx_new_ui:
 *
 * Create a new interactive ctx context, might depend on additional
 * integration.
 *
 * The values for backend are as for the environment variable,
 * NULL for auto.
 */
static Ctx *ctx_new_ui (int width, int height, const char *backend);

/* Transform the point (*x, *y) in place by the 3x3 matrix m.
 *
 * The point is treated as (x, y, 1); the first two rows give the transformed
 * coordinates and the third row gives the divisor w, so perspective matrices
 * are supported.  w is not checked against zero.
 */
static inline void
_ctx_matrix_apply_transform (const CtxMatrix *m, float *x, float *y)
{
  const float px = *x;
  const float py = *y;

  const float w = (px * m->m[2][0]) + (py * m->m[2][1]) + m->m[2][2];

  *x = ( (px * m->m[0][0]) + (py * m->m[0][1]) + m->m[0][2]) / w;
  *y = ( (px * m->m[1][0]) + (py * m->m[1][1]) + m->m[1][2]) / w;
}

/* Compute the 3x3 matrix product t * s and store it in *result.
 *
 * The product is accumulated in a temporary and copied out at the end, so
 * result may be the same object as t or s.
 */
static inline void
_ctx_matrix_multiply (CtxMatrix       *result,
                      const CtxMatrix *t,
                      const CtxMatrix *s)
{
  CtxMatrix product;

  for (unsigned int row = 0; row < 3; row++)
    for (unsigned int col = 0; col < 3; col++)
      product.m[row][col] = t->m[row][0] * s->m[0][col]
                          + t->m[row][1] * s->m[1][col]
                          + t->m[row][2] * s->m[2][col];

  *result = product;
}

/* Reset matrix to the 3x3 identity: ones on the diagonal, zeros elsewhere. */
static inline void
_ctx_matrix_identity (CtxMatrix *matrix)
{
  for (int row = 0; row < 3; row++)
    for (int col = 0; col < 3; col++)
      matrix->m[row][col] = (row == col) ? 1.0f : 0.0f;
}


static int ctx_float_to_string_index (float val);

/* Render from ctx into d_ctx, driven by an array of count CtxCommandState
 * records and a mask; the array type matches what
 * ctx_hasher_get_active_info returns. */
void
ctx_render_ctx_masked (Ctx *ctx, Ctx *d_ctx, CtxCommandState *active_list, int count, uint32_t mask);

/* Returns the hasher's CtxCommandState array and stores its length in *count. */
CtxCommandState *ctx_hasher_get_active_info (Ctx *ctx, int *count);

/* keyed storage of len bytes of data on a state */
static void ctx_state_set_blob (CtxState *state, uint32_t key, uint8_t *data, int len);

/* CTX_EXPORT marks functions that must survive emscripten's dead-code
 * elimination; it expands to nothing on other targets. */
#if EMSCRIPTEN
#define CTX_EXPORT EMSCRIPTEN_KEEPALIVE
#else
#define CTX_EXPORT
#endif

#endif

#if CTX_EVENTS
#include <sys/select.h>
#endif
#ifndef CTX_DRAWLIST_H
#define CTX_DRAWLIST_H

/* Drawlist internals: forward declarations. */

static int
ctx_conts_for_entry (CtxEntry *entry);
/* prepare iterator for walking drawlist starting at start_pos */
void
ctx_iterator_init (CtxIterator      *iterator,
                   CtxDrawlist  *drawlist,
                   int               start_pos,
                   int               flags);

int ctx_iterator_pos (CtxIterator *iterator);

static void
ctx_drawlist_resize (CtxDrawlist *drawlist, int desired_size);
/* the add/insert functions return an int; NOTE(review): presumably the
 * position of the added entry, as ctx_edgelist_add_single does - confirm */
static int
ctx_drawlist_add_single (CtxDrawlist *drawlist, CtxEntry *entry);
static int ctx_drawlist_add_entry (CtxDrawlist *drawlist, CtxEntry *entry);
int
ctx_drawlist_insert_entry (CtxDrawlist *drawlist, int pos, CtxEntry *entry);
int
ctx_add_data (Ctx *ctx, void *data, int length);

int ctx_drawlist_add_u32 (CtxDrawlist *drawlist, CtxCode code, uint32_t u32[2]);
int ctx_drawlist_add_data (CtxDrawlist *drawlist, const void *data, int length);

/* Entry constructors: build a CtxEntry with the given code and payload
 * (none, two floats, two u32, four s16 or eight u8). */
static CtxEntry
ctx_void (CtxCode code);
static inline CtxEntry
ctx_f (CtxCode code, float x, float y);
static CtxEntry
ctx_u32 (CtxCode code, uint32_t x, uint32_t y);
#if 0
static CtxEntry
ctx_s32 (CtxCode code, int32_t x, int32_t y);
#endif

static inline CtxEntry
ctx_s16 (CtxCode code, int x0, int y0, int x1, int y1);
static CtxEntry
ctx_u8 (CtxCode code,
        uint8_t a, uint8_t b, uint8_t c, uint8_t d,
        uint8_t e, uint8_t f, uint8_t g, uint8_t h);

/* Command emission helpers.
 *
 * Each macro builds a four-element CtxEntry array on the stack whose first
 * element carries the command (the remaining elements are zero-initialised)
 * and passes it to ctx_process().  They expand to a single
 * do { ... } while(0) statement and require a variable named `ctx` to be in
 * scope at the point of use.
 *
 * The definitions deliberately end without a trailing backslash: a dangling
 * line continuation would splice whatever follows into the macro body as
 * soon as the blank separator line is removed.
 */
#define CTX_PROCESS_VOID(cmd) do {\
  CtxEntry commands[4] = {{cmd}};\
  ctx_process (ctx, &commands[0]);}while(0)

#define CTX_PROCESS_F(cmd,x,y) do {\
  CtxEntry commands[4] = {ctx_f(cmd,x,y),};\
  ctx_process (ctx, &commands[0]);}while(0)

#define CTX_PROCESS_F1(cmd,x) do {\
  CtxEntry commands[4] = {ctx_f(cmd,x,0),};\
  ctx_process (ctx, &commands[0]);}while(0)

#define CTX_PROCESS_U32(cmd, x, y) do {\
  CtxEntry commands[4] = {ctx_u32(cmd, x, y)};\
  ctx_process (ctx, &commands[0]);}while(0)

#define CTX_PROCESS_U8(cmd, x) do {\
  CtxEntry commands[4] = {ctx_u8(cmd, x,0,0,0,0,0,0,0)};\
  ctx_process (ctx, &commands[0]);}while(0)


/* Bit-packing helpers, only declared when the packer is compiled in. */
#if CTX_BITPACK_PACKER
static unsigned int
ctx_last_history (CtxDrawlist *drawlist);
#endif

#if CTX_BITPACK_PACKER
static void
ctx_drawlist_remove_tiny_curves (CtxDrawlist *drawlist, int start_pos);

static void
ctx_drawlist_bitpack (CtxDrawlist *drawlist, unsigned int start_pos);
#endif

/* Emit a command that carries a string plus two numeric arguments.  The
 * _with_len variant takes the string length explicitly; NOTE(review): the
 * others presumably derive it from the NUL terminator - confirm. */
static void
ctx_process_cmd_str (Ctx *ctx, CtxCode code, const char *string, uint32_t arg0, uint32_t arg1);
static void
ctx_process_cmd_str_float (Ctx *ctx, CtxCode code, const char *string, float arg0, float arg1);
static void
ctx_process_cmd_str_with_len (Ctx *ctx, CtxCode code, const char *string, uint32_t arg0, uint32_t arg1, int len);

/* Element type of the rasterizer's edge list (see ctx_edgelist_resize and
 * ctx_edgelist_add_single, which store CtxSegment values in a CtxDrawlist).
 *
 * Packed to 1-byte alignment, so there is no padding between members and
 * members may be unaligned.  With CTX_32BIT_SEGMENTS the code and the four
 * coordinates are 32 bit wide; the coordinate member keeps the name s16 in
 * both configurations. */
#pragma pack(push,1)
typedef struct 
CtxSegment {
#if CTX_32BIT_SEGMENTS
  uint32_t code;
#else
  uint16_t code;
#endif
  union {
#if CTX_32BIT_SEGMENTS
   int32_t s16[4];
#else
   int16_t s16[4]; 
#endif
   uint32_t u32[2];
  } data;
  int32_t val;
  int32_t delta;
} CtxSegment;
#pragma pack(pop)



/* Build a CtxSegment from a code and two coordinate pairs.
 *
 * Only code and data.s16[0..3] are assigned; val and delta are left
 * uninitialised and must be filled in by the caller before being read.
 * The int arguments are narrowed to the element type of data.s16.
 */
static inline CtxSegment
ctx_segment_s16 (CtxCode code, int x0, int y0, int x1, int y1)
{
  CtxSegment segment;
  const int coords[4] = { x0, y0, x1, y1 };

  segment.code = code;
  for (int i = 0; i < 4; i++)
    segment.data.s16[i] = coords[i];
  return segment;
}

/* Resize the storage of an edge list (a CtxDrawlist holding CtxSegment
 * elements).
 *
 * @drawlist:     edge list to resize
 * @desired_size: requested capacity in segments; clamped to
 *                CTX_MIN_EDGE_LIST_SIZE .. CTX_MAX_EDGE_LIST_SIZE
 *
 * With CTX_DRAWLIST_STATIC the list is pointed at a single static buffer of
 * CTX_MAX_EDGE_LIST_SIZE segments and desired_size is ignored.
 *
 * Otherwise a new buffer is allocated and the old contents are copied over.
 * Nothing happens when the list is already at the maximum size or at the
 * requested size.  If the allocation fails the list is left untouched
 * (entries and size keep their previous values), so callers must check the
 * capacity before writing.  drawlist->count is never modified.
 */
static inline void
ctx_edgelist_resize (CtxDrawlist *drawlist, int desired_size)
{
#if CTX_DRAWLIST_STATIC
    {
      static CtxSegment sbuf[CTX_MAX_EDGE_LIST_SIZE];
      (void) desired_size;
      drawlist->entries = (CtxEntry*)&sbuf[0];
      drawlist->size = CTX_MAX_EDGE_LIST_SIZE;
    }
#else
  const int min_size = CTX_MIN_EDGE_LIST_SIZE;
  const int max_size = CTX_MAX_EDGE_LIST_SIZE;
  int new_size;

  if (CTX_UNLIKELY(drawlist->size == max_size))
    { return; }
  new_size = ctx_maxi (desired_size, min_size);
  new_size = ctx_mini (new_size, max_size);
  if (new_size == drawlist->size)
    { return; }

  {
    const size_t item_size = sizeof (CtxSegment);
    /* the cast keeps the header usable from C++ translation units */
    CtxEntry *ne = (CtxEntry *) malloc (item_size * (size_t) new_size);
    if (!ne)
      { return; } /* out of memory: keep the old buffer and size */

    if (drawlist->entries)
      {
        /* copy only what fits: when shrinking, copying the full old size
         * would write past the end of the new allocation */
        int keep = ctx_mini (drawlist->size, new_size);
        memcpy (ne, drawlist->entries, item_size * (size_t) keep);
        free (drawlist->entries);
      }
    drawlist->entries = ne;
    drawlist->size = new_size;
  }
#endif
}


/* Append one segment to an edge list.
 *
 * @drawlist: edge list (its entries are stored as CtxSegment)
 * @entry:    pointer to the segment to append; it is read as a CtxSegment
 *            even though it is typed CtxEntry*
 *
 * Returns the index the segment was stored at.  Returns 0 without storing
 * anything when the list is within 20 entries of CTX_MAX_EDGE_LIST_SIZE, or
 * when growing the list did not yield usable storage (e.g. the allocation
 * failed); note that 0 is also the index of the first stored segment.
 */
static inline int
ctx_edgelist_add_single (CtxDrawlist *drawlist, CtxEntry *entry)
{
  int ret = drawlist->count;

  if (CTX_UNLIKELY(ret >= CTX_MAX_EDGE_LIST_SIZE- 20))
    {
      return 0;
    }
  if (CTX_UNLIKELY(ret + 2 >= drawlist->size))
    {
      int new_ = ctx_maxi (drawlist->size * 2, ret + 1024);
      new_ = ctx_mini (CTX_MAX_EDGE_LIST_SIZE, new_);
      ctx_edgelist_resize (drawlist, new_);
      /* the resize can fail to provide memory; never write through a NULL
       * buffer or past the capacity that is actually available */
      if (CTX_UNLIKELY(!drawlist->entries || ret >= drawlist->size))
        {
          return 0;
        }
    }

  ((CtxSegment*)(drawlist->entries))[ret] = *(CtxSegment*)entry;
  drawlist->count++;
  return ret;
}


#endif


#if CTX_COMPOSITE

#define CTX_FULL_AA 15
#define CTX_REFERENCE 0


/* Bit positions of the four channels inside a packed 32-bit RGBA8 value:
 * R occupies the lowest byte, A the highest. */
#define CTX_RGBA8_R_SHIFT  0
#define CTX_RGBA8_G_SHIFT  8
#define CTX_RGBA8_B_SHIFT  16
#define CTX_RGBA8_A_SHIFT  24

/* Channel masks.  The literal is unsigned: shifting the signed int 0xff left
 * by 24 overflows int (undefined behaviour) and, where it happens to work,
 * yields a negative value that sign-extends when widened to 64 bits. */
#define CTX_RGBA8_R_MASK   (0xffu << CTX_RGBA8_R_SHIFT)
#define CTX_RGBA8_G_MASK   (0xffu << CTX_RGBA8_G_SHIFT)
#define CTX_RGBA8_B_MASK   (0xffu << CTX_RGBA8_B_SHIFT)
#define CTX_RGBA8_A_MASK   (0xffu << CTX_RGBA8_A_SHIFT)

/* Paired masks used to process two channels per 32-bit multiply. */
#define CTX_RGBA8_RB_MASK  (CTX_RGBA8_R_MASK | CTX_RGBA8_B_MASK)
#define CTX_RGBA8_GA_MASK  (CTX_RGBA8_G_MASK | CTX_RGBA8_A_MASK)

/* Magnitude of the fractional part of val: the value minus its truncation
 * towards zero, made non-negative.  The truncation goes through int, so val
 * has to be representable as an int. */
static inline float ctx_fmod1f (float val)
{
  const int whole = (int)(val);
  return ctx_fabsf (val - whole);
}


/* Premultiply ("associate") the colour channels of one RGBA8 pixel by its
 * alpha, in place.
 *
 * @u8: pointer to the four bytes of the pixel; u8[3] is the alpha
 *
 * The pixel is handled as one 32-bit word: G is scaled on its own and R and B
 * together in a single multiply, each followed by >> 8 (so the scaling is
 * by alpha/256, without rounding).  Alpha itself is stored unchanged.
 * The byte-wise equivalent with rounding up would be
 *   u8[c] = (u8[c] * a + 255) >> 8   for c in 0..2.
 *
 * The word is moved in and out with memcpy instead of dereferencing u8 as a
 * uint32_t*, so the pointer does not have to be 4-byte aligned and no
 * aliasing rule is violated.
 */
CTX_INLINE static void
ctx_RGBA8_associate_alpha (uint8_t *u8)
{
  uint32_t val;
  memcpy (&val, u8, sizeof (val));
  {
    uint32_t a = u8[3];
    uint32_t g = (((val & CTX_RGBA8_G_MASK) * a) >> 8) & CTX_RGBA8_G_MASK;
    uint32_t rb =(((val & CTX_RGBA8_RB_MASK) * a) >> 8) & CTX_RGBA8_RB_MASK;
    val = g|rb|(a << CTX_RGBA8_A_SHIFT);
  }
  memcpy (u8, &val, sizeof (val));
}

/* Scales the alpha of one RGBA8 pixel by global_alpha and premultiplies the
 * colour channels by the resulting alpha, in place.
 *
 * The combined alpha is (alpha * global_alpha + 255) >> 8; colour channels
 * are then scaled by it with a plain >> 8.
 */
inline static void
ctx_RGBA8_associate_global_alpha (uint8_t *u8, uint8_t global_alpha)
{
  uint32_t *pixel = (uint32_t*)(u8);
  const uint32_t packed = *pixel;
  const uint32_t alpha = (u8[3] * global_alpha + 255) >> 8;
  const uint32_t red_blue =
    (((packed & CTX_RGBA8_RB_MASK) * alpha) >> 8) & CTX_RGBA8_RB_MASK;
  const uint32_t green =
    (((packed & CTX_RGBA8_G_MASK) * alpha) >> 8) & CTX_RGBA8_G_MASK;
  *pixel = green | red_blue | (alpha << CTX_RGBA8_A_SHIFT);
}

/* Value-returning variant of ctx_RGBA8_associate_global_alpha: takes a packed
 * RGBA8 word (alpha in the top byte), scales its alpha by global_alpha and
 * returns the pixel premultiplied by the combined alpha. */
inline static uint32_t
ctx_RGBA8_associate_global_alpha_u32 (uint32_t val, uint8_t global_alpha)
{
  const uint32_t alpha = ((val>>24) * global_alpha + 255) >> 8;
  const uint32_t red_blue =
    (((val & CTX_RGBA8_RB_MASK) * alpha) >> 8) & CTX_RGBA8_RB_MASK;
  const uint32_t green =
    (((val & CTX_RGBA8_G_MASK) * alpha) >> 8) & CTX_RGBA8_G_MASK;
  return green | red_blue | (alpha << CTX_RGBA8_A_SHIFT);
}

/* Premultiplies one RGBA8 pixel by its alpha, in place, with a fast path
 * that leaves fully opaque pixels (alpha == 255) untouched.  Non-opaque
 * pixels get each colour byte replaced by (c * alpha + 255) >> 8. */
CTX_INLINE static void
ctx_RGBA8_associate_alpha_probably_opaque (uint8_t *u8)
{
  const uint32_t alpha = u8[3];
  if (CTX_UNLIKELY(alpha!=255))
  {
    for (int c = 0; c < 3; c++)
      u8[c] = (u8[c] * alpha + 255) >> 8;
  }
}

/* Bilinear blend of four packed RGBA8 samples.
 *
 * The pairs (isrc00, isrc01) and (isrc10, isrc11) are each blended with
 * weight dx via ctx_lerp_RGBA8_split, which yields separate GA and RB
 * halves; the two intermediate results are then blended with weight dy
 * and recombined by ctx_lerp_RGBA8_merge.
 */
CTX_INLINE static uint32_t ctx_bi_RGBA8 (uint32_t isrc00, uint32_t isrc01, uint32_t isrc10, uint32_t isrc11, uint8_t dx, uint8_t dy)
{
  uint32_t first_ga, first_rb;
  uint32_t second_ga, second_rb;

  ctx_lerp_RGBA8_split (isrc00, isrc01, dx, &first_ga, &first_rb);
  ctx_lerp_RGBA8_split (isrc10, isrc11, dx, &second_ga, &second_rb);

  return ctx_lerp_RGBA8_merge (first_ga, first_rb, second_ga, second_rb, dy);
}

#if CTX_GRADIENTS
#if CTX_GRADIENT_CACHE

/* Maps a gradient position v (nominally 0..1) to the nearest entry of the
 * rasterizer's gradient cache, clamped to the valid index range. */
inline static int ctx_grad_index (CtxRasterizer *rasterizer, float v)
{
  /* + 0.5f rounds to the nearest entry before the truncating conversion */
  int scaled = v * (rasterizer->gradient_cache_elements - 1) + 0.5f;
  return ctx_mini (rasterizer->gradient_cache_elements-1, ctx_maxi (0, scaled));
}

/* Integer variant of ctx_grad_index: v carries 8 fractional bits, which are
 * dropped before clamping to the gradient cache's index range. */
inline static int ctx_grad_index_i (CtxRasterizer *rasterizer, int v)
{
  int index = v >> 8;
  index = ctx_mini (rasterizer->gradient_cache_elements-1, index);
  index = ctx_maxi (0, index);
  return index;
}

//static void
//ctx_gradient_cache_reset (void)
//{
//  ctx_gradient_cache_valid = 0;
//}
#endif


/* Evaluates the current gradient at position x and writes one premultiplied
 * RGBA8 pixel to rgba.
 *
 * @rasterizer: supplies the gradient stops, global alpha and the
 *              swap_red_green flag.
 * @x:          gradient position; clamped to 0..1.
 * @y:          unused.
 * @rgba:       4-byte output pixel.
 *
 * With no stops a gray ramp (v * 255, opaque) is produced and neither global
 * alpha nor premultiplication is applied.
 */
CTX_INLINE static void
_ctx_fragment_gradient_1d_RGBA8 (CtxRasterizer *rasterizer, float x, float y, uint8_t *rgba)
{
  float v = x;
  uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;
  CtxGradient *g = &rasterizer->state->gradient;
  if (v < 0) { v = 0; }
  if (v > 1) { v = 1; }

  if (g->n_stops == 0)
    {
      rgba[0] = rgba[1] = rgba[2] = v * 255;
      rgba[3] = 255;
      return;
    }
  CtxGradientStop *stop      = NULL;
  CtxGradientStop *next_stop = &g->stops[0];
  CtxColor *color;
  /* Look for the pair of adjacent stops with stop->pos <= v < next_stop->pos.
   * On a match the loop breaks with both pointers set; otherwise both are
   * reset to NULL at the end of every iteration. */
  for (int s = 0; s < g->n_stops; s++)
    {
      stop      = &g->stops[s];
      next_stop = &g->stops[s+1];
      if (s + 1 >= g->n_stops) { next_stop = NULL; }
      if (v >= stop->pos && next_stop && v < next_stop->pos)
        { break; }
      stop = NULL;
      next_stop = NULL;
    }
  if (stop == NULL && next_stop)
    {
      color = & (next_stop->color);
    }
  else if (stop && next_stop == NULL)
    {
      color = & (stop->color);
    }
  else if (stop && next_stop)
    {
      /* v lies between two stops: blend their colours */
      uint8_t stop_rgba[4];
      uint8_t next_rgba[4];
      ctx_color_get_rgba8 (rasterizer->state, & (stop->color), stop_rgba);
      ctx_color_get_rgba8 (rasterizer->state, & (next_stop->color), next_rgba);
      /* blend weight in 0..255 across the segment */
      int dx = (v - stop->pos) * 255 / (next_stop->pos - stop->pos);
#if 1
      ((uint32_t*)rgba)[0] = ctx_lerp_RGBA8 (((uint32_t*)stop_rgba)[0],
                                             ((uint32_t*)next_rgba)[0], dx);
#else
      for (int c = 0; c < 4; c++)
        { rgba[c] = ctx_lerp_u8 (stop_rgba[c], next_rgba[c], dx); }
#endif
      rgba[3]=(rgba[3]*global_alpha_u8+255)>>8;
      /* NOTE(review): unlike the single-colour path below, this path does not
       * honour rasterizer->swap_red_green -- confirm whether intended. */
      ctx_RGBA8_associate_alpha (rgba);
      return;
    }
  else
    {
      /* No segment matched: use the last stop.
       * NOTE(review): this is also reached when v is below the first stop's
       * position -- confirm that the last colour is wanted there. */
      color = & (g->stops[g->n_stops-1].color);
    }
  ctx_color_get_rgba8 (rasterizer->state, color, rgba);
  if (rasterizer->swap_red_green)
  {
    /* exchanges bytes 0 and 2 of the pixel */
    uint8_t tmp = rgba[0];
    rgba[0] = rgba[2];
    rgba[2] = tmp;
  }
  rgba[3]=(rgba[3]*global_alpha_u8+255)>>8;
  ctx_RGBA8_associate_alpha (rgba);
}

#if CTX_GRADIENT_CACHE
static void
ctx_gradient_cache_prime (CtxRasterizer *rasterizer);
#endif

/* Produces one RGBA8 gradient pixel for position x.
 *
 * With CTX_GRADIENT_CACHE enabled the pixel is copied from the rasterizer's
 * gradient cache (y is ignored); otherwise the gradient is evaluated
 * directly by _ctx_fragment_gradient_1d_RGBA8.
 */
CTX_INLINE static void
ctx_fragment_gradient_1d_RGBA8 (CtxRasterizer *rasterizer, float x, float y, uint8_t *rgba)
{
#if CTX_GRADIENT_CACHE
  uint32_t *cached =
    (uint32_t*)(&rasterizer->gradient_cache_u8[ctx_grad_index(rasterizer, x)][0]);
  uint32_t *pixel = (uint32_t*)rgba;
  *pixel = *cached;
#else
  _ctx_fragment_gradient_1d_RGBA8 (rasterizer, x, y, rgba);
#endif
}
#endif

/* Premultiplies an 8-bit pixel with `components` channels in place; the
 * last channel is the alpha and is left untouched.  Each colour channel
 * becomes (c * alpha + 255) >> 8. */
CTX_INLINE static void
ctx_u8_associate_alpha (int components, uint8_t *u8)
{
  const int alpha_index = components - 1;
  int c = 0;
  while (c < alpha_index)
    {
      u8[c] = (u8[c] * u8[alpha_index] + 255)>>8;
      c++;
    }
}

#if CTX_GRADIENTS
#if CTX_GRADIENT_CACHE
/* Fills the rasterizer's gradient cache for the current fill source.
 *
 * Does nothing when the cache is already flagged valid.  The number of
 * entries is derived from the gradient's length (linear length or larger
 * radius, default 100) pushed through the current transform, and is kept
 * within 2..CTX_GRADIENT_CACHE_ELEMENTS: fewer than two entries would make
 * ctx_grad_index return a negative index and divide by zero in the fill loop.
 */
static void
ctx_gradient_cache_prime (CtxRasterizer *rasterizer)
{
  // XXX : todo  make the number of element dynamic depending on length of gradient
  // in device coordinates.

  if (rasterizer->gradient_cache_valid)
    return;

  {
    CtxSource *source = &rasterizer->state->gstate.source_fill;
    float length = 100;
    if (source->type == CTX_SOURCE_LINEAR_GRADIENT)
    {
       length = source->linear_gradient.length;
    }
    else
    if (source->type == CTX_SOURCE_RADIAL_GRADIENT)
    {
       length = ctx_maxf (source->radial_gradient.r1, source->radial_gradient.r0);
    }
    {
      float u = length; float v = length;
      const CtxMatrix *m = &rasterizer->state->gstate.transform;
      //  combine with the source transform?
      _ctx_matrix_apply_transform (m, &u, &v);
      length = ctx_maxf (u, v);
    }

    /* Clamp in float space first: this keeps tiny, negative, NaN and huge
     * lengths away from the float->int conversion. */
    int elements = 2;
    if (length >= (float) CTX_GRADIENT_CACHE_ELEMENTS)
      elements = CTX_GRADIENT_CACHE_ELEMENTS;
    else if (length >= 2.0f)
      elements = (int) length;
    /* the cache capacity is always the final upper bound */
    rasterizer->gradient_cache_elements = ctx_mini (elements, CTX_GRADIENT_CACHE_ELEMENTS);
  }

  for (int u = 0; u < rasterizer->gradient_cache_elements; u++)
  {
    /* evenly spaced positions, first entry at 0 and last entry at 1 */
    float v = u / (rasterizer->gradient_cache_elements - 1.0f);
    _ctx_fragment_gradient_1d_RGBA8 (rasterizer, v, 0.0f, &rasterizer->gradient_cache_u8[u][0]);
  }
  rasterizer->gradient_cache_valid = 1;
}
#endif

/* Evaluates the current gradient at position x and writes one GRAYA8 pixel
 * (gray, alpha) to rgba.
 *
 * @rasterizer: supplies the gradient stops.
 * @x:          gradient position; clamped to 0..1.
 * @y:          unused.
 * @rgba:       2-byte output pixel; only rgba[0] and rgba[1] are written.
 *
 * With no stops an opaque gray ramp (v * 255) is produced.
 */
CTX_INLINE static void
ctx_fragment_gradient_1d_GRAYA8 (CtxRasterizer *rasterizer, float x, float y, uint8_t *rgba)
{
  float v = x;
  CtxGradient *g = &rasterizer->state->gradient;
  if (v < 0) { v = 0; }
  if (v > 1) { v = 1; }
  if (g->n_stops == 0)
    {
      /* a GRAYA8 pixel is two bytes; writing a third would run past it */
      rgba[0] = v * 255;
      rgba[1] = 255;
      return;
    }
  CtxGradientStop *stop      = NULL;
  CtxGradientStop *next_stop = &g->stops[0];
  CtxColor *color;
  /* find adjacent stops with stop->pos <= v < next_stop->pos; when nothing
   * matches both pointers end up NULL */
  for (int s = 0; s < g->n_stops; s++)
    {
      stop      = &g->stops[s];
      next_stop = &g->stops[s+1];
      if (s + 1 >= g->n_stops) { next_stop = NULL; }
      if (v >= stop->pos && next_stop && v < next_stop->pos)
        { break; }
      stop = NULL;
      next_stop = NULL;
    }
  if (stop == NULL && next_stop)
    {
      color = & (next_stop->color);
    }
  else if (stop && next_stop == NULL)
    {
      color = & (stop->color);
    }
  else if (stop && next_stop)
    {
      uint8_t stop_rgba[4];
      uint8_t next_rgba[4];
      ctx_color_get_graya_u8 (rasterizer->state, & (stop->color), stop_rgba);
      ctx_color_get_graya_u8 (rasterizer->state, & (next_stop->color), next_rgba);
      /* blend weight in 0..255 across the segment */
      int dx = (v - stop->pos) * 255 / (next_stop->pos - stop->pos);
      for (int c = 0; c < 2; c++)
        { rgba[c] = ctx_lerp_u8 (stop_rgba[c], next_rgba[c], dx); }
      return;
    }
  else
    {
      /* no segment matched: fall back to the last stop */
      color = & (g->stops[g->n_stops-1].color);
    }
  ctx_color_get_graya_u8 (rasterizer->state, color, rgba);
}

/* Evaluates the current gradient at position v and writes one float RGBA
 * pixel to rgba.
 *
 * @rasterizer: supplies the gradient stops and the global alpha.
 * @v:          gradient position; clamped to 0..1.
 * @y:          unused.
 * @rgba:       4-float output pixel.
 *
 * The alpha is multiplied by the global alpha; the colour channels are not
 * premultiplied here.  With no stops an opaque gray ramp is produced and
 * global alpha is not applied.
 */
CTX_INLINE static void
ctx_fragment_gradient_1d_RGBAF (CtxRasterizer *rasterizer, float v, float y, float *rgba)
{
  float global_alpha = rasterizer->state->gstate.global_alpha_f;
  CtxGradient *g = &rasterizer->state->gradient;
  if (v < 0) { v = 0; }
  if (v > 1) { v = 1; }
  if (g->n_stops == 0)
    {
      rgba[0] = rgba[1] = rgba[2] = v;
      rgba[3] = 1.0;
      return;
    }
  CtxGradientStop *stop      = NULL;
  CtxGradientStop *next_stop = &g->stops[0];
  CtxColor *color;
  /* find adjacent stops with stop->pos <= v < next_stop->pos; when nothing
   * matches both pointers end up NULL */
  for (int s = 0; s < g->n_stops; s++)
    {
      stop      = &g->stops[s];
      next_stop = &g->stops[s+1];
      if (s + 1 >= g->n_stops) { next_stop = NULL; }
      if (v >= stop->pos && next_stop && v < next_stop->pos)
        { break; }
      stop = NULL;
      next_stop = NULL;
    }
  if (stop == NULL && next_stop)
    {
      color = & (next_stop->color);
    }
  else if (stop && next_stop == NULL)
    {
      color = & (stop->color);
    }
  else if (stop && next_stop)
    {
      float stop_rgba[4];
      float next_rgba[4];
      ctx_color_get_rgba (rasterizer->state, & (stop->color), stop_rgba);
      ctx_color_get_rgba (rasterizer->state, & (next_stop->color), next_rgba);
      /* Fractional position inside the segment, 0..1.  This must stay a
       * float: stored in an int it truncates to 0 and the stops never blend. */
      float dx = (v - stop->pos) / (next_stop->pos - stop->pos);
      for (int c = 0; c < 4; c++)
        { rgba[c] = ctx_lerpf (stop_rgba[c], next_rgba[c], dx); }
      rgba[3] *= global_alpha;
      return;
    }
  else
    {
      /* no segment matched: fall back to the last stop */
      color = & (g->stops[g->n_stops-1].color);
    }
  ctx_color_get_rgba (rasterizer->state, color, rgba);
  rgba[3] *= global_alpha;
}
#endif

/* Generic texture fragment: samples `count` pixels from the fill source's
 * texture buffer (its color_managed variant when present) into `out` as
 * premultiplied RGBA8.
 *
 * @x, @y:   source coordinate of the first pixel, in texel units.
 * @z, @dw:  unused here.
 * @out:     destination, 4 bytes per pixel.
 * @count:   number of pixels to produce.
 * @dx, @dy: per-pixel increment of the source coordinate.
 *
 * Buffers with 1, 2, 3 or 4 bytes per pixel are handled; other sizes leave
 * the output pixel's colour bytes unwritten.  Coordinates outside the buffer
 * give a zeroed pixel.
 */
static void
ctx_fragment_image_RGBA8 (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dw)
{
  uint8_t *rgba = (uint8_t *) out;
  CtxSource *g = &rasterizer->state->gstate.source_fill;
  CtxBuffer *buffer = g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;
  uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;

  for (int i = 0; i < count; i ++)
  {

  int u = x;
  int v = y;
  int width = buffer->width;
  int height = buffer->height;
  if ( u < 0 || v < 0 ||
       u >= width ||
       v >= height)
    {
      *((uint32_t*)(rgba)) = 0;
    }
  else
    {
      int bpp = buffer->format->bpp/8;
      if (rasterizer->state->gstate.image_smoothing)
      {
      /* Pick the 2x2 neighbourhood; on the right/bottom edge the missing
       * neighbour falls back to the pixel itself. */
      uint8_t *src00 = (uint8_t *) buffer->data;
      src00 += v * buffer->stride + u * bpp;
      uint8_t *src01 = src00;
      if ( u + 1 < width)
      {
        src01 = src00 + bpp;
      }
      uint8_t *src11 = src01;
      uint8_t *src10 = src00;
      if ( v + 1 < height)
      {
        src10 = src00 + buffer->stride;
        src11 = src01 + buffer->stride;
      }
      /* Blend weights from the fractional part of the coordinate.  These
       * locals shadow the dx/dy step parameters inside this scope only. */
      float dx = (x-(int)(x)) * 255.9;
      float dy = (y-(int)(y)) * 255.9;

      switch (bpp)
      {
      case 1:
        /* gray: replicate to r, g, b; alpha is the global alpha */
        rgba[0] = rgba[1] = rgba[2] = ctx_lerp_u8 (ctx_lerp_u8 (src00[0], src01[0], dx),
                               ctx_lerp_u8 (src10[0], src11[0], dx), dy);
        rgba[3] = global_alpha_u8;
        break;
      case 2:
        /* gray + alpha */
        rgba[0] = rgba[1] = rgba[2] = ctx_lerp_u8 (ctx_lerp_u8 (src00[0], src01[0], dx),
                               ctx_lerp_u8 (src10[0], src11[0], dx), dy);
        rgba[3] = ctx_lerp_u8 (ctx_lerp_u8 (src00[1], src01[1], dx),
                               ctx_lerp_u8 (src10[1], src11[1], dx), dy);
        rgba[3] = (rgba[3] * global_alpha_u8) / 255;
        break;
      case 3:
      /* three colour bytes, no source alpha */
      for (int c = 0; c < bpp; c++)
        { rgba[c] = ctx_lerp_u8 (ctx_lerp_u8 (src00[c], src01[c], dx),
                                 ctx_lerp_u8 (src10[c], src11[c], dx), dy);
                
        }
        rgba[3]=global_alpha_u8;
        break;
      break;
      case 4:
      /* four bytes including alpha */
      for (int c = 0; c < bpp; c++)
        { rgba[c] = ctx_lerp_u8 (ctx_lerp_u8 (src00[c], src01[c], dx),
                                 ctx_lerp_u8 (src10[c], src11[c], dx), dy);
                
        }
        rgba[3] = (rgba[3] * global_alpha_u8) / 255;
      }

      }
      else
      {
      /* no smoothing: take the single texel at (u, v) */
      uint8_t *src = (uint8_t *) buffer->data;
      src += v * buffer->stride + u * bpp;
      switch (bpp)
        {
          case 1:
            for (int c = 0; c < 3; c++)
              { rgba[c] = src[0]; }
            rgba[3] = global_alpha_u8;
            break;
          case 2:
            for (int c = 0; c < 3; c++)
              { rgba[c] = src[0]; }
            rgba[3] = src[1];
            rgba[3] = (rgba[3] * global_alpha_u8) / 255;
            break;
          case 3:
            for (int c = 0; c < 3; c++)
              { rgba[c] = src[c]; }
            rgba[3] = global_alpha_u8;
            break;
          case 4:
            for (int c = 0; c < 4; c++)
              { rgba[c] = src[c]; }
            rgba[3] = (rgba[3] * global_alpha_u8) / 255;
            break;
        }

      }
      if (rasterizer->swap_red_green)
      {
        /* exchanges bytes 0 and 2 of the pixel */
        uint8_t tmp = rgba[0];
        rgba[0] = rgba[2];
        rgba[2] = tmp;
      }
    }
    /* premultiply; also runs on the zeroed out-of-bounds pixel */
    ctx_RGBA8_associate_alpha_probably_opaque (rgba);
    rgba += 4;
    x += dx;
    y += dy;
  }
}

#if CTX_DITHER
/* Position-dependent dither offset for pixel (x, y) and channel c.
 *
 * A pattern value in 0..255 is derived from the coordinates, re-centred by
 * subtracting 127 and divided by `divisor` (which must be non-zero).
 */
static inline int ctx_dither_mask_a (int x, int y, int c, int divisor)
{
  /* https://pippin.gimp.org/a_dither/ */
  const int position = (x + c * 67) + y * 236;
  const int pattern  = (position * 119) & 255;
  return (pattern - 127) / divisor;
}

/* Adds a position-dependent dither offset to the three colour bytes of an
 * RGBA8 pixel, clamping each to 0..255.  Byte 1 uses dither_green as the
 * divisor, bytes 0 and 2 use dither_red_blue; a dither_red_blue of 0
 * disables dithering entirely.  All three channels use the same mask value
 * (channel argument 0). */
inline static void
ctx_dither_rgba_u8 (uint8_t *rgba, int x, int y, int dither_red_blue, int dither_green)
{
  if (dither_red_blue == 0)
    return;

  for (int c = 0; c < 3; c ++)
    {
      const int divisor = c==1?dither_green:dither_red_blue;
      int dithered = rgba[c] + ctx_dither_mask_a (x, y, 0, divisor);
      rgba[c] = CTX_CLAMP (dithered, 0, 255);
    }
}

/* Adds a position-dependent dither offset to the gray byte of a GRAYA8
 * pixel, clamped to 0..255.  A dither_red_blue of 0 disables dithering;
 * dither_green is not used. */
inline static void
ctx_dither_graya_u8 (uint8_t *rgba, int x, int y, int dither_red_blue, int dither_green)
{
  if (dither_red_blue == 0)
    return;

  int dithered = rgba[0] + ctx_dither_mask_a (x, y, 0, dither_red_blue);
  rgba[0] = CTX_CLAMP (dithered, 0, 255);
}
#endif

#if 0
/* Packed-word variant of alpha de-association for one RGBA8 pixel.
 * Compiled out by the enclosing `#if 0`.
 *
 * Alpha 0 gives an all-zero pixel, alpha 255 copies the pixel unchanged;
 * otherwise the colour channels are rescaled by 255 / alpha on the masked
 * 32-bit word.
 */
CTX_INLINE static void
ctx_RGBA8_deassociate_alpha (const uint8_t *in, uint8_t *out)
{
    uint32_t val = *((uint32_t*)(in));
    int a = val >> CTX_RGBA8_A_SHIFT;
    if (a)
    {
    if (a ==255)
    {
      *((uint32_t*)(out)) = val;
    } else
    {
      uint32_t g = (((val & CTX_RGBA8_G_MASK) * 255 / a) >> 8) & CTX_RGBA8_G_MASK;
      uint32_t rb =(((val & CTX_RGBA8_RB_MASK) * 255 / a) >> 8) & CTX_RGBA8_RB_MASK;
      *((uint32_t*)(out)) = g|rb|(a << CTX_RGBA8_A_SHIFT);
    }
    }
    else
    {
      *((uint32_t*)(out)) = 0;
    }
}
#endif

/* Converts a premultiplied 8-bit pixel with `components` channels (alpha
 * last) from `in` to non-premultiplied form in `out`.
 *
 * Alpha 0 yields an all-zero pixel, alpha 255 is a straight copy, anything
 * else divides each colour channel by alpha (scaled by 255).
 */
CTX_INLINE static void
ctx_u8_deassociate_alpha (int components, const uint8_t *in, uint8_t *out)
{
  const int alpha_index = components - 1;

  if (!in[alpha_index])
  {
    for (int c = 0; c < components; c++)
      out[c] = 0;
    return;
  }

  if (in[alpha_index] == 255)
  {
    for (int c = 0; c < alpha_index; c++)
      out[c] = in[c];
  }
  else
  {
    for (int c = 0; c < alpha_index; c++)
      out[c] = (in[c] * 255) / in[alpha_index];
  }
  out[alpha_index] = in[alpha_index];
}

/* Premultiplies a float pixel with `components` channels in place: every
 * channel except the last (alpha) is multiplied by the alpha. */
CTX_INLINE static void
ctx_float_associate_alpha (int components, float *rgba)
{
  const int alpha_index = components - 1;
  const float alpha = rgba[alpha_index];
  int c = 0;
  while (c < alpha_index)
    {
      rgba[c] *= alpha;
      c++;
    }
}

/* Writes the non-premultiplied form of the float pixel `rgba`
 * (`components` channels, alpha last) to `dst`.
 *
 * Colour channels are multiplied by 1/alpha; for an alpha of exactly 0 the
 * factor stays 0, so the colours come out as 0.  The alpha is copied as is.
 */
CTX_INLINE static void
ctx_float_deassociate_alpha (int components, float *rgba, float *dst)
{
  const int alpha_index = components - 1;
  float scale = rgba[alpha_index];
  if (scale != 0.0)
    scale = 1.0/scale;

  for (int c = 0; c < alpha_index; c++)
    dst[c] = (rgba[c] * scale);

  dst[alpha_index] = rgba[alpha_index];
}

/* Premultiplies a 4-component float pixel in place; forwards to
 * ctx_float_associate_alpha with components = 4. */
CTX_INLINE static void
ctx_RGBAF_associate_alpha (float *rgba)
{
  ctx_float_associate_alpha (4, rgba);
}

/* Writes the non-premultiplied form of the 4-component float pixel `rgba`
 * to `dst`; forwards to ctx_float_deassociate_alpha with components = 4. */
CTX_INLINE static void
ctx_RGBAF_deassociate_alpha (float *rgba, float *dst)
{
  ctx_float_deassociate_alpha (4, rgba, dst);
}


/* Exchanges bytes 0 and 2 of the pixel at `data`; bytes 1 and 3 are left
 * untouched. */
static inline void ctx_swap_red_green_u8 (void *data)
{
  uint8_t *pixel = (uint8_t*)data;
  const uint8_t first = pixel[0];
  const uint8_t third = pixel[2];
  pixel[0] = third;
  pixel[2] = first;
}

/* Applies ctx_swap_red_green_u8 to `count` consecutive 4-byte pixels
 * starting at `out`. */
static void
ctx_fragment_swap_red_green_u8 (void *out, int count)
{
  uint8_t *pixel = (uint8_t*)out;
  for (int remaining = count; remaining > 0; remaining--, pixel += 4)
    ctx_swap_red_green_u8 (pixel);
}

/**** rgb8 ***/

/* Box-filtered sampling of a 3-byte-per-pixel texture into premultiplied
 * RGBA8.
 *
 * @x, @y:   source coordinate of the first pixel, in texel units.
 * @z, @dz:  unused.
 * @out:     destination, 4 bytes per pixel.
 * @count:   number of pixels to produce.
 * @dx, @dy: per-pixel increment of the source coordinate.
 *
 * Each output pixel is the average of a (2*dim+1)^2 texel window, where dim
 * is derived from the current transform's scale.  The span is handled in
 * three runs: leading pixels whose window leaves the texture (zeroed), the
 * run that lies fully inside, and everything after it (zeroed).  Rows are
 * addressed as width * 3 bytes, i.e. without padding.
 */
static void
ctx_fragment_image_rgb8_RGBA8_box (CtxRasterizer *rasterizer,
                                   float x, float y, float z,
                                   void *out, int count, float dx, float dy, float dz)
{
  uint8_t *rgba = (uint8_t *) out;
  CtxSource *g = &rasterizer->state->gstate.source_fill;
  CtxBuffer *buffer = g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;
  int width = buffer->width;
  int height = buffer->height;
  uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;
  float factor = ctx_matrix_get_scale (&rasterizer->state->gstate.transform);
  int dim = (1.0 / factor) / 3;

  int i = 0;

  /* leading pixels whose window is not fully inside the texture; the
   * horizontal limit is the width, not the height */
  for (; i < count && (x - dim < 0 || y - dim < 0 || x + dim >= width || y + dim >= height); i++)
  {
    *((uint32_t*)(rgba))=0;
    rgba += 4;
    x += dx;
    y += dy;
  }

  /* pixels whose whole window lies inside the texture */
  for (; i < count && !(
       x - dim < 0 || y - dim < 0 ||
       x + dim >= width ||
       y + dim >= height); i++)
  {
    const int bpp = 3;
    int u = x;
    int v = y;
    uint64_t sum[4]={0,0,0,0};
    int samples = 0;

    for (int ov = - dim; ov <= dim; ov++)
    {
      uint8_t *src = (uint8_t *) buffer->data + bpp * ((v+ov) * width + (u - dim));
      for (int ou = - dim; ou <= dim; ou++)
      {
        for (int c = 0; c < bpp; c++)
          sum[c] += src[c];
        samples ++;
        src += bpp;
      }
    }

    /* average through a 16.16 fixed-point reciprocal */
    int recip = 65536/samples;
    for (int c = 0; c < bpp; c++)
      rgba[c] = sum[c] * recip >> 16;
    rgba[3] = global_alpha_u8;
    ctx_RGBA8_associate_alpha_probably_opaque (rgba);

    rgba += 4;
    x += dx;
    y += dy;
  }

  /* everything after the inside run is transparent */
  for (; i < count; i++)
  {
    *((uint32_t*)(rgba))= 0;
    rgba += 4;
  }
}

/* Defines `frag##_swap_red_green`, a fragment function with the same
 * signature as `frag` that first runs `frag` and then passes the produced
 * pixels through ctx_fragment_swap_red_green_u8. */
#define CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT(frag) \
static void \
frag##_swap_red_green (CtxRasterizer *rasterizer,\
                       float x, float y, float z,\
                       void *out, int count, float dx, float dy, float dz)\
{\
  frag (rasterizer, x, y, z, out, count, dx, dy, dz);\
  ctx_fragment_swap_red_green_u8 (out, count);\
}



/* Premultiplies `count` RGBA8 pixels in `buf` in place, folding in the
 * rasterizer state's global alpha when it is not 255. */
static inline void
ctx_RGBA8_apply_global_alpha_and_associate (CtxRasterizer *rasterizer,
                                         uint8_t *buf, int count)
{
  const uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;
  uint8_t *pixel = (uint8_t *) buf;

  if (global_alpha_u8 == 255)
  {
    /* no global alpha to fold in: plain premultiplication */
    for (int i = 0; i < count; i++, pixel += 4)
      ctx_RGBA8_associate_alpha_probably_opaque (pixel);
    return;
  }

  for (int i = 0; i < count; i++, pixel += 4)
    ctx_RGBA8_associate_global_alpha (pixel, global_alpha_u8);
}

#if CTX_FRAGMENT_SPECIALIZE

/* Bilinear sampling of a 3-byte-per-pixel texture into premultiplied RGBA8,
 * with a per-pixel divide by z.
 *
 * @x, @y, @z:    source coordinate of the first pixel; u = x/z, v = y/z.
 * @out:          destination, 4 bytes per pixel.
 * @scount:       number of pixels to produce.
 * @dx, @dy, @dz: per-pixel increments of x, y and z.
 *
 * Coordinates are tracked in 16.16 fixed point.  Pixels at the end and at
 * the start of the span that fall outside the texture are zeroed; the rest
 * are interpolated from a 2x2 texel neighbourhood.
 */
static inline void
ctx_fragment_image_rgb8_RGBA8_bi (CtxRasterizer *rasterizer,
                                  float x, float y, float z,
                                  void *out, int scount,
                                  float dx, float dy, float dz)
{
  uint32_t count = scount;
  uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;
  uint8_t *rgba = (uint8_t *) out;
  CtxSource *g = &rasterizer->state->gstate.source_fill;
  CtxBuffer *buffer = g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;
  const int bwidth = buffer->width;
  const int bheight = buffer->height;
  unsigned int i = 0;

  int yi_delta = dy * 65536;
  int xi_delta = dx * 65536;
  int zi_delta = dz * 65536;
  int32_t yi = y * 65536;
  int32_t xi = x * 65536;
  int32_t zi = z * 65536;
  {
    /* walk backwards from the last pixel, zeroing while it is outside */
    int32_t u1 = xi + xi_delta* (count-1);
    int32_t v1 = yi + yi_delta* (count-1);
    int32_t z1 = zi + zi_delta* (count-1);
    uint32_t *edst = ((uint32_t*)out)+(count-1);
    for (; i < count; )
    {
      float z_recip = (z1!=0) * (1.0/z1);
      if ((u1*z_recip) <0 ||
          (v1*z_recip) <0 ||
          (u1*z_recip) >= (bwidth) - 1 ||
          (v1*z_recip) >= (bheight) - 1)
      {
        *edst-- = 0;
        count --;
        u1 -= xi_delta;
        v1 -= yi_delta;
        z1 -= zi_delta;
      }
      else break;
    }
  }

  /* zero the leading pixels until the first one inside the texture */
  for (i= 0; i < count; i ++)
  {
    float z_recip = (zi!=0) * (1.0/zi);
    int u = xi * z_recip;
    int v = yi * z_recip;
    if ( u  <= 0 || v  <= 0 || u+1 >= bwidth-1 || v+1 >= bheight-1)
    {
      *((uint32_t*)(rgba))= 0;
    }
    else
      break;
    xi += xi_delta;
    yi += yi_delta;
    zi += zi_delta;
    rgba += 4;
  }

  int stride = buffer->stride;
  uint8_t *data = (uint8_t*)buffer->data;
  while (i < count)
  {
    /* du/dv are the texel coordinates with 8 fractional bits */
    float zr = (zi!=0)*(1.0/zi) * 256;
    int du = xi * zr;
    int u = du >> 8;
    int dv = yi * zr;
    int v = dv >> 8;
    const int bpp = 3;
    uint8_t *src00 = data;
    src00 += v * stride + u * bpp;
    uint8_t *src01 = src00;
    if ( u + 1 < bwidth)
    {
      src01 = src00 + bpp;
    }
    uint8_t *src11 = src01;
    uint8_t *src10 = src00;
    if ( v + 1 < bheight)
    {
      src10 = src00 + stride;
      src11 = src01 + stride;
    }
    /* Blend weights come from this pixel's own fractional position; taking
     * them from the never-updated x/y parameters would give every pixel of
     * the span the same weights. */
    const uint8_t fx = du & 0xff;
    const uint8_t fy = dv & 0xff;
    for (int c = 0; c < bpp; c++)
    {
      rgba[c] = ctx_lerp_u8 (ctx_lerp_u8 (src00[c], src01[c], fx),
                             ctx_lerp_u8 (src10[c], src11[c], fx), fy);
    }
    rgba[3] = global_alpha_u8;
    ctx_RGBA8_associate_alpha_probably_opaque (rgba);

    xi += xi_delta;
    yi += yi_delta;
    zi += zi_delta;
    rgba += 4;   /* output pixels are 4 bytes even though the source is 3 */
    i++;
  }
}

/* Nearest-neighbour sampling of a 3-byte-per-pixel texture into
 * premultiplied RGBA8, with a per-pixel divide by z.
 *
 * @x, @y, @z:    source coordinate of the first pixel; u = x/z, v = y/z.
 * @out:          destination, 4 bytes per pixel.
 * @scount:       number of pixels to produce.
 * @dx, @dy, @dz: per-pixel increments of x, y and z.
 *
 * Coordinates are tracked in 16.16 fixed point.  Rows are addressed as
 * bwidth * 3 bytes, i.e. without padding.
 */
static void
ctx_fragment_image_rgb8_RGBA8_nearest (CtxRasterizer *rasterizer,
                                       float x, float y, float z,
                                       void *out, int scount,
                                       float dx, float dy, float dz)
{
  unsigned int count = scount;
  uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;
  uint8_t *rgba = (uint8_t *) out;
  CtxSource *g = &rasterizer->state->gstate.source_fill;
  CtxBuffer *buffer = g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;
  const int bwidth = buffer->width;
  const int bheight = buffer->height;
  unsigned int i = 0;
  uint8_t *data = ((uint8_t*)buffer->data);

  int yi_delta = dy * 65536;
  int xi_delta = dx * 65536;
  int zi_delta = dz * 65536;
  int32_t yi = y * 65536;
  int32_t xi = x * 65536;
  int32_t zi = z * 65536;
  {
    /* Walk backwards from the last pixel of the span, zeroing output and
     * shrinking count while the sample position is outside the texture. */
    int32_t u1 = xi + xi_delta* (count-1);
    int32_t v1 = yi + yi_delta* (count-1);
    int32_t z1 = zi + zi_delta* (count-1);
    uint32_t *edst = ((uint32_t*)out)+(count-1);
    for (; i < count; )
    {
      /* evaluates to 0 when z1 is 0 */
      float z_recip = (z1!=0) * (1.0/z1);
      if ((u1*z_recip) <0 ||
          (v1*z_recip) <0 ||
          (u1*z_recip) >= (bwidth) - 1 ||
          (v1*z_recip) >= (bheight) - 1)
      {
        *edst-- = 0;
        count --;
        u1 -= xi_delta;
        v1 -= yi_delta;
        z1 -= zi_delta;
      }
      else break;
    }
  }

  /* Zero the leading pixels until the first one that is inside the texture
   * (with a one texel margin). */
  for (i= 0; i < count; i ++)
  {
    float z_recip = (zi!=0) * (1.0/zi);
    int u = xi * z_recip;
    int v = yi * z_recip;
    if ( u  <= 0 || v  <= 0 || u+1 >= bwidth-1 || v+1 >= bheight-1)
    {
      *((uint32_t*)(rgba))= 0;
    }
    else
      break;
    xi += xi_delta;
    yi += yi_delta;
    zi += zi_delta;
    rgba += 4;
  }

  /* Remaining pixels: copy the texel, set alpha to the global alpha and
   * premultiply.  No further bounds checks are made in this loop. */
  while (i < count)
  {
    float z_recip = (zi!=0) * (1.0/zi);
    int u = xi * z_recip;
    int v = yi * z_recip;
    for (unsigned int c = 0; c < 3; c++)
      rgba[c] = data[(bwidth *v +u)*3+c];
    rgba[3] = global_alpha_u8;
    ctx_RGBA8_associate_alpha_probably_opaque (rgba);
    xi += xi_delta;
    yi += yi_delta;
    zi += zi_delta;
    rgba += 4;
    i++;
  }
}



/* Channel-swapping `_swap_red_green` variants of the three rgb8 samplers. */
CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT(ctx_fragment_image_rgb8_RGBA8_box)
CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT(ctx_fragment_image_rgb8_RGBA8_bi)
CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT(ctx_fragment_image_rgb8_RGBA8_nearest)


/* Dispatcher for 3-byte-per-pixel textures: chooses the sampler from the
 * image_smoothing setting and the current transform's scale factor.
 *
 *   smoothing off            -> nearest
 *   scale factor <= 0.5      -> box filter
 *   scale factor close to 1  -> nearest (only when
 *                               CTX_ALWAYS_USE_NEAREST_FOR_SCALE1 is set)
 *   otherwise                -> bilinear
 *
 * When rasterizer->swap_red_green is set the `_swap_red_green` variant of
 * the chosen sampler is used.
 */
static void
ctx_fragment_image_rgb8_RGBA8 (CtxRasterizer *rasterizer,
                               float x,
                               float y,
                               float z,
                               void *out, int count, float dx, float dy, float dz)
{
  const int swap = rasterizer->swap_red_green ? 1 : 0;

  if (!rasterizer->state->gstate.image_smoothing)
  {
    if (swap)
      ctx_fragment_image_rgb8_RGBA8_nearest_swap_red_green (rasterizer,x,y,z,
                                                            out,count,dx,dy,dz);
    else
      ctx_fragment_image_rgb8_RGBA8_nearest (rasterizer,x,y,z,out,
                                             count,dx,dy, dz);
    return;
  }

  float factor = ctx_matrix_get_scale (&rasterizer->state->gstate.transform);

  if (factor <= 0.50f)
  {
    if (swap)
      ctx_fragment_image_rgb8_RGBA8_box_swap_red_green (rasterizer,x,y,z,out,count,dx,dy,dz);
    else
      ctx_fragment_image_rgb8_RGBA8_box (rasterizer,x,y,z,out,
                                         count,dx,dy,dz);
    return;
  }

#if CTX_ALWAYS_USE_NEAREST_FOR_SCALE1
  if (factor > 0.99f && factor < 1.01f)
  {
    if (swap)
      ctx_fragment_image_rgb8_RGBA8_nearest_swap_red_green (rasterizer,x,y,z,
                                                            out,count,dx,dy,dz);
    else
      ctx_fragment_image_rgb8_RGBA8_nearest (rasterizer, x, y, z, out, count, dx, dy, dz);
    return;
  }
#endif

  if (swap)
    ctx_fragment_image_rgb8_RGBA8_bi_swap_red_green (rasterizer,x,y,z,
                                                     out,count, dx, dy, dz);
  else
    ctx_fragment_image_rgb8_RGBA8_bi (rasterizer,x,y,z,out,count,dx,dy,dz);
}


/************** rgba8 */

/* Box-filtered sampling of a 4-byte-per-pixel texture into premultiplied
 * RGBA8.
 *
 * @x, @y:   source coordinate of the first pixel, in texel units.
 * @z, @dz:  unused.
 * @out:     destination, 4 bytes per pixel.
 * @count:   number of pixels to produce.
 * @dx, @dy: per-pixel increment of the source coordinate.
 *
 * Each output pixel is the average of a (2*dim+1)^2 texel window, where dim
 * is derived from the current transform's scale; the averaged alpha is
 * scaled by the global alpha before premultiplication.  The span is handled
 * in three runs: leading pixels whose window leaves the texture (zeroed),
 * the run that lies fully inside, and everything after it (zeroed).  Rows
 * are addressed as width * 4 bytes, i.e. without padding.
 */
static void
ctx_fragment_image_rgba8_RGBA8_box (CtxRasterizer *rasterizer,
                                    float x, float y, float z,
                                    void *out, int count, float dx, float dy, float dz)
{
  uint8_t *rgba = (uint8_t *) out;
  CtxSource *g = &rasterizer->state->gstate.source_fill;
  CtxBuffer *buffer = g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;
  int width = buffer->width;
  int height = buffer->height;
  uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;
  float factor = ctx_matrix_get_scale (&rasterizer->state->gstate.transform);
  int dim = (1.0 / factor) / 3;

  int i = 0;

  /* leading pixels whose window is not fully inside the texture; the
   * horizontal limit is the width, not the height */
  for (; i < count && (x - dim < 0 || y - dim < 0 || x + dim >= width || y + dim >= height); i++)
  {
    *((uint32_t*)(rgba))=0;
    rgba += 4;
    x += dx;
    y += dy;
  }

  /* pixels whose whole window lies inside the texture */
  for (; i < count && !(
       x - dim < 0 || y - dim < 0 ||
       x + dim >= width ||
       y + dim >= height); i++)
  {
    const int bpp = 4;
    int u = x;
    int v = y;
    uint64_t sum[4]={0,0,0,0};
    int samples = 0;

    for (int ov = - dim; ov <= dim; ov++)
    {
      uint8_t *src = (uint8_t *) buffer->data + bpp * ((v+ov) * width + (u - dim));
      for (int ou = - dim; ou <= dim; ou++)
      {
        for (int c = 0; c < bpp; c++)
          sum[c] += src[c];
        samples ++;
        src += bpp;
      }
    }

    /* average through a 16.16 fixed-point reciprocal */
    int recip = 65536/samples;
    for (int c = 0; c < bpp; c++)
      rgba[c] = sum[c] * recip >> 16;
    rgba[3]=rgba[3]*global_alpha_u8/255;
    ctx_RGBA8_associate_alpha_probably_opaque (rgba);

    rgba += 4;
    x += dx;
    y += dy;
  }

  /* everything after the inside run is transparent */
  for (; i < count; i++)
  {
    *((uint32_t*)(rgba))= 0;
    rgba += 4;
  }
#if CTX_DITHER
//ctx_dither_rgba_u8 (rgba, x, y, rasterizer->format->dither_red_blue,
//                    rasterizer->format->dither_green);
#endif
}


/* Copies a horizontal run of texels from a 4-byte-per-pixel texture straight
 * into `out`, for the unscaled, axis-aligned case.
 *
 * @x, @y:  source coordinate of the first pixel (truncated to integers).
 * @out:    destination, 4 bytes per pixel.
 * @scount: number of pixels to produce.
 * @z, @dx, @dy, @dz: unused.
 *
 * Pixels that fall outside the texture are zeroed.  The copied texels are
 * not modified: no global alpha and no premultiplication is applied here.
 * Rows are addressed as bwidth pixels, i.e. without padding.
 */
static void
ctx_fragment_image_rgba8_RGBA8_nearest_copy (CtxRasterizer *rasterizer,
                                             float x, float y, float z,
                                             void *out, int scount, float dx, float dy, float dz)
{
  CtxSource *g = &rasterizer->state->gstate.source_fill;
  CtxBuffer *buffer = 
     g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;
  uint32_t *dst = (uint32_t*)out;
  int bwidth  = buffer->width;
  int bheight = buffer->height;
  int u = x;
  int v = y;
  int remaining = scount;

  if (remaining <= 0)
    return;

  /* whole row outside the texture */
  if (CTX_UNLIKELY(!(v >= 0 && v < bheight)))
  {
    memset (dst, 0, (size_t) remaining * sizeof (uint32_t));
    return;
  }

  /* pixels left of the texture; memset counts bytes, not pixels */
  int pre = ctx_mini (ctx_maxi (-u, 0), remaining);
  memset (dst, 0, (size_t) pre * sizeof (uint32_t));
  dst += pre;
  remaining -= pre;
  u += pre;

  /* pixels overlapping the texture; clamped at 0 for a start beyond the
   * right edge so the trailing clear never grows past the output */
  int limit = ctx_maxi (ctx_mini (remaining, bwidth - u), 0);
  if (limit > 0)
  {
    /* formed only once (u, v) is known to be inside the texture */
    const uint32_t *src = ((const uint32_t*)buffer->data) + bwidth * v + u;
    memcpy (dst, src, (size_t) limit * sizeof (uint32_t));
    dst += limit;
  }

  /* pixels right of the texture */
  memset (dst, 0, (size_t) (remaining - limit) * sizeof (uint32_t));
}

/* Copies a horizontal run of texels from a 4-byte-per-pixel texture into
 * `out`, wrapping around at the texture edges (repeat), for the unscaled,
 * axis-aligned case.
 *
 * @x, @y:  source coordinate of the first pixel (truncated to integers).
 * @out:    destination, 4 bytes per pixel.
 * @count:  number of pixels to produce.
 * @z, @dx, @dy, @dz: unused.
 *
 * Texels are copied unmodified.  Rows are addressed as bwidth pixels, i.e.
 * without padding.  A texture with no pixels yields zeroed output.
 */
static void
ctx_fragment_image_rgba8_RGBA8_nearest_copy_repeat (CtxRasterizer *rasterizer,
                                                    float x, float y, float z,
                                                    void *out, int count, float dx, float dy, float dz)
{
  CtxSource *g = &rasterizer->state->gstate.source_fill;
  CtxBuffer *buffer = 
     g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;
  uint32_t *dst = (uint32_t*)out;
  int bwidth  = buffer->width;
  int bheight = buffer->height;
  int u = x;
  int v = y;

  if (count <= 0)
    return;
  if (bwidth <= 0 || bheight <= 0)
  {
    /* nothing to sample from; also avoids a modulo by zero */
    memset (dst, 0, (size_t) count * sizeof (uint32_t));
    return;
  }

  /* wrap into [0, size) for any sign and magnitude of the coordinate */
  v %= bheight;
  if (v < 0) v += bheight;
  u %= bwidth;
  if (u < 0) u += bwidth;

  uint32_t *src = ((uint32_t*)buffer->data) + bwidth * v;

  while (count > 0)
  {
     /* copy up to the end of the row, then continue from its start */
     int chunk = ctx_mini (bwidth - u, count);
     memcpy (dst, src + u, (size_t) chunk * sizeof (uint32_t));
     dst += chunk;
     count -= chunk;
     u = (u + chunk) % bwidth;
  }
}

/* Maps texel coordinates into the texture according to the extend mode.
 *
 * @extend:  how coordinates outside the texture are treated.
 * @u, @v:   coordinates to adjust in place; either may be NULL to skip
 *           that axis.
 * @bwidth, @bheight: texture size in texels.
 *
 * Returns 1 when the (possibly adjusted) coordinate can be sampled, 0 when
 * it lies outside the texture under CTX_EXTEND_NONE or the mode is unknown.
 *
 *   REPEAT   wraps into [0, size)
 *   REFLECT  mirrors every other tile: 0 .. size-1, size-1 .. 0, ...
 *   PAD      clamps to [0, size-1]
 *   NONE     leaves the coordinate unchanged, only tests it
 */
static inline int
_ctx_coords_restrict (CtxExtend extend,
                      int *u, int *v,
                      int bwidth, int bheight)
{
  switch (extend)
  {
    case CTX_EXTEND_REPEAT:
      if(u)
      {
         *u %= bwidth;
         if (*u < 0) *u += bwidth;
      }
      if(v)
      {
        *v %= bheight;
        if (*v < 0) *v += bheight;
      }
      return 1;
    case CTX_EXTEND_REFLECT:
      if (u)
      {
        const int period = bwidth * 2;
        *u %= period;
        if (*u < 0) *u += period;
        /* second half of the period runs backwards; the "- 1" keeps the
         * result inside [0, bwidth-1] (u == bwidth maps to bwidth-1) */
        if (*u >= bwidth) *u = period - 1 - *u;
      }

      if (v)
      {
        const int period = bheight * 2;
        *v %= period;
        if (*v < 0) *v += period;
        if (*v >= bheight) *v = period - 1 - *v;
      }

      return 1;
    case CTX_EXTEND_PAD:
      if (u)*u = ctx_mini (ctx_maxi (*u, 0), bwidth-1);
      if (v)*v = ctx_mini (ctx_maxi (*v, 0), bheight-1);
      return 1;
    case CTX_EXTEND_NONE:
      if (u)
      {
      if (*u < 0 || *u >= bwidth) return 0;
      }
      if (v)
      {
      if (*v < 0 || *v >= bheight) return 0;
      }
      return 1;
  }
  return 0;
}

/* Nearest-neighbour fetch of 32-bit pixels from the fill source's texture
 * buffer (its color_managed variant when one is set) for a span of
 * `scount` pixels written to `out`.
 *
 * The source position starts at (x, y) and advances by (dx, dy) per
 * output pixel, tracked in 16.16 fixed point.  z and dz are not read by
 * this variant.  Pixels are copied as-is; global alpha is not applied
 * here (that code is commented out).
 */
static void
ctx_fragment_image_rgba8_RGBA8_nearest_affine (CtxRasterizer *rasterizer,
                                               float x, float y, float z,
                                               void *out, int scount, float dx, float dy, float dz)
{
  unsigned int count = scount;
  //uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;
  uint8_t *rgba = (uint8_t *) out;
  CtxSource *g = &rasterizer->state->gstate.source_fill;
  CtxBuffer *buffer = g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;
  CtxExtend extend = rasterizer->state->gstate.extend;
  const int bwidth = buffer->width;
  const int bheight = buffer->height;
  unsigned int i = 0;
  uint32_t *data = ((uint32_t*)buffer->data);

  // 16.16 fixed point position and per-pixel step
  int yi_delta = dy * 65536;
  int xi_delta = dx * 65536;
  int32_t yi = y * 65536;
  int32_t xi = x * 65536;
  switch (extend){
          case CTX_EXTEND_NONE:
                  {

    // position of the last pixel of the span
    int32_t u1 = xi + xi_delta* (count-1);
    int32_t v1 = yi + yi_delta* (count-1);
    uint32_t *edst = ((uint32_t*)out)+(count-1);
    // walk backwards from the end of the span, writing transparent pixels
    // and shrinking count while the sample lies outside the image; the
    // bounds used here also reject the last column and row
    for (; i < count; )
    {
      if ((u1>>16) <0 ||
          (v1>>16) <0 ||
          (u1>>16) >= (bwidth) - 1 ||
          (v1>>16) >= (bheight) - 1)
      {
        *edst-- = 0;
        count --;
        u1 -= xi_delta;
        v1 -= yi_delta;
      }
      else break;
    }

  // walk forward from the start, writing transparent pixels until the
  // first sample that passes the (conservative) interior test
  for (i= 0; i < count; i ++)
  {
    int u = xi >> 16;
    int v = yi >> 16;
    if ( u  <= 0 || v  <= 0 || u+1 >= bwidth-1 || v+1 >= bheight-1)
    {
      *((uint32_t*)(rgba))= 0;
    }
    else
      break;
    xi += xi_delta;
    yi += yi_delta;
    rgba += 4;
  }

  // the remaining pixels are fetched without per-pixel bounds checks; this
  // relies on both ends of the remaining span having passed the tests above
  while (i < count)
  {
    int u = xi >> 16;
    int v = yi >> 16;
    //((uint32_t*)(&rgba[0]))[0] =
    //  ctx_RGBA8_associate_global_alpha_u32 (data[bwidth *v +u], global_alpha_u8);
    ((uint32_t*)(&rgba[0]))[0] = data[bwidth *v +u];
    xi += xi_delta;
    yi += yi_delta;
    rgba += 4;
    i++;
  }
                  }
  break;
          default:
    // all other extend modes: fold every coordinate into the image with
    // _ctx_coords_restrict before fetching
    while (i < count)
    {
      int u = xi >> 16;
      int v = yi >> 16;
      _ctx_coords_restrict (extend, &u, &v, bwidth, bheight);
      //((uint32_t*)(&rgba[0]))[0] =
      //  ctx_RGBA8_associate_global_alpha_u32 (data[bwidth *v +u], global_alpha_u8);
      ((uint32_t*)(&rgba[0]))[0] = data[bwidth *v +u];
      xi += xi_delta;
      yi += yi_delta;
      rgba += 4;
      i++;
    }
    break;
  }
}



/* Nearest-neighbour fetch of 32-bit pixels for a span that only moves
 * horizontally in the source: the row is taken once from y and only x is
 * stepped (by dx, in 16.16 fixed point).  dy, z and dz are not read.
 *
 * Writes `scount` pixels to `out`, sampling the fill source's texture
 * buffer (its color_managed variant when one is set).  Global alpha is
 * not applied here (that call is commented out).
 */
static void
ctx_fragment_image_rgba8_RGBA8_nearest_scale (CtxRasterizer *rasterizer,
                                              float x, float y, float z,
                                              void *out, int scount, float dx, float dy, float dz)
{
  unsigned int count = scount;
  CtxSource *g = &rasterizer->state->gstate.source_fill;
  CtxBuffer *buffer = NULL;
  CtxExtend  extend = rasterizer->state->gstate.extend;
  uint32_t *src = NULL;
  buffer = g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;
  int ideltax = dx * 65536;
  uint32_t *dst = (uint32_t*)out;
  int bwidth  = buffer->width;
  int bheight = buffer->height;
  // image size in the same 16.16 fixed point as ix / iy
  int bbheight = bheight << 16;
  int bbwidth  = bwidth << 16;
//  x += 0.5f;
//  y += 0.5f;

  src = (uint32_t*)buffer->data;
  //if (!src){ fprintf (stderr, "eeek bailing in nearest fragment\n"); return;};

  {
    unsigned int i = 0;
    int32_t ix = x * 65536;
    int32_t iy = y * 65536;

    if (extend == CTX_EXTEND_NONE)
    {
    // position of the last pixel of the span (the row does not change)
    int32_t u1 = ix + ideltax * (count-1);
    int32_t v1 = iy;
    uint32_t *edst = ((uint32_t*)out)+count - 1;
    // trim from the end: transparent pixels while the sample is outside
    for (; i < count; )
    {
      if (u1 <0 || v1 < 0 || u1 >= bbwidth || v1 >= bbheight)
      {
        *edst-- = 0;
        count --;
        u1 -= ideltax;
      }
      else break;
    }

    // trim from the start in the same way
    for (i = 0; i < count; i ++)
    {
      if (ix < 0 || iy < 0 || ix >= bbwidth  || iy >= bbheight)
      {
        *dst++ = 0;
        x += dx; // x is not read again after this point
        ix += ideltax;
      }
      else break;
    }

      // copy the remaining pixels from row v without further bounds checks
      int v = iy >> 16;
      int u = ix >> 16;
      int o = (v)*bwidth;
      for (; i < count; i ++)
      {
        u = ix >> 16;
        *dst++ = src[o + (u)];
        ix += ideltax;
      }
    }
    else
    {

      // other extend modes: restrict the row once to get the row offset,
      // then restrict the column for every pixel
      int v = iy >> 16;
      int u = ix >> 16;
      _ctx_coords_restrict (extend, &u, &v, bwidth, bheight);
      int o = (v)*bwidth;
      for (; i < count; i ++)
      {
        u = ix >> 16;
        _ctx_coords_restrict (extend, &u, &v, bwidth, bheight);
        *dst++ = src[o + (u)];
        ix += ideltax;
      }
    }
  }
//  ctx_RGBA8_apply_global_alpha_and_associate (rasterizer, (uint8_t*)out, count);
}

/* Nearest-neighbour fetch of 32-bit pixels with a perspective divide.
 *
 * The homogeneous source position starts at (x, y, z) and advances by
 * (dx, dy, dz) per output pixel, tracked in 16.16 fixed point; the sampled
 * pixel is (x/z, y/z).  Writes `scount` pixels to `out`, sampling the fill
 * source's texture buffer (its color_managed variant when one is set).
 * Global alpha is not applied here.
 *
 * When z is 0 the reciprocal is taken to be 0, which selects pixel (0,0).
 * This is done with an explicit branch: multiplying 1.0/0 by a zero flag
 * would give NaN rather than 0.
 */
static void
ctx_fragment_image_rgba8_RGBA8_nearest_generic (CtxRasterizer *rasterizer,
                                                float x, float y, float z,
                                                void *out, int scount, float dx, float dy, float dz)
{
  unsigned int count = scount;
  uint8_t *rgba = (uint8_t *) out;
  CtxSource *g = &rasterizer->state->gstate.source_fill;
  CtxBuffer *buffer = g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;
  CtxExtend extend = rasterizer->state->gstate.extend;
  const int bwidth = buffer->width;
  const int bheight = buffer->height;
  unsigned int i = 0;
  uint32_t *data = ((uint32_t*)buffer->data);

  // 16.16 fixed point homogeneous position and per-pixel step
  int yi_delta = dy * 65536;
  int xi_delta = dx * 65536;
  int zi_delta = dz * 65536;
  int32_t yi = y * 65536;
  int32_t xi = x * 65536;
  int32_t zi = z * 65536;
  switch (extend)
  {
    case CTX_EXTEND_NONE:
    {
      // homogeneous position of the last pixel of the span
      int32_t u1 = xi + xi_delta* (count-1);
      int32_t v1 = yi + yi_delta* (count-1);
      int32_t z1 = zi + zi_delta* (count-1);
      uint32_t *edst = ((uint32_t*)out)+(count-1);

      // trim from the end: transparent pixels while the projected sample
      // is outside the image (the last column and row are rejected too)
      for (; i < count; )
      {
        float z_recip = z1 != 0 ? (float) (1.0 / z1) : 0.0f;
        if ((u1*z_recip) <0 ||
            (v1*z_recip) <0 ||
            (u1*z_recip) >= (bwidth) - 1 ||
            (v1*z_recip) >= (bheight) - 1)
        {
          *edst-- = 0;
          count --;
          u1 -= xi_delta;
          v1 -= yi_delta;
          z1 -= zi_delta;
        }
        else break;
      }

      // trim from the start until the first sample that passes the
      // (conservative) interior test
      for (i= 0; i < count; i ++)
      {
        float z_recip = zi != 0 ? (float) (1.0 / zi) : 0.0f;
        int u = xi * z_recip;
        int v = yi * z_recip;
        if ( u  <= 0 || v  <= 0 || u+1 >= bwidth-1 || v+1 >= bheight-1)
        {
          *((uint32_t*)(rgba))= 0;
        }
        else
          break;
        xi += xi_delta;
        yi += yi_delta;
        zi += zi_delta;
        rgba += 4;
      }

      // the remaining pixels are fetched without per-pixel bounds checks
      while (i < count)
      {
        float z_recip = zi != 0 ? (float) (1.0 / zi) : 0.0f;
        int u = xi * z_recip;
        int v = yi * z_recip;
        ((uint32_t*)(&rgba[0]))[0] = data[bwidth *v +u];
        xi += xi_delta;
        yi += yi_delta;
        zi += zi_delta;
        rgba += 4;
        i++;
      }
    }
    break;
    default:
      // other extend modes: fold every projected coordinate into the image
      while (i < count)
      {
        float z_recip = zi != 0 ? (float) (1.0 / zi) : 0.0f;
        int u = xi * z_recip;
        int v = yi * z_recip;
        _ctx_coords_restrict (extend, &u, &v, bwidth, bheight);
        ((uint32_t*)(&rgba[0]))[0] = data[bwidth *v +u];
        xi += xi_delta;
        yi += yi_delta;
        zi += zi_delta;
        rgba += 4;
        i++;
      }
      break;
  }
}

/* Dispatcher for nearest-neighbour RGBA8 image sampling.
 *
 * Picks the cheapest specialised sampler that can handle the span:
 *   - z != 1 or dz != 0                      -> perspective (generic)
 *   - unit horizontal step, no extend mode   -> straight copy
 *   - everything else                        -> affine
 * All arguments are forwarded unchanged.
 */
static void
ctx_fragment_image_rgba8_RGBA8_nearest (CtxRasterizer *rasterizer,
                                   float x, float y, float z,
                                   void *out, int icount, float dx, float dy, float dz)
{
  unsigned int count = icount;
  CtxExtend extend = rasterizer->state->gstate.extend;
  const int is_affine = (z == 1.0f && dz == 0.0f);

  if (!is_affine)
  {
    ctx_fragment_image_rgba8_RGBA8_nearest_generic (rasterizer, x, y, z, out, count, dx, dy, dz);
    return;
  }

  if (dy == 0.0f && dx == 1.0 && extend == CTX_EXTEND_NONE)
  {
    ctx_fragment_image_rgba8_RGBA8_nearest_copy (rasterizer, x, y, z, out, count, dx, dy, dz);
    return;
  }

  ctx_fragment_image_rgba8_RGBA8_nearest_affine (rasterizer, x, y, z, out, count, dx, dy, dz);
}


/* Bilinear fetch of 32-bit pixels for a span that only moves horizontally
 * in the source: the two source rows and the vertical blend weight are
 * computed once from y, and only x is stepped (by dx, in 16.16 fixed
 * point).  dy, z and dz are not read.
 *
 * Writes `scount` pixels to `out`, sampling the fill source's texture
 * buffer (its color_managed variant when one is set); each result goes
 * through ctx_RGBA8_associate_global_alpha_u32 with the current global
 * alpha.
 *
 * `!extend` is used throughout as the "no extend mode" test -- presumably
 * CTX_EXTEND_NONE is 0; confirm against the enum definition.
 */
static inline void
ctx_fragment_image_rgba8_RGBA8_bi_scale (CtxRasterizer *rasterizer,
                                         float x, float y, float z,
                                         void *out, int scount, float dx, float dy, float dz)
{
    uint32_t count = scount;
    // shift by half a pixel before sampling
    x -= 0.5f;
    y -= 0.5f;
    uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;
    uint8_t *rgba = (uint8_t *) out;
    CtxSource *g = &rasterizer->state->gstate.source_fill;
    CtxExtend  extend = rasterizer->state->gstate.extend;
    CtxBuffer *buffer = g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;
    const int bwidth = buffer->width;
    const int bheight = buffer->height;
    unsigned int i = 0;

    if (!extend)
    {
    // the whole span is on one source row: if that row is outside the
    // image every pixel is transparent
    if (!(y >= 0 && y < bheight))
    {
      uint32_t *dst = (uint32_t*)rgba;
      for (i = 0 ; i < count; i++)
        *dst++ = 0;
      return;
    }
    }

    //x+=1; // XXX off by one somewhere? ,, needed for alignment with nearest

    // 16.16 fixed point position and horizontal step
    int32_t yi = y * 65536;
    int32_t xi = x * 65536;
    int xi_delta = dx * 65536;

    if (!extend)
    {
    // trim from the end: transparent pixels while the sample, or its right
    // neighbour, is outside the image
    int32_t u1 = xi + xi_delta* (count-1);
    uint32_t *edst = ((uint32_t*)out)+(count-1);
    for (; i < count; )
    {
      if (u1 <0 || u1 +65536 >= (bwidth<<16))
    {
      *edst-- = 0;
      count --;
      u1 -= xi_delta;
    }
    else break;
  }
    // trim from the start in the same way
    for (i= 0; i < count; i ++)
    {
      int u = xi >> 16;
      if ( u  < 0 || u >= bwidth-1)
      {
        *((uint32_t*)(rgba))= 0;
        xi += xi_delta;
        rgba += 4;
      }
      else
        break;
    }
    }

 
  // integer row, and the top 8 bits of the fractional row position used
  // as the vertical blend weight
  int v = yi >> 16;


  int dv = (yi >> 8) & 0xff;

  int u = xi >> 16;

  int v1 = v+1;

  _ctx_coords_restrict (extend, &u, &v, bwidth, bheight);
  _ctx_coords_restrict (extend, NULL, &v1, bwidth, bheight);

  // start of the upper and lower source rows
  uint32_t *data = ((uint32_t*)buffer->data) + bwidth * v;
  uint32_t *ndata = ((uint32_t*)buffer->data) + bwidth * v1;

  // without an extend mode v1 is left unrestricted; on the last row fall
  // back to blending the row with itself
  if (!extend && v1 > bheight-1) ndata = data;

  if (extend)
  {
    if (xi_delta == 65536)
    {
      // unit step: every output pixel advances exactly one source column,
      // so the vertically blended right sample of one pixel is reused as
      // the left sample of the next, and the horizontal weight is constant
      uint32_t *src0 = data, *src1 = ndata;
      uint32_t s1_ga = 0, s1_rb = 0;
      int du = (xi >> 8) & 0xff;

      src0 = data + u;
      src1 = ndata + u;
      ctx_lerp_RGBA8_split (src0[0],src1[0], dv, &s1_ga, &s1_rb);
  
      for (; i < count; i ++)
      {
        uint32_t s0_ga = s1_ga;
        uint32_t s0_rb = s1_rb;
        // NOTE(review): u is restricted here but src0/src1 keep advancing
        // linearly and are what is actually read -- looks like wrapping is
        // not applied to the fetch in this branch; confirm
        _ctx_coords_restrict (extend, &u, NULL, bwidth, bheight);
        ctx_lerp_RGBA8_split (src0[1],src1[1], dv, &s1_ga, &s1_rb);
        ((uint32_t*)(&rgba[0]))[0] = 
          ctx_RGBA8_associate_global_alpha_u32 (
                  ctx_lerp_RGBA8_merge (s0_ga, s0_rb, s1_ga, s1_rb, du), global_alpha_u8);
        rgba += 4;
        u++;
        src0 ++;
        src1 ++;
      }
    }
    else
    {
      // arbitrary step: cache the vertically blended left (s0) and right
      // (s1) samples and only recompute them when the source column changes
      uint32_t s0_ga = 0, s1_ga = 0, s0_rb = 0, s1_rb = 0;
      int prev_u = -1000;
      for (; (i < count); i++)
      {
        if (CTX_LIKELY(prev_u == u))
        {
          // same source column as the previous pixel: reuse both samples
        }
        else if (prev_u == u-1)
        {
          // moved one column right: old right sample becomes the left one
          s0_ga = s1_ga;
          s0_rb = s1_rb;
          ctx_lerp_RGBA8_split (data[u+1],ndata[u+1], dv, &s1_ga, &s1_rb);
          prev_u++;
        }
        else
        {
          // any other jump: fetch both columns afresh
          ctx_lerp_RGBA8_split (data[u],ndata[u], dv, &s0_ga, &s0_rb);
          ctx_lerp_RGBA8_split (data[u+1],ndata[u+1], dv, &s1_ga, &s1_rb);
          prev_u = u;
        }
        // the horizontal weight is passed as xi>>8 without masking --
        // presumably the merge helper only uses the low 8 bits; confirm
        ((uint32_t*)(&rgba[0]))[0] = 
          ctx_RGBA8_associate_global_alpha_u32 (
                  ctx_lerp_RGBA8_merge (s0_ga, s0_rb, s1_ga, s1_rb, (xi>>8)), global_alpha_u8);
        xi += xi_delta;
        rgba += 4;
        u = xi >> 16;
        _ctx_coords_restrict (extend, &u, NULL, bwidth, bheight);
      }
    }
  
  }
  else
  {
    // no extend mode: same two strategies as above, without coordinate
    // restriction (the span was trimmed to the image beforehand)
    if (xi_delta == 65536)
    {
      uint32_t *src0 = data, *src1 = ndata;
      uint32_t s1_ga = 0, s1_rb = 0;
      int du = (xi >> 8) & 0xff;
  
      src0 = data + u;
      src1 = ndata + u;
      ctx_lerp_RGBA8_split (src0[0],src1[0], dv, &s1_ga, &s1_rb);
  
      for (; i < count; i ++)
      {
        uint32_t s0_ga = s1_ga;
        uint32_t s0_rb = s1_rb;
        ctx_lerp_RGBA8_split (src0[1],src1[1], dv, &s1_ga, &s1_rb);
        ((uint32_t*)(&rgba[0]))[0] = 
          ctx_RGBA8_associate_global_alpha_u32 (
                  ctx_lerp_RGBA8_merge (s0_ga, s0_rb, s1_ga, s1_rb, du), global_alpha_u8);
        rgba += 4;
        u++;
        src0 ++;
        src1 ++;
      }
    }
    else
    {
      uint32_t s0_ga = 0, s1_ga = 0, s0_rb = 0, s1_rb = 0;
      int prev_u = -1000;
      for (; (i < count); i++)
      {
        if (CTX_LIKELY(prev_u == u))
        {
        }
        else if (prev_u == u-1)
        {
          s0_ga = s1_ga;
          s0_rb = s1_rb;
          ctx_lerp_RGBA8_split (data[u+1],ndata[u+1], dv, &s1_ga, &s1_rb);
          prev_u++;
        }
        else
        {
          ctx_lerp_RGBA8_split (data[u],ndata[u], dv, &s0_ga, &s0_rb);
          ctx_lerp_RGBA8_split (data[u+1],ndata[u+1], dv, &s1_ga, &s1_rb);
          prev_u = u;
        }
        ((uint32_t*)(&rgba[0]))[0] = 
          ctx_RGBA8_associate_global_alpha_u32 (
                  ctx_lerp_RGBA8_merge (s0_ga, s0_rb, s1_ga, s1_rb, (xi>>8)), global_alpha_u8);
        xi += xi_delta;
        rgba += 4;
        u = xi >> 16;
      }
    }
  }
}


/* Bilinear fetch of 32-bit pixels for an affine span.
 *
 * The source position starts at (x, y), shifted by half a pixel, and
 * advances by (dx, dy) per output pixel in 16.16 fixed point.  z and dz
 * are not read.  Writes `scount` pixels to `out`, sampling the fill
 * source's texture buffer (its color_managed variant when one is set);
 * each result goes through ctx_RGBA8_associate_global_alpha_u32 with the
 * current global alpha.
 */
static inline void
ctx_fragment_image_rgba8_RGBA8_bi_affine (CtxRasterizer *rasterizer,
                                          float x, float y, float z,
                                          void *out, int scount,
                                          float dx, float dy, float dz)
{
        x-=0.5;
        y-=0.5;
  uint32_t count = scount;
  uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;
  uint8_t *rgba = (uint8_t *) out;
  CtxSource *g = &rasterizer->state->gstate.source_fill;
  CtxBuffer *buffer = g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;
  CtxExtend extend = rasterizer->state->gstate.extend;
  const int bwidth = buffer->width;
  const int bheight = buffer->height;
  unsigned int i = 0;
  uint32_t *data = ((uint32_t*)buffer->data);

  // 16.16 fixed point position and per-pixel step
  int yi_delta = dy * 65536;
  int xi_delta = dx * 65536;
  int32_t yi = y * 65536;
  int32_t xi = x * 65536;

  if (extend == CTX_EXTEND_NONE)
  {
    // trim from the end: transparent pixels while the sample (or its
    // right / lower neighbour) is outside the image
    int32_t u1 = xi + xi_delta* (count-1);
    int32_t v1 = yi + yi_delta* (count-1);
    uint32_t *edst = ((uint32_t*)out)+(count-1);
    for (; i < count; )
    {
      if ((u1>>16) <0 ||
          (v1>>16) <0 ||
          (u1>>16) >= (bwidth) - 1 ||
          (v1>>16) >= (bheight) - 1)
      {
        *edst-- = 0;
        count --;
        u1 -= xi_delta;
        v1 -= yi_delta;
      }
      else break;
    }

  // trim from the start until the first sample that passes the
  // (conservative) interior test
  for (i= 0; i < count; i ++)
  {
    int u = xi >> 16;
    int v = yi >> 16;
    if ( u  <= 0 || v  <= 0 || u+1 >= bwidth-1 || v+1 >= bheight-1)
    {
      *((uint32_t*)(rgba))= 0;
    }
    else
      break;
    xi += xi_delta;
    yi += yi_delta;
    rgba += 4;
  }
  }

  // pointers to the 2x2 neighbourhood: src<row><column>
  uint32_t *src00=data;
  uint32_t *src01=data;
  uint32_t *src10=data;
  uint32_t *src11=data;

  while (i < count)
  {
    // du / dv: position with 8 fractional bits; u / v: integer pixel
    int du = xi >> 8;
    int u = du >> 8;
    int dv = yi >> 8;
    int v = dv >> 8;
    if (CTX_UNLIKELY(u < 0 || v < 0 || u+1 >= bwidth || v+1 >=bheight)) // default to next sample down and to right
    {
      // neighbourhood touches or crosses the image edge: restrict each of
      // the two columns and rows separately according to the extend mode
      int u1 = u + 1;
      int v1 = v + 1;

      _ctx_coords_restrict (extend, &u, &v, bwidth, bheight);
      _ctx_coords_restrict (extend, &u1, &v1, bwidth, bheight);

      src00 = data  + bwidth * v + u;
      src01 = data  + bwidth * v + u1;
      src10 = data  + bwidth * v1 + u;
      src11 = data  + bwidth * v1 + u1;
    }
    else 
    {
      // fully inside: the four samples are adjacent in memory
      src00 = data  + bwidth * v + u;
      src01 = src00 + 1;
      src10 = src00 + bwidth;
      src11 = src01 + bwidth;
    }
    // du / dv are passed unmasked -- presumably ctx_bi_RGBA8 only uses
    // their low 8 bits as blend weights; confirm
    ((uint32_t*)(&rgba[0]))[0] =
        ctx_RGBA8_associate_global_alpha_u32 (
            ctx_bi_RGBA8 (*src00,*src01,*src10,*src11, du,dv), global_alpha_u8);
    xi += xi_delta;
    yi += yi_delta;
    rgba += 4;

    i++;
  }
}


/* Bilinear fetch of 32-bit pixels with a perspective divide.
 *
 * The homogeneous source position starts at (x, y, z), with x and y
 * shifted by half a pixel, and advances by (dx, dy, dz) per output pixel
 * in 16.16 fixed point; the sampled position is (x/z, y/z).  Writes
 * `scount` pixels to `out`, sampling the fill source's texture buffer (its
 * color_managed variant when one is set); each result goes through
 * ctx_RGBA8_associate_global_alpha_u32 with the current global alpha.
 *
 * When z is 0 the reciprocal is taken to be 0, which selects pixel (0,0).
 * This is done with an explicit branch: multiplying 1.0/0 by a zero flag
 * would give NaN rather than 0.
 */
static inline void
ctx_fragment_image_rgba8_RGBA8_bi_generic (CtxRasterizer *rasterizer,
                                           float x, float y, float z,
                                           void *out, int scount,
                                           float dx, float dy, float dz)
{
  x-=0.5;
  y-=0.5;
  uint32_t count = scount;
  uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;
  uint8_t *rgba = (uint8_t *) out;
  CtxSource *g = &rasterizer->state->gstate.source_fill;
  CtxBuffer *buffer = g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;
  CtxExtend extend = rasterizer->state->gstate.extend;
  const int bwidth = buffer->width;
  const int bheight = buffer->height;
  unsigned int i = 0;
  uint32_t *data = ((uint32_t*)buffer->data);

  // 16.16 fixed point homogeneous position and per-pixel step
  int yi_delta = dy * 65536;
  int xi_delta = dx * 65536;
  int zi_delta = dz * 65536;
  int32_t yi = y * 65536;
  int32_t xi = x * 65536;
  int32_t zi = z * 65536;
  if (extend == CTX_EXTEND_NONE)
  {
    // homogeneous position of the last pixel of the span
    int32_t u1 = xi + xi_delta* (count-1);
    int32_t v1 = yi + yi_delta* (count-1);
    int32_t z1 = zi + zi_delta* (count-1);
    uint32_t *edst = ((uint32_t*)out)+(count-1);

    // trim from the end: transparent pixels while the projected sample (or
    // its right / lower neighbour) is outside the image
    for (; i < count; )
    {
      float z_recip = z1 != 0 ? (float) (1.0 / z1) : 0.0f;
      if ((u1*z_recip) <0 ||
          (v1*z_recip) <0 ||
          (u1*z_recip) >= (bwidth) - 1 ||
          (v1*z_recip) >= (bheight) - 1)
      {
        *edst-- = 0;
        count --;
        u1 -= xi_delta;
        v1 -= yi_delta;
        z1 -= zi_delta;
      }
      else break;
    }

    // trim from the start until the first sample that passes the
    // (conservative) interior test
    for (i= 0; i < count; i ++)
    {
      float z_recip = zi != 0 ? (float) (1.0 / zi) : 0.0f;
      int u = xi * z_recip;
      int v = yi * z_recip;
      if ( u  <= 0 || v  <= 0 || u+1 >= bwidth-1 || v+1 >= bheight-1)
      {
        *((uint32_t*)(rgba))= 0;
      }
      else
        break;
      xi += xi_delta;
      yi += yi_delta;
      zi += zi_delta;
      rgba += 4;
    }
  }

  // pointers to the 2x2 neighbourhood: src<row><column>
  uint32_t *src00=data;
  uint32_t *src01=data;
  uint32_t *src10=data;
  uint32_t *src11=data;

  while (i < count)
  {
    // reciprocal of z scaled by 256, so du / dv carry 8 fractional bits
    float zr = zi != 0 ? (float) ((1.0 / zi) * 256) : 0.0f;
    int du = xi * zr;
    int u = du >> 8;
    int dv = yi * zr;
    int v = dv >> 8;
    if (CTX_UNLIKELY(u < 0 || v < 0 || u+1 >= bwidth || v+1 >=bheight)) // default to next sample down and to right
    {
      // neighbourhood touches or crosses the image edge: restrict each of
      // the two columns and rows separately according to the extend mode
      int u1 = u + 1;
      int v1 = v + 1;

      _ctx_coords_restrict (extend, &u, &v, bwidth, bheight);
      _ctx_coords_restrict (extend, &u1, &v1, bwidth, bheight);

      src00 = data  + bwidth * v + u;
      src01 = data  + bwidth * v + u1;
      src10 = data  + bwidth * v1 + u;
      src11 = data  + bwidth * v1 + u1;
    }
    else
    {
      // fully inside: the four samples are adjacent in memory
      src00 = data  + bwidth * v + u;
      src01 = src00 + 1;
      src10 = src00 + bwidth;
      src11 = src01 + bwidth;
    }
    ((uint32_t*)(&rgba[0]))[0] =
        ctx_RGBA8_associate_global_alpha_u32 (
            ctx_bi_RGBA8 (*src00,*src01,*src10,*src11, du,dv), global_alpha_u8);
    xi += xi_delta;
    yi += yi_delta;
    zi += zi_delta;
    rgba += 4;

    i++;
  }
}


/* Dispatcher for bilinear RGBA8 image sampling.
 *
 * Picks the cheapest specialised sampler that can handle the span:
 *   - z != 1 or dz != 0              -> perspective (generic)
 *   - no vertical step, dx positive  -> horizontal scale
 *   - everything else                -> affine
 * All arguments are forwarded unchanged.
 */
static void
ctx_fragment_image_rgba8_RGBA8_bi (CtxRasterizer *rasterizer,
                                   float x, float y, float z,
                                   void *out, int icount, float dx, float dy, float dz)
{
  unsigned int count = icount;
  const int is_affine = (z == 1.0f && dz == 0.0f);

  if (!is_affine)
  {
    ctx_fragment_image_rgba8_RGBA8_bi_generic (rasterizer, x, y, z, out, count, dx, dy, dz);
    return;
  }

  if (dy == 0.0f && dx > 0.0f)
    ctx_fragment_image_rgba8_RGBA8_bi_scale (rasterizer, x, y, z, out, count, dx, dy, dz);
  else
    ctx_fragment_image_rgba8_RGBA8_bi_affine (rasterizer, x, y, z, out, count, dx, dy, dz);
}
#endif

/* Clamp the integer `val` to the inclusive range [min, max]: raise it to
 * at least `min` with ctx_maxi, then cap it at `max` with ctx_mini.
 */
#define ctx_clampi(val,min,max) \
     ctx_mini (ctx_maxi ((val), (min)), (max))

/* Convert one YUV sample to a packed 32-bit pixel.
 *
 * @y: luma, @u / @v: chroma (offset by 128).
 *
 * The conversion uses 16.16 fixed point coefficients; luma is offset by 16
 * before scaling.  Each channel is clamped to 0..255.
 *
 * Returns red in bits 0-7, green in bits 8-15, blue in bits 16-23 and a
 * fully opaque alpha (0xff) in bits 24-31.
 */
static inline uint32_t ctx_yuv_to_rgba32 (uint8_t y, uint8_t u, uint8_t v)
{
  int cy  = ((y - 16) * 76309) >> 16;
  int cr  = (v - 128);
  int cb  = (u - 128);
  int red = cy + ((cr * 104597) >> 16);
  int green = cy - ((cb * 25674 + cr * 53278) >> 16);
  int blue = cy + ((cb * 132201) >> 16);

  red   = red   < 0 ? 0 : (red   > 255 ? 255 : red);
  green = green < 0 ? 0 : (green > 255 ? 255 : green);
  blue  = blue  < 0 ? 0 : (blue  > 255 ? 255 : blue);

  /* pack in unsigned arithmetic: shifting 0xff left by 24 as a signed int
   * would shift into the sign bit */
  return  (uint32_t) red |
          ((uint32_t) green << 8) |
          ((uint32_t) blue << 16) |
          ((uint32_t) 0xff << 24);
}

/* Nearest-neighbour fetch from a planar YUV 4:2:0 texture, converted to
 * 32-bit pixels with ctx_yuv_to_rgba32.
 *
 * As indexed below, the buffer holds a full resolution luma plane of
 * bwidth * bheight bytes followed by two chroma planes of
 * (bwidth/2) * (bheight/2) bytes each; rasterizer->swap_red_green swaps
 * which chroma plane is read as u and which as v.
 *
 * The source position starts at (x, y), shifted by half a pixel, and
 * advances by (dx, dy) per output pixel.  z and dz are not read.  Writes
 * `count` pixels to `out`; samples outside the image become 0.  Global
 * alpha is applied at the end when it is not 255.
 */
static void
ctx_fragment_image_yuv420_RGBA8_nearest (CtxRasterizer *rasterizer,
                                         float x, float y, float z,
                                         void *out, int count, float dx, float dy, float dz)
{
  uint8_t *rgba = (uint8_t *) out;
  CtxSource *g = &rasterizer->state->gstate.source_fill;
  CtxBuffer *buffer = g->texture.buffer;
  if (buffer->color_managed)
    buffer = buffer->color_managed;
  uint8_t *src = (uint8_t *) buffer->data;
  int bwidth  = buffer->width;
  int bheight = buffer->height;
  int bwidth_div_2  = bwidth/2;
  int bheight_div_2  = bheight/2;
  x += 0.5f;
  y += 0.5f;

  // no pixel data: return without writing anything to out
  if (!src)
          return;

  {
    int i = 0;

    // trim from the end: transparent pixels while the last sample of the
    // span is outside the image; count shrinks accordingly
    float  u1 = x + dx * (count-1);
    float  v1 = y + dy * (count-1);
    uint32_t *edst = ((uint32_t*)out)+count - 1;
    for (; i < count; )
    {
      if (u1 <0 || v1 < 0 || u1 >= bwidth || v1 >= bheight)
      {
        *edst-- = 0;
        count --;
        u1 -= dx;
        v1 -= dy;
      }
      else break;
    }

    // trim from the start in the same way
    for (; i < count; i ++)
    {
      int u = x;
      int v = y;
      if ((u < 0 || v < 0 || u >= bwidth || v >= bheight))
      {
        *((uint32_t*)(rgba))= 0;
      }
      else
      {
        break;
      }
      x += dx;
      y += dy;
      rgba += 4;
    }

    // byte offsets of the two chroma planes, which follow the luma plane
    uint32_t u_offset = bheight * bwidth;
    uint32_t v_offset = u_offset + bheight_div_2 * bwidth_div_2;

    if (rasterizer->swap_red_green)
    {
      v_offset = bheight * bwidth;
      u_offset = v_offset + bheight_div_2 * bwidth_div_2;
    }

    // XXX this is incorrect- but fixes some bug!
    // (the horizontal start position is hard-coded to 1.0 in 16.16 fixed
    // point instead of being derived from x)
    int ix = 65536;//x * 65536;
    int iy = y * 65536;

    int ideltax = dx * 65536;
    int ideltay = dy * 65536;

    if (ideltay == 0)
    {
      // horizontal span: the row offsets are computed once
      int u = ix >> 16;
      int v = iy >> 16;

      // note: this `y` is the luma row offset and shadows the float
      // parameter y inside this block
      uint32_t y  = v * bwidth;
      uint32_t uv = (v / 2) * bwidth_div_2;

      // only the row is range checked here; columns are not
      if (v >= 0 && v < bheight)
      while (i < count)// && u >= 0 && u+1 < bwidth)
      {
        *((uint32_t*)(rgba))= ctx_yuv_to_rgba32 (src[y+u],
                        src[u_offset+uv+u/2], src[v_offset+uv+u/2]);
#if 0
#if CTX_DITHER
       ctx_dither_rgba_u8 (rgba, x, y, rasterizer->format->dither_red_blue,
                           rasterizer->format->dither_green);
#endif
#endif

        ix += ideltax;
        rgba += 4;
        u = ix >> 16;
        i++;
      }
    }
    else
    {
      // general span: both coordinates change per pixel, so the luma and
      // chroma offsets are recomputed for every sample (no range check)
      int u = ix >> 16;
      int v = iy >> 16;

      while (i < count)// && u >= 0 && v >= 0 && u < bwidth && v < bheight)
      {
        uint32_t y  = v * bwidth + u;
        uint32_t uv = (v / 2) * bwidth_div_2 + (u / 2);

        *((uint32_t*)(rgba))= ctx_yuv_to_rgba32 (src[y],
                        src[u_offset+uv], src[v_offset+uv]);
#if 0
#if CTX_DITHER
       ctx_dither_rgba_u8 (rgba, x+i, y, rasterizer->format->dither_red_blue,
                           rasterizer->format->dither_green);
#endif
#endif

        ix += ideltax;
        iy += ideltay;
        rgba += 4;
        u = ix >> 16;
        v = iy >> 16;
        i++;
      }
    }

    // anything not produced above (the row check failed in the horizontal
    // branch) is written as transparent
    for (; i < count; i++)
    {
      *((uint32_t*)(rgba))= 0;
      rgba += 4;
    }
  }

  // count here is the trimmed count; the pixels trimmed from the end are
  // already 0
  if (rasterizer->state->gstate.global_alpha_u8 != 255)
    ctx_RGBA8_apply_global_alpha_and_associate (rasterizer, (uint8_t*)out, count);
}

#if CTX_FRAGMENT_SPECIALIZE

/* One CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT per RGBA8 image fragment.
 * Presumably each expands to a `<name>_swap_red_green` variant of the
 * named fragment; the dispatcher below calls functions with that suffix
 * when rasterizer->swap_red_green is set.  Confirm against the macro
 * definition.
 */
CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT(ctx_fragment_image_rgba8_RGBA8_box)
CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT(ctx_fragment_image_rgba8_RGBA8_bi)
CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT(ctx_fragment_image_rgba8_RGBA8_nearest)

/* nearest-neighbour specialisations */
CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT(ctx_fragment_image_rgba8_RGBA8_nearest_copy)
CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT(ctx_fragment_image_rgba8_RGBA8_nearest_copy_repeat)
CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT(ctx_fragment_image_rgba8_RGBA8_nearest_scale)
CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT(ctx_fragment_image_rgba8_RGBA8_nearest_affine)
CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT(ctx_fragment_image_rgba8_RGBA8_nearest_generic)

/* bilinear specialisations */
CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT(ctx_fragment_image_rgba8_RGBA8_bi_scale)
CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT(ctx_fragment_image_rgba8_RGBA8_bi_affine)
CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT(ctx_fragment_image_rgba8_RGBA8_bi_generic)

/* Top level RGBA8 image fragment: chooses the sampling filter and forwards
 * all arguments to it.
 *
 * With image smoothing enabled the filter depends on the scale factor of
 * the current transform: box filtering at 0.5 or below, otherwise
 * bilinear (or nearest for a factor close to 1 when
 * CTX_ALWAYS_USE_NEAREST_FOR_SCALE1 is enabled).  With smoothing disabled
 * nearest is always used.  In every case the _swap_red_green variant is
 * called when rasterizer->swap_red_green is set.
 */
static void
ctx_fragment_image_rgba8_RGBA8 (CtxRasterizer *rasterizer,
                                float x, float y, float z,
                                void *out, int count, float dx, float dy, float dz)
{
  int use_nearest = !rasterizer->state->gstate.image_smoothing;

  if (!use_nearest)
  {
    const float factor = ctx_matrix_get_scale (&rasterizer->state->gstate.transform);

    if (factor <= 0.50f)
    {
      /* downscaling by two or more: box filter */
      if (rasterizer->swap_red_green)
        ctx_fragment_image_rgba8_RGBA8_box_swap_red_green (rasterizer, x, y, z, out, count, dx, dy, dz);
      else
        ctx_fragment_image_rgba8_RGBA8_box (rasterizer, x, y, z, out, count, dx, dy, dz);
      return;
    }

#if CTX_ALWAYS_USE_NEAREST_FOR_SCALE1
    // XXX: also verify translate == 0 for this fast path to be valid
    use_nearest = (factor > 0.99f && factor < 1.01f);
#endif

    if (!use_nearest)
    {
      if (rasterizer->swap_red_green)
        ctx_fragment_image_rgba8_RGBA8_bi_swap_red_green (rasterizer, x, y, z, out, count, dx, dy, dz);
      else
        ctx_fragment_image_rgba8_RGBA8_bi (rasterizer, x, y, z, out, count, dx, dy, dz);
      return;
    }
  }

  if (rasterizer->swap_red_green)
    ctx_fragment_image_rgba8_RGBA8_nearest_swap_red_green (rasterizer, x, y, z, out, count, dx, dy, dz);
  else
    ctx_fragment_image_rgba8_RGBA8_nearest (rasterizer, x, y, z, out, count, dx, dy, dz);
}
#endif

/* Fetch a span from a 1 bit per pixel texture as 32-bit pixels.
 *
 * The source position starts at (x, y) and advances by (dx, dy) per output
 * pixel; z and dz are not read.  Rows are buffer->stride bytes apart and
 * pixel u of a row is bit (u & 7) of byte u / 8.  A set bit, or a position
 * outside the image, yields an all-zero pixel; a clear bit yields all
 * 255.  Writes `count` pixels to `out`.
 */
static void
ctx_fragment_image_gray1_RGBA8 (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  CtxBuffer *buffer = rasterizer->state->gstate.source_fill.texture.buffer;
  uint8_t *pixel = (uint8_t *) out;

  for (int left = count; left > 0; left--)
  {
    const int u = x;
    const int v = y;
    uint8_t value = 0;

    if (u >= 0 && v >= 0 &&
        u < buffer->width &&
        v < buffer->height)
    {
      const uint8_t *row = (uint8_t *) buffer->data + v * buffer->stride;
      if (!(row[u / 8] & (1 << (u & 7))))
        value = 255;
    }

    pixel[0] = pixel[1] = pixel[2] = pixel[3] = value;

    pixel += 4;
    x += dx;
    y += dy;
  }
}

#if CTX_GRADIENTS
/* Radial gradient fragment producing 32-bit pixels.
 *
 * For each of `count` pixels, starting at (x, y) and stepping by (dx, dy),
 * the gradient parameter is the distance to the gradient centre minus r0,
 * scaled by rdelta.  The colour comes from the rasterizer's gradient cache
 * when CTX_GRADIENT_CACHE is enabled, otherwise from
 * ctx_fragment_gradient_1d_RGBA8.  With CTX_DITHER the pixel is dithered
 * using the device column and scanline.  z and dz are not read.
 */
static void
ctx_fragment_radial_gradient_RGBA8 (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  CtxSource *g = &rasterizer->state->gstate.source_fill;
  uint8_t *pixel = (uint8_t *) out;
#if CTX_DITHER
  int scan = rasterizer->scanline / CTX_FULL_AA;
  int ox = x;
#endif
  int i = 0;

  while (i < count)
  {
    float v = (ctx_hypotf_fast (g->radial_gradient.x0 - x, g->radial_gradient.y0 - y) -
              g->radial_gradient.r0) * (g->radial_gradient.rdelta);
#if CTX_GRADIENT_CACHE
    uint32_t *cached = (uint32_t*)&rasterizer->gradient_cache_u8[ctx_grad_index(rasterizer, v)][0];
    *((uint32_t*)pixel) = *cached;
#else
    ctx_fragment_gradient_1d_RGBA8 (rasterizer, v, 0.0, pixel);
#endif
#if CTX_DITHER
    ctx_dither_rgba_u8 (pixel, ox+i, scan, rasterizer->format->dither_red_blue,
                        rasterizer->format->dither_green);
#endif
    pixel += 4;
    x += dx;
    y += dy;
    i++;
  }
}

/* Linear gradient fragment producing 32-bit pixels.
 *
 * The gradient parameter is linear in x and y, so it is evaluated once for
 * the first pixel and then advanced by a constant increment for each of
 * the `count` pixels.  With CTX_GRADIENT_CACHE the parameter is kept as an
 * integer scaled by (gradient_cache_elements - 1) * 256 and mapped to a
 * cache entry by ctx_grad_index_i; otherwise it stays a float and the
 * colour is computed by _ctx_fragment_gradient_1d_RGBA8.  With CTX_DITHER
 * each pixel is dithered using the device column and scanline.  z and dz
 * are not read.
 */
static void
ctx_fragment_linear_gradient_RGBA8 (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  CtxSource *g = &rasterizer->state->gstate.source_fill;
  uint8_t *pixel = (uint8_t *) out;

  /* fold 1/length and rdelta into the per-axis coefficients and the start */
  float rdelta = g->linear_gradient.rdelta;
  float length = g->linear_gradient.length;
  float length_recip = 1.0f/length;
  float coeff_x = g->linear_gradient.dx *length_recip * rdelta;
  float coeff_y = g->linear_gradient.dy *length_recip * rdelta;
  float start = g->linear_gradient.start * rdelta;

#if CTX_DITHER
  int dither_red_blue = rasterizer->format->dither_red_blue;
  int dither_green = rasterizer->format->dither_green;
  int scan = rasterizer->scanline / CTX_FULL_AA;
  int ox = x;
#endif

  /* contribution of the start position and of one pixel step, per axis */
  float u0 = x * coeff_x;
  float v0 = y * coeff_y;
  float ud = dx * coeff_x;
  float vd = dy * coeff_y;

#if CTX_GRADIENT_CACHE
  int vv = ((u0 + v0) - start) * (rasterizer->gradient_cache_elements-1) * 256;
  int vv_step = (ud + vd) * (rasterizer->gradient_cache_elements-1) * 256;
#else
  float vv = ((u0 + v0) - start);
  float vv_step = (ud + vd);
#endif

  int i = 0;
  while (i < count)
  {
#if CTX_GRADIENT_CACHE
    uint32_t *cached = ((uint32_t*)(&rasterizer->gradient_cache_u8[ctx_grad_index_i (rasterizer, vv)][0]));
    *((uint32_t*)pixel) = *cached;
#else
    _ctx_fragment_gradient_1d_RGBA8 (rasterizer, vv, 1.0, pixel);
#endif
#if CTX_DITHER
    ctx_dither_rgba_u8 (pixel, ox+i, scan, dither_red_blue, dither_green);
#endif
    pixel += 4;
    vv += vv_step;
    i++;
  }
}

#endif

/* Solid colour fragment producing 32-bit pixels.
 *
 * Resolves the fill source colour to 8-bit RGBA, associates alpha, swaps
 * the red and blue bytes when rasterizer->swap_red_green is set, and
 * fills all `count` pixels of `out` with the result.  The position and
 * step arguments are not read.
 *
 * The first pixel is always written, as ctx_color_get_rgba8 stores its
 * result directly in `out`.
 */
static void
ctx_fragment_color_RGBA8 (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  uint8_t *rgba_out = (uint8_t *) out;
  CtxSource *g = &rasterizer->state->gstate.source_fill;
  ctx_color_get_rgba8 (rasterizer->state, &g->color, rgba_out);
  ctx_RGBA8_associate_alpha (rgba_out);
  if (rasterizer->swap_red_green)
  {
    int tmp = rgba_out[0];
    rgba_out[0] = rgba_out[2];
    rgba_out[2] = tmp;
  }
  /* replicate the first pixel into pixels 1 .. count-1 */
  for (int i = 1; i < count; i++)
    memcpy (rgba_out + i * 4, rgba_out, 4);
}
#if CTX_ENABLE_FLOAT

#if CTX_GRADIENTS
/* Linear gradient fragment producing float RGBA pixels.
 *
 * For each of `count` pixels, starting at (x, y) and stepping by (dx, dy),
 * the position is projected on the gradient direction, divided by the
 * gradient length, offset by start and scaled by rdelta; the colour for
 * that parameter is written by ctx_fragment_gradient_1d_RGBAF.  z and dz
 * are not read.
 */
static void
ctx_fragment_linear_gradient_RGBAF (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  CtxSource *g = &rasterizer->state->gstate.source_fill;
  float *pixel = (float *) out;

  for (int left = count; left > 0; left--)
  {
    float v = ( ( (g->linear_gradient.dx * x + g->linear_gradient.dy * y) /
                  g->linear_gradient.length) -
                g->linear_gradient.start) * (g->linear_gradient.rdelta);
    ctx_fragment_gradient_1d_RGBAF (rasterizer, v, 1.0f, pixel);
    pixel += 4;
    x += dx;
    y += dy;
  }
}

/* Fragment source for radial gradients producing float RGBA.
 *
 * For each of the `count` output pixels the distance from (x, y) to the
 * gradient center (x0, y0) is computed, offset by r0 and scaled by rdelta,
 * and the resulting parameter is handed to ctx_fragment_gradient_1d_RGBAF.
 * (x, y) advances by (dx, dy) per pixel; z and dz are unused.
 */
static void
ctx_fragment_radial_gradient_RGBAF (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  CtxSource *source = &rasterizer->state->gstate.source_fill;
  float *pixel = (float *) out;

  for (int remaining = count; remaining > 0; remaining--, pixel += 4)
  {
    float distance = ctx_hypotf (source->radial_gradient.x0 - x, source->radial_gradient.y0 - y);
    float v = (distance - source->radial_gradient.r0) * (source->radial_gradient.rdelta);
    ctx_fragment_gradient_1d_RGBAF (rasterizer, v, 0.0f, pixel);
    x += dx;
    y += dy;
  }
}
#endif


/* Fragment source for a solid fill color producing float RGBA.
 *
 * The fill color is fetched once, its three color channels are multiplied
 * by its alpha, and the result is written to each of the `count` output
 * pixels.  The position and step arguments are unused.
 */
static void
ctx_fragment_color_RGBAF (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  CtxSource *source = &rasterizer->state->gstate.source_fill;
  float *pixel = (float *) out;
  float  color[4];

  ctx_color_get_rgba (rasterizer->state, &source->color, color);
  color[0] *= color[3];
  color[1] *= color[3];
  color[2] *= color[3];

  for (int i = 0; i < count; i++, pixel += 4)
  {
    pixel[0] = color[0];
    pixel[1] = color[1];
    pixel[2] = color[2];
    pixel[3] = color[3];
  }
}


/* Fragment source for textures producing float RGBA.
 *
 * Renders `count` pixels through the matching RGBA8 image fragment (picked
 * from the bits-per-pixel of the texture buffer, preferring its
 * color-managed variant when present) into a temporary 8-bit buffer and
 * converts every channel to float with ctx_u8_to_float.
 *
 * The temporary buffer is sized for `count` pixels; a fixed 4-byte buffer
 * would be overrun by the RGBA8 fragment as soon as count exceeds 1.
 */
static void ctx_fragment_image_RGBAF (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  float *outf = (float *) out;
  if (count <= 0)
    return; /* nothing to produce; also avoids a zero-length VLA */
  uint8_t rgba[4 * count];
  CtxSource *g = &rasterizer->state->gstate.source_fill;
  CtxBuffer *buffer = g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;
  switch (buffer->format->bpp)
    {
#if CTX_FRAGMENT_SPECIALIZE
      case 1:  ctx_fragment_image_gray1_RGBA8 (rasterizer, x, y, z, rgba, count, dx, dy, dz); break;
      case 24: ctx_fragment_image_rgb8_RGBA8 (rasterizer, x, y, z, rgba, count, dx, dy, dz);  break;
      case 32: ctx_fragment_image_rgba8_RGBA8 (rasterizer, x, y, z, rgba, count, dx, dy, dz); break;
#endif
      default: ctx_fragment_image_RGBA8 (rasterizer, x, y, z, rgba, count, dx, dy, dz);       break;
    }
  for (int c = 0; c < 4 * count; c ++) { outf[c] = ctx_u8_to_float (rgba[c]); }
}

/* Selects the float RGBA fragment function matching the current fill source.
 *
 * Texture sources fall back to the solid-color fragment when no buffer (or
 * no pixel format) is attached, mirroring the check done by the RGBA8
 * selector; ctx_fragment_image_RGBAF dereferences both unconditionally.
 * Unhandled source types also resolve to the solid-color fragment.
 */
static CtxFragment ctx_rasterizer_get_fragment_RGBAF (CtxRasterizer *rasterizer)
{
  CtxGState *gstate = &rasterizer->state->gstate;
  switch (gstate->source_fill.type)
    {
      case CTX_SOURCE_TEXTURE:
        {
          CtxBuffer *buffer = gstate->source_fill.texture.buffer;
          if (buffer)
            buffer = buffer->color_managed?buffer->color_managed:buffer;
          if (!buffer || !buffer->format)
            return ctx_fragment_color_RGBAF;
          return ctx_fragment_image_RGBAF;
        }
      case CTX_SOURCE_COLOR:           return ctx_fragment_color_RGBAF;
#if CTX_GRADIENTS
      case CTX_SOURCE_LINEAR_GRADIENT: return ctx_fragment_linear_gradient_RGBAF;
      case CTX_SOURCE_RADIAL_GRADIENT: return ctx_fragment_radial_gradient_RGBAF;
#endif
      default: break;
    }
  return ctx_fragment_color_RGBAF;
}
#endif


/* Returns 1 when the bottom row of `matrix` is (0, 0, 1) to within 0.001,
 * i.e. the matrix carries no perspective component, and 0 otherwise. */
static inline int
ctx_matrix_no_perspective (CtxMatrix *matrix)
{
  const float tolerance = 0.001f;
  int has_perspective = fabsf (matrix->m[2][0]) > tolerance ||
                        fabsf (matrix->m[2][1]) > tolerance ||
                        fabsf (matrix->m[2][2] - 1.0f) > tolerance;
  return has_perspective ? 0 : 1;
}

/* Returns 1 when `matrix` has no perspective and both off-diagonal terms
 * m[0][1] and m[1][0] are within 0.001 of zero, i.e. it only scales and
 * translates along the axes; returns 0 otherwise.
 *
 * The diagonal is not inspected, so a 180 degree rotation (negative
 * diagonal) still passes, while a 90 degree rotation populates the
 * off-diagonal terms and is reported as rotated. */
static inline int
ctx_matrix_no_skew_or_rotate (CtxMatrix *matrix)
{
  const float tolerance = 0.001f;
  if (fabsf (matrix->m[0][1]) > tolerance ||
      fabsf (matrix->m[1][0]) > tolerance)
    return 0;
  return ctx_matrix_no_perspective (matrix);
}


/* Selects the RGBA8 fragment function matching the current fill source.
 *
 * Solid colors and gradients map directly to their fragment functions.
 * Texture sources are specialised on:
 *   - the buffer's pixel format / bits per pixel,
 *   - gstate->image_smoothing (box / bilinear versus nearest sampling),
 *   - the scale of the user transform (box filtering below 0.5),
 *   - the shape of the source transform (identity-like copy, axis-aligned
 *     scale, affine, or generic/perspective),
 *   - the extend mode, and
 *   - rasterizer->swap_red_green, which picks the *_swap_red_green twins.
 *
 * A texture source without a buffer or pixel format, and any unhandled
 * source type, resolves to the solid-color fragment.
 */
static CtxFragment ctx_rasterizer_get_fragment_RGBA8 (CtxRasterizer *rasterizer)
{
  CtxGState *gstate = &rasterizer->state->gstate;
  CtxSource *g = &rasterizer->state->gstate.source_fill;
  switch (gstate->source_fill.type)
    {
      case CTX_SOURCE_TEXTURE:
      {
        CtxBuffer *buffer = g->texture.buffer;
        if (buffer)
          buffer = buffer->color_managed?buffer->color_managed:buffer;
        if (!buffer || !buffer->format)
          return ctx_fragment_color_RGBA8;

        if (buffer->format->pixel_format == CTX_FORMAT_YUV420)
        {
          return ctx_fragment_image_yuv420_RGBA8_nearest;
        }
        else
#if CTX_FRAGMENT_SPECIALIZE
        switch (buffer->format->bpp)
          {
            case 1: return ctx_fragment_image_gray1_RGBA8;
            case 24: 
              {
                if (gstate->image_smoothing)
                {
                  float factor = ctx_matrix_get_scale (&gstate->transform);
                  if (factor < 0.5f)
                  {
                    if (rasterizer->swap_red_green)
                      return ctx_fragment_image_rgb8_RGBA8_box_swap_red_green;
                    return ctx_fragment_image_rgb8_RGBA8_box;
                  }
#if CTX_ALWAYS_USE_NEAREST_FOR_SCALE1
                  else if (factor > 0.99f && factor < 1.01f)
                  {
                    if (rasterizer->swap_red_green)
                      return ctx_fragment_image_rgb8_RGBA8_nearest_swap_red_green;
                    return ctx_fragment_image_rgb8_RGBA8_nearest;
                  }
#endif
                  else
                  {
                    if (rasterizer->swap_red_green)
                      return ctx_fragment_image_rgb8_RGBA8_bi_swap_red_green;
                    return ctx_fragment_image_rgb8_RGBA8_bi;
                  }
                }
                else
                {
                  if (rasterizer->swap_red_green)
                    return ctx_fragment_image_rgb8_RGBA8_nearest_swap_red_green;
                  return ctx_fragment_image_rgb8_RGBA8_nearest;
                }
              }
              break;
            case 32:
              {
                CtxMatrix *transform = &gstate->source_fill.transform;
                CtxExtend extend = rasterizer->state->gstate.extend;
                if (gstate->image_smoothing)
                {
                  float factor = ctx_matrix_get_scale (&gstate->transform);
                  if (factor < 0.5f)
                  {
                    if (rasterizer->swap_red_green)
                      return ctx_fragment_image_rgba8_RGBA8_box_swap_red_green;
                    return ctx_fragment_image_rgba8_RGBA8_box;
                  }
#if CTX_ALWAYS_USE_NEAREST_FOR_SCALE1
                  else if (factor > 0.99f && factor < 1.01f && extend == CTX_EXTEND_NONE)
                  {
                    if (rasterizer->swap_red_green)
                      return ctx_fragment_image_rgba8_RGBA8_nearest_copy_swap_red_green;
                    return ctx_fragment_image_rgba8_RGBA8_nearest_copy;
                  }
#endif
                  else
                  {
                    if (rasterizer->swap_red_green)
                    {
                      if (ctx_matrix_no_perspective (transform))
                      {
                        if (ctx_matrix_no_skew_or_rotate (transform))
                        {
                          /* unit scale with whole-pixel translation: a
                           * plain copy is exact, no filtering needed */
                          if (ctx_fabsf (transform->m[0][0] - 1.0f) < 0.001f &&
                              ctx_fabsf (transform->m[1][1] - 1.0f) < 0.001f &&
                              ctx_fmod1f (transform->m[0][2]) < 0.001f &&
                              ctx_fmod1f (transform->m[1][2]) < 0.001f)
                          {
                            if (extend == CTX_EXTEND_NONE)
                              return ctx_fragment_image_rgba8_RGBA8_nearest_copy_swap_red_green;
                            else if (extend == CTX_EXTEND_REPEAT)
                              return ctx_fragment_image_rgba8_RGBA8_nearest_copy_repeat_swap_red_green;
                          }
                          return ctx_fragment_image_rgba8_RGBA8_bi_scale_swap_red_green;
                        }
                        return ctx_fragment_image_rgba8_RGBA8_bi_affine_swap_red_green;
                      }
                      return ctx_fragment_image_rgba8_RGBA8_bi_generic_swap_red_green;
                    }

                    if (ctx_matrix_no_perspective (transform))
                    {
                      if (ctx_matrix_no_skew_or_rotate (transform))
                      {
                        if (ctx_fabsf (transform->m[0][0] - 1.0f) < 0.001f &&
                            ctx_fabsf (transform->m[1][1] - 1.0f) < 0.001f &&
                            ctx_fmod1f (transform->m[0][2]) < 0.001f &&
                            ctx_fmod1f (transform->m[1][2]) < 0.001f)
                        {
                          if (extend == CTX_EXTEND_NONE)
                            return ctx_fragment_image_rgba8_RGBA8_nearest_copy;
                          else if (extend == CTX_EXTEND_REPEAT)
                            return ctx_fragment_image_rgba8_RGBA8_nearest_copy_repeat;
                        }
                        return ctx_fragment_image_rgba8_RGBA8_bi_scale;
                      }
                      return ctx_fragment_image_rgba8_RGBA8_bi_affine;
                    }
                    return ctx_fragment_image_rgba8_RGBA8_bi_generic;
                  }
                }
                else
                {
                  if (rasterizer->swap_red_green)
                  {
                    if (ctx_matrix_no_perspective (transform))
                    {
                      if (ctx_matrix_no_skew_or_rotate (transform))
                      {
                        if (ctx_fabsf (transform->m[0][0] - 1.0f) < 0.001f &&
                            ctx_fabsf (transform->m[1][1] - 1.0f) < 0.001f)
                        {
                          /* Honour the extend mode exactly like the
                           * non-swapped path below; an unconditional
                           * return here would make these checks
                           * unreachable and ignore repeat/other modes. */
                          if (extend == CTX_EXTEND_NONE)
                            return ctx_fragment_image_rgba8_RGBA8_nearest_copy_swap_red_green;
                          else if (extend == CTX_EXTEND_REPEAT)
                            return ctx_fragment_image_rgba8_RGBA8_nearest_copy_repeat_swap_red_green;
                        }
                        return ctx_fragment_image_rgba8_RGBA8_nearest_scale_swap_red_green;
                      }
                      return ctx_fragment_image_rgba8_RGBA8_nearest_affine_swap_red_green;
                    }
                    return ctx_fragment_image_rgba8_RGBA8_nearest_generic_swap_red_green;
                  }
                  if (ctx_matrix_no_perspective (transform))
                  {
                    if (ctx_matrix_no_skew_or_rotate (transform))
                    {
                      if (ctx_fabsf (transform->m[0][0] - 1.0f) < 0.001f &&
                          ctx_fabsf (transform->m[1][1] - 1.0f) < 0.001f)
                      {
                         if (extend == CTX_EXTEND_NONE)
                           return ctx_fragment_image_rgba8_RGBA8_nearest_copy;
                         else if (extend == CTX_EXTEND_REPEAT)
                           return ctx_fragment_image_rgba8_RGBA8_nearest_copy_repeat;
                      }
                      return ctx_fragment_image_rgba8_RGBA8_nearest_scale;
                    }
                    return ctx_fragment_image_rgba8_RGBA8_nearest_affine;
                  }
                  return ctx_fragment_image_rgba8_RGBA8_nearest_generic;
                }
              }
            default: return ctx_fragment_image_RGBA8;
          }
#else
          return ctx_fragment_image_RGBA8;
#endif
      }

      case CTX_SOURCE_COLOR:           return ctx_fragment_color_RGBA8;
#if CTX_GRADIENTS
      case CTX_SOURCE_LINEAR_GRADIENT: return ctx_fragment_linear_gradient_RGBA8;
      case CTX_SOURCE_RADIAL_GRADIENT: return ctx_fragment_radial_gradient_RGBA8;
#endif
    }
  return ctx_fragment_color_RGBA8;
}

/* Computes the homogeneous source coordinate for device pixel (x0, y0)
 * and its per-pixel step along x, using the fill source's transform.
 *
 *   (*u0, *v0, *w0) = transform * (x0, y0, 1)
 *   (*ud, *vd, *wd) = first column of the transform
 */
static inline void
ctx_init_uv (CtxRasterizer *rasterizer,
             int x0,
             int y0,
             float *u0, float *v0, float *w0, float *ud, float *vd, float *wd)
{
  CtxMatrix *transform = &rasterizer->state->gstate.source_fill.transform;
  const float px = (float) x0;
  const float py = (float) y0;
  float *origin[3] = { u0, v0, w0 };
  float *step[3]   = { ud, vd, wd };

  for (int row = 0; row < 3; row++)
    *origin[row] = transform->m[row][0] * px +
                   transform->m[row][1] * py +
                   transform->m[row][2];

  for (int row = 0; row < 3; row++)
    *step[row] = transform->m[row][0];
}

/* Copy compositing with coverage for `components` 8-bit channels per pixel.
 *
 * With a fragment source installed, each pixel with non-zero coverage is
 * fetched through rasterizer->fragment into `src` and then either stored
 * directly (coverage 255) or linearly mixed with the destination.  Without
 * a fragment source, `src` is treated as a constant pixel that is mixed
 * into every destination pixel by its coverage.
 *
 * NOTE(review): the arguments hidden behind CTX_COMPOSITE_ARGUMENTS appear
 * to be rasterizer, dst, src, x0, coverage and count -- confirm against the
 * macro definition.
 */
static void
ctx_u8_copy_normal (int components, CTX_COMPOSITE_ARGUMENTS)
{
  if (CTX_UNLIKELY(rasterizer->fragment))
    {
      float u0 = 0; float v0 = 0;
      float ud = 0; float vd = 0;
      float w0 = 1; float wd = 0;
      ctx_init_uv (rasterizer, x0, rasterizer->scanline/CTX_FULL_AA, &u0, &v0, &w0, &ud, &vd, &wd);
      while (count--)
      {
        uint8_t cov = *coverage;
        if (CTX_LIKELY(cov != 0))
        {
          rasterizer->fragment (rasterizer, u0, v0, w0, src, 1, ud, vd, wd);
          if (cov == 255)
          {
            for (int c = 0; c < components; c++)
              dst[c] = src[c];
          }
          else
          {
            uint8_t rcov = 255 - cov;
            for (int c = 0; c < components; c++)
              { dst[c] = (src[c]*cov + dst[c]*rcov)/255; }
          }
        }
        /* Step the full homogeneous coordinate; w must advance together
         * with u and v or perspective transforms sample every pixel of the
         * span with the w of the first one.  wd is 0 for affine
         * transforms, so those are unaffected. */
        u0 += ud;
        v0 += vd;
        w0 += wd;
        dst += components;
        coverage ++;
      }
      return;
    }

  while (count--)
  {
    uint8_t cov = *coverage;
    uint8_t rcov = 255-cov;
    for (int c = 0; c < components; c++)
      { dst[c] = (src[c]*cov+dst[c]*rcov)/255; }
    dst += components;
    coverage ++;
  }
}

/* Clear compositing: scales every channel of each destination pixel by
 * (256 - coverage) / 256, so full coverage drives the pixel to zero and
 * zero coverage leaves it untouched. */
static void
ctx_u8_clear_normal (int components, CTX_COMPOSITE_ARGUMENTS)
{
  for (; count--; coverage++, dst += components)
  {
    const int keep = 256 - *coverage;
    for (int c = 0; c < components; c++)
      dst[c] = (uint8_t) ((dst[c] * keep) >> 8);
  }
}

/* Selector for the multiplier applied to the source (f_s) or destination
 * (f_d) term of a Porter-Duff composite; ctx_porter_duff_factors picks one
 * of these per term for each compositing mode. */
typedef enum {
  CTX_PORTER_DUFF_0,              /* term contributes nothing */
  CTX_PORTER_DUFF_1,              /* term is used unscaled */
  CTX_PORTER_DUFF_ALPHA,          /* term is scaled by an alpha value (for f_s: the destination alpha) */
  CTX_PORTER_DUFF_1_MINUS_ALPHA,  /* presumably scaled by (1 - that alpha) -- TODO confirm in __ctx_u8_porter_duff */
} CtxPorterDuffFactor;

/* ctx_porter_duff_factors(mode, foo, bar):
 *
 * Expands to a block containing a switch on `mode` (a compositing mode)
 * that assigns the CtxPorterDuffFactor variables `f_s` (source factor) and
 * `f_d` (destination factor).  Both variables must already be declared in
 * the scope where the macro is used; the `foo` and `bar` parameters are
 * not referenced by the expansion.
 *
 * CTX_COMPOSITE_COPY is given the same factors as CTX_COMPOSITE_SOURCE_OVER,
 * and any unlisted mode shares the CTX_COMPOSITE_CLEAR factors (0, 0).
 */
#define  \
ctx_porter_duff_factors(mode, foo, bar)\
{\
  switch (mode)\
  {\
     case CTX_COMPOSITE_SOURCE_ATOP:\
        f_s = CTX_PORTER_DUFF_ALPHA;\
        f_d = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
      break;\
     case CTX_COMPOSITE_DESTINATION_ATOP:\
        f_s = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
        f_d = CTX_PORTER_DUFF_ALPHA;\
      break;\
     case CTX_COMPOSITE_DESTINATION_IN:\
        f_s = CTX_PORTER_DUFF_0;\
        f_d = CTX_PORTER_DUFF_ALPHA;\
      break;\
     case CTX_COMPOSITE_DESTINATION:\
        f_s = CTX_PORTER_DUFF_0;\
        f_d = CTX_PORTER_DUFF_1;\
       break;\
     case CTX_COMPOSITE_SOURCE_OVER:\
        f_s = CTX_PORTER_DUFF_1;\
        f_d = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
       break;\
     case CTX_COMPOSITE_DESTINATION_OVER:\
        f_s = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
        f_d = CTX_PORTER_DUFF_1;\
       break;\
     case CTX_COMPOSITE_XOR:\
        f_s = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
        f_d = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
       break;\
     case CTX_COMPOSITE_DESTINATION_OUT:\
        f_s = CTX_PORTER_DUFF_0;\
        f_d = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
       break;\
     case CTX_COMPOSITE_SOURCE_OUT:\
        f_s = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
        f_d = CTX_PORTER_DUFF_0;\
       break;\
     case CTX_COMPOSITE_SOURCE_IN:\
        f_s = CTX_PORTER_DUFF_ALPHA;\
        f_d = CTX_PORTER_DUFF_0;\
       break;\
     case CTX_COMPOSITE_COPY:\
        f_s = CTX_PORTER_DUFF_1;\
        f_d = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
       break;\
     default:\
     case CTX_COMPOSITE_CLEAR:\
        f_s = CTX_PORTER_DUFF_0;\
        f_d = CTX_PORTER_DUFF_0;\
       break;\
  }\
}

/* Source-over compositing of a single constant color onto `count` pixels
 * of `components` 8-bit channels each (the last channel being alpha).
 *
 * Per pixel and channel:
 *   dst = (src * cov + dst * (255 - ((255 + src_alpha * cov) >> 8))) >> 8
 *
 * `src` is copied into a local first so that every channel, including a
 * fifth one, is initialised and no type-punned 32-bit load is needed.
 * `components` is expected to be at most 5 (the size of the local copy).
 * rasterizer and x0 are unused.
 */
static void
ctx_u8_source_over_normal_color (int components,
                                 CtxRasterizer         *rasterizer,
                                 uint8_t * __restrict__ dst,
                                 uint8_t * __restrict__ src,
                                 int                    x0,
                                 uint8_t * __restrict__ coverage,
                                 int                    count)
{
  uint8_t tsrc[5] = {0};
  int     ncopy   = components;
  if (ncopy < 0) ncopy = 0;
  if (ncopy > (int) sizeof (tsrc)) ncopy = (int) sizeof (tsrc);
  memcpy (tsrc, src, (size_t) ncopy);

  while (count--)
  {
    const int cov       = *coverage;
    /* inverse of the coverage-weighted source alpha */
    const int rev_alpha = 255 - ((255 + tsrc[components-1] * cov) >> 8);
    for (int c = 0; c < components; c++)
      dst[c] = (tsrc[c] * cov + dst[c] * rev_alpha) >> 8;
    coverage ++;
    dst+=components;
  }
}

/* Copy compositing of a constant color: every channel of each destination
 * pixel is interpolated towards the matching `src` channel by that pixel's
 * coverage using ctx_lerp_u8. */
static void
ctx_u8_source_copy_normal_color (int components, CTX_COMPOSITE_ARGUMENTS)
{
  for (; count--; coverage++, dst += components)
  {
    for (int c = 0; c < components; c++)
      dst[c] = ctx_lerp_u8 (dst[c], src[c], coverage[0]);
  }
}

/* Source-over compositing of a per-pixel RGBA8 source buffer (`tsrc`) onto
 * `dst` with per-pixel coverage.
 *
 * Each 32-bit pixel is split into two channel pairs (mask 0x00ff00ff and
 * mask 0xff00ff00 shifted down by 8) so that both channels of a pair are
 * scaled with a single multiplication.  The destination is weighted by
 * 255 - ((255 + src_alpha * cov) >> 8).
 */
static inline void
ctx_RGBA8_source_over_normal_buf (CTX_COMPOSITE_ARGUMENTS, uint8_t *tsrc)
{
  for (; count--; coverage++, tsrc += 4, dst += 4)
  {
     const uint32_t s    = *((uint32_t*)tsrc);
     const uint32_t d    = *((uint32_t*)(dst));
     const uint32_t s_rb = s & 0x00ff00ff;
     const uint32_t s_ga = (s & 0xff00ff00) >> 8;
     const uint32_t d_rb = d & 0x00ff00ff;
     const uint32_t d_ga = (d & 0xff00ff00) >> 8;
     const uint32_t cov  = *coverage;
     /* the upper half of s_ga is the source alpha */
     const uint32_t keep = (255-((255+(s_ga >> 16)*cov)>>8));

     const uint32_t out_rb = ((s_rb*cov+0xff00ff+(d_rb*keep))>>8)&0x00ff00ff;
     const uint32_t out_ga =  (s_ga*cov+0xff00ff+(d_ga*keep))    &0xff00ff00;

     *((uint32_t*)(dst)) = out_rb | out_ga;
  }
}

/* Source-over compositing of a per-pixel RGBA8 source buffer (`tsrc`) onto
 * `dst` for spans that are fully covered: coverage is taken as 255 for
 * every pixel and the `coverage` argument is not read.  The destination is
 * weighted by 255 minus the source alpha. */
static inline void
ctx_RGBA8_source_over_normal_full_cov_buf (CTX_COMPOSITE_ARGUMENTS, uint8_t *tsrc)
{
  uint32_t *src_px = (uint32_t*)tsrc;
  uint32_t *dst_px = (uint32_t*)dst;

  for (; count--; src_px++, dst_px++)
  {
     const uint32_t s    = *src_px;
     const uint32_t d    = *dst_px;
     const uint32_t s_rb = s & 0x00ff00ff;
     const uint32_t s_ga = (s & 0xff00ff00) >> 8;
     const uint32_t keep = (s_ga >> 16) ^ 255;

     *dst_px =
       (((s_rb*255+0xff00ff+((d&0x00ff00ff)*keep))>>8)&0x00ff00ff)|
       ((s_ga*255+0xff00ff+(((d&0xff00ff00)>>8)*keep))&0xff00ff00);
  }
}

/* Copy compositing of a per-pixel RGBA8 source buffer (`tsrc`): each
 * destination pixel is interpolated towards the matching source pixel by
 * its coverage using ctx_lerp_RGBA8. */
static inline void
ctx_RGBA8_source_copy_normal_buf (CTX_COMPOSITE_ARGUMENTS, uint8_t *__restrict__ tsrc)
{
  uint32_t *src_px = (uint32_t*)tsrc;
  uint32_t *dst_px = (uint32_t*)dst;

  for (int i = 0; i < count; i++)
    dst_px[i] = ctx_lerp_RGBA8 (dst_px[i], src_px[i], coverage[i]);
}

static inline void
ctx_RGBA8_source_over_normal_fragment (CTX_COMPOSITE_ARGUMENTS)
{
  float u0 = 0; float v0 = 0;
  float ud = 0; float vd = 0;
  float w0 = 1; float wd = 0;
  ctx_init_uv (rasterizer, x0, rasterizer->scanline/CTX_FULL_AA, &u0, &v0, &w0, &ud, &vd, &wd);
  uint8_t _tsrc[4 * (count)];
  rasterizer->fragment (rasterizer, u0, v0, w0, &_tsrc[0], count, ud, vd, wd);
  ctx_RGBA8_source_over_normal_buf (rasterizer,
                       dst, src, x0, coverage, count, &_tsrc[0]);
}

/* Source-over compositing of a fragment source over `scanlines`
 * consecutive, fully covered rows starting at the current scanline.
 * `dst` advances by rasterizer->blit_stride after each row.
 *
 * Without perspective the source coordinate is initialised once and then
 * stepped per row by the second column of the source transform
 * (m[0][1], m[1][1], m[2][1]), which is how u, v and w change when the
 * device y increases by one.  Stepping with (-vd, +ud) instead is only
 * equivalent for rotations with uniform scale and mis-samples textures
 * under anisotropic scale or skew.
 *
 * With perspective the coordinate is recomputed for every row.
 */
static inline void
ctx_RGBA8_source_over_normal_full_cov_fragment (CTX_COMPOSITE_ARGUMENTS, int scanlines)
{
  CtxMatrix *transform = &rasterizer->state->gstate.source_fill.transform;
  int scan = rasterizer->scanline /CTX_FULL_AA;

  if (CTX_LIKELY(ctx_matrix_no_perspective (transform)))
  {
    float u0, v0, ud, vd, w0, wd;
    const float u_row_step = transform->m[0][1];
    const float v_row_step = transform->m[1][1];
    const float w_row_step = transform->m[2][1];
    ctx_init_uv (rasterizer, x0, scan, &u0, &v0, &w0, &ud, &vd, &wd);
    for (int y = 0; y < scanlines; y++)
    {
      uint8_t _tsrc[4 * count];
      rasterizer->fragment (rasterizer, u0, v0, w0, &_tsrc[0], count, ud, vd, wd);
      ctx_RGBA8_source_over_normal_full_cov_buf (rasterizer,
                          dst, src, x0, coverage, count, &_tsrc[0]);
      u0 += u_row_step;
      v0 += v_row_step;
      w0 += w_row_step;
      dst += rasterizer->blit_stride;
    }
  }
  else
  {
    for (int y = 0; y < scanlines; y++)
    {
      uint8_t _tsrc[4 * count];
      float u0, v0, ud, vd, w0, wd;
      ctx_init_uv (rasterizer, x0, scan+y, &u0, &v0, &w0, &ud, &vd, &wd);
      rasterizer->fragment (rasterizer, u0, v0, w0, &_tsrc[0], count, ud, vd, wd);
      ctx_RGBA8_source_over_normal_full_cov_buf (rasterizer,
                          dst, src, x0, coverage, count, &_tsrc[0]);
      dst += rasterizer->blit_stride;
    }
  }
}

static inline void
ctx_RGBA8_source_copy_normal_fragment (CTX_COMPOSITE_ARGUMENTS)
{
  float u0 = 0; float v0 = 0;
  float ud = 0; float vd = 0;
  float w0 = 1; float wd = 0;
  ctx_init_uv (rasterizer, x0, rasterizer->scanline/CTX_FULL_AA, &u0, &v0, &w0, &ud, &vd, &wd);
  uint8_t _tsrc[4 * (count)];
  rasterizer->fragment (rasterizer, u0, v0, w0, &_tsrc[0], count, ud, vd, wd);
  ctx_RGBA8_source_copy_normal_buf (rasterizer,
                       dst, src, x0, coverage, count, &_tsrc[0]);
}


/* Source-over compositing of the rasterizer's constant color onto RGBA8.
 *
 * With CTX_REFERENCE the generic per-channel implementation is used.
 * Otherwise the color is read as two pre-split channel pairs from words 1
 * and 2 of rasterizer->color and both channels of a pair are blended with
 * one multiplication.
 * NOTE(review): word 1 is used as the pair containing alpha (in its upper
 * half) and word 2 as the other pair -- confirm where rasterizer->color is
 * filled in.
 */
static void
ctx_RGBA8_source_over_normal_color (CTX_COMPOSITE_ARGUMENTS)
{
#if CTX_REFERENCE
  ctx_u8_source_over_normal_color (4, rasterizer, dst, src, x0, coverage, count);
#else
  const uint32_t *packed = (uint32_t*)rasterizer->color;
  const uint32_t  si_ga  = packed[1];
  const uint32_t  si_rb  = packed[2];
  const uint32_t  si_a   = si_ga >> 16;

  for (; count--; dst += 4)
  {
     const uint32_t cov   = *coverage++;
     const uint32_t rcov  = (((255+si_a * cov)>>8))^255;
     const uint32_t di    = *((uint32_t*)dst);
     const uint32_t di_ga = ((di & 0xff00ff00) >> 8);
     const uint32_t di_rb = (di & 0x00ff00ff);
     const uint32_t rb    = ((si_rb * cov + 0xff00ff + di_rb * rcov) & 0xff00ff00) >> 8;
     const uint32_t ga    =  (si_ga * cov + 0xff00ff + di_ga * rcov) & 0xff00ff00;
     *((uint32_t*)(dst)) = rb | ga;
  }
#endif
}

/* Copy compositing of the rasterizer's constant color onto RGBA8.
 *
 * With CTX_REFERENCE the generic per-channel implementation is used.
 * Otherwise each destination pixel is moved towards the color by
 * coverage/256 of the difference, handling two channels per multiplication
 * using the pre-split channel pairs in words 1 and 2 of rasterizer->color.
 */
static void
ctx_RGBA8_source_copy_normal_color (CTX_COMPOSITE_ARGUMENTS)
{
#if CTX_REFERENCE
  ctx_u8_source_copy_normal_color (4, rasterizer, dst, src, x0, coverage, count);
#else
  const uint32_t *packed = (uint32_t*)rasterizer->color;
  const uint32_t  si_ga  = packed[1];
  const uint32_t  si_rb  = packed[2];

  for (; count--; dst += 4)
  {
     const uint32_t cov     = *coverage++;
     const uint32_t di      = *((uint32_t*)dst);
     const uint32_t di_ga   = (di & 0xff00ff00);
     const uint32_t di_rb   = (di & 0x00ff00ff);
     /* per-pair difference source - destination (wraps modulo 2^32) */
     const uint32_t diff_rb = si_rb - di_rb;
     const uint32_t diff_ga = si_ga - (di_ga>>8);
     const uint32_t rb      = (di_rb + ((diff_rb * cov)>>8)) & 0x00ff00ff;
     const uint32_t ga      =  di_ga + ((diff_ga * cov)      & 0xff00ff00);

     *((uint32_t*)(dst)) = rb | ga;
  }
#endif
}

/* Clear compositing for RGBA8: forwards to the generic 8-bit
 * implementation with four components per pixel. */
static void
ctx_RGBA8_clear_normal (CTX_COMPOSITE_ARGUMENTS)
{
  ctx_u8_clear_normal (4, rasterizer, dst, src, x0, coverage, count);
}

/* "Normal" blend mode: the blended color is simply the source color.
 *
 * Copies `count` pixels of `components` bytes each from `src` to `blended`.
 * `dst` (the backdrop) does not influence the result and is not read.
 *
 * The pixels are contiguous in both buffers, so a single byte-wise move is
 * used.  This avoids 16/32-bit stores through byte pointers, which are
 * misaligned for odd pixel sizes, and stays well defined when `src` and
 * `blended` refer to the same buffer.
 */
static void
ctx_u8_blend_normal (int components, uint8_t * __restrict__ dst, uint8_t *src, uint8_t *blended, int count)
{
  (void) dst;
  if (components <= 0 || count <= 0)
    return;
  memmove (blended, src, (size_t) components * (size_t) count);
}

/* Saturating 8-bit addition without branches: returns a + b, or 255 when
 * the sum does not fit in 8 bits. */
static inline uint8_t ctx_sadd8(uint8_t a, uint8_t b)
{
  unsigned int sum   = (unsigned int) a + b;
  unsigned int carry = sum >> 8;          /* 1 when the sum exceeds 255 */
  return (uint8_t) (sum | (0u - carry));  /* carry widens to an all-ones mask */
}

#if CTX_BLENDING_AND_COMPOSITING

/* ctx_u8_blend_define(name, CODE):
 *
 * Defines ctx_u8_blend_<name> (components, dst, src, blended, count), which
 * runs CODE once per pixel.  Inside CODE the following names are available:
 *   s        - the current source pixel (same pointer as src)
 *   b        - the backdrop pixel, i.e. the current dst pixel after
 *              ctx_u8_deassociate_alpha
 *   blended  - the output pixel CODE has to fill in
 * After CODE the output alpha is taken from the source pixel and the output
 * is passed through ctx_u8_associate_alpha.  All three pointers advance by
 * `components` per pixel.
 */
#define ctx_u8_blend_define(name, CODE) \
static inline void \
ctx_u8_blend_##name (int components, uint8_t * __restrict__ dst, uint8_t *src, uint8_t *blended, int count)\
{\
  for (int j = 0; j < count; j++) { \
  uint8_t *s=src; uint8_t b[components];\
  ctx_u8_deassociate_alpha (components, dst, b);\
    CODE;\
  blended[components-1] = src[components-1];\
  ctx_u8_associate_alpha (components, blended);\
  src += components;\
  dst += components;\
  blended += components;\
  }\
}

/* ctx_u8_blend_define_seperable(name, CODE):
 *
 * Variant for blend modes that treat every channel independently: CODE is
 * run for each channel index `c` except the last (alpha) one.
 */
#define ctx_u8_blend_define_seperable(name, CODE) \
        ctx_u8_blend_define(name, for (int c = 0; c < components-1; c++) { CODE ;}) \

/* Separable blend modes on 8-bit channels; b[c] is the backdrop channel,
 * s[c] the source channel and 255 represents 1.0.
 *
 * overlay / hard_light: multiply below the midpoint and screen above it,
 *   with the selecting channel doubled so the two halves meet continuously
 *   at the midpoint.
 * color_burn: a fully bright backdrop (255) stays fully bright.
 * difference: absolute difference, so it cannot wrap around.
 * divide: clamped to 255 so that quotients above 1.0 do not wrap around.
 * soft_light: the scaled correction term is divided by 255 before it is
 *   added to the backdrop, not the sum as a whole.
 */
ctx_u8_blend_define_seperable(multiply,     blended[c] = (b[c] * s[c])/255;)
ctx_u8_blend_define_seperable(screen,       blended[c] = s[c] + b[c] - (s[c] * b[c])/255;)
ctx_u8_blend_define_seperable(overlay,      blended[c] = b[c] <= 127 ? (2 * s[c] * b[c])/255 :
                                     s[c] + (2 * b[c] - 255) - (s[c] * (2 * b[c] - 255))/255;)
ctx_u8_blend_define_seperable(darken,       blended[c] = ctx_mini (b[c], s[c]))
ctx_u8_blend_define_seperable(lighten,      blended[c] = ctx_maxi (b[c], s[c]))
ctx_u8_blend_define_seperable(color_dodge,  blended[c] = b[c] == 0 ? 0 :
                                     s[c] == 255 ? 255 : ctx_mini(255, (255 * b[c]) / (255-s[c])))
ctx_u8_blend_define_seperable(color_burn,   blended[c] = b[c] == 255 ? 255 :
                                     s[c] == 0 ? 0 : 255 - ctx_mini(255, (255*(255 - b[c])) / s[c]))
ctx_u8_blend_define_seperable(hard_light,   blended[c] = s[c] <= 127 ? (2 * b[c] * s[c])/255 :
                                     b[c] + (2 * s[c] - 255) - (b[c] * (2 * s[c] - 255))/255;)
ctx_u8_blend_define_seperable(difference,   blended[c] = b[c] > s[c] ? b[c] - s[c] : s[c] - b[c])
ctx_u8_blend_define_seperable(divide,       blended[c] = s[c]?ctx_mini(255, (255 * b[c]) / s[c]):0)
ctx_u8_blend_define_seperable(addition,     blended[c] = ctx_sadd8 (s[c], b[c]))
ctx_u8_blend_define_seperable(subtract,     blended[c] = ctx_maxi(0, s[c]-b[c]))
ctx_u8_blend_define_seperable(exclusion,    blended[c] = b[c] + s[c] - 2 * (b[c] * s[c]/255))
ctx_u8_blend_define_seperable(soft_light,
  if (s[c] <= 255/2)
  {
    blended[c] = b[c] - (255 - 2 * s[c]) * b[c] * (255 - b[c]) / (255 * 255);
  }
  else
  {
    int d;
    if (b[c] <= 255/4)
      d = (((16 * b[c] - 12 * 255)/255 * b[c] + 4 * 255) * b[c])/255;
    else
      d = ctx_sqrtf(b[c]/255.0) * 255.4;
    blended[c] = b[c] + ((2 * s[c] - 255) * (d - b[c]))/255;
  }
)

/* Returns the largest of the first components-1 entries of `c` (the last,
 * alpha, entry is skipped), but never less than 0. */
static int ctx_int_get_max (int components, int *c)
{
  const int channels = components - 1;
  int largest = 0;
  for (int ch = 0; ch < channels; ch++)
    largest = (c[ch] > largest) ? c[ch] : largest;
  return largest;
}

/* Returns the smallest of the first components-1 entries of `c` (the last,
 * alpha, entry is skipped), but never more than 400. */
static int ctx_int_get_min (int components, int *c)
{
  const int channels = components - 1;
  int smallest = 400;
  for (int ch = 0; ch < channels; ch++)
    smallest = (c[ch] < smallest) ? c[ch] : smallest;
  return smallest;
}

/* Luminance of a pixel held as ints.
 *
 * 3 or 4 components: CTX_CSS_RGB_TO_LUMINANCE of the pixel.
 * 1 or 2 components: the first channel.
 * otherwise:         the mean of the first components-1 channels.
 */
static int ctx_int_get_lum (int components, int *c)
{
  if (components == 3 || components == 4)
    return CTX_CSS_RGB_TO_LUMINANCE(c);

  if (components == 1 || components == 2)
    return c[0];

  const int channels = components - 1;
  int total = 0;
  for (int ch = 0; ch < channels; ch++)
    total += c[ch];
  return total / channels;
}

/* Luminance of an 8-bit pixel.
 *
 * 3 or 4 components: CTX_CSS_RGB_TO_LUMINANCE of the pixel.
 * 1 or 2 components: the first channel.
 * otherwise:         the mean of the first components-1 channels.
 */
static int ctx_u8_get_lum (int components, uint8_t *c)
{
  if (components == 3 || components == 4)
    return CTX_CSS_RGB_TO_LUMINANCE(c);

  if (components == 1 || components == 2)
    return c[0];

  const int channels = components - 1;
  int total = 0;
  for (int ch = 0; ch < channels; ch++)
    total += c[ch];
  return total / channels;
}
/* Saturation of an 8-bit pixel, measured as largest minus smallest color
 * channel.
 *
 * 1 or 2 components: always 0.
 * 3 or 4 components: computed over the first three channels.
 * otherwise:         computed over the first components-1 channels.
 */
static int ctx_u8_get_sat (int components, uint8_t *c)
{
  if (components == 1 || components == 2)
    return 0;

  const int channels = (components == 3 || components == 4) ? 3 : components - 1;
  int lowest  = 1000;
  int highest = -1000;
  for (int ch = 0; ch < channels; ch++)
  {
    const int value = c[ch];
    if (value < lowest)  lowest  = value;
    if (value > highest) highest = value;
  }
  return highest - lowest;
}

/* Shifts the color channels of pixel `c` so that its luminance becomes
 * `lum`, then pulls the result back into range: if the smallest shifted
 * channel is negative, or the largest exceeds 255, the channels are scaled
 * around the shifted luminance.  The last (alpha) channel is not touched.
 */
static void ctx_u8_set_lum (int components, uint8_t *c, uint8_t lum)
{
  const int channels = components - 1;
  const int offset   = lum - ctx_u8_get_lum (components, c);
  int shifted[components];

  for (int ch = 0; ch < channels; ch++)
    shifted[ch] = c[ch] + offset;

  const int l       = ctx_int_get_lum (components, shifted);
  const int lowest  = ctx_int_get_min (components, shifted);
  const int highest = ctx_int_get_max (components, shifted);

  if (lowest < 0 && l != lowest)
  {
    for (int ch = 0; ch < channels; ch++)
      shifted[ch] = l + (((shifted[ch] - l) * l) / (l - lowest));
  }

  if (highest > 255 && highest != l)
  {
    for (int ch = 0; ch < channels; ch++)
      shifted[ch] = l + (((shifted[ch] - l) * (255 - l)) / (highest - l));
  }

  for (int ch = 0; ch < channels; ch++)
    c[ch] = shifted[ch];
}

/* Rescales the color channels of pixel `c` so that the spread between its
 * largest and smallest channel becomes `sat`: the smallest channel becomes
 * 0, the largest becomes `sat` and the middle one is scaled in proportion.
 * When all channels are equal they are all set to 0.
 *
 * Pixels with fewer than three components carry a single gray channel,
 * which is the "all channels equal" case: only c[0] is cleared, so neither
 * the alpha channel nor memory beyond the pixel is touched.  For three or
 * more components the first three channels are processed.
 */
static void ctx_u8_set_sat (int components, uint8_t *c, uint8_t sat)
{
  if (components < 3)
  {
    if (components > 0)
      c[0] = 0;
    return;
  }

  int max = 0, mid = 1, min = 2;

  /* three-element sorting network on the channel indices */
  if (c[min] > c[mid]){int t = min; min = mid; mid = t;}
  if (c[mid] > c[max]){int t = mid; mid = max; max = t;}
  if (c[min] > c[mid]){int t = min; min = mid; mid = t;}

  if (c[max] > c[min])
  {
    /* the middle channel must be scaled before c[max] is overwritten */
    c[mid] = ((c[mid]-c[min]) * sat) / (c[max] - c[min]);
    c[max] = sat;
  }
  else
  {
    c[mid] = c[max] = 0;
  }
  c[min] = 0;
}

ctx_u8_blend_define(color,
  for (int i = 0; i < components; i++)
    blended[i] = s[i];
  ctx_u8_set_lum(components, blended, ctx_u8_get_lum (components, s));
)

ctx_u8_blend_define(hue,
  int in_sat = ctx_u8_get_sat(components, b);
  int in_lum = ctx_u8_get_lum(components, b);
  for (int i = 0; i < components; i++)
    blended[i] = s[i];
  ctx_u8_set_sat(components, blended, in_sat);
  ctx_u8_set_lum(components, blended, in_lum);
)

ctx_u8_blend_define(saturation,
  int in_sat = ctx_u8_get_sat(components, s);
  int in_lum = ctx_u8_get_lum(components, b);
  for (int i = 0; i < components; i++)
    blended[i] = b[i];
  ctx_u8_set_sat(components, blended, in_sat);
  ctx_u8_set_lum(components, blended, in_lum);
)

ctx_u8_blend_define(luminosity,
  int in_lum = ctx_u8_get_lum(components, s);
  for (int i = 0; i < components; i++)
    blended[i] = b[i];
  ctx_u8_set_lum(components, blended, in_lum);
)
#endif

/* Applies blend mode `blend` to `count` pixels of `components` 8-bit
 * channels, writing the blended source color to `blended`.
 *
 * When CTX_BLENDING_AND_COMPOSITING is disabled every mode behaves like
 * CTX_BLEND_NORMAL.  When it is enabled, a mode without a case below
 * leaves `blended` untouched.
 */
CTX_INLINE static void
ctx_u8_blend (int components, CtxBlend blend, uint8_t * __restrict__ dst, uint8_t *src, uint8_t *blended, int count)
{
#if CTX_BLENDING_AND_COMPOSITING
/* one switch case forwarding to ctx_u8_blend_<op> */
#define CTX_U8_BLEND_DISPATCH(mode, op) \
    case mode: ctx_u8_blend_##op (components, dst, src, blended, count); break
  switch (blend)
  {
    CTX_U8_BLEND_DISPATCH (CTX_BLEND_NORMAL,      normal);
    CTX_U8_BLEND_DISPATCH (CTX_BLEND_MULTIPLY,    multiply);
    CTX_U8_BLEND_DISPATCH (CTX_BLEND_SCREEN,      screen);
    CTX_U8_BLEND_DISPATCH (CTX_BLEND_OVERLAY,     overlay);
    CTX_U8_BLEND_DISPATCH (CTX_BLEND_DARKEN,      darken);
    CTX_U8_BLEND_DISPATCH (CTX_BLEND_LIGHTEN,     lighten);
    CTX_U8_BLEND_DISPATCH (CTX_BLEND_COLOR_DODGE, color_dodge);
    CTX_U8_BLEND_DISPATCH (CTX_BLEND_COLOR_BURN,  color_burn);
    CTX_U8_BLEND_DISPATCH (CTX_BLEND_HARD_LIGHT,  hard_light);
    CTX_U8_BLEND_DISPATCH (CTX_BLEND_SOFT_LIGHT,  soft_light);
    CTX_U8_BLEND_DISPATCH (CTX_BLEND_DIFFERENCE,  difference);
    CTX_U8_BLEND_DISPATCH (CTX_BLEND_EXCLUSION,   exclusion);
    CTX_U8_BLEND_DISPATCH (CTX_BLEND_COLOR,       color);
    CTX_U8_BLEND_DISPATCH (CTX_BLEND_HUE,         hue);
    CTX_U8_BLEND_DISPATCH (CTX_BLEND_SATURATION,  saturation);
    CTX_U8_BLEND_DISPATCH (CTX_BLEND_LUMINOSITY,  luminosity);
    CTX_U8_BLEND_DISPATCH (CTX_BLEND_ADDITION,    addition);
    CTX_U8_BLEND_DISPATCH (CTX_BLEND_DIVIDE,      divide);
    CTX_U8_BLEND_DISPATCH (CTX_BLEND_SUBTRACT,    subtract);
  }
#undef CTX_U8_BLEND_DISPATCH
#else
  (void) blend;
  ctx_u8_blend_normal (components, dst, src, blended, count);
#endif
}

/* Generic Porter-Duff compositor for one span of 8-bit-per-component pixels.
 *
 * rasterizer        supplies the graphics state (source type, global alpha),
 *                   the solid color and the current scanline
 * components        number of uint8_t components per pixel, alpha last
 * dst               span of destination pixels, composited in place
 * src               ignored on entry; it is re-pointed at a local scratch
 *                   buffer holding the source pixels
 * x0                x coordinate of the first pixel, used to set up the
 *                   fragment coordinates
 * coverage          one coverage byte per pixel
 * count             number of pixels in the span
 * compositing_mode  selects the source/destination factors through
 *                   ctx_porter_duff_factors()
 * fragment          produces source pixels when the source is not a solid
 *                   color
 * blend             blend mode applied to the source before compositing
 *
 * For a solid color source a single source pixel is prepared (and, for a
 * non-normal blend, blended against the first destination pixel only) and
 * reused for the whole span.  Otherwise `count` source pixels are generated
 * by `fragment` and blended pixel by pixel.
 */
CTX_INLINE static void
__ctx_u8_porter_duff (CtxRasterizer         *rasterizer,
                     int                    components,
                     uint8_t *              dst,
                     uint8_t *              src,
                     int                    x0,
                     uint8_t * __restrict__ coverage,
                     int                    count,
                     CtxCompositingMode     compositing_mode,
                     CtxFragment            fragment,
                     CtxBlend               blend)
{
  CtxPorterDuffFactor f_s, f_d;
  ctx_porter_duff_factors (compositing_mode, &f_s, &f_d);
  CtxGState *gstate = &rasterizer->state->gstate;
  uint8_t global_alpha_u8 = gstate->global_alpha_u8;
  uint8_t tsrc[components * count];
  int src_step = 0; /* stays 0 for a solid color: the same pixel is reused */

  if (gstate->source_fill.type == CTX_SOURCE_COLOR)
  {
    src = &tsrc[0];
    /* Copy exactly one pixel.  The scratch buffer only holds
     * components * count bytes, so a hard-coded 4 byte copy would write past
     * its end whenever that product is smaller than 4.
     */
    memcpy (src, rasterizer->color, components);
    if (blend != CTX_BLEND_NORMAL)
      ctx_u8_blend (components, blend, dst, src, src, 1);
  }
  else
  {
    float u0 = 0; float v0 = 0;
    float ud = 0; float vd = 0;
    float w0 = 1; float wd = 0;
    src = &tsrc[0];

    ctx_init_uv (rasterizer, x0, rasterizer->scanline/CTX_FULL_AA, &u0, &v0, &w0, &ud, &vd, &wd);
    fragment (rasterizer, u0, v0, w0, src, count, ud, vd, wd);
    if (blend != CTX_BLEND_NORMAL)
      ctx_u8_blend (components, blend, dst, src, src, count);
    src_step = components;
  }

  while (count--)
  {
    uint32_t cov = *coverage;

    /* fold the global alpha into the coverage */
    if (CTX_UNLIKELY(global_alpha_u8 != 255))
      cov = (cov * global_alpha_u8 + 255) >> 8;

    /* source pixel scaled by the effective coverage */
    uint8_t csrc[components];
    for (int c = 0; c < components; c++)
      csrc[c] = (src[c] * cov + 255) >> 8;

    for (int c = 0; c < components; c++)
    {
      uint32_t res = 0;
#if 1
      /* shift based approximation of the division by 255 */
      switch (f_s)
      {
        case CTX_PORTER_DUFF_0:             break;
        case CTX_PORTER_DUFF_1:             res += (csrc[c] ); break;
        case CTX_PORTER_DUFF_ALPHA:         res += (csrc[c] * dst[components-1] + 255) >> 8; break;
        case CTX_PORTER_DUFF_1_MINUS_ALPHA: res += (csrc[c] * (256-dst[components-1])) >> 8; break;
      }
      switch (f_d)
      {
        case CTX_PORTER_DUFF_0: break;
        case CTX_PORTER_DUFF_1:             res += dst[c]; break;
        case CTX_PORTER_DUFF_ALPHA:         res += (dst[c] * csrc[components-1] + 255) >> 8; break;
        case CTX_PORTER_DUFF_1_MINUS_ALPHA: res += (dst[c] * (256-csrc[components-1])) >> 8; break;
      }
#else
      /* variant using a real division by 255, currently compiled out */
      switch (f_s)
      {
        case CTX_PORTER_DUFF_0:             break;
        case CTX_PORTER_DUFF_1:             res += (csrc[c] ); break;
        case CTX_PORTER_DUFF_ALPHA:         res += (csrc[c] * dst[components-1])/255; break;
        case CTX_PORTER_DUFF_1_MINUS_ALPHA: res += (csrc[c] * (255-dst[components-1]))/255; break;
      }
      switch (f_d)
      {
        case CTX_PORTER_DUFF_0: break;
        case CTX_PORTER_DUFF_1:             res += dst[c]; break;
        case CTX_PORTER_DUFF_ALPHA:         res += (dst[c] * csrc[components-1])/255; break;
        case CTX_PORTER_DUFF_1_MINUS_ALPHA: res += (dst[c] * (255-csrc[components-1]))/255; break;
      }
#endif
      dst[c] = res;
    }
    coverage ++;
    src+=src_step;
    dst+=components;
  }
}

/* Forwarding wrapper: hands every argument unchanged to
 * __ctx_u8_porter_duff().
 */
CTX_INLINE static void
_ctx_u8_porter_duff (CtxRasterizer         *rasterizer,
                     int                    components,
                     uint8_t *              dst,
                     uint8_t * __restrict__ src,
                     int                    x0,
                     uint8_t *              coverage,
                     int                    count,
                     CtxCompositingMode     compositing_mode,
                     CtxFragment            fragment,
                     CtxBlend               blend)
{
  __ctx_u8_porter_duff (rasterizer,
                        components,
                        dst,
                        src,
                        x0,
                        coverage,
                        count,
                        compositing_mode,
                        fragment,
                        blend);
}

/* Statement macro: switches on rasterizer->state->gstate.compositing_mode and
 * calls __ctx_u8_porter_duff() with the matching compositing mode written as
 * a literal constant in every case (presumably so that each inlined call can
 * be specialized by the compiler).
 *
 * It must be expanded where rasterizer, dst, src, x0, coverage and count are
 * in scope.  The comp_format and source parameters are not used by the
 * expansion.  There is no default case, a mode that is not listed results in
 * no call at all.
 */
#define _ctx_u8_porter_duffs(comp_format, components, source, fragment, blend) \
   switch (rasterizer->state->gstate.compositing_mode) \
   { \
     case CTX_COMPOSITE_SOURCE_ATOP: \
      __ctx_u8_porter_duff (rasterizer, components, dst, src, x0, coverage, count, \
        CTX_COMPOSITE_SOURCE_ATOP, fragment, blend);\
      break;\
     case CTX_COMPOSITE_DESTINATION_ATOP:\
      __ctx_u8_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
        CTX_COMPOSITE_DESTINATION_ATOP, fragment, blend);\
      break;\
     case CTX_COMPOSITE_DESTINATION_IN:\
      __ctx_u8_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
        CTX_COMPOSITE_DESTINATION_IN, fragment, blend);\
      break;\
     case CTX_COMPOSITE_DESTINATION:\
      __ctx_u8_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
        CTX_COMPOSITE_DESTINATION, fragment, blend);\
       break;\
     case CTX_COMPOSITE_SOURCE_OVER:\
      __ctx_u8_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
        CTX_COMPOSITE_SOURCE_OVER, fragment, blend);\
       break;\
     case CTX_COMPOSITE_DESTINATION_OVER:\
      __ctx_u8_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
        CTX_COMPOSITE_DESTINATION_OVER, fragment, blend);\
       break;\
     case CTX_COMPOSITE_XOR:\
      __ctx_u8_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
        CTX_COMPOSITE_XOR, fragment, blend);\
       break;\
     case CTX_COMPOSITE_DESTINATION_OUT:\
       __ctx_u8_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
        CTX_COMPOSITE_DESTINATION_OUT, fragment, blend);\
       break;\
     case CTX_COMPOSITE_SOURCE_OUT:\
       __ctx_u8_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
        CTX_COMPOSITE_SOURCE_OUT, fragment, blend);\
       break;\
     case CTX_COMPOSITE_SOURCE_IN:\
       __ctx_u8_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
        CTX_COMPOSITE_SOURCE_IN, fragment, blend);\
       break;\
     case CTX_COMPOSITE_COPY:\
       __ctx_u8_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
        CTX_COMPOSITE_COPY, fragment, blend);\
       break;\
     case CTX_COMPOSITE_CLEAR:\
       __ctx_u8_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
        CTX_COMPOSITE_CLEAR, fragment, blend);\
       break;\
   }

/* Defines the compositing function ctx_<comp_format>_porter_duff_<source>,
 * taking CTX_COMPOSITE_ARGUMENTS; its body is the compositing mode switch of
 * _ctx_u8_porter_duffs().
 *
 * Generating one function per compositing mode would be slightly more
 * efficient, but leads to more code bloat on embedded targets; a single
 * function that switches at run time trades a small amount of performance
 * for size.
 */
#define ctx_u8_porter_duff(comp_format, components, source, fragment, blend) \
static void \
ctx_##comp_format##_porter_duff_##source (CTX_COMPOSITE_ARGUMENTS) \
{ \
  _ctx_u8_porter_duffs(comp_format, components, source, fragment, blend);\
}

/* ctx_RGBA8_porter_duff_generic: the fragment function and the blend mode are
 * both read from the rasterizer when the compositor runs.
 */
ctx_u8_porter_duff(RGBA8, 4,generic, rasterizer->fragment, rasterizer->state->gstate.blend_mode)
//ctx_u8_porter_duff(comp_name, components,color_##blend_name,  NULL, blend_mode)


#if CTX_INLINED_NORMAL_RGBA8

/* ctx_RGBA8_porter_duff_color: same arguments as the generic variant, only
 * the generated function name differs.
 */
ctx_u8_porter_duff(RGBA8, 4,color,   rasterizer->fragment, rasterizer->state->gstate.blend_mode)

/* Variants with the fragment function fixed at compile time; the blend mode
 * is still read from the graphics state at run time.
 */
#if CTX_GRADIENTS
ctx_u8_porter_duff(RGBA8, 4,linear_gradient, ctx_fragment_linear_gradient_RGBA8, rasterizer->state->gstate.blend_mode)
ctx_u8_porter_duff(RGBA8, 4,radial_gradient, ctx_fragment_radial_gradient_RGBA8, rasterizer->state->gstate.blend_mode)
#endif
ctx_u8_porter_duff(RGBA8, 4,image,           ctx_fragment_image_RGBA8,           rasterizer->state->gstate.blend_mode)
#endif


/* Compositing function that does nothing: the destination span is left
 * untouched.  ctx_setup_RGBA8() installs it when the global alpha is 0.
 */
static void
ctx_RGBA8_nop (CTX_COMPOSITE_ARGUMENTS)
{
}


/* When the source is a solid color, convert the single pixel stored in
 * rasterizer->color into rasterizer->color_native using the from_comp
 * callback of the target pixel format.  Does nothing for other sources.
 */
static inline void
ctx_setup_native_color (CtxRasterizer *rasterizer)
{
  if (rasterizer->state->gstate.source_fill.type != CTX_SOURCE_COLOR)
    return;

  rasterizer->format->from_comp (rasterizer, 0, &rasterizer->color[0], &rasterizer->color_native, 1);
}

/* Select the function stored in rasterizer->apply_coverage: the pixel
 * format's own apply_coverage when it has one, otherwise the compositing
 * function already chosen in rasterizer->comp_op.
 */
static void
ctx_setup_apply_coverage (CtxRasterizer *rasterizer)
{
  if (rasterizer->format->apply_coverage)
    rasterizer->apply_coverage = rasterizer->format->apply_coverage;
  else
    rasterizer->apply_coverage = rasterizer->comp_op;
}

/* Configure the rasterizer for compositing in RGBA8.
 *
 * Picks the fragment function, starts from the generic Porter-Duff compositor
 * with the fallback coverage path, prepares the solid color (when the source
 * is a solid color) and then replaces comp_op / comp with a more specific
 * choice where the blend mode, compositing mode and source type allow it.
 * Ends by calling ctx_setup_apply_coverage().
 */
static void
ctx_setup_RGBA8 (CtxRasterizer *rasterizer)
{
  CtxGState *gstate = &rasterizer->state->gstate;
  int components       = 4;
  rasterizer->fragment = ctx_rasterizer_get_fragment_RGBA8 (rasterizer);
  rasterizer->comp_op  = ctx_RGBA8_porter_duff_generic;
  rasterizer->comp = CTX_COV_PATH_FALLBACK;

  int blend_mode       = gstate->blend_mode;
  int compositing_mode = gstate->compositing_mode;

  if (gstate->source_fill.type == CTX_SOURCE_COLOR)
    {
      /* fetch the color as one RGBA8 pixel and fold in the global alpha */
      ctx_fragment_color_RGBA8 (rasterizer, 0,0, 1,rasterizer->color, 1, 0,0,0);
      if (gstate->global_alpha_u8 != 255)
      {
        for (int c = 0; c < 4; c ++)
          rasterizer->color[c] = (rasterizer->color[c] * gstate->global_alpha_u8 + 255)>>8;
      }
      /* Store the pixel split into two channel pairs, plain and multiplied by
       * 255, in the 32 bit words following the pixel itself.
       * NOTE(review): which channels end up in si_ga / si_rb depends on the
       * byte order of the host.
       */
      uint32_t src_pix    = ((uint32_t*)rasterizer->color)[0];
      uint32_t si_ga      = (src_pix & 0xff00ff00) >> 8;
      uint32_t si_rb      = src_pix & 0x00ff00ff;
      uint32_t si_ga_full = si_ga * 255;
      uint32_t si_rb_full = si_rb * 255;

      ((uint32_t*)rasterizer->color)[1] = si_ga;
      ((uint32_t*)rasterizer->color)[2] = si_rb;
      ((uint32_t*)rasterizer->color)[3] = si_ga_full;
      ((uint32_t*)rasterizer->color)[4] = si_rb_full;
    }

#if CTX_INLINED_NORMAL_RGBA8
  if (compositing_mode == CTX_COMPOSITE_CLEAR)
    rasterizer->comp_op = ctx_RGBA8_clear_normal;
  else
    switch (blend_mode)
    {
      case CTX_BLEND_NORMAL:
        if (compositing_mode == CTX_COMPOSITE_COPY)
        {
          rasterizer->comp_op = ctx_RGBA8_copy_normal;
          if (gstate->source_fill.type == CTX_SOURCE_COLOR)
            rasterizer->comp = CTX_COV_PATH_RGBA8_COPY;

        }
        else if (gstate->global_alpha_u8 == 0)
        {
          rasterizer->comp_op = ctx_RGBA8_nop;
        }
        else
        switch (gstate->source_fill.type)
        {
          case CTX_SOURCE_COLOR:
            if (compositing_mode == CTX_COMPOSITE_SOURCE_OVER)
            {
              rasterizer->comp_op = ctx_RGBA8_source_over_normal_color;
              /* The color is stored as 8 bit components here, so opacity is
               * tested on the alpha byte, the same test the non-inlined
               * configuration below uses.  Reading the bytes through a
               * float pointer, as the float setup does, yields garbage.
               */
              if (rasterizer->color[components-1] == 255)
                rasterizer->comp = CTX_COV_PATH_RGBA8_COPY;
            }
            else
            {
              /* must be a u8 compositor: the RGBAF one would treat the
               * RGBA8 destination as floats
               */
              rasterizer->comp_op = ctx_RGBA8_porter_duff_color;
            }
            break;
#if CTX_GRADIENTS
          case CTX_SOURCE_LINEAR_GRADIENT:
            rasterizer->comp_op = ctx_RGBA8_porter_duff_linear_gradient_normal;
            break;
          case CTX_SOURCE_RADIAL_GRADIENT:
            rasterizer->comp_op = ctx_RGBA8_porter_duff_radial_gradient_normal;
            break;
#endif
          case CTX_SOURCE_TEXTURE:
            rasterizer->comp_op = ctx_RGBA8_porter_duff_image_normal;
            break;
          default:
            rasterizer->comp_op = ctx_RGBA8_porter_duff_generic_normal;
            break;
        }
        break;
      default:
        switch (gstate->source_fill.type)
        {
          case CTX_SOURCE_COLOR:
            rasterizer->comp_op = ctx_RGBA8_porter_duff_color;
            break;
#if CTX_GRADIENTS
          case CTX_SOURCE_LINEAR_GRADIENT:
            rasterizer->comp_op = ctx_RGBA8_porter_duff_linear_gradient;
            break;
          case CTX_SOURCE_RADIAL_GRADIENT:
            rasterizer->comp_op = ctx_RGBA8_porter_duff_radial_gradient;
            break;
#endif
          case CTX_SOURCE_TEXTURE:
            rasterizer->comp_op = ctx_RGBA8_porter_duff_image;
            break;
          default:
            rasterizer->comp_op = ctx_RGBA8_porter_duff_generic;
            break;
        }
        break;
    }

#else

  if (gstate->source_fill.type == CTX_SOURCE_COLOR)
    {

      if (blend_mode == CTX_BLEND_NORMAL)
      {
        if(compositing_mode == CTX_COMPOSITE_COPY)
        {
          rasterizer->comp_op = ctx_RGBA8_source_copy_normal_color;
          rasterizer->comp = CTX_COV_PATH_RGBA8_COPY;
        }
        else if (compositing_mode == CTX_COMPOSITE_SOURCE_OVER)
        {
          /* an opaque color composited source-over is handled as a copy */
          if (rasterizer->color[components-1] == 255)
          {
            rasterizer->comp_op = ctx_RGBA8_source_copy_normal_color;
            rasterizer->comp = CTX_COV_PATH_RGBA8_COPY;
          }
          else
          {
            rasterizer->comp_op = ctx_RGBA8_source_over_normal_color;
            rasterizer->comp = CTX_COV_PATH_RGBA8_OVER;
          }
        }
      }
      else if (compositing_mode == CTX_COMPOSITE_CLEAR)
      {
        rasterizer->comp_op = ctx_RGBA8_clear_normal;
      }
  }
  else if (blend_mode == CTX_BLEND_NORMAL)
  {
    if(compositing_mode == CTX_COMPOSITE_SOURCE_OVER)
    {
       rasterizer->comp_op = ctx_RGBA8_source_over_normal_fragment;
       rasterizer->comp = CTX_COV_PATH_RGBA8_OVER_FRAGMENT;
    }
    else if (compositing_mode == CTX_COMPOSITE_COPY)
    {
       rasterizer->comp_op = ctx_RGBA8_source_copy_normal_fragment;
       rasterizer->comp = CTX_COV_PATH_RGBA8_COPY_FRAGMENT;
    }
  }
#endif
  ctx_setup_apply_coverage (rasterizer);
}


/* Setup for an RGB target: performs the RGBA8 setup, converts a solid source
 * color to the native format, and then always selects the fallback coverage
 * path, discarding whatever path ctx_setup_RGBA8() picked.
 */
static void
ctx_setup_RGB (CtxRasterizer *rasterizer)
{
  ctx_setup_RGBA8 (rasterizer);
  ctx_setup_native_color (rasterizer);

  rasterizer->comp = CTX_COV_PATH_FALLBACK;
}

/* Setup for an RGB332 target: RGBA8 setup plus native color conversion, then
 * the RGBA8 copy coverage path is mapped to its RGB332 counterpart and every
 * other path is replaced by the fallback.
 */
static void
ctx_setup_RGB332 (CtxRasterizer *rasterizer)
{
  ctx_setup_RGBA8 (rasterizer);
  ctx_setup_native_color (rasterizer);

  rasterizer->comp = (rasterizer->comp == CTX_COV_PATH_RGBA8_COPY)
                     ? CTX_COV_PATH_RGB332_COPY
                     : CTX_COV_PATH_FALLBACK;
}

/* Setup for an RGB565 target: RGBA8 setup plus native color conversion, then
 * the RGBA8 copy coverage path is mapped to its RGB565 counterpart and every
 * other path is replaced by the fallback.
 */
static void
ctx_setup_RGB565 (CtxRasterizer *rasterizer)
{
  ctx_setup_RGBA8 (rasterizer);
  ctx_setup_native_color (rasterizer);

  rasterizer->comp = (rasterizer->comp == CTX_COV_PATH_RGBA8_COPY)
                     ? CTX_COV_PATH_RGB565_COPY
                     : CTX_COV_PATH_FALLBACK;
}

/* Setup for an RGB8 target: RGBA8 setup plus native color conversion, then
 * the RGBA8 copy coverage path is mapped to its RGB8 counterpart and every
 * other path is replaced by the fallback.
 */
static void
ctx_setup_RGB8 (CtxRasterizer *rasterizer)
{
  ctx_setup_RGBA8 (rasterizer);
  ctx_setup_native_color (rasterizer);

  rasterizer->comp = (rasterizer->comp == CTX_COV_PATH_RGBA8_COPY)
                     ? CTX_COV_PATH_RGB8_COPY
                     : CTX_COV_PATH_FALLBACK;
}

/* Composite a span on a target whose native pixel format differs from the
 * compositing format: the destination span is converted with the format's
 * to_comp callback into a scratch buffer of count * ebpp bytes, composited
 * there with rasterizer->comp_op, and converted back with from_comp.
 *
 * The source handed to comp_op is always rasterizer->color; the src argument
 * is not used.
 */
static void
ctx_composite_convert (CTX_COMPOSITE_ARGUMENTS)
{
  uint8_t  scratch[count * rasterizer->format->ebpp];
  uint8_t *comp_pixels = scratch;

  rasterizer->format->to_comp (rasterizer, x0, dst, comp_pixels, count);
  rasterizer->comp_op (rasterizer, comp_pixels, rasterizer->color, x0, coverage, count);
  rasterizer->format->from_comp (rasterizer, x0, comp_pixels, dst, count);
}

#if CTX_ENABLE_FLOAT
/* Copy compositing with normal blending for float pixels: every destination
 * pixel becomes a coverage-weighted mix of itself and the single source pixel
 * found at src (the source pointer is never advanced).
 *
 * components is the number of floats per pixel.
 */
static void
ctx_float_copy_normal (int components, CTX_COMPOSITE_ARGUMENTS)
{
  float *out = (float*)dst;
  float *in  = (float*)src;
  float u0 = 0, v0 = 0, w0 = 1;
  float ud = 0, vd = 0, wd = 0;

  /* the resulting coordinates are not used by the loop below */
  ctx_init_uv (rasterizer, x0, rasterizer->scanline/CTX_FULL_AA, &u0, &v0, &w0, &ud, &vd, &wd);

  for (; count--; coverage++, out += components)
  {
    float covf = ctx_u8_to_float (*coverage);
    for (int c = 0; c < components; c++)
      out[c] = out[c]*(1.0-covf) + in[c]*covf;
  }
}

/* Clear compositing for float pixels: sets every component of each of the
 * count destination pixels to 0.0f.
 *
 * components is the number of floats per pixel.  The coverage values are not
 * consulted; the coverage-weighted variant was already compiled out (#if 0)
 * in the previous implementation, so every pixel of the span is cleared
 * completely.
 *
 * The previous 2 and 4 component fast paths stored through `dst`, which is
 * never advanced, and therefore only ever cleared the first pixel of the
 * span.  All stores now go through the advancing float pointer.
 */
static void
ctx_float_clear_normal (int components, CTX_COMPOSITE_ARGUMENTS)
{
  float *dstf = (float*)dst;
  while (count--)
  {
    for (int c = 0; c < components; c++)
      dstf[c] = 0.0f;
    dstf += components;
  }
}


/* Source-over compositing of one solid color onto a span of float pixels.
 *
 * For every pixel the color at src is scaled by the coverage and added to the
 * destination scaled by (1 - coverage * source alpha); the alpha is the last
 * of the `components` floats.
 */
static inline void
ctx_float_source_over_normal_color (int components, CTX_COMPOSITE_ARGUMENTS)
{
  float *out   = (float*)dst;
  float *color = (float*)src;

  for (; count--; coverage++, out += components)
  {
    float fcov   = ctx_u8_to_float (*coverage);
    float ralpha = 1.0f - fcov * color[components-1];
    for (int c = 0; c < components; c++)
      out[c] = color[c]*fcov + out[c] * ralpha;
  }
}

/* Copy compositing of one solid color onto a span of float pixels: each
 * destination pixel becomes color * coverage + destination * (1 - coverage).
 * components is the number of floats per pixel.
 */
static void
ctx_float_source_copy_normal_color (int components, CTX_COMPOSITE_ARGUMENTS)
{
  float *out   = (float*)dst;
  float *color = (float*)src;

  for (; count--; coverage++, out += components)
  {
    float fcov   = ctx_u8_to_float (*coverage);
    float ralpha = 1.0f - fcov;
    for (int c = 0; c < components; c++)
      out[c] = (color[c]*fcov + out[c] * ralpha);
  }
}

/* Normal blend: writes the source pixel to `blended` with every color
 * component multiplied by the source alpha (the last component); the alpha
 * itself is copied unchanged.  dst is not used.  The alpha is read before
 * anything is written, so blended may be the same buffer as src.
 */
inline static void
ctx_float_blend_normal (int components, float *dst, float *src, float *blended)
{
  const int   alpha_idx = components - 1;
  const float alpha     = src[alpha_idx];
  int c = 0;

  while (c < alpha_idx)
  {
    blended[c] = src[c] * alpha;
    c++;
  }
  blended[alpha_idx] = alpha;
}

/* Largest of the first components - 1 values of c (the last component is
 * skipped).  The search starts from -1000.0f, which is also the result when
 * there is nothing to inspect.
 */
static float ctx_float_get_max (int components, float *c)
{
  float best = -1000.0f;
  int   i    = 0;

  while (i < components - 1)
  {
    best = (c[i] > best) ? c[i] : best;
    i++;
  }
  return best;
}

/* Smallest of the first components - 1 values of c (the last component is
 * skipped).  The search starts from 400.0f, which is also the result when
 * there is nothing to inspect.
 */
static float ctx_float_get_min (int components, float *c)
{
  float best = 400.0f;
  int   i    = 0;

  while (i < components - 1)
  {
    best = (c[i] < best) ? c[i] : best;
    i++;
  }
  return best;
}

/* Luminance of a pixel.
 *
 * 3 or 4 components: CTX_CSS_RGB_TO_LUMINANCE of the pixel.
 * 1 or 2 components: the first component.
 * anything else:     the mean of all components except the last one.
 */
static float ctx_float_get_lum (int components, float *c)
{
  if (components == 3 || components == 4)
    return CTX_CSS_RGB_TO_LUMINANCE(c);

  if (components == 1 || components == 2)
    return c[0];

  float total = 0;
  for (int i = 0; i < components - 1; i ++)
    total += c[i];
  return total / (components - 1);
}

/* Saturation of a pixel, measured as largest minus smallest color component.
 *
 * 1 or 2 components: always 0.
 * 3 or 4 components: computed over the first three components.
 * anything else:     computed over all components except the last one.
 */
static float ctx_float_get_sat (int components, float *c)
{
  if (components == 1 || components == 2)
    return 0.0;

  if (components == 3 || components == 4)
  {
    float r = c[0];
    float g = c[1];
    float b = c[2];
    return ctx_maxf(r, ctx_maxf(g,b)) - ctx_minf(r,ctx_minf(g,b));
  }

  float lowest  = 1000;
  float highest = -1000;
  for (int i = 0; i < components - 1; i ++)
  {
    if (c[i] < lowest)  lowest  = c[i];
    if (c[i] > highest) highest = c[i];
  }
  return highest-lowest;
}

/* Shift the color components of c (all but the last component) so that the
 * luminance becomes lum, then pull the result back towards its luminance
 * when a component has dropped below 0 or risen above 1.  The last component
 * of c is left untouched.
 */
static void ctx_float_set_lum (int components, float *c, float lum)
{
  const int n_color = components - 1;
  float     shift   = lum - ctx_float_get_lum (components, c);
  float     work[components];

  for (int i = 0; i < n_color; i++)
    work[i] = c[i] + shift;

  float l  = ctx_float_get_lum (components, work);
  float lo = ctx_float_get_min (components, work);
  float hi = ctx_float_get_max (components, work);

  /* a component went negative: compress the values below the luminance */
  if (lo < 0.0f && l != lo)
  {
    for (int i = 0; i < n_color; i++)
      work[i] = l + (((work[i] - l) * l) / (l-lo));
  }

  /* a component exceeded 1: compress the values above the luminance */
  if (hi > 1.0f && hi != l)
  {
    for (int i = 0; i < n_color; i++)
      work[i] = l + (((work[i] - l) * (1.0f - l)) / (hi-l));
  }

  for (int i = 0; i < n_color; i++)
    c[i] = work[i];
}

/* Rescale the first three values of c so that the largest becomes sat, the
 * smallest becomes 0 and the middle one keeps its relative position between
 * them.  When the largest is not greater than the smallest all three become
 * 0.  The components argument is not used.
 */
static void ctx_float_set_sat (int components, float *c, float sat)
{
  int hi = 0, md = 1, lo = 2;
  int swap;

  /* three compare-and-swap steps order the indices so c[lo] <= c[md] <= c[hi] */
  if (c[lo] > c[md]) { swap = lo; lo = md; md = swap; }
  if (c[md] > c[hi]) { swap = md; md = hi; hi = swap; }
  if (c[lo] > c[md]) { swap = lo; lo = md; md = swap; }

  if (!(c[hi] > c[lo]))
  {
    c[hi] = 0.0f;
    c[md] = 0.0f;
  }
  else
  {
    c[md] = ((c[md]-c[lo]) * sat) / (c[hi] - c[lo]);
    c[hi] = sat;
  }
  c[lo] = 0.0f;
}

/* Defines the blend function ctx_float_blend_<name>.
 *
 * Inside CODE the following names are available:
 *   s        the source pixel (src)
 *   b        the destination pixel after ctx_float_deassociate_alpha()
 *   blended  the output pixel, CODE is expected to fill in its color
 *            components
 * After CODE has run the source alpha is stored as the alpha of blended and
 * ctx_float_associate_alpha() is applied to blended.
 */
#define ctx_float_blend_define(name, CODE) \
static inline void \
ctx_float_blend_##name (int components, float * __restrict__ dst, float *src, float *blended)\
{\
  float *s = src; float b[components];\
  ctx_float_deassociate_alpha (components, dst, b);\
    CODE;\
  blended[components-1] = s[components-1];\
  ctx_float_associate_alpha (components, blended);\
}

/* Variant of ctx_float_blend_define for separable blend modes: CODE is run
 * once per color component with the component index in c; the last (alpha)
 * component is not visited.
 */
#define ctx_float_blend_define_seperable(name, CODE) \
        ctx_float_blend_define(name, for (int c = 0; c < components-1; c++) { CODE ;}) \

/* Separable blend modes; in each formula b[c] is the backdrop (destination)
 * component and s[c] the source component.
 */

/* multiply: product of backdrop and source */
ctx_float_blend_define_seperable(multiply,    blended[c] = (b[c] * s[c]);)
/* screen: b + s - b*s */
ctx_float_blend_define_seperable(screen,      blended[c] = b[c] + s[c] - (b[c] * s[c]);)
/* overlay: multiply where the backdrop is below 0.5, screen elsewhere */
ctx_float_blend_define_seperable(overlay,     blended[c] = b[c] < 0.5f ? (s[c] * b[c]) :
                                                          s[c] + b[c] - (s[c] * b[c]);)
/* darken / lighten: the smaller / larger of backdrop and source */
ctx_float_blend_define_seperable(darken,      blended[c] = ctx_minf (b[c], s[c]))
ctx_float_blend_define_seperable(lighten,     blended[c] = ctx_maxf (b[c], s[c]))
/* color dodge: b / (1 - s) limited to 1, with b == 0 and s == 1 special-cased */
ctx_float_blend_define_seperable(color_dodge, blended[c] = (b[c] == 0.0f) ? 0.0f :
                                     s[c] == 1.0f ? 1.0f : ctx_minf(1.0f, (b[c]) / (1.0f-s[c])))
/* color burn: 1 - min (1, (1 - b) / s), with b == 1 and s == 0 special-cased */
ctx_float_blend_define_seperable(color_burn,  blended[c] = (b[c] == 1.0f) ? 1.0f :
                                     s[c] == 0.0f ? 0.0f : 1.0f - ctx_minf(1.0f, ((1.0f - b[c])) / s[c]))
/* hard light: the counterpart of overlay with the roles of source and
 * backdrop exchanged; multiply where the source component is below 0.5,
 * screen elsewhere.  The threshold was previously written as 0.f, which is
 * never exceeded from below by a non-negative source component and so made
 * the multiply branch unreachable.
 */
ctx_float_blend_define_seperable(hard_light,  blended[c] = s[c] < 0.5f ? (b[c] * s[c]) :
                                                          b[c] + s[c] - (b[c] * s[c]);)
/* difference: backdrop minus source, the sign of the result is kept */
ctx_float_blend_define_seperable(difference,  blended[c] = (b[c] - s[c]))

/* divide: backdrop divided by source, 0 when the source component is 0 */
ctx_float_blend_define_seperable(divide,      blended[c] = s[c]?(b[c]) / s[c]:0.0f)
/* addition: source plus backdrop */
ctx_float_blend_define_seperable(addition,    blended[c] = s[c]+b[c])
/* subtract: source minus backdrop */
ctx_float_blend_define_seperable(subtract,    blended[c] = s[c]-b[c])

/* exclusion: b + s - 2*b*s */
ctx_float_blend_define_seperable(exclusion,   blended[c] = b[c] + s[c] - 2.0f * b[c] * s[c])
/* soft light, following the W3C compositing formula:
 *   s <= 0.5:  b - (1 - 2s) * b * (1 - b)
 *   s >  0.5:  b + (2s - 1) * (d - b)
 *              with d = ((16b - 12) * b + 4) * b   for b <= 0.25
 *                   d = sqrt (b)                   otherwise
 * The intermediate d is a float and the backdrop threshold is 0.25; an
 * integer d and a threshold of 255/4 (left over from 8 bit arithmetic)
 * truncated d and never selected the square root branch for components in
 * the 0..1 range.
 */
ctx_float_blend_define_seperable(soft_light,
  if (s[c] <= 0.5f)
  {
    blended[c] = b[c] - (1.0f - 2.0f * s[c]) * b[c] * (1.0f - b[c]);
  }
  else
  {
    float d;
    if (b[c] <= 0.25f)
      d = (((16 * b[c] - 12.0f) * b[c] + 4.0f) * b[c]);
    else
      d = ctx_sqrtf(b[c]);
    blended[c] = (b[c] + (2.0f * s[c] - 1.0f) * (d - b[c]));
  }
)


/* color: the source color carrying the luminosity of the backdrop.  The
 * luminosity has to come from b; taking it from s, as was done before, sets
 * the source to its own luminosity and leaves the backdrop without any
 * influence on the result.
 */
ctx_float_blend_define(color,
  for (int i = 0; i < components; i++)
    blended[i] = s[i];
  ctx_float_set_lum(components, blended, ctx_float_get_lum (components, b));
)

/* hue: starts from the source color, then applies the saturation and the
 * luminosity of the backdrop.  Both are sampled before blended is written.
 */
ctx_float_blend_define(hue,
  float in_sat = ctx_float_get_sat(components, b);
  float in_lum = ctx_float_get_lum(components, b);
  for (int i = 0; i < components; i++)
    blended[i] = s[i];
  ctx_float_set_sat(components, blended, in_sat);
  ctx_float_set_lum(components, blended, in_lum);
)

/* saturation: starts from the backdrop color, then applies the saturation of
 * the source and the luminosity of the backdrop.  Both are sampled before
 * blended is written.
 */
ctx_float_blend_define(saturation,
  float in_sat = ctx_float_get_sat(components, s);
  float in_lum = ctx_float_get_lum(components, b);
  for (int i = 0; i < components; i++)
    blended[i] = b[i];
  ctx_float_set_sat(components, blended, in_sat);
  ctx_float_set_lum(components, blended, in_lum);
)

/* luminosity: the backdrop color carrying the luminosity of the source; the
 * source luminosity is sampled before blended is written.
 */
ctx_float_blend_define(luminosity,
  float in_lum = ctx_float_get_lum(components, s);
  for (int i = 0; i < components; i++)
    blended[i] = b[i];
  ctx_float_set_lum(components, blended, in_lum);
)

/* Dispatch on the blend mode: calls the ctx_float_blend_<mode> function that
 * matches `blend` with the remaining arguments passed through unchanged.
 * A value that matches none of the cases results in no call.
 */
inline static void
ctx_float_blend (int components, CtxBlend blend, float * __restrict__ dst, float *src, float *blended)
{
  switch (blend)
  {
    case CTX_BLEND_NORMAL:
      ctx_float_blend_normal (components, dst, src, blended);
      return;
    case CTX_BLEND_MULTIPLY:
      ctx_float_blend_multiply (components, dst, src, blended);
      return;
    case CTX_BLEND_SCREEN:
      ctx_float_blend_screen (components, dst, src, blended);
      return;
    case CTX_BLEND_OVERLAY:
      ctx_float_blend_overlay (components, dst, src, blended);
      return;
    case CTX_BLEND_DARKEN:
      ctx_float_blend_darken (components, dst, src, blended);
      return;
    case CTX_BLEND_LIGHTEN:
      ctx_float_blend_lighten (components, dst, src, blended);
      return;
    case CTX_BLEND_COLOR_DODGE:
      ctx_float_blend_color_dodge (components, dst, src, blended);
      return;
    case CTX_BLEND_COLOR_BURN:
      ctx_float_blend_color_burn (components, dst, src, blended);
      return;
    case CTX_BLEND_HARD_LIGHT:
      ctx_float_blend_hard_light (components, dst, src, blended);
      return;
    case CTX_BLEND_SOFT_LIGHT:
      ctx_float_blend_soft_light (components, dst, src, blended);
      return;
    case CTX_BLEND_DIFFERENCE:
      ctx_float_blend_difference (components, dst, src, blended);
      return;
    case CTX_BLEND_EXCLUSION:
      ctx_float_blend_exclusion (components, dst, src, blended);
      return;
    case CTX_BLEND_COLOR:
      ctx_float_blend_color (components, dst, src, blended);
      return;
    case CTX_BLEND_HUE:
      ctx_float_blend_hue (components, dst, src, blended);
      return;
    case CTX_BLEND_SATURATION:
      ctx_float_blend_saturation (components, dst, src, blended);
      return;
    case CTX_BLEND_LUMINOSITY:
      ctx_float_blend_luminosity (components, dst, src, blended);
      return;
    case CTX_BLEND_ADDITION:
      ctx_float_blend_addition (components, dst, src, blended);
      return;
    case CTX_BLEND_SUBTRACT:
      ctx_float_blend_subtract (components, dst, src, blended);
      return;
    case CTX_BLEND_DIVIDE:
      ctx_float_blend_divide (components, dst, src, blended);
      return;
  }
}

/* This is the grunt working function for float pixels; when it is inlined
 * with constant enum arguments, code-path elimination makes it produce
 * efficient code.
 *
 * rasterizer        supplies the graphics state (source type, global alpha),
 *                   the solid color and the current scanline
 * components        number of floats per pixel, alpha last
 * dst               span of destination pixels (floats), composited in place
 * src               not used; the source comes from rasterizer->color or from
 *                   `fragment`
 * x0                x coordinate of the first pixel, used to set up the
 *                   fragment coordinates
 * coverage          one coverage byte per pixel
 * count             number of pixels in the span
 * compositing_mode  selects the source/destination factors through
 *                   ctx_porter_duff_factors()
 * fragment          produces one source pixel per call when the source is
 *                   not a solid color
 * blend             blend mode applied to the source before compositing
 */
CTX_INLINE static void
ctx_float_porter_duff (CtxRasterizer         *rasterizer,
                       int                    components,
                       uint8_t * __restrict__ dst,
                       uint8_t * __restrict__ src,
                       int                    x0,
                       uint8_t * __restrict__ coverage,
                       int                    count,
                       CtxCompositingMode     compositing_mode,
                       CtxFragment            fragment,
                       CtxBlend               blend)
{
  float *dstf = (float*)dst;

  CtxPorterDuffFactor f_s, f_d;
  ctx_porter_duff_factors (compositing_mode, &f_s, &f_d);
  uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;
  float   global_alpha_f = rasterizer->state->gstate.global_alpha_f;
  
  if (rasterizer->state->gstate.source_fill.type == CTX_SOURCE_COLOR)
  {
    float tsrc[components];

    while (count--)
    {
      uint8_t cov = *coverage;
#if 1
      /* Skip pixels that cannot change: an opaque destination under
       * destination-over, or zero coverage in the modes listed.  The
       * destination alpha has to be read through the advancing float
       * pointer; `dst` is a byte pointer that stays at the start of the
       * span.
       */
      if (
        CTX_UNLIKELY((compositing_mode == CTX_COMPOSITE_DESTINATION_OVER && dstf[components-1] == 1.0f)||
        (cov == 0 && (compositing_mode == CTX_COMPOSITE_SOURCE_OVER ||
        compositing_mode == CTX_COMPOSITE_XOR               ||
        compositing_mode == CTX_COMPOSITE_DESTINATION_OUT   ||
        compositing_mode == CTX_COMPOSITE_SOURCE_ATOP
        ))))
      {
        coverage ++;
        dstf+=components;
        continue;
      }
#endif
      /* start every pixel from a fresh copy, blending modifies tsrc */
      memcpy (tsrc, rasterizer->color, sizeof(tsrc));

      if (blend != CTX_BLEND_NORMAL)
        ctx_float_blend (components, blend, dstf, tsrc, tsrc);
      float covf = ctx_u8_to_float (cov);

      if (global_alpha_u8 != 255)
        covf = covf * global_alpha_f;

      if (covf != 1.0f)
      {
        for (int c = 0; c < components; c++)
          tsrc[c] *= covf;
      }

      for (int c = 0; c < components; c++)
      {
        float res;
        /* these switches and this whole function is written to be
         * inlined when compiled when the enum values passed in are
         * constants.
         */
        switch (f_s)
        {
          case CTX_PORTER_DUFF_0: res = 0.0f; break;
          case CTX_PORTER_DUFF_1:             res = (tsrc[c]); break;
          case CTX_PORTER_DUFF_ALPHA:         res = (tsrc[c] *       dstf[components-1]); break;
          case CTX_PORTER_DUFF_1_MINUS_ALPHA: res = (tsrc[c] * (1.0f-dstf[components-1])); break;
        }
        switch (f_d)
        {
          case CTX_PORTER_DUFF_0: dstf[c] = res; break;
          case CTX_PORTER_DUFF_1:             dstf[c] = res + (dstf[c]); break;
          case CTX_PORTER_DUFF_ALPHA:         dstf[c] = res + (dstf[c] *       tsrc[components-1]); break;
          case CTX_PORTER_DUFF_1_MINUS_ALPHA: dstf[c] = res + (dstf[c] * (1.0f-tsrc[components-1])); break;
        }
      }
      coverage ++;
      dstf     +=components;
    }
  }
  else
  {
    float tsrc[components];
    float u0 = 0; float v0 = 0;
    float ud = 0; float vd = 0;
    float w0 = 1; float wd = 0;
    for (int c = 0; c < components; c++) tsrc[c] = 0.0f;
    ctx_init_uv (rasterizer, x0, rasterizer->scanline/CTX_FULL_AA, &u0, &v0, &w0, &ud, &vd, &wd);

    while (count--)
    {
      uint8_t cov = *coverage;
#if 1
      /* same skip test as in the solid color branch; the fragment
       * coordinates still have to advance for a skipped pixel
       */
      if (
        CTX_UNLIKELY((compositing_mode == CTX_COMPOSITE_DESTINATION_OVER && dstf[components-1] == 1.0f)||
        (cov == 0 && (compositing_mode == CTX_COMPOSITE_SOURCE_OVER ||
        compositing_mode == CTX_COMPOSITE_XOR               ||
        compositing_mode == CTX_COMPOSITE_DESTINATION_OUT   ||
        compositing_mode == CTX_COMPOSITE_SOURCE_ATOP
        ))))
      {
        u0 += ud;
        v0 += vd;
        coverage ++;
        dstf+=components;
        continue;
      }
#endif

      /* one source pixel per iteration */
      fragment (rasterizer, u0, v0, w0, tsrc, 1, ud, vd, wd);
      if (blend != CTX_BLEND_NORMAL)
        ctx_float_blend (components, blend, dstf, tsrc, tsrc);
      u0 += ud;
      v0 += vd;
      float covf = ctx_u8_to_float (cov);

      if (global_alpha_u8 != 255)
        covf = covf * global_alpha_f;

      if (covf != 1.0f)
      {
        for (int c = 0; c < components; c++)
          tsrc[c] *= covf;
      }

      for (int c = 0; c < components; c++)
      {
        float res;
        /* these switches and this whole function is written to be
         * inlined when compiled when the enum values passed in are
         * constants.
         */
        switch (f_s)
        {
          case CTX_PORTER_DUFF_0: res = 0.0f; break;
          case CTX_PORTER_DUFF_1:             res = (tsrc[c]); break;
          case CTX_PORTER_DUFF_ALPHA:         res = (tsrc[c] *       dstf[components-1]); break;
          case CTX_PORTER_DUFF_1_MINUS_ALPHA: res = (tsrc[c] * (1.0f-dstf[components-1])); break;
        }
        switch (f_d)
        {
          case CTX_PORTER_DUFF_0: dstf[c] = res; break;
          case CTX_PORTER_DUFF_1:             dstf[c] = res + (dstf[c]); break;
          case CTX_PORTER_DUFF_ALPHA:         dstf[c] = res + (dstf[c] *       tsrc[components-1]); break;
          case CTX_PORTER_DUFF_1_MINUS_ALPHA: dstf[c] = res + (dstf[c] * (1.0f-tsrc[components-1])); break;
        }
      }
      coverage ++;
      dstf     +=components;
    }
  }
}

/* Defines the compositing function ctx_<compformat>_porter_duff_<source>,
 * taking CTX_COMPOSITE_ARGUMENTS.  The generated function switches on the
 * compositing mode of the graphics state and calls the ctx_float_porter_duff
 * function with that mode written as a literal constant; a mode that is not
 * listed results in no call.
 *
 * The macro deliberately shares its name with the function: a macro name is
 * not expanded again inside its own expansion, so the calls in the body
 * resolve to the function.
 *
 * Generating one function per compositing mode would be slightly more
 * efficient, but leads to more code bloat on embedded targets; a single
 * function that switches at run time trades a small amount of performance
 * for size.
 */
#define ctx_float_porter_duff(compformat, components, source, fragment, blend) \
static void \
ctx_##compformat##_porter_duff_##source (CTX_COMPOSITE_ARGUMENTS) \
{ \
   switch (rasterizer->state->gstate.compositing_mode) \
   { \
     case CTX_COMPOSITE_SOURCE_ATOP: \
      ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count, \
        CTX_COMPOSITE_SOURCE_ATOP, fragment, blend);\
      break;\
     case CTX_COMPOSITE_DESTINATION_ATOP:\
      ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
        CTX_COMPOSITE_DESTINATION_ATOP, fragment, blend);\
      break;\
     case CTX_COMPOSITE_DESTINATION_IN:\
      ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
        CTX_COMPOSITE_DESTINATION_IN, fragment, blend);\
      break;\
     case CTX_COMPOSITE_DESTINATION:\
      ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
        CTX_COMPOSITE_DESTINATION, fragment, blend);\
       break;\
     case CTX_COMPOSITE_SOURCE_OVER:\
      ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
        CTX_COMPOSITE_SOURCE_OVER, fragment, blend);\
       break;\
     case CTX_COMPOSITE_DESTINATION_OVER:\
      ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
        CTX_COMPOSITE_DESTINATION_OVER, fragment, blend);\
       break;\
     case CTX_COMPOSITE_XOR:\
      ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
        CTX_COMPOSITE_XOR, fragment, blend);\
       break;\
     case CTX_COMPOSITE_DESTINATION_OUT:\
       ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
        CTX_COMPOSITE_DESTINATION_OUT, fragment, blend);\
       break;\
     case CTX_COMPOSITE_SOURCE_OUT:\
       ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
        CTX_COMPOSITE_SOURCE_OUT, fragment, blend);\
       break;\
     case CTX_COMPOSITE_SOURCE_IN:\
       ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
        CTX_COMPOSITE_SOURCE_IN, fragment, blend);\
       break;\
     case CTX_COMPOSITE_COPY:\
       ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
        CTX_COMPOSITE_COPY, fragment, blend);\
       break;\
     case CTX_COMPOSITE_CLEAR:\
       ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
        CTX_COMPOSITE_CLEAR, fragment, blend);\
       break;\
   }\
}
#endif

#if CTX_ENABLE_RGBAF

/* Instantiate the run-time configurable RGBAF compositors (4 float
 * components per pixel).  Both variants take their fragment function from
 * rasterizer->fragment and their blend mode from the graphics state; the
 * resulting ctx_RGBAF_porter_duff_color / ctx_RGBAF_porter_duff_generic are
 * what ctx_setup_RGBAF installs for solid-color and other sources.
 */
ctx_float_porter_duff(RGBAF, 4,color,   rasterizer->fragment, rasterizer->state->gstate.blend_mode)
ctx_float_porter_duff(RGBAF, 4,generic, rasterizer->fragment, rasterizer->state->gstate.blend_mode)

#if CTX_INLINED_NORMAL
/* RGBAF compositors bound at compile time to one specific fragment function
 * (linear gradient, radial gradient, image) instead of the indirect
 * rasterizer->fragment; the blend mode is still read from the graphics state.
 */
#if CTX_GRADIENTS
ctx_float_porter_duff(RGBAF, 4,linear_gradient, ctx_fragment_linear_gradient_RGBAF, rasterizer->state->gstate.blend_mode)
ctx_float_porter_duff(RGBAF, 4,radial_gradient, ctx_fragment_radial_gradient_RGBAF, rasterizer->state->gstate.blend_mode)
#endif
ctx_float_porter_duff(RGBAF, 4,image,           ctx_fragment_image_RGBAF,           rasterizer->state->gstate.blend_mode)


/* ctx_float_porter_duff_blend:
 * @comp_name:  pixel format tag pasted into the generated function names
 * @components: number of float components per pixel
 * @blend_mode: blend mode expression handed to every generated compositor
 * @blend_name: suffix appended to the generated function names
 *
 * Instantiates the family of float compositors for one fixed blend mode:
 * color, generic, image and - when CTX_GRADIENTS is enabled - linear and
 * radial gradient variants.
 *
 * The gradient variants are bound to the float (RGBAF) gradient fragments,
 * matching the run-time blend-mode instantiations; the 8 bit RGBA8 fragments
 * produce bytes and must not be used to fill a float source buffer.
 */
#if CTX_GRADIENTS
#define ctx_float_porter_duff_blend(comp_name, components, blend_mode, blend_name)\
ctx_float_porter_duff(comp_name, components,color_##blend_name,            rasterizer->fragment,               blend_mode)\
ctx_float_porter_duff(comp_name, components,generic_##blend_name,          rasterizer->fragment,               blend_mode)\
ctx_float_porter_duff(comp_name, components,linear_gradient_##blend_name,  ctx_fragment_linear_gradient_RGBAF, blend_mode)\
ctx_float_porter_duff(comp_name, components,radial_gradient_##blend_name,  ctx_fragment_radial_gradient_RGBAF, blend_mode)\
ctx_float_porter_duff(comp_name, components,image_##blend_name,            ctx_fragment_image_RGBAF,           blend_mode)
#else
#define ctx_float_porter_duff_blend(comp_name, components, blend_mode, blend_name)\
ctx_float_porter_duff(comp_name, components,color_##blend_name,            rasterizer->fragment,               blend_mode)\
ctx_float_porter_duff(comp_name, components,generic_##blend_name,          rasterizer->fragment,               blend_mode)\
ctx_float_porter_duff(comp_name, components,image_##blend_name,            ctx_fragment_image_RGBAF,           blend_mode)
#endif

/* RGBAF compositors specialized for CTX_BLEND_NORMAL (names end in _normal) */
ctx_float_porter_duff_blend(RGBAF, 4, CTX_BLEND_NORMAL, normal)


/* RGBAF compositor for COPY compositing with normal blending: forwards the
 * composite arguments to ctx_float_copy_normal with 4 components per pixel.
 */
static void
ctx_RGBAF_copy_normal (CTX_COMPOSITE_ARGUMENTS)
{
  ctx_float_copy_normal (4, rasterizer, dst, src, x0, coverage, count);
}

/* RGBAF compositor for CLEAR compositing: forwards the composite arguments
 * to ctx_float_clear_normal with 4 components per pixel.
 */
static void
ctx_RGBAF_clear_normal (CTX_COMPOSITE_ARGUMENTS)
{
  ctx_float_clear_normal (4, rasterizer, dst, src, x0, coverage, count);
}

#if 1
/* RGBAF compositor for SOURCE_OVER with normal blending and a solid color
 * source.  Note that the incoming src argument is ignored; the color prepared
 * in rasterizer->color is passed on to ctx_float_source_over_normal_color.
 */
static void
ctx_RGBAF_source_over_normal_color (CTX_COMPOSITE_ARGUMENTS)
{
  ctx_float_source_over_normal_color (4, rasterizer, dst, rasterizer->color, x0, coverage, count);
}
#endif
#endif

/* ctx_setup_RGBAF:
 * @rasterizer: rasterizer whose compositing state is (re)configured
 *
 * Selects the fragment function, the compositor (comp_op) and the coverage
 * path (comp) for RGBAF targets from the current graphics state: source
 * type, compositing mode, blend mode and global alpha.  Finishes by calling
 * ctx_setup_apply_coverage.
 */
static void
ctx_setup_RGBAF (CtxRasterizer *rasterizer)
{
  CtxGState *gstate = &rasterizer->state->gstate;
  int components = 4;
  rasterizer->fragment = ctx_rasterizer_get_fragment_RGBAF (rasterizer);
  rasterizer->comp = CTX_COV_PATH_FALLBACK;
#if 1
  if (gstate->source_fill.type == CTX_SOURCE_COLOR)
    {
      rasterizer->comp_op = ctx_RGBAF_porter_duff_color;
      /* evaluate the color fragment once, for a single pixel, to obtain the
       * source color as floats in rasterizer->color */
      ctx_fragment_color_RGBAF (rasterizer, 0,0,1, rasterizer->color, 1, 0,0,0);
      /* global alpha scales every component, not only the alpha channel */
      if (gstate->global_alpha_u8 != 255)
        for (int c = 0; c < components; c ++)
          ((float*)rasterizer->color)[c] *= gstate->global_alpha_f;

      /* also keep the color converted to the target's native pixel encoding */
      if (rasterizer->format->from_comp)
        rasterizer->format->from_comp (rasterizer, 0,
          &rasterizer->color[0],
          &rasterizer->color_native,
          1);
    }
  else
#endif
  {
    rasterizer->comp_op = ctx_RGBAF_porter_duff_generic;
  }

#if CTX_INLINED_NORMAL
  /* replace the generic choice made above with specialized compositors */
  if (gstate->compositing_mode == CTX_COMPOSITE_CLEAR)
    rasterizer->comp_op = ctx_RGBAF_clear_normal;
  else
    switch (gstate->blend_mode)
    {
      case CTX_BLEND_NORMAL:
        if (gstate->compositing_mode == CTX_COMPOSITE_COPY)
        {
          rasterizer->comp_op = ctx_RGBAF_copy_normal;
          if (gstate->source_fill.type == CTX_SOURCE_COLOR)
            rasterizer->comp = CTX_COV_PATH_RGBAF_COPY;

        }
        else if (gstate->global_alpha_u8 == 0)
        {
          /* fully transparent global alpha: install the no-op compositor */
          rasterizer->comp_op = ctx_RGBA8_nop;
        }
        else
        switch (gstate->source_fill.type)
        {
          case CTX_SOURCE_COLOR:
            if (gstate->compositing_mode == CTX_COMPOSITE_SOURCE_OVER)
            {
              rasterizer->comp_op = ctx_RGBAF_source_over_normal_color;
              /* an (almost) opaque color over anything is handled by the
               * copy coverage path */
              if ( ((float*)rasterizer->color)[3] >= 0.999f)
                rasterizer->comp = CTX_COV_PATH_RGBAF_COPY;
            }
            else
            {
              rasterizer->comp_op = ctx_RGBAF_porter_duff_color_normal;
            }
            break;
#if CTX_GRADIENTS
          case CTX_SOURCE_LINEAR_GRADIENT:
            rasterizer->comp_op = ctx_RGBAF_porter_duff_linear_gradient_normal;
            break;
          case CTX_SOURCE_RADIAL_GRADIENT:
            rasterizer->comp_op = ctx_RGBAF_porter_duff_radial_gradient_normal;
            break;
#endif
          case CTX_SOURCE_TEXTURE:
            rasterizer->comp_op = ctx_RGBAF_porter_duff_image_normal;
            break;
          default:
            rasterizer->comp_op = ctx_RGBAF_porter_duff_generic_normal;
            break;
        }
        break;
      default:
        /* non-normal blend modes: compositors that read the blend mode from
         * the graphics state at run time */
        switch (gstate->source_fill.type)
        {
          case CTX_SOURCE_COLOR:
            rasterizer->comp_op = ctx_RGBAF_porter_duff_color;
            break;
#if CTX_GRADIENTS
          case CTX_SOURCE_LINEAR_GRADIENT:
            rasterizer->comp_op = ctx_RGBAF_porter_duff_linear_gradient;
            break;
          case CTX_SOURCE_RADIAL_GRADIENT:
            rasterizer->comp_op = ctx_RGBAF_porter_duff_radial_gradient;
            break;
#endif
          case CTX_SOURCE_TEXTURE:
            rasterizer->comp_op = ctx_RGBAF_porter_duff_image;
            break;
          default:
            rasterizer->comp_op = ctx_RGBAF_porter_duff_generic;
            break;
        }
        break;
    }
#endif
  ctx_setup_apply_coverage (rasterizer);
}

#endif
#if CTX_ENABLE_GRAYAF

#if CTX_GRADIENTS
/* ctx_fragment_linear_gradient_GRAYAF:
 * @x, @y:   position of the first sample
 * @out:     receives @count gray+alpha float pairs
 * @dx, @dy: per-sample step of the position
 *
 * Samples the linear gradient of the current fill source as RGBAF and stores
 * each sample as gray (converted from the rgb part) followed by alpha.
 * @z and @dz are unused.
 */
static void
ctx_fragment_linear_gradient_GRAYAF (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  CtxSource *source = &rasterizer->state->gstate.source_fill;
  float     *graya  = (float *) out;
  float      rgba[4];

  for (int i = 0; i < count; i++, graya += 2, x += dx, y += dy)
    {
      /* position along the gradient axis, mapped onto the stop range */
      float v = ( ( (source->linear_gradient.dx * x + source->linear_gradient.dy * y) /
                    source->linear_gradient.length) -
                  source->linear_gradient.start) * (source->linear_gradient.rdelta);
      ctx_fragment_gradient_1d_RGBAF (rasterizer, v, 1.0, rgba);
      graya[0] = ctx_float_color_rgb_to_gray (rasterizer->state, rgba);
      graya[1] = rgba[3];
    }
}

/* ctx_fragment_radial_gradient_GRAYAF:
 * @x, @y:   position of the first sample
 * @out:     receives @count gray+alpha float pairs
 * @dx, @dy: per-sample step of the position
 *
 * Samples the radial gradient of the current fill source as RGBAF and stores
 * each sample as gray (converted from the rgb part) followed by alpha.
 * @z and @dz are unused.
 *
 * The distance from the gradient center is scaled by multiplying with
 * radial_gradient.rdelta, the same way the linear gradient and the GRAYA8
 * radial gradient fragments apply rdelta.
 */
static void
ctx_fragment_radial_gradient_GRAYAF (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  float rgba[4];
  float *graya = (float *) out;
  CtxSource *g = &rasterizer->state->gstate.source_fill;
  for (int i = 0; i < count; i ++)
  {
    float v = 0.0f;
    /* degenerate gradients (r1 <= r0) sample position 0 */
    if ((g->radial_gradient.r1-g->radial_gradient.r0) > 0.0f)
      {
        v = ctx_hypotf (g->radial_gradient.x0 - x, g->radial_gradient.y0 - y);
        v = (v - g->radial_gradient.r0) * (g->radial_gradient.rdelta);
      }
    ctx_fragment_gradient_1d_RGBAF (rasterizer, v, 0.0, rgba);
    graya[0] = ctx_float_color_rgb_to_gray (rasterizer->state, rgba);
    graya[1] = rgba[3];
    graya += 2;
    x += dx;
    y += dy;
  }
}
#endif

/* ctx_fragment_color_GRAYAF:
 * @out:   receives @count gray+alpha float pairs
 * @count: number of samples to produce
 *
 * Fills @out with the gray+alpha value of the current solid fill color, as
 * returned by ctx_color_get_graya.  The position arguments do not influence
 * the result.
 */
static void
ctx_fragment_color_GRAYAF (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  CtxSource *source = &rasterizer->state->gstate.source_fill;
  float     *graya  = (float *) out;
  int        remaining = count;

  while (remaining-- > 0)
    {
      ctx_color_get_graya (rasterizer->state, &source->color, graya);
      graya += 2;
    }
}

/* ctx_fragment_image_GRAYAF:
 * @x, @y, @z:    position of the first sample
 * @out:          receives @count gray+alpha float pairs
 * @count:        number of samples to produce
 * @dx, @dy, @dz: per-sample step of the position
 *
 * Samples the texture of the current fill source through the RGBA8 image
 * fragments and converts every sample to gray+alpha floats.
 *
 * The intermediate RGBA8 buffer is sized for all @count samples, and exactly
 * one gray+alpha pair is written per sample.
 */
static void ctx_fragment_image_GRAYAF (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  if (count <= 0)
    return; /* nothing to sample; also avoids a zero length array below */

  uint8_t rgba[4 * count];
  float *graya = (float *) out;
  CtxSource *g = &rasterizer->state->gstate.source_fill;
  CtxBuffer *buffer = g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;
  switch (buffer->format->bpp)
    {
#if CTX_FRAGMENT_SPECIALIZE
      case 1:  ctx_fragment_image_gray1_RGBA8 (rasterizer, x, y, z, rgba, count, dx, dy, dz); break;
      case 24: ctx_fragment_image_rgb8_RGBA8 (rasterizer, x, y, z, rgba, count, dx, dy, dz);  break;
      case 32: ctx_fragment_image_rgba8_RGBA8 (rasterizer, x, y, z, rgba, count, dx, dy, dz); break;
#endif
      default: ctx_fragment_image_RGBA8 (rasterizer, x, y, z, rgba, count, dx, dy, dz);       break;
    }
  for (int i = 0; i < count; i ++)
    {
      float rgbaf[4];
      for (int c = 0; c < 4; c ++)
        rgbaf[c] = ctx_u8_to_float (rgba[i * 4 + c]);
      graya[0] = ctx_float_color_rgb_to_gray (rasterizer->state, rgbaf);
      graya[1] = rgbaf[3];
      graya += 2;
    }
}

/* Returns the GRAYAF fragment function matching the type of the current fill
 * source; the solid color fragment is used for color sources and for any
 * source type without a dedicated fragment.
 */
static CtxFragment ctx_rasterizer_get_fragment_GRAYAF (CtxRasterizer *rasterizer)
{
  CtxFragment fragment = ctx_fragment_color_GRAYAF;

  switch (rasterizer->state->gstate.source_fill.type)
    {
      case CTX_SOURCE_TEXTURE:
        fragment = ctx_fragment_image_GRAYAF;
        break;
#if CTX_GRADIENTS
      case CTX_SOURCE_LINEAR_GRADIENT:
        fragment = ctx_fragment_linear_gradient_GRAYAF;
        break;
      case CTX_SOURCE_RADIAL_GRADIENT:
        fragment = ctx_fragment_radial_gradient_GRAYAF;
        break;
#endif
      default:
        break;
    }
  return fragment;
}

/* Instantiate the run-time configurable GRAYAF compositors (2 float
 * components per pixel); fragment function and blend mode are read from the
 * rasterizer when the compositor runs.
 */
ctx_float_porter_duff(GRAYAF, 2,color,   rasterizer->fragment, rasterizer->state->gstate.blend_mode)
ctx_float_porter_duff(GRAYAF, 2,generic, rasterizer->fragment, rasterizer->state->gstate.blend_mode)

#if CTX_INLINED_NORMAL
/* GRAYAF compositors with the blend mode fixed to CTX_BLEND_NORMAL */
ctx_float_porter_duff(GRAYAF, 2,color_normal,   rasterizer->fragment, CTX_BLEND_NORMAL)
ctx_float_porter_duff(GRAYAF, 2,generic_normal, rasterizer->fragment, CTX_BLEND_NORMAL)

/* GRAYAF compositor for COPY compositing with normal blending: forwards the
 * composite arguments to ctx_float_copy_normal with 2 components per pixel.
 */
static void
ctx_GRAYAF_copy_normal (CTX_COMPOSITE_ARGUMENTS)
{
  ctx_float_copy_normal (2, rasterizer, dst, src, x0, coverage, count);
}

/* GRAYAF compositor for CLEAR compositing: forwards the composite arguments
 * to ctx_float_clear_normal with 2 components per pixel.
 */
static void
ctx_GRAYAF_clear_normal (CTX_COMPOSITE_ARGUMENTS)
{
  ctx_float_clear_normal (2, rasterizer, dst, src, x0, coverage, count);
}

/* GRAYAF solid-color compositor: ignores the incoming src argument and passes
 * the color prepared in rasterizer->color to
 * ctx_float_source_copy_normal_color with 2 components per pixel.
 */
static void
ctx_GRAYAF_source_copy_normal_color (CTX_COMPOSITE_ARGUMENTS)
{
  ctx_float_source_copy_normal_color (2, rasterizer, dst, rasterizer->color, x0, coverage, count);
}
#endif

/* ctx_setup_GRAYAF:
 * @rasterizer: rasterizer whose compositing state is (re)configured
 *
 * Selects the fragment function, the compositor (comp_op) and the coverage
 * path (comp) for GRAYAF targets from the current graphics state.  Finishes
 * by calling ctx_setup_apply_coverage.
 *
 * For solid colors rasterizer->color is filled with two floats, gray and
 * alpha, the layout the 2 component compositors and the alpha test below
 * (index components-1) operate on.
 */
static void
ctx_setup_GRAYAF (CtxRasterizer *rasterizer)
{
  CtxGState *gstate = &rasterizer->state->gstate;
  int components = 2;
  rasterizer->fragment = ctx_rasterizer_get_fragment_GRAYAF (rasterizer);
  rasterizer->comp = CTX_COV_PATH_FALLBACK;
  if (gstate->source_fill.type == CTX_SOURCE_COLOR)
    {
      rasterizer->comp_op = ctx_GRAYAF_porter_duff_color;
      /* fetch gray+alpha rather than rgba: with rgba the second float would
       * be the green channel, which is then misread as alpha */
      ctx_color_get_graya (rasterizer->state, &gstate->source_fill.color, (float*)rasterizer->color);
      /* global alpha scales every component, not only the alpha channel */
      if (gstate->global_alpha_u8 != 255)
        for (int c = 0; c < components; c ++)
          ((float*)rasterizer->color)[c] *= gstate->global_alpha_f;

      /* also keep the color converted to the target's native pixel encoding */
      if (rasterizer->format->from_comp)
        rasterizer->format->from_comp (rasterizer, 0,
          &rasterizer->color[0],
          &rasterizer->color_native,
          1);
    }
  else
  {
    rasterizer->comp_op = ctx_GRAYAF_porter_duff_generic;
  }

#if CTX_INLINED_NORMAL
  /* replace the generic choice made above with specialized compositors */
  if (gstate->compositing_mode == CTX_COMPOSITE_CLEAR)
    rasterizer->comp_op = ctx_GRAYAF_clear_normal;
  else
    switch (gstate->blend_mode)
    {
      case CTX_BLEND_NORMAL:
        if (gstate->compositing_mode == CTX_COMPOSITE_COPY)
        {
          rasterizer->comp_op = ctx_GRAYAF_copy_normal;
        }
        else if (gstate->global_alpha_u8 == 0)
          rasterizer->comp_op = ctx_RGBA8_nop;
        else
        switch (gstate->source_fill.type)
        {
          case CTX_SOURCE_COLOR:
            if (gstate->compositing_mode == CTX_COMPOSITE_SOURCE_OVER)
            {
              /* a fully transparent color leaves the destination untouched */
              if (((float*)rasterizer->color)[components-1] == 0.0f)
                rasterizer->comp_op = ctx_RGBA8_nop;
              else
                rasterizer->comp_op = ctx_GRAYAF_source_copy_normal_color;
            }
            else
            {
              rasterizer->comp_op = ctx_GRAYAF_porter_duff_color_normal;
            }
            break;
          default:
            rasterizer->comp_op = ctx_GRAYAF_porter_duff_generic_normal;
            break;
        }
        break;
      default:
        switch (gstate->source_fill.type)
        {
          case CTX_SOURCE_COLOR:
            rasterizer->comp_op = ctx_GRAYAF_porter_duff_color;
            break;
          default:
            rasterizer->comp_op = ctx_GRAYAF_porter_duff_generic;
            break;
        }
        break;
    }
#endif
  ctx_setup_apply_coverage (rasterizer);
}

#endif
#if CTX_ENABLE_GRAYF

static void
ctx_composite_GRAYF (CTX_COMPOSITE_ARGUMENTS)
{
  float *dstf = (float*)dst;

  float temp[count*2];
  for (unsigned int i = 0; i < count; i++)
  {
    temp[i*2] = dstf[i];
    temp[i*2+1] = 1.0f;
  }
  rasterizer->comp_op (rasterizer, (uint8_t*)temp, rasterizer->color, x0, coverage, count);
  for (unsigned int i = 0; i < count; i++)
  {
    dstf[i] = temp[i*2];
  }
}

#endif
#if CTX_ENABLE_BGRA8

/* Exchanges bits 0-7 with bits 16-23 of the 32 bit pixel at @rgba, in place;
 * bits 8-15 and 24-31 are left untouched.  Despite the name this is the
 * red <-> blue exchange that turns RGBA8 into BGRA8 and back.
 * @rgba is accessed as a uint32_t.
 */
inline static void
ctx_swap_red_green (uint8_t *rgba)
{
  uint32_t       *pixel = (uint32_t *) rgba;
  const uint32_t  value = *pixel;
  const uint32_t  outer = value & 0x00ff00ff; /* the two channels to exchange */

  *pixel = (value & 0xff00ff00) | (outer << 16) | (outer >> 16);
}

/* Converts @count 32 bit pixels from @buf to @rgba, exchanging the red and
 * blue channel of each pixel.  Each pixel is loaded before it is stored, so
 * @buf and @rgba may be the same buffer.  @x is unused.
 */
static void
ctx_BGRA8_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
{
  uint32_t *in  = (uint32_t *) buf;
  uint32_t *out = (uint32_t *) rgba;

  for (int i = 0; i < count; i++)
    {
      uint32_t pixel = in[i];
      ctx_swap_red_green ( (uint8_t *) &pixel);
      out[i] = pixel;
    }
}

/* Converts @count pixels from RGBA8 to BGRA8.  The channel exchange is its
 * own inverse, so this forwards to ctx_BGRA8_to_RGBA8.
 */
static void
ctx_RGBA8_to_BGRA8 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
{
  ctx_BGRA8_to_RGBA8 (rasterizer, x, rgba, (uint8_t *) buf, count);
}

/* Composite onto a BGRA8 span: the destination is converted to RGBA8 in a
 * temporary buffer, composited with the installed comp_op using
 * rasterizer->color, and converted back into @dst.
 *
 * For better performance this could be done without the pre/post conversion
 * by swapping R and B of the source instead - as long as the source is a
 * color rather than a gradient or image.
 */
static void
ctx_composite_BGRA8 (CTX_COMPOSITE_ARGUMENTS)
{
  uint8_t rgba[count * 4];

  ctx_BGRA8_to_RGBA8 (rasterizer, x0, dst, rgba, count);
  rasterizer->comp_op (rasterizer, rgba, rasterizer->color, x0, coverage, count);
  ctx_RGBA8_to_BGRA8 (rasterizer, x0, rgba, dst, count);
}


#endif
/* Composite straight onto @dst without any pixel format conversion: the
 * installed comp_op is invoked on the destination span with
 * rasterizer->color as source; the incoming src argument is not used.
 */
static void
ctx_composite_direct (CTX_COMPOSITE_ARGUMENTS)
{
  rasterizer->comp_op (rasterizer, dst, rasterizer->color, x0, coverage, count);
}

#if CTX_ENABLE_CMYKAF

/* ctx_fragment_other_CMYKAF:
 * @x, @y, @z:    position of the first sample
 * @out:          receives @count samples of 5 floats each
 * @count:        number of samples to produce
 * @dx, @dy, @dz: per-sample step of the position
 *
 * CMYKAF fragment for sources without a native CMYK representation: the
 * source is sampled as RGBAF, the rgb part is converted with ctx_rgb_to_cmyk
 * and the alpha is carried over as fifth component.
 *
 * Unknown source types produce all-zero RGBA input for every sample, not
 * just the first one.
 */
static void
ctx_fragment_other_CMYKAF (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  if (count <= 0)
    return; /* nothing to sample; also avoids a zero length array below */

  float *cmyka = (float*)out;
  float _rgba[4 * count];
  float *rgba = &_rgba[0];
  CtxGState *gstate = &rasterizer->state->gstate;
  switch (gstate->source_fill.type)
    {
      case CTX_SOURCE_TEXTURE:
        ctx_fragment_image_RGBAF (rasterizer, x, y, z, rgba, count, dx, dy, dz);
        break;
      case CTX_SOURCE_COLOR:
        ctx_fragment_color_RGBAF (rasterizer, x, y, z, rgba, count, dx, dy, dz);
        break;
#if CTX_GRADIENTS
      case CTX_SOURCE_LINEAR_GRADIENT:
        ctx_fragment_linear_gradient_RGBAF (rasterizer, x, y, z, rgba, count, dx, dy, dz);
        break;
      case CTX_SOURCE_RADIAL_GRADIENT:
        ctx_fragment_radial_gradient_RGBAF (rasterizer, x, y, z, rgba, count, dx, dy, dz);
        break;
#endif
      default:
        for (int i = 0; i < 4 * count; i++)
          rgba[i] = 0.0f;
        break;
    }
  for (int i = 0; i < count; i++)
  {
    cmyka[4]=rgba[3];
    ctx_rgb_to_cmyk (rgba[0], rgba[1], rgba[2], &cmyka[0], &cmyka[1], &cmyka[2], &cmyka[3]);
    cmyka += 5;
    rgba += 4;
  }
}

/* ctx_fragment_color_CMYKAF:
 * @out:   receives @count samples of 5 floats each
 * @count: number of samples to produce
 *
 * Fills @out with the current solid fill color: the four ink components
 * returned by ctx_color_get_cmyka are stored inverted (1.0 - value), the
 * alpha is stored as is.  The position arguments do not influence the result.
 */
static void
ctx_fragment_color_CMYKAF (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  float  source[5];
  float *pixel = (float *) out;

  ctx_color_get_cmyka (rasterizer->state,
                       &rasterizer->state->gstate.source_fill.color,
                       source);

  for (int i = 0; i < count; i++, pixel += 5)
    {
      pixel[0] = (1.0f - source[0]);
      pixel[1] = (1.0f - source[1]);
      pixel[2] = (1.0f - source[2]);
      pixel[3] = (1.0f - source[3]);
      pixel[4] = source[4];
    }
}

/* Returns the CMYKAF fragment function for the current fill source: the
 * dedicated color fragment for solid colors, the RGBAF-converting fragment
 * for every other source type.
 */
static CtxFragment ctx_rasterizer_get_fragment_CMYKAF (CtxRasterizer *rasterizer)
{
  if (rasterizer->state->gstate.source_fill.type == CTX_SOURCE_COLOR)
    return ctx_fragment_color_CMYKAF;

  return ctx_fragment_other_CMYKAF;
}

/* Instantiate the run-time configurable CMYKAF compositors (5 float
 * components per pixel); fragment function and blend mode are read from the
 * rasterizer when the compositor runs.
 */
ctx_float_porter_duff (CMYKAF, 5,color,           rasterizer->fragment, rasterizer->state->gstate.blend_mode)
ctx_float_porter_duff (CMYKAF, 5,generic,         rasterizer->fragment, rasterizer->state->gstate.blend_mode)

#if CTX_INLINED_NORMAL
/* CMYKAF compositors with the blend mode fixed to CTX_BLEND_NORMAL */
ctx_float_porter_duff (CMYKAF, 5,color_normal,            rasterizer->fragment, CTX_BLEND_NORMAL)
ctx_float_porter_duff (CMYKAF, 5,generic_normal,          rasterizer->fragment, CTX_BLEND_NORMAL)

/* CMYKAF compositor for COPY compositing with normal blending: forwards the
 * composite arguments to ctx_float_copy_normal with 5 components per pixel.
 */
static void
ctx_CMYKAF_copy_normal (CTX_COMPOSITE_ARGUMENTS)
{
  ctx_float_copy_normal (5, rasterizer, dst, src, x0, coverage, count);
}

/* CMYKAF compositor for CLEAR compositing: forwards the composite arguments
 * to ctx_float_clear_normal with 5 components per pixel.
 */
static void
ctx_CMYKAF_clear_normal (CTX_COMPOSITE_ARGUMENTS)
{
  ctx_float_clear_normal (5, rasterizer, dst, src, x0, coverage, count);
}

/* CMYKAF solid-color compositor: ignores the incoming src argument and
 * passes the color prepared in rasterizer->color to
 * ctx_float_source_copy_normal_color with 5 components per pixel.
 */
static void
ctx_CMYKAF_source_copy_normal_color (CTX_COMPOSITE_ARGUMENTS)
{
  ctx_float_source_copy_normal_color (5, rasterizer, dst, rasterizer->color, x0, coverage, count);
}
#endif

/* ctx_setup_CMYKAF:
 * @rasterizer: rasterizer whose compositing state is (re)configured
 *
 * Selects the fragment function, the compositor (comp_op) and the coverage
 * path (comp) for CMYKAF based targets from the current graphics state.
 * Finishes by calling ctx_setup_apply_coverage.
 *
 * rasterizer->color holds the solid source color as five floats (c, m, y, k,
 * alpha); every alpha test below therefore reads the float at index
 * components-1 rather than a raw byte of the buffer.
 */
static void
ctx_setup_CMYKAF (CtxRasterizer *rasterizer)
{
  CtxGState *gstate = &rasterizer->state->gstate;
  int components = 5;
  rasterizer->fragment = ctx_rasterizer_get_fragment_CMYKAF (rasterizer);
  rasterizer->comp = CTX_COV_PATH_FALLBACK;
  if (gstate->source_fill.type == CTX_SOURCE_COLOR)
    {
      /* the generic compositor is used as baseline for colors as well */
      rasterizer->comp_op = ctx_CMYKAF_porter_duff_generic;
      ctx_color_get_cmyka (rasterizer->state, &gstate->source_fill.color, (float*)rasterizer->color);
      /* unlike the RGBAF/GRAYAF setups only the alpha component is scaled */
      if (gstate->global_alpha_u8 != 255)
        ((float*)rasterizer->color)[components-1] *= gstate->global_alpha_f;

      /* also keep the color converted to the target's native pixel encoding */
      if (rasterizer->format->from_comp)
        rasterizer->format->from_comp (rasterizer, 0,
          &rasterizer->color[0],
          &rasterizer->color_native,
          1);
    }
  else
  {
    rasterizer->comp_op = ctx_CMYKAF_porter_duff_generic;
  }

#if CTX_INLINED_NORMAL
  /* replace the generic choice made above with specialized compositors */
  if (gstate->compositing_mode == CTX_COMPOSITE_CLEAR)
    rasterizer->comp_op = ctx_CMYKAF_clear_normal;
  else
    switch (gstate->blend_mode)
    {
      case CTX_BLEND_NORMAL:
        if (gstate->compositing_mode == CTX_COMPOSITE_COPY)
        {
          rasterizer->comp_op = ctx_CMYKAF_copy_normal;
        }
        else if (gstate->global_alpha_u8 == 0)
          rasterizer->comp_op = ctx_RGBA8_nop;
        else
        switch (gstate->source_fill.type)
        {
          case CTX_SOURCE_COLOR:
            if (gstate->compositing_mode == CTX_COMPOSITE_SOURCE_OVER)
            {
              if (((float*)rasterizer->color)[components-1] == 0.0f)
                rasterizer->comp_op = ctx_RGBA8_nop;
              else if (((float*)rasterizer->color)[components-1] == 1.0f)
              {
                /* opaque color over anything is a plain copy */
                rasterizer->comp_op = ctx_CMYKAF_source_copy_normal_color;
                rasterizer->comp = CTX_COV_PATH_CMYKAF_COPY;
              }
              else
                rasterizer->comp_op = ctx_CMYKAF_porter_duff_color_normal;
            }
            else
            {
              rasterizer->comp_op = ctx_CMYKAF_porter_duff_color_normal;
            }
            break;
          default:
            rasterizer->comp_op = ctx_CMYKAF_porter_duff_generic_normal;
            break;
        }
        break;
      default:
        switch (gstate->source_fill.type)
        {
          case CTX_SOURCE_COLOR:
            rasterizer->comp_op = ctx_CMYKAF_porter_duff_color;
            break;
          default:
            rasterizer->comp_op = ctx_CMYKAF_porter_duff_generic;
            break;
        }
        break;
    }
#else

  if (gstate->blend_mode == CTX_BLEND_NORMAL &&
      gstate->source_fill.type == CTX_SOURCE_COLOR)
    {
      if (gstate->compositing_mode == CTX_COMPOSITE_COPY)
        {
          rasterizer->comp = CTX_COV_PATH_CMYKAF_COPY;
        }
      else if (gstate->compositing_mode == CTX_COMPOSITE_SOURCE_OVER &&
               ((float*)rasterizer->color)[components-1] == 1.0f)
        {
          /* opaque color: same shortcut as in the inlined branch above */
          rasterizer->comp = CTX_COV_PATH_CMYKAF_COPY;
        }
    }
#endif
  ctx_setup_apply_coverage (rasterizer);
}

/* Setup for CMYKA8 targets: reuses the CMYKAF setup and, when that selected
 * the CMYKAF copy coverage path, substitutes the CMYKA8 one.
 */
static void
ctx_setup_CMYKA8 (CtxRasterizer *rasterizer)
{
  ctx_setup_CMYKAF (rasterizer);

  if (rasterizer->comp != CTX_COV_PATH_CMYKAF_COPY)
    {
      return;
    }
  rasterizer->comp = CTX_COV_PATH_CMYKA8_COPY;
}

/* Setup for CMYK8 targets: reuses the CMYKAF setup and, when that selected
 * the CMYKAF copy coverage path, substitutes the CMYK8 one.
 */
static void
ctx_setup_CMYK8 (CtxRasterizer *rasterizer)
{
  ctx_setup_CMYKAF (rasterizer);

  if (rasterizer->comp != CTX_COV_PATH_CMYKAF_COPY)
    {
      return;
    }
  rasterizer->comp = CTX_COV_PATH_CMYK8_COPY;
}

#endif
#if CTX_ENABLE_CMYKA8

/* Converts @count CMYKA8 pixels (5 bytes each) to CMYKAF (5 floats each).
 * The four ink bytes are inverted (255 - value) before conversion to float
 * and then multiplied by the converted alpha.
 */
static void
ctx_CMYKA8_to_CMYKAF (CtxRasterizer *rasterizer, uint8_t *src, float *dst, int count)
{
  for (int i = 0; i < count; i ++, src += 5, dst += 5)
    {
      int c;

      for (c = 0; c < 4; c ++)
        dst[c] = ctx_u8_to_float ( (255-src[c]) );

      dst[4] = ctx_u8_to_float (src[4]);

      /* premultiply by alpha */
      for (c = 0; c < 4; c ++)
        dst[c] *= dst[4];
    }
}
/* Converts @count CMYKAF pixels (5 floats each) to CMYKA8 (5 bytes each).
 * When the 8 bit alpha is neither 0 nor 255 the components are divided by
 * the float alpha before being inverted; otherwise they are inverted as is.
 */
static void
ctx_CMYKAF_to_CMYKA8 (CtxRasterizer *rasterizer, float *src, uint8_t *dst, int count)
{
  for (int i = 0; i < count; i ++, src += 5, dst += 5)
    {
      const int alpha = ctx_float_to_u8 (src[4]);

      if (alpha == 0 || alpha == 255)
        {
          for (int c = 0; c < 4; c++)
            dst[c] = 255 - ctx_float_to_u8 (src[c]);
        }
      else
        {
          /* undo the alpha premultiplication */
          const float recip = 1.0f/src[4];
          for (int c = 0; c < 4; c++)
            dst[c] = ctx_float_to_u8 (1.0f - src[c] * recip);
        }
      dst[4] = alpha;
    }
}

/* Composite onto a CMYKA8 span: the destination is converted to CMYKAF in a
 * temporary buffer, composited with the installed comp_op using
 * rasterizer->color, and converted back into @dst.
 */
static void
ctx_composite_CMYKA8 (CTX_COMPOSITE_ARGUMENTS)
{
  float  cmykaf[count * 5];
  float *work = &cmykaf[0];

  ctx_CMYKA8_to_CMYKAF (rasterizer, dst, work, count);
  rasterizer->comp_op (rasterizer, (uint8_t *) work, rasterizer->color, x0, coverage, count);
  ctx_CMYKAF_to_CMYKA8 (rasterizer, work, dst, count);
}

#endif
#if CTX_ENABLE_CMYK8

/* Converts @count CMYK8 pixels (4 bytes each) to CMYKAF (5 floats each):
 * every ink byte is inverted (255 - value) before conversion to float and
 * the alpha is set to 1.0.
 */
static void
ctx_CMYK8_to_CMYKAF (CtxRasterizer *rasterizer, uint8_t *src, float *dst, int count)
{
  for (int i = 0; i < count; i ++, src += 4, dst += 5)
    {
      for (int c = 0; c < 4; c ++)
        dst[c] = ctx_u8_to_float (255-src[c]);
      dst[4] = 1.0f;
    }
}
/* Converts @count CMYKAF pixels (5 floats each) to CMYK8 (4 bytes each).
 * Components are divided by alpha when alpha is neither 0.0 nor 1.0, then
 * inverted and converted to bytes; the alpha itself is dropped.
 */
static void
ctx_CMYKAF_to_CMYK8 (CtxRasterizer *rasterizer, float *src, uint8_t *dst, int count)
{
  for (int i = 0; i < count; i ++, src += 5, dst += 4)
    {
      const float alpha = src[4];
      const int   unpremultiply = (alpha != 0.0f && alpha != 1.0f);
      float       recip = 1.0f;

      if (unpremultiply)
        recip = 1.0f/alpha;

      for (int ch = 0; ch < 4; ch ++)
        {
          float ink = src[ch];
          if (unpremultiply)
            ink *= recip;
          ink = 1.0 - ink;
          dst[ch] = ctx_float_to_u8 (ink);
        }
    }
}

/* Composite onto a CMYK8 span: the destination is converted to CMYKAF in a
 * temporary buffer, composited with the installed comp_op and converted back
 * into @dst.  Note that the incoming @src is forwarded to comp_op here.
 */
static void
ctx_composite_CMYK8 (CTX_COMPOSITE_ARGUMENTS)
{
  float  cmykaf[count * 5];
  float *work = &cmykaf[0];

  ctx_CMYK8_to_CMYKAF (rasterizer, dst, work, count);
  rasterizer->comp_op (rasterizer, (uint8_t *) work, src, x0, coverage, count);
  ctx_CMYKAF_to_CMYK8 (rasterizer, work, dst, count);
}
#endif

#if CTX_ENABLE_RGB8

/* Expands @count RGB8 pixels from @buf into RGBA8 pixels in @rgba, setting
 * alpha to 255.  @x is unused.
 */
inline static void
ctx_RGB8_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
{
  const uint8_t *rgb = (const uint8_t *) buf;

  for (int i = 0; i < count; i++, rgb += 3, rgba += 4)
    {
      rgba[0] = rgb[0];
      rgba[1] = rgb[1];
      rgba[2] = rgb[2];
      rgba[3] = 255;
    }
}

/* Packs @count RGBA8 pixels from @rgba into RGB8 pixels in @buf, dropping
 * the alpha byte.  @x is unused.
 */
inline static void
ctx_RGBA8_to_RGB8 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
{
  uint8_t *rgb = (uint8_t *) buf;

  for (int i = 0; i < count; i++, rgb += 3, rgba += 4)
    {
      rgb[0] = rgba[0];
      rgb[1] = rgba[1];
      rgb[2] = rgba[2];
    }
}

#endif
#if CTX_ENABLE_GRAY1

#if CTX_NATIVE_GRAYA8
/* ctx_GRAY1_to_GRAYA8:
 * @x:     pixel column of the first pixel; its low three bits select the bit
 *         inside the current byte
 * @buf:   1 bit per pixel source, pointing at the byte holding pixel @x
 * @rgba:  receives @count gray+alpha byte pairs
 * @count: number of pixels to convert
 *
 * A set bit becomes gray 255 and a cleared bit gray 0, whichever bit
 * position inside the byte it occupies; alpha is always 255.
 */
inline static void
ctx_GRAY1_to_GRAYA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
{
  const uint8_t *pixel = (const uint8_t *) buf;
  while (count--)
    {
      int bitno = x&7;
      /* test the bit instead of scaling the masked value, which is only
       * 0 or 1 for bit 0 */
      rgba[0] = ((*pixel) & (1<<bitno)) ? 255 : 0;
      rgba[1] = 255;
      pixel+= (bitno ==7);
      x++;
      rgba +=2;
    }
}

/* Thresholds @count gray+alpha byte pairs into 1 bit per pixel: gray values
 * of 128 and above set the bit, lower values clear it.  The low three bits
 * of @x select the bit inside the current byte of @buf; alpha is ignored.
 */
inline static void
ctx_GRAYA8_to_GRAY1 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
{
  uint8_t *byte = (uint8_t *) buf;

  for (int i = 0; i < count; i++, x++, rgba += 2)
    {
      const int bit  = x & 7;
      const int mask = 1 << bit;

      if (rgba[0] < 128)
        *byte &= ~mask;
      else
        *byte |= mask;

      /* advance once the last bit of the byte has been written */
      if (bit == 7)
        byte++;
    }
}

#else

/* Expands @count 1 bit per pixel values into 32 bit RGBA8 pixels: a set bit
 * yields 0xffffffff, a cleared bit 0xff000000.  The low three bits of @x
 * select the bit inside the current byte of @buf.  @rgba is written through
 * a uint32_t pointer.
 */
inline static void
ctx_GRAY1_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
{
  const uint8_t *byte = (uint8_t *) buf;

  for (int i = 0; i < count; i++, x++, rgba += 4)
    {
      const int bit = x & 7;
      uint32_t  value = 0xff000000;

      if ((*byte & (1 << bit)) != 0)
        value += 0x00ffffff;
      *((uint32_t*)(rgba)) = value;

      if (bit == 7)
        byte++;
    }
}

/* Thresholds @count RGBA8 pixels into 1 bit per pixel: the rgb part is
 * converted to gray with ctx_u8_color_rgb_to_gray, values of 128 and above
 * set the bit, lower values clear it.  The low three bits of @x select the
 * bit inside the current byte of @buf.  No dithering is applied.
 */
inline static void
ctx_RGBA8_to_GRAY1 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
{
  uint8_t *byte = (uint8_t *) buf;

  for (int i = 0; i < count; i++, x++, rgba += 4)
    {
      const int level = ctx_u8_color_rgb_to_gray (rasterizer->state, rgba);
      const int bit   = x & 7;
      const int mask  = 1 << bit;

      if (level < 128)
        *byte &= ~mask;
      else
        *byte |= mask;

      if (bit == 7)
        byte++;
    }
}
#endif

#endif
#if CTX_ENABLE_GRAY2

#if CTX_NATIVE_GRAYA8
/* Expands @count 2 bit gray values into gray+alpha byte pairs.  The low two
 * bits of @x select the 2 bit field inside the current byte of @buf; the
 * value is placed in the top two bits of the gray byte and alpha is 255.
 */
inline static void
ctx_GRAY2_to_GRAYA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
{
  const uint8_t *byte = (uint8_t *) buf;

  for (int i = 0; i < count; i++, x++, rgba += 2)
    {
      const int slot  = x & 3;
      const int shift = slot << 1;
      const int level = ((*byte >> shift) & 3) << 6;

      rgba[0] = level;
      rgba[1] = 255;

      if (slot == 3)
        byte++;
    }
}

/* Packs @count gray+alpha byte pairs into 2 bit gray values, keeping the top
 * two bits of each gray byte.  The low two bits of @x select the 2 bit field
 * inside the current byte of @buf; alpha is ignored.
 */
inline static void
ctx_GRAYA8_to_GRAY2 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
{
  uint8_t *byte = (uint8_t *) buf;

  for (int i = 0; i < count; i++, x++, rgba += 2)
    {
      const int slot  = x & 3;
      const int shift = slot << 1;
      const int level = rgba[0] >> 6;

      /* clear the field, then insert the new value */
      *byte = (*byte & (~ (3 << shift))) | (level << shift);

      if (slot == 3)
        byte++;
    }
}
#else

/* Expands @count 2 bit gray values into RGBA8 pixels.  The low two bits of
 * @x select the 2 bit field inside the current byte of @buf; the value is
 * placed in the top two bits of r, g and b and alpha is 255.
 */
inline static void
ctx_GRAY2_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
{
  const uint8_t *byte = (uint8_t *) buf;

  for (int i = 0; i < count; i++, x++, rgba += 4)
    {
      const int slot  = x & 3;
      const int shift = slot << 1;
      const int level = ((*byte >> shift) & 3) << 6;

      rgba[0] = rgba[1] = rgba[2] = level;
      rgba[3] = 255;

      if (slot == 3)
        byte++;
    }
}

/* Packs @count RGBA8 pixels into 2 bit gray values: the rgb part is
 * converted to gray with ctx_u8_color_rgb_to_gray and its top two bits are
 * kept.  The low two bits of @x select the 2 bit field inside the current
 * byte of @buf; alpha is ignored.
 */
inline static void
ctx_RGBA8_to_GRAY2 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
{
  uint8_t *byte = (uint8_t *) buf;

  for (int i = 0; i < count; i++, x++, rgba += 4)
    {
      int       level = ctx_u8_color_rgb_to_gray (rasterizer->state, rgba);
      const int slot  = x & 3;
      const int shift = slot << 1;

      level >>= 6;
      /* clear the field, then insert the new value */
      *byte = (*byte & (~ (3 << shift))) | (level << shift);

      if (slot == 3)
        byte++;
    }
}
#endif

#endif
#if CTX_ENABLE_GRAY4

#if CTX_NATIVE_GRAYA8
/* Expands @count 4 bit gray values into gray+alpha byte pairs.  The lowest
 * bit of @x selects the nibble inside the current byte of @buf (even: low
 * nibble); the value is placed in the top four bits of the gray byte and
 * alpha is 255.
 */
inline static void
ctx_GRAY4_to_GRAYA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
{
  const uint8_t *byte = (uint8_t *) buf;

  for (int i = 0; i < count; i++, x++, rgba += 2)
    {
      const int slot  = x & 1;
      const int shift = slot << 2;
      const int level = ((*byte >> shift) & 15) << 4;

      rgba[0] = level;
      rgba[1] = 255;

      if (slot == 1)
        byte++;
    }
}

/* Packs @count gray+alpha byte pairs into 4 bit gray values, keeping the top
 * four bits of each gray byte.  The lowest bit of @x selects the nibble
 * inside the current byte of @buf; alpha is ignored.
 */
inline static void
ctx_GRAYA8_to_GRAY4 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
{
  uint8_t *byte = (uint8_t *) buf;

  for (int i = 0; i < count; i++, x++, rgba += 2)
    {
      const int slot  = x & 1;
      const int shift = slot << 2;
      const int level = rgba[0] >> 4;

      /* clear the nibble, then insert the new value */
      *byte = (*byte & (~ (15 << shift))) | (level << shift);

      if (slot == 1)
        byte++;
    }
}
#else
/* Expands @count 4 bit gray values into RGBA8 pixels.  The lowest bit of @x
 * selects the nibble inside the current byte of @buf; the value is placed in
 * the top four bits of r, g and b and alpha is 255.
 */
inline static void
ctx_GRAY4_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
{
  const uint8_t *byte = (uint8_t *) buf;

  for (int i = 0; i < count; i++, x++, rgba += 4)
    {
      const int slot  = x & 1;
      const int shift = slot << 2;
      const int level = ((*byte >> shift) & 15) << 4;

      rgba[0] = rgba[1] = rgba[2] = level;
      rgba[3] = 255;

      if (slot == 1)
        byte++;
    }
}

/* Packs @count RGBA8 pixels into 4 bit gray values: the rgb part is
 * converted to gray with ctx_u8_color_rgb_to_gray and its top four bits are
 * kept.  The lowest bit of @x selects the nibble inside the current byte of
 * @buf; alpha is ignored.
 */
inline static void
ctx_RGBA8_to_GRAY4 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
{
  uint8_t *byte = (uint8_t *) buf;

  for (int i = 0; i < count; i++, x++, rgba += 4)
    {
      int       level = ctx_u8_color_rgb_to_gray (rasterizer->state, rgba);
      const int slot  = x & 1;
      const int shift = slot << 2;

      level >>= 4;
      /* clear the nibble, then insert the new value */
      *byte = (*byte & (~ (15 << shift))) | (level << shift);

      if (slot == 1)
        byte++;
    }
}
#endif

#endif
#if CTX_ENABLE_GRAY8

#if CTX_NATIVE_GRAYA8
/* Expands @count 8 bit gray values from @buf into gray+alpha byte pairs with
 * alpha 255.  @x is unused.
 */
inline static void
ctx_GRAY8_to_GRAYA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
{
  const uint8_t *gray = (uint8_t *) buf;

  for (int i = 0; i < count; i++)
    {
      rgba[2 * i]     = gray[i];
      rgba[2 * i + 1] = 255;
    }
}

/* Packs @count gray+alpha byte pairs into 8 bit gray values in @buf,
 * dropping the alpha byte.  @x is unused.
 */
inline static void
ctx_GRAYA8_to_GRAY8 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
{
  uint8_t *gray = (uint8_t *) buf;

  for (int i = 0; i < count; i++)
    gray[i] = rgba[2 * i];
}
#else
/* Expands @count 8 bit gray values from @buf into RGBA8 pixels: the gray
 * value is replicated into r, g and b and alpha is 255.  @x is unused.
 */
inline static void
ctx_GRAY8_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
{
  const uint8_t *gray = (uint8_t *) buf;

  for (int i = 0; i < count; i++, rgba += 4)
    {
      const uint8_t level = gray[i];

      rgba[0] = level;
      rgba[1] = level;
      rgba[2] = level;
      rgba[3] = 255;
    }
}

/* Converts @count RGBA8 pixels into 8 bit gray values in @buf using
 * ctx_u8_color_rgb_to_gray; alpha is ignored.  @x is unused.
 */
inline static void
ctx_RGBA8_to_GRAY8 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
{
  uint8_t       *gray = (uint8_t *) buf;
  uint8_t *const end  = gray + count;

  if (count <= 0)
    return;

  for (; gray != end; gray++, rgba += 4)
    *gray = ctx_u8_color_rgb_to_gray (rasterizer->state, rgba);
}
#endif

#endif
#if CTX_ENABLE_GRAYA8

/* Expands @count gray+alpha byte pairs from @buf into RGBA8 pixels: gray is
 * replicated into r, g and b and alpha is carried over.  @x is unused.
 */
inline static void
ctx_GRAYA8_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
{
  const uint8_t *graya = (const uint8_t *) buf;

  for (int i = 0; i < count; i++, graya += 2, rgba += 4)
    {
      const uint8_t level = graya[0];

      rgba[0] = level;
      rgba[1] = level;
      rgba[2] = level;
      rgba[3] = graya[1];
    }
}

/* Converts @count RGBA8 pixels into gray+alpha byte pairs in @buf: gray is
 * computed with ctx_u8_color_rgb_to_gray and alpha is carried over.
 * @x is unused.
 */
inline static void
ctx_RGBA8_to_GRAYA8 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
{
  uint8_t *graya = (uint8_t *) buf;

  for (int i = 0; i < count; i++, graya += 2, rgba += 4)
    {
      graya[0] = ctx_u8_color_rgb_to_gray (rasterizer->state, rgba);
      graya[1] = rgba[3];
    }
}

#if CTX_NATIVE_GRAYA8
/* Converts one RGBA8 pixel at @in to a gray+alpha byte pair at @out: gray is
 * computed from the rgb part with ctx_u8_color_rgb_to_gray, alpha is copied
 * from in[3].
 */
CTX_INLINE static void ctx_rgba_to_graya_u8 (CtxState *state, uint8_t *in, uint8_t *out)
{
  out[0] = ctx_u8_color_rgb_to_gray (state, in);
  out[1] = in[3];
}

#if CTX_GRADIENTS
/* ctx_fragment_linear_gradient_GRAYA8:
 * @x, @y:   position of the first sample
 * @out:     receives @count gray+alpha byte pairs
 * @dx, @dy: per-sample step of the position
 *
 * Samples the linear gradient of the current fill source as RGBA8 and
 * stores each sample converted to gray+alpha.  With CTX_DITHER enabled every
 * sample is additionally passed through ctx_dither_graya_u8.
 * @z and @dz are unused.
 */
static void
ctx_fragment_linear_gradient_GRAYA8 (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  CtxSource *source = &rasterizer->state->gstate.source_fill;
  uint8_t   *graya  = (uint8_t*)out;
  uint8_t    rgba[4];
#if CTX_DITHER
  int row     = rasterizer->scanline / CTX_FULL_AA;
  int first_x = x;
#endif

  for (int i = 0; i < count; i++, graya += 2, x += dx, y += dy)
    {
      /* position along the gradient axis, mapped onto the stop range */
      float v = ( ( (source->linear_gradient.dx * x + source->linear_gradient.dy * y) /
                    source->linear_gradient.length) -
                  source->linear_gradient.start) * (source->linear_gradient.rdelta);

      ctx_fragment_gradient_1d_RGBA8 (rasterizer, v, 1.0, rgba);
      ctx_rgba_to_graya_u8 (rasterizer->state, rgba, graya);
#if CTX_DITHER
      ctx_dither_graya_u8 ((uint8_t*)graya, first_x + i, row, rasterizer->format->dither_red_blue,
                          rasterizer->format->dither_green);
#endif
    }
}

/* Fragment source producing `count` gray+alpha pixels in `out` from the
 * radial gradient stored in the current fill source.
 *
 * For each pixel the distance from (x0, y0) of the gradient to the sample
 * point is turned into a gradient position, looked up as RGBA8 and reduced
 * to gray+alpha.  The sample point advances by (dx, dy) per pixel.  When
 * CTX_DITHER is enabled every output pixel is passed through
 * ctx_dither_graya_u8.  `z` and `dz` are not used.
 */
static void
ctx_fragment_radial_gradient_GRAYA8 (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  uint8_t *graya = (uint8_t*)out;
#if CTX_DITHER
  int dither_row = rasterizer->scanline / CTX_FULL_AA;
  int dither_x0  = x;
#endif

  for (int i = 0; i < count; i++, graya += 2, x += dx, y += dy)
    {
      CtxSource *source = &rasterizer->state->gstate.source_fill;
      uint8_t    rgba[4];
      float v = (ctx_hypotf (source->radial_gradient.x0 - x, source->radial_gradient.y0 - y) -
                  source->radial_gradient.r0) * (source->radial_gradient.rdelta);

      ctx_fragment_gradient_1d_RGBA8 (rasterizer, v, 1.0, rgba);
      ctx_rgba_to_graya_u8 (rasterizer->state, rgba, graya);
#if CTX_DITHER
      ctx_dither_graya_u8 ((uint8_t*)graya, dither_x0 + i, dither_row,
                           rasterizer->format->dither_red_blue,
                           rasterizer->format->dither_green);
#endif
    }
}
#endif

/* Fragment source for solid colors: fetches the fill color as a gray+alpha
 * byte pair (ctx_color_get_graya_u8) and replicates it `count` times into
 * `out`.  Each pixel is stored as one 16-bit write, so `out` is accessed as
 * an array of uint16_t.  The coordinate and delta arguments are not used.
 */
static void
ctx_fragment_color_GRAYA8 (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  CtxSource *source = &rasterizer->state->gstate.source_fill;
  uint16_t  *pixels = (uint16_t*)out;
  uint16_t   graya;   /* holds the two bytes written by the color getter */
  int        n = 0;

  ctx_color_get_graya_u8 (rasterizer->state, &source->color, (uint8_t*)&graya);
  while (n < count)
    {
      pixels[n] = graya;
      n++;
    }
}

/* Fragment source for texture fills: renders `count` RGBA8 samples into a
 * temporary buffer using the image fragment matching the texture's bits per
 * pixel (specialized variants only when CTX_FRAGMENT_SPECIALIZE is set), then
 * converts each sample to gray+alpha in `out`.
 */
static void ctx_fragment_image_GRAYA8 (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  uint8_t rgba[4*count];
  CtxSource *source  = &rasterizer->state->gstate.source_fill;
  /* prefer the color managed copy of the texture when there is one */
  CtxBuffer *texture = source->texture.buffer->color_managed
                         ? source->texture.buffer->color_managed
                         : source->texture.buffer;
  uint8_t   *graya   = (uint8_t*)out;

  switch (texture->format->bpp)
    {
#if CTX_FRAGMENT_SPECIALIZE
      case 1:
        ctx_fragment_image_gray1_RGBA8 (rasterizer, x, y, z, rgba, count, dx, dy, dz);
        break;
      case 24:
        ctx_fragment_image_rgb8_RGBA8 (rasterizer, x, y, z, rgba, count, dx, dy, dz);
        break;
      case 32:
        ctx_fragment_image_rgba8_RGBA8 (rasterizer, x, y, z, rgba, count, dx, dy, dz);
        break;
#endif
      default:
        ctx_fragment_image_RGBA8 (rasterizer, x, y, z, rgba, count, dx, dy, dz);
        break;
    }

  for (int n = 0; n < count; n++)
    ctx_rgba_to_graya_u8 (rasterizer->state, &rgba[n*4], &graya[n*2]);
}

/* Pick the GRAYA8 fragment function matching the type of the current fill
 * source.  Solid colors, and any source type without a dedicated case, use
 * the solid color fragment.
 */
static CtxFragment ctx_rasterizer_get_fragment_GRAYA8 (CtxRasterizer *rasterizer)
{
  CtxFragment fragment = ctx_fragment_color_GRAYA8;
  switch (rasterizer->state->gstate.source_fill.type)
    {
      case CTX_SOURCE_TEXTURE:
        fragment = ctx_fragment_image_GRAYA8;
        break;
#if CTX_GRADIENTS
      case CTX_SOURCE_LINEAR_GRADIENT:
        fragment = ctx_fragment_linear_gradient_GRAYA8;
        break;
      case CTX_SOURCE_RADIAL_GRADIENT:
        fragment = ctx_fragment_radial_gradient_GRAYA8;
        break;
#endif
      default:
        break;
    }
  return fragment;
}

/* Instantiate the generic 2-component compositor with a run-time fragment
 * and blend mode; presumably this expands to
 * ctx_GRAYA8_porter_duff_generic, the name ctx_setup_GRAYA8 installs as
 * the default comp_op (the macro itself is defined elsewhere).
 */
ctx_u8_porter_duff(GRAYA8, 2,generic, rasterizer->fragment, rasterizer->state->gstate.blend_mode)

#if CTX_INLINED_NORMAL
/* Same instantiation with the blend mode fixed to CTX_BLEND_NORMAL;
 * presumably expands to ctx_GRAYA8_porter_duff_generic_normal, which
 * ctx_setup_GRAYA8 selects for normal blending.
 */
ctx_u8_porter_duff(GRAYA8, 2,generic_normal, rasterizer->fragment, CTX_BLEND_NORMAL)

/* GRAYA8 compositor for copy compositing with normal blending: forwards to
 * the generic u8 implementation with 2 components per pixel.
 */
static void
ctx_GRAYA8_copy_normal (CTX_COMPOSITE_ARGUMENTS)
{
  ctx_u8_copy_normal (2, rasterizer, dst, src, x0, coverage, count);
}

/* GRAYA8 compositor for clear compositing: forwards to the generic u8
 * implementation with 2 components per pixel.
 */
static void
ctx_GRAYA8_clear_normal (CTX_COMPOSITE_ARGUMENTS)
{
  ctx_u8_clear_normal (2, rasterizer, dst, src, x0, coverage, count);
}

/* GRAYA8 source-over compositor for solid colors: forwards to the generic
 * u8 implementation with 2 components, passing the precomputed
 * rasterizer->color as the source instead of the `src` argument.
 */
static void
ctx_GRAYA8_source_over_normal_color (CTX_COMPOSITE_ARGUMENTS)
{
  ctx_u8_source_over_normal_color (2, rasterizer, dst, rasterizer->color, x0, coverage, count);
}

/* GRAYA8 source-copy compositor for solid colors: forwards to the generic
 * u8 implementation with 2 components, passing the precomputed
 * rasterizer->color as the source instead of the `src` argument.
 */
static void
ctx_GRAYA8_source_copy_normal_color (CTX_COMPOSITE_ARGUMENTS)
{
  ctx_u8_source_copy_normal_color (2, rasterizer, dst, rasterizer->color, x0, coverage, count);
}
#endif

/* Returns 1 when the current fill is a solid color that is fully opaque:
 * global alpha must be 255, the fill source must be CTX_SOURCE_COLOR and
 * the alpha of the color (queried as gray+alpha) must be 255.  Returns 0
 * in every other case.
 */
inline static int
ctx_is_opaque_color (CtxRasterizer *rasterizer)
{
  CtxGState *gstate = &rasterizer->state->gstate;
  uint8_t    graya[2];

  if (gstate->global_alpha_u8 != 255 ||
      gstate->source_fill.type != CTX_SOURCE_COLOR)
    return 0;

  ctx_color_get_graya_u8 (rasterizer->state, &gstate->source_fill.color, graya);
  return graya[1] == 255;
}

/* Configure the rasterizer for compositing with 2-component (gray+alpha)
 * pixels, based on the current graphics state:
 *
 *  - rasterizer->fragment is set from the fill source type,
 *  - rasterizer->comp_op starts out as the generic porter-duff compositor
 *    and rasterizer->comp as CTX_COV_PATH_FALLBACK,
 *  - for solid color sources rasterizer->color is precomputed (scaled by the
 *    global alpha when that is not 255) and, when the format provides
 *    from_comp, converted into rasterizer->color_native,
 *  - more specific comp_op / comp values are then chosen depending on
 *    compositing mode, blend mode and source type.
 *
 * Ends by calling ctx_setup_apply_coverage.
 */
static void
ctx_setup_GRAYA8 (CtxRasterizer *rasterizer)
{
  CtxGState *gstate = &rasterizer->state->gstate;
  int components = 2;
  rasterizer->fragment = ctx_rasterizer_get_fragment_GRAYA8 (rasterizer);
  rasterizer->comp_op  = ctx_GRAYA8_porter_duff_generic;
  rasterizer->comp = CTX_COV_PATH_FALLBACK;
  if (gstate->source_fill.type == CTX_SOURCE_COLOR)
    {
      /* render a single pixel of the solid color straight into rasterizer->color */
      ctx_fragment_color_GRAYA8 (rasterizer, 0,0, 1,rasterizer->color, 1, 0,0,0);
      /* fold global alpha into every component, alpha included */
      if (gstate->global_alpha_u8 != 255)
        for (int c = 0; c < components; c ++)
          rasterizer->color[c] = (rasterizer->color[c] * gstate->global_alpha_u8)/255;

      /* keep a copy of the color in the target format's own encoding */
      if (rasterizer->format->from_comp)
        rasterizer->format->from_comp (rasterizer, 0,
          &rasterizer->color[0],
          &rasterizer->color_native,
          1);
    }

#if CTX_INLINED_NORMAL
  if (gstate->compositing_mode == CTX_COMPOSITE_CLEAR)
    rasterizer->comp_op = ctx_GRAYA8_clear_normal;
  else
    switch (gstate->blend_mode)
    {
      case CTX_BLEND_NORMAL:
        /* NOTE(review): this branch selects the GRAYA8_COPY coverage path for
         * every source type, while the non-inlined branch below only does so
         * for CTX_SOURCE_COLOR - confirm that this is intended. */
        if (gstate->compositing_mode == CTX_COMPOSITE_COPY)
        {
          rasterizer->comp_op = ctx_GRAYA8_copy_normal;
          rasterizer->comp = CTX_COV_PATH_GRAYA8_COPY;
        }
        else if (gstate->global_alpha_u8 == 0)
          rasterizer->comp_op = ctx_RGBA8_nop;
        else
        switch (gstate->source_fill.type)
        {
          case CTX_SOURCE_COLOR:
            if (gstate->compositing_mode == CTX_COMPOSITE_SOURCE_OVER)
            {
              /* color[components-1] is the alpha of the precomputed color */
              if (rasterizer->color[components-1] == 0)
                rasterizer->comp_op = ctx_RGBA8_nop;
              else if (rasterizer->color[components-1] == 255)
              {
                /* an opaque color composited source-over is handled as a copy */
                rasterizer->comp_op = ctx_GRAYA8_source_copy_normal_color;
                rasterizer->comp = CTX_COV_PATH_GRAYA8_COPY;
              }
              else
                rasterizer->comp_op = ctx_GRAYA8_source_over_normal_color;
            }
            else
            {
              rasterizer->comp_op = ctx_GRAYA8_porter_duff_generic_normal;
            }
            break;
          default:
            rasterizer->comp_op = ctx_GRAYA8_porter_duff_generic_normal;
            break;
        }
        break;
      default:
        rasterizer->comp_op = ctx_GRAYA8_porter_duff_generic;
        break;
    }
#else
    /* without the inlined compositors only the coverage path is refined;
     * comp_op stays the generic compositor */
    if (gstate->blend_mode == CTX_BLEND_NORMAL &&
        gstate->source_fill.type == CTX_SOURCE_COLOR)
    {
        if (gstate->compositing_mode == CTX_COMPOSITE_COPY)
        {
          rasterizer->comp = CTX_COV_PATH_GRAYA8_COPY;
        }
        else if (gstate->compositing_mode == CTX_COMPOSITE_SOURCE_OVER &&
                 rasterizer->color[components-1] == 255)
        {
          rasterizer->comp = CTX_COV_PATH_GRAYA8_COPY;
        }
    }
#endif
  ctx_setup_apply_coverage (rasterizer);
}

/* Setup for GRAY4: reuses the GRAYA8 setup and then always forces the
 * fallback coverage path.
 */
static void
ctx_setup_GRAY4 (CtxRasterizer *rasterizer)
{
  ctx_setup_GRAYA8 (rasterizer);
  rasterizer->comp = CTX_COV_PATH_FALLBACK;
}

/* Setup for GRAY2: reuses the GRAYA8 setup and then always forces the
 * fallback coverage path.
 */
static void
ctx_setup_GRAY2 (CtxRasterizer *rasterizer)
{
  ctx_setup_GRAYA8 (rasterizer);
  rasterizer->comp = CTX_COV_PATH_FALLBACK;
}

/* Setup for GRAY1: reuses the GRAYA8 setup, then maps its copy coverage
 * path onto the GRAY1 copy path; any other path becomes the fallback.
 */
static void
ctx_setup_GRAY1 (CtxRasterizer *rasterizer)
{
  ctx_setup_GRAYA8 (rasterizer);
  rasterizer->comp = (rasterizer->comp == CTX_COV_PATH_GRAYA8_COPY)
                       ? CTX_COV_PATH_GRAY1_COPY
                       : CTX_COV_PATH_FALLBACK;
}

/* Setup for GRAY8: reuses the GRAYA8 setup, then maps its copy coverage
 * path onto the GRAY8 copy path; any other path becomes the fallback.
 */
static void
ctx_setup_GRAY8 (CtxRasterizer *rasterizer)
{
  ctx_setup_GRAYA8 (rasterizer);
  rasterizer->comp = (rasterizer->comp == CTX_COV_PATH_GRAYA8_COPY)
                       ? CTX_COV_PATH_GRAY8_COPY
                       : CTX_COV_PATH_FALLBACK;
}

#endif

#endif

/* Unpack an RGB332 pixel (bits 7-5 red, 4-2 green, 1-0 blue) into 8-bit
 * channels stored through `red`, `green` and `blue`.
 *
 * Each field is shifted to the top of its byte.  With the `#if 1` branch
 * active, the highest level of each channel is additionally widened to 255
 * so that a pixel packed from 255 unpacks to 255 again.  The highest shifted
 * level is 224 for the 3-bit channels and 192 for the 2-bit blue channel, so
 * those exact values are what has to be matched; the previous `> 224`
 * comparison could never be true for any channel.
 */
inline static void
ctx_332_unpack (uint8_t pixel,
                uint8_t *red,
                uint8_t *green,
                uint8_t *blue)
{
  uint32_t b = (pixel & 3) <<6;             /* 0, 64, 128 or 192 */
  uint32_t g = ( (pixel >> 2) & 7) <<5;     /* 0 .. 224 in steps of 32 */
  uint32_t r = ( (pixel >> 5) & 7) <<5;     /* 0 .. 224 in steps of 32 */

#if 1
  *blue  = (b >= 192) ? 255 : b;
  *green = (g >= 224) ? 255 : g;
  *red   = (r >= 224) ? 255 : r;
#else
  *blue  =  b;
  *green =  g;
  *red   =  r;
#endif
}

/* Pack 8-bit red, green and blue into one RGB332 byte: the top 3 bits of
 * red land in bits 7-5, the top 3 bits of green in bits 4-2 and the top
 * 2 bits of blue in bits 1-0.
 */
static inline uint8_t
ctx_332_pack (uint8_t red,
              uint8_t green,
              uint8_t blue)
{
  const uint8_t r3 = red   >> 5;
  const uint8_t g3 = green >> 5;
  const uint8_t b2 = blue  >> 6;
  return (uint8_t) ((r3 << 5) | (g3 << 2) | b2);
}
#if CTX_ENABLE_RGB332

/* Pack a 32-bit pixel into RGB332, taking red, green and blue from the
 * first three bytes of `in` as laid out in memory.
 */
static inline uint8_t
ctx_888_to_332 (uint32_t in)
{
  const uint8_t *bytes = (const uint8_t*)(&in);
  const uint8_t  red   = bytes[0];
  const uint8_t  green = bytes[1];
  const uint8_t  blue  = bytes[2];
  return ctx_332_pack (red, green, blue);
}

/* Unpack an RGB332 pixel into a 32-bit value whose first three bytes in
 * memory are red, green and blue.  The fourth byte is left at 0 (alpha is
 * deliberately not filled in here).
 */
static inline uint32_t
ctx_332_to_888 (uint8_t in)
{
  uint32_t packed = 0;
  uint8_t *bytes  = (uint8_t*)&packed;
  ctx_332_unpack (in, bytes + 0, bytes + 1, bytes + 2);
  return packed;
}

/* Convert `count` RGB332 pixels from `buf` into RGBA8 in `rgba`.  Alpha is
 * 255, except that with CTX_RGB332_ALPHA enabled a pixel unpacking to
 * (255, 0, 255) gets alpha 0.  `rasterizer` and `x` are not used.
 */
static inline void
ctx_RGB332_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
{
  const uint8_t *src = (const uint8_t *) buf;
  for (; count; count--, src++, rgba += 4)
    {
      uint8_t alpha = 255;
      ctx_332_unpack (*src, &rgba[0], &rgba[1], &rgba[2]);
#if CTX_RGB332_ALPHA
      if (rgba[0]==255 && rgba[2] == 255 && rgba[1]==0)
        alpha = 0;
#endif
      rgba[3] = alpha;
    }
}

/* Convert `count` RGBA8 pixels from `rgba` into RGB332 bytes in `buf`.
 * With CTX_RGB332_ALPHA enabled, pixels whose alpha is 0 are stored as the
 * packed form of (255, 0, 255); otherwise alpha is ignored.  `rasterizer`
 * and `x` are not used.
 */
static inline void
ctx_RGBA8_to_RGB332 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
{
  uint8_t *out = (uint8_t *) buf;
  for (; count; count--, out++, rgba += 4)
    {
#if CTX_RGB332_ALPHA
      if (rgba[3]==0)
        {
          *out = ctx_332_pack (255, 0, 255);
          continue;
        }
#endif
      *out = ctx_332_pack (rgba[0], rgba[1], rgba[2]);
    }
}

/* Composite a span onto an RGB332 destination.
 *
 * Fast path: when the active comp_op is the RGBA8 solid color source-over
 * compositor, every destination byte is unpacked, blended with the
 * precomputed color lanes read from rasterizer->color (32-bit words 1 and 2,
 * used as green/alpha and red/blue lanes respectively) and packed back.
 *
 * Generic path: the span is converted to RGBA8 in a temporary buffer,
 * composited with rasterizer->comp_op and converted back.
 *
 * Two defects of the previous fast path are addressed:
 *  - the packed result was stored through a uint16_t pointer although an
 *    RGB332 pixel is a single byte, overwriting the following pixel (and one
 *    byte past the end of the span); it is now stored as one byte.
 *  - for full coverage the destination weight was 255 - coverage = 0, which
 *    discards the destination even though the source color is translucent;
 *    it is now 255 - source alpha, as in ctx_over_RGBA8_full_2.
 */
static void
ctx_composite_RGB332 (CTX_COMPOSITE_ARGUMENTS)
{
  if (CTX_LIKELY(rasterizer->comp_op == ctx_RGBA8_source_over_normal_color))
  {
    uint32_t si_ga = ((uint32_t*)rasterizer->color)[1];
    uint32_t si_rb = ((uint32_t*)rasterizer->color)[2];
    uint32_t si_a  = si_ga >> 16;

    /* source lanes pre-scaled for the full coverage case */
    uint32_t si_gaf = (si_ga << 8) + 255;
    uint32_t si_rbf = (si_rb << 8) + 255;

    while (count--)
    {
      uint32_t cov   = *coverage++;
      uint32_t di    = ctx_332_to_888 (*((uint8_t*)dst));
      uint32_t di_ga = ((di & 0xff00ff00) >> 8);
      uint32_t di_rb = (di & 0x00ff00ff);

      if (CTX_LIKELY(cov == 255))
      {
        uint32_t rcov  = si_a ^ 255;   /* destination weight: 255 - alpha */
        *((uint8_t*)(dst)) =
        ctx_888_to_332((((si_rbf + di_rb * rcov) & 0xff00ff00) >> 8)  |
         (((si_gaf) + di_ga * rcov) & 0xff00ff00));
      }
      else
      {
        uint32_t rcov  = (((255+si_a * cov)>>8))^255;
        *((uint8_t*)(dst)) =
        ctx_888_to_332((((si_rb * cov + 0xff00ff + di_rb * rcov) & 0xff00ff00) >> 8)  |
         ((si_ga * cov + 0xff00ff + di_ga * rcov) & 0xff00ff00));
      }
      dst+=1;
    }
    return;
  }
  uint8_t pixels[count * 4];
  ctx_RGB332_to_RGBA8 (rasterizer, x0, dst, &pixels[0], count);
  rasterizer->comp_op (rasterizer, &pixels[0], rasterizer->color, x0, coverage, count);
  ctx_RGBA8_to_RGB332 (rasterizer, x0, &pixels[0], dst, count);
}

#endif
/* Pack 8-bit red, green and blue into a 16-bit RGB565 value (red in bits
 * 15-11, green in bits 10-5, blue in bits 4-0).  When `byteswap` is
 * non-zero the two bytes of the result are exchanged.
 */
static inline uint16_t
ctx_565_pack (const uint8_t  red,
              const uint8_t  green,
              const uint8_t  blue,
              const int      byteswap)
{
  const uint32_t r5     = red   >> 3;
  const uint32_t g6     = green >> 2;
  const uint32_t b5     = blue  >> 3;
  const uint32_t packed = (r5 << 11) | (g6 << 5) | b5;

  if (!byteswap)
    return packed;
  return (packed >> 8) | (packed << 8);   /* truncation keeps the low 16 bits */
}
#if CTX_ENABLE_RGB565 | CTX_ENABLE_RGB565_BYTESWAPPED

/* Unpack a 16-bit RGB565 pixel into 8-bit channels stored through `red`,
 * `green` and `blue`.  When `byteswap` is non-zero the two bytes of `pixel`
 * are exchanged first.  Each field is shifted to the top of its byte; the
 * low bits stay 0 (the saturating variant is compiled out).
 */
static inline void
ctx_565_unpack (const uint16_t pixel,
                uint8_t *red,
                uint8_t *green,
                uint8_t *blue,
                const int byteswap)
{
  const uint16_t native = byteswap ? (uint16_t) ((pixel>>8) | (pixel<<8))
                                   : pixel;
  const uint8_t b8 = (native & 31) <<3;
  const uint8_t g8 = ( (native>>5) & 63) <<2;
  const uint8_t r8 = ( (native>>11) & 31) <<3;

#if 0
  *blue  = (b8 > 248) * 255 + (b8 <= 248) * b8;
  *green = (g8 > 248) * 255 + (g8 <= 248) * g8;
  *red   = (r8 > 248) * 255 + (r8 <= 248) * r8;
#else
  *red   = r8;
  *green = g8;
  *blue  = b8;
#endif
}

/* Unpack a 16-bit RGB565 pixel into a 32-bit value with red in bits 0-7,
 * green in bits 8-15, blue in bits 16-23 and 0xff in bits 24-31.  When
 * `byteswap` is non-zero the two bytes of `pixel` are exchanged first.
 *
 * The top byte is built from an unsigned constant: shifting the int constant
 * 0xff left by 24 does not fit a 32-bit int and is undefined behaviour.
 */
static inline uint32_t
ctx_565_unpack_32 (const uint16_t pixel,
                   const int byteswap)
{
  uint16_t byteswapped;
  if (byteswap)
    { byteswapped = (pixel>>8) | (pixel<<8); }
  else
    { byteswapped  = pixel; }
  uint32_t b   = (byteswapped & 31) <<3;
  uint32_t g = ( (byteswapped>>5) & 63) <<2;
  uint32_t r   = ( (byteswapped>>11) & 31) <<3;
#if 0
  b = (b > 248) * 255 + (b <= 248) * b;
  g = (g > 248) * 255 + (g <= 248) * g;
  r = (r > 248) * 255 + (r <= 248) * r;
#endif

  return r +  (g << 8) + (b << 16) + (0xffu << 24);
}


/* Pack a 32-bit pixel into RGB565, taking red, green and blue from the
 * first three bytes of `in` as laid out in memory; `byteswap` is passed on
 * to ctx_565_pack.
 */
static inline uint16_t
ctx_888_to_565 (uint32_t in, int byteswap)
{
  const uint8_t *bytes = (const uint8_t*)(&in);
  const uint8_t  red   = bytes[0];
  const uint8_t  green = bytes[1];
  const uint8_t  blue  = bytes[2];
  return ctx_565_pack (red, green, blue, byteswap);
}

/* Unpack an RGB565 pixel into a 32-bit value whose first three bytes in
 * memory are red, green and blue; `byteswap` is passed on to
 * ctx_565_unpack.  The fourth byte is left at 0 (alpha is deliberately not
 * filled in here).
 */
static inline uint32_t
ctx_565_to_888 (uint16_t in, int byteswap)
{
  uint32_t packed = 0;
  uint8_t *bytes  = (uint8_t*)&packed;
  ctx_565_unpack (in, bytes + 0, bytes + 1, bytes + 2, byteswap);
  return packed;
}

#endif
#if CTX_ENABLE_RGB565


/* Convert `count` RGB565 pixels (no byte swapping) from `buf` into RGBA8 in
 * `rgba`.  Every pixel is unpacked with alpha 255 and stored with a single
 * 32-bit write.
 *
 * With CTX_RGB565_ALPHA enabled, pixels equal to the packed form of
 * (255, 0, 255) - the value ctx_RGBA8_to_RGB565 stores for alpha 0 - get
 * alpha 0.  The raw 16-bit value is compared: the unpacked channels top out
 * at 248, so the previous comparison of unpacked red/blue against 255 could
 * never match.  `rasterizer` and `x` are not used.
 */
static inline void
ctx_RGB565_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
{
  const uint16_t *pixel = (const uint16_t *) buf;
#if CTX_RGB565_ALPHA
  const uint16_t transparent_key = ctx_565_pack (255, 0, 255, 0);
#endif
  while (count--)
    {
      ((uint32_t*)(rgba))[0] = ctx_565_unpack_32 (*pixel, 0);
#if CTX_RGB565_ALPHA
      if (*pixel == transparent_key)
        { rgba[3] = 0; }
#endif
      pixel+=1;
      rgba +=4;
    }
}

/* Convert `count` RGBA8 pixels from `rgba` into RGB565 (no byte swapping)
 * in `buf`.  With CTX_RGB565_ALPHA enabled, pixels whose alpha is 0 are
 * stored as the packed form of (255, 0, 255); otherwise alpha is ignored.
 * `rasterizer` and `x` are not used.
 */
static inline void
ctx_RGBA8_to_RGB565 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
{
  uint16_t *out = (uint16_t *) buf;
  for (; count; count--, out++, rgba += 4)
    {
#if CTX_RGB565_ALPHA
      if (rgba[3]==0)
        {
          *out = ctx_565_pack (255, 0, 255, 0);
          continue;
        }
#endif
      *out = ctx_565_pack (rgba[0], rgba[1], rgba[2], 0);
    }
}

/* Forward declarations of the RGBA8 solid color compositors; the RGB565
 * compositors below compare rasterizer->comp_op against the source-over one
 * to select their fast path.
 */
static void
ctx_RGBA8_source_over_normal_color (CTX_COMPOSITE_ARGUMENTS);
static void
ctx_RGBA8_source_copy_normal_color (CTX_COMPOSITE_ARGUMENTS);

/* Composite a span onto an RGB565 (not byte swapped) destination.
 *
 * Fast path: when the active comp_op is the RGBA8 solid color source-over
 * compositor, every destination pixel is unpacked, blended with the
 * precomputed color lanes read from rasterizer->color (32-bit words 1 and 2,
 * used as green/alpha and red/blue lanes respectively) and packed back.
 *
 * Generic path: the span is converted to RGBA8 in a temporary buffer,
 * composited with rasterizer->comp_op and converted back.
 *
 * For full coverage the destination weight used to be 255 - coverage = 0,
 * which discards the destination even though the source color is
 * translucent; it is now 255 - source alpha, as in ctx_over_RGBA8_full_2.
 */
static void
ctx_composite_RGB565 (CTX_COMPOSITE_ARGUMENTS)
{
#if 1
  if (CTX_LIKELY(rasterizer->comp_op == ctx_RGBA8_source_over_normal_color))
  {
    uint32_t si_ga = ((uint32_t*)rasterizer->color)[1];
    uint32_t si_rb = ((uint32_t*)rasterizer->color)[2];
    uint32_t si_a  = si_ga >> 16;

    /* source lanes pre-scaled for the full coverage case */
    uint32_t si_gaf = (si_ga << 8) + 255;
    uint32_t si_rbf = (si_rb << 8) + 255;

    while (count--)
    {
      uint32_t cov   = *coverage++;
      uint32_t di    = ctx_565_to_888 (*((uint16_t*)dst), 0);
      uint32_t di_ga = ((di & 0xff00ff00) >> 8);
      uint32_t di_rb = (di & 0x00ff00ff);

      if (CTX_LIKELY(cov == 255)) // not vectorizable but we probably
      {                           // want to keep it like this
        uint32_t rcov  = si_a ^ 255;   /* destination weight: 255 - alpha */
        *((uint16_t*)(dst)) =
        ctx_888_to_565((((si_rbf + di_rb * rcov) & 0xff00ff00) >> 8)  |
         (((si_gaf) + di_ga * rcov) & 0xff00ff00), 0);
      }
      else
      {
        uint32_t rcov  = (((255+si_a * cov)>>8))^255;
        *((uint16_t*)(dst)) =
        ctx_888_to_565((((si_rb * cov + 0xff00ff + di_rb * rcov) & 0xff00ff00) >> 8)  |
         ((si_ga * cov + 0xff00ff + di_ga * rcov) & 0xff00ff00), 0);
      }
      dst+=2;
    }
    return;
  }
#endif

  uint8_t pixels[count * 4];
  ctx_RGB565_to_RGBA8 (rasterizer, x0, dst, &pixels[0], count);
  rasterizer->comp_op (rasterizer, &pixels[0], rasterizer->color, x0, coverage, count);
  ctx_RGBA8_to_RGB565 (rasterizer, x0, &pixels[0], dst, count);
}
#endif
#if CTX_ENABLE_RGB565_BYTESWAPPED

/* Convert `count` byte swapped RGB565 pixels from `buf` into RGBA8 in
 * `rgba`.  Every pixel is unpacked with alpha 255 and stored with a single
 * 32-bit write.
 *
 * With CTX_RGB565_ALPHA enabled, pixels equal to the byte swapped packed form
 * of (255, 0, 255) - the value ctx_RGBA8_to_RGB565_BS stores for alpha 0 -
 * get alpha 0.  The raw 16-bit value is compared: the unpacked channels top
 * out at 248, so the previous comparison of unpacked red/blue against 255
 * could never match.  `rasterizer` and `x` are not used.
 */
static inline void
ctx_RGB565_BS_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
{
  const uint16_t *pixel = (const uint16_t *) buf;
#if CTX_RGB565_ALPHA
  const uint16_t transparent_key = ctx_565_pack (255, 0, 255, 1);
#endif
  while (count--)
    {
      ((uint32_t*)(rgba))[0] = ctx_565_unpack_32 (*pixel, 1);
#if CTX_RGB565_ALPHA
      if (*pixel == transparent_key)
        { rgba[3] = 0; }
#endif
      pixel+=1;
      rgba +=4;
    }
}

/* Convert `count` RGBA8 pixels from `rgba` into byte swapped RGB565 in
 * `buf`.  With CTX_RGB565_ALPHA enabled, pixels whose alpha is 0 are stored
 * as the packed form of (255, 0, 255); otherwise alpha is ignored.
 * `rasterizer` and `x` are not used.
 */
static inline void
ctx_RGBA8_to_RGB565_BS (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
{
  uint16_t *out = (uint16_t *) buf;
  for (; count; count--, out++, rgba += 4)
    {
#if CTX_RGB565_ALPHA
      if (rgba[3]==0)
        {
          *out = ctx_565_pack (255, 0, 255, 1);
          continue;
        }
#endif
      *out = ctx_565_pack (rgba[0], rgba[1], rgba[2], 1);
    }
}

/* Composite a span onto a byte swapped RGB565 destination.
 *
 * Fast path: when the active comp_op is the RGBA8 solid color source-over
 * compositor, every destination pixel is unpacked, blended with the
 * precomputed color lanes read from rasterizer->color (32-bit words 1 and 2,
 * used as green/alpha and red/blue lanes respectively) and packed back.
 *
 * Generic path: the span is converted to RGBA8 in a temporary buffer,
 * composited with rasterizer->comp_op and converted back.
 *
 * For full coverage the destination weight used to be 255 - coverage = 0,
 * which discards the destination even though the source color is
 * translucent; it is now 255 - source alpha, as in ctx_over_RGBA8_full_2.
 */
static void
ctx_composite_RGB565_BS (CTX_COMPOSITE_ARGUMENTS)
{
#if 1
  if (CTX_LIKELY(rasterizer->comp_op == ctx_RGBA8_source_over_normal_color))
  {
    uint32_t si_ga = ((uint32_t*)rasterizer->color)[1];
    uint32_t si_rb = ((uint32_t*)rasterizer->color)[2];
    uint32_t si_a  = si_ga >> 16;

    /* source lanes pre-scaled for the full coverage case */
    uint32_t si_gaf = (si_ga << 8) + 255;
    uint32_t si_rbf = (si_rb << 8) + 255;

    while (count--)
    {
      uint32_t cov   = *coverage++;
      uint32_t di    = ctx_565_to_888 (*((uint16_t*)dst), 1);
      uint32_t di_ga = ((di & 0xff00ff00) >> 8);
      uint32_t di_rb = (di & 0x00ff00ff);

      if (CTX_LIKELY(cov == 255))
      {
        uint32_t rcov  = si_a ^ 255;   /* destination weight: 255 - alpha */
        *((uint16_t*)(dst)) =
        ctx_888_to_565((((si_rbf + di_rb * rcov) & 0xff00ff00) >> 8)  |
         (((si_gaf) + di_ga * rcov) & 0xff00ff00), 1);
      }
      else
      {
        uint32_t rcov  = (((255+si_a * cov)>>8))^255;
        *((uint16_t*)(dst)) =
        ctx_888_to_565((((si_rb * cov + 0xff00ff + di_rb * rcov) & 0xff00ff00) >> 8)  |
         ((si_ga * cov + 0xff00ff + di_ga * rcov) & 0xff00ff00), 1);
      }
      dst+=2;
    }
    return;
  }
#endif

  uint8_t pixels[count * 4];
  ctx_RGB565_BS_to_RGBA8 (rasterizer, x0, dst, &pixels[0], count);
  rasterizer->comp_op (rasterizer, &pixels[0], rasterizer->color, x0, coverage, count);
  ctx_RGBA8_to_RGB565_BS (rasterizer, x0, &pixels[0], dst, count);
}
#endif


/* Blend the packed 32-bit pixel `src` over `dst` with coverage `cov`
 * (0..255).  The four bytes are processed as two pairs of 16-bit lanes
 * (bytes 0/2 and bytes 1/3); the top byte of `src` is used as its alpha.
 * The source is weighted by `cov`, the destination by the complement of
 * (255 + alpha * cov) >> 8.
 */
static inline uint32_t
ctx_over_RGBA8 (uint32_t dst, uint32_t src, uint32_t cov)
{
  const uint32_t odd_mask  = 0xff00ff00;   /* bytes 1 and 3 */
  const uint32_t even_mask = 0x00ff00ff;   /* bytes 0 and 2 */
  const uint32_t rounding  = 0xff00ff;

  const uint32_t src_odd   = (src & odd_mask) >> 8;
  const uint32_t src_even  = src & even_mask;
  const uint32_t alpha     = src_odd >> 16;
  const uint32_t dst_scale = ((255 + alpha * cov) >> 8) ^ 255;
  const uint32_t dst_odd   = (dst & odd_mask) >> 8;
  const uint32_t dst_even  = dst & even_mask;

  const uint32_t out_even =
    (((src_even * cov) + rounding + (dst_even * dst_scale)) & odd_mask) >> 8;
  const uint32_t out_odd  =
    ((src_odd * cov) + rounding + (dst_odd * dst_scale)) & odd_mask;

  return out_even | out_odd;
}


/* Blend the packed 32-bit pixel `src` over `dst` at full coverage: the
 * source is weighted by 255 and the destination by 255 minus the source
 * alpha (top byte of `src`).  Bytes are processed as two pairs of 16-bit
 * lanes.
 */
static inline uint32_t
ctx_over_RGBA8_full (uint32_t dst, uint32_t src)
{
  const uint32_t odd_mask  = 0xff00ff00;   /* bytes 1 and 3 */
  const uint32_t even_mask = 0x00ff00ff;   /* bytes 0 and 2 */
  const uint32_t rounding  = 0xff00ff;

  const uint32_t src_odd   = (src & odd_mask) >> 8;
  const uint32_t src_even  = src & even_mask;
  const uint32_t alpha     = src_odd >> 16;
  const uint32_t dst_scale = alpha ^ 255;
  const uint32_t dst_odd   = (dst & odd_mask) >> 8;
  const uint32_t dst_even  = dst & even_mask;

  const uint32_t out_even =
    (((src_even * 255) + rounding + (dst_even * dst_scale)) & odd_mask) >> 8;
  const uint32_t out_odd  =
    ((src_odd * 255) + rounding + (dst_odd * dst_scale)) & odd_mask;

  return out_even | out_odd;
}

/* Variant of the coverage weighted "over" blend taking the source already
 * split into its lanes: `si_ga` (bytes 1/3 shifted down), `si_rb`
 * (bytes 0/2) and alpha `si_a`.  The destination is weighted by the
 * complement of (si_a * cov) / 255.
 */
static inline uint32_t
ctx_over_RGBA8_2 (uint32_t dst, uint32_t si_ga, uint32_t si_rb, uint32_t si_a, uint32_t cov)
{
  const uint32_t odd_mask  = 0xff00ff00;   /* bytes 1 and 3 */
  const uint32_t rounding  = 0xff00ff;

  const uint32_t dst_scale = ((si_a * cov) / 255) ^ 255;
  const uint32_t dst_odd   = (dst & odd_mask) >> 8;
  const uint32_t dst_even  = dst & 0x00ff00ff;

  const uint32_t out_even =
    (((si_rb * cov) + rounding + (dst_even * dst_scale)) & odd_mask) >> 8;
  const uint32_t out_odd  =
    ((si_ga * cov) + rounding + (dst_odd * dst_scale)) & odd_mask;

  return out_even | out_odd;
}

/* Full coverage "over" blend taking source lanes that the caller has
 * already scaled (`si_ga_full`, `si_rb_full`) together with the source alpha
 * `si_a`.  The destination is weighted by 255 - si_a and added to the
 * pre-scaled source lanes.
 */
static inline uint32_t
ctx_over_RGBA8_full_2 (uint32_t dst, uint32_t si_ga_full, uint32_t si_rb_full, uint32_t si_a)
{
  const uint32_t odd_mask  = 0xff00ff00;   /* bytes 1 and 3 */

  const uint32_t dst_scale = si_a ^ 255;
  const uint32_t dst_odd   = (dst & odd_mask) >> 8;
  const uint32_t dst_even  = dst & 0x00ff00ff;

  const uint32_t out_even =
    (((si_rb_full) + (dst_even * dst_scale)) & odd_mask) >> 8;
  const uint32_t out_odd  =
    ((si_ga_full) + (dst_odd * dst_scale)) & odd_mask;

  return out_even | out_odd;
}

/* Store `val` into `count` consecutive 32-bit pixels starting at `dst_pix`.
 * Does nothing when `count` is zero or negative.
 */
static inline void ctx_span_set_color (uint32_t *dst_pix, uint32_t val, int count)
{
  for (int i = 0; i < count; i++)
    dst_pix[i] = val;
}

/* Store `val` into `count` consecutive 32-bit pixels starting at `dst_pix`.
 * Unlike ctx_span_set_color there is no guard against a negative `count`;
 * the loop simply runs until the counter reaches zero.
 */
static inline void ctx_span_set_colorb  (uint32_t *dst_pix, uint32_t val, int count)
{
  for (; count; count--)
    *dst_pix++ = val;
}

/* Store `val` into `count` consecutive 32-bit pixels starting at `dst_pix`;
 * variant taking an unsigned count.
 */
static inline void ctx_span_set_colorbu (uint32_t *dst_pix, uint32_t val, unsigned int count)
{
  for (unsigned int i = 0; i < count; i++)
    dst_pix[i] = val;
}

/* Write the four 32-bit words val[0..3] `count` times in a row starting at
 * `dst_pix` (4 * count words in total).  Does nothing when `count` is zero
 * or negative.
 */
static inline void ctx_span_set_color_x4 (uint32_t *dst_pix, uint32_t *val, int count)
{
  for (int group = 0; group < count; group++, dst_pix += 4)
  {
    dst_pix[0] = val[0];
    dst_pix[1] = val[1];
    dst_pix[2] = val[2];
    dst_pix[3] = val[3];
  }
}

#if CTX_FAST_FILL_RECT

/* Fill the pixel rectangle (x0,y0)-(x1,y1), both corners inclusive, of the
 * rasterizer's target buffer with a bilinearly interpolated RGBA8 texture
 * taken from the current fill source.
 *
 * Texture coordinates are tracked in 16.16 fixed point.  Two rows of
 * horizontally interpolated texels (rb_row[0] / rb_row[1]) are kept; a new
 * row is only sampled when the integer texture row changes, and each output
 * row is the vertical interpolation between the two cached rows.
 *
 * When `copy` is non-zero the result is written directly to the
 * destination, otherwise it is rendered into a temporary row and composited
 * with ctx_RGBA8_source_over_normal_full_cov_buf.
 */
static inline void ctx_RGBA8_image_rgba8_RGBA8_bi_scaled_fill_rect (CtxRasterizer *rasterizer, int x0, int y0, int x1, int y1, int copy)
{
  CtxExtend extend = rasterizer->state->gstate.extend;
  float u0 = 0; float v0 = 0;
  float ud = 0; float vd = 0;
  float w0 = 1; float wd = 0;
  ctx_init_uv (rasterizer, x0, rasterizer->scanline/CTX_FULL_AA,&u0, &v0, &w0, &ud, &vd, &wd);
  u0-=0.5;
  v0-=0.5;

  /* address of the first destination pixel of the rectangle */
  uint8_t *dst = ( (uint8_t *) rasterizer->buf);
  int blit_stride = rasterizer->blit_stride;
  dst += (y0 - rasterizer->blit_y) * blit_stride;
  dst += (x0) * rasterizer->format->bpp/8;

  unsigned int width = x1-x0+1;
  unsigned int height = y1-y0+1;

  CtxSource *g = &rasterizer->state->gstate.source_fill;
  CtxBuffer *buffer = g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;

  int bwidth = buffer->width;
  int bheight = buffer->height;
  uint8_t tsrc[copy?1:width*4]; /* temporary row, only used when not copy */

  //uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;
  uint32_t *data = ((uint32_t*)buffer->data);
  /* two cached rows, each holding a pair of words per output pixel */
  uint32_t rb_row[2][width*2];
  //uint32_t ga_row[2][width];

  /* 16.16 fixed point start coordinate and per pixel deltas */
  int32_t row_u = u0 * 65536;
  int32_t row_v = v0 * 65536;
  int   ui_delta = ud * 65536;
  int   vi_delta = vd * 65536;

  int iter = 0;

  int loaded_v = -1;
  int top      = iter % 2;


  { // preload previous row for first row
    int32_t ui  = row_u;
    int32_t vi  = row_v;
    unsigned int xa=0;
      for (unsigned int x = 0; x < width; x++, xa+=2)
      {
        int u = ui >> 16;
        int v = vi >> 16;
        int u1 = u + 1;
        /* texels rejected by _ctx_coords_restrict sample as 0 */
        uint32_t  blank = 0;
        uint32_t *src0 = &blank;
        uint32_t *src1 = src0;
    
        if (CTX_LIKELY(_ctx_coords_restrict (extend, &u, &v, bwidth, bheight)))
        {
          src0 = data + u + bwidth * (v);
        }
        if (CTX_LIKELY(_ctx_coords_restrict (extend, &u1, &v, bwidth, bheight)))
        {
          src1 = data + u1 + bwidth * (v);
        }
    
        ctx_lerp_RGBA8_split (*src0, *src1, ui>>8, &rb_row[!top][xa], &rb_row[!top][xa+1]);
        ui += ui_delta;
        vi += vi_delta;
      }
    }

  { // scale/translate only
    for (unsigned int y = 0; y < height; y++)
    {
       int top     = iter % 2;
       int32_t ui = row_u;
       int32_t vi = row_v;
       int v =  (vi >> 16) + 1;
       /* bits 8..15 of the row coordinate: vertical interpolation factor */
       uint8_t dv = ((row_v)>> 8);
  
       /* sample a new texture row only when the integer row changed */
       if (v != loaded_v)
       {
         loaded_v = v;
         unsigned int xa=0;

           for (unsigned int x = 0; x < width; x++, xa+=2)
           {
             int u = ui >> 16;
             int u1 = u+1;
             uint32_t  blank = 0;
             uint32_t *src0 = &blank;
             uint32_t *src1 = src0;


        if (CTX_LIKELY(_ctx_coords_restrict (extend, &u, &v, bwidth, bheight)))
        {
          src0 = data + u + bwidth * (v);
        }
        if (CTX_LIKELY(_ctx_coords_restrict (extend, &u1, &v, bwidth, bheight)))
        {
          src1 = data + u1 + bwidth * (v);
        }

             ctx_lerp_RGBA8_split (*src0, *src1, ui>>8, &rb_row[top][xa], &rb_row[top][xa+1]);
             ui += ui_delta;
           }
         /* swap the roles of the two cached rows */
         iter++;
         top    = iter % 2;
       }
       
       {
         /* merge both cached rows into the destination or the temporary row */
         uint32_t*dst_i = copy?(uint32_t*)dst:(uint32_t*)tsrc;
         int ntop = !top;
         for (unsigned int xa = 0; xa < width * 2; xa+=2)
         {
            *dst_i ++ =
            ctx_lerp_RGBA8_merge (rb_row[top][xa], rb_row[top][xa+1], 
                                  rb_row[ntop][xa], rb_row[ntop][xa+1],
                                  dv);
         }
         if (!copy)
         ctx_RGBA8_source_over_normal_full_cov_buf (rasterizer,
            dst, NULL, x0, NULL, width, &tsrc[0]);
       }
       /* step to the next output row: the per pixel delta rotated by 90 degrees */
       row_u -= vi_delta;
       row_v += ui_delta;
       dst += blit_stride;
    }
  }
}

/* Fill the pixel rectangle (x0,y0)-(x1,y1), both corners inclusive, of the
 * rasterizer's target buffer with a bilinearly interpolated RGBA8 texture
 * taken from the current fill source, stepping both texture coordinates
 * per pixel.
 *
 * Texture coordinates are tracked in 16.16 fixed point.  Two rows of
 * horizontally interpolated texels (rb_row[0] / rb_row[1]) are kept and each
 * output row is the vertical interpolation between them.  Texels outside
 * the texture sample as 0.
 *
 * When `copy` is non-zero the result is written directly to the
 * destination, otherwise it is rendered into a temporary row and composited
 * with ctx_RGBA8_source_over_normal_full_cov_buf.
 *
 * The right-hand texel in the main loop is addressed from `data`, exactly as
 * in the preload loop.  It used to be derived as `src0 + 1`, which points one
 * element past the local `blank` whenever src0 had not been redirected into
 * the texture.
 */
static inline void ctx_RGBA8_image_rgba8_RGBA8_bi_affine_fill_rect (CtxRasterizer *rasterizer, int x0, int y0, int x1, int y1, int copy)
{
  float u0 = 0; float v0 = 0;
  float ud = 0; float vd = 0;
  float w0 = 1; float wd = 0;
  ctx_init_uv (rasterizer, x0, rasterizer->scanline/CTX_FULL_AA,&u0, &v0, &w0, &ud, &vd, &wd);
  u0-=0.5;
  v0-=0.5;

  /* address of the first destination pixel of the rectangle */
  uint8_t *dst = ( (uint8_t *) rasterizer->buf);
  int blit_stride = rasterizer->blit_stride;
  dst += (y0 - rasterizer->blit_y) * blit_stride;
  dst += (x0) * rasterizer->format->bpp/8;

  unsigned int width = x1-x0+1;
  unsigned int height = y1-y0+1;

  CtxSource *g = &rasterizer->state->gstate.source_fill;
  CtxBuffer *buffer = g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;

  int bwidth = buffer->width;
  int bheight = buffer->height;
  uint8_t tsrc[copy?1:width*4]; /* temporary row, only used when not copy */

  //uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;
  uint32_t *data = ((uint32_t*)buffer->data);
  /* two cached rows, each holding a pair of words per output pixel */
  uint32_t rb_row[2][width*2];
  //uint32_t ga_row[2][width];

  /* 16.16 fixed point start coordinate and per pixel deltas */
  uint32_t row_u = u0 * 65536;
  uint32_t row_v = v0 * 65536;
  int   ui_delta = ud * 65536;
  int   vi_delta = vd * 65536;

  int iter = 0;

  int loaded_v = -1;
  int top      = iter % 2;


  { // preload previous row for first row
    uint32_t ui  = row_u;
    uint32_t vi  = row_v;
    unsigned int xa=0;
      for (unsigned int x = 0; x < width; x++, xa+=2)
      {
        int u = ui >> 16;
        int v = vi >> 16;
        uint32_t  blank = 0;
        uint32_t *src0 = &blank;
        uint32_t *src1 = src0;
    
        if (CTX_LIKELY (v >= 0 && v < bheight))
        {
          if (CTX_LIKELY (u >= 0 && u + 1 < bwidth))
          {
            src0 = data + u + bwidth * (v);
            src1 = src0 + 1;
          }
          else
          {
            if (u >= 0 && u < bwidth)
              src0 = data + u + bwidth * (v);
            if (u + 1>= 0 && u + 1 < bwidth)
              src1 = data + (u+1) + bwidth * (v);
          }
        }
    
        ctx_lerp_RGBA8_split (*src0, *src1, ui>>8, &rb_row[!top][xa], &rb_row[!top][xa+1]);
        ui += ui_delta;
        vi += vi_delta;
      }
    }

  for (unsigned int y = 0; y < height; y++)
  {
     int top     = iter % 2;
     uint32_t ui = row_u;
     uint32_t vi = row_v;

     int v =  (vi >> 16) + 1;
     /* bits 8..15 of the row coordinate: vertical interpolation factor */
     uint8_t dv = ((row_v)>> 8);

     if (v != loaded_v)
     {
       loaded_v = v;
       unsigned int xa=0;
       for (unsigned int x = 0; x < width; x++, xa+=2)
       {
         int u = ui >> 16;
         v =  (vi >> 16) + 1;
         uint32_t  blank = 0;
         uint32_t *src0 = &blank;
         uint32_t *src1 = src0;
         if (CTX_LIKELY (v >= 0 && v < bheight))
         {
           if (CTX_LIKELY(u >= 0 && u + 1 < bwidth))
           {
             src0 = data + u + bwidth * (v);
             src1 = src0 + 1;
           }
           else
           {
             if (u >= 0 && u < bwidth)
               src0 = data + u + bwidth * (v);
             /* address from data: src0 may still point at blank here */
             if (u + 1>= 0 && u + 1 < bwidth)
               src1 = data + (u+1) + bwidth * (v);
           }
         }
         ctx_lerp_RGBA8_split (*src0, *src1, ui>>8, &rb_row[top][xa], &rb_row[top][xa+1]);
         ui += ui_delta;
         vi += vi_delta;
       }
       /* swap the roles of the two cached rows */
       iter++;
       top    = iter % 2;
     }
     
     {
       /* merge both cached rows into the destination or the temporary row */
       uint32_t*dst_i = copy?(uint32_t*)dst:(uint32_t*)tsrc;
       int ntop = !top;
       for (unsigned int xa = 0; xa < width * 2; xa+=2)
       {
          *dst_i ++ =
          ctx_lerp_RGBA8_merge (rb_row[top][xa], rb_row[top][xa+1], 
                                rb_row[ntop][xa], rb_row[ntop][xa+1],
                                dv);
       }
       if (!copy)
       ctx_RGBA8_source_over_normal_full_cov_buf (rasterizer,
          dst, NULL, x0, NULL, width, &tsrc[0]);
     }
     /* step to the next output row: the per pixel delta rotated by 90 degrees */
     row_u -= vi_delta;
     row_v += ui_delta;
     dst += blit_stride;
  }
}

#if 0
/* Currently compiled out (enclosed in #if 0).
 *
 * Fill the pixel rectangle (x0,y0)-(x1,y1), both corners inclusive, from an
 * RGBA8 texture without any interpolation: the texture is read at the same
 * integer coordinates as the destination, one row per destination row.
 * Columns left of the texture are skipped (`pre`), the remaining width is
 * limited to the texture width (`core`), and rows outside the texture are
 * left untouched.  With `copy` set rows are memcpy'd, otherwise they are
 * composited with ctx_RGBA8_source_over_normal_full_cov_buf.
 *
 * NOTE(review): the values obtained from ctx_init_uv are not used further in
 * this function - confirm whether that is intended before re-enabling it.
 */
static inline void ctx_RGBA8_image_rgba8_RGBA8_nearest_fill_rect_copy (CtxRasterizer *rasterizer, int x0, int y0, int x1, int y1, int copy)
{
  float u0 = 0; float v0 = 0;
  float ud = 0; float vd = 0;
  float w0 = 1; float wd = 0;
  ctx_init_uv (rasterizer, x0, rasterizer->scanline/CTX_FULL_AA,&u0, &v0, &w0, &ud, &vd, &wd);

  /* destination is addressed in whole 32-bit pixels here */
  uint32_t *dst = ( (uint32_t *) rasterizer->buf);
  int blit_stride = rasterizer->blit_stride/4;
  dst += (y0 - rasterizer->blit_y) * blit_stride;
  dst += (x0);

  unsigned int width = x1-x0+1;
  unsigned int height = y1-y0+1;

  //CtxSource *g = &rasterizer->state->gstate.source_fill;
  //CtxBuffer *buffer = g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;

  CtxSource *g = &rasterizer->state->gstate.source_fill;
  CtxBuffer *buffer = 
     g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;
  int bwidth  = buffer->width;
  int bheight = buffer->height;
  int u = x0;// + 0.5f;
  int v = y0;// + 0.5f;

  uint32_t *src = ((uint32_t*)buffer->data) + bwidth * v + u;

  /* number of leading columns that fall left of the texture */
  int pre = ctx_mini(ctx_maxi(-u,0), width);

  width-=pre;
  u+=pre;

  /* number of columns that actually overlap the texture */
  int core = ctx_mini (width, bwidth - u);

  if (copy)
  {
    if (core>0)
    {
      uint32_t *t_dst = dst;
      for (unsigned int y = 0; y < height; y++)
      {
         if (CTX_LIKELY((v >= 0 && v < bheight)))
         {
           memcpy (t_dst, src + pre, core * 4);
         }
         v++;
         src += bwidth;
         t_dst += blit_stride;
      }
    }
  }
  else
  {
    if (core>0)
    {
      uint32_t *t_dst = dst;
      for (unsigned int y = 0; y < height; y++)
      {
         if (CTX_LIKELY((v >= 0 && v < bheight)))
         {
           ctx_RGBA8_source_over_normal_full_cov_buf (rasterizer,
               (uint8_t*)t_dst, NULL, x0+pre, NULL, core, (uint8_t*)src);
         }
         v++;
         src += bwidth;
         t_dst += blit_stride;
      }
    }
  }
}
#endif


/* ctx_composite_fill_rect_aligned:
 * @rasterizer: rasterizer whose target buffer (rasterizer->buf) is written
 * @x0, @y0:    first column / row of the rectangle (inclusive)
 * @x1, @y1:    last column / row of the rectangle (inclusive)
 * @cov:        uniform coverage applied to every pixel, 255 meaning full
 *
 * Fills a pixel-aligned rectangle with the rasterizer's current source.
 * The rectangle is first clamped to the blit area described by
 * blit_x/blit_y/blit_width/blit_height; if nothing remains the function
 * returns without drawing.
 *
 * rasterizer->comp selects a specialised loop for the common cases (solid
 * color copy/over, image fragments).  Any combination not handled by the
 * two switch statements ends up in the generic fallback at the bottom,
 * which calls rasterizer->apply_coverage once per row.
 *
 * Side effects: besides writing pixels, rasterizer->scanline is set by
 * INIT_ENV and advanced by the fallback loop.
 */
static void
ctx_composite_fill_rect_aligned (CtxRasterizer *rasterizer,
                                 int            x0,
                                 int            y0,
                                 int            x1,
                                 int            y1,
                                 uint8_t        cov)
{
  int blit_x = rasterizer->blit_x;
  int blit_y = rasterizer->blit_y;
  int blit_width = rasterizer->blit_width;
  int blit_height = rasterizer->blit_height;
  int blit_stride = rasterizer->blit_stride;

  /* clamp the inclusive rectangle to the blit area */
  x0 = ctx_maxi (x0, blit_x);
  x1 = ctx_mini (x1, blit_x + blit_width - 1);
  y0 = ctx_maxi (y0, blit_y);
  y1 = ctx_mini (y1, blit_y + blit_height - 1);

  int width = x1 - x0 + 1;
  int height= y1 - y0 + 1;
  /* height is only used for this emptiness test; the row loops below
   * iterate from y0 to y1 directly */
  if (CTX_UNLIKELY (width <=0 || height <= 0))
    return;

  CtxCovPath comp = rasterizer->comp;
  uint8_t *dst;

  /* INIT_ENV sets rasterizer->scanline to the first row and points dst at
   * the first pixel of the rectangle.  It is invoked per case rather than
   * once up front because a couple of the cases return before needing it.
   *
   * NOTE(review): the row offset is taken relative to blit_y, but the
   * column offset uses x0 as-is (blit_x is not subtracted) - presumably
   * blit_x is 0 for the buffers reaching this code; confirm.
   * NOTE(review): the macro body ends in "while(0);" so every "INIT_ENV;"
   * expands to an extra empty statement; harmless at the current call
   * sites, but it would break an unbraced if/else.
   */
#define INIT_ENV do {\
  rasterizer->scanline = y0 * CTX_FULL_AA; \
  dst = ( (uint8_t *) rasterizer->buf); \
  dst += (y0 - blit_y) * blit_stride; \
  dst += (x0 * rasterizer->format->bpp)/8;}while(0);

  if (cov == 255)
  {
    /* full coverage: no per-pixel coverage blending needed */
    switch (comp)
    {
    case CTX_COV_PATH_RGBA8_COPY:
    {
      /* overwrite with the 32 bit value held in the first four bytes of
       * rasterizer->color */
      uint32_t color;
      memcpy (&color, (uint32_t*)rasterizer->color, sizeof (color));
      INIT_ENV;
      if (CTX_UNLIKELY(width == 1))
      {
        /* single column: one store per row */
        for (unsigned int y = y0; y <= (unsigned)y1; y++)
        {
          uint32_t *dst_i = (uint32_t*)&dst[0];
          *dst_i = color;
          dst += blit_stride;
        }
      }
      else
      {
        for (unsigned int y = y0; y <= (unsigned)y1; y++)
        {
#if 0
          uint32_t *dst_pix = (uint32_t*)&dst[0];
          int count = width;
          while(count--)
            *dst_pix++=color;
#else
          ctx_span_set_colorbu ((uint32_t*)&dst[0], color, width);
#endif
          dst += blit_stride;
        }
      }
      return;
    }
    case CTX_COV_PATH_RGBAF_COPY:
    case CTX_COV_PATH_GRAY8_COPY:
    case CTX_COV_PATH_GRAYA8_COPY:
    case CTX_COV_PATH_GRAYAF_COPY:
    case CTX_COV_PATH_CMYKAF_COPY:
    case CTX_COV_PATH_RGB565_COPY:
    case CTX_COV_PATH_RGB332_COPY:
    case CTX_COV_PATH_RGB8_COPY:
    case CTX_COV_PATH_CMYK8_COPY:
    case CTX_COV_PATH_CMYKA8_COPY:
    {
      /* overwrite with the color pre-encoded in the target format
       * (rasterizer->color_native); the loops are specialised on the
       * number of bytes per pixel, derived from format->bpp */
      uint8_t *color = (uint8_t*)&rasterizer->color_native;
      unsigned int bytes = rasterizer->format->bpp/8;
      INIT_ENV;

      switch (bytes)
      {
        case 1:
          {
          uint8_t col = *color;
          if (width == 1)
          for (unsigned int y = y0; y <= (unsigned)y1; y++)
          {
            *dst = col;
            dst += blit_stride;
          }
          else
          for (unsigned int y = y0; y <= (unsigned)y1; y++)
          {
#if 0
            uint8_t *dst_i = (uint8_t*)&dst[0];
            for (int x = 0; x < width; x++) *dst_i++ = col;
#else
            memset (dst, col, width);
#endif
            dst += blit_stride;
          }
          }
          break;
        case 2:
          {
            uint16_t val = ((uint16_t*)color)[0];
            for (unsigned int y = y0; y <= (unsigned)y1; y++)
            {
              uint16_t *dst_i = (uint16_t*)&dst[0];
              for (int x = 0; x < width; x++)
                 *dst_i++ = val;
              dst += blit_stride;
            }
          }
          break;
        case 3:
          for (unsigned int y = y0; y <= (unsigned)y1; y++)
          {
            uint8_t *dst_i = (uint8_t*)&dst[0];
            for (int x = 0; x < width; x++)
                for (unsigned int b = 0; b < 3; b++) *dst_i++ = color[b];
            dst += blit_stride;
          }
          break;
        case 4:
          {
            uint32_t val = ((uint32_t*)color)[0];
            if (width == 1)
            for (unsigned int y = y0; y <= (unsigned)y1; y++)
            {
              *((uint32_t*)&dst[0]) = val;
              dst += blit_stride;
            }
            else
            for (unsigned int y = y0; y <= (unsigned)y1; y++)
            {
              ctx_span_set_colorbu ((uint32_t*)&dst[0], val, width);
              dst += blit_stride;
            }
          }
          break;
        case 5:
          for (unsigned int y = y0; y <= (unsigned)y1; y++)
          {
            uint8_t *dst_i = (uint8_t*)&dst[0];
            for (int x = 0; x < width; x++)
               for (unsigned int b = 0; b < 5; b++) *dst_i++ = color[b];
            dst += blit_stride;
          }
          break;
        case 16:
          for (unsigned int y = y0; y <= (unsigned)y1; y++)
          {
            uint8_t *dst_i = (uint8_t*)&dst[0];
            for (int x = 0; x < width; x++)for (unsigned int b = 0; b < 16; b++) *dst_i++ = color[b];
            dst += blit_stride;
          }
          break;
        default:
          /* any other pixel size: byte-wise copy of `bytes` bytes per pixel */
          for (unsigned int y = y0; y <= (unsigned)y1; y++)
          {
            uint8_t *dst_i = (uint8_t*)&dst[0];
            for (int x = 0; x < width; x++)
              for (unsigned int b = 0; b < bytes; b++)
                *dst_i++ = color[b];
            dst += blit_stride;
          }
      }
      return;
    }
    case CTX_COV_PATH_RGBA8_OVER:
    {
      /* source-over with a solid color; the two operands are read from
       * 32 bit words 3 and 4 of rasterizer->color and the alpha from byte 3
       * (presumably precomputed green/alpha and red/blue halves of the
       * color - confirm against the code that fills rasterizer->color) */
      uint32_t si_ga_full = ((uint32_t*)rasterizer->color)[3];
      uint32_t si_rb_full = ((uint32_t*)rasterizer->color)[4];
      uint32_t si_a  = rasterizer->color[3];
      INIT_ENV;

      if (CTX_UNLIKELY(width == 1))
      {
        for (unsigned int y = y0; y <= (unsigned)y1; y++)
        {
          ((uint32_t*)(dst))[0] = ctx_over_RGBA8_full_2 (
             ((uint32_t*)(dst))[0], si_ga_full, si_rb_full, si_a);
          dst += blit_stride;
        }
      }
      else
      {
        for (unsigned int y = y0; y <= (unsigned)y1; y++)
        {
          uint32_t *dst_i = (uint32_t*)&dst[0];
          for (int i = 0; i < width; i++)
          {
            dst_i[i] = ctx_over_RGBA8_full_2 (dst_i[i], si_ga_full, si_rb_full, si_a);
          }
          dst += blit_stride;
        }
      }
      return;
    }
    case CTX_COV_PATH_RGBA8_COPY_FRAGMENT:
    {
      /* copy the output of a fragment (source sampling) function */
      CtxFragment fragment = rasterizer->fragment;
      CtxMatrix *transform = &rasterizer->state->gstate.source_fill.transform;
      CtxExtend extend = rasterizer->state->gstate.extend;
      INIT_ENV;

      /* dedicated whole-rectangle routines for the two bilinear image
       * fragments; the trailing 1 selects their copy mode */
      if (fragment == ctx_fragment_image_rgba8_RGBA8_bi_scale)
      {
        ctx_RGBA8_image_rgba8_RGBA8_bi_scaled_fill_rect (rasterizer, x0, y0, x1,
y1, 1);
        return;
      }
      else if (fragment == ctx_fragment_image_rgba8_RGBA8_bi_affine && extend == CTX_EXTEND_NONE)
      {
        ctx_RGBA8_image_rgba8_RGBA8_bi_affine_fill_rect (rasterizer, x0, y0, x1,
y1, 1);
        return;
      }

      if (CTX_LIKELY(ctx_matrix_no_perspective (transform)))
      {
        /* without perspective the source coordinates are initialised once
         * and stepped from row to row.
         * NOTE(review): the row step is (-vd, +ud), i.e. the per-pixel
         * step rotated by 90 degrees; that presumably only matches
         * transforms made of rotation and uniform scale - confirm for
         * sheared / non-uniformly scaled sources. */
        int scan = rasterizer->scanline/CTX_FULL_AA;
        float u0, v0, ud, vd, w0, wd;
        ctx_init_uv (rasterizer, x0, scan, &u0, &v0, &w0, &ud, &vd, &wd);
        for (unsigned int y = y0; y <= (unsigned)y1; y++)
        {
          fragment (rasterizer, u0, v0, w0, &dst[0], width, ud, vd, wd);
          u0 -= vd;
          v0 += ud;
          dst += blit_stride;
        }
      }
      else
      {
        /* perspective: recompute the source coordinates for every row */
        int scan = rasterizer->scanline/CTX_FULL_AA;
        for (unsigned int y = y0; y <= (unsigned)y1; y++)
        {
          float u0, v0, ud, vd, w0, wd;
          ctx_init_uv (rasterizer, x0, scan + y-y0, &u0, &v0, &w0, &ud, &vd, &wd);
          fragment (rasterizer, u0, v0, w0, &dst[0], width, ud, vd, wd);
          dst += blit_stride;
        }
      }
      return;
    }
    case CTX_COV_PATH_RGBA8_OVER_FRAGMENT:
    {
      /* source-over of a fragment function's output; the dedicated
       * bilinear routines are tried first (trailing 0: not copy mode),
       * and they return before INIT_ENV is needed */
      CtxFragment fragment = rasterizer->fragment;
      CtxExtend extend = rasterizer->state->gstate.extend;
#if 0
      if (fragment == ctx_fragment_image_rgba8_RGBA8_nearest_copy)
      {
        ctx_RGBA8_image_rgba8_RGBA8_nearest_fill_rect_copy (rasterizer, x0, y0, x1, y1, 0);
        return;
      }
      else
#endif
      if (fragment == ctx_fragment_image_rgba8_RGBA8_bi_scale)
      {
        ctx_RGBA8_image_rgba8_RGBA8_bi_scaled_fill_rect (rasterizer, x0, y0, x1,
y1, 0);
        return;
      }
      else if (fragment == ctx_fragment_image_rgba8_RGBA8_bi_affine && extend == CTX_EXTEND_NONE)
      {
        ctx_RGBA8_image_rgba8_RGBA8_bi_affine_fill_rect (rasterizer, x0, y0, x1,
y1, 0);
        return;
      }

      INIT_ENV;
      /* generic fragment compositing over all rows (last argument is the
       * row count) */
      ctx_RGBA8_source_over_normal_full_cov_fragment (rasterizer,
                         &dst[0], NULL, x0, NULL, width, y1-y0+1);
      return;
    }
    break;
    default:
    break;
    }
  }
  else
  {
    /* partial coverage: blend each pixel by cov */
    switch (comp)
    {
    case CTX_COV_PATH_RGBA8_COPY:
    {
      /* interpolate between destination and color by cov */
      uint32_t color;
      memcpy (&color, (uint32_t*)rasterizer->color, sizeof (color));
      INIT_ENV;
      {
        for (unsigned int y = y0; y <= (unsigned)y1; y++)
        {
          uint32_t *dst_i = (uint32_t*)&dst[0];
          for (unsigned int i = 0; i < (unsigned)width; i++)
          {
            dst_i[i] = ctx_lerp_RGBA8 (dst_i[i], color, cov);
          }
          dst += blit_stride;
        }
        return;
      }
    }
    case CTX_COV_PATH_RGBAF_COPY:
    {
      /* same as above for four float components per pixel */
      float *color = ((float*)rasterizer->color);
      float covf = cov / 255.0f;
      INIT_ENV;
      {
        for (unsigned int y = y0; y <= (unsigned)y1; y++)
        {
          float *dst_f = (float*)&dst[0];
          for (unsigned int i = 0; i < (unsigned)width; i++)
          {
            for (unsigned int c = 0; c < 4; c++)
              dst_f[i*4+c] = ctx_lerpf (dst_f[i*4+c], color[c], covf);
          }
          dst += blit_stride;
        }
        return;
      }
    }
    case CTX_COV_PATH_RGBA8_OVER:
    {
      /* source-over of a solid color at coverage cov */
      uint32_t color;
      memcpy (&color, (uint32_t*)rasterizer->color, sizeof (color));
      INIT_ENV;
      if (width == 1)
      {
        for (unsigned int y = y0; y <= (unsigned)y1; y++)
        {
          uint32_t *dst_i = (uint32_t*)&dst[0];
          *dst_i = ctx_over_RGBA8 (*dst_i, color, cov);
          dst += blit_stride;
        }
      }
      else
      {
        for (unsigned int y = y0; y <= (unsigned)y1; y++)
        {
          uint32_t *dst_i = (uint32_t*)&dst[0];
          for (unsigned int i = 0; i < (unsigned)width; i++)
          {
            dst_i[i] = ctx_over_RGBA8 (dst_i[i], color, cov);
          }
          dst += blit_stride;
        }
      }
      return;
    }
    break;
    default:
    break;
    }
  }

  /* reached only when no specialised case above returned */
  INIT_ENV;
#undef INIT_ENV


  /* fallback: build one row of constant coverage (a variable length array
   * of `width` bytes on the stack) and hand every row to the generic
   * coverage compositor, advancing rasterizer->scanline by one pixel row
   * (CTX_FULL_AA subsamples) each time */
  {
    uint8_t coverage[width];
    memset (coverage, cov, sizeof (coverage) );
    for (unsigned int y = y0; y <= (unsigned)y1; y++)
    {
      rasterizer->apply_coverage (rasterizer, &dst[0], rasterizer->color, x0, coverage, width);
      rasterizer->scanline += CTX_FULL_AA;
      dst += blit_stride;
    }
  }
}

/* ctx_composite_fill_rect:
 * @rasterizer:       rasterizer whose target buffer is written
 * @x0, @y0, @x1, @y1: rectangle edges in (fractional) pixel coordinates;
 *                    x1/y1 are exclusive
 * @cov:              NOTE(review): not read anywhere in this function -
 *                    the interior is always filled at coverage 255 and the
 *                    callers visible in this file pass 255; confirm before
 *                    relying on other values.
 *
 * Fills an axis-aligned rectangle whose edges may fall between pixels.
 *
 * When all four coordinates are within 0.01 of an integer the rectangle is
 * handed directly to ctx_composite_fill_rect_aligned (which takes inclusive
 * end coordinates, hence the -1).  Otherwise the rectangle is clamped to
 * the blit area and split into:
 *   - an optional top row and bottom row, composited through
 *     rasterizer->apply_coverage with a per-pixel coverage array,
 *   - optional left / right columns and the interior, drawn with
 *     ctx_composite_fill_rect_aligned at constant coverage.
 */
void
CTX_SIMD_SUFFIX (ctx_composite_fill_rect) (CtxRasterizer *rasterizer,
                          float          x0,
                          float          y0,
                          float          x1,
                          float          y1,
                          uint8_t        cov);

void
CTX_SIMD_SUFFIX (ctx_composite_fill_rect) (CtxRasterizer *rasterizer,
                          float          x0,
                          float          y0,
                          float          x1,
                          float          y1,
                          uint8_t        cov)
{
  if((ctx_fmod1f (x0) < 0.01f || ctx_fmod1f(x0) > 0.99f) &&
     (ctx_fmod1f (y0) < 0.01f || ctx_fmod1f(y0) > 0.99f) &&
     (ctx_fmod1f (x1) < 0.01f || ctx_fmod1f(x1) > 0.99f) &&
     (ctx_fmod1f (y1) < 0.01f || ctx_fmod1f(y1) > 0.99f))
  {
    /* best-case scenario axis aligned rectangle */
    ctx_composite_fill_rect_aligned (rasterizer, x0, y0, x1-1, y1-1, 255);
    return;
  }

  int blit_x = rasterizer->blit_x;
  int blit_y = rasterizer->blit_y;
  int blit_stride = rasterizer->blit_stride;
  int blit_width = rasterizer->blit_width;
  int blit_height = rasterizer->blit_height;

  /* clamp to the blit area (x1/y1 are exclusive here) */
  x0 = ctx_maxf (x0, blit_x);
  y0 = ctx_maxf (y0, blit_y);
  x1 = ctx_minf (x1, blit_x + blit_width );
  y1 = ctx_minf (y1, blit_y + blit_height );

  /* coverage of the partially covered border pixels, derived from the
   * fractional part of each edge */
  uint8_t left = 255-ctx_fmod1f (x0) * 255;
  uint8_t top  = 255-ctx_fmod1f (y0) * 255;
  uint8_t right  = ctx_fmod1f (x1) * 255;
  uint8_t bottom = ctx_fmod1f (y1) * 255;

  /* snap to whole pixels: x1 rounds up once its fraction reaches 1/8,
   * y1 is always advanced by a full row (15/15.0 == 1.0) */
  x0 = ctx_floorf (x0);
  y0 = ctx_floorf (y0);
  x1 = ctx_floorf (x1+7/8.0);
  y1 = ctx_floorf (y1+15/15.0);

  /* Note the asymmetry: top/bottom rows are treated as partial when their
   * coverage is below 255, left/right columns whenever it is above 0 - so
   * a fully covered left column (left == 255) is still drawn separately.
   * NOTE(review): because y1 is always advanced by one row, a y1 with a
   * zero fractional part yields bottom == 0 and a bottom row composited at
   * zero coverage - confirm that this row cannot lie beyond the blit area.
   */
  int has_top    = (top < 255);
  int has_bottom = (bottom <255);
  int has_right  = (right >0);
  int has_left   = (left >0);

  int width = x1 - x0;

  if ((width >0))
  {
     uint8_t *dst = ( (uint8_t *) rasterizer->buf);
     /* one entry per column; two spare entries of headroom */
     uint8_t coverage[width+2];
     /* bounds of the interior, i.e. the rectangle minus partial borders */
     uint32_t x0i = x0+has_left;
     uint32_t x1i = x1-has_right;
     uint32_t y0i = y0+has_top;
     uint32_t y1i = y1-has_bottom;
     dst += (((int)y0) - blit_y) * blit_stride;
     dst += ((int)x0) * rasterizer->format->bpp/8;

     if (has_top)
     {
       /* top row: corner pixels get the product of both edge coverages */
       int i = 0;
       if (has_left)
       {
         coverage[i++] = (top * left + 255) >> 8;
       }
       for (unsigned int x = x0i; x < x1i; x++)
         coverage[i++] = top;
       if (has_right)
         coverage[i++]= (top * right + 255) >> 8;

       rasterizer->apply_coverage (rasterizer, dst, rasterizer->color, x0, coverage, width);
       dst += blit_stride;
     }

     if (y1-y0-has_top-has_bottom > 0)
     {
       /* rows between the partial top and bottom rows */
       if (has_left)
         ctx_composite_fill_rect_aligned (rasterizer, x0, y0i,
                                                      x0, y1i-1, left);
       if (has_right)
         ctx_composite_fill_rect_aligned (rasterizer, x1-1, y0i,
                                                      x1-1, y1i-1, right);

       if (width - has_left - has_right > 0)
         ctx_composite_fill_rect_aligned (rasterizer, x0i,y0i,
                                             x1i-1,y1i-1,255);

       /* skip dst past the rows just drawn so it points at the bottom row */
       dst += blit_stride * (y1i-y0i);
     }

     if (has_bottom)
     {
       /* bottom row, built exactly like the top row.  The right corner
        * entry is now guarded by has_right as it is for the top row;
        * previously it was written unconditionally, which - right being 0
        * in that case - stored an extra zero just past the `width`
        * entries that apply_coverage is asked to process. */
       int i = 0;
       if (has_left)
         coverage[i++] = (bottom * left + 255) >> 8;
       for (unsigned int x = x0i; x < x1i; x++)
         coverage[i++] = bottom;
       if (has_right)
         coverage[i++]= (bottom * right + 255) >> 8;

       rasterizer->apply_coverage (rasterizer,dst, rasterizer->color, x0, coverage, width);
     }
  }
}

void
CTX_SIMD_SUFFIX(ctx_composite_stroke_rect) (CtxRasterizer *rasterizer,
                           float          x0,
                           float          y0,
                           float          x1,
                           float          y1,
                           float          line_width);

/* ctx_composite_stroke_rect:
 * Strokes the outline of the rectangle (x0,y0)-(x1,y1) with the given
 * line width by filling a handful of rectangles.
 *
 * Two strategies are used:
 *  - if the rounded line width has (almost) no fractional part and the
 *    corner coordinates sit on the pixel grid - shifted by half a pixel
 *    for odd widths - the four sides are drawn with the pixel-aligned
 *    filler;
 *  - otherwise four sides and four corner squares are drawn with the
 *    sub-pixel capable ctx_composite_fill_rect.
 */
void
CTX_SIMD_SUFFIX(ctx_composite_stroke_rect) (CtxRasterizer *rasterizer,
                           float          x0,
                           float          y0,
                           float          x1,
                           float          y1,
                           float          line_width)
{
  const float lw_fraction = ctx_fmod1f (line_width);
  const int   lw          = ctx_floorf (line_width + 0.5f);
  /* whole-numbered widths, split by parity of the rounded width */
  const int   even_width  = (lw % 2 == 0) && (lw_fraction < 0.1);
  const int   odd_width   = (lw % 2 == 1) && (lw_fraction < 0.1);

  /* odd widths are centred on pixel centres rather than pixel edges */
  float off_x = 0;
  float off_y = 0;
  if (odd_width)
  {
    off_x = 0.5f;
    off_y = (CTX_FULL_AA/2)*1.0 / (CTX_FULL_AA);
  }

  /* do all four (offset) coordinates lie within 0.01 of an integer? */
  int on_grid = 0;
  if (odd_width || even_width)
  {
    const float fx0 = ctx_fmod1f (x0-off_x);
    const float fy0 = ctx_fmod1f (y0-off_y);
    const float fx1 = ctx_fmod1f (x1-off_x);
    const float fy1 = ctx_fmod1f (y1-off_y);

    on_grid = (fx0 < 0.01f || fx0 > 0.99f) &&
              (fy0 < 0.01f || fy0 > 0.99f) &&
              (fx1 < 0.01f || fx1 > 0.99f) &&
              (fy1 < 0.01f || fy1 > 0.99f);
  }

  if (!on_grid)
  {
    /* general case: inner/outer coordinates of the stroke band */
    const float hw = line_width/2;

    const float left_out   = x0-hw;
    const float left_in    = x0+hw;
    const float right_in   = x1-hw;
    const float right_out  = x1+hw;
    const float top_out    = y0-hw;
    const float top_in     = y0+hw;
    const float bottom_in  = y1-hw;
    const float bottom_out = y1+hw;

    /* sides: top, bottom, left, right */
    ctx_composite_fill_rect (rasterizer, left_in,  top_out,   right_in,  top_in,     255);
    ctx_composite_fill_rect (rasterizer, left_in,  bottom_in, right_in,  bottom_out, 255);
    ctx_composite_fill_rect (rasterizer, left_out, top_in,    left_in,   bottom_in,  255);
    ctx_composite_fill_rect (rasterizer, right_in, top_in,    right_out, bottom_in,  255);

    /* corners: top-left, bottom-right, top-right, bottom-left */
    ctx_composite_fill_rect (rasterizer, left_out, top_out,   left_in,   top_in,     255);
    ctx_composite_fill_rect (rasterizer, right_in, bottom_in, right_out, bottom_out, 255);
    ctx_composite_fill_rect (rasterizer, right_in, top_out,   right_out, top_in,     255);
    ctx_composite_fill_rect (rasterizer, left_out, bottom_in, left_in,   bottom_out, 255);
    return;
  }

  /* pixel aligned case: the band extends `before` pixels towards smaller
   * coordinates and `after` pixels towards larger ones */
  const int before = lw/2;
  const int after  = even_width ? lw/2 : lw/2+1;

  /* the aligned filler takes inclusive integer bounds; the float
   * expressions are converted to int here */
  const int outer_left   = x0-before;
  const int outer_top    = y0-before;
  const int outer_right  = x1+after-1;
  const int outer_bottom = y1+after-1;
  const int side_top     = y0+1;

  /* top */
  ctx_composite_fill_rect_aligned (rasterizer,
                                   outer_left, outer_top,
                                   outer_right, y0+after-1, 255);
  /* bottom */
  ctx_composite_fill_rect_aligned (rasterizer,
                                   outer_left, y1-before,
                                   x1-before-1, outer_bottom, 255);
  /* left */
  ctx_composite_fill_rect_aligned (rasterizer,
                                   outer_left, side_top,
                                   x0+after-1, y1-before, 255);
  /* right */
  ctx_composite_fill_rect_aligned (rasterizer,
                                   x1-before, side_top,
                                   outer_right, outer_bottom, 255);
}


#endif

/* ctx_composite_setup:
 * Prepares the rasterizer for compositing with the current fill source.
 *
 * When no compositing function is installed yet (comp_op == NULL) and
 * gradient caching is compiled in, per-source preparation is done first:
 * gradients prime the gradient cache, textures get their source transform
 * computed.  The pixel format's own setup hook is invoked in every case.
 */
static void
CTX_SIMD_SUFFIX (ctx_composite_setup) (CtxRasterizer *rasterizer)
{
  if (CTX_UNLIKELY (rasterizer->comp_op==NULL))
  {
#if CTX_GRADIENTS
#if CTX_GRADIENT_CACHE
    const int source_type = rasterizer->state->gstate.source_fill.type;

    if (source_type == CTX_SOURCE_LINEAR_GRADIENT ||
        source_type == CTX_SOURCE_RADIAL_GRADIENT)
    {
      ctx_gradient_cache_prime (rasterizer);
    }
    else if (source_type == CTX_SOURCE_TEXTURE)
    {
      CtxMatrix *fill_transform =
        &rasterizer->state->gstate.source_fill.transform;

      /* combine the transform set on the source with the current
       * user transform */
      _ctx_matrix_multiply (fill_transform,
                            &rasterizer->state->gstate.source_fill.set_transform,
                            &rasterizer->state->gstate.transform);

      /* keep a copy of the combined matrix in transform_inv, then invert
       * `transform` in place */
      rasterizer->state->gstate.source_fill.transform_inv = *fill_transform;
      ctx_matrix_invert (fill_transform);

#if 0
      if (!rasterizer->state->gstate.source_fill.texture.buffer->color_managed)
      {
        _ctx_texture_prepare_color_management (rasterizer->state,
        rasterizer->state->gstate.source_fill.texture.buffer);
      }
#endif
    }
#endif
#endif
  }

  rasterizer->format->setup (rasterizer);
}


/* ctx_pixel_formats:
 * Table of the pixel formats compiled into this build, one entry per
 * enabled CTX_ENABLE_* switch, terminated by a CTX_FORMAT_NONE entry.
 *
 * Entries are positional initializers of CtxPixelFormatInfo.  Reading the
 * rows, the columns are presumably: format id, component count, bits per
 * pixel, bytes per pixel while compositing, two dither parameters, the
 * format compositing happens in, converter to that format, converter back,
 * coverage compositor and setup hook - confirm against the
 * CtxPixelFormatInfo declaration.
 *
 * For the gray formats two variants exist: with CTX_NATIVE_GRAYA8 they
 * composite in GRAYA8, otherwise in RGBA8.
 */
CtxPixelFormatInfo CTX_SIMD_SUFFIX(ctx_pixel_formats)[]=
{
#if CTX_ENABLE_RGBA8
  {
    CTX_FORMAT_RGBA8, 4, 32, 4, 0, 0, CTX_FORMAT_RGBA8,
    NULL, NULL, NULL, ctx_setup_RGBA8
  },
#endif
#if CTX_ENABLE_BGRA8
  {
    CTX_FORMAT_BGRA8, 4, 32, 4, 0, 0, CTX_FORMAT_RGBA8,
    ctx_BGRA8_to_RGBA8, ctx_RGBA8_to_BGRA8, ctx_composite_BGRA8, ctx_setup_RGBA8,
  },
#endif
#if CTX_ENABLE_GRAYF
  {
    CTX_FORMAT_GRAYF, 1, 32, 4 * 2, 0, 0, CTX_FORMAT_GRAYAF,
    NULL, NULL, ctx_composite_GRAYF, ctx_setup_GRAYAF,
  },
#endif
#if CTX_ENABLE_GRAYAF
  {
    CTX_FORMAT_GRAYAF, 2, 64, 4 * 2, 0, 0, CTX_FORMAT_GRAYAF,
    NULL, NULL, NULL, ctx_setup_GRAYAF,
  },
#endif
#if CTX_ENABLE_RGBAF
  {
    CTX_FORMAT_RGBAF, 4, 128, 4 * 4, 0, 0, CTX_FORMAT_RGBAF,
    NULL, NULL, NULL, ctx_setup_RGBAF,
  },
#endif
#if CTX_ENABLE_RGB8
  {
    CTX_FORMAT_RGB8, 3, 24, 4, 0, 0, CTX_FORMAT_RGBA8,
    ctx_RGB8_to_RGBA8, ctx_RGBA8_to_RGB8, ctx_composite_convert, ctx_setup_RGB8,
  },
#endif
#if CTX_ENABLE_GRAY1
  {
#if CTX_NATIVE_GRAYA8
    CTX_FORMAT_GRAY1, 1, 1, 2, 1, 1, CTX_FORMAT_GRAYA8,
    ctx_GRAY1_to_GRAYA8, ctx_GRAYA8_to_GRAY1, ctx_composite_convert, ctx_setup_GRAY1,
#else
    CTX_FORMAT_GRAY1, 1, 1, 4, 1, 1, CTX_FORMAT_RGBA8,
    ctx_GRAY1_to_RGBA8, ctx_RGBA8_to_GRAY1, ctx_composite_convert, ctx_setup_RGB,
#endif
  },
#endif
#if CTX_ENABLE_GRAY2
  {
#if CTX_NATIVE_GRAYA8
    CTX_FORMAT_GRAY2, 1, 2, 2, 4, 4, CTX_FORMAT_GRAYA8,
    ctx_GRAY2_to_GRAYA8, ctx_GRAYA8_to_GRAY2, ctx_composite_convert, ctx_setup_GRAY2,
#else
    CTX_FORMAT_GRAY2, 1, 2, 4, 4, 4, CTX_FORMAT_RGBA8,
    ctx_GRAY2_to_RGBA8, ctx_RGBA8_to_GRAY2, ctx_composite_convert, ctx_setup_RGB,
#endif
  },
#endif
#if CTX_ENABLE_GRAY4
  {
#if CTX_NATIVE_GRAYA8
    CTX_FORMAT_GRAY4, 1, 4, 2, 16, 16, CTX_FORMAT_GRAYA8,
    ctx_GRAY4_to_GRAYA8, ctx_GRAYA8_to_GRAY4, ctx_composite_convert, ctx_setup_GRAY4,
#else
    /* composites via the RGBA8 converters and ctx_setup_RGB like the other
     * non-native gray entries, so the compositing format is RGBA8 (this
     * entry used to name CTX_FORMAT_GRAYA8 here, contradicting its own
     * converters and 4 byte compositing pixel size) */
    CTX_FORMAT_GRAY4, 1, 4, 4, 16, 16, CTX_FORMAT_RGBA8,
    ctx_GRAY4_to_RGBA8, ctx_RGBA8_to_GRAY4, ctx_composite_convert, ctx_setup_RGB,
#endif
  },
#endif
#if CTX_ENABLE_GRAY8
  {
#if CTX_NATIVE_GRAYA8
    CTX_FORMAT_GRAY8, 1, 8, 2, 0, 0, CTX_FORMAT_GRAYA8,
    ctx_GRAY8_to_GRAYA8, ctx_GRAYA8_to_GRAY8, ctx_composite_convert, ctx_setup_GRAY8,
#else
    CTX_FORMAT_GRAY8, 1, 8, 4, 0, 0, CTX_FORMAT_RGBA8,
    ctx_GRAY8_to_RGBA8, ctx_RGBA8_to_GRAY8, ctx_composite_convert, ctx_setup_RGB,
#endif
  },
#endif
#if CTX_ENABLE_GRAYA8
  {
#if CTX_NATIVE_GRAYA8
    CTX_FORMAT_GRAYA8, 2, 16, 2, 0, 0, CTX_FORMAT_GRAYA8,
    ctx_GRAYA8_to_RGBA8, ctx_RGBA8_to_GRAYA8, NULL, ctx_setup_GRAYA8,
#else
    CTX_FORMAT_GRAYA8, 2, 16, 4, 0, 0, CTX_FORMAT_RGBA8,
    ctx_GRAYA8_to_RGBA8, ctx_RGBA8_to_GRAYA8, ctx_composite_convert, ctx_setup_RGB,
#endif
  },
#endif
#if CTX_ENABLE_RGB332
  {
    CTX_FORMAT_RGB332, 3, 8, 4, 10, 12, CTX_FORMAT_RGBA8,
    ctx_RGB332_to_RGBA8, ctx_RGBA8_to_RGB332,
    ctx_composite_RGB332, ctx_setup_RGB332,
  },
#endif
#if CTX_ENABLE_RGB565
  {
    CTX_FORMAT_RGB565, 3, 16, 4, 16, 32, CTX_FORMAT_RGBA8,
    ctx_RGB565_to_RGBA8, ctx_RGBA8_to_RGB565,
    ctx_composite_RGB565, ctx_setup_RGB565,
  },
#endif
#if CTX_ENABLE_RGB565_BYTESWAPPED
  {
    CTX_FORMAT_RGB565_BYTESWAPPED, 3, 16, 4, 16, 32, CTX_FORMAT_RGBA8,
    ctx_RGB565_BS_to_RGBA8,
    ctx_RGBA8_to_RGB565_BS,
    ctx_composite_RGB565_BS, ctx_setup_RGB565,
  },
#endif
#if CTX_ENABLE_CMYKAF
  {
    CTX_FORMAT_CMYKAF, 5, 160, 4 * 5, 0, 0, CTX_FORMAT_CMYKAF,
    NULL, NULL, NULL, ctx_setup_CMYKAF,
  },
#endif
#if CTX_ENABLE_CMYKA8
  {
    CTX_FORMAT_CMYKA8, 5, 40, 4 * 5, 0, 0, CTX_FORMAT_CMYKAF,
    NULL, NULL, ctx_composite_CMYKA8, ctx_setup_CMYKA8,
  },
#endif
#if CTX_ENABLE_CMYK8
  {
    CTX_FORMAT_CMYK8, 5, 32, 4 * 5, 0, 0, CTX_FORMAT_CMYKAF,
    NULL, NULL, ctx_composite_CMYK8, ctx_setup_CMYK8,
  },
#endif
#if CTX_ENABLE_YUV420
  {
    CTX_FORMAT_YUV420, 1, 8, 4, 0, 0, CTX_FORMAT_RGBA8,
    NULL, NULL, ctx_composite_convert, ctx_setup_RGB,
  },
#endif
  /* terminator */
  {
    CTX_FORMAT_NONE
  }
};




#endif // CTX_COMPOSITE

#endif // CTX_IMPLEMENTATION

#if CTX_IMPLEMENTATION || CTX_SIMD_BUILD
#if CTX_COMPOSITE 

/* Half of the CTX_FULL_AA vertical subsample count, and that half plus
 * one.  ctx_edge2_compare2 uses them to project an edge's x value
 * CTX_AA_HALFSTEP2 subsamples backwards and CTX_AA_HALFSTEP subsamples
 * forwards. */
#define CTX_AA_HALFSTEP2   (CTX_FULL_AA/2)
#define CTX_AA_HALFSTEP    ((CTX_FULL_AA/2)+1)

/* Orders two CtxSegment entries by their data.s16[1] value (the edge's
 * starting y as used by the edge feeder).  Returns a negative, zero or
 * positive value in the manner of a qsort comparator. */
CTX_INLINE static int ctx_compare_edges (const void *ap, const void *bp)
{
  const int a_start = ((const CtxSegment *) ap)->data.s16[1];
  const int b_start = ((const CtxSegment *) bp)->data.s16[1];
  return a_start - b_start;
}

/* Partition step of the edge quicksort.
 *
 * Rearranges A[low..high] around the value of its middle element (copied,
 * so swaps cannot disturb it) using two cursors that move towards each
 * other.  Returns the first index of the upper partition; everything
 * before it compares <= everything from it onwards. */
CTX_INLINE static int ctx_edge_qsort_partition (CtxSegment *A, int low, int high)
{
  const CtxSegment pivot = A[ (high+low) /2];
  int lo_cursor = low;
  int hi_cursor = high;

  while (lo_cursor <= hi_cursor)
    {
      /* skip elements already on the correct side of the pivot */
      for (; ctx_compare_edges (&A[lo_cursor], &pivot) < 0; lo_cursor++) { }
      for (; ctx_compare_edges (&pivot, &A[hi_cursor]) < 0; hi_cursor--) { }

      if (lo_cursor > hi_cursor)
        break;

      CtxSegment held = A[lo_cursor];
      A[lo_cursor] = A[hi_cursor];
      A[hi_cursor] = held;
      lo_cursor++;
      hi_cursor--;
    }
  return lo_cursor;
}

/* Recursive quicksort of entries[low..high] (inclusive), ordered by
 * ctx_compare_edges.
 *
 * The partition step returns the first index `p` of the upper partition,
 * so the two halves are [low, p-1] and [p, high].  Each half is only
 * recursed into when it holds at least two elements; the upper half was
 * previously guarded by `low < high`, which also recursed into empty and
 * single-element ranges (harmless for the result, but wasted calls). */
static inline void ctx_edge_qsort (CtxSegment *entries, int low, int high)
{
  int p = ctx_edge_qsort_partition (entries, low, high);
  if (low < p - 1)
    { ctx_edge_qsort (entries, low, p - 1); }
  if (p < high)
    { ctx_edge_qsort (entries, p, high); }
}

/* Sorts the rasterizer's edge list in place with ctx_edge_qsort.
 *
 * Lists with fewer than two edges are already sorted and are left alone;
 * without this check the partition step would still read entries[0] as
 * its pivot even when the list is empty. */
static inline void ctx_rasterizer_sort_edges (CtxRasterizer *rasterizer)
{
  if (rasterizer->edge_list.count < 2)
    return;
  ctx_edge_qsort ((CtxSegment*)& (rasterizer->edge_list.entries[0]), 0, rasterizer->edge_list.count-1);
}

/* Removes active edges that end above the current scanline and counts
 * those that end within it.
 *
 * An edge whose last y (data.s16[3]-1) lies before rasterizer->scanline is
 * dropped from the active list by moving the last active entry into its
 * slot; the needs_aa* counters are decremented to match the edge's slope.
 * Edges ending before the next scanline start bump
 * rasterizer->ending_edges instead. */
static inline void ctx_rasterizer_discard_edges (CtxRasterizer *rasterizer)
{
  const int   row_start    = rasterizer->scanline;
  const int   row_end      = row_start + CTX_FULL_AA;
  const int   slope_limit3 = CTX_RASTERIZER_AA_SLOPE_LIMIT3_FAST_AA;
  CtxSegment *segment_pool = (CtxSegment*)(rasterizer->edge_list.entries);
  int        *active       = rasterizer->edges;
  unsigned int slot = 0;

  while (slot < rasterizer->active_edges)
    {
      const CtxSegment *segment = &segment_pool[active[slot]];
      const int last_y = segment->data.s16[3]-1;

      if (last_y >= row_start)
        {
          /* edge is still alive on this scanline */
          if (last_y < row_end)
            rasterizer->ending_edges++;
          slot++;
          continue;
        }

      /* edge is finished: undo its contribution to the AA counters */
      const int steepness = abs (segment->delta);
      rasterizer->needs_aa3  -= (steepness > slope_limit3);
      rasterizer->needs_aa5  -= (steepness > CTX_RASTERIZER_AA_SLOPE_LIMIT5);
      rasterizer->needs_aa15 -= (steepness > CTX_RASTERIZER_AA_SLOPE_LIMIT15);

      /* fill the hole with the last active edge and re-examine this slot */
      active[slot] = active[rasterizer->active_edges-1];
      rasterizer->active_edges--;
    }
#if 0
  // perhaps we should - but for 99% of the cases we do not need to, so we skip it
  for (int i = 0; i < rasterizer->pending_edges; i++)
    {
      int edge_end = ((CtxSegment*)(rasterizer->edge_list.entries))[rasterizer->edges[CTX_MAX_EDGES-1-i]].data.s16[3]-1;
      if (edge_end < scanline + CTX_FULL_AA)
        rasterizer->ending_edges++;
    }
#endif
}

/* Advances the rasterizer by `count` subsample rows: rasterizer->scanline
 * is increased and the x value (val) of every active and every pending
 * edge is moved along its slope (delta) by the same number of rows.
 *
 * Active edge indices occupy the front of rasterizer->edges, pending ones
 * the last pending_edges slots before CTX_MAX_EDGES. */
inline static void ctx_rasterizer_increment_edges (CtxRasterizer *rasterizer, int count)
{
  CtxSegment *__restrict__ pool = (CtxSegment*)(rasterizer->edge_list.entries);
  const unsigned int n_active   = rasterizer->active_edges;
  const unsigned int n_pending  = rasterizer->pending_edges;
  const int *active_ids  = rasterizer->edges;
  const int *pending_ids = rasterizer->edges + (CTX_MAX_EDGES - n_pending);

  rasterizer->scanline += count;

  for (unsigned int k = 0; k < n_active; k++)
    {
      CtxSegment *edge = &pool[active_ids[k]];
      edge->val += edge->delta * count;
    }
  for (unsigned int k = 0; k < n_pending; k++)
    {
      CtxSegment *edge = &pool[pending_ids[k]];
      edge->val += edge->delta * count;
    }
}

/* Edge feeding (see ctx_rasterizer_feed_edges):
   edges are fed up to rasterizer->scanline, while a pending buffer of
   edges is kept that covers the full incoming scanline.
   Edges are fed until the start of the scanline, and the need for
   anti-aliasing is checked across all pending + active edges; then edges
   are either fed again up to the middle of the scanline (non-AA case)
   or the scanline is rendered directly (AA case).
*/
/* Insertion sort of the edge index array `entries` (length `count`) by
 * ascending segments[index].val.  Stable, and cheap for lists that are
 * already nearly ordered. */
inline static void ctx_edge2_insertion_sort (CtxSegment *segments, int *entries, unsigned int count)
{
  for (unsigned int pos = 1; pos < count; pos++)
   {
     const int moving = entries[pos];
     int slot = pos;
     /* shift larger predecessors one step up until `moving` fits */
     while (slot > 0 && segments[moving].val - segments[entries[slot-1]].val < 0)
     {
       entries[slot] = entries[slot-1];
       slot--;
     }
     entries[slot] = moving;
   }
}

/* Compares edges `a` and `b` (indices into `segments`) by the smallest x
 * value each reaches when projected CTX_AA_HALFSTEP2 subsamples backwards
 * and CTX_AA_HALFSTEP subsamples forwards from its current position.
 * Returns the difference of those minima (negative when a sorts first). */
inline static int ctx_edge2_compare2 (CtxSegment *segments, int a, int b)
{
  const CtxSegment *edge_a = segments + a;
  const CtxSegment *edge_b = segments + b;

  const int a_before = edge_a->val - edge_a->delta * CTX_AA_HALFSTEP2;
  const int a_after  = edge_a->val + edge_a->delta * CTX_AA_HALFSTEP;
  const int b_before = edge_b->val - edge_b->delta * CTX_AA_HALFSTEP2;
  const int b_after  = edge_b->val + edge_b->delta * CTX_AA_HALFSTEP;

  return ctx_mini (a_before, a_after) - ctx_mini (b_before, b_after);
}

/* Insertion sort of the edge index array `entries` (length `count`) using
 * ctx_edge2_compare2 as ordering, i.e. by the leftmost x each edge reaches
 * around the current scanline. */
inline static void ctx_edge2_insertion_sort2 (CtxSegment *segments, int *entries, unsigned int count)
{
  for (unsigned int pos = 1; pos < count; pos++)
   {
     const int moving = entries[pos];
     int slot = pos;
     /* shift predecessors that sort after `moving` one step up */
     while (slot > 0 && ctx_edge2_compare2 (segments, moving, entries[slot-1]) < 0)
     {
       entries[slot] = entries[slot-1];
       slot--;
     }
     entries[slot] = moving;
   }
}

/* ctx_rasterizer_feed_edges:
 * @rasterizer:  rasterizer whose edge bookkeeping is updated
 * @apply2_sort: non-zero to order the active edges with
 *               ctx_edge2_insertion_sort2 (leftmost x around the scanline),
 *               zero to order them by their current x value
 *
 * Brings the active / pending edge sets up to date for the current
 * scanline:
 *  1. pending edges whose start has been reached become active,
 *  2. new edges starting no later than the next scanline are taken from
 *     the edge list (presumably sorted by start y beforehand - see
 *     ctx_rasterizer_sort_edges), initialised and added as active or
 *     pending,
 *  3. finished edges are discarded and the active edges are sorted.
 *
 * rasterizer->edges holds indices into the edge list: active edges grow
 * from index 0 upwards, pending edges from CTX_MAX_EDGES-1 downwards.
 * horizontal_edges and ending_edges are reset at the start of each call.
 */
inline static void ctx_rasterizer_feed_edges (CtxRasterizer *rasterizer, int apply2_sort)
{
  int miny;
  CtxSegment *__restrict__ entries = (CtxSegment*)&rasterizer->edge_list.entries[0];
  int *edges = rasterizer->edges;
  unsigned int pending_edges   = rasterizer->pending_edges;
  rasterizer->horizontal_edges = 0;
  rasterizer->ending_edges     = 0;
  /* step 1: promote pending edges that start at or before this scanline,
   * as long as the active list has room */
  for (unsigned int i = 0; i < pending_edges; i++)
    {
      if (entries[rasterizer->edges[CTX_MAX_EDGES-1-i]].data.s16[1] - 1 <= rasterizer->scanline &&
          rasterizer->active_edges < CTX_MAX_EDGES-2)
        {
          unsigned int no = rasterizer->active_edges;
          rasterizer->active_edges++;
          edges[no] = edges[CTX_MAX_EDGES-1-i];
          /* close the gap with the most recently added pending entry */
          edges[CTX_MAX_EDGES-1-i] =
            edges[CTX_MAX_EDGES-1-pending_edges + 1];
          pending_edges--;
          /* re-examine this slot; for i == 0 the unsigned value wraps and
           * the loop increment brings it back to 0 */
          i--;
        }
    }
  int limit3 = CTX_RASTERIZER_AA_SLOPE_LIMIT3_FAST_AA;
  int scanline = rasterizer->scanline;
  int next_scanline = scanline + CTX_FULL_AA;
  unsigned int edge_pos = rasterizer->edge_pos;
  unsigned int edge_count = rasterizer->edge_list.count;
  /* step 2: consume edge list entries starting no later than the next
   * scanline; edge_pos advances past every inspected entry, including
   * those that are skipped */
  while ((edge_pos < edge_count &&
         (miny=entries[edge_pos].data.s16[1]-1)  <= next_scanline))
    {
      /* only edges that have not already ended are of interest, and only
       * while the active list has room */
      if (rasterizer->active_edges < CTX_MAX_EDGES-2 &&
      entries[edge_pos].data.s16[3]-1 /* (maxy) */  >= scanline)
        {
          int dy = (entries[edge_pos].data.s16[3] - 1 - miny);
          if (dy)
            {
              /* yd: rows between the edge's start and the current scanline */
              int yd = scanline - miny;
              unsigned int no = rasterizer->active_edges;
              rasterizer->active_edges++;
              unsigned int index = edges[no] = edge_pos;
              int x0 = entries[index].data.s16[0];
              int x1 = entries[index].data.s16[2];
              /* slope in fixed point x units per row, and the x value the
               * edge has at the current scanline */
              int dx_dy = CTX_RASTERIZER_EDGE_MULTIPLIER * (x1 - x0) / dy;
              entries[index].delta = dx_dy;
              entries[index].val = x0 * CTX_RASTERIZER_EDGE_MULTIPLIER +
                                         (yd * dx_dy);

              {
                /* count edges steep enough to exceed each AA slope limit;
                 * ctx_rasterizer_discard_edges decrements these again */
                int abs_dx_dy = abs(dx_dy);
                rasterizer->needs_aa3  += (abs_dx_dy > limit3);
                rasterizer->needs_aa5  += (abs_dx_dy > CTX_RASTERIZER_AA_SLOPE_LIMIT5);
                rasterizer->needs_aa15 += (abs_dx_dy > CTX_RASTERIZER_AA_SLOPE_LIMIT15);
              }

              /* an edge starting below the current scanline is parked as
               * pending when there is room; otherwise it stays active */
              if (miny > scanline &&
                  pending_edges < CTX_MAX_PENDING-1)
              {
                  /* it is a pending edge - we add it to the end of the array
                     and keep a different count for items stored here, like
                     a heap and stack growing against each other
                  */
                    edges[CTX_MAX_EDGES-1-pending_edges] =
                    rasterizer->edges[no];
                    pending_edges++;
                    rasterizer->active_edges--;
              }
            }
          else
            /* zero height: counted but never activated */
            rasterizer->horizontal_edges ++;
        }
      edge_pos++;
    }
    /* step 3: write back the counters, drop finished edges and sort */
    rasterizer->pending_edges = pending_edges;
    rasterizer->edge_pos = edge_pos;
    ctx_rasterizer_discard_edges (rasterizer);
    if (apply2_sort)
      ctx_edge2_insertion_sort2 ((CtxSegment*)rasterizer->edge_list.entries, rasterizer->edges, rasterizer->active_edges);
    else
      ctx_edge2_insertion_sort ((CtxSegment*)rasterizer->edge_list.entries, rasterizer->edges, rasterizer->active_edges);
}
#undef CTX_CMPSWP

/* ctx_coverage_post_process:
 * @rasterizer: rasterizer providing shadow / clip state
 * @minx, @maxx: first and last (inclusive) index into @coverage to process
 * @coverage:   per-pixel coverage of the scanline just generated, modified
 *              in place
 * @first_col, @last_col: not used by this function
 *
 * Applies the optional post-processing steps to one scanline of coverage:
 *  - with CTX_ENABLE_SHADOW_BLUR and rasterizer->in_shadow set, a
 *    horizontal convolution with rasterizer->kernel,
 *  - with CTX_ENABLE_CLIP and a non-rectangular clip buffer present,
 *    multiplication by the clip mask of the row.
 */
static inline void ctx_coverage_post_process (CtxRasterizer *rasterizer, unsigned int minx, unsigned int maxx, uint8_t *coverage, int *first_col, int *last_col)
{
#if CTX_ENABLE_SHADOW_BLUR
  /* the maxx >= minx test keeps the span computation below from wrapping
   * (and the temporary array from getting a zero or huge size) */
  if (CTX_UNLIKELY(rasterizer->in_shadow) && maxx >= minx)
  {
    float radius = rasterizer->state->gstate.shadow_blur;
    unsigned int dim = 2 * radius + 1;
    if (CTX_UNLIKELY (dim > CTX_MAX_GAUSSIAN_KERNEL_DIM))
      dim = CTX_MAX_GAUSSIAN_KERNEL_DIM;
    {
      const unsigned int span = maxx - minx + 1;
      const unsigned int half = dim/2;
      /* Only positions with a full kernel footprint inside the span are
       * convolved.  The bound is computed explicitly because
       * `span - half` is unsigned: for spans narrower than half the
       * kernel it would wrap to a huge value and the loop would run far
       * beyond temp[]. */
      const unsigned int blur_end = (span > half) ? span - half : 0;
      uint16_t temp[span];
      memset (temp, 0, sizeof (temp));
      for (unsigned int x = half; x < blur_end; x ++)
        for (unsigned int u = 0; u < dim; u ++)
        {
          /* accumulated with 8 fractional bits, removed again below */
          temp[x] += coverage[minx+x+u-half] * rasterizer->kernel[u] * 256;
        }
      for (unsigned int x = 0; x < span; x ++)
        coverage[minx+x] = temp[x] >> 8;
    }
  }
#endif

#if CTX_ENABLE_CLIP
  if (CTX_UNLIKELY(rasterizer->clip_buffer &&  !rasterizer->clip_rectangle))
  {
    /* post processing happens after coverage generation has already
     * incremented the scanline, so step back one pixel row */
    int scanline     = rasterizer->scanline - CTX_FULL_AA;
    /* perhaps not working right for clear? */
    int y = scanline / CTX_FULL_AA;
    /* NOTE(review): the row is located with a stride of blit_width bytes
     * and without subtracting blit_y - confirm against the code that
     * fills the clip buffer (in particular for CTX_1BIT_CLIP). */
    uint8_t *clip_line = &((uint8_t*)(rasterizer->clip_buffer->data))[rasterizer->blit_width*y];
    // XXX SIMD candidate
    for (unsigned int x = minx; x <= maxx; x ++)
    {
#if CTX_1BIT_CLIP
       /* one mask bit per pixel: byte x/8, bit x&7.  (The bit index was
        * previously x&8, which is either 0 or 8 and thus tested bit 0 or
        * a bit that does not exist in a byte.) */
       coverage[x] = (coverage[x] * ((clip_line[x/8]&(1<<(x&7)))?255:0))/255;
#else
       coverage[x] = (255 + coverage[x] * clip_line[x-rasterizer->blit_x])>>8;
#endif
    }
  }
#endif
}

/* CTX_EDGE(no): the edge-list entry referenced by slot `no` of the active
 * edge index array.  Expects `entries` and `edges` to be in scope where the
 * macro is expanded. */
#define CTX_EDGE(no)      entries[edges[no]]
/* Scanline value that UPDATE_PARITY compares against: s16[1] of the segment
 * named `segment` in the expanding scope, minus one.
 * NOTE(review): s16[1] is presumably the segment's starting y - confirm
 * against the CtxSegment definition. */
#define CTX_EDGE_YMIN     (segment->data.s16[1]-1)

/* UPDATE_PARITY: updates the fill state after crossing `segment`.
 *
 * Expects `scanline`, `segment`, `is_winding` and `parity` in the expanding
 * scope.  Nothing happens when scanline equals CTX_EDGE_YMIN.  Otherwise:
 *   - is_winding set:   parity goes up by one when segment->code is
 *                       CTX_EDGE_FLIPPED and down by one when it is not,
 *                       so parity acts as a signed winding counter;
 *   - is_winding clear: parity toggles between 0 and 1 (even-odd).
 *
 * The expansion is a bare `if` statement, so it must not be placed directly
 * in front of an `else`. */
#define UPDATE_PARITY \
        if (CTX_LIKELY(scanline!=CTX_EDGE_YMIN))\
        { \
          if (is_winding)\
             parity = parity + -1+2*(segment->code == CTX_EDGE_FLIPPED);\
          else\
             parity = 1-parity; \
        }


/* Adds the contribution of one sub-scanline to the coverage buffer.
 *
 * The active edges are visited left to right while UPDATE_PARITY tracks
 * whether the gap between an edge and its right neighbour is filled.  For
 * each filled gap the pixels first..last (clamped to minx..maxx) receive:
 *   - `fraction` for every pixel strictly between first and last,
 *   - a share of `fraction` for the two end pixels, derived from the
 *     sub-pixel position of the edges divided by `aa_factor`.
 *
 * coverage[0] corresponds to column minx.
 */
inline static void
ctx_rasterizer_generate_coverage (CtxRasterizer *rasterizer,
                                  int            minx,
                                  int            maxx,
                                  uint8_t       *coverage,
                                  int            is_winding,
                                  const uint8_t  aa_factor,
                                  const uint8_t  fraction)
{
  CtxSegment *entries   = (CtxSegment*)(&rasterizer->edge_list.entries[0]);
  int        *edge_idx  = rasterizer->edges;
  int         scanline  = rasterizer->scanline;
  const int   gap_count = rasterizer->active_edges - 1;
  int         parity    = 0;
  /* re-base so the buffer can be indexed by absolute column */
  uint8_t    *cov       = coverage - minx;

  for (int t = 0; t < gap_count; t++)
    {
      CtxSegment *segment = &entries[edge_idx[t]];
      UPDATE_PARITY;

      if (!parity)
        continue;

      CtxSegment *right_edge = &entries[edge_idx[t+1]];
      const int left_x  = segment->val;
      const int right_x = right_edge->val;
      /* 24.8 style positions: the upper bits select the pixel, the low
       * byte is the position inside the pixel */
      int start_fp = left_x  / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
      int end_fp   = right_x / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
      int first    = start_fp >> 8;
      int last     = end_fp   >> 8;

      if (CTX_UNLIKELY (first < minx))
        {
          first    = minx;
          start_fp = 0;
        }
      if (CTX_UNLIKELY (last > maxx))
        {
          last   = maxx;
          end_fp = 255;
        }

      const int head = fraction - (start_fp & 0xff) / aa_factor;
      const int tail = (end_fp & 0xff) / aa_factor;

      if (first == last)
        {
          /* both edges fall into the same pixel */
          cov[first] += (head - fraction + tail);
        }
      else if (first < last)
        {
          int x = first;
          cov[x] += head;
          while (++x < last)
            cov[x] += fraction;
          cov[last] += tail;
        }
    }
}

/* Writes full-scale (0..255) coverage for the current scanline in a single
 * pass.
 *
 * For every filled gap between neighbouring active edges the first and last
 * pixel get partial coverage added from the sub-pixel edge positions, and
 * the pixels strictly between them are set to 255 with memset.
 *
 * When first == last both additions land on the same uint8_t, so the stored
 * result is their sum modulo 256.
 *
 * coverage[0] corresponds to column minx.
 */
inline static void
ctx_rasterizer_generate_coverage_set (CtxRasterizer *rasterizer,
                                      int            minx,
                                      int            maxx,
                                      uint8_t       *coverage,
                                      int            is_winding)
{
  CtxSegment *entries   = (CtxSegment*)(&rasterizer->edge_list.entries[0]);
  int        *edge_idx  = rasterizer->edges;
  int         scanline  = rasterizer->scanline;
  const int   gap_count = rasterizer->active_edges - 1;
  int         parity    = 0;
  /* re-base so the buffer can be indexed by absolute column */
  uint8_t    *cov       = coverage - minx;

  for (int t = 0; t < gap_count; t++)
    {
      CtxSegment *segment = &entries[edge_idx[t]];
      UPDATE_PARITY;

      if (!parity)
        continue;

      CtxSegment *right_edge = &entries[edge_idx[t+1]];
      const int left_x  = segment->val;
      const int right_x = right_edge->val;
      int start_fp = left_x  / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
      int end_fp   = right_x / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
      int first    = start_fp >> 8;
      int last     = end_fp   >> 8;

      if (CTX_UNLIKELY (first < minx))
        {
          first    = minx;
          start_fp = 0;
        }
      if (CTX_UNLIKELY (last > maxx))
        {
          last   = maxx;
          end_fp = 255;
        }

      /* left pixel: the part to the right of the edge; right pixel: the
       * part to the left of the edge */
      const int head = (start_fp & 0xff) ^ 255;
      const int tail = (end_fp & 0xff);

      cov[first] += head;
      cov[last]  += tail;

      const int interior_start = first + 1;
      if (interior_start < last)
        memset (&cov[interior_start], 255, last - interior_start);
    }
}


/* Rasterizes the current scanline straight into the destination row instead
 * of going through the coverage buffer (the `coverage` argument is not
 * touched by this function).
 *
 * The destination row is rasterizer->buf + blit_stride * (scanline /
 * CTX_FULL_AA).  For every filled gap between neighbouring active edges:
 *   - the first pixel is composited with partial coverage `graystart`,
 *   - the pixels strictly between first and last are composited with full
 *     coverage, using a fast path selected by `comp` where one exists,
 *   - the partial coverage of the last pixel is held back in `accumulated`
 *     (at column `accumulator_x`) so that a following gap starting in the
 *     same pixel can be merged with it; otherwise it is flushed before the
 *     next gap is drawn, or after the loop.
 *
 * minx/maxx clamp the span; `comp` selects the compositing path and falls
 * back to rasterizer->apply_coverage for anything without a dedicated case.
 */
inline static void
ctx_rasterizer_generate_coverage_apply (CtxRasterizer *rasterizer,
                                        int            minx,
                                        int            maxx,
                                        uint8_t* __restrict__ coverage,
                                        int            is_winding,
                                        CtxCovPath     comp)
{
  CtxSegment *entries = (CtxSegment*)(&rasterizer->edge_list.entries[0]);
  int *edges          = rasterizer->edges;
  int scanline        = rasterizer->scanline;
  const int bpp       = rasterizer->format->bpp;
  int active_edges    = rasterizer->active_edges;
  int parity          = 0;
#if CTX_RASTERIZER_SWITCH_DISPATCH
  uint32_t *src_pixp;
  uint32_t src_pix, si_ga, si_rb, si_ga_full, si_rb_full, si_a;
  if (comp != CTX_COV_PATH_FALLBACK &&
      comp != CTX_COV_PATH_RGBA8_COPY_FRAGMENT &&
      comp != CTX_COV_PATH_RGBA8_OVER_FRAGMENT)
  {
    /* solid-color paths read the source pixel and its channel-pair
     * variants from consecutive 32 bit words of rasterizer->color */
    src_pixp   = ((uint32_t*)rasterizer->color);
    src_pix    = src_pixp[0];
    si_ga      = ((uint32_t*)rasterizer->color)[1];
    si_rb      = ((uint32_t*)rasterizer->color)[2];
    si_ga_full = ((uint32_t*)rasterizer->color)[3];
    si_rb_full = ((uint32_t*)rasterizer->color)[4];
    si_a       = src_pix >> 24;
  }
  else
  {
    src_pix    =
    si_ga      =
    si_rb      =
    si_ga_full =
    si_rb_full =
    si_a       = 0;
    src_pixp = &src_pix;
  }
#endif

  uint8_t *dst = ( (uint8_t *) rasterizer->buf) +
         (rasterizer->blit_stride * (scanline / CTX_FULL_AA));
  int accumulator_x=0;
  uint8_t accumulated = 0;
  for (int t = 0; t < active_edges -1;t++)
    {
      CtxSegment *segment = &entries[edges[t]];
      UPDATE_PARITY;

      if (parity)
        {
          CtxSegment   *next_segment = &entries[edges[t+1]];
          const int x0        = segment->val;
          const int x1        = next_segment->val;

          int graystart = x0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
          int first = graystart >> 8;
          int grayend = x1 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
          int last = grayend >> 8;

          if (CTX_UNLIKELY(first < minx))
          {
            graystart = 0;
            first = minx;
          }
          if (CTX_UNLIKELY(last > maxx))
          {
             last = maxx;
             grayend=255;
          }
          graystart = (graystart&0xff) ^ 255;

          grayend     = (grayend & 0xff);

          if (accumulated)
          {
            if (accumulator_x == first)
            {
              /* this gap starts in the pixel the previous one ended in:
               * merge the held-back coverage into the first pixel */
              graystart += accumulated;
            }
            else
            {
              /* flush the held-back last pixel of the previous gap */
              uint32_t* dst_pix = (uint32_t*)(&dst[(accumulator_x*bpp)/8]);
              switch (comp)
              {
#if CTX_RASTERIZER_SWITCH_DISPATCH
                case CTX_COV_PATH_RGBA8_COPY:
                  *dst_pix = ctx_lerp_RGBA8_2(*dst_pix, si_ga, si_rb, accumulated);
                  break;
                case CTX_COV_PATH_RGBA8_OVER:
                  *dst_pix = ctx_over_RGBA8_2(*dst_pix, si_ga, si_rb, si_a, accumulated);
                  break;
#endif
                default:
                  rasterizer->apply_coverage (rasterizer, (uint8_t*)dst_pix, rasterizer->color, accumulator_x, &accumulated, 1);
              }
            }
            accumulated = 0;
          }

          if (first < last)
          {
            switch (comp)
            {
#if CTX_RASTERIZER_SWITCH_DISPATCH
              case CTX_COV_PATH_RGBA8_COPY:
              {
                uint32_t* dst_pix = (uint32_t*)(&dst[(first *bpp)/8]);
                *dst_pix = ctx_lerp_RGBA8_2(*dst_pix, si_ga, si_rb, graystart);

                dst_pix++;
                ctx_span_set_colorb (dst_pix, src_pix, last - first - 1);
              }
              break;
              case CTX_COV_PATH_RGB8_COPY:
              case CTX_COV_PATH_RGBAF_COPY:
              case CTX_COV_PATH_RGB565_COPY:
              case CTX_COV_PATH_RGB332_COPY:
              case CTX_COV_PATH_GRAYA8_COPY:
              case CTX_COV_PATH_GRAYAF_COPY:
              case CTX_COV_PATH_CMYKAF_COPY:
              case CTX_COV_PATH_GRAY8_COPY:
              case CTX_COV_PATH_CMYKA8_COPY:
              case CTX_COV_PATH_CMYK8_COPY:
              {
                /* first pixel through the generic compositor, interior as a
                 * plain fill with the native-format color */
                uint8_t* dsts = (uint8_t*)(&dst[(first *bpp)/8]);
                uint8_t  startcov = graystart;
                rasterizer->apply_coverage (rasterizer, (uint8_t*)dsts, rasterizer->color, first, &startcov, 1);
                uint8_t* dst_i = (uint8_t*)dsts;
                uint8_t *color = ((uint8_t*)&rasterizer->color_native);
                unsigned int bytes = rasterizer->format->bpp/8;
                dst_i+=bytes;

                unsigned int count = last-(first+1);

                if (CTX_LIKELY(count>0))
                switch (bytes)
                {
                  case 1:
                    memset (dst_i, color[0], count);
                    break;
                  case 2:
                    {
                      uint16_t val = ((uint16_t*)color)[0];
                      while (count--)
                      {
                        ((uint16_t*)dst_i)[0] = val;
                        dst_i+=2;
                      }
                    }
                    break;
                  case 4:
                    {
                      uint32_t val = ((uint32_t*)color)[0];
                      /* fill from dst_i (pixel first+1), not from the
                       * start of the row */
                      ctx_span_set_colorb ((uint32_t*)dst_i, val, count);
                    }
                    break;
                  case 16:
                    /* same as above: the fill starts at pixel first+1 */
                    ctx_span_set_color_x4 ((uint32_t*)dst_i, (uint32_t*)color, count);
                    break;
                  case 3:
                    while (count--)
                    {
                      *dst_i ++ = color[0];
                      *dst_i ++ = color[1];
                      *dst_i ++ = color[2];
                    }
                    break;
                  case 5:
                    while (count--)
                    {
                      *dst_i ++ = color[0];
                      *dst_i ++ = color[1];
                      *dst_i ++ = color[2];
                      *dst_i ++ = color[3];
                      *dst_i ++ = color[4];
                    }
                    break;
                  default:
                    while (count--)
                    {
                      for (unsigned int b = 0; b < bytes; b++)
                        *dst_i++ = color[b];
                    }
                    break;
                }
              }
              break;

              case CTX_COV_PATH_GRAY1_COPY:
              {
                uint8_t* dstp = (uint8_t*)(&dst[(first *bpp)/8]);
                uint8_t *srcp = (uint8_t*)src_pixp;
                uint8_t  startcov = graystart;
                rasterizer->apply_coverage (rasterizer, (uint8_t*)dstp, rasterizer->color, first, &startcov, 1);
                dstp = (uint8_t*)(&dst[((first+1)*bpp)/8]);
                unsigned int count = last - first - 1;
                if (srcp[0]>=127)
                {
                  /* set bits: single bits up to a byte boundary, then
                   * whole bytes, then the remaining single bits */
                  int x = first + 1;
                  while (count > 0 && (x & 7))
                  {
                     int bitno = x & 7;
                     *dstp |= (1<<bitno);
                     dstp += (bitno == 7);
                     x++;
                     count--;
                  }

                  while (count > 8)
                  {
                     *dstp = 255;
                     dstp++;
                     x+=8;
                     count-=8;
                  }

                  for (unsigned int i = 0; i < count; i++)
                  {
                     int bitno = x & 7;
                     *dstp |= (1<<bitno);
                     dstp += (bitno == 7);
                     x++;
                  }
                }
                else
                {
                  /* clear bits, same three phases */
                  unsigned int x = first + 1;
                  while (count > 0 && (x & 7))
                  {
                     int bitno = x & 7;
                     *dstp &= ~(1<<bitno);
                     dstp += (bitno == 7);
                     x++;
                     count--;
                  }

                  while (count > 8)
                  {
                     *dstp = 0;
                     dstp++;
                     x+=8;
                     count-=8;
                  }

                  for (unsigned int i = 0; i < count; i++)
                  {
                     int bitno = x & 7;
                     *dstp &= ~(1<<bitno);
                     dstp += (bitno == 7);
                     x++;
                  }
                }
              }
              break;

              case CTX_COV_PATH_RGBA8_OVER:
              {
                uint32_t* dst_pix = (uint32_t*)(&dst[(first *bpp)/8]);
                *dst_pix = ctx_over_RGBA8_2(*dst_pix, si_ga, si_rb, si_a, graystart);
                dst_pix++;
                for (unsigned int i = first + 1; i < (unsigned)last; i++)
                {
                  *dst_pix = ctx_over_RGBA8_full_2(*dst_pix, si_ga_full, si_rb_full, si_a);
                  dst_pix++;
                }
              }
              break;
              case CTX_COV_PATH_RGBA8_COPY_FRAGMENT:
              {
                float u0 = 0; float v0 = 0;
                float ud = 0; float vd = 0;
                float w0 = 1; float wd = 0;
                uint8_t gs = graystart;
                ctx_RGBA8_source_copy_normal_fragment (rasterizer, &dst[(first * bpp)/8], NULL, first, &gs, 1);
                ctx_init_uv (rasterizer, first+1, scanline/CTX_FULL_AA,&u0, &v0, &w0, &ud, &vd, &wd);
                rasterizer->fragment (rasterizer, u0, v0, w0, &dst[((first+1)*bpp)/8], last-first-1, ud, vd, wd);
              }
              break;
              case CTX_COV_PATH_RGBA8_OVER_FRAGMENT:
              {
                uint8_t gs = graystart;
                ctx_RGBA8_source_over_normal_fragment (rasterizer, &dst[(first * bpp)/8], NULL, first, &gs, 1);
                ctx_RGBA8_source_over_normal_full_cov_fragment (rasterizer,
                                                       &dst[((first+1)*bpp)/8], NULL, first + 1, NULL, last-first-1, 1);
              }
              break;
#endif
              default:
              {
                /* generic path: a coverage run of 255 with the partial
                 * value for the first pixel patched in */
#if static_OPAQUE
                uint8_t *opaque = &rasterizer->opaque[0];
#else
                uint8_t opaque[last-first];
                memset (opaque, 255, sizeof (opaque));
#endif
                opaque[0] = graystart;
                rasterizer->apply_coverage (rasterizer,
                                            &dst[(first * bpp)/8],
                                            rasterizer->color, first, opaque, last-first);

#if static_OPAQUE
                /* restore the shared buffer for the next user */
                opaque[0] = 255;
#endif
              }
            }
            /* hold back the last pixel; it may be shared with the next gap */
            accumulated = grayend;
          }
          else if (first == last)
          {
            /* both edges inside one pixel */
            accumulated = (graystart-(grayend^255));
          }
          accumulator_x = last;
        }
    }

  if (accumulated)
  {
    /* flush the last pixel of the final gap */
    uint32_t* dst_pix = (uint32_t*)(&dst[(accumulator_x*bpp)/8]);
    switch (comp)
    {
#if CTX_RASTERIZER_SWITCH_DISPATCH
      case CTX_COV_PATH_RGBA8_COPY:
        *dst_pix = ctx_lerp_RGBA8_2(*dst_pix, si_ga, si_rb, accumulated);
        break;
      case CTX_COV_PATH_RGBA8_OVER:
        *dst_pix = ctx_over_RGBA8_2(*dst_pix, si_ga, si_rb, si_a, accumulated);
        break;
#endif
      default:
        rasterizer->apply_coverage (rasterizer, (uint8_t*)dst_pix, rasterizer->color, accumulator_x, &accumulated, 1);
    }
  }
}

/* Returns 1 when the current scanline qualifies as "simple", 0 otherwise.
 *
 * A scanline is rejected outright when fast_aa is off or when there are
 * ending or pending edges.  Otherwise every pair of neighbouring active
 * edges is examined: each edge's x is extrapolated by its delta
 * (CTX_AA_HALFSTEP2 backwards, CTX_AA_HALFSTEP forwards) and the scanline is
 * rejected as soon as the right edge's extent falls to the left of the left
 * edge's extent in any of the three comparisons below.
 */
inline static int ctx_rasterizer_is_simple (CtxRasterizer *rasterizer)
{
  const int eligible = rasterizer->fast_aa != 0 &&
                       !rasterizer->ending_edges &&
                       !rasterizer->pending_edges;
  if (!eligible)
    return 0;

  int        *edge_idx = rasterizer->edges;
  CtxSegment *segs     = &((CtxSegment*)(rasterizer->edge_list.entries))[0];
  const int   pairs    = rasterizer->active_edges - 1;

  for (int t = 0; t < pairs; t++)
    {
      CtxSegment *left  = &segs[edge_idx[t]];
      CtxSegment *right = &segs[edge_idx[t+1]];
      const int left_delta  = left->delta;
      const int right_delta = right->delta;
      const int left_x      = left->val;
      const int right_x     = right->val;

      const int left_end    = left_x  + left_delta  * CTX_AA_HALFSTEP;
      const int right_end   = right_x + right_delta * CTX_AA_HALFSTEP;
      const int left_start  = left_x  - left_delta  * CTX_AA_HALFSTEP2;
      const int right_start = right_x - right_delta * CTX_AA_HALFSTEP2;

      const int ordered = right_end   >= left_end &&
                          right_start >= left_end &&
                          right_end   >= left_start;
      if (!ordered)
        return 0;
    }
  return 1;
}


/* Variant of ctx_rasterizer_generate_coverage_set that treats steep-delta
 * edges specially.
 *
 * For each filled gap between neighbouring active edges:
 *   - an edge whose |delta| is below CTX_RASTERIZER_AA_SLOPE_LIMIT3_FAST_AA
 *     contributes partial coverage to a single pixel (first or last);
 *   - any other edge contributes a ramp of coverage values across the pixels
 *     between its extrapolated start and end x, clamped to minx..maxx;
 *   - the pixels between the two edge regions are set to 255.
 *
 * coverage[0] corresponds to column minx.
 */
inline static void
ctx_rasterizer_generate_coverage_set2 (CtxRasterizer *rasterizer,
                                         int            minx,
                                         int            maxx,
                                         uint8_t       *coverage,
                                         int            is_winding)
{
  CtxSegment *entries = (CtxSegment*)(&rasterizer->edge_list.entries[0]);
  int *edges  = rasterizer->edges;
  int scanline        = rasterizer->scanline;
  int active_edges    = rasterizer->active_edges;
  int parity        = 0;

  /* re-base so the buffer can be indexed by absolute column */
  coverage -= minx;

  /* clamp limits expressed in the same fixed-point units as segment->val */
  const int minx_ = minx * CTX_RASTERIZER_EDGE_MULTIPLIER * CTX_SUBDIV;
  const int maxx_ = maxx * CTX_RASTERIZER_EDGE_MULTIPLIER * CTX_SUBDIV;

  for (int t = 0; t < active_edges -1;t++)
    {
      CtxSegment   *segment = &entries[edges[t]];
      UPDATE_PARITY;

       if (parity)
        {
          CtxSegment   *next_segment = &entries[edges[t+1]];
          const int x0        = segment->val;
          const int x1        = next_segment->val;
          const int delta0    = segment->delta;
          const int delta1    = next_segment->delta;

          /* each edge's x extrapolated backwards and forwards by its delta
           * NOTE(review): presumably the x extent of the edge over the
           * rows covered by this scanline - confirm against the
           * CTX_AA_HALFSTEP / CTX_AA_HALFSTEP2 definitions */
          int x0_start = x0 - delta0 * CTX_AA_HALFSTEP2;
          int x1_start = x1 - delta1 * CTX_AA_HALFSTEP2;
          int x0_end   = x0 + delta0 * CTX_AA_HALFSTEP;
          int x1_end   = x1 + delta1 * CTX_AA_HALFSTEP;

          /* upper bits select the pixel, low byte is the sub-pixel position */
          int graystart = x0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
          int grayend   = x1 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
          int first     = graystart >> 8;
          int last      = grayend   >> 8;

          if (CTX_UNLIKELY (first < minx))
          { 
            first = minx;
            graystart=0;
          }
          if (CTX_UNLIKELY (last > maxx))
          {
            last = maxx;
            grayend=255;
          }
          graystart = (graystart&0xff) ^ 255;
          grayend   = (grayend & 0xff);

          if (first < last)
          {
            /* pre/post: how many pixels at the left/right end of the gap
             * are handled by the edge code and must be skipped by the
             * solid fill below */
            int pre = 1;
            int post = 1;

            if (abs(delta0) < CTX_RASTERIZER_AA_SLOPE_LIMIT3_FAST_AA)
            {
              /* left edge stays within one pixel */
              coverage[first] += graystart;
            }
            else
            {
              /* left edge spans several pixels: u0..u1 is its clamped
               * extent, ordered so that u0 <= u1 */
              unsigned int u0 = ctx_mini (maxx_, ctx_maxi (minx_, ctx_mini (x0_start, x0_end)));
              unsigned int u1 = ctx_mini (maxx_, ctx_maxi (minx_, ctx_maxi (x0_start, x0_end)));

              /* us: first pixel column of the ramp */
              int us = u0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV);
              int count = 0;

              /* mod: offset derived from the sub-pixel position of u0;
               * sum/recip: scale so the ramp maps onto 0..255, with the
               * division replaced by a 16.16 reciprocal multiply */
              int mod = ((u0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256) % 256)^255) *
                         (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/255);
              int sum = ((u1-u0+CTX_RASTERIZER_EDGE_MULTIPLIER * CTX_SUBDIV)/255);

              int recip = 65536/sum;
              /* one step per pixel: coverage grows with the distance from u0 */
              for (unsigned int u = u0; u < u1; u+= CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV)
              {
                coverage[us + count] += ((u - u0 + mod) * recip)>>16;
                count++;
              }
              pre = (us+count-1)-first+1;
            }
  
            if (abs(delta1) < CTX_RASTERIZER_AA_SLOPE_LIMIT3_FAST_AA)
            {
               /* right edge stays within one pixel */
               coverage[last] += grayend;
            }
            else
            {
              /* right edge spans several pixels; same scheme as the left
               * edge, but with a different offset (+64) and scale (*1.25)
               * and with the ramp inverted (^255) so coverage falls off
               * towards the right */
              unsigned int u0 = ctx_mini (maxx_, ctx_maxi (minx_, ctx_mini (x1_start, x1_end)));
              unsigned int u1 = ctx_mini (maxx_, ctx_maxi (minx_, ctx_maxi (x1_start, x1_end)));

              int us = u0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV);
              int count = 0;
              int mod = ((((u0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256) % 256)^255)+64) *
                    (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/255));
              int sum = ((u1-u0+CTX_RASTERIZER_EDGE_MULTIPLIER * CTX_SUBDIV * 1.25)/255);
              int recip = 65536 / sum;
              for (unsigned int u = u0; u < u1; u+= CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV)
              {
                coverage[us + count] += (((u - u0 + mod) * recip)>>16) ^ 255;
                count++;
              }
              post = last-us+1;
            }
            /* solid interior between the two edge regions */
            for (int i = first + pre; i <= last - post; i++)
              coverage[i] = 255;
          }
          else if (first == last)
          {
            /* both edges inside one pixel */
            coverage[last]+=(graystart-(grayend^255));
          }
        }
   }
}


inline static void
ctx_rasterizer_generate_coverage_apply2 (CtxRasterizer *rasterizer,
                                         int            minx,
                                         int            maxx,
                                         uint8_t       *coverage,
                                         int            is_winding,
                                         CtxCovPath     comp)
{
  CtxSegment *entries = (CtxSegment*)(&rasterizer->edge_list.entries[0]);
  int *edges          = rasterizer->edges;
  int  scanline       = rasterizer->scanline;
  const int  bpp      = rasterizer->format->bpp;
  int  active_edges   = rasterizer->active_edges;
  int  parity         = 0;

#if CTX_RASTERIZER_SWITCH_DISPATCH
  uint32_t *src_pixp;
  uint32_t src_pix, si_ga, si_rb, si_ga_full, si_rb_full, si_a;
  if (comp != CTX_COV_PATH_FALLBACK &&
      comp != CTX_COV_PATH_RGBA8_COPY_FRAGMENT &&
      comp != CTX_COV_PATH_RGBA8_OVER_FRAGMENT)
  {
    src_pixp   = ((uint32_t*)rasterizer->color);
    src_pix    = src_pixp[0];
    si_ga      = ((uint32_t*)rasterizer->color)[1];
    si_rb      = ((uint32_t*)rasterizer->color)[2];
    si_ga_full = ((uint32_t*)rasterizer->color)[3];
    si_rb_full = ((uint32_t*)rasterizer->color)[4];
    si_a  = src_pix >> 24;
  }
  else
  {
    src_pix    =
    si_ga      =
    si_rb      =
    si_ga_full =
    si_rb_full =
    si_a  = 0;
    src_pixp = &src_pix;
  }
#endif

  uint8_t *dst = ( (uint8_t *) rasterizer->buf) +
         (rasterizer->blit_stride * (scanline / CTX_FULL_AA));

  coverage -= minx;

  const int minx_ = minx * CTX_RASTERIZER_EDGE_MULTIPLIER * CTX_SUBDIV;
  const int maxx_ = maxx * CTX_RASTERIZER_EDGE_MULTIPLIER * CTX_SUBDIV;

  int accumulated_x0 = 65538;
  int accumulated_x1 = 65536;

  for (int t = 0; t < active_edges -1;t++)
    {
      CtxSegment   *segment = &entries[edges[t]];
      UPDATE_PARITY;

       if (parity)
        {
          CtxSegment   *next_segment = &entries[edges[t+1]];
          const int x0        = segment->val;
          const int x1        = next_segment->val;
          const int delta0    = segment->delta;
          const int delta1    = next_segment->delta;

          int x0_start = x0 - delta0 * CTX_AA_HALFSTEP2;
          int x1_start = x1 - delta1 * CTX_AA_HALFSTEP2;
          int x0_end   = x0 + delta0 * CTX_AA_HALFSTEP;
          int x1_end   = x1 + delta1 * CTX_AA_HALFSTEP;

          int graystart = x0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
          int grayend   = x1 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
          int first     = graystart >> 8;
          int last      = grayend   >> 8;

          if (CTX_UNLIKELY (first < minx))
          { 
            first = minx;
            graystart=0;
          }
          if (CTX_UNLIKELY (last > maxx))
          {
            last = maxx;
            grayend=255;
          }
          graystart = 255-(graystart&0xff);
          grayend   = (grayend & 0xff);

          if (first < last)
          {
            int pre = 1;
            int post = 1;

          if (abs(delta0) < CTX_RASTERIZER_AA_SLOPE_LIMIT3_FAST_AA)
          {
             coverage[first] += graystart;

            accumulated_x1 = first;
            accumulated_x0 = ctx_mini (accumulated_x0, first);
          }
          else
          {
            unsigned int u0 = ctx_mini (maxx_, ctx_maxi (minx_, ctx_mini (x0_start, x0_end)));
            unsigned int u1 = ctx_mini (maxx_, ctx_maxi (minx_, ctx_maxi (x0_start, x0_end)));

            int mod = ((u0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256) % 256)^255) *
                    (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/255);
            int sum = ((u1-u0+CTX_RASTERIZER_EDGE_MULTIPLIER * CTX_SUBDIV)/255);

            int us = u0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV);
            int count = 0;
            int recip = 65536/ sum;
            for (unsigned int u = u0; u < u1; u+= CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV)
            {
              coverage[us + count] += ((u - u0 + mod) * recip)>>16;
              count++;
            }
            pre = (us+count-1)-first+1;

            accumulated_x0 = ctx_mini (accumulated_x0, us);
            accumulated_x1 = us + count - 1;
          }

          if (accumulated_x1-accumulated_x0>=0)
          {
             switch (comp)
             {
#if CTX_RASTERIZER_SWITCH_DISPATCH
                case CTX_COV_PATH_RGBA8_OVER:
                {
                  uint32_t *dst_i = (uint32_t*)&dst[((accumulated_x0) * bpp)/8];
                  for (int i = 0; i < accumulated_x1-accumulated_x0+1; i++)
                    {
                      *dst_i = ctx_over_RGBA8_2 (*dst_i, si_ga, si_rb, si_a, coverage[accumulated_x0+i]);
                      dst_i++;
                    }
                }
                break;

                case CTX_COV_PATH_RGBA8_COPY:
                {
                  uint32_t *dst_i = (uint32_t*)&dst[((accumulated_x0) * bpp)/8];
                  for (int i = 0; i < accumulated_x1-accumulated_x0+1; i++)
                  {
                    *dst_i = ctx_lerp_RGBA8_2 (*dst_i, si_ga, si_rb, coverage[accumulated_x0+i]);
                    dst_i++;
                  }
                }
                  break;
                case CTX_COV_PATH_RGB8_COPY:
                {
                  uint8_t *dst_i = (uint8_t*)&dst[((accumulated_x0) * bpp)/8];
                  uint8_t *srcp = (uint8_t*)src_pixp;
                  for (int i = 0; i < accumulated_x1-accumulated_x0+1; i++)
                  {
                    for (int c = 0; c < 3; c++)
                      dst_i[c] = ctx_lerp_u8 (dst_i[c], srcp[c], coverage[accumulated_x0+i]);
                    dst_i +=3;
                  }
                }
                  break;
#endif
                default:
                rasterizer->apply_coverage (rasterizer,
                          &dst[((accumulated_x0) * bpp)/8],
                          rasterizer->color,
                          accumulated_x0,
                          &coverage[accumulated_x0],
                          accumulated_x1-accumulated_x0+1);
             }
             accumulated_x0 = 65538;
             accumulated_x1 = 65536;
          }

          if (abs(delta1) < CTX_RASTERIZER_AA_SLOPE_LIMIT3_FAST_AA)
          {
             coverage[last] += grayend;
             accumulated_x1 = last;
             accumulated_x0 = last;
          }
          else
          {
            unsigned int u0 = ctx_mini (maxx_, ctx_maxi (minx_, ctx_mini (x1_start, x1_end)));
            unsigned int u1 = ctx_mini (maxx_, ctx_maxi (minx_, ctx_maxi (x1_start, x1_end)));

            int us = u0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV);
            int count = 0;

            int mod = ((((u0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256) % 256)^255) +64) *
                    (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/255));
            int sum = ((u1-u0+CTX_RASTERIZER_EDGE_MULTIPLIER * CTX_SUBDIV * 1.25)/255);

            int recip = 65536/ sum;
            for (unsigned int u = u0; u < u1; u+= CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV)
            {
              coverage[us + count] = (((u - u0 + mod)*recip)>>16)^255;
              count++;
            }
            post = last-us+1;

            accumulated_x1 = us + count;
            accumulated_x0 = us;
          }
          switch (comp)
          {
#if CTX_RASTERIZER_SWITCH_DISPATCH
            case CTX_COV_PATH_RGBAF_COPY:
            case CTX_COV_PATH_GRAY8_COPY:
            case CTX_COV_PATH_RGB8_COPY:
            case CTX_COV_PATH_GRAYA8_COPY:
            case CTX_COV_PATH_GRAYAF_COPY:
            case CTX_COV_PATH_CMYKAF_COPY:
            case CTX_COV_PATH_RGB565_COPY:
            case CTX_COV_PATH_RGB332_COPY:
            case CTX_COV_PATH_CMYK8_COPY:
            case CTX_COV_PATH_CMYKA8_COPY:
            {
              uint8_t* dsts = (uint8_t*)(&dst[(first *bpp)/8]);
              uint8_t* dst_i = (uint8_t*)dsts;
              uint8_t* color = ((uint8_t*)&rasterizer->color_native);
              unsigned int bytes = rasterizer->format->bpp/8;
              dst_i+=pre*bytes;

              int scount = (last - post) - (first+pre) + 1;
              unsigned int count = scount;

              //for (int i = first + pre; i <= last - post; i++)
              if (CTX_LIKELY(scount>0))
              switch (bytes)
              {
                case 1:
#if 1
                  memset (dst_i, color[0], count);
#else
                  while (count--)
                  {
                    dst_i[0] = color[0];
                    dst_i++;
                  }
#endif
                  break;
                case 2:
                  {
                    uint16_t val = ((uint16_t*)color)[0];
                    while (count--)
                    {
                      ((uint16_t*)dst_i)[0] = val;
                      dst_i+=2;
                    }
                  }
                  break;
                case 4:
                  {
                    uint32_t val = ((uint32_t*)color)[0];
                    while (count--)
                    {
                      ((uint32_t*)dst_i)[0] = val;
                      dst_i+=4;
                    }
                  }
                  break;
                case 16:
                  ctx_span_set_color_x4 ((uint32_t*)dst, (uint32_t*)color, count);
                  break;
                case 3:
                 while (count--)
                 {
                   *dst_i++ = color[0];
                   *dst_i++ = color[1];
                   *dst_i++ = color[2];
                 }
                 break;
                case 5:
                 while (count--)
                 {
                   *dst_i++ = color[0];
                   *dst_i++ = color[1];
                   *dst_i++ = color[2];
                   *dst_i++ = color[3];
                   *dst_i++ = color[4];
                 }
                 break;
                default:
                 while (count--)
                 {
                   for (unsigned int b = 0; b < bytes; b++)
                     *dst_i++ = color[b];
                 }
                  break;
               }
             }
             break;

            case CTX_COV_PATH_RGBA8_COPY:
            {
              uint32_t* dst_pix = (uint32_t*)(&dst[(first *bpp)/8]);
              dst_pix+=pre;
              ctx_span_set_color (dst_pix, src_pix, last-first-pre-post + 1);
            }
            break;


            case CTX_COV_PATH_RGBA8_OVER:
            {
              uint32_t* dst_pix = (uint32_t*)(&dst[(first *bpp)/8]);
              dst_pix+=pre;
              int scount = (last - post) - (first + pre) + 1;
              if (scount > 0)
              {
                unsigned int count = scount;
                while (count--)
                {
                  *dst_pix = ctx_over_RGBA8_full_2(*dst_pix, si_ga_full, si_rb_full, si_a);
                  dst_pix++;
                }
              }
            }
            break;
            case CTX_COV_PATH_RGBA8_COPY_FRAGMENT:
            {
              int width = last-first-pre-post+1;
              if (width>0)
              {
                float u0 = 0; float v0 = 0;
                float ud = 0; float vd = 0;
                float w0 = 1; float wd = 0;
                ctx_init_uv (rasterizer, first+pre, rasterizer->scanline/CTX_FULL_AA,&u0, &v0, &w0, &ud, &vd, &wd);
                rasterizer->fragment (rasterizer, u0, v0, w0, &dst[(first+pre)*bpp/8],
                                      width, ud, vd, wd);
              }
            }
            break;
            case CTX_COV_PATH_RGBA8_OVER_FRAGMENT:
              {
                int width = last-first-pre-post+1;
                if (width>0)
                ctx_RGBA8_source_over_normal_full_cov_fragment (rasterizer,
                               &dst[((first+pre)*bpp)/8],
                               NULL,
                               first + pre,
                               NULL,
                               width, 1);
              }
            break;
#endif
            default:
              {
                int width = last-first-pre-post+1;
                if (width > 0)
                {
#if static_OPAQUE
                uint8_t *opaque = &rasterizer->opaque[0];
#else
                uint8_t opaque[width];
                memset (opaque, 255, sizeof (opaque));
#endif
                rasterizer->apply_coverage (rasterizer,
                            &dst[((first + pre) * bpp)/8],
                            rasterizer->color,
                            first + pre,
                            opaque,
                            width);
                }
              }
          }
          }
          else if (first == last)
          {
            coverage[last]+=(graystart-(255-grayend));

            accumulated_x1 = last;
            accumulated_x0 = ctx_mini (accumulated_x0, last);
          }
        }
   }

   if (accumulated_x1-accumulated_x0>=0)
   {
             switch (comp)
             {
#if CTX_RASTERIZER_SWITCH_DISPATCH
                case CTX_COV_PATH_RGBA8_OVER:
                {
                  uint32_t *dst_i = (uint32_t*)&dst[((accumulated_x0) * bpp)/8];
                  for (int i = 0; i < accumulated_x1-accumulated_x0+1; i++)
                    {
                      *dst_i = ctx_over_RGBA8_2 (*dst_i, si_ga, si_rb, si_a, coverage[accumulated_x0+i]);
                      dst_i++;
                    }
                }
                break;
                case CTX_COV_PATH_RGBA8_COPY:
                {
                  uint32_t *dst_i = (uint32_t*)&dst[((accumulated_x0) * bpp)/8];
                  for (int i = 0; i < accumulated_x1-accumulated_x0+1; i++)
                  {
                    *dst_i = ctx_lerp_RGBA8_2 (*dst_i, si_ga, si_rb, coverage[accumulated_x0+i]);
                    dst_i++;
                  }
                }
                  break;
#endif
                default:
                rasterizer->apply_coverage (rasterizer,
                          &dst[((accumulated_x0) * bpp)/8],
                          rasterizer->color,
                          accumulated_x0,
                          &coverage[accumulated_x0],
                          accumulated_x1-accumulated_x0+1);
             }
   }
}

#undef CTX_EDGE_Y0
#undef CTX_EDGE

/* Put the rasterizer back into its "no path accumulated" state.
 *
 * The edge list is emptied and the scanline/edge cursors rewound.  The
 * bounding extents (scan_min/max, col_min/max) are only reset when the
 * path is not flagged as preserved.
 */
static inline void
ctx_rasterizer_reset (CtxRasterizer *rasterizer)
{
  rasterizer->scanline        = 0;
  rasterizer->edge_pos        = 0;
  rasterizer->edge_list.count = 0; /* ready for new edges */
  rasterizer->has_prev        = 0;
  rasterizer->has_shape       = 0;

  /* comp_op is deliberately left untouched so that it stays cached
   * between rasterizations where the rendering attributes do not change
   */
  if (CTX_UNLIKELY (rasterizer->preserve))
    return;

  /* inverted extents: any point added afterwards shrinks/grows them */
  rasterizer->col_min  = 5000;
  rasterizer->scan_min = 5000;
  rasterizer->col_max  = -5000;
  rasterizer->scan_max = -5000;
}

/* Rasterize the rasterizer's edge list, one pixel row per loop iteration.
 *
 * @rasterizer: holds the edge list, the target buffer (buf, blit_*), the
 *              clip rectangle (state->gstate.clip_*) and the accumulated
 *              path extents (scan_min/max, col_min/max).
 * @fill_rule:  compared against CTX_FILL_RULE_WINDING to pick the rule
 *              handed to the coverage generators.
 * @shape:      (CTX_SHAPE_CACHE builds only) when non-NULL coverage rows
 *              are stored in shape->data instead of being composited.
 *
 * For every row one of several strategies is chosen: skip (no edges),
 * full oversampling, direct generate-and-apply, or generating a coverage
 * row that is post-processed and handed to rasterizer->apply_coverage.
 * For a handful of compositing modes the uncovered part of the clip
 * rectangle is afterwards processed with zero coverage.
 */
static void
ctx_rasterizer_rasterize_edges2 (CtxRasterizer *rasterizer, const int fill_rule 
#if CTX_SHAPE_CACHE
                                ,CtxShapeEntry *shape
#endif
                               )
{
  rasterizer->pending_edges   =   
  rasterizer->active_edges    =   0;
  //rasterizer->scanline        = 0;
  int       is_winding  = fill_rule == CTX_FILL_RULE_WINDING;
  const CtxCovPath comp = rasterizer->comp;
  const int real_aa     = rasterizer->aa;
  uint8_t  *dst         = ((uint8_t *) rasterizer->buf);
  int       scan_start  = rasterizer->blit_y * CTX_FULL_AA;
  int       scan_end    = scan_start + (rasterizer->blit_height - 1) * CTX_FULL_AA;
  const int blit_width  = rasterizer->blit_width;
  const int blit_max_x  = rasterizer->blit_x + blit_width;
  int       minx        = rasterizer->col_min / CTX_SUBDIV - rasterizer->blit_x;
  int       maxx        = (rasterizer->col_max + CTX_SUBDIV-1) / CTX_SUBDIV -
                          rasterizer->blit_x;
  const int blit_stride = rasterizer->blit_stride;
  uint8_t   real_fraction = 255/real_aa;

  rasterizer->prev_active_edges = -1;
  /* clamp the right edge to the blit rectangle, except when rendering
   * into a shape cache entry */
  if (
#if CTX_SHAPE_CACHE
    !shape &&
#endif
    maxx > blit_max_x - 1)
    { maxx = blit_max_x - 1; }

  minx = ctx_maxi (rasterizer->state->gstate.clip_min_x, minx);
  maxx = ctx_mini (rasterizer->state->gstate.clip_max_x, maxx);
  minx = ctx_maxi (0, minx); // redundant?
  if (CTX_UNLIKELY (minx >= maxx))
    {
      return;
    }
  /* one coverage byte per column in [minx, maxx]; element 0 is column minx */
#if CTX_SHAPE_CACHE
  uint8_t _coverage[shape?2:maxx-minx+1];
#else
  uint8_t _coverage[maxx-minx+1];
#endif
  uint8_t *coverage = &_coverage[0];

  int coverage_size;

  /* align the first scanline of the path to a whole pixel row */
  rasterizer->scan_min -= (rasterizer->scan_min % CTX_FULL_AA);
#if CTX_SHAPE_CACHE
  if (shape)
    {
      coverage_size = shape->width;
      coverage = &shape->data[0];
      scan_start = rasterizer->scan_min;
      scan_end   = rasterizer->scan_max;
    }
  else
#endif
  {
     coverage_size = sizeof (_coverage);
     if (rasterizer->scan_min > scan_start)
       {
          dst += (rasterizer->blit_stride * (rasterizer->scan_min-scan_start) / CTX_FULL_AA);
          scan_start = rasterizer->scan_min;
       }
      scan_end = ctx_mini (rasterizer->scan_max, scan_end);
  }

  /* apply the vertical clip, moving dst along with scan_start */
  if (CTX_UNLIKELY(rasterizer->state->gstate.clip_min_y * CTX_FULL_AA > scan_start ))
    { 
       dst += (rasterizer->blit_stride * (rasterizer->state->gstate.clip_min_y * CTX_FULL_AA -scan_start) / CTX_FULL_AA);
       scan_start = rasterizer->state->gstate.clip_min_y * CTX_FULL_AA; 
    }
  scan_end = ctx_mini (rasterizer->state->gstate.clip_max_y * CTX_FULL_AA, scan_end);
  if (CTX_UNLIKELY(scan_start > scan_end ||
      (scan_start > (rasterizer->blit_y + (rasterizer->blit_height-1)) * CTX_FULL_AA) ||
      (scan_end < (rasterizer->blit_y) * CTX_FULL_AA)))
  { 
    /* not affecting this rasterizers scanlines */
    return;
  }

  rasterizer->horizontal_edges =
    rasterizer->needs_aa3  =
    rasterizer->needs_aa5  =
    rasterizer->needs_aa15 = 0;

  ctx_rasterizer_sort_edges (rasterizer);
  rasterizer->scanline = scan_start;
  ctx_rasterizer_feed_edges (rasterizer, 0); 

  /* direct generate-and-apply is only used when nothing needs to see the
   * coverage row itself (clip buffer, shadow, shape cache) */
  int avoid_direct = (0 
#if CTX_ENABLE_CLIP
         || rasterizer->clip_buffer
#endif
#if CTX_ENABLE_SHADOW_BLUR
         || rasterizer->in_shadow
#endif
#if CTX_SHAPE_CACHE
         || shape != NULL
#endif
         );

  for (; rasterizer->scanline <= scan_end;)
    {

    if (rasterizer->active_edges == 0 && rasterizer->pending_edges == 0)
    { /* no edges */
      ctx_rasterizer_feed_edges (rasterizer, 0);
      ctx_rasterizer_increment_edges (rasterizer, CTX_FULL_AA);
      dst += blit_stride;
#if CTX_SHAPE_CACHE
      if (shape)
      {
        memset (coverage, 0, coverage_size);
        coverage += shape->width;
      }
#endif
      rasterizer->prev_active_edges = rasterizer->active_edges;
      continue;
    }
    else if (real_aa != 1 && ( (rasterizer->horizontal_edges!=0) 
          || (rasterizer->active_edges != rasterizer->prev_active_edges)
          || (rasterizer->active_edges + rasterizer->pending_edges == rasterizer->ending_edges)
          ))
    { /* needs full AA */
        int increment = CTX_FULL_AA/real_aa;
        memset (coverage, 0, coverage_size);
        for (int i = 0; i < real_aa; i++)
        {
          ctx_rasterizer_feed_edges (rasterizer, 0);
          ctx_rasterizer_generate_coverage (rasterizer, minx, maxx, coverage, is_winding, real_aa, real_fraction);
          ctx_rasterizer_increment_edges (rasterizer, increment);
        }
    }
    else if (rasterizer->needs_aa3 == 0)
    {
      if (! avoid_direct)
      { /* can generate with direct rendering to target (we're not using shape cache) */
        ctx_rasterizer_increment_edges (rasterizer, CTX_AA_HALFSTEP2);
        ctx_rasterizer_feed_edges (rasterizer, 0);

        ctx_rasterizer_generate_coverage_apply (rasterizer, minx, maxx, coverage, is_winding, comp);
        ctx_rasterizer_increment_edges (rasterizer, CTX_AA_HALFSTEP);

        dst += blit_stride;
        rasterizer->prev_active_edges = rasterizer->active_edges;
        continue;
      }
      else
      { /* cheap fully correct AA, to coverage mask / clipping */
        ctx_rasterizer_increment_edges (rasterizer, CTX_AA_HALFSTEP2);
        ctx_rasterizer_feed_edges (rasterizer, 0);

        memset (coverage, 0, coverage_size);
        ctx_rasterizer_generate_coverage_set (rasterizer, minx, maxx, coverage, is_winding);
        ctx_rasterizer_increment_edges (rasterizer, CTX_AA_HALFSTEP);
      }
    }
    else if (ctx_rasterizer_is_simple (rasterizer))
    { /* the scanline transitions does not contain multiple intersections - each aa segment is a linear ramp */
      ctx_rasterizer_increment_edges (rasterizer, CTX_AA_HALFSTEP2);
      ctx_rasterizer_feed_edges (rasterizer, 1);
      memset (coverage, 0, coverage_size);
      if (!avoid_direct)
      {
        ctx_rasterizer_generate_coverage_apply2 (rasterizer, minx, maxx, coverage, is_winding,
                      comp);
        ctx_rasterizer_increment_edges (rasterizer, CTX_AA_HALFSTEP);

        dst += blit_stride;
        rasterizer->prev_active_edges = rasterizer->active_edges;
        continue;
      }
      ctx_rasterizer_generate_coverage_set2 (rasterizer, minx, maxx, coverage, is_winding);
      ctx_rasterizer_increment_edges (rasterizer, CTX_AA_HALFSTEP);
      if (real_aa == 1)
      {
        /* threshold to on/off coverage.  The row buffer starts at column
         * minx (it only has maxx-minx+1 bytes), so the index must be
         * relative to minx; indexing with the absolute x ran past the
         * end of the buffer whenever minx > 0. */
        for (int x = minx; x <= maxx; x ++)
          coverage[x - minx] = coverage[x - minx] > 127?255:0;
      }
    }
    else
    { /* determine level of oversampling based on lowest steepness edges */
      int aa = 3;
      if (rasterizer->needs_aa5 && real_aa >=5)
      {
         aa = 5;
         if (rasterizer->needs_aa15 && real_aa >=15)
           aa = 15;
      }
      int scanline_increment = 15/aa;

      memset (coverage, 0, coverage_size);
      uint8_t fraction = 255/aa;
      for (int i = 0; i < CTX_FULL_AA; i+= scanline_increment)
      {
        ctx_rasterizer_feed_edges (rasterizer, 0);
        ctx_rasterizer_generate_coverage (rasterizer, minx, maxx, coverage, is_winding, aa, fraction);
        ctx_rasterizer_increment_edges (rasterizer, scanline_increment);
      }
    }

  /* coverage row is complete: post-process and composite it (or, for a
   * shape cache entry, just move on to the next row of the entry) */
  ctx_coverage_post_process (rasterizer, minx, maxx, coverage - minx, NULL, NULL);
#if CTX_SHAPE_CACHE
  if (shape == NULL)
#endif
  {
    rasterizer->apply_coverage (rasterizer,
                         &dst[(minx * rasterizer->format->bpp) /8],
                         rasterizer->color,
                         minx,
                         coverage,
                         maxx-minx+ 1);
  }
#if CTX_SHAPE_CACHE
  else
  {
    coverage += shape->width;
  }
#endif
      dst += blit_stride;
      rasterizer->prev_active_edges = rasterizer->active_edges;
    }

  if (CTX_UNLIKELY(rasterizer->state->gstate.compositing_mode == CTX_COMPOSITE_SOURCE_OUT ||
      rasterizer->state->gstate.compositing_mode == CTX_COMPOSITE_SOURCE_IN ||
      rasterizer->state->gstate.compositing_mode == CTX_COMPOSITE_DESTINATION_IN ||
      rasterizer->state->gstate.compositing_mode == CTX_COMPOSITE_DESTINATION_ATOP ||
      rasterizer->state->gstate.compositing_mode == CTX_COMPOSITE_CLEAR))
  {
     /* fill in the rest of the blitrect when compositing mode permits it */
     uint8_t nocoverage[rasterizer->blit_width];
     //int gscan_start = rasterizer->state->gstate.clip_min_y * CTX_FULL_AA;
     int gscan_start = rasterizer->state->gstate.clip_min_y * CTX_FULL_AA;
     int gscan_end = rasterizer->state->gstate.clip_max_y * CTX_FULL_AA;
     memset (nocoverage, 0, sizeof(nocoverage));
     int startx   = rasterizer->state->gstate.clip_min_x;
     int endx     = rasterizer->state->gstate.clip_max_x;
     int clipw    = endx-startx + 1;
     uint8_t *dst = ( (uint8_t *) rasterizer->buf);

     /* rows of the clip rectangle above the rasterized range */
     dst = (uint8_t*)(rasterizer->buf) + rasterizer->blit_stride * (gscan_start / CTX_FULL_AA);
     for (rasterizer->scanline = gscan_start; rasterizer->scanline < scan_start;)
     {
       rasterizer->apply_coverage (rasterizer,
                                   &dst[ (startx * rasterizer->format->bpp) /8],
                                   rasterizer->color,
                                      0,
                                      nocoverage, clipw);
       rasterizer->scanline += CTX_FULL_AA;
       dst += rasterizer->blit_stride;
     }
     /* left margin.
      * NOTE(review): minx was clamped to >= clip_min_x above, so this
      * condition looks unreachable; the intended test may have been
      * minx > startx - confirm before changing it. */
     if (minx < startx)
     {
     dst = (uint8_t*)(rasterizer->buf) + rasterizer->blit_stride * (scan_start / CTX_FULL_AA);
     for (rasterizer->scanline = scan_start; rasterizer->scanline < scan_end;)
     {
       rasterizer->apply_coverage (rasterizer,
                                   &dst[ (startx * rasterizer->format->bpp) /8],
                                   rasterizer->color,
                                   0,
                                   nocoverage, minx-startx);
       /* advance the scanline like the sibling loops do; without this
        * the loop could never terminate once entered */
       rasterizer->scanline += CTX_FULL_AA;
       dst += blit_stride;
     }
     }

     /* right margin */
     if (endx > maxx)
     {
     dst = (uint8_t*)(rasterizer->buf) + rasterizer->blit_stride * (scan_start / CTX_FULL_AA);
     for (rasterizer->scanline = scan_start; rasterizer->scanline < scan_end;)
     {
       rasterizer->apply_coverage (rasterizer,
                                   &dst[ (maxx * rasterizer->format->bpp) /8],
                                   rasterizer->color,
                                   0,
                                   nocoverage, endx-maxx);

       rasterizer->scanline += CTX_FULL_AA;
       dst += rasterizer->blit_stride;
     }
     }
#if 1
     /* rows below the rasterized range - currently disabled via if(0) */
     dst = (uint8_t*)(rasterizer->buf) + rasterizer->blit_stride * (scan_end / CTX_FULL_AA);
     // XXX this crashes under valgrind/asan
     if(0)for (rasterizer->scanline = scan_end; rasterizer->scanline/CTX_FULL_AA < gscan_end-1;)
     {
       rasterizer->apply_coverage (rasterizer,
                                   &dst[ (startx * rasterizer->format->bpp) /8],
                                   rasterizer->color,
                                   0,
                                   nocoverage, clipw-1);

       rasterizer->scanline += CTX_FULL_AA;
       dst += blit_stride;
     }
#endif
  }
}


/* Public (per SIMD variant) entry point for edge rasterization; forwards
 * to ctx_rasterizer_rasterize_edges2.
 */
void
CTX_SIMD_SUFFIX (ctx_rasterizer_rasterize_edges) (CtxRasterizer *rasterizer, const int fill_rule 
#if CTX_SHAPE_CACHE
                                ,CtxShapeEntry *shape
#endif
                               );

void
CTX_SIMD_SUFFIX (ctx_rasterizer_rasterize_edges) (CtxRasterizer *rasterizer, const int fill_rule 
#if CTX_SHAPE_CACHE
                                ,CtxShapeEntry *shape
#endif
                               )
{
#if CTX_INLINE_FILL_RULE
  /* call with a literal fill rule in each branch, so each call site
   * passes a compile time constant */
  if (fill_rule)
  {
    ctx_rasterizer_rasterize_edges2 (rasterizer, 1
#if CTX_SHAPE_CACHE
                    ,shape
#endif
                    );
    return;
  }
  ctx_rasterizer_rasterize_edges2 (rasterizer, 0
#if CTX_SHAPE_CACHE
                  ,shape
#endif
                  );
#else
  ctx_rasterizer_rasterize_edges2 (rasterizer, fill_rule
#if CTX_SHAPE_CACHE
                  ,shape
#endif
                  );
#endif
}



extern CtxPixelFormatInfo *ctx_pixel_formats;

/* Point the global dispatch variables at this translation unit's
 * CTX_SIMD_SUFFIX-ed implementations.
 */
void CTX_SIMD_SUFFIX(ctx_simd_setup)(void);
void CTX_SIMD_SUFFIX(ctx_simd_setup)(void)
{
#if CTX_FAST_FILL_RECT
  /* rectangle fast paths, only present when enabled */
  ctx_composite_stroke_rect = CTX_SIMD_SUFFIX(ctx_composite_stroke_rect);
  ctx_composite_fill_rect   = CTX_SIMD_SUFFIX(ctx_composite_fill_rect);
#endif
  ctx_rasterizer_rasterize_edges = CTX_SIMD_SUFFIX(ctx_rasterizer_rasterize_edges);
  ctx_composite_setup            = CTX_SIMD_SUFFIX(ctx_composite_setup);
  ctx_pixel_formats              = CTX_SIMD_SUFFIX(ctx_pixel_formats);
}


#endif
#endif
#if CTX_IMPLEMENTATION
#if CTX_RASTERIZER


/* Cheap approximation of the length of the vector (x, y):
 * 0.96 * larger magnitude + 0.4 * smaller magnitude, no square root.
 */
inline static float ctx_fast_hypotf (float x, float y)
{
  const float ax = (x < 0) ? -x : x;
  const float ay = (y < 0) ? -y : y;
  const int   y_is_larger = ax < ay;
  const float larger  = y_is_larger ? ay : ax;
  const float smaller = y_is_larger ? ax : ay;
  return 0.96f * larger + 0.4f * smaller;
}



/* Append a color stop (position + RGBA) to the state's gradient.
 *
 * FIXME XXX there is only one gradient, separate gradients might be
 * needed for fill and stroke.
 *
 * @pos:  position of the stop
 * @rgba: four floats, red, green, blue and alpha
 */
static void
ctx_rasterizer_gradient_add_stop (CtxRasterizer *rasterizer, float pos, float *rgba)
{
  CtxGradient     *grad = &rasterizer->state->gradient;
  CtxGradientStop *slot = &grad->stops[grad->n_stops];

  slot->pos = pos;
  ctx_color_set_rgba (rasterizer->state, &slot->color,
                      rgba[0], rgba[1], rgba[2], rgba[3]);

  /* once n_stops has reached 15 it no longer grows, so further stops
   * keep overwriting the last slot */
  if (grad->n_stops >= 15)
    return;
  grad->n_stops++;
}

/* Record (x, y) as the current inner point and grow the accumulated
 * column/scanline extents so that they include it.
 */
static inline void ctx_rasterizer_update_inner_point (CtxRasterizer *rasterizer, int x, int y)
{
  /* horizontal extent */
  rasterizer->col_min  = ctx_mini (x, rasterizer->col_min);
  rasterizer->col_max  = ctx_maxi (x, rasterizer->col_max);
  /* vertical extent */
  rasterizer->scan_min = ctx_mini (y, rasterizer->scan_min);
  rasterizer->scan_max = ctx_maxi (y, rasterizer->scan_max);
  /* the point itself, used as start of the next edge */
  rasterizer->inner_y  = y;
  rasterizer->inner_x  = x;
}

/* Add an edge from the current inner point to (x1, y1) to the edge list
 * and make (x1, y1) the new inner point.
 *
 * Returns whatever ctx_edgelist_add_single returns.
 */
static inline int ctx_rasterizer_add_point (CtxRasterizer *rasterizer, int x1, int y1)
{
  CtxSegment segment = {CTX_EDGE, {{0},}};

  /* end point */
  segment.data.s16[3] = y1;
  segment.data.s16[2] = x1;
  /* start point: where the previous edge ended */
  segment.data.s16[1] = rasterizer->inner_y;
  segment.data.s16[0] = rasterizer->inner_x;

  ctx_rasterizer_update_inner_point (rasterizer, x1, y1);

  return ctx_edgelist_add_single (&rasterizer->edge_list, (CtxEntry*)&segment);
}

/* Multipliers used when folding edge coordinates into the shape hash in
 * ctx_rasterizer_poly_to_hash and ctx_rasterizer_poly_to_edges (PRIME1,
 * PRIME3 and PRIME4 are the ones referenced there).  The first set is
 * compiled out.
 * NOTE(review): 4221 in the disabled set is not prime (3 * 3 * 7 * 67).
 */
#if 0
#define CTX_SHAPE_CACHE_PRIME1   7853
#define CTX_SHAPE_CACHE_PRIME2   4129
#define CTX_SHAPE_CACHE_PRIME3   3371
#define CTX_SHAPE_CACHE_PRIME4   4221
#else
#define CTX_SHAPE_CACHE_PRIME1   283
#define CTX_SHAPE_CACHE_PRIME2   599
#define CTX_SHAPE_CACHE_PRIME3   101
#define CTX_SHAPE_CACHE_PRIME4   661
#endif


/* smoothed shape cache hit percentage, refreshed periodically by
 * ctx_shape_entry_find from the hit/miss counters below */
float ctx_shape_cache_rate = 0.0;
#if CTX_SHAPE_CACHE
/* initialised from CTX_SHAPE_CACHE_DEFAULT */
int   _ctx_shape_cache_enabled = CTX_SHAPE_CACHE_DEFAULT;

//static CtxShapeCache ctx_cache = {{NULL,}, 0};

/* lookups since the last refresh of ctx_shape_cache_rate: hits counts
 * lookups that found a matching entry, misses those that did not */
static long ctx_shape_cache_hits   = 0;
static long ctx_shape_cache_misses = 0;


/* Look up the shape cache slot for (hash, width, height) and return the
 * entry to use for rendering.
 *
 * On a hit (same hash and dimensions) the entry's use count is bumped and
 * the entry is returned as is.  Otherwise the slot is recycled: the old
 * allocation is reused when it is large enough, else it is freed and a
 * new zeroed one is allocated; the returned entry then has uses == 0.
 *
 * @rasterizer: owner of the shape cache
 * @hash:       hash of the shape, also selects the slot
 * @width:      width of the coverage buffer in bytes
 * @height:     number of rows of the coverage buffer
 *
 * NOTE(review): the result of calloc is not checked before it is
 * dereferenced; callers rely on this function always succeeding.
 */
static inline CtxShapeEntry *ctx_shape_entry_find (CtxRasterizer *rasterizer, uint32_t hash, int width, int height)
{
  /* use both some high and some low bits  */
  int entry_no = ( (hash >> 10) ^ (hash & 1023) ) % CTX_SHAPE_CACHE_ENTRIES;
  {
    /* every 257th call: fold the hit percentage of the last batch into
     * the running average and restart the counters */
    static int i = 0;
    i++;
    if (i>256)
      {
        if (ctx_shape_cache_hits+ctx_shape_cache_misses)
        {
          ctx_shape_cache_rate = 
                0.5 * ctx_shape_cache_rate +
                0.5 * (ctx_shape_cache_hits * 100.0  / (ctx_shape_cache_hits+ctx_shape_cache_misses));
        }
        i = 0;
        ctx_shape_cache_hits = 0;
        ctx_shape_cache_misses = 0;
      }
  }
// XXX : this 1 one is needed  to silence a false positive:
// ==90718== Invalid write of size 1
// ==90718==    at 0x1189EF: ctx_rasterizer_generate_coverage (ctx.h:4786)
// ==90718==    by 0x118E57: ctx_rasterizer_rasterize_edges (ctx.h:4907)
//
  int size = sizeof (CtxShapeEntry) + width * height + 1;

  CtxShapeEntry *entry = rasterizer->shape_cache.entries[entry_no];
  if (entry)
    {
      /* must mirror the formula used for `size`: the buffer holds
       * width * height bytes (not width + height), plus the slack byte */
      int old_size = sizeof (CtxShapeEntry) + entry->width * entry->height + 1;
      if (entry->hash == hash &&
          entry->width == width &&
          entry->height == height)
        {
          if (entry->uses < 1<<30)
            { entry->uses++; }
          ctx_shape_cache_hits ++;
          return entry;
        }

      if (old_size >= size)
      {
         /* reuse the existing allocation; together with the += size
          * below the accounted size stays at old_size */
         rasterizer->shape_cache.size -= old_size;
         rasterizer->shape_cache.size += (old_size-size); // slack/leaked
      }
      else
      {
        /* too small: release it, taking its full size (including the
         * slack byte) out of the accounting */
        rasterizer->shape_cache.entries[entry_no] = NULL;
        rasterizer->shape_cache.size -= old_size;
        free (entry);
        entry = NULL;
      }
    }

  if (!entry)
    entry = rasterizer->shape_cache.entries[entry_no] = (CtxShapeEntry *) calloc (size, 1);

  rasterizer->shape_cache.size += size;

  ctx_shape_cache_misses ++;
  entry->hash   = hash;
  entry->width  = width;
  entry->height = height;
  entry->uses = 0;
  return entry;
}

#endif

/* Hash the current edge list.
 *
 * The hash is seeded from the end point of the first segment and then
 * folds in, for every segment, the delta of its end point relative to the
 * previous segment's end point.
 */
static uint32_t ctx_rasterizer_poly_to_hash (CtxRasterizer *rasterizer)
{
  CtxSegment        *segments = (CtxSegment*)&rasterizer->edge_list.entries[0];
  const unsigned int n        = rasterizer->edge_list.count;

  int prev_x = segments[0].data.s16[2];
  int prev_y = segments[0].data.s16[3];

  /* seed: absolute position of the first end point */
  uint32_t hash = prev_x;
  hash = hash * CTX_SHAPE_CACHE_PRIME1 + prev_y;

  for (unsigned int idx = 0; idx < n; idx++)
    {
      const int cur_x   = segments[idx].data.s16[2];
      const int cur_y   = segments[idx].data.s16[3];
      const int delta_x = cur_x - prev_x;
      const int delta_y = cur_y - prev_y;

      hash = hash * CTX_SHAPE_CACHE_PRIME3 + delta_x;
      hash = hash * CTX_SHAPE_CACHE_PRIME4 + delta_y;

      prev_x = cur_x;
      prev_y = cur_y;
    }
  return hash;
}

/* Normalise the edge list for scan conversion: every segment whose end
 * point has a smaller y than its start point is rewritten as a
 * CTX_EDGE_FLIPPED segment with the two points swapped.
 *
 * With CTX_SHAPE_CACHE the pass also computes and returns a hash of the
 * (unflipped) end point deltas; without it, or for an empty edge list,
 * 0 is returned.
 */
static uint32_t ctx_rasterizer_poly_to_edges (CtxRasterizer *rasterizer)
{
  const unsigned int n_segments = rasterizer->edge_list.count;
  if (CTX_UNLIKELY (n_segments == 0))
     return 0;

  CtxSegment *segment = (CtxSegment*)&rasterizer->edge_list.entries[0];
  CtxSegment *past_last = segment + n_segments;

#if CTX_SHAPE_CACHE
  int prev_x = segment->data.s16[2];
  int prev_y = segment->data.s16[3];
  /* seed from the first end point, masked with CTX_SUBDIV */
  uint32_t hash = (prev_x & CTX_SUBDIV);
  hash = hash * CTX_SHAPE_CACHE_PRIME1 + (prev_y & CTX_SUBDIV);
#else
  uint32_t hash = 0;
#endif

  for (; segment != past_last; segment++)
    {
#if CTX_SHAPE_CACHE
      /* hash the end point before the segment is possibly flipped */
      const int cur_x   = segment->data.s16[2];
      const int cur_y   = segment->data.s16[3];
      const int delta_x = cur_x - prev_x;
      const int delta_y = cur_y - prev_y;
      prev_x = cur_x;
      prev_y = cur_y;
      hash = hash * CTX_SHAPE_CACHE_PRIME3 + delta_x;
      hash = hash * CTX_SHAPE_CACHE_PRIME4 + delta_y;
#endif
      if (segment->data.s16[3] < segment->data.s16[1])
        {
          *segment = ctx_segment_s16 (CTX_EDGE_FLIPPED,
                              segment->data.s16[2], segment->data.s16[3],
                              segment->data.s16[0], segment->data.s16[1]);
        }
    }

  return hash;
}

/* Close the current sub path, if there is one, by adding a line back to
 * its first point.
 */
static inline void ctx_rasterizer_finish_shape (CtxRasterizer *rasterizer)
{
  if (!rasterizer->has_shape)
    return;
  if (!rasterizer->has_prev)
    return;

  ctx_rasterizer_line_to (rasterizer, rasterizer->first_x, rasterizer->first_y);
  rasterizer->has_prev = 0;
}

//#define MIN_Y -100
//#define MAX_Y 3800
//#define MIN_X -100
//#define MAX_X 3600*10

/* Start a new sub path at user space position (x, y).
 *
 * The user space position is remembered as both the current and the
 * first point; the transformed position, scaled to sub-pixel units,
 * becomes the rasterizer's inner point.  No clamping of the coordinates
 * is done here.
 */
static inline void ctx_rasterizer_move_to (CtxRasterizer *rasterizer, float x, float y)
{
  float device_x = x;
  float device_y = y;

  rasterizer->first_x  = x;
  rasterizer->first_y  = y;
  rasterizer->x        = x;
  rasterizer->y        = y;
  rasterizer->has_prev = -1; /* the next line_to starts a new edge chain */

  if (rasterizer->uses_transforms)
    {
      _ctx_user_to_device (rasterizer->state, &device_x, &device_y);
    }

  /* x is relative to the blit rectangle and in CTX_SUBDIV units,
   * y is in CTX_FULL_AA units */
  device_x = (device_x - rasterizer->blit_x) * CTX_SUBDIV;
  device_y = device_y * CTX_FULL_AA;

  ctx_rasterizer_update_inner_point (rasterizer, device_x, device_y);
}

/* Add a straight edge from the current point to user space (x, y).
 *
 * The first edge added after a move_to (has_prev <= 0) is tagged
 * CTX_NEW_EDGE.
 */
static inline void
ctx_rasterizer_line_to (CtxRasterizer *rasterizer, float x, float y)
{
  float device_x = x;
  float device_y = y;

  rasterizer->x         = x;
  rasterizer->y         = y;
  rasterizer->has_shape = 1;

  if (rasterizer->uses_transforms)
    {
      _ctx_user_to_device (rasterizer->state, &device_x, &device_y);
    }
  device_x -= rasterizer->blit_x;

  /* no clamping of the coordinates is done here */
  ctx_rasterizer_add_point (rasterizer, device_x * CTX_SUBDIV, device_y * CTX_FULL_AA);

  if (CTX_LIKELY (rasterizer->has_prev > 0))
    return;

  /* retag the segment that was just appended as start of a new chain */
  CtxSegment *segments = (CtxSegment*)rasterizer->edge_list.entries;
  CtxSegment *newest   = &segments[rasterizer->edge_list.count-1];
  newest->code = CTX_NEW_EDGE;
  rasterizer->has_prev = 1;
}


/* Evaluate one coordinate of the cubic bezier with control values
 * x0..x3 at parameter dt, by repeated linear interpolation.
 */
CTX_INLINE static float
ctx_bezier_sample_1d (float x0, float x1, float x2, float x3, float dt)
{
  /* first level: between neighbouring control values */
  const float p01 = ctx_lerpf (x0, x1, dt);
  const float p12 = ctx_lerpf (x1, x2, dt);
  const float p23 = ctx_lerpf (x2, x3, dt);
  /* second level */
  const float p012 = ctx_lerpf (p01, p12, dt);
  const float p123 = ctx_lerpf (p12, p23, dt);
  /* point on the curve */
  return ctx_lerpf (p012, p123, dt);
}

/* Sample the cubic bezier (x0,y0) (x1,y1) (x2,y2) (x3,y3) at parameter
 * dt, storing the resulting point in *x and *y.
 */
CTX_INLINE static void
ctx_bezier_sample (float x0, float y0,
                   float x1, float y1,
                   float x2, float y2,
                   float x3, float y3,
                   float dt, float *x, float *y)
{
  /* the two coordinates are evaluated independently */
  const float sample_x = ctx_bezier_sample_1d (x0, x1, x2, x3, dt);
  *x = sample_x;
  const float sample_y = ctx_bezier_sample_1d (y0, y1, y2, y3, dt);
  *y = sample_y;
}

/* Recursively flatten the part [s, e] of a cubic bezier into line_to
 * calls.
 *
 * (ox,oy) (x0,y0) (x1,y1) (x2,y2) are the four control points of the
 * whole curve, (sx,sy) and (ex,ey) the curve points at s and e.  The
 * curve point at the middle of the range is compared with the chord from
 * (sx,sy) to (ex,ey) interpolated at that same parameter; when the
 * squared distance exceeds @tolerance, and fewer than 5 levels have been
 * used, both halves are processed with a line_to to the middle point
 * emitted in between.  The end point itself is not emitted here.
 */
static inline void
ctx_rasterizer_bezier_divide (CtxRasterizer *rasterizer,
                              float ox, float oy,
                              float x0, float y0,
                              float x1, float y1,
                              float x2, float y2,
                              float sx, float sy,
                              float ex, float ey,
                              float s,
                              float e,
                              int   iteration,
                              float tolerance)
{
  const float mid = (s + e) * 0.5f;
  float curve_x, curve_y;
  ctx_bezier_sample (ox, oy, x0, y0, x1, y1, x2, y2, mid, &curve_x, &curve_y);

  const float chord_x = ctx_lerpf (sx, ex, mid);
  const float chord_y = ctx_lerpf (sy, ey, mid);
  const float miss_x  = chord_x - curve_x;
  const float miss_y  = chord_y - curve_y;

  /* flat enough, or recursion budget used up */
  if (iteration >= 5 || !((miss_x*miss_x+miss_y*miss_y) > tolerance))
    return;

  ctx_rasterizer_bezier_divide (rasterizer, ox, oy, x0, y0, x1, y1, x2, y2,
                                sx, sy, curve_x, curve_y, s, mid, iteration + 1,
                                tolerance);
  ctx_rasterizer_line_to (rasterizer, curve_x, curve_y);
  ctx_rasterizer_bezier_divide (rasterizer, ox, oy, x0, y0, x1, y1, x2, y2,
                                curve_x, curve_y, ex, ey, mid, e, iteration + 1,
                                tolerance);
}

/* Add a cubic bezier from the state's current point (state->x, state->y)
 * through control points (x0,y0) and (x1,y1) to (x2,y2).
 *
 * The curve is flattened with ctx_rasterizer_bezier_divide using a
 * squared tolerance derived from 0.125 divided by the scale of the
 * current transform, and always ends with a line_to to (x2,y2).
 *
 * The CTX_AVOID_CLIPPED_SUBDIVISION path (compiled out by the define
 * below) skips the subdivision for curves whose transformed bounding box
 * is tiny or outside the blit rectangle.
 */
static void
ctx_rasterizer_curve_to (CtxRasterizer *rasterizer,
                         float x0, float y0,
                         float x1, float y1,
                         float x2, float y2)
{
  float tolerance = 0.125f/ctx_matrix_get_scale (&rasterizer->state->gstate.transform);
  float ox = rasterizer->state->x;
  float oy = rasterizer->state->y;

  tolerance = tolerance * tolerance;

  {
#define CTX_AVOID_CLIPPED_SUBDIVISION 0
#if CTX_AVOID_CLIPPED_SUBDIVISION
  /* user space bounding box of the four control points */
  float maxx = ctx_maxf (x1,x2);
  maxx = ctx_maxf (maxx, ox);
  maxx = ctx_maxf (maxx, x0);
  float maxy = ctx_maxf (y1,y2);
  maxy = ctx_maxf (maxy, oy);
  maxy = ctx_maxf (maxy, y0);
  float minx = ctx_minf (x1,x2);
  minx = ctx_minf (minx, ox);
  minx = ctx_minf (minx, x0);
  float miny = ctx_minf (y1,y2);
  miny = ctx_minf (miny, oy);
  miny = ctx_minf (miny, y0);
  
  float coords[4][2]={{minx,miny},
                      {maxx,miny},
                      {maxx,maxy},
                      {minx,maxy}};
  for (int i = 0; i < 4; i++)
  {
    _ctx_user_to_device (rasterizer->state, &coords[i][0], &coords[i][1]);
  }
  /* device space bounding box of the transformed corners */
  minx = maxx = coords[0][0];
  miny = maxy = coords[0][1];
  for (int i = 1; i < 4; i++)
  {
    minx = ctx_minf (minx, coords[i][0]);
    miny = ctx_minf (miny, coords[i][1]);
    /* accumulate the maxima from maxx/maxy; using minx/miny here
     * discarded the running maximum */
    maxx = ctx_maxf (maxx, coords[i][0]);
    maxy = ctx_maxf (maxy, coords[i][1]);
  }

    if( (maxx-minx) + (maxy-miny) < 0.66f ||
        (minx > rasterizer->blit_x + rasterizer->blit_width) ||
        (miny > rasterizer->blit_y + rasterizer->blit_height) ||
        (maxx < rasterizer->blit_x) ||
        (maxy < rasterizer->blit_y) )
    {
      /* tiny or invisible: only the final line_to below is emitted */
    }
    else
#endif
  ctx_rasterizer_bezier_divide (rasterizer,
                                ox, oy, x0, y0,
                                x1, y1, x2, y2,
                                ox, oy, x2, y2,
                                0.0f, 1.0f, 0, tolerance);
  }
  ctx_rasterizer_line_to (rasterizer, x2, y2);
}

/* move_to with (x, y) given relative to the rasterizer's current point;
 * a zero offset is not special-cased.
 */
static void
ctx_rasterizer_rel_move_to (CtxRasterizer *rasterizer, float x, float y)
{
  const float abs_x = x + rasterizer->x;
  const float abs_y = y + rasterizer->y;
  ctx_rasterizer_move_to (rasterizer, abs_x, abs_y);
}

/* line_to with (x, y) given relative to the rasterizer's current point;
 * a zero offset is not special-cased.
 */
static void
ctx_rasterizer_rel_line_to (CtxRasterizer *rasterizer, float x, float y)
{
  const float abs_x = x + rasterizer->x;
  const float abs_y = y + rasterizer->y;
  ctx_rasterizer_line_to (rasterizer, abs_x, abs_y);
}

/* curve_to with all three points given relative to the rasterizer's
 * current point.
 */
static void
ctx_rasterizer_rel_curve_to (CtxRasterizer *rasterizer,
                             float x0, float y0, float x1, float y1, float x2, float y2)
{
  /* every point is offset from the same origin */
  const float origin_x = rasterizer->x;
  const float origin_y = rasterizer->y;

  ctx_rasterizer_curve_to (rasterizer,
                           x0 + origin_x, y0 + origin_y,
                           x1 + origin_x, y1 + origin_y,
                           x2 + origin_x, y2 + origin_y);
}


/* Find the slot in the rasterizer's texture source whose texture has
 * data and an eid equal to @eid.
 *
 * Returns the slot number, or -1 when no such texture exists.
 */
static int
ctx_rasterizer_find_texture (CtxRasterizer *rasterizer,
                             const char *eid)
{
  for (int slot = 0; slot < CTX_MAX_TEXTURES; slot++)
  {
    /* skip slots without pixel data or without an id */
    if (!rasterizer->texture_source->texture[slot].data)
      continue;
    if (!rasterizer->texture_source->texture[slot].eid)
      continue;
    if (strcmp (rasterizer->texture_source->texture[slot].eid, eid) == 0)
      return slot;
  }
  return -1;
}

/* Make the texture identified by @eid the current source, translated by
 * (x, y).
 *
 * The stroke source is targeted when state->source is non-zero and the
 * stroke source is not of type CTX_SOURCE_INHERIT_FILL, otherwise the
 * fill source; state->source is reset to 0 either way.  When @eid is
 * not found slot 0 is used instead; if the chosen slot has no data a
 * message is printed on stderr and the source is left unchanged.
 */
static void
ctx_rasterizer_set_texture (CtxRasterizer *rasterizer,
                            const char *eid,
                            float x,
                            float y)
{
  CtxGState *gstate = &rasterizer->state->gstate;
  CtxSource *target = &gstate->source_fill;

  if (rasterizer->state->source != 0 &&
      gstate->source_stroke.type != CTX_SOURCE_INHERIT_FILL)
    target = &gstate->source_stroke;
  rasterizer->state->source = 0;

  int slot = ctx_rasterizer_find_texture (rasterizer, eid);
  if (slot < 0 || slot >= CTX_MAX_TEXTURES)
    slot = 0; /* fall back to the first slot */

  if (rasterizer->texture_source->texture[slot].data == NULL)
    {
      fprintf (stderr, "ctx tex fail %p %s %i\n", rasterizer->texture_source, eid, slot);
      return;
    }

  /* stamp the texture with the texture source's current frame */
  rasterizer->texture_source->texture[slot].frame = rasterizer->texture_source->frame;

  target->type = CTX_SOURCE_TEXTURE;
  target->texture.buffer = &rasterizer->texture_source->texture[slot];
  ctx_matrix_identity (&target->set_transform);
  ctx_matrix_translate (&target->set_transform, x, y);
}


/* Register the pixel data @data (@width x @height, pixel format @format)
 * under @eid in the rasterizer's texture source and make it the current
 * source at offset (0, 0).
 *
 * The whole operation happens with the texture lock held: the same
 * texture_source is used from all threads to keep allocations down, so
 * it needs synchronizing (a pre-pass could be better).
 */
static void
ctx_rasterizer_define_texture (CtxRasterizer *rasterizer,
                               const char    *eid,
                               int            width,
                               int            height,
                               int            format,
                               char unsigned *data)
{
  const CtxPixelFormat pixel_format = (CtxPixelFormat)format;

  _ctx_texture_lock ();

  /* per the original note: with 23 as userdata for
   * ctx_buffer_pixels_free, texture_init dups the data on use */
  ctx_texture_init (rasterizer->texture_source,
                    eid,
                    width,
                    height,
                    ctx_pixel_format_get_stride (pixel_format, width),
                    pixel_format,
#if CTX_ENABLE_CM
                    (void*)rasterizer->state->gstate.texture_space,
#else
                    NULL,
#endif
                    data,
                    ctx_buffer_pixels_free, (void*)23);

  ctx_rasterizer_set_texture (rasterizer, eid, 0.0, 0.0);

  {
    /* prepare color management for the fill source's texture buffer,
     * unless it is already flagged as color managed */
    CtxGState *gstate = &rasterizer->state->gstate;
    if (!gstate->source_fill.texture.buffer->color_managed)
    {
      _ctx_texture_prepare_color_management (rasterizer->state,
                                             gstate->source_fill.texture.buffer);
    }
  }

  _ctx_texture_unlock ();
}


/* Returns 1 when drawing would be fully transparent: either the global
 * alpha is 0, or the fill source is a plain color whose alpha is 0 as a
 * u8 value.  Returns 0 otherwise.
 *
 * NOTE(review): @stroke is not consulted, the fill source is inspected
 * regardless - confirm that this is intended.
 */
inline static int
ctx_is_transparent (CtxRasterizer *rasterizer, int stroke)
{
  CtxGState *gstate = &rasterizer->state->gstate;

  if (!gstate->global_alpha_u8)
    return 1;

  /* only plain colors can be checked cheaply */
  if (gstate->source_fill.type != CTX_SOURCE_COLOR)
    return 0;

  uint8_t gray_alpha[2];
  ctx_color_get_graya_u8 (rasterizer->state, &gstate->source_fill.color, gray_alpha);
  return gray_alpha[1] == 0 ? 1 : 0;
}



/**
 * ctx_rasterizer_fill:
 * @rasterizer: rasterizer holding the path to fill in its edge_list
 *
 * Fills the path currently accumulated in rasterizer->edge_list using the
 * fill rule of the current graphics state.
 *
 * Nothing is drawn when the source is fully transparent or when the
 * path's bounding box (scan_min/scan_max, col_min/col_max) lies outside
 * the blit rectangle.  Otherwise the ink extents stored in the state are
 * grown to include the path and one of these routes is taken:
 *
 *  - with CTX_FAST_FILL_RECT: an unclipped five-entry edge list passing
 *    the axis-aligned rectangle test goes to ctx_composite_fill_rect;
 *  - with CTX_SHAPE_CACHE: sufficiently small shapes are rendered through
 *    (or into) a cached coverage mask;
 *  - everything else goes through ctx_rasterizer_rasterize_edges.
 *
 * When rasterizer->preserve is set the edge list is saved on entry and
 * restored on exit; the preserve flag is always cleared before returning.
 * While rasterizer->in_shadow is set (CTX_ENABLE_SHADOW_BLUR) the edges
 * and bounds are displaced by the shadow offset for the duration of the
 * call; the bounds are moved back on exit.
 */
static void
ctx_rasterizer_fill (CtxRasterizer *rasterizer)
{
  /* NOTE(review): with preserve set and an empty edge list this becomes 1,
   * so one entry is copied out and later restored with count 1 - confirm
   * callers never reach this combination. */
  unsigned int preserved_count =
          (rasterizer->preserve&&rasterizer->edge_list.count)?
             rasterizer->edge_list.count:1;
  int blit_x = rasterizer->blit_x;
  int blit_y = rasterizer->blit_y;
  int blit_width = rasterizer->blit_width;
  int blit_height = rasterizer->blit_height;
#if CTX_SHAPE_CACHE
  int blit_stride = rasterizer->blit_stride;
#endif

  CtxSegment temp[preserved_count]; /* copy of already built up path's poly line
                                       XXX - by building a large enough path
                                       the stack can be smashed!
                                     */
  if (CTX_UNLIKELY(rasterizer->preserve))
    { memcpy (temp, rasterizer->edge_list.entries, sizeof (temp) ); }

#if CTX_ENABLE_SHADOW_BLUR
  if (CTX_UNLIKELY(rasterizer->in_shadow))
  {
  /* displace every edge end point and the bounds by the shadow offset */
  for (unsigned int i = 0; i < rasterizer->edge_list.count; i++)
    {
      CtxSegment *entry = &((CtxSegment*)rasterizer->edge_list.entries)[i];
      entry->data.s16[2] += rasterizer->shadow_x * CTX_SUBDIV;
      entry->data.s16[3] += rasterizer->shadow_y * CTX_FULL_AA;
    }
    rasterizer->scan_min += rasterizer->shadow_y * CTX_FULL_AA;
    rasterizer->scan_max += rasterizer->shadow_y * CTX_FULL_AA;
    rasterizer->col_min  += (rasterizer->shadow_x - rasterizer->state->gstate.shadow_blur * 3 + 1) * CTX_SUBDIV;
    rasterizer->col_max  += (rasterizer->shadow_x + rasterizer->state->gstate.shadow_blur * 3 + 1) * CTX_SUBDIV;
  }
#endif

  if (CTX_UNLIKELY(ctx_is_transparent (rasterizer, 0) ||
      rasterizer->scan_min > CTX_FULL_AA * (blit_y + blit_height) ||
      rasterizer->scan_max < CTX_FULL_AA * blit_y ||
      rasterizer->col_min > CTX_SUBDIV * (blit_x + blit_width) ||
      rasterizer->col_max < CTX_SUBDIV * blit_x))
    {
      /* invisible or entirely outside the blit rectangle: nothing to draw */
    }
  else
  {
    ctx_composite_setup (rasterizer);

    /* Grow the ink extents to include this shape.  Each bound is combined
     * with its own previous value (the maximum x was previously derived
     * from ink_min_x, which let the right edge shrink). */
    rasterizer->state->ink_min_x = ctx_mini (rasterizer->state->ink_min_x, rasterizer->col_min / CTX_SUBDIV);
    rasterizer->state->ink_max_x = ctx_maxi (rasterizer->state->ink_max_x, rasterizer->col_max / CTX_SUBDIV);
    rasterizer->state->ink_min_y = ctx_mini (rasterizer->state->ink_min_y, rasterizer->scan_min / CTX_FULL_AA);
    rasterizer->state->ink_max_y = ctx_maxi (rasterizer->state->ink_max_y, rasterizer->scan_max / CTX_FULL_AA);

#if CTX_FAST_FILL_RECT
  if (rasterizer->edge_list.count == 5)
    {
      CtxSegment *entry0 = &(((CtxSegment*)(rasterizer->edge_list.entries)))[0];
      CtxSegment *entry1 = &(((CtxSegment*)(rasterizer->edge_list.entries)))[1];
      CtxSegment *entry2 = &(((CtxSegment*)(rasterizer->edge_list.entries)))[2];
      CtxSegment *entry3 = &(((CtxSegment*)(rasterizer->edge_list.entries)))[3];


      /* unclipped and the end points of the first four edges line up as
       * an axis-aligned rectangle */
      if (
          (!(rasterizer->state->gstate.clipped != 0)) &
          (entry0->data.s16[2] == entry1->data.s16[2]) &
          (entry0->data.s16[3] == entry3->data.s16[3]) &
          (entry1->data.s16[3] == entry2->data.s16[3]) &
          (entry2->data.s16[2] == entry3->data.s16[2])
#if CTX_ENABLE_SHADOW_BLUR
           && !rasterizer->in_shadow
#endif
         )
       {
         float x0 = entry3->data.s16[2] * (1.0f / CTX_SUBDIV);
         float y0 = entry3->data.s16[3] * (1.0f / CTX_FULL_AA);
         float x1 = entry1->data.s16[2] * (1.0f / CTX_SUBDIV);
         float y1 = entry1->data.s16[3] * (1.0f / CTX_FULL_AA);

         /* only take the shortcut for the expected corner ordering */
         if (x1 > x0 && y1 > y0)
         {
           ctx_composite_fill_rect (rasterizer, x0, y0, x1, y1, 255);
           goto done;
         }
       }
    }
#endif

    ctx_rasterizer_finish_shape (rasterizer);

    uint32_t hash = ctx_rasterizer_poly_to_edges (rasterizer);
    if (hash){}; /* hash is only consumed by the shape cache; silences unused warnings */

#if CTX_SHAPE_CACHE
    int width = (rasterizer->col_max + (CTX_SUBDIV-1) ) / CTX_SUBDIV - rasterizer->col_min/CTX_SUBDIV + 1;
    int height = (rasterizer->scan_max + (CTX_FULL_AA-1) ) / CTX_FULL_AA - rasterizer->scan_min / CTX_FULL_AA + 1;
    if (width * height < CTX_SHAPE_CACHE_DIM && width >=1 && height >= 1
        && width < CTX_SHAPE_CACHE_MAX_DIM
        && height < CTX_SHAPE_CACHE_MAX_DIM 
#if CTX_ENABLE_SHADOW_BLUR
        && !rasterizer->in_shadow
#endif
        )
      {
        int scan_min = rasterizer->scan_min;
        int col_min = rasterizer->col_min;
        scan_min -= (scan_min % CTX_FULL_AA);
        int y0 = scan_min / CTX_FULL_AA;
        int y1 = y0 + height;
        int x0 = col_min / CTX_SUBDIV;
        int ymin = y0;
        int x1 = x0 + width;
        int clip_x_min = blit_x;
        int clip_x_max = blit_x + blit_width - 1;
        int clip_y_min = blit_y;
        int clip_y_max = blit_y + blit_height - 1;

        /* shapes touching the blit bounds are rasterized directly instead
         * of going through the cache */
        int dont_cache = 0;
        if (CTX_UNLIKELY(x1 >= clip_x_max))
          { x1 = clip_x_max;
            dont_cache = 1;
          }
        int xo = 0;
        if (CTX_UNLIKELY(x0 < clip_x_min))
          {
            xo = clip_x_min - x0;
            x0 = clip_x_min;
            dont_cache = 1;
          }
        if (CTX_UNLIKELY(y0 < clip_y_min || y1 >= clip_y_max))
          dont_cache = 1;
        if (dont_cache || !_ctx_shape_cache_enabled)
        {
          rasterizer->scanline = scan_min;
          ctx_rasterizer_rasterize_edges (rasterizer, rasterizer->state->gstate.fill_rule
#if CTX_SHAPE_CACHE
                                        , NULL
#endif
                                       );
        }
        else
        {
        rasterizer->scanline = scan_min;
        CtxShapeEntry *shape = ctx_shape_entry_find (rasterizer, hash, width, height); 

        if (shape->uses == 0)
          {
            /* entry not rendered yet: rasterize into it with the clip
             * buffer detached for the duration of the call */
            CtxBuffer *buffer_backup = rasterizer->clip_buffer;
            rasterizer->clip_buffer = NULL;
            ctx_rasterizer_rasterize_edges (rasterizer, rasterizer->state->gstate.fill_rule, shape);
            rasterizer->clip_buffer = buffer_backup;
          }

        int ewidth = x1 - x0;
        if (ewidth>0)
        {
          rasterizer->scanline = scan_min;
          int bpp = rasterizer->format->bpp;
          if (rasterizer->clip_buffer && !rasterizer->clip_rectangle)
          {
          /* multiply the cached coverage with the clip mask per scanline */
          uint8_t composite[ewidth];
          uint8_t *clip_data = (uint8_t*)rasterizer->clip_buffer->data;
          int shape_width = shape->width;
          for (int y = y0; y < y1; y++)
            {
              if ( (y >= clip_y_min) && (y <= clip_y_max) )
                {
                    for (int x = 0; x < ewidth; x++)
                    {
                      int val = shape->data[shape_width * (int)(y-ymin) + xo + x];
                      // XXX : not valid for 1bit clip buffers
                      val = (val*(clip_data) [
                              ((y-blit_y) * blit_width) + x0 + x])/255;
                      composite[x] = val;
                    }
                    rasterizer->apply_coverage (rasterizer,
                                                 ( (uint8_t *) rasterizer->buf) + (y-blit_y) * blit_stride + ((int) (x0) * bpp)/8,
                                                 rasterizer->color,
                                                 x0, // is 0
                                                 composite,
                                                 ewidth );
                 }
               rasterizer->scanline += CTX_FULL_AA;
            }
          }
          else
          {
          /* no clip mask: the cached coverage rows are applied as they are */
          for (int y = y0; y < y1; y++)
            {
              if (CTX_LIKELY((y >= clip_y_min) && (y <= clip_y_max) ))
                {
                    rasterizer->apply_coverage (rasterizer,
                                                 ( (uint8_t *) rasterizer->buf) + (y-blit_y) * blit_stride + (int) ((x0) * bpp)/8, rasterizer->color,
                                                 x0,
                                                 &shape->data[shape->width * (int) (y-ymin) + xo],
                                                 ewidth );
                }
               rasterizer->scanline += CTX_FULL_AA;
            }
          }
         }
        }
      }
    else
#endif
    {
            
    ctx_rasterizer_rasterize_edges (rasterizer, rasterizer->state->gstate.fill_rule
#if CTX_SHAPE_CACHE
                                    , NULL
#endif
                                   );
    }
  }
#if CTX_FAST_FILL_RECT
done:
#endif
  if (CTX_UNLIKELY(rasterizer->preserve))
    {
      /* put back the path saved on entry */
      memcpy (rasterizer->edge_list.entries, temp, sizeof (temp) );
      rasterizer->edge_list.count = preserved_count;
    }
#if CTX_ENABLE_SHADOW_BLUR
  if (CTX_UNLIKELY(rasterizer->in_shadow))
  {
    /* undo the displacement applied to the bounds on entry */
    rasterizer->scan_min -= rasterizer->shadow_y * CTX_FULL_AA;
    rasterizer->scan_max -= rasterizer->shadow_y * CTX_FULL_AA;
    rasterizer->col_min  -= (rasterizer->shadow_x - rasterizer->state->gstate.shadow_blur * 3 + 1) * CTX_SUBDIV;
    rasterizer->col_max  -= (rasterizer->shadow_x + rasterizer->state->gstate.shadow_blur * 3 + 1) * CTX_SUBDIV;
  }
#endif
  rasterizer->preserve = 0;
}

/* Disabled stub: compiled out by the #if 0 below and the body is empty.
 * The parameter names suggest three vertex positions, one RGBA color per
 * vertex and texture coordinates - presumably a planned shaded/textured
 * triangle primitive; nothing here implements it.
 */
#if 0
static void
ctx_rasterizer_triangle (CtxRasterizer *rasterizer,
                         int x0, int y0,
                         int x1, int y1,
                         int x2, int y2,
                         int r0, int g0, int b0, int a0,
                         int r1, int g1, int b1, int a1,
                         int r2, int g2, int b2, int a2,
                         int u0, int v0,
                         int u1, int v1)
{

}
#endif


/* One character cell queued by the rasterizer's text and glyph entry
 * points instead of being rasterized, when rasterizer->term_glyphs is set.
 */
typedef struct _CtxTermGlyph
{
  uint32_t unichar;     /* character to place in the cell */
  int      col;         /* 1-based cell column */
  int      row;         /* 1-based cell row */
  uint8_t  rgba_bg[4];  /* background color, 8 bits per component */
  uint8_t  rgba_fg[4];  /* foreground color, 8 bits per component */
} CtxTermGlyph;

static int _ctx_glyph (Ctx *ctx, uint32_t unichar, int stroke);

/**
 * ctx_rasterizer_glyph:
 * @rasterizer: target rasterizer
 * @unichar: character to draw
 * @stroke: non-zero to stroke the glyph, zero to fill it
 *
 * Draws one glyph at the current position of the state.  A box one font
 * size around that position is transformed to device space first, and the
 * call returns without drawing when the box misses the blit rectangle.
 *
 * With CTX_BRAILLE_TEXT, when rasterizer->term_glyphs is set, the glyph is
 * filled rather than stroked, and the device-space font size is within
 * half a unit of the terminal cell height, the glyph is appended to
 * rasterizer->glyphs as a CtxTermGlyph instead of being rasterized.
 */
static void
ctx_rasterizer_glyph (CtxRasterizer *rasterizer, uint32_t unichar, int stroke)
{
  float box_x0 = rasterizer->state->x;
  float box_y0 = rasterizer->state->y - rasterizer->state->gstate.font_size;
  float box_x1 = rasterizer->state->x + rasterizer->state->gstate.font_size;
  float box_y1 = rasterizer->state->y + rasterizer->state->gstate.font_size;
  _ctx_user_to_device (rasterizer->state, &box_x0, &box_y0);
  _ctx_user_to_device (rasterizer->state, &box_x1, &box_y1);

  /* conservative visibility test against the blit rectangle */
  if (box_x1 < rasterizer->blit_x ||
      box_y1 < rasterizer->blit_y ||
      box_x0 > rasterizer->blit_x + rasterizer->blit_width ||
      box_y0 > rasterizer->blit_y + rasterizer->blit_height)
    return;

#if CTX_BRAILLE_TEXT
  if (rasterizer->term_glyphs)
  {
    float ignored_dx       = 0;
    float device_font_size = rasterizer->state->gstate.font_size;
    int   cell_height      = ctx_term_get_cell_height (rasterizer->backend.ctx);
    int   cell_width       = ctx_term_get_cell_width (rasterizer->backend.ctx);

    _ctx_user_to_device_distance (rasterizer->state, &ignored_dx, &device_font_size);

    if (!stroke && fabs (device_font_size - cell_height) < 0.5)
    {
      float pen_x = rasterizer->x;
      float pen_y = rasterizer->y;
      _ctx_user_to_device (rasterizer->state, &pen_x, &pen_y);

      /* cell coordinates are 1-based */
      int col = pen_x / cell_width + 1;
      int row = pen_y / cell_height + 1;

      /* NOTE(review): this appends while ctx_rasterizer_text prepends to
       * the same list - confirm the consumer does not depend on order. */
      CtxTermGlyph *cell = ctx_calloc (sizeof (CtxTermGlyph), 1);
      ctx_list_append (&rasterizer->glyphs, cell);
      cell->unichar = unichar;
      cell->col     = col;
      cell->row     = row;
      ctx_color_get_rgba8 (rasterizer->state,
                           &rasterizer->state->gstate.source_fill.color,
                           &cell->rgba_fg[0]);
      return;
    }
  }
#endif
  _ctx_glyph (rasterizer->backend.ctx, unichar, stroke);
}

static void
_ctx_text (Ctx        *ctx,
           const char *string,
           int         stroke,
           int         visible);

/**
 * ctx_rasterizer_text:
 * @rasterizer: target rasterizer
 * @string: NUL-terminated text to draw
 * @stroke: non-zero to stroke the text, zero to fill it
 *
 * Draws @string at the rasterizer's current position by forwarding to
 * _ctx_text with visible set to 1.
 *
 * With CTX_BRAILLE_TEXT, when rasterizer->term_glyphs is set, the text is
 * filled rather than stroked, and the device-space font size is within
 * half a unit of the terminal cell height, one CtxTermGlyph per byte of
 * @string is queued on rasterizer->glyphs instead, on consecutive columns
 * of a single row.
 */
static void
ctx_rasterizer_text (CtxRasterizer *rasterizer, const char *string, int stroke)
{
#if CTX_BRAILLE_TEXT
  float device_font_size = 0;
  if (rasterizer->term_glyphs)
  {
    float ignored_dx = 0;
    device_font_size = rasterizer->state->gstate.font_size;
    _ctx_user_to_device_distance (rasterizer->state, &ignored_dx, &device_font_size);
  }
  int cell_height = ctx_term_get_cell_height (rasterizer->backend.ctx);
  int cell_width  = ctx_term_get_cell_width (rasterizer->backend.ctx);

  if (rasterizer->term_glyphs && !stroke &&
      fabs (device_font_size - cell_height) < 0.5)
  {
    float pen_x = rasterizer->x;
    float pen_y = rasterizer->y;
    _ctx_user_to_device (rasterizer->state, &pen_x, &pen_y);

    /* cell coordinates are 1-based */
    int col = pen_x / cell_width + 1;
    int row = pen_y / cell_height + 1;

    /* NOTE(review): bytes are stored as code points without UTF-8
     * decoding, and a plain char with the high bit set sign-extends where
     * char is signed - confirm only ASCII is expected on this path. */
    for (const char *p = string; *p; p++)
    {
      CtxTermGlyph *cell = ctx_calloc (sizeof (CtxTermGlyph), 1);
      ctx_list_prepend (&rasterizer->glyphs, cell);
      cell->unichar = *p;
      cell->col     = col;
      cell->row     = row;
      ctx_color_get_rgba8 (rasterizer->state,
                           &rasterizer->state->gstate.source_fill.color,
                           cell->rgba_fg);
      col++;
    }
    return;
  }
#endif
  _ctx_text (rasterizer->backend.ctx, string, stroke, 1);
}

void
_ctx_font (Ctx *ctx, const char *name);

/**
 * ctx_rasterizer_set_font:
 * @rasterizer: rasterizer whose backend context receives the font
 * @font_name: font name, passed through unchanged
 *
 * Forwards the font selection to _ctx_font on the context that owns this
 * rasterizer backend.
 */
static void
ctx_rasterizer_set_font (CtxRasterizer *rasterizer, const char *font_name)
{
  Ctx *owner = rasterizer->backend.ctx;

  _ctx_font (owner, font_name);
}

/**
 * ctx_rasterizer_arc:
 * @rasterizer: rasterizer whose current path is extended
 * @x: center x
 * @y: center y
 * @radius: arc radius; radii of 0.0001 or less add nothing
 * @start_angle: start angle in radians, clamped to [-30, 30]
 * @end_angle: end angle in radians, clamped to [-30, 30]
 * @anticlockwise: non-zero to walk from start to end with decreasing angle
 *
 * Approximates a circular arc with straight segments.  The number of
 * segments for a full turn scales with the transformed radius, bounded
 * below by 24 and above by CTX_RASTERIZER_MAX_CIRCLE_SEGMENTS.  The first
 * emitted point is a move_to when the path has no previous point and a
 * line_to otherwise; the arc always ends with a line_to the exact end
 * point.
 */
static void
ctx_rasterizer_arc (CtxRasterizer *rasterizer,
                    float          x,
                    float          y,
                    float          radius,
                    float          start_angle,
                    float          end_angle,
                    int            anticlockwise)
{
  float scale = ctx_matrix_get_scale (&rasterizer->state->gstate.transform);
  int   full_segments = scale * radius * CTX_PI * 2 / 4.0;

  if (full_segments > CTX_RASTERIZER_MAX_CIRCLE_SEGMENTS)
    full_segments = CTX_RASTERIZER_MAX_CIRCLE_SEGMENTS;
  if (full_segments < 24)
    full_segments = 24;

  float step = CTX_PI*2.0/full_segments;
  int   steps;

  /* keep both angles within +-30 radians */
  if (start_angle < -30.0) start_angle = -30.0;
  if (start_angle >  30.0) start_angle =  30.0;
  if (end_angle   < -30.0) end_angle   = -30.0;
  if (end_angle   >  30.0) end_angle   =  30.0;

  if (radius <= 0.0001)
    return;

  if (end_angle == start_angle)
    {
      /* degenerate arc: just visit the single point
         XXX also detect arcs fully outside render view */
      if (rasterizer->has_prev != 0)
        ctx_rasterizer_line_to (rasterizer, x + ctx_cosf (end_angle) * radius,
                                y + ctx_sinf (end_angle) * radius);
      else
        ctx_rasterizer_move_to (rasterizer, x + ctx_cosf (end_angle) * radius,
                                y + ctx_sinf (end_angle) * radius);
      return;
    }

  /* a sweep of one full turn, in either direction and regardless of the
   * anticlockwise flag, uses the full segment count minus the closing
   * segment added at the end */
  if (fabsf ((end_angle - start_angle) - CTX_PI*2) < 0.01f ||
      fabsf ((start_angle - end_angle) - CTX_PI*2) < 0.01f)
    steps = full_segments - 1;
  else if (anticlockwise)
    steps = (start_angle - end_angle) / (CTX_PI*2) * full_segments;
  else
    steps = (end_angle - start_angle) / (CTX_PI*2) * full_segments;

  if (anticlockwise)
    step = -step;

  if (steps == 0)
    {
      /* too short for intermediate points; only establish a start point
       * when the path has none */
      if (!rasterizer->has_prev)
        ctx_rasterizer_move_to (rasterizer,
                                x + ctx_cosf (start_angle) * radius,
                                y + ctx_sinf (start_angle) * radius);
    }
  else
    {
      float angle = start_angle;
      for (int i = 0; i < steps; i++)
        {
          float px = x + ctx_cosf (angle) * radius;
          float py = y + ctx_sinf (angle) * radius;
          if (i == 0 && !rasterizer->has_prev)
            ctx_rasterizer_move_to (rasterizer, px, py);
          else
            ctx_rasterizer_line_to (rasterizer, px, py);
          angle += step;
        }
    }

  ctx_rasterizer_line_to (rasterizer, x + ctx_cosf (end_angle) * radius,
                          y + ctx_sinf (end_angle) * radius);
}

/**
 * ctx_rasterizer_quad_to:
 * @rasterizer: rasterizer whose current path is extended
 * @cx: control point x
 * @cy: control point y
 * @x: end point x
 * @y: end point y
 *
 * Adds a quadratic bezier from the rasterizer's current position by
 * expressing it as a cubic: each cubic control point lies two thirds of
 * the way from an end point towards the quadratic control point.
 */
static void
ctx_rasterizer_quad_to (CtxRasterizer *rasterizer,
                        float        cx,
                        float        cy,
                        float        x,
                        float        y)
{
  /* control point next to the current position */
  float c1x = (cx * 2 + rasterizer->x) / 3.0f;
  float c1y = (cy * 2 + rasterizer->y) / 3.0f;
  /* control point next to the end point */
  float c2x = (cx * 2 + x) / 3.0f;
  float c2y = (cy * 2 + y) / 3.0f;

  ctx_rasterizer_curve_to (rasterizer, c1x, c1y, c2x, c2y, x, y);
}

/**
 * ctx_rasterizer_rel_quad_to:
 * @rasterizer: rasterizer whose current path is extended
 * @cx: control point x, relative to the current position
 * @cy: control point y, relative to the current position
 * @x: end point x, relative to the current position
 * @y: end point y, relative to the current position
 *
 * Relative variant of ctx_rasterizer_quad_to: offsets both points by the
 * rasterizer's current position and forwards to the absolute version.
 */
static void
ctx_rasterizer_rel_quad_to (CtxRasterizer *rasterizer,
                            float cx, float cy,
                            float x,  float y)
{
  float origin_x = rasterizer->x;
  float origin_y = rasterizer->y;

  ctx_rasterizer_quad_to (rasterizer,
                          cx + origin_x, cy + origin_y,
                          x  + origin_x, y  + origin_y);
}

/* defined elsewhere in the file; used below for square line caps */
static void
ctx_rasterizer_rectangle_reverse (CtxRasterizer *rasterizer,
                                  float x,
                                  float y,
                                  float width,
                                  float height);

/**
 * ctx_rasterizer_stroke:
 * @rasterizer: rasterizer holding the path to stroke in its edge_list
 *
 * Strokes the polyline currently held in rasterizer->edge_list; returns
 * immediately when the edge list is empty.
 *
 * Unless the stroke source is CTX_SOURCE_INHERIT_FILL, the fill source of
 * the graphics state is temporarily replaced by the stroke source and put
 * back before returning.
 *
 * With CTX_FAST_FILL_RECT, an unclipped five-entry edge list passing the
 * axis-aligned rectangle test is handed to ctx_composite_stroke_rect.
 * Otherwise the stroke outline is built as a new shape from a copy of the
 * original edges, with the gstate transform temporarily set to identity;
 * line caps and round joins are added as further sub-shapes and the
 * result is filled with the winding rule through ctx_rasterizer_fill.
 *
 * When rasterizer->preserve was set on entry, the original edge list is
 * copied back and the flag cleared.
 */
static void
ctx_rasterizer_stroke (CtxRasterizer *rasterizer)
{
  CtxGState *gstate = &rasterizer->state->gstate;
  CtxSource source_backup; /* only written and read when the stroke source is not inherited */
  int count = rasterizer->edge_list.count;
  if (count == 0)
    return;
  if (gstate->source_stroke.type != CTX_SOURCE_INHERIT_FILL)
  {
    source_backup = gstate->source_fill;
    gstate->source_fill = rasterizer->state->gstate.source_stroke;
  }
  int preserved = rasterizer->preserve;
  float factor = ctx_matrix_get_scale (&gstate->transform);
  float line_width = gstate->line_width * factor; /* line width scaled by the transform */

  /* clear comp_op before running the compositing setup again, now that
   * the source may have changed */
  rasterizer->comp_op = NULL;
  ctx_composite_setup (rasterizer);

  /* NOTE(review): like the copy in ctx_rasterizer_fill this is a
   * variable length array sized by the path, so stack use grows with
   * the number of edges */
  CtxSegment temp[count]; /* copy of already built up path's poly line  */
  memcpy (temp, rasterizer->edge_list.entries, sizeof (temp) );

#if CTX_FAST_FILL_RECT
  if (rasterizer->edge_list.count == 5)
    {
      CtxSegment *entry0 = &((CtxSegment*)rasterizer->edge_list.entries)[0];
      CtxSegment *entry1 = &((CtxSegment*)rasterizer->edge_list.entries)[1];
      CtxSegment *entry2 = &((CtxSegment*)rasterizer->edge_list.entries)[2];
      CtxSegment *entry3 = &((CtxSegment*)rasterizer->edge_list.entries)[3];

      /* unclipped and the end points of the first four edges line up as
       * an axis-aligned rectangle */
      if (!rasterizer->state->gstate.clipped &&
          (entry0->data.s16[2] == entry1->data.s16[2]) &&
          (entry0->data.s16[3] == entry3->data.s16[3]) &&
          (entry1->data.s16[3] == entry2->data.s16[3]) &&
          (entry2->data.s16[2] == entry3->data.s16[2])
#if CTX_ENABLE_SHADOW_BLUR
           && !rasterizer->in_shadow
#endif
         )
       {

        float x0 = entry3->data.s16[2] * 1.0f / CTX_SUBDIV;
        float y0 = entry3->data.s16[3] * 1.0f / CTX_FULL_AA;
        float x1 = entry1->data.s16[2] * 1.0f / CTX_SUBDIV;
        float y1 = entry1->data.s16[3] * 1.0f / CTX_FULL_AA;

        ctx_composite_stroke_rect (rasterizer, x0, y0, x1, y1, line_width);

        goto done;


       }
    }
#endif
  
    {
    {
      if (line_width < 5.0f)
      {
      factor *= 0.89; /* this hack adjustment makes sharp 1px and 2px stroke widths
                       * end up sharp without erroneous AA; we seem to be off by
                       * one somewhere else, causing the need for this
                       */
      line_width *= 0.89f;
      }
      ctx_rasterizer_reset (rasterizer); /* then start afresh with our stroked shape  */
      /* the saved edges are converted back with CTX_SUBDIV / CTX_FULL_AA
       * below, so the outline is emitted under an identity transform */
      CtxMatrix transform_backup = gstate->transform;
      _ctx_matrix_identity (&gstate->transform);
      float prev_x = 0.0f;
      float prev_y = 0.0f;
      float half_width_x = line_width/2;
      float half_width_y = half_width_x;

      if (CTX_UNLIKELY(line_width <= 0.0f))
        { // makes 0 width be hairline
          half_width_x = .5f;
          half_width_y = .5f;
        }
      /* [start, end] is the index range in temp of the sub-path being
       * outlined; a sub-path begins at a CTX_NEW_EDGE entry */
      int start = 0;
      int end   = 0;
      while (start < count)
        {
          int started = 0;
          int i;
          /* forward pass: emit one side of the outline */
          for (i = start; i < count; i++)
            {
              CtxSegment *entry = &temp[i];
              float x, y;
              if (entry->code == CTX_NEW_EDGE)
                {
                  if (CTX_LIKELY(started))
                    {
                      /* reached the next sub-path; finish this one first */
                      end = i - 1;
                      goto foo;
                    }
                  prev_x = entry->data.s16[0] * 1.0f / CTX_SUBDIV;
                  prev_y = entry->data.s16[1] * 1.0f / CTX_FULL_AA;
                  started = 1;
                  start = i;
                }
              x = entry->data.s16[2] * 1.0f / CTX_SUBDIV;
              y = entry->data.s16[3] * 1.0f / CTX_FULL_AA;
              float dx = x - prev_x;
              float dy = y - prev_y;
              float length = ctx_fast_hypotf (dx, dy);
              if (length>0.001f)
                {
                  /* scale the segment direction to half the line width;
                   * (-dy, dx) is then the perpendicular offset used below */
                  float recip_length = 1.0/length;
                  dx = dx * recip_length * half_width_x;
                  dy = dy * recip_length * half_width_y;
                  if (CTX_UNLIKELY(entry->code == CTX_NEW_EDGE))
                    {
                      ctx_rasterizer_finish_shape (rasterizer);
                      ctx_rasterizer_move_to (rasterizer, prev_x+dy, prev_y-dx);
                    }
                  ctx_rasterizer_line_to (rasterizer, prev_x-dy, prev_y+dx);
                  
                  // we need to know the slope of the other side

                  // XXX possible miter line-to
                  //ctx_rasterizer_line_to (rasterizer, prev_x-dy+4, prev_y+dx+10);
                  //ctx_rasterizer_line_to (rasterizer, prev_x-dy+8, prev_y+dx+0);

                  ctx_rasterizer_line_to (rasterizer, x-dy, y+dx);
                }
              prev_x = x;
              prev_y = y;
            }
          end = i-1;
foo:
          /* backward pass: walk the same sub-path in reverse; the
           * direction is reversed, so the same offset formula lands on
           * the other side of the path */
          for (int i = end; i >= start; i--)
            {
              CtxSegment *entry = &temp[i];
              float x, y, dx, dy;
              x = entry->data.s16[2] * 1.0f / CTX_SUBDIV;
              y = entry->data.s16[3] * 1.0f / CTX_FULL_AA;
              dx = x - prev_x;
              dy = y - prev_y;
              float length = ctx_fast_hypotf (dx, dy);
              /* computed before the length test; the results are only
               * used when the test below passes */
              float recip_length = 1.0f/length;
              dx = dx * recip_length * half_width_x;
              dy = dy * recip_length * half_width_y;
              if (CTX_LIKELY(length>0.001f))
                {
                  ctx_rasterizer_line_to (rasterizer, prev_x-dy, prev_y+dx);
                  // XXX possible miter line-to
             //   ctx_rasterizer_line_to (rasterizer, prev_x-dy+10, prev_y+dx+10);
                  ctx_rasterizer_line_to (rasterizer, x-dy,      y+dx);
                }
              prev_x = x;
              prev_y = y;
              if (CTX_UNLIKELY(entry->code == CTX_NEW_EDGE))
                {
                  /* first entry of the sub-path: also cover the segment
                   * back to its starting point */
                  x = entry->data.s16[0] * 1.0f / CTX_SUBDIV;
                  y = entry->data.s16[1] * 1.0f / CTX_FULL_AA;
                  dx = x - prev_x;
                  dy = y - prev_y;
                  length = ctx_fast_hypotf (dx, dy);
                  recip_length = 1.0f/length;
                  if (CTX_LIKELY(length>0.001f))
                    {
                      dx = dx * recip_length * half_width_x;
                      dy = dy * recip_length * half_width_y;
                      ctx_rasterizer_line_to (rasterizer, prev_x-dy, prev_y+dx);
                      ctx_rasterizer_line_to (rasterizer, x-dy, y+dx);
                    }
                }
              if ( (prev_x != x) && (prev_y != y) )
                {
                  prev_x = x;
                  prev_y = y;
                }
            }
          start = end+1;
        }
      ctx_rasterizer_finish_shape (rasterizer);
      /* caps: add a square or a circle at the first and last point of
       * every sub-path */
      switch (gstate->line_cap)
        {
          case CTX_CAP_SQUARE: // XXX: incorrect - if rectangles were in
                               //                  reverse order - rotation would be off
                               //                  better implement correct here
            {
              float x = 0, y = 0;
              int has_prev = 0;
              for (int i = 0; i < count; i++)
                {
                  CtxSegment *entry = &temp[i];
                  if (CTX_UNLIKELY(entry->code == CTX_NEW_EDGE))
                    {
                      if (has_prev)
                        {
                          /* NOTE(review): width/height here are half_width_*
                           * while the other caps pass half_width_* * 2 -
                           * confirm whether that is intended */
                          ctx_rasterizer_rectangle_reverse (rasterizer, x - half_width_x, y - half_width_y, half_width_x, half_width_y);
                          ctx_rasterizer_finish_shape (rasterizer);
                        }
                      x = entry->data.s16[0] * 1.0f / CTX_SUBDIV;
                      y = entry->data.s16[1] * 1.0f / CTX_FULL_AA;
                      ctx_rasterizer_rectangle_reverse (rasterizer, x - half_width_x, y - half_width_y, half_width_x * 2, half_width_y * 2);
                      ctx_rasterizer_finish_shape (rasterizer);
                    }
                  x = entry->data.s16[2] * 1.0f / CTX_SUBDIV;
                  y = entry->data.s16[3] * 1.0f / CTX_FULL_AA;
                  has_prev = 1;
                }
              ctx_rasterizer_rectangle_reverse (rasterizer, x - half_width_x, y - half_width_y, half_width_x * 2, half_width_y * 2);
              ctx_rasterizer_finish_shape (rasterizer);
            }
            break;
          case CTX_CAP_NONE: /* nothing to do */
            break;
          case CTX_CAP_ROUND:
            {
              float x = 0, y = 0;
              int has_prev = 0;
              for (int i = 0; i < count; i++)
                {
                  CtxSegment *entry = &temp[i];
                  if (CTX_UNLIKELY(entry->code == CTX_NEW_EDGE))
                    {
                      if (has_prev)
                        {
                          /* NOTE(review): start angle is CTX_PI*3 here but
                           * CTX_PI*2 for the other caps - confirm intent */
                          ctx_rasterizer_arc (rasterizer, x, y, half_width_x, CTX_PI*3, 0, 1);
                          ctx_rasterizer_finish_shape (rasterizer);
                        }
                      x = entry->data.s16[0] * 1.0f / CTX_SUBDIV;
                      y = entry->data.s16[1] * 1.0f / CTX_FULL_AA;
                      ctx_rasterizer_arc (rasterizer, x, y, half_width_x, CTX_PI*2, 0, 1);
                      ctx_rasterizer_finish_shape (rasterizer);
                    }
                  x = entry->data.s16[2] * 1.0f / CTX_SUBDIV;
                  y = entry->data.s16[3] * 1.0f / CTX_FULL_AA;
                  has_prev = 1;
                }
              ctx_rasterizer_move_to (rasterizer, x, y);
              ctx_rasterizer_arc (rasterizer, x, y, half_width_x, CTX_PI*2, 0, 1);
              ctx_rasterizer_finish_shape (rasterizer);
              break;
            }
        }
      /* joins: only round joins add geometry */
      switch (gstate->line_join)
        {
          case CTX_JOIN_BEVEL:
          case CTX_JOIN_MITER:
            break;
          case CTX_JOIN_ROUND:
            {
              float x = 0, y = 0;
              for (int i = 0; i < count-1; i++)
                {
                  CtxSegment *entry = &temp[i];
                  x = entry->data.s16[2] * 1.0f / CTX_SUBDIV;
                  y = entry->data.s16[3] * 1.0f / CTX_FULL_AA;
                  /* a circle at every point that is followed by a
                   * continuing edge */
                  if (CTX_UNLIKELY(entry[1].code == CTX_EDGE))
                    {
                      ctx_rasterizer_arc (rasterizer, x, y, half_width_x, CTX_PI*2, 0, 1);
                      ctx_rasterizer_finish_shape (rasterizer);
                    }
                }
              break;
            }
        }
      /* fill the assembled outline with the winding rule, then put the
       * fill rule and transform back */
      CtxFillRule rule_backup = gstate->fill_rule;
      gstate->fill_rule = CTX_FILL_RULE_WINDING;
      rasterizer->preserve = 0; // so fill isn't tripped
      ctx_rasterizer_fill (rasterizer);
      gstate->fill_rule = rule_backup;
      gstate->transform = transform_backup;
    }
  }
#if CTX_FAST_FILL_RECT
done:
#endif
  if (preserved)
    {
      /* put back the path saved on entry */
      memcpy (rasterizer->edge_list.entries, temp, sizeof (temp) );
      rasterizer->edge_list.count = count;
      rasterizer->preserve = 0;
    }
  if (gstate->source_stroke.type != CTX_SOURCE_INHERIT_FILL)
    gstate->source_fill = source_backup;
}

/* Pixel format used when allocating clip buffers: CTX_FORMAT_GRAY1 when
 * CTX_1BIT_CLIP is enabled, CTX_FORMAT_GRAY8 otherwise.
 */
#if CTX_1BIT_CLIP
#define CTX_CLIP_FORMAT CTX_FORMAT_GRAY1
#else
#define CTX_CLIP_FORMAT CTX_FORMAT_GRAY8
#endif


/**
 * ctx_rasterizer_clip_reset:
 * @rasterizer: rasterizer whose clipping state is cleared
 *
 * Drops any clip buffer (with CTX_ENABLE_CLIP) and resets the clip
 * bounds of the graphics state to the full blit rectangle, with
 * inclusive maximum coordinates.
 */
static void
ctx_rasterizer_clip_reset (CtxRasterizer *rasterizer)
{
  CtxGState *gstate = &rasterizer->state->gstate;

#if CTX_ENABLE_CLIP
  if (rasterizer->clip_buffer)
    {
      ctx_buffer_free (rasterizer->clip_buffer);
    }
  rasterizer->clip_buffer = NULL;
#endif

  gstate->clip_min_x = rasterizer->blit_x;
  gstate->clip_max_x = rasterizer->blit_x + rasterizer->blit_width - 1;

  gstate->clip_min_y = rasterizer->blit_y;
  gstate->clip_max_y = rasterizer->blit_y + rasterizer->blit_height - 1;
}

static void
ctx_rasterizer_clip_apply (CtxRasterizer *rasterizer,
                           CtxSegment    *edges)
{
  unsigned int count = edges[0].data.u32[0];

  int minx = 5000;
  int miny = 5000;
  int maxx = -5000;
  int maxy = -5000;
  int prev_x = 0;
  int prev_y = 0;
  int blit_width = rasterizer->blit_width;
  int blit_height = rasterizer->blit_height;

  float coords[6][2];

  for (unsigned int i = 0; i < count; i++)
    {
      CtxSegment *entry = &edges[i+1];
      float x, y;
      if (entry->code == CTX_NEW_EDGE)
        {
          prev_x = entry->data.s16[0] * 1.0f / CTX_SUBDIV;
          prev_y = entry->data.s16[1] * 1.0f / CTX_FULL_AA;
          if (prev_x < minx) { minx = prev_x; }
          if (prev_y < miny) { miny = prev_y; }
          if (prev_x > maxx) { maxx = prev_x; }
          if (prev_y > maxy) { maxy = prev_y; }
        }
      x = entry->data.s16[2] * 1.0f / CTX_SUBDIV;
      y = entry->data.s16[3] * 1.0f / CTX_FULL_AA;
      if (x < minx) { minx = x; }
      if (y < miny) { miny = y; }
      if (x > maxx) { maxx = x; }
      if (y > maxy) { maxy = y; }

      if (i < 6)
      {
        coords[i][0] = x;
        coords[i][1] = y;
      }
    }

#if CTX_ENABLE_CLIP

  if ((rasterizer->clip_rectangle==1
       || !rasterizer->clip_buffer)
      )
  {
    if (count == 5)
    {
      if (coords[0][0] == coords[1][0] &&
          coords[0][1] == coords[4][1] &&
          coords[0][1] == coords[3][1] &&
          coords[1][1] == coords[2][1] &&
          coords[3][0] == coords[4][0]
          )
      {
#if 0
        printf ("%d,%d %dx%d\n", minx, miny,
                                       maxx-minx+1, maxy-miny+1);
#endif

         rasterizer->state->gstate.clip_min_x =
            ctx_maxi (minx, rasterizer->state->gstate.clip_min_x);
         rasterizer->state->gstate.clip_min_y =
            ctx_maxi (miny, rasterizer->state->gstate.clip_min_y);
         rasterizer->state->gstate.clip_max_x =
            ctx_mini (maxx, rasterizer->state->gstate.clip_max_x);
         rasterizer->state->gstate.clip_max_y =
            ctx_mini (maxy, rasterizer->state->gstate.clip_max_y);

         rasterizer->clip_rectangle = 1;

#if 0
         if (!rasterizer->clip_buffer)
           rasterizer->clip_buffer = ctx_buffer_new (blit_width,
                                                     blit_height,
                                                     CTX_CLIP_FORMAT);

         memset (rasterizer->clip_buffer->data, 0, blit_width * blit_height);
         int i = 0;
         for (int y = rasterizer->state->gstate.clip_min_y;
                  y <= rasterizer->state->gstate.clip_max_y;
                  y++)
         for (int x = rasterizer->state->gstate.clip_min_x;
                  x <= rasterizer->state->gstate.clip_max_x;
                  x++, i++)
         {
           ((uint8_t*)(rasterizer->clip_buffer->data))[i] = 255;
         }
#endif

         return;
      }
#if 0
      else
      {
        printf ("%d,%d %dx%d  0,0:%.2f 0,1:%.2f 1,0:%.2f 11:%.2f 20:%.2f 21:%2.f 30:%.2f 31:%.2f 40:%.2f 41:%.2f\n", minx, miny,
                                       maxx-minx+1, maxy-miny+1
                                       
         ,coords[0][0] ,  coords[0][1]
         ,coords[1][0] ,  coords[1][1]
         ,coords[2][0] ,  coords[2][1]
         ,coords[3][0] ,  coords[3][1]
         ,coords[4][0] ,  coords[4][1]
         );
      }
#endif
    }
  }
  rasterizer->clip_rectangle = 0;

  if ((minx == maxx) || (miny == maxy)) // XXX : reset hack
  {
    ctx_rasterizer_clip_reset (rasterizer);
    return;//goto done;
  }

  int we_made_it = 0;
  CtxBuffer *clip_buffer;

  if (!rasterizer->clip_buffer)
  {
    rasterizer->clip_buffer = ctx_buffer_new (blit_width,
                                              blit_height,
                                              CTX_CLIP_FORMAT);
    clip_buffer = rasterizer->clip_buffer;
    we_made_it = 1;
    if (CTX_CLIP_FORMAT == CTX_FORMAT_GRAY1)
      memset (rasterizer->clip_buffer->data, 0, blit_width * blit_height/8);
    else
      memset (rasterizer->clip_buffer->data, 0, blit_width * blit_height);
  }
  else
  {
    clip_buffer = ctx_buffer_new (blit_width, blit_height,
                                  CTX_CLIP_FORMAT);
  }

  {

  int prev_x = 0;
  int prev_y = 0;

    Ctx *ctx = ctx_new_for_framebuffer (clip_buffer->data, blit_width, blit_height,
       blit_width,
       CTX_CLIP_FORMAT);

  for (unsigned int i = 0; i < count; i++)
    {
      CtxSegment *entry = &edges[i+1];
      float x, y;
      if (entry->code == CTX_NEW_EDGE)
        {
          prev_x = entry->data.s16[0] * 1.0f / CTX_SUBDIV;
          prev_y = entry->data.s16[1] * 1.0f / CTX_FULL_AA;
          ctx_move_to (ctx, prev_x, prev_y);
        }
      x = entry->data.s16[2] * 1.0f / CTX_SUBDIV;
      y = entry->data.s16[3] * 1.0f / CTX_FULL_AA;
      ctx_line_to (ctx, x, y);
    }
    ctx_gray (ctx, 1.0f);
    ctx_fill (ctx);
    ctx_free (ctx);
  }

  int maybe_rect = 1;
  rasterizer->clip_rectangle = 0;

  if (CTX_CLIP_FORMAT == CTX_FORMAT_GRAY1)
  {
    unsigned int count = blit_width * blit_height / 8;
    for (unsigned int i = 0; i < count; i++)
    {
      ((uint8_t*)rasterizer->clip_buffer->data)[i] =
      (((uint8_t*)rasterizer->clip_buffer->data)[i] &
      ((uint8_t*)clip_buffer->data)[i]);
    }
  }
  else
  {
    int count = blit_width * blit_height;


    int i;
    int x0 = 0;
    int y0 = 0;
    int width = -1;
    int next_stage = 0;
    uint8_t *p_data = (uint8_t*)rasterizer->clip_buffer->data;
    uint8_t *data = (uint8_t*)clip_buffer->data;

    i=0;
    /* find upper left */
    for (; i < count && maybe_rect && !next_stage; i++)
    {
      uint8_t val = (p_data[i] * data[i])/255;
      data[i] = val;
      switch (val)
      {
        case 255:
          x0 = i % blit_width;
          y0 = i / blit_width;
          next_stage = 1;
          break;
        case 0: break;
        default:
          maybe_rect = 0;
          break;
      }
    }

    next_stage = 0;
    /* figure out with */
    for (; i < count && !next_stage && maybe_rect; i++)
    {
      int x = i % blit_width;
      int y = i / blit_width;
      uint8_t val = (p_data[i] * data[i])/255;
      data[i] = val;

      if (y == y0)
      {
        switch (val)
        {
          case 255:
            width = x - x0 + 1;
            break;
          case 0:
            next_stage = 1;
            break;
          default:
            maybe_rect = 0;
            break;
        }
        if (x % blit_width == blit_width - 1) next_stage = 1;
      }
      else next_stage = 1;
    }

    next_stage = 0;
    /* body */
    for (; i < count && maybe_rect && !next_stage; i++)
    {
      int x = i % blit_width;
      uint8_t val = (p_data[i] * data[i])/255;
      data[i] = val;

      if (x < x0)
      {
        if (val != 0){ maybe_rect = 0; next_stage = 1; }
      } else if (x < x0 + width)
      {
        if (val != 255){ if (val != 0) maybe_rect = 0; next_stage = 1; }
      } else {
        if (val != 0){ maybe_rect = 0; next_stage = 1; }
      }
    }

    next_stage = 0;
    /* foot */
    for (; i < count && maybe_rect && !next_stage; i++)
    {
      uint8_t val = (p_data[i] * data[i])/255;
      data[i] = val;

      if (val != 0){ maybe_rect = 0; next_stage = 1; }
    }


    for (; i < count; i++)
    {
      uint8_t val = (p_data[i] * data[i])/255;
      data[i] = val;
    }

    if (maybe_rect)
       rasterizer->clip_rectangle = 1;
  }
  if (!we_made_it)
   ctx_buffer_free (clip_buffer);
#else
  if (coords[0][0]){};
#endif
  
  rasterizer->state->gstate.clip_min_x = ctx_maxi (minx,
                                         rasterizer->state->gstate.clip_min_x);
  rasterizer->state->gstate.clip_min_y = ctx_maxi (miny,
                                         rasterizer->state->gstate.clip_min_y);
  rasterizer->state->gstate.clip_max_x = ctx_mini (maxx,
                                         rasterizer->state->gstate.clip_max_x);
  rasterizer->state->gstate.clip_max_y = ctx_mini (maxy,
                                         rasterizer->state->gstate.clip_max_y);
}

/* Turn the path built up so far into a clip.
 *
 * The current edge list is snapshotted together with a leading header
 * segment (code CTX_NOP, data.u32[0] = number of edges), stored in the state
 * as a CTX_clip blob, and handed to ctx_rasterizer_clip_apply().  The path is
 * then reset; when the preserve flag is set the snapshot is copied back into
 * the edge list and the flag is cleared.
 */
static void
ctx_rasterizer_clip (CtxRasterizer *rasterizer)
{
  int        n_edges = rasterizer->edge_list.count;
  CtxSegment saved[n_edges + 1];   /* [0] is the header, [1..] the edges */
  size_t     edges_size = sizeof (saved) - sizeof (saved[0]);

  rasterizer->state->has_clipped    = 1;
  rasterizer->state->gstate.clipped = 1;

  /* snapshot the poly line of the path and record it in the state */
  memcpy (&saved[1], rasterizer->edge_list.entries, edges_size);
  saved[0].code        = CTX_NOP;
  saved[0].data.u32[0] = n_edges;
  ctx_state_set_blob (rasterizer->state, CTX_clip,
                      (uint8_t*)saved, sizeof (saved));

  ctx_rasterizer_clip_apply (rasterizer, saved);
  ctx_rasterizer_reset (rasterizer);

  if (!rasterizer->preserve)
    return;

  /* the caller asked for the path to survive the clip: put it back */
  memcpy (rasterizer->edge_list.entries, &saved[1], edges_size);
  rasterizer->edge_list.count = n_edges;
  rasterizer->preserve        = 0;
}


#if 0
/* Disabled (compiled out by the surrounding #if 0).
 *
 * Loads the PNG at @path into the first texture slot of the owning ctx
 * (texture[0]) and then selects it as the texture source at @x, @y.
 *
 * NOTE(review): the active CTX_TEXTURE handler in this file calls
 * ctx_rasterizer_set_texture() with an eid argument, while this code passes
 * the integer 0 - it presumably predates that interface; confirm before
 * re-enabling.
 */
static void
ctx_rasterizer_load_image (CtxRasterizer *rasterizer,
                           const char  *path,
                           float x,
                           float y)
{
  // decode the PNG into texture slot 0, then make it the current source
  // (original note: magic width height stride format data)
  ctx_buffer_load_png (&rasterizer->backend.ctx->texture[0], path);
  ctx_rasterizer_set_texture (rasterizer, 0, x, y);
}
#endif

/* Add a closed rectangle to the current path, starting at (x, y) and
 * visiting the corners in the opposite order to ctx_rasterizer_rectangle():
 * first along the y axis by @height, then along the x axis by @width, and
 * back again.  The shape is finished afterwards.
 */
static void
ctx_rasterizer_rectangle_reverse (CtxRasterizer *rasterizer,
                                  float x,
                                  float y,
                                  float width,
                                  float height)
{
  const float still = 0.0f;

  ctx_rasterizer_move_to     (rasterizer, x, y);
  ctx_rasterizer_rel_line_to (rasterizer, still,  height);
  ctx_rasterizer_rel_line_to (rasterizer, width,  still);
  ctx_rasterizer_rel_line_to (rasterizer, still,  -height);
  ctx_rasterizer_rel_line_to (rasterizer, -width, still);
  ctx_rasterizer_finish_shape (rasterizer);
}

/* Add a closed rectangle to the current path: start at (x, y), move by
 * @width along the x axis, by @height along the y axis, then back along
 * both, and finish the shape.
 */
static void
ctx_rasterizer_rectangle (CtxRasterizer *rasterizer,
                          float x,
                          float y,
                          float width,
                          float height)
{
  const float still = 0.0f;

  ctx_rasterizer_move_to     (rasterizer, x, y);
  ctx_rasterizer_rel_line_to (rasterizer, width,  still);
  ctx_rasterizer_rel_line_to (rasterizer, still,  height);
  ctx_rasterizer_rel_line_to (rasterizer, -width, still);
  ctx_rasterizer_rel_line_to (rasterizer, still,  -height);
  ctx_rasterizer_finish_shape (rasterizer);
}

/* Paint a single pixel at (x, y) with the given 8 bit RGBA colour.
 *
 * The colour is installed as the fill source (type CTX_SOURCE_COLOR) and a
 * 1x1 rectangle is filled.  A direct ctx_rasterizer_pset() was considered
 * instead, but it does not take transforms into account and has received
 * less testing than the code paths that are part of the protocol; using a
 * rectangle properly triggers the fill-rect fast path.
 */
static void
ctx_rasterizer_set_pixel (CtxRasterizer *rasterizer,
                          uint16_t x,
                          uint16_t y,
                          uint8_t r,
                          uint8_t g,
                          uint8_t b,
                          uint8_t a)
{
  rasterizer->state->gstate.source_fill.type = CTX_SOURCE_COLOR;
  ctx_color_set_RGBA8 (rasterizer->state,
                       &rasterizer->state->gstate.source_fill.color,
                       r, g, b, a);

  /* the source changed, drop the cached compositing op */
  rasterizer->comp_op = NULL;

  ctx_rasterizer_rectangle (rasterizer, x, y, 1.0f, 1.0f);
  ctx_rasterizer_fill (rasterizer);
}

#if CTX_ENABLE_SHADOW_BLUR
/* Unnormalised gaussian: exp (-0.5 * ((x - mu) / sigma)^2). */
static inline float
ctx_gaussian (float x, float mu, float sigma)
{
  const float  deviation = (x - mu) / sigma;
  const double exponent  = -0.5 * deviation * deviation;
  return ctx_expf (exponent);
}

/* Fill kernel[0..dim-1] with a one dimensional gaussian centred on @radius,
 * using sigma = radius / 2, then scale the weights so that they sum to 1.
 */
static inline void
ctx_compute_gaussian_kernel (int dim, float radius, float *kernel)
{
  const float sigma = radius / 2;
  float total = 0.0;
  int   tap;

  /* first pass: raw weights and their sum */
  tap = 0;
  while (tap < dim)
    {
      float weight = ctx_gaussian (tap, radius, sigma);
      kernel[tap] = weight;
      total += weight;
      tap++;
    }

  /* second pass: normalise */
  tap = 0;
  while (tap < dim)
    {
      kernel[tap] /= total;
      tap++;
    }
}
#endif

/* Add a rectangle with rounded corners to the path.
 *
 * The corner radius is limited to half the width and half the height.  Any
 * shape in progress is finished first, then four quarter arcs are emitted
 * (centres inset by the radius from each corner) and the shape is finished
 * again.
 */
static void
ctx_rasterizer_round_rectangle (CtxRasterizer *rasterizer, float x, float y, float width, float height, float corner_radius)
{
  const float degrees = CTX_PI / 180.0f;
  const float half_w  = width  * 0.5f;
  const float half_h  = height * 0.5f;
  float       radius  = corner_radius;

  if (radius > half_w)
    radius = half_w;
  if (radius > half_h)
    radius = half_h;

  /* arc centres */
  const float right  = x + width - radius;
  const float left   = x + radius;
  const float top    = y + radius;
  const float bottom = y + height - radius;

  ctx_rasterizer_finish_shape (rasterizer);

  ctx_rasterizer_arc (rasterizer, right, top,    radius, -90 * degrees, 0.0f,          0);
  ctx_rasterizer_arc (rasterizer, right, bottom, radius, 0.0f,          90 * degrees,  0);
  ctx_rasterizer_arc (rasterizer, left,  bottom, radius, 90 * degrees,  180 * degrees, 0);
  ctx_rasterizer_arc (rasterizer, left,  top,    radius, 180 * degrees, 270 * degrees, 0);

  ctx_rasterizer_finish_shape (rasterizer);
}

/* Forward declaration: the compositing-group and shadow helpers below replay
 * commands through the dispatcher before its definition appears.
 */
static void
ctx_rasterizer_process (Ctx *ctx, CtxCommand *command);

#if CTX_COMPOSITING_GROUPS
/* Begin a compositing group.
 *
 * Allocates a buffer of blit_width x blit_height in the composite format of
 * the target, stores it in the first free slot of rasterizer->group[] and
 * redirects rendering (rasterizer->buf) to it.  When this is the first group
 * the original target buffer is remembered in rasterizer->saved_buf.  A
 * CTX_SAVE command is then processed.
 *
 * If all CTX_GROUP_MAX slots are taken the call does nothing.
 */
static void
ctx_rasterizer_start_group (CtxRasterizer *rasterizer) /* add a radius? */
{
  CtxEntry save_command = ctx_void(CTX_SAVE);
  int no;

  if (rasterizer->group[0] == NULL) // first group
  {
    rasterizer->saved_buf = rasterizer->buf;
  }

  /* Find the first free slot.  The bound is tested before the slot is read
   * so that group[CTX_GROUP_MAX] is never accessed when every slot is in use.
   */
  for (no = 0; no < CTX_GROUP_MAX && rasterizer->group[no]; no++);

  if (no >= CTX_GROUP_MAX)
     return;

  // allocate buffer, and set it as temporary target
  rasterizer->group[no] = ctx_buffer_new (rasterizer->blit_width,
                                          rasterizer->blit_height,
                                          rasterizer->format->composite_format);
  rasterizer->buf = rasterizer->group[no]->data;
  ctx_rasterizer_process (rasterizer->backend.ctx, (CtxCommand*)&save_command);
}

/* End the innermost compositing group and composite it onto its parent.
 *
 * The compositing mode, blend mode, extend and global alpha that are current
 * inside the group are captured, the graphics state is restored and saved
 * again, and the captured values are re-applied.  The render target is
 * switched back to the parent group's buffer (or to saved_buf for the
 * outermost group), the group buffer is registered as the texture
 * ".ctx-group", selected as source, and a rectangle covering the blit area
 * is filled with it.  Finally the group buffer is freed, its slot cleared
 * and the state restored.
 *
 * Does nothing when no group is open.
 */
static void
ctx_rasterizer_end_group (CtxRasterizer *rasterizer)
{
  CtxEntry restore_command = ctx_void(CTX_RESTORE);
  CtxEntry save_command = ctx_void(CTX_SAVE);
  int no = 0;

  /* Locate the last used slot.  The bound is tested before the slot is read
   * so that group[CTX_GROUP_MAX] is never accessed when every slot is in use.
   */
  for (no = 0; no < CTX_GROUP_MAX && rasterizer->group[no]; no++);
  no--;

  if (no < 0)
    return;

  Ctx *ctx = rasterizer->backend.ctx;

  // fetch compositing, blending, extend and global alpha set inside the group
  CtxCompositingMode comp = rasterizer->state->gstate.compositing_mode;
  CtxBlend blend = rasterizer->state->gstate.blend_mode;
  CtxExtend extend = rasterizer->state->gstate.extend;
  float global_alpha = rasterizer->state->gstate.global_alpha_f;

  ctx_rasterizer_process (ctx, (CtxCommand*)&restore_command);
  ctx_rasterizer_process (ctx, (CtxCommand*)&save_command);

  /* re-apply the captured values on top of the restored state */
  CtxEntry set_state[4]=
  {
    ctx_u32 (CTX_COMPOSITING_MODE, comp,  0),
    ctx_u32 (CTX_BLEND_MODE,       blend, 0),
    ctx_u32 (CTX_EXTEND,          extend, 0),
    ctx_f  (CTX_GLOBAL_ALPHA,     global_alpha, 0.0)
  };
  ctx_rasterizer_process (ctx, (CtxCommand*)&set_state[0]);
  ctx_rasterizer_process (ctx, (CtxCommand*)&set_state[1]);
  ctx_rasterizer_process (ctx, (CtxCommand*)&set_state[2]);
  ctx_rasterizer_process (ctx, (CtxCommand*)&set_state[3]);

  /* render into the parent again */
  if (no == 0)
  {
    rasterizer->buf = rasterizer->saved_buf;
  }
  else
  {
    rasterizer->buf = rasterizer->group[no-1]->data;
  }

  // XXX use texture_source ?
   ctx_texture_init (ctx, ".ctx-group",
                  rasterizer->blit_width,
                  rasterizer->blit_height,
                  rasterizer->blit_width * rasterizer->format->bpp/8,
                  rasterizer->format->pixel_format,
                  NULL, // space
                  (uint8_t*)rasterizer->group[no]->data,
                  NULL, NULL);
  {
     const char *eid = ".ctx-group";
     int   eid_len = strlen (eid);

     /* CTX_TEXTURE command followed by its eid string; the string bytes are
      * written into the continuation entries, starting at commands[2].
      */
     CtxEntry commands[4] =
      {
       ctx_f   (CTX_TEXTURE, rasterizer->blit_x, rasterizer->blit_y),
       ctx_u32 (CTX_DATA, eid_len, eid_len/9+1),
       ctx_u32 (CTX_CONT, 0,0),
       ctx_u32 (CTX_CONT, 0,0)
      };
     memcpy( (char *) &commands[2].data.u8[0], eid, eid_len);
     ( (char *) (&commands[2].data.u8[0]) ) [eid_len]=0;

     ctx_rasterizer_process (ctx, (CtxCommand*)commands);
  }
  {
    /* cover the whole blit area with the group texture */
    CtxEntry commands[2]=
    {
      ctx_f (CTX_RECTANGLE, rasterizer->blit_x, rasterizer->blit_y),
      ctx_f (CTX_CONT,      rasterizer->blit_width, rasterizer->blit_height)
    };
    ctx_rasterizer_process (ctx, (CtxCommand*)commands);
  }
  {
    CtxEntry commands[1] = { ctx_void (CTX_FILL) };
    ctx_rasterizer_process (ctx, (CtxCommand*)commands);
  }
  //ctx_texture_release (rasterizer->backend.ctx, ".ctx-group");
  ctx_buffer_free (rasterizer->group[no]);
  rasterizer->group[no] = 0;
  ctx_rasterizer_process (ctx, (CtxCommand*)&restore_command);
}
#endif

#if CTX_ENABLE_SHADOW_BLUR
/* Stroke the shadow of the current path.
 *
 * The shadow colour is taken from CTX_shadowColor when set, opaque black
 * otherwise.  A gaussian kernel of 2 * shadow_blur + 1 taps (limited to
 * CTX_MAX_GAUSSIAN_KERNEL_DIM) is computed, and the path is stroked once per
 * tap with the alpha scaled by the tap's weight and the tap's vertical
 * offset stored in rasterizer->shadow_y.  The path is preserved across the
 * strokes; the whole operation is bracketed by CTX_SAVE / CTX_RESTORE.
 */
static void
ctx_rasterizer_shadow_stroke (CtxRasterizer *rasterizer)
{
  Ctx     *ctx = rasterizer->backend.ctx;
  CtxColor shadow_color;
  float    rgba[4] = {0, 0, 0, 1.0};

  if (ctx_get_color (ctx, CTX_shadowColor, &shadow_color) == 0)
    ctx_color_get_rgba (rasterizer->state, &shadow_color, rgba);

  CtxEntry save_command    = ctx_void (CTX_SAVE);
  CtxEntry restore_command = ctx_void (CTX_RESTORE);
  CtxEntry set_color_command[3] =
  {
    ctx_f (CTX_COLOR, CTX_RGBA, rgba[0]),
    ctx_f (CTX_CONT, rgba[1], rgba[2]),
    ctx_f (CTX_CONT, rgba[3], 0)
  };

  float blur_radius = rasterizer->state->gstate.shadow_blur;
  int   taps        = 2 * blur_radius + 1;
  if (taps > CTX_MAX_GAUSSIAN_KERNEL_DIM)
    taps = CTX_MAX_GAUSSIAN_KERNEL_DIM;
  ctx_compute_gaussian_kernel (taps, blur_radius, rasterizer->kernel);

  ctx_rasterizer_process (ctx, (CtxCommand*)&save_command);

  for (int tap = 0; tap < taps; tap++)
    {
      float dy = rasterizer->state->gstate.shadow_offset_y + tap - taps/2;

      /* alpha of this pass is the shadow alpha weighted by the kernel */
      set_color_command[2].data.f[0] = rasterizer->kernel[tap] * rgba[3];
      ctx_rasterizer_process (ctx, (CtxCommand*)set_color_command);

      rasterizer->in_shadow = 1;
      rasterizer->shadow_x  = rasterizer->state->gstate.shadow_offset_x;
      rasterizer->shadow_y  = dy;
      rasterizer->preserve  = 1;
      ctx_rasterizer_stroke (rasterizer);
      rasterizer->in_shadow = 0;
    }

  ctx_rasterizer_process (ctx, (CtxCommand*)&restore_command);
}

/* Render the shadow pass for a text string.
 *
 * The shadow colour (CTX_shadowColor when set, opaque black otherwise) is
 * selected inside a CTX_SAVE / CTX_RESTORE pair, the pen is moved to the
 * position it had on entry and @str is rendered once with in_shadow set.
 * After the restore the pen is moved back to the entry position so that the
 * caller can render the actual text from the same spot.  The gaussian kernel
 * in rasterizer->kernel is recomputed as a side effect.
 */
static void
ctx_rasterizer_shadow_text (CtxRasterizer *rasterizer, const char *str)
{
  Ctx        *ctx     = rasterizer->backend.ctx;
  const float start_x = rasterizer->state->x;
  const float start_y = rasterizer->state->y;
  CtxColor    shadow_color;
  float       rgba[4] = {0, 0, 0, 1.0};

  if (ctx_get_color (ctx, CTX_shadowColor, &shadow_color) == 0)
    ctx_color_get_rgba (rasterizer->state, &shadow_color, rgba);

  CtxEntry save_command    = ctx_void (CTX_SAVE);
  CtxEntry restore_command = ctx_void (CTX_RESTORE);
  CtxEntry set_color_command[3] =
  {
    ctx_f (CTX_COLOR, CTX_RGBA, rgba[0]),
    ctx_f (CTX_CONT, rgba[1], rgba[2]),
    ctx_f (CTX_CONT, rgba[3], 0)
  };
  CtxEntry move_to_command[1] =
  {
    ctx_f (CTX_MOVE_TO, start_x, start_y),
  };

  float blur_radius = rasterizer->state->gstate.shadow_blur;
  int   taps        = 2 * blur_radius + 1;
  if (taps > CTX_MAX_GAUSSIAN_KERNEL_DIM)
    taps = CTX_MAX_GAUSSIAN_KERNEL_DIM;
  ctx_compute_gaussian_kernel (taps, blur_radius, rasterizer->kernel);

  ctx_rasterizer_process (ctx, (CtxCommand*)&save_command);

  /* single shadow pass at full shadow alpha */
  move_to_command[0].data.f[0]   = start_x;
  move_to_command[0].data.f[1]   = start_y;
  set_color_command[2].data.f[0] = rgba[3];
  ctx_rasterizer_process (ctx, (CtxCommand*)set_color_command);
  ctx_rasterizer_process (ctx, (CtxCommand*)move_to_command);
  rasterizer->in_shadow = 1;
  ctx_rasterizer_text (rasterizer, str, 0);
  rasterizer->in_shadow = 0;

  ctx_rasterizer_process (ctx, (CtxCommand*)&restore_command);

  /* rewind the pen for the real text */
  move_to_command[0].data.f[0] = start_x;
  move_to_command[0].data.f[1] = start_y;
  ctx_rasterizer_process (ctx, (CtxCommand*)move_to_command);
}

/* Fill the shadow of the current path.
 *
 * The shadow colour is taken from CTX_shadowColor when set, opaque black
 * otherwise.  A gaussian kernel of 2 * shadow_blur + 1 taps (limited to
 * CTX_MAX_GAUSSIAN_KERNEL_DIM) is computed, and the path is filled once per
 * tap with the alpha scaled by the tap's weight and the tap's vertical
 * offset stored in rasterizer->shadow_y.  The path is preserved across the
 * fills; the whole operation is bracketed by CTX_SAVE / CTX_RESTORE.
 */
static void
ctx_rasterizer_shadow_fill (CtxRasterizer *rasterizer)
{
  Ctx     *ctx = rasterizer->backend.ctx;
  CtxColor shadow_color;
  float    rgba[4] = {0, 0, 0, 1.0};

  if (ctx_get_color (ctx, CTX_shadowColor, &shadow_color) == 0)
    ctx_color_get_rgba (rasterizer->state, &shadow_color, rgba);

  CtxEntry save_command    = ctx_void (CTX_SAVE);
  CtxEntry restore_command = ctx_void (CTX_RESTORE);
  CtxEntry set_color_command[3] =
  {
    ctx_f (CTX_COLOR, CTX_RGBA, rgba[0]),
    ctx_f (CTX_CONT, rgba[1], rgba[2]),
    ctx_f (CTX_CONT, rgba[3], 0)
  };

  float blur_radius = rasterizer->state->gstate.shadow_blur;
  int   taps        = 2 * blur_radius + 1;
  if (taps > CTX_MAX_GAUSSIAN_KERNEL_DIM)
    taps = CTX_MAX_GAUSSIAN_KERNEL_DIM;
  ctx_compute_gaussian_kernel (taps, blur_radius, rasterizer->kernel);

  ctx_rasterizer_process (ctx, (CtxCommand*)&save_command);

  int tap = 0;
  while (tap < taps)
    {
      float dy = rasterizer->state->gstate.shadow_offset_y + tap - taps/2;

      /* alpha of this pass is the shadow alpha weighted by the kernel */
      set_color_command[2].data.f[0] = rasterizer->kernel[tap] * rgba[3];
      ctx_rasterizer_process (ctx, (CtxCommand*)set_color_command);

      rasterizer->in_shadow = 1;
      rasterizer->shadow_x  = rasterizer->state->gstate.shadow_offset_x;
      rasterizer->shadow_y  = dy;
      rasterizer->preserve  = 1;
      ctx_rasterizer_fill (rasterizer);
      rasterizer->in_shadow = 0;

      tap++;
    }

  ctx_rasterizer_process (ctx, (CtxCommand*)&restore_command);
}
#endif

/* Set the dash pattern of the current graphics state.
 *
 * A NULL @dashes clears the pattern.  Otherwise at most
 * CTX_PARSER_MAX_ARGS-1 lengths are copied into the state, and every stored
 * length below 0.0001 is raised to 0.0001 as protection against hangs.
 */
static void
ctx_rasterizer_line_dash (CtxRasterizer *rasterizer, unsigned int count, float *dashes)
{
  if (dashes == NULL)
    {
      rasterizer->state->gstate.n_dashes = 0;
      return;
    }

  const unsigned int n_stored = CTX_MIN(count, CTX_PARSER_MAX_ARGS-1);
  float *stored = rasterizer->state->gstate.dashes;

  rasterizer->state->gstate.n_dashes = n_stored;
  memcpy (stored, dashes, n_stored * sizeof(float));

  /* hang protection: never keep a (near) zero or negative dash length */
  for (float *dash = stored; dash != stored + n_stored; dash++)
    {
      if (*dash < 0.0001f)
        *dash = 0.0001f;
    }
}


/* Process one command on the rasterizer backend of @ctx.
 *
 * For every command ctx_interpret_style() is run first, then the command is
 * dispatched on its code, and finally ctx_interpret_pos_bare() is run.
 * Codes without a case in the switch only get the two interpreter calls.
 *
 * @ctx:     context whose backend is cast to a CtxRasterizer
 * @command: the command to process; multi-entry commands are read through
 *           the CtxCommand union view of the first entry
 */
static void
ctx_rasterizer_process (Ctx *ctx, CtxCommand *command)
{
  CtxEntry      *entry      = &command->entry;
  CtxRasterizer *rasterizer = (CtxRasterizer *) ctx->backend;
  CtxState      *state      = rasterizer->state;
  CtxCommand    *c          = (CtxCommand *) entry;
  /* set by CTX_RESTORE when the gstate being popped registered a clip */
  int            clear_clip = 0;

  ctx_interpret_style (state, entry, NULL);
  switch (c->code)
    {
#if CTX_ENABLE_SHADOW_BLUR
      case CTX_SHADOW_COLOR:
        {
          /* convert the colour given in the command's model to a CtxColor
           * and store it under the CTX_shadowColor key */
          CtxColor  col;
          CtxColor *color = &col;
          //state->gstate.source_fill.type = CTX_SOURCE_COLOR;
          switch ((int)c->rgba.model)
            {
              case CTX_RGB:
                ctx_color_set_rgba (state, color, c->rgba.r, c->rgba.g, c->rgba.b, 1.0f);
                break;
              case CTX_RGBA:
                //ctx_color_set_rgba (state, color, c->rgba.r, c->rgba.g, c->rgba.b, c->rgba.a);
                ctx_color_set_rgba (state, color, c->rgba.r, c->rgba.g, c->rgba.b, c->rgba.a);
                break;
              case CTX_DRGBA:
                ctx_color_set_drgba (state, color, c->rgba.r, c->rgba.g, c->rgba.b, c->rgba.a);
                break;
#if CTX_ENABLE_CMYK
              case CTX_CMYKA:
                ctx_color_set_cmyka (state, color, c->cmyka.c, c->cmyka.m, c->cmyka.y, c->cmyka.k, c->cmyka.a);
                break;
              case CTX_CMYK:
                ctx_color_set_cmyka (state, color, c->cmyka.c, c->cmyka.m, c->cmyka.y, c->cmyka.k, 1.0f);
                break;
              case CTX_DCMYKA:
                ctx_color_set_dcmyka (state, color, c->cmyka.c, c->cmyka.m, c->cmyka.y, c->cmyka.k, c->cmyka.a);
                break;
              case CTX_DCMYK:
                ctx_color_set_dcmyka (state, color, c->cmyka.c, c->cmyka.m, c->cmyka.y, c->cmyka.k, 1.0f);
                break;
#endif
              case CTX_GRAYA:
                ctx_color_set_graya (state, color, c->graya.g, c->graya.a);
                break;
              case CTX_GRAY:
                ctx_color_set_graya (state, color, c->graya.g, 1.0f);
                break;
            }
          /* NOTE(review): for a model not listed above col is stored
           * without having been set - confirm this cannot happen */
          ctx_set_color (rasterizer->backend.ctx, CTX_shadowColor, color);
        }
        break;
#endif
      case CTX_LINE_DASH:
        /* a count of zero clears the dash pattern (NULL dashes) */
        if (c->line_dash.count)
          {
            ctx_rasterizer_line_dash (rasterizer, c->line_dash.count, c->line_dash.data);
          }
        else
        ctx_rasterizer_line_dash (rasterizer, 0, NULL);
        break;


      /* path construction; each of these is skipped while ctx->bail is set */
      case CTX_LINE_TO:
        if (ctx->bail) break;
        ctx_rasterizer_line_to (rasterizer, c->c.x0, c->c.y0);
        break;
      case CTX_REL_LINE_TO:
        if (ctx->bail) break;
        ctx_rasterizer_rel_line_to (rasterizer, c->c.x0, c->c.y0);
        break;
      case CTX_MOVE_TO:
        if (ctx->bail) break;
        ctx_rasterizer_move_to (rasterizer, c->c.x0, c->c.y0);
        break;
      case CTX_REL_MOVE_TO:
        if (ctx->bail) break;
        ctx_rasterizer_rel_move_to (rasterizer, c->c.x0, c->c.y0);
        break;
      case CTX_CURVE_TO:
        if (ctx->bail) break;
        ctx_rasterizer_curve_to (rasterizer, c->c.x0, c->c.y0,
                                 c->c.x1, c->c.y1,
                                 c->c.x2, c->c.y2);
        break;
      case CTX_REL_CURVE_TO:
        if (ctx->bail) break;
        ctx_rasterizer_rel_curve_to (rasterizer, c->c.x0, c->c.y0,
                                     c->c.x1, c->c.y1,
                                     c->c.x2, c->c.y2);
        break;
      case CTX_QUAD_TO:
        if (ctx->bail) break;
        ctx_rasterizer_quad_to (rasterizer, c->c.x0, c->c.y0, c->c.x1, c->c.y1);
        break;
      case CTX_REL_QUAD_TO:
        if (ctx->bail) break;
        ctx_rasterizer_rel_quad_to (rasterizer, c->c.x0, c->c.y0, c->c.x1, c->c.y1);
        break;
      case CTX_ARC:
        if (ctx->bail) break;
        ctx_rasterizer_arc (rasterizer, c->arc.x, c->arc.y, c->arc.radius, c->arc.angle1, c->arc.angle2, c->arc.direction);
        break;
      case CTX_RECTANGLE:
        if (ctx->bail) break;
        ctx_rasterizer_rectangle (rasterizer, c->rectangle.x, c->rectangle.y,
                                  c->rectangle.width, c->rectangle.height);
        break;
      case CTX_ROUND_RECTANGLE:
        if (ctx->bail) break;
        ctx_rasterizer_round_rectangle (rasterizer, c->rectangle.x, c->rectangle.y,
                                        c->rectangle.width, c->rectangle.height,
                                        c->rectangle.radius);
        break;
      case CTX_SET_PIXEL:
        ctx_rasterizer_set_pixel (rasterizer, c->set_pixel.x, c->set_pixel.y,
                                  c->set_pixel.rgba[0],
                                  c->set_pixel.rgba[1],
                                  c->set_pixel.rgba[2],
                                  c->set_pixel.rgba[3]);
        break;
      /* source changes; comp_op (and for textures also fragment) is cleared,
       * presumably so that it gets re-selected before the next draw */
      case CTX_DEFINE_TEXTURE:
        {
          uint8_t *pixel_data = ctx_define_texture_pixel_data (entry);
          ctx_rasterizer_define_texture (rasterizer, c->define_texture.eid,
                                         c->define_texture.width, c->define_texture.height,
                                         c->define_texture.format,
                                         pixel_data);
          rasterizer->comp_op = NULL;
          rasterizer->fragment = NULL;
        }
        break;
      case CTX_TEXTURE:
        ctx_rasterizer_set_texture (rasterizer, c->texture.eid,
                                    c->texture.x, c->texture.y);
        rasterizer->comp_op = NULL;
        rasterizer->fragment = NULL;
        break;
      case CTX_SOURCE_TRANSFORM:
        /* nine floats from the command replace the fill source's
         * set_transform matrix */
        ctx_matrix_set (&state->gstate.source_fill.set_transform,
                        ctx_arg_float (0), ctx_arg_float (1),
                        ctx_arg_float (2), ctx_arg_float (3),
                        ctx_arg_float (4), ctx_arg_float (5),
                        ctx_arg_float (6), ctx_arg_float (7),
                        ctx_arg_float (8));
        rasterizer->comp_op = NULL;
        break;
#if 0
      case CTX_LOAD_IMAGE:
        ctx_rasterizer_load_image (rasterizer, ctx_arg_string(),
                                   ctx_arg_float (0), ctx_arg_float (1) );
        break;
#endif
#if CTX_GRADIENTS
      case CTX_GRADIENT_STOP:
        {
          /* stop position is float argument 0, the colour is four u8
           * arguments starting at index 4 */
          float rgba[4]= {ctx_u8_to_float (ctx_arg_u8 (4) ),
                          ctx_u8_to_float (ctx_arg_u8 (4+1) ),
                          ctx_u8_to_float (ctx_arg_u8 (4+2) ),
                          ctx_u8_to_float (ctx_arg_u8 (4+3) )
                         };
          ctx_rasterizer_gradient_add_stop (rasterizer,
                                            ctx_arg_float (0), rgba);
          rasterizer->comp_op = NULL;
        }
        break;
      /* a new gradient starts without stops and with an invalid cache */
      case CTX_LINEAR_GRADIENT:
        ctx_state_gradient_clear_stops (state);
        rasterizer->gradient_cache_valid = 0;
        rasterizer->comp_op = NULL;
        break;
      case CTX_RADIAL_GRADIENT:
        ctx_state_gradient_clear_stops (state);
        rasterizer->gradient_cache_valid = 0;
        rasterizer->comp_op = NULL;
        break;
#endif
      case CTX_PRESERVE:
        rasterizer->preserve = 1;
        break;
      case CTX_COLOR:
      case CTX_COMPOSITING_MODE:
      case CTX_BLEND_MODE:
      case CTX_EXTEND:
        rasterizer->comp_op = NULL;
        break;
#if CTX_COMPOSITING_GROUPS
      case CTX_START_GROUP:
        ctx_rasterizer_start_group (rasterizer);
        break;
      case CTX_END_GROUP:
        ctx_rasterizer_end_group (rasterizer);
        break;
#endif

      case CTX_RESTORE:
        /* look through the keydb entries added since the enclosing gstate
         * was saved; if one of them is a clip, the clip has to be rebuilt
         * once the state has been popped */
        for (int i = state->gstate_no?state->gstate_stack[state->gstate_no-1].keydb_pos:0;
             i < state->gstate.keydb_pos; i++)
        {
          if (state->keydb[i].key == CTX_clip)
          {
            clear_clip = 1;
          }
        }
        /* FALLTHROUGH */
      case CTX_ROTATE:
      case CTX_SCALE:
      case CTX_APPLY_TRANSFORM:
      case CTX_TRANSLATE:
      case CTX_IDENTITY:
        rasterizer->uses_transforms = 1;
        /* FALLTHROUGH */
      case CTX_SAVE:
        rasterizer->comp_op = NULL;
        ctx_interpret_transforms (state, entry, NULL);
        if (clear_clip)
        {
          /* only reachable via CTX_RESTORE: drop the clip, then re-apply
           * every clip blob found in the same keydb range, now evaluated
           * against the state as left by ctx_interpret_transforms */
          ctx_rasterizer_clip_reset (rasterizer);
        for (int i = state->gstate_no?state->gstate_stack[state->gstate_no-1].keydb_pos:0;
             i < state->gstate.keydb_pos; i++)
        {
          if (state->keydb[i].key == CTX_clip)
          {
            int idx = ctx_float_to_string_index (state->keydb[i].value);
            if (idx >=0)
            {
              CtxSegment *edges = (CtxSegment*)&state->stringpool[idx];
              ctx_rasterizer_clip_apply (rasterizer, edges);
            }
          }
        }
        }
        break;
      case CTX_STROKE:
          /* nothing to stroke for an empty path */
          if (rasterizer->edge_list.count == 0)break;
#if CTX_ENABLE_SHADOW_BLUR
        if (state->gstate.shadow_blur > 0.0 &&
            !rasterizer->in_text)
          ctx_rasterizer_shadow_stroke (rasterizer);
#endif
        {
        int count = rasterizer->edge_list.count;
        if (state->gstate.n_dashes)
        {
          /* Dashing: the existing poly line is copied aside, the path is
           * reset, and a new path containing only the "pen down" pieces is
           * built from the copy; that path is what gets stroked below. */
          int n_dashes = state->gstate.n_dashes;
          float *dashes = state->gstate.dashes;
          /* dash lengths and the dash offset are multiplied by this scale
           * of the current transform */
          float factor = ctx_matrix_get_scale (&state->gstate.transform);

          CtxSegment temp[count]; /* copy of already built up path's poly line  */
          memcpy (temp, rasterizer->edge_list.entries, sizeof (temp));
          int start = 0;
          int end   = 0;
      /* the copied coordinates are re-emitted under an identity transform
       * (presumably because they are already transformed - TODO confirm);
       * the real transform is put back after the loop */
      CtxMatrix transform_backup = state->gstate.transform;
      _ctx_matrix_identity (&state->gstate.transform);
      ctx_rasterizer_reset (rasterizer); /* for dashing we create
                                            a dashed path to stroke */
      float prev_x = 0.0f;
      float prev_y = 0.0f;
      /* pos accumulates length below but is not read in this function */
      float pos = 0.0;

      int   dash_no  = 0.0;   /* index of the current entry in dashes[] */
      float dash_lpos = state->gstate.line_dash_offset * factor; /* distance consumed within the current dash */
      int   is_down = 0;      /* 1 while inside a drawn dash, 0 in a gap */

          /* outer loop: one iteration per sub path, [start, end] being the
           * range of segments belonging to it */
          while (start < count)
          {
            int started = 0;
            int i;
            is_down = 0;

            /* always taken, since is_down was cleared just above */
            if (!is_down)
            {
              CtxSegment *entry = &temp[0];
              prev_x = entry->data.s16[0] * 1.0f / CTX_SUBDIV;
              prev_y = entry->data.s16[1] * 1.0f / CTX_FULL_AA;
              ctx_rasterizer_move_to (rasterizer, prev_x, prev_y);
              is_down = 1;
            }

            for (i = start; i < count; i++)
            {
              CtxSegment *entry = &temp[i];
              float x, y;
              if (entry->code == CTX_NEW_EDGE)
                {
                  /* a second CTX_NEW_EDGE ends the current sub path; the
                   * dash state restarts for the next one */
                  if (started)
                    {
                      end = i - 1;
                      dash_no = 0;
                      dash_lpos = 0.0;
                      goto foo;
                    }
                  prev_x = entry->data.s16[0] * 1.0f / CTX_SUBDIV;
                  prev_y = entry->data.s16[1] * 1.0f / CTX_FULL_AA;
                  started = 1;
                  start = i;
                  is_down = 1;
                  ctx_rasterizer_move_to (rasterizer, prev_x, prev_y);
                }

/* re-entered after each split until the rest of the segment fits inside
 * the current dash or gap */
again:

              x = entry->data.s16[2] * 1.0f / CTX_SUBDIV;
              y = entry->data.s16[3] * 1.0f / CTX_FULL_AA;
              float dx = x - prev_x;
              float dy = y - prev_y;
              float length = ctx_fast_hypotf (dx, dy);

              /* NOTE(review): if dashes[dash_no] * factor is 0 (for
               * instance a zero scale factor) this condition stays true
               * with p == 0, prev_x/prev_y do not advance, and the goto
               * below appears never to terminate - confirm that
               * ctx_matrix_get_scale cannot return 0 here */
              if (dash_lpos + length >= dashes[dash_no] * factor)
              {
                /* the current dash/gap ends inside this segment: split at
                 * fraction p and toggle the pen */
                float p = (dashes[dash_no] * factor - dash_lpos) / length;
                float splitx = x * p + (1.0f - p) * prev_x;
                float splity = y * p + (1.0f - p) * prev_y;
                if (is_down)
                {
                  ctx_rasterizer_line_to (rasterizer, splitx, splity);
                  is_down = 0;
                }
                else
                {
                  ctx_rasterizer_move_to (rasterizer, splitx, splity);
                  is_down = 1;
                }
                prev_x = splitx;
                prev_y = splity;
                dash_no++;
                dash_lpos=0;
                if (dash_no >= n_dashes) dash_no = 0;
                goto again;
              }
              else
              {
                /* the whole (remaining) segment lies within the current
                 * dash or gap; it is only emitted while the pen is down */
                pos += length;
                dash_lpos += length;
                {
                  if (is_down)
                    ctx_rasterizer_line_to (rasterizer, x, y);
                }
              }
              prev_x = x;
              prev_y = y;
            }
          end = i-1;
foo:
          start = end+1;
        }
        state->gstate.transform = transform_backup;
        }
        ctx_rasterizer_stroke (rasterizer);
        }
        ctx_rasterizer_reset (rasterizer);

        break;
      case CTX_FONT:
        ctx_rasterizer_set_font (rasterizer, ctx_arg_string() );
        break;
      case CTX_TEXT:
        /* in_text is raised for the duration of the text; the CTX_FILL and
         * CTX_STROKE cases skip their own shadow pass while it is set */
        rasterizer->in_text++;
#if CTX_ENABLE_SHADOW_BLUR
        if (state->gstate.shadow_blur > 0.0)
          ctx_rasterizer_shadow_text (rasterizer, ctx_arg_string ());
#endif
        ctx_rasterizer_text (rasterizer, ctx_arg_string(), 0);
        rasterizer->in_text--;
        ctx_rasterizer_reset (rasterizer);
        break;
      case CTX_STROKE_TEXT:
        ctx_rasterizer_text (rasterizer, ctx_arg_string(), 1);
        ctx_rasterizer_reset (rasterizer);
        break;
      case CTX_GLYPH:
        ctx_rasterizer_glyph (rasterizer, entry[0].data.u32[0], entry[0].data.u8[4]);
        break;
      case CTX_PAINT:
        /* implemented as the fill of one large fixed rectangle */
        // XXX simplify this with a special case
        ctx_rasterizer_rectangle (rasterizer, -1000.0, -1000.0, 10000, 10000);
        ctx_rasterizer_fill (rasterizer);
        ctx_rasterizer_reset (rasterizer);
        break;
      case CTX_FILL:
          /* nothing to fill for an empty path */
          if (rasterizer->edge_list.count == 0)break;
#if CTX_ENABLE_SHADOW_BLUR
        if (state->gstate.shadow_blur > 0.0 &&
            !rasterizer->in_text)
          ctx_rasterizer_shadow_fill (rasterizer);
#endif
        ctx_rasterizer_fill (rasterizer);
        ctx_rasterizer_reset (rasterizer);
        break;
      case CTX_RESET:
      case CTX_BEGIN_PATH:
        ctx_rasterizer_reset (rasterizer);
        break;
      case CTX_CLIP:
        ctx_rasterizer_clip (rasterizer);
        break;
      case CTX_CLOSE_PATH:
        ctx_rasterizer_finish_shape (rasterizer);
        break;
      case CTX_IMAGE_SMOOTHING:
        rasterizer->comp_op = NULL;
        break;
    }
  ctx_interpret_pos_bare (state, entry, NULL);
}

/* Tear down a rasterizer: releases the edge list, the clip buffer (when
 * CTX_ENABLE_CLIP is set), every populated shape-cache slot (when
 * CTX_SHAPE_CACHE is set) and finally the rasterizer allocation itself.
 */
void
ctx_rasterizer_deinit (CtxRasterizer *rasterizer)
{
  ctx_drawlist_deinit (&rasterizer->edge_list);

#if CTX_ENABLE_CLIP
  if (rasterizer->clip_buffer != NULL)
  {
    ctx_buffer_free (rasterizer->clip_buffer);
    rasterizer->clip_buffer = NULL;
  }
#endif

#if CTX_SHAPE_CACHE
  {
    int slot = 0;
    while (slot < CTX_SHAPE_CACHE_ENTRIES)
    {
      if (rasterizer->shape_cache.entries[slot])
      {
        free (rasterizer->shape_cache.entries[slot]);
        rasterizer->shape_cache.entries[slot] = NULL;
      }
      slot++;
    }
  }
#endif

  free (rasterizer);
}


/* Report the antialiasing mode of @ctx.
 *
 * Tiled backends (CTX_EVENTS builds) return the mode stored on the tiled
 * backend; a plain rasterizer maps its internal `aa` level back to the enum;
 * every other backend reports CTX_ANTIALIAS_DEFAULT.
 *
 * Only levels 1 and 3 are mapped back explicitly; any other level (including
 * 5, which _ctx_antialias_to_aa uses for CTX_ANTIALIAS_GOOD) is reported as
 * CTX_ANTIALIAS_DEFAULT.
 */
CtxAntialias ctx_get_antialias (Ctx *ctx)
{
#if CTX_EVENTS
  if (ctx_backend_is_tiled (ctx))
  {
     CtxTiled *tiled = (CtxTiled*)(ctx->backend);
     return tiled->antialias;
  }
#endif
  if (ctx_backend_type (ctx) != CTX_BACKEND_RASTERIZER)
    return CTX_ANTIALIAS_DEFAULT;

  int level = ((CtxRasterizer*)(ctx->backend))->aa;
  if (level == 1)
    return CTX_ANTIALIAS_NONE;
  if (level == 3)
    return CTX_ANTIALIAS_FAST;
  return CTX_ANTIALIAS_DEFAULT;
}

/* Translate a public CtxAntialias value into the rasterizer's internal
 * `aa` level: NONE -> 1, FAST -> 3, GOOD -> 5, anything else (including
 * DEFAULT) -> CTX_RASTERIZER_AA.
 */
static int _ctx_antialias_to_aa (CtxAntialias antialias)
{
  if (antialias == CTX_ANTIALIAS_NONE)
    return 1;
  if (antialias == CTX_ANTIALIAS_FAST)
    return 3;
  if (antialias == CTX_ANTIALIAS_GOOD)
    return 5;
  return CTX_RASTERIZER_AA;
}

/* Select the antialiasing mode for @ctx.
 *
 * For tiled backends (CTX_EVENTS builds) the mode is stored on the backend
 * and forwarded to each of the _ctx_max_threads host contexts. For a plain
 * rasterizer the internal `aa` level is updated and `fast_aa` is enabled
 * only for the DEFAULT and FAST modes. Other backends are left untouched.
 */
void
ctx_set_antialias (Ctx *ctx, CtxAntialias antialias)
{
#if CTX_EVENTS
  if (ctx_backend_is_tiled (ctx))
  {
     CtxTiled *tiled = (CtxTiled*)(ctx->backend);
     int host_no = 0;
     tiled->antialias = antialias;
     while (host_no < _ctx_max_threads)
     {
       ctx_set_antialias (tiled->host[host_no], antialias);
       host_no++;
     }
     return;
  }
#endif
  if (ctx_backend_type (ctx) != CTX_BACKEND_RASTERIZER)
    return;

  CtxRasterizer *target = (CtxRasterizer*)(ctx->backend);
  target->aa = _ctx_antialias_to_aa (antialias);
  if (antialias == CTX_ANTIALIAS_DEFAULT || antialias == CTX_ANTIALIAS_FAST)
    target->fast_aa = 1;
  else
    target->fast_aa = 0;
}

/* (Re)initialise @rasterizer as a backend of @ctx rendering into @data.
 *
 * @rasterizer:     storage to initialise; its clip_buffer and edge_list
 *                  fields are read BEFORE the structure is cleared, so the
 *                  memory must either be zeroed or hold a previously
 *                  initialised rasterizer.
 * @ctx:            context stored as the backend's owner.
 * @texture_source: context to take textures from; falls back to @ctx when NULL.
 * @state:          drawing state; it is reset with ctx_state_init() and its
 *                  clip rectangle is set to the blit rectangle.
 * @data:           destination pixel memory, with @stride as stored in
 *                  blit_stride.
 * @x, @y, @width, @height: blit rectangle.
 * @pixel_format:   destination format; CTX_FORMAT_BGRA8 is handled as
 *                  CTX_FORMAT_RGBA8 with the swap_red_green flag set.
 * @antialias:      initial antialiasing mode.
 *
 * Returns @rasterizer.
 */
CtxRasterizer *
ctx_rasterizer_init (CtxRasterizer *rasterizer, Ctx *ctx, Ctx *texture_source, CtxState *state, void *data, int x, int y, int width, int height, int stride, CtxPixelFormat pixel_format, CtxAntialias antialias)
{
  /* release resources of a previous initialisation before wiping the struct */
#if CTX_ENABLE_CLIP
  if (rasterizer->clip_buffer)
    ctx_buffer_free (rasterizer->clip_buffer);
#endif
  if (rasterizer->edge_list.size)
    ctx_drawlist_deinit (&rasterizer->edge_list);
#if CTX_SHAPE_CACHE
  /* clears all but the last sizeof (CtxShapeCache) bytes of the struct;
   * NOTE(review): presumably the shape cache is the final member so that it
   * survives re-initialisation - confirm against the struct definition */
  memset (rasterizer, 0, sizeof (CtxRasterizer) - sizeof (CtxShapeCache));
#else
  memset (rasterizer, 0, sizeof (CtxRasterizer));
#endif
  CtxBackend *backend = (CtxBackend*)rasterizer;
  backend->process = ctx_rasterizer_process;
  backend->free    = (CtxDestroyNotify)ctx_rasterizer_deinit;
  backend->ctx     = ctx;
  rasterizer->edge_list.flags |= CTX_DRAWLIST_EDGE_LIST;
  rasterizer->state       = state;
  rasterizer->texture_source = texture_source?texture_source:ctx;

  rasterizer->aa          = _ctx_antialias_to_aa (antialias);
  rasterizer->fast_aa = (antialias == CTX_ANTIALIAS_DEFAULT||antialias == CTX_ANTIALIAS_FAST);
  /* state is reset first; the clip bounds below are written afterwards */
  ctx_state_init (rasterizer->state);
  rasterizer->buf         = data;
  rasterizer->blit_x      = x;
  rasterizer->blit_y      = y;
  rasterizer->blit_width  = width;
  rasterizer->blit_height = height;
  /* initial clip rectangle is the blit rectangle, with inclusive max bounds */
  rasterizer->state->gstate.clip_min_x  = x;
  rasterizer->state->gstate.clip_min_y  = y;
  rasterizer->state->gstate.clip_max_x  = x + width - 1;
  rasterizer->state->gstate.clip_max_y  = y + height - 1;
  rasterizer->blit_stride = stride;
  /* min starts above max, i.e. an empty scanline range */
  rasterizer->scan_min    = 5000;
  rasterizer->scan_max    = -5000;

  if (pixel_format == CTX_FORMAT_BGRA8)
  {
    pixel_format = CTX_FORMAT_RGBA8;
    rasterizer->swap_red_green = 1;
  }

  rasterizer->format = ctx_pixel_format_info (pixel_format);

#if CTX_GRADIENTS
#if CTX_GRADIENT_CACHE
  rasterizer->gradient_cache_elements = CTX_GRADIENT_CACHE_ELEMENTS;
  rasterizer->gradient_cache_valid = 0;
#endif
#endif

  /* NOTE(review): `static_OPAQUE` is not defined anywhere in the code
   * reviewed here; if it is undefined this block is never compiled -
   * confirm the intended macro name */
#if static_OPAQUE
  memset (rasterizer->opaque, 255, sizeof (rasterizer->opaque));
#endif

  return rasterizer;
}

/* Create a drawing context that rasterizes directly into @buffer.
 *
 * The context takes its size from the buffer and uses the buffer's data,
 * stride and pixel format, with default antialiasing.
 *
 * The rasterizer storage is allocated zero-filled: ctx_rasterizer_init()
 * inspects clip_buffer and edge_list.size before clearing the structure, so
 * handing it uninitialised memory would make it free garbage pointers.
 *
 * Returns the new context.
 */
Ctx *
ctx_new_for_buffer (CtxBuffer *buffer)
{
  Ctx *ctx = _ctx_new_drawlist (buffer->width, buffer->height);
  CtxRasterizer *rasterizer =
    (CtxRasterizer *) ctx_calloc (sizeof (CtxRasterizer), 1);
  ctx_set_backend (ctx,
                    ctx_rasterizer_init (rasterizer,
                                          ctx, NULL, &ctx->state,
                                          buffer->data, 0, 0, buffer->width, buffer->height,
                                          buffer->stride, buffer->format->pixel_format,
                                          CTX_ANTIALIAS_DEFAULT));
  return ctx;
}

/* Create a drawing context that rasterizes into caller-owned pixel memory.
 *
 * @data:         destination pixels.
 * @width/@height: size of the destination.
 * @stride:       passed through to the rasterizer as the blit stride.
 * @pixel_format: layout of @data.
 *
 * Antialiasing is switched off for CTX_FORMAT_GRAY1 targets, which the
 * original author notes is needed to avoid offset-related bugs.
 */
Ctx *
ctx_new_for_framebuffer (void *data, int width, int height,
                         int stride,
                         CtxPixelFormat pixel_format)
{
  Ctx *ctx = _ctx_new_drawlist (width, height);
  CtxRasterizer *storage =
    (CtxRasterizer *) ctx_calloc (sizeof (CtxRasterizer), 1);
  CtxRasterizer *rasterizer =
    ctx_rasterizer_init (storage, ctx, NULL, &ctx->state, data,
                         0, 0, width, height,
                         stride, pixel_format, CTX_ANTIALIAS_DEFAULT);

  ctx_set_backend (ctx, rasterizer);

  /* XXX 1-bit targets misbehave with antialiasing enabled */
  if (pixel_format == CTX_FORMAT_GRAY1)
    ctx_set_antialias (ctx, CTX_ANTIALIAS_NONE);

  return ctx;
}

// ctx_new_for_stream (FILE *stream);

#if 0
CtxRasterizer *ctx_rasterizer_new (void *data, int x, int y, int width, int height,
                                   int stride, CtxPixelFormat pixel_format)
{
  CtxState    *state    = (CtxState *) malloc (sizeof (CtxState) );
  CtxRasterizer *rasterizer = (CtxRasterizer *) malloc (sizeof (CtxBackend) );
  ctx_rasterizer_init (rasterizer, state, data, x, y, width, height,
                       stride, pixel_format, CTX_ANTIALIAS_DEFAULT);
}
#endif

#else

#endif


/* Discard all colour stops of the gradient stored in @state by resetting
 * the stop count to zero.
 */
void
ctx_state_gradient_clear_stops (CtxState *state)
{
  state->gradient.n_stops = 0;
}


/****  end of engine ****/
/* atty - audio interface and driver for terminals
 * Copyright (C) 2020 Øyvind Kolås <pippin@gimp.org>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see <http://www.gnu.org/licenses/>. 
 */

static const char *base64_map="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";

/* Encode one group of up to three input bytes as four base64 characters.
 *
 * @in:        input bytes; in[1] is always read, in[2] is read when
 *             @remaining > 1, so the caller must provide readable padding.
 * @remaining: number of meaningful bytes at @in (1, 2, or 3 and above).
 * @out:       receives exactly four characters, no terminator.
 *
 * Positions with no input data are written as '=' (index 64 of base64_map).
 */
static void bin2base64_group (const unsigned char *in, int remaining, char *out)
{
  unsigned char sextet[4];

  sextet[0] = in[0] >> 2;
  sextet[1] = ((in[0] & 0x03) << 4) | ((in[1] & 0xf0) >> 4);
  sextet[2] = 64;
  sextet[3] = 64;

  if (remaining > 1)
    sextet[2] = ((in[1] & 0x0f) << 2) | ((in[2] & 0xc0) >> 6);
  if (remaining > 2)
    sextet[3] = ((in[2] & 0x3f));

  out[0] = base64_map[sextet[0]];
  out[1] = base64_map[sextet[1]];
  out[2] = base64_map[sextet[2]];
  out[3] = base64_map[sextet[3]];
}

/* Encode @bin_length bytes at @bin as a NUL-terminated base64 string.
 *
 * @bin:        input bytes.
 * @bin_length: number of input bytes; zero or negative yields "".
 * @ascii:      output buffer; must hold 4 * ceil (bin_length / 3) + 1 bytes.
 *
 * Complete 3-byte groups are encoded straight from the input. A trailing
 * partial group is first copied into a zero-padded scratch group so the
 * result never depends on bytes beyond the input. This replaces a temporary
 * heap copy of the whole input, whose allocation was not checked.
 */
void
ctx_bin2base64 (const void *bin,
                int         bin_length,
                char       *ascii)
{
  const unsigned char *src = (const unsigned char *) bin;
  int whole_groups = (bin_length > 0) ? bin_length / 3 : 0;
  int tail_bytes   = (bin_length > 0) ? bin_length % 3 : 0;
  int group;

  for (group = 0; group < whole_groups; group++)
    bin2base64_group (&src[group * 3], 3, &ascii[group * 4]);

  if (tail_bytes)
    {
      unsigned char padded[3] = {0, 0, 0};
      memcpy (padded, &src[group * 3], (size_t) tail_bytes);
      bin2base64_group (padded, tail_bytes, &ascii[group * 4]);
      group++;
    }

  ascii[group * 4] = 0;
}

/* Reverse lookup table: maps an input byte to its 6-bit base64 value, or to
 * 255 when the byte is not a base64 digit. It has one entry for every
 * possible unsigned char value (the decoder indexes it with raw input bytes,
 * including 0xFF).
 */
static unsigned char base64_revmap[256];

/* Populate base64_revmap on first use; later calls return immediately.
 * Both the standard alphabet ('+', '/') and the URL-safe variants
 * ('-', '_') are accepted by the decoder. The padding character '=' is
 * left unmapped (255).
 */
static void base64_revmap_init (void)
{
  static int done = 0;
  if (done)
    return;

  memset (base64_revmap, 255, sizeof (base64_revmap));
  for (int i = 0; i < 64; i ++)
    base64_revmap[((const unsigned char*)base64_map)[i]]=i;
  /* include variants used in URI encodings for decoder,
   * even if that is not how we encode
  */
  base64_revmap['-']=62;
  base64_revmap['_']=63;
  base64_revmap['+']=62;
  base64_revmap['/']=63;

  done = 1;
}


/* Decode the NUL-terminated base64 string @ascii into @bin.
 *
 * Bytes that are not base64 digits (as defined by base64_revmap) are
 * skipped. A terminating 0 byte is stored after the decoded data.
 *
 * @length: optional. On entry it is a limit: decoding aborts, sets *length
 *          to -1 and returns -1 once the number of bytes already produced
 *          exceeds it while input remains. On success it receives the
 *          number of decoded bytes.
 *          NOTE(review): the limit test is `produced > *length`, so up to
 *          *length + 1 data bytes plus the terminator can be written -
 *          callers should size @bin accordingly.
 *
 * Returns the number of decoded bytes, or -1 on overrun.
 */
int
ctx_base642bin (const char    *ascii,
                int           *length,
                unsigned char *bin)
{
  const unsigned char *cursor = (const unsigned char*) ascii;
  int digits_seen = 0;   /* count of valid base64 digits consumed */
  int produced    = 0;   /* bytes written to bin */
  int leftover    = 0;   /* bits carried over from the previous digit */

  base64_revmap_init ();

  for (; *cursor; cursor++)
    {
      int sextet = base64_revmap[*cursor];

      if (length && produced > *length)
        {
          *length = -1;
          return -1;
        }
      if (sextet == 255)
        continue;

      int phase = digits_seen % 4;
      if (phase == 0)
        {
          leftover = sextet;
        }
      else if (phase == 1)
        {
          bin[produced++] = (leftover << 2) | (sextet >> 4);
          leftover = sextet & 15;
        }
      else if (phase == 2)
        {
          bin[produced++] = (leftover << 4) | (sextet >> 2);
          leftover = sextet & 3;
        }
      else
        {
          bin[produced++] = (leftover << 6) | sextet;
          leftover = 0;
        }
      digits_seen++;
    }

  bin[produced] = 0;
  if (length)
    *length = produced;
  return produced;
}
#ifndef SQUOZE_H
#define SQUOZE_H

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <assert.h>

uint32_t squoze6 (const char *utf8);
uint64_t squoze10 (const char *utf8);
uint64_t squoze12 (const char *utf8);
const char *squoze6_decode (uint32_t hash);
const char *squoze10_decode (uint64_t hash);
const char *squoze12_decode (uint64_t hash);

//#define SQUOZE_NO_INTERNING  // this disables the interning - providing only a hash (and decode for non-overflowed hashes)

#define SQUOZE_ENTER_SQUEEZE    16

#define SQUOZE_SPACE            0
#define SQUOZE_DEC_OFFSET_A     27
#define SQUOZE_INC_OFFSET_A     28
#define SQUOZE_DEC_OFFSET_B     29
#define SQUOZE_INC_OFFSET_B     30
#define SQUOZE_ENTER_UTF5       31

#define SQUOZE_JUMP_STRIDE      26
#define SQUOZE_JUMP_OFFSET      19

static inline uint32_t squoze_utf8_to_unichar (const char *input);
static inline int      squoze_unichar_to_utf8 (uint32_t  ch, uint8_t  *dest);
static inline int      squoze_utf8_len        (const unsigned char first_byte);


/* Compute the base offset of the 26-codepoint window containing @unichar.
 *
 * Windows are SQUOZE_JUMP_STRIDE wide and shifted by SQUOZE_JUMP_OFFSET so
 * that 'a'..'z' occupy exactly one window. The arithmetic is done in
 * uint32_t and the result converted to int on return.
 */
static inline int squoze_new_offset (uint32_t unichar)
{
  uint32_t base = unichar - (unichar % SQUOZE_JUMP_STRIDE) + SQUOZE_JUMP_OFFSET;
  return (base > unichar) ? base - SQUOZE_JUMP_STRIDE : base;
}

/* Number of window strides the offset has to move so that @unicha falls
 * inside the SQUOZE_JUMP_STRIDE-wide window starting at @off.
 *
 * Returns 0 when the codepoint is already in range (a space, 32, is always
 * considered in range), a negative count when the window must move down and
 * a positive count when it must move up.
 *
 * Computed directly instead of stepping one stride at a time, which made
 * the cost proportional to the distance between codepoint and window.
 */
static int squoze_needed_jump (uint32_t off, uint32_t unicha)
{
  int unichar = unicha;
  int offset = off;

  if (unichar == 32) // space is always in range
    return 0;

  if (unichar < offset)
  {
    /* smallest n with offset - n * stride <= unichar (ceiling division) */
    int distance = offset - unichar;
    return -((distance + SQUOZE_JUMP_STRIDE - 1) / SQUOZE_JUMP_STRIDE);
  }

  /* largest n with offset + n * stride <= unichar */
  return (unichar - offset) / SQUOZE_JUMP_STRIDE;
}

/* Number of 4-bit groups needed to write @unichar in utf5 form; zero still
 * takes one group.
 */
static inline int
squoze_utf5_length (uint32_t unichar)
{
  int groups = 1;
  for (unichar >>= 4; unichar != 0; unichar >>= 4)
    groups++;
  return groups;
}

/* Encoder state record.
 * NOTE(review): nothing in the part of the file reviewed here references
 * this type (squoze5_encode keeps its state in locals) - confirm whether it
 * is still needed. Field meanings below are inferred from the names and the
 * matching decoder struct; verify before relying on them.
 */
typedef struct EncodeUtf5 {
  int      is_utf5;     /* presumably: non-zero while in utf5 mode */
  int      offset;      /* presumably: base of the current 26-wide window */
  int      length;
  void    *write_data;
  uint32_t current;
} EncodeUtf5;

/* Bit that marks a hash as "overflowed" (too long to embed) for a given
 * dimension: one bit above the squoze_dim 5-bit codes plus the mode bit.
 */
static inline uint64_t
squoze_overflow_mask_for_dim (int squoze_dim)
{
  int bit_no = squoze_dim * 5 + 1;
  uint64_t mask = 1;
  return mask << bit_no;
}

/* Cost, in 5-bit codes, of emitting @val (and @next_val when non-zero) in
 * utf5 mode: simply the sum of their utf5 lengths.
 *
 * @offset is accepted for symmetry with squoze_compute_cost_squeezed but is
 * not used. A variant that also weighed switching to squeezed mode for the
 * following codepoint used to be compiled out here; it was never hit in the
 * author's test corpus and leaving it out keeps the hash easier to specify
 * and port consistently.
 */
static int squoze_compute_cost_utf5 (int offset, int val, int next_val)
{
  int total = squoze_utf5_length (val);
  if (next_val)
    total += squoze_utf5_length (next_val);
  return total;
}

/* Cost, in 5-bit codes, of emitting @val in squeezed mode starting from
 * window @offset, plus the cheaper of the two options for @next_val (when
 * non-zero): staying squeezed or switching to utf5.
 *
 * Per-codepoint cost in squeezed mode: 1 when inside the window, 2 when a
 * jump of at most two strides is needed, 3 for a jump of up to ten strides.
 * For @val a larger jump costs 100 so that the caller picks utf5 instead;
 * for @next_val a larger jump is priced like the switch to utf5.
 */
static int squoze_compute_cost_squeezed (int offset, int val, int next_val)
{
  int jump = squoze_needed_jump (offset, val);
  int total;

  if (jump == 0)
  {
    total = 1;
  }
  else if (jump >= -10 && jump <= 10)
  {
    total = (jump >= -2 && jump <= 2) ? 2 : 3;
    offset += SQUOZE_JUMP_STRIDE * jump;   /* window follows the jump */
  }
  else
  {
    total = 100; // very expensive, makes the other choice win
  }

  if (!next_val)
    return total;

  int switch_cost = 1 + squoze_utf5_length (next_val);
  int stay_cost;

  jump = squoze_needed_jump (offset, next_val);
  if (jump == 0)
    stay_cost = 1;
  else if (jump >= -2 && jump <= 2)
    stay_cost = 2;
  else if (jump >= -10 && jump <= 10)
    stay_cost = 3;
  else
    stay_cost = switch_cost;

  return total + ((switch_cost < stay_cost) ? switch_cost : stay_cost);
}


/* Encode UTF-8 text as a sequence of 5-bit codes, one code (0..31) per
 * output byte.
 *
 * @input/@inlen:   UTF-8 bytes to encode.
 * @output:         receives the codes followed by a terminating 0 byte.
 *                  No bounds checking is done here; the caller must provide
 *                  a large enough buffer.
 * @r_outlen:       when non-NULL receives the number of codes written
 *                  (terminator not counted).
 * @permit_squeezed: when zero the encoder never enters squeezed mode.
 * @escape_endzero: when non-zero and the last code is 0, one extra non-zero
 *                  code is appended (16 in utf5 mode, SQUOZE_ENTER_UTF5
 *                  otherwise).
 *
 * Encoding starts in utf5 mode with the window offset of 'a'. In utf5 mode
 * a codepoint is written as 4-bit groups, most significant first, with 16
 * added to the first group. In squeezed mode a codepoint is written as its
 * 1-based position inside the current 26-wide window, space is written as
 * SQUOZE_SPACE, and the window is moved with the offset codes when needed.
 * For every codepoint the cheaper mode is chosen using the cost helpers,
 * looking one codepoint ahead.
 */
static void squoze5_encode (const char *input, int inlen,
                            char *output, int *r_outlen,
                            int permit_squeezed,
                            int escape_endzero)
{
  int offset  = squoze_new_offset('a');
  int is_utf5 = 1;
  int len     = 0;

  for (int i = 0; i < inlen; i+= squoze_utf8_len (input[i]))
  {
    int val = squoze_utf8_to_unichar (&input[i]);
    int next_val = 0;
    int first_len = squoze_utf8_len (input[i]);
    /* peek at the following codepoint, 0 when this is the last one */
    if (i + first_len < inlen)
      next_val = squoze_utf8_to_unichar (&input[i+first_len]);

    /* decide whether to switch mode before emitting this codepoint */
    if (is_utf5)
    {
      int change_cost    = squoze_compute_cost_squeezed (offset, val, next_val);
      int no_change_cost = squoze_compute_cost_utf5 (offset, val, next_val);
  
      if (i != 0)          /* ignore cost of initial 'G' */
        change_cost += 1;

      if (permit_squeezed && change_cost <= no_change_cost)
      {
        output[len++] = SQUOZE_ENTER_SQUEEZE;
        is_utf5 = 0;
      }
    }
    else
    {
      int change_cost    = 1 + squoze_compute_cost_utf5 (offset, val, next_val);
      int no_change_cost = squoze_compute_cost_squeezed (offset, val, next_val);

      if (change_cost < no_change_cost)
      {
        output[len++] = SQUOZE_ENTER_UTF5;
        is_utf5 = 1;
      }
    }

    /* squeezed mode: move the window first if the codepoint is outside it */
    if (!is_utf5)
    {
      int needed_jump = squoze_needed_jump (offset, val);
      if (needed_jump)
      {
        if (needed_jump >= -2 && needed_jump <= 2)
        {
          /* one or two strides: a single offset code */
          switch (needed_jump)
          {
            case -1: output[len++] = SQUOZE_DEC_OFFSET_B; break;
            case  1: output[len++] = SQUOZE_INC_OFFSET_B; break;
            case -2: output[len++] = SQUOZE_DEC_OFFSET_A; break;
            case  2: output[len++] = SQUOZE_INC_OFFSET_A; break;
          }
          offset += SQUOZE_JUMP_STRIDE * needed_jump;
        }
        else if (needed_jump >= -10 && needed_jump <= 10) {
              /* three to ten strides: two offset codes carrying a value
               * 0..15 (0..7 for +3..+10, 8..15 for -3..-10), base 4 */
              int encoded_val;
              if (needed_jump < -2)
                encoded_val = 5 - needed_jump;
              else
                encoded_val = needed_jump - 3;

              output[len++] = (encoded_val / 4) + SQUOZE_DEC_OFFSET_A;
              output[len++] = (encoded_val % 4) + SQUOZE_DEC_OFFSET_A;

              offset += SQUOZE_JUMP_STRIDE * needed_jump;
        }
        else
        {
          /* with assertions disabled this falls back to utf5 mode */
          assert(0); // should not be reached
          output[len++] = SQUOZE_ENTER_UTF5;
          is_utf5 = 1;
        }
      }
    }

    if (is_utf5)
    {
      int octets = 0;
      /* the window follows the last codepoint written in utf5 mode */
      offset = squoze_new_offset (val);
      /* groups are produced least significant first; the most significant
       * one (produced last) gets 16 added */
      while (val)
      {
        int oval = val % 16;
        int hi = 16;
        if (val / 16) hi = 0;
        output[len+ (octets++)] = oval + hi;
        val /= 16;
      }
      for (int j = 0; j < octets/2; j++) // mirror in-place
      {                                  // TODO refactor to be single pass
        int tmp = output[len+j];
        output[len+j] = output[len+octets-1-j];
        output[len+octets-1-j] = tmp;
      }
      len += octets;
    }
    else 
    {
       if (val == ' ')
       {
         output[len++] = SQUOZE_SPACE;
       }
       else
       {
         /* 1-based position inside the current window */
         output[len++] = val-offset+1;
       }
    }
  }

  /* optionally append a non-zero code so the sequence does not end in 0 */
  if (escape_endzero && len && output[len-1]==0)
  {
    if (is_utf5)
      output[len++] = 16;
    else
      output[len++] = SQUOZE_ENTER_UTF5;
  }
  output[len]=0;
  if (r_outlen)
    *r_outlen = len;
}

/* Compute the squoze hash of the NUL-terminated UTF-8 string @utf8 for the
 * given dimension (@squoze_dim codes of 5 bits each), without interning.
 *
 * The string is first turned into 5-bit codes by squoze5_encode(). If the
 * codes fit (not counting a leading SQUOZE_ENTER_SQUEEZE) they are packed
 * directly into the result, shifted up by one bit. Otherwise a
 * multiplicative hash of the codes is produced and the overflow bit for the
 * dimension is set. Bit 0 is set when the code sequence starts in utf5 mode.
 *
 * The encoder does no bounds checking, so the scratch buffer is sized from
 * the input: at most 10 codes per input byte (mode switch, fallback switch
 * and eight 4-bit groups) plus an escape code and the terminator. Short
 * strings use a stack buffer; longer ones a temporary heap buffer. Results
 * are identical to the fixed-buffer implementation for every input that fit
 * in it.
 *
 * If the string is too long to encode or the temporary buffer cannot be
 * allocated, only the overflow bit is returned.
 */
static inline uint64_t _squoze (int squoze_dim, const char *utf8)
{
  char     stack_buf[4096];
  char    *encoded     = stack_buf;
  int      encoded_len = 0;
  size_t   inlen       = strlen (utf8);
  uint64_t hash = 0;
  uint64_t multiplier = ((squoze_dim == 6) ? 0x25bd1e975
                                           : 0x98173415bd1e975);

  uint64_t overflowed_mask = squoze_overflow_mask_for_dim (squoze_dim);
  uint64_t all_bits        = overflowed_mask - 1;

  int rshift = (squoze_dim == 6) ? 8 : 16;

  /* squoze5_encode takes int lengths; refuse inputs whose worst-case
   * encoding would not be representable */
  if (inlen > (size_t) (0x7fffffff / 10 - 1))
    return overflowed_mask;

  if (inlen * 10 + 2 > sizeof (stack_buf))
  {
    encoded = (char *) malloc (inlen * 10 + 2);
    if (!encoded)
      return overflowed_mask;
  }

  encoded[0] = 0;
  squoze5_encode (utf8, (int) inlen, encoded, &encoded_len, 1, 1);
  int  utf5 = (encoded[0] != SQUOZE_ENTER_SQUEEZE);

  if (encoded_len - (!utf5) <= squoze_dim)
  {
    /* short enough: embed the codes themselves, first code lowest */
    for (int i = !utf5; i < encoded_len; i++)
    {
      uint64_t val = encoded[i];
      hash = hash | (val << (5*(i-(!utf5))));
    }
    hash <<= 1; // make room for the bit that encodes utf5 or squeeze
  }
  else
  {
    for (int i = 0; i < encoded_len; i++)
    {
      uint64_t val = encoded[i];
      hash = hash ^ val;
      hash = hash * multiplier;
      hash = hash & all_bits;
      hash = hash ^ ((hash >> rshift));
    }
    hash |= overflowed_mask;
  }

  if (encoded != stack_buf)
    free (encoded);

  return hash | utf5;
}

typedef struct _CashInterned CashInterned;

/* One interned string: an overflowed squoze hash and the text it was
 * computed from. */
struct _CashInterned {
    uint64_t   hash;    /* hash value used as the lookup key */
    char      *string;  /* strdup()ed copy of the original text */
};

/* Process-wide interning table, looked up by hash with a binary search. */
static CashInterned *interned = NULL;  /* table storage, grown with realloc */
static int n_interned = 0;             /* number of entries in use */
static int s_interned = 0;             /* number of entries allocated */

static int squoze_interned_find (uint64_t hash)
{
#if 1
  int min = 0;
  int max = n_interned - 1;
  if (max <= 0)
    return 0;
  do
  {
     int pos = (min + max)/2;
     if (interned[pos].hash == hash)
       return pos;
     else if (min == max - 1)
       return max;
     else if (interned[pos].hash < hash)
       min = pos;
     else
       max = pos;
  } while (min != max);
  return max;
#else
  for (int i = 0; i < n_interned; i++)
    if (interned[i].hash > hash)
      return i;
  return 0;
#endif
}

static inline uint64_t squoze (int squoze_dim, const char *utf8)
{
  uint64_t hash = _squoze (squoze_dim, utf8);
#ifdef SQUOZE_NO_INTERNING
  return hash;
#endif
  uint64_t overflowed_mask = squoze_overflow_mask_for_dim (squoze_dim);
  if (hash & overflowed_mask)
  {
    int pos = squoze_interned_find (hash);
    if (interned && interned[pos].hash == hash)
      return hash;

    if (n_interned + 1 >= s_interned)
    {
       s_interned = (s_interned + 128)*2;
       interned = (CashInterned*)realloc (interned, s_interned * sizeof (CashInterned));
    }

    n_interned++;
#if 1
    if (n_interned-pos)
      memmove (&interned[pos+1], &interned[pos], (n_interned-pos) * sizeof (CashInterned));
    // the memmove is the expensive part of testing for collisions
    // insertions should be cheaper! at least looking up strings
    // is cheap
#else
    pos = n_interned-1;
#endif
    {
      CashInterned *entry = &interned[pos];
      entry->hash = hash;
      entry->string = strdup (utf8);
    }

  }
  return hash;
}

/* 6-code squoze hash of @utf8; the 64-bit result of squoze() is narrowed to
 * 32 bits on return. */
uint32_t squoze6 (const char *utf8)
{
  uint64_t wide = squoze (6, utf8);
  return wide;
}

/* 10-code squoze hash of @utf8. */
uint64_t squoze10 (const char *utf8)
{
  uint64_t wide = squoze (10, utf8);
  return wide;
}

/* 12-code squoze hash of @utf8. */
uint64_t squoze12 (const char *utf8)
{
  uint64_t wide = squoze (12, utf8);
  return wide;
}

/* String hash used by ctx: the 6-code squoze hash of @str, narrowed to
 * 32 bits on return. */
uint32_t ctx_strhash(const char *str) {
  uint64_t wide = squoze (6, str);
  return wide;
}

/* State of the streaming decoder that turns 5-bit codes back into
 * codepoints (see squoze_decode_utf5). */
typedef struct CashUtf5Dec {
  int       is_utf5;       /* non-zero while decoding in utf5 mode */
  int       offset;        /* base of the current window in squeezed mode */
  void     *write_data;    /* opaque pointer handed to append_unichar */
  uint32_t  current;       /* codepoint being accumulated in utf5 mode */
  void    (*append_unichar) (uint32_t unichar, void *write_data); /* output callback */
  int       jumped_amount; /* strides applied by a pending single offset code, 0 if none */
  int       jump_mode;     /* the offset code that started the pending jump */
} CashUtf5Dec;

/* Output sink used by squoze_decode_utf5_append_unichar_as_utf8. */
typedef struct CashUtf5DecDefaultData {
  uint8_t *buf;     /* destination for the UTF-8 bytes */
  int      length;  /* number of bytes written so far (terminator excluded) */
} CashUtf5DecDefaultData;

/* append_unichar callback: writes @unichar as UTF-8 at the end of the
 * CashUtf5DecDefaultData buffer passed in @write_data, advances its length
 * and stores a terminating 0 after the data. The buffer size is not
 * checked here.
 */
static void squoze_decode_utf5_append_unichar_as_utf8 (uint32_t unichar, void *write_data)
{
  CashUtf5DecDefaultData *sink = (CashUtf5DecDefaultData*)write_data;
  uint8_t *tail = &sink->buf[sink->length];
  sink->length += squoze_unichar_to_utf8 (unichar, tail);
  sink->buf[sink->length] = 0;
}

/* Handle the second code of a two-code window jump.
 *
 * The first offset code (stored in dec->jump_mode) was provisionally
 * applied as a short jump of dec->jumped_amount strides; that is undone
 * here. The two codes form a base-4 number 0..15, where 0..7 mean +3..+10
 * strides and 8..15 mean -3..-10 strides.
 */
static void squoze_decode_jump (CashUtf5Dec *dec, uint8_t in)
{
  int high_digit = dec->jump_mode - SQUOZE_DEC_OFFSET_A;
  int low_digit  = in - SQUOZE_DEC_OFFSET_A;
  int code       = high_digit * 4 + low_digit;
  int strides    = (code > 7) ? 5 - code : code + 3;

  dec->offset -= SQUOZE_JUMP_STRIDE * dec->jumped_amount;
  dec->offset += strides * SQUOZE_JUMP_STRIDE;
  dec->jumped_amount = 0;
}

/* Feed one 5-bit code @in to the decoder @dec.
 *
 * utf5 mode: a code >= 16 starts a new codepoint, so a pending non-zero
 * codepoint is emitted first and the window offset follows it.
 * SQUOZE_ENTER_SQUEEZE switches to squeezed mode; any other code
 * contributes its low four bits to the codepoint being accumulated.
 *
 * squeezed mode: offset codes move the window (a second offset code right
 * after the first forms a long jump), SQUOZE_ENTER_UTF5 switches back,
 * SQUOZE_SPACE emits a space, and any other code emits the codepoint at
 * position (code - 1) of the current window. Directly after a single offset
 * code every non-offset code is treated as a window position.
 */
static void squoze_decode_utf5 (CashUtf5Dec *dec, uint8_t in)
{
  if (dec->is_utf5)
  {
    /* flush the finished codepoint; this also covers ENTER_SQUEEZE (16) */
    if (in >= 16 && dec->current)
    {
      dec->offset = squoze_new_offset (dec->current);
      dec->append_unichar (dec->current, dec->write_data);
      dec->current = 0;
    }
    if (in == SQUOZE_ENTER_SQUEEZE)
      dec->is_utf5 = 0;
    else
      dec->current = dec->current * 16 + (in % 16);
    return;
  }

  int is_offset_code = (in == SQUOZE_DEC_OFFSET_A ||
                        in == SQUOZE_DEC_OFFSET_B ||
                        in == SQUOZE_INC_OFFSET_A ||
                        in == SQUOZE_INC_OFFSET_B);

  if (dec->jumped_amount)
  {
    /* previous code was a single offset code */
    if (is_offset_code)
    {
      squoze_decode_jump (dec, in);
    }
    else
    {
      dec->append_unichar (dec->offset + (in - 1), dec->write_data);
      dec->jumped_amount = 0;
      dec->jump_mode = 0;
    }
    return;
  }

  if (is_offset_code)
  {
    int strides;
    if (in == SQUOZE_DEC_OFFSET_A)
      strides = -2;
    else if (in == SQUOZE_INC_OFFSET_A)
      strides = 2;
    else if (in == SQUOZE_DEC_OFFSET_B)
      strides = -1;
    else
      strides = 1;
    dec->jumped_amount = strides;
    dec->jump_mode = in;
    dec->offset += dec->jumped_amount * SQUOZE_JUMP_STRIDE;
    return;
  }

  if (in == SQUOZE_ENTER_UTF5)
    dec->is_utf5 = 1;
  else if (in == SQUOZE_SPACE)
    dec->append_unichar (' ', dec->write_data);
  else
    dec->append_unichar (dec->offset + (in - 1), dec->write_data);

  dec->jumped_amount = 0;
  dec->jump_mode = 0;
}

/* Decode @inlen 5-bit codes at @input into NUL-terminated UTF-8 in @output.
 *
 * @is_utf5:  non-zero when the sequence starts in utf5 mode, zero when it
 *            starts in squeezed mode.
 * @output:   destination; its size is not checked here.
 * @r_outlen: when non-NULL receives the number of bytes written, not
 *            counting the terminator.
 *
 * The output is terminated up front so that an empty result (for example
 * when decoding the hash of the empty string) is a valid empty string and
 * not whatever the buffer held before.
 */
static void squoze_decode_utf5_bytes (int is_utf5, 
                        const unsigned char *input, int inlen,
                        char *output, int *r_outlen)
{
  CashUtf5DecDefaultData append_data = {(unsigned char*)output, 0};
  CashUtf5Dec dec = {is_utf5,
                     squoze_new_offset('a'),
                     &append_data,
                     0,
                     squoze_decode_utf5_append_unichar_as_utf8,
                     0, 0
                    };
  output[0] = 0;
  for (int i = 0; i < inlen; i++)
    squoze_decode_utf5 (&dec, input[i]);
  /* emit a codepoint still being accumulated in utf5 mode */
  if (dec.current)
    dec.append_unichar (dec.current, dec.write_data);
  if (r_outlen)
    *r_outlen = append_data.length;
}

/* Turn a squoze hash back into its string.
 *
 * Overflowed hashes are looked up in the interning table; the interned
 * string is returned, or NULL when the hash is unknown. Other hashes carry
 * the text themselves: bit 0 selects the starting mode and the remaining
 * bits are 5-bit codes, lowest first, which are decoded into @ret.
 *
 * @retlen is only used as storage for the decoded length; it does not limit
 * how much is written to @ret.
 */
static const char *squoze_decode_r (int squoze_dim, uint64_t hash, char *ret, int retlen)
{
  uint64_t overflowed_mask = ((uint64_t)1<<(squoze_dim * 5 + 1));

  if (hash & overflowed_mask)
  {
    int pos = squoze_interned_find (hash);
    if (interned && interned[pos].hash == hash)
      return interned[pos].string;
    return NULL;
  }

  uint8_t codes[140]=""; // we never go really high since there isn't room
                         // in the integers
  uint64_t bits = hash & (overflowed_mask-1);
  int starts_in_utf5 = bits & 1;
  int n_codes = 0;

  /* drop the mode bit, then peel off one 5-bit code at a time */
  for (bits /= 2; bits > 0; bits /= 32)
    codes[n_codes++] = bits % 32;
  codes[n_codes]=0;

  squoze_decode_utf5_bytes (starts_in_utf5, codes, n_codes, ret, &retlen);
  return ret;
}

/* Convenience decoder returning a pointer into thread-local storage.
 *
 * Results rotate through eight 256-byte buffers per thread, so a returned
 * string is overwritten after eight further calls on the same thread -
 * copy it if it needs to live longer.
 */
static const char *squoze_decode (int squoze_dim, uint64_t hash)
{
#define THREAD __thread  // use thread local storage
  static THREAD int slot = 0;
  static THREAD char ring[8][256];
  slot = (slot >= 7) ? 0 : slot + 1;
  return squoze_decode_r (squoze_dim, hash, ring[slot], 256);
#undef THREAD
}

/* Decode a 6-code squoze hash; see squoze_decode for the lifetime of the
 * returned string. */
const char *squoze6_decode (uint32_t hash)
{
  const char *text = squoze_decode (6, hash);
  return text;
}

/* Decode a 10-code squoze hash. */
const char *squoze10_decode (uint64_t hash)
{
  const char *text = squoze_decode (10, hash);
  return text;
}

/* Decode a 12-code squoze hash. */
const char *squoze12_decode (uint64_t hash)
{
  const char *text = squoze_decode (12, hash);
  return text;
}

/* Decode the UTF-8 sequence starting at @input into a codepoint.
 *
 * The lead byte selects the sequence length (1 to 6 bytes, including the
 * historical 5 and 6 byte forms); continuation bytes are not validated,
 * only their low six bits are used. Returns 0 when the first byte is not a
 * valid lead byte.
 */
static inline uint32_t
squoze_utf8_to_unichar (const char *input)
{
  const uint8_t *bytes = (const uint8_t *) input;
  uint8_t  lead = bytes[0];
  uint32_t codepoint;
  int      trailing;

  if ((lead & 0x80) == 0)
    return lead;

  if      ((lead & 0xE0) == 0xC0) { codepoint = lead & 0x1F; trailing = 1; }
  else if ((lead & 0xF0) == 0xE0) { codepoint = lead & 0xF;  trailing = 2; }
  else if ((lead & 0xF8) == 0xF0) { codepoint = lead & 0x7;  trailing = 3; }
  else if ((lead & 0xFC) == 0xF8) { codepoint = lead & 0x3;  trailing = 4; }
  else if ((lead & 0xFE) == 0xFC) { codepoint = lead & 0x1;  trailing = 5; }
  else
    return 0;

  for (int k = 1; k <= trailing; k++)
    codepoint = (codepoint << 6) | (bytes[k] & 0x3F);

  return codepoint;
}
/* Write codepoint @ch as UTF-8 to @dest (no terminator) and return the
 * number of bytes written: 1 to 4, or 0 (nothing written) for values of
 * 0x110000 and above.
 *
 * Based on the public domain UTF-8 routines by Jeff Bezanson
 * (http://www.cprogramming.com/tutorial/utf8.c).
 */
static inline int
squoze_unichar_to_utf8 (uint32_t  ch,
                      uint8_t  *dest)
{
  int     n_bytes;
  uint8_t lead_marker;

  if (ch < 0x80)
    {
      dest[0] = (char) ch;
      return 1;
    }

  if (ch < 0x800)
    { n_bytes = 2; lead_marker = 0xC0; }
  else if (ch < 0x10000)
    { n_bytes = 3; lead_marker = 0xE0; }
  else if (ch < 0x110000)
    { n_bytes = 4; lead_marker = 0xF0; }
  else
    return 0;

  /* fill continuation bytes from the end, six bits at a time */
  for (int k = n_bytes - 1; k > 0; k--)
    {
      dest[k] = (ch & 0x3F) | 0x80;
      ch >>= 6;
    }
  dest[0] = ch | lead_marker;
  return n_bytes;
}

/* Length in bytes of the UTF-8 sequence introduced by @first_byte: 1 for
 * ASCII, 2 to 4 for the corresponding lead bytes, and 1 for anything else
 * (continuation bytes and longer historical forms).
 */
static inline int
squoze_utf8_len (const unsigned char first_byte)
{
  if ((first_byte & 0xE0) == 0xC0)
    return 2;
  if ((first_byte & 0xF0) == 0xE0)
    return 3;
  if ((first_byte & 0xF8) == 0xF0)
    return 4;
  return 1; /* ASCII, or not a recognised lead byte */
}

#endif


/* Number of continuation entries that follow @entry in a drawlist and
 * belong to the same command.
 *
 * Fixed-size commands return a constant; CTX_DATA stores its count in
 * data.u32[1]; string-carrying commands read the count from the data entry
 * that follows; CTX_DEFINE_TEXTURE is followed by two such data runs (the
 * eid and the pixels). Commands not listed have no continuations.
 */
static inline int
ctx_conts_for_entry (CtxEntry *entry)
{
    switch (entry->code)
    {
      /* variable length, count stored in the entry itself */
      case CTX_DATA:
        return entry->data.u32[1];

      /* one continuation */
      case CTX_LINEAR_GRADIENT:
      case CTX_QUAD_TO:
      case CTX_REL_QUAD_TO:
      case CTX_VIEW_BOX:
      case CTX_RECTANGLE:
      case CTX_STROKE_RECT:
      case CTX_FILL_RECT:
        return 1;

      /* two continuations */
      case CTX_SHADOW_COLOR:
      case CTX_ROUND_RECTANGLE:
      case CTX_COLOR:
      case CTX_REL_CURVE_TO:
      case CTX_CURVE_TO:
      case CTX_ARC:
      case CTX_RADIAL_GRADIENT:
        return 2;

      /* three continuations */
      case CTX_REL_ARC_TO:
      case CTX_ARC_TO:
        return 3;

      /* four continuations */
      case CTX_SOURCE_TRANSFORM:
      case CTX_APPLY_TRANSFORM:
        return 4;

      /* followed by one data entry plus its payload */
      case CTX_TEXTURE:
      case CTX_FONT:
      case CTX_STROKE_TEXT:
      case CTX_COLOR_SPACE:
      case CTX_LINE_DASH:
      case CTX_TEXT:
        {
          int payload_len = entry[1].data.u32[1];
          return payload_len + 1;
        }

      /* followed by an eid data run and a pixel data run */
      case CTX_DEFINE_TEXTURE:
        {
          int eid_len = entry[2].data.u32[1];
          int pix_len = entry[2 + eid_len + 1].data.u32[1];
          return eid_len + pix_len + 2 + 1;
        }

      default:
        return 0;
    }
}

// expanding arc_to to arc can be the job
// of a layer in front of backend?
//   doing:
//     rectangle
//     arc
//     ... etc reduction to beziers
//     or even do the reduction to
//     polylines directly here...
//     making the rasterizer able to
//     only do poly-lines? will that be faster?

/* the iterator - should decode bitpacked data as well -
 * making the rasterizers simpler, possibly do unpacking
 * all the way to absolute coordinates.. unless mixed
 * relative/not are wanted.
 */


/* Prepare @iterator to walk @drawlist.
 *
 * @start_pos: index of the first entry to visit.
 * @flags:     iterator flags, stored as given.
 *
 * Iteration ends at the drawlist's entry count as of this call. Any
 * pending bitpack expansion state is cleared.
 */
void
ctx_iterator_init (CtxIterator      *iterator,
                   CtxDrawlist  *drawlist,
                   int               start_pos,
                   int               flags)
{
  iterator->drawlist       = drawlist;
  iterator->pos            = start_pos;
  iterator->end_pos        = drawlist->count;
  iterator->flags          = flags;
  iterator->first_run      = 1; // set until the first entry has been returned
  iterator->bitpack_length = 0;
  iterator->bitpack_pos    = 0;
  ctx_memset (iterator->bitpack_command, 0, sizeof (iterator->bitpack_command) );
}

/* Return the iterator's current position, an index into the drawlist's
 * entries. */
int ctx_iterator_pos (CtxIterator *iterator)
{
  return iterator->pos;
}

/* Advance @iterator and return the entry it now points at, or NULL when the
 * end has been reached.
 *
 * The very first call returns the entry at the start position without
 * advancing; later calls skip over the current command together with its
 * continuation entries.
 */
static inline CtxEntry *_ctx_iterator_next (CtxIterator *iterator)
{
  int here = iterator->pos;

  if (CTX_UNLIKELY(here >= iterator->end_pos))
    return NULL;

  if (CTX_UNLIKELY(iterator->first_run))
    {
      iterator->first_run = 0;
    }
  else
    {
      CtxEntry *current = &iterator->drawlist->entries[here];
      iterator->pos += (ctx_conts_for_entry (current) + 1);
    }

  if (CTX_UNLIKELY(iterator->pos >= iterator->end_pos))
    return NULL;

  return &iterator->drawlist->entries[iterator->pos];
}

// 6024x4008
#if CTX_BITPACK
/* Unpack the eight signed 8-bit arguments of @entry into four float-pair
 * scratch commands of the iterator, scaling each value by 1/CTX_SUBDIV.
 * All four scratch commands are tagged CTX_CONT; the caller overrides the
 * codes (and possibly the length) afterwards.
 */
static void
ctx_iterator_expand_s8_args (CtxIterator *iterator, CtxEntry *entry)
{
  CtxEntry *out = iterator->bitpack_command;

  for (int no = 0; no < 8; no++)
    out[no / 2].data.f[no % 2] = entry->data.s8[no] * 1.0f / CTX_SUBDIV;

  for (int cno = 0; cno < 4; cno++)
    out[cno].code = CTX_CONT;

  iterator->bitpack_pos    = 0;
  iterator->bitpack_length = 4;
}

/* Unpack the four signed 16-bit arguments of @entry into two float-pair
 * scratch commands of the iterator, scaling each value by 1/CTX_SUBDIV.
 * Both scratch commands are tagged CTX_CONT; the caller overrides the
 * codes afterwards.
 */
static void
ctx_iterator_expand_s16_args (CtxIterator *iterator, CtxEntry *entry)
{
  CtxEntry *out = iterator->bitpack_command;

  for (int no = 0; no < 4; no++)
    out[no / 2].data.f[no % 2] = entry->data.s16[no] * 1.0f / CTX_SUBDIV;

  for (int cno = 0; cno < 2; cno++)
    out[cno].code = CTX_CONT;

  iterator->bitpack_pos    = 0;
  iterator->bitpack_length = 2;
}
#endif

/**
 * ctx_iterator_next:
 * @iterator: iterator set up with ctx_iterator_init()
 *
 * Advance @iterator and return the next command, or NULL when the end
 * position has been reached.
 *
 * When built with CTX_BITPACK and the iterator flags contain
 * CTX_ITERATOR_EXPAND_BITPACK, bit-packed entries are unpacked into the
 * iterator's scratch buffer and handed out one command at a time; the
 * returned pointer then refers to that scratch buffer, which is
 * overwritten by later expansions.
 */
CtxCommand *
ctx_iterator_next (CtxIterator *iterator)
{
  CtxEntry *ret;
#if CTX_BITPACK
  int expand_bitpack = iterator->flags & CTX_ITERATOR_EXPAND_BITPACK;
again:
  /* commands left over from a previously expanded entry are served first */
  if (CTX_UNLIKELY(iterator->bitpack_length))
    {
      ret = &iterator->bitpack_command[iterator->bitpack_pos];
      iterator->bitpack_pos += (ctx_conts_for_entry (ret) + 1);
      if (iterator->bitpack_pos >= iterator->bitpack_length)
        {
          iterator->bitpack_length = 0;
        }
      return (CtxCommand *) ret;
    }
#endif
  ret = _ctx_iterator_next (iterator);
#if CTX_BITPACK
  if (CTX_UNLIKELY(ret && expand_bitpack))
    switch ((CtxCode)(ret->code))
      {
        case CTX_REL_CURVE_TO_REL_LINE_TO:
          ctx_iterator_expand_s8_args (iterator, ret);
          iterator->bitpack_command[0].code = CTX_REL_CURVE_TO;
          iterator->bitpack_command[1].code =
          iterator->bitpack_command[2].code = CTX_CONT;
          iterator->bitpack_command[3].code = CTX_REL_LINE_TO;
          // 0.0 here is a common optimization - so check for it
          if (ret->data.s8[6]== 0 && ret->data.s8[7] == 0)
            { iterator->bitpack_length = 3; }
          else
            iterator->bitpack_length          = 4;
          goto again;
        case CTX_REL_LINE_TO_REL_CURVE_TO:
          ctx_iterator_expand_s8_args (iterator, ret);
          iterator->bitpack_command[0].code = CTX_REL_LINE_TO;
          iterator->bitpack_command[1].code = CTX_REL_CURVE_TO;
          /* a length of 2 suffices: serving the curve advances the scratch
           * position past its two continuation entries */
          iterator->bitpack_length          = 2;
          goto again;
        case CTX_REL_CURVE_TO_REL_MOVE_TO:
          ctx_iterator_expand_s8_args (iterator, ret);
          iterator->bitpack_command[0].code = CTX_REL_CURVE_TO;
          iterator->bitpack_command[3].code = CTX_REL_MOVE_TO;
          iterator->bitpack_length          = 4;
          goto again;
        case CTX_REL_LINE_TO_X4:
          ctx_iterator_expand_s8_args (iterator, ret);
          iterator->bitpack_command[0].code =
          iterator->bitpack_command[1].code =
          iterator->bitpack_command[2].code =
          iterator->bitpack_command[3].code = CTX_REL_LINE_TO;
          iterator->bitpack_length          = 4;
          goto again;
        case CTX_REL_QUAD_TO_S16:
          ctx_iterator_expand_s16_args (iterator, ret);
          iterator->bitpack_command[0].code = CTX_REL_QUAD_TO;
          iterator->bitpack_length          = 1;
          goto again;
        case CTX_REL_QUAD_TO_REL_QUAD_TO:
          ctx_iterator_expand_s8_args (iterator, ret);
          iterator->bitpack_command[0].code =
          iterator->bitpack_command[2].code = CTX_REL_QUAD_TO;
          iterator->bitpack_length          = 3;
          goto again;
        case CTX_REL_LINE_TO_X2:
          ctx_iterator_expand_s16_args (iterator, ret);
          iterator->bitpack_command[0].code =
          iterator->bitpack_command[1].code = CTX_REL_LINE_TO;
          iterator->bitpack_length          = 2;
          goto again;
        case CTX_REL_LINE_TO_REL_MOVE_TO:
          ctx_iterator_expand_s16_args (iterator, ret);
          iterator->bitpack_command[0].code = CTX_REL_LINE_TO;
          iterator->bitpack_command[1].code = CTX_REL_MOVE_TO;
          iterator->bitpack_length          = 2;
          goto again;
        case CTX_MOVE_TO_REL_LINE_TO:
          ctx_iterator_expand_s16_args (iterator, ret);
          iterator->bitpack_command[0].code = CTX_MOVE_TO;
          /* second half is a relative line, as the packed code's name says */
          iterator->bitpack_command[1].code = CTX_REL_LINE_TO;
          iterator->bitpack_length          = 2;
          goto again;
        case CTX_FILL_MOVE_TO:
          /* the packed entry carries the move-to coordinates as-is */
          iterator->bitpack_command[1]      = *ret;
          iterator->bitpack_command[0].code = CTX_FILL;
          iterator->bitpack_command[1].code = CTX_MOVE_TO;
          iterator->bitpack_pos             = 0;
          iterator->bitpack_length          = 2;
          goto again;
        /* everything below is not bit-packed and is returned untouched;
         * the codes are listed explicitly so that removing `default`
         * makes the compiler warn about unhandled enum values */
        case CTX_LINEAR_GRADIENT:
        case CTX_QUAD_TO:
        case CTX_REL_QUAD_TO:
        case CTX_TEXTURE:
        case CTX_RECTANGLE:
        case CTX_VIEW_BOX:
        case CTX_ARC:
        case CTX_ARC_TO:
        case CTX_REL_ARC_TO:
        case CTX_COLOR:
        case CTX_SHADOW_COLOR:
        case CTX_RADIAL_GRADIENT:
        case CTX_CURVE_TO:
        case CTX_REL_CURVE_TO:
        case CTX_APPLY_TRANSFORM:
        case CTX_SOURCE_TRANSFORM:
        case CTX_ROUND_RECTANGLE:
        case CTX_TEXT:
        case CTX_STROKE_TEXT:
        case CTX_FONT:
        case CTX_LINE_DASH:
        case CTX_FILL:
        case CTX_PAINT:
        case CTX_NOP:
        case CTX_MOVE_TO:
        case CTX_LINE_TO:
        case CTX_REL_MOVE_TO:
        case CTX_REL_LINE_TO:
        case CTX_VER_LINE_TO:
        case CTX_REL_VER_LINE_TO:
        case CTX_HOR_LINE_TO:
        case CTX_REL_HOR_LINE_TO:
        case CTX_ROTATE:
        case CTX_FLUSH:
        case CTX_TEXT_ALIGN:
        case CTX_TEXT_BASELINE:
        case CTX_TEXT_DIRECTION:
        case CTX_MITER_LIMIT:
        case CTX_GLOBAL_ALPHA:
        case CTX_COMPOSITING_MODE:
        case CTX_BLEND_MODE:
        case CTX_SHADOW_BLUR:
        case CTX_SHADOW_OFFSET_X:
        case CTX_SHADOW_OFFSET_Y:
        case CTX_RESET:
        case CTX_EXIT:
        case CTX_BEGIN_PATH:
        case CTX_CLOSE_PATH:
        case CTX_SAVE:
        case CTX_CLIP:
        case CTX_PRESERVE:
        case CTX_DEFINE_GLYPH:
        case CTX_IDENTITY:
        case CTX_FONT_SIZE:
        case CTX_START_GROUP:
        case CTX_END_GROUP:
        case CTX_RESTORE:
        case CTX_LINE_WIDTH:
        case CTX_LINE_DASH_OFFSET:
        case CTX_STROKE:
        case CTX_KERNING_PAIR:
        case CTX_SCALE:
        case CTX_GLYPH:
        case CTX_SET_PIXEL:
        case CTX_FILL_RULE:
        case CTX_LINE_CAP:
        case CTX_LINE_JOIN:
        case CTX_NEW_PAGE:
        case CTX_SET_KEY:
        case CTX_TRANSLATE:
        case CTX_DEFINE_TEXTURE:
        case CTX_GRADIENT_STOP:
        case CTX_DATA: // XXX : would be better if we hide the DATAs
        case CTX_CONT: // shouldnt happen
        default:
          iterator->bitpack_length = 0;
          break;
      }
#endif
  return (CtxCommand *) ret;
}

static void ctx_drawlist_compact (CtxDrawlist *drawlist);

/* Grow the entry storage of @drawlist so that it can hold at least
 * @desired_size items, clamped to the minimum/maximum configured for the
 * kind of drawlist (edge list, current path or journal).
 *
 * The storage is never shrunk.  If memory cannot be obtained the drawlist
 * is left exactly as it was (same buffer, same size), so callers can
 * detect the failure by checking drawlist->size afterwards.
 *
 * With CTX_DRAWLIST_STATIC the drawlist is instead pointed at a fixed,
 * statically allocated buffer and @desired_size is ignored.
 */
static void
ctx_drawlist_resize (CtxDrawlist *drawlist, int desired_size)
{
  int flags=drawlist->flags;
#if CTX_DRAWLIST_STATIC
  (void) desired_size;
  if (flags & CTX_DRAWLIST_EDGE_LIST)
    {
      static CtxSegment sbuf[CTX_MAX_EDGE_LIST_SIZE];
      drawlist->entries = (CtxEntry*)&sbuf[0];
      drawlist->size = CTX_MAX_EDGE_LIST_SIZE;
    }
  else if (flags & CTX_DRAWLIST_CURRENT_PATH)
    {
      static CtxEntry sbuf[CTX_MAX_EDGE_LIST_SIZE];
      drawlist->entries = &sbuf[0];
      drawlist->size = CTX_MAX_EDGE_LIST_SIZE;
    }
  else
    {
      static CtxEntry sbuf[CTX_MAX_JOURNAL_SIZE];
      drawlist->entries = &sbuf[0];
      drawlist->size = CTX_MAX_JOURNAL_SIZE;
      /* never executed; keeps the forward-declared function referenced */
      if(0)ctx_drawlist_compact (drawlist);
    }
#else
  int new_size  = desired_size;
  int min_size  = CTX_MIN_JOURNAL_SIZE;
  int max_size  = CTX_MAX_JOURNAL_SIZE;
  int item_size = sizeof (CtxEntry);
  if ((flags & CTX_DRAWLIST_EDGE_LIST))
    {
      min_size  = CTX_MIN_EDGE_LIST_SIZE;
      max_size  = CTX_MAX_EDGE_LIST_SIZE;
      /* edge lists store CtxSegment items rather than CtxEntry */
      item_size = sizeof (CtxSegment);
    }
  else if (flags & CTX_DRAWLIST_CURRENT_PATH)
    {
      min_size = CTX_MIN_EDGE_LIST_SIZE;
      max_size = CTX_MAX_EDGE_LIST_SIZE;
    }

  if (CTX_UNLIKELY(new_size < drawlist->size))
    { return; }
  if (CTX_UNLIKELY(drawlist->size == max_size))
    { return; }
  new_size = ctx_maxi (new_size, min_size);
  new_size = ctx_mini (new_size, max_size);
  /* only ever grow: a clamped size below the current one must not lead
   * to copying the old contents into a smaller buffer */
  if (new_size <= drawlist->size)
    { return; }

  CtxEntry *ne = (CtxEntry *) malloc ((size_t) item_size * new_size);
  if (CTX_UNLIKELY(!ne))
    { return; } /* out of memory: keep the existing buffer and size */
  if (drawlist->entries)
    {
      memcpy (ne, drawlist->entries, (size_t) drawlist->size * item_size);
      free (drawlist->entries);
    }
  drawlist->entries = ne;
  drawlist->size    = new_size;
#endif
}


/* Append one item to @drawlist and return the index it was stored at.
 *
 * Edge lists copy a CtxSegment from @entry, all other drawlists a
 * CtxEntry.  A drawlist that does not own its entries is left untouched
 * and the current count is returned.  When the drawlist is within 20
 * items of its maximum size nothing is added and 0 is returned.
 */
static inline int
ctx_drawlist_add_single (CtxDrawlist *drawlist, CtxEntry *entry)
{
  int flags = drawlist->flags;
  int ret   = drawlist->count;
  unsigned int max_size;

  if (CTX_UNLIKELY(flags & CTX_DRAWLIST_DOESNT_OWN_ENTRIES))
    return ret;

  if (CTX_LIKELY((flags & CTX_DRAWLIST_EDGE_LIST ||
       flags & CTX_DRAWLIST_CURRENT_PATH)))
    max_size = CTX_MAX_EDGE_LIST_SIZE;
  else
    max_size = CTX_MAX_JOURNAL_SIZE;

  /* grow well ahead of actually running out of room */
  if (CTX_UNLIKELY(ret + 64 >= drawlist->size - 40))
    {
      int grown = CTX_MAX (drawlist->size * 2, ret + 1024);
      ctx_drawlist_resize (drawlist, grown);
    }

  if (CTX_UNLIKELY(drawlist->count >= max_size - 20))
    return 0;

  ret = drawlist->count;
  if ((flags & CTX_DRAWLIST_EDGE_LIST))
    {
      CtxSegment *segments = (CtxSegment*)(drawlist->entries);
      segments[ret] = *(CtxSegment*)entry;
    }
  else
    {
      drawlist->entries[ret] = *entry;
    }
  drawlist->count++;
  return ret;
}


/* Append the single entry pointed to by @entry to the drawlist of @ctx;
 * returns what ctx_drawlist_add_single() returns.
 */
int
ctx_add_single (Ctx *ctx, void *entry)
{
  CtxEntry *single = (CtxEntry *) entry;
  return ctx_drawlist_add_single (&ctx->drawlist, single);
}

/* Append a complete command - @entry plus its continuation entries - to
 * @drawlist.  Returns the value ctx_drawlist_add_single() gave for the
 * last entry added.
 */
static inline int
ctx_drawlist_add_entry (CtxDrawlist *drawlist, CtxEntry *entry)
{
  int       remaining = ctx_conts_for_entry (entry) + 1;
  int       last_pos  = 0;
  CtxEntry *cursor    = entry;

  while (remaining-- > 0)
    last_pos = ctx_drawlist_add_single (drawlist, cursor++);

  return last_pos;
}

#if 0
/* Disabled earlier variant of ctx_drawlist_insert_entry(); it is excluded
 * from compilation by the surrounding #if 0 and kept for reference only.
 * NOTE(review): the inner loop copies from `entry` instead of from
 * drawlist->entries, which looks unintended - confirm before reviving.
 */
int
ctx_drawlist_insert_entry (CtxDrawlist *drawlist, int pos, CtxEntry *entry)
{
  int length = ctx_conts_for_entry (entry) + 1;
  int tmp_pos = ctx_drawlist_add_entry (drawlist, entry);
  for (int i = 0; i < length; i++)
  {
    for (int j = pos + i + 1; j < tmp_pos; j++)
      drawlist->entries[j] = entry[j-1];
    drawlist->entries[pos + i] = entry[i];
  }
  return pos;
}
#endif
/* Insert the command at @entry (with its continuation entries) into
 * @drawlist so that it starts at index @pos; returns @pos.
 *
 * The command is first appended to make room at the tail, then for each of
 * its entries the range from the target slot up to the last appended slot
 * is shifted one step towards the tail and the entry is written into the
 * freed slot.  @entry is read after the drawlist has been modified.
 */
int
ctx_drawlist_insert_entry (CtxDrawlist *drawlist, int pos, CtxEntry *entry)
{
  int n_entries = ctx_conts_for_entry (entry) + 1;
  int last      = ctx_drawlist_add_entry (drawlist, entry);

  for (int i = 0; i < n_entries; i++)
    {
      int slot = pos + i;
      if (last > slot)
        memmove (&drawlist->entries[slot + 1], &drawlist->entries[slot],
                 (last - slot) * sizeof (drawlist->entries[0]));
      drawlist->entries[slot] = entry[i];
    }
  return pos;
}

/* Append @length bytes of serialized entries from @data to the drawlist of
 * @ctx, one entry at a time.  Returns 0 on success and -1 (after logging)
 * when @length is not a whole number of entries.
 */
int ctx_append_drawlist (Ctx *ctx, void *data, int length)
{
  if (length % sizeof (CtxEntry) != 0)
    {
      ctx_log("drawlist not multiple of 9\n");
      return -1;
    }

  CtxEntry *entries = (CtxEntry *) data;
  unsigned int i = 0;
  while (i < length / sizeof (CtxEntry))
    {
      ctx_drawlist_add_single (&ctx->drawlist, entries + i);
      i++;
    }
  return 0;
}

/* Replace the contents of the drawlist of @ctx with the @length bytes of
 * serialized entries found at @data.
 *
 * Returns @length on success, 0 when @data is NULL or @length is 0 (the
 * drawlist is then simply emptied), and -1 when the drawlist does not own
 * its entries, when @length is negative or not a whole number of entries,
 * or when the drawlist cannot be made large enough to hold the data.  In
 * all cases except the first -1 case the drawlist has been emptied.
 */
int ctx_set_drawlist (Ctx *ctx, void *data, int length)
{
  CtxDrawlist *drawlist = &ctx->drawlist;
  if (drawlist->flags & CTX_DRAWLIST_DOESNT_OWN_ENTRIES)
    {
      return -1;
    }
  drawlist->count = 0;
  if (!data || length == 0)
    return 0;
  if (CTX_UNLIKELY(length < 0 || length % (int) sizeof (CtxEntry)))
    return -1;

  int n_entries = length / (int) sizeof (CtxEntry);
  ctx_drawlist_resize (drawlist, n_entries);
  /* the resize is clamped to a maximum size and may also fail to allocate;
   * never copy more than the buffer can actually hold */
  if (CTX_UNLIKELY(!drawlist->entries || (int) drawlist->size < n_entries))
    return -1;

  memcpy (drawlist->entries, data, length);
  drawlist->count = n_entries;
  return length;
}

/* Return the entry array of the drawlist of @ctx; when @count is non-NULL
 * the number of entries is stored through it.
 */
const CtxEntry *ctx_get_drawlist (Ctx *ctx, int *count)
{
  if (count != NULL)
    {
      *count = ctx->drawlist.count;
    }
  return ctx->drawlist.entries;
}

/* Append the command found at @data (its first entry plus continuation
 * entries) to the drawlist of @ctx.
 *
 * @length is only checked for being a whole number of entries; -1 is
 * returned when it is not.  The number of entries actually appended is
 * derived from the command itself, not from @length.
 */
int
ctx_add_data (Ctx *ctx, void *data, int length)
{
  int leftover = length % sizeof (CtxEntry);
  if (CTX_UNLIKELY(leftover))
    return -1;

  /* NOTE: no verification that the data is well-formed up to length is
   * done here; stopping at a flush and resizing the drawlist there has
   * also been considered useful.
   */
  CtxEntry *command = (CtxEntry *) data;
  return ctx_drawlist_add_entry (&ctx->drawlist, command);
}

/* Append a single entry with command @code and the two 32-bit arguments
 * in @u32 to @drawlist; returns what ctx_drawlist_add_single() returns.
 */
int ctx_drawlist_add_u32 (CtxDrawlist *drawlist, CtxCode code, uint32_t u32[2])
{
  CtxEntry  entry[3];
  CtxEntry *head = &entry[0];

  head->code = code;
  for (int i = 0; i < 2; i++)
    head->data.u32[i] = u32[i];

  return ctx_drawlist_add_single (drawlist, head);
}

/* Append a blob of @length bytes from @data to @drawlist.
 *
 * The blob is stored as a CTX_DATA header entry (byte length and number of
 * payload blocks), the payload rounded up to whole entries, and a trailing
 * CTX_DATA_REV entry carrying the same two numbers.  A @length of zero or
 * less means @data is a NUL-terminated string whose terminator is stored
 * as well.
 *
 * Returns the index of the CTX_DATA header, or -1 when the header could
 * not be appended, when @data is NULL, or when the payload does not fit
 * into the drawlist.
 */
int ctx_drawlist_add_data (CtxDrawlist *drawlist, const void *data, int length)
{
  CtxEntry header[3] = {{CTX_DATA, {{0},}}};
  header[0].data.u32[0] = 0;
  header[0].data.u32[1] = 0;
  int count_before = (int) drawlist->count;
  int ret = ctx_drawlist_add_single (drawlist, &header[0]);
  /* ctx_drawlist_add_single() can decline to append (drawlist full, or
   * entries not owned); its return value is then not the index of a fresh
   * header and must not be written through */
  if (CTX_UNLIKELY((int) drawlist->count == count_before))
    { return -1; }
  if (CTX_UNLIKELY(!data)) { return -1; }
  int length_in_blocks;
  if (length <= 0) { length = strlen ( (const char *) data) + 1; }
  length_in_blocks = length / sizeof (CtxEntry);
  length_in_blocks += (length % sizeof (CtxEntry) ) ?1:0;
  if ((signed)drawlist->count + length_in_blocks + 4 > drawlist->size)
    { ctx_drawlist_resize (drawlist, drawlist->count * 1.2 + length_in_blocks + 32); }
  /* the resize is clamped; make sure the whole payload fits before copying */
  if (CTX_UNLIKELY((signed)drawlist->count + length_in_blocks > drawlist->size))
    { return -1; }
  drawlist->count += length_in_blocks;
  drawlist->entries[ret].data.u32[0] = length;
  drawlist->entries[ret].data.u32[1] = length_in_blocks;
  memcpy (&drawlist->entries[ret+1], data, length);
  {
    CtxEntry reverse[3] = {{CTX_DATA_REV, {{0},}}};
    reverse[0].data.u32[0] = length;
    reverse[0].data.u32[1] = length_in_blocks;
    ctx_drawlist_add_single (drawlist, &reverse[0]);

    /* this reverse marker exist to enable more efficient
       front to back traversal, can be ignored in other
       direction, is this needed after string setters as well?
     */
  }
  return ret;
}

/* Build an entry that carries only the command @code; the argument bytes
 * of the returned entry are left unset.
 */
static inline CtxEntry
ctx_void (CtxCode code)
{
  CtxEntry entry;

  entry.code = code;
  return entry;
}

/* Build an entry for command @code with two float arguments. */
static inline CtxEntry
ctx_f (CtxCode code, float x, float y)
{
  CtxEntry entry;

  entry.data.f[0] = x;
  entry.data.f[1] = y;
  entry.code      = code;
  return entry;
}

/* Build an entry for command @code with two unsigned 32-bit arguments. */
static CtxEntry
ctx_u32 (CtxCode code, uint32_t x, uint32_t y)
{
  CtxEntry entry;

  entry.code        = code;
  entry.data.u32[0] = x;
  entry.data.u32[1] = y;
  return entry;
}

#if 0
/* Disabled: builds an entry for command @code with two signed 32-bit
 * arguments.  Excluded from compilation by the surrounding #if 0.
 */
static CtxEntry
ctx_s32 (CtxCode code, int32_t x, int32_t y)
{
  CtxEntry command = ctx_void (code);
  command.data.s32[0] = x;
  command.data.s32[1] = y;
  return command;
}
#endif

/* Build an entry for command @code with four signed 16-bit arguments; the
 * int arguments are converted on assignment.
 */
static inline CtxEntry
ctx_s16 (CtxCode code, int x0, int y0, int x1, int y1)
{
  const int values[4] = { x0, y0, x1, y1 };
  CtxEntry  entry;

  entry.code = code;
  for (int n = 0; n < 4; n++)
    entry.data.s16[n] = values[n];
  return entry;
}


/* Build an entry for command @code with eight unsigned 8-bit arguments,
 * stored in the order given.
 */
static CtxEntry
ctx_u8 (CtxCode code,
        uint8_t a, uint8_t b, uint8_t c, uint8_t d,
        uint8_t e, uint8_t f, uint8_t g, uint8_t h)
{
  const uint8_t bytes[8] = { a, b, c, d, e, f, g, h };
  CtxEntry      entry;

  entry.code = code;
  for (int n = 0; n < 8; n++)
    entry.data.u8[n] = bytes[n];
  return entry;
}

/* Build and process a command that carries a string argument.
 *
 * The command consists of a header entry (@code with @arg0/@arg1), a
 * CTX_DATA entry holding the byte length and the number of payload
 * entries, and the payload entries containing @len bytes of @string
 * followed by a terminating NUL.  The whole command lives in a
 * variable-length array on the stack.
 */
static void
ctx_process_cmd_str_with_len (Ctx *ctx, CtxCode code, const char *string, uint32_t arg0, uint32_t arg1, int len)
{
  int      data_blocks = (len+1+1)/9 + 1;
  CtxEntry commands[2 + data_blocks];
  char    *payload;

  ctx_memset (commands, 0, sizeof (commands) );

  commands[0] = ctx_u32 (code, arg0, arg1);

  commands[1].code        = CTX_DATA;
  commands[1].data.u32[0] = len;
  commands[1].data.u32[1] = data_blocks;

  payload = (char *) &commands[2].data.u8[0];
  memcpy (payload, string, len);
  payload[len] = 0;

  ctx_process (ctx, commands);
}

/* Process a string-carrying command for the NUL-terminated @string. */
static void
ctx_process_cmd_str (Ctx *ctx, CtxCode code, const char *string, uint32_t arg0, uint32_t arg1)
{
  int len = strlen (string);
  ctx_process_cmd_str_with_len (ctx, code, string, arg0, arg1, len);
}

/* Process a string-carrying command whose two header arguments are
 * floats; their bit patterns are passed on unchanged as 32-bit integers.
 */
static void
ctx_process_cmd_str_float (Ctx *ctx, CtxCode code, const char *string, float arg0, float arg1)
{
  float    args[2] = { arg0, arg1 };
  uint32_t bits[2];

  memcpy (bits, args, sizeof (bits));
  ctx_process_cmd_str_with_len (ctx, code, string, bits[0], bits[1], strlen (string));
}

#if CTX_BITPACK_PACKER
/* Walk @drawlist command by command and return the history position.
 * The walk currently records nothing, so the result is always 0.
 */
static unsigned int
ctx_last_history (CtxDrawlist *drawlist)
{
  unsigned int last_history = 0;

  for (unsigned int i = 0;
       i < drawlist->count;
       i += (ctx_conts_for_entry (&drawlist->entries[i]) + 1))
    {
      /* nothing to record for this command */
    }
  return last_history;
}
#endif

#if CTX_BITPACK_PACKER

/* Return the largest absolute value among the float pairs of the first
 * @nentrys entries starting at @entry (0 when there are none).
 */
static float
find_max_dev (CtxEntry *entry, int nentrys)
{
  float largest = 0.0;

  for (int n = 0; n < nentrys * 2; n++)
    {
      float value     = entry[n / 2].data.f[n % 2];
      float magnitude = (value < 0) ? -value : value;
      if (magnitude > largest)
        largest = magnitude;
    }
  return largest;
}

/* Pack the float pairs of @npairs consecutive entries into the signed
 * 8-bit argument slots of the first entry, scaled by CTX_SUBDIV.
 * The conversion is done in place and in ascending slot order.
 */
static void
pack_s8_args (CtxEntry *entry, int npairs)
{
  for (int no = 0; no < npairs * 2; no++)
    entry[0].data.s8[no] = entry[no / 2].data.f[no % 2] * CTX_SUBDIV;
}

/* Pack the float pairs of @npairs consecutive entries into the signed
 * 16-bit argument slots of the first entry, scaled by CTX_SUBDIV.
 * The conversion is done in place and in ascending slot order.
 */
static void
pack_s16_args (CtxEntry *entry, int npairs)
{
  for (int no = 0; no < npairs * 2; no++)
    entry[0].data.s16[no] = entry[no / 2].data.f[no % 2] * CTX_SUBDIV;
}
#endif

#if CTX_BITPACK_PACKER
/* Replace relative curves whose control and end point offsets are all
 * smaller than 1.0 with a relative line to the curve's end point; the two
 * continuation entries of such a curve become CTX_NOP.
 *
 * Only acts on drawlists flagged CTX_TRANSFORMATION_BITPACK.  Scanning
 * starts at @start_pos and stops 5 entries before the end of the
 * drawlist.
 */
static void
ctx_drawlist_remove_tiny_curves (CtxDrawlist *drawlist, int start_pos)
{
  CtxIterator iterator;

  if ( (drawlist->flags & CTX_TRANSFORMATION_BITPACK) == 0)
    return;

  ctx_iterator_init (&iterator, drawlist, start_pos, CTX_ITERATOR_FLAT);
  iterator.end_pos = drawlist->count - 5;

  for (CtxCommand *command = ctx_iterator_next (&iterator);
       command;
       command = ctx_iterator_next (&iterator))
    {
      CtxEntry *entry = &command->entry;

      if (command[0].code != CTX_REL_CURVE_TO)
        continue;

      /* things smaller than this have probably been scaled down beyond
         recognition; flattening them gives better packing and less
         rasterization work */
      if (!(find_max_dev (entry, 3) < 1.0))
        continue;

      entry[0].code      = CTX_REL_LINE_TO;
      entry[0].data.f[0] = entry[2].data.f[0];
      entry[0].data.f[1] = entry[2].data.f[1];
      entry[1].code      = CTX_NOP;
      entry[2].code      = CTX_NOP;
    }
}
#endif

#if CTX_BITPACK_PACKER
/* Rewrite runs of commands in @drawlist into more compact forms, in place.
 *
 * Does nothing unless built with CTX_BITPACK and the drawlist carries the
 * CTX_TRANSFORMATION_BITPACK flag.  Processing starts at the larger of
 * drawlist->bitpack_pos and @start_pos.  Three steps:
 *
 *   1. tiny relative curves are flattened to lines,
 *   2. recognized command sequences are merged into a single entry and
 *      the entries they no longer need are turned into CTX_NOP,
 *   3. the CTX_NOP entries are squeezed out, count is reduced and
 *      bitpack_pos is moved to the new end of the drawlist.
 */
static void
ctx_drawlist_bitpack (CtxDrawlist *drawlist, unsigned int start_pos)
{
#if CTX_BITPACK
  unsigned int i = 0;
  if ( (drawlist->flags & CTX_TRANSFORMATION_BITPACK) == 0)
    { return; }
  ctx_drawlist_remove_tiny_curves (drawlist, drawlist->bitpack_pos);
  i = drawlist->bitpack_pos;
  if (start_pos > i)
    { i = start_pos; }
  /* NOTE(review): if drawlist->count is unsigned and smaller than 4 the
   * subtraction below wraps around - confirm callers guarantee count >= 4.
   * NOTE(review): the first pattern reads up to entry[6], which is beyond
   * the 4 entries of slack the loop condition leaves - presumably covered
   * by spare capacity in the allocation; verify. */
  while (i < drawlist->count - 4) /* the -4 is to avoid looking past
                                    initialized data we're not ready
                                    to bitpack yet*/
    {
      CtxEntry *entry = &drawlist->entries[i];
      /* a color followed by a filled path of four relative lines whose
       * first two steps are about 1.0 long: collapse into one SET_PIXEL */
      if (entry[0].code == CTX_SET_RGBA_U8 &&
          entry[1].code == CTX_MOVE_TO &&
          entry[2].code == CTX_REL_LINE_TO &&
          entry[3].code == CTX_REL_LINE_TO &&
          entry[4].code == CTX_REL_LINE_TO &&
          entry[5].code == CTX_REL_LINE_TO &&
          entry[6].code == CTX_FILL &&
          ctx_fabsf (entry[2].data.f[0] - 1.0f) < 0.02f &&
          ctx_fabsf (entry[3].data.f[1] - 1.0f) < 0.02f)
        {
          entry[0].code = CTX_SET_PIXEL;
          entry[0].data.u16[2] = entry[1].data.f[0];
          entry[0].data.u16[3] = entry[1].data.f[1];
          entry[1].code = CTX_NOP;
          entry[2].code = CTX_NOP;
          entry[3].code = CTX_NOP;
          entry[4].code = CTX_NOP;
          entry[5].code = CTX_NOP;
          entry[6].code = CTX_NOP;
        }
#if 1
      /* sequences starting with a relative line */
      else if (entry[0].code == CTX_REL_LINE_TO)
        {
          /* NOTE(review): `114 / CTX_SUBDIV` (and the similar limits below)
           * is an integer division if CTX_SUBDIV is an integer constant -
           * confirm that the truncation is intended. */
          if (entry[1].code == CTX_REL_LINE_TO &&
              entry[2].code == CTX_REL_LINE_TO &&
              entry[3].code == CTX_REL_LINE_TO)
            {
              float max_dev = find_max_dev (entry, 4);
              if (max_dev < 114 / CTX_SUBDIV)
                {
                  pack_s8_args (entry, 4);
                  entry[0].code = CTX_REL_LINE_TO_X4;
                  entry[1].code = CTX_NOP;
                  entry[2].code = CTX_NOP;
                  entry[3].code = CTX_NOP;
                }
            }
          else if (entry[1].code == CTX_REL_CURVE_TO)
            {
              float max_dev = find_max_dev (entry, 4);
              if (max_dev < 114 / CTX_SUBDIV)
                {
                  pack_s8_args (entry, 4);
                  entry[0].code = CTX_REL_LINE_TO_REL_CURVE_TO;
                  entry[1].code = CTX_NOP;
                  entry[2].code = CTX_NOP;
                  entry[3].code = CTX_NOP;
                }
            }
          /* this condition repeats the first one of this chain, so the
           * branch can never be taken */
          else if (entry[1].code == CTX_REL_LINE_TO &&
                   entry[2].code == CTX_REL_LINE_TO &&
                   entry[3].code == CTX_REL_LINE_TO)
            {
              float max_dev = find_max_dev (entry, 4);
              if (max_dev < 114 / CTX_SUBDIV)
                {
                  pack_s8_args (entry, 4);
                  entry[0].code = CTX_REL_LINE_TO_X4;
                  entry[1].code = CTX_NOP;
                  entry[2].code = CTX_NOP;
                  entry[3].code = CTX_NOP;
                }
            }
          else if (entry[1].code == CTX_REL_MOVE_TO)
            {
              float max_dev = find_max_dev (entry, 2);
              if (max_dev < 31000 / CTX_SUBDIV)
                {
                  pack_s16_args (entry, 2);
                  entry[0].code = CTX_REL_LINE_TO_REL_MOVE_TO;
                  entry[1].code = CTX_NOP;
                }
            }
          else if (entry[1].code == CTX_REL_LINE_TO)
            {
              float max_dev = find_max_dev (entry, 2);
              if (max_dev < 31000 / CTX_SUBDIV)
                {
                  pack_s16_args (entry, 2);
                  entry[0].code = CTX_REL_LINE_TO_X2;
                  entry[1].code = CTX_NOP;
                }
            }
        }
#endif
#if 1
      /* sequences starting with a relative curve (entry[1] and entry[2]
       * are its continuation entries, entry[3] is the next command) */
      else if (entry[0].code == CTX_REL_CURVE_TO)
        {
          if (entry[3].code == CTX_REL_LINE_TO)
            {
              float max_dev = find_max_dev (entry, 4);
              if (max_dev < 114 / CTX_SUBDIV)
                {
                  pack_s8_args (entry, 4);
                  entry[0].code = CTX_REL_CURVE_TO_REL_LINE_TO;
                  entry[1].code = CTX_NOP;
                  entry[2].code = CTX_NOP;
                  entry[3].code = CTX_NOP;
                }
            }
          else if (entry[3].code == CTX_REL_MOVE_TO)
            {
              float max_dev = find_max_dev (entry, 4);
              if (max_dev < 114 / CTX_SUBDIV)
                {
                  pack_s8_args (entry, 4);
                  entry[0].code = CTX_REL_CURVE_TO_REL_MOVE_TO;
                  entry[1].code = CTX_NOP;
                  entry[2].code = CTX_NOP;
                  entry[3].code = CTX_NOP;
                }
            }
          else
            {
              /* a lone curve: packed as curve + line with the line's
               * arguments set to zero.
               * NOTE(review): ctx_arg_s8 is a macro defined elsewhere;
               * presumably it addresses the s8 slots of `entry` - confirm. */
              float max_dev = find_max_dev (entry, 3);
              if (max_dev < 114 / CTX_SUBDIV)
                {
                  pack_s8_args (entry, 3);
                  ctx_arg_s8 (6) =
                    ctx_arg_s8 (7) = 0;
                  entry[0].code = CTX_REL_CURVE_TO_REL_LINE_TO;
                  entry[1].code = CTX_NOP;
                  entry[2].code = CTX_NOP;
                }
            }
        }
#endif
#if 1
      /* sequences starting with a relative quadratic curve */
      else if (entry[0].code == CTX_REL_QUAD_TO)
        {
          if (entry[2].code == CTX_REL_QUAD_TO)
            {
              float max_dev = find_max_dev (entry, 4);
              if (max_dev < 114 / CTX_SUBDIV)
                {
                  pack_s8_args (entry, 4);
                  entry[0].code = CTX_REL_QUAD_TO_REL_QUAD_TO;
                  entry[1].code = CTX_NOP;
                  entry[2].code = CTX_NOP;
                  entry[3].code = CTX_NOP;
                }
            }
          else
            {
              float max_dev = find_max_dev (entry, 2);
              if (max_dev < 3100 / CTX_SUBDIV)
                {
                  pack_s16_args (entry, 2);
                  entry[0].code = CTX_REL_QUAD_TO_S16;
                  entry[1].code = CTX_NOP;
                }
            }
        }
#endif
#if 1
      /* fill directly followed by a move: one entry carrying the move's
       * coordinates */
      else if (entry[0].code == CTX_FILL &&
               entry[1].code == CTX_MOVE_TO)
        {
          entry[0] = entry[1];
          entry[0].code = CTX_FILL_MOVE_TO;
          entry[1].code = CTX_NOP;
        }
#endif
#if 1
      /* three moves in a row: only the last one is kept */
      else if (entry[0].code == CTX_MOVE_TO &&
               entry[1].code == CTX_MOVE_TO &&
               entry[2].code == CTX_MOVE_TO)
        {
          entry[0]      = entry[2];
          entry[0].code = CTX_MOVE_TO;
          entry[1].code = CTX_NOP;
          entry[2].code = CTX_NOP;
        }
#endif
#if 1
      /* a move (absolute or relative) followed by an absolute move: only
       * the absolute move is kept */
      else if ( (entry[0].code == CTX_MOVE_TO &&
                 entry[1].code == CTX_MOVE_TO) ||
                (entry[0].code == CTX_REL_MOVE_TO &&
                 entry[1].code == CTX_MOVE_TO) )
        {
          entry[0]      = entry[1];
          entry[0].code = CTX_MOVE_TO;
          entry[1].code = CTX_NOP;
        }
#endif
      i += (ctx_conts_for_entry (entry) + 1);
    }

  unsigned int source = drawlist->bitpack_pos;
  unsigned int target = drawlist->bitpack_pos;
  int removed = 0;
  /* remove nops that have been inserted as part of shortenings
   */
  while (source < drawlist->count)
    {
      CtxEntry *sentry = &drawlist->entries[source];
      CtxEntry *tentry = &drawlist->entries[target];
      /* NOTE(review): the code of sentry is read before source is checked
       * against count, and after trailing NOPs the entry at index count is
       * copied - looks harmless as long as that slot is allocated; verify. */
      while (sentry->code == CTX_NOP && source < drawlist->count)
        {
          source++;
          sentry = &drawlist->entries[source];
          removed++;
        }
      if (sentry != tentry)
        { *tentry = *sentry; }
      source ++;
      target ++;
    }
  drawlist->count -= removed;
  drawlist->bitpack_pos = drawlist->count;
#endif
}

#endif

/* Compact @drawlist by bit-packing it from the last history position.
 * Without CTX_BITPACK_PACKER this is a no-op.
 */
static inline void
ctx_drawlist_compact (CtxDrawlist *drawlist)
{
#if CTX_BITPACK_PACKER
  unsigned int from = ctx_last_history (drawlist);
  ctx_drawlist_bitpack (drawlist, from);
#else
  /* reference the parameter so that it does not count as unused */
  if (drawlist) {};
#endif
}

/* Return a pointer to the first argument byte of the entry that follows
 * the data block starting at entry[2] by two entries; going by the
 * function name this is where the pixel data of a define-texture command
 * starts.
 */
uint8_t *ctx_define_texture_pixel_data (CtxEntry *entry)
{
  int       eid_conts = ctx_conts_for_entry (&entry[2]);
  CtxEntry *pixels    = &entry[2 + 1 + 1 + eid_conts];

  return &pixels->data.u8[0];
}

#ifndef __CTX_TRANSFORM
#define __CTX_TRANSFORM


/* Transform the point (*x, y_in) by @m and store only the resulting x
 * coordinate back through @x; the transformed y is discarded.
 */
static inline void
_ctx_matrix_apply_transform_only_x (const CtxMatrix *m, float *x, float y_in)
{
  /* the full transform needs the real y as input, not an unset value */
  float y_res = y_in;
  _ctx_matrix_apply_transform (m, x, &y_res);
}

/**
 * ctx_matrix_apply_transform:
 * @m: matrix to apply
 * @x: x coordinate, transformed in place
 * @y: y coordinate, transformed in place
 *
 * Exported wrapper around the internal _ctx_matrix_apply_transform().
 */
void
ctx_matrix_apply_transform (const CtxMatrix *m, float *x, float *y)
{
  /* all work happens in the internal implementation */
  _ctx_matrix_apply_transform (m, x, y);
}

/* Map the point (*x, *y) in place through the transform of the current
 * graphics state.
 */
static inline void
_ctx_user_to_device (CtxState *state, float *x, float *y)
{
  /* the transform of the current graphics state is applied as it is */
  _ctx_matrix_apply_transform (&(state->gstate.transform), x, y);
}

/* Map the distance vector (*x, *y) in place through the transform of the
 * current graphics state, leaving out the translation.
 *
 * The translation is removed by subtracting the image of the origin, which
 * does not depend on which matrix cells hold the translation.
 */
static void
_ctx_user_to_device_distance (CtxState *state, float *x, float *y)
{
  const CtxMatrix *m = &state->gstate.transform;
  float origin_x = 0.0f;
  float origin_y = 0.0f;

  _ctx_matrix_apply_transform (m, &origin_x, &origin_y);
  _ctx_matrix_apply_transform (m, x, y);
  *x -= origin_x;
  *y -= origin_y;
}

/* Map the point (*x, *y) in place through the current transform of @ctx. */
void
ctx_user_to_device (Ctx *ctx, float *x, float *y)
{
  /* operates on the state embedded in the context */
  _ctx_user_to_device (&(ctx->state), x, y);
}
/* Map the distance vector (*x, *y) in place through the current transform
 * of @ctx; see _ctx_user_to_device_distance().
 */
void
ctx_user_to_device_distance (Ctx *ctx, float *x, float *y)
{
  /* operates on the state embedded in the context */
  _ctx_user_to_device_distance (&(ctx->state), x, y);
}



/* Map the point (*x, *y) in place through the inverse of the transform of
 * the current graphics state.  The inverse is computed on a local copy;
 * the state itself is not modified.
 */
static inline void
_ctx_device_to_user (CtxState *state, float *x, float *y)
{
  CtxMatrix inverse = state->gstate.transform;

  ctx_matrix_invert (&inverse);
  _ctx_matrix_apply_transform (&inverse, x, y);
}

/* Map the distance vector (*x, *y) in place through the inverse of the
 * transform of the current graphics state, leaving out the translation.
 *
 * The inverse is computed on a local copy of the transform.  The
 * translation is removed by subtracting the image of the origin, which
 * does not depend on which matrix cells hold the translation.
 */
static void
_ctx_device_to_user_distance (CtxState *state, float *x, float *y)
{
  CtxMatrix m = state->gstate.transform;
  float origin_x = 0.0f;
  float origin_y = 0.0f;

  ctx_matrix_invert (&m);
  _ctx_matrix_apply_transform (&m, &origin_x, &origin_y);
  _ctx_matrix_apply_transform (&m, x, y);
  *x -= origin_x;
  *y -= origin_y;
}

/* Map the point (*x, *y) in place through the inverse of the current
 * transform of @ctx.
 */
void
ctx_device_to_user (Ctx *ctx, float *x, float *y)
{
  /* operates on the state embedded in the context */
  _ctx_device_to_user (&(ctx->state), x, y);
}
/* Map the distance vector (*x, *y) in place through the inverse of the
 * current transform of @ctx; see _ctx_device_to_user_distance().
 */
void
ctx_device_to_user_distance (Ctx *ctx, float *x, float *y)
{
  /* operates on the state embedded in the context */
  _ctx_device_to_user_distance (&(ctx->state), x, y);
}






/* Fill the 3x3 @matrix row by row: (a b c), (d e f), (g h i). */
static void
ctx_matrix_set (CtxMatrix *matrix, float a, float b, float c, float d, float e, float f, float g, float h, float i)
{
  const float cells[9] = { a, b, c, d, e, f, g, h, i };

  for (int n = 0; n < 9; n++)
    matrix->m[n / 3][n % 3] = cells[n];
}


/**
 * ctx_matrix_identity:
 * @matrix: matrix to reset
 *
 * Exported wrapper around the internal _ctx_matrix_identity().
 */
void
ctx_matrix_identity (CtxMatrix *matrix)
{
  /* all work happens in the internal implementation */
  _ctx_matrix_identity (matrix);
}

/**
 * ctx_matrix_multiply:
 * @result: where the product is stored
 * @t: first operand
 * @s: second operand
 *
 * Exported wrapper around the internal _ctx_matrix_multiply(); operands
 * are passed on in the same order.
 */
void
ctx_matrix_multiply (CtxMatrix       *result,
                     const CtxMatrix *t,
                     const CtxMatrix *s)
{
  /* all work happens in the internal implementation */
  _ctx_matrix_multiply (result, t, s);
}

/**
 * ctx_matrix_translate:
 * @matrix: matrix to modify in place
 * @x: horizontal offset
 * @y: vertical offset
 *
 * Multiply @matrix with a translation matrix, i.e. an identity matrix
 * with @x and @y in the last column of the first two rows.
 */
void
ctx_matrix_translate (CtxMatrix *matrix, float x, float y)
{
  CtxMatrix transform;

  /* start out from identity ... */
  for (int row = 0; row < 3; row++)
    for (int col = 0; col < 3; col++)
      transform.m[row][col] = (row == col) ? 1.0f : 0.0f;

  /* ... and drop in the offsets */
  transform.m[0][2] = x;
  transform.m[1][2] = y;

  _ctx_matrix_multiply (matrix, matrix, &transform);
}

/**
 * ctx_matrix_scale:
 * @matrix: matrix to modify in place
 * @x: horizontal scale factor
 * @y: vertical scale factor
 *
 * Multiply @matrix with a scaling matrix, i.e. a diagonal matrix with
 * @x, @y and 1 on the diagonal.
 */
void
ctx_matrix_scale (CtxMatrix *matrix, float x, float y)
{
  CtxMatrix transform;

  /* start out from identity ... */
  for (int row = 0; row < 3; row++)
    for (int col = 0; col < 3; col++)
      transform.m[row][col] = (row == col) ? 1.0f : 0.0f;

  /* ... and replace the first two diagonal cells */
  transform.m[0][0] = x;
  transform.m[1][1] = y;

  _ctx_matrix_multiply (matrix, matrix, &transform);
}


/**
 * ctx_matrix_rotate:
 * @matrix: matrix to modify in place
 * @angle: rotation angle, passed negated to ctx_sinf()/ctx_cosf()
 *
 * Multiply @matrix with a rotation matrix built from the sine and cosine
 * of -@angle.
 */
void
ctx_matrix_rotate (CtxMatrix *matrix, float angle)
{
  CtxMatrix transform;
  float s = ctx_sinf (-angle);
  float c = ctx_cosf (-angle);

  /* clear everything, then fill in the cells that are not zero */
  for (int row = 0; row < 3; row++)
    for (int col = 0; col < 3; col++)
      transform.m[row][col] = 0.0f;

  transform.m[0][0] = c;
  transform.m[0][1] = s;
  transform.m[1][0] = -s;
  transform.m[1][1] = c;
  transform.m[2][2] = 1.0f;

  _ctx_matrix_multiply (matrix, matrix, &transform);
}

#if 0
/* Disabled skew helpers; excluded from compilation by the surrounding
 * #if 0.
 * NOTE(review): both functions fill in only six cells of `transform`
 * (m[0..2][0..1]) and leave m[*][2] unset, unlike the translate, scale
 * and rotate helpers in this file, and they pass the operands to
 * _ctx_matrix_multiply() in the opposite order - presumably written for
 * an earlier matrix layout; confirm before re-enabling.
 */

/* Multiply @matrix with a matrix that has tan(@angle) in m[1][0]. */
static void
ctx_matrix_skew_x (CtxMatrix *matrix, float angle)
{
  CtxMatrix transform;
  float val_tan = ctx_tanf (angle);
  transform.m[0][0] =    1.0f;
  transform.m[0][1] = 0.0f;
  transform.m[1][0] = val_tan;
  transform.m[1][1] = 1.0f;
  transform.m[2][0] =    0.0f;
  transform.m[2][1] = 0.0f;
  _ctx_matrix_multiply (matrix, &transform, matrix);
}

/* Multiply @matrix with a matrix that has tan(@angle) in m[0][1]. */
static void
ctx_matrix_skew_y (CtxMatrix *matrix, float angle)
{
  CtxMatrix transform;
  float val_tan = ctx_tanf (angle);
  transform.m[0][0] =    1.0f;
  transform.m[0][1] = val_tan;
  transform.m[1][0] =    0.0f;
  transform.m[1][1] = 1.0f;
  transform.m[2][0] =    0.0f;
  transform.m[2][1] = 0.0f;
  _ctx_matrix_multiply (matrix, &transform, matrix);
}
#endif


/* Issues a CTX_IDENTITY command on `ctx` through the CTX_PROCESS_VOID
 * macro (the macro picks up the local name `ctx`).  ctx_set_matrix calls
 * this before applying a matrix, i.e. it is used to reset the current
 * transform.
 */
void
ctx_identity (Ctx *ctx)
{
  CTX_PROCESS_VOID (CTX_IDENTITY);
}



/* Issues a CTX_APPLY_TRANSFORM command carrying nine coefficients.
 * ctx_apply_matrix passes them in row-major order:
 *   a b c  = m[0][0..2]
 *   d e f  = m[1][0..2]
 *   g h i  = m[2][0..2]
 * The values are packed two per entry; the last entry is padded with 0.
 */
void
ctx_apply_transform (Ctx *ctx, float a, float b,
                     float c, float d,
                     float e, float f, float g, float h, float i)
{
  CtxEntry entries[5] =
  {
    ctx_f (CTX_APPLY_TRANSFORM, a, b),  /* command head       */
    ctx_f (CTX_CONT,            c, d),  /* continuation 1     */
    ctx_f (CTX_CONT,            e, f),  /* continuation 2     */
    ctx_f (CTX_CONT,            g, h),  /* continuation 3     */
    ctx_f (CTX_CONT,            i, 0)   /* last value + pad   */
  };
  ctx_process (ctx, entries);
}

/* Copies the coefficients of the current transform of `ctx` into the
 * supplied locations, in row-major order (a,b,c = row 0; d,e,f = row 1;
 * g,h,i = row 2).  Any of the output pointers may be NULL, in which case
 * that coefficient is skipped.
 */
void
ctx_get_transform  (Ctx *ctx, float *a, float *b,
                    float *c, float *d,
                    float *e, float *f,
                    float *g, float *h,
                    float *i)
{
  float *targets[9] = { a, b, c, d, e, f, g, h, i };

  for (int n = 0; n < 9; n++)
  {
    if (targets[n])
      *targets[n] = ctx->state.gstate.transform.m[n / 3][n % 3];
  }
}

/* Issues a CTX_SOURCE_TRANSFORM command carrying nine coefficients.
 * ctx_source_transform_matrix passes them in row-major order:
 *   a b c  = m[0][0..2]
 *   d e f  = m[1][0..2]
 *   g h i  = m[2][0..2]
 * The values are packed two per entry; the last entry is padded with 0.
 */
void
ctx_source_transform (Ctx *ctx, float a, float b,
                      float c, float d,
                      float e, float f,
                      float g, float h,
                      float i)
{
  CtxEntry entries[5] =
  {
    ctx_f (CTX_SOURCE_TRANSFORM, a, b),  /* command head       */
    ctx_f (CTX_CONT,             c, d),  /* continuation 1     */
    ctx_f (CTX_CONT,             e, f),  /* continuation 2     */
    ctx_f (CTX_CONT,             g, h),  /* continuation 3     */
    ctx_f (CTX_CONT,             i, 0)   /* last value + pad   */
  };
  ctx_process (ctx, entries);
}

/* Convenience wrapper: unpacks `matrix` row by row and forwards the nine
 * coefficients to ctx_source_transform.
 */
void
ctx_source_transform_matrix (Ctx *ctx, CtxMatrix *matrix)
{
  const float r0c0 = matrix->m[0][0], r0c1 = matrix->m[0][1], r0c2 = matrix->m[0][2];
  const float r1c0 = matrix->m[1][0], r1c1 = matrix->m[1][1], r1c2 = matrix->m[1][2];
  const float r2c0 = matrix->m[2][0], r2c1 = matrix->m[2][1], r2c2 = matrix->m[2][2];

  ctx_source_transform (ctx, r0c0, r0c1, r0c2,
                             r1c0, r1c1, r1c2,
                             r2c0, r2c1, r2c2);
}

/* Convenience wrapper: unpacks `matrix` row by row and forwards the nine
 * coefficients to ctx_apply_transform.
 */
void ctx_apply_matrix (Ctx *ctx, CtxMatrix *matrix)
{
  const float r0c0 = matrix->m[0][0], r0c1 = matrix->m[0][1], r0c2 = matrix->m[0][2];
  const float r1c0 = matrix->m[1][0], r1c1 = matrix->m[1][1], r1c2 = matrix->m[1][2];
  const float r2c0 = matrix->m[2][0], r2c1 = matrix->m[2][1], r2c2 = matrix->m[2][2];

  ctx_apply_transform (ctx, r0c0, r0c1, r0c2,
                            r1c0, r1c1, r1c2,
                            r2c0, r2c1, r2c2);
}

/* Copies the current transform stored in the graphics state of `ctx`
 * into the caller-provided `matrix`.
 */
void ctx_get_matrix (Ctx *ctx, CtxMatrix *matrix)
{
  const CtxMatrix *current = &ctx->state.gstate.transform;
  *matrix = *current;
}

/* Replaces the current transform with `matrix`: first issues an identity
 * command, then applies `matrix` on top of it.  The order of the two
 * calls matters.
 */
void ctx_set_matrix (Ctx *ctx, CtxMatrix *matrix)
{
  ctx_identity (ctx);
  ctx_apply_matrix (ctx, matrix);
}

/* Issues a CTX_ROTATE command with angle `x`; an angle of exactly 0 is a
 * no-op.  When the context has CTX_TRANSFORMATION_SCREEN_SPACE set, the
 * drawlist count is decremented again right after processing.
 * NOTE(review): presumably this drops the entry just added - confirm.
 */
void ctx_rotate (Ctx *ctx, float x)
{
  if (x != 0.0f)
  {
    CTX_PROCESS_F1 (CTX_ROTATE, x);
    if (ctx->transformation & CTX_TRANSFORMATION_SCREEN_SPACE)
      ctx->drawlist.count--;
  }
}

/* Issues a CTX_SCALE command with factors (x, y); scaling by exactly
 * (1, 1) is a no-op.  When the context has
 * CTX_TRANSFORMATION_SCREEN_SPACE set, the drawlist count is decremented
 * again right after processing.
 * NOTE(review): presumably this drops the entry just added - confirm.
 */
void ctx_scale (Ctx *ctx, float x, float y)
{
  if (x != 1.0f || y != 1.0f)
  {
    CTX_PROCESS_F (CTX_SCALE, x, y);
    if (ctx->transformation & CTX_TRANSFORMATION_SCREEN_SPACE)
      ctx->drawlist.count--;
  }
}

/* Issues a CTX_TRANSLATE command with offset (x, y); an offset of
 * exactly (0, 0) is a no-op.  When the context has
 * CTX_TRANSFORMATION_SCREEN_SPACE set, the drawlist count is decremented
 * again right after processing.
 * NOTE(review): presumably this drops the entry just added - confirm.
 */
void ctx_translate (Ctx *ctx, float x, float y)
{
  if (x != 0.0f || y != 0.0f)
  {
    CTX_PROCESS_F (CTX_TRANSLATE, x, y);
    if (ctx->transformation & CTX_TRANSFORMATION_SCREEN_SPACE)
      ctx->drawlist.count--;
  }
}

/* Returns the determinant of the 3x3 matrix `m`, computed by cofactor
 * expansion along the first row:
 *   m00 * (m11*m22 - m12*m21)
 * - m01 * (m10*m22 - m12*m20)
 * + m02 * (m10*m21 - m11*m20)
 */
static inline float
ctx_matrix_determinant (const CtxMatrix *m)
{
  float det = m->m[0][0] * (m->m[1][1] * m->m[2][2] -
                            m->m[1][2] * m->m[2][1])
              - m->m[0][1] * (m->m[1][0] * m->m[2][2] -
                              m->m [1][2] * m->m [2][0])
              + m->m[0][2] * (m->m[1][0] * m->m[2][1] -
                              m->m[1][1] * m->m[2][0]);
  return det;
}

/* Inverts the 3x3 matrix `m` in place: every element becomes the
 * matching entry of the adjugate (transposed cofactor matrix) multiplied
 * by the reciprocal of the determinant.  The determinant is not checked
 * for zero, so a singular matrix yields non-finite values.
 */
void
ctx_matrix_invert (CtxMatrix *m)
{
  const CtxMatrix src = *m;   /* untouched copy to read from */
  const float inv_det = 1.0f / ctx_matrix_determinant (m);

  /* row 0 */
  m->m[0][0] = (src.m[1][1] * src.m[2][2] - src.m[1][2] * src.m[2][1]) * inv_det;
  m->m[0][1] = (src.m[0][2] * src.m[2][1] - src.m[0][1] * src.m[2][2]) * inv_det;
  m->m[0][2] = (src.m[0][1] * src.m[1][2] - src.m[0][2] * src.m[1][1]) * inv_det;

  /* row 1 */
  m->m[1][0] = (src.m[1][2] * src.m[2][0] - src.m[1][0] * src.m[2][2]) * inv_det;
  m->m[1][1] = (src.m[0][0] * src.m[2][2] - src.m[0][2] * src.m[2][0]) * inv_det;
  m->m[1][2] = (src.m[0][2] * src.m[1][0] - src.m[0][0] * src.m[1][2]) * inv_det;

  /* row 2 */
  m->m[2][0] = (src.m[1][0] * src.m[2][1] - src.m[1][1] * src.m[2][0]) * inv_det;
  m->m[2][1] = (src.m[0][1] * src.m[2][0] - src.m[0][0] * src.m[2][1]) * inv_det;
  m->m[2][2] = (src.m[0][0] * src.m[1][1] - src.m[0][1] * src.m[1][0]) * inv_det;
}



#endif
#if CTX_AUDIO

//#include <string.h>
//#include "ctx-internal.h"
//#include "mmm.h"

#if !__COSMOPOLITAN__

#include <pthread.h>
#if CTX_ALSA
#include <alsa/asoundlib.h>
#endif



//#include <alloca.h>

#endif

#define DESIRED_PERIOD_SIZE 1000

/* Returns the size in bytes of one frame (one sample for every channel)
 * in the given PCM format; formats not listed here yield 1.
 */
int ctx_pcm_bytes_per_frame (CtxPCM format)
{
  if (format == CTX_f32S)
    return 8;
  if (format == CTX_f32 || format == CTX_s16S)
    return 4;
  if (format == CTX_s16)
    return 2;
  return 1;
}

/* File-scope PCM state shared by the queueing functions and the playback
 * code below.
 */
static float    ctx_host_freq     = 48000;     /* sample rate of the output side          */
static CtxPCM   ctx_host_format   = CTX_s16S;  /* sample format of the output side        */
static float    client_freq   = 48000;         /* sample rate of the data being queued    */
static CtxPCM   ctx_client_format = CTX_s16S;  /* sample format of the data being queued  */
static int      ctx_pcm_queued    = 0;         /* total frames queued and not yet played  */
static int      ctx_pcm_cur_left  = 0;         /* frames left in the packet at list head  */
static CtxList *ctx_pcm_list;                 /* each item is a blob: a 16 byte header whose first 32bit uint is the frame count, followed by pcm-data */


//static long int ctx_pcm_queued_ticks = 0;  /*  the number of ticks into the future
  //                                      *  we've queued audio for
                                       


/* Returns the number of interleaved channels of a PCM format:
 * 1 for the mono formats, 2 for the stereo ("S") formats and 0 for
 * anything else.
 */
int
ctx_pcm_channels (CtxPCM format)
{
  if (format == CTX_s16 || format == CTX_f32)
    return 1;
  if (format == CTX_s16S || format == CTX_f32S)
    return 2;
  return 0;
}

/* todo: only start audio thread on first write - enabling dynamic choice
 * of sample-rate? or is it better to keep to opening 48000 as a standard
 * and do better internal resampling for others?
 */

#if CTX_ALSA
/* Opens the ALSA playback device `dev` for interleaved signed 16 bit
 * little-endian output with the requested rate and channel count.
 *
 * A period size close to DESIRED_PERIOD_SIZE is requested, with a buffer
 * of four periods; playback is configured to start immediately (start
 * threshold 0).
 *
 * Returns the prepared PCM handle, or NULL when the device cannot be
 * opened.  The return codes of the individual configuration calls are
 * not checked here (best effort).
 */
static snd_pcm_t *alsa_open (char *dev, int rate, int channels)
{
   snd_pcm_hw_params_t *hwp;
   snd_pcm_sw_params_t *swp;
   snd_pcm_t *h;
   int dir;
   snd_pcm_uframes_t period_size;
   snd_pcm_uframes_t buffer_size;

   if (snd_pcm_open(&h, dev, SND_PCM_STREAM_PLAYBACK, 0) < 0)
           return NULL;

   /* hardware parameters */
   hwp = alloca(snd_pcm_hw_params_sizeof());
   memset(hwp, 0, snd_pcm_hw_params_sizeof());
   snd_pcm_hw_params_any(h, hwp);

   snd_pcm_hw_params_set_access(h, hwp, SND_PCM_ACCESS_RW_INTERLEAVED);
   snd_pcm_hw_params_set_format(h, hwp, SND_PCM_FORMAT_S16_LE);
   snd_pcm_hw_params_set_rate(h, hwp, rate, 0);
   snd_pcm_hw_params_set_channels(h, hwp, channels);

   period_size = DESIRED_PERIOD_SIZE;

   dir = 0;
   snd_pcm_hw_params_set_period_size_near(h, hwp, &period_size, &dir);
   /* read back what the device actually granted */
   snd_pcm_hw_params_get_period_size(hwp, &period_size, &dir);
   buffer_size = period_size * 4;
   snd_pcm_hw_params_set_buffer_size_near(h, hwp, &buffer_size);
   snd_pcm_hw_params(h, hwp);

   /* software parameters; clear the sw block itself (not hwp) with its
    * own size before it is filled in */
   swp = alloca(snd_pcm_sw_params_sizeof());
   memset(swp, 0, snd_pcm_sw_params_sizeof());
   snd_pcm_sw_params_current(h, swp);
   snd_pcm_sw_params_set_avail_min(h, swp, period_size);
   snd_pcm_sw_params_set_start_threshold(h, swp, 0);
   snd_pcm_sw_params(h, swp);
   snd_pcm_prepare(h);

   return h;
}

static  snd_pcm_t *h = NULL;

/* Converts one float sample to signed 16 bit, saturating instead of
 * wrapping; NaN maps to the most negative value.
 */
static int16_t ctx_alsa_f32_to_s16 (float sample)
{
  float scaled = sample * (1<<15);
  if (scaled >= 32767.0f)
    return 32767;
  if (!(scaled > -32768.0f))
    return -32768;
  return (int16_t) scaled;
}

/* Playback thread body: loops forever, waiting for the ALSA device to
 * accept data, filling a stereo s16 buffer from the packets queued in
 * ctx_pcm_list and writing it to the device.
 *
 * The buffer starts out as silence and queued audio is placed on top, so
 * when less audio is queued than the device can take the remainder is
 * played as silence.  Packets are freed once fully consumed.
 *
 * At most 1000 frames are written per iteration.
 */
static void *ctx_alsa_audio_start(Ctx *ctx)
{
  enum { ctx_alsa_max_frames = 1000 };
  (void) ctx;

  /* The audio handler is implemented as a mixer that adds data on top
   * of 0s, XXX: it should be ensured that minimal work is done when
   * there is no data available.
   */
  for (;;)
  {
    int client_channels = ctx_pcm_channels (ctx_client_format);
    int is_float = (ctx_client_format == CTX_f32 ||
                    ctx_client_format == CTX_f32S);
    /* two samples per frame; sized for the per-iteration frame limit so
     * the thread stack stays small */
    int16_t data[ctx_alsa_max_frames * 2]={0,};
    int c;

    c = snd_pcm_wait(h, 1000);

    if (c >= 0)
       c = snd_pcm_avail_update(h);

    if (c > ctx_alsa_max_frames) c = ctx_alsa_max_frames;

    if (c == -EPIPE)
      snd_pcm_prepare(h);

    if (c > 0)
    {
      int i;
      for (i = 0; i < c && ctx_pcm_cur_left; i ++)
      {
        if (ctx_pcm_list && ctx_pcm_cur_left)
        {
          uint32_t *packet_sizep = (ctx_pcm_list->data);
          uint32_t packet_size = *packet_sizep;
          int16_t left = 0, right = 0;

          if (is_float)
          {
            float *packet = (ctx_pcm_list->data);
            packet += 4;  /* skip the 16 byte packet header */
            packet += (packet_size - ctx_pcm_cur_left) * client_channels;
            left = right = ctx_alsa_f32_to_s16 (packet[0]);
            if (client_channels > 1)
              right = ctx_alsa_f32_to_s16 (packet[1]);
          }
          else // s16
          {
            int16_t *packet = (ctx_pcm_list->data);
            packet += 8;  /* skip the 16 byte packet header */
            packet += (packet_size - ctx_pcm_cur_left) * client_channels;

            left = right = packet[0];
            if (client_channels > 1)
              right = packet[1];
          }
          data[i * 2 + 0] = left;
          data[i * 2 + 1] = right;

          ctx_pcm_cur_left--;
          ctx_pcm_queued --;
          if (ctx_pcm_cur_left == 0)
          {
            /* head packet exhausted: drop it and move on to the next */
            void *old = ctx_pcm_list->data;
            ctx_list_remove (&ctx_pcm_list, old);
            free (old);
            ctx_pcm_cur_left = 0;
            if (ctx_pcm_list)
            {
              uint32_t *next_sizep = (ctx_pcm_list->data);
              ctx_pcm_cur_left = *next_sizep;
            }
          }
        }
      }

      c = snd_pcm_writei(h, data, c);
      if (c < 0)
        c = snd_pcm_recover (h, c, 0);
    }
    else
    {
      if (getenv("LYD_FATAL_UNDERRUNS"))
        {
          printf ("dying XXxx need to add API for this debug\n");
          //printf ("%i", lyd->active);
          exit(0);
        }
      fprintf (stderr, "ctx alsa underun\n");
      //exit(0);
    }
  }
}
#endif

/* Segment (exponent) lookup for mu-law compression, indexed by bits 7..14
 * of the biased sample magnitude.
 */
static const char MuLawCompressTable[256] =
{
   0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,
   4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
   5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
   5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
   6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
   6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
   6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
   6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
};

/* Compresses one signed 16 bit linear sample into an 8 bit mu-law byte
 * (sign bit, 3 bit exponent, 4 bit mantissa, all bits inverted).
 *
 * The magnitude is handled as an int so that the most negative input
 * (-32768), whose negation does not fit in 16 bits, is clipped like any
 * other full-scale sample instead of being encoded as near-silence.
 */
static unsigned char LinearToMuLawSample(int16_t sample)
{
  const int cBias = 0x84;
  const int cClip = 32635;
  int sign = (sample < 0) ? 0x80 : 0;
  int magnitude = sample;

  if (sign)
    magnitude = -magnitude;

  if (magnitude > cClip)
    magnitude = cClip;

  magnitude += cBias;

  int exponent = (int)MuLawCompressTable[(magnitude>>7) & 0xFF];
  int mantissa = (magnitude >> (exponent+3)) & 0x0F;

  int compressedByte = ~ (sign | (exponent << 4) | mantissa);

  return (unsigned char)compressedByte;
}

/* Drains up to 2000 queued frames from ctx_pcm_list, mixes each frame
 * down to mono, mu-law compresses it and writes the result to stdout as
 * an Ascii85 payload wrapped in an "ESC _ A f=<count>; ... ESC \"
 * sequence.  Nothing is written when no packet is queued.  Packets are
 * freed once fully consumed.
 */
void ctx_ctx_pcm (Ctx *ctx)
{
    enum { ctx_pcm_chunk_frames = 2000 };
    int client_channels = ctx_pcm_channels (ctx_client_format);
    int is_float = (ctx_client_format == CTX_f32 ||
                    ctx_client_format == CTX_f32S);
    /* one mu-law byte per frame, plus a few spare bytes so the encoder
     * never looks past the end of the buffer */
    uint8_t data[ctx_pcm_chunk_frames + 4]={0,};
    /* 5 output characters per 4 input bytes, plus the terminating 0 */
    char encoded[(ctx_pcm_chunk_frames / 4 + 1) * 5 + 1]="";
    int i;

    (void) ctx;

    if (!ctx_pcm_list)
      return;

    for (i = 0; i < ctx_pcm_chunk_frames && ctx_pcm_cur_left; i ++)
    {
      if (ctx_pcm_list && ctx_pcm_cur_left)
      {
        uint32_t *packet_sizep = (ctx_pcm_list->data);
        uint32_t packet_size = *packet_sizep;
        int left = 0, right = 0;
        int mono;

        if (is_float)
        {
          float *packet = (ctx_pcm_list->data);
          packet += 4;  /* skip the 16 byte packet header */
          packet += (packet_size - ctx_pcm_cur_left) * client_channels;
          left = right = packet[0] * (1<<15);
          if (client_channels > 1)
            right = packet[1] * (1<<15);
        }
        else // s16
        {
          /* samples are signed; reading them as unsigned breaks the
           * stereo average below */
          int16_t *packet = (ctx_pcm_list->data);
          packet += 8;  /* skip the 16 byte packet header */
          packet += (packet_size - ctx_pcm_cur_left) * client_channels;

          left = right = packet[0];
          if (client_channels > 1)
            right = packet[1];
        }

        mono = (left+right)/2;
        if (mono > 32767)  mono = 32767;
        if (mono < -32768) mono = -32768;
        data[i] = LinearToMuLawSample((int16_t)mono);

        ctx_pcm_cur_left--;
        ctx_pcm_queued --;
        if (ctx_pcm_cur_left == 0)
        {
          /* head packet exhausted: drop it and move on to the next */
          void *old = ctx_pcm_list->data;
          ctx_list_remove (&ctx_pcm_list, old);
          free (old);
          ctx_pcm_cur_left = 0;
          if (ctx_pcm_list)
          {
            uint32_t *next_sizep = (ctx_pcm_list->data);
            ctx_pcm_cur_left = *next_sizep;
          }
        }
      }
    }

    int encoded_len = ctx_a85enc (data, encoded, i);
    fprintf (stdout, "\033_Af=%i;", i);
    fwrite (encoded, 1, encoded_len, stdout);
    fwrite ("\033\\", 1, 2, stdout);
    fflush (stdout);
}

/* Sets up audio output for `ctx`.
 *
 * For the CTX backend the host side is switched to 8000 Hz mono s16 and
 * no thread is started.  Otherwise, when built with CTX_ALSA, the
 * "default" ALSA device is opened and the playback thread is started.
 *
 * Returns 0 on success and -1 when the ALSA device cannot be opened or
 * the playback thread cannot be created.
 */
int ctx_pcm_init (Ctx *ctx)
{
#if 0
  if (!strcmp (ctx->backend->name, "mmm") ||
      !strcmp (ctx->backend->name, "mmm-client"))
  {
    return 0;
  }
  else
#endif
  if (ctx_backend_type (ctx) == CTX_BACKEND_CTX)
  {
     ctx_host_freq = 8000;
     ctx_host_format = CTX_s16;
#if 0
     pthread_t tid;
     pthread_create(&tid, NULL, (void*)ctx_audio_start, ctx);
#endif
  }
  else
  {
#if CTX_ALSA
     pthread_t tid;
     h = alsa_open("default", ctx_host_freq, ctx_pcm_channels (ctx_host_format));
     if (!h) {
       fprintf(stderr, "ctx unable to open ALSA device (%d channels, %f Hz), dying\n",
               ctx_pcm_channels (ctx_host_format), ctx_host_freq);
       return -1;
     }
     /* without the playback thread nothing would ever drain the queue,
      * so report the failure instead of silently continuing */
     if (pthread_create(&tid, NULL,
                        (void *(*)(void *)) ctx_alsa_audio_start, ctx) != 0)
     {
       fprintf(stderr, "ctx unable to start ALSA audio thread\n");
       snd_pcm_close (h);
       h = NULL;
       return -1;
     }
#endif
  }
  return 0;
}

/* Queues `frames` frames of audio in the current client format for
 * playback.
 *
 * The first call initializes audio output.  The data is copied into a
 * newly allocated packet (16 byte header holding the frame count,
 * followed by the samples) and appended to ctx_pcm_list; when client and
 * host rates differ, a nearest-sample resampler converts to the host
 * rate while copying.
 *
 * Returns the number of input frames consumed: `frames` on success, 0
 * when there is nothing to queue, the client sample rate is not positive,
 * or memory cannot be allocated.
 */
int ctx_pcm_queue (Ctx *ctx, const int8_t *data, int frames)
{
  static int inited = 0;
#if 0
  if (!strcmp (ctx->backend->name, "mmm") ||
      !strcmp (ctx->backend->name, "mmm-client"))
  {
    return mmm_pcm_queue (ctx->backend_data, data, frames);
  }
  else
#endif
  {
    if (!inited)
    {
      ctx_pcm_init (ctx);
      inited = 1;
    }

    if (data == NULL || frames <= 0)
      return 0;

    float factor = client_freq * 1.0 / ctx_host_freq;
    if (!(factor > 0.0f))
      return 0;

    int   bpf = ctx_pcm_bytes_per_frame (ctx_client_format);
    int   same_rate = (factor > 0.999 && factor < 1.0001);
    /* when the rates match the packet holds exactly the input frames, so
     * the header count, the allocation and the copy all agree */
    int   scaled_frames = same_rate ? frames : (int)(frames / factor);

    /* never queue an empty packet: with zero frames at the head of the
     * list the consumers would stop making progress */
    if (scaled_frames <= 0)
      return frames;

    uint8_t *packet = malloc ((size_t) scaled_frames * bpf + 16);
    if (!packet)
      return 0;
    *((uint32_t *)packet) = scaled_frames;

    if (same_rate)
    {
       memcpy (packet + 16, data, (size_t) frames * bpf);
    }
    else
    {
      /* a crude nearest / sample-and hold resampler */
      int i;
      for (i = 0; i < scaled_frames; i++)
      {
        int source_frame = i * factor;
        if (source_frame >= frames)   /* guard against float rounding */
          source_frame = frames - 1;
        memcpy (packet + 16 + bpf * i, data + source_frame * bpf, bpf);
      }
    }
    if (ctx_pcm_list == NULL)     // otherwise it is another frame at front
      ctx_pcm_cur_left = scaled_frames;  // and current cur_left is valid

    ctx_list_append (&ctx_pcm_list, packet);
    ctx_pcm_queued += scaled_frames;

    return frames;
  }
}

/* Returns the number of frames currently queued for playback, as tracked
 * in ctx_pcm_queued.  `ctx` is unused in the active code path.
 */
static int ctx_pcm_get_queued_frames (Ctx *ctx)
{
#if 0
  if (!strcmp (ctx->backend->name, "mmm") ||
      !strcmp (ctx->backend->name, "mmm-client"))
  {
    return mmm_pcm_get_queued_frames (ctx->backend_data);
  }
#endif
  return ctx_pcm_queued;
}

/* Public accessor for the number of frames queued and not yet played;
 * forwards to ctx_pcm_get_queued_frames.
 */
int ctx_pcm_get_queued (Ctx *ctx)
{
  return ctx_pcm_get_queued_frames (ctx);
}

/* Returns the amount of queued audio in seconds: the number of queued
 * frames divided by the host sample rate.
 */
float ctx_pcm_get_queued_length (Ctx *ctx)
{
  const double queued_frames = ctx_pcm_get_queued_frames (ctx);
  return queued_frames / ctx_host_freq;
}

/* Returns how many frames would bring the queue up to its target fill
 * level: 400 frames for the CTX backend, 1000 frames otherwise.  Returns
 * 0 when more than the target is already queued.
 */
int ctx_pcm_get_frame_chunk (Ctx *ctx)
{
#if 0
  if (!strcmp (ctx->backend->name, "mmm") ||
      !strcmp (ctx->backend->name, "mmm-client"))
  {
    return mmm_pcm_get_frame_chunk (ctx->backend_data);
  }
#endif
  int target = 1000;

  if (ctx_backend_type (ctx) == CTX_BACKEND_CTX)
  {
    /* tuning notes for this backend:
     *   300 stuttering
     *   350 nothing
     *   380 slight buzz
     *   390 buzzing
     *   400 ok - but sometimes falling out
     *   410 buzzing
     *   420 ok - but odd latency
     *   450 buzzing
     */
    target = 400;
  }

  int queued = ctx_pcm_get_queued_frames (ctx);
  return (queued > target) ? 0 : target - queued;
}

/* Sets the sample rate of the audio that will be handed to
 * ctx_pcm_queue; ctx_pcm_queue uses it to compute the resampling factor
 * against the host rate.  `ctx` is unused in the active code path.
 */
void ctx_pcm_set_sample_rate (Ctx *ctx, int sample_rate)
{
#if 0
  if (!strcmp (ctx->backend->name, "mmm") ||
      !strcmp (ctx->backend->name, "mmm-client"))
  {
    mmm_pcm_set_sample_rate (ctx->backend_data, sample_rate);
  }
  else
#endif
    client_freq = sample_rate;
}

/* Sets the sample format of the audio that will be handed to
 * ctx_pcm_queue.  `ctx` is unused in the active code path.
 */
void ctx_pcm_set_format (Ctx *ctx, CtxPCM format)
{
#if 0
  if (!strcmp (ctx->backend->name, "mmm") ||
      !strcmp (ctx->backend->name, "mmm-client"))
  {
    mmm_pcm_set_format (ctx->backend_data, format);
  }
  else
#endif
    ctx_client_format = format;
}

/* Returns the client sample format last set with ctx_pcm_set_format
 * (CTX_s16S until changed).  `ctx` is unused in the active code path.
 */
CtxPCM ctx_pcm_get_format (Ctx *ctx)
{
#if 0
  if (!strcmp (ctx->backend->name, "mmm") ||
      !strcmp (ctx->backend->name, "mmm-client"))
  {
    return mmm_pcm_get_format (ctx->backend_data);
  }
#endif
  return ctx_client_format;
}

/* Returns the client sample rate last set with ctx_pcm_set_sample_rate
 * (48000 until changed), converted from the float it is stored as.
 * `ctx` is unused in the active code path.
 */
int ctx_pcm_get_sample_rate (Ctx *ctx)
{
#if 0
  if (!strcmp (ctx->backend->name, "mmm") ||
      !strcmp (ctx->backend->name, "mmm-client"))
  {
    return mmm_pcm_get_sample_rate (ctx->backend_data);
  }
#endif
  return client_freq;
}

#else

/* Stand-ins used when CTX_AUDIO is disabled: setters do nothing, getters
 * report 48000 Hz / CTX_s16S, queueing claims to have consumed every
 * frame, and the queued length is always 0.
 */
void ctx_pcm_set_format (Ctx *ctx, CtxPCM format) { }
void ctx_pcm_set_sample_rate (Ctx *ctx, int sample_rate) { }
int ctx_pcm_get_sample_rate (Ctx *ctx) { return 48000; }
CtxPCM ctx_pcm_get_format (Ctx *ctx) { return CTX_s16S; }
int ctx_pcm_queue (Ctx *ctx, const int8_t *data, int frames) { return frames; }
float ctx_pcm_get_queued_length (Ctx *ctx) { return 0.0; }

#endif
 /* Copyright (C) 2020 Øyvind Kolås <pippin@gimp.org>
 */

#if CTX_FORMATTER

/* Returns an upper bound for the buffer needed to Ascii85-encode
 * `input_length` bytes, including the terminating \0: five characters
 * for every started group of four input bytes, rounded up by one group.
 */
int ctx_a85enc_len (int input_length)
{
  int groups = input_length / 4 + 1;
  return groups * 5;
}

/* Ascii85-encodes `count` bytes from `srcp` into `dst` and terminates
 * the result with \0.
 *
 * Every group of four input bytes becomes five characters in the range
 * '!'..'u'.  A final partial group is padded with zero bytes for the
 * computation and the characters belonging to the padding are dropped
 * again.  The 'z' shorthand for all-zero groups is never emitted.
 *
 * `dst` must have room for ctx_a85enc_len (count) bytes.
 * Returns the number of characters written, not counting the \0.
 */
int ctx_a85enc (const void *srcp, char *dst, int count)
{
  const uint8_t *src = (const uint8_t*)srcp;
  int out_len = 0;

  if (count <= 0)
  {
    dst[0] = 0;
    return 0;
  }

  /* number of zero bytes needed to complete the last group */
  int padding = (4 - (count % 4)) % 4;

  for (int i = 0; i < (count+3)/4; i ++)
  {
    uint32_t input = 0;
    for (int j = 0; j < 4; j++)
    {
      input = (input << 8);
      /* bytes at or beyond `count` are padding and must stay zero */
      if (i*4+j < count)
        input += src[i*4+j];
    }

    uint32_t divisor = 85u * 85 * 85 * 85;
    for (int j = 0; j < 5; j++)
    {
      dst[out_len++] = (char)(((input / divisor) % 85) + '!');
      divisor /= 85;
    }
  }
  out_len -= padding;
  dst[out_len]=0;
  return out_len;
}
#endif

#if CTX_PARSER

/* Decodes up to `count` characters of Ascii85 text from `src` into `dst`
 * and terminates the result with \0.
 *
 * Characters in the range '!'..'u' are digits; '~' ends the data; every
 * other character is skipped, so the text may contain whitespace or line
 * breaks anywhere.  A final partial group of k digits is completed with
 * 'u' digits and yields k-1 bytes.  The 'z' and 'y' shorthands are not
 * expanded.
 *
 * Returns the number of decoded bytes, not counting the \0.
 */
int ctx_a85dec (const char *src, char *dst, int count)
{
  int out_len = 0;
  uint32_t val = 0;
  int k = 0;   /* digits collected for the current group */

  for (int i = 0; i < count; i ++)
  {
    int p = src[i];

    if (p == '~')
      break;
    if (p < '!' || p > 'u')
      continue;   /* skipped characters must not disturb the group value */

    val = val * 85 + (uint32_t)(p - '!');
    k++;
    if (k == 5)
    {
      for (int j = 0; j < 4; j++)
      {
        dst[out_len++] = (char)(val >> 24);
        val <<= 8;
      }
      val = 0;
      k = 0;
    }
  }

  if (k)
  {
    /* complete the group with the highest digit, then keep only the
     * bytes that are fully determined by the digits actually present */
    for (int j = k; j < 5; j++)
      val = val * 85 + 84;

    for (int j = 0; j < k-1; j++)
    {
      dst[out_len++] = (char)(val >> 24);
      val <<= 8;
    }
  }
  dst[out_len] = 0;
  return out_len;
}

#if 1
/* Returns the number of bytes that `count` characters of Ascii85 text in
 * `src` decode to, without decoding them.
 *
 * Scanning stops at '~'.  Every complete group of five digits
 * ('!'..'u') counts as four bytes, a trailing partial group of k digits
 * as k-1 bytes, and a 'z' as four bytes (it also restarts the current
 * group).  All other characters are ignored.
 */
int ctx_a85len (const char *src, int count)
{
  int bytes = 0;
  int pending = 0;   /* digits seen in the current, incomplete group */

  for (const char *p = src; p < src + count; p++)
  {
    const char ch = *p;

    if (ch == '~')
      break;

    if (ch == 'z')
    {
      bytes += 4;
      pending = 0;
      continue;
    }

    if (ch < '!' || ch > 'u')
      continue;   /* treated as whitespace */

    pending++;
    if (pending == 5)
    {
      bytes += 4;
      pending = 0;
    }
  }

  if (pending)
    bytes += pending - 1;
  return bytes;
}
#endif

#endif

#if CTX_IMPLEMENTATION

#define SHA1_IMPLEMENTATION
/* LibTomCrypt, modular cryptographic library -- Tom St Denis
 *
 * LibTomCrypt is a library that provides various cryptographic
 * algorithms in a highly modular and flexible manner.
 *
 * The library is free for all purposes without any express
 * guarantee it works.
 *
 * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
 *
 * The plain ANSIC sha1 functionality has been extracted from libtomcrypt,
 * and is included directly in the sources. /Øyvind K. - since libtomcrypt
 * is public domain the adaptations done here to make the sha1 self contained
 * also is public domain.
 */
#ifndef __SHA1_H
#define __SHA1_H
#if !__COSMOPOLITAN__
#include <inttypes.h>
#endif


int ctx_sha1_init(CtxSHA1 * sha1);

/* Allocates a SHA-1 state on the heap and initializes it.
 * Returns NULL when the allocation fails; release the state with
 * ctx_sha1_free.
 */
CtxSHA1 *ctx_sha1_new (void)
{
  CtxSHA1 *state = (CtxSHA1*)calloc (1, sizeof (CtxSHA1));
  if (state == NULL)
    return NULL;
  ctx_sha1_init (state);
  return state;
}
/* Releases a SHA-1 state obtained from ctx_sha1_new; passing NULL is
 * harmless since free accepts it.
 */
void ctx_sha1_free (CtxSHA1 *sha1)
{
  free (sha1);
}

#if 0
          CtxSHA1 sha1;
          ctx_sha1_init (&sha1);
          ctx_sha1_process(&sha1, (unsigned char*)&shape_rect, sizeof (CtxIntRectangle));
          ctx_sha1_done(&sha1, (unsigned char*)ctx_sha1_hash);
#endif

#ifdef SHA1_FF0
#undef SHA1_FF0
#endif
#ifdef SHA1_FF1
#undef SHA1_FF1
#endif

#ifdef SHA1_IMPLEMENTATION
#if !__COSMOPOLITAN__
#include <stdlib.h>
#include <string.h>
#endif

/* Stores the 64 bit value x into the 8 bytes at y, most significant
 * byte first. */
#define STORE64H(x, y)                                \
   { (y)[0] = (unsigned char)(((x)>>56)&255);         \
     (y)[1] = (unsigned char)(((x)>>48)&255);         \
     (y)[2] = (unsigned char)(((x)>>40)&255);         \
     (y)[3] = (unsigned char)(((x)>>32)&255);         \
     (y)[4] = (unsigned char)(((x)>>24)&255);         \
     (y)[5] = (unsigned char)(((x)>>16)&255);         \
     (y)[6] = (unsigned char)(((x)>>8)&255);          \
     (y)[7] = (unsigned char)((x)&255); }

/* Stores the 32 bit value x into the 4 bytes at y, most significant
 * byte first. */
#define STORE32H(x, y)                                \
     { (y)[0] = (unsigned char)(((x)>>24)&255);       \
       (y)[1] = (unsigned char)(((x)>>16)&255);       \
       (y)[2] = (unsigned char)(((x)>>8)&255);        \
       (y)[3] = (unsigned char)((x)&255); }

/* Loads 4 bytes at y, most significant byte first, into x. */
#define LOAD32H(x, y)                                 \
     { x = ((unsigned long)((y)[0] & 255)<<24) |      \
           ((unsigned long)((y)[1] & 255)<<16) |      \
           ((unsigned long)((y)[2] & 255)<<8)  |      \
           ((unsigned long)((y)[3] & 255)); }

/* rotates the hard way: 32 bit rotate left of x by y bits */
#define ROL(x, y)                                                                \
  ((((unsigned long)(x)<<(unsigned long)((y)&31)) |                              \
    (((unsigned long)(x)&0xFFFFFFFFUL)>>(unsigned long)(32-((y)&31)))) &         \
   0xFFFFFFFFUL)
#define ROLc(x, y) ROL(x,y)

/* status codes used by the hash functions below */
#define CRYPT_OK     0
#define CRYPT_ERROR  1
#define CRYPT_NOP    2

/* note: both macros evaluate their arguments more than once */
#ifndef MAX
   #define MAX(x, y) ( ((x)>(y))?(x):(y) )
#endif
#ifndef MIN
   #define MIN(x, y) ( ((x)<(y))?(x):(y) )
#endif

/* Generates the streaming "process" function of a hash.
 *
 *   func_name      name of the function to define
 *   compress_name  function that consumes one full block
 *   state_var      unused, kept for compatibility with existing callers
 *   block_size     block size of the hash in bytes
 *
 * The generated function feeds full blocks straight from the input to
 * compress_name while the internal buffer is empty; anything else is
 * collected in the buffer, which is compressed whenever it fills up.
 * It returns CRYPT_OK, the error reported by compress_name, or -1 when
 * the state is corrupt (buffer fill level beyond the buffer size).
 */
#define HASH_PROCESS(func_name, compress_name, state_var, block_size)        \
int func_name (CtxSHA1 *sha1, const unsigned char *in, unsigned long inlen)  \
{                                                                            \
    assert (sha1 != NULL);                                                   \
    assert (in != NULL);                                                     \
    if (sha1->curlen > sizeof(sha1->buf))                                    \
       return -1;                                                            \
    while (inlen > 0)                                                        \
    {                                                                        \
        int status;                                                          \
        unsigned long take;                                                  \
        if (sha1->curlen == 0 && inlen >= block_size)                        \
        {                                                                    \
           status = compress_name (sha1, (unsigned char *)in);               \
           if (status != CRYPT_OK)                                           \
              return status;                                                 \
           sha1->length += block_size * 8;                                   \
           in    += block_size;                                              \
           inlen -= block_size;                                              \
           continue;                                                         \
        }                                                                    \
        take = MIN(inlen, (block_size - sha1->curlen));                      \
        memcpy(sha1->buf + sha1->curlen, in, (size_t)take);                  \
        sha1->curlen += take;                                                \
        in    += take;                                                       \
        inlen -= take;                                                       \
        if (sha1->curlen == block_size)                                      \
        {                                                                    \
           status = compress_name (sha1, sha1->buf);                         \
           if (status != CRYPT_OK)                                           \
              return status;                                                 \
           sha1->length += 8*block_size;                                     \
           sha1->curlen = 0;                                                 \
        }                                                                    \
    }                                                                        \
    return CRYPT_OK;                                                         \
}

/**********************/

/* The per-round boolean functions of SHA-1: F0 is "choose", F2 is
 * "majority", F1 and F3 are both parity. */
#define F0(x,y,z)  (z ^ (x & (y ^ z)))
#define F1(x,y,z)  (x ^ y ^ z)
#define F2(x,y,z)  ((x & y) | (z & (x | y)))
#define F3(x,y,z)  (x ^ y ^ z)

/* Runs the SHA-1 compression function over the 64 byte block at `buf`
 * and folds the result into sha1->state.  Always returns CRYPT_OK.
 */
static int  ctx_sha1_compress(CtxSHA1 *sha1, unsigned char *buf)
{
    uint32_t a,b,c,d,e,W[80],i;

    /* load the 512 bit block big-endian into W[0..15] */
    for (i = 0; i < 16; i++) {
        LOAD32H(W[i], buf + (4*i));
    }

    /* copy state */
    a = sha1->state[0];
    b = sha1->state[1];
    c = sha1->state[2];
    d = sha1->state[3];
    e = sha1->state[4];

    /* expand it */
    for (i = 16; i < 80; i++) {
        W[i] = ROL(W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16], 1); 
    }

    /* compress */
    /* One step per macro call; instead of shuffling the five working
     * variables after every step, the calls below rotate the argument
     * order, so each loop iteration performs five steps. */
    /* round one */
    #define SHA1_FF0(a,b,c,d,e,i) e = (ROLc(a, 5) + F0(b,c,d) + e + W[i] + 0x5a827999UL); b = ROLc(b, 30);
    #define SHA1_FF1(a,b,c,d,e,i) e = (ROLc(a, 5) + F1(b,c,d) + e + W[i] + 0x6ed9eba1UL); b = ROLc(b, 30);
    #define SHA1_FF2(a,b,c,d,e,i) e = (ROLc(a, 5) + F2(b,c,d) + e + W[i] + 0x8f1bbcdcUL); b = ROLc(b, 30);
    #define SHA1_FF3(a,b,c,d,e,i) e = (ROLc(a, 5) + F3(b,c,d) + e + W[i] + 0xca62c1d6UL); b = ROLc(b, 30);
 
    for (i = 0; i < 20; ) {
       SHA1_FF0(a,b,c,d,e,i++);
       SHA1_FF0(e,a,b,c,d,i++);
       SHA1_FF0(d,e,a,b,c,i++);
       SHA1_FF0(c,d,e,a,b,i++);
       SHA1_FF0(b,c,d,e,a,i++);
    }

    /* round two */
    for (; i < 40; )  { 
       SHA1_FF1(a,b,c,d,e,i++);
       SHA1_FF1(e,a,b,c,d,i++);
       SHA1_FF1(d,e,a,b,c,i++);
       SHA1_FF1(c,d,e,a,b,i++);
       SHA1_FF1(b,c,d,e,a,i++);
    }

    /* round three */
    for (; i < 60; )  { 
       SHA1_FF2(a,b,c,d,e,i++);
       SHA1_FF2(e,a,b,c,d,i++);
       SHA1_FF2(d,e,a,b,c,i++);
       SHA1_FF2(c,d,e,a,b,i++);
       SHA1_FF2(b,c,d,e,a,i++);
    }

    /* round four */
    for (; i < 80; )  { 
       SHA1_FF3(a,b,c,d,e,i++);
       SHA1_FF3(e,a,b,c,d,i++);
       SHA1_FF3(d,e,a,b,c,i++);
       SHA1_FF3(c,d,e,a,b,i++);
       SHA1_FF3(b,c,d,e,a,i++);
    }

    #undef SHA1_FF0
    #undef SHA1_FF1
    #undef SHA1_FF2
    #undef SHA1_FF3

    /* add the working variables back into the running state */
    sha1->state[0] = sha1->state[0] + a;
    sha1->state[1] = sha1->state[1] + b;
    sha1->state[2] = sha1->state[2] + c;
    sha1->state[3] = sha1->state[3] + d;
    sha1->state[4] = sha1->state[4] + e;

    return CRYPT_OK;
}

/**
   Reset a hash state so that a new message can be hashed
   @param sha1   The hash state to initialize, must not be NULL
   @return CRYPT_OK, always
*/
int ctx_sha1_init(CtxSHA1 * sha1)
{
   /* the five SHA-1 initial chaining values */
   static const uint32_t initial_state[5] =
   {
     0x67452301UL, 0xefcdab89UL, 0x98badcfeUL, 0x10325476UL, 0xc3d2e1f0UL
   };

   assert(sha1 != NULL);
   for (int word = 0; word < 5; word++)
     sha1->state[word] = initial_state[word];
   sha1->curlen = 0;
   sha1->length = 0;
   return CRYPT_OK;
}

/**
   Process a block of memory through the hash; defines ctx_sha1_process
   by instantiating HASH_PROCESS with a block size of 64 bytes.
   @param sha1   The hash state
   @param in     The data to hash
   @param inlen  The length of the data (octets)
   @return CRYPT_OK if successful
*/
HASH_PROCESS(ctx_sha1_process, ctx_sha1_compress, sha1, 64)

/**
   Finish the hash and produce the digest
   @param sha1 The hash state
   @param out  [out] Receives the 20 byte digest
   @return CRYPT_OK if successful, -1 if the state is corrupt
*/
int ctx_sha1_done(CtxSHA1 * sha1, unsigned char *out)
{
    assert(sha1 != NULL);
    assert(out != NULL);

    if (sha1->curlen >= sizeof(sha1->buf)) {
       return -1;
    }

    /* account for the bytes still sitting in the buffer */
    sha1->length += sha1->curlen * 8;

    /* the mandatory single '1' bit, as a whole byte */
    sha1->buf[sha1->curlen] = (unsigned char)0x80;
    sha1->curlen++;

    /* no room left for the 8 length bytes: zero-fill this block, compress
     * it and continue the padding in a fresh block */
    if (sha1->curlen > 56) {
        for (; sha1->curlen < 64; sha1->curlen++) {
            sha1->buf[sha1->curlen] = (unsigned char)0;
        }
        ctx_sha1_compress(sha1, sha1->buf);
        sha1->curlen = 0;
    }

    /* zero-fill up to the position of the length field */
    for (; sha1->curlen < 56; sha1->curlen++) {
        sha1->buf[sha1->curlen] = (unsigned char)0;
    }

    /* message length in bits, big-endian, then the final compression */
    STORE64H(sha1->length, sha1->buf+56);
    ctx_sha1_compress(sha1, sha1->buf);

    /* emit the five state words big-endian */
    for (int word = 0; word < 5; word++) {
        STORE32H(sha1->state[word], out+(4*word));
    }
    return CRYPT_OK;
}
#endif

#endif
#endif
#ifdef CTX_X86_64

enum
{
  ARCH_X86_INTEL_FEATURE_MMX      = 1 << 23,
  ARCH_X86_INTEL_FEATURE_XMM      = 1 << 25,
  ARCH_X86_INTEL_FEATURE_XMM2     = 1 << 26,
};

enum
{
  ARCH_X86_INTEL_FEATURE_PNI      = 1 << 0,
  ARCH_X86_INTEL_FEATURE_SSSE3    = 1 << 9,
  ARCH_X86_INTEL_FEATURE_FMA      = 1 << 12,
  ARCH_X86_INTEL_FEATURE_SSE4_1   = 1 << 19,
  ARCH_X86_INTEL_FEATURE_SSE4_2   = 1 << 20,
  ARCH_X86_INTEL_FEATURE_MOVBE    = 1 << 22,
  ARCH_X86_INTEL_FEATURE_POPCNT   = 1 << 23,
  ARCH_X86_INTEL_FEATURE_XSAVE    = 1 << 26,
  ARCH_X86_INTEL_FEATURE_OSXSAVE  = 1 << 27,
  ARCH_X86_INTEL_FEATURE_AVX      = 1 << 28,
  ARCH_X86_INTEL_FEATURE_F16C     = 1 << 29
};

enum
{
  ARCH_X86_INTEL_FEATURE_BMI1     = 1 << 3,
  ARCH_X86_INTEL_FEATURE_BMI2     = 1 << 8,
  ARCH_X86_INTEL_FEATURE_AVX2     = 1 << 5,
};

/* Executes the cpuid instruction.
 *   a  is loaded into EAX (the leaf); constraint "0" ties it to output 0.
 *   b  is loaded into ECX (the sub-leaf); constraint "2" ties it to output 2.
 * The four trailing arguments are lvalues receiving EAX, EBX, ECX and EDX.
 */
#define cpuid(a,b,eax,ebx,ecx,edx)                     \
  __asm__("cpuid"                                           \
           : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) \
           : "0" (a), "2" (b)  )

/* Returns the x86_64 microarchitecture level of the running CPU as
 * determined with cpuid:
 *
 *   0  MMX or SSE missing
 *   1  MMX and SSE present
 *   2  additionally SSSE3, SSE4.1, SSE4.2 and POPCNT
 *   3  additionally AVX, OSXSAVE, XSAVE, FMA, F16C, MOVBE (leaf 1) and
 *      AVX2, BMI1, BMI2 (leaf 7)
 *
 * The first missing feature ends the probe and the level reached so far is
 * returned.
 */
int
ctx_x86_64_level (void)
{
  int level = 0;
  uint32_t eax, ebx, ecx, edx;
  cpuid (1, 0, eax, ebx, ecx, edx);

  if ((edx & ARCH_X86_INTEL_FEATURE_MMX) == 0)   return level;
  if ((edx & ARCH_X86_INTEL_FEATURE_XMM) == 0)   return level;
  level = 1;

  if ((ecx & ARCH_X86_INTEL_FEATURE_SSSE3)==0)   return level;
  if ((ecx & ARCH_X86_INTEL_FEATURE_SSE4_1)==0)  return level;
  if ((ecx & ARCH_X86_INTEL_FEATURE_SSE4_2)==0)  return level;
  if ((ecx & ARCH_X86_INTEL_FEATURE_POPCNT)==0)  return level;
  level = 2;

  if ((ecx & ARCH_X86_INTEL_FEATURE_AVX)==0)     return level;
  if ((ecx & ARCH_X86_INTEL_FEATURE_OSXSAVE)==0) return level;
  if ((ecx & ARCH_X86_INTEL_FEATURE_XSAVE)==0)   return level;
  if ((ecx & ARCH_X86_INTEL_FEATURE_FMA)==0)     return level;
  if ((ecx & ARCH_X86_INTEL_FEATURE_F16C)==0)    return level;
  if ((ecx & ARCH_X86_INTEL_FEATURE_MOVBE)==0)   return level;

  /* leaf 0 reports the highest supported standard leaf in EAX */
  cpuid (0, 0, eax, ebx, ecx, edx);
  if (eax >= 7)
  {
    /* AVX2/BMI1/BMI2 are structured extended features: leaf 7, sub-leaf 0,
     * register EBX.  (Leaf 2 holds cache descriptors, not feature flags.) */
    cpuid (7, 0, eax, ebx, ecx, edx);
    if ((ebx & ARCH_X86_INTEL_FEATURE_AVX2)==0)  return level;
    if ((ebx & ARCH_X86_INTEL_FEATURE_BMI1)==0)  return level;
    if ((ebx & ARCH_X86_INTEL_FEATURE_BMI2)==0)  return level;
    level = 3;
  }
  return level;
}

#endif

#ifdef CTX_ARMV7L

#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <elf.h>


/* Scans /proc/self/auxv for the hardware capability and platform entries.
 *
 * Returns 1 when the AT_HWCAP entry has bit 4096 set, 0 otherwise (also when
 * the file cannot be opened).  If armv is non-NULL it receives the ARM level
 * derived from the AT_PLATFORM string ("v6l" -> 6, "v7l" -> 7, "v8l" -> 8),
 * defaulting to 5.
 */
int ctx_arm_has_neon (int *armv)
{
  /* TODO : add or hardcode the other ways it can be on arm, where
   *        this info comes from the system and not from running cpu
   *        instructions
   */
  int neon_found = 0;
  int detected_level = 5;
  int auxv_fd = open ("/proc/self/auxv", O_RDONLY);
  Elf32_auxv_t entry;

  if (auxv_fd >= 0)
  {
    for (;;)
    {
      /* stop at end of file or on a short/failed read */
      if (read (auxv_fd, &entry, sizeof (Elf32_auxv_t)) != sizeof (Elf32_auxv_t))
        break;

      if (entry.a_type == AT_HWCAP)
      {
        if (entry.a_un.a_val & 4096)
          neon_found = 1;
        continue;
      }
      if (entry.a_type != AT_PLATFORM)
        continue;

      /* the value of AT_PLATFORM is the address of the platform string */
      const char *platform = (const char*)entry.a_un.a_val;
      if (!strncmp (platform, "v6l", 3))
        detected_level = 6;
      else if (!strncmp (platform, "v7l", 3))
        detected_level = 7;
      else if (!strncmp (platform, "v8l", 3))
        detected_level = 8;
    }
    close (auxv_fd);
  }

  if (armv)
    *armv = detected_level;
  return neon_found;
}
#endif
#include <stdio.h>
#include <string.h>

#if CTX_FORMATTER

/* yEnc-style encoder.
 *
 * Encodes count bytes from src into dst: every byte is offset by 42
 * (mod 256); results that are critical for the transport are emitted as '='
 * followed by the value offset by a further 64 (mod 256).
 *
 * dst is NUL terminated; it must have room for 2 * count + 1 bytes.
 * Returns the number of bytes written, excluding the terminator.
 */
static int ctx_yenc (const char *src, char *dst, int count)
{
  int out_len = 0;
  for (int i = 0; i < count; i ++)
  {
    /* go through unsigned char: with a signed plain char, bytes >= 0x80
     * would yield a negative o that never matches the cases below (0xff in
     * particular) and would be emitted unescaped */
    int o = ((unsigned char) src[i] + 42) % 256;
    switch (o)
    {
      case 0x00: //null
      case 0x20: //space// but better safe
      case 0x0A: //lf   // than sorry
      case 0x0D: //cr
      case 0x09: //tab  // not really needed
      case 0x10: //datalink escape (used by ctx)
      case 0x11: //xoff
      case 0x13: //xon
      case 0x1b: //
      case 0xff: //
      case 0x3D: //=
        dst[out_len++] = '=';
        o = (o + 64) % 256;
        /* FALLTHROUGH */
      default:
        dst[out_len++] = (char) o;
        break;
    }
  }
  dst[out_len]=0;
  return out_len;
}
#endif

#if CTX_PARSER
/* yEnc-style decoder, the inverse of ctx_yenc.
 *
 * Decodes at most count bytes from tmp_src into dst and NUL terminates dst.
 * Newline, escape, carriage return and NUL bytes in the input are skipped.
 * The sequence "=y" ends decoding early.  A '=' that is the very last input
 * byte is an incomplete escape and is dropped.
 *
 * The output never gets ahead of the input, so decoding in place
 * (dst == tmp_src) works.  Returns the number of decoded bytes, excluding the
 * terminator.
 */
static int ctx_ydec (const char *tmp_src, char *dst, int count)
{
  const char *src = tmp_src;
  int out_len = 0;
  for (int i = 0; i < count; i ++)
  {
    int o = src[i];
    switch (o)
    {
      case '=':
        i++;
        /* truncated escape: never read past the count bytes we were given */
        if (i >= count)
          break;
        o = src[i];
        if (o == 'y')
        {
          dst[out_len]=0;
          return out_len;
        }
        dst[out_len++] = (char)(unsigned char)(o - 42 - 64);
        break;
      case '\n':
      case '\033': /* ESC; spelled in octal since '\e' is not standard C */
      case '\r':
      case '\0':
        break;
      default:
        dst[out_len++] = (char)(unsigned char)(o - 42);
        break;
    }
  }
  dst[out_len]=0;
  return out_len;
}
#endif

#if 0
int main (){
  char *input="this is a testæøåÅØ'''\"!:_asdac\n\r";
  char  encoded[256]="";
  char  decoded[256]="";
  int   in_len = strlen (input);
  int   out_len;
  int   dec_len;

  printf ("input: %s\n", input);

  out_len = ctx_yenc (input, encoded, in_len);
  printf ("encoded: %s\n", encoded);

  dec_len = ydec (encoded, encoded, out_len);

  printf ("decoded: %s\n", encoded);

  return 0;
}
#endif
#ifndef __CTX_UTIL_H
#define __CTX_UTIL_H


/* Returns 1 when str consists solely of ASCII digits and '.' characters and
 * contains at least one digit, 0 otherwise.  No sign, exponent or whitespace
 * is accepted; the number of dots is not restricted.
 */
static int ctx_str_is_number (const char *str)
{
  int digit_seen = 0;
  const char *cursor = str;

  while (*cursor)
  {
    char ch = *cursor++;
    if (ch == '.')
      continue;
    if (ch < '0' || ch > '9')
      return 0;
    digit_seen = 1;
  }
  return digit_seen;
}

#if CTX_GET_CONTENTS

/* One in-memory buffer registered under a path with ctx_register_contents. */
typedef struct CtxFileContent
{
  char *path;              /* key compared with strcmp when registering */
  unsigned char *contents; /* the registered bytes */
  long  length;            /* length passed at registration */
  int   free_data;         /* non-zero: contents is free()d when replaced */
} CtxFileContent;

/* List of CtxFileContent entries, appended to by ctx_register_contents. */
CtxList *registered_contents = NULL;

/* Registers an in-memory buffer under path.
 *
 * path      key of the entry; the string is copied.
 * contents  the bytes to register; the pointer is stored, not copied.
 * length    number of bytes in contents.
 * free_data non-zero hands ownership of contents to the registry, which
 *           free()s it when the entry is later replaced.
 *
 * Registering an already known path replaces that entry's contents.
 */
void
ctx_register_contents (const char *path,
                       const unsigned char *contents,
                       long length,
                       int  free_data)
{
  // if (path[0] != '/') && strchr(path, ':')) 
  //   with this check regular use is faster, but we lose
  //   generic filesystem overrides..
  for (CtxList *l = registered_contents; l; l = l->next)
  {
    CtxFileContent *c = (CtxFileContent*)l->data;
    if (c->path && !strcmp (c->path, path))
    {
       /* do not free a buffer that is being registered again */
       if (c->free_data && c->contents != contents)
       {
         free (c->contents);
       }
       c->free_data = free_data;
       c->contents = (unsigned char*)contents;
       c->length = length;
       return;
    }
  }

  size_t path_size = strlen (path) + 1;
  CtxFileContent *c = (CtxFileContent*)calloc (1, sizeof (CtxFileContent));
  char *path_copy = (char*)malloc (path_size);
  if (!c || !path_copy)
  {
    /* out of memory: nothing is registered; honour the ownership transfer */
    free (c);
    free (path_copy);
    if (free_data)
      free ((void*)contents);
    return;
  }
  memcpy (path_copy, path, path_size);
  /* without a stored path the entry could never be matched again, and the
   * strcmp above would be handed a NULL pointer */
  c->path      = path_copy;
  c->free_data = free_data;
  c->contents  = (unsigned char*)contents;
  c->length    = length;
  ctx_list_append (&registered_contents, c);
}

/* Writes length bytes of contents to the file at path, replacing it.
 * A negative length means contents is a NUL terminated string whose strlen
 * is used.  Nothing happens when the file cannot be opened; write errors
 * are not reported.
 */
void
_ctx_file_set_contents (const char     *path,
                        const unsigned char  *contents,
                        long            length)
{
  FILE *out = fopen (path, "wb");
  if (out == NULL)
    return;

  long n_bytes = length;
  if (n_bytes < 0)
    n_bytes = strlen ((const char*)contents);

  fwrite (contents, 1, n_bytes, out);
  fclose (out);
}

/* Reads up to max_len bytes from the start of the file at path.
 *
 * path      file to read.
 * contents  receives a malloc()ed buffer holding the data followed by a NUL
 *           byte; the caller free()s it.  Only written on success.
 * length    if non-NULL, receives the number of data bytes.
 * max_len   upper bound on the number of bytes read.
 *
 * Returns 0 on success, -1 when the file cannot be opened, its size cannot
 * be determined, memory is exhausted or the read comes up short.
 */
static int
___ctx_file_get_contents (const char     *path,
                          unsigned char **contents,
                          long           *length,
                          long            max_len)
{
  FILE *file;
  long  size;
  char *buffer;
  file = fopen (path, "rb");
  if (!file)
    { return -1; }

  if (fseek (file, 0, SEEK_END) != 0)
    {
      fclose (file);
      return -1;
    }
  size = ftell (file);
  if (size > max_len)
    { size = max_len; }
  /* ftell reports failure as -1 (and max_len could be negative); a negative
   * size would turn into a gigantic fread into a tiny buffer */
  if (size < 0)
    {
      fclose (file);
      return -1;
    }

  if (length)
    { *length = size; }
  rewind (file);
  buffer = (char*)malloc ((size_t) size + 8);
  if (!buffer)
    {
      fclose (file);
      return -1;
    }
  if (fread (buffer, 1, (size_t) size, file) != (size_t) size)
    {
      fclose (file);
      free (buffer);
      return -1;
    }
  fclose (file);
  buffer[size] = 0;
  *contents = (unsigned char*) buffer;
  return 0;
}

/* Reads the file at path like ___ctx_file_get_contents, with the amount
 * read capped at 1 GiB.  Returns that function's result.
 */
static int
__ctx_file_get_contents (const char     *path,
                        unsigned char **contents,
                        long           *length)
{
  const long size_cap = 1024*1024*1024;
  return ___ctx_file_get_contents (path, contents, length, size_cap);
}

#if !__COSMOPOLITAN__
#include <limits.h>
#endif




#endif


#endif


/* Looks up hash in the key database, newest entry first, and returns the
 * value of the first match.  Returns -0.0 when the key is not present.
 */
static float ctx_state_get (CtxState *state, uint32_t hash)
{
  int i = state->gstate.keydb_pos;
  while (--i >= 0)
    {
      if (state->keydb[i].key != hash)
        continue;
      return state->keydb[i].value;
    }
  return -0.0;
}

/* Stores value under key in the key database.
 *
 * For keys other than CTX_new_state: nothing is done when the key already
 * reads back as value; an entry for the key found above the most recent
 * CTX_new_state marker is overwritten in place.  Otherwise (and always for
 * CTX_new_state itself) a new entry is appended, unless the database already
 * holds CTX_MAX_KEYDB entries, in which case the value is dropped.
 */
static void ctx_state_set (CtxState *state, uint32_t key, float value)
{
  if (key != CTX_new_state)
    {
      if (ctx_state_get (state, key) == value)
        return;

      int i = state->gstate.keydb_pos - 1;
      while (i >= 0 && state->keydb[i].key != CTX_new_state)
        {
          if (state->keydb[i].key == key)
            {
              state->keydb[i].value = value;
              return;
            }
          i--;
        }
    }

  int slot = state->gstate.keydb_pos;
  if (slot >= CTX_MAX_KEYDB)
    return;
  state->keydb[slot].key   = key;
  state->keydb[slot].value = value;
  state->gstate.keydb_pos  = slot + 1;
}


/* Key database values inside [CTX_KEYDB_STRING_START, CTX_KEYDB_STRING_END]
 * are not plain numbers: they encode an offset into the string pool
 * (value - CTX_KEYDB_STRING_START), see ctx_float_to_string_index. */
#define CTX_KEYDB_STRING_START (-90000.0)
#define CTX_KEYDB_STRING_END   (CTX_KEYDB_STRING_START + CTX_STRINGPOOL_SIZE)

/* Returns 1 when val lies in the inclusive range reserved for string pool
 * references, 0 otherwise (NaN included). */
static int ctx_float_is_string (float val)
{
  if (!(val >= CTX_KEYDB_STRING_START))
    return 0;
  if (!(val <= CTX_KEYDB_STRING_END))
    return 0;
  return 1;
}

/* Converts a key database value into a string pool offset.
 * Returns -1 when val is not in the range reserved for string references. */
static int ctx_float_to_string_index (float val)
{
  if (!ctx_float_is_string (val))
    return -1;
  return (int)(val - CTX_KEYDB_STRING_START);
}

/* Encodes a string pool offset as a key database value; the inverse of
 * ctx_float_to_string_index. */
static float ctx_string_index_to_float (int index)
{
  float encoded = CTX_KEYDB_STRING_START + index;
  return encoded;
}

/* Returns a pointer into the string pool for the blob stored under key, or
 * NULL when the key's value is not a string pool reference (plain numbers
 * and unset keys).  The length of the blob is not reported.
 */
static void *ctx_state_get_blob (CtxState *state, uint32_t key)
{
  int pool_offset = ctx_float_to_string_index (ctx_state_get (state, key));

  if (pool_offset < 0)
    return NULL;   // plain number or unset; not formatted as a string
  return &state->stringpool[pool_offset];
}

/* Returns the string stored under key, or NULL when the key holds no blob
 * or the blob starts with byte 127 (the marker ctx_state_set_color writes
 * into stored colors).
 */
static const char *ctx_state_get_string (CtxState *state, uint32_t key)
{
  const char *text = (char*)ctx_state_get_blob (state, key);

  if (text == NULL)
    return NULL;
  return (text[0] == 127) ? NULL : text;
}


/* Appends len bytes of data plus a terminating NUL to the string pool and
 * stores a reference to them under key.
 *
 * When the pool cannot take len + 1 more bytes the blob is dropped and a
 * message is logged; the key keeps its previous value.
 */
static void ctx_state_set_blob (CtxState *state, uint32_t key, uint8_t *data, int len)
{
  int idx = state->gstate.stringpool_pos;

  /* len + 1 bytes are written below (payload and terminator), so the
   * terminator has to be part of the bound check; checking len alone let the
   * NUL land one byte past CTX_STRINGPOOL_SIZE */
  if (len < 0 || idx + len + 1 > CTX_STRINGPOOL_SIZE)
  {
    ctx_log ("blowing varpool size [%c..]\n", data[0]);
    return;
  }

  memcpy (&state->stringpool[idx], data, len);
  state->gstate.stringpool_pos+=len;
  state->stringpool[state->gstate.stringpool_pos++]=0;
  ctx_state_set (state, key, ctx_string_index_to_float (idx));
}

/* Stores string under key.
 *
 * Nothing is done when the key already holds an equal string.  Strings that
 * ctx_str_is_number accepts are stored as their numeric value instead; all
 * others are appended to the string pool.
 */
static void ctx_state_set_string (CtxState *state, uint32_t key, const char *string)
{
  int previous_idx = ctx_float_to_string_index (ctx_state_get (state, key));

  if (previous_idx >= 0)
  {
    const char *current = ctx_state_get_string (state, key);
    if (current != NULL && strcmp (current, string) == 0)
      return;
  }

  if (ctx_str_is_number (string))
  {
    ctx_state_set (state, key, strtod (string, NULL));
    return;
  }
  // should do same with color

  // XXX should special case when the string modified is at the
  //     end of the stringpool.
  //
  //     for clips the behavior is however ideal, since
  //     we can have more than one clip per save/restore level
  ctx_state_set_blob (state, key, (uint8_t*)string, strlen(string));
}

/* Copies the color stored under key into *color.
 * Returns 0 on success, -1 when the key holds no blob or the blob's magic
 * field is not 127 (i.e. it was not written by ctx_state_set_color).
 */
static int ctx_state_get_color (CtxState *state, uint32_t key, CtxColor *color)
{
  CtxColor *stored = (CtxColor*)ctx_state_get_blob (state, key);

  if (stored == NULL || stored->magic != 127)
    return -1;
  *color = *stored;
  return 0;
}

/* Stores a copy of *color, tagged with magic 127, under key.  Nothing is
 * stored when the key already holds a byte-identical color.
 */
static void ctx_state_set_color (CtxState *state, uint32_t key, CtxColor *color)
{
  CtxColor tagged;
  CtxColor previous;

  tagged = *color;
  tagged.magic = 127;

  if (ctx_state_get_color (state, key, &previous) == 0 &&
      memcmp (&tagged, &previous, sizeof (tagged)) == 0)
    return;

  ctx_state_set_blob (state, key, (uint8_t*)&tagged, sizeof (CtxColor));
}

/* Public accessors for the key database of a context; each forwards to the
 * corresponding ctx_state_* function on ctx->state. */

/* Returns the string stored under hash, or NULL if there is none. */
const char *ctx_get_string (Ctx *ctx, uint32_t hash)
{
  return ctx_state_get_string (&ctx->state, hash);
}
/* Returns the value stored under hash; -0.0 when the key is not set. */
float ctx_get_float (Ctx *ctx, uint32_t hash)
{
  return ctx_state_get (&ctx->state, hash);
}
/* Like ctx_get_float, with the value converted to int on return. */
int ctx_get_int (Ctx *ctx, uint32_t hash)
{
  return ctx_state_get (&ctx->state, hash);
}
/* Stores a numeric value under hash. */
void ctx_set_float (Ctx *ctx, uint32_t hash, float value)
{
  ctx_state_set (&ctx->state, hash, value);
}
/* Stores a string under hash (numeric looking strings are stored as numbers,
 * see ctx_state_set_string). */
void ctx_set_string (Ctx *ctx, uint32_t hash, const char *value)
{
  ctx_state_set_string (&ctx->state, hash, value);
}
/* Stores a copy of *color under hash. */
void ctx_set_color (Ctx *ctx, uint32_t hash, CtxColor *color)
{
  ctx_state_set_color (&ctx->state, hash, color);
}
/* Fetches the color stored under hash into *color; 0 on success, -1 if the
 * key does not hold a color. */
int  ctx_get_color (Ctx *ctx, uint32_t hash, CtxColor *color)
{
  return ctx_state_get_color (&ctx->state, hash, color);
}
/* Returns non-zero when hash reads back as something other than zero.
 * Note that -0.0f compares equal to 0.0f, so a key explicitly set to 0 is
 * indistinguishable from an unset key here. */
int ctx_is_set (Ctx *ctx, uint32_t hash)
{
  return ctx_get_float (ctx, hash) != -0.0f;
}
/* Currently identical to ctx_is_set. */
int ctx_is_set_now (Ctx *ctx, uint32_t hash)
{
  return ctx_is_set (ctx, hash);
}
#ifndef __CTX_COLOR
#define __CTX_COLOR

/* Returns the number of components (alpha included) a color of the given
 * model carries, or 0 for a model not listed here.
 */
int ctx_color_model_get_components (CtxColorModel model)
{
  switch (model)
    {
      case CTX_GRAY:
        return 1;
      case CTX_GRAYA:
      case CTX_GRAYA_A:
        /* gray + alpha: two components, in line with RGBA -> 4 and
         * CMYKA -> 5 below and with ctx_color_raw reading components[1] */
        return 2;
      case CTX_RGB:
      case CTX_LAB:
      case CTX_LCH:
      case CTX_DRGB:
        return 3;
      case CTX_CMYK:
      case CTX_DCMYK:
      case CTX_LABA:
      case CTX_LCHA:
      case CTX_RGBA:
      case CTX_DRGBA:
      case CTX_RGBA_A:
      case CTX_RGBA_A_DEVICE:
        return 4;
      case CTX_DCMYKA:
      case CTX_CMYKA:
      case CTX_CMYKA_A:
      case CTX_DCMYKA_A:
        return 5;
    }
  return 0;
}

#if CTX_U8_TO_FLOAT_LUT
float ctx_u8_float[256];
#endif

/* Allocates a CtxColor with ctx_calloc; release it with ctx_color_free. */
CtxColor *ctx_color_new (void)
{
  CtxColor *color = (CtxColor*)ctx_calloc (sizeof (CtxColor), 1);
  return color;
}

/* Returns non-zero when the alpha of color is at most 0.001.  Reads the
 * alpha field directly, without converting from other representations. */
int ctx_color_is_transparent (CtxColor *color)
{
  return color->alpha <= 0.001f;
}


/* Releases a color obtained from ctx_color_new.
 * NOTE(review): uses free() while allocation goes through ctx_calloc --
 * confirm the two are compatible. */
void ctx_color_free (CtxColor *color)
{
  free (color);
}

/* Makes color an 8 bit per channel RGBA color; with color management
 * enabled the color is tagged with the state's device space. */
static void ctx_color_set_RGBA8 (CtxState *state, CtxColor *color, uint8_t r, uint8_t g, uint8_t b, uint8_t a)
{
  const uint8_t channels[4] = {r, g, b, a};

  color->valid    = CTX_VALID_RGBA_U8;
  color->original = color->valid;
  for (int i = 0; i < 4; i++)
    color->rgba[i] = channels[i];
#if CTX_ENABLE_CM
  color->space = state->gstate.device_space;
#endif
}

#if 0
static void ctx_color_set_RGBA8_ (CtxColor *color, const uint8_t *in)
{
  ctx_color_set_RGBA8 (color, in[0], in[1], in[2], in[3]);
}
#endif

/* Makes color a gray + alpha color; state is not used. */
static void ctx_color_set_graya (CtxState *state, CtxColor *color, float gray, float alpha)
{
  color->valid    = CTX_VALID_GRAYA;
  color->original = color->valid;
  color->l        = gray;
  color->alpha    = alpha;
}
#if 0
static void ctx_color_set_graya_ (CtxColor *color, const float *in)
{
  return ctx_color_set_graya (color, in[0], in[1]);
}
#endif

/* Makes color a floating point RGBA color.
 * With color management the components are stored as user RGB in the
 * state's rgb_space; without it they are stored directly as device RGB.
 */
void ctx_color_set_rgba (CtxState *state, CtxColor *color, float r, float g, float b, float a)
{
#if CTX_ENABLE_CM
  color->valid    = CTX_VALID_RGBA;
  color->original = color->valid;
  color->red      = r;
  color->green    = g;
  color->blue     = b;
  color->space    = state->gstate.rgb_space;
#else
  color->valid        = CTX_VALID_RGBA_DEVICE;
  color->original     = color->valid;
  color->device_red   = r;
  color->device_green = g;
  color->device_blue  = b;
#endif
  color->alpha = a;
}

/* Makes color a device RGBA color.  Without color management this is the
 * same as ctx_color_set_rgba; with it, the color is tagged with the state's
 * device space. */
static void ctx_color_set_drgba (CtxState *state, CtxColor *color, float r, float g, float b, float a)
{
#if !CTX_ENABLE_CM
  ctx_color_set_rgba (state, color, r, g, b, a);
#else
  color->valid        = CTX_VALID_RGBA_DEVICE;
  color->original     = color->valid;
  color->device_red   = r;
  color->device_green = g;
  color->device_blue  = b;
  color->alpha        = a;
  color->space        = state->gstate.device_space;
#endif
}

#if 0
static void ctx_color_set_rgba_ (CtxState *state, CtxColor *color, const float *in)
{
  ctx_color_set_rgba (color, in[0], in[1], in[2], in[3]);
}
#endif

/* the baseline conversions we have whether CMYK support is enabled or not,
 * providing an effort at right rendering
 */
/* Naive CMYK -> RGB: each channel is (1 - ink) * (1 - key). */
static void ctx_cmyk_to_rgb (float c, float m, float y, float k, float *r, float *g, float *b)
{
  const float not_key = 1.0f-k;

  *r = (1.0f-c) * not_key;
  *g = (1.0f-m) * not_key;
  *b = (1.0f-y) * not_key;
}

/* Naive RGB -> CMYK: key is the smallest of the three complements, the
 * inks are the complements with the key removed and rescaled.  When key is
 * not below 1 (pure black) all inks are 0.
 */
void ctx_rgb_to_cmyk (float r, float g, float b,
                      float *c_out, float *m_out, float *y_out, float *k_out)
{
  float cyan    = 1.0f - r;
  float magenta = 1.0f - g;
  float yellow  = 1.0f - b;
  float key     = ctx_minf (cyan, ctx_minf (yellow, magenta) );

  if (!(key < 1.0f))
    {
      *c_out = 0.0f;
      *m_out = 0.0f;
      *y_out = 0.0f;
      *k_out = key;
      return;
    }

  *c_out = (cyan - key) / (1.0f - key);
  *m_out = (magenta - key) / (1.0f - key);
  *y_out = (yellow - key) / (1.0f - key);
  *k_out = key;
}

#if CTX_ENABLE_CMYK
/* Makes color a CMYK + alpha color; with color management it is tagged
 * with the state's cmyk_space. */
static void ctx_color_set_cmyka (CtxState *state, CtxColor *color, float c, float m, float y, float k, float a)
{
  color->valid    = CTX_VALID_CMYKA;
  color->original = color->valid;
  color->cyan     = c;
  color->magenta  = m;
  color->yellow   = y;
  color->key      = k;
  color->alpha    = a;
#if CTX_ENABLE_CM
  color->space    = state->gstate.cmyk_space;
#endif
}

/* Makes color a device CMYK + alpha color; with color management it is
 * tagged with the state's device_space. */
static void ctx_color_set_dcmyka (CtxState *state, CtxColor *color, float c, float m, float y, float k, float a)
{
  color->valid          = CTX_VALID_DCMYKA;
  color->original       = color->valid;
  color->device_cyan    = c;
  color->device_magenta = m;
  color->device_yellow  = y;
  color->device_key     = k;
  color->alpha          = a;
#if CTX_ENABLE_CM
  color->space = state->gstate.device_space;
#endif
}

#endif

#if CTX_ENABLE_CM

/* Converts an RGB triplet from user space to device space.
 * With babl and a user-to-device fish set up in the graphics state the fish
 * does the conversion; otherwise the values are passed through unchanged.
 */
static void ctx_rgb_user_to_device (CtxState *state, float rin, float gin, float bin,
                                    float *rout, float *gout, float *bout)
{
  float result[3] = {rin, gin, bin};
#if CTX_BABL
  if (state->gstate.fish_rgbaf_user_to_device)
  {
    float src_px[4]={rin,gin,bin,1.0};
    float dst_px[4];
    babl_process (state->gstate.fish_rgbaf_user_to_device,
                  src_px, dst_px, 1);

    result[0] = dst_px[0];
    result[1] = dst_px[1];
    result[2] = dst_px[2];
  }
#endif
  *rout = result[0];
  *gout = result[1];
  *bout = result[2];
}

/* Converts an RGB triplet from device space to user space.
 * With babl and a device-to-user fish set up in the graphics state the fish
 * does the conversion; otherwise the values are passed through unchanged.
 */
static void ctx_rgb_device_to_user (CtxState *state, float rin, float gin, float bin,
                                    float *rout, float *gout, float *bout)
{
  float result[3] = {rin, gin, bin};
#if CTX_BABL
  if (state->gstate.fish_rgbaf_device_to_user)
  {
    float src_px[4]={rin,gin,bin,1.0};
    float dst_px[4];
    babl_process (state->gstate.fish_rgbaf_device_to_user,
                  src_px, dst_px, 1);

    result[0] = dst_px[0];
    result[1] = dst_px[1];
    result[2] = dst_px[2];
  }
#endif
  *rout = result[0];
  *gout = result[1];
  *bout = result[2];
}
#endif

/* Writes the device RGBA representation of color to out[0..3].
 *
 * When the device RGB fields are not yet valid they are derived from the
 * first available representation -- user RGBA (color management only),
 * 8 bit RGBA, CMYKA (if enabled), gray -- stored in the color, and
 * CTX_VALID_RGBA_DEVICE is set so that later calls reuse them.
 */
static void ctx_color_get_drgba (CtxState *state, CtxColor *color, float *out)
{
  if (! (color->valid & CTX_VALID_RGBA_DEVICE) )
    {
#if CTX_ENABLE_CM
      if (color->valid & CTX_VALID_RGBA)
        {
          ctx_rgb_user_to_device (state, color->red, color->green, color->blue,
                                  & (color->device_red), & (color->device_green), & (color->device_blue) );
        }
      else
#endif
        /* note: with CTX_ENABLE_CM this if is the else branch of the test
         * above */
        if (color->valid & CTX_VALID_RGBA_U8)
          {
            float red = ctx_u8_to_float (color->rgba[0]);
            float green = ctx_u8_to_float (color->rgba[1]);
            float blue = ctx_u8_to_float (color->rgba[2]);
#if CTX_ENABLE_CM
            ctx_rgb_user_to_device (state, red, green, blue,
                                  & (color->device_red), & (color->device_green), & (color->device_blue) );
#else
            color->device_red = red;
            color->device_green = green;
            color->device_blue = blue;
#endif
            /* the only branch that also refreshes alpha */
            color->alpha        = ctx_u8_to_float (color->rgba[3]);
          }
#if CTX_ENABLE_CMYK
        else if (color->valid & CTX_VALID_CMYKA)
          {
            ctx_cmyk_to_rgb (color->cyan, color->magenta, color->yellow, color->key,
                             &color->device_red,
                             &color->device_green,
                             &color->device_blue);
          }
#endif
        else if (color->valid & CTX_VALID_GRAYA)
          {
            color->device_red   =
              color->device_green =
                color->device_blue  = color->l;
          }
      /* marked valid even when none of the branches above applied */
      color->valid |= CTX_VALID_RGBA_DEVICE;
    }
  out[0] = color->device_red;
  out[1] = color->device_green;
  out[2] = color->device_blue;
  out[3] = color->alpha;
}


/* Writes the user space RGBA representation of color to out[0..3].
 * Without color management this is the device representation.  With it, the
 * user RGB fields are derived from device RGB on first use and cached in the
 * color (CTX_VALID_RGBA).
 */
static inline void
_ctx_color_get_rgba (CtxState *state, CtxColor *color, float *out)
{
#if !CTX_ENABLE_CM
  ctx_color_get_drgba (state, color, out);
#else
  if ((color->valid & CTX_VALID_RGBA) == 0)
    {
      /* brings the device fields up to date; out is overwritten below */
      ctx_color_get_drgba (state, color, out);
      if (color->valid & CTX_VALID_RGBA_DEVICE)
        {
          ctx_rgb_device_to_user (state, color->device_red, color->device_green, color->device_blue,
                                  & (color->red), & (color->green), & (color->blue) );
        }
      color->valid |= CTX_VALID_RGBA;
    }
  out[0] = color->red;
  out[1] = color->green;
  out[2] = color->blue;
  out[3] = color->alpha;
#endif
}

/* Non-inline entry point for _ctx_color_get_rgba: writes the RGBA
 * representation of color to out[0..3]. */
void ctx_color_get_rgba (CtxState *state, CtxColor *color, float *out)
{
  _ctx_color_get_rgba (state, color, out);
}



/* Returns the gray value of a float RGB triplet as computed by
 * CTX_CSS_RGB_TO_LUMINANCE; state is currently unused. */
float ctx_float_color_rgb_to_gray (CtxState *state, const float *rgb)
{
        // XXX todo replace with correct according to primaries
  return CTX_CSS_RGB_TO_LUMINANCE(rgb);
}
/* Same as above for an 8 bit RGB triplet; the macro's result is converted
 * to uint8_t on return. */
uint8_t ctx_u8_color_rgb_to_gray (CtxState *state, const uint8_t *rgb)
{
        // XXX todo replace with correct according to primaries
  return CTX_CSS_RGB_TO_LUMINANCE(rgb);
}

/* Writes gray and alpha of color to out[0..1].  The gray value is derived
 * from the device RGB representation on first use and cached in the color
 * (CTX_VALID_GRAYA).
 */
void ctx_color_get_graya (CtxState *state, CtxColor *color, float *out)
{
  if ((color->valid & CTX_VALID_GRAYA) == 0)
    {
      float device_rgba[4];
      ctx_color_get_drgba (state, color, device_rgba);
      color->l      = ctx_float_color_rgb_to_gray (state, device_rgba);
      color->valid |= CTX_VALID_GRAYA;
    }
  out[0] = color->l;
  out[1] = color->alpha;
}

#if CTX_ENABLE_CMYK
/* Writes the CMYK + alpha representation of color to out[0..4].
 * The CMYK fields are derived on first use -- from gray if the color has a
 * valid gray value, else from RGBA -- and cached in the color
 * (CTX_VALID_CMYKA).
 */
void ctx_color_get_cmyka (CtxState *state, CtxColor *color, float *out)
{
  if (! (color->valid & CTX_VALID_CMYKA) )
    {
      if (color->valid & CTX_VALID_GRAYA)
        {
          color->cyan = color->magenta = color->yellow = 0.0;
          /* key is the amount of black ink, i.e. the complement of the
           * gray level: this is what ctx_rgb_to_cmyk yields for
           * r = g = b = l, and what ctx_cmyk_to_rgb maps back to l */
          color->key = 1.0f - color->l;
        }
      else
        {
          float rgba[4];
          ctx_color_get_rgba (state, color, rgba);
          ctx_rgb_to_cmyk (rgba[0], rgba[1], rgba[2],
                           &color->cyan, &color->magenta, &color->yellow, &color->key);
          color->alpha = rgba[3];
        }
      color->valid |= CTX_VALID_CMYKA;
    }
  out[0] = color->cyan;
  out[1] = color->magenta;
  out[2] = color->yellow;
  out[3] = color->key;
  out[4] = color->alpha;
}

#if 0
static void ctx_color_get_cmyka_u8 (CtxState *state, CtxColor *color, uint8_t *out)
{
  if (! (color->valid & CTX_VALID_CMYKA_U8) )
    {
      float cmyka[5];
      ctx_color_get_cmyka (color, cmyka);
      for (int i = 0; i < 5; i ++)
        { color->cmyka[i] = ctx_float_to_u8 (cmyka[i]); }
      color->valid |= CTX_VALID_CMYKA_U8;
    }
  out[0] = color->cmyka[0];
  out[1] = color->cmyka[1];
  out[2] = color->cmyka[2];
  out[3] = color->cmyka[3];
}
#endif
#endif

/* Writes the 8 bit RGBA representation of color to out[0..3].  It is
 * derived from device RGBA on first use and cached in the color
 * (CTX_VALID_RGBA_U8).
 */
static inline void
_ctx_color_get_rgba8 (CtxState *state, CtxColor *color, uint8_t *out)
{
  if ((color->valid & CTX_VALID_RGBA_U8) == 0)
    {
      float device_rgba[4];
      ctx_color_get_drgba (state, color, device_rgba);
      color->rgba[0] = ctx_float_to_u8 (device_rgba[0]);
      color->rgba[1] = ctx_float_to_u8 (device_rgba[1]);
      color->rgba[2] = ctx_float_to_u8 (device_rgba[2]);
      color->rgba[3] = ctx_float_to_u8 (device_rgba[3]);
      color->valid |= CTX_VALID_RGBA_U8;
    }
  for (int c = 0; c < 4; c++)
    out[c] = color->rgba[c];
}

/* Non-inline entry point for _ctx_color_get_rgba8: writes the 8 bit RGBA
 * representation of color to out[0..3]. */
void
ctx_color_get_rgba8 (CtxState *state, CtxColor *color, uint8_t *out)
{
  _ctx_color_get_rgba8 (state, color, out);
}

/* Writes the 8 bit gray + alpha representation of color to out[0..1].
 * Derived from the float gray/alpha on first use and cached in the color
 * (CTX_VALID_GRAYA_U8); the alpha byte shares rgba[3] with the 8 bit RGBA
 * representation.
 */
void ctx_color_get_graya_u8 (CtxState *state, CtxColor *color, uint8_t *out)
{
  if ((color->valid & CTX_VALID_GRAYA_U8) == 0)
    {
      float gray_alpha[2];
      ctx_color_get_graya (state, color, gray_alpha);
      color->l_u8    = ctx_float_to_u8 (gray_alpha[0]);
      color->rgba[3] = ctx_float_to_u8 (gray_alpha[1]);
      color->valid  |= CTX_VALID_GRAYA_U8;
    }
  out[0] = color->l_u8;
  out[1] = color->rgba[3];
}

#if 0
void
ctx_get_rgba (Ctx *ctx, float *rgba)
{
  ctx_color_get_rgba (& (ctx->state), &ctx->state.gstate.source.color, rgba);
}

void
ctx_get_drgba (Ctx *ctx, float *rgba)
{
  ctx_color_get_drgba (& (ctx->state), &ctx->state.gstate.source.color, rgba);
}
#endif

/* Returns non-zero when the point x,y is inside the fill of the current
 * path of ctx.
 *
 * Points outside the path's bounding box are rejected right away.  Without
 * CTX_CURRENT_PATH every point inside the bounding box counts as a hit.
 * With it, the current path is replayed into a 1x1 pixel test context and
 * filled there; the point is a hit when that pixel received coverage.
 */
int ctx_in_fill (Ctx *ctx, float x, float y)
{
  float x1, y1, x2, y2;
  ctx_path_extents (ctx, &x1, &y1, &x2, &y2);

  if (x1 <= x && x <= x2 &&
      y1 <= y && y <= y2)
  {
#if CTX_CURRENT_PATH
     uint32_t pixel = 0;
     CtxMatrix transform;
     ctx_get_matrix (ctx, &transform);
     Ctx *tester = ctx_new_for_framebuffer (&pixel, 1, 1, 4, CTX_FORMAT_RGBA8);
     CtxIterator *iterator = ctx_current_path (ctx);
     CtxCommand *command;
     ctx_set_matrix (tester, &transform);
     ctx_rgb (tester, 1,1,1);
     /* NOTE(review): bringing the probed point onto the single pixel at the
      * origin would presumably need -x, -y here -- confirm against the
      * coordinate space of the replayed path before changing. */
     ctx_translate (tester, x, y);
     while ((command = ctx_iterator_next (iterator)))
     {
       ctx_process (tester, (CtxEntry*)command);
     }
     /* fill the replayed path in the tester: filling ctx itself would draw
      * into the caller's context and leave the test pixel untouched */
     ctx_fill (tester);
     ctx_free (tester);
     /* any coverage counts; an opaque white RGBA8 pixel is 0xffffffff, so a
      * comparison with 0xffffff could never succeed */
     if (pixel != 0) return 1;
#else
     return 1;
#endif
  }
  return 0;
}


#if CTX_ENABLE_CMYK
#if 0
void
ctx_get_cmyka (Ctx *ctx, float *cmyka)
{
  ctx_color_get_cmyka (& (ctx->state), &ctx->state.gstate.source.color, cmyka);
}
#endif
#endif
#if 0
void
ctx_get_graya (Ctx *ctx, float *ya)
{
  ctx_color_get_graya (& (ctx->state), &ctx->state.gstate.source.color, ya);
}
#endif

/* Emits a CTX_STROKE_SOURCE command; ctx_color_raw issues it right before a
 * color command that is meant for the stroke source. */
void ctx_stroke_source (Ctx *ctx)
{
  CtxEntry set_stroke = ctx_void (CTX_STROKE_SOURCE);
  ctx_process (ctx, &set_stroke);
}


/* Emits a CTX_COLOR command for the given model.
 *
 * components  the color components; how many are read depends on model
 *             (1 for gray, 2 for gray+alpha, 3 for RGB-like, 4 for RGBA-like
 *             and CMYK, 5 for CMYK+alpha).
 * stroke      non-zero: a CTX_STROKE_SOURCE command is emitted first.
 *
 * The command occupies three entries: the model goes into the first float of
 * entry 0 and the components follow in order in the remaining five float
 * slots; slots not written for a model stay 0.
 */
static void ctx_color_raw (Ctx *ctx, CtxColorModel model, float *components, int stroke)
{
#if 0
  CtxSource *source = stroke?
          &ctx->state.gstate.source_stroke:
          &ctx->state.gstate.source_fill;

  if (model == CTX_RGB || model == CTX_RGBA)
  {
    float rgba[4];
  // XXX it should be possible to disable this, to get a more accurate record
  // when it is intentional
    float a = 1.0f;
    if (model == CTX_RGBA) a = components[3];
    ctx_color_get_rgba (&ctx->state, &source->color, rgba);
    if (rgba[0] == components[0] && rgba[1] == components[1] && rgba[2] == components[2] && rgba[3] == a)
     return;
  }
#endif

  if (stroke)
  {
    ctx_stroke_source (ctx);
  }

  /* entries 1 and 2 are zero initialized */
  CtxEntry command[3]= {
  ctx_f (CTX_COLOR, model, 0)
  };
  switch (model)
  {
    case CTX_RGBA:
    case CTX_RGBA_A:
    case CTX_RGBA_A_DEVICE:
    case CTX_DRGBA:
    case CTX_LABA:
    case CTX_LCHA:
      /* alpha, then the three color components below */
      command[2].data.f[0]=components[3];
      /*FALLTHROUGH*/
    case CTX_RGB:
    case CTX_LAB:
    case CTX_LCH:
    case CTX_DRGB:
      command[0].data.f[1]=components[0];
      command[1].data.f[0]=components[1];
      command[1].data.f[1]=components[2];
      break;
    case CTX_DCMYKA:
    case CTX_CMYKA:
    case CTX_DCMYKA_A:
    case CTX_CMYKA_A:
      /* alpha, then the four inks below */
      command[2].data.f[1]=components[4];
      /*FALLTHROUGH*/
    case CTX_CMYK:
    case CTX_DCMYK:
      command[0].data.f[1]=components[0];
      command[1].data.f[0]=components[1];
      command[1].data.f[1]=components[2];
      command[2].data.f[0]=components[3];
      break;
    case CTX_GRAYA:
    case CTX_GRAYA_A:
      /* alpha, then gray below */
      command[1].data.f[0]=components[1];
      /*FALLTHROUGH*/
    case CTX_GRAY:
      command[0].data.f[1]=components[0];
      break;
  }
  ctx_process (ctx, command);
}

/* Color setters: each packs its arguments into a component array and emits
 * them with ctx_color_raw.  The *_stroke variants pass stroke = 1, the
 * others stroke = 0. */

/* Sets the color from RGBA components. */
void ctx_rgba (Ctx *ctx, float r, float g, float b, float a)
{
  float components[4]={r,g,b,a};
  ctx_color_raw (ctx, CTX_RGBA, components, 0);
}

/* Sets the stroke color from RGBA components. */
void ctx_rgba_stroke (Ctx *ctx, float r, float g, float b, float a)
{
  float components[4]={r,g,b,a};
  ctx_color_raw (ctx, CTX_RGBA, components, 1);
}

/* Same as ctx_rgba with alpha 1. */
void ctx_rgb (Ctx *ctx, float   r, float   g, float   b)
{
  ctx_rgba (ctx, r, g, b, 1.0f);
}

/* Same as ctx_rgba_stroke with alpha 1. */
void ctx_rgb_stroke (Ctx *ctx, float   r, float   g, float   b)
{
  ctx_rgba_stroke (ctx, r, g, b, 1.0f);
}

/* Sets the stroke color to a gray level; the single float is passed as a
 * one element component array (CTX_GRAY reads only components[0]). */
void ctx_gray_stroke   (Ctx *ctx, float gray)
{
  ctx_color_raw (ctx, CTX_GRAY, &gray, 1);
}
/* Sets the color to a gray level. */
void ctx_gray (Ctx *ctx, float gray)
{
  ctx_color_raw (ctx, CTX_GRAY, &gray, 0);
}

/* Sets the stroke color from device RGBA components. */
void ctx_drgba_stroke (Ctx *ctx, float r, float g, float b, float a)
{
  float components[4]={r,g,b,a};
  ctx_color_raw (ctx, CTX_DRGBA, components, 1);
}
/* Sets the color from device RGBA components. */
void ctx_drgba (Ctx *ctx, float r, float g, float b, float a)
{
  float components[4]={r,g,b,a};
  ctx_color_raw (ctx, CTX_DRGBA, components, 0);
}

#if CTX_ENABLE_CMYK

/* CMYK color setters, built on ctx_color_raw like the RGB ones; the
 * *_stroke variants pass stroke = 1. */

/* Sets the stroke color from CMYK + alpha components. */
void ctx_cmyka_stroke (Ctx *ctx, float c, float m, float y, float k, float a)
{
  float components[5]={c,m,y,k,a};
  ctx_color_raw (ctx, CTX_CMYKA, components, 1);
}
/* Sets the color from CMYK + alpha components. */
void ctx_cmyka (Ctx *ctx, float c, float m, float y, float k, float a)
{
  float components[5]={c,m,y,k,a};
  ctx_color_raw (ctx, CTX_CMYKA, components, 0);
}
/* Sets the stroke color from CMYK components (no alpha is sent). */
void ctx_cmyk_stroke   (Ctx *ctx, float c, float m, float y, float k)
{
  float components[4]={c,m,y,k};
  ctx_color_raw (ctx, CTX_CMYK, components, 1);
}
/* Sets the color from CMYK components (no alpha is sent). */
void ctx_cmyk (Ctx *ctx, float c, float m, float y, float k)
{
  float components[4]={c,m,y,k};
  ctx_color_raw (ctx, CTX_CMYK, components, 0);
}

#if 0
static void ctx_dcmyk_raw (Ctx *ctx, float c, float m, float y, float k, int stroke)
{
  float components[5]={c,m,y,k,1.0f};
  ctx_color_raw (ctx, CTX_DCMYKA, components, stroke);
}

static void ctx_dcmyka_raw (Ctx *ctx, float c, float m, float y, float k, float a, int stroke)
{
  CtxEntry command[3]=
  {
    ctx_f (CTX_COLOR, CTX_DCMYKA + 512 * stroke, c),
    ctx_f (CTX_CONT, m, y),
    ctx_f (CTX_CONT, k, a)
  };
  ctx_process (ctx, command);
}
#endif

/* Device CMYK color setters, built on ctx_color_raw; the *_stroke variants
 * pass stroke = 1. */

/* Sets the stroke color from device CMYK components.  The fifth array
 * element (1.0) is not read by ctx_color_raw for CTX_DCMYK. */
void ctx_dcmyk_stroke   (Ctx *ctx, float c, float m, float y, float k)
{
  float components[5]={c,m,y,k,1.0f};
  ctx_color_raw (ctx, CTX_DCMYK, components, 1);
}
/* Sets the color from device CMYK components. */
void ctx_dcmyk (Ctx *ctx, float c, float m, float y, float k)
{
  float components[5]={c,m,y,k,1.0f};
  ctx_color_raw (ctx, CTX_DCMYK, components, 0);
}

/* Sets the stroke color from device CMYK + alpha components. */
void ctx_dcmyka_stroke   (Ctx *ctx, float c, float m, float y, float k, float a)
{
  float components[5]={c,m,y,k,a};
  ctx_color_raw (ctx, CTX_DCMYKA, components, 1);
}
/* Sets the color from device CMYK + alpha components. */
void ctx_dcmyka (Ctx *ctx, float c, float m, float y, float k, float a)
{
  float components[5]={c,m,y,k,a};
  ctx_color_raw (ctx, CTX_DCMYKA, components, 0);
}

#endif

/* XXX: missing CSS1:
 *
 *   EM { color: rgb(110%, 0%, 0%) }  // clipped to 100% 
 *
 *
 *   :first-letter
 *   :first-line
 *   :link :visited :active
 *
 */

/* A named color: the hash of the name and its RGBA value, each component
 * in the range 0..1 (see the _ctx_colors table). */
typedef struct ColorDef {
  uint64_t name; /* hashed color name (one of the CTX_<name> constants) */
  float r;
  float g;
  float b;
  float a;
} ColorDef;

#if 0
#define CTX_silver 	CTX_STRH('s','i','l','v','e','r',0,0,0,0,0,0,0,0)
#define CTX_fuchsia 	CTX_STRH('f','u','c','h','s','i','a',0,0,0,0,0,0,0)
#define CTX_gray 	CTX_STRH('g','r','a','y',0,0,0,0,0,0,0,0,0,0)
#define CTX_yellow 	CTX_STRH('y','e','l','l','o','w',0,0,0,0,0,0,0,0)
#define CTX_white 	CTX_STRH('w','h','i','t','e',0,0,0,0,0,0,0,0,0)
#define CTX_maroon 	CTX_STRH('m','a','r','o','o','n',0,0,0,0,0,0,0,0)
#define CTX_magenta 	CTX_STRH('m','a','g','e','n','t','a',0,0,0,0,0,0,0)
#define CTX_blue 	CTX_STRH('b','l','u','e',0,0,0,0,0,0,0,0,0,0)
#define CTX_green 	CTX_STRH('g','r','e','e','n',0,0,0,0,0,0,0,0,0)
#define CTX_red 	CTX_STRH('r','e','d',0,0,0,0,0,0,0,0,0,0,0)
#define CTX_purple 	CTX_STRH('p','u','r','p','l','e',0,0,0,0,0,0,0,0)
#define CTX_olive 	CTX_STRH('o','l','i','v','e',0,0,0,0,0,0,0,0,0)
#define CTX_teal        CTX_STRH('t','e','a','l',0,0,0,0,0,0,0,0,0,0)
#define CTX_black 	CTX_STRH('b','l','a','c','k',0,0,0,0,0,0,0,0,0)
#define CTX_cyan 	CTX_STRH('c','y','a','n',0,0,0,0,0,0,0,0,0,0)
#define CTX_navy 	CTX_STRH('n','a','v','y',0,0,0,0,0,0,0,0,0,0)
#define CTX_lime 	CTX_STRH('l','i','m','e',0,0,0,0,0,0,0,0,0,0)
#define CTX_aqua 	CTX_STRH('a','q','u','a',0,0,0,0,0,0,0,0,0,0)
#define CTX_transparent CTX_STRH('t','r','a','n','s','p','a','r','e','n','t',0,0,0)
#endif

/* Table of named colors: hashed name and RGBA value (components 0..1).
 * 0.50196 is 128/255 and 0.75294 is 192/255.
 *
 * NOTE(review): CTX_green is listed twice -- as 0,1,0 among the first eight
 * entries and as the CSS value 0,0.50196,0 further down.  Which of the two
 * applies depends on how the table is searched; confirm and drop one.
 */
static ColorDef _ctx_colors[]={
  {CTX_black,    0, 0, 0, 1},
  {CTX_red,      1, 0, 0, 1},
  {CTX_green,    0, 1, 0, 1},
  {CTX_yellow,   1, 1, 0, 1},
  {CTX_blue,     0, 0, 1, 1},
  {CTX_fuchsia,  1, 0, 1, 1},
  {CTX_cyan,     0, 1, 1, 1},
  {CTX_white,    1, 1, 1, 1},
  {CTX_silver,   0.75294, 0.75294, 0.75294, 1},
  {CTX_gray,     0.50196, 0.50196, 0.50196, 1},
  /* magenta is #ff00ff, the same color as fuchsia; it used to carry the
   * values of purple */
  {CTX_magenta,  1, 0, 1, 1},
  {CTX_maroon,   0.50196, 0, 0, 1},
  {CTX_purple,   0.50196, 0, 0.50196, 1},
  {CTX_green,    0, 0.50196, 0, 1},
  {CTX_lime,     0, 1, 0, 1},
  {CTX_olive,    0.50196, 0.50196, 0, 1},
  {CTX_navy,     0, 0,      0.50196, 1},
  {CTX_teal,     0, 0.50196, 0.50196, 1},
  {CTX_aqua,     0, 1, 1, 1},
  {CTX_transparent, 0, 0, 0, 0},
  {CTX_none,     0, 0, 0, 0},
};

/* Numeric value (0..15) of a single hexadecimal digit character.
 * Any character that is not a hexadecimal digit yields 0.
 */
static int xdigit_value(const char xdigit)
{
  if (xdigit >= '0' && xdigit <= '9')
    return xdigit - '0';
  if (xdigit >= 'a' && xdigit <= 'f')
    return 10 + (xdigit - 'a');
  if (xdigit >= 'A' && xdigit <= 'F')
    return 10 + (xdigit - 'A');
  return 0;
}

/* Parse a functional color notation such as "rgb(255, 128, 0)" or
 * "rgba(255, 128, 0, 0.5)" into @color.
 *
 * Everything up to and including the first '(' is skipped, then up to four
 * numbers separated by spaces and/or commas are read.  The first three are
 * divided by 255, the fourth (alpha) is used as is.  Components that are not
 * present keep their defaults (0 for r/g/b, 1 for alpha).
 *
 * Always returns 0.
 */
static int
ctx_color_parse_rgb (CtxState *ctxstate, CtxColor *color, const char *color_string)
{
  float rgba[4] = {0, 0, 0, 1};
  int   n_parsed = 0;

  /* advance past the opening parenthesis, if there is one */
  while (*color_string && *color_string != '(')
    color_string++;
  if (*color_string)
    color_string++;

  char *cursor = (char *) color_string;
  char *before = (char *) NULL;

  while (cursor && n_parsed < 4 && cursor != before && *cursor)
  {
    before = cursor;
    float val = _ctx_parse_float (cursor, &cursor);

    /* the parser consumed nothing: there are no more numbers to read */
    if (cursor == before)
      break;

    if (n_parsed < 3)
      rgba[n_parsed] = val/255.0;
    else
      rgba[n_parsed] = val;
    n_parsed++;

    /* swallow the separators leading up to the next component */
    while (*cursor == ' ' || *cursor == ',')
      cursor++;
  }

  ctx_color_set_rgba (ctxstate, color, rgba[0], rgba[1], rgba[2], rgba[3]);
  return 0;
}

/* Returns 1 when @ch is an ASCII hexadecimal digit (0-9, a-f, A-F),
 * 0 otherwise.
 */
static int ctx_isxdigit (uint8_t ch)
{
  const int is_decimal = (ch >= '0' && ch <= '9');
  const int is_lower   = (ch >= 'a' && ch <= 'f');
  const int is_upper   = (ch >= 'A' && ch <= 'F');
  return (is_decimal || is_lower || is_upper) ? 1 : 0;
}

/* Parse a hexadecimal color of the form "#rgb", "#rgba", "#rrggbb" or
 * "#rrggbbaa" into @color.
 *
 * The first character of @color_string is skipped without being inspected
 * (the caller checks for '#'); the form is selected purely from the string
 * length.  In the short forms each digit is duplicated, so "#f80" is the
 * same color as "#ff8800".  Alpha defaults to fully opaque when absent.
 *
 * Returns 1 when the string was parsed and @color was set, and 0 when the
 * length is unsupported or a non-hexadecimal character was found; in that
 * case @color is left untouched.
 */
static int
mrg_color_parse_hex (CtxState *ctxstate, CtxColor *color, const char *color_string)
{
  float  dcolor[4] = {0, 0, 0, 1};
  size_t string_length = strlen (color_string);
  int    digits_per_channel;
  int    n_channels;
  int    i;

  switch (string_length)
  {
    case 4:  /* #rgb  */
    case 5:  /* #rgba */
      digits_per_channel = 1;
      n_channels = (int) string_length - 1;
      break;
    case 7:  /* #rrggbb   */
    case 9:  /* #rrggbbaa */
      digits_per_channel = 2;
      n_channels = ((int) string_length - 1) / 2;
      break;
    default: /* String was of unsupported length. */
      return 0;
  }

  for (i = 0; i < n_channels; ++i)
  {
    /* for the one digit forms the same digit is used for both nibbles */
    const char hi = color_string[1 + i * digits_per_channel];
    const char lo = color_string[i * digits_per_channel + digits_per_channel];

    if (!ctx_isxdigit (hi) || !ctx_isxdigit (lo))
      return 0;

    dcolor[i] = (xdigit_value (hi) << 4 | xdigit_value (lo)) / 255.f;
  }

  ctx_color_set_rgba (ctxstate, color, dcolor[0], dcolor[1], dcolor[2], dcolor[3]);
  return 1;
}

/* Fill @color from a textual description.
 *
 * Tried in order:
 *   - the name hashing to CTX_currentColor, which copies the context's
 *     current CTX_color value,
 *   - one of the named colors in _ctx_colors (searched from the end of the
 *     table),
 *   - a string starting with '#', handed to mrg_color_parse_hex(),
 *   - a string starting with "rgb", handed to ctx_color_parse_rgb().
 *
 * Strings matching none of these leave @color untouched.
 * Always returns 0.
 */
int ctx_color_set_from_string (Ctx *ctx, CtxColor *color, const char *string)
{
  CtxState      *state = &(ctx->state);
  const uint32_t hash  = ctx_strhash (string);

  if (hash == CTX_currentColor)
  {
    CtxColor current;
    float    components[4];
    ctx_get_color (ctx, CTX_color, &current);
    ctx_color_get_rgba (state, &current, components);
    ctx_color_set_rgba (state, color,
                        components[0], components[1],
                        components[2], components[3]);
    return 0;
  }

  /* named colors, last table entry first */
  int idx = (int) (sizeof (_ctx_colors) / sizeof (_ctx_colors[0]));
  while (idx-- > 0)
  {
    if (hash != _ctx_colors[idx].name)
      continue;
    ctx_color_set_rgba (state, color,
                        _ctx_colors[idx].r, _ctx_colors[idx].g,
                        _ctx_colors[idx].b, _ctx_colors[idx].a);
    return 0;
  }

  if (string[0] == '#')
  {
    mrg_color_parse_hex (state, color, string);
    return 0;
  }

  if (string[0] == 'r' && string[1] == 'g' && string[2] == 'b')
    ctx_color_parse_rgb (state, color, string);

  return 0;
}

/* Set the current color of @ctx from a textual description; see
 * ctx_color_set_from_string() for the accepted forms.  The parsed color is
 * read back as RGBA floats and issued through ctx_color_raw().
 *
 * Always returns 0.
 */
int ctx_color (Ctx *ctx, const char *string)
{
  float    components[4];
  CtxColor parsed = {0,};

  ctx_color_set_from_string (ctx, &parsed, string);
  ctx_color_get_rgba (&(ctx->state), &parsed, components);
  ctx_color_raw (ctx, CTX_RGBA, components, 0);
  return 0;
}

/* Set the current color from 8-bit components; each component is scaled
 * from 0..255 to 0.0..1.0 and forwarded to ctx_rgba().
 */
void
ctx_rgba8 (Ctx *ctx, uint8_t r, uint8_t g, uint8_t b, uint8_t a)
{
  const float scale = 255.0f;
  ctx_rgba (ctx, r / scale, g / scale, b / scale, a / scale);
}

/* Set the current stroke color from 8-bit components; each component is
 * scaled from 0..255 to 0.0..1.0 and forwarded to ctx_rgba_stroke().
 */
void ctx_rgba8_stroke (Ctx *ctx, uint8_t r, uint8_t g, uint8_t b, uint8_t a)
{
  const float scale = 255.0f;
  ctx_rgba_stroke (ctx, r / scale, g / scale, b / scale, a / scale);
}


#endif 

#if CTX_BABL
/* Store the babl @space in the slot selected by @space_slot and rebuild the
 * RGBA float conversions (babl fishes) between device, user and texture
 * spaces.
 *
 * Both device slots (RGB and CMYK) write to gstate.device_space.  Texture,
 * device and user RGB spaces that are still unset afterwards default to
 * sRGB.
 */
void ctx_rasterizer_colorspace_babl (CtxState      *state,
                                     CtxColorSpace  space_slot,
                                     const Babl    *space)
{
  static const char rgba_float[] = "R'G'B'A float";

  if (space_slot == CTX_COLOR_SPACE_DEVICE_RGB ||
      space_slot == CTX_COLOR_SPACE_DEVICE_CMYK)
    state->gstate.device_space = space;
  else if (space_slot == CTX_COLOR_SPACE_USER_RGB)
    state->gstate.rgb_space = space;
  else if (space_slot == CTX_COLOR_SPACE_USER_CMYK)
    state->gstate.cmyk_space = space;
  else if (space_slot == CTX_COLOR_SPACE_TEXTURE)
    state->gstate.texture_space = space;

  /* fill in whatever has not been configured yet */
  const Babl *fallback = babl_space ("sRGB");
  if (state->gstate.texture_space == NULL)
    state->gstate.texture_space = fallback;
  if (state->gstate.device_space == NULL)
    state->gstate.device_space = fallback;
  if (state->gstate.rgb_space == NULL)
    state->gstate.rgb_space = fallback;

  state->gstate.fish_rgbaf_device_to_user =
    babl_fish (babl_format_with_space (rgba_float, state->gstate.device_space),
               babl_format_with_space (rgba_float, state->gstate.rgb_space));

  state->gstate.fish_rgbaf_user_to_device =
    babl_fish (babl_format_with_space (rgba_float, state->gstate.rgb_space),
               babl_format_with_space (rgba_float, state->gstate.device_space));

  state->gstate.fish_rgbaf_texture_to_device =
    babl_fish (babl_format_with_space (rgba_float, state->gstate.texture_space),
               babl_format_with_space (rgba_float, state->gstate.device_space));
}
#endif

/* Configure the color space in @space_slot from @icc_data; a no-op unless
 * ctx is built with CTX_BABL.
 *
 * @icc_data is interpreted as follows:
 *   - NULL selects sRGB,
 *   - fewer than 32 bytes starting with "0x" is parsed with "%p" as the
 *     address of an existing Babl space,
 *   - fewer than 32 bytes otherwise is matched, ignoring ASCII case, against
 *     the names "srgb", "scrgb", "acescg", "adobe", "apple", "rec2020" and
 *     "aces2065-1",
 *   - anything not resolved by the above is handed to babl_space_from_icc()
 *     as an ICC profile.
 *
 * Nothing is changed when no space could be determined.
 */
void ctx_rasterizer_colorspace_icc (CtxState      *state,
                                    CtxColorSpace  space_slot,
                                    char          *icc_data,
                                    int            icc_length)
{
#if CTX_BABL
   const char *error = NULL;
   const Babl *space = NULL;

   if (icc_data == NULL) space = babl_space ("sRGB");
   else if (icc_length > 0 && icc_length < 32)
   {
      /* Work on a NUL terminated copy: icc_data is only known to hold
       * icc_length bytes, and the buffer must have room for the longest
       * accepted length (31) plus the terminator.
       */
      char tmp[32];
      memcpy (tmp, icc_data, (size_t) icc_length);
      tmp[icc_length] = 0;

      if (tmp[0] == '0' && tmp[1] == 'x')
      {
        void *ptr = NULL;  /* "%p" requires a pointer to void* */
        if (sscanf (tmp, "%p", &ptr) == 1)
          space = (const Babl *) ptr;
      }
      else
      {
        int i;
        for (i = 0; i < icc_length; i++)
          if (tmp[i] >= 'A' && tmp[i] <= 'Z')
            tmp[i] += 'a' - 'A';
        if (!strcmp (tmp, "srgb"))            space = babl_space ("sRGB");
        else if (!strcmp (tmp, "scrgb"))      space = babl_space ("scRGB");
        else if (!strcmp (tmp, "acescg"))     space = babl_space ("ACEScg");
        else if (!strcmp (tmp, "adobe"))      space = babl_space ("Adobe");
        else if (!strcmp (tmp, "apple"))      space = babl_space ("Apple");
        else if (!strcmp (tmp, "rec2020"))    space = babl_space ("Rec2020");
        else if (!strcmp (tmp, "aces2065-1")) space = babl_space ("ACES2065-1");
      }
   }

   if (!space && icc_data && icc_length > 0)
   {
     space = babl_space_from_icc (icc_data, icc_length, BABL_ICC_INTENT_RELATIVE_COLORIMETRIC, &error);
   }
   if (space)
   {
     ctx_rasterizer_colorspace_babl (state, space_slot, space);
   }
#endif
}

/* Issue a CTX_COLOR_SPACE command for @space_slot.
 *
 * @data carries the color space description; when @data_length is zero or
 * negative, @data is treated as a NUL terminated string and its length is
 * measured with strlen().  A NULL @data selects "sRGB".
 */
void ctx_colorspace (Ctx           *ctx,
                     CtxColorSpace  space_slot,
                     unsigned char *data,
                     int            data_length)
{
  if (!data)
  {
    ctx_process_cmd_str_with_len (ctx, CTX_COLOR_SPACE, "sRGB", space_slot, 0, 4);
    return;
  }

  char *payload = (char*)data;
  if (data_length <= 0)
    data_length = (int)strlen (payload);
  ctx_process_cmd_str_with_len (ctx, CTX_COLOR_SPACE, payload, space_slot, 0, data_length);
}

/* Add a color stop at @pos to the current gradient, with the color given as
 * 8-bit RGBA.  The four color bytes are stored in bytes 4..7 of the
 * CTX_GRADIENT_STOP entry, which is then processed by @ctx.
 */
void ctx_gradient_add_stop_u8
(Ctx *ctx, float pos, uint8_t r, uint8_t g, uint8_t b, uint8_t a)
{
  const uint8_t rgba[4] = {r, g, b, a};
  CtxEntry stop = ctx_f (CTX_GRADIENT_STOP, pos, 0);

  for (int c = 0; c < 4; c++)
    stop.data.u8[4 + c] = rgba[c];

  ctx_process (ctx, &stop);
}

/* Convert a color component in the 0.0 .. 1.0 range to 0 .. 255.
 *
 * The value is clamped while it is still a float, because converting an out
 * of range (or NaN) float to an integer is undefined.  In-range values are
 * truncated towards zero.
 */
static uint8_t
ctx_gradient_component_u8 (float v)
{
  if (!(v > 0.0f))  /* negative, zero and NaN */
    return 0;
  if (v >= 1.0f)
    return 255;
  return (uint8_t) (int) (v * 255);
}

/* Add a color stop at @pos to the current gradient, with the color given as
 * floating point RGBA.  Each component is clamped to 0.0 .. 1.0 and
 * quantized to 8 bits before being passed to ctx_gradient_add_stop_u8().
 */
void ctx_gradient_add_stop
(Ctx *ctx, float pos, float r, float g, float b, float a)
{
  ctx_gradient_add_stop_u8 (ctx, pos,
                            ctx_gradient_component_u8 (r),
                            ctx_gradient_component_u8 (g),
                            ctx_gradient_component_u8 (b),
                            ctx_gradient_component_u8 (a));
}

/* Add a color stop at @pos to the current gradient, with the color given as
 * text; see ctx_color_set_from_string() for the accepted forms.
 */
void ctx_gradient_add_stop_string
(Ctx *ctx, float pos, const char *string)
{
  float    components[4];
  CtxColor parsed = {0,};

  ctx_color_set_from_string (ctx, &parsed, string);
  ctx_color_get_rgba (&(ctx->state), &parsed, components);
  ctx_gradient_add_stop (ctx, pos,
                         components[0], components[1],
                         components[2], components[3]);
}

//  deviceRGB .. settable when creating an RGB image surface..
//               queryable when running in terminal - is it really needed?
//               though it is settable ; and functional for changing this state at runtime..
//
//  userRGB - settable at any time, stored in save|restore 
//  texture - set as the space of data on subsequent 

CtxBuffer *ctx_buffer_new_bare (void)
{
  CtxBuffer *buffer = (CtxBuffer *) ctx_calloc (sizeof (CtxBuffer), 1);
  return buffer;
}

/* Attach pixel data to @buffer.
 *
 * If the buffer already has a free function it is first invoked on the data
 * currently held.  A @stride of zero or less is replaced by the stride
 * ctx_pixel_format_get_stride() reports for @pixel_format and @width.
 * @freefunc (may be NULL) and @user_data are stored for later release of
 * @data.
 */
void ctx_buffer_set_data (CtxBuffer *buffer,
                          void *data, int width, int height,
                          int stride,
                          CtxPixelFormat pixel_format,
                          void (*freefunc) (void *pixels, void *user_data),
                          void *user_data)
{
  /* let go of whatever the buffer held before */
  if (buffer->free_func)
    buffer->free_func (buffer->data, buffer->user_data);

  const int row_bytes = (stride > 0)
                      ? stride
                      : ctx_pixel_format_get_stride (pixel_format, width);

  buffer->data      = data;
  buffer->width     = width;
  buffer->height    = height;
  buffer->stride    = row_bytes;
  buffer->format    = ctx_pixel_format_info (pixel_format);
  buffer->free_func = freefunc;
  buffer->user_data = user_data;
}

/* Create a new CtxBuffer wrapping existing pixel @data; the arguments have
 * the same meaning as for ctx_buffer_set_data().
 */
CtxBuffer *ctx_buffer_new_for_data (void *data, int width, int height,
                                    int stride,
                                    CtxPixelFormat pixel_format,
                                    void (*freefunc) (void *pixels, void *user_data),
                                    void *user_data)
{
  CtxBuffer *wrapper = ctx_buffer_new_bare ();

  ctx_buffer_set_data (wrapper,
                       data, width, height, stride,
                       pixel_format, freefunc, user_data);
  return wrapper;
}

/* Buffer free function that releases @pixels with free(); @userdata is not
 * used.
 */
void ctx_buffer_pixels_free (void *pixels, void *userdata)
{
  (void) userdata;
  free (pixels);
}

/* Create a CtxBuffer owning zero-initialized pixel storage for
 * @width x @height pixels of @pixel_format.
 *
 * The storage is stride * height bytes, except for CTX_FORMAT_YUV420 where
 * it is one full resolution plane plus two half resolution planes.  The
 * pixels are released through ctx_buffer_pixels_free().
 */
CtxBuffer *ctx_buffer_new (int width, int height,
                           CtxPixelFormat pixel_format)
{
  CtxBuffer *buffer = ctx_buffer_new_bare ();
  const int  stride = ctx_pixel_format_get_stride (pixel_format, width);
  int        n_bytes;

  if (pixel_format == CTX_FORMAT_YUV420)
    n_bytes = width * height + ((width/2) * (height/2)) * 2;
  else
    n_bytes = stride * height;

  ctx_buffer_set_data (buffer,
                       (uint8_t*)ctx_calloc (n_bytes, 1),
                       width, height, stride, pixel_format,
                       ctx_buffer_pixels_free, NULL);
  return buffer;
}

/* Release everything @buffer refers to without freeing the CtxBuffer itself:
 * the pixel data (through the buffer's free function, when one is set), the
 * eid string, and the color managed copy unless that copy is the buffer
 * itself.
 */
static void ctx_buffer_deinit (CtxBuffer *buffer)
{
  if (buffer->free_func)
    buffer->free_func (buffer->data, buffer->user_data);

  free (buffer->eid);  /* free (NULL) is a no-op */

  buffer->eid       = NULL;
  buffer->data      = NULL;
  buffer->free_func = NULL;
  buffer->user_data = NULL;

  if (!buffer->color_managed)
    return;

  /* color_managed points back at the buffer when no conversion was needed */
  if (buffer->color_managed != buffer)
    ctx_buffer_free (buffer->color_managed);
  buffer->color_managed = NULL;
}

/* Release @buffer together with everything it owns (see ctx_buffer_deinit).
 * Like free(), passing NULL is allowed and does nothing.
 */
void ctx_buffer_free (CtxBuffer *buffer)
{
  if (!buffer)
    return;
  ctx_buffer_deinit (buffer);
  free (buffer);
}

#if 0
/* Disabled by the surrounding #if 0.
 *
 * Looks for a texture slot that holds data and whose eid equals @eid.  On a
 * match the slot's frame stamp is refreshed from the texture cache, the
 * texture size is stored in @tw / @th (each may be NULL) and the slot index
 * is returned.  Returns -1 when no slot matches.
 */
static int
ctx_texture_check_eid (Ctx *ctx, const char *eid, int *tw, int *th)
{
  for (int i = 0; i <  CTX_MAX_TEXTURES; i++)
  {
    if (ctx->texture[i].data &&
        ctx->texture[i].eid  &&
        !strcmp (ctx->texture[i].eid, eid))
    {
      if (tw) *tw = ctx->texture[i].width;
      if (th) *th = ctx->texture[i].height;
      ctx->texture[i].frame = ctx->texture_cache->frame;
      return i;
    }
  }
  return -1;
}
#endif

/* Register pixel data as a texture in one of the CTX_MAX_TEXTURES slots of
 * @ctx and return the eid string identifying it.
 *
 * @eid:       identifier for the texture, or NULL to derive one from the
 *             SHA1 of the pixel data.
 * @stride:    bytes per row; zero or less selects the default stride for
 *             @format and @width.
 * @space:     stored as the texture's color space when CTX_ENABLE_CM is set.
 * @freefunc / @user_data: how to release @pixels.  The combination
 *             ctx_buffer_pixels_free with user_data (void*)23 makes the
 *             function keep a private copy of @pixels instead of the pointer
 *             passed in.
 *
 * When @eid is given and a slot already holds a texture with that eid, the
 * slot's frame stamp is refreshed, @pixels is released with @freefunc
 * (unless user_data is (void*)23) and the existing eid is returned.
 * Otherwise the last slot that is empty or has not been used for a couple of
 * frames is reused; slot 0 is used when there is no such slot.
 *
 * Returns the eid string owned by the texture slot, or NULL when the private
 * copy of the pixels could not be allocated.
 */
const char* ctx_texture_init (Ctx           *ctx,
                              const char    *eid,
                              int            width,
                              int            height,
                              int            stride,
                              CtxPixelFormat format,
                              void          *space,
                              uint8_t       *pixels,
                              void (*freefunc) (void *pixels, void *user_data),
                              void          *user_data)
{
  int id = 0;
  if (eid)
  {
    for (int i = 0; i <  CTX_MAX_TEXTURES; i++)
    {
      if (ctx->texture[i].data &&
          ctx->texture[i].eid &&
          !strcmp (ctx->texture[i].eid, eid))
      {
        /* already known: keep the existing texture and drop the new pixels */
        ctx->texture[i].frame = ctx->texture_cache->frame;
        if (freefunc && user_data != (void*)23)
          freefunc (pixels, user_data);
        return ctx->texture[i].eid;
      }
      if (ctx->texture[i].data == NULL 
          ||   (ctx->texture_cache->frame - ctx->texture[i].frame >= 2))
        id = i;
    }
  } else
  {
    for (int i = 0; i <  CTX_MAX_TEXTURES; i++)
    {
      if (ctx->texture[i].data == NULL 
          || (ctx->texture_cache->frame - ctx->texture[i].frame > 2))
        id = i;
    }
  }
  ctx_buffer_deinit (&ctx->texture[id]);

  if (stride<=0)
  {
    stride = ctx_pixel_format_get_stride ((CtxPixelFormat)format, width);
  }

  /* YUV420 is planar: a full resolution plane followed by two quarter size
   * planes, so its size cannot be derived from stride * height. */
  int data_len = stride * height;
  if (format == CTX_FORMAT_YUV420)
          data_len = width * height +
                  2 * ((width/2)*(height/2));

  if (freefunc == ctx_buffer_pixels_free && user_data == (void*)23)
  {
     uint8_t *tmp = (uint8_t*)malloc (data_len);
     if (!tmp)
       return NULL; /* the slot was cleared above and stays empty */
     memcpy (tmp, pixels, data_len);
     pixels = tmp;
  }

  ctx_buffer_set_data (&ctx->texture[id],
                       pixels, width, height,
                       stride, format,
                       freefunc, user_data);
#if CTX_ENABLE_CM
  ctx->texture[id].space = space;
#endif
  ctx->texture[id].frame = ctx->texture_cache->frame;
  if (eid)
  {
    /* we got an eid, this is the fast path */
    ctx->texture[id].eid = strdup (eid);
  }
  else
  {
    uint8_t hash[20];
    char ascii[41];

    /* hash all of the pixel data (data_len bytes), so that every plane of a
     * planar format contributes to the generated eid */
    CtxSHA1 *sha1 = ctx_sha1_new ();
    ctx_sha1_process (sha1, pixels, data_len);
    ctx_sha1_done (sha1, hash);
    ctx_sha1_free (sha1);
    const char *hex="0123456789abcdef";
    for (int i = 0; i < 20; i ++)
    {
       ascii[i*2]=hex[hash[i]/16];
       ascii[i*2+1]=hex[hash[i]%16];
    }
    ascii[40]=0;
    ctx->texture[id].eid = strdup (ascii);
  }
  return ctx->texture[id].eid;
}

/* Make sure @buffer has a color managed version, stored in
 * buffer->color_managed.
 *
 * With CTX_BABL, RGBA8 and RGB8 buffers whose space differs from the device
 * space of @state get a newly allocated buffer of the same size and format
 * holding the pixels converted to the device space.  In every other case
 * the buffer serves as its own color managed version.
 */
void
_ctx_texture_prepare_color_management (CtxState      *state,
                                       CtxBuffer     *buffer)
{
#if CTX_BABL
  const char    *encoding      = NULL;  /* babl format name, when convertible */
  CtxPixelFormat target_format = CTX_FORMAT_RGBA8;
#endif

  switch (buffer->format->pixel_format)
  {
#if CTX_BABL
    case CTX_FORMAT_RGBA8:
      encoding      = "R'G'B'A u8";
      target_format = CTX_FORMAT_RGBA8;
      break;
    case CTX_FORMAT_RGB8:
      encoding      = "R'G'B' u8";
      target_format = CTX_FORMAT_RGB8;
      break;
#endif
    default:
      break;
  }

#if CTX_BABL
  if (encoding && buffer->space != state->gstate.device_space)
  {
    CtxBuffer *converted = ctx_buffer_new (buffer->width, buffer->height,
                                           target_format);
    babl_process (
       babl_fish (babl_format_with_space (encoding, buffer->space),
                  babl_format_with_space (encoding, state->gstate.device_space)),
       buffer->data, converted->data,
       buffer->width * buffer->height);
    buffer->color_managed = converted;
    return;
  }
#endif

  /* no conversion needed or possible */
  buffer->color_managed = buffer;
}



/* Length in bytes (1..4) of the UTF-8 sequence introduced by @first_byte.
 * Bytes that cannot start a sequence of up to four bytes (continuation
 * bytes and 0xF8..0xFF) are reported as length 1.
 */
int ctx_utf8_len (const unsigned char first_byte)
{
  /* sequence length indexed by the upper four bits of the first byte */
  static const unsigned char length_by_nibble[16] =
    { 1, 1, 1, 1, 1, 1, 1, 1,   /* 0xxxxxxx  ASCII        */
      1, 1, 1, 1,               /* 10xxxxxx  continuation */
      2, 2,                     /* 110xxxxx               */
      3,                        /* 1110xxxx               */
      4 };                      /* 11110xxx               */

  if (first_byte >= 0xF8)
    return 1;
  return length_by_nibble[first_byte >> 4];
}


/* Return a pointer to the character with index @utf8_length (counted in
 * characters, not bytes) of the UTF-8 string @s, or a pointer to the
 * terminating NUL when the string has fewer characters.  Returns NULL when
 * @s is NULL.
 */
const char *ctx_utf8_skip (const char *s, int utf8_length)
{
  if (s == NULL)
    return NULL;

  int n_chars = 0;
  while (*s != '\0')
  {
    /* every byte that is not a continuation byte starts a new character */
    const int starts_char = ((*s & 0xC0) != 0x80);
    if (starts_char)
      n_chars++;
    if (n_chars == utf8_length + 1)
      return s;
    s++;
  }
  return s;
}

//  XXX  :  unused
/* Number of characters in the UTF-8 string @s, counted as the bytes that
 * are not continuation bytes.  Returns 0 for NULL.
 */
int ctx_utf8_strlen (const char *s)
{
  int n_chars = 0;

  if (s == NULL)
    return 0;

  while (*s != '\0')
  {
    if ((*s & 0xC0) != 0x80)
      n_chars++;
    s++;
  }
  return n_chars;
}

/* Encode the code point @ch as UTF-8 into @dest, which needs room for up to
 * four bytes; no terminating NUL is written.
 *
 * Returns the number of bytes written (1..4), or 0 when @ch is 0x110000 or
 * larger, in which case @dest is left untouched.
 *
 * Derived from the public domain UTF-8 routines by Jeff Bezanson,
 * http://www.cprogramming.com/tutorial/utf8.c
 */
int
ctx_unichar_to_utf8 (uint32_t  ch,
                     uint8_t  *dest)
{
  int     n_bytes;
  uint8_t lead_marker;

  if (ch < 0x80)
    {
      dest[0] = (uint8_t) ch;
      return 1;
    }
  else if (ch < 0x800)
    {
      n_bytes     = 2;
      lead_marker = 0xC0;
    }
  else if (ch < 0x10000)
    {
      n_bytes     = 3;
      lead_marker = 0xE0;
    }
  else if (ch < 0x110000)
    {
      n_bytes     = 4;
      lead_marker = 0xF0;
    }
  else
    {
      return 0;
    }

  /* continuation bytes carry six bits each, lowest bits last */
  for (int i = n_bytes - 1; i > 0; i--)
    {
      dest[i] = (ch & 0x3F) | 0x80;
      ch >>= 6;
    }
  dest[0] = ch | lead_marker;
  return n_bytes;
}

/* Decode the UTF-8 sequence at the start of @input and return its code
 * point.  Sequences of one to six bytes are accepted; the continuation
 * bytes are not validated.  Returns 0 when the first byte cannot start a
 * sequence (a continuation byte, 0xFE or 0xFF).
 */
uint32_t
ctx_utf8_to_unichar (const char *input)
{
  const uint8_t *bytes = (const uint8_t *) input;
  const uint8_t  lead  = bytes[0];
  uint32_t       value;
  int            n_continuation;

  if ((lead & 0x80) == 0)
    return lead;

  if      ((lead & 0xE0) == 0xC0) { n_continuation = 1; value = lead & 0x1F; }
  else if ((lead & 0xF0) == 0xE0) { n_continuation = 2; value = lead & 0xF;  }
  else if ((lead & 0xF8) == 0xF0) { n_continuation = 3; value = lead & 0x7;  }
  else if ((lead & 0xFC) == 0xF8) { n_continuation = 4; value = lead & 0x3;  }
  else if ((lead & 0xFE) == 0xFC) { n_continuation = 5; value = lead & 0x1;  }
  else
    return 0;

  /* each continuation byte contributes its low six bits */
  for (int i = 1; i <= n_continuation; i++)
    value = (value << 6) | (bytes[i] & 0x3F);

  return value;
}
#if CTX_EVENTS

#if !__COSMOPOLITAN__
#include <termios.h>

#include <fcntl.h>
#include <sys/ioctl.h>
#endif

#if 0
/* Disabled by the surrounding #if 0; the ioctl based version further down
 * is the one that is compiled.
 *
 * Switches stdin to raw mode, writes the "\e[14t" query to stderr and reads
 * the reply from stdin one byte at a time.  Returns the integer following
 * the second ';' of the reply, or 0 when raw mode could not be entered or
 * the reply contains fewer than two ';'.
 *
 * NOTE(review): strchr() searches buf before it is NUL terminated, and the
 * length == -1 test only detects a failed read() when it was the only read.
 */
int ctx_terminal_width (void)
{
  char buf[1024];
  struct termios orig_attr;
  struct termios raw;
  tcgetattr (STDIN_FILENO, &orig_attr);
  raw = orig_attr;
  raw.c_iflag &= ~(BRKINT | ICRNL | INPCK | ISTRIP | IXON);
  raw.c_oflag &= ~(OPOST);
  raw.c_lflag &= ~(ECHO | ICANON | IEXTEN | ISIG);
  raw.c_cc[VMIN] = 1; raw.c_cc[VTIME] = 0; /* 1 byte, no timer */
  if (tcsetattr (STDIN_FILENO, TCSAFLUSH, &raw) < 0)
    return 0;
  fprintf (stderr, "\e[14t");
  //tcflush(STDIN_FILENO, 1);
#if __COSMOPOLITAN__
  /// XXX ?
#else
  tcdrain(STDIN_FILENO);
#endif
  int length = 0;
  usleep (1000 * 60); // to account for possibly lowish latency ssh,
                      // should be made configurable ; perhaps in
                      // an env var
  struct timeval tv = {0,0};
  fd_set rfds;
  
  FD_ZERO(&rfds);
  FD_SET(0, &rfds);
  tv.tv_usec = 1000 * 5;

  /* read at most 20 bytes, for as long as select() reports pending input */
  for (int n = 0; select(1, &rfds, NULL, NULL, &tv) && n < 20; n++)
  {
    length += read (STDIN_FILENO, &buf[length], 1);
  }
  tcsetattr (STDIN_FILENO, TCSAFLUSH, &orig_attr);
  if (length == -1)
  {
    return 0;
  }
  char *semi = strchr (buf, ';');
  buf[length]=0;
  if (semi) {semi++; semi = strchr (semi, ';');}
  if (semi)
  {
    return atoi(semi + 1);
  }
  return 0;
}

/* Disabled by the surrounding #if 0; the ioctl based version further down
 * is the one that is compiled.
 *
 * Switches stdin to raw mode, writes the "\e[14t" query to stderr and reads
 * the reply from stdin one byte at a time.  Returns the integer following
 * the first ';' of the reply, or 0 when raw mode could not be entered or the
 * reply contains no ';'.
 *
 * NOTE(review): strchr() searches buf before it is NUL terminated, and the
 * length == -1 test only detects a failed read() when it was the only read.
 */
int ctx_terminal_height (void)
{
  char buf[1024];
  struct termios orig_attr;
  struct termios raw;
  tcgetattr (STDIN_FILENO, &orig_attr);
  raw = orig_attr;
  raw.c_iflag &= ~(BRKINT | ICRNL | INPCK | ISTRIP | IXON);
  raw.c_oflag &= ~(OPOST);
  raw.c_lflag &= ~(ECHO | ICANON | IEXTEN | ISIG);
  raw.c_cc[VMIN] = 1; raw.c_cc[VTIME] = 0; /* 1 byte, no timer */
  if (tcsetattr (STDIN_FILENO, TCSAFLUSH, &raw) < 0)
    return 0;
  fprintf (stderr, "\e[14t");
  //tcflush(STDIN_FILENO, 1);
#if !__COSMOPOLITAN__
  tcdrain(STDIN_FILENO);
#endif
  int length = 0;
  usleep (1000 * 60); // to account for possibly lowish latency ssh,
                      // should be made configurable ; perhaps in
                      // an env var
  struct timeval tv = {0,0};
  fd_set rfds;
  
  FD_ZERO(&rfds);
  FD_SET(0, &rfds);
  tv.tv_usec = 1000 * 5;

  /* read at most 20 bytes, for as long as select() reports pending input */
  for (int n = 0; select(1, &rfds, NULL, NULL, &tv) && n < 20; n++)
  {
    length += read (STDIN_FILENO, &buf[length], 1);
  }
  tcsetattr (STDIN_FILENO, TCSAFLUSH, &orig_attr);
  if (length == -1)
  {
    return 0;
  }
  char *semi = strchr (buf, ';');
  buf[length]=0;
  if (semi)
  {
    return atoi(semi + 1);
  }
  return 0;
}
#else


/* Width of the terminal on file descriptor 0 in pixels, as reported by the
 * TIOCGWINSZ ioctl; 640 when the ioctl fails.
 */
int ctx_terminal_width (void)
{
  struct winsize size;
  return (ioctl (0, TIOCGWINSZ, &size) == 0) ? size.ws_xpixel : 640;
}

/* Height of the terminal on file descriptor 0 in pixels, as reported by the
 * TIOCGWINSZ ioctl; 450 when the ioctl fails.
 */
int ctx_terminal_height (void)
{
  struct winsize size;
  return (ioctl (0, TIOCGWINSZ, &size) == 0) ? size.ws_ypixel : 450;
}

#endif

/* Number of character columns of the terminal on file descriptor 0, as
 * reported by the TIOCGWINSZ ioctl; 80 when the ioctl fails.
 */
int ctx_terminal_cols (void)
{
  struct winsize size;
  return (ioctl (0, TIOCGWINSZ, &size) == 0) ? size.ws_col : 80;
}

/* Number of character rows of the terminal on file descriptor 0, as
 * reported by the TIOCGWINSZ ioctl; 25 when the ioctl fails.
 */
int ctx_terminal_rows (void)
{
  struct winsize size;
  return (ioctl (0, TIOCGWINSZ, &size) == 0) ? size.ws_row : 25;
}





/* Terminal escape sequences of the form ESC [ ? <mode> h (enable) and
 * ESC [ ? <mode> l (disable), for cursor visibility, mouse reporting and
 * the alternate screen. */
#define DECTCEM_CURSOR_SHOW      "\033[?25h"
#define DECTCEM_CURSOR_HIDE      "\033[?25l"
#define TERMINAL_MOUSE_OFF       "\033[?1000l\033[?1003l"
#define TERMINAL_MOUSE_ON_BASIC  "\033[?1000h"
#define TERMINAL_MOUSE_ON_DRAG   "\033[?1000h\033[?1003h" /* +ON_BASIC for wider */
#define TERMINAL_MOUSE_ON_FULL   "\033[?1000h\033[?1004h" /* compatibility */
#define XTERM_ALTSCREEN_ON       "\033[?47h"
#define XTERM_ALTSCREEN_OFF      "\033[?47l"

/*************************** input handling *************************/

#if !__COSMOPOLITAN__
#include <termios.h>
#include <errno.h>
#include <signal.h>
#endif

#define DELAY_MS  100  

#ifndef MIN
#define MIN(a,b) (((a)<(b))?(a):(b))
#endif

static int  size_changed = 0;       /* XXX: global state */
static int  ctx_term_signal_installed = 0;   /* XXX: global state */

/* Escape sequences selecting the terminal's mouse reporting mode, from off
 * to the most verbose; the list is NULL terminated.
 * NOTE(review): the order presumably matches the NC_MOUSE_* values -
 * confirm against their definition. */
static const char *mouse_modes[]=
{TERMINAL_MOUSE_OFF,
 TERMINAL_MOUSE_ON_BASIC,
 TERMINAL_MOUSE_ON_DRAG,
 TERMINAL_MOUSE_ON_FULL,
 NULL};

/* One entry of the table mapping terminal input sequences to key names.
 *
 * Note that a nick can have multiple occurrences (different terminals send
 * different sequences for the same key); the labels should be kept the same
 * for all occurrences of a combination. */
typedef struct NcKeyCode {
  const char *nick;          /* programmers name for key (combo) */
  const char *label;         /* utf8 label for key */
  const char  sequence[10];  /* terminal sequence */
} NcKeyCode;
/* Table of known terminal input sequences: key name, UTF-8 label and the
 * bytes the terminal sends for the key.  The same key can be listed several
 * times with different sequences.  The table is terminated by an entry with
 * a NULL nick.
 *
 * NOTE(review): the "\033[1x~" function key sequences listed here differ
 * from the assignments documented for xterm (where for instance F5 is
 * "\033[15~" and F6 is "\033[17~") - confirm which terminals they target.
 */
static const NcKeyCode keycodes[]={

  {"up",                  "↑",     "\033[A"},
  {"down",                "↓",     "\033[B"},
  {"right",               "→",     "\033[C"},
  {"left",                "←",     "\033[D"},

  {"shift-up",            "⇧↑",    "\033[1;2A"},
  {"shift-down",          "⇧↓",    "\033[1;2B"},
  {"shift-right",         "⇧→",    "\033[1;2C"},
  {"shift-left",          "⇧←",    "\033[1;2D"},

  {"alt-up",              "^↑",    "\033[1;3A"},
  {"alt-down",            "^↓",    "\033[1;3B"},
  {"alt-right",           "^→",    "\033[1;3C"},
  {"alt-left",            "^←",    "\033[1;3D"},

  {"alt-shift-up",        "alt-s↑", "\033[1;4A"},
  {"alt-shift-down",      "alt-s↓", "\033[1;4B"},
  {"alt-shift-right",     "alt-s→", "\033[1;4C"},
  {"alt-shift-left",      "alt-s←", "\033[1;4D"},

  {"control-up",          "^↑",    "\033[1;5A"},
  {"control-down",        "^↓",    "\033[1;5B"},
  {"control-right",       "^→",    "\033[1;5C"},
  {"control-left",        "^←",    "\033[1;5D"},

  /* putty */
  {"control-up",          "^↑",    "\033OA"},
  {"control-down",        "^↓",    "\033OB"},
  {"control-right",       "^→",    "\033OC"},
  {"control-left",        "^←",    "\033OD"},

  {"control-shift-up",    "^⇧↑",   "\033[1;6A"},
  {"control-shift-down",  "^⇧↓",   "\033[1;6B"},
  {"control-shift-right", "^⇧→",   "\033[1;6C"},
  {"control-shift-left",  "^⇧←",   "\033[1;6D"},

  {"control-up",          "^↑",    "\033Oa"},
  {"control-down",        "^↓",    "\033Ob"},
  {"control-right",       "^→",    "\033Oc"},
  {"control-left",        "^←",    "\033Od"},

  {"shift-up",            "⇧↑",    "\033[a"},
  {"shift-down",          "⇧↓",    "\033[b"},
  {"shift-right",         "⇧→",    "\033[c"},
  {"shift-left",          "⇧←",    "\033[d"},

  {"insert",              "ins",   "\033[2~"},
  {"delete",              "del",   "\033[3~"},
  {"page-up",             "PgUp",  "\033[5~"},
  {"page-down",           "PgDn",  "\033[6~"},
  {"home",                "Home",  "\033OH"},
  {"end",                 "End",   "\033OF"},
  {"home",                "Home",  "\033[H"},
  {"end",                 "End",   "\033[F"},
  {"control-delete",      "^del",  "\033[3;5~"},
  {"shift-delete",        "⇧del",  "\033[3;2~"},
  {"control-shift-delete","^⇧del", "\033[3;6~"},

  {"F1",        "F1",  "\033[10~"},
  {"F2",        "F2",  "\033[11~"},
  {"F3",        "F3",  "\033[12~"},
  {"F4",        "F4",  "\033[13~"},
  {"F1",        "F1",  "\033OP"},
  {"F2",        "F2",  "\033OQ"},
  {"F3",        "F3",  "\033OR"},
  {"F4",        "F4",  "\033OS"},
  {"F5",        "F5",  "\033[15~"},
  {"F6",        "F6",  "\033[16~"},
  {"F7",        "F7",  "\033[17~"},
  {"F8",        "F8",  "\033[18~"},
  {"F9",        "F9",  "\033[19~"},
  {"F9",        "F9",  "\033[20~"},
  {"F10",       "F10", "\033[21~"},
  {"F11",       "F11", "\033[22~"},
  {"F12",       "F12", "\033[23~"},
  {"tab",       "↹",     {9, '\0'}},
  {"shift-tab", "shift+↹",  "\033[Z"},
  {"backspace", "⌫",  {127, '\0'}},
  {"space",     "␣",   " "},
  {"esc",        "␛",  "\033"},
  {"return",    "⏎",  {10,0}},
  {"return",    "⏎",  {13,0}},
  /* this section could be autogenerated by code */
  {"control-a", "^A",  {1,0}},
  {"control-b", "^B",  {2,0}},
  {"control-c", "^C",  {3,0}},
  {"control-d", "^D",  {4,0}},
  {"control-e", "^E",  {5,0}},
  {"control-f", "^F",  {6,0}},
  {"control-g", "^G",  {7,0}},
  {"control-h", "^H",  {8,0}}, /* backspace? */
  {"control-i", "^I",  {9,0}}, /* tab */
  {"control-j", "^J",  {10,0}},
  {"control-k", "^K",  {11,0}},
  {"control-l", "^L",  {12,0}},
  {"control-n", "^N",  {14,0}},
  {"control-o", "^O",  {15,0}},
  {"control-p", "^P",  {16,0}},
  {"control-q", "^Q",  {17,0}},
  {"control-r", "^R",  {18,0}},
  {"control-s", "^S",  {19,0}},
  {"control-t", "^T",  {20,0}},
  {"control-u", "^U",  {21,0}},
  {"control-v", "^V",  {22,0}},
  {"control-w", "^W",  {23,0}},
  {"control-x", "^X",  {24,0}},
  {"control-y", "^Y",  {25,0}},
  {"control-z", "^Z",  {26,0}},
  {"alt-0",     "%0",  "\0330"},
  {"alt-1",     "%1",  "\0331"},
  {"alt-2",     "%2",  "\0332"},
  {"alt-3",     "%3",  "\0333"},
  {"alt-4",     "%4",  "\0334"},
  {"alt-5",     "%5",  "\0335"},
  {"alt-6",     "%6",  "\0336"},
  {"alt-7",     "%7",  "\0337"}, /* backspace? */
  {"alt-8",     "%8",  "\0338"},
  {"alt-9",     "%9",  "\0339"},
  {"alt-+",     "%+",  "\033+"},
  {"alt--",     "%-",  "\033-"},
  {"alt-/",     "%/",  "\033/"},
  {"alt-a",     "%A",  "\033a"},
  {"alt-b",     "%B",  "\033b"},
  {"alt-c",     "%C",  "\033c"},
  {"alt-d",     "%D",  "\033d"},
  {"alt-e",     "%E",  "\033e"},
  {"alt-f",     "%F",  "\033f"},
  {"alt-g",     "%G",  "\033g"},
  {"alt-h",     "%H",  "\033h"}, /* backspace? */
  {"alt-i",     "%I",  "\033i"},
  {"alt-j",     "%J",  "\033j"},
  {"alt-k",     "%K",  "\033k"},
  {"alt-l",     "%L",  "\033l"},
  {"alt-m",     "%M",  "\033m"},
  {"alt-n",     "%N",  "\033n"},
  {"alt-o",     "%O",  "\033o"},
  {"alt-p",     "%P",  "\033p"},
  {"alt-q",     "%Q",  "\033q"},
  {"alt-r",     "%R",  "\033r"},
  {"alt-s",     "%S",  "\033s"},
  {"alt-t",     "%T",  "\033t"},
  {"alt-u",     "%U",  "\033u"},
  {"alt-v",     "%V",  "\033v"},
  {"alt-w",     "%W",  "\033w"},
  {"alt-x",     "%X",  "\033x"},
  {"alt-y",     "%Y",  "\033y"},
  {"alt-z",     "%Z",  "\033z"},
  {"shift-tab", "shift-↹", {27, 9, 0}},
  /* Linux Console  */
  {"home",      "Home", "\033[1~"},
  {"end",       "End",  "\033[4~"},
  {"F1",        "F1",   "\033[[A"},
  {"F2",        "F2",   "\033[[B"},
  {"F3",        "F3",   "\033[[C"},
  {"F4",        "F4",   "\033[[D"},
  {"F5",        "F5",   "\033[[E"},
  {"F6",        "F6",   "\033[[F"},
  {"F7",        "F7",   "\033[[G"},
  {"F8",        "F8",   "\033[[H"},
  {"F9",        "F9",   "\033[[I"},
  {"F10",       "F10",  "\033[[J"},
  {"F11",       "F11",  "\033[[K"},
  {"F12",       "F12",  "\033[[L"}, 
  {"ok",        "",     "\033[0n"},
  {NULL, }
};

/* State of the terminal raw mode handling. */
static struct termios orig_attr;    /* in order to restore at exit */
static int    nc_is_raw = 0;          /* set by _nc_raw(), cleared by _nc_noraw() */
static int    atexit_registered = 0;  /* set on the first call of _nc_raw() */
static int    mouse_mode = NC_MOUSE_NONE;

/* Leave raw mode: restore the terminal attributes saved in orig_attr.  Does
 * nothing when the terminal is not in raw mode; the raw flag is only cleared
 * when restoring the attributes succeeded.
 */
static void _nc_noraw (void)
{
  if (!nc_is_raw)
    return;
  if (tcsetattr (STDIN_FILENO, TCSAFLUSH, &orig_attr) == -1)
    return;
  nc_is_raw = 0;
}

/* Put the terminal back into its normal state: turn mouse reporting off,
 * leave the alternate screen, restore the saved terminal attributes and
 * write the "?25h", "?201l" and "?1049l" mode sequences to stdout.
 */
void
nc_at_exit (void)
{
  fputs (TERMINAL_MOUSE_OFF, stdout);
  fputs (XTERM_ALTSCREEN_OFF, stdout);
  _nc_noraw ();
  fputs ("\e[?25h", stdout);
  //if (ctx_native_events)
  fputs ("\e[?201l", stdout);
  fputs ("\e[?1049l", stdout);
}

/* Read one three byte packet from n->mouse_fd and turn it into an event.
 *
 * Byte 0 of the packet holds the button bits, byte 1 the signed x movement
 * and byte 2 the signed y movement (with inverted sign).  A tenth of the
 * movement is added to the pointer position kept in @n, which is clamped to
 * 1..width and 1..height, and the position is stored in @x / @y (each may
 * be NULL).
 *
 * Returns the event name: "pm" by default, "pp" / "pd" when bit 0 got set /
 * stays set, and correspondingly "mouse2-press" / "mouse2-drag" for bit 1
 * and "mouse1-press" / "mouse1-drag" for bit 2, where later checks override
 * earlier ones.
 *
 * When no complete packet could be read, "pm" is returned with the current
 * position, and the remembered button state is left unchanged.
 */
static const char *mouse_get_event_int (Ctx *n, int *x, int *y)
{
  static int prev_state = 0;  /* button bits of the previous packet */
  const char *ret = "pm";
  float relx, rely;
  signed char buf[3] = {0, 0, 0};

  if (read (n->mouse_fd, buf, sizeof (buf)) != (int) sizeof (buf))
    {
      /* failed or short read: do not interpret an incomplete packet */
      if (x) *x = n->mouse_x;
      if (y) *y = n->mouse_y;
      return ret;
    }

  relx = buf[1];
  rely = -buf[2];

  n->mouse_x += relx * 0.1;
  n->mouse_y += rely * 0.1;

  if (n->mouse_x < 1) n->mouse_x = 1;
  if (n->mouse_y < 1) n->mouse_y = 1;
  if (n->mouse_x >= n->width)  n->mouse_x = n->width;
  if (n->mouse_y >= n->height) n->mouse_y = n->height;

  if (x) *x = n->mouse_x;
  if (y) *y = n->mouse_y;

  if ((prev_state & 1) != (buf[0] & 1))
    {
      if (buf[0] & 1) ret = "pp";
    }
  else if (buf[0] & 1)
    ret = "pd";

  if ((prev_state & 2) != (buf[0] & 2))
    {
      if (buf[0] & 2) ret = "mouse2-press";
    }
  else if (buf[0] & 2)
    ret = "mouse2-drag";

  if ((prev_state & 4) != (buf[0] & 4))
    {
      if (buf[0] & 4) ret = "mouse1-press";
    }
  else if (buf[0] & 4)
    ret = "mouse1-drag";

  prev_state = buf[0];
  return ret;
}

/* The single queued mouse event: filled in by mouse_has_event() and handed
 * out by mouse_get_event(). */
static const char *mev_type = NULL;  /* event name */
static int         mev_x = 0;        /* pointer position of the event */
static int         mev_y = 0;
static int         mev_q = 0;        /* non-zero while an event is queued */

/* Take the queued mouse event, if there is one: its position is stored in
 * @x / @y (both must be valid pointers), the queue is marked empty and the
 * event name is returned.  Returns NULL, leaving @x / @y untouched, when no
 * event is queued.  @n is not used.
 */
static const char *mouse_get_event (Ctx  *n, int *x, int *y)
{
  const char *type = NULL;

  if (mev_q)
    {
      *x    = mev_x;
      *y    = mev_y;
      type  = mev_type;
      mev_q = 0;
    }
  return type;
}

/* Report whether a mouse event is queued (mev_q).
 *
 * Returns 0 when mouse reporting is off, 1 when an event is already queued.
 *
 * NOTE(review): the unconditional `return 0;` below makes everything after
 * it unreachable, so polling of n->mouse_fd is currently disabled - confirm
 * whether that is intentional before removing either the return or the code.
 */
static int mouse_has_event (Ctx *n)
{
  struct timeval tv;
  int retval;

  if (mouse_mode == NC_MOUSE_NONE)
    return 0;

  if (mev_q)
    return 1;

  if (n->mouse_fd == 0)
    return 0;
  return 0;

  /* ---- unreachable from here on ---- */
  {
    /* zero-timeout poll of the mouse fd */
    fd_set rfds;
    FD_ZERO (&rfds);
    FD_SET(n->mouse_fd, &rfds);
    tv.tv_sec = 0; tv.tv_usec = 0;
    retval = select (n->mouse_fd+1, &rfds, NULL, NULL, &tv);
  }

  if (retval != 0)
    {
      int nx = 0, ny = 0;
      const char *type = mouse_get_event_int (n, &nx, &ny);

      /* NOTE(review): mouse_get_event_int() never returns "drag" or "motion",
       * so these comparisons look stale - confirm */
      if ((mouse_mode < NC_MOUSE_DRAG && mev_type && !strcmp (mev_type, "drag")) ||
          (mouse_mode < NC_MOUSE_ALL && mev_type && !strcmp (mev_type, "motion")))
        {
          mev_q = 0;
          return mouse_has_event (n);
        }

      /* drop repeated motion/drag events that did not change position */
      if ((mev_type && !strcmp (type, mev_type) && !strcmp (type, "pm")) ||
         (mev_type && !strcmp (type, mev_type) && !strcmp (type, "mouse1-drag")) ||
         (mev_type && !strcmp (type, mev_type) && !strcmp (type, "mouse2-drag")))
        {
          if (nx == mev_x && ny == mev_y)
          {
            mev_q = 0;
            return mouse_has_event (n);
          }
        }
      /* queue the event for mouse_get_event() */
      mev_x = nx;
      mev_y = ny;
      mev_type = type;
      mev_q = 1;
    }
  return retval != 0;
}


/* Put stdin into raw mode, remembering the previous attributes in orig_attr.
 *
 * Returns 0 on success and -1 when stdin is not a terminal or one of the
 * termios calls fails. Sets nc_is_raw once the new attributes are applied.
 */
static int _nc_raw (void)
{
  struct termios attr;

  if (!isatty (STDIN_FILENO))
    return -1;

  if (!atexit_registered)
    {
      //atexit (nc_at_exit);
      atexit_registered = 1;
    }

  if (tcgetattr (STDIN_FILENO, &orig_attr) == -1)
    return -1;

  /* start from the original mode and strip input/output processing */
  attr = orig_attr;
  attr.c_iflag &= ~(BRKINT | ICRNL | INPCK | ISTRIP | IXON);
  attr.c_oflag &= ~(OPOST);
  attr.c_lflag &= ~(ECHO | ICANON | IEXTEN | ISIG);
  attr.c_cc[VMIN]  = 1; /* read() returns after 1 byte */
  attr.c_cc[VTIME] = 0; /* no inter-byte timer */

  if (tcsetattr (STDIN_FILENO, TCSAFLUSH, &attr) < 0)
    return -1;
  nc_is_raw = 1;

#if !__COSMOPOLITAN__
  tcdrain(STDIN_FILENO);
  tcflush(STDIN_FILENO, 1); /* NOTE(review): literal queue selector 1 - confirm intended TC*FLUSH constant */
#endif
  return 0;
}

/* Match the first `length` bytes of buf against the keycodes table.
 *
 * Return values:
 *   9001  buf starts with "\033[M" and holds at least 6 bytes (mouse report)
 *   2342  buf is a (possibly partial) "\033[M" prefix shorter than 6 bytes
 *   1     buf is exactly one table sequence; *ret points at the entry
 *   2     exactly one table sequence has buf as a proper prefix (*ret untouched)
 *   0     nothing matches; *ret is set to NULL
 *   n     otherwise the number of sequences that have buf as prefix, with
 *         *ret set to NULL
 * An exact match is only reported when ret is non-NULL.
 */
static int match_keycode (const char *buf, int length, const NcKeyCode **ret)
{
  int n_prefix = 0;

  if (strncmp (buf, "\033[M", MIN(length,3)) == 0)
    return length >= 6 ? 9001 : 2342;

  for (int k = 0; keycodes[k].nick; k++)
    {
      if (strncmp (buf, keycodes[k].sequence, length) != 0)
        continue;

      n_prefix++;
      if ((int)strlen (keycodes[k].sequence) == length && ret)
        {
          *ret = &keycodes[k];
          return 1;
        }
    }

  if (n_prefix == 1)
    return 2;

  if (ret)
    *ret = NULL;
  return n_prefix;
}

/* Signal handler (installed for SIGWINCH by the event readers in this file):
 * flags that the terminal size changed; the signal number is ignored.
 */
static void nc_resize_term (int  dummy)
{
  (void) dummy;
  size_changed = 1;
}

/* Wait up to delay_ms milliseconds for input on stdin.
 *
 * Returns 1 when stdin is readable or a terminal resize has been flagged
 * (size_changed), 0 otherwise - including when select() fails.
 *
 * delay_ms is normalised before use: negative values are treated as 0 and
 * values of a second or more are split into seconds and microseconds, since
 * select() rejects a tv_usec outside [0, 1000000).
 */
int ctx_nct_has_event (Ctx  *n, int delay_ms)
{
  struct timeval tv;
  int retval;
  fd_set rfds;

  if (size_changed)
    return 1;
  if (delay_ms < 0)
    delay_ms = 0;
  FD_ZERO (&rfds);
  FD_SET (STDIN_FILENO, &rfds);
  tv.tv_sec  = delay_ms / 1000;
  tv.tv_usec = (delay_ms % 1000) * 1000;
  retval = select (STDIN_FILENO + 1, &rfds, NULL, NULL, &tv);
  /* a SIGWINCH may have arrived while we were waiting */
  if (size_changed)
    return 1;
  return retval == 1;
}

/* Fetch the next terminal input event as a string.
 *
 * @n:         context, passed on to the mouse/poll helpers
 * @timeoutms: when non-zero, "idle" is returned once this many ms have
 *             elapsed without input; 0 waits indefinitely
 * @x, @y:     optional out parameters; set to -1 first, then to the cell
 *             coordinates for mouse reports
 *
 * On first use this switches stdin to raw mode and installs the SIGWINCH
 * handler.
 *
 * Returns a keycodes nick, a mouse event name ("pp", "pd", "pm", ...),
 * "size-changed", "idle", "esc", the raw character(s) for unmatched
 * ascii/utf8 input, a diagnostic string for unmatched sequences,
 * "key read eek" when reading stdin fails or hits end of file, "fail" when
 * 10 bytes produced no decision, or NULL (frame acknowledgement, or an
 * exact match without an entry). Strings other than literals live in static
 * buffers that are overwritten by later calls.
 */
const char *ctx_nct_get_event (Ctx *n, int timeoutms, int *x, int *y)
{
  unsigned char buf[20];
  int length;


  if (x) *x = -1;
  if (y) *y = -1;

  if (!ctx_term_signal_installed)
    {
      _nc_raw ();
      ctx_term_signal_installed = 1;
      signal (SIGWINCH, nc_resize_term);
    }
  if (mouse_mode) // XXX too often to do it all the time!
    printf("%s", mouse_modes[mouse_mode]);

  /* poll in DELAY_MS slices until something is available */
  {
    int elapsed = 0;
    int got_event = 0;

    do {
      if (size_changed)
        {
          size_changed = 0;
          return "size-changed";
        }
      got_event = mouse_has_event (n);
      if (!got_event)
        got_event = ctx_nct_has_event (n, MIN(DELAY_MS, timeoutms-elapsed));
      if (size_changed)
        {
          size_changed = 0;
          return "size-changed";
        }
      /* only do this if the client has asked for idle events,
       * and perhaps programmed the ms timer?
       */
      elapsed += MIN(DELAY_MS, timeoutms-elapsed);
      if (!got_event && timeoutms && elapsed >= timeoutms)
        return "idle";
    } while (!got_event);
  }

  if (mouse_has_event (n))
    return mouse_get_event (n, x, y);

  /* accumulate bytes one at a time until the sequence is decided;
   * read() must deliver exactly one byte - 0 (end of file) and -1 (error)
   * both leave buf[length] unwritten */
  for (length = 0; length < 10; length ++)
    if (read (STDIN_FILENO, &buf[length], 1) == 1)
      {
        const NcKeyCode *match = NULL;

        /* special case ESC, so that we can use it alone in keybindings */
        if (length == 0 && buf[0] == 27)
          {
            struct timeval tv;
            fd_set rfds;
            FD_ZERO (&rfds);
            FD_SET (STDIN_FILENO, &rfds);
            tv.tv_sec = 0;
            tv.tv_usec = 1000 * DELAY_MS;
            if (select (1, &rfds, NULL, NULL, &tv) == 0)
              return "esc";
          }

        switch (match_keycode ((const char*)buf, length + 1, &match))
          {
            case 1: /* unique match */
              if (!match)
                return NULL;
              if (!strcmp(match->nick, "ok"))
              {
                ctx_frame_ack = 1;
                return NULL;
              }
              return match->nick;
              break;
            case 9001: /* mouse event */
              /* coordinates are transmitted offset by 32 */
              if (x) *x = ((unsigned char)buf[4]-32)*1.0;
              if (y) *y = ((unsigned char)buf[5]-32)*1.0;
              switch (buf[3])
                {
                        /* XXX : todo reduce this to less string constants */
                  case 32:  return "pp";
                  case 33:  return "mouse1-press";
                  case 34:  return "mouse2-press";
                  case 40:  return "alt-pp";
                  case 41:  return "alt-mouse1-press";
                  case 42:  return "alt-mouse2-press";
                  case 48:  return "control-pp";
                  case 49:  return "control-mouse1-press";
                  case 50:  return "control-mouse2-press";
                  case 56:  return "alt-control-pp";
                  case 57:  return "alt-control-mouse1-press";
                  case 58:  return "alt-control-mouse2-press";
                  case 64:  return "pd";
                  case 65:  return "mouse1-drag";
                  case 66:  return "mouse2-drag";
                  case 71:  return "pm"; /* shift+motion */
                  case 72:  return "alt-pd";
                  case 73:  return "alt-mouse1-drag";
                  case 74:  return "alt-mouse2-drag";
                  case 75:  return "pm"; /* alt+motion */
                  case 80:  return "control-pd";
                  case 81:  return "control-mouse1-drag";
                  case 82:  return "control-mouse2-drag";
                  case 83:  return "pm"; /* ctrl+motion */
                  case 91:  return "pm"; /* ctrl+alt+motion */
                  case 95:  return "pm"; /* ctrl+alt+shift+motion */
                  case 96:  return "scroll-up";
                  case 97:  return "scroll-down";
                  case 100: return "shift-scroll-up";
                  case 101: return "shift-scroll-down";
                  case 104: return "alt-scroll-up";
                  case 105: return "alt-scroll-down";
                  case 112: return "control-scroll-up";
                  case 113: return "control-scroll-down";
                  case 116: return "control-shift-scroll-up";
                  case 117: return "control-shift-scroll-down";
                  case 35: /* (or release) */
                  case 51: /* (or ctrl-release) */
                  case 43: /* (or alt-release) */
                  case 67: return "pm";
                           /* have a separate pd ? */
                  default: {
                             static char rbuf[100];
                             snprintf (rbuf, sizeof (rbuf), "mouse (unhandled state: %i)", buf[3]);
                             return rbuf;
                           }
                }
            case 0: /* no matches, bail*/
              { 
                static char ret[256];
                if (length == 0 && ctx_utf8_len (buf[0])>1) /* single unicode
                                                               char */
                  {
                    int n_read = 
                    read (STDIN_FILENO, &buf[length+1], ctx_utf8_len(buf[0])-1);
                    if (n_read <= 0)
                      return "key read eek";
                    /* terminate after the bytes actually received */
                    buf[1 + n_read]=0;
                    strcpy (ret, (const char*)buf);
                    return ret;
                  }
                if (length == 0) /* ascii */
                  {
                    buf[1]=0;
                    strcpy (ret, (const char*)buf);
                    return ret;
                  }
                snprintf (ret, sizeof (ret), "unhandled %i:'%c' %i:'%c' %i:'%c' %i:'%c' %i:'%c' %i:'%c' %i:'%c'",
                  length>=0? buf[0]: 0, length>=0? buf[0]>31?buf[0]:'?': ' ', 
                  length>=1? buf[1]: 0, length>=1? buf[1]>31?buf[1]:'?': ' ', 
                  length>=2? buf[2]: 0, length>=2? buf[2]>31?buf[2]:'?': ' ', 
                  length>=3? buf[3]: 0, length>=3? buf[3]>31?buf[3]:'?': ' ',
                  length>=4? buf[4]: 0, length>=4? buf[4]>31?buf[4]:'?': ' ',
                  length>=5? buf[5]: 0, length>=5? buf[5]>31?buf[5]:'?': ' ',
                  length>=6? buf[6]: 0, length>=6? buf[6]>31?buf[6]:'?': ' ');
                return ret;
              }
              return NULL;
            default: /* continue */
              break;
          }
      }
    else
      return "key read eek";
  return "fail";
}

/* Drain pending terminal events and feed them into ctx.
 *
 * Events are fetched with a 50 ms timeout until ctx_nct_get_event() reports
 * "idle" or returns NULL (which it does for frame acknowledgements, among
 * others). Pointer coordinates are converted from terminal cells to ctx
 * units using the backend's cols/rows; other strings are forwarded as key
 * presses.
 */
void ctx_nct_consume_events (Ctx *ctx)
{
  int ix = -1, iy = -1;
  CtxCtx *ctxctx = (CtxCtx*)ctx->backend;
  const char *event = NULL;

  do {
    float x, y;
    event = ctx_nct_get_event (ctx, 50, &ix, &iy);
    if (!event) /* nothing to dispatch; strcmp() below must not see NULL */
      break;

    x = (ix - 1.0 + 0.5) / ctxctx->cols * ctx->width;
    y = (iy - 1.0)       / ctxctx->rows * ctx->height;

    if (!strcmp (event, "pp"))
    {
      ctx_pointer_press (ctx, x, y, 0, 0);
      ctxctx->was_down = 1;
    } else if (!strcmp (event, "pr"))
    {
      ctx_pointer_release (ctx, x, y, 0, 0);
      ctxctx->was_down = 0;
    } else if (!strcmp (event, "pm"))
    {
      //nct_set_cursor_pos (backend->term, ix, iy);
      //nct_flush (backend->term);
      /* plain motion after a press implies the button was released */
      if (ctxctx->was_down)
      {
        ctx_pointer_release (ctx, x, y, 0, 0);
        ctxctx->was_down = 0;
      }
      ctx_pointer_motion (ctx, x, y, 0, 0);
    } else if (!strcmp (event, "pd"))
    {
      ctx_pointer_motion (ctx, x, y, 0, 0);
    } else if (!strcmp (event, "size-changed"))
    {
      /* resize handling is currently disabled */
      //if (ctx_backend_is_ctx (ctx))
#if 0
      {
        int width = ctx_terminal_width ();
        int height = ctx_terminal_height ();
        ctx_set_size (ctx, width, height);
      }
#endif

    }
    else
    {
      if (!strcmp (event, "esc"))
        ctx_key_press (ctx, 0, "escape", 0);
      else if (!strcmp (event, "space"))
        ctx_key_press (ctx, 0, "space", 0);
      else if (!strcmp (event, "enter"))
        ctx_key_press (ctx, 0, "\n", 0);
      else if (!strcmp (event, "return"))
        ctx_key_press (ctx, 0, "return", 0);
      else if (!strcmp (event, "idle"))
      {
        event = NULL; /* ends the loop */
      }
      else
      ctx_key_press (ctx, 0, event, 0);
    }
  }  while (event);
}

/* Read one newline-terminated event line from stdin.
 *
 * @timeoutms: when non-zero, "idle" is returned after this many ms without
 *             input; 0 waits indefinitely
 *
 * On first use this switches stdin to raw mode and installs the SIGWINCH
 * handler. Returns "size-changed", "idle", the received line without its
 * trailing newline (in a static buffer overwritten by later calls), or NULL
 * when a "\e[0n" frame acknowledgement was received, input dried up
 * mid-line, 200 bytes arrived without a newline, or reading stdin failed or
 * hit end of file.
 */
const char *ctx_native_get_event (Ctx *n, int timeoutms)
{
  static unsigned char buf[256];
  int length;
  int got_event = 0;

  if (!ctx_term_signal_installed)
    {
      _nc_raw ();
      ctx_term_signal_installed = 1;
      signal (SIGWINCH, nc_resize_term);
    }

  /* poll in DELAY_MS slices until stdin has data */
  {
    int elapsed = 0;

    do {
      if (size_changed)
        {
          size_changed = 0;
          return "size-changed";
        }
      got_event = ctx_nct_has_event (n, MIN(DELAY_MS, timeoutms-elapsed));
      if (size_changed)
        {
          size_changed = 0;
          return "size-changed";
        }
      /* only do this if the client has asked for idle events,
       * and perhaps programmed the ms timer?
       */
      elapsed += MIN(DELAY_MS, timeoutms-elapsed);
      if (!got_event && timeoutms && elapsed >= timeoutms)
      {
        return "idle";
      }
    } while (!got_event);
  }

  for (length = 0; got_event && length < 200; length ++)
  {
    /* stop on error or end of file instead of comparing stale buffer bytes */
    if (read (STDIN_FILENO, &buf[length], 1) != 1)
      break;

    buf[length+1] = 0;
    if (!strcmp ((char*)buf, "\e[0n"))
    {
      ctx_frame_ack = 1;
      return NULL;
    }
    if (buf[length]=='\n')
    {
      buf[length]=0;
      return (const char*)buf;
    }
    /* allow up to 5 ms for the next byte of the line */
    got_event = ctx_nct_has_event (n, 5);
  }
  return NULL;
}

/* Look up the label of the first keycodes entry whose nick equals `nick`.
 * Returns NULL when nick is NULL or no entry matches. `n` is unused.
 */
const char *ctx_key_get_label (Ctx  *n, const char *nick)
{
  int j;
  if (!nick)
    return NULL;
  for (j = 0; keycodes[j].nick; j++)
    if (!strcmp (keycodes[j].nick, nick))
      return keycodes[j].label;
  return NULL;
}

/* Select the terminal mouse reporting mode.
 *
 * When `mode` differs from the current mouse_mode, the matching entry of
 * mouse_modes is written to stdout and flushed. mouse_mode is updated in
 * every case. `term` is unused; `mode` is used as an index unchecked.
 */
void _ctx_mouse (Ctx *term, int mode)
{
  int mode_changed = (mode != mouse_mode);

  if (mode_changed)
  {
    printf ("%s", mouse_modes[mode]);
    fflush (stdout);
  }
  mouse_mode = mode;
}


#endif

#if !__COSMOPOLITAN__
#include <sys/time.h>
#endif

#ifdef EMSCRIPTEN
#include "emscripten.h"
#endif

/* Microseconds of the struct timeval `time`, with its seconds taken
 * relative to start_time (start_time must be in scope at the use site). */
#define usecs(time) \
  ((uint64_t)((time).tv_sec - start_time.tv_sec) * 1000000 + (time).tv_usec)

#if !__COSMOPOLITAN__
/* Reference point for the tick counter; filled in by _ctx_init_ticks(). */
static struct timeval start_time;

/* Record the current time in start_time on the first call; later calls
 * leave it untouched.
 */
static void
_ctx_init_ticks (void)
{
  static int initialized = 0;

  if (!initialized)
    {
      initialized = 1;
      gettimeofday (&start_time, NULL);
    }
}

static inline unsigned long
_ctx_ticks (void)
{
  struct timeval measure_time;
  gettimeofday (&measure_time, NULL);
  return usecs (measure_time) - usecs (start_time);
}

/* Public tick counter: microseconds since the first tick-related call.
 * Makes sure the reference time is initialised before measuring.
 */
CTX_EXPORT unsigned long
ctx_ticks (void)
{
  _ctx_init_ticks ();
  return _ctx_ticks ();
}



/* Bit flags; only one bit is defined so far. */
enum _CtxFlags {
   CTX_FLAG_DIRECT = (1<<0), /* NOTE(review): meaning not visible in this section - confirm at use sites */
};
typedef enum _CtxFlags CtxFlags;


/* Number of threads to use; adjusted by ctx_new_ui() from the CTX_THREADS
 * environment variable or the processor count. */
int _ctx_max_threads = 1;
/* Non-zero enables the hash cache; cleared by ctx_new_ui() when the
 * CTX_HASH_CACHE environment variable is "0" or "off". */
int _ctx_enable_hash_cache = 1;
#if CTX_SHAPE_CACHE
extern int _ctx_shape_cache_enabled;
#endif

#if CTX_THREADS
/* Mutex behind _ctx_texture_lock() / _ctx_texture_unlock();
 * initialised in ctx_new_ui(). */
static mtx_t _ctx_texture_mtx;
#endif

/* Acquire the texture mutex; a no-op when built without CTX_THREADS. */
void _ctx_texture_lock (void)
{
#if CTX_THREADS
  mtx_lock (&_ctx_texture_mtx);
#endif
}

/* Release the texture mutex; a no-op when built without CTX_THREADS. */
void _ctx_texture_unlock (void)
{
#if CTX_THREADS
  mtx_unlock (&_ctx_texture_mtx);
#endif
}

/* Library initialisation hook; currently does nothing, both arguments are
 * unused.
 *
 * The disabled code below would re-exec the program under `ctx -e --`
 * unless CTX_BACKEND is already "ctx".
 */
void
ctx_init (int *argc, char ***argv)
{
#if 0
  const char *backend = getenv ("CTX_BACKEND");
  if (!backend || strcmp (backend, "ctx"))
  {
    int i;
    char *new_argv[*argc+5];
    new_argv[0] = "ctx";
    new_argv[1] = "-e";
    new_argv[2] = "--";
    for (i = 0; i < *argc; i++)
    {
      new_argv[i+3] = *argv[i];
    }
    new_argv[i+3] = NULL;
    execvp (new_argv[0], new_argv);
  }
#endif
}

#if 0
/* Disabled: would return the number of entries in ctx's drawlist. */
int ctx_count (Ctx *ctx)
{
  return ctx->drawlist.count;
}
#endif

extern int _ctx_damage_control;


#if CTX_EVENTS

/* Print the backend names accepted through CTX_BACKEND to stderr.
 * The list depends on which backends were compiled in and includes
 * "headless" when CTX_HEADLESS is enabled, since ctx_new_ui() accepts it.
 */
void ctx_list_backends(void)
{
    fprintf (stderr, "possible values for CTX_BACKEND:\n");
    fprintf (stderr, " ctx");
#if CTX_HEADLESS
    fprintf (stderr, " headless");
#endif
#if CTX_SDL
    fprintf (stderr, " SDL");
#endif
#if CTX_KMS
    fprintf (stderr, " kms");
#endif
#if CTX_FB
    fprintf (stderr, " fb");
#endif
    fprintf (stderr, " term");
    fprintf (stderr, " termimg");
    fprintf (stderr, "\n");
}

/* Milliseconds since the tick reference time; `ctx` is unused. */
static uint32_t ctx_ms (Ctx *ctx)
{
  unsigned long elapsed_us = _ctx_ticks ();
  return elapsed_us / 1000;
}

static int is_in_ctx (void);

#if EMSCRIPTEN

/* Provided elsewhere in the emscripten build. */
CTX_EXPORT Ctx *
get_context (void);

/* Emscripten variant: width, height and backend are ignored and the context
 * returned by get_context() is used. */
static Ctx *ctx_new_ui (int width, int height, const char *backend)
{
   return get_context ();
}
#else

/* Create an interactive context, choosing a backend.
 *
 * @width, @height: passed on to the backend constructor
 * @backend: backend name, or NULL to consult the CTX_BACKEND environment
 *           variable; "" and "auto" mean automatic selection and "list"
 *           prints the available names and exits.
 *
 * Also reads CTX_DAMAGE_CONTROL (with CTX_TILED), CTX_HASH_CACHE and
 * CTX_THREADS from the environment. Backends are tried in the order ctx,
 * headless, SDL, kms, fb, term, termimg, subject to what was compiled in;
 * with automatic selection the first one that yields a context wins.
 * Exits the process with status 2 when no backend could be created.
 */
static Ctx *ctx_new_ui (int width, int height, const char *backend)
{
#if CTX_TILED
  /* "0" or "off" disables damage control, any other value enables it */
  if (getenv ("CTX_DAMAGE_CONTROL"))
  {
    const char * val = getenv ("CTX_DAMAGE_CONTROL");
    if (!strcmp (val, "0") ||
        !strcmp (val, "off"))
      _ctx_damage_control = 0;
    else
      _ctx_damage_control = 1;
  }
#endif

  /* "0" or "off" disables the hash cache; other values leave it unchanged */
  if (getenv ("CTX_HASH_CACHE"))
  {
    const char * val = getenv ("CTX_HASH_CACHE");
    if (!strcmp (val, "0"))
      _ctx_enable_hash_cache = 0;
    if (!strcmp (val, "off"))
      _ctx_enable_hash_cache = 0;
  }

  /* thread count: explicit override, else half the online processors
   * (2 when the processor count cannot be queried) */
  if (getenv ("CTX_THREADS"))
  {
    int val = atoi (getenv ("CTX_THREADS"));
    _ctx_max_threads = val;
  }
  else
  {
    _ctx_max_threads = 2;
#ifdef _SC_NPROCESSORS_ONLN
    _ctx_max_threads = sysconf (_SC_NPROCESSORS_ONLN) / 2;
#endif
  }
  
#if CTX_THREADS
  mtx_init (&_ctx_texture_mtx, mtx_plain);
#endif

  /* clamp to [1, CTX_MAX_THREADS] */
  if (_ctx_max_threads < 1) _ctx_max_threads = 1;
  if (_ctx_max_threads > CTX_MAX_THREADS) _ctx_max_threads = CTX_MAX_THREADS;

  //fprintf (stderr, "ctx using %i threads\n", _ctx_max_threads);
  if (!backend)
    backend = getenv ("CTX_BACKEND");

  /* normalise "" and "auto" to NULL, meaning automatic selection */
  if (backend && !strcmp (backend, ""))
    backend = NULL;
  if (backend && !strcmp (backend, "auto"))
    backend = NULL;
  if (backend && !strcmp (backend, "list"))
  {
    ctx_list_backends ();
    exit (-1);
  }

  Ctx *ret = NULL;

  /* we do the query on auto but not on directly set ctx
   *
   */
  if ((backend && !strcmp(backend, "ctx")) ||
      (backend == NULL && is_in_ctx ()))
  {
    if (!backend || !strcmp (backend, "ctx"))
    {
      // full blown ctx protocol - in terminal or standalone
      ret = ctx_new_ctx (width, height);
    }
  }

#if CTX_HEADLESS
  /* headless is never picked automatically, only by name */
  if (!ret)
    {
      if (backend && !strcmp (backend, "headless"))
        ret = ctx_new_headless (width, height);
    }
#endif

#if CTX_SDL
  /* SDL is only attempted when DISPLAY is set */
  if (!ret && getenv ("DISPLAY"))
  {
    if ((backend==NULL) || (!strcmp (backend, "SDL")))
      ret = ctx_new_sdl (width, height);
  }
#endif

#if CTX_KMS
  /* kms and fb are only attempted when DISPLAY is not set */
  if (!ret && !getenv ("DISPLAY"))
  {
    if ((backend==NULL) || (!strcmp (backend, "kms")))
      ret = ctx_new_kms (width, height);
  }
#endif


#if CTX_FB
  if (!ret && !getenv ("DISPLAY"))
    {
      if ((backend==NULL) || (!strcmp (backend, "fb")))
        ret = ctx_new_fb (width, height);
    }
#endif

#if CTX_RASTERIZER
  // braille in terminal
  if (!ret)
  {
    if ((backend==NULL) || (!strcmp (backend, "term")))
    ret = ctx_new_term (width, height);
  }
  if (!ret)
  {
    if ((backend==NULL) || (!strcmp (backend, "termimg")))
    ret = ctx_new_termimg (width, height);
  }
#endif
  if (!ret)
  {
    fprintf (stderr, "no interactive ctx backend\n");
    ctx_list_backends ();
    exit (2);
  }
  ctx_get_event (ret); // enables events
  return ret;
}
#endif
#endif
#else
/* No-op variant used on the #else side of the surrounding conditional. */
void _ctx_texture_unlock (void)
{
}
/* No-op variant used on the #else side of the surrounding conditional. */
void _ctx_texture_lock (void)
{
}

#endif
void _ctx_resized (Ctx *ctx, int width, int height, long time);

/* Set the size of ctx. Nothing happens when both dimensions already match;
 * otherwise the new size is stored and, with CTX_EVENTS, _ctx_resized() is
 * called with a time of 0.
 */
void ctx_set_size (Ctx *ctx, int width, int height)
{
  if (ctx->width == width && ctx->height == height)
    return;

  ctx->width = width;
  ctx->height = height;
#if CTX_EVENTS
  _ctx_resized (ctx, width, height, 0);
#endif
}

#if CTX_EVENTS


/* Probe whether the controlling terminal is a ctx terminal.
 *
 * Temporarily switches stdin to raw mode, writes the query "\e[?200$p" to
 * stderr, waits 60 ms and then collects up to 20 reply bytes, giving each
 * byte 5 ms to arrive. The original terminal attributes are restored before
 * the reply is inspected.
 *
 * Returns 1 when the reply contains a ';' immediately followed by '2',
 * and 0 otherwise - including when stdin is not a terminal or nothing
 * was received.
 */
static int is_in_ctx (void)
{
  char buf[1024];
  struct termios orig_attr; /* local copy; shadows the file-level orig_attr */
  struct termios raw;
  int length = 0;

  if (tcgetattr (STDIN_FILENO, &orig_attr) == -1)
    return 0;
  raw = orig_attr;
  raw.c_iflag &= ~(BRKINT | ICRNL | INPCK | ISTRIP | IXON);
  raw.c_oflag &= ~(OPOST);
  raw.c_lflag &= ~(ECHO | ICANON | IEXTEN | ISIG);
  raw.c_cc[VMIN] = 1; raw.c_cc[VTIME] = 0; /* 1 byte, no timer */
  if (tcsetattr (STDIN_FILENO, TCSAFLUSH, &raw) < 0)
    return 0;
  fprintf (stderr, "\e[?200$p");
  //tcflush(STDIN_FILENO, 1);
#if !__COSMOPOLITAN__
  tcdrain(STDIN_FILENO);
#endif
  usleep (1000 * 60); // to account for possibly lowish latency ssh,
                      // should be made configurable ; perhaps in
                      // an env var

  for (int n = 0; n < 20; n++)
  {
    /* select() may modify both the set and the timeout, so re-arm them
     * for every byte */
    struct timeval tv = {0, 1000 * 5};
    fd_set rfds;

    FD_ZERO(&rfds);
    FD_SET(STDIN_FILENO, &rfds);
    if (select (STDIN_FILENO + 1, &rfds, NULL, NULL, &tv) <= 0)
      break; /* timeout or error: no more reply bytes */
    if (read (STDIN_FILENO, &buf[length], 1) != 1)
      break;
    length++;
  }
  tcsetattr (STDIN_FILENO, TCSAFLUSH, &orig_attr);

  /* terminate before searching, so strchr() only sees received bytes */
  buf[length] = 0;
  char *semi = strchr (buf, ';');
  if (semi &&  semi[1] == '2')
  {
    return 1;
  }
  return 0;
}

/* One registered idle or timeout callback. */
typedef struct CtxIdleCb {
  int (*cb) (Ctx *ctx, void *idle_data); /* returning 0 schedules removal */
  void *idle_data;                       /* passed to cb */

  void (*destroy_notify)(void *destroy_data); /* optional, called on removal */
  void *destroy_data;

  int   ticks_full;      /* interval: ms * 1000 for timeouts, -1 for idles */
  int   ticks_remaining; /* countdown to the next call; negative means call on every iteration */
  int   is_idle;         /* set to 1 by ctx_add_idle_full() */
  int   id;              /* handle returned to the caller, used by ctx_remove_idle() */
} CtxIdleCb;

/* Initialise the tick reference time and the tap/hold tuning values of
 * ctx->events.
 */
void _ctx_events_init (Ctx *ctx)
{
  CtxEvents *events = &ctx->events;
  _ctx_init_ticks ();
  events->tap_delay_min  = 40;
  /* Effectively unlimited. A value of 800 used to be assigned first and was
   * immediately overwritten; quick reflexes needed making it hard for some
   * is an argument against very short values. */
  events->tap_delay_max  = 8000000;

  events->tap_delay_hold = 1000;
  events->tap_hysteresis = 32;  /* XXX: should be ppi dependent */
}


void _ctx_idle_iteration (Ctx *ctx)
{
  static unsigned long prev_ticks = 0;
  CtxList *l;
  unsigned long ticks = ctx_ticks ();
  long tick_delta = (prev_ticks == 0) ? 0 : ticks - prev_ticks;
  prev_ticks = ticks;


  if (!ctx->events.idles && !ctx->events.idles_to_add)
  {
#ifdef EMSCRIPTEN
#ifdef ASYNCIFY
    emscripten_sleep (10);
#endif
#endif
    return;
  }

  ctx->events.in_idle_dispatch=1;

  for (l = ctx->events.idles; l; l = l->next)
  {
    CtxIdleCb *item = l->data;

    long rem = item->ticks_remaining;
    if (item->ticks_remaining >= 0)
    {
      rem -= tick_delta;

      item->ticks_remaining -= tick_delta / 100;

    if (rem < 0)
    {
      int to_be_removed = 0;
      for (CtxList *l2 = ctx->events.idles_to_remove; l2; l2=l2->next)
      {
        CtxIdleCb *item2 = l2->data;
        if (item2 == item) to_be_removed = 1;
      }
      
      if (!to_be_removed)
      {
      if (item->cb (ctx, item->idle_data) == 0)
      {
        ctx_list_prepend (&ctx->events.idles_to_remove, item);
      }
      else
        item->ticks_remaining = item->ticks_full;
      }
    }
    else
        item->ticks_remaining = rem;
    }
    else
    {
      int to_be_removed = 0;
      for (CtxList *l2 = ctx->events.idles_to_remove; l2; l2=l2->next)
      {
        CtxIdleCb *item2 = l2->data;
        if (item2 == item) to_be_removed = 1;
      }
      
      if (!to_be_removed)
      {
        if (item->cb (ctx, item->idle_data) == 0)
        {
          ctx_list_prepend (&ctx->events.idles_to_remove, item);
        }
        else
          item->ticks_remaining = item->ticks_full;
      }
    }
  }

  while (ctx->events.idles_to_add)
  {
    CtxIdleCb *item = ctx->events.idles_to_add->data;
    ctx_list_prepend (&ctx->events.idles, item);
    ctx_list_remove (&ctx->events.idles_to_add, item);
  }

  while (ctx->events.idles_to_remove)
  {
    CtxIdleCb *item = ctx->events.idles_to_remove->data;
    ctx_list_remove (&ctx->events.idles, item);
    ctx_list_remove (&ctx->events.idles_to_remove, item);
    if (item->destroy_notify)
      item->destroy_notify (item->destroy_data);
  }
  ctx->events.in_idle_dispatch=0;
#if EMSCRIPTEN
#ifdef ASYNCIFY
   emscripten_sleep(1);
#endif
#endif
}


/* Register a key binding.
 *
 * @key:    key name to bind (copied); the binding is ignored when NULL
 * @action: optional command string (copied)
 * @label:  optional human readable label (copied)
 * @cb, @cb_data: callback and its user data
 * @destroy_notify, @destroy_data: called by ctx_clear_bindings()
 *
 * One slot is always kept free, because ctx_clear_bindings() walks the array
 * until it finds an entry without a nick. When the table is full a warning
 * is printed and the binding is dropped; it is also dropped when the key
 * cannot be copied.
 */
void ctx_add_key_binding_full (Ctx *ctx,
                           const char *key,
                           const char *action,
                           const char *label,
                           CtxCb       cb,
                           void       *cb_data,
                           CtxDestroyNotify destroy_notify,
                           void       *destroy_data)
{
  CtxEvents *events = &ctx->events;
  CtxBinding *binding;

  if (!key)
    return;
  if (events->n_bindings +1 >= CTX_MAX_KEYBINDINGS)
  {
    fprintf (stderr, "warning: binding overflow\n");
    return;
  }
  binding = &events->bindings[events->n_bindings];

  binding->nick = strdup (key);
  if (!binding->nick)
    return; /* out of memory: a NULL nick would end the table early */

  /* always assign, so a reused slot cannot keep stale pointers */
  binding->command = action ? strdup (action) : NULL;
  binding->label   = label  ? strdup (label)  : NULL;
  binding->cb = cb;
  binding->cb_data = cb_data;
  binding->destroy_notify = destroy_notify;
  binding->destroy_data = destroy_data;
  events->n_bindings++;
}

/* Convenience wrapper around ctx_add_key_binding_full() without a destroy
 * notification. */
void ctx_add_key_binding (Ctx *ctx,
                          const char *key,
                          const char *action,
                          const char *label,
                          CtxCb       cb,
                          void       *cb_data)
{
  ctx_add_key_binding_full (ctx, key, action, label, cb, cb_data, NULL, NULL);
}

/* Remove all key bindings: for every entry up to the first one without a
 * nick, call its destroy_notify (if any) and free the copied strings; then
 * zero the whole table and reset the count.
 */
void ctx_clear_bindings (Ctx *ctx)
{
  CtxEvents *events = &ctx->events;

  for (int i = 0; events->bindings[i].nick; i++)
  {
    if (events->bindings[i].destroy_notify)
      events->bindings[i].destroy_notify (events->bindings[i].destroy_data);
    free (events->bindings[i].nick);
    /* free (NULL) is a no-op, so optional strings need no guard */
    free (events->bindings[i].command);
    free (events->bindings[i].label);
  }
  memset (&events->bindings, 0, sizeof (events->bindings));
  events->n_bindings = 0;
}

static void
ctx_collect_events (CtxEvent *event, void *data, void *data2);
/* Key-press handler that dispatches to registered key bindings.
 *
 * Bindings are visited newest first. Every binding whose nick equals
 * event->string and that has a callback is invoked; if none was, bindings
 * registered for "any" are invoked instead. As soon as a callback sets
 * event->stop_propagate the function returns; otherwise the event is finally
 * passed to ctx_collect_events().
 */
static void _ctx_bindings_key_press (CtxEvent *event, void *data1, void *data2)
{
  Ctx *ctx = event->ctx;
  CtxEvents *events = &ctx->events;
  int handled = 0;

  for (int i = events->n_bindings - 1; i >= 0; i--)
  {
    if (strcmp (events->bindings[i].nick, event->string) != 0)
      continue;
    if (!events->bindings[i].cb)
      continue;

    events->bindings[i].cb (event, events->bindings[i].cb_data, NULL);
    if (event->stop_propagate)
      return;
    handled = 1;
  }

  if (!handled)
  {
    for (int i = events->n_bindings - 1; i >= 0; i--)
    {
      if (strcmp (events->bindings[i].nick, "any") != 0)
        continue;
      if (!events->bindings[i].cb)
        continue;

      events->bindings[i].cb (event, events->bindings[i].cb_data, NULL);
      if (event->stop_propagate)
        return;
    }
  }

  ctx_collect_events (event, data1, data2);
}

/* Return a pointer to the first entry of ctx's key binding table. */
CtxBinding *ctx_get_bindings (Ctx *ctx)
{
  return &ctx->events.bindings[0];
}

/* Remove the idle/timeout callback(s) registered under `handle`.
 *
 * Matching items are queued in idles_to_remove. While an idle dispatch is in
 * progress the queue is left for the dispatcher to process; otherwise it is
 * drained right away, calling destroy_notify for every removed item.
 */
void ctx_remove_idle (Ctx *ctx, int handle)
{
  CtxList *node;

  if (!ctx->events.idles)
    return;

  for (node = ctx->events.idles; node; node = node->next)
  {
    CtxIdleCb *candidate = node->data;
    if (candidate->id == handle)
      ctx_list_prepend (&ctx->events.idles_to_remove, candidate);
  }

  if (ctx->events.in_idle_dispatch)
    return;

  for (node = ctx->events.idles_to_remove; node;
       node = ctx->events.idles_to_remove)
  {
    CtxIdleCb *doomed = node->data;
    ctx_list_remove (&ctx->events.idles, doomed);
    ctx_list_remove (&ctx->events.idles_to_remove, doomed);
    if (doomed->destroy_notify)
      doomed->destroy_notify (doomed->destroy_data);
  }
}

/* Register a callback that is invoked roughly every `ms` milliseconds.
 *
 * @idle_cb: returning 0 from the callback removes the timeout
 * @destroy_notify, @destroy_data: called when the timeout is removed
 *
 * Registrations made while an idle dispatch is running are deferred until
 * that dispatch has finished.
 *
 * Returns the handle to pass to ctx_remove_idle(), or 0 when the allocation
 * fails (handles of registered callbacks start at 1 as long as idle_id
 * starts at 0 - presumably the case, confirm at Ctx creation).
 *
 * NOTE(review): the interval is stored as ms * 1000 in an int, so very large
 * ms values overflow.
 */
int ctx_add_timeout_full (Ctx *ctx, int ms, int (*idle_cb)(Ctx *ctx, void *idle_data), void *idle_data,
                          void (*destroy_notify)(void *destroy_data), void *destroy_data)
{
  CtxIdleCb *item = calloc (sizeof (CtxIdleCb), 1);
  if (!item)
    return 0;
  item->cb              = idle_cb;
  item->idle_data       = idle_data;
  item->id              = ++ctx->events.idle_id;
  item->ticks_full      = 
  item->ticks_remaining = ms * 1000;
  item->destroy_notify  = destroy_notify;
  item->destroy_data    = destroy_data;
  if (ctx->events.in_idle_dispatch)
    ctx_list_append (&ctx->events.idles_to_add, item);
  else
    ctx_list_append (&ctx->events.idles, item);
  return item->id;
}

/* Convenience wrapper around ctx_add_timeout_full() without a destroy
 * notification; returns the new handle. */
int ctx_add_timeout (Ctx *ctx, int ms, int (*idle_cb)(Ctx *ctx, void *idle_data), void *idle_data)
{
  return ctx_add_timeout_full (ctx, ms, idle_cb, idle_data, NULL, NULL);
}

/* Register a callback that is invoked on every idle iteration.
 *
 * @idle_cb: returning 0 from the callback removes it
 * @destroy_notify, @destroy_data: called when the callback is removed
 *
 * Returns the handle to pass to ctx_remove_idle(), or 0 when the allocation
 * fails.
 *
 * NOTE(review): unlike ctx_add_timeout_full() this appends to the live idles
 * list even while a dispatch is running - confirm whether it should defer
 * through idles_to_add as well.
 */
int ctx_add_idle_full (Ctx *ctx, int (*idle_cb)(Ctx *ctx, void *idle_data), void *idle_data,
                                 void (*destroy_notify)(void *destroy_data), void *destroy_data)
{
  CtxIdleCb *item = calloc (sizeof (CtxIdleCb), 1);
  if (!item)
    return 0;
  item->cb = idle_cb;
  item->idle_data = idle_data;
  item->id = ++ctx->events.idle_id;
  /* a negative countdown makes the dispatcher call it every iteration */
  item->ticks_full =
  item->ticks_remaining = -1;
  item->is_idle = 1;
  item->destroy_notify = destroy_notify;
  item->destroy_data = destroy_data;
  ctx_list_append (&ctx->events.idles, item);
  return item->id;
}

/* Convenience wrapper around ctx_add_idle_full() without a destroy
 * notification; returns the new handle. */
int ctx_add_idle (Ctx *ctx, int (*idle_cb)(Ctx *ctx, void *idle_data), void *idle_data)
{
  return ctx_add_idle_full (ctx, idle_cb, idle_data, NULL, NULL);
}

#endif
/* Hash of a path, used to find listeners registered for the same path.
 *
 * The implementation is currently compiled out, so this always returns 0
 * regardless of `path`.
 *
 * About the disabled code: using bigger primes would be a good idea, this
 * falls apart due to rounding when zoomed in close.
 */
static inline double ctx_path_hash (void *path)
{
  double ret = 0;
#if 0
  int i;
  cairo_path_data_t *data;
  if (!path)
    return 0.99999;
  for (i = 0; i <path->num_data; i += path->data[i].header.length)
  {
    data = &path->data[i];
    switch (data->header.type) {
      case CAIRO_PATH_MOVE_TO:
        ret *= 17;
        ret += data[1].point.x;
        ret *= 113;
        ret += data[1].point.y;
        break;
      case CAIRO_PATH_LINE_TO:
        ret *= 121;
        ret += data[1].point.x;
        ret *= 1021;
        ret += data[1].point.y;
        break;
      case CAIRO_PATH_CURVE_TO:
        ret *= 3111;
        ret += data[1].point.x;
        ret *= 23;
        ret += data[1].point.y;
        ret *= 107;
        ret += data[2].point.x;
        ret *= 739;
        ret += data[2].point.y;
        ret *= 3;
        ret += data[3].point.x;
        ret *= 51;
        ret += data[3].point.y;
        break;
      case CAIRO_PATH_CLOSE_PATH:
        ret *= 51;
        break;
    }
  }
#endif
  return ret;
}

#if CTX_EVENTS
/* Take a reference on item. A negative count is reported on stderr, but the
 * count is incremented regardless.
 */
void _ctx_item_ref (CtxItem *item)
{
  int count_is_corrupt = (item->ref_count < 0);

  if (count_is_corrupt)
    fprintf (stderr, "EEEEK!\n");
  item->ref_count++;
}


/* Drop a reference on item.
 *
 * A count that is already zero or negative is reported on stderr and left
 * alone. When the last reference goes away, the finalize hook of every
 * registered callback is run and the item is freed.
 */
void _ctx_item_unref (CtxItem *item)
{
  if (item->ref_count <= 0)
  {
    fprintf (stderr, "EEEEK!\n");
    return;
  }

  item->ref_count--;
  if (item->ref_count > 0)
    return;

  for (int i = 0; i < item->cb_count; i++)
  {
    if (!item->cb[i].finalize)
      continue;
    item->cb[i].finalize (item->cb[i].data1, item->cb[i].data2,
                          item->cb[i].finalize_data);
  }
  /* item->path is not released here: path storage is not implemented */
  free (item);
}


/* Two-argument adapter around _ctx_item_unref(), used as the destroy
 * callback of the items list; data2 is ignored. */
void _ctx_item_unref2 (void *data, void *data2)
{
  CtxItem *item = (CtxItem*)data;
  _ctx_item_unref (item);
}


/* Path comparison placeholder: not implemented, so no two paths ever
 * compare equal and the result is always 0.
 */
static int
path_equal (void *path,
            void *path2)
{
  (void) path;
  (void) path2;
  //  XXX
  return 0;
}

/* Set the cursor of the most recently registered listener item;
 * does nothing when there is none.
 */
void ctx_listen_set_cursor (Ctx      *ctx,
                            CtxCursor cursor)
{
  CtxItem *last = ctx->events.last_item;

  if (!last)
    return;
  last->cursor = cursor;
}

/* Register an event listener for a rectangle in user coordinates.
 *
 * @x, @y, @width, @height: rectangle in user space
 * @types:  event types the callback is interested in
 * @cb, @data1, @data2: callback and its two user data pointers
 * @finalize, @finalize_data: optional hook run when the listener goes away
 *
 * Nothing is registered while events are frozen. Listeners that end up far
 * outside the device area, or that cannot be allocated, are dropped
 * immediately; in both cases finalize is called so the caller's data is not
 * leaked.
 */
void ctx_listen_full (Ctx     *ctx,
                      float    x,
                      float    y,
                      float    width,
                      float    height,
                      CtxEventType  types,
                      CtxCb    cb,
                      void    *data1,
                      void    *data2,
                      void   (*finalize)(void *listen_data,
                                         void *listen_data2,
                                         void *finalize_data),
                      void    *finalize_data)
{
  if (!ctx->events.frozen)
  {
    CtxItem *item;

    /* early bail for listeners outside screen  */
    /* XXX: fixme respect clipping */
    {
      float tx = x;
      float ty = y;
      float tw = width;
      float th = height;
      _ctx_user_to_device (&ctx->state, &tx, &ty);
      _ctx_user_to_device_distance (&ctx->state, &tw, &th);
      if (ty > ctx->height * 2 ||
          tx > ctx->width * 2 ||
          tx + tw < 0 ||
          ty + th < 0)
      {
        if (finalize)
          finalize (data1, data2, finalize_data);
        return;
      }
    }

    item = calloc (sizeof (CtxItem), 1);
    if (!item)
    {
      /* out of memory: treat like a listener that was bailed on */
      if (finalize)
        finalize (data1, data2, finalize_data);
      return;
    }
    item->x0 = x;
    item->y0 = y;
    item->x1 = x + width;
    item->y1 = y + height;
    item->cb[0].types = types;
    item->cb[0].cb = cb;
    item->cb[0].data1 = data1;
    item->cb[0].data2 = data2;
    item->cb[0].finalize = finalize;
    item->cb[0].finalize_data = finalize_data;
    item->cb_count = 1;
    item->types = types;
    //item->path = cairo_copy_path (cr); // XXX
    item->path_hash = ctx_path_hash (item->path);
    /* keep the inverse of the current transform with the item */
    ctx_get_matrix (ctx, &item->inv_matrix);
    ctx_matrix_invert (&item->inv_matrix);

    if (ctx->events.items)
    {
      CtxList *l;
      for (l = ctx->events.items; l; l = l->next)
      {
        CtxItem *item2 = l->data;

        /* store multiple callbacks for one entry when the paths
         * are exact matches, reducing per event traversal checks at the
         * cost of a little paint-hit (XXX: is this the right tradeoff,
         * perhaps it is better to spend more time during event processing
         * than during paint?)
         */
        if (item->path_hash == item2->path_hash &&
            path_equal (item->path, item2->path))
        {
          /* found an item, copy over cb data  */
          /* NOTE(review): cb_count is not checked against the capacity of
           * item2->cb before this append - confirm the bound */
          item2->cb[item2->cb_count] = item->cb[0];
          free (item);
          item2->cb_count++;
          item2->types |= types;
          return;
        }
      }
    }
    item->ref_count       = 1;
    ctx->events.last_item = item;
    ctx_list_prepend_full (&ctx->events.items, item, _ctx_item_unref2, NULL);
  }
}

/* Flag @event so that dispatch stops after the callback currently running;
 * a NULL @event is silently ignored.
 */
void ctx_event_stop_propagate (CtxEvent *event)
{
  if (!event)
    return;
  event->stop_propagate = 1;
}

/**
 * ctx_listen:
 * @ctx: context to register the listener on
 * @types: mask of event types to listen for
 * @cb: callback invoked for matching events
 * @data1: first user data pointer handed to @cb
 * @data2: second user data pointer handed to @cb
 *
 * Registers @cb for @types. When @types contains any key event bit the
 * listener gets an empty rectangle (key events are not looked up by
 * position); otherwise the rectangle is the extents of the current path.
 * No finalizer is installed, see ctx_listen_with_finalize() for that.
 */
void ctx_listen (Ctx          *ctx,
                 CtxEventType  types,
                 CtxCb         cb,
                 void*         data1,
                 void*         data2)
{
  float x = 0.0f, y = 0.0f, width = 0.0f, height = 0.0f;

  if (!(types & CTX_KEY))
  {
    /* bounding box of what to listen for - from the current path */
    float ex1, ey1, ex2, ey2;
    ctx_path_extents (ctx, &ex1, &ey1, &ex2, &ey2);
    x      = ex1;
    y      = ey1;
    width  = ex2 - ex1;
    height = ey2 - ey1;
  }

  /* a listener for drag-motion alone also gets drag-press, presumably
   * because grabs are only established on press - TODO confirm */
  if (types == CTX_DRAG_MOTION)
    types = CTX_DRAG_MOTION | CTX_DRAG_PRESS;

  /* plain call: returning a void expression from a void function is not
   * valid ISO C */
  ctx_listen_full (ctx, x, y, width, height, types, cb, data1, data2, NULL, NULL);
}

/**
 * ctx_listen_with_finalize:
 * @ctx: context to register the listener on
 * @types: mask of event types to listen for
 * @cb: callback invoked for matching events
 * @data1: first user data pointer handed to @cb
 * @data2: second user data pointer handed to @cb
 * @finalize: called with (@data1, @data2, @finalize_data) when the
 *            listener is disposed of, may be NULL
 * @finalize_data: extra pointer handed to @finalize
 *
 * Same as ctx_listen() but forwards a finalizer to ctx_listen_full().
 * When @types contains any key event bit the listener gets an empty
 * rectangle; otherwise the rectangle is the extents of the current path.
 */
void  ctx_listen_with_finalize (Ctx          *ctx,
                                CtxEventType  types,
                                CtxCb         cb,
                                void*         data1,
                                void*         data2,
                      void   (*finalize)(void *listen_data, void *listen_data2,
                                         void *finalize_data),
                      void    *finalize_data)
{
  float x = 0.0f, y = 0.0f, width = 0.0f, height = 0.0f;

  if (!(types & CTX_KEY))
  {
    /* bounding box of what to listen for - from the current path */
    float ex1, ey1, ex2, ey2;
    ctx_path_extents (ctx, &ex1, &ey1, &ex2, &ey2);
    x      = ex1;
    y      = ey1;
    width  = ex2 - ex1;
    height = ey2 - ey1;
  }

  /* a listener for drag-motion alone also gets drag-press, presumably
   * because grabs are only established on press - TODO confirm */
  if (types == CTX_DRAG_MOTION)
    types = CTX_DRAG_MOTION | CTX_DRAG_PRESS;

  /* plain call: returning a void expression from a void function is not
   * valid ISO C */
  ctx_listen_full (ctx, x, y, width, height, types, cb, data1, data2, finalize, finalize_data);
}


/* Placeholder callback for hit regions: prints the region id, which is
 * passed as @data, to stderr. @event and @data2 are not used.
 */
static void ctx_report_hit_region (CtxEvent *event,
                       void     *data,
                       void     *data2)
{
  fprintf (stderr, "hit region %s\n", (const char *) data);
  // XXX: NYI
}

/* Finalizer with the exact signature ctx_listen_full() expects; releases
 * the id string stored as the first listener data pointer. Passing free()
 * cast to another function type and calling it with three arguments is
 * undefined behaviour, and traps on targets that check indirect call
 * signatures.
 */
static void _ctx_hit_region_free_id (void *listen_data, void *listen_data2,
                                     void *finalize_data)
{
  free (listen_data);
}

/**
 * ctx_add_hit_region:
 * @ctx: context to register the region on
 * @id: identifier of the region, copied; a NULL @id registers nothing
 *
 * Registers a pointer listener covering the extents of the current path.
 * The callback currently only reports the id on stderr. The private copy
 * of @id is released by the listener's finalizer.
 */
void ctx_add_hit_region (Ctx *ctx, const char *id)
{
  char *id_copy;
  float ex1, ey1, ex2, ey2;

  if (!id)
    return;
  id_copy = strdup (id);

  /* bounding box of what to listen for - from the current path */
  ctx_path_extents (ctx, &ex1, &ey1, &ex2, &ey2);

  ctx_listen_full (ctx, ex1, ey1, ex2 - ex1, ey2 - ey1,
                   CTX_POINTER, ctx_report_hit_region,
                   id_copy, NULL, _ctx_hit_region_free_id, NULL);
}

typedef struct _CtxGrab CtxGrab;

/* Record of an item that has grabbed a pointer device; instances are
 * created by device_add_grab() and kept in ctx->events.grabs until
 * device_remove_grab() releases them.
 */
struct _CtxGrab
{
  CtxItem *item;        /* grabbed item, a reference is held while the grab lives */
  int      device_no;   /* pointer device that owns the grab */
  int      timeout_id;  /* pending tap-and-hold timeout, 0 when none is armed */
  int      start_time;  /* time stamp of the press that created the grab */
  float    x; // for tap and hold
  float    y;
  CtxEventType  type;   /* event mask the grab was created with */
};

/* Dispose of @grab: cancel its pending timeout if one is armed, drop the
 * reference held on the grabbed item and release the record itself.
 */
static void grab_free (Ctx *ctx, CtxGrab *grab)
{
  int pending = grab->timeout_id;

  if (pending != 0)
  {
    ctx_remove_idle (ctx, pending);
    grab->timeout_id = 0;
  }

  _ctx_item_unref (grab->item);
  free (grab);
}

/* Take @grab out of the context's list of active grabs, then free it. */
static void device_remove_grab (Ctx *ctx, CtxGrab *grab)
{
  CtxList **active_grabs = &ctx->events.grabs;

  ctx_list_remove (active_grabs, grab);
  grab_free (ctx, grab);
}

/* Create a grab of @item for pointer device @device_no with event mask
 * @type, take a reference on @item and append the grab to the context's
 * list of active grabs. Returns the new grab.
 */
static CtxGrab *device_add_grab (Ctx *ctx, int device_no, CtxItem *item, CtxEventType type)
{
  CtxGrab *created = calloc (1, sizeof (CtxGrab));

  created->item = item;
  created->type = type;
  _ctx_item_ref (item);
  created->device_no = device_no;

  ctx_list_append (&ctx->events.grabs, created);
  return created;
}

/* Build a new list holding every active grab that belongs to pointer
 * device @device_no, in the order they appear in ctx->events.grabs.
 * The caller frees the returned list; the grabs themselves are shared.
 */
static CtxList *_ctx_device_get_grabs (Ctx *ctx, int device_no)
{
  CtxList *matches = NULL;
  CtxList *node    = ctx->events.grabs;

  while (node)
  {
    CtxGrab *candidate = node->data;
    if (candidate->device_no == device_no)
      ctx_list_append (&matches, candidate);
    node = node->next;
  }
  return matches;
}

/* Stub: intended to make @path the current path of @ctx again. The body
 * only contains the commented-out cairo calls it was ported from, so the
 * function currently does nothing.
 */
static void _mrg_restore_path (Ctx *ctx, void *path)  //XXX
{
  //int i;
  //cairo_path_data_t *data;
  //cairo_new_path (cr);
  //cairo_append_path (cr, path);
}

/* Collect the registered items that should receive an event of @type at
 * device coordinates (@x, @y).
 *
 * For the key/message type combinations listed below no position test is
 * made and the result is a one element list with the first item whose
 * mask matches, or NULL. For everything else each item's inverse matrix
 * is applied to the coordinates and the item is added when the point is
 * inside its rectangle and either its mask matches @type or @type is
 * CTX_SET_CURSOR and the item has a cursor set. Matches are prepended, so
 * the result is in reverse registration-list order. The caller frees the
 * returned list.
 */
CtxList *_ctx_detect_list (Ctx *ctx, float x, float y, CtxEventType type)
{
  CtxList *iter;
  CtxList *hits = NULL;
  int non_positional =
      type == CTX_KEY_DOWN ||
      type == CTX_KEY_UP ||
      type == CTX_KEY_PRESS ||
      type == CTX_MESSAGE ||
      type == (CTX_KEY_DOWN|CTX_MESSAGE) ||
      type == (CTX_KEY_DOWN|CTX_KEY_UP) ||
      type == (CTX_KEY_DOWN|CTX_KEY_UP|CTX_MESSAGE);

  if (non_positional)
  {
    for (iter = ctx->events.items; iter; iter = iter->next)
    {
      CtxItem *candidate = iter->data;
      if (!(candidate->types & type))
        continue;
      ctx_list_prepend (&hits, candidate);
      return hits;
    }
    return NULL;
  }

  for (iter = ctx->events.items; iter; iter = iter->next)
  {
    CtxItem *candidate = iter->data;
    float u = x;
    float v = y;
    int inside;
    int wanted;

    /* bring the device coordinates into the item's user space */
    _ctx_matrix_apply_transform (&candidate->inv_matrix, &u, &v);

    inside = u >= candidate->x0 && v >= candidate->y0 &&
             u <  candidate->x1 && v <  candidate->y1;
    if (!inside)
      continue;

    wanted = (candidate->types & type) ||
             ((type == CTX_SET_CURSOR) && candidate->cursor);
    if (!wanted)
      continue;

    if (!candidate->path)
    {
      ctx_list_prepend (&hits, candidate);
      continue;
    }

    /* items with a stored path additionally need a fill hit-test */
    _mrg_restore_path (ctx, candidate->path);
    if (ctx_in_fill (ctx, u, v))
    {
      ctx_begin_path (ctx);
      ctx_list_prepend (&hits, candidate);
    }
    ctx_begin_path (ctx);
  }
  return hits;
}

/* Return a single item for an event of @type at (@x, @y): the first entry
 * of the reversed _ctx_detect_list() result, or NULL when nothing matches.
 */
CtxItem *_ctx_detect (Ctx *ctx, float x, float y, CtxEventType type)
{
  CtxList *hits  = _ctx_detect_list (ctx, x, y, type);
  CtxItem *first = NULL;

  if (!hits)
    return NULL;

  ctx_list_reverse (&hits);
  first = hits->data;
  ctx_list_free (&hits);
  return first;
}

/* Deliver an event of @type to the callbacks registered on @item.
 *
 * @event may be NULL; a function-static scratch event is then filled in
 * from @type, @x and @y and used in its place. The callbacks never see
 * @event itself but a copy whose coordinates have been run through the
 * item's inverse matrix, with the untransformed position kept in
 * device_x/device_y.
 *
 * Callbacks whose mask matches @type run from the most recently added to
 * the oldest. The copy's stop_propagate is written back to @event after
 * each call; the first non-zero value ends the loop and is returned.
 * Returns 0 when no callback asked to stop.
 */
static int
_ctx_emit_cb_item (Ctx *ctx, CtxItem *item, CtxEvent *event, CtxEventType type, float x, float y)
{
  static CtxEvent s_event;
  CtxEvent transformed_event;
  int i;


  if (!event)
  {
    /* no event supplied: synthesize one in the shared static scratch
     * event; only type, x and y are (re)written here */
    event = &s_event;
    event->type = type;
    event->x = x;
    event->y = y;
  }
  event->ctx = ctx;
  transformed_event = *event;
  /* keep the untransformed position available to the callback */
  transformed_event.device_x = event->x;
  transformed_event.device_y = event->y;

  {
    float tx, ty;
    tx = transformed_event.x;
    ty = transformed_event.y;
    _ctx_matrix_apply_transform (&item->inv_matrix, &tx, &ty);
    transformed_event.x = tx;
    transformed_event.y = ty;

    if ((type & CTX_DRAG_PRESS) ||
        (type & CTX_DRAG_MOTION) ||
        (type & CTX_MOTION))   /* probably a worthwhile check for the performance 
                                  benefit
                                */
    {
      tx = transformed_event.start_x;
      ty = transformed_event.start_y;
      _ctx_matrix_apply_transform (&item->inv_matrix, &tx, &ty);
      transformed_event.start_x = tx;
      transformed_event.start_y = ty;
    }


    /* NOTE(review): the delta goes through the same transform call as the
     * positions above; confirm that is intended for a relative vector */
    tx = transformed_event.delta_x;
    ty = transformed_event.delta_y;
    _ctx_matrix_apply_transform (&item->inv_matrix, &tx, &ty);
    transformed_event.delta_x = tx;
    transformed_event.delta_y = ty;
  }

  transformed_event.state = ctx->events.modifier_state;
  transformed_event.type = type;

  /* newest callback first */
  for (i = item->cb_count-1; i >= 0; i--)
  {
    if (item->cb[i].types & type)
    {
      item->cb[i].cb (&transformed_event, item->cb[i].data1, item->cb[i].data2);
      event->stop_propagate = transformed_event.stop_propagate; /* copy back the response */
      if (event->stop_propagate)
        return event->stop_propagate;
    }
  }
  return 0;
}
#endif

#if CTX_EVENTS

//#include <stdatomic.h>

/* Ask the backend of @ctx to process its pending input; does nothing when
 * there is no backend or the backend has no consume_events hook.
 */
void ctx_consume_events (Ctx *ctx)
{
  CtxBackend *backend = ctx->backend;

  if (!backend || !backend->consume_events)
    return;
  backend->consume_events (ctx);
}

/* get_event_fds implementation that reports stdin as the only event
 * source: stores STDIN_FILENO in @fd[0] and 1 in @count. @ctx is unused.
 */
void ctx_stdin_get_event_fds (Ctx *ctx, int *fd, int *count)
{
  *fd    = STDIN_FILENO;
  *count = 1;
}

/**
 * ctx_get_event_fds:
 * @ctx: context to query
 * @fd: array the backend stores its file descriptors in
 * @count: receives the number of descriptors written to @fd
 *
 * Asks the backend which file descriptors signal pending input. @count is
 * 0 when there is no backend or it has no get_event_fds hook.
 */
void ctx_get_event_fds (Ctx *ctx, int *fd, int *count)
{
  CtxBackend *backend = ctx->backend;

  /* default first, so the backend's answer is no longer overwritten by an
   * unconditional reset after the call */
  *count = 0;
  if (backend && backend->get_event_fds)
    backend->get_event_fds (ctx, fd, count);
}

/* Pop the next queued event of @ctx.
 *
 * When the queue is empty one round of idle callbacks is run, a redraw is
 * queued the first time this function is used on the context, and the
 * backend is asked to consume its input before the queue is checked a
 * second time. The returned pointer refers to a function-static copy that
 * is overwritten by the next call; NULL means no event is available.
 */
CtxEvent *ctx_get_event (Ctx *ctx)
{
  static CtxEvent event_copy;
  int polled = 0;

  for (;;)
  {
    CtxList *head = ctx->events.events;

    if (head)
    {
      event_copy = *((CtxEvent*)(head->data));
      ctx_list_remove (&ctx->events.events, head->data);
      return &event_copy;
    }

    if (polled)
      return NULL;
    polled = 1;

    _ctx_idle_iteration (ctx);
    if (ctx->events.ctx_get_event_enabled==0)
    {
      ctx->events.ctx_get_event_enabled = 1;
      ctx_queue_draw (ctx);
    }
    ctx_consume_events (ctx);
  }
}

/* Deliver @event as @type to each item in @items, in list order, until a
 * callback requests that propagation stops. @event's stop_propagate flag
 * is cleared before the first delivery. Returns the stop_propagate value
 * that ended the walk, or 0 when every item was visited.
 */
static int
_ctx_emit_cb (Ctx *ctx, CtxList *items, CtxEvent *event, CtxEventType type, float x, float y)
{
  CtxList *node = items;

  event->stop_propagate = 0;
  while (node)
  {
    _ctx_emit_cb_item (ctx, node->data, event, type, x, y);
    if (event->stop_propagate)
      return event->stop_propagate;
    node = node->next;
  }
  return 0;
}

/*
 * Updates which item is currently hovered by pointer device @device_no and
 * returns it, along with the list of hits.
 *
 * The hit list for @type at (@x, @y) is computed and reversed; its first
 * entry is the candidate for the hovered item. When @hitlist is non-NULL
 * the list is handed to the caller, who must free it, otherwise it is
 * freed here.
 *
 * If there was no previously hovered item, there is no candidate, or the
 * candidate's path_hash differs from the previous item's, CTX_LEAVE is
 * emitted on the previous item (and its reference dropped) and CTX_ENTER
 * is emitted on the candidate, which is referenced and remembered in
 * ctx->events.prev[device_no].
 *
 * The return value comes from a fresh _ctx_detect() call, not from the
 * list computed above.
 */
static CtxItem *_ctx_update_item (Ctx *ctx, int device_no, float x, float y, CtxEventType type, CtxList **hitlist)
{
  CtxItem *current = NULL;

  CtxList *l = _ctx_detect_list (ctx, x, y, type);
  if (l)
  {
    ctx_list_reverse (&l);
    current = l->data;
  }
  if (hitlist)
    *hitlist = l;
  else
    ctx_list_free (&l);

  if (ctx->events.prev[device_no] == NULL || current == NULL || (current->path_hash != ctx->events.prev[device_no]->path_hash))
  {
// enter/leave should snapshot chain to root
// and compare with previous snapshotted chain to root
// and emit/enter/leave as appropriate..
//
// leave might be registered for emission on enter..emission?


    //int focus_radius = 2;
    /* reference taken for the prev[] slot assigned further down */
    if (current)
      _ctx_item_ref (current);

    if (ctx->events.prev[device_no])
    {
      {
#if 0
        CtxIntRectangle rect = {floor(ctx->events.prev[device_no]->x0-focus_radius),
                             floor(ctx->events.prev[device_no]->y0-focus_radius),
                             ceil(ctx->events.prev[device_no]->x1)-floor(ctx->events.prev[device_no]->x0) + focus_radius * 2,
                             ceil(ctx->events.prev[device_no]->y1)-floor(ctx->events.prev[device_no]->y0) + focus_radius * 2};
        mrg_queue_draw (mrg, &rect);
#endif 
      }

      /* NULL event: the emitter synthesizes one from type, x and y */
      _ctx_emit_cb_item (ctx, ctx->events.prev[device_no], NULL, CTX_LEAVE, x, y);
      _ctx_item_unref (ctx->events.prev[device_no]);
      ctx->events.prev[device_no] = NULL;
    }
    if (current)
    {
#if 0
      {
        CtxIntRectangle rect = {floor(current->x0-focus_radius),
                             floor(current->y0-focus_radius),
                             ceil(current->x1)-floor(current->x0) + focus_radius * 2,
                             ceil(current->y1)-floor(current->y0) + focus_radius * 2};
        mrg_queue_draw (mrg, &rect);
      }
#endif
      _ctx_emit_cb_item (ctx, current, NULL, CTX_ENTER, x, y);
      ctx->events.prev[device_no] = current;
    }
  }
  /* detection is run again for the return value */
  current = _ctx_detect (ctx, x, y, type);
  //fprintf (stderr, "%p\n", current);
  return current;
}

/* Timeout callback armed by ctx_pointer_press() for items listening for
 * CTX_TAP_AND_HOLD. @data is the CtxGrab the timeout belongs to.
 *
 * Emits CTX_TAP_AND_HOLD on the grabbed item at the device's last known
 * pointer position and clears the grab's timeout id. Always returns 0;
 * the result of the emission is not propagated to the caller.
 */
static int tap_and_hold_fire (Ctx *ctx, void *data)
{
  CtxGrab *grab = data;
  CtxList *list = NULL;
  CtxEvent event = {0, };

  ctx_list_prepend (&list, grab->item);

  event.ctx = ctx;
  event.time = ctx_ms (ctx);

  event.device_x =
  event.x = ctx->events.pointer_x[grab->device_no];
  event.device_y =
  event.y = ctx->events.pointer_y[grab->device_no];

  // XXX: x and y coordinates
  _ctx_emit_cb (ctx, list, &event, CTX_TAP_AND_HOLD,
      ctx->events.pointer_x[grab->device_no], ctx->events.pointer_y[grab->device_no]);

  ctx_list_free (&list);

  /* the timeout has fired, so grab_free() has nothing left to cancel */
  grab->timeout_id = 0;

  return 0;
}

/**
 * ctx_pointer_drop:
 * @ctx: context receiving the event
 * @x: device x coordinate of the drop
 * @y: device y coordinate of the drop
 * @device_no: pointer device, clamped to the range 0 .. CTX_MAX_DEVICES-1
 * @time: time stamp, 0 means "now" (ctx_ms())
 * @string: payload stored in the event's string member
 *
 * Records the pointer position, updates the hovered item and emits
 * CTX_DROP on each hit item until a callback stops propagation.
 *
 * Returns: always 0.
 */
CTX_EXPORT int
ctx_pointer_drop (Ctx *ctx, float x, float y, int device_no, uint32_t time,
                  char *string)
{
  CtxList *l;
  CtxList *hitlist = NULL;

  /* clamp before device_no is used as an array index; writing the pointer
   * position first would go out of bounds for invalid device numbers */
  if (device_no < 0) device_no = 0;
  if (device_no >= CTX_MAX_DEVICES) device_no = CTX_MAX_DEVICES-1;

  ctx->events.pointer_x[device_no] = x;
  ctx->events.pointer_y[device_no] = y;
  if (device_no <= 3)
  {
    /* devices 0..3 also update slot 0 */
    ctx->events.pointer_x[0] = x;
    ctx->events.pointer_y[0] = y;
  }

  CtxEvent *event = &ctx->events.drag_event[device_no];

  if (time == 0)
    time = ctx_ms (ctx);

  event->ctx = ctx;
  event->x = x;
  event->y = y;

  event->delta_x = event->delta_y = 0;

  event->device_no = device_no;
  event->string    = string;
  event->time      = time;
  event->stop_propagate = 0;

  _ctx_update_item (ctx, device_no, x, y, CTX_DROP, &hitlist);

  for (l = hitlist; l; l = l->next)
  {
    CtxItem *item = l->data;
    _ctx_emit_cb_item (ctx, item, event, CTX_DROP, x, y);

    if (event->stop_propagate)
      break;
  }

  //mrg_queue_draw (mrg, NULL); /* in case of style change, and more  */
  ctx_list_free (&hitlist);

  return 0;
}

/**
 * ctx_pointer_press:
 * @ctx: context receiving the event
 * @x: device x coordinate of the press
 * @y: device y coordinate of the press
 * @device_no: pointer device, clamped to the range 0 .. CTX_MAX_DEVICES-1
 * @time: time stamp, 0 means "now" (ctx_ms())
 *
 * Records the press (position, pointer_down flag and, for devices 1..3,
 * the matching button bit of the modifier state), then walks the hit
 * items. Items listening for drag, tap or tap-and-hold get a grab for the
 * device; tap-and-hold items also get a timeout armed. Each item receives
 * CTX_PRESS followed by CTX_DRAG_PRESS until a callback stops propagation.
 *
 * Returns: always 0.
 */
CTX_EXPORT int
ctx_pointer_press (Ctx *ctx, float x, float y, int device_no, uint32_t time)
{
  CtxEvents *events = &ctx->events;
  CtxList *hitlist = NULL;

  /* clamp before device_no is used as an array index; writing the pointer
   * position first would go out of bounds for invalid device numbers */
  if (device_no < 0) device_no = 0;
  if (device_no >= CTX_MAX_DEVICES) device_no = CTX_MAX_DEVICES-1;

  events->pointer_x[device_no] = x;
  events->pointer_y[device_no] = y;
  if (device_no <= 3)
  {
    /* devices 0..3 also update slot 0 */
    events->pointer_x[0] = x;
    events->pointer_y[0] = y;
  }

  CtxEvent *event = &events->drag_event[device_no];

  if (time == 0)
    time = ctx_ms (ctx);

  event->x = event->start_x = event->prev_x = x;
  event->y = event->start_y = event->prev_y = y;

  event->delta_x = event->delta_y = 0;

  event->device_no = device_no;
  event->time      = time;
  event->stop_propagate = 0;

  if (events->pointer_down[device_no] == 1)
  {
    fprintf (stderr, "events thought device %i was already down\n", device_no);
  }
  /* doing just one of these two should be enough? */
  events->pointer_down[device_no] = 1;
  switch (device_no)
  {
    case 1:
      events->modifier_state |= CTX_MODIFIER_STATE_BUTTON1;
      break;
    case 2:
      events->modifier_state |= CTX_MODIFIER_STATE_BUTTON2;
      break;
    case 3:
      events->modifier_state |= CTX_MODIFIER_STATE_BUTTON3;
      break;
    default:
      break;
  }

  CtxGrab *grab = NULL;
  CtxList *l;

  _ctx_update_item (ctx, device_no, x, y,
      CTX_PRESS | CTX_DRAG_PRESS | CTX_TAP | CTX_TAP_AND_HOLD, &hitlist);

  for (l = hitlist; l; l = l->next)
  {
    CtxItem *item = l->data;
    if (item &&
        ((item->types & CTX_DRAG)||
         (item->types & CTX_TAP) ||
         (item->types & CTX_TAP_AND_HOLD)))
    {
      /* the grab is established before the press callbacks run */
      grab = device_add_grab (ctx, device_no, item, item->types);
      grab->start_time = time;

      if (item->types & CTX_TAP_AND_HOLD)
      {
         grab->timeout_id = ctx_add_timeout (ctx, events->tap_delay_hold, tap_and_hold_fire, grab);
      }
    }
    _ctx_emit_cb_item (ctx, item, event, CTX_PRESS, x, y);
    if (!event->stop_propagate)
      _ctx_emit_cb_item (ctx, item, event, CTX_DRAG_PRESS, x, y);

    if (event->stop_propagate)
      break;
  }

  //events_queue_draw (mrg, NULL); /* in case of style change, and more  */
  ctx_list_free (&hitlist);
  return 0;
}

/* Tell the application about a resize by delivering a CTX_KEY_PRESS event
 * whose string is "resize-event" to the first item listening for key
 * presses. @width and @height are not used here. A @time of 0 is replaced
 * with ctx_ms().
 */
void _ctx_resized (Ctx *ctx, int width, int height, long time)
{
  CtxItem *listener = _ctx_detect (ctx, 0, 0, CTX_KEY_PRESS);
  CtxEvent event = {0, };

  if (!time)
    time = ctx_ms (ctx);

  event.ctx = ctx;
  event.time = time;
  /* gets delivered to clients as a key event, maybe a message should be
   * used instead? */
  event.string = "resize-event";

  if (!listener)
    return;

  event.stop_propagate = 0;
  _ctx_emit_cb_item (ctx, listener, &event, CTX_KEY_PRESS, 0, 0);
}

/**
 * ctx_pointer_release:
 * @ctx: context receiving the event
 * @x: device x coordinate of the release
 * @y: device y coordinate of the release
 * @device_no: pointer device, clamped to the range 0 .. CTX_MAX_DEVICES-1
 * @time: time stamp, 0 means "now" (ctx_ms())
 *
 * Clears the pressed state of the device (pointer_down and, for devices
 * 1..3, the button bit of the modifier state) and records the position.
 * Every grab held by the device is then resolved: CTX_TAP is emitted when
 * the press/release delay lies strictly between tap_delay_min and
 * tap_delay_max and the pointer moved less than tap_hysteresis from the
 * press position, and CTX_DRAG_RELEASE is emitted to items listening for
 * it. All grabs of the device are removed, whether or not propagation was
 * stopped. Finally CTX_RELEASE goes to the items under the pointer unless
 * a grab callback stopped propagation.
 *
 * Returns: always 0.
 */
CTX_EXPORT int
ctx_pointer_release (Ctx *ctx, float x, float y, int device_no, uint32_t time)
{
  CtxEvents *events = &ctx->events;
  if (time == 0)
    time = ctx_ms (ctx);

  if (device_no < 0) device_no = 0;
  if (device_no >= CTX_MAX_DEVICES) device_no = CTX_MAX_DEVICES-1;
  CtxEvent *event = &events->drag_event[device_no];

  event->time = time;
  event->x = x;
  event->ctx = ctx;
  event->y = y;
  event->device_no = device_no;
  event->stop_propagate = 0;

  /* drop the button bit only when it is set, so the subtraction cannot
   * disturb other bits */
  switch (device_no)
  {
    case 1:
      if (events->modifier_state & CTX_MODIFIER_STATE_BUTTON1)
        events->modifier_state -= CTX_MODIFIER_STATE_BUTTON1;
      break;
    case 2:
      if (events->modifier_state & CTX_MODIFIER_STATE_BUTTON2)
        events->modifier_state -= CTX_MODIFIER_STATE_BUTTON2;
      break;
    case 3:
      if (events->modifier_state & CTX_MODIFIER_STATE_BUTTON3)
        events->modifier_state -= CTX_MODIFIER_STATE_BUTTON3;
      break;
    default:
      break;
  }

  //events_queue_draw (mrg, NULL); /* in case of style change */

  if (events->pointer_down[device_no] == 0)
  {
    //fprintf (stderr, "device %i already up\n", device_no);
  }
  events->pointer_down[device_no] = 0;

  events->pointer_x[device_no] = x;
  events->pointer_y[device_no] = y;
  if (device_no <= 3)
  {
    /* devices 0..3 also update slot 0 */
    events->pointer_x[0] = x;
    events->pointer_y[0] = y;
  }
  CtxList *hitlist = NULL;
  CtxList *grablist = NULL , *g= NULL;
  CtxGrab *grab;

  _ctx_update_item (ctx, device_no, x, y, CTX_RELEASE | CTX_DRAG_RELEASE, &hitlist);
  /* private snapshot of the device's grabs, so that removing grabs from
   * ctx->events.grabs inside the loop does not disturb the iteration */
  grablist = _ctx_device_get_grabs (ctx, device_no);

  for (g = grablist; g; g = g->next)
  {
    grab = g->data;

    if (!event->stop_propagate)
    {
      if (grab->item->types & CTX_TAP)
      {
        long delay = time - grab->start_time;

        /* a tap: short enough, long enough, and close to the press point
         * (squared distance against squared hysteresis) */
        if (delay > events->tap_delay_min &&
            delay < events->tap_delay_max &&
            (
              (event->start_x - x) * (event->start_x - x) +
              (event->start_y - y) * (event->start_y - y)) < ctx_pow2(events->tap_hysteresis)
            )
        {
          _ctx_emit_cb_item (ctx, grab->item, event, CTX_TAP, x, y);
        }
      }

      if (!event->stop_propagate && grab->item->types & CTX_DRAG_RELEASE)
      {
        _ctx_emit_cb_item (ctx, grab->item, event, CTX_DRAG_RELEASE, x, y);
      }
    }

    /* the release ends every grab of this device */
    device_remove_grab (ctx, grab);
  }

  if (hitlist)
  {
    if (!event->stop_propagate)
      _ctx_emit_cb (ctx, hitlist, event, CTX_RELEASE, x, y);
    ctx_list_free (&hitlist);
  }
  ctx_list_free (&grablist);
  return 0;
}

/*  For multi-touch we use a list of active grabs - thus a grab corresponds
 *  to a device id. Even during drag-grabs events propagate; to prevent
 *  that, a callback has to stop propagation.
 *
 *  ctx_pointer_motion records the new position of @device_no (clamped to
 *  0 .. CTX_MAX_DEVICES-1), updates the hovered item, sets the cursor from
 *  the item under the pointer (arrow when there is none) and queues a
 *  redraw when the hovered item changed. Grabs of the device that listen
 *  for tap or tap-and-hold are dropped once the pointer has moved further
 *  than tap_hysteresis from the press position; grabs listening for drag
 *  motion receive CTX_DRAG_MOTION. Unless a grab callback stopped
 *  propagation, the items under the pointer then receive CTX_MOTION.
 *  A @time of 0 is replaced with ctx_ms(). Always returns 0.
 */
CTX_EXPORT int
ctx_pointer_motion (Ctx *ctx, float x, float y, int device_no, uint32_t time)
{
  CtxList *hitlist = NULL;
  CtxList *grablist = NULL, *g;
  CtxGrab *grab;

  if (device_no < 0) device_no = 0;
  if (device_no >= CTX_MAX_DEVICES) device_no = CTX_MAX_DEVICES-1;
  CtxEvent *event = &ctx->events.drag_event[device_no];

  if (time == 0)
    time = ctx_ms (ctx);

  event->ctx       = ctx;
  event->x         = x;
  event->y         = y;
  event->time      = time;
  event->device_no = device_no;
  event->stop_propagate = 0;
  
  ctx->events.pointer_x[device_no] = x;
  ctx->events.pointer_y[device_no] = y;

  if (device_no <= 3)
  {
    /* devices 0..3 also update slot 0 */
    ctx->events.pointer_x[0] = x;
    ctx->events.pointer_y[0] = y;
  }

  /* private snapshot of the device's grabs; grabs are only removed from
   * ctx->events.grabs after the loop below has finished */
  grablist = _ctx_device_get_grabs (ctx, device_no);
  _ctx_update_item (ctx, device_no, x, y, CTX_MOTION, &hitlist);

  {
    CtxItem  *cursor_item = _ctx_detect (ctx, x, y, CTX_SET_CURSOR);
    if (cursor_item)
    {
      ctx_set_cursor (ctx, cursor_item->cursor);
    }
    else
    {
      ctx_set_cursor (ctx, CTX_CURSOR_ARROW);
    }
    CtxItem  *hovered_item = _ctx_detect (ctx, x, y, CTX_ANY);
    /* function-static, so this is one slot shared by every context and
     * every device that goes through this function */
    static CtxItem *prev_hovered_item = NULL;
    if (prev_hovered_item != hovered_item)
    {
      ctx_queue_draw (ctx);
    }
    prev_hovered_item = hovered_item;
  }

  event->delta_x = x - event->prev_x;
  event->delta_y = y - event->prev_y;
  event->prev_x  = x;
  event->prev_y  = y;

  CtxList *remove_grabs = NULL;

  for (g = grablist; g; g = g->next)
  {
    grab = g->data;

    if ((grab->type & CTX_TAP) ||
        (grab->type & CTX_TAP_AND_HOLD))
    {
      /* moved too far from the press point (squared distance against
       * squared hysteresis): schedule the grab for removal */
      if (
          (
            (event->start_x - x) * (event->start_x - x) +
            (event->start_y - y) * (event->start_y - y)) >
              ctx_pow2(ctx->events.tap_hysteresis)
         )
      {
        //fprintf (stderr, "-");
        ctx_list_prepend (&remove_grabs, grab);
      }
      else
      {
        //fprintf (stderr, ":");
      }
    }

    if (grab->type & CTX_DRAG_MOTION)
    {
      _ctx_emit_cb_item (ctx, grab->item, event, CTX_DRAG_MOTION, x, y);
      if (event->stop_propagate)
        break;
    }
  }
  /* removal is deferred until here so grabs stay valid during the loop */
  if (remove_grabs)
  {
    for (g = remove_grabs; g; g = g->next)
      device_remove_grab (ctx, g->data);
    ctx_list_free (&remove_grabs);
  }
  if (hitlist)
  {
    if (!event->stop_propagate)
      _ctx_emit_cb (ctx, hitlist, event, CTX_MOTION, x, y);
    ctx_list_free (&hitlist);
  }
  ctx_list_free (&grablist);
  return 0;
}

/* Deliver @message as a CTX_MESSAGE event to the first item listening for
 * messages. Its matching callbacks run in registration order until one
 * stops propagation. The message is also echoed on stderr. A @time of 0
 * is replaced with ctx_ms(). Nothing happens when no item listens.
 */
CTX_EXPORT void
ctx_incoming_message (Ctx *ctx, const char *message, long time)
{
  CtxItem *listener = _ctx_detect (ctx, 0, 0, CTX_MESSAGE);
  CtxEvent event = {0, };

  if (!time)
    time = ctx_ms (ctx);

  if (!listener)
    return;

  event.ctx    = ctx;
  event.type   = CTX_MESSAGE;
  event.time   = time;
  event.string = message;

  fprintf (stderr, "{%s|\n", message);

  for (int i = 0; i < listener->cb_count; i++)
  {
    if (!(listener->cb[i].types & (CTX_MESSAGE)))
      continue;

    event.state = ctx->events.modifier_state;
    listener->cb[i].cb (&event, listener->cb[i].data1, listener->cb[i].data2);
    if (event.stop_propagate)
      return;// event.stop_propagate;
  }
}

/* Report a scroll in @scroll_direction at device position (@x, @y).
 *
 * The event is built in the drag event slot of device 0, the hovered item
 * is updated and CTX_SCROLL is emitted on each hit item until a callback
 * stops propagation. A @time of 0 is replaced with ctx_ms().
 * Always returns 0.
 */
CTX_EXPORT int
ctx_scrolled (Ctx *ctx, float x, float y, CtxScrollDirection scroll_direction, uint32_t time)
{
  CtxList *hitlist = NULL;
  CtxList *node;
  int device_no = 0;

  ctx->events.pointer_x[device_no] = x;
  ctx->events.pointer_y[device_no] = y;

  /* XXX: might conflict with other code, create a sibling member of
   * drag_event? */
  CtxEvent *event = &ctx->events.drag_event[device_no];

  if (time == 0)
    time = ctx_ms (ctx);

  event->x = event->start_x = event->prev_x = x;
  event->y = event->start_y = event->prev_y = y;
  event->delta_x = event->delta_y = 0;
  event->device_no        = device_no;
  event->time             = time;
  event->stop_propagate   = 0;
  event->scroll_direction = scroll_direction;

  _ctx_update_item (ctx, device_no, x, y, CTX_SCROLL, &hitlist);

  for (node = hitlist; node; node = node->next)
  {
    _ctx_emit_cb_item (ctx, node->data, event, CTX_SCROLL, x, y);
    if (event->stop_propagate)
      break;
  }

  //mrg_queue_draw (mrg, NULL); /* in case of style change, and more  */
  ctx_list_free (&hitlist);
  return 0;
}

/* Returns 1 when @string starts with @prefix (an empty @prefix always
 * matches), 0 otherwise. Both arguments must be NUL terminated strings.
 */
static int ctx_str_has_prefix (const char *string, const char *prefix)
{
  for (int i = 0; prefix[i]; i++)
  {
    /* also covers @string ending early: its NUL differs from prefix[i] */
    if (string[i] != prefix[i]) return 0;
  }
  /* every prefix character matched */
  return 1;
}


/* Translate a numeric @keycode into the key name string used in key
 * events, taking the shift bit of @modifier_state into account.
 *
 * Named keys and punctuation return string literals. Letters, digits and
 * function keys are rendered into a function-static buffer, so such a
 * result is only valid until the next call. Unknown keycodes are reported
 * on stderr and yield "?".
 */
static const char *ctx_keycode_to_keyname (CtxModifierState modifier_state,
                                           int keycode)
{
   static char temp[6]=" ";
   const char *str = &temp[0];
   if (keycode >= 65 && keycode <= 90)
   {
     if (modifier_state & CTX_MODIFIER_STATE_SHIFT)
       temp[0]=keycode-65+'A';
     else
       temp[0]=keycode-65+'a';
     /* terminate after the single character: the static buffer may still
      * hold the tail of an earlier function key name such as "F12" */
     temp[1]='\0';
   }
   else if (keycode >= 112 && keycode <= 123)
   {
     snprintf (temp, sizeof (temp), "F%i", keycode-111);
   }
   else
   switch (keycode)
   {
     case 8: str="backspace"; break;
     case 9: str="tab"; break;
     case 13: str="return"; break;
     case 16: str="shift"; break;
     case 17: str="control"; break;
     case 18: str="alt"; break;
     case 27: str="escape"; break;
     case 32: str="space"; break;
     case 33: str="page-up"; break;
     case 34: str="page-down"; break;
     case 35: str="end"; break;
     case 36: str="home"; break;
     case 37: str="left"; break;
     case 38: str="up"; break;
     case 39: str="right"; break;
     case 40: str="down"; break;
     case 45: str="insert"; break;
     case 46: str="delete"; break;
     default:
       if (modifier_state & CTX_MODIFIER_STATE_SHIFT)
       switch (keycode)
       {
         case 173: str="_"; break;
         case 186: str=":"; break;
         case 187: str="+"; break;
         case 188: str="<"; break;
         case 189: str="_"; break;
         case 190: str=">"; break;
         case 191: str="?"; break;
         case 192: str="~"; break;
         case 219: str="{"; break;
         case 221: str="}"; break;
         case 220: str="|"; break;
         case 222: str="\""; break;
         case 48: str=")"; break;
         case 49: str="!"; break;
         case 50: str="@"; break;
         case 51: str="#"; break;
         case 52: str="$"; break;
         case 53: str="%"; break;
         case 54: str="^"; break;
         case 55: str="&"; break;
         case 56: str="*"; break;
         case 57: str="("; break;
         case 59: str=":"; break;
         case 61: str="+"; break;
         default:
           fprintf (stderr, "unhandled skeycode %i\n", keycode);
           str="?";
           break;
       }
       else
       switch (keycode)
       {
         case 61: str="="; break;
         case 59: str=";"; break;
         case 173: str="-"; break;
         case 186: str=";"; break;
         case 187: str="="; break;
         case 188: str=","; break;
         case 189: str="-"; break;
         case 190: str="."; break;
         case 191: str="/"; break;
         case 192: str="`"; break;
         case 219: str="["; break;
         case 221: str="]"; break;
         case 220: str="\\"; break;
         case 222: str="'"; break;
         default:
           /* NOTE(review): the upper bound 66 reaches past the digits
            * (48..57); left unchanged, confirm whether 57 was meant */
           if (keycode >= 48 && keycode <=66)
           {
             temp[0]=keycode-48+'0';
             /* same stale-tail protection as for letters */
             temp[1]='\0';
           }
           else
           {
             fprintf (stderr, "unhandled keycode %i\n", keycode);
             str="?";
           }
           break;
       }
   }
   return str;
}

/**
 * ctx_key_press:
 * @ctx: context receiving the event
 * @keyval: key code, stored in the event's unicode member
 * @string: key name, or NULL to derive it from @keyval and the current
 *          modifier state
 * @time: time stamp, 0 means "now" (ctx_ms())
 *
 * When @string is NULL a name is looked up; bare modifier keys ("shift",
 * "control", "alt") are dropped, and when any modifier state is set the
 * name is prefixed with "shift-", "alt-" and "control-" as applicable
 * (the shift prefix is only used together with alt or control).
 *
 * Strings whose first word is "pm", "pd", "pp" or "pr" are treated as
 * serialized pointer events "<type> <x> <y> <device>" and forwarded to the
 * pointer functions. Everything else is delivered as CTX_KEY_PRESS to the
 * first item listening for key presses, with the event string being a
 * temporary copy that is freed before returning.
 *
 * Returns: the stop_propagate value of the callback that stopped
 * propagation, the result of the pointer function for pointer strings,
 * otherwise 0.
 */
CTX_EXPORT int
ctx_key_press (Ctx *ctx, unsigned int keyval,
               const char *string, uint32_t time)
{
  char temp_key[128]="";
  char event_type[128]="";
  /* initialized, so a pointer string with missing fields does not forward
   * indeterminate coordinates */
  float x = 0.0f, y = 0.0f;
  int b = 0;

  if (!string)
  {
    string = ctx_keycode_to_keyname (ctx->events.modifier_state, keyval);

    if (!strcmp (string, "shift") ||
        !strcmp (string, "control") ||
        !strcmp (string, "alt"))
      return 0;

    if (ctx->events.modifier_state)
    {
       int has_alt     = (ctx->events.modifier_state & CTX_MODIFIER_STATE_ALT) != 0;
       int has_control = (ctx->events.modifier_state & CTX_MODIFIER_STATE_CONTROL) != 0;
       int has_shift   = (ctx->events.modifier_state & CTX_MODIFIER_STATE_SHIFT) != 0 &&
                         (has_alt || has_control);

       /* with an explicit shift- prefix the unshifted key name is used */
       if (has_shift)
         string = ctx_keycode_to_keyname (0, keyval);

       snprintf (temp_key, sizeof (temp_key), "%s%s%s%s",
                 has_shift   ? "shift-"   : "",
                 has_alt     ? "alt-"     : "",
                 has_control ? "control-" : "",
                 string);
       string = temp_key;
    }
  }

  /* field width keeps an over-long first word inside event_type */
  sscanf (string, "%127s %f %f %i", event_type, &x, &y, &b);
  if (!strcmp (event_type, "pm") ||
      !strcmp (event_type, "pd"))
    return ctx_pointer_motion (ctx, x, y, b, 0);
  else if (!strcmp (event_type, "pp"))
    return ctx_pointer_press (ctx, x, y, b, 0);
  else if (!strcmp (event_type, "pr"))
    return ctx_pointer_release (ctx, x, y, b, 0);
  //else if (!strcmp (event_type, "keydown"))
  //  return ctx_key_down (ctx, keyval, string + 8, time);
  //else if (!strcmp (event_type, "keyup"))
  //  return ctx_key_up (ctx, keyval, string + 6, time);

  CtxItem *item = _ctx_detect (ctx, 0, 0, CTX_KEY_PRESS);
  CtxEvent event = {0,};

  if (time == 0)
    time = ctx_ms (ctx);
  if (item)
  {
    int i;
    event.ctx = ctx;
    event.type = CTX_KEY_PRESS;
    event.unicode = keyval;
    /* string is never NULL here; the event always owns a heap copy, so
     * the free() calls below never see a string literal */
    event.string = strdup(string);
    event.stop_propagate = 0;
    event.time = time;

    for (i = 0; i < item->cb_count; i++)
    {
      if (item->cb[i].types & (CTX_KEY_PRESS))
      {
        event.state = ctx->events.modifier_state;
        item->cb[i].cb (&event, item->cb[i].data1, item->cb[i].data2);
        if (event.stop_propagate)
        {
          free ((void*)event.string);
          return event.stop_propagate;
        }
      }
    }
    free ((void*)event.string);
  }
  return 0;
}

/* Report that a key went down.
 *
 * A NULL @string is replaced by the unshifted name of @keyval. The names
 * "shift", "control" and "alt" set the matching bit of the context's
 * modifier state. The event is then delivered as CTX_KEY_DOWN to the
 * first item listening for it; its matching callbacks run in registration
 * order until one stops propagation. A @time of 0 is replaced with
 * ctx_ms().
 *
 * Returns the stop_propagate value that ended delivery, otherwise 0.
 */
CTX_EXPORT int
ctx_key_down (Ctx *ctx, unsigned int keyval,
              const char *string, uint32_t time)
{
  CtxItem *listener = _ctx_detect (ctx, 0, 0, CTX_KEY_DOWN);
  CtxEvent event = {0,};
  int stopped = 0;

  if (!string)
    string = ctx_keycode_to_keyname (0, keyval);

  if (strcmp (string, "shift") == 0)
    ctx->events.modifier_state |= CTX_MODIFIER_STATE_SHIFT;
  else if (strcmp (string, "control") == 0)
    ctx->events.modifier_state |= CTX_MODIFIER_STATE_CONTROL;
  else if (strcmp (string, "alt") == 0)
    ctx->events.modifier_state |= CTX_MODIFIER_STATE_ALT;

  if (time == 0)
    time = ctx_ms (ctx);

  if (!listener)
    return 0;

  event.ctx            = ctx;
  event.type           = CTX_KEY_DOWN;
  event.unicode        = keyval;
  event.string         = strdup(string);
  event.stop_propagate = 0;
  event.time           = time;

  for (int i = 0; i < listener->cb_count && !stopped; i++)
  {
    if (!(listener->cb[i].types & (CTX_KEY_DOWN)))
      continue;

    event.state = ctx->events.modifier_state;
    listener->cb[i].cb (&event, listener->cb[i].data1, listener->cb[i].data2);
    stopped = event.stop_propagate;
  }

  /* the event only ever carries a private copy of the key name */
  free ((void*)event.string);
  return stopped;
}

/* Report that a key was released.
 *
 * A NULL @string is replaced by the unshifted name of @keyval. The names
 * "shift", "control" and "alt" clear the matching bit of the context's
 * modifier state. The event is then delivered as CTX_KEY_UP to the first
 * item listening for it; its matching callbacks run in registration order
 * until one stops propagation. A @time of 0 is replaced with ctx_ms().
 *
 * Returns the stop_propagate value that ended delivery, otherwise 0.
 */
CTX_EXPORT int
ctx_key_up (Ctx *ctx, unsigned int keyval,
            const char *string, uint32_t time)
{
  CtxItem *listener = _ctx_detect (ctx, 0, 0, CTX_KEY_UP);
  CtxEvent event = {0,};
  int stopped = 0;

  if (!string)
    string = ctx_keycode_to_keyname (0, keyval);

  if (strcmp (string, "shift") == 0)
    ctx->events.modifier_state &= ~(CTX_MODIFIER_STATE_SHIFT);
  else if (strcmp (string, "control") == 0)
    ctx->events.modifier_state &= ~(CTX_MODIFIER_STATE_CONTROL);
  else if (strcmp (string, "alt") == 0)
    ctx->events.modifier_state &= ~(CTX_MODIFIER_STATE_ALT);

  if (time == 0)
    time = ctx_ms (ctx);

  if (!listener)
    return 0;

  event.ctx            = ctx;
  event.type           = CTX_KEY_UP;
  event.unicode        = keyval;
  event.string         = strdup(string);
  event.stop_propagate = 0;
  event.time           = time;

  for (int i = 0; i < listener->cb_count && !stopped; i++)
  {
    if (!(listener->cb[i].types & (CTX_KEY_UP)))
      continue;

    event.state = ctx->events.modifier_state;
    listener->cb[i].cb (&event, listener->cb[i].data1, listener->cb[i].data2);
    stopped = event.stop_propagate;
  }

  /* the event only ever carries a private copy of the key name */
  free ((void*)event.string);
  return stopped;
}

/* Increase the freeze counter of @ctx by one; undone by ctx_thaw(). */
void ctx_freeze           (Ctx *ctx)
{
  ctx->events.frozen += 1;
}

/* Decrease the freeze counter of @ctx by one; counterpart of ctx_freeze(). */
void ctx_thaw             (Ctx *ctx)
{
  ctx->events.frozen -= 1;
}
/* Returns 1 when @ctx is non-NULL and its freeze counter is non-zero,
 * 0 otherwise.
 */
int ctx_events_frozen (Ctx *ctx)
{
  if (!ctx)
    return 0;
  return ctx->events.frozen ? 1 : 0;
}
/* Free the list of registered event items of @ctx. */
void ctx_events_clear_items (Ctx *ctx)
{
  CtxList **registered = &ctx->events.items;

  ctx_list_free (registered);
}

/* Returns the last recorded x position stored in pointer slot 0. */
float ctx_pointer_x (Ctx *ctx)
{
  CtxEvents *events = &ctx->events;

  return events->pointer_x[0];
}

/* Returns the last recorded y position stored in pointer slot 0. */
float ctx_pointer_y (Ctx *ctx)
{
  CtxEvents *events = &ctx->events;

  return events->pointer_y[0];
}

/* Returns the pressed state recorded for pointer device @no, or 0 when
 * @no is outside the range 0 .. CTX_MAX_DEVICES-1 (the same range the
 * pointer event functions clamp their device numbers to).
 */
int ctx_pointer_is_down (Ctx *ctx, int no)
{
  /* >= rather than >: index CTX_MAX_DEVICES itself is already one past
   * the highest device number used elsewhere */
  if (no < 0 || no >= CTX_MAX_DEVICES) return 0;
  return ctx->events.pointer_down[no];
}

/* Debug aid: for every registered item whose rectangle contains the
 * current pointer position, set the item's forward matrix and stroke.
 * Drawing state is saved on entry and restored on exit.
 */
void _ctx_debug_overlays (Ctx *ctx)
{
  ctx_save (ctx);

  ctx_line_width (ctx, 2);
  ctx_rgba (ctx, 0,0,0.8,0.5);

  for (CtxList *iter = ctx->events.items; iter; iter = iter->next)
  {
    float px = ctx_pointer_x (ctx);
    float py = ctx_pointer_y (ctx);
    CtxItem *item = iter->data;
    CtxMatrix matrix = item->inv_matrix;
    int inside;

    /* pointer position in the item's own coordinates */
    _ctx_matrix_apply_transform (&matrix, &px, &py);

    inside = px >= item->x0 && px < item->x1 &&
             py >= item->y0 && py < item->y1;
    if (!inside)
      continue;

    /* inverting the stored inverse gives the matrix to draw with */
    ctx_matrix_invert (&matrix);
    ctx_set_matrix (ctx, &matrix);
    _mrg_restore_path (ctx, item->path);
    ctx_stroke (ctx);
  }

  ctx_restore (ctx);
}

/* Placeholder: currently does nothing, both arguments are ignored. */
void ctx_set_render_threads   (Ctx *ctx, int n_threads)
{
  /* XXX: not implemented */
}
/* Returns the global _ctx_max_threads value; @ctx is not consulted. */
int ctx_get_render_threads   (Ctx *ctx)
{
  int threads = _ctx_max_threads;
  return threads;
}
/* Stores @enable_hash_cache in the global _ctx_enable_hash_cache flag;
 * the setting is process wide, @ctx is not consulted. */
void ctx_set_hash_cache (Ctx *ctx, int enable_hash_cache)
{
  int requested = enable_hash_cache;
  _ctx_enable_hash_cache = requested;
}
/* Returns the global _ctx_enable_hash_cache flag; @ctx is not consulted. */
int ctx_get_hash_cache (Ctx *ctx)
{
  int enabled = _ctx_enable_hash_cache;
  return enabled;
}

/* Returns 1 when @ctx has a non-zero dirty counter or, in CTX_CLIENTS
 * builds, when ctx_clients_need_redraw() reports true; 0 otherwise. */
int ctx_need_redraw (Ctx *ctx)
{
  if (ctx->dirty != 0)
    return 1;
#if CTX_CLIENTS
  if (ctx_clients_need_redraw (ctx))
    return 1;
#endif
  return 0;
}


/*
 * Centralized global API for registering the file descriptors that
 * should wake us up; this lets callers block in select() instead of
 * sleeping and polling.
 */

#define CTX_MAX_LISTEN_FDS 128 // becomes max clients..

/* registered descriptors, the number of used slots and the highest
 * descriptor value ever registered */
static int _ctx_listen_fd[CTX_MAX_LISTEN_FDS];
static int _ctx_listen_fds    = 0;
static int _ctx_listen_max_fd = 0;

/* Register @fd in the global table of descriptors to wait on.
 *
 * Negative descriptors are ignored, and so are registrations beyond
 * CTX_MAX_LISTEN_FDS (with a diagnostic on stderr); previously the latter
 * wrote past the end of _ctx_listen_fd[].
 */
void _ctx_add_listen_fd (int fd)
{
  if (fd < 0)
    return;
  if (_ctx_listen_fds >= CTX_MAX_LISTEN_FDS)
  {
    fprintf (stderr, "ctx: listen fd table full (%i entries), ignoring fd %i\n",
             CTX_MAX_LISTEN_FDS, fd);
    return;
  }
  _ctx_listen_fd[_ctx_listen_fds++]=fd;
  if (fd > _ctx_listen_max_fd)
    _ctx_listen_max_fd = fd;
}

/* Unregister the first occurrence of @fd: its slot is overwritten with the
 * last used entry and the table shrinks by one. Unknown descriptors are
 * ignored. _ctx_listen_max_fd is left untouched. */
void _ctx_remove_listen_fd (int fd)
{
  int slot = 0;

  while (slot < _ctx_listen_fds && _ctx_listen_fd[slot] != fd)
    slot++;

  if (slot >= _ctx_listen_fds)
    return; /* not registered */

  _ctx_listen_fds--;
  _ctx_listen_fd[slot] = _ctx_listen_fd[_ctx_listen_fds];
}
#ifdef EMSCRIPTEN
extern int em_in_len;
#endif

/* Wait up to @timeout microseconds for activity on any registered listen
 * descriptor or on the event descriptors reported by ctx_get_event_fds().
 *
 * Returns the number of ready descriptors as reported by select() (plus
 * em_in_len under EMSCRIPTEN), or 0 on timeout or when select() fails.
 *
 * The nfds argument of select() must exceed the highest descriptor in the
 * set; it used to be derived from the listen descriptors only, so backend
 * input descriptors with a higher number were never examined.
 */
int ctx_input_pending (Ctx *ctx, int timeout)
{
  struct timeval tv;
  fd_set fdset;
  int max_fd = _ctx_listen_max_fd;

  FD_ZERO (&fdset);
  for (int i = 0; i < _ctx_listen_fds; i++)
  {
    FD_SET (_ctx_listen_fd[i], &fdset);
  }
  int input_fds[5];
  int n_fds = 0; /* stays 0 should the callee not report anything */
  ctx_get_event_fds (ctx, input_fds, &n_fds);
  for (int i = 0; i < n_fds; i++)
  {
    FD_SET (input_fds[i], &fdset);
    if (input_fds[i] > max_fd)
      max_fd = input_fds[i];
  }
  if (timeout < 0) /* a negative timeval makes select() fail with EINVAL */
    timeout = 0;
  tv.tv_sec = timeout / 1000000;
  tv.tv_usec = timeout % 1000000;
  int retval = select (max_fd + 1, &fdset, NULL, NULL, &tv);
  if (retval == -1)
  {
    perror ("select");
    return 0;
  }
#ifdef EMSCRIPTEN
  retval += em_in_len;
#endif
  return retval;
}

/* Let the clients handle their events (CTX_CLIENTS builds), then drain
 * ctx_get_event() until it returns a false value. */
void ctx_handle_events (Ctx *ctx)
{
#if CTX_CLIENTS
  ctx_clients_handle_events (ctx);
#endif
  for (;;)
  {
    if (!ctx_get_event (ctx))
      break;
  }
}


/* Tear down the event state of @ctx: free the item list, forget the last
 * item and unlink every idle callback, invoking its destroy_notify when
 * one is set.
 * NOTE(review): the CtxIdleCb itself is not freed here - confirm whether
 * ctx_list_remove() takes care of that. */
static void ctx_events_deinit (Ctx *ctx)
{
  ctx_list_free (&ctx->events.items);
  ctx->events.last_item = NULL;

  for (CtxList *head = ctx->events.idles; head; head = ctx->events.idles)
  {
    CtxIdleCb *idle = head->data;
    ctx_list_remove (&ctx->events.idles, idle);
    if (!idle->destroy_notify)
      continue;
    idle->destroy_notify (idle->destroy_data);
  }
}


/* Convenience wrappers for calling through an EvSource's function
 * pointers, passing the source itself as first argument.
 * has_event/get_event are called unconditionally; destroy, set_coord and
 * get_fd are skipped when the pointer is NULL (get_fd then yields 0). */
#define evsource_has_event(es)   (es)->has_event((es))
#define evsource_get_event(es)   (es)->get_event((es))
#define evsource_destroy(es)     do{if((es)->destroy)(es)->destroy((es));}while(0)
#define evsource_set_coord(es,x,y) do{if((es)->set_coord)(es)->set_coord((es),(x),(y));}while(0)
#define evsource_get_fd(es)      ((es)->get_fd?(es)->get_fd((es)):0)

/* forward declarations of the /dev/input/mice event source callbacks */
static int mice_has_event ();
static char *mice_get_event ();
static void mice_destroy ();
static int mice_get_fd (EvSource *ev_source);
static void mice_set_coord (EvSource *ev_source, double x, double y);

/* The singleton mouse event source. The initializer is positional;
 * presumably the members are priv, has_event, get_event, destroy, get_fd,
 * set_coord in that order - verify against the EvSource declaration.
 * The first three callbacks are declared without parameters and therefore
 * cast through void*. */
static EvSource ctx_ev_src_mice = {
  NULL,
  (void*)mice_has_event,
  (void*)mice_get_event,
  (void*)mice_destroy,
  mice_get_fd,
  mice_set_coord
};

/* State of the /dev/input/mice reader. */
typedef struct Mice
{
  int     fd;          /* descriptor from open(), -1 when opening failed */
  double  x;           /* accumulated pointer position, clamped in mice_get_event */
  double  y;
  int     button;      /* NOTE(review): not written by the code visible here */
  int     prev_state;  /* first byte of the previously read packet */
} Mice;

/* set to the mice state once the device has been opened successfully */
Mice *_mrg_evsrc_coord = NULL;
/* copy of the opened device descriptor */
static int _ctx_mice_fd = 0;

/* the single statically allocated instance and the pointer used to reach it */
static Mice  mice;
static Mice* mrg_mice_this = &mice;

static int mmm_evsource_mice_init ()
{
  unsigned char reset[]={0xff};
  /* need to detect which event */

  mrg_mice_this->prev_state = 0;
  mrg_mice_this->fd = open ("/dev/input/mice", O_RDONLY | O_NONBLOCK);
  if (mrg_mice_this->fd == -1)
  {
    fprintf (stderr, "error opening /dev/input/mice device, maybe add user to input group if such group exist, or otherwise make the rights be satisfied.\n");
    return -1;
  }
  if (write (mrg_mice_this->fd, reset, 1) == -1)
  {
    // might happen if we're a regular user with only read permission
  }
  _ctx_mice_fd = mrg_mice_this->fd;
  _mrg_evsrc_coord = mrg_mice_this;
  return 0;
}

/* Close the mouse device if it is open.
 *
 * The descriptor is reset to -1 afterwards so that mice_has_event() treats
 * the source as closed and a second destroy does not close an unrelated,
 * reused descriptor.
 */
static void mice_destroy ()
{
  if (mrg_mice_this->fd != -1)
  {
    close (mrg_mice_this->fd);
    mrg_mice_this->fd = -1;
  }
}

/* Zero-timeout poll of the mouse device: returns non-zero when select()
 * reports exactly one ready descriptor and it is the mouse fd, 0 otherwise
 * (including when the device is not open). */
static int mice_has_event ()
{
  int fd = mrg_mice_this->fd;
  struct timeval no_wait;
  fd_set readable;

  if (fd == -1)
    return 0;

  FD_ZERO (&readable);
  FD_SET (fd, &readable);
  no_wait.tv_sec = 0;
  no_wait.tv_usec = 0;

  if (select (fd + 1, &readable, NULL, NULL, &no_wait) != 1)
    return 0;
  return FD_ISSET (fd, &readable);
}

/* Pointer acceleration for one axis: the magnitude of @delta is squared
 * while below 6 and capped at 36 otherwise; the sign is preserved. */
static double mice_accelerate (double delta)
{
  double magnitude = delta < 0 ? -delta : delta;
  double boosted   = magnitude < 6 ? magnitude * magnitude : 36;
  return delta < 0 ? -boosted : boosted;
}

/* Read one 3 byte packet from the mouse device and translate it into an
 * event string of the form "<type> <x> <y> <button>", where type is
 * "pm" (motion), "pp" (press), "pr" (release) or "pd" (drag).
 *
 * Returns a newly allocated string owned by the caller; an empty string
 * when no complete packet could be read, NULL when allocation fails.
 *
 * Previously only a read result of 0 was handled: a failed read (-1, e.g.
 * EAGAIN on the non-blocking descriptor) or a short read went on to
 * interpret uninitialised bytes.
 */
static char *mice_get_event ()
{
  /* packet bit -> reported button number, in priority order */
  static const struct { int mask; int button; } buttons[] =
  {
    {1, 1},
    {2, 3},
    {4, 2}
  };
  const char *ret = "pm";
  signed char buf[3];
  int button = 0;
  CtxTiled *tiled = (void*)ctx_ev_src_mice.priv;
  int n_read = read (mrg_mice_this->fd, buf, 3);

  if (n_read != 3)
    return strdup ("");

  mrg_mice_this->x += mice_accelerate (buf[1]);
  mrg_mice_this->y += mice_accelerate (-buf[2]); /* device y axis points up */

  if (mrg_mice_this->x < 0)
    mrg_mice_this->x = 0;
  if (mrg_mice_this->y < 0)
    mrg_mice_this->y = 0;
  if (tiled) /* without a backend there are no upper bounds to clamp to */
  {
    if (mrg_mice_this->x >= tiled->width)
      mrg_mice_this->x = tiled->width -1;
    if (mrg_mice_this->y >= tiled->height)
      mrg_mice_this->y = tiled->height -1;
  }

  /* the first button that changed state or is held decides the event */
  for (unsigned int i = 0; i < sizeof (buttons) / sizeof (buttons[0]) && !button; i++)
  {
    int was = mrg_mice_this->prev_state & buttons[i].mask;
    int now = buf[0] & buttons[i].mask;

    if (was != now)
    {
      ret = now ? "pp" : "pr";
      button = buttons[i].button;
    }
    else if (now)
    {
      ret = "pd";
      button = buttons[i].button;
    }
  }

  mrg_mice_this->prev_state = buf[0];

  {
    const size_t size = 64;
    char *r = malloc (size);
    if (!r)
      return NULL;
    snprintf (r, size, "%s %.0f %.0f %i", ret, mrg_mice_this->x, mrg_mice_this->y, button);
    return r;
  }
}

/* Descriptor of the mouse device (-1 when it could not be opened);
 * @ev_source is unused since there is a single global instance. */
static int mice_get_fd (EvSource *ev_source)
{
  const Mice *m = mrg_mice_this;
  return m->fd;
}

/* Overwrite the tracked pointer position with (@x, @y); @ev_source is
 * unused since there is a single global instance. */
static void mice_set_coord (EvSource *ev_source, double x, double y)
{
  Mice *m = mrg_mice_this;
  m->x = x;
  m->y = y;
}

/* Initialise the mouse device and return the shared mouse event source
 * with its position reset to the origin, or NULL when initialisation
 * failed. */
static EvSource *evsource_mice_new (void)
{
  if (mmm_evsource_mice_init () != 0)
    return NULL;

  mrg_mice_this->x = 0;
  mrg_mice_this->y = 0;
  return &ctx_ev_src_mice;
}

/* forward declarations of the terminal keyboard event source callbacks */
static int evsource_kb_has_event (void);
static char *evsource_kb_get_event (void);
static void evsource_kb_destroy (int sign);
static int evsource_kb_get_fd (void);

/* kept out of struct to be reachable by atexit */
/* The singleton keyboard event source. The initializer is positional and
 * the callbacks do not take an EvSource argument, hence the void* casts;
 * the last member (presumably set_coord - verify against EvSource) is
 * left NULL. */
static EvSource ctx_ev_src_kb = {
  NULL,
  (void*)evsource_kb_has_event,
  (void*)evsource_kb_get_event,
  (void*)evsource_kb_destroy,
  (void*)evsource_kb_get_fd,
  NULL
};

/* terminal attributes of stdin as captured by evsource_kb_init(), restored
 * by real_evsource_kb_destroy() */
static struct termios orig_attr;

/* Restore the terminal attributes saved in orig_attr, at most once per
 * process.
 *
 * @sign is the signal number when invoked as a signal handler, or -11 as
 * passed by evsource_kb_destroy(); 0 is ignored. For most signals a short
 * label is printed on stderr first, SIGSEGV and -11 are silent, and an
 * unrecognised value prints the value plus the numeric signal constants.
 * NOTE(review): fprintf is not async-signal-safe and the handler returns
 * to the interrupted code rather than terminating - confirm intent.
 */
static void real_evsource_kb_destroy (int sign)
{
  static int done = 0;
  const char *label = NULL;

  if (sign == 0 || done)
    return;
  done = 1;

  if (sign == -11 || sign == SIGSEGV)
    label = NULL; /* silent */
  else if (sign == SIGABRT)
    label = " SIGABRT\n";
  else if (sign == SIGBUS)
    label = " SIGBUS\n";
  else if (sign == SIGKILL)
    label = " SIGKILL\n";
  else if (sign == SIGINT)
    label = " SIGINT\n";
  else if (sign == SIGTERM)
    label = " SIGTERM\n";
  else if (sign == SIGQUIT)
    label = " SIGQUIT\n";
  else
  {
    fprintf (stderr, "sign: %i\n", sign);
    fprintf (stderr, "%i %i %i %i %i %i %i\n", SIGSEGV, SIGABRT, SIGBUS, SIGKILL, SIGINT, SIGTERM, SIGQUIT);
  }

  if (label)
    fprintf (stderr, "%s", label);

  tcsetattr (STDIN_FILENO, TCSAFLUSH, &orig_attr);
}

/* EvSource destroy callback: restores the terminal through
 * real_evsource_kb_destroy() using the non-signal marker value -11;
 * @sign is ignored. */
static void evsource_kb_destroy (int sign)
{
  const int not_a_signal = -11;
  real_evsource_kb_destroy (not_a_signal);
}

/* Put stdin into raw mode and arrange for the original terminal
 * attributes to be restored when a terminating signal arrives.
 *
 * Returns 0 on success and -1 when the current attributes cannot be read
 * (e.g. stdin is not a terminal). Failure to apply raw mode is tolerated
 * and also reported as 0, as before.
 *
 * The attributes are now captured before any handler is installed, so a
 * handler can never restore an orig_attr that was not filled in. SIGKILL
 * is no longer registered: it cannot be caught, signal() always rejected
 * it.
 */
static int evsource_kb_init ()
{
  static const int restore_signals[] =
    { SIGSEGV, SIGABRT, SIGBUS, SIGINT, SIGTERM, SIGQUIT };
  struct termios raw;

//  ioctl(STDIN_FILENO, KDSKBMODE, K_RAW);
  if (tcgetattr (STDIN_FILENO, &orig_attr) == -1)
    {
      fprintf (stderr, "error initializing keyboard\n");
      return -1;
    }

  for (unsigned int i = 0; i < sizeof (restore_signals) / sizeof (restore_signals[0]); i++)
    signal (restore_signals[i], real_evsource_kb_destroy);

  raw = orig_attr;

  cfmakeraw (&raw);

  raw.c_cc[VMIN] = 1; raw.c_cc[VTIME] = 0; /* 1 byte, no timer */
  if (tcsetattr (STDIN_FILENO, TCSAFLUSH, &raw) < 0)
    return 0; // XXX? return other value?

  return 0;
}
/* Zero-timeout poll of stdin: returns 1 when select() reports exactly one
 * ready descriptor, 0 otherwise. */
static int evsource_kb_has_event (void)
{
  struct timeval no_wait;
  fd_set readable;

  no_wait.tv_sec = 0;
  no_wait.tv_usec = 0;
  FD_ZERO (&readable);
  FD_SET (STDIN_FILENO, &readable);

  int ready = select (STDIN_FILENO+1, &readable, NULL, NULL, &no_wait);
  return ready == 1 ? 1 : 0;
}

/* note that a nick can have multiple occurrences, the labels
 * should be kept the same for all occurrences of a combination.
 *
 * this table is taken from nchanterm.
 *
 * Lookup (fb_keyboard_match_keycode) returns the first entry whose
 * sequence matches exactly, so when a sequence is listed twice only the
 * earlier entry is effective. The table ends with a NULL nick.
 */
typedef struct MmmKeyCode {
  char *nick;          /* programmers name for key */
  char  sequence[10];  /* terminal sequence */
} MmmKeyCode;
static const MmmKeyCode ufb_keycodes[]={
  {"up",                  "\e[A"},
  {"down",                "\e[B"},
  {"right",               "\e[C"},
  {"left",                "\e[D"},

  {"shift-up",            "\e[1;2A"},
  {"shift-down",          "\e[1;2B"},
  {"shift-right",         "\e[1;2C"},
  {"shift-left",          "\e[1;2D"},

  {"alt-up",              "\e[1;3A"},
  {"alt-down",            "\e[1;3B"},
  {"alt-right",           "\e[1;3C"},
  {"alt-left",            "\e[1;3D"},
  {"alt-shift-up",         "\e[1;4A"},
  {"alt-shift-down",       "\e[1;4B"},
  {"alt-shift-right",      "\e[1;4C"},
  {"alt-shift-left",       "\e[1;4D"},

  {"control-up",          "\e[1;5A"},
  {"control-down",        "\e[1;5B"},
  {"control-right",       "\e[1;5C"},
  {"control-left",        "\e[1;5D"},

  /* putty */
  {"control-up",          "\eOA"},
  {"control-down",        "\eOB"},
  {"control-right",       "\eOC"},
  {"control-left",        "\eOD"},

  {"control-shift-up",    "\e[1;6A"},
  {"control-shift-down",  "\e[1;6B"},
  {"control-shift-right", "\e[1;6C"},
  {"control-shift-left",  "\e[1;6D"},

  {"control-up",          "\eOa"},
  {"control-down",        "\eOb"},
  {"control-right",       "\eOc"},
  {"control-left",        "\eOd"},

  {"shift-up",            "\e[a"},
  {"shift-down",          "\e[b"},
  {"shift-right",         "\e[c"},
  {"shift-left",          "\e[d"},

  {"insert",              "\e[2~"},
  {"delete",              "\e[3~"},
  {"page-up",             "\e[5~"},
  {"page-down",           "\e[6~"},
  {"home",                "\eOH"},
  {"end",                 "\eOF"},
  {"home",                "\e[H"},
  {"end",                 "\e[F"},
  {"control-delete",      "\e[3;5~"},
  {"shift-delete",        "\e[3;2~"},
  {"control-shift-delete","\e[3;6~"},

  {"F1",         "\e[25~"},
  {"F2",         "\e[26~"},
  {"F3",         "\e[27~"},
  /* NOTE(review): same sequence as F2 above, so this entry can never be
   * returned; possibly "\e[28~" was intended - confirm before changing. */
  {"F4",         "\e[26~"},


  {"F1",         "\e[11~"},
  {"F2",         "\e[12~"},
  {"F3",         "\e[13~"},
  {"F4",         "\e[14~"},
  {"F1",         "\eOP"},
  {"F2",         "\eOQ"},
  {"F3",         "\eOR"},
  {"F4",         "\eOS"},
  {"F5",         "\e[15~"},
  {"F6",         "\e[16~"},
  {"F7",         "\e[17~"},
  {"F8",         "\e[18~"},
  {"F9",         "\e[19~"},
  {"F9",         "\e[20~"},
  {"F10",        "\e[21~"},
  {"F11",        "\e[22~"},
  {"F12",        "\e[23~"},
  {"tab",         {9, '\0'}},
  {"shift-tab",   {27, 9, '\0'}}, // also generated by alt-tab in linux console
  {"alt-space",   {27, ' ', '\0'}},
  {"shift-tab",   "\e[Z"},
  {"backspace",   {127, '\0'}},
  {"space",       " "},
  {"\e",          "\e"},
  {"return",      {10,0}},
  {"return",      {13,0}},
  /* this section could be autogenerated by code */
  {"control-a",   {1,0}},
  {"control-b",   {2,0}},
  {"control-c",   {3,0}},
  {"control-d",   {4,0}},
  {"control-e",   {5,0}},
  {"control-f",   {6,0}},
  {"control-g",   {7,0}},
  {"control-h",   {8,0}}, /* backspace? */
  {"control-i",   {9,0}},
  {"control-j",   {10,0}},
  {"control-k",   {11,0}},
  {"control-l",   {12,0}},
  {"control-n",   {14,0}},
  {"control-o",   {15,0}},
  {"control-p",   {16,0}},
  {"control-q",   {17,0}},
  {"control-r",   {18,0}},
  {"control-s",   {19,0}},
  {"control-t",   {20,0}},
  {"control-u",   {21,0}},
  {"control-v",   {22,0}},
  {"control-w",   {23,0}},
  {"control-x",   {24,0}},
  {"control-y",   {25,0}},
  {"control-z",   {26,0}},
  {"alt-`",       "\e`"},
  {"alt-0",       "\e0"},
  {"alt-1",       "\e1"},
  {"alt-2",       "\e2"},
  {"alt-3",       "\e3"},
  {"alt-4",       "\e4"},
  {"alt-5",       "\e5"},
  {"alt-6",       "\e6"},
  {"alt-7",       "\e7"},
  {"alt-8",       "\e8"},
  {"alt-9",       "\e9"},
  {"alt-+",       "\e+"},
  {"alt--",       "\e-"},
  {"alt-/",       "\e/"},
  {"alt-a",       "\ea"},
  {"alt-b",       "\eb"},
  {"alt-c",       "\ec"},
  {"alt-d",       "\ed"},
  {"alt-e",       "\ee"},
  {"alt-f",       "\ef"},
  {"alt-g",       "\eg"},
  {"alt-h",       "\eh"},
  {"alt-i",       "\ei"},
  {"alt-j",       "\ej"},
  {"alt-k",       "\ek"},
  {"alt-l",       "\el"},
  {"alt-m",       "\em"}, /* was mislabelled "alt-n" */
  {"alt-n",       "\en"},
  {"alt-o",       "\eo"},
  {"alt-p",       "\ep"},
  {"alt-q",       "\eq"},
  {"alt-r",       "\er"},
  {"alt-s",       "\es"},
  {"alt-t",       "\et"},
  {"alt-u",       "\eu"},
  {"alt-v",       "\ev"},
  {"alt-w",       "\ew"},
  {"alt-x",       "\ex"},
  {"alt-y",       "\ey"},
  {"alt-z",       "\ez"},
  /* Linux Console  */
  {"home",       "\e[1~"},
  {"end",        "\e[4~"},
  {"F1",         "\e[[A"},
  {"F2",         "\e[[B"},
  {"F3",         "\e[[C"},
  {"F4",         "\e[[D"},
  {"F5",         "\e[[E"},
  {"F6",         "\e[[F"},
  {"F7",         "\e[[G"},
  {"F8",         "\e[[H"},
  {"F9",         "\e[[I"},
  {"F10",        "\e[[J"},
  {"F11",        "\e[[K"},
  {"F12",        "\e[[L"},
  {NULL, }
};
static int fb_keyboard_match_keycode (const char *buf, int length, const MmmKeyCode **ret)
{
  int i;
  int matches = 0;

  if (!strncmp (buf, "\e[M", MIN(length,3)))
    {
      if (length >= 6)
        return 9001;
      return 2342;
    }
  for (i = 0; ufb_keycodes[i].nick; i++)
    if (!strncmp (buf, ufb_keycodes[i].sequence, length))
      {
        matches ++;
        if ((int)strlen (ufb_keycodes[i].sequence) == length && ret)
          {
            *ret = &ufb_keycodes[i];
            return 1;
          }
      }
  if (matches != 1 && ret)
    *ret = NULL;
  return matches==1?2:matches;
}

//int is_active (void *host)
//{
//        return 1;
//}

/* Read one key event from stdin and return its name as a newly allocated
 * string owned by the caller.
 *
 * Bytes are read one at a time and matched against the keycode table until
 * the sequence is either recognised (the entry's nick is returned), known
 * not to match anything (the UTF-8 character / ASCII byte itself, or an
 * "unhandled ..." description for longer sequences), or 10 bytes have been
 * consumed ("fail"). A lone ESC not followed by more input within 120ms
 * yields "escape". "key read eek" is returned when stdin yields no byte.
 * NULL is returned when the matcher reports a match without an entry.
 *
 * Fixes over the previous version: read() returning 0 (end of input) is no
 * longer treated as a successfully read byte; continuation bytes of a UTF-8
 * character are read until complete and the result is terminated after the
 * bytes actually obtained (a failed read used to count as success); the
 * static, unbounded scratch buffer is replaced by a bounded local one.
 */
static char *evsource_kb_get_event (void)
{
  unsigned char buf[20];
  int length;

  for (length = 0; length < 10; length ++)
    {
      const MmmKeyCode *match = NULL;

      if (read (STDIN_FILENO, &buf[length], 1) != 1)
        return strdup("key read eek");

      /* special case ESC, so that we can use it alone in keybindings */
      if (length == 0 && buf[0] == 27)
        {
          struct timeval tv;
          fd_set rfds;
          FD_ZERO (&rfds);
          FD_SET (STDIN_FILENO, &rfds);
          tv.tv_sec = 0;
          tv.tv_usec = 1000 * 120;
          if (select (STDIN_FILENO+1, &rfds, NULL, NULL, &tv) == 0)
            return strdup ("escape");
        }

      switch (fb_keyboard_match_keycode ((void*)buf, length + 1, &match))
        {
          case 1: /* unique match */
            if (!match)
              return NULL;
            return strdup (match->nick);
          case 0: /* no matches, bail*/
            {
              char ret[256];

              if (length == 0) /* a single utf8 character or ascii byte */
                {
                  int wanted = ctx_utf8_len (buf[0]);
                  int have   = 1;

                  if (wanted > (int) sizeof (buf) - 1)
                    wanted = (int) sizeof (buf) - 1;
                  while (have < wanted)
                    {
                      int bytes = read (STDIN_FILENO, &buf[have], wanted - have);
                      if (bytes <= 0)
                        break;
                      have += bytes;
                    }
                  buf[have] = 0;
                  return strdup ((char*) buf);
                }

              /* describe up to the first seven bytes read so far */
              int code[7];
              int glyph[7];
              for (int k = 0; k < 7; k++)
                {
                  int present = length >= k;
                  code[k]  = present ? buf[k] : 0;
                  glyph[k] = present ? (buf[k] > 31 ? buf[k] : '?') : ' ';
                }
              snprintf (ret, sizeof (ret), "unhandled %i:'%c' %i:'%c' %i:'%c' %i:'%c' %i:'%c' %i:'%c' %i:'%c'",
                  code[0], glyph[0],
                  code[1], glyph[1],
                  code[2], glyph[2],
                  code[3], glyph[3],
                  code[4], glyph[4],
                  code[5], glyph[5],
                  code[6], glyph[6]);
              return strdup(ret);
            }
          default: /* continue */
            break;
        }
    }
  return strdup("fail");
}

/* The keyboard source reads from stdin. */
static int evsource_kb_get_fd (void)
{
  const int fd = STDIN_FILENO;
  return fd;
}


/* Initialise the terminal keyboard and return the shared keyboard event
 * source, or NULL when initialisation failed. */
static EvSource *evsource_kb_new (void)
{
  if (evsource_kb_init() != 0)
    return NULL;
  return &ctx_ev_src_kb;
}

#if CTX_BABL
/* number of outstanding ctx_babl_init() calls */
static int _ctx_babl_inits = 0;
#endif
/* Reference counted babl initialisation: babl_init() runs when the count
 * goes from 0 to 1. A no-op in builds without CTX_BABL. */
static void ctx_babl_init (void)
{
#if CTX_BABL
  if (++_ctx_babl_inits == 1)
    babl_init ();
#endif
}
/* Counterpart of ctx_babl_init(): babl_exit() runs when the count drops
 * back to 0. A no-op in builds without CTX_BABL. */
static void ctx_babl_exit (void)
{
#if CTX_BABL
  if (--_ctx_babl_inits == 0)
    babl_exit ();
#endif
}

/* Drain every event source of @tiled. Each event string is delivered via
 * ctx_key_press() while the VT is active and dropped otherwise; either way
 * it is freed here. Returns the number of delivered events. */
static int event_check_pending (CtxTiled *tiled)
{
  int delivered = 0;

  for (int src = 0; src < tiled->evsource_count; src++)
  {
    for (;;)
    {
      if (!evsource_has_event (tiled->evsource[src]))
        break;

      char *event = evsource_get_event (tiled->evsource[src]);
      if (!event)
        continue;

      if (tiled->vt_active)
      {
        /* everything is delivered as key-press, the key_press handler
         * disambiguates */
        ctx_key_press (tiled->backend.ctx, 0, event, 0);
        delivered++;
      }
      free (event);
    }
  }
  return delivered;
}

#endif


/* Mark @ctx as needing a redraw by bumping its dirty counter. */
void ctx_queue_draw (Ctx *ctx)
{
  ctx->dirty += 1;
}
/* Append a circular arc from the current point to (@x1, @y1), in the
 * manner of the SVG path arc command restricted to a circle.
 *
 * @radius: requested radius; when it is negative or too small to span the
 *          two points (with a small tolerance) half the chord length is
 *          used instead
 * @large:  non-zero selects the larger of the two possible arcs
 * @sweep:  non-zero selects the positive-angle direction
 *
 * When the end point coincides with the current point nothing is added;
 * previously this case divided by zero and handed NaN coordinates to
 * ctx_arc(). All square roots now go through ctx_sqrtf() so the radius
 * fallback and the angle computation agree with each other.
 */
static void ctx_svg_arc_circle_to (Ctx *ctx,
                                   float radius,
                                   int large,
                                   int sweep,
                                   float x1, float y1)
{
  float x0, y0;
  ctx_current_point (ctx, &x0, &y0);
  int left_side = (large && !sweep) || (sweep && !large);

  /* half of the chord between start and end point */
  float delta_x = (x1-x0) * 0.5f;
  float delta_y = (y1-y0) * 0.5f;

  float midpoint_x = x0 + delta_x;
  float midpoint_y = y0 + delta_y;

  /* half chord rotated by 90 degrees, pointing towards the centre */
  float radius_vec_x;
  float radius_vec_y;
  float r = radius;

  if (left_side)
  {
    radius_vec_x = -delta_y;
    radius_vec_y = delta_x;
  }
  else
  {
    radius_vec_x = delta_y;
    radius_vec_y = -delta_x;
  }

  float len_squared = ctx_pow2(radius_vec_x) + ctx_pow2(radius_vec_y);
  if (len_squared <= 0.0f)
    return; /* coincident end points: there is no arc to draw */

  if (len_squared - 0.03 > r * r || r < 0)
  {
    r = ctx_sqrtf (len_squared);
  }

  /* distance from chord midpoint to centre, in units of the half chord */
  float center_offset = ctx_sqrtf(ctx_maxf(0, r * r / len_squared-1));
  float center_x = midpoint_x + radius_vec_x * center_offset;
  float center_y = midpoint_y + radius_vec_y * center_offset;

  float arc = ctx_asinf(ctx_clampf(ctx_sqrtf(len_squared)/r, -1.0, 1.0))*2;
  if (large) arc = CTX_PI*2-arc;

  float start_angle = ctx_atan2f(y0 - center_y, x0 - center_x);
  float end_angle = sweep?start_angle+arc:start_angle-arc;

  ctx_arc (ctx, center_x, center_y, r, start_angle, end_angle, !sweep);
}


/* Append an SVG style arc from the current point to (@x1, @y1).
 *
 * Only circular arcs are supported at present: the arc is drawn with
 * radius @rx, while @ry and @rotation are ignored.
 *
 * An elliptical variant (scaling the user space by rx/ry around a circular
 * arc) used to follow the unconditional return below as unreachable code.
 * It was removed; per the original note it failed, one reason being that
 * ctx_current_point() returns the point in the previous user space rather
 * than the scaled one.
 */
static void ctx_svg_arc_to (Ctx *ctx, float rx, float ry, 
                            float rotation,  int large, int sweep,
                            float x1, float y1)
{
  (void) ry;
  (void) rotation;
  ctx_svg_arc_circle_to (ctx, rx, large, sweep, x1, y1);
}

/* the parser comes in the end, nothing in ctx knows about the parser  */

#if CTX_PARSER

/* ctx parser, */

#define CTX_ID_MAXLEN 64 // in use should not be more than 40!
                         // to offer headroom for multiplexing


#define CTX_REPORT_COL_ROW 0

/* State of the text protocol parser. Instances are zero-filled and set up
 * by ctx_parser_init(); fields not mentioned there start out as 0. */
struct
  _CtxParser
{
  Ctx       *ctx;    // context passed to ctx_parser_init
  int        t_args; // total number of arguments seen for current command
  int        state;
#if CTX_PARSER_FIXED_TEMP
  uint8_t    holding[CTX_PARSER_MAXLEN]; /* fixed size scratch buffer */
#else
  uint8_t   *holding; /* heap scratch buffer, released in ctx_parser_free */
#endif
  int        hold_len;
  int        pos;

#if CTX_REPORT_COL_ROW
  int        line; /*  for error reporting */
  int        col;  /*  for error reporting */
#endif
  float      numbers[CTX_PARSER_MAX_ARGS+1];
  int        n_numbers;
  int        decimal;
  CtxCode    command; /* initialised to CTX_MOVE_TO */
  int        expected_args; /* low digits are literal higher values
                               carry special meaning */
  int        n_args;        /* reset to 0 by ctx_parser_set_command */
  int        texture_done;  /* reset to 0 by ctx_parser_set_command */
  uint8_t    texture_id[CTX_ID_MAXLEN]; // used in defineTexture only
  uint32_t   set_key_hash;
  float      pcx;
  float      pcy;
  int        color_components; /* initialised to 4; added to the argument
                                  count of color commands */
  int        color_stroke; // 0 is fill source  1 is stroke source
  CtxColorModel   color_model; // 1 gray 3 rgb 4 cmyk
  float      left_margin; // set by last user provided move_to
  int        width;       // <- maybe should be float
  int        height;
  float      cell_width;
  float      cell_height;
  int        cursor_x;    // <- leaking in from terminal
  int        cursor_y;

  int        translate_origin;

  CtxColorSpace   color_space_slot;

  /* callbacks and their user data, as handed to ctx_parser_init */
  void (*exit) (void *exit_data);
  void *exit_data;
  int   (*set_prop)(void *prop_data, uint32_t key, const char *data,  int len);
  int   (*get_prop)(void *prop_data, const char *key, char **data, int *len);
  void *prop_data;
  int   prev_byte;
};

/* Update the dimensions known to @parser. Each of @width, @height,
 * @cell_width and @cell_height is applied only when greater than zero;
 * other values leave the corresponding field unchanged. */
void
ctx_parser_set_size (CtxParser *parser,
                 int        width,
                 int        height,
                 float      cell_width,
                 float      cell_height)
{
  if (width > 0)
  {
    parser->width = width;
  }
  if (height > 0)
  {
    parser->height = height;
  }
  if (cell_width > 0)
  {
    parser->cell_width = cell_width;
  }
  if (cell_height > 0)
  {
    parser->cell_height = cell_height;
  }
}

/* Reset @parser to all zeroes and fill in the supplied configuration.
 *
 * Defaults applied here: RGBA color model with 4 components targeting the
 * fill source, and CTX_MOVE_TO as current command. Returns @parser.
 */
static CtxParser *
ctx_parser_init (CtxParser *parser,
                 Ctx       *ctx,
                 int        width,
                 int        height,
                 float      cell_width,
                 float      cell_height,
                 int        cursor_x,
                 int        cursor_y,
  int   (*set_prop)(void *prop_data, uint32_t key, const char *data,  int len),
  int   (*get_prop)(void *prop_Data, const char *key, char **data, int *len),
                 void  *prop_data,
                 void (*exit) (void *exit_data),
                 void *exit_data
                )
{
  ctx_memset (parser, 0, sizeof (CtxParser) );
#if CTX_REPORT_COL_ROW
  parser->line             = 1;
#endif

  /* target context and geometry */
  parser->ctx              = ctx;
  parser->width            = width;
  parser->height           = height;
  parser->cell_width       = cell_width;
  parser->cell_height      = cell_height;
  parser->cursor_x         = cursor_x;
  parser->cursor_y         = cursor_y;

  /* callbacks */
  parser->set_prop         = set_prop;
  parser->get_prop         = get_prop;
  parser->prop_data        = prop_data;
  parser->exit             = exit;
  parser->exit_data        = exit_data;

  /* initial parse state */
  parser->command          = CTX_MOVE_TO;
  parser->color_model      = CTX_RGBA;
  parser->color_components = 4;
  parser->color_stroke     = 0;

  return parser;
}

/* Allocate and initialise a parser; see ctx_parser_init() for the meaning
 * of the arguments. The result is released with ctx_parser_free().
 *
 * Returns NULL when the allocation fails; previously the NULL pointer was
 * handed straight to ctx_parser_init(), which wrote through it.
 */
CtxParser *ctx_parser_new (
  Ctx       *ctx,
  int        width,
  int        height,
  float      cell_width,
  float      cell_height,
  int        cursor_x,
  int        cursor_y,
  int   (*set_prop)(void *prop_data, uint32_t key, const char *data,  int len),
  int   (*get_prop)(void *prop_Data, const char *key, char **data, int *len),
  void  *prop_data,
  void (*exit) (void *exit_data),
  void *exit_data)
{
  CtxParser *parser = (CtxParser *) ctx_calloc (sizeof (CtxParser), 1);
  if (!parser)
    return NULL;
  return ctx_parser_init ( parser,
                           ctx,
                           width, height,
                           cell_width, cell_height,
                           cursor_x, cursor_y, set_prop, get_prop, prop_data,
                           exit, exit_data);
}

/* Release @parser and, when the holding buffer is heap allocated
 * (!CTX_PARSER_FIXED_TEMP), that buffer as well. Passing NULL is allowed
 * and does nothing, mirroring free(). */
void ctx_parser_free (CtxParser *parser)
{
  if (!parser)
    return;
#if !CTX_PARSER_FIXED_TEMP
  free (parser->holding); /* free(NULL) is a no-op, no guard needed */
#endif
  free (parser);
}

/* Special return values of ctx_arguments_for_code(), used instead of a
 * literal argument count. For values >= CTX_ARG_NUMBER_OF_COMPONENTS,
 * ctx_parser_set_command() turns them into (value % 100) plus the
 * parser's current number of color components. */
#define CTX_ARG_COLLECT_NUMBERS             50
#define CTX_ARG_STRING_OR_NUMBER            100
#define CTX_ARG_NUMBER_OF_COMPONENTS        200
#define CTX_ARG_NUMBER_OF_COMPONENTS_PLUS_1 201

/* Returns how many arguments the parser should expect for @code: either a
 * literal count (0..9) or one of the CTX_ARG_* special values. Codes that
 * are not listed explicitly fall into the default branch and yield 0. */
static int ctx_arguments_for_code (CtxCode code)
{
  switch (code)
    {
      /* commands without arguments */
      case CTX_SAVE:
      case CTX_START_GROUP:
      case CTX_END_GROUP:
      case CTX_IDENTITY:
      case CTX_CLOSE_PATH:
      case CTX_BEGIN_PATH:
      case CTX_RESET:
      case CTX_FLUSH:
      case CTX_RESTORE:
      case CTX_STROKE:
      case CTX_FILL:
      case CTX_PAINT:
      case CTX_NEW_PAGE:
      case CTX_CLIP:
      case CTX_EXIT:
        return 0;
      /* single argument */
      case CTX_GLOBAL_ALPHA:
      case CTX_COMPOSITING_MODE:
      case CTX_BLEND_MODE:
      case CTX_EXTEND:
      case CTX_FONT_SIZE:
      case CTX_LINE_JOIN:
      case CTX_LINE_CAP:
      case CTX_LINE_WIDTH:
      case CTX_LINE_DASH_OFFSET:
      case CTX_IMAGE_SMOOTHING:
      case CTX_SHADOW_BLUR:
      case CTX_SHADOW_OFFSET_X:
      case CTX_SHADOW_OFFSET_Y:
      case CTX_FILL_RULE:
      case CTX_TEXT_ALIGN:
      case CTX_TEXT_BASELINE:
      case CTX_TEXT_DIRECTION:
      case CTX_MITER_LIMIT:
      case CTX_REL_VER_LINE_TO:
      case CTX_REL_HOR_LINE_TO:
      case CTX_HOR_LINE_TO:
      case CTX_VER_LINE_TO:
      case CTX_FONT:
      case CTX_ROTATE:
      case CTX_GLYPH:
        return 1;
      /* two arguments */
      case CTX_TRANSLATE:
      case CTX_REL_SMOOTHQ_TO:
      case CTX_LINE_TO:
      case CTX_MOVE_TO:
      case CTX_SCALE:
      case CTX_REL_LINE_TO:
      case CTX_REL_MOVE_TO:
      case CTX_SMOOTHQ_TO:
        return 2;
      /* four arguments */
      case CTX_LINEAR_GRADIENT:
      case CTX_REL_QUAD_TO:
      case CTX_QUAD_TO:
      case CTX_RECTANGLE:
      case CTX_FILL_RECT:
      case CTX_STROKE_RECT:
      case CTX_REL_SMOOTH_TO:
      case CTX_VIEW_BOX:
      case CTX_SMOOTH_TO:
        return 4;
      case CTX_ROUND_RECTANGLE:
        return 5;
      case CTX_ARC:
      case CTX_CURVE_TO:
      case CTX_REL_CURVE_TO:
      case CTX_RADIAL_GRADIENT:
        return 6;
      case CTX_ARC_TO:
      case CTX_REL_ARC_TO:
        return 7;
      case CTX_APPLY_TRANSFORM:
      case CTX_SOURCE_TRANSFORM:
        return 9;
      /* commands taking a string or a number */
      case CTX_STROKE_TEXT:
      case CTX_TEXT:
      case CTX_COLOR_SPACE:
      case CTX_DEFINE_GLYPH:
      case CTX_KERNING_PAIR:
      case CTX_TEXTURE:
      case CTX_DEFINE_TEXTURE:
        return CTX_ARG_STRING_OR_NUMBER;
      case CTX_LINE_DASH: /* append to current dashes for each argument encountered */
        return CTX_ARG_COLLECT_NUMBERS;
      //case CTX_SET_KEY:
      /* argument count depends on the active color model */
      case CTX_COLOR:
      case CTX_SHADOW_COLOR:
        return CTX_ARG_NUMBER_OF_COMPONENTS;
      case CTX_GRADIENT_STOP:
        return CTX_ARG_NUMBER_OF_COMPONENTS_PLUS_1;

        /* everything else, including the codes spelled out below, yields 0 */
        default:
#if 1
        case CTX_SET_RGBA_U8:
        case CTX_NOP:
        case CTX_NEW_EDGE:
        case CTX_EDGE:
        case CTX_EDGE_FLIPPED:
        case CTX_CONT:
        case CTX_DATA:
        case CTX_DATA_REV:
        case CTX_SET_PIXEL:
        case CTX_REL_LINE_TO_X4:
        case CTX_REL_LINE_TO_REL_CURVE_TO:
        case CTX_REL_CURVE_TO_REL_LINE_TO:
        case CTX_REL_CURVE_TO_REL_MOVE_TO:
        case CTX_REL_LINE_TO_X2:
        case CTX_MOVE_TO_REL_LINE_TO:
        case CTX_REL_LINE_TO_REL_MOVE_TO:
        case CTX_FILL_MOVE_TO:
        case CTX_REL_QUAD_TO_REL_QUAD_TO:
        case CTX_REL_QUAD_TO_S16:
        case CTX_STROKE_SOURCE:
#endif
        return 0;
    }
}

/* Prepare @parser for collecting the arguments of @code and return @code.
 *
 * Only codes in the range 32..149 touch the parser state: the expected
 * argument count is looked up, the per-command counters are reset, and
 * component-count placeholders are resolved against the parser's current
 * number of color components.
 */
static int ctx_parser_set_command (CtxParser *parser, CtxCode code)
{
  if (code < 32 || code >= 150)
    return code;

  int expected = ctx_arguments_for_code (code);
  if (expected >= CTX_ARG_NUMBER_OF_COMPONENTS)
    expected = (expected % 100) + parser->color_components;

  parser->expected_args = expected;
  parser->n_args        = 0;
  parser->texture_done  = 0;
  return code;
}

static void ctx_parser_set_color_model (CtxParser *parser, CtxColorModel color_model, int stroke);

/* Resolve a word of the text protocol to a command code or keyword value.
 *
 * @parser: parser whose argument state is armed through
 *          ctx_parser_set_command() for recognized commands.
 * @str:    NUL-terminated word to resolve.
 *
 * Resolution order:
 *   1. a two-character word starting with CTX_SET_KEY selects a setter
 *      command by its second character;
 *   2. words of two or more characters have an optional "ctx_"/"CTX_"
 *      prefix and an optional "set_" prefix stripped, are hashed with
 *      ctx_strhash() and looked up in the switch below;
 *   3. a single-character word is used as the code directly.
 *
 * Returns the command code (after passing it through
 * ctx_parser_set_command()), or - for keyword words such as "round" or
 * "center" - the corresponding enum constant without touching the parser's
 * argument state.  An unrecognized multi-character word yields its raw
 * hash value, still passed through ctx_parser_set_command().
 */
static int ctx_parser_resolve_command (CtxParser *parser, const uint8_t *str)
{
  uint32_t ret = str[0]; /* if it is single char it already is the CtxCode */

  /* this is handled outside the hashing to make it possible to be case insensitive
   * with the rest.
   */
  if (str[0] == CTX_SET_KEY && str[1] && str[2] == 0)
  {
    /* no default: an unknown second character falls through to the
     * generic handling below
     */
    switch (str[1])
    {
      case 'm': return ctx_parser_set_command (parser, CTX_COMPOSITING_MODE);
      case 'B': return ctx_parser_set_command (parser, CTX_BLEND_MODE);
      case 'e': return ctx_parser_set_command (parser, CTX_EXTEND);
      case 'l': return ctx_parser_set_command (parser, CTX_MITER_LIMIT);
      case 't': return ctx_parser_set_command (parser, CTX_TEXT_ALIGN);
      case 'b': return ctx_parser_set_command (parser, CTX_TEXT_BASELINE);
      case 'd': return ctx_parser_set_command (parser, CTX_TEXT_DIRECTION);
      case 'j': return ctx_parser_set_command (parser, CTX_LINE_JOIN);
      case 'c': return ctx_parser_set_command (parser, CTX_LINE_CAP);
      case 'w': return ctx_parser_set_command (parser, CTX_LINE_WIDTH);
      case 'D': return ctx_parser_set_command (parser, CTX_LINE_DASH_OFFSET);
      case 'S': return ctx_parser_set_command (parser, CTX_IMAGE_SMOOTHING);
      case 'C': return ctx_parser_set_command (parser, CTX_SHADOW_COLOR);
      case 's': return ctx_parser_set_command (parser, CTX_SHADOW_BLUR);
      case 'x': return ctx_parser_set_command (parser, CTX_SHADOW_OFFSET_X);
      case 'y': return ctx_parser_set_command (parser, CTX_SHADOW_OFFSET_Y);
      case 'a': return ctx_parser_set_command (parser, CTX_GLOBAL_ALPHA);
      case 'f': return ctx_parser_set_command (parser, CTX_FONT_SIZE);
      case 'r': return ctx_parser_set_command (parser, CTX_FILL_RULE);
    }
  }

  /* words of at least two characters are resolved through their hash */
  if (str[0] && str[1])
    {
      uint32_t str_hash;
      /* trim ctx_ and CTX_ prefix */
      if ( (str[0] == 'c' && str[1] == 't' && str[2] == 'x' && str[3] == '_') ||
           (str[0] == 'C' && str[1] == 'T' && str[2] == 'X' && str[3] == '_') )
        {
          str += 4;
        }
      /* trim a set_ prefix, so that e.g. set_line_width style names hash
       * like their unprefixed form
       */
      if ( (str[0] == 's' && str[1] == 'e' && str[2] == 't' && str[3] == '_') )
        { str += 4; }
      str_hash = ctx_strhash ( (char *) str);
      switch (str_hash)
        {
          /* first a list of mappings to one_char hashes, handled in a
           * separate fast path switch without hashing
           */
          case CTX_arcTo:          ret = CTX_ARC_TO; break;
          case CTX_arc:            ret = CTX_ARC; break;
          case CTX_curveTo:        ret = CTX_CURVE_TO; break;
          case CTX_restore:        ret = CTX_RESTORE; break;
          case CTX_stroke:         ret = CTX_STROKE; break;
          case CTX_fill:           ret = CTX_FILL; break;
          case CTX_paint:          ret = CTX_PAINT; break;
          case CTX_flush:          ret = CTX_FLUSH; break;
          case CTX_horLineTo:      ret = CTX_HOR_LINE_TO; break;
          case CTX_rotate:         ret = CTX_ROTATE; break;
          case CTX_color:          ret = CTX_COLOR; break;
          case CTX_lineTo:         ret = CTX_LINE_TO; break;
          case CTX_moveTo:         ret = CTX_MOVE_TO; break;
          case CTX_scale:          ret = CTX_SCALE; break;
          case CTX_newPage:        ret = CTX_NEW_PAGE; break;
          case CTX_quadTo:         ret = CTX_QUAD_TO; break;
          case CTX_viewBox:        ret = CTX_VIEW_BOX; break;
          case CTX_smooth_to:      ret = CTX_SMOOTH_TO; break;
          case CTX_smooth_quad_to: ret = CTX_SMOOTHQ_TO; break;
          case CTX_clear:          ret = CTX_COMPOSITE_CLEAR; break;
          case CTX_copy:           ret = CTX_COMPOSITE_COPY; break;
          case CTX_destinationOver:  ret = CTX_COMPOSITE_DESTINATION_OVER; break;
          case CTX_destinationIn:    ret = CTX_COMPOSITE_DESTINATION_IN; break;
          case CTX_destinationOut:   ret = CTX_COMPOSITE_DESTINATION_OUT; break;
          case CTX_sourceOver:       ret = CTX_COMPOSITE_SOURCE_OVER; break;
          case CTX_sourceAtop:       ret = CTX_COMPOSITE_SOURCE_ATOP; break;
          case CTX_destinationAtop:  ret = CTX_COMPOSITE_DESTINATION_ATOP; break;
          case CTX_sourceOut:        ret = CTX_COMPOSITE_SOURCE_OUT; break;
          case CTX_sourceIn:         ret = CTX_COMPOSITE_SOURCE_IN; break;
          case CTX_xor:              ret = CTX_COMPOSITE_XOR; break;
          case CTX_darken:           ret = CTX_BLEND_DARKEN; break;
          case CTX_lighten:          ret = CTX_BLEND_LIGHTEN; break;
          //case CTX_color:          ret = CTX_BLEND_COLOR; break;
          //
          //  XXX check that the special casing for color works
          //      it is the first collision and it is due to our own
          //      color, not w3c for now unique use of it
          //
          case CTX_hue:            ret = CTX_BLEND_HUE; break;
          case CTX_multiply:       ret = CTX_BLEND_MULTIPLY; break;
          case CTX_normal:         ret = CTX_BLEND_NORMAL;break;
          case CTX_screen:         ret = CTX_BLEND_SCREEN;break;
          case CTX_difference:     ret = CTX_BLEND_DIFFERENCE; break;
          case CTX_reset:          ret = CTX_RESET; break;
          case CTX_verLineTo:      ret = CTX_VER_LINE_TO; break;
          case CTX_exit:
          case CTX_done:           ret = CTX_EXIT; break;
          case CTX_closePath:      ret = CTX_CLOSE_PATH; break;
          case CTX_beginPath:
          case CTX_newPath:        ret = CTX_BEGIN_PATH; break;
          case CTX_relArcTo:       ret = CTX_REL_ARC_TO; break;
          case CTX_clip:           ret = CTX_CLIP; break;
          case CTX_relCurveTo:     ret = CTX_REL_CURVE_TO; break;
          case CTX_startGroup:     ret = CTX_START_GROUP; break;
          case CTX_endGroup:       ret = CTX_END_GROUP; break;
          case CTX_save:           ret = CTX_SAVE; break;
          case CTX_translate:      ret = CTX_TRANSLATE; break;
          case CTX_linearGradient: ret = CTX_LINEAR_GRADIENT; break;
          case CTX_relHorLineTo:   ret = CTX_REL_HOR_LINE_TO; break;
          case CTX_relLineTo:      ret = CTX_REL_LINE_TO; break;
          case CTX_relMoveTo:      ret = CTX_REL_MOVE_TO; break;
          case CTX_font:           ret = CTX_FONT; break;
          case CTX_radialGradient:ret = CTX_RADIAL_GRADIENT; break;
          case CTX_gradientAddStop:
          case CTX_addStop:        ret = CTX_GRADIENT_STOP; break;
          case CTX_relQuadTo:      ret = CTX_REL_QUAD_TO; break;
          case CTX_rectangle:
          case CTX_rect:           ret = CTX_RECTANGLE; break;
          case CTX_roundRectangle: ret = CTX_ROUND_RECTANGLE; break;
          case CTX_relSmoothTo:    ret = CTX_REL_SMOOTH_TO; break;
          case CTX_relSmoothqTo:   ret = CTX_REL_SMOOTHQ_TO; break;
          case CTX_strokeText:     ret = CTX_STROKE_TEXT; break;
          case CTX_strokeRect:     ret = CTX_STROKE_RECT; break;
          case CTX_fillRect:       ret = CTX_FILL_RECT; break;
          case CTX_relVerLineTo:   ret = CTX_REL_VER_LINE_TO; break;
          case CTX_text:           ret = CTX_TEXT; break;
          case CTX_identity:       ret = CTX_IDENTITY; break;
          case CTX_transform:      ret = CTX_APPLY_TRANSFORM; break;
          case CTX_sourceTransform: ret = CTX_SOURCE_TRANSFORM; break;
          case CTX_texture:        ret = CTX_TEXTURE; break;
          case CTX_defineTexture:  ret = CTX_DEFINE_TEXTURE; break;
#if 0
          case CTX_rgbSpace:
            return ctx_parser_set_command (parser, CTX_SET_RGB_SPACE);
          case CTX_cmykSpace:
            return ctx_parser_set_command (parser, CTX_SET_CMYK_SPACE);
          case CTX_drgbSpace:
            return ctx_parser_set_command (parser, CTX_SET_DRGB_SPACE);
#endif
          case CTX_defineGlyph:
            return ctx_parser_set_command (parser, CTX_DEFINE_GLYPH);
          case CTX_kerningPair:
            return ctx_parser_set_command (parser, CTX_KERNING_PAIR);

          case CTX_colorSpace:
            return ctx_parser_set_command (parser, CTX_COLOR_SPACE);
          case CTX_fillRule:
            return ctx_parser_set_command (parser, CTX_FILL_RULE);
          case CTX_fontSize:
          case CTX_setFontSize:
            return ctx_parser_set_command (parser, CTX_FONT_SIZE);
          case CTX_compositingMode:
            return ctx_parser_set_command (parser, CTX_COMPOSITING_MODE);

          case CTX_extend:
            return ctx_parser_set_command (parser, CTX_EXTEND);

          case CTX_blend:
          case CTX_blending:
          case CTX_blendMode:
            return ctx_parser_set_command (parser, CTX_BLEND_MODE);

          case CTX_miterLimit:
            return ctx_parser_set_command (parser, CTX_MITER_LIMIT);
          case CTX_textAlign:
            return ctx_parser_set_command (parser, CTX_TEXT_ALIGN);
          case CTX_textBaseline:
            return ctx_parser_set_command (parser, CTX_TEXT_BASELINE);
          case CTX_textDirection:
            return ctx_parser_set_command (parser, CTX_TEXT_DIRECTION);
          case CTX_join:
          case CTX_lineJoin:
          case CTX_setLineJoin:
            return ctx_parser_set_command (parser, CTX_LINE_JOIN);
          case CTX_glyph:
            return ctx_parser_set_command (parser, CTX_GLYPH);
          case CTX_cap:
          case CTX_lineCap:
          case CTX_setLineCap:
            return ctx_parser_set_command (parser, CTX_LINE_CAP);
          case CTX_lineDash:
            return ctx_parser_set_command (parser, CTX_LINE_DASH);
          case CTX_lineWidth:
          case CTX_setLineWidth:
            return ctx_parser_set_command (parser, CTX_LINE_WIDTH);
          case CTX_lineDashOffset:
            return ctx_parser_set_command (parser, CTX_LINE_DASH_OFFSET);
          case CTX_imageSmoothing:
            return ctx_parser_set_command (parser, CTX_IMAGE_SMOOTHING);
          case CTX_shadowColor:
            return ctx_parser_set_command (parser, CTX_SHADOW_COLOR);
          case CTX_shadowBlur:
            return ctx_parser_set_command (parser, CTX_SHADOW_BLUR);
          case CTX_shadowOffsetX:
            return ctx_parser_set_command (parser, CTX_SHADOW_OFFSET_X);
          case CTX_shadowOffsetY:
            return ctx_parser_set_command (parser, CTX_SHADOW_OFFSET_Y);
          case CTX_globalAlpha:
            return ctx_parser_set_command (parser, CTX_GLOBAL_ALPHA);

          case CTX_strokeSource:
            return ctx_parser_set_command (parser, CTX_STROKE_SOURCE);

          /* strings are handled directly here,
           * instead of in the one-char handler, using return instead of break
           */
          /* each color word first selects the color model (which sets
           * parser->color_components) and only then arms CTX_COLOR, so the
           * expected argument count matches the chosen model
           */
          case CTX_gray:
            ctx_parser_set_color_model (parser, CTX_GRAY, 0);
            return ctx_parser_set_command (parser, CTX_COLOR);
          case CTX_graya:
            ctx_parser_set_color_model (parser, CTX_GRAYA, 0);
            return ctx_parser_set_command (parser, CTX_COLOR);
          case CTX_rgb:
            ctx_parser_set_color_model (parser, CTX_RGB, 0);
            return ctx_parser_set_command (parser, CTX_COLOR);
          case CTX_drgb:
            ctx_parser_set_color_model (parser, CTX_DRGB, 0);
            return ctx_parser_set_command (parser, CTX_COLOR);
          case CTX_rgba:
            ctx_parser_set_color_model (parser, CTX_RGBA, 0);
            return ctx_parser_set_command (parser, CTX_COLOR);
          case CTX_drgba:
            ctx_parser_set_color_model (parser, CTX_DRGBA, 0);
            return ctx_parser_set_command (parser, CTX_COLOR);
          case CTX_cmyk:
            ctx_parser_set_color_model (parser, CTX_CMYK, 0);
            return ctx_parser_set_command (parser, CTX_COLOR);
          case CTX_cmyka:
            ctx_parser_set_color_model (parser, CTX_CMYKA, 0);
            return ctx_parser_set_command (parser, CTX_COLOR);
          case CTX_lab:
            ctx_parser_set_color_model (parser, CTX_LAB, 0);
            return ctx_parser_set_command (parser, CTX_COLOR);
          case CTX_laba:
            ctx_parser_set_color_model (parser, CTX_LABA, 0);
            return ctx_parser_set_command (parser, CTX_COLOR);
          case CTX_lch:
            ctx_parser_set_color_model (parser, CTX_LCH, 0);
            return ctx_parser_set_command (parser, CTX_COLOR);
          case CTX_lcha:
            ctx_parser_set_color_model (parser, CTX_LCHA, 0);
            return ctx_parser_set_command (parser, CTX_COLOR);

          /* and a full repeat of the above, with S for Stroke suffix */
          case CTX_grayS:
            ctx_parser_set_color_model (parser, CTX_GRAY, 1);
            return ctx_parser_set_command (parser, CTX_COLOR);
          case CTX_grayaS:
            ctx_parser_set_color_model (parser, CTX_GRAYA, 1);
            return ctx_parser_set_command (parser, CTX_COLOR);
          case CTX_rgbS:
            ctx_parser_set_color_model (parser, CTX_RGB, 1);
            return ctx_parser_set_command (parser, CTX_COLOR);
          case CTX_drgbS:
            ctx_parser_set_color_model (parser, CTX_DRGB, 1);
            return ctx_parser_set_command (parser, CTX_COLOR);
          case CTX_rgbaS:
            ctx_parser_set_color_model (parser, CTX_RGBA, 1);
            return ctx_parser_set_command (parser, CTX_COLOR);
          case CTX_drgbaS:
            ctx_parser_set_color_model (parser, CTX_DRGBA, 1);
            return ctx_parser_set_command (parser, CTX_COLOR);
          case CTX_cmykS:
            ctx_parser_set_color_model (parser, CTX_CMYK, 1);
            return ctx_parser_set_command (parser, CTX_COLOR);
          case CTX_cmykaS:
            ctx_parser_set_color_model (parser, CTX_CMYKA, 1);
            return ctx_parser_set_command (parser, CTX_COLOR);
          case CTX_labS:
            ctx_parser_set_color_model (parser, CTX_LAB, 1);
            return ctx_parser_set_command (parser, CTX_COLOR);
          case CTX_labaS:
            ctx_parser_set_color_model (parser, CTX_LABA, 1);
            return ctx_parser_set_command (parser, CTX_COLOR);
          case CTX_lchS:
            ctx_parser_set_color_model (parser, CTX_LCH, 1);
            return ctx_parser_set_command (parser, CTX_COLOR);
          case CTX_lchaS:
            ctx_parser_set_color_model (parser, CTX_LCHA, 1);
            return ctx_parser_set_command (parser, CTX_COLOR);

          /* words that correspond to low integer constants
           * (returned directly; ctx_parser_set_command() is not called for
           * these, so the parser's argument state is left untouched)
          */
          case CTX_nonzero:     return CTX_FILL_RULE_WINDING;
          case CTX_non_zero:    return CTX_FILL_RULE_WINDING;
          case CTX_winding:     return CTX_FILL_RULE_WINDING;
          case CTX_evenOdd:
          case CTX_even_odd:    return CTX_FILL_RULE_EVEN_ODD;
          case CTX_bevel:       return CTX_JOIN_BEVEL;
          case CTX_round:       return CTX_JOIN_ROUND;
          case CTX_miter:       return CTX_JOIN_MITER;
          case CTX_none:        return CTX_CAP_NONE;
          case CTX_square:      return CTX_CAP_SQUARE;
          case CTX_start:       return CTX_TEXT_ALIGN_START;
          case CTX_end:         return CTX_TEXT_ALIGN_END;
          case CTX_left:        return CTX_TEXT_ALIGN_LEFT;
          case CTX_right:       return CTX_TEXT_ALIGN_RIGHT;
          case CTX_center:      return CTX_TEXT_ALIGN_CENTER;
          case CTX_top:         return CTX_TEXT_BASELINE_TOP;
          case CTX_bottom :     return CTX_TEXT_BASELINE_BOTTOM;
          case CTX_middle:      return CTX_TEXT_BASELINE_MIDDLE;
          case CTX_alphabetic:  return CTX_TEXT_BASELINE_ALPHABETIC;
          case CTX_hanging:     return CTX_TEXT_BASELINE_HANGING;
          case CTX_ideographic: return CTX_TEXT_BASELINE_IDEOGRAPHIC;

          case CTX_userRGB:     return CTX_COLOR_SPACE_USER_RGB;
          case CTX_deviceRGB:   return CTX_COLOR_SPACE_DEVICE_RGB;
          case CTX_userCMYK:    return CTX_COLOR_SPACE_USER_CMYK;
          case CTX_deviceCMYK:  return CTX_COLOR_SPACE_DEVICE_CMYK;
          /* NOTE(review): no matching #define of STR/LOWER is visible in
           * this function - these look like leftovers; confirm before
           * removing.
           */
#undef STR
#undef LOWER
          default:
            /* unknown word: hand back its hash so the caller can tell it
             * was not a recognized command
             */
            ret = str_hash;
        }
    }
  /* CTX_CLOSE_PATH2 is treated as an alias of CTX_CLOSE_PATH */
  if (ret == CTX_CLOSE_PATH2)
   {
     ret = CTX_CLOSE_PATH;
   }

  return ctx_parser_set_command (parser, (CtxCode) ret);
}

/* Parser state identifiers.
 *
 * The names suggest tokenizer states (idle, inside a number, inside a word,
 * inside a comment, inside a quoted/encoded string, ...); the state machine
 * that consumes them is outside this section - TODO confirm exact semantics
 * there.
 *
 * NOTE(review): there is no typedef here, so the trailing CTX_STATE declares
 * a variable of this anonymous enum type rather than naming the type -
 * confirm whether that is intended.
 */
enum
{
  CTX_PARSER_NEUTRAL = 0,
  CTX_PARSER_NUMBER,
  CTX_PARSER_NEGATIVE_NUMBER,
  CTX_PARSER_WORD,
  CTX_PARSER_COMMENT,
  CTX_PARSER_STRING_APOS,
  CTX_PARSER_STRING_QUOT,
  CTX_PARSER_STRING_APOS_ESCAPED,
  CTX_PARSER_STRING_QUOT_ESCAPED,
  CTX_PARSER_STRING_A85,
  CTX_PARSER_STRING_YENC,
} CTX_STATE;

/* Select the color model used for subsequently parsed color arguments.
 *
 * @parser:      parser to update.
 * @color_model: model to record in parser->color_model.
 * @stroke:      stored verbatim in parser->color_stroke; later handed to
 *               ctx_color_raw() by the command dispatcher.
 *
 * Also caches the number of components of the model, as reported by
 * ctx_color_model_get_components(), in parser->color_components.
 */
static void ctx_parser_set_color_model (CtxParser *parser, CtxColorModel color_model, int stroke)
{
  parser->color_components = ctx_color_model_get_components (color_model);
  parser->color_stroke     = stroke;
  parser->color_model      = color_model;
}

/* Read a color from parser->numbers, starting at index offset, and convert
 * it to RGBA according to parser->color_model.
 *
 * @parser: parser holding the numeric arguments and the active color model.
 * @offset: index of the first color component in parser->numbers.
 * @red, @green, @blue, @alpha: outputs; alpha is 1.0 for models without an
 *          alpha component.
 *
 * GRAY/GRAYA replicate the single value to all three channels.  CMYK/CMYKA
 * use the naive (1-c)*(1-k) mapping.  LAB/LCH variants are not implemented
 * and have their components copied through as if they were RGB.  Models not
 * listed explicitly are treated like RGBA.
 */
static void ctx_parser_get_color_rgba (CtxParser *parser, int offset, float *red, float *green, float *blue, float *alpha)
{
  /* XXX - this function is to be deprecated */
  *alpha = 1.0;
  switch (parser->color_model)
    {
      case CTX_GRAYA:
        *alpha = parser->numbers[offset + 1];
        /* FALLTHROUGH */
      case CTX_GRAY:
        *red = *green = *blue = parser->numbers[offset + 0];
        break;
      default:
      case CTX_LABA: // NYI - needs RGB profile
      case CTX_LCHA: // NYI - needs RGB profile
      case CTX_RGBA:
        *alpha = parser->numbers[offset + 3];
        /* FALLTHROUGH */
      case CTX_LAB: // NYI
      case CTX_LCH: // NYI
      case CTX_RGB:
      case CTX_DRGB: /* three components only: must not take the default
                      * path, which would read a stale fourth number as
                      * alpha
                      */
        *red = parser->numbers[offset + 0];
        *green = parser->numbers[offset + 1];
        *blue = parser->numbers[offset + 2];
        break;
      case CTX_CMYKA:
        *alpha = parser->numbers[offset + 4];
        /* FALLTHROUGH */
      case CTX_CMYK:
        /* should use profile instead  */
        *red = (1.0-parser->numbers[offset + 0]) *
               (1.0 - parser->numbers[offset + 3]);
        *green = (1.0-parser->numbers[offset + 1]) *
                 (1.0 - parser->numbers[offset + 3]);
        *blue = (1.0-parser->numbers[offset + 2]) *
                (1.0 - parser->numbers[offset + 3]);
        break;
    }
}

/* Execute the command currently pending in the parser.
 *
 * Reads parser->command together with the collected arguments
 * (parser->numbers / parser->n_numbers for numeric arguments,
 * parser->holding / parser->pos for string or binary data) and issues the
 * corresponding drawing call on parser->ctx.
 *
 * parser->command is reset to CTX_NOP before dispatch.  Cases that assign it
 * again (line_to, curve_to, text, texture, ...) leave the command pending
 * after the call - presumably so that further argument groups are treated
 * as repeats of the same command; confirm against the caller.
 *
 * parser->pcx / parser->pcy track the previous control point, which the
 * smooth curve commands reflect around the current point.
 *
 * An argument-count mismatch is only reported (and only when
 * CTX_REPORT_COL_ROW is enabled); the command is dispatched regardless.
 */
static void ctx_parser_dispatch_command (CtxParser *parser)
{
  CtxCode cmd = parser->command;
  Ctx *ctx = parser->ctx;

  if (parser->expected_args != CTX_ARG_STRING_OR_NUMBER &&
      parser->expected_args != CTX_ARG_COLLECT_NUMBERS &&
      parser->expected_args != parser->n_numbers)
    {
#if CTX_REPORT_COL_ROW
         fprintf (stderr, "ctx:%i:%i %c got %i instead of %i args\n",
               parser->line, parser->col,
               cmd, parser->n_numbers, parser->expected_args);
#endif
      //return;
    }

#define arg(a)  (parser->numbers[a])
  parser->command = CTX_NOP;
  //parser->n_args = 0;
  switch (cmd)
    {
      default:
        break; // to silence warnings about missing ones
      case CTX_PRESERVE:
        ctx_preserve (ctx);
        break;
      case CTX_FILL:
        ctx_fill (ctx);
        break;
      case CTX_PAINT:
        ctx_paint (ctx);
        break;
      case CTX_SAVE:
        ctx_save (ctx);
        break;
      case CTX_START_GROUP:
        ctx_start_group (ctx);
        break;
      case CTX_END_GROUP:
        ctx_end_group (ctx);
        break;
      case CTX_STROKE:
        ctx_stroke (ctx);
        break;
      case CTX_STROKE_SOURCE:
        ctx_stroke_source (ctx);
        break;
      case CTX_RESTORE:
        ctx_restore (ctx);
        break;
#if CTX_ENABLE_CM
      case CTX_COLOR_SPACE:
        /* first dispatch carries the slot number, a later one the profile
         * data held in parser->holding
         */
        if (parser->n_numbers == 1)
        {
          parser->color_space_slot = (CtxColorSpace) arg(0);
          parser->command = CTX_COLOR_SPACE; // did this work without?
        }
        else
        {
          ctx_colorspace (ctx, (CtxColorSpace)parser->color_space_slot,
                               parser->holding, parser->pos);
        }
        break;
#endif
      case CTX_KERNING_PAIR:
        /* arguments arrive one per dispatch: two glyphs (as utf8 strings)
         * followed by the kerning amount; n_args counts how many have been
         * seen so far
         */
        switch (parser->n_args)
        {
          case 0:
            parser->numbers[0] = ctx_utf8_to_unichar ((char*)parser->holding);
            break;
          case 1:
            parser->numbers[1] = ctx_utf8_to_unichar ((char*)parser->holding);
            break;
          case 2:
            parser->numbers[2] = strtod ((char*)parser->holding, NULL);
            {
              CtxEntry e = {CTX_KERNING_PAIR, };
              e.data.u16[0] = parser->numbers[0];
              e.data.u16[1] = parser->numbers[1];
              e.data.s32[1] = parser->numbers[2] * 256;
              ctx_process (ctx, &e);
            }
            break;
        }
        parser->command = CTX_KERNING_PAIR;
        parser->n_args ++; // make this more generic?
        break;
      case CTX_TEXTURE:
        /* the texture is issued once, as soon as both coordinates are
         * available; texture_done suppresses repeats
         */
        if (parser->texture_done)
        {
        }
        else
        if (parser->n_numbers == 2)
        {
          const char *eid = (char*)parser->holding;
          float x0 = arg(0);
          float x1 = arg(1);
          ctx_texture (ctx, eid, x0, x1);
          parser->texture_done = 1;
        }
        parser->command = CTX_TEXTURE;
        //parser->n_args++;
        break;
      case CTX_DEFINE_TEXTURE:
        /* sequence: id string, then width / height / format numbers, then
         * the pixel data; texture_done becomes 1 once the three numbers are
         * in, and the first dispatch after that carries the pixel data
         */
        if (parser->texture_done)
        {
          if (parser->texture_done++ == 1)
          {
             const char *eid = (char*)parser->texture_id;
             int width  = arg(0);
             int height = arg(1);
             CtxPixelFormat format = (CtxPixelFormat)arg(2);
             int stride = ctx_pixel_format_get_stride (format, width);
             int data_len = stride * height;
             if (format == CTX_FORMAT_YUV420)
                 data_len = height * width + 2*(height/2) * (width/2);


             if (parser->pos != data_len)
             {
             /* report the size that was actually compared against
              * (data_len differs from stride * height for YUV420)
              */
             fprintf (stderr, "unexpected datasize for define texture %s %ix%i\n size:%i != expected:%i - start of data: %i %i %i %i\n", eid, width, height,
                               parser->pos,
                               data_len,
                               parser->holding[0],
                               parser->holding[1],
                               parser->holding[2],
                               parser->holding[3]
                               );
             }
             else
             ctx_define_texture (ctx, eid, width, height, stride, format, parser->holding, NULL);
          }
        }
        else
        {
        switch (parser->n_numbers)
        {
          case 0:
             /* strncpy does not terminate on truncation, hence the explicit
              * terminator on the next line
              */
             strncpy ((char*)parser->texture_id, (char*)parser->holding, sizeof(parser->texture_id));
             parser->texture_id[sizeof(parser->texture_id)-1]=0;
             break;
          case 1:
          case 2:
             break;
          case 3:
             parser->texture_done = 1;
             break;
          default:
             fprintf (stderr, "!!%i\n", parser->n_numbers);
             break;
        }
        }
        parser->command = CTX_DEFINE_TEXTURE;
        break;


      case CTX_DEFINE_GLYPH:
        /* XXX : reuse n_args logic - to enforce order */
        /* the glyph's unichar is parked in color_space_slot between the
         * string dispatch and the numeric dispatch
         */
        if (parser->n_numbers == 1)
        {
          CtxEntry e = {CTX_DEFINE_GLYPH, };
          e.data.u32[0] = parser->color_space_slot;
          e.data.u32[1] = arg(0) * 256;
          ctx_process (ctx, &e);
        }
        else
        {
          int unichar = ctx_utf8_to_unichar ((char*)parser->holding);
          parser->color_space_slot = (CtxColorSpace)unichar;
        }
        parser->command = CTX_DEFINE_GLYPH;
        break;

      case CTX_COLOR:
        {
          switch (parser->color_model)
            {
              case CTX_GRAY:
              case CTX_GRAYA:
              case CTX_RGB:
              case CTX_RGBA:
              case CTX_DRGB:
              case CTX_DRGBA:
                ctx_color_raw (ctx, parser->color_model, parser->numbers, parser->color_stroke);
                break;
#if CTX_ENABLE_CMYK
              case CTX_CMYK:
              case CTX_CMYKA:
                ctx_color_raw (ctx, parser->color_model, parser->numbers, parser->color_stroke);
                break;
#else
              /* when there is no cmyk support at all in rasterizer
               * do a naive mapping to RGB on input.
               */
              case CTX_CMYK:
              case CTX_CMYKA:
              case CTX_DCMYKA:
                {
                  float rgba[4] = {1,1,1,1.0f};

                  ctx_cmyk_to_rgb (arg(0), arg(1), arg(2), arg(3), &rgba[0], &rgba[1], &rgba[2]);
                  if (parser->color_model == CTX_CMYKA)
                    { rgba[3] = arg(4); }
                  ctx_color_raw (ctx, CTX_RGBA, rgba, parser->color_stroke);
                }
                break;
#endif
              case CTX_LAB:
              case CTX_LCH:
              default:
                break;
            }
        }
        break;
      case CTX_LINE_DASH:
        /* no numbers collected: clear the dash pattern */
        if (parser->n_numbers)
        {
          ctx_line_dash (ctx, parser->numbers, parser->n_numbers);
        }
        else
        {
          ctx_line_dash (ctx, NULL, 0);
        }
        //append_dash_val (ctx, arg(0));
        break;
      case CTX_ARC_TO:
        ctx_svg_arc_to (ctx, arg(0), arg(1), arg(2), arg(3), arg(4), arg(5), arg(6));
        break;
      case CTX_REL_ARC_TO:
        //ctx_rel_arc_to (ctx, arg(0), arg(1), arg(2), arg(3), arg(4) );
        //
        {
          /* only the end point (last two arguments) is relative */
          float x = ctx_x (ctx);
          float y = ctx_y (ctx);
          ctx_svg_arc_to (ctx, arg(0), arg(1), arg(2), arg(3), arg(4), arg(5)+x, arg(6)+y);
        }
        break;
      case CTX_REL_SMOOTH_TO:
        {
          /* relative arguments are offsets from the current point (as in
           * CTX_REL_SMOOTHQ_TO below), not from the previous control point;
           * the first control point is the previous one reflected around
           * the current point
           */
          float x  = ctx_x (ctx);
          float y  = ctx_y (ctx);
          float ax = 2 * x - parser->pcx;
          float ay = 2 * y - parser->pcy;
          ctx_curve_to (ctx, ax, ay, arg(0) + x, arg(1) + y,
                        arg(2) + x, arg(3) + y);
          parser->pcx = arg(0) + x;
          parser->pcy = arg(1) + y;
        }
        break;
      case CTX_SMOOTH_TO:
        {
          float ax = 2 * ctx_x (ctx) - parser->pcx;
          float ay = 2 * ctx_y (ctx) - parser->pcy;
          ctx_curve_to (ctx, ax, ay, arg(0), arg(1),
                        arg(2), arg(3) );
          /* remember the second control point for a following smooth
           * segment - both coordinates
           */
          parser->pcx = arg(0);
          parser->pcy = arg(1);
        }
        break;
      case CTX_SMOOTHQ_TO:
        parser->pcx = 2 * ctx_x (ctx) - parser->pcx;
        parser->pcy = 2 * ctx_y (ctx) - parser->pcy;
        ctx_quad_to (ctx, parser->pcx, parser->pcy, arg(0), arg(1) );
        break;
      case CTX_REL_SMOOTHQ_TO:
        {
          float x = ctx_x (ctx);
          float y = ctx_y (ctx);
          parser->pcx = 2 * ctx_x (ctx) - parser->pcx;
          parser->pcy = 2 * ctx_y (ctx) - parser->pcy;
          ctx_quad_to (ctx, parser->pcx, parser->pcy, arg(0) + x, arg(1) + y);
        }
        break;
      case CTX_VER_LINE_TO:
        ctx_line_to (ctx, ctx_x (ctx), arg(0) );
        parser->command = CTX_VER_LINE_TO;
        parser->pcx = ctx_x (ctx);
        parser->pcy = ctx_y (ctx);
        break;
      case CTX_HOR_LINE_TO:
        ctx_line_to (ctx, arg(0), ctx_y (ctx) );
        parser->command = CTX_HOR_LINE_TO;
        parser->pcx = ctx_x (ctx);
        parser->pcy = ctx_y (ctx);
        break;
      case CTX_REL_HOR_LINE_TO:
        ctx_rel_line_to (ctx, arg(0), 0.0f);
        parser->command = CTX_REL_HOR_LINE_TO;
        parser->pcx = ctx_x (ctx);
        parser->pcy = ctx_y (ctx);
        break;
      case CTX_REL_VER_LINE_TO:
        ctx_rel_line_to (ctx, 0.0f, arg(0) );
        parser->command = CTX_REL_VER_LINE_TO;
        parser->pcx = ctx_x (ctx);
        parser->pcy = ctx_y (ctx);
        break;
      case CTX_ARC:
        ctx_arc (ctx, arg(0), arg(1), arg(2), arg(3), arg(4), arg(5) );
        break;
      case CTX_APPLY_TRANSFORM:
        ctx_apply_transform (ctx, arg(0), arg(1), arg(2), arg(3), arg(4), arg(5) , arg(6), arg(7), arg(8));
        break;
      case CTX_SOURCE_TRANSFORM:
        ctx_source_transform (ctx, arg(0), arg(1), arg(2), arg(3), arg(4), arg(5), arg(6), arg(7), arg(8));
        break;
      case CTX_CURVE_TO:
        ctx_curve_to (ctx, arg(0), arg(1), arg(2), arg(3), arg(4), arg(5) );
        parser->pcx = arg(2);
        parser->pcy = arg(3);
        parser->command = CTX_CURVE_TO;
        break;
      case CTX_REL_CURVE_TO:
        /* the control point is made absolute before the call moves the
         * current point
         */
        parser->pcx = arg(2) + ctx_x (ctx);
        parser->pcy = arg(3) + ctx_y (ctx);
        ctx_rel_curve_to (ctx, arg(0), arg(1), arg(2), arg(3), arg(4), arg(5) );
        parser->command = CTX_REL_CURVE_TO;
        break;
      case CTX_LINE_TO:
        ctx_line_to (ctx, arg(0), arg(1) );
        parser->command = CTX_LINE_TO;
        parser->pcx = arg(0);
        parser->pcy = arg(1);
        break;
      case CTX_MOVE_TO:
        ctx_move_to (ctx, arg(0), arg(1) );
        /* the pending command after a move_to is line_to, not move_to */
        parser->command = CTX_LINE_TO;
        parser->pcx = arg(0);
        parser->pcy = arg(1);
        parser->left_margin = parser->pcx;
        break;
      case CTX_FONT_SIZE:
        ctx_font_size (ctx, arg(0) );
        break;
      case CTX_MITER_LIMIT:
        ctx_miter_limit (ctx, arg(0) );
        break;
      case CTX_SCALE:
        ctx_scale (ctx, arg(0), arg(1) );
        break;
      case CTX_QUAD_TO:
        parser->pcx = arg(0);
        parser->pcy = arg(1);
        ctx_quad_to (ctx, arg(0), arg(1), arg(2), arg(3) );
        parser->command = CTX_QUAD_TO;
        break;
      case CTX_REL_QUAD_TO:
        parser->pcx = arg(0) + ctx_x (ctx);
        parser->pcy = arg(1) + ctx_y (ctx);
        ctx_rel_quad_to (ctx, arg(0), arg(1), arg(2), arg(3) );
        parser->command = CTX_REL_QUAD_TO;
        break;
      case CTX_CLIP:
        ctx_clip (ctx);
        break;
      case CTX_TRANSLATE:
        ctx_translate (ctx, arg(0), arg(1) );
        break;
      case CTX_ROTATE:
        ctx_rotate (ctx, arg(0) );
        break;
      case CTX_FONT:
        ctx_font (ctx, (char *) parser->holding);
        break;

      case CTX_STROKE_TEXT:
      case CTX_TEXT:
        /* a single number between strings moves the pen back by that
         * amount; a string is rendered line by line
         */
        if (parser->n_numbers == 1)
          { ctx_rel_move_to (ctx, -parser->numbers[0], 0.0); }  //  XXX : scale by font(size)
        else
          {
            for (char *c = (char *) parser->holding; c; )
              {
                /* temporarily terminate the string at the newline so only
                 * the current line is rendered
                 */
                char *next_nl = ctx_strchr (c, '\n');
                if (next_nl)
                  { *next_nl = 0; }
                /* do our own layouting on a per-word basis?, to get justified
                 * margins? then we'd want explict margins rather than the
                 * implicit ones from move_to's .. making move_to work within
                 * margins.
                 */
                if (cmd == CTX_STROKE_TEXT)
                  { ctx_text_stroke (ctx, c); }
                else
                  { ctx_text (ctx, c); }
                if (next_nl)
                  {
                    *next_nl = '\n'; // swap it newline back in
                    /* start the next line at the left margin, one font
                     * size further down
                     */
                    ctx_move_to (ctx, parser->left_margin, ctx_y (ctx) +
                                 ctx_get_font_size (ctx) );
                    c = next_nl + 1;
                    if (c[0] == 0)
                      { c = NULL; }
                  }
                else
                  {
                    c = NULL;
                  }
              }
          }
        if (cmd == CTX_STROKE_TEXT)
          { parser->command = CTX_STROKE_TEXT; }
        else
          { parser->command = CTX_TEXT; }
        break;
      case CTX_REL_LINE_TO:
        ctx_rel_line_to (ctx, arg(0), arg(1) );
        parser->pcx += arg(0);
        parser->pcy += arg(1);
        break;
      case CTX_REL_MOVE_TO:
        ctx_rel_move_to (ctx, arg(0), arg(1) );
        parser->pcx += arg(0);
        parser->pcy += arg(1);
        parser->left_margin = ctx_x (ctx);
        break;
      case CTX_LINE_WIDTH:
        ctx_line_width (ctx, arg(0));
        break;
      case CTX_LINE_DASH_OFFSET:
        ctx_line_dash_offset (ctx, arg(0));
        break;
      case CTX_IMAGE_SMOOTHING:
        ctx_image_smoothing (ctx, arg(0));
        break;
      case CTX_SHADOW_COLOR:
        ctx_shadow_rgba (ctx, arg(0), arg(1), arg(2), arg(3));
        break;
      case CTX_SHADOW_BLUR:
        ctx_shadow_blur (ctx, arg(0) );
        break;
      case CTX_SHADOW_OFFSET_X:
        ctx_shadow_offset_x (ctx, arg(0) );
        break;
      case CTX_SHADOW_OFFSET_Y:
        ctx_shadow_offset_y (ctx, arg(0) );
        break;
      case CTX_LINE_JOIN:
        ctx_line_join (ctx, (CtxLineJoin) arg(0) );
        break;
      case CTX_LINE_CAP:
        ctx_line_cap (ctx, (CtxLineCap) arg(0) );
        break;
      case CTX_COMPOSITING_MODE:
        ctx_compositing_mode (ctx, (CtxCompositingMode) arg(0) );
        break;
      case CTX_BLEND_MODE:
        {
          /* the word "color" resolves to the CTX_COLOR command code; when
           * it shows up as a blend mode argument it means CTX_BLEND_COLOR
           */
          int blend_mode = arg(0);
          if (blend_mode == CTX_COLOR) blend_mode = CTX_BLEND_COLOR;
          ctx_blend_mode (ctx, (CtxBlend)blend_mode);
        }
        break;
      case CTX_EXTEND:
        ctx_extend (ctx, (CtxExtend)arg(0));
        break;
      case CTX_FILL_RULE:
        ctx_fill_rule (ctx, (CtxFillRule) arg(0) );
        break;
      case CTX_TEXT_ALIGN:
        ctx_text_align (ctx, (CtxTextAlign) arg(0) );
        break;
      case CTX_TEXT_BASELINE:
        ctx_text_baseline (ctx, (CtxTextBaseline) arg(0) );
        break;
      case CTX_TEXT_DIRECTION:
        ctx_text_direction (ctx, (CtxTextDirection) arg(0) );
        break;
      case CTX_IDENTITY:
        ctx_identity (ctx);
        break;
      case CTX_RECTANGLE:
        ctx_rectangle (ctx, arg(0), arg(1), arg(2), arg(3) );
        break;
      case CTX_FILL_RECT:
        ctx_rectangle (ctx, arg(0), arg(1), arg(2), arg(3) );
        ctx_fill (ctx);
        break;
      case CTX_STROKE_RECT:
        ctx_rectangle (ctx, arg(0), arg(1), arg(2), arg(3) );
        ctx_stroke (ctx);
        break;
      case CTX_ROUND_RECTANGLE:
        ctx_round_rectangle (ctx, arg(0), arg(1), arg(2), arg(3), arg(4));
        break;
      case CTX_VIEW_BOX:
        ctx_view_box (ctx, arg(0), arg(1), arg(2), arg(3) );
        break;
      case CTX_LINEAR_GRADIENT:
        ctx_linear_gradient (ctx, arg(0), arg(1), arg(2), arg(3) );
        break;
      case CTX_RADIAL_GRADIENT:
        ctx_radial_gradient (ctx, arg(0), arg(1), arg(2), arg(3), arg(4), arg(5) );
        break;
      case CTX_GRADIENT_STOP:
        {
          /* argument 0 is the stop position, the color components follow
           * from index 1
           */
          float red, green, blue, alpha;
          ctx_parser_get_color_rgba (parser, 1, &red, &green, &blue, &alpha);
          ctx_gradient_add_stop (ctx, arg(0), red, green, blue, alpha);
        }
        break;
      case CTX_GLOBAL_ALPHA:
        ctx_global_alpha (ctx, arg(0) );
        break;
      case CTX_BEGIN_PATH:
        ctx_begin_path (ctx);
        break;
      case CTX_GLYPH:
        ctx_glyph (ctx, arg(0), 0);
        break;
      case CTX_CLOSE_PATH:
        ctx_close_path (ctx);
        break;
      case CTX_EXIT:
        /* hand control to the registered exit callback, if any */
        if (parser->exit)
          { parser->exit (parser->exit_data);
            return;
          }
        break;
      case CTX_FLUSH:
        //ctx_flush (ctx); // XXX  XXX  flush only does things inside backends
        break;
      case CTX_RESET:
        ctx_reset (ctx);
        /* optionally re-establish the origin at the parser's cursor cell
         * (cursor_x / cursor_y are 1-based here)
         */
        if (parser->translate_origin)
        {
          ctx_translate (ctx,
                         (parser->cursor_x-1) * parser->cell_width * 1.0,
                         (parser->cursor_y-1) * parser->cell_height * 1.0);
        }
        break;
    }
#undef arg
//  parser->n_numbers = 0;
}

/* Append one byte to the parser's holding buffer and keep the buffer
 * NUL-terminated.
 *
 * With a dynamically sized buffer (!CTX_PARSER_FIXED_TEMP) the buffer is
 * grown by doubling, with a minimum size of 512 bytes.  If growing fails
 * the byte is dropped and the existing buffer is left untouched instead of
 * being leaked and replaced by NULL.
 *
 * With a fixed buffer (CTX_PARSER_FIXED_TEMP) the write position is clamped
 * so that later bytes overwrite the last usable slot rather than overflow.
 */
static inline void ctx_parser_holding_append (CtxParser *parser, int byte)
{
#if !CTX_PARSER_FIXED_TEMP
  /* room is needed for the new byte and for the terminating NUL */
  if (CTX_UNLIKELY(parser->hold_len < parser->pos + 1 + 1))
  {
    int new_len = parser->hold_len * 2;
    if (new_len < 512) new_len = 512;
    uint8_t *grown = (uint8_t*)realloc (parser->holding, new_len);
    if (!grown)
      return; /* out of memory: keep the old buffer, drop this byte */
    parser->holding = grown;
    parser->hold_len = new_len;
  }
#endif

  parser->holding[parser->pos++]=byte;
#if CTX_PARSER_FIXED_TEMP
  if (CTX_UNLIKELY(parser->pos > (int) sizeof (parser->holding)-2))
    { parser->pos = sizeof (parser->holding)-2; }
#endif
  parser->holding[parser->pos]=0;
}

/* Convert a percentage argument into device units, in place.
 *
 * Which dimension the percentage refers to depends on the command and on
 * the argument index:
 *   - x-like arguments scale with the parser width,
 *   - y-like arguments scale with the parser height,
 *   - radius/size-like arguments scale with the smaller of the two.
 */
static void ctx_parser_transform_percent (CtxParser *parser, CtxCode code, int arg_no, float *value)
{
  const int w        = parser->width;
  const int h        = parser->height;
  const int shortest = (w < h) ? w : h;

  const double per_width  = parser->width  / 100.0;
  const double per_height = parser->height / 100.0;
  const double per_small  = shortest / 100.0;
  /* generic rule: even argument index is an x coordinate, odd is y */
  const double per_axis   = (arg_no % 2 == 0) ? per_width : per_height;

  switch (code)
    {
      case CTX_RADIAL_GRADIENT:
      case CTX_ARC:
        /* arguments come in (x, y, radius) triples; others are untouched */
        if (arg_no == 0 || arg_no == 3)
          { *value *= per_width; }
        else if (arg_no == 1 || arg_no == 4)
          { *value *= per_height; }
        else if (arg_no == 2 || arg_no == 5)
          { *value *= per_small; }
        return;

      case CTX_FONT_SIZE:
      case CTX_MITER_LIMIT:
      case CTX_LINE_WIDTH:
      case CTX_LINE_DASH_OFFSET:
        *value *= per_small;
        return;

      case CTX_ARC_TO:
      case CTX_REL_ARC_TO:
        /* two coordinate pairs followed by size-like arguments */
        *value *= (arg_no > 3) ? per_small : per_axis;
        return;

      case CTX_ROUND_RECTANGLE:
        /* the fifth argument is scaled by height, the rest per axis */
        *value *= (arg_no == 4) ? per_height : per_axis;
        return;

      default:
        *value *= per_axis;
        return;
    }
}

/* Interpret *value as a percentage of the parser height and replace it with
 * the corresponding absolute amount.  code and arg_no are not consulted. */
static void ctx_parser_transform_percent_height (CtxParser *parser, CtxCode code, int arg_no, float *value)
{
  const double one_percent = parser->height / 100.0;
  *value = *value * one_percent;
}

/* Interpret *value as a percentage of the parser width and replace it with
 * the corresponding absolute amount.  code and arg_no are not consulted.
 *
 * The scale factor is derived from parser->width; using the height here
 * would make this function indistinguishable from the percent-of-height
 * variant.
 */
static void ctx_parser_transform_percent_width (CtxParser *parser, CtxCode code, int arg_no, float *value)
{
  *value *= (parser->width/100.0);
}

/* Convert an argument expressed in cells into device units, in place.
 *
 * x-like arguments are multiplied by the cell width, y-like arguments by the
 * cell height; which is which depends on the command and argument index.
 * For CTX_RECTANGLE the y position (argument 1) is additionally shifted up
 * by one cell before scaling.
 */
static void ctx_parser_transform_cell (CtxParser *parser, CtxCode code, int arg_no, float *value)
{
  const int is_x = (arg_no % 2 == 0);

  /* smaller of the two cell dimensions, for radius-like arguments */
  float shortest = parser->cell_width;
  if (shortest > parser->cell_height)
    { shortest = parser->cell_height; }

  switch (code)
    {
      case CTX_RADIAL_GRADIENT:
      case CTX_ARC:
        /* (x, y, radius) triples; other argument indices are untouched */
        if (arg_no == 0 || arg_no == 3)
          { *value *= parser->cell_width; }
        else if (arg_no == 1 || arg_no == 4)
          { *value *= parser->cell_height; }
        else if (arg_no == 2 || arg_no == 5)
          { *value *= shortest; }
        return;

      case CTX_MITER_LIMIT:
      case CTX_FONT_SIZE:
      case CTX_LINE_WIDTH:
      case CTX_LINE_DASH_OFFSET:
        *value *= parser->cell_height;
        return;

      case CTX_ARC_TO:
      case CTX_REL_ARC_TO:
        if (arg_no > 3)
          { *value *= shortest; }
        else
          { *value *= is_x ? parser->cell_width : parser->cell_height; }
        return;

      case CTX_RECTANGLE:
        if (is_x)
          {
            *value *= parser->cell_width;
            return;
          }
        /* only the y position, not the height, is moved up by one cell */
        if (arg_no <= 1)
          { *value -= 1.0f; }
        *value *= parser->cell_height;
        return;

      default: /* even index is an x coordinate, odd index a y coordinate */
        *value *= is_x ? parser->cell_width : parser->cell_height;
        return;
    }
}

// %h %v %m %M

/* Hook called by the byte feeder right after a numeric argument has been
 * completed and counted; it currently does nothing. */
static void ctx_parser_number_done (CtxParser *parser)
{

}

/* Handle a completed word that has been accumulated in parser->holding.
 *
 * The word is resolved with ctx_parser_resolve_command() and then treated
 * in one of two ways:
 *   - results that are negative, below 32 or above 150 are stored as a
 *     numeric argument of the current command (this lets words act as enum
 *     values); a ',' is fed back into the parser to finish that number.
 *   - results in 32..150 become the new current command, which is
 *     dispatched immediately if it expects no arguments.
 *
 * NOTE(review): the two conditions above cover every integer, so the final
 * else branch (char-by-char interpretation) looks unreachable - confirm
 * before relying on it.
 */
static void ctx_parser_word_done (CtxParser *parser)
{
  /* make sure the held word is NUL-terminated before resolving it */
  parser->holding[parser->pos]=0;
  //int old_args = parser->expected_args;
  int command = ctx_parser_resolve_command (parser, parser->holding);
  if ((command >= 0 && command < 32)
      || (command > 150) || (command < 0)
      )  // special case low enum values
    {                   // and enum values too high to be
                        // commands - permitting passing words
                        // for strings in some cases
      parser->numbers[parser->n_numbers] = command;

      // trigger transition from number
      parser->state = CTX_PARSER_NUMBER;
      char c = ',';
      /* re-enters the byte feeder so the normal end-of-number handling runs */
      ctx_parser_feed_bytes (parser, &c, 1);
    }
  else if (command > 0)
    {
#if 0
      if (old_args == CTX_ARG_COLLECT_NUMBERS ||
          old_args == CTX_ARG_STRING_OR_NUMBER)
      {
        int tmp1 = parser->command;
        int tmp2 = parser->expected_args;
        int tmp3 = parser->n_numbers;
 //     int tmp4 = parser->n_args;
        ctx_parser_dispatch_command (parser);
        parser->command = (CtxCode)tmp1;
        parser->expected_args = tmp2;
        parser->n_numbers = tmp3;
 //     parser->n_args = tmp4;
      }
#endif

      /* a new command starts with a fresh argument list */
      parser->command = (CtxCode) command;
      parser->n_numbers = 0;
      parser->n_args = 0;
      if (parser->expected_args == 0)
        {
          ctx_parser_dispatch_command (parser);
        }
    }
  else
    {
      /* interpret char by char */
      uint8_t buf[16]=" ";
      for (int i = 0; parser->pos && parser->holding[i] > ' '; i++)
        {
          /* resolve each character as a one-letter command */
          buf[0] = parser->holding[i];
          parser->command = (CtxCode) ctx_parser_resolve_command (parser, buf);
          parser->n_numbers = 0;
          parser->n_args = 0;
          if (parser->command > 0)
            {
              if (parser->expected_args == 0)
                {
                  ctx_parser_dispatch_command (parser);
                }
            }
          else
            {
              ctx_log ("unhandled command '%c'\n", buf[0]);
            }
        }
    }
}

/* Called when a complete string argument is available in the holding
 * buffer: dispatches the current command.
 *
 * For commands that accept either strings or numbers the command,
 * expected argument count and argument counters are preserved across the
 * dispatch, so that further arguments can follow for the same command.
 * The parser state is not consulted.
 */
static void ctx_parser_string_done (CtxParser *parser)
{
  if (parser->expected_args != CTX_ARG_STRING_OR_NUMBER)
  {
    ctx_parser_dispatch_command (parser);
    return;
  }

  const int saved_command       = parser->command;
  const int saved_expected_args = parser->expected_args;
  const int saved_n_numbers     = parser->n_numbers;
  const int saved_n_args        = parser->n_args;

  ctx_parser_dispatch_command (parser);

  parser->command       = (CtxCode) saved_command;
  parser->expected_args = saved_expected_args;
  parser->n_numbers     = saved_n_numbers;
  parser->n_args        = saved_n_args;
}

/* Feed a single byte of input to the parser state machine.
 *
 * The parser is in one of these states:
 *   NEUTRAL                 between tokens; decides what the next token is
 *   NUMBER/NEGATIVE_NUMBER  accumulating a decimal number into
 *                           numbers[n_numbers]; a unit suffix (@ % ^ ~)
 *                           converts and ends the number
 *   WORD                    accumulating a command/keyword in holding
 *   STRING_APOS/STRING_QUOT quoted strings, with backslash escapes handled
 *                           by the matching *_ESCAPED state
 *   STRING_A85              bulk data terminated by '~', decoded in place
 *   STRING_YENC             bulk data terminated by "=y", decoded in place
 *   COMMENT                 '#' until end of line
 *
 * When a number ends, the current command is dispatched if the expected
 * argument count has been reached (or the command collects numbers).
 *
 * A '-' directly following a number starts the next number without any
 * separator.  The argument index is never advanced beyond
 * CTX_PARSER_MAX_ARGS there, matching the clamp applied when a number
 * completes, so input such as "1-1-1-1-..." cannot walk off the end of
 * the numbers array.
 */
static inline void ctx_parser_feed_byte (CtxParser *parser, char byte)
{
#if CTX_REPORT_COL_ROW
    if (CTX_UNLIKELY(byte == '\n'))
    {
        parser->col=0;
        parser->line++;
    }
    else
    {
        parser->col++;
    }
#endif

    /* the two bulk-transfer string states are handled before the general
     * switch to keep their per-byte overhead low */
    if (CTX_LIKELY(parser->state == CTX_PARSER_STRING_YENC))
    {
        if (CTX_UNLIKELY((parser->prev_byte == '=') && (byte == 'y')))
        {
          /* "=y" terminates the data: decode in place and hand it over */
          parser->state = CTX_PARSER_NEUTRAL;
          parser->pos = ctx_ydec ((char*)parser->holding, (char*)parser->holding, parser->pos) - 1;
          ctx_parser_string_done (parser);
        }
        else
        {
          ctx_parser_holding_append (parser, byte);
        }
        parser->prev_byte = byte;
        return;
    }
    else if (parser->state == CTX_PARSER_STRING_A85)
    {
        /* since these are our largest bulk transfers, minimize
         * overhead for this case. */
        if (CTX_LIKELY(byte!='~')) 
        {
          ctx_parser_holding_append (parser, byte);
        }
        else
        {
          parser->state = CTX_PARSER_NEUTRAL;
          parser->pos = ctx_a85dec ((char*)parser->holding, (char*)parser->holding, parser->pos);
          ctx_parser_string_done (parser);
        }
        return;
    }
  switch (parser->state)
    {
      case CTX_PARSER_NEUTRAL:
        switch (byte)
          {
            /* control characters (except tab, newline, carriage return)
             * are ignored */
            case  0: case  1: case  2: case  3:  case 4:  case 5:
            case  6: case  7: case  8: case 11: case 12: case 14:
            case 15: case 16: case 17: case 18: case 19: case 20:
            case 21: case 22: case 23: case 24: case 25: case 26:
            case 27: case 28: case 29: case 30: case 31:
              break;
            /* whitespace and separators */
            case ' ': case '\t': case '\r': case '\n':
            case ';': case ',':
            case '(': case ')':
            case '{': case '}':
              break;
            case '#':
              parser->state = CTX_PARSER_COMMENT;
              break;
            case '\'':
              parser->state = CTX_PARSER_STRING_APOS;
              parser->pos = 0;
              parser->holding[0] = 0;
              break;
            case '=':
              parser->state = CTX_PARSER_STRING_YENC;
              parser->pos = 0;
              parser->holding[0] = 0;
              break;
            case '~':
              parser->state = CTX_PARSER_STRING_A85;
              parser->pos = 0;
              parser->holding[0] = 0;
              break;
            case '"':
              parser->state = CTX_PARSER_STRING_QUOT;
              parser->pos = 0;
              parser->holding[0] = 0;
              break;
            case '-':
              parser->state = CTX_PARSER_NEGATIVE_NUMBER;
              parser->numbers[parser->n_numbers] = 0;
              parser->decimal = 0;
              break;
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
              parser->state = CTX_PARSER_NUMBER;
              parser->numbers[parser->n_numbers] = 0;
              parser->numbers[parser->n_numbers] += (byte - '0');
              parser->decimal = 0;
              break;
            case '.':
              parser->state = CTX_PARSER_NUMBER;
              parser->numbers[parser->n_numbers] = 0;
              parser->decimal = 1;
              break;
            default:
              parser->state = CTX_PARSER_WORD;
              parser->pos = 0;
              ctx_parser_holding_append (parser, byte);
              break;
          }
        break;
      case CTX_PARSER_NUMBER:
      case CTX_PARSER_NEGATIVE_NUMBER:
        {
          switch (byte)
            {
              /* a control character ends the number; note that the sign
               * of a negative number is not applied on this path */
              case 0: case 1: case 2: case 3: case 4: case 5:
              case 6: case 7: case 8:
              case 11: case 12: case 14: case 15: case 16:
              case 17: case 18: case 19: case 20: case 21:
              case 22: case 23: case 24: case 25: case 26:
              case 27: case 28: case 29: case 30: case 31:
                parser->state = CTX_PARSER_NEUTRAL;
                break;
              case ' ':
              case '\t':
              case '\r':
              case '\n':
              case ';':
              case ',':
              case '(':
              case ')':
              case '{':
              case '}':
              case '=':
                if (parser->state == CTX_PARSER_NEGATIVE_NUMBER)
                  { parser->numbers[parser->n_numbers] *= -1; }
                parser->state = CTX_PARSER_NEUTRAL;
                break;
              case '#':
                parser->state = CTX_PARSER_COMMENT;
                break;
              case '-':
                /* a minus sign directly after a number starts the next,
                 * negative, number */
                if (parser->state == CTX_PARSER_NEGATIVE_NUMBER)
                  { parser->numbers[parser->n_numbers] *= -1; }
                parser->state = CTX_PARSER_NEGATIVE_NUMBER;
                /* never advance beyond the last slot; once full, further
                 * numbers reuse the last slot */
                if (parser->n_numbers < CTX_PARSER_MAX_ARGS)
                  { parser->n_numbers ++; }
                parser->numbers[parser->n_numbers] = 0;
                parser->decimal = 0;
                break;
              case '.':
                //if (parser->decimal) // TODO permit .13.32.43 to equivalent to .12 .32 .43
                parser->decimal = 1;
                break;
              case '0': case '1': case '2': case '3': case '4':
              case '5': case '6': case '7': case '8': case '9':
                if (parser->decimal)
                  {
                    /* decimal holds the divisor of the current fractional
                     * digit: 10, 100, 1000, ... */
                    parser->decimal *= 10;
                    parser->numbers[parser->n_numbers] += (byte - '0') / (1.0 * parser->decimal);
                  }
                else
                  {
                    parser->numbers[parser->n_numbers] *= 10;
                    parser->numbers[parser->n_numbers] += (byte - '0');
                  }
                break;
              case '@': // cells
                if (parser->state == CTX_PARSER_NEGATIVE_NUMBER)
                  { parser->numbers[parser->n_numbers] *= -1; }
                {
                float fval = parser->numbers[parser->n_numbers];
                ctx_parser_transform_cell (parser, parser->command, parser->n_numbers, &fval);
                parser->numbers[parser->n_numbers]= fval;
                }
                parser->state = CTX_PARSER_NEUTRAL;
                break;
              case '%': // percent of width/height
                if (parser->state == CTX_PARSER_NEGATIVE_NUMBER)
                  { parser->numbers[parser->n_numbers] *= -1; }
                {
                float fval = parser->numbers[parser->n_numbers];
                ctx_parser_transform_percent (parser, parser->command, parser->n_numbers, &fval);
                parser->numbers[parser->n_numbers]= fval;
                }
                parser->state = CTX_PARSER_NEUTRAL;
                break;
              case '^': // percent of height
                if (parser->state == CTX_PARSER_NEGATIVE_NUMBER)
                  { parser->numbers[parser->n_numbers] *= -1; }
                {
                float fval = parser->numbers[parser->n_numbers];
                ctx_parser_transform_percent_height (parser, parser->command, parser->n_numbers, &fval);
                parser->numbers[parser->n_numbers]= fval;
                }
                parser->state = CTX_PARSER_NEUTRAL;
                break;
              case '~': // percent of width
                if (parser->state == CTX_PARSER_NEGATIVE_NUMBER)
                  { parser->numbers[parser->n_numbers] *= -1; }
                {
                float fval = parser->numbers[parser->n_numbers];
                ctx_parser_transform_percent_width (parser, parser->command, parser->n_numbers, &fval);
                parser->numbers[parser->n_numbers]= fval;
                }
                parser->state = CTX_PARSER_NEUTRAL;
                break;
              default:
                /* anything else starts a word directly after the number */
                if (parser->state == CTX_PARSER_NEGATIVE_NUMBER)
                  { parser->numbers[parser->n_numbers] *= -1; }
                parser->state = CTX_PARSER_WORD;
                parser->pos = 0;
                ctx_parser_holding_append (parser, byte);
                break;
            }
          /* leaving the number states means one more argument is complete */
          if ( (parser->state != CTX_PARSER_NUMBER) &&
               (parser->state != CTX_PARSER_NEGATIVE_NUMBER))
            {
              parser->n_numbers ++;
              ctx_parser_number_done (parser);

              if (parser->n_numbers == parser->expected_args ||
                  parser->expected_args == CTX_ARG_COLLECT_NUMBERS ||
                  parser->expected_args == CTX_ARG_STRING_OR_NUMBER)
                {
                  /* dispatch, then restore the command so that repeated
                   * argument groups keep invoking the same command */
                  int tmp1 = parser->n_numbers;
                  int tmp2 = parser->n_args;
                  CtxCode tmp3 = parser->command;
                  int tmp4 = parser->expected_args;
                  ctx_parser_dispatch_command (parser);
                  parser->command = tmp3;
                  switch (parser->command)
                  {
                    /* texture commands keep their argument counters */
                    case CTX_DEFINE_TEXTURE:
                    case CTX_TEXTURE:
                      parser->n_numbers = tmp1;
                      parser->n_args = tmp2;
                      break;
                    default:
                      parser->n_numbers = 0;
                      parser->n_args = 0;
                      break;
                  }
                  parser->expected_args = tmp4;
                }
              if (parser->n_numbers > CTX_PARSER_MAX_ARGS)
                { parser->n_numbers = CTX_PARSER_MAX_ARGS;
                }
            }
        }
        break;
      case CTX_PARSER_WORD:
        switch (byte)
          {
            case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
            case 8: case 11: case 12: case 14: case 15: case 16: case 17:
            case 18: case 19: case 20: case 21: case 22: case 23: case 24:
            case 25: case 26: case 27: case 28: case 29: case 30: case 31:
            case ' ': case '\t': case '\r': case '\n':
            case ';': case ',':
            case '(': case ')': case '=': case '{': case '}':
              parser->state = CTX_PARSER_NEUTRAL;
              break;
            case '#':
              parser->state = CTX_PARSER_COMMENT;
              break;
            case '-':
              parser->state = CTX_PARSER_NEGATIVE_NUMBER;
              parser->numbers[parser->n_numbers] = 0;
              parser->decimal = 0;
              break;
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
              parser->state = CTX_PARSER_NUMBER;
              parser->numbers[parser->n_numbers] = 0;
              parser->numbers[parser->n_numbers] += (byte - '0');
              parser->decimal = 0;
              break;
            case '.':
              parser->state = CTX_PARSER_NUMBER;
              parser->numbers[parser->n_numbers] = 0;
              parser->decimal = 1;
              break;
            default:
              ctx_parser_holding_append (parser, byte);
              break;
          }
        /* any state change means the word has ended */
        if (parser->state != CTX_PARSER_WORD)
          {
            ctx_parser_word_done (parser);
          }
        break;
      case CTX_PARSER_STRING_A85:
        /* normally handled by the fast path at the top of the function */
        if (CTX_LIKELY(byte!='~'))
        {
          ctx_parser_holding_append (parser, byte);
        }
        else
        {
          parser->state = CTX_PARSER_NEUTRAL;
          parser->pos = ctx_a85dec ((char*)parser->holding, (char*)parser->holding, parser->pos);
          ctx_parser_string_done (parser);
        }
        break;
      case CTX_PARSER_STRING_APOS:
        switch (byte)
          {
            case '\\': parser->state = CTX_PARSER_STRING_APOS_ESCAPED; break;
            case '\'': parser->state = CTX_PARSER_NEUTRAL;
              ctx_parser_string_done (parser);
              break;
            default:
              ctx_parser_holding_append (parser, byte); break;
          }
        break;
      case CTX_PARSER_STRING_APOS_ESCAPED:
        /* translate the escape letter; unknown escapes yield the byte itself */
        switch (byte)
          {
            case '0': byte = '\0'; break;
            case 'b': byte = '\b'; break;
            case 'f': byte = '\f'; break;
            case 'n': byte = '\n'; break;
            case 'r': byte = '\r'; break;
            case 't': byte = '\t'; break;
            case 'v': byte = '\v'; break;
            default: break;
          }
        ctx_parser_holding_append (parser, byte);
        parser->state = CTX_PARSER_STRING_APOS;
        break;
      case CTX_PARSER_STRING_QUOT_ESCAPED:
        switch (byte)
          {
            case '0': byte = '\0'; break;
            case 'b': byte = '\b'; break;
            case 'f': byte = '\f'; break;
            case 'n': byte = '\n'; break;
            case 'r': byte = '\r'; break;
            case 't': byte = '\t'; break;
            case 'v': byte = '\v'; break;
            default: break;
          }
        ctx_parser_holding_append (parser, byte);
        parser->state = CTX_PARSER_STRING_QUOT;
        break;
      case CTX_PARSER_STRING_QUOT:
        switch (byte)
          {
            case '\\':
              parser->state = CTX_PARSER_STRING_QUOT_ESCAPED;
              break;
            case '"':
              parser->state = CTX_PARSER_NEUTRAL;
              ctx_parser_string_done (parser);
              break;
            default:
              ctx_parser_holding_append (parser, byte);
              break;
          }
        break;
      case CTX_PARSER_COMMENT:
        switch (byte)
          {
            case '\r':
            case '\n':
              parser->state = CTX_PARSER_NEUTRAL;
              break;
            default:
              break;
          }
        break;
    }
}

/* Feed count bytes starting at data to the parser, one byte at a time and
 * in order.  A count of zero or less feeds nothing. */
void ctx_parser_feed_bytes (CtxParser *parser, const char *data, int count)
{
  const char *cursor = data;
  int remaining = count;
  while (remaining > 0)
    {
      ctx_parser_feed_byte (parser, *cursor);
      cursor++;
      remaining--;
    }
}

/* Parse a NUL-terminated string of ctx commands and apply them to ctx.
 *
 * A temporary parser is created with the size of ctx and with the current
 * font size used for both cell dimensions.  A trailing space is fed after
 * the string so that a final token is terminated.  A NULL string is
 * ignored.
 */
CTX_EXPORT void
ctx_parse (Ctx *ctx, const char *string)
{
  if (string)
    {
      int length = strlen (string);
      CtxParser *parser = ctx_parser_new (ctx, ctx_width(ctx),
                                               ctx_height(ctx),
                                               ctx_get_font_size(ctx),
                                               ctx_get_font_size(ctx),
                                               0, 0, NULL, NULL, NULL, NULL, NULL);
      ctx_parser_feed_bytes (parser, string, length);
      ctx_parser_feed_bytes (parser, " ", 1);
      ctx_parser_free (parser);
    }
}

/* Parse an animated ctx script: pick the current scene, resolve animated
 * values for the current time and run the result through ctx_parse().
 *
 * Script conventions handled here:
 *   [duration]   a scene marker; the first number inside the brackets is
 *                the duration of the scene
 *   (k=v k=v ..) an animated value; each "key=value" pair is a keyframe at
 *                time key.  An 's' inside the parentheses selects smooth
 *                (catmull-rom) interpolation, an 'l' selects linear
 *                interpolation; the choice stays in effect for following
 *                values.
 *
 * @ctx:                 context to render to
 * @string:              script text; NULL is ignored
 * @scene_elapsed_time:  in/out, time spent in the current scene; reset to 0
 *                       when the scene's duration has been exceeded
 * @scene_no_p:          in/out, index of the current scene; advanced when a
 *                       scene ends and wrapped to 0 after the last scene
 *
 * At most MAX_KEY_FRAMES keyframes are kept per animated value, additional
 * ones are ignored.
 */
CTX_EXPORT void
ctx_parse2 (Ctx *ctx, const char *string, float *scene_elapsed_time, 
            int *scene_no_p)
{
  if (!string)
    return;

  float time = *scene_elapsed_time;
  int scene_no = *scene_no_p;
  CtxString *str = ctx_string_new ("");
  int in_var = 0;
  float scene_duration = 5.0;

  int i;

again:
  i = 0;

  // XXX : this doesn't work when there are [ 's in the text

  /* pass 1: locate the start of the current scene, advancing to the next
   * scene if the current one has run out of time */
  int scene_pos = 0;
  int last_scene = 0;
  {
  int in_scene_marker = 0;
  float duration = -1;
  for (; string[i]; i++)
  {
    char p = string[i];
    if (in_scene_marker)
    {
       if (p == ']')
       {
          in_scene_marker = 0;
          last_scene = scene_pos;
          if (scene_pos == scene_no)
          {
            scene_duration = duration;
            if (scene_duration < time)
            {
              scene_no ++;
              (*scene_no_p)++;
              *scene_elapsed_time = time = 0;
            }
            else
            {
              break;
            }
          }
          scene_pos++;
       }
       else if (p>='0' && p<='9' && duration < 0)
       {
          /* only the first number inside the marker is used */
          duration = atof (&string[i]);
       }
    }
    else
    {
       if (p == '[')
       {
          in_scene_marker = 1;
          duration = -1;
       }
    }
  }
  }

  /* ran past the last scene: wrap around to the first one */
  if (scene_no > last_scene)
  {
     scene_no = 0;
     (*scene_no_p) = 0;
     goto again;
  }
  
  /* no scene markers at all: treat the whole string as one scene */
  if (scene_no == 0 && last_scene==0 && string[i]==0)
    i=0;

#define MAX_KEY_FRAMES 64
  float keys[MAX_KEY_FRAMES];
  float values[MAX_KEY_FRAMES];
  int n_keys = 0;
  int smooth = 1; // default to catmull rom

  /* pass 2: copy the scene body to str, replacing each (...) group by the
   * value it resolves to at the current time */
  for (; string[i]; i++)
  {
    char p = string[i];
    if (in_var == 0)
    {
      if (p == '[')
        break; /* next scene marker ends this scene */
      else if (p == '(')
      {
        in_var = 1;
        n_keys = 0;
      }
      else
      {
        ctx_string_append_byte (str, p);
      }
    }
    else
    {
      if (p == ')')
      {
        float resolved_val = 0.0f;
        int   resolved = 0;
        float prev_val = 0;
        for (int k = 0; k < n_keys; k++)
        {
          float key = keys[k];
          float val = values[k];
          /* the first keyframe at or after the current time ends the
           * segment to interpolate in */
          if (key>=time && !resolved)
          {
            if (smooth == 0) // linear interpolation
            {
              if (k == 0)
                resolved_val = val;
              else
                resolved_val = ctx_lerpf (values[k-1], val, 
                                (time-keys[k-1])/(key-keys[k-1]));
            }
            else
            {
              if (k == 0)
              {
                resolved_val = val;
              }
              else if (n_keys<=2)
              {
                /* too few points for a spline */
                resolved_val = ctx_lerpf (values[k-1], val, 
                                 (time-keys[k-1])/(key-keys[k-1]));
              } else if (k == 1)
              {
                /* first segment: no point before it */
                resolved_val = ctx_catmull_rom_left (values[k-1], values[k],
                                 values[k+1],
                                 (time-keys[k-1])/(key-keys[k-1]));
              }
              else if (k > 1 && k+1 < n_keys)
              {
                resolved_val = ctx_catmull_rom (values[k-2], values[k-1],
                                 val, values[k+1],
                                 (time-keys[k-1])/(key-keys[k-1]));
              }
              else if (k >= 2 && k < n_keys)
              {
                /* last segment: no point after it */
                resolved_val = ctx_catmull_rom_right (values[k-2], values[k-1],
                                 values[k],
                                 (time-keys[k-1])/(key-keys[k-1]));
              }
            }
            resolved = 1;
          }
          prev_val = val;
        }
        /* past the last keyframe: hold its value */
        if (!resolved) resolved_val = prev_val;
        ctx_string_append_printf (str, "%f", resolved_val);
        in_var = 0;
      }
      else if (p>='0' && p<='9')
      {
        const char *sp = &string[i];
        char *ep = (char*)sp;
        float key      = strtof (sp, &ep);
        char *eq       = strchr (sp, '=');
        float val      = 0.0;

        if (eq)
           val = strtof (eq+1, &ep);

        /* keyframes beyond the capacity of the arrays are dropped */
        if (n_keys < MAX_KEY_FRAMES)
        {
          keys[n_keys] = key;
          values[n_keys++] = val;
        }

        /* continue after the parsed text; the loop increment adds the 1 */
        i+=(ep-sp)-1;
      }
      else if (p=='s')
      {
        smooth = 1;
      } else if (p=='l')
      {
        smooth = 0;
      }
      else
      {
        /* ignore */
      }

    }
  }

  /* we've now built up the frame, and parse
   * it with the regular parser
   */
  ctx_parse (ctx, str->str);
  ctx_string_free (str, 1);
}

#endif

#if !__COSMOPOLITAN__
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#endif

//#include "ctx.h"
/* instead of including ctx.h we declare the few utf8
 * functions we use
 */
uint32_t ctx_utf8_to_unichar (const char *input);
int ctx_unichar_to_utf8 (uint32_t  ch, uint8_t  *dest);
int ctx_utf8_strlen (const char *s);

/* Initialize string as empty, with a freshly allocated buffer that has
 * room for initial_size bytes plus the terminating NUL. */
static void ctx_string_init (CtxString *string, int initial_size)
{
  string->length           = 0;
  string->utf8_length      = 0;
  string->allocated_length = initial_size;

  string->str  = (char*)malloc (string->allocated_length + 1);
  *string->str = '\0';
}

/* Release the character buffer owned by string and clear the pointer; the
 * CtxString struct itself is not freed. */
static void ctx_string_destroy (CtxString *string)
{
  /* free (NULL) is a no-op, so no guard is needed */
  free (string->str);
  string->str = NULL;
}

/* Make string empty; the allocated buffer is kept for reuse. */
void ctx_string_clear (CtxString *string)
{
  string->utf8_length = 0;
  string->length      = 0;
  string->str[0]      = '\0';
}


/* Resize the buffer of string to hold size bytes (plus two spare bytes),
 * but never less than what the current content needs. */
void ctx_string_pre_alloc (CtxString *string, int size)
{
  string->allocated_length = CTX_MAX (size + 2, string->length + 2);
  string->str = (char*)realloc (string->str, string->allocated_length);
}


/* Append a single byte to string, growing the buffer when needed and
 * keeping the content NUL-terminated.
 *
 * utf8_length is incremented for every byte that is not a UTF-8
 * continuation byte (bit pattern 10xxxxxx).
 */
static inline void _ctx_string_append_byte (CtxString *string, char  val)
{
  const int is_continuation = ((val & 0xC0) == 0x80);
  if (CTX_LIKELY(!is_continuation))
    { string->utf8_length++; }

  if (CTX_UNLIKELY(string->length + 2 >= string->allocated_length))
    {
      /* double the buffer, but always leave room for byte and NUL */
      string->allocated_length = CTX_MAX (string->allocated_length * 2, string->length + 2);
      string->str = (char*)realloc (string->str, string->allocated_length);
    }

  string->str[string->length] = val;
  string->length++;
  string->str[string->length] = '\0';
}

/* Append a single byte to string; exported wrapper around the inline
 * _ctx_string_append_byte(). */
void ctx_string_append_byte (CtxString *string, char  val)
{
  _ctx_string_append_byte (string, val);
}

/* Append the UTF-8 encoding of the code point unichar to string.
 * Bytes are appended up to the first NUL of the encoded form. */
void ctx_string_append_unichar (CtxString *string, unsigned int unichar)
{
  char encoded[5];
  int  n_bytes = ctx_unichar_to_utf8 (unichar, (unsigned char *) encoded);
  encoded[n_bytes] = 0;

  for (const char *byte = encoded; *byte; byte++)
    {
      _ctx_string_append_byte (string, *byte);
    }
}

/* Append the NUL-terminated string str to string; a NULL str is ignored. */
static inline void _ctx_string_append_str (CtxString *string, const char *str)
{
  if (str == NULL)
    { return; }
  for (const char *p = str; *p != '\0'; p++)
    {
      _ctx_string_append_byte (string, *p);
    }
}

/* Append one UTF-8 encoded character, read from the start of str, to
 * string.  The number of bytes is taken from ctx_utf8_len() of the first
 * byte; copying stops early if a NUL is reached.  A NULL str is ignored. */
void ctx_string_append_utf8char (CtxString *string, const char *str)
{
  if (str == NULL)
    { return; }

  int remaining = ctx_utf8_len (*str);
  while (remaining > 0 && *str)
    {
      _ctx_string_append_byte (string, *str);
      str++;
      remaining--;
    }
}

/* Append the NUL-terminated string str to string; exported wrapper around
 * the inline _ctx_string_append_str(), which ignores a NULL str. */
void ctx_string_append_str (CtxString *string, const char *str)
{
  _ctx_string_append_str (string, str);
}

CtxString *ctx_string_new_with_size (const char *initial, int initial_size)
{
  CtxString *string = (CtxString*)ctx_calloc (sizeof (CtxString), 1);
  ctx_string_init (string, initial_size);
  if (initial)
    { _ctx_string_append_str (string, initial); }
  return string;
}

/* Allocate a new CtxString holding a copy of initial, using a default
 * initial buffer size of 8 bytes. */
CtxString *ctx_string_new (const char *initial)
{
  return ctx_string_new_with_size (initial, 8);
}

/* Append exactly len bytes from str to string; unlike the string variants
 * this does not stop at NUL bytes. */
void ctx_string_append_data (CtxString *string, const char *str, int len)
{
  int offset = 0;
  while (offset < len)
    {
      _ctx_string_append_byte (string, str[offset]);
      offset++;
    }
}

/* Append the content of string2 (up to its first NUL) to string. */
void ctx_string_append_string (CtxString *string, CtxString *string2)
{
  const char *src = ctx_string_get (string2);
  if (!src)
    { return; }
  for (; *src; src++)
    {
      _ctx_string_append_byte (string, *src);
    }
}

/* Return the internal character buffer of string; the pointer is owned by
 * the string and is replaced whenever the buffer is reallocated. */
const char *ctx_string_get (CtxString *string)
{
  return string->str;
}

/* Return the stored utf8_length of string (the tracked count of UTF-8
 * characters, as opposed to bytes). */
int ctx_string_get_utf8length (CtxString *string)
{
  return string->utf8_length;
}

/* Return the length of string in bytes, not counting the terminating NUL. */
int ctx_string_get_length (CtxString *string)
{
  return string->length;
}

/* Free a CtxString.
 *
 * @string:    the string to free
 * @freealloc: when non-zero the character buffer is released as well; when
 *             zero only the CtxString struct is freed and the buffer is
 *             left allocated (see ctx_string_dissolve() for the use of
 *             that mode).
 */
void
ctx_string_free (CtxString *string, int freealloc)
{
  if (freealloc)
    {
      ctx_string_destroy (string);
    }
#if 0
  if (string->is_line)
  {
    VtLine *line = (VtLine*)string;
    if (line->style)
      { free (line->style); }
    if (line->ctx)
      { ctx_free (line->ctx); }
    if (line->ctx_copy)
      { ctx_free (line->ctx_copy); }
  }
#endif
  free (string);
}

/* Free the CtxString struct but keep its character buffer, which is
 * returned to the caller.  The caller becomes responsible for the buffer
 * (presumably to be released with free() - confirm against the allocator
 * in use). */
char       *ctx_string_dissolve       (CtxString *string)
{
  char *ret = string->str;
  ctx_string_free (string, 0);
  return ret;
}

/* Replace the content of string with a copy of new_string; a NULL
 * new_string leaves the string empty. */
void
ctx_string_set (CtxString *string, const char *new_string)
{
  ctx_string_clear (string);
  _ctx_string_append_str (string, new_string);
}

/* Return a newly allocated copy of the NUL-terminated string str.
 *
 * The copy must be released with free().  Returns NULL if memory could not
 * be allocated.  str must not be NULL.
 */
static char *ctx_strdup (const char *str)
{
  size_t len = strlen (str);
  char *ret = (char*)malloc (len + 1);
  if (!ret)
    return NULL;
  /* len + 1 includes the terminating NUL of the source */
  memcpy (ret, str, len + 1);
  return ret;
}

/* Replace the UTF-8 character at character position pos of string with the
 * first character of new_glyph.
 *
 * When pos equals the current utf8_length, new_glyph is simply appended
 * (the whole NUL-terminated string, not just its first character).
 * Strings shorter than pos are padded with spaces first.  Single-byte
 * glyphs with a value below 32 are replaced by the byte value plus 64.
 *
 * NOTE(review): on the non-append path only string->length is adjusted
 * afterwards; utf8_length is not recomputed (see the commented-out lines
 * at the end).
 */
void ctx_string_replace_utf8 (CtxString *string, int pos, const char *new_glyph)
{
#if 1
  int old_len = string->utf8_length;
#else
  int old_len = ctx_utf8_strlen (string->str);// string->utf8_length;
#endif
  /* fast path: replacing one past the end is an append */
  if (CTX_LIKELY(pos == old_len))
    {
      _ctx_string_append_str (string, new_glyph);
      return;
    }

  char tmpg[3]=" ";
  int new_len = ctx_utf8_len (*new_glyph);
  /* substitute control characters by a printable byte (value + 64) */
  if (new_len <= 1 && new_glyph[0] < 32)
    {
      new_len = 1;
      tmpg[0]=new_glyph[0]+64;
      new_glyph = tmpg;
    }
  {
    /* pad with spaces so that position pos (and two beyond it) exists */
    for (int i = old_len; i <= pos + 2; i++)
      {
        _ctx_string_append_byte (string, ' ');
        old_len++;
      }
  }
  /* make sure the buffer can take the new glyph; the content is copied to
   * a zero-filled, larger buffer when it cannot */
  if (string->length + new_len  >= string->allocated_length - 2)
    {
      char *tmp;
      char *defer;
      string->allocated_length = string->length + new_len + 2;
      tmp = (char*) ctx_calloc (string->allocated_length + 1 + 8, 1);
      strcpy (tmp, string->str);
      defer = string->str;
      string->str = tmp;
      free (defer);
    }
  /* p points at the character being replaced, prev_len is its byte length */
  char *p = (char *) ctx_utf8_skip (string->str, pos);
  int prev_len = ctx_utf8_len (*p);
  /* rest is a temporary copy of everything after the replaced character */
  char *rest;
  if (*p == 0 || * (p+prev_len) == 0)
    {
      rest = ctx_strdup ("");
    }
  else
    {
      if (p + prev_len >= string->length  + string->str)
        { rest = ctx_strdup (""); }
      else
        { rest = ctx_strdup (p + prev_len); }
    }
  /* write the new glyph followed by the saved tail, including its NUL */
  memcpy (p, new_glyph, new_len);
  memcpy (p + new_len, rest, strlen (rest) + 1);
  string->length += new_len;
  string->length -= prev_len;
  free (rest);
  //string->length = strlen (string->str);
  //string->utf8_length = ctx_utf8_strlen (string->str);
}

/* Replaces the character at character index pos of string with the
 * code point unichar.
 *
 * The encoded bytes are explicitly NUL terminated at the index returned by
 * ctx_unichar_to_utf8, since ctx_string_replace_utf8 may treat its argument
 * as a C string.
 */
void ctx_string_replace_unichar (CtxString *string, int pos, uint32_t unichar)
{
  uint8_t utf8[8]="";
  utf8[ctx_unichar_to_utf8 (unichar, utf8)]=0;
  ctx_string_replace_utf8 (string, pos, (char *) utf8);
}

/* Returns the code point decoded at character index pos of string, or 0
 * when ctx_utf8_skip yields no position for that index.
 */
uint32_t ctx_string_get_unichar (CtxString *string, int pos)
{
  char *glyph = (char *) ctx_utf8_skip (string->str, pos);

  return glyph ? ctx_utf8_to_unichar (glyph) : 0;
}

/* Inserts the first UTF-8 character of new_glyph in front of the character
 * at character index pos of string.
 *
 *  - A glyph of length <= 1 whose first char compares below 32 is stored
 *    as that value + 64.
 *  - When pos is at or beyond the current character count, spaces are
 *    appended up to index pos and the glyph takes the place of the last
 *    appended space, i.e. it ends up as the final character at index pos.
 *  - When pos addresses an existing character, that character and
 *    everything after it is shifted right; nothing is overwritten.  This
 *    includes the last character of the string, which earlier revisions of
 *    this function replaced instead of shifting.
 *
 * string->length and string->utf8_length are recomputed from the buffer
 * afterwards.  If the buffer has to grow and the allocation fails the
 * function returns without inserting the glyph.
 */
void ctx_string_insert_utf8 (CtxString *string, int pos, const char *new_glyph)
{
  int new_len = ctx_utf8_len (*new_glyph);
  int old_len = string->utf8_length;
  char tmpg[3]=" ";

  if (new_len <= 1 && new_glyph[0] < 32)
    {
      tmpg[0]=new_glyph[0]+64;
      new_glyph = tmpg;
    }

  /* when true, index pos does not exist yet and is created by the padding
   * below; the space created at pos is a placeholder the glyph consumes */
  int appending = (pos >= old_len);
  for (int i = old_len; i <= pos; i++)
    _ctx_string_append_byte (string, ' ');

  if (string->length + new_len + 1  > string->allocated_length)
    {
      int   grown = string->length + new_len + 1;
      char *tmp   = (char*) ctx_calloc (grown + 1, 1);
      if (!tmp)
        return;
      strcpy (tmp, string->str);
      free (string->str);
      string->str = tmp;
      string->allocated_length = grown;
    }

  char *p = (char *) ctx_utf8_skip (string->str, pos);
  if (!p)
    return;

  /* bytes that must follow the inserted glyph */
  const char *tail = p;
  if (appending && *p != 0)
    tail = p + 1; /* skip the single byte placeholder space */
  size_t tail_len = strlen (tail);

  /* shift the tail (including its NUL) before writing the glyph, the
   * regions overlap */
  memmove (p + new_len, tail, tail_len + 1);
  memcpy (p, new_glyph, new_len);

  string->length = strlen (string->str);
  string->utf8_length = ctx_utf8_strlen (string->str);
}

/* Inserts the code point unichar at character index pos of string.
 *
 * The code point is encoded into a local buffer, NUL terminated at the
 * index returned by ctx_unichar_to_utf8 and then passed on to
 * ctx_string_insert_utf8.
 */
void ctx_string_insert_unichar (CtxString *string, int pos, uint32_t unichar)
{
  uint8_t utf8[5]="";
  int     used = ctx_unichar_to_utf8(unichar, utf8);

  utf8[used]=0;
  ctx_string_insert_utf8 (string, pos, (char*)utf8);
}

/* Removes the UTF-8 character at character index pos from string.
 *
 * When pos is at or beyond the current character count, spaces are first
 * appended up to and including index pos, so the character removed is the
 * last of those spaces.
 *
 * Nothing is removed when ctx_utf8_skip yields no position or points at the
 * terminating NUL.  string->length and string->utf8_length are recomputed
 * from the buffer afterwards.
 */
void ctx_string_remove (CtxString *string, int pos)
{
  int old_len = string->utf8_length;
  for (int i = old_len; i <= pos; i++)
    _ctx_string_append_byte (string, ' ');

  char *p = (char *) ctx_utf8_skip (string->str, pos);
  if (!p || *p == 0)
    return;

  size_t remaining = strlen (p);
  size_t prev_len  = (size_t) ctx_utf8_len (*p);
  /* a truncated multi byte sequence at the end of the string must not make
   * us read beyond the terminating NUL */
  if (prev_len > remaining)
    prev_len = remaining;

  /* close the gap in place; + 1 moves the terminating NUL along */
  memmove (p, p + prev_len, remaining - prev_len + 1);

  string->length = strlen (string->str);
  string->utf8_length = ctx_utf8_strlen (string->str);
}

/* printf style formatting into a newly malloc()ed, NUL terminated string.
 *
 * The required size is measured with a first vsnprintf pass and the text is
 * produced by a second pass.  The caller owns the result and releases it
 * with free().
 *
 * Returns NULL when formatting reports an error or the allocation fails.
 */
char *ctx_strdup_printf (const char *format, ...)
{
  va_list ap;
  int     len;
  char   *buffer;

  va_start (ap, format);
  len = vsnprintf (NULL, 0, format, ap);
  va_end (ap);
  if (len < 0)
    return NULL;

  buffer = (char*)malloc ((size_t) len + 1);
  if (!buffer)
    return NULL;

  va_start (ap, format);
  vsnprintf (buffer, (size_t) len + 1, format, ap);
  va_end (ap);
  return buffer;
}

/* Appends printf style formatted text to string.
 *
 * The text is formatted into a temporary heap buffer (sized by a first
 * vsnprintf pass) and then appended with ctx_string_append_str.  When
 * formatting reports an error or the temporary buffer cannot be allocated,
 * string is left untouched.
 */
void ctx_string_append_printf (CtxString *string, const char *format, ...)
{
  va_list ap;
  int     len;
  char   *buffer;

  va_start (ap, format);
  len = vsnprintf (NULL, 0, format, ap);
  va_end (ap);
  if (len < 0)
    return;

  buffer = (char*)malloc ((size_t) len + 1);
  if (!buffer)
    return;

  va_start (ap, format);
  vsnprintf (buffer, (size_t) len + 1, format, ap);
  va_end (ap);

  ctx_string_append_str (string, buffer);
  free (buffer);
}

/* Creates a new CtxString holding printf style formatted text.
 *
 * The string is created empty with ctx_string_new ("") and the formatted
 * text is appended to it.  When formatting reports an error or the
 * temporary buffer cannot be allocated, the still empty string is returned.
 */
CtxString *ctx_string_new_printf (const char *format, ...)
{
  CtxString *string = ctx_string_new ("");
  va_list ap;
  int     len;
  char   *buffer;

  va_start (ap, format);
  len = vsnprintf (NULL, 0, format, ap);
  va_end (ap);
  if (len < 0)
    return string;

  buffer = (char*)malloc ((size_t) len + 1);
  if (!buffer)
    return string;

  va_start (ap, format);
  vsnprintf (buffer, (size_t) len + 1, format, ap);
  va_end (ap);

  ctx_string_append_str (string, buffer);
  free (buffer);
  return string;
}

/* Empties the drawlist of ctx by resetting its entry count and its bitpack
 * position to zero; the entry storage itself is not touched here.
 */
void ctx_drawlist_clear (Ctx *ctx)
{
  ctx->drawlist.bitpack_pos = 0;
  ctx->drawlist.count       = 0;
}

/* Destructor installed as the ->free callback of the drawlist backend
 * (see ctx_drawlist_backend_new): releases the backend struct itself.
 */
static void ctx_drawlist_backend_free (CtxBackend *backend)
{
  free (backend);
}

/* Keeps ctx->current_path in step with the command stream.
 *
 * Commands that start over or consume the path empty the recording, path
 * construction commands are appended to it, everything else is ignored.
 * Compiles to a no-op unless CTX_CURRENT_PATH is enabled.
 */
static void ctx_update_current_path (Ctx *ctx, CtxEntry *entry)
{
#if CTX_CURRENT_PATH
  switch (entry->code)
    {
      /* the recorded path is emptied */
      case CTX_BEGIN_PATH:
      case CTX_TEXT:
      case CTX_STROKE_TEXT:
      case CTX_CLIP:   // XXX unless preserve
      case CTX_FILL:   // XXX unless preserve
      case CTX_STROKE: // XXX unless preserve
        ctx->current_path.count = 0;
        return;

      /* the command becomes part of the recorded path */
      case CTX_MOVE_TO:
      case CTX_LINE_TO:
      case CTX_CLOSE_PATH:
      case CTX_CURVE_TO:
      case CTX_REL_CURVE_TO:
      case CTX_QUAD_TO:
      case CTX_REL_QUAD_TO:
      case CTX_SMOOTH_TO:
      case CTX_REL_SMOOTH_TO:
      case CTX_SMOOTHQ_TO:
      case CTX_REL_SMOOTHQ_TO:
      case CTX_ARC:
      case CTX_ARC_TO:
      case CTX_REL_ARC_TO:
      case CTX_RECTANGLE:
      case CTX_ROUND_RECTANGLE:
        ctx_drawlist_add_entry (&ctx->current_path, entry);
        return;

      default:
        return;
    }
#endif
}

/* Process callback of the drawlist backend (installed by
 * ctx_drawlist_backend_new): updates the tracked current path when
 * CTX_CURRENT_PATH is enabled, lets the style, transform and position
 * interpreters see the entry, and finally appends the entry to
 * ctx->drawlist.
 */
static void
ctx_drawlist_process (Ctx *ctx, CtxEntry *entry)
{
#if CTX_CURRENT_PATH
  ctx_update_current_path (ctx, entry);
#endif
  /* these functions can alter the code and coordinates of the
     command that in the end gets added to the drawlist
   */
  ctx_interpret_style (&ctx->state, entry, ctx);
  ctx_interpret_transforms (&ctx->state, entry, ctx);
  ctx_interpret_pos (&ctx->state, entry, ctx);
  ctx_drawlist_add_entry (&ctx->drawlist, entry);
}

/* Allocates a zero initialised CtxBackend for the drawlist backend, with
 * ctx_drawlist_process as its process callback and
 * ctx_drawlist_backend_free as its destructor.
 *
 * Returns NULL when the allocation fails.
 */
static CtxBackend *ctx_drawlist_backend_new (void)
{
  CtxBackend *backend = (CtxBackend*)calloc (1, sizeof (CtxBackend));
  if (!backend)
    return NULL;
  backend->process = (void(*)(Ctx *a, CtxCommand *c))ctx_drawlist_process;
  backend->free    = (void(*)(void *a))ctx_drawlist_backend_free;
  return backend;
}

#if CTX_RASTERIZER


/* Returns 1 when the rectangles a and b overlap and 0 otherwise.
 * Rectangles that merely touch along an edge do not count as overlapping.
 */
static int
ctx_rect_intersect (const CtxIntRectangle *a, const CtxIntRectangle *b)
{
  return a->x < b->x + b->width  &&
         b->x < a->x + a->width  &&
         a->y < b->y + b->height &&
         b->y < a->y + a->height;
}


/* Mixes hash into every tile of the hasher grid that shape_rect overlaps
 * and records which tiles were touched.
 *
 * The blit area is divided into hasher->cols x hasher->rows tiles of equal
 * (integer divided) size.  For each overlapping tile the hash is XORed into
 * hasher->hashes[], and the tile's bit is set in a 32 bit "active" mask;
 * tiles with an index of 32 or more cannot be represented in the mask and
 * are left out of it.  One CtxCommandState with the current hasher->pos and
 * the mask is appended to hasher->active_info, which is grown as needed.
 * When growing fails the record is dropped; the tile hashes are still
 * updated.
 */
static void
_ctx_add_hash (CtxHasher *hasher, CtxIntRectangle *shape_rect, uint32_t hash)
{
  CtxIntRectangle rect = {0,0, hasher->rasterizer.blit_width/hasher->cols,
                               hasher->rasterizer.blit_height/hasher->rows};
  uint32_t active = 0;
  int hno = 0;
  for (int row = 0; row < hasher->rows; row++)
    for (int col = 0; col < hasher->cols; col++, hno++)
     {
      rect.x = col * rect.width;
      rect.y = row * rect.height;
      if (ctx_rect_intersect (shape_rect, &rect))
      {
        hasher->hashes[(row * hasher->cols + col)] ^= hash;
        /* unsigned shift, and only for bits that exist in the mask */
        if (hno < 32)
          active |= ((uint32_t) 1 << hno);
      }
    }

  if (hasher->active_info_count+1 >= hasher->active_info_size)
  {
    int new_size = hasher->active_info_size * 2 + 1024;
    CtxCommandState *grown = (CtxCommandState*)
      realloc (hasher->active_info, new_size * sizeof (CtxCommandState));
    if (!grown)
      return; /* the old array stays valid and untouched */
    hasher->active_info      = grown;
    hasher->active_info_size = new_size;
  }
  hasher->active_info[hasher->active_info_count].pos = hasher->pos;
  hasher->active_info[hasher->active_info_count].active = active;
  hasher->active_info_count++;
}

/* Returns the number of '\n' characters in the NUL terminated string str. */
static int
ctx_str_count_lines (const char *str)
{
  int newlines = 0;

  while (*str)
    {
      newlines += (*str == '\n');
      str++;
    }
  return newlines;
}

/* Per-word mixing step of the 32 bit murmur3 hash: multiply, rotate left
 * by 15 bits, multiply again (all modulo 2^32).
 */
static inline uint32_t murmur_32_scramble(uint32_t k) {
    const uint32_t mixed   = k * 0xcc9e2d51;
    const uint32_t rotated = (mixed << 15) | (mixed >> 17);
    return rotated * 0x1b873593;
}

/* Feeds len bytes starting at key into the running hash kept in murmur.
 *
 * murmur->state[0] holds the running hash value and murmur->state[1]
 * accumulates the number of bytes fed so far.  The input is consumed in
 * 32 bit words read in host byte order, so results differ between
 * endiannesses; up to three remaining bytes are folded in separately.
 *
 * The mixing follows the murmur3 reference code from the wikipedia
 * article, which appears there without a license.
 */
static inline void murmur3_32_process(CtxMurmur *murmur, const uint8_t* key, size_t len)
{
    const size_t words = len >> 2;
    size_t       tail  = len & 3;
    uint32_t     hash  = murmur->state[0];
    uint32_t     block;

    /* whole 32 bit words */
    for (size_t w = 0; w < words; w++) {
        memcpy(&block, key, sizeof(uint32_t));
        key += sizeof(uint32_t);
        hash ^= murmur_32_scramble(block);
        hash = (hash << 13) | (hash >> 19);
        hash = hash * 5 + 0xe6546b64;
    }

    /* remaining 0..3 bytes, last byte ends up in the highest position;
     * no byte swap is needed since the bytes are placed one by one */
    block = 0;
    while (tail) {
        tail--;
        block = (block << 8) | key[tail];
    }
    hash ^= murmur_32_scramble(block);

    murmur->state[0] = hash;
    murmur->state[1] += len;
}

/* Resets murmur to its initial state: running hash 0, byte count 0. */
static inline void murmur3_32_init (CtxMurmur *murmur)
{
  murmur->state[1] = 0;
  murmur->state[0] = 0;
}
/* Releases a heap allocated CtxMurmur with free(); must not be used on
 * instances that live on the stack or inside another struct.
 */
static inline void murmur3_32_free (CtxMurmur *murmur)
{
  free (murmur);
}
/* Returns the final 32 bit hash for the data fed into murmur so far.
 *
 * The running hash (state[0]) is combined with the accumulated byte count
 * (state[1]) and run through the murmur3 avalanche steps.  murmur itself is
 * not modified, so more data can be fed in afterwards.
 */
static inline uint32_t murmur3_32_finalize (CtxMurmur *murmur)
{
  uint32_t hash = murmur->state[0];

  hash ^= murmur->state[1];
  hash  = (hash ^ (hash >> 16)) * 0x85ebca6b;
  hash  = (hash ^ (hash >> 13)) * 0xc2b2ae35;
  return hash ^ (hash >> 16);
}

/* Computes the final hash for murmur, stores its 4 bytes (host byte order)
 * at out and also returns it converted to int.
 *
 * The value is the one murmur3_32_finalize returns; earlier revisions of
 * this function computed that value but then wrote and returned the
 * un-finalized running state instead.
 */
static inline int murmur3_32_done (CtxMurmur *murmur, unsigned char *out)
{
  uint32_t digest = murmur3_32_finalize (murmur);

  memcpy (out, &digest, sizeof (digest));
  return (int) digest;
}

/*
 * The hasher should store a list of the
 * times when the activeness of each tile changes.
 *
 * On replay, path and text/glyph commands as well
 * as stroke/fill can be ignored; clips outside
 * should mean no more drawing until restore.
 */

/* Process callback of the hasher backend (installed by ctx_hasher_init).
 *
 * For every command:
 *  - position and style state is updated through the interpreters,
 *  - painting commands (text, glyph, clip/paint, fill, stroke) compute a
 *    murmur hash seeded from the current source hash
 *    (murmur_fill / murmur_stroke at hasher->source_level) plus the text or
 *    edge list and some graphics state, and add it to the tiles overlapped
 *    by the command's bounding rectangle with _ctx_add_hash,
 *  - path construction commands are forwarded to the rasterizer path
 *    functions,
 *  - source changing commands (color, gradients, textures) re-seed the
 *    fill or stroke source hash of the current level,
 *  - save/restore copy the source hashes between levels.
 *
 * Afterwards hasher->pos is advanced by the number of entries the command
 * occupies, and a line width that was just set is scaled by the largest
 * absolute component of the current transform's upper left 2x2.
 */
static void
ctx_hasher_process (Ctx *ctx, CtxCommand *command)
{
  CtxEntry      *entry      = &command->entry;
  /* both casts view the same backend object, once as rasterizer and
   * once as hasher */
  CtxRasterizer *rasterizer = (CtxRasterizer *) ctx->backend;
  CtxHasher     *hasher     = (CtxHasher*) ctx->backend;
  CtxState      *state      = rasterizer->state;
  CtxCommand *c = (CtxCommand *) entry;
  /* fixed vertical subsampling divisor used for the bounding boxes below */
  int aa = 15;//rasterizer->aa;

  ctx_interpret_pos_bare (rasterizer->state, entry, NULL);
  ctx_interpret_style (rasterizer->state, entry, NULL);

  switch (c->code)
    {
      case CTX_TEXT:
        {
          const char *str = ctx_arg_string();
          CtxMurmur murmur;
          /* work on a copy, the per level source hash must stay intact */
          memcpy (&murmur, &hasher->murmur_fill[hasher->source_level], sizeof (CtxMurmur));
          float width = ctx_text_width (rasterizer->backend.ctx, str);


          float height = ctx_get_font_size (rasterizer->backend.ctx);
           CtxIntRectangle shape_rect;

           /* estimated extents of the text in user space: starts 1.2 font
            * sizes above the current point, one font size per newline plus
            * 1.5 font sizes tall */
           float tx = rasterizer->x;
           float ty = rasterizer->y - height * 1.2;
           float tw = width;
           float th = height * (ctx_str_count_lines (str) + 1.5);

           _ctx_user_to_device (rasterizer->state, &tx, &ty);
           _ctx_user_to_device_distance (rasterizer->state, &tw, &th);
          
           shape_rect.x=tx;
           shape_rect.y=ty;
           shape_rect.width = tw;
           shape_rect.height = th;
          switch ((int)ctx_state_get (rasterizer->state, CTX_text_align))
          {
          case CTX_TEXT_ALIGN_LEFT:
          case CTX_TEXT_ALIGN_START:
                  break;
          case CTX_TEXT_ALIGN_END:
          case CTX_TEXT_ALIGN_RIGHT:
           shape_rect.x -= shape_rect.width;
           break;
          case CTX_TEXT_ALIGN_CENTER:
           shape_rect.x -= shape_rect.width/2;
           break;
                   // XXX : doesn't take all text-alignments into account
          }

#if 0
          uint32_t color;
          ctx_color_get_rgba8 (rasterizer->state, &rasterizer->state->gstate.source_fill.color, (uint8_t*)(&color));
#endif
          murmur3_32_process(&murmur, (const unsigned char*)ctx_arg_string(), strlen  (ctx_arg_string()));
#if 1
        murmur3_32_process(&murmur, (unsigned char*)(&rasterizer->state->gstate.transform), sizeof (rasterizer->state->gstate.transform));
    //      murmur3_32_process(&murmur, (unsigned char*)&color, 4);
#endif
          murmur3_32_process(&murmur, (unsigned char*)&shape_rect, sizeof (CtxIntRectangle));
          _ctx_add_hash (hasher, &shape_rect, murmur3_32_finalize (&murmur));

          /* advance the current point by the text width */
          ctx_rasterizer_rel_move_to (rasterizer, width, 0);
        }
        ctx_rasterizer_reset (rasterizer);
        break;
      case CTX_STROKE_TEXT:
        {
          CtxMurmur murmur;
          const char *str = ctx_arg_string();
          memcpy (&murmur, &hasher->murmur_stroke[hasher->source_level], sizeof (CtxMurmur));
          float width = ctx_text_width (rasterizer->backend.ctx, str);
          float height = ctx_get_font_size (rasterizer->backend.ctx);

           CtxIntRectangle shape_rect;

           /* NOTE(review): unlike CTX_TEXT the rectangle starts at the
            * current point, has no extra 1.5 lines of height and ignores
            * text alignment - confirm this is intended */
           float tx = rasterizer->x;
           float ty = rasterizer->y;
           float tw = width;
           float th = height * (ctx_str_count_lines (str));

           _ctx_user_to_device (rasterizer->state, &tx, &ty);
           _ctx_user_to_device_distance (rasterizer->state, &tw, &th);
          
           shape_rect.x=tx;
           shape_rect.y=ty;
           shape_rect.width = tw;
           shape_rect.height = th;

#if 0
          uint32_t color;
          ctx_color_get_rgba8 (rasterizer->state, &rasterizer->state->gstate.source_stroke.color, (uint8_t*)(&color));
#endif
          murmur3_32_process(&murmur, (unsigned char*)ctx_arg_string(), strlen  (ctx_arg_string()));
#if 1
          murmur3_32_process(&murmur, (unsigned char*)(&rasterizer->state->gstate.transform), sizeof (rasterizer->state->gstate.transform));
    //    murmur3_32_process(&murmur, (unsigned char*)&color, 4);
#endif
          murmur3_32_process(&murmur, (unsigned char*)&shape_rect, sizeof (CtxIntRectangle));
          _ctx_add_hash (hasher, &shape_rect, murmur3_32_finalize (&murmur));

          ctx_rasterizer_rel_move_to (rasterizer, width, 0);
        }
        ctx_rasterizer_reset (rasterizer);
        break;
      case CTX_GLYPH:
         {
          CtxMurmur murmur;
          memcpy (&murmur, &hasher->murmur_fill[hasher->source_level], sizeof (CtxMurmur));

          /* encode the glyph's code point as a NUL terminated utf8 string
           * so it can be measured and hashed like text */
          uint8_t string[8];
          string[ctx_unichar_to_utf8 (c->u32.a0, string)]=0;
          float width = ctx_text_width (rasterizer->backend.ctx, (char*)string);
          float height = ctx_get_font_size (rasterizer->backend.ctx);

          float tx = rasterizer->x;
          float ty = rasterizer->y;
          float tw = width;
          float th = height * 2;

          _ctx_user_to_device (rasterizer->state, &tx, &ty);
          _ctx_user_to_device_distance (rasterizer->state, &tw, &th);
          /* two font sizes tall, vertically centered on the current point */
          CtxIntRectangle shape_rect = {(int)tx,(int)(ty-th/2),(int)tw,(int)th};


#if 0
          uint32_t color;
          ctx_color_get_rgba8 (rasterizer->state, &rasterizer->state->gstate.source_fill.color, (uint8_t*)(&color));
#endif
          murmur3_32_process(&murmur, string, strlen ((const char*)string));
          murmur3_32_process(&murmur, (unsigned char*)(&rasterizer->state->gstate.transform), sizeof (rasterizer->state->gstate.transform));
#if 0
          murmur3_32_process(&murmur, (unsigned char*)&color, 4);
#endif
          murmur3_32_process(&murmur, (unsigned char*)&shape_rect, sizeof (CtxIntRectangle));
          _ctx_add_hash (hasher, &shape_rect, murmur3_32_finalize (&murmur));

          ctx_rasterizer_rel_move_to (rasterizer, width, 0);
          ctx_rasterizer_reset (rasterizer);
         }
        break;

      /* clip and paint are both hashed against the whole blit area */
      case CTX_CLIP:
      case CTX_PAINT:
        {
        CtxMurmur murmur;
        memcpy (&murmur, &hasher->murmur_fill[hasher->source_level], sizeof (CtxMurmur));
        if (rasterizer->edge_list.count)
          murmur3_32_process(&murmur,  (uint8_t*)rasterizer->edge_list.entries, sizeof(CtxSegment) * rasterizer->edge_list.count);

        {
          int is = rasterizer->state->gstate.fill_rule;
          murmur3_32_process(&murmur, (uint8_t*)&is, sizeof(int));
        }
        CtxIntRectangle shape_rect = {0,0,
                rasterizer->blit_width,
                rasterizer->blit_height};
        _ctx_add_hash (hasher, &shape_rect, murmur3_32_finalize (&murmur));
        }

        break;
      case CTX_FILL:
        {
          CtxMurmur murmur;
          memcpy (&murmur, &hasher->murmur_fill[hasher->source_level], sizeof (CtxMurmur));

          /* we want this hasher to be as good as possible internally,
           * since it is also used in the small shapes rasterization
           * cache
           */
        //uint64_t hash = ctx_rasterizer_poly_to_hash2 (rasterizer); // + hasher->salt;
        /* bounding box of the accumulated path, converted from subpixel
         * units and padded by a couple of pixels on every side */
        CtxIntRectangle shape_rect = {
          (int)(rasterizer->col_min / CTX_SUBDIV - 2),
          (int)(rasterizer->scan_min / aa - 2),
          (int)(3+(rasterizer->col_max - rasterizer->col_min + CTX_SUBDIV-1) / CTX_SUBDIV),
          (int)(3+(rasterizer->scan_max - rasterizer->scan_min + aa-1) / aa)
        };

        if (rasterizer->edge_list.count)
          murmur3_32_process(&murmur,  (uint8_t*)rasterizer->edge_list.entries, sizeof(CtxSegment) * rasterizer->edge_list.count);

        {
          int is = rasterizer->state->gstate.fill_rule;
          murmur3_32_process(&murmur, (uint8_t*)&is, sizeof(int));
        }
        {
          int is = rasterizer->state->gstate.image_smoothing;
          murmur3_32_process(&murmur, (uint8_t*)&is, sizeof(int));
        }
        {
          int e = rasterizer->state->gstate.extend;
          murmur3_32_process(&murmur, (uint8_t*)&e, sizeof(int));
        }

          _ctx_add_hash (hasher, &shape_rect, murmur3_32_finalize (&murmur));

        /* NOTE(review): CTX_CLIP has its own case label above (shared with
         * CTX_PAINT), so c->code is always CTX_FILL here and this call is
         * never reached */
        if (c->code == CTX_CLIP)
          ctx_rasterizer_clip (rasterizer);

        /* the path only survives when a CTX_PRESERVE preceded this command */
        if (!rasterizer->preserve)
          ctx_rasterizer_reset (rasterizer);
        rasterizer->preserve = 0;

        }
        break;
      case CTX_STROKE:
        {
          CtxMurmur murmur;
          memcpy (&murmur, &hasher->murmur_stroke[hasher->source_level], sizeof (CtxMurmur));
        if (rasterizer->edge_list.count)
        murmur3_32_process(&murmur,  (uint8_t*)rasterizer->edge_list.entries, sizeof(CtxSegment) * rasterizer->edge_list.count);
        /* bounding box of the path grown by the line width */
        CtxIntRectangle shape_rect = {
          (int)(rasterizer->col_min / CTX_SUBDIV - rasterizer->state->gstate.line_width),
          (int)(rasterizer->scan_min / aa - rasterizer->state->gstate.line_width),
          (int)((rasterizer->col_max - rasterizer->col_min + 1) / CTX_SUBDIV + rasterizer->state->gstate.line_width),
          (int)((rasterizer->scan_max - rasterizer->scan_min + 1) / aa + rasterizer->state->gstate.line_width)
        };

        /* and grown by a further line width on every side */
        shape_rect.width += rasterizer->state->gstate.line_width * 2;
        shape_rect.height += rasterizer->state->gstate.line_width * 2;
        shape_rect.x -= rasterizer->state->gstate.line_width;
        shape_rect.y -= rasterizer->state->gstate.line_width;

        {
          float f;
          int i;
          f = rasterizer->state->gstate.line_width;
          murmur3_32_process(&murmur, (uint8_t*)&f, sizeof(float));
          i = rasterizer->state->gstate.line_cap;
          murmur3_32_process(&murmur, (uint8_t*)&i, sizeof(int));
          i = rasterizer->state->gstate.line_join;
          murmur3_32_process(&murmur, (uint8_t*)&i, sizeof(int));
          i = rasterizer->state->gstate.source_stroke.type;
          murmur3_32_process(&murmur, (uint8_t*)&i, sizeof(int));
        }

        uint32_t color;
        ctx_color_get_rgba8 (rasterizer->state, &rasterizer->state->gstate.source_stroke.color, (uint8_t*)(&color));

          murmur3_32_process(&murmur, (unsigned char*)&color, 4);

          _ctx_add_hash (hasher, &shape_rect, murmur3_32_finalize (&murmur));
        }
        if (!rasterizer->preserve)
          ctx_rasterizer_reset (rasterizer);
        rasterizer->preserve = 0;
        break;
        /* the above cases are the painting cases and
         * the only ones differing from the rasterizer's process switch
         */

      case CTX_LINE_TO:
        ctx_rasterizer_line_to (rasterizer, c->c.x0, c->c.y0);
        break;
      case CTX_REL_LINE_TO:
        ctx_rasterizer_rel_line_to (rasterizer, c->c.x0, c->c.y0);
        break;
      case CTX_MOVE_TO:
        ctx_rasterizer_move_to (rasterizer, c->c.x0, c->c.y0);
        break;
      case CTX_REL_MOVE_TO:
        ctx_rasterizer_rel_move_to (rasterizer, c->c.x0, c->c.y0);
        break;
      /* curves are not flattened here; straight lines through their
       * points are added instead - presumably good enough since only a
       * bounding box and a hash are derived from the path (TODO confirm) */
      case CTX_CURVE_TO:
        ctx_rasterizer_line_to (rasterizer, c->c.x0, c->c.y0);
        ctx_rasterizer_line_to (rasterizer, c->c.x1, c->c.y1);
        ctx_rasterizer_line_to (rasterizer, c->c.x2, c->c.y2);
        //ctx_rasterizer_curve_to (rasterizer, c->c.x0, c->c.y0,
        //                         c->c.x1, c->c.y1,
        //                         c->c.x2, c->c.y2);
        break;
      case CTX_REL_CURVE_TO:
        /* NOTE(review): only the end point is used here, the control
         * points are skipped, unlike CTX_CURVE_TO above */
        ctx_rasterizer_rel_line_to (rasterizer, c->c.x2, c->c.y2);
        //ctx_rasterizer_rel_curve_to (rasterizer, c->c.x0, c->c.y0,
        //                             c->c.x1, c->c.y1,
        //                             c->c.x2, c->c.y2);
        break;
      case CTX_QUAD_TO:
        ctx_rasterizer_line_to (rasterizer, c->c.x1, c->c.y1);
        //ctx_rasterizer_quad_to (rasterizer, c->c.x0, c->c.y0, c->c.x1, c->c.y1);
        break;
      case CTX_REL_QUAD_TO:
        ctx_rasterizer_rel_line_to (rasterizer, c->c.x1, c->c.y1);
        //ctx_rasterizer_rel_quad_to (rasterizer, c->c.x0, c->c.y0, c->c.x1, c->c.y1);
        break;
      case CTX_ARC:
        ctx_rasterizer_arc (rasterizer, c->arc.x, c->arc.y, c->arc.radius, c->arc.angle1, c->arc.angle2, c->arc.direction);
        break;
      case CTX_RECTANGLE:
        ctx_rasterizer_rectangle (rasterizer, c->rectangle.x, c->rectangle.y,
                                  c->rectangle.width, c->rectangle.height);
        break;
      case CTX_ROUND_RECTANGLE:
        ctx_rasterizer_round_rectangle (rasterizer, c->rectangle.x, c->rectangle.y,
                                        c->rectangle.width, c->rectangle.height,
                                        c->rectangle.radius);
        break;
      case CTX_SET_PIXEL:
        ctx_rasterizer_set_pixel (rasterizer, c->set_pixel.x, c->set_pixel.y,
                                  c->set_pixel.rgba[0],
                                  c->set_pixel.rgba[1],
                                  c->set_pixel.rgba[2],
                                  c->set_pixel.rgba[3]);
        break;
      case CTX_PRESERVE:
        /* consumed by the next fill or stroke */
        rasterizer->preserve = 1;
        break;
      case CTX_SAVE:
      case CTX_RESTORE:

        if (c->code == CTX_SAVE)
        {
           /* push: the new level starts out with the parent's source hashes */
           if (hasher->source_level + 1 < CTX_MAX_STATES)
           {
             hasher->source_level++;
             hasher->murmur_fill[hasher->source_level] =
               hasher->murmur_fill[hasher->source_level-1];
             hasher->murmur_stroke[hasher->source_level] =
               hasher->murmur_stroke[hasher->source_level+1-2];
           }
        }
        else
        {
           /* NOTE(review): on restore the hashes of the level being left
            * are copied down over the level being returned to, so source
            * changes made between save and restore stay in effect for
            * hashing - confirm this is intended */
           if (hasher->source_level - 1 >= 0)
           {
             hasher->source_level--;
             hasher->murmur_fill[hasher->source_level] =
               hasher->murmur_fill[hasher->source_level+1];
             hasher->murmur_stroke[hasher->source_level] =
               hasher->murmur_stroke[hasher->source_level+1];
           }
        }

        /* FALLTHROUGH */
      case CTX_ROTATE:
      case CTX_SCALE:
      case CTX_TRANSLATE:
      case CTX_APPLY_TRANSFORM:



        rasterizer->uses_transforms = 1;
        ctx_interpret_transforms (rasterizer->state, entry, NULL);

        
        break;
      case CTX_FONT:
        ctx_rasterizer_set_font (rasterizer, ctx_arg_string() );
        break;
      case CTX_BEGIN_PATH:
        ctx_rasterizer_reset (rasterizer);
        break;
      case CTX_CLOSE_PATH:
        ctx_rasterizer_finish_shape (rasterizer);
        break;
      /* the cases below change the source: the fill (or stroke) hash of
       * the current level is re-seeded from global alpha plus a
       * description of the new source */
      case CTX_DEFINE_TEXTURE:
        {
        murmur3_32_init (&hasher->murmur_fill[hasher->source_level]);
        murmur3_32_process(&hasher->murmur_fill[hasher->source_level], &rasterizer->state->gstate.global_alpha_u8, 1);
        murmur3_32_process (&hasher->murmur_fill[hasher->source_level], (uint8_t*)c->define_texture.eid, strlen (c->define_texture.eid));
        murmur3_32_process(&hasher->murmur_fill[hasher->source_level], (unsigned char*)(&rasterizer->state->gstate.transform), sizeof (rasterizer->state->gstate.transform));

        rasterizer->comp_op = NULL; // why?
        }
        break;
      case CTX_TEXTURE:
        murmur3_32_init (&hasher->murmur_fill[hasher->source_level]);
        murmur3_32_process(&hasher->murmur_fill[hasher->source_level], &rasterizer->state->gstate.global_alpha_u8, 1);
        murmur3_32_process (&hasher->murmur_fill[hasher->source_level], (uint8_t*)c->texture.eid, strlen (c->texture.eid));
        murmur3_32_process (&hasher->murmur_fill[hasher->source_level], (uint8_t*)(&rasterizer->state->gstate.transform), sizeof (rasterizer->state->gstate.transform));
        rasterizer->comp_op = NULL; // why?
        break;
      case CTX_COLOR:
        {
          uint32_t color;
          /* bit 512 of the first argument selects the stroke source,
           * otherwise the fill source is updated */
          if (((int)(ctx_arg_float(0))&512))
          {
            ctx_color_get_rgba8 (rasterizer->state, &rasterizer->state->gstate.source_stroke.color, (uint8_t*)(&color));
            murmur3_32_init (&hasher->murmur_stroke[hasher->source_level]);
            murmur3_32_process(&hasher->murmur_stroke[hasher->source_level], &rasterizer->state->gstate.global_alpha_u8, 1);
            murmur3_32_process(&hasher->murmur_stroke[hasher->source_level], (unsigned char*)&color, 4);
          }
          else
          {
            ctx_color_get_rgba8 (rasterizer->state, &rasterizer->state->gstate.source_fill.color, (uint8_t*)(&color));
            murmur3_32_init (&hasher->murmur_fill[hasher->source_level]);
            murmur3_32_process(&hasher->murmur_fill[hasher->source_level], &rasterizer->state->gstate.global_alpha_u8, 1);
            murmur3_32_process(&hasher->murmur_fill[hasher->source_level], (unsigned char*)&color, 4);
          }
        }
        break;
      case CTX_LINEAR_GRADIENT:
          murmur3_32_init (&hasher->murmur_fill[hasher->source_level]);
          murmur3_32_process(&hasher->murmur_fill[hasher->source_level], &rasterizer->state->gstate.global_alpha_u8, 1);
          murmur3_32_process(&hasher->murmur_fill[hasher->source_level], 
                           (uint8_t*)c, sizeof (c->linear_gradient));
          murmur3_32_process (&hasher->murmur_fill[hasher->source_level], (unsigned char*)(&rasterizer->state->gstate.transform), sizeof (rasterizer->state->gstate.transform));
        break;
      case CTX_RADIAL_GRADIENT:
          murmur3_32_init (&hasher->murmur_fill[hasher->source_level]);
          murmur3_32_process(&hasher->murmur_fill[hasher->source_level], &rasterizer->state->gstate.global_alpha_u8, 1);
          murmur3_32_process(&hasher->murmur_fill[hasher->source_level], 
                           (uint8_t*)c, sizeof (c->radial_gradient));
          murmur3_32_process (&hasher->murmur_fill[hasher->source_level], (unsigned char*)(&rasterizer->state->gstate.transform), sizeof (rasterizer->state->gstate.transform));
        //ctx_state_gradient_clear_stops (rasterizer->state);
        break;
#if CTX_GRADIENTS
      case CTX_GRADIENT_STOP:
        {
          /* a stop extends the current fill hash instead of re-seeding it */
          float rgba[4]= {ctx_u8_to_float (ctx_arg_u8 (4) ),
                          ctx_u8_to_float (ctx_arg_u8 (4+1) ),
                          ctx_u8_to_float (ctx_arg_u8 (4+2) ),
                          ctx_u8_to_float (ctx_arg_u8 (4+3) )
                         };
          murmur3_32_process(&hasher->murmur_fill[hasher->source_level], 
                           (uint8_t*) &rgba[0], sizeof(rgba));
        }
        break;
#endif
    }

#if 0
  if (command->code == CTX_RESET)
  {
    while (hasher->active_info)
    {
      free (hasher->active_info->data);
      ctx_list_remove (&hasher->active_info, hasher->active_info->data);
    }
  }
#endif

    /* hasher->pos tracks the drawlist entry index of the next command:
     * one entry for the command plus its continuation entries */
    hasher->pos += ctx_conts_for_entry ((CtxEntry*)(command))+1;
  if (command->code == CTX_LINE_WIDTH)
    {
      float x = state->gstate.line_width;
      /* normalize line width according to scaling factor
       */
      x = x * ctx_maxf (ctx_maxf (ctx_fabsf (state->gstate.transform.m[0][0]),
                                  ctx_fabsf (state->gstate.transform.m[0][1]) ),
                        ctx_maxf (ctx_fabsf (state->gstate.transform.m[1][0]),
                                  ctx_fabsf (state->gstate.transform.m[1][1]) ) );
      state->gstate.line_width = x;
    }
}

/* Initialises the memory at rasterizer, which must be large enough for a
 * CtxHasher, as a hasher backend for ctx.
 *
 * The whole CtxHasher is zeroed, the backend callbacks are installed, state
 * is (re)initialised and given a clip covering width x height, and a zeroed
 * array of rows * cols tile hashes is allocated.  The fill and stroke
 * source hashes of the initial level are reset.
 *
 * Returns rasterizer.
 */
static CtxRasterizer *
ctx_hasher_init (CtxRasterizer *rasterizer, Ctx *ctx, CtxState *state, int width, int height, int cols, int rows)
{
  CtxHasher  *hasher  = (CtxHasher*)rasterizer;
  CtxBackend *backend = (CtxBackend*)rasterizer;

  ctx_memset (rasterizer, 0, sizeof (CtxHasher) );

  /* backend vtable */
  backend->ctx     = ctx;
  backend->process = ctx_hasher_process;
  backend->free    = (CtxDestroyNotify)ctx_rasterizer_deinit;
  // XXX need own destructor to not leak ->hashes

  /* rasterizer part */
  rasterizer->edge_list.flags |= CTX_DRAWLIST_EDGE_LIST;
  rasterizer->state = state;
  ctx_state_init (state);
  rasterizer->blit_x      = 0;
  rasterizer->blit_y      = 0;
  rasterizer->blit_width  = width;
  rasterizer->blit_height = height;
  state->gstate.clip_min_x = 0;
  state->gstate.clip_min_y = 0;
  state->gstate.clip_max_x = width - 1;
  state->gstate.clip_max_y = height - 1;
  /* inverted range, so that the first path coordinate sets both bounds */
  rasterizer->scan_min    = 5000;
  rasterizer->scan_max    = -5000;

  /* hasher part */
  hasher->rows   = rows;
  hasher->cols   = cols;
  hasher->pos    = 0;
  hasher->hashes = (uint32_t*)ctx_calloc (4, rows * cols);
  murmur3_32_init (&hasher->murmur_fill[hasher->source_level]);
  murmur3_32_init (&hasher->murmur_stroke[hasher->source_level]);

  return rasterizer;
}

Ctx *ctx_hasher_new (int width, int height, int cols, int rows)
{
  Ctx *ctx           = _ctx_new_drawlist (width, height);
  CtxState    *state = &ctx->state;
  CtxRasterizer *rasterizer = (CtxRasterizer *) ctx_calloc (sizeof (CtxHasher), 1);
  ctx_hasher_init (rasterizer, ctx, state, width, height, cols, rows);
  ctx_set_backend (ctx, (void*)rasterizer);
  return ctx;
}

/* Returns the accumulated hash of the tile at (col, row) of a ctx created
 * with ctx_hasher_new.
 *
 * Coordinates outside the grid are clamped to the nearest tile.  Returns 0
 * when the hasher has no tiles (rows or cols not positive, or no hash
 * array), instead of indexing outside the array.
 */
uint32_t ctx_hasher_get_hash (Ctx *ctx, int col, int row)
{
  CtxHasher *hasher = (CtxHasher*)ctx->backend;

  if (!hasher->hashes || hasher->rows <= 0 || hasher->cols <= 0)
    return 0;

  if (row < 0) row =0;
  if (col < 0) col =0;
  if (row >= hasher->rows) row = hasher->rows-1;
  if (col >= hasher->cols) col = hasher->cols-1;

  return hasher->hashes[(row*hasher->cols+col)];
}

/* Hands the array of recorded CtxCommandState entries over to the caller
 * and stores the number of entries in *count.
 *
 * The hasher forgets the array: its pointer, capacity and entry count are
 * all reset, so that hashing further commands starts a fresh array instead
 * of writing through the cleared pointer.  The array is allocated with
 * realloc in _ctx_add_hash, so the caller presumably releases it with
 * free() - TODO confirm against callers.  A second call without any
 * hashing in between yields NULL and a count of 0.
 */
CtxCommandState *ctx_hasher_get_active_info (Ctx *ctx, int *count)
{
  CtxHasher *hasher = (CtxHasher*)ctx->backend;
  CtxCommandState *ret = hasher->active_info;

  *count = hasher->active_info_count;

  hasher->active_info       = NULL;
  hasher->active_info_size  = 0;
  hasher->active_info_count = 0;
  return ret;
}

#endif

#if CTX_CAIRO

/* State of the cairo backend.  ctx_cairo_process obtains it by casting
 * ctx->backend, which is why the CtxBackend has to be the first member.
 */
typedef struct _CtxCairo CtxCairo;
struct
  _CtxCairo
{
  CtxBackend        backend;  /* generic backend header, must come first */
  cairo_t          *cr;       /* cairo context the commands are replayed on */
  cairo_pattern_t  *pat;      /* presumably the current source pattern - TODO confirm */
  cairo_surface_t  *image;    /* presumably a surface for image sources - TODO confirm */
  int               preserve; /* set to 1 by CTX_PRESERVE */

  // maintain separate fill and stroke state - even though the more limited use of ctx
  // then suffers?
  //
};

/* Backend process callback for the cairo backend.
 *
 * Translates one ctx command into the corresponding cairo call on the
 * cairo_t held by the CtxCairo backend installed on @ctx, then passes the
 * entry on to ctx_process().
 *
 * @ctx: context whose backend is a CtxCairo
 * @c:   command to translate; the ctx_arg_*() accessors used below read
 *       their arguments through the local names `c` / `entry`, so those
 *       names must not be changed.
 *
 * Commands without a cairo equivalent (blend mode, textures, ...) are
 * silently ignored.
 */
static void
ctx_cairo_process (Ctx *ctx, CtxCommand *c)
{
  CtxCairo *ctx_cairo = (CtxCairo*)ctx->backend;
  CtxEntry *entry = (CtxEntry *) &c->entry;

#if CTX_CURRENT_PATH
  ctx_update_current_path (ctx, entry);
#endif

  cairo_t *cr = ctx_cairo->cr;
  switch (entry->code)
    {
      case CTX_LINE_TO:
        cairo_line_to (cr, c->line_to.x, c->line_to.y);
        break;
      case CTX_REL_LINE_TO:
        cairo_rel_line_to (cr, c->rel_line_to.x, c->rel_line_to.y);
        break;
      case CTX_MOVE_TO:
        cairo_move_to (cr, c->move_to.x, c->move_to.y);
        break;
      case CTX_REL_MOVE_TO:
        cairo_rel_move_to (cr, ctx_arg_float (0), ctx_arg_float (1) );
        break;
      case CTX_CURVE_TO:
        cairo_curve_to (cr, ctx_arg_float (0), ctx_arg_float (1),
                        ctx_arg_float (2), ctx_arg_float (3),
                        ctx_arg_float (4), ctx_arg_float (5) );
        break;
      case CTX_REL_CURVE_TO:
        cairo_rel_curve_to (cr,ctx_arg_float (0), ctx_arg_float (1),
                            ctx_arg_float (2), ctx_arg_float (3),
                            ctx_arg_float (4), ctx_arg_float (5) );
        break;
      case CTX_PRESERVE:
        /* remembered until the next fill / stroke / clip */
        ctx_cairo->preserve = 1;
        break;
      case CTX_QUAD_TO:
        {
          /* cairo has no quadratic curves; elevate to a cubic using the
           * current point as the start point */
          double x0, y0;
          cairo_get_current_point (cr, &x0, &y0);
          float cx = ctx_arg_float (0);
          float cy = ctx_arg_float (1);
          float  x = ctx_arg_float (2);
          float  y = ctx_arg_float (3);
          cairo_curve_to (cr,
                          (cx * 2 + x0) / 3.0f, (cy * 2 + y0) / 3.0f,
                          (cx * 2 + x) / 3.0f,           (cy * 2 + y) / 3.0f,
                          x,                              y);
        }
        break;
      case CTX_REL_QUAD_TO:
        {
          /* same as CTX_QUAD_TO with all arguments relative to the
           * current point */
          double x0, y0;
          cairo_get_current_point (cr, &x0, &y0);
          float cx = ctx_arg_float (0) + x0;
          float cy = ctx_arg_float (1) + y0;
          float  x = ctx_arg_float (2) + x0;
          float  y = ctx_arg_float (3) + y0;
          cairo_curve_to (cr,
                          (cx * 2 + x0) / 3.0f, (cy * 2 + y0) / 3.0f,
                          (cx * 2 + x) / 3.0f,           (cy * 2 + y) / 3.0f,
                          x,                              y);
        }
        break;
      /* rotate/scale/translate does not occur in fully minified data stream */
      case CTX_ROTATE:
        cairo_rotate (cr, ctx_arg_float (0) );
        break;
      case CTX_SCALE:
        cairo_scale (cr, ctx_arg_float (0), ctx_arg_float (1) );
        break;
      case CTX_TRANSLATE:
        cairo_translate (cr, ctx_arg_float (0), ctx_arg_float (1) );
        break;
      case CTX_LINE_WIDTH:
        cairo_set_line_width (cr, ctx_arg_float (0) );
        break;
      case CTX_ARC:
#if 0
        fprintf (stderr, "F %2.1f %2.1f %2.1f %2.1f %2.1f %2.1f\n",
                        ctx_arg_float(0),
                        ctx_arg_float(1),
                        ctx_arg_float(2),
                        ctx_arg_float(3),
                        ctx_arg_float(4),
                        ctx_arg_float(5),
                        ctx_arg_float(6));
#endif
        /* argument 5 selects the direction of the arc */
        if (ctx_arg_float (5) == 1)
          cairo_arc (cr, ctx_arg_float (0), ctx_arg_float (1),
                     ctx_arg_float (2), ctx_arg_float (3),
                     ctx_arg_float (4) );
        else
          cairo_arc_negative (cr, ctx_arg_float (0), ctx_arg_float (1),
                              ctx_arg_float (2), ctx_arg_float (3),
                              ctx_arg_float (4) );
        break;
      case CTX_SET_RGBA_U8:
        cairo_set_source_rgba (cr, ctx_u8_to_float (ctx_arg_u8 (0) ),
                               ctx_u8_to_float (ctx_arg_u8 (1) ),
                               ctx_u8_to_float (ctx_arg_u8 (2) ),
                               ctx_u8_to_float (ctx_arg_u8 (3) ) );
        break;
#if 0
      case CTX_SET_RGBA_STROKE: // XXX : we need to maintain
        //       state for the two kinds
        cairo_set_source_rgba (cr, ctx_arg_u8 (0) /255.0,
                               ctx_arg_u8 (1) /255.0,
                               ctx_arg_u8 (2) /255.0,
                               ctx_arg_u8 (3) /255.0);
        break;
#endif
      case CTX_RECTANGLE:
      case CTX_ROUND_RECTANGLE: // XXX - arcs
        cairo_rectangle (cr, c->rectangle.x, c->rectangle.y,
                         c->rectangle.width, c->rectangle.height);
        break;
      case CTX_SET_PIXEL:
        /* emulated by filling a 1x1 rectangle with the given color */
        cairo_set_source_rgba (cr, ctx_u8_to_float (ctx_arg_u8 (0) ),
                               ctx_u8_to_float (ctx_arg_u8 (1) ),
                               ctx_u8_to_float (ctx_arg_u8 (2) ),
                               ctx_u8_to_float (ctx_arg_u8 (3) ) );
        cairo_rectangle (cr, ctx_arg_u16 (2), ctx_arg_u16 (3), 1, 1);
        cairo_fill (cr);
        break;
      case CTX_FILL:
        if (ctx_cairo->preserve)
        {
          cairo_fill_preserve (cr);
          ctx_cairo->preserve = 0;
        }
        else
        {
          cairo_fill (cr);
        }
        break;
      case CTX_STROKE:
        if (ctx_cairo->preserve)
        {
          cairo_stroke_preserve (cr);
          ctx_cairo->preserve = 0;
        }
        else
        {
          cairo_stroke (cr);
        }
        break;
      case CTX_IDENTITY:
        cairo_identity_matrix (cr);
        break;
      case CTX_CLIP:
        if (ctx_cairo->preserve)
        {
          cairo_clip_preserve (cr);
          ctx_cairo->preserve = 0;
        }
        else
        {
          cairo_clip (cr);
        }
        break;
      case CTX_BEGIN_PATH:
        cairo_new_path (cr);
        break;
      case CTX_CLOSE_PATH:
        cairo_close_path (cr);
        break;
      case CTX_SAVE:
        cairo_save (cr);
        break;
      case CTX_RESTORE:
        cairo_restore (cr);
        break;
      case CTX_FONT_SIZE:
        cairo_set_font_size (cr, ctx_arg_float (0) );
        break;
      case CTX_MITER_LIMIT:
        cairo_set_miter_limit (cr, ctx_arg_float (0) );
        break;
      case CTX_LINE_CAP:
        {
          /* unknown values fall back to square caps */
          cairo_line_cap_t cairo_val = CAIRO_LINE_CAP_SQUARE;
          switch (ctx_arg_u8 (0) )
            {
              case CTX_CAP_ROUND:
                cairo_val = CAIRO_LINE_CAP_ROUND;
                break;
              case CTX_CAP_SQUARE:
                cairo_val = CAIRO_LINE_CAP_SQUARE;
                break;
              case CTX_CAP_NONE:
                cairo_val = CAIRO_LINE_CAP_BUTT;
                break;
            }
          cairo_set_line_cap (cr, cairo_val);
        }
        break;
      case CTX_BLEND_MODE:
        {
          // does not map to cairo
        }
        break;
      case CTX_COMPOSITING_MODE:
        {
          /* only source-over and copy are mapped; anything else is OVER */
          cairo_operator_t cairo_val = CAIRO_OPERATOR_OVER;
          switch (ctx_arg_u8 (0) )
            {
              case CTX_COMPOSITE_SOURCE_OVER:
                cairo_val = CAIRO_OPERATOR_OVER;
                break;
              case CTX_COMPOSITE_COPY:
                cairo_val = CAIRO_OPERATOR_SOURCE;
                break;
            }
          cairo_set_operator (cr, cairo_val);
        }
        break;
      case CTX_LINE_JOIN:
        {
          /* unknown values fall back to round joins */
          cairo_line_join_t cairo_val = CAIRO_LINE_JOIN_ROUND;
          switch (ctx_arg_u8 (0) )
            {
              case CTX_JOIN_ROUND:
                cairo_val = CAIRO_LINE_JOIN_ROUND;
                break;
              case CTX_JOIN_BEVEL:
                cairo_val = CAIRO_LINE_JOIN_BEVEL;
                break;
              case CTX_JOIN_MITER:
                cairo_val = CAIRO_LINE_JOIN_MITER;
                break;
            }
          cairo_set_line_join (cr, cairo_val);
        }
        break;
      case CTX_LINEAR_GRADIENT:
        {
          /* replace any previously created pattern */
          if (ctx_cairo->pat)
            {
              cairo_pattern_destroy (ctx_cairo->pat);
              ctx_cairo->pat = NULL;
            }
          ctx_cairo->pat = cairo_pattern_create_linear (ctx_arg_float (0), ctx_arg_float (1),
                           ctx_arg_float (2), ctx_arg_float (3) );
          /* default stops: opaque black at 0, opaque white at 1 */
          cairo_pattern_add_color_stop_rgba (ctx_cairo->pat, 0, 0, 0, 0, 1);
          cairo_pattern_add_color_stop_rgba (ctx_cairo->pat, 1, 1, 1, 1, 1);
          cairo_set_source (cr, ctx_cairo->pat);
        }
        break;
      case CTX_RADIAL_GRADIENT:
        {
          /* replace any previously created pattern */
          if (ctx_cairo->pat)
            {
              cairo_pattern_destroy (ctx_cairo->pat);
              ctx_cairo->pat = NULL;
            }
          ctx_cairo->pat = cairo_pattern_create_radial (ctx_arg_float (0), ctx_arg_float (1),
                           ctx_arg_float (2), ctx_arg_float (3),
                           ctx_arg_float (4), ctx_arg_float (5) );
          cairo_set_source (cr, ctx_cairo->pat);
        }
        break;
      case CTX_GRADIENT_STOP:
        /* a stop that arrives before any gradient was created has no
         * pattern to attach to; ignore it instead of passing NULL to cairo */
        if (ctx_cairo->pat)
          cairo_pattern_add_color_stop_rgba (ctx_cairo->pat,
                                             ctx_arg_float (0),
                                             ctx_u8_to_float (ctx_arg_u8 (4) ),
                                             ctx_u8_to_float (ctx_arg_u8 (5) ),
                                             ctx_u8_to_float (ctx_arg_u8 (6) ),
                                             ctx_u8_to_float (ctx_arg_u8 (7) ) );
        break;
        // XXX  implement TEXTURE
#if 0
      case CTX_LOAD_IMAGE:
        {
          if (image)
            {
              cairo_surface_destroy (image);
              image = NULL;
            }
          if (pat)
            {
              cairo_pattern_destroy (pat);
              pat = NULL;
            }
          image = cairo_image_surface_create_from_png (ctx_arg_string() );
          cairo_set_source_surface (cr, image, ctx_arg_float (0), ctx_arg_float (1) );
        }
        break;
#endif
      case CTX_TEXT:
        /* XXX: implement some linebreaking/wrap, positioning
         *      behavior here?
         */
        cairo_show_text (cr, ctx_arg_string () );
        break;
      case CTX_CONT:
      case CTX_EDGE:
      case CTX_DATA:
      case CTX_DATA_REV:
      case CTX_FLUSH:
      default:
        /* nothing to forward to cairo */
        break;
    }
  ctx_process (ctx_cairo->backend.ctx, entry);
}

/* Destroys the gradient pattern and image surface held by the cairo backend
 * (when present) and frees the backend struct itself. The cairo_t is left
 * untouched.
 */
void ctx_cairo_free (CtxCairo *ctx_cairo)
{
  cairo_pattern_t *pattern = ctx_cairo->pat;
  cairo_surface_t *surface = ctx_cairo->image;

  if (pattern)
    cairo_pattern_destroy (pattern);
  if (surface)
    cairo_surface_destroy (surface);

  free (ctx_cairo);
}

/* Iterates over the drawlist of @ctx (with bitpacked entries expanded) and
 * feeds every command to ctx_cairo_process().
 *
 * @ctx: context whose drawlist is replayed
 * @cr:  cairo context stored in the temporary on-stack backend
 *
 * NOTE(review): ctx_cairo_process() fetches its CtxCairo from ctx->backend,
 * not from the local object prepared here, so rendering only reaches @cr if
 * the backend installed on @ctx is a CtxCairo wrapping it - confirm against
 * callers.
 */
void
ctx_render_cairo (Ctx *ctx, cairo_t *cr)
{
  CtxCairo    ctx_cairo; /* on-stack backend */
  CtxBackend *backend = (CtxBackend*)&ctx_cairo;
  CtxIterator iterator;
  CtxCommand *command;

  /* start from a fully zeroed backend so pat, image, preserve and the
   * callbacks not set below never hold indeterminate values */
  memset (&ctx_cairo, 0, sizeof (ctx_cairo));
  ctx_cairo.cr = cr;
  backend->process = ctx_cairo_process;
  backend->ctx = ctx;
  ctx_iterator_init (&iterator, &ctx->drawlist, 0,
                     CTX_ITERATOR_EXPAND_BITPACK);
  while ( (command = ctx_iterator_next (&iterator) ) )
    { ctx_cairo_process (ctx, command); }
}

/* Creates a 640x480 drawlist-backed Ctx and installs a heap allocated cairo
 * backend on it that forwards commands to @cr. The backend is released
 * through ctx_cairo_free.
 */
Ctx *
ctx_new_for_cairo (cairo_t *cr)
{
  Ctx      *ctx       = _ctx_new_drawlist (640, 480);
  CtxCairo *ctx_cairo = calloc(sizeof(CtxCairo),1);

  /* the embedded CtxBackend is the first member of CtxCairo */
  ctx_cairo->backend.free    = (void*)ctx_cairo_free;
  ctx_cairo->backend.process = ctx_cairo_process;
  ctx_cairo->backend.ctx     = ctx;
  ctx_cairo->cr              = cr;

  ctx_set_backend (ctx, (void*)ctx_cairo);
  return ctx;
}

#endif

#if CTX_EVENTS

/* Finds the longest run of bytes that occurs in both X and Y.
 *
 * @X, @m:  first buffer and its length
 * @Y, @n:  second buffer and its length
 * @offsetY: if non-NULL, receives the start of the match within Y
 * @offsetX: if non-NULL, receives the start of the match within X
 *
 * Returns the length of the match, 0 when there is none. The offsets are
 * only written when a match of at least one byte is found; when several
 * matches share the maximum length the first one encountered (scanning X
 * outermost, Y innermost) is reported.
 *
 * Uses a two-row rolling table of common-suffix lengths, O(m*n) time and
 * O(n) stack space.
 */
static int ctx_find_largest_matching_substring
 (const char *X, const char *Y, int m, int n, int *offsetY, int *offsetX)
{
  /* nothing can match in an empty buffer; this also keeps the variable
   * length array below from getting a non-positive dimension */
  if (m <= 0 || n <= 0)
    return 0;

  int longest_common_suffix[2][n+1];
  int best_length = 0;

  /* row for "zero bytes of X consumed" */
  for (int j = 0; j <= n; j++)
    longest_common_suffix[0][j] = 0;

  for (int i = 1; i <= m; i++)
  {
    int       *cur  = longest_common_suffix[i % 2];
    const int *prev = longest_common_suffix[(i - 1) % 2];

    cur[0] = 0;
    for (int j = 1; j <= n; j++)
    {
      if (X[i-1] != Y[j-1])
      {
        cur[j] = 0;
        continue;
      }
      /* extend the common suffix ending at X[i-2], Y[j-2] */
      cur[j] = prev[j-1] + 1;
      if (best_length < cur[j])
      {
        best_length = cur[j];
        if (offsetY) *offsetY = j - best_length;
        if (offsetX) *offsetX = i - best_length;
      }
    }
  }
  return best_length;
}

/* One piece of an encoded frame, as built by encode_in_terms_of_previous:
 * either a literal range of the new data or a reference into the previous
 * frame.
 */
typedef struct CtxSpan {
  int from_prev; /* non-zero: start/length refer to the previous buffer, zero: to the source buffer */
  int start;     /* byte offset of the span in the buffer selected by from_prev */
  int length;    /* number of bytes covered */
} CtxSpan;

/* Tunables for encode_in_terms_of_previous */
#define CHUNK_SIZE 32       // length of the spans the source is initially cut into
#define MIN_MATCH  7        // minimum match length to be encoded
#define WINDOW_PADDING 16   // look-aside amount

/* Debug-only assertion helper: when enabled, a failed condition appends the
 * line number, the condition text and three integers to /tmp/cdebug.
 * It is compiled out; the active dassert() below expands to nothing.
 */
#if 0
static void _dassert(int line, int condition, const char *str, int foo, int bar, int baz)
{
  if (!condition)
  {
    FILE *f = fopen ("/tmp/cdebug", "a");
    fprintf (f, "%i: %s    %i %i %i\n", line, str, foo, bar, baz);
    fclose (f);
  }
}
#define dassert(cond, foo, bar, baz) _dassert(__LINE__, cond, #cond, foo, bar ,baz)
#endif
#define dassert(cond, foo, bar, baz)

/* XXX repeated substring matching is slow, we'll be
 * better off with a hash-table with linked lists of
 * matching 3-4 characters in previous.. or even
 * a naive approach that expects rough alignment..
 */
static char *encode_in_terms_of_previous (
                const char *src,  int src_len,
                const char *prev, int prev_len,
                int *out_len,
                int max_ticks)
{
  CtxString *string = ctx_string_new ("");
  CtxList *encoded_list = NULL;

  /* TODO : make expected position offset in prev slide based on
   * matches and not be constant */

  long ticks_start = ctx_ticks ();
  int start = 0;
  int length = CHUNK_SIZE;
  for (start = 0; start < src_len; start += length)
  {
    CtxSpan *span = calloc (sizeof (CtxSpan), 1);
    span->start = start;
    if (start + length > src_len)
      span->length = src_len - start;
    else
      span->length = length;
    span->from_prev = 0;
    ctx_list_append (&encoded_list, span);
  }

  for (CtxList *l = encoded_list; l; l = l->next)
  {
    CtxSpan *span = l->data;
    if (!span->from_prev)
    {
      if (span->length >= MIN_MATCH)
      {
         int prev_pos = 0;
         int curr_pos = 0;
         assert(1);
#if 0
         int prev_start =  0;
         int prev_window_length = prev_len;
#else
         int window_padding = WINDOW_PADDING;
         int prev_start = span->start - window_padding;
         if (prev_start < 0)
           prev_start = 0;

         dassert(span->start>=0 , 0,0,0);

         int prev_window_length = prev_len - prev_start;
         if (prev_window_length > span->length + window_padding * 2 + span->start)
           prev_window_length = span->length + window_padding * 2 + span->start;
#endif
         int match_len = 0;
         if (prev_window_length > 0)
           match_len = ctx_find_largest_matching_substring(prev + prev_start, src + span->start, prev_window_length, span->length, &curr_pos, &prev_pos);
#if 1
         prev_pos += prev_start;
#endif

         if (match_len >= MIN_MATCH)
         {
            int start  = span->start;
            int length = span->length;

            span->from_prev = 1;
            span->start     = prev_pos;
            span->length    = match_len;
            dassert (span->start >= 0, prev_pos, prev_start, span->start);
            dassert (span->length > 0, prev_pos, prev_start, span->length);

            if (curr_pos)
            {
              CtxSpan *prev = calloc (sizeof (CtxSpan), 1);
              prev->start = start;
              prev->length =  curr_pos;
            dassert (prev->start >= 0, prev_pos, prev_start, prev->start);
            dassert (prev->length > 0, prev_pos, prev_start, prev->length);
              prev->from_prev = 0;
              ctx_list_insert_before (&encoded_list, l, prev);
            }


            if (match_len + curr_pos < start + length)
            {
              CtxSpan *next = calloc (sizeof (CtxSpan), 1);
              next->start = start + curr_pos + match_len;
              next->length = (start + length) - next->start;
            dassert (next->start >= 0, prev_pos, prev_start, next->start);
      //    dassert (next->length > 0, prev_pos, prev_start, next->length);
              next->from_prev = 0;
              if (next->length)
              {
                if (l->next)
                  ctx_list_insert_before (&encoded_list, l->next, next);
                else
                  ctx_list_append (&encoded_list, next);
              }
              else
                free (next);
            }

            if (curr_pos) // step one item back for forloop
            {
              CtxList *tmp = encoded_list;
              int found = 0;
              while (!found && tmp && tmp->next)
              {
                if (tmp->next == l)
                {
                  l = tmp;
                  break;
                }
                tmp = tmp->next;
              }
            }
         }
      }
    }

    if (ctx_ticks ()-ticks_start > (unsigned long)max_ticks)
      break;
  }

  /* merge adjecant prev span references  */
  {
    for (CtxList *l = encoded_list; l; l = l->next)
    {
      CtxSpan *span = l->data;
again:
      if (l->next)
      {
        CtxSpan *next_span = l->next->data;
        if (span->from_prev && next_span->from_prev &&
            span->start + span->length == 
            next_span->start)
        {
           span->length += next_span->length;
           ctx_list_remove (&encoded_list, next_span);
           goto again;
        }
      }
    }
  }

  while (encoded_list)
  {
    CtxSpan *span = encoded_list->data;
    if (span->from_prev)
    {
      char ref[128];
      sprintf (ref, "%c%i %i%c", CTX_CODEC_CHAR, span->start, span->length, CTX_CODEC_CHAR);
      ctx_string_append_data (string, ref, strlen(ref));
    }
    else
    {
      for (int i = span->start; i< span->start+span->length; i++)
      {
        if (src[i] == CTX_CODEC_CHAR)
        {
          char bytes[2]={CTX_CODEC_CHAR, CTX_CODEC_CHAR};
          ctx_string_append_data (string, bytes, 2);
        }
        else
        {
          ctx_string_append_data (string, &src[i], 1);
        }
      }
    }
    free (span);
    ctx_list_remove (&encoded_list, span);
  }

  char *ret = string->str;
  if (out_len) *out_len = string->length;
  ctx_string_free (string, 0);
  return ret;
}

#if 0 // for documentation/reference purposes
/* Reference decoder for the output of encode_in_terms_of_previous (compiled
 * out). Text between two CTX_CODEC_CHAR bytes is parsed as "<start> <len>"
 * and replaced with that range of prev; an empty reference (start 0, len 0)
 * yields a literal CTX_CODEC_CHAR. All other bytes are copied through.
 * Returns the decoded buffer and stores its length in *out_len if non-NULL.
 */
static char *decode_ctx (const char *encoded, int enc_len, const char *prev, int prev_len, int *out_len)
{
  CtxString *string = ctx_string_new ("");
  char reference[32]="";
  int ref_len = 0;
  int in_ref = 0;
  for (int i = 0; i < enc_len; i++)
  {
    if (encoded[i] == CTX_CODEC_CHAR)
    {
      if (!in_ref)
      {
        /* opening delimiter: start collecting the reference text */
        in_ref = 1;
      }
      else
      {
        /* closing delimiter: resolve the collected reference */
        int start = atoi (reference);
        int len = 0;
        if (strchr (reference, ' '))
          len = atoi (strchr (reference, ' ')+1);

        /* clamp the referenced range to the bounds of prev */
        if (start < 0)start = 0;
        if (start >= prev_len)start = prev_len-1;
        if (len + start > prev_len)
          len = prev_len - start;

        if (start == 0 && len == 0)
          ctx_string_append_byte (string, CTX_CODEC_CHAR);
        else
          ctx_string_append_data (string, prev + start, len);
        ref_len = 0;
        in_ref = 0;
      }
    }
    else
    {
      if (in_ref)
      {
        /* reference text beyond 16 characters is dropped */
        if (ref_len < 16)
        {
          reference[ref_len++] = encoded[i];
          reference[ref_len] = 0;
        }
      }
      else
      ctx_string_append_data (string, &encoded[i], 1);
    }
  }
  /* take the character buffer over from the CtxString wrapper */
  char *ret = string->str;
  if (out_len) *out_len = string->length;
  ctx_string_free (string, 0);
  return ret;
}
#endif

/* Framing written around each serialized frame by ctx_ctx_flush */
#define CTX_START_STRING "U\n"  // or " reset "
#define CTX_END_STRING   "\nX"  // or "\ndone"
#define CTX_END_STRING2  "\n"

/* With CTX_SYNC_FRAMES, ctx_ctx_flush sets this to 0 and polls events until
 * it becomes 1; the code that sets it to 1 is outside this section. */
int ctx_frame_ack = -1;
/* Copy of the last frame sent, used only by the disabled delta-encoding path
 * in ctx_ctx_flush; reset when a "size-changed" event is handled. */
static char *prev_frame_contents = NULL;
static int   prev_frame_len = 0;

/* Set to 1 by ctx_new_ctx; selects the native event path of the ctx backend */
static int ctx_native_events = 1;

/* Flush callback of the "ctx" terminal backend: writes the current drawlist
 * to stdout as a text stream framed by CTX_START_STRING / CTX_END_STRING,
 * preceded by terminal control sequences, then flushes stdout. With
 * CTX_SYNC_FRAMES it additionally blocks, consuming events, until
 * ctx_frame_ack becomes 1.
 *
 * NOTE(review): "\e" is a compiler extension; "\033" is the portable
 * spelling of ESC.
 */
static void ctx_ctx_flush (Ctx *ctx)
{
  CtxCtx *ctxctx = (CtxCtx*)ctx->backend;
#if 0
  FILE *debug = fopen ("/tmp/ctx-debug", "a");
  fprintf (debug, "------\n");
#endif

  /* ESC[?201h / ESC[?200h are private modes - presumably they switch the
   * receiving terminal to native events / ctx protocol input; TODO confirm.
   * ESC[H homes the cursor and ESC[?25l hides it. */
  if (ctx_native_events)
    fprintf (stdout, "\e[?201h");
  fprintf (stdout, "\e[H\e[?25l\e[?200h");
#if 1
  /* active path: stream the whole frame, uncompressed */
  fprintf (stdout, CTX_START_STRING);
  ctx_render_stream (ctxctx->backend.ctx, stdout, 0);
  fprintf (stdout, CTX_END_STRING);
#else
  /* disabled path: render to a string so the frame could be delta-encoded
   * against the previous one (the encoding itself is also switched off by
   * the "&& 0" below) */
  {
    int cur_frame_len = 0;
    char *rest = ctx_render_string (ctxctx->ctx, 0, &cur_frame_len);
    char *cur_frame_contents = malloc (cur_frame_len + strlen(CTX_START_STRING) + strlen (CTX_END_STRING) + 1);

    cur_frame_contents[0]=0;
    strcat (cur_frame_contents, CTX_START_STRING);
    strcat (cur_frame_contents, rest);
    strcat (cur_frame_contents, CTX_END_STRING);
    free (rest);
    cur_frame_len += strlen (CTX_START_STRING) + strlen (CTX_END_STRING);

    if (prev_frame_contents && 0)  // XXX : 
    {
      char *encoded;
      int encoded_len = 0;
      //uint64_t ticks_start = ctx_ticks ();

      encoded = encode_in_terms_of_previous (cur_frame_contents, cur_frame_len, prev_frame_contents, prev_frame_len, &encoded_len, 1000 * 10);
//    encoded = strdup (cur_frame_contents);
//    encoded_len = strlen (encoded);
      //uint64_t ticks_end = ctx_ticks ();

      fwrite (encoded, encoded_len, 1, stdout);
//    fwrite (encoded, cur_frame_len, 1, stdout);
#if 0
      fprintf (debug, "---prev-frame(%i)\n%s", (int)strlen(prev_frame_contents), prev_frame_contents);
      fprintf (debug, "---cur-frame(%i)\n%s", (int)strlen(cur_frame_contents), cur_frame_contents);
      fprintf (debug, "---encoded(%.4f %i)---\n%s--------\n",
                      (ticks_end-ticks_start)/1000.0,
                      (int)strlen(encoded), encoded);
#endif
      free (encoded);
    }
    else
    {
      fwrite (cur_frame_contents, cur_frame_len, 1, stdout);
    }

    /* keep this frame as the reference for the next one */
    if (prev_frame_contents)
      free (prev_frame_contents);
    prev_frame_contents = cur_frame_contents;
    prev_frame_len = cur_frame_len;
  }
#endif
  fprintf (stdout, CTX_END_STRING2);
#if 0
    fclose (debug);
#endif

#if CTX_SYNC_FRAMES
  /* ESC[5n requests a device status report; wait here until the event
   * handling has flagged the frame as acknowledged */
  fprintf (stdout, "\e[5n");
  fflush (stdout);

  ctx_frame_ack = 0;
  do {
     ctx_consume_events (ctxctx->backend.ctx);
  } while (ctx_frame_ack != 1);
#else
  fflush (stdout);
#endif
}

/* Free callback of the "ctx" terminal backend: runs nc_at_exit() and
 * releases the backend struct.
 */
void ctx_ctx_free (CtxCtx *ctx)
{
  nc_at_exit ();
  free (ctx);
  /* we're not destroying the ctx member; this function is called in ctx's teardown */
}

/* Event pump of the "ctx" terminal backend.
 *
 * Refreshes the cached terminal geometry, then drains pending native events
 * and dispatches them to the pointer / key / message handlers of @ctx.
 * The loop ends when ctx_native_get_event() returns NULL or an "idle"
 * event is seen.
 *
 * Events are text lines of the form "<type> [x] [y] [button]".
 */
void ctx_ctx_consume_events (Ctx *ctx)
{
  CtxCtx *ctxctx = (CtxCtx*)ctx->backend;
  const char *event = NULL;
#if CTX_AUDIO
  ctx_ctx_pcm (ctx);
#endif
  assert (ctx_native_events);

  { /* XXX : this is a work-around for signals not working properly, we are polling the
       size with an ioctl per consume-events
       */
    struct winsize ws;
    /* only trust ws when the query succeeded; on failure it is
     * uninitialized and the previous geometry is kept */
    if (ioctl(0,TIOCGWINSZ,&ws) == 0)
    {
      ctxctx->cols = ws.ws_col;
      ctxctx->rows = ws.ws_row;
      ctx_set_size (ctx, ws.ws_xpixel, ws.ws_ypixel);
    }
  }

  if (ctx_native_events)
    do {

      float x = 0, y = 0;
      int b = 0;
      char event_type[128]="";
      event = ctx_native_get_event (ctx, 1000/120);

      if (event)
      {
      /* the field width keeps an over-long first token from overflowing
       * event_type (127 characters + terminating NUL) */
      sscanf (event, "%127s %f %f %i", event_type, &x, &y, &b);
      if (!strcmp (event_type, "idle"))
      {
        /* nothing more pending: leave the loop */
        event = NULL;
      }
      else if (!strcmp (event_type, "pp"))
      {
        ctx_pointer_press (ctx, x, y, b, 0);
      }
      else if (!strcmp (event_type, "pd")||
               !strcmp (event_type, "pm"))
      {
        ctx_pointer_motion (ctx, x, y, b, 0);
      }
      else if (!strcmp (event_type, "pr"))
      {
        ctx_pointer_release (ctx, x, y, b, 0);
      }
      else if (!strcmp (event_type, "message"))
      {
        ctx_incoming_message (ctx, event + strlen ("message"), 0);
      } else if (!strcmp (event, "size-changed"))
      {
        /* clear the screen, re-read the geometry and force a redraw */
        fprintf (stdout, "\e[H\e[2J\e[?25l");
        ctxctx->cols = ctx_terminal_cols ();
        ctxctx->rows = ctx_terminal_rows ();

        ctx_set_size (ctx, ctx_terminal_width(), ctx_terminal_height());

        /* the cached previous frame no longer matches the new size */
        if (prev_frame_contents)
          free (prev_frame_contents);
        prev_frame_contents = NULL;
        prev_frame_len = 0;
        ctx_queue_draw (ctx);

      //   ctx_key_press(ctx,0,"size-changed",0);
      }
      else if (!strcmp (event_type, "keyup"))
      {
        /* for key events the first numeric field carries the key value */
        char buf[4]={ (char) x, 0 };
        ctx_key_up (ctx, (int)x, buf, 0);
      }
      else if (!strcmp (event_type, "keydown"))
      {
        char buf[4]={ (char) x, 0 };
        ctx_key_down (ctx, (int)x, buf, 0);
      }
      else
      {
        /* anything else is forwarded verbatim as a key press */
        ctx_key_press (ctx, 0, event, 0);
      }
      }
    } while (event);
}

/* Creates a Ctx that uses the "ctx" terminal backend.
 *
 * @width, @height: size in pixels; when either is <= 0 the size, the
 *                  column/row counts and the font size are taken from the
 *                  terminal instead.
 *
 * Switches the terminal to the alternate screen (ESC[?1049h) as a side
 * effect. Returns the new context; the backend is released through
 * ctx_ctx_free.
 */
Ctx *ctx_new_ctx (int width, int height)
{
  float font_size = 12.0;
  Ctx *ctx = _ctx_new_drawlist (width, height);
  CtxCtx *ctxctx = (CtxCtx*)calloc (sizeof (CtxCtx), 1);
  CtxBackend *backend = (CtxBackend*)ctxctx;
  fprintf (stdout, "\e[?1049h");
  fflush (stdout);
  //fprintf (stderr, "\e[H");
  //fprintf (stderr, "\e[2J");
  ctx_native_events = 1;
  if (width <= 0 || height <= 0)
  {
    ctxctx->cols = ctx_terminal_cols ();
    ctxctx->rows = ctx_terminal_rows ();
    width  = ctx->width  = ctx_terminal_width ();
    height = ctx->height = ctx_terminal_height ();
    /* one text row high; keep the default when the terminal reports no
     * rows, which would otherwise be a division by zero */
    if (ctxctx->rows > 0)
      font_size = height / ctxctx->rows;
    ctx_font_size (ctx, font_size);
  }
  else
  {
    ctx->width  = width;
    ctx->height = height;
    ctxctx->cols   = width / 80;
    ctxctx->rows   = height / 24;
  }
  backend->ctx = ctx;
  if (!ctx_native_events)
    _ctx_mouse (ctx, NC_MOUSE_DRAG);
  backend->flush = ctx_ctx_flush;
  backend->free  = (void(*)(void *))ctx_ctx_free;
  backend->process = (void*)ctx_drawlist_process;
  backend->consume_events = ctx_ctx_consume_events;
  ctx_set_backend (ctx, ctxctx);
  ctx_set_size (ctx, width, height);
  return ctx;
}

void ctx_ctx_pcm (Ctx *ctx);


#endif

#if CTX_TILED
/* Counts the render threads whose rendered_frame equals the frame that is
 * currently requested (render_frame).
 */
static inline int
ctx_tiled_threads_done (CtxTiled *tiled)
{
  int done = 0;
  int thread_no = 0;

  while (thread_no < _ctx_max_threads)
  {
    if (tiled->rendered_frame[thread_no] == tiled->render_frame)
      done++;
    thread_no++;
  }
  return done;
}

/* Debug switch: when non-zero, ctx_tiled_flush brightens byte 2 of every
 * pixel before each new frame is rendered. */
int _ctx_damage_control = 0;

/* Shuts down a tiled backend: asks the render threads to quit, waits until
 * all of them have reported exit, then releases the pixel buffer, the
 * per-thread host contexts, the drawlist copy and the active_info array.
 * The CtxTiled struct itself is not freed here.
 */
void ctx_tiled_free (CtxTiled *tiled)
{
  /* raise the quit flag and wake every thread parked on the condition */
  tiled->quit = 1;
  mtx_lock (&tiled->mtx);
  cnd_broadcast (&tiled->cond);
  mtx_unlock (&tiled->mtx);

  /* poll (1ms steps) until every worker has bumped thread_quit */
  while (tiled->thread_quit < _ctx_max_threads)
  {
    usleep (1000);
  }

  /* hosts and the drawlist copy are only torn down together with pixels */
  if (tiled->pixels)
  {
    free (tiled->pixels);
    tiled->pixels = NULL;

    for (int thread_no = 0; thread_no < _ctx_max_threads; thread_no++)
    {
      Ctx *host = tiled->host[thread_no];
      if (host)
        ctx_free (host);
      tiled->host[thread_no] = NULL;
    }

    ctx_free (tiled->ctx_copy);
  }

  if (tiled->active_info)
  {
    free (tiled->active_info);
    tiled->active_info = 0;
    tiled->active_info_count = 0;
  }

  // leak?
}
/* ICC profile data; when sdl_icc_length is non-zero the tiled render threads
 * pass it to ctx_colorspace() as the device RGB color space of each host. */
static unsigned char *sdl_icc = NULL;
static long sdl_icc_length = 0;

/* Flush callback of the tiled backend.
 *
 * If the previously requested frame has been shown, snapshots the drawlist
 * into ctx_copy, works out which tiles changed (via the hasher when
 * _ctx_enable_hash_cache is set, otherwise all tiles), distributes the dirty
 * tiles over the render threads, bumps render_frame and wakes the threads.
 * Otherwise the frame is dropped and "{drip}" is printed to stderr.
 * In both cases the drawlist of ctx is cleared afterwards.
 */
static void ctx_tiled_flush (Ctx *ctx)
{
  CtxTiled *tiled = (CtxTiled*)ctx->backend;
  mtx_lock (&tiled->mtx);
  if (tiled->shown_frame == tiled->render_frame)
  {
    int dirty_tiles = 0;
    /* NOTE(review): 9 is presumably the size of one drawlist entry in bytes - confirm */
    ctx_set_drawlist (tiled->ctx_copy, &tiled->backend.ctx->drawlist.entries[0],
                                           tiled->backend.ctx->drawlist.count * 9);
    if (_ctx_enable_hash_cache)
    {
      /* replay the frame into a throw-away hasher to get one hash per tile */
      Ctx *hasher = ctx_hasher_new (tiled->width, tiled->height,
                        CTX_HASH_COLS, CTX_HASH_ROWS);
      ctx_render_ctx (tiled->ctx_copy, hasher);

      for (int row = 0; row < CTX_HASH_ROWS; row++)
      {
        for (int col = 0; col < CTX_HASH_COLS; col++)
        {
          uint32_t new_hash = ctx_hasher_get_hash (hasher, col, row);
          /* a tile is dirty when its hash is non-zero and differs from the stored one */
          if (new_hash && new_hash != tiled->hashes[(row * CTX_HASH_COLS + col)])
          {
            tiled->hashes[(row * CTX_HASH_COLS +  col)] = new_hash;
            tiled->tile_affinity[row * CTX_HASH_COLS + col] = 1;
            dirty_tiles++;
          }
          else
          {
            /* -1 marks a tile that no thread needs to render */
            tiled->tile_affinity[row * CTX_HASH_COLS + col] = -1;
          }
        }
      }
      if (tiled->active_info)
      {
        free (tiled->active_info);
        tiled->active_info = 0;
        tiled->active_info_count = 0;
      }

      /* take over the hasher's active_info array, then tear the hasher down */
      tiled->active_info = ctx_hasher_get_active_info (hasher, &tiled->active_info_count);
      free (((CtxHasher*)(hasher->backend))->hashes);
      ctx_free (hasher);
    }
    else
    {
      /* no hash cache: every tile is rendered each frame */
      for (int row = 0; row < CTX_HASH_ROWS; row++)
        for (int col = 0; col < CTX_HASH_COLS; col++)
          {
            tiled->tile_affinity[row * CTX_HASH_COLS + col] = 1;
            dirty_tiles++;
          }
    }
    int dirty_no = 0;
    if (dirty_tiles)
    for (int row = 0; row < CTX_HASH_ROWS; row++)
      for (int col = 0; col < CTX_HASH_COLS; col++)
      {
        if (tiled->tile_affinity[row * CTX_HASH_COLS + col] != -1)
        {
          /* spread dirty tiles evenly: the n-th dirty tile goes to thread
           * n * threads / dirty_tiles */
          tiled->tile_affinity[row * CTX_HASH_COLS + col] = dirty_no * (_ctx_max_threads) / dirty_tiles;
          dirty_no++;
          /* grow the bounding box of dirty tiles */
          if (col > tiled->max_col) tiled->max_col = col;
          if (col < tiled->min_col) tiled->min_col = col;
          if (row > tiled->max_row) tiled->max_row = row;
          if (row < tiled->min_row) tiled->min_row = row;
        }
      }

    if (_ctx_damage_control)
    {
      /* debug aid: move byte 2 of every pixel halfway towards 255, so that
       * tiles which are not re-rendered become visible */
      for (int i = 0; i < tiled->width * tiled->height; i++)
      {
        tiled->pixels[i*4+2]  = (tiled->pixels[i*4+2] + 255)/2;
      }
    }

    /* request the next frame from the render threads */
    tiled->render_frame = ++tiled->frame;

#if 0

          //if (tiled->tile_affinity[hno]==no)
          {
            int x0 = ((tiled->width)/CTX_HASH_COLS) * 0;
            int y0 = ((tiled->height)/CTX_HASH_ROWS) * 0;
            int width = tiled->width / CTX_HASH_COLS;
            int height = tiled->height / CTX_HASH_ROWS;
            Ctx *host = tiled->host[0];

            CtxRasterizer *rasterizer = (CtxRasterizer*)host->backend;
            int swap_red_green = ((CtxRasterizer*)(host->backend))->swap_red_green;
            ctx_rasterizer_init (rasterizer,
                                 host, tiled->backend.ctx, &host->state,
                                 &tiled->pixels[tiled->width * 4 * y0 + x0 * 4],
                                 0, 0, 1, 1,
                                 tiled->width*4, CTX_FORMAT_BGRA8,
                                 tiled->antialias);
            ((CtxRasterizer*)(host->backend))->swap_red_green = swap_red_green;
            if (sdl_icc_length)
              ctx_colorspace (host, CTX_COLOR_SPACE_DEVICE_RGB, sdl_icc, sdl_icc_length);

            ctx_translate (host, -x0, -y0);
            ctx_render_ctx (tiled->ctx_copy, host);
          }
#endif
    cnd_broadcast (&tiled->cond);
  }
  else
  {
    /* the previous frame has not been shown yet: drop this one */
    fprintf (stderr, "{drip}");
  }
  mtx_unlock (&tiled->mtx);
  ctx_drawlist_clear (ctx);
}

/* Body of one tiled render thread.
 *
 * @data: data[0] is the thread number (stored as an integer in the pointer),
 *        data[1] is the CtxTiled the thread works for.
 *
 * Loops until tiled->quit is set: waits on the condition variable, and when
 * a new frame is requested renders every tile whose tile_affinity equals
 * this thread's number into the shared pixel buffer, then records the frame
 * as rendered. Increments thread_quit on exit.
 */
static
void ctx_tiled_render_fun (void **data)
{
  int      no = (size_t)data[0];
  CtxTiled *tiled = data[1];

  while (!tiled->quit)
  {
    Ctx *host = tiled->host[no];

    /* NOTE(review): the wait is not wrapped in a predicate loop, so a
     * broadcast that happens while this thread is rendering appears to be
     * missed until the next one - confirm this is acceptable */
    mtx_lock (&tiled->mtx);
    cnd_wait(&tiled->cond, &tiled->mtx);
    mtx_unlock (&tiled->mtx);

    if (tiled->render_frame != tiled->rendered_frame[no])
    {
      /* hno is the linear tile index, advanced together with col */
      int hno = 0;
      for (int row = 0; row < CTX_HASH_ROWS; row++)
        for (int col = 0; col < CTX_HASH_COLS; col++, hno++)
        {
          if (tiled->tile_affinity[hno]==no)
          {
            /* pixel rectangle covered by this tile */
            int x0 = ((tiled->width)/CTX_HASH_COLS) * col;
            int y0 = ((tiled->height)/CTX_HASH_ROWS) * row;
            int width = tiled->width / CTX_HASH_COLS;
            int height = tiled->height / CTX_HASH_ROWS;

            CtxRasterizer *rasterizer = (CtxRasterizer*)host->backend;

            /* one bit per tile; assumes the tile count fits in the bits of
             * an int - TODO confirm against CTX_HASH_ROWS * CTX_HASH_COLS */
            int active_mask = 1 << hno;

#if CTX_TILED_MERGE_HORIZONTAL_NEIGHBORS
            /* render runs of horizontally adjacent tiles owned by this
             * thread in one go */
            while (col + 1 < CTX_HASH_COLS &&
                   tiled->tile_affinity[hno+1] == no)
            {
              width += tiled->width / CTX_HASH_COLS;
              col++;
              hno++;
              active_mask |= 1 << hno;
            }
#endif
            /* re-initialising the rasterizer would lose swap_red_green, so
             * it is saved and restored around the call */
            int swap_red_green = ((CtxRasterizer*)(host->backend))->swap_red_green;
            ctx_rasterizer_init (rasterizer,
                                 host, tiled->backend.ctx, &host->state,
                                 &tiled->pixels[tiled->width * 4 * y0 + x0 * 4],
                                 0, 0, width, height,
                                 tiled->width*4, CTX_FORMAT_BGRA8,
                                 tiled->antialias);
            ((CtxRasterizer*)(host->backend))->swap_red_green = swap_red_green;
            if (sdl_icc_length)
              ctx_colorspace (host, CTX_COLOR_SPACE_DEVICE_RGB, sdl_icc, sdl_icc_length);

            /* the rasterizer targets the tile's own origin, so shift the
             * drawing by the tile offset */
            ctx_translate (host, -x0, -y0);
            ctx_render_ctx_masked (tiled->ctx_copy, host, tiled->active_info, tiled->active_info_count, active_mask);
          }
        }
      tiled->rendered_frame[no] = tiled->render_frame;
    }
  }
  tiled->thread_quit++; // need atomic?
}


/* State of the software cursor that was last XORed into the framebuffer:
 * whether it is currently drawn, and the position and shape it was drawn
 * with (needed to XOR the very same pixels again when removing it). */
static int       ctx_tiled_cursor_drawn   = 0;
static int       ctx_tiled_cursor_drawn_x = 0;
static int       ctx_tiled_cursor_drawn_y = 0;
static CtxCursor ctx_tiled_cursor_drawn_shape = 0;


/* Number of consecutive ctx_tiled_draw_cursor() calls with an unchanged
 * cursor position and shape after which the cursor is hidden. */
#define CTX_FB_HIDE_CURSOR_FRAMES 200

/* Count of consecutive draw calls with unchanged position/shape; starts at
 * the threshold, so the cursor is hidden until the first change. */
static int ctx_tiled_cursor_same_pos = CTX_FB_HIDE_CURSOR_FRAMES;

/* Tests one axis of a resize cursor: a thin bar along the `along` axis plus
 * the arrow heads at its ends.  `across` is the perpendicular coordinate and
 * `is_all` selects the head placement used by CTX_CURSOR_RESIZE_ALL.
 * Returns 1 when the coordinate is part of the glyph, 0 otherwise.
 */
static inline int ctx_cursor_axis_hit (int along, int across, int size, int is_all)
{
  /* central bar */
  if (abs (along) < size/2 && abs (across) < size/2 && abs (across) < size/10)
    return 1;

  /* arrow heads */
  if ((abs (along) - size/ (is_all?2:2.7)) >= 0 &&
      abs (across) < (size/2.8)-(abs (along) - (size/2)))
    return 1;

  return 0;
}

/* Returns non-zero when the cursor-relative coordinate (x, y) belongs to the
 * software cursor glyph for `shape`, drawn at the given `size`.
 *
 * The arrow is tested in the positive quadrant; the resize glyphs are
 * centered on the origin, the diagonal ones being tested by rotating the
 * coordinate by 45 degrees first.  Any other shape yields a one pixel
 * checker pattern.
 */
static inline int ctx_is_in_cursor (int x, int y, int size, CtxCursor shape)
{
  const int is_all = (shape == CTX_CURSOR_RESIZE_ALL);

  switch (shape)
  {
    case CTX_CURSOR_ARROW:
      if (x > ((size * 4)-y*4))
        return 0;
      return (x < y && x > y / 16);

    case CTX_CURSOR_RESIZE_SE:
    case CTX_CURSOR_RESIZE_NW:
    case CTX_CURSOR_RESIZE_SW:
    case CTX_CURSOR_RESIZE_NE:
      {
        /* rotate into the frame of the horizontal glyph */
        float angle = -45.0/180 * M_PI;
        if (shape == CTX_CURSOR_RESIZE_SW || shape == CTX_CURSOR_RESIZE_NE)
          angle = -angle;

        float c = ctx_cosf (angle);
        float s = ctx_sinf (angle);
        int turned_x = x * c - y * s;
        int turned_y = y * c + x * s;
        x = turned_x;
        y = turned_y;
      }
      /*FALLTHROUGH*/
    case CTX_CURSOR_RESIZE_W:
    case CTX_CURSOR_RESIZE_E:
    case CTX_CURSOR_RESIZE_ALL:
      if (ctx_cursor_axis_hit (x, y, size, is_all))
        return 1;
      if (!is_all)
        break;
      /* FALLTHROUGH */
    case CTX_CURSOR_RESIZE_S:
    case CTX_CURSOR_RESIZE_N:
      if (ctx_cursor_axis_hit (y, x, size, is_all))
        return 1;
      break;

    default:
      return (x ^ y) & 1;
  }
  return 0;
}

static void ctx_tiled_undraw_cursor (CtxTiled *tiled)
{
    int cursor_size = ctx_height (tiled->backend.ctx) / 28;

    if (ctx_tiled_cursor_drawn)
    {
      int no = 0;
      int startx = -cursor_size;
      int starty = -cursor_size;
      if (ctx_tiled_cursor_drawn_shape == CTX_CURSOR_ARROW)
        startx = starty = 0;

      for (int y = starty; y < cursor_size; y++)
      for (int x = startx; x < cursor_size; x++, no+=4)
      {
        if (x + ctx_tiled_cursor_drawn_x < tiled->width && y + ctx_tiled_cursor_drawn_y < tiled->height)
        {
          if (ctx_is_in_cursor (x, y, cursor_size, ctx_tiled_cursor_drawn_shape))
          {
            int o = ((ctx_tiled_cursor_drawn_y + y) * tiled->width + (ctx_tiled_cursor_drawn_x + x)) * 4;
            tiled->fb[o+0]^=0x88;
            tiled->fb[o+1]^=0x88;
            tiled->fb[o+2]^=0x88;
          }
        }
      }

    ctx_tiled_cursor_drawn = 0;
    }
}

static void ctx_tiled_draw_cursor (CtxTiled *tiled)
{
    int cursor_x    = ctx_pointer_x (tiled->backend.ctx);
    int cursor_y    = ctx_pointer_y (tiled->backend.ctx);
    int cursor_size = ctx_height (tiled->backend.ctx) / 28;
    CtxCursor cursor_shape = tiled->backend.ctx->cursor;
    int no = 0;

    if (cursor_x == ctx_tiled_cursor_drawn_x &&
        cursor_y == ctx_tiled_cursor_drawn_y &&
        cursor_shape == ctx_tiled_cursor_drawn_shape)
      ctx_tiled_cursor_same_pos ++;
    else
      ctx_tiled_cursor_same_pos = 0;

    if (ctx_tiled_cursor_same_pos >= CTX_FB_HIDE_CURSOR_FRAMES)
    {
      if (ctx_tiled_cursor_drawn)
        ctx_tiled_undraw_cursor (tiled);
      return;
    }

    /* no need to flicker when stationary, motion flicker can also be removed
     * by combining the previous and next position masks when a motion has
     * occured..
     */
    if (ctx_tiled_cursor_same_pos && ctx_tiled_cursor_drawn)
      return;

    ctx_tiled_undraw_cursor (tiled);

    no = 0;

    int startx = -cursor_size;
    int starty = -cursor_size;

    if (cursor_shape == CTX_CURSOR_ARROW)
      startx = starty = 0;

    for (int y = starty; y < cursor_size; y++)
      for (int x = startx; x < cursor_size; x++, no+=4)
      {
        if (x + cursor_x < tiled->width && y + cursor_y < tiled->height)
        {
          if (ctx_is_in_cursor (x, y, cursor_size, cursor_shape))
          {
            int o = ((cursor_y + y) * tiled->width + (cursor_x + x)) * 4;
            tiled->fb[o+0]^=0x88;
            tiled->fb[o+1]^=0x88;
            tiled->fb[o+2]^=0x88;
          }
        }
      }
    ctx_tiled_cursor_drawn = 1;
    ctx_tiled_cursor_drawn_x = cursor_x;
    ctx_tiled_cursor_drawn_y = cursor_y;
    ctx_tiled_cursor_drawn_shape = cursor_shape;
}

#endif





#if CTX_EVENTS
#if CTX_HEADLESS

#include <fcntl.h>
#include <sys/ioctl.h>
#include <signal.h>

/* Process-wide clipboard contents; owned by the functions below. */
static char *ctx_fb_clipboard = NULL;

/* Replaces the stored clipboard string with a private copy of `text`.
 * Passing NULL just clears the clipboard.  `ctx` is not used.
 */
static void ctx_headless_set_clipboard (Ctx *ctx, const char *text)
{
  free (ctx_fb_clipboard);
  ctx_fb_clipboard = text ? strdup (text) : NULL;
}

/* Returns a newly allocated copy of the stored clipboard string, or of the
 * empty string when nothing is stored.  The caller owns the result.
 * `ctx` is not used.
 */
static char *ctx_headless_get_clipboard (Ctx *ctx)
{
  const char *stored = ctx_fb_clipboard ? ctx_fb_clipboard : "";
  return strdup (stored);
}

/* Returns the global mouse file descriptor; `ctx` is not consulted. */
static int ctx_headless_get_mice_fd (Ctx *ctx)
{
  int mice_fd = _ctx_mice_fd;
  return mice_fd;
}

/* Backend state of the headless (offscreen) tiled renderer. */
typedef struct _CtxHeadless CtxHeadless;
struct _CtxHeadless
{
   /* must stay the first member: the headless code casts CtxHeadless
    * pointers to CtxTiled pointers */
   CtxTiled tiled;
   /* NOTE(review): the keyboard related fields below are not referenced by
    * the headless code in this part of the file - presumably kept for
    * layout parity with the other framebuffer backends; confirm. */
   int           key_balance;
   int           key_repeat;
   int           lctrl;
   int           lalt;
   int           rctrl;


   int          fb_fd;
   char        *fb_path;
   int          fb_bits;        /* set to 32 by ctx_new_headless */
   int          fb_bpp;         /* set to 4 by ctx_new_headless */
   int          fb_mapped_size; /* width * height * 4, size of the pixel buffers */
   int          vt;
   cnd_t        cond;
   mtx_t        mtx;
   int          tty;
};

/* Unsigned integer type selected by pointer width (32 or 64 bit). */
#if UINTPTR_MAX == 0xffFFffFF
  #define fbdrmuint_t uint32_t
#elif UINTPTR_MAX == 0xffFFffFFffFFffFF
  #define fbdrmuint_t uint64_t
#endif

/* Copies the freshly rendered region from tiled->pixels to tiled->fb.
 *
 * Nothing happens when the current render_frame has already been shown.
 * With `block` set, the render threads are polled every 500us and after more
 * than 2000 polls the frame is marked as shown without copying; without
 * `block` the function returns immediately unless all threads are done.
 * The copy itself only takes place while tiled->vt_active is set.
 */
static void ctx_headless_show_frame (CtxHeadless *fb, int block)
{
  CtxTiled *tiled = (void*)fb;

  if (tiled->shown_frame == tiled->render_frame)
    return;

  if (!block)
  {
    if (ctx_tiled_threads_done (tiled) != _ctx_max_threads)
      return;
  }
  else
  {
    for (int polls = 1;
         ctx_tiled_threads_done (tiled) != _ctx_max_threads;
         polls++)
    {
      usleep (500);
      if (polls > 2000)
      {
        /* give up on this frame */
        tiled->shown_frame = tiled->render_frame;
        return;
      }
    }
  }

  if (!tiled->vt_active)
    return;

  /* pixels to skip above/below and left/right of the dirty tile range */
  int pre_skip  = tiled->min_row * tiled->height/CTX_HASH_ROWS * tiled->width;
  int post_skip = (CTX_HASH_ROWS-tiled->max_row-1) * tiled->height/CTX_HASH_ROWS * tiled->width;
  int rows      = ((tiled->width * tiled->height) - pre_skip - post_skip)/tiled->width;
  int col_pre_skip  = tiled->min_col * tiled->width/CTX_HASH_COLS;
  int col_post_skip = (CTX_HASH_COLS-tiled->max_col-1) * tiled->width/CTX_HASH_COLS;

  if (_ctx_damage_control)
    pre_skip = col_pre_skip = col_post_skip = 0;
  if (pre_skip < 0)
    pre_skip = 0;

  /* min_row == 100 is the reset value: no dirty rows recorded, nothing to copy */
  if (tiled->min_row != 100)
  {
    tiled->min_row = 100;
    tiled->max_row = 0;
    tiled->min_col = 100;
    tiled->max_col = 0;

    int left   = col_pre_skip * 4;
    int right  = col_post_skip * 4;
    int span   = tiled->width * 4 - left - right;
    int stride = left + span + right;

    for (int row = 0; row < rows; row++)
    {
      int offset = pre_skip * 4 + row * stride + left;
      memcpy (tiled->fb + offset, tiled->pixels + offset, span);
    }
  }

  tiled->shown_frame = tiled->render_frame;
}

/* Backend consume_events hook: shows a finished frame without blocking and
 * then checks for pending events on the tiled backend.
 */
void ctx_headless_consume_events (Ctx *ctx)
{
  CtxHeadless *headless = (CtxHeadless *) ctx->backend;

  ctx_headless_show_frame (headless, 0);
  event_check_pending (&headless->tiled);
}

/* Backend reset hook: waits (blocking) for the pending frame to be shown. */
inline static void ctx_headless_reset (Ctx *ctx)
{
  CtxHeadless *headless = (CtxHeadless*)ctx->backend;
  ctx_headless_show_frame (headless, 1);
}

/* Backend free hook of the headless renderer.
 *
 * The framebuffer is released here rather than by the tiled renderer, since
 * other backends do not allocate it with calloc.  ctx_babl_exit() is tied to
 * the framebuffer being present so that it runs only once even when this
 * function is called twice.
 */
void ctx_headless_free (CtxHeadless *fb)
{
  CtxTiled *tiled = (CtxTiled*)fb;

  if (tiled->fb != NULL)
  {
    free (tiled->fb);
    tiled->fb = NULL;
    ctx_babl_exit ();
  }

  ctx_tiled_free (tiled);
}

//static unsigned char *fb_icc = NULL;
//static long fb_icc_length = 0;

/* The most recently created headless backend. */
static CtxHeadless *ctx_headless = NULL;


/* Creates a drawing context backed by an offscreen, multi-threaded tiled
 * renderer.
 *
 * @width, @height: size in pixels; when either is negative the size
 *                  defaults to 1920x780.
 *
 * Returns the new context, or NULL when memory for the backend or its pixel
 * buffers cannot be allocated or when the rasterizer is compiled out.
 */
Ctx *ctx_new_headless (int width, int height)
{
#if CTX_RASTERIZER
  if (width < 0 || height < 0)
  {
    width = 1920;
    height = 780;
  }

  CtxHeadless *fb = calloc (1, sizeof (CtxHeadless));
  if (!fb)
    return NULL;
  CtxBackend *backend = (CtxBackend*)fb;
  CtxTiled *tiled     = (CtxTiled*)fb;

  tiled->width = width;
  tiled->height = height;

  fb->fb_bits        = 32;
  fb->fb_bpp         = 4;
  fb->fb_mapped_size = width * height * 4;

  /* allocate both buffers up front so that a failure can be unwound
   * before anything else refers to the backend */
  tiled->fb     = calloc (fb->fb_mapped_size, 1);
  tiled->pixels = calloc (fb->fb_mapped_size, 1);
  if (!tiled->fb || !tiled->pixels)
  {
    free (tiled->fb);
    free (tiled->pixels);
    free (fb);
    return NULL;
  }
  ctx_headless = fb;
  tiled->show_frame = (void*)ctx_headless_show_frame;

  ctx_babl_init ();

 // ctx_get_contents ("file:///tmp/ctx.icc", &sdl_icc, &sdl_icc_length);
 //
 // not to be done for headless, we want sRGB thumbs - at least not device specific
 // perhaps rec2020 or similar?

  backend->ctx = _ctx_new_drawlist (width, height);
  backend->flush = ctx_tiled_flush;
  backend->process = (void*)ctx_drawlist_process;
  backend->reset = ctx_headless_reset;
  backend->free  = (void*)ctx_headless_free;
  backend->set_clipboard = ctx_headless_set_clipboard;
  backend->get_clipboard = ctx_headless_get_clipboard;
  backend->consume_events = ctx_headless_consume_events;

  tiled->ctx_copy = ctx_new (width, height, "drawlist");
  tiled->width    = width;
  tiled->height   = height;

  ctx_set_backend (backend->ctx, fb);
  ctx_set_backend (tiled->ctx_copy, fb);
  ctx_set_texture_cache (tiled->ctx_copy, backend->ctx);

  /* one rasterizing context per render thread; its target is re-initialised
   * per tile by the render thread */
  for (int i = 0; i < _ctx_max_threads; i++)
  {
    tiled->host[i] = ctx_new_for_framebuffer (tiled->pixels,
                   tiled->width/CTX_HASH_COLS, tiled->height/CTX_HASH_ROWS,
                   tiled->width * 4, CTX_FORMAT_BGRA8); // this format
                                  // is overridden in the thread
    ((CtxRasterizer*)(tiled->host[i]->backend))->swap_red_green = 1;
    ctx_set_texture_source (tiled->host[i], backend->ctx);
  }

  mtx_init (&tiled->mtx, mtx_plain);
  cnd_init (&tiled->cond);

  /* each expansion has its own static argument array, so every thread gets
   * a stable {index, backend} pair */
#define start_thread(no)\
  if(_ctx_max_threads>no){ \
    static void *args[2]={(void*)no, };\
    thrd_t tid;\
    args[1]=fb;\
    thrd_create (&tid, (void*)ctx_tiled_render_fun, args);\
  }
  start_thread(0);
  start_thread(1);
  start_thread(2);
  start_thread(3);
  start_thread(4);
  start_thread(5);
  start_thread(6);
  start_thread(7);
  start_thread(8);
  start_thread(9);
  start_thread(10);
  start_thread(11);
  start_thread(12);
  start_thread(13);
  start_thread(14);
  start_thread(15);
#undef start_thread

  tiled->vt_active = 1;

  return backend->ctx;
#else
  return NULL;
#endif
}
#endif
#endif

#if CTX_EVENTS

#if !__COSMOPOLITAN__
#include <fcntl.h>
#include <sys/ioctl.h>
#include <signal.h>
#endif


#if CTX_KMS || CTX_FB

/* Returns the global mouse file descriptor; `ctx` is not consulted. */
static int ctx_fb_get_mice_fd (Ctx *ctx)
{
  int mice_fd = _ctx_mice_fd;
  return mice_fd;
}

/* Fills `fd` with the descriptors to poll for input and stores their number
 * in `count`: always standard input, followed by the mouse descriptor when
 * that is non-zero.  `fd` must have room for two entries.
 */
static void ctx_fb_get_event_fds (Ctx *ctx, int *fd, int *count)
{
  int used = 0;
  int mice_fd = ctx_fb_get_mice_fd (ctx);

  fd[used++] = STDIN_FILENO;
  if (mice_fd)
    fd[used++] = mice_fd;

  *count = used;
}
#endif

#if CTX_FB

#ifdef __linux__
  #include <linux/fb.h>
  #include <linux/vt.h>
  #include <linux/kd.h>
#endif

#ifdef __NetBSD__
  typedef uint8_t unchar;
  typedef uint8_t u_char;
  typedef uint16_t ushort;
  typedef uint32_t u_int;
  typedef uint64_t u_long;
  #include <sys/param.h>
  #include <dev/wscons/wsdisplay_usl_io.h>
  #include <dev/wscons/wsconsio.h>
  #include <dev/wscons/wsksymdef.h>
#endif

  #include <sys/mman.h>

/* Backend state of the framebuffer device (/dev/fb0 and friends) renderer. */
typedef struct _CtxFb CtxFb;
struct _CtxFb
{
   /* must stay the first member: the fb code casts CtxFb pointers to
    * CtxTiled and CtxBackend pointers */
   CtxTiled tiled;
   /* NOTE(review): keyboard state, not referenced in this part of the file -
    * presumably used by the keyboard event source; confirm. */
   int           key_balance;
   int           key_repeat;
   int           lctrl;
   int           lalt;
   int           rctrl;


   int          fb_fd;          /* descriptor of the opened framebuffer device */
   char        *fb_path;        /* strdup()ed path of the opened device */
   int          fb_bits;        /* pixel depth used to pick the conversion in
                                   ctx_fb_show_frame (32, 24, 16, 15 or 8) */
   int          fb_bpp;         /* bytes per pixel */
   int          fb_mapped_size; /* length of the mmap()ed framebuffer */
   int          vt;             /* active virtual terminal (from VT_GETSTATE) */
   int          tty;
   cnd_t        cond;
   mtx_t        mtx;
#if __linux__
   struct       fb_var_screeninfo vinfo; /* filled by FBIOGET_VSCREENINFO */
   struct       fb_fix_screeninfo finfo; /* filled by FBIOGET_FSCREENINFO */
#endif
};

/* Unsigned integer type selected by pointer width (32 or 64 bit). */
#if UINTPTR_MAX == 0xffFFffFF
  #define fbdrmuint_t uint32_t
#elif UINTPTR_MAX == 0xffFFffFFffFFffFF
  #define fbdrmuint_t uint64_t
#endif


/* Issues FBIOPAN_DISPLAY with the stored variable screen info on Linux;
 * does nothing on other platforms.
 */
static void ctx_fb_flip (CtxFb *fb)
{
#ifdef __linux__
  ioctl (fb->fb_fd, FBIOPAN_DISPLAY, &fb->vinfo);
#else
  (void) fb;
#endif
}

/* Transfers the rendered frame from tiled->pixels to the mapped framebuffer,
 * converting to the device depth, and redraws the software cursor.
 *
 * When the current render_frame has already been shown, only the cursor is
 * refreshed (and only for the non-blocking call).  With `block` set the
 * render threads are polled every 500us, giving up on the frame after more
 * than 2000 polls; without `block` the function returns unless all threads
 * are done.  Nothing is written while tiled->vt_active is unset.
 */
static void ctx_fb_show_frame (CtxFb *fb, int block)
{
  CtxTiled *tiled = (void*)fb;
  if (tiled->shown_frame == tiled->render_frame)
  {
    if (block == 0) // consume event call
    {
      ctx_tiled_draw_cursor (tiled);
      ctx_fb_flip (fb);
    }
    return;
  }

  if (block)
  {
    int count = 0;
    while (ctx_tiled_threads_done (tiled) != _ctx_max_threads)
    {
      usleep (500);
      count ++;
      if (count > 2000)
      {
        /* timed out: mark the frame as shown without copying it */
        tiled->shown_frame = tiled->render_frame;
        return;
      }
    }
  }
  else
  {
    if (ctx_tiled_threads_done (tiled) != _ctx_max_threads)
      return;
  }

    if (tiled->vt_active)
    {
       /* pixel counts to skip before/after the dirty tile rows, and per
        * scanline left/right of the dirty tile columns */
       int pre_skip = tiled->min_row * tiled->height/CTX_HASH_ROWS * tiled->width;
       int post_skip = (CTX_HASH_ROWS-tiled->max_row-1) * tiled->height/CTX_HASH_ROWS * tiled->width;

       int rows = ((tiled->width * tiled->height) - pre_skip - post_skip)/tiled->width;

       int col_pre_skip = tiled->min_col * tiled->width/CTX_HASH_COLS;
       int col_post_skip = (CTX_HASH_COLS-tiled->max_col-1) * tiled->width/CTX_HASH_COLS;
       if (_ctx_damage_control)
       {
         pre_skip = post_skip = col_pre_skip = col_post_skip = 0;
       }

       if (pre_skip < 0) pre_skip = 0;
       if (post_skip < 0) post_skip = 0;


       /* min_row == 100 is the reset value assigned below: no dirty rows
        * were recorded, so no pixels are copied in this branch */
       if (tiled->min_row == 100){
          pre_skip = 0;
          post_skip = 0;
#ifdef __linux__
           __u32 dummy = 0;
          ioctl (fb->fb_fd, FBIO_WAITFORVSYNC, &dummy);
#endif
          ctx_tiled_undraw_cursor (tiled);
       }
       else
       {

      /* reset the dirty range for the next frame */
      tiled->min_row = 100;
      tiled->max_row = 0;
      tiled->min_col = 100;
      tiled->max_col = 0;
#ifdef __linux__
    {
     __u32 dummy = 0;
     ioctl (fb->fb_fd, FBIO_WAITFORVSYNC, &dummy);
    }
#endif
     /* the cursor is XORed into the framebuffer; remove it before the
      * underlying pixels are replaced */
     ctx_tiled_undraw_cursor (tiled);
     switch (fb->fb_bits)
     {
       case 32:
#if 1
         {
           /* same pixel layout: copy only the dirty column span of each
            * dirty row */
           uint8_t *dst = tiled->fb + pre_skip * 4;
           uint8_t *src = tiled->pixels + pre_skip * 4;
           int pre = col_pre_skip * 4;
           int post = col_post_skip * 4;
           int core = tiled->width * 4 - pre - post;
           for (int i = 0; i < rows; i++)
           {
             dst  += pre;
             src  += pre;
             memcpy (dst, src, core);
             src  += core;
             dst  += core;
             dst  += post;
             src  += post;
           }
         }
#else
         { int count = tiled->width * tiled->height;
           const uint32_t *src = (void*)tiled->pixels;
           uint32_t *dst = (void*)tiled->fb;
           count-= pre_skip;
           src+= pre_skip;
           dst+= pre_skip;
           count-= post_skip;
           while (count -- > 0)
           {
             dst[0] = ctx_swap_red_green2 (src[0]);
             src++;
             dst++;
           }
         }
#endif
         break;
         /* XXX  :  note: converting a scanline (or all) to the target format
          * and then doing a bulk memcpy might be faster (at least with some
          * /dev/fbs)  */
       case 24:
         /* drop the fourth byte of every source pixel */
         { int count = tiled->width * tiled->height;
           const uint8_t *src = tiled->pixels;
           uint8_t *dst = tiled->fb;
           count-= pre_skip;
           src+= pre_skip * 4;
           dst+= pre_skip * 3;
           count-= post_skip;
           while (count -- > 0)
           {
             dst[0] = src[0];
             dst[1] = src[1];
             dst[2] = src[2];
             dst+=3;
             src+=4;
           }
         }
         break;
       case 16:
         /* pack to 5/6/5 bits, stored low byte first */
         { int count = tiled->width * tiled->height;
           const uint8_t *src = tiled->pixels;
           uint8_t *dst = tiled->fb;
           count-= post_skip;
           count-= pre_skip;
           src+= pre_skip * 4;
           dst+= pre_skip * 2;
           while (count -- > 0)
           {
             int big = ((src[0] >> 3)) +
                ((src[1] >> 2)<<5) +
                ((src[2] >> 3)<<11);
             dst[0] = big & 255;
             dst[1] = big >>  8;
             dst+=2;
             src+=4;
           }
         }
         break;
       case 15:
         /* NOTE(review): src[1] is reduced to 6 bits (>> 2) but placed in a
          * 5 bit wide slot, so its top bit overlaps the field at bit 10 -
          * confirm whether >> 3 was intended. */
         { int count = tiled->width * tiled->height;
           const uint8_t *src = tiled->pixels;
           uint8_t *dst = tiled->fb;
           count-= post_skip;
           count-= pre_skip;
           src+= pre_skip * 4;
           dst+= pre_skip * 2;
           while (count -- > 0)
           {
             int big = ((src[2] >> 3)) +
                       ((src[1] >> 2)<<5) +
                       ((src[0] >> 3)<<10);
             dst[0] = big & 255;
             dst[1] = big >>  8;
             dst+=2;
             src+=4;
           }
         }
         break;
       case 8:
         /* pack to one byte per pixel: 3 + 3 + 2 bits */
         { int count = tiled->width * tiled->height;
           const uint8_t *src = tiled->pixels;
           uint8_t *dst = tiled->fb;
           count-= post_skip;
           count-= pre_skip;
           src+= pre_skip * 4;
           dst+= pre_skip;
           while (count -- > 0)
           {
             dst[0] = ((src[0] >> 5)) +
                      ((src[1] >> 5)<<3) +
                      ((src[2] >> 6)<<6);
             dst+=1;
             src+=4;
           }
         }
         break;
     }
    }
    /* force the cursor to be drawn again on top of the new contents */
    ctx_tiled_cursor_drawn = 0;
    ctx_tiled_draw_cursor (tiled);
    ctx_fb_flip (fb);
    tiled->shown_frame = tiled->render_frame;
  }
}

/* Backend consume_events hook: shows a finished frame (or refreshes the
 * cursor) without blocking and then checks for pending events.
 */
void ctx_fb_consume_events (Ctx *ctx)
{
  CtxFb *fbdev = (CtxFb *) ctx->backend;

  ctx_fb_show_frame (fbdev, 0);
  event_check_pending (&fbdev->tiled);
}

/* Backend reset hook: waits (blocking) for the pending frame to be shown. */
inline static void ctx_fb_reset (Ctx *ctx)
{
  CtxFb *fbdev = (CtxFb*)ctx->backend;
  ctx_fb_show_frame (fbdev, 1);
}

/* Backend free hook of the framebuffer renderer.
 *
 * Switches the console back to text mode, unmaps and closes the framebuffer
 * device, releases the stored device path, resets the terminal with
 * "stty sane" and finally hands the tiled state to ctx_tiled_free().
 */
void ctx_fb_free (CtxFb *fb)
{
  CtxTiled*tiled=(CtxTiled*)fb;

//#ifdef __linux__
  ioctl (0, KDSETMODE, KD_TEXT);
//#endif
#ifdef __NetBSD__
  {
   int mode = WSDISPLAYIO_MODE_EMUL;
   ioctl (fb->fb_fd, WSDISPLAYIO_SMODE, &mode);
  }
#endif
  munmap (tiled->fb, fb->fb_mapped_size);
  close (fb->fb_fd);
  /* the path was strdup()ed by ctx_new_fb and is owned by this struct;
   * release it before the struct is handed to ctx_tiled_free */
  free (fb->fb_path);
  fb->fb_path = NULL;
  if (system("stty sane")){};
  ctx_tiled_free ((CtxTiled*)fb);
  //free (fb);
  ctx_babl_exit ();
}

//static unsigned char *fb_icc = NULL;
//static long fb_icc_length = 0;

/* The framebuffer backend created by ctx_new_fb; consulted by the VT switch
 * signal handler. */
static CtxFb *ctx_fb = NULL;
#ifdef __linux__
/* Signal handler for virtual terminal switches.
 *
 * SIGUSR1 releases the display: rendering to the device is disabled and the
 * console put back in text mode.  Any other signal acknowledges acquisition:
 * graphics mode is restored, a new frame is queued and every tile hash is
 * changed so that all tiles get redrawn.
 */
static void fb_vt_switch_cb (int sig)
{
  CtxTiled   *tiled   = (void*)ctx_fb;
  CtxBackend *backend = (void*)ctx_fb;

  if (sig == SIGUSR1)
  {
    ioctl (0, VT_RELDISP, 1);
    tiled->vt_active = 0;
    ioctl (0, KDSETMODE, KD_TEXT);
    return;
  }

  ioctl (0, VT_RELDISP, VT_ACKACQ);
  tiled->vt_active = 1;
  /* queue a draw */
  tiled->render_frame = ++tiled->frame;
  ioctl (0, KDSETMODE, KD_GRAPHICS);

  backend->ctx->dirty=1;
  for (int row = 0; row < CTX_HASH_ROWS; row++)
  {
    for (int col = 0; col < CTX_HASH_COLS; col++)
    {
      int tile = (row * CTX_HASH_COLS + col);
      tiled->hashes[tile] += 1;
    }
  }
}
#endif


#if CTX_RASTERIZER
/* Releases everything a partially constructed framebuffer backend owns.
 *
 * Only valid before the render threads have been started.  With
 * `restore_console` set, the console is switched back from graphics to text
 * mode first (the descriptor is still open at that point).
 */
static void ctx_fb_new_abort (CtxFb *fb, int restore_console)
{
  CtxTiled *tiled = (CtxTiled*)fb;

  if (restore_console)
  {
#ifdef __linux__
    ioctl (0, KDSETMODE, KD_TEXT);
#endif
#ifdef __NetBSD__
    int mode = WSDISPLAYIO_MODE_EMUL;
    ioctl (fb->fb_fd, WSDISPLAYIO_SMODE, &mode);
#endif
  }
  if (tiled->fb)
    munmap (tiled->fb, fb->fb_mapped_size);
  free (tiled->pixels);
  if (fb->fb_fd > 0)
    close (fb->fb_fd);
  free (fb->fb_path);
  if (ctx_fb == fb)
    ctx_fb = NULL;
  free (fb);
}
#endif

/* Creates a drawing context that renders to the system framebuffer device.
 *
 * @width, @height: on Linux and NetBSD these are replaced by the geometry
 *                  reported by the device.
 *
 * The device is opened and memory mapped, the console is switched to
 * graphics mode, the render threads and the keyboard/mouse event sources are
 * started and, on Linux, VT switch handling is installed.
 *
 * Returns the new context, or NULL when the device cannot be opened, queried
 * or mapped, when memory is exhausted, when the VT ioctls fail or when the
 * rasterizer is compiled out.
 */
Ctx *ctx_new_fb (int width, int height)
{
#if CTX_RASTERIZER
  CtxFb *fb = calloc (sizeof (CtxFb), 1);
  if (!fb)
    return NULL;
  CtxTiled *tiled = (void*)fb;
  CtxBackend *backend = (void*)fb;
  ctx_fb = fb;
  {
#ifdef __linux__
  const char *dev_path = "/dev/fb0";
#endif
#ifdef __NetBSD__
  const char *dev_path = "/dev/ttyE0";
#endif
#ifdef __OpenBSD__
  const char *dev_path = "/dev/ttyC0";
#endif
  fb->fb_fd = open (dev_path, O_RDWR);
  if (fb->fb_fd > 0)
    fb->fb_path = strdup (dev_path);
  else
  {
#ifdef __linux__
    fb->fb_fd = open ("/dev/graphics/fb0", O_RDWR);
    if (fb->fb_fd > 0)
    {
      fb->fb_path = strdup ("/dev/graphics/fb0");
    }
    else
#endif
    {
      ctx_fb_new_abort (fb, 0);
      return NULL;
    }
  }

#ifdef __linux__
  if (ioctl(fb->fb_fd, FBIOGET_FSCREENINFO, &fb->finfo))
    {
      fprintf (stderr, "error getting fbinfo\n");
      ctx_fb_new_abort (fb, 0);
      return NULL;
    }

   if (ioctl(fb->fb_fd, FBIOGET_VSCREENINFO, &fb->vinfo))
     {
       fprintf (stderr, "error getting fbinfo\n");
       ctx_fb_new_abort (fb, 0);
       return NULL;
     }
  ioctl (0, KDSETMODE, KD_GRAPHICS);

//fprintf (stderr, "%s\n", fb->fb_path);
  width = tiled->width = fb->vinfo.xres;
  height = tiled->height = fb->vinfo.yres;

  fb->fb_bits = fb->vinfo.bits_per_pixel;
//fprintf (stderr, "fb bits: %i\n", fb->fb_bits);

  /* tell 15 bit and 16 bit modes apart by the sum of the channel widths */
  if (fb->fb_bits == 16)
    fb->fb_bits =
      fb->vinfo.red.length +
      fb->vinfo.green.length +
      fb->vinfo.blue.length;
   else if (fb->fb_bits == 8)
  {
    unsigned short red[256],  green[256],  blue[256];
  //  unsigned short original_red[256];
  //  unsigned short original_green[256];
  //  unsigned short original_blue[256];
    struct fb_cmap cmap = {0, 256, red, green, blue, NULL};
  //  struct fb_cmap original_cmap = {0, 256, original_red, original_green, original_blue, NULL};
    int i;

    /* do we really need to restore it ? */
   // if (ioctl (fb->fb_fd, FBIOPUTCMAP, &original_cmap) == -1)
   // {
   //   fprintf (stderr, "palette initialization problem %i\n", __LINE__);
   // }

    /* install a 3-3-2 bit palette */
    for (i = 0; i < 256; i++)
    {
      red[i]   = ((( i >> 5) & 0x7) << 5) << 8;
      green[i] = ((( i >> 2) & 0x7) << 5) << 8;
      blue[i]  = ((( i >> 0) & 0x3) << 6) << 8;
    }

    if (ioctl (fb->fb_fd, FBIOPUTCMAP, &cmap) == -1)
    {
      fprintf (stderr, "palette initialization problem %i\n", __LINE__);
    }
  }

  fb->fb_bpp = fb->vinfo.bits_per_pixel / 8;
  fb->fb_mapped_size = fb->finfo.smem_len;
#endif

#ifdef __NetBSD__
  struct wsdisplay_fbinfo finfo;

  int mode = WSDISPLAYIO_MODE_DUMBFB;
  //int mode = WSDISPLAYIO_MODE_MAPPED;
  if (ioctl (fb->fb_fd, WSDISPLAYIO_SMODE, &mode)) {
    ctx_fb_new_abort (fb, 0);
    return NULL;
  }
  if (ioctl (fb->fb_fd, WSDISPLAYIO_GINFO, &finfo)) {
    fprintf (stderr, "ioctl: WSIDSPLAYIO_GINFO failed\n");
    ctx_fb_new_abort (fb, 1);
    return NULL;
  }

  width = tiled->width = finfo.width;
  height = tiled->height = finfo.height;
  fb->fb_bits = finfo.depth;
  fb->fb_bpp = (fb->fb_bits + 1) / 8;
  fb->fb_mapped_size = width * height * fb->fb_bpp;


  if (fb->fb_bits == 8)
  {
    uint8_t red[256],  green[256],  blue[256];
    struct wsdisplay_cmap cmap;
    cmap.red = red;
    cmap.green = green;
    cmap.blue = blue;
    cmap.count = 256;
    cmap.index = 0;
    for (int i = 0; i < 256; i++)
    {
      red[i]   = ((( i >> 5) & 0x7) << 5);
      green[i] = ((( i >> 2) & 0x7) << 5);
      blue[i]  = ((( i >> 0) & 0x3) << 6);
    }

    ioctl (fb->fb_fd, WSDISPLAYIO_PUTCMAP, &cmap);
  }
#endif


  tiled->fb = mmap (NULL, fb->fb_mapped_size, PROT_READ|PROT_WRITE, MAP_SHARED, fb->fb_fd, 0);
  }
  /* mmap reports failure with MAP_FAILED, not with a NULL pointer */
  if ((void*)tiled->fb == MAP_FAILED)
  {
    tiled->fb = NULL;
    ctx_fb_new_abort (fb, 1);
    return NULL;
  }
  tiled->pixels = calloc (fb->fb_mapped_size, 1);
  if (!tiled->pixels)
  {
    ctx_fb_new_abort (fb, 1);
    return NULL;
  }
  tiled->show_frame = (void*)ctx_fb_show_frame;

  ctx_babl_init ();

  ctx_get_contents ("file:///tmp/ctx.icc", &sdl_icc, &sdl_icc_length);

  backend->ctx    = _ctx_new_drawlist (width, height);
  tiled->ctx_copy = _ctx_new_drawlist (width, height);
  tiled->width    = width;
  tiled->height   = height;

  ctx_set_backend (backend->ctx, fb);
  ctx_set_backend (tiled->ctx_copy, fb);
  ctx_set_texture_cache (tiled->ctx_copy, backend->ctx);


  backend->flush = ctx_tiled_flush;
  backend->process = (void*)ctx_drawlist_process;

  backend->reset = ctx_fb_reset;
  backend->free  = (void*)ctx_fb_free;
  backend->set_clipboard = ctx_headless_set_clipboard;
  backend->get_clipboard = ctx_headless_get_clipboard;
  backend->consume_events = ctx_fb_consume_events;
  backend->get_event_fds = ctx_fb_get_event_fds;

  ctx_set_size (backend->ctx, width, height);
  ctx_set_size (tiled->ctx_copy, width, height);

  /* one rasterizing context per render thread; its target is re-initialised
   * per tile by the render thread */
  for (int i = 0; i < _ctx_max_threads; i++)
  {
    tiled->host[i] = ctx_new_for_framebuffer (tiled->pixels,
                   tiled->width/CTX_HASH_COLS, tiled->height/CTX_HASH_ROWS,
                   tiled->width * 4, CTX_FORMAT_BGRA8); // this format
                                  // is overridden in the thread
    ((CtxRasterizer*)(tiled->host[i]->backend))->swap_red_green = 1;
    ctx_set_texture_source (tiled->host[i], backend->ctx);
  }

  mtx_init (&tiled->mtx, mtx_plain);
  cnd_init (&tiled->cond);

  /* each expansion has its own static argument array, so every thread gets
   * a stable {index, backend} pair */
#define start_thread(no)\
  if(_ctx_max_threads>no){ \
    static void *args[2]={(void*)no, };\
    thrd_t tid;\
    args[1]=fb;\
    thrd_create (&tid, (void*)ctx_tiled_render_fun, args);\
  }
  start_thread(0);
  start_thread(1);
  start_thread(2);
  start_thread(3);
  start_thread(4);
  start_thread(5);
  start_thread(6);
  start_thread(7);
  start_thread(8);
  start_thread(9);
  start_thread(10);
  start_thread(11);
  start_thread(12);
  start_thread(13);
  start_thread(14);
  start_thread(15);
#undef start_thread

  EvSource *kb = evsource_kb_new ();
  if (kb)
  {
    tiled->evsource[tiled->evsource_count++] = kb;
    kb->priv = fb;
  }
  EvSource *mice  = evsource_mice_new ();
  if (mice)
  {
    tiled->evsource[tiled->evsource_count++] = mice;
    mice->priv = fb;
  }

  tiled->vt_active = 1;
#ifdef __linux__
  ioctl(0, KDSETMODE, KD_GRAPHICS);
  signal (SIGUSR1, fb_vt_switch_cb);
  signal (SIGUSR2, fb_vt_switch_cb);

  /* NOTE(review): the two failure returns below leave the backend, its
   * render threads and the graphics console mode in place; tearing them down
   * needs the thread shutdown logic, which is not visible from here. */
  struct vt_stat st;
  if (ioctl (0, VT_GETSTATE, &st) == -1)
  {
    ctx_log ("VT_GET_MODE on vt %i failed\n", fb->vt);
    return NULL;
  }

  fb->vt = st.v_active;

  /* have the kernel signal this process on VT release/acquire */
  struct vt_mode mode;
  mode.mode   = VT_PROCESS;
  mode.relsig = SIGUSR1;
  mode.acqsig = SIGUSR2;
  if (ioctl (0, VT_SETMODE, &mode) < 0)
  {
    ctx_log ("VT_SET_MODE on vt %i failed\n", fb->vt);
    return NULL;
  }
#endif

  return backend->ctx;
#else
  return NULL;
#endif
}
#endif
#endif

#if CTX_EVENTS

#if !__COSMOPOLITAN__
#include <fcntl.h>
#include <sys/ioctl.h>
#include <signal.h>
#endif



#if CTX_KMS
#ifdef __linux__
  #include <linux/kd.h>
#endif
  //#include <linux/fb.h>
  //#include <linux/vt.h>
  #include <sys/mman.h>
  //#include <threads.h>
  #include <libdrm/drm.h>
  #include <libdrm/drm_mode.h>


/* Backend state of the KMS/DRM renderer. */
typedef struct _CtxKMS CtxKMS;
struct _CtxKMS
{
   /* NOTE(review): presumably has to stay the first member, as in the other
    * tiled backends, which cast their state to CtxTiled - confirm. */
   CtxTiled tiled;
   /* NOTE(review): keyboard state, not referenced in this part of the file -
    * presumably used by the keyboard event source; confirm. */
   int           key_balance;
   int           key_repeat;
   int           lctrl;
   int           lalt;
   int           rctrl;

   int          fb_fd;   /* DRM device descriptor; 0 is used as "not open" */
   char        *fb_path;
   int          fb_bits;
   int          fb_bpp;
   int          fb_mapped_size;
   //struct       fb_var_screeninfo vinfo;
   //struct       fb_fix_screeninfo finfo;
   int          vt;
   int          tty;
   int          is_kms;  /* non-zero: ctx_kms_flip goes through ctx_fbkms_flip */
   cnd_t        cond;
   mtx_t        mtx;
   struct drm_mode_crtc crtc; /* CRTC configuration submitted by ctx_fbkms_flip */
};


/* Unsigned integer type selected by pointer width (32 or 64 bit); used to
 * pass buffer addresses to the DRM ioctls. */
#if UINTPTR_MAX == 0xffFFffFF
  #define fbdrmuint_t uint32_t
#elif UINTPTR_MAX == 0xffFFffFFffFFffFF
  #define fbdrmuint_t uint64_t
#endif

/* Opens /dev/dri/card0, becomes DRM master and sets up a 32 bit dumb buffer
 * for the first usable (connected) connector.
 *
 * @fb:     backend state; fb->fb_fd and fb->crtc are filled in.
 * @width:  receives the width of the chosen mode.
 * @height: receives the height of the chosen mode.
 *
 * Returns the memory mapped dumb buffer, or NULL on failure, in which case
 * the device is closed again and fb->fb_fd is left at 0 (the "not open"
 * value the other ctx_fbkms_* functions test for).
 */
void *ctx_fbkms_new (CtxKMS *fb, int *width, int *height)
{
   /* capacity of the fixed size object id arrays */
   enum { CTX_KMS_MAX_IDS = 40 };

   /* DRM object ids are 32 bit regardless of pointer width.  The connector
    * array is static since the entry picked below stays referenced by
    * fb->crtc (set_connectors_ptr) for the flip callback. */
   static uint32_t res_conn_buf[CTX_KMS_MAX_IDS]={0};
   uint32_t res_fb_buf[CTX_KMS_MAX_IDS]={0};
   uint32_t res_crtc_buf[CTX_KMS_MAX_IDS]={0};
   uint32_t res_enc_buf[CTX_KMS_MAX_IDS]={0};
   struct   drm_mode_card_res res={0};

   /* per connector buffers, sized from the counts the kernel reports */
   struct drm_mode_modeinfo *conn_mode_buf = NULL;
   uint32_t *conn_prop_buf    = NULL;
   uint64_t *conn_propval_buf = NULL;
   uint32_t *conn_enc_buf     = NULL;

   void   *base       = MAP_FAILED;
   size_t  base_size  = 0;
   int     got_master = 0;

   fb->fb_fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);
   if (fb->fb_fd <= 0)
   {
     /* open() fails with -1; descriptor 0 cannot be used either, since 0
      * doubles as the "not open" marker */
     if (fb->fb_fd == 0)
       close (0);
     fb->fb_fd = 0;
     return NULL;
   }

   if (ioctl(fb->fb_fd, DRM_IOCTL_SET_MASTER, 0))
     goto cleanup;
   got_master = 1;

   /* first call: obtain the counts */
   if (ioctl(fb->fb_fd, DRM_IOCTL_MODE_GETRESOURCES, &res))
     goto cleanup;
   if (res.count_fbs        > CTX_KMS_MAX_IDS ||
       res.count_crtcs      > CTX_KMS_MAX_IDS ||
       res.count_connectors > CTX_KMS_MAX_IDS ||
       res.count_encoders   > CTX_KMS_MAX_IDS)
     goto cleanup;

   /* second call: obtain the ids */
   res.fb_id_ptr=(fbdrmuint_t)res_fb_buf;
   res.crtc_id_ptr=(fbdrmuint_t)res_crtc_buf;
   res.connector_id_ptr=(fbdrmuint_t)res_conn_buf;
   res.encoder_id_ptr=(fbdrmuint_t)res_enc_buf;
   if(ioctl(fb->fb_fd, DRM_IOCTL_MODE_GETRESOURCES, &res))
      goto cleanup;


   for (unsigned int i = 0;
        i < res.count_connectors && i < CTX_KMS_MAX_IDS;
        i++)
   {
     struct drm_mode_get_connector conn={0};

     conn.connector_id=res_conn_buf[i];

     /* first call: obtain the counts */
     if (ioctl(fb->fb_fd, DRM_IOCTL_MODE_GETCONNECTOR, &conn))
       goto cleanup;

     /* drop the buffers of the previous connector and allocate ones that
      * match this connector; one spare entry keeps index 0 valid and the
      * allocation size non-zero */
     free (conn_mode_buf);
     free (conn_prop_buf);
     free (conn_propval_buf);
     free (conn_enc_buf);
     conn_mode_buf    = calloc ((size_t)conn.count_modes + 1,    sizeof *conn_mode_buf);
     conn_prop_buf    = calloc ((size_t)conn.count_props + 1,    sizeof *conn_prop_buf);
     conn_propval_buf = calloc ((size_t)conn.count_props + 1,    sizeof *conn_propval_buf);
     conn_enc_buf     = calloc ((size_t)conn.count_encoders + 1, sizeof *conn_enc_buf);
     if (!conn_mode_buf || !conn_prop_buf || !conn_propval_buf || !conn_enc_buf)
       goto cleanup;

     conn.modes_ptr=(fbdrmuint_t)conn_mode_buf;
     conn.props_ptr=(fbdrmuint_t)conn_prop_buf;
     conn.prop_values_ptr=(fbdrmuint_t)conn_propval_buf;
     conn.encoders_ptr=(fbdrmuint_t)conn_enc_buf;

     /* second call: obtain modes, properties and encoders */
     if (ioctl(fb->fb_fd, DRM_IOCTL_MODE_GETCONNECTOR, &conn))
       goto cleanup;

     //Check if the connector is OK to use (connected to something)
     if (conn.count_encoders<1 || conn.count_modes<1 || !conn.encoder_id || !conn.connection)
       continue;

//------------------------------------------------------------------------------
//Creating a dumb buffer
//------------------------------------------------------------------------------
     struct drm_mode_create_dumb create_dumb={0};
     struct drm_mode_map_dumb    map_dumb={0};
     struct drm_mode_fb_cmd      cmd_dumb={0};
     create_dumb.width  = conn_mode_buf[0].hdisplay;
     create_dumb.height = conn_mode_buf[0].vdisplay;
     create_dumb.bpp   = 32;
     create_dumb.flags = 0;
     create_dumb.pitch = 0;
     create_dumb.size  = 0;
     create_dumb.handle = 0;
     if (ioctl(fb->fb_fd, DRM_IOCTL_MODE_CREATE_DUMB, &create_dumb) ||
         !create_dumb.handle)
       goto cleanup;

     cmd_dumb.width =create_dumb.width;
     cmd_dumb.height=create_dumb.height;
     cmd_dumb.bpp   =create_dumb.bpp;
     cmd_dumb.pitch =create_dumb.pitch;
     cmd_dumb.depth =24;
     cmd_dumb.handle=create_dumb.handle;
     if (ioctl(fb->fb_fd,DRM_IOCTL_MODE_ADDFB,&cmd_dumb))
       goto cleanup;

     map_dumb.handle=create_dumb.handle;
     if (ioctl(fb->fb_fd,DRM_IOCTL_MODE_MAP_DUMB,&map_dumb))
       goto cleanup;

     base_size = (size_t)create_dumb.size;
     base = mmap(0, base_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                 fb->fb_fd, map_dumb.offset);
     /* mmap reports failure with MAP_FAILED, not with a NULL pointer */
     if (base == MAP_FAILED)
       goto cleanup;

     *width  = create_dumb.width;
     *height = create_dumb.height;

     struct drm_mode_get_encoder enc={0};
     enc.encoder_id=conn.encoder_id;
     if (ioctl(fb->fb_fd, DRM_IOCTL_MODE_GETENCODER, &enc))
        goto cleanup;

     fb->crtc.crtc_id=enc.crtc_id;
     if (ioctl(fb->fb_fd, DRM_IOCTL_MODE_GETCRTC, &fb->crtc))
        goto cleanup;

     fb->crtc.fb_id=cmd_dumb.fb_id;
     fb->crtc.set_connectors_ptr=(fbdrmuint_t)&res_conn_buf[i];
     fb->crtc.count_connectors=1;
     fb->crtc.mode=conn_mode_buf[0];   /* copied by value */
     fb->crtc.mode_valid=1;

     free (conn_mode_buf);
     free (conn_prop_buf);
     free (conn_propval_buf);
     free (conn_enc_buf);
     return base;
   }
cleanup:
   if (base != MAP_FAILED)
     munmap (base, base_size);
   free (conn_mode_buf);
   free (conn_prop_buf);
   free (conn_propval_buf);
   free (conn_enc_buf);
   if (got_master)
     ioctl(fb->fb_fd, DRM_IOCTL_DROP_MASTER, 0);
   close (fb->fb_fd);
   fb->fb_fd = 0;
   return NULL;
}

/* Submits the stored CRTC configuration with DRM_IOCTL_MODE_SETCRTC;
 * does nothing when the device is not open (fb_fd == 0).
 */
void ctx_fbkms_flip (CtxKMS *fb)
{
  int fd = fb->fb_fd;

  if (fd != 0)
    ioctl(fd, DRM_IOCTL_MODE_SETCRTC, &fb->crtc);
}

/* Drops DRM master and closes the device; marks it as not open afterwards.
 * Does nothing when the device is not open (fb_fd == 0).
 */
void ctx_fbkms_close (CtxKMS *fb)
{
  int fd = fb->fb_fd;

  if (fd == 0)
    return;

  ioctl(fd, DRM_IOCTL_DROP_MASTER, 0);
  close (fd);
  fb->fb_fd = 0;
}

/* Presents the frame through KMS when the backend runs in KMS mode;
 * otherwise nothing is done.
 */
static void ctx_kms_flip (CtxKMS *fb)
{
  if (!fb->is_kms)
    return;

  ctx_fbkms_flip (fb);
}

inline static uint32_t
ctx_swap_red_green2 (uint32_t orig)
{
  uint32_t  green_alpha = (orig & 0xff00ff