source: opengl-game/stb_image.cpp@ f7d35da

feature/imgui-sdl points-test
Last change on this file since f7d35da was 485424b, checked in by Dmitry Portnoy <dmitry.portnoy@…>, 7 years ago

Add support for loading and applying texture and add a square to the scene with a demo texture

  • Property mode set to 100644
File size: 226.5 KB
RevLine 
[485424b]1#define STB_IMAGE_IMPLEMENTATION
2#include "stb_image.h"
3
4#ifdef STB_IMAGE_IMPLEMENTATION
5
6#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \
7 || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \
8 || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \
9 || defined(STBI_ONLY_ZLIB)
10 #ifndef STBI_ONLY_JPEG
11 #define STBI_NO_JPEG
12 #endif
13 #ifndef STBI_ONLY_PNG
14 #define STBI_NO_PNG
15 #endif
16 #ifndef STBI_ONLY_BMP
17 #define STBI_NO_BMP
18 #endif
19 #ifndef STBI_ONLY_PSD
20 #define STBI_NO_PSD
21 #endif
22 #ifndef STBI_ONLY_TGA
23 #define STBI_NO_TGA
24 #endif
25 #ifndef STBI_ONLY_GIF
26 #define STBI_NO_GIF
27 #endif
28 #ifndef STBI_ONLY_HDR
29 #define STBI_NO_HDR
30 #endif
31 #ifndef STBI_ONLY_PIC
32 #define STBI_NO_PIC
33 #endif
34 #ifndef STBI_ONLY_PNM
35 #define STBI_NO_PNM
36 #endif
37#endif
38
39#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB)
40#define STBI_NO_ZLIB
41#endif
42
43
44#include <stdarg.h>
45#include <stddef.h> // ptrdiff_t on osx
46#include <stdlib.h>
47#include <string.h>
48#include <limits.h>
49
50#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
51#include <math.h> // ldexp
52#endif
53
54#ifndef STBI_NO_STDIO
55#include <stdio.h>
56#endif
57
58#ifndef STBI_ASSERT
59#include <assert.h>
60#define STBI_ASSERT(x) assert(x)
61#endif
62
63
64#ifndef _MSC_VER
65 #ifdef __cplusplus
66 #define stbi_inline inline
67 #else
68 #define stbi_inline
69 #endif
70#else
71 #define stbi_inline __forceinline
72#endif
73
74
75#ifdef _MSC_VER
76typedef unsigned short stbi__uint16;
77typedef signed short stbi__int16;
78typedef unsigned int stbi__uint32;
79typedef signed int stbi__int32;
80#else
81#include <stdint.h>
82typedef uint16_t stbi__uint16;
83typedef int16_t stbi__int16;
84typedef uint32_t stbi__uint32;
85typedef int32_t stbi__int32;
86#endif
87
88// should produce compiler error if size is wrong
89typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
90
91#ifdef _MSC_VER
92#define STBI_NOTUSED(v) (void)(v)
93#else
94#define STBI_NOTUSED(v) (void)sizeof(v)
95#endif
96
97#ifdef _MSC_VER
98#define STBI_HAS_LROTL
99#endif
100
101#ifdef STBI_HAS_LROTL
102 #define stbi_lrot(x,y) _lrotl(x,y)
103#else
104 #define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (32 - (y))))
105#endif
106
107#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
108// ok
109#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED)
110// ok
111#else
112#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)."
113#endif
114
115#ifndef STBI_MALLOC
116#define STBI_MALLOC(sz) malloc(sz)
117#define STBI_REALLOC(p,newsz) realloc(p,newsz)
118#define STBI_FREE(p) free(p)
119#endif
120
121#ifndef STBI_REALLOC_SIZED
122#define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz)
123#endif
124
125// x86/x64 detection
126#if defined(__x86_64__) || defined(_M_X64)
127#define STBI__X64_TARGET
128#elif defined(__i386) || defined(_M_IX86)
129#define STBI__X86_TARGET
130#endif
131
132#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
133// gcc doesn't support sse2 intrinsics unless you compile with -msse2,
134// which in turn means it gets to use SSE2 everywhere. This is unfortunate,
135// but previous attempts to provide the SSE2 functions with runtime
136// detection caused numerous issues. The way architecture extensions are
137// exposed in GCC/Clang is, sadly, not really suited for one-file libs.
138// New behavior: if compiled with -msse2, we use SSE2 without any
139// detection; if not, we don't use it at all.
140#define STBI_NO_SIMD
141#endif
142
143#if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD)
144// Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET
145//
146// 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the
147// Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant.
148// As a result, enabling SSE2 on 32-bit MinGW is dangerous when not
149// simultaneously enabling "-mstackrealign".
150//
151// See https://github.com/nothings/stb/issues/81 for more information.
152//
153// So default to no SSE2 on 32-bit MinGW. If you've read this far and added
154// -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2.
155#define STBI_NO_SIMD
156#endif
157
158#if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET))
159#define STBI_SSE2
160#include <emmintrin.h>
161
162#ifdef _MSC_VER
163
164#if _MSC_VER >= 1400 // not VC6
165#include <intrin.h> // __cpuid
// Query CPUID leaf 1 through the MSVC __cpuid intrinsic and return the
// fourth output word (info[3]); stbi__sse2_available() tests a bit in it.
static int stbi__cpuid3(void)
{
   int info[4];
   __cpuid(info,1);
   return info[3];
}
172#else
// Pre-VC2005 fallback: no <intrin.h>, so execute CPUID with eax=1 via
// inline assembly and return the contents of edx.
static int stbi__cpuid3(void)
{
   int res;
   __asm {
      mov  eax,1
      cpuid
      mov  res,edx
   }
   return res;
}
183#endif
184
185#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
186
// Runtime SSE2 check for MSVC builds: returns 1 when bit 26 of the
// CPUID leaf-1 feature word is set, 0 otherwise.
static int stbi__sse2_available(void)
{
   int features = stbi__cpuid3();
   if ((features >> 26) & 1)
      return 1;
   return 0;
}
192#else // assume GCC-style if not VC++
193#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
194
// GCC/Clang variant: no runtime probing is done. This definition is only
// compiled when SIMD was not disabled above, i.e. the translation unit was
// built with SSE2 enabled, so the compiler itself may already emit SSE2
// instructions anywhere. Always reports "available".
static int stbi__sse2_available(void)
{
   return 1;
}
202#endif
203#endif
204
205// ARM NEON
206#if defined(STBI_NO_SIMD) && defined(STBI_NEON)
207#undef STBI_NEON
208#endif
209
210#ifdef STBI_NEON
211#include <arm_neon.h>
212// assume GCC or Clang on ARM targets
213#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
214#endif
215
216#ifndef STBI_SIMD_ALIGN
217#define STBI_SIMD_ALIGN(type, name) type name
218#endif
219
220///////////////////////////////////////////////
221//
222// stbi__context struct and start_xxx functions
223
// stbi__context structure is our basic context used by all images, so it
// contains all the IO context, plus some basic image information
typedef struct
{
   // basic image information filled in by the format decoders
   // NOTE(review): presumably width/height and channel counts (in file / in
   // output) -- the code that writes them is outside this chunk; confirm.
   stbi__uint32 img_x, img_y;
   int img_n, img_out_n;

   // user-supplied I/O callbacks and the opaque pointer handed back to them;
   // io.read == NULL marks a pure in-memory context
   stbi_io_callbacks io;
   void *io_user_data;

   // nonzero while more data may still be pulled through io.read
   int read_from_callbacks;
   // capacity of buffer_start as passed to io.read
   int buflen;
   // staging buffer for callback-based reads
   stbi_uc buffer_start[128];

   // current read cursor and end of the currently readable window
   stbi_uc *img_buffer, *img_buffer_end;
   // window captured at start-up, restored by stbi__rewind()
   stbi_uc *img_buffer_original, *img_buffer_original_end;
} stbi__context;
241
242
243static void stbi__refill_buffer(stbi__context *s);
244
245// initialize a memory-decode context
246static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
247{
248 s->io.read = NULL;
249 s->read_from_callbacks = 0;
250 s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer;
251 s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len;
252}
253
254// initialize a callback-based context
255static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user)
256{
257 s->io = *c;
258 s->io_user_data = user;
259 s->buflen = sizeof(s->buffer_start);
260 s->read_from_callbacks = 1;
261 s->img_buffer_original = s->buffer_start;
262 stbi__refill_buffer(s);
263 s->img_buffer_original_end = s->img_buffer_end;
264}
265
266#ifndef STBI_NO_STDIO
267
// stdio read callback: 'user' is the FILE* to read from. Reads up to 'size'
// bytes into 'data' and returns the number of bytes actually read.
static int stbi__stdio_read(void *user, char *data, int size)
{
   FILE *stream = (FILE *) user;
   size_t got = fread(data, 1, size, stream);
   return (int) got;
}
272
// stdio skip callback: 'user' is the FILE*; advance the stream by 'n' bytes
// relative to the current position.
//
// A successful fseek() clears the stream's end-of-file indicator, so after
// seeking past the end feof() would still report "not at EOF" and the eof
// callback could not see that the data ran out. Read one byte to let stdio
// set the indicator, and push it back if there actually was one so the
// stream position is unchanged for the caller.
static void stbi__stdio_skip(void *user, int n)
{
   FILE *stream = (FILE *) user;
   int ch;

   fseek(stream, n, SEEK_CUR);
   ch = fgetc(stream);
   if (ch != EOF)
      ungetc(ch, stream);
}
277
// stdio eof callback: 'user' is the FILE*. Returns nonzero when no more data
// can be obtained from the stream.
//
// A stream whose error indicator is set will never reach EOF, so a read
// error is reported as end-of-data too; otherwise a caller polling this
// callback could wait for an EOF that never comes.
static int stbi__stdio_eof(void *user)
{
   FILE *stream = (FILE *) user;
   return feof(stream) || ferror(stream);
}
282
// Callback table that adapts a stdio FILE* (passed as the user pointer) to
// the generic stbi_io_callbacks interface; installed by stbi__start_file().
// Initialiser order: read, skip, eof.
static stbi_io_callbacks stbi__stdio_callbacks =
{
   stbi__stdio_read,
   stbi__stdio_skip,
   stbi__stdio_eof,
};
289
290static void stbi__start_file(stbi__context *s, FILE *f)
291{
292 stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f);
293}
294
295//static void stop_file(stbi__context *s) { }
296
297#endif // !STBI_NO_STDIO
298
299static void stbi__rewind(stbi__context *s)
300{
301 // conceptually rewind SHOULD rewind to the beginning of the stream,
302 // but we just rewind to the beginning of the initial buffer, because
303 // we only use it after doing 'test', which only ever looks at at most 92 bytes
304 s->img_buffer = s->img_buffer_original;
305 s->img_buffer_end = s->img_buffer_original_end;
306}
307
// Channel orderings a decoder can report in stbi__result_info.channel_order.
enum
{
   STBI_ORDER_RGB,
   STBI_ORDER_BGR
};
313
// Per-load metadata a decoder hands back to stbi__load_main()'s caller.
typedef struct
{
   int bits_per_channel;   // 8 or 16; used to decide whether a depth conversion is needed
   int num_channels;       // initialised to 0 by stbi__load_main()
   int channel_order;      // one of STBI_ORDER_RGB / STBI_ORDER_BGR
} stbi__result_info;
320
321#ifndef STBI_NO_JPEG
322static int stbi__jpeg_test(stbi__context *s);
323static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
324static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp);
325#endif
326
327#ifndef STBI_NO_PNG
328static int stbi__png_test(stbi__context *s);
329static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
330static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp);
331#endif
332
333#ifndef STBI_NO_BMP
334static int stbi__bmp_test(stbi__context *s);
335static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
336static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
337#endif
338
339#ifndef STBI_NO_TGA
340static int stbi__tga_test(stbi__context *s);
341static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
342static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
343#endif
344
345#ifndef STBI_NO_PSD
346static int stbi__psd_test(stbi__context *s);
347static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc);
348static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
349#endif
350
351#ifndef STBI_NO_HDR
352static int stbi__hdr_test(stbi__context *s);
353static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
354static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
355#endif
356
357#ifndef STBI_NO_PIC
358static int stbi__pic_test(stbi__context *s);
359static void *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
360static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
361#endif
362
363#ifndef STBI_NO_GIF
364static int stbi__gif_test(stbi__context *s);
365static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
366static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
367#endif
368
369#ifndef STBI_NO_PNM
370static int stbi__pnm_test(stbi__context *s);
371static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
372static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
373#endif
374
375// this is not threadsafe
376static const char *stbi__g_failure_reason;
377
378STBIDEF const char *stbi_failure_reason(void)
379{
380 return stbi__g_failure_reason;
381}
382
383static int stbi__err(const char *str)
384{
385 stbi__g_failure_reason = str;
386 return 0;
387}
388
// Single allocation entry point; forwards to the configurable STBI_MALLOC.
static void *stbi__malloc(size_t size)
{
   void *block = STBI_MALLOC(size);
   return block;
}
393
394// stb_image uses ints pervasively, including for offset calculations.
395// therefore the largest decoded image size we can support with the
396// current code, even on 64-bit targets, is INT_MAX. this is not a
397// significant limitation for the intended use case.
398//
399// we do, however, need to make sure our size calculations don't
400// overflow. hence a few helper functions for size calculations that
401// multiply integers together, making sure that they're non-negative
402// and no overflow occurs.
403
// Returns 1 if "a + b" is a valid size sum, 0 otherwise.
// A sum is valid when neither term is negative and the addition does not
// overflow int.
static int stbi__addsizes_valid(int a, int b)
{
   // both terms must be non-negative, as the contract above states
   if (a < 0 || b < 0) return 0;
   // now 0 <= b <= INT_MAX, hence 0 <= INT_MAX - b <= INT_MAX, and
   // "a + b <= INT_MAX" (which might overflow) is the same as
   // "a <= INT_MAX - b" (which cannot)
   return a <= INT_MAX - b;
}
415
// Returns 1 if "a * b" is a valid size product: both factors non-negative
// and the multiplication representable in int. Returns 0 otherwise.
static int stbi__mul2sizes_valid(int a, int b)
{
   if (a >= 0 && b >= 0) {
      // anything times zero is fine, and it keeps the division below safe
      if (b == 0)
         return 1;
      // a*b <= INT_MAX  <=>  a <= INT_MAX/b, checked without multiplying
      return a <= INT_MAX/b;
   }
   return 0;
}
425
// Returns 1 if "a*b + add" has no negative terms/factors and cannot overflow.
static int stbi__mad2sizes_valid(int a, int b, int add)
{
   // a*b is only evaluated after it has been proven not to overflow
   if (!stbi__mul2sizes_valid(a, b))
      return 0;
   return stbi__addsizes_valid(a*b, add);
}
431
// Returns 1 if "a*b*c + add" has no negative terms/factors and cannot overflow.
static int stbi__mad3sizes_valid(int a, int b, int c, int add)
{
   // each partial product is validated before it is actually computed
   if (!stbi__mul2sizes_valid(a, b))
      return 0;
   if (!stbi__mul2sizes_valid(a*b, c))
      return 0;
   return stbi__addsizes_valid(a*b*c, add);
}
438
// Returns 1 if "a*b*c*d + add" has no negative terms/factors and cannot overflow.
static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
{
   // each partial product is validated before it is actually computed
   if (!stbi__mul2sizes_valid(a, b))
      return 0;
   if (!stbi__mul2sizes_valid(a*b, c))
      return 0;
   if (!stbi__mul2sizes_valid(a*b*c, d))
      return 0;
   return stbi__addsizes_valid(a*b*c*d, add);
}
445
// Allocate "a*b + add" bytes, or return NULL if that size is invalid
// (negative term/factor or int overflow) or the allocation fails.
static void *stbi__malloc_mad2(int a, int b, int add)
{
   if (stbi__mad2sizes_valid(a, b, add))
      return stbi__malloc(a*b + add);
   return NULL;
}
452
// Allocate "a*b*c + add" bytes, or return NULL if that size is invalid
// (negative term/factor or int overflow) or the allocation fails.
static void *stbi__malloc_mad3(int a, int b, int c, int add)
{
   if (stbi__mad3sizes_valid(a, b, c, add))
      return stbi__malloc(a*b*c + add);
   return NULL;
}
458
// Allocate "a*b*c*d + add" bytes, or return NULL if that size is invalid
// (negative term/factor or int overflow) or the allocation fails.
static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
{
   if (stbi__mad4sizes_valid(a, b, c, d, add))
      return stbi__malloc(a*b*c*d + add);
   return NULL;
}
464
// Error-reporting macros. Each takes a short tag (x) and a longer,
// user-oriented message (y); build flags select which one is recorded:
//   STBI_NO_FAILURE_STRINGS - nothing is recorded, the macro is just 0
//   STBI_FAILURE_USERMSG    - the long message (y) is recorded
//   otherwise               - the short tag (x) is recorded
//
// stbi__err    - error, evaluates to 0
// stbi__errpf  - error returning pointer to float (always NULL)
// stbi__errpuc - error returning pointer to unsigned char (always NULL)

#ifdef STBI_NO_FAILURE_STRINGS
   #define stbi__err(x,y)  0
#elif defined(STBI_FAILURE_USERMSG)
   #define stbi__err(x,y)  stbi__err(y)
#else
   #define stbi__err(x,y)  stbi__err(x)
#endif

// the "?NULL:NULL" form evaluates stbi__err for its side effect while
// yielding NULL either way
#define stbi__errpf(x,y)   ((float *)(size_t) (stbi__err(x,y)?NULL:NULL))
#define stbi__errpuc(x,y)  ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL))
479
480STBIDEF void stbi_image_free(void *retval_from_stbi_load)
481{
482 STBI_FREE(retval_from_stbi_load);
483}
484
485#ifndef STBI_NO_LINEAR
486static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
487#endif
488
489#ifndef STBI_NO_HDR
490static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp);
491#endif
492
493static int stbi__vertically_flip_on_load = 0;
494
495STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
496{
497 stbi__vertically_flip_on_load = flag_true_if_should_flip;
498}
499
// Central format dispatcher: probe the stream with each compiled-in format
// test in turn and hand it to the first decoder that recognises it.
//
//   s        - input context
//   x,y,comp - receive image width, height and channel count from the decoder
//   req_comp - requested channel count, 0 meaning "whatever the file has"
//   ri       - receives result metadata; reset to 8-bit / RGB order here
//   bpc      - requested bits per channel; only forwarded to the PSD decoder
//
// Returns the decoder's pixel buffer, or NULL (with the failure reason set)
// when no test matches.
static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
{
   memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields
   ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed
   ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
   ri->num_channels = 0;

   // the probe order below is significant: first match wins
   #ifndef STBI_NO_JPEG
   if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_PNG
   if (stbi__png_test(s))  return stbi__png_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_BMP
   if (stbi__bmp_test(s))  return stbi__bmp_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_GIF
   if (stbi__gif_test(s))  return stbi__gif_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_PSD
   if (stbi__psd_test(s))  return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc);
   #endif
   #ifndef STBI_NO_PIC
   if (stbi__pic_test(s))  return stbi__pic_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_PNM
   if (stbi__pnm_test(s))  return stbi__pnm_load(s,x,y,comp,req_comp, ri);
   #endif

   // HDR input is decoded to float and then converted down to LDR bytes
   // NOTE(review): hdr may be NULL here; whether stbi__hdr_to_ldr tolerates
   // that is not visible in this chunk -- confirm.
   #ifndef STBI_NO_HDR
   if (stbi__hdr_test(s)) {
      float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri);
      return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
   }
   #endif

   #ifndef STBI_NO_TGA
   // test tga last because it's a crappy test!
   if (stbi__tga_test(s))
      return stbi__tga_load(s,x,y,comp,req_comp, ri);
   #endif

   return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
}
544
545static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels)
546{
547 int i;
548 int img_len = w * h * channels;
549 stbi_uc *reduced;
550
551 reduced = (stbi_uc *) stbi__malloc(img_len);
552 if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory");
553
554 for (i = 0; i < img_len; ++i)
555 reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling
556
557 STBI_FREE(orig);
558 return reduced;
559}
560
561static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels)
562{
563 int i;
564 int img_len = w * h * channels;
565 stbi__uint16 *enlarged;
566
567 enlarged = (stbi__uint16 *) stbi__malloc(img_len*2);
568 if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
569
570 for (i = 0; i < img_len; ++i)
571 enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff
572
573 STBI_FREE(orig);
574 return enlarged;
575}
576
577static void stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel)
578{
579 int row;
580 size_t bytes_per_row = (size_t)w * bytes_per_pixel;
581 stbi_uc temp[2048];
582 stbi_uc *bytes = (stbi_uc *)image;
583
584 for (row = 0; row < (h>>1); row++) {
585 stbi_uc *row0 = bytes + row*bytes_per_row;
586 stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row;
587 // swap row0 with row1
588 size_t bytes_left = bytes_per_row;
589 while (bytes_left) {
590 size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp);
591 memcpy(temp, row0, bytes_copy);
592 memcpy(row0, row1, bytes_copy);
593 memcpy(row1, temp, bytes_copy);
594 row0 += bytes_copy;
595 row1 += bytes_copy;
596 bytes_left -= bytes_copy;
597 }
598 }
599}
600
601static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
602{
603 stbi__result_info ri;
604 void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8);
605
606 if (result == NULL)
607 return NULL;
608
609 if (ri.bits_per_channel != 8) {
610 STBI_ASSERT(ri.bits_per_channel == 16);
611 result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
612 ri.bits_per_channel = 8;
613 }
614
615 // @TODO: move stbi__convert_format to here
616
617 if (stbi__vertically_flip_on_load) {
618 int channels = req_comp ? req_comp : *comp;
619 stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc));
620 }
621
622 return (unsigned char *) result;
623}
624
625static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
626{
627 stbi__result_info ri;
628 void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16);
629
630 if (result == NULL)
631 return NULL;
632
633 if (ri.bits_per_channel != 16) {
634 STBI_ASSERT(ri.bits_per_channel == 8);
635 result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
636 ri.bits_per_channel = 16;
637 }
638
639 // @TODO: move stbi__convert_format16 to here
640 // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision
641
642 if (stbi__vertically_flip_on_load) {
643 int channels = req_comp ? req_comp : *comp;
644 stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16));
645 }
646
647 return (stbi__uint16 *) result;
648}
649
650#ifndef STBI_NO_HDR
651static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp)
652{
653 if (stbi__vertically_flip_on_load && result != NULL) {
654 int channels = req_comp ? req_comp : *comp;
655 stbi__vertical_flip(result, *x, *y, channels * sizeof(float));
656 }
657}
658#endif
659
660#ifndef STBI_NO_STDIO
661
// Open 'filename' with the given stdio mode string. Uses fopen_s on
// MSVC 2005 and later, plain fopen elsewhere. Returns NULL on failure.
static FILE *stbi__fopen(char const *filename, char const *mode)
{
#if defined(_MSC_VER) && _MSC_VER >= 1400
   FILE *handle = NULL;
   if (fopen_s(&handle, filename, mode) != 0)
      return NULL;
   return handle;
#else
   return fopen(filename, mode);
#endif
}
673
674
675STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
676{
677 FILE *f = stbi__fopen(filename, "rb");
678 unsigned char *result;
679 if (!f) return stbi__errpuc("can't fopen", "Unable to open file");
680 result = stbi_load_from_file(f,x,y,comp,req_comp);
681 fclose(f);
682 return result;
683}
684
685STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
686{
687 unsigned char *result;
688 stbi__context s;
689 stbi__start_file(&s,f);
690 result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
691 if (result) {
692 // need to 'unget' all the characters in the IO buffer
693 fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
694 }
695 return result;
696}
697
698STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp)
699{
700 stbi__uint16 *result;
701 stbi__context s;
702 stbi__start_file(&s,f);
703 result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp);
704 if (result) {
705 // need to 'unget' all the characters in the IO buffer
706 fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
707 }
708 return result;
709}
710
711STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp)
712{
713 FILE *f = stbi__fopen(filename, "rb");
714 stbi__uint16 *result;
715 if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file");
716 result = stbi_load_from_file_16(f,x,y,comp,req_comp);
717 fclose(f);
718 return result;
719}
720
721
722#endif //!STBI_NO_STDIO
723
724STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels)
725{
726 stbi__context s;
727 stbi__start_mem(&s,buffer,len);
728 return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
729}
730
731STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels)
732{
733 stbi__context s;
734 stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
735 return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
736}
737
738STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
739{
740 stbi__context s;
741 stbi__start_mem(&s,buffer,len);
742 return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
743}
744
745STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
746{
747 stbi__context s;
748 stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
749 return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
750}
751
752#ifndef STBI_NO_LINEAR
753static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
754{
755 unsigned char *data;
756 #ifndef STBI_NO_HDR
757 if (stbi__hdr_test(s)) {
758 stbi__result_info ri;
759 float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri);
760 if (hdr_data)
761 stbi__float_postprocess(hdr_data,x,y,comp,req_comp);
762 return hdr_data;
763 }
764 #endif
765 data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp);
766 if (data)
767 return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
768 return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
769}
770
771STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
772{
773 stbi__context s;
774 stbi__start_mem(&s,buffer,len);
775 return stbi__loadf_main(&s,x,y,comp,req_comp);
776}
777
778STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
779{
780 stbi__context s;
781 stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
782 return stbi__loadf_main(&s,x,y,comp,req_comp);
783}
784
785#ifndef STBI_NO_STDIO
786STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
787{
788 float *result;
789 FILE *f = stbi__fopen(filename, "rb");
790 if (!f) return stbi__errpf("can't fopen", "Unable to open file");
791 result = stbi_loadf_from_file(f,x,y,comp,req_comp);
792 fclose(f);
793 return result;
794}
795
796STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
797{
798 stbi__context s;
799 stbi__start_file(&s,f);
800 return stbi__loadf_main(&s,x,y,comp,req_comp);
801}
802#endif // !STBI_NO_STDIO
803
804#endif // !STBI_NO_LINEAR
805
// these is-hdr-or-not functions are defined independently of whether
// STBI_NO_LINEAR is defined, for API simplicity; if STBI_NO_HDR is defined,
// they always report false!
809
810STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
811{
812 #ifndef STBI_NO_HDR
813 stbi__context s;
814 stbi__start_mem(&s,buffer,len);
815 return stbi__hdr_test(&s);
816 #else
817 STBI_NOTUSED(buffer);
818 STBI_NOTUSED(len);
819 return 0;
820 #endif
821}
822
823#ifndef STBI_NO_STDIO
824STBIDEF int stbi_is_hdr (char const *filename)
825{
826 FILE *f = stbi__fopen(filename, "rb");
827 int result=0;
828 if (f) {
829 result = stbi_is_hdr_from_file(f);
830 fclose(f);
831 }
832 return result;
833}
834
835STBIDEF int stbi_is_hdr_from_file(FILE *f)
836{
837 #ifndef STBI_NO_HDR
838 stbi__context s;
839 stbi__start_file(&s,f);
840 return stbi__hdr_test(&s);
841 #else
842 STBI_NOTUSED(f);
843 return 0;
844 #endif
845}
846#endif // !STBI_NO_STDIO
847
848STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user)
849{
850 #ifndef STBI_NO_HDR
851 stbi__context s;
852 stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
853 return stbi__hdr_test(&s);
854 #else
855 STBI_NOTUSED(clbk);
856 STBI_NOTUSED(user);
857 return 0;
858 #endif
859}
860
861#ifndef STBI_NO_LINEAR
// Global LDR->HDR conversion parameters (defaults: gamma 2.2, scale 1.0)
// and their setters. These are plain file-scope variables, so changing them
// affects all subsequent conversions.
static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f;

STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; }
STBIDEF void   stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; }
866#endif
867
// Global HDR->LDR conversion parameters, stored as reciprocals
// (defaults: 1/2.2 and 1/1.0). The setters take the plain gamma/scale and
// store 1/value; passing 0 is not guarded against.
static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f;

STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; }
STBIDEF void   stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; }
872
873
874//////////////////////////////////////////////////////////////////////////////
875//
876// Common code used by all image loaders
877//
878
// Scan modes shared by the image loaders.
// NOTE(review): the consumers are outside this chunk; presumably these select
// how far a parser goes (full load / type check only / header only) -- confirm.
enum
{
   STBI__SCAN_load=0,
   STBI__SCAN_type,
   STBI__SCAN_header
};
885
886static void stbi__refill_buffer(stbi__context *s)
887{
888 int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen);
889 if (n == 0) {
890 // at end of file, treat same as if from memory, but need to handle case
891 // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
892 s->read_from_callbacks = 0;
893 s->img_buffer = s->buffer_start;
894 s->img_buffer_end = s->buffer_start+1;
895 *s->img_buffer = 0;
896 } else {
897 s->img_buffer = s->buffer_start;
898 s->img_buffer_end = s->buffer_start + n;
899 }
900}
901
902stbi_inline static stbi_uc stbi__get8(stbi__context *s)
903{
904 if (s->img_buffer < s->img_buffer_end)
905 return *s->img_buffer++;
906 if (s->read_from_callbacks) {
907 stbi__refill_buffer(s);
908 return *s->img_buffer++;
909 }
910 return 0;
911}
912
913stbi_inline static int stbi__at_eof(stbi__context *s)
914{
915 if (s->io.read) {
916 if (!(s->io.eof)(s->io_user_data)) return 0;
917 // if feof() is true, check if buffer = end
918 // special case: we've only got the special 0 character at the end
919 if (s->read_from_callbacks == 0) return 1;
920 }
921
922 return s->img_buffer >= s->img_buffer_end;
923}
924
925static void stbi__skip(stbi__context *s, int n)
926{
927 if (n < 0) {
928 s->img_buffer = s->img_buffer_end;
929 return;
930 }
931 if (s->io.read) {
932 int blen = (int) (s->img_buffer_end - s->img_buffer);
933 if (blen < n) {
934 s->img_buffer = s->img_buffer_end;
935 (s->io.skip)(s->io_user_data, n - blen);
936 return;
937 }
938 }
939 s->img_buffer += n;
940}
941
942static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
943{
944 if (s->io.read) {
945 int blen = (int) (s->img_buffer_end - s->img_buffer);
946 if (blen < n) {
947 int res, count;
948
949 memcpy(buffer, s->img_buffer, blen);
950
951 count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen);
952 res = (count == (n-blen));
953 s->img_buffer = s->img_buffer_end;
954 return res;
955 }
956 }
957
958 if (s->img_buffer+n <= s->img_buffer_end) {
959 memcpy(buffer, s->img_buffer, n);
960 s->img_buffer += n;
961 return 1;
962 } else
963 return 0;
964}
965
966static int stbi__get16be(stbi__context *s)
967{
968 int z = stbi__get8(s);
969 return (z << 8) + stbi__get8(s);
970}
971
972static stbi__uint32 stbi__get32be(stbi__context *s)
973{
974 stbi__uint32 z = stbi__get16be(s);
975 return (z << 16) + stbi__get16be(s);
976}
977
978#if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF)
979// nothing
980#else
981static int stbi__get16le(stbi__context *s)
982{
983 int z = stbi__get8(s);
984 return z + (stbi__get8(s) << 8);
985}
986#endif
987
988#ifndef STBI_NO_BMP
989static stbi__uint32 stbi__get32le(stbi__context *s)
990{
991 stbi__uint32 z = stbi__get16le(s);
992 return z + (stbi__get16le(s) << 16);
993}
994#endif
995
996#define STBI__BYTECAST(x) ((stbi_uc) ((x) & 255)) // truncate int to byte without warnings
997
998
999//////////////////////////////////////////////////////////////////////////////
1000//
1001// generic converter from built-in img_n to req_comp
1002// individual types do this automatically as much as possible (e.g. jpeg
1003// does all cases internally since it needs to colorspace convert anyway,
1004// and it never has alpha, so very few cases ). png can automatically
1005// interleave an alpha=255 channel, but falls back to this for other cases
1006//
1007// assume data buffer is malloced, so malloc a new one and free that one
1008// only failure mode is malloc failing
1009
1010static stbi_uc stbi__compute_y(int r, int g, int b)
1011{
1012 return (stbi_uc) (((r*77) + (g*150) + (29*b)) >> 8);
1013}
1014
// Convert an 8-bit image from img_n channels per pixel to req_comp channels.
//
//   data     - malloc'ed source pixels, x*y pixels of img_n bytes each
//   img_n    - channel count of the source
//   req_comp - channel count wanted (1..4)
//   x, y     - image dimensions in pixels
//
// If the counts already match, 'data' is returned unchanged. Otherwise a new
// buffer is allocated, filled, and returned, and 'data' is freed. On failure
// (allocation failure or an unsupported img_n/req_comp pair) both buffers
// are freed, the failure reason is set, and NULL is returned.
static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y)
{
   int i,j;
   unsigned char *good;

   if (req_comp == img_n) return data;
   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);

   good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0);
   if (good == NULL) {
      STBI_FREE(data);
      return stbi__errpuc("outofmem", "Out of memory");
   }

   for (j=0; j < (int) y; ++j) {
      unsigned char *src  = data + j * x * img_n   ;
      unsigned char *dest = good + j * x * req_comp;

      #define STBI__COMBO(a,b)  ((a)*8+(b))
      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
      // convert source image with img_n components to one with req_comp components;
      // avoid switch per pixel, so use switch per scanline and massive macros
      switch (STBI__COMBO(img_n, req_comp)) {
         STBI__CASE(1,2) { dest[0]=src[0], dest[1]=255;                                     } break;
         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=255;                     } break;
         STBI__CASE(2,1) { dest[0]=src[0];                                                  } break;
         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1];                  } break;
         STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255;        } break;
         STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
         STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = 255;    } break;
         STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
         STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = src[3]; } break;
         STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2];                    } break;
         default:
            // unsupported combination: with assertions compiled out this
            // would otherwise fall through and return an unfilled buffer
            STBI_ASSERT(0);
            STBI_FREE(data);
            STBI_FREE(good);
            return stbi__errpuc("unsupported", "Unsupported format conversion");
      }
      #undef STBI__CASE
   }

   STBI_FREE(data);
   return good;
}
1058
1059static stbi__uint16 stbi__compute_y_16(int r, int g, int b)
1060{
1061 return (stbi__uint16) (((r*77) + (g*150) + (29*b)) >> 8);
1062}
1063
1064static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y)
1065{
1066 int i,j;
1067 stbi__uint16 *good;
1068
1069 if (req_comp == img_n) return data;
1070 STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
1071
1072 good = (stbi__uint16 *) stbi__malloc(req_comp * x * y * 2);
1073 if (good == NULL) {
1074 STBI_FREE(data);
1075 return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
1076 }
1077
1078 for (j=0; j < (int) y; ++j) {
1079 stbi__uint16 *src = data + j * x * img_n ;
1080 stbi__uint16 *dest = good + j * x * req_comp;
1081
1082 #define STBI__COMBO(a,b) ((a)*8+(b))
1083 #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
1084 // convert source image with img_n components to one with req_comp components;
1085 // avoid switch per pixel, so use switch per scanline and massive macros
1086 switch (STBI__COMBO(img_n, req_comp)) {
1087 STBI__CASE(1,2) { dest[0]=src[0], dest[1]=0xffff; } break;
1088 STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break;
1089 STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=0xffff; } break;
1090 STBI__CASE(2,1) { dest[0]=src[0]; } break;
1091 STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break;
1092 STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; } break;
1093 STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=0xffff; } break;
1094 STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break;
1095 STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = 0xffff; } break;
1096 STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break;
1097 STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = src[3]; } break;
1098 STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; } break;
1099 default: STBI_ASSERT(0);
1100 }
1101 #undef STBI__CASE
1102 }
1103
1104 STBI_FREE(data);
1105 return good;
1106}
1107
1108#ifndef STBI_NO_LINEAR
1109static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
1110{
1111 int i,k,n;
1112 float *output;
1113 if (!data) return NULL;
1114 output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
1115 if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); }
1116 // compute number of non-alpha components
1117 if (comp & 1) n = comp; else n = comp-1;
1118 for (i=0; i < x*y; ++i) {
1119 for (k=0; k < n; ++k) {
1120 output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale);
1121 }
1122 if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f;
1123 }
1124 STBI_FREE(data);
1125 return output;
1126}
1127#endif
1128
1129#ifndef STBI_NO_HDR
1130#define stbi__float2int(x) ((int) (x))
1131static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp)
1132{
1133 int i,k,n;
1134 stbi_uc *output;
1135 if (!data) return NULL;
1136 output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0);
1137 if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); }
1138 // compute number of non-alpha components
1139 if (comp & 1) n = comp; else n = comp-1;
1140 for (i=0; i < x*y; ++i) {
1141 for (k=0; k < n; ++k) {
1142 float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f;
1143 if (z < 0) z = 0;
1144 if (z > 255) z = 255;
1145 output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1146 }
1147 if (k < comp) {
1148 float z = data[i*comp+k] * 255 + 0.5f;
1149 if (z < 0) z = 0;
1150 if (z > 255) z = 255;
1151 output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1152 }
1153 }
1154 STBI_FREE(data);
1155 return output;
1156}
1157#endif
1158
1159//////////////////////////////////////////////////////////////////////////////
1160//
1161// "baseline" JPEG/JFIF decoder
1162//
1163// simple implementation
1164// - doesn't support delayed output of y-dimension
1165// - simple interface (only one output format: 8-bit interleaved RGB)
1166// - doesn't try to recover corrupt jpegs
1167// - doesn't allow partial loading, loading multiple at once
1168// - still fast on x86 (copying globals into locals doesn't help x86)
1169// - allocates lots of intermediate memory (full size of all components)
1170// - non-interleaved case requires this anyway
1171// - allows good upsampling (see next)
1172// high-quality
1173// - upsampled channels are bilinearly interpolated, even across blocks
1174// - quality integer IDCT derived from IJG's 'slow'
1175// performance
1176// - fast huffman; reasonable integer IDCT
1177// - some SIMD kernels for common paths on targets with SSE2/NEON
1178// - uses a lot of intermediate memory, could cache poorly
1179
1180#ifndef STBI_NO_JPEG
1181
// huffman decoding acceleration
// FAST_BITS is the number of leading bitstream bits used to index the
// direct-lookup tables (stbi__huffman.fast and the fast_ac tables).
#define FAST_BITS 9 // larger handles more cases; smaller stomps less cache

// One JPEG huffman table in decode-ready form. size/code/maxcode/delta/fast
// are filled in by stbi__build_huffman; `values` is not written by that
// function and is read by the decoder as the symbol payload.
typedef struct
{
   // indexed by the next FAST_BITS bits of the stream: symbol index for codes
   // no longer than FAST_BITS, or 255 meaning "take the slow path"
   stbi_uc fast[1 << FAST_BITS];
   // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
   stbi__uint16 code[256];   // huffman code of each symbol index
   stbi_uc values[256];      // value returned by the decoder for each symbol index
   stbi_uc size[257];        // code length of each symbol index; list is terminated by a 0 entry
   unsigned int maxcode[18]; // per code length: largest code + 1, preshifted to 16 bits; entry 17 is an all-ones sentinel
   int delta[17]; // old 'firstsymbol' - old 'firstcode'
} stbi__huffman;
1195
// Decoder state for one JPEG image: input stream, entropy-decoding tables,
// per-component bookkeeping, the bit buffer used by the huffman decoder, and
// the scan parameters consulted by the block decoders.
typedef struct
{
   stbi__context *s;                       // input stream the bit buffer is refilled from
   stbi__huffman huff_dc[4];               // DC huffman tables
   stbi__huffman huff_ac[4];               // AC huffman tables
   stbi__uint16 dequant[4][64];            // dequantization tables (64 multipliers each)
   stbi__int16 fast_ac[4][1 << FAST_BITS]; // combined run/size/value lookup, see stbi__build_fast_ac

// sizes for components, interleaved MCUs
   int img_h_max, img_v_max;
   int img_mcu_x, img_mcu_y;
   int img_mcu_w, img_mcu_h;

// definition of jpeg image component
   struct
   {
      int id;
      int h,v;       // NOTE(review): presumably horizontal/vertical sampling factors -- confirm against the frame-header parser
      int tq;        // NOTE(review): presumably the index into dequant[] -- confirm
      int hd,ha;     // NOTE(review): presumably indices into huff_dc[] / huff_ac[] -- confirm
      int dc_pred;   // running DC value; each decoded block adds its DC difference to this

      int x,y,w2,h2;
      stbi_uc *data;
      void *raw_data, *raw_coeff;
      stbi_uc *linebuf;
      short *coeff; // progressive only
      int coeff_w, coeff_h; // number of 8x8 coefficient blocks
   } img_comp[4];

   stbi__uint32 code_buffer; // jpeg entropy-coded buffer; unread bits are kept at the most-significant end
   int code_bits; // number of valid bits
   unsigned char marker; // marker seen while filling entropy buffer
   int nomore; // flag if we saw a marker so must stop

   int progressive;
   int spec_start;   // first zigzag index handled by the current progressive scan (0 for DC scans)
   int spec_end;     // last zigzag index handled by the current progressive scan
   int succ_high;    // 0 on the first scan of a coefficient band, nonzero on refinement scans
   int succ_low;     // bit position the current scan's values are shifted to
   int eob_run;      // number of following blocks that are still covered by an end-of-band run
   int jfif;
   int app14_color_transform; // Adobe APP14 tag
   int rgb;

   int scan_n, order[4];
   int restart_interval, todo;

// kernels
// function pointers so that an alternative implementation with the same
// signature (e.g. stbi__idct_simd) can stand in for the generic C routine
   void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]);
   void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step);
   stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs);
} stbi__jpeg;
1249
1250static int stbi__build_huffman(stbi__huffman *h, int *count)
1251{
1252 int i,j,k=0,code;
1253 // build size list for each symbol (from JPEG spec)
1254 for (i=0; i < 16; ++i)
1255 for (j=0; j < count[i]; ++j)
1256 h->size[k++] = (stbi_uc) (i+1);
1257 h->size[k] = 0;
1258
1259 // compute actual symbols (from jpeg spec)
1260 code = 0;
1261 k = 0;
1262 for(j=1; j <= 16; ++j) {
1263 // compute delta to add to code to compute symbol id
1264 h->delta[j] = k - code;
1265 if (h->size[k] == j) {
1266 while (h->size[k] == j)
1267 h->code[k++] = (stbi__uint16) (code++);
1268 if (code-1 >= (1 << j)) return stbi__err("bad code lengths","Corrupt JPEG");
1269 }
1270 // compute largest code + 1 for this size, preshifted as needed later
1271 h->maxcode[j] = code << (16-j);
1272 code <<= 1;
1273 }
1274 h->maxcode[j] = 0xffffffff;
1275
1276 // build non-spec acceleration table; 255 is flag for not-accelerated
1277 memset(h->fast, 255, 1 << FAST_BITS);
1278 for (i=0; i < k; ++i) {
1279 int s = h->size[i];
1280 if (s <= FAST_BITS) {
1281 int c = h->code[i] << (FAST_BITS-s);
1282 int m = 1 << (FAST_BITS-s);
1283 for (j=0; j < m; ++j) {
1284 h->fast[c+j] = (stbi_uc) i;
1285 }
1286 }
1287 }
1288 return 1;
1289}
1290
1291// build a table that decodes both magnitude and value of small ACs in
1292// one go.
1293static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
1294{
1295 int i;
1296 for (i=0; i < (1 << FAST_BITS); ++i) {
1297 stbi_uc fast = h->fast[i];
1298 fast_ac[i] = 0;
1299 if (fast < 255) {
1300 int rs = h->values[fast];
1301 int run = (rs >> 4) & 15;
1302 int magbits = rs & 15;
1303 int len = h->size[fast];
1304
1305 if (magbits && len + magbits <= FAST_BITS) {
1306 // magnitude code followed by receive_extend code
1307 int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
1308 int m = 1 << (magbits - 1);
1309 if (k < m) k += (~0U << magbits) + 1;
1310 // if the result is small enough, we can fit it in fast_ac table
1311 if (k >= -128 && k <= 127)
1312 fast_ac[i] = (stbi__int16) ((k << 8) + (run << 4) + (len + magbits));
1313 }
1314 }
1315 }
1316}
1317
1318static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
1319{
1320 do {
1321 int b = j->nomore ? 0 : stbi__get8(j->s);
1322 if (b == 0xff) {
1323 int c = stbi__get8(j->s);
1324 while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes
1325 if (c != 0) {
1326 j->marker = (unsigned char) c;
1327 j->nomore = 1;
1328 return;
1329 }
1330 }
1331 j->code_buffer |= b << (24 - j->code_bits);
1332 j->code_bits += 8;
1333 } while (j->code_bits <= 24);
1334}
1335
1336// (1 << n) - 1
1337static stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
1338
1339// decode a jpeg huffman value from the bitstream
1340stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
1341{
1342 unsigned int temp;
1343 int c,k;
1344
1345 if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1346
1347 // look at the top FAST_BITS and determine what symbol ID it is,
1348 // if the code is <= FAST_BITS
1349 c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
1350 k = h->fast[c];
1351 if (k < 255) {
1352 int s = h->size[k];
1353 if (s > j->code_bits)
1354 return -1;
1355 j->code_buffer <<= s;
1356 j->code_bits -= s;
1357 return h->values[k];
1358 }
1359
1360 // naive test is to shift the code_buffer down so k bits are
1361 // valid, then test against maxcode. To speed this up, we've
1362 // preshifted maxcode left so that it has (16-k) 0s at the
1363 // end; in other words, regardless of the number of bits, it
1364 // wants to be compared against something shifted to have 16;
1365 // that way we don't need to shift inside the loop.
1366 temp = j->code_buffer >> 16;
1367 for (k=FAST_BITS+1 ; ; ++k)
1368 if (temp < h->maxcode[k])
1369 break;
1370 if (k == 17) {
1371 // error! code not found
1372 j->code_bits -= 16;
1373 return -1;
1374 }
1375
1376 if (k > j->code_bits)
1377 return -1;
1378
1379 // convert the huffman code to the symbol id
1380 c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
1381 STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
1382
1383 // convert the id to a symbol
1384 j->code_bits -= k;
1385 j->code_buffer <<= k;
1386 return h->values[c];
1387}
1388
// stbi__jbias[n] == 1 - 2^n: the offset added to an n-bit field whose leading
// bit is clear in order to obtain the negative value it encodes
static int const stbi__jbias[16] = {
       0,     -1,     -3,     -7,
     -15,    -31,    -63,   -127,
    -255,   -511,  -1023,  -2047,
   -4095,  -8191, -16383, -32767
};
1391
1392// combined JPEG 'receive' and JPEG 'extend', since baseline
1393// always extends everything it receives.
1394stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
1395{
1396 unsigned int k;
1397 int sgn;
1398 if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
1399
1400 sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB
1401 k = stbi_lrot(j->code_buffer, n);
1402 STBI_ASSERT(n >= 0 && n < (int) (sizeof(stbi__bmask)/sizeof(*stbi__bmask)));
1403 j->code_buffer = k & ~stbi__bmask[n];
1404 k &= stbi__bmask[n];
1405 j->code_bits -= n;
1406 return k + (stbi__jbias[n] & ~sgn);
1407}
1408
1409// get some unsigned bits
1410stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
1411{
1412 unsigned int k;
1413 if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
1414 k = stbi_lrot(j->code_buffer, n);
1415 j->code_buffer = k & ~stbi__bmask[n];
1416 k &= stbi__bmask[n];
1417 j->code_bits -= n;
1418 return k;
1419}
1420
1421stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
1422{
1423 unsigned int k;
1424 if (j->code_bits < 1) stbi__grow_buffer_unsafe(j);
1425 k = j->code_buffer;
1426 j->code_buffer <<= 1;
1427 --j->code_bits;
1428 return k & 0x80000000;
1429}
1430
// given a value that's at position X in the zigzag stream,
// where does it appear in the 8x8 matrix coded as row-major?
//
// The table has 15 extra entries beyond the 64 real ones, all mapping to
// position 63: the block decoders advance the zigzag index by a run of up to
// 15 before indexing, so the index can reach 63+15 on corrupt input and
// must still land inside the table and inside the 64-entry block.
static stbi_uc stbi__jpeg_dezigzag[64+15] =
{
    0,  1,  8, 16,  9,  2,  3, 10,
   17, 24, 32, 25, 18, 11,  4,  5,
   12, 19, 26, 33, 40, 48, 41, 34,
   27, 20, 13,  6,  7, 14, 21, 28,
   35, 42, 49, 56, 57, 50, 43, 36,
   29, 22, 15, 23, 30, 37, 44, 51,
   58, 59, 52, 45, 38, 31, 39, 46,
   53, 60, 61, 54, 47, 55, 62, 63,
   // let corrupt input sample past end
   63, 63, 63, 63, 63, 63, 63, 63,
   63, 63, 63, 63, 63, 63, 63
};
1447
1448// decode one 64-entry block--
1449static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant)
1450{
1451 int diff,dc,k;
1452 int t;
1453
1454 if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1455 t = stbi__jpeg_huff_decode(j, hdc);
1456 if (t < 0) return stbi__err("bad huffman code","Corrupt JPEG");
1457
1458 // 0 all the ac values now so we can do it 32-bits at a time
1459 memset(data,0,64*sizeof(data[0]));
1460
1461 diff = t ? stbi__extend_receive(j, t) : 0;
1462 dc = j->img_comp[b].dc_pred + diff;
1463 j->img_comp[b].dc_pred = dc;
1464 data[0] = (short) (dc * dequant[0]);
1465
1466 // decode AC components, see JPEG spec
1467 k = 1;
1468 do {
1469 unsigned int zig;
1470 int c,r,s;
1471 if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1472 c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
1473 r = fac[c];
1474 if (r) { // fast-AC path
1475 k += (r >> 4) & 15; // run
1476 s = r & 15; // combined length
1477 j->code_buffer <<= s;
1478 j->code_bits -= s;
1479 // decode into unzigzag'd location
1480 zig = stbi__jpeg_dezigzag[k++];
1481 data[zig] = (short) ((r >> 8) * dequant[zig]);
1482 } else {
1483 int rs = stbi__jpeg_huff_decode(j, hac);
1484 if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
1485 s = rs & 15;
1486 r = rs >> 4;
1487 if (s == 0) {
1488 if (rs != 0xf0) break; // end block
1489 k += 16;
1490 } else {
1491 k += r;
1492 // decode into unzigzag'd location
1493 zig = stbi__jpeg_dezigzag[k++];
1494 data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]);
1495 }
1496 }
1497 } while (k < 64);
1498 return 1;
1499}
1500
1501static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b)
1502{
1503 int diff,dc;
1504 int t;
1505 if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
1506
1507 if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1508
1509 if (j->succ_high == 0) {
1510 // first scan for DC coefficient, must be first
1511 memset(data,0,64*sizeof(data[0])); // 0 all the ac values now
1512 t = stbi__jpeg_huff_decode(j, hdc);
1513 diff = t ? stbi__extend_receive(j, t) : 0;
1514
1515 dc = j->img_comp[b].dc_pred + diff;
1516 j->img_comp[b].dc_pred = dc;
1517 data[0] = (short) (dc << j->succ_low);
1518 } else {
1519 // refinement scan for DC coefficient
1520 if (stbi__jpeg_get_bit(j))
1521 data[0] += (short) (1 << j->succ_low);
1522 }
1523 return 1;
1524}
1525
1526// @OPTIMIZE: store non-zigzagged during the decode passes,
1527// and only de-zigzag when dequantizing
1528static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac)
1529{
1530 int k;
1531 if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
1532
1533 if (j->succ_high == 0) {
1534 int shift = j->succ_low;
1535
1536 if (j->eob_run) {
1537 --j->eob_run;
1538 return 1;
1539 }
1540
1541 k = j->spec_start;
1542 do {
1543 unsigned int zig;
1544 int c,r,s;
1545 if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1546 c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
1547 r = fac[c];
1548 if (r) { // fast-AC path
1549 k += (r >> 4) & 15; // run
1550 s = r & 15; // combined length
1551 j->code_buffer <<= s;
1552 j->code_bits -= s;
1553 zig = stbi__jpeg_dezigzag[k++];
1554 data[zig] = (short) ((r >> 8) << shift);
1555 } else {
1556 int rs = stbi__jpeg_huff_decode(j, hac);
1557 if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
1558 s = rs & 15;
1559 r = rs >> 4;
1560 if (s == 0) {
1561 if (r < 15) {
1562 j->eob_run = (1 << r);
1563 if (r)
1564 j->eob_run += stbi__jpeg_get_bits(j, r);
1565 --j->eob_run;
1566 break;
1567 }
1568 k += 16;
1569 } else {
1570 k += r;
1571 zig = stbi__jpeg_dezigzag[k++];
1572 data[zig] = (short) (stbi__extend_receive(j,s) << shift);
1573 }
1574 }
1575 } while (k <= j->spec_end);
1576 } else {
1577 // refinement scan for these AC coefficients
1578
1579 short bit = (short) (1 << j->succ_low);
1580
1581 if (j->eob_run) {
1582 --j->eob_run;
1583 for (k = j->spec_start; k <= j->spec_end; ++k) {
1584 short *p = &data[stbi__jpeg_dezigzag[k]];
1585 if (*p != 0)
1586 if (stbi__jpeg_get_bit(j))
1587 if ((*p & bit)==0) {
1588 if (*p > 0)
1589 *p += bit;
1590 else
1591 *p -= bit;
1592 }
1593 }
1594 } else {
1595 k = j->spec_start;
1596 do {
1597 int r,s;
1598 int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
1599 if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
1600 s = rs & 15;
1601 r = rs >> 4;
1602 if (s == 0) {
1603 if (r < 15) {
1604 j->eob_run = (1 << r) - 1;
1605 if (r)
1606 j->eob_run += stbi__jpeg_get_bits(j, r);
1607 r = 64; // force end of block
1608 } else {
1609 // r=15 s=0 should write 16 0s, so we just do
1610 // a run of 15 0s and then write s (which is 0),
1611 // so we don't have to do anything special here
1612 }
1613 } else {
1614 if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG");
1615 // sign bit
1616 if (stbi__jpeg_get_bit(j))
1617 s = bit;
1618 else
1619 s = -bit;
1620 }
1621
1622 // advance by r
1623 while (k <= j->spec_end) {
1624 short *p = &data[stbi__jpeg_dezigzag[k++]];
1625 if (*p != 0) {
1626 if (stbi__jpeg_get_bit(j))
1627 if ((*p & bit)==0) {
1628 if (*p > 0)
1629 *p += bit;
1630 else
1631 *p -= bit;
1632 }
1633 } else {
1634 if (r == 0) {
1635 *p = (short) s;
1636 break;
1637 }
1638 --r;
1639 }
1640 }
1641 } while (k <= j->spec_end);
1642 }
1643 }
1644 return 1;
1645}
1646
1647// take a -128..127 value and stbi__clamp it and convert to 0..255
1648stbi_inline static stbi_uc stbi__clamp(int x)
1649{
1650 // trick to use a single test to catch both cases
1651 if ((unsigned int) x > 255) {
1652 if (x < 0) return 0;
1653 if (x > 255) return 255;
1654 }
1655 return (stbi_uc) x;
1656}
1657
// Fixed-point helpers for the integer IDCT (12 fractional bits).
// stbi__f2f converts a floating-point constant to fixed point with rounding.
#define stbi__f2f(x) ((int) (((x) * 4096 + 0.5)))
// stbi__fsh scales an integer up to the same fixed-point format. It is
// applied to sums and differences of coefficients, which can be negative, so
// it multiplies rather than left-shifting (left shift of a negative int is
// undefined behavior).
#define stbi__fsh(x) ((x) * 4096)
1660
// derived from jidctint -- DCT_ISLOW
//
// One 8-point inverse DCT pass over the inputs s0..s7, in fixed point.
// The macro DECLARES its own locals (t0..t3, p1..p5, x0..x3), so it must be
// expanded at most once per scope and before any other statement that needs
// those names. On exit:
//   x0..x3  hold the even-part results (built from s0, s2, s4, s6)
//   t0..t3  hold the odd-part results  (built from s1, s3, s5, s7)
// both still scaled up by 1<<12. The caller combines them as x0+/-t3,
// x1+/-t2, x2+/-t1, x3+/-t0 and shifts the scale back out.
#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
   int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
   p2 = s2; \
   p3 = s6; \
   p1 = (p2+p3) * stbi__f2f(0.5411961f); \
   t2 = p1 + p3*stbi__f2f(-1.847759065f); \
   t3 = p1 + p2*stbi__f2f( 0.765366865f); \
   p2 = s0; \
   p3 = s4; \
   t0 = stbi__fsh(p2+p3); \
   t1 = stbi__fsh(p2-p3); \
   x0 = t0+t3; \
   x3 = t0-t3; \
   x1 = t1+t2; \
   x2 = t1-t2; \
   t0 = s7; \
   t1 = s5; \
   t2 = s3; \
   t3 = s1; \
   p3 = t0+t2; \
   p4 = t1+t3; \
   p1 = t0+t3; \
   p2 = t1+t2; \
   p5 = (p3+p4)*stbi__f2f( 1.175875602f); \
   t0 = t0*stbi__f2f( 0.298631336f); \
   t1 = t1*stbi__f2f( 2.053119869f); \
   t2 = t2*stbi__f2f( 3.072711026f); \
   t3 = t3*stbi__f2f( 1.501321110f); \
   p1 = p5 + p1*stbi__f2f(-0.899976223f); \
   p2 = p5 + p2*stbi__f2f(-2.562915447f); \
   p3 = p3*stbi__f2f(-1.961570560f); \
   p4 = p4*stbi__f2f(-0.390180644f); \
   t3 += p1+p4; \
   t2 += p2+p3; \
   t1 += p2+p4; \
   t0 += p1+p3;
1698
1699static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
1700{
1701 int i,val[64],*v=val;
1702 stbi_uc *o;
1703 short *d = data;
1704
1705 // columns
1706 for (i=0; i < 8; ++i,++d, ++v) {
1707 // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
1708 if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
1709 && d[40]==0 && d[48]==0 && d[56]==0) {
1710 // no shortcut 0 seconds
1711 // (1|2|3|4|5|6|7)==0 0 seconds
1712 // all separate -0.047 seconds
1713 // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds
1714 int dcterm = d[0] << 2;
1715 v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
1716 } else {
1717 STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56])
1718 // constants scaled things up by 1<<12; let's bring them back
1719 // down, but keep 2 extra bits of precision
1720 x0 += 512; x1 += 512; x2 += 512; x3 += 512;
1721 v[ 0] = (x0+t3) >> 10;
1722 v[56] = (x0-t3) >> 10;
1723 v[ 8] = (x1+t2) >> 10;
1724 v[48] = (x1-t2) >> 10;
1725 v[16] = (x2+t1) >> 10;
1726 v[40] = (x2-t1) >> 10;
1727 v[24] = (x3+t0) >> 10;
1728 v[32] = (x3-t0) >> 10;
1729 }
1730 }
1731
1732 for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) {
1733 // no fast case since the first 1D IDCT spread components out
1734 STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
1735 // constants scaled things up by 1<<12, plus we had 1<<2 from first
1736 // loop, plus horizontal and vertical each scale by sqrt(8) so together
1737 // we've got an extra 1<<3, so 1<<17 total we need to remove.
1738 // so we want to round that, which means adding 0.5 * 1<<17,
1739 // aka 65536. Also, we'll end up with -128 to 127 that we want
1740 // to encode as 0..255 by adding 128, so we'll add that before the shift
1741 x0 += 65536 + (128<<17);
1742 x1 += 65536 + (128<<17);
1743 x2 += 65536 + (128<<17);
1744 x3 += 65536 + (128<<17);
1745 // tried computing the shifts into temps, or'ing the temps to see
1746 // if any were out of range, but that was slower
1747 o[0] = stbi__clamp((x0+t3) >> 17);
1748 o[7] = stbi__clamp((x0-t3) >> 17);
1749 o[1] = stbi__clamp((x1+t2) >> 17);
1750 o[6] = stbi__clamp((x1-t2) >> 17);
1751 o[2] = stbi__clamp((x2+t1) >> 17);
1752 o[5] = stbi__clamp((x2-t1) >> 17);
1753 o[3] = stbi__clamp((x3+t0) >> 17);
1754 o[4] = stbi__clamp((x3-t0) >> 17);
1755 }
1756}
1757
1758#ifdef STBI_SSE2
// sse2 integer IDCT. not the fastest possible implementation but it
// produces bit-identical results to the generic C version so it's
// fully "transparent".
//
//   out         destination; 8 bytes are stored per row for 8 rows
//   out_stride  distance in bytes between successive output rows
//   data        64 coefficients in row-major order, loaded with
//               _mm_load_si128 (the aligned-load intrinsic), so the buffer
//               must be 16-byte aligned
//
// Structure: column pass -> 16-bit 8x8 transpose -> row pass -> pack to
// unsigned bytes -> 8-bit transpose -> store. The helper macros below are
// local to this function and are #undef'd at the end.
static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
{
   // This is constructed to match our regular (generic) integer IDCT exactly.
   __m128i row0, row1, row2, row3, row4, row5, row6, row7;
   __m128i tmp; // scratch register used by the interleave macros

   // dot product constant: even elems=x, odd elems=y
   #define dct_const(x,y)  _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y))

   // out(0) = c0[even]*x + c0[odd]*y   (c0, x, y 16-bit, out 32-bit)
   // out(1) = c1[even]*x + c1[odd]*y
   // (declares the temporaries c0##lo/c0##hi and the four out*_l/_h results)
   #define dct_rot(out0,out1, x,y,c0,c1) \
      __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \
      __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \
      __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
      __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
      __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
      __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)

   // out = in << 12  (in 16-bit, out 32-bit)
   // (the value is placed in the upper half of each 32-bit lane, i.e. << 16,
   // and then arithmetically shifted right by 4)
   #define dct_widen(out, in) \
      __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
      __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)

   // wide add
   #define dct_wadd(out, a, b) \
      __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
      __m128i out##_h = _mm_add_epi32(a##_h, b##_h)

   // wide sub
   #define dct_wsub(out, a, b) \
      __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
      __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)

   // butterfly a/b, add bias, then shift by "s" and pack
   #define dct_bfly32o(out0, out1, a,b,bias,s) \
      { \
         __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
         __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
         dct_wadd(sum, abiased, b); \
         dct_wsub(dif, abiased, b); \
         out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
         out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
      }

   // 8-bit interleave step (for transposes)
   #define dct_interleave8(a, b) \
      tmp = a; \
      a = _mm_unpacklo_epi8(a, b); \
      b = _mm_unpackhi_epi8(tmp, b)

   // 16-bit interleave step (for transposes)
   #define dct_interleave16(a, b) \
      tmp = a; \
      a = _mm_unpacklo_epi16(a, b); \
      b = _mm_unpackhi_epi16(tmp, b)

   // one full 1-D IDCT pass over row0..row7 in place; "bias" is the rounding
   // constant added before the final arithmetic shift by "shift"
   #define dct_pass(bias,shift) \
      { \
         /* even part */ \
         dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \
         __m128i sum04 = _mm_add_epi16(row0, row4); \
         __m128i dif04 = _mm_sub_epi16(row0, row4); \
         dct_widen(t0e, sum04); \
         dct_widen(t1e, dif04); \
         dct_wadd(x0, t0e, t3e); \
         dct_wsub(x3, t0e, t3e); \
         dct_wadd(x1, t1e, t2e); \
         dct_wsub(x2, t1e, t2e); \
         /* odd part */ \
         dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \
         dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \
         __m128i sum17 = _mm_add_epi16(row1, row7); \
         __m128i sum35 = _mm_add_epi16(row3, row5); \
         dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \
         dct_wadd(x4, y0o, y4o); \
         dct_wadd(x5, y1o, y5o); \
         dct_wadd(x6, y2o, y5o); \
         dct_wadd(x7, y3o, y4o); \
         dct_bfly32o(row0,row7, x0,x7,bias,shift); \
         dct_bfly32o(row1,row6, x1,x6,bias,shift); \
         dct_bfly32o(row2,row5, x2,x5,bias,shift); \
         dct_bfly32o(row3,row4, x3,x4,bias,shift); \
      }

   // rotation constants: the same stbi__f2f values the generic STBI__IDCT_1D
   // uses, pre-combined into (even,odd) pairs for the multiply-add in dct_rot
   __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
   __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f));
   __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));
   __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
   __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f));
   __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f));
   __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f));
   __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f));

   // rounding biases in column/row passes, see stbi__idct_block for explanation.
   __m128i bias_0 = _mm_set1_epi32(512);
   __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17));

   // load
   row0 = _mm_load_si128((const __m128i *) (data + 0*8));
   row1 = _mm_load_si128((const __m128i *) (data + 1*8));
   row2 = _mm_load_si128((const __m128i *) (data + 2*8));
   row3 = _mm_load_si128((const __m128i *) (data + 3*8));
   row4 = _mm_load_si128((const __m128i *) (data + 4*8));
   row5 = _mm_load_si128((const __m128i *) (data + 5*8));
   row6 = _mm_load_si128((const __m128i *) (data + 6*8));
   row7 = _mm_load_si128((const __m128i *) (data + 7*8));

   // column pass
   dct_pass(bias_0, 10);

   {
      // 16bit 8x8 transpose pass 1
      dct_interleave16(row0, row4);
      dct_interleave16(row1, row5);
      dct_interleave16(row2, row6);
      dct_interleave16(row3, row7);

      // transpose pass 2
      dct_interleave16(row0, row2);
      dct_interleave16(row1, row3);
      dct_interleave16(row4, row6);
      dct_interleave16(row5, row7);

      // transpose pass 3
      dct_interleave16(row0, row1);
      dct_interleave16(row2, row3);
      dct_interleave16(row4, row5);
      dct_interleave16(row6, row7);
   }

   // row pass
   dct_pass(bias_1, 17);

   {
      // pack
      // (unsigned-saturating pack: this is where values are clamped to 0..255)
      __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
      __m128i p1 = _mm_packus_epi16(row2, row3);
      __m128i p2 = _mm_packus_epi16(row4, row5);
      __m128i p3 = _mm_packus_epi16(row6, row7);

      // 8bit 8x8 transpose pass 1
      dct_interleave8(p0, p2); // a0e0a1e1...
      dct_interleave8(p1, p3); // c0g0c1g1...

      // transpose pass 2
      dct_interleave8(p0, p1); // a0c0e0g0...
      dct_interleave8(p2, p3); // b0d0f0h0...

      // transpose pass 3
      dct_interleave8(p0, p2); // a0b0c0d0...
      dct_interleave8(p1, p3); // a4b4c4d4...

      // store
      // each register holds two output rows; the 0x4e shuffle swaps the 64-bit
      // halves so the second row can be written with the same low-half store
      _mm_storel_epi64((__m128i *) out, p0); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride;
      _mm_storel_epi64((__m128i *) out, p2); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride;
      _mm_storel_epi64((__m128i *) out, p1); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride;
      _mm_storel_epi64((__m128i *) out, p3); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e));
   }

#undef dct_const
#undef dct_rot
#undef dct_widen
#undef dct_wadd
#undef dct_wsub
#undef dct_bfly32o
#undef dct_interleave8
#undef dct_interleave16
#undef dct_pass
}
1936
1937#endif // STBI_SSE2
1938
1939#ifdef STBI_NEON
1940
// NEON integer IDCT. should produce bit-identical
// results to the generic C version.
//
//   out         destination; eight rows of 8 samples are stored, with
//               consecutive rows out_stride elements apart
//   out_stride  distance (in stbi_uc elements) between output rows
//   data        the 64 16-bit coefficients of one 8x8 block, loaded as
//               eight vectors of eight lanes each
//
// Structure: column pass -> 16-bit transpose -> row pass -> narrow to
// 8 bits -> 8-bit transpose -> store. All helper macros are local to this
// function and are #undef'd before it ends.
static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
{
   int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;

   // rotation constants, broadcast to all four lanes.
   // stbi__f2f is defined outside this chunk; presumably it converts to the
   // same 12-bit fixed point that dct_widen's "<< 12" uses -- TODO confirm.
   int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
   int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
   int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f));
   int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f));
   int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
   int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
   int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
   int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
   int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f));
   int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f));
   int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f));
   int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f));

// Every "wide" value below is a pair of 32-bit vectors named <name>_l and
// <name>_h, holding the low and high four lanes of an 8-lane quantity.

// out = inq * coeff, widened from 16 to 32 bits per lane
#define dct_long_mul(out, inq, coeff) \
   int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
   int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)

// out = acc + inq * coeff (widening multiply-accumulate)
#define dct_long_mac(out, acc, inq, coeff) \
   int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
   int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)

// out = inq << 12, widened to 32 bits per lane
#define dct_widen(out, inq) \
   int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
   int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)

// wide add
#define dct_wadd(out, a, b) \
   int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
   int32x4_t out##_h = vaddq_s32(a##_h, b##_h)

// wide sub
#define dct_wsub(out, a, b) \
   int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
   int32x4_t out##_h = vsubq_s32(a##_h, b##_h)

// butterfly a/b, then shift using "shiftop" by "s" and pack
#define dct_bfly32o(out0,out1, a,b,shiftop,s) \
   { \
      dct_wadd(sum, a, b); \
      dct_wsub(dif, a, b); \
      out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
      out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
   }

// one 1-D pass over row0..row7 in place: reads all eight vectors, then
// overwrites them with the butterflied, shifted and narrowed results
#define dct_pass(shiftop, shift) \
   { \
      /* even part */ \
      int16x8_t sum26 = vaddq_s16(row2, row6); \
      dct_long_mul(p1e, sum26, rot0_0); \
      dct_long_mac(t2e, p1e, row6, rot0_1); \
      dct_long_mac(t3e, p1e, row2, rot0_2); \
      int16x8_t sum04 = vaddq_s16(row0, row4); \
      int16x8_t dif04 = vsubq_s16(row0, row4); \
      dct_widen(t0e, sum04); \
      dct_widen(t1e, dif04); \
      dct_wadd(x0, t0e, t3e); \
      dct_wsub(x3, t0e, t3e); \
      dct_wadd(x1, t1e, t2e); \
      dct_wsub(x2, t1e, t2e); \
      /* odd part */ \
      int16x8_t sum15 = vaddq_s16(row1, row5); \
      int16x8_t sum17 = vaddq_s16(row1, row7); \
      int16x8_t sum35 = vaddq_s16(row3, row5); \
      int16x8_t sum37 = vaddq_s16(row3, row7); \
      int16x8_t sumodd = vaddq_s16(sum17, sum35); \
      dct_long_mul(p5o, sumodd, rot1_0); \
      dct_long_mac(p1o, p5o, sum17, rot1_1); \
      dct_long_mac(p2o, p5o, sum35, rot1_2); \
      dct_long_mul(p3o, sum37, rot2_0); \
      dct_long_mul(p4o, sum15, rot2_1); \
      dct_wadd(sump13o, p1o, p3o); \
      dct_wadd(sump24o, p2o, p4o); \
      dct_wadd(sump23o, p2o, p3o); \
      dct_wadd(sump14o, p1o, p4o); \
      dct_long_mac(x4, sump13o, row7, rot3_0); \
      dct_long_mac(x5, sump24o, row5, rot3_1); \
      dct_long_mac(x6, sump23o, row3, rot3_2); \
      dct_long_mac(x7, sump14o, row1, rot3_3); \
      dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \
      dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \
      dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \
      dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \
   }

   // load
   row0 = vld1q_s16(data + 0*8);
   row1 = vld1q_s16(data + 1*8);
   row2 = vld1q_s16(data + 2*8);
   row3 = vld1q_s16(data + 3*8);
   row4 = vld1q_s16(data + 4*8);
   row5 = vld1q_s16(data + 5*8);
   row6 = vld1q_s16(data + 6*8);
   row7 = vld1q_s16(data + 7*8);

   // add DC bias
   // (1024 is added to lane 0 of row0 only, i.e. to data[0];
   // NOTE(review): presumably this folds the output level shift into the
   // DC term -- compare with the generic C version to confirm)
   row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));

   // column pass
   dct_pass(vrshrn_n_s32, 10);

   // 16bit 8x8 transpose
   {
// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
// whether compilers actually get this is another story, sadly.
#define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; }
#define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); }
#define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); }

      // pass 1
      dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
      dct_trn16(row2, row3);
      dct_trn16(row4, row5);
      dct_trn16(row6, row7);

      // pass 2
      dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
      dct_trn32(row1, row3);
      dct_trn32(row4, row6);
      dct_trn32(row5, row7);

      // pass 3
      dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
      dct_trn64(row1, row5);
      dct_trn64(row2, row6);
      dct_trn64(row3, row7);

#undef dct_trn16
#undef dct_trn32
#undef dct_trn64
   }

   // row pass
   // vrshrn_n_s32 only supports shifts up to 16, we need
   // 17. so do a non-rounding shift of 16 first then follow
   // up with a rounding shift by 1.
   dct_pass(vshrn_n_s32, 16);

   {
      // pack and round
      // (the remaining rounding shift by 1, saturated and narrowed to
      // unsigned 8-bit lanes)
      uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
      uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
      uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
      uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
      uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
      uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
      uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
      uint8x8_t p7 = vqrshrun_n_s16(row7, 1);

      // again, these can translate into one instruction, but often don't.
#define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; }
#define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); }
#define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); }

      // sadly can't use interleaved stores here since we only write
      // 8 bytes to each scan line!

      // 8x8 8-bit transpose pass 1
      dct_trn8_8(p0, p1);
      dct_trn8_8(p2, p3);
      dct_trn8_8(p4, p5);
      dct_trn8_8(p6, p7);

      // pass 2
      dct_trn8_16(p0, p2);
      dct_trn8_16(p1, p3);
      dct_trn8_16(p4, p6);
      dct_trn8_16(p5, p7);

      // pass 3
      dct_trn8_32(p0, p4);
      dct_trn8_32(p1, p5);
      dct_trn8_32(p2, p6);
      dct_trn8_32(p3, p7);

      // store
      // (eight 8-byte rows; out is not advanced after the last one)
      vst1_u8(out, p0); out += out_stride;
      vst1_u8(out, p1); out += out_stride;
      vst1_u8(out, p2); out += out_stride;
      vst1_u8(out, p3); out += out_stride;
      vst1_u8(out, p4); out += out_stride;
      vst1_u8(out, p5); out += out_stride;
      vst1_u8(out, p6); out += out_stride;
      vst1_u8(out, p7);

#undef dct_trn8_8
#undef dct_trn8_16
#undef dct_trn8_32
   }

#undef dct_long_mul
#undef dct_long_mac
#undef dct_widen
#undef dct_wadd
#undef dct_wsub
#undef dct_bfly32o
#undef dct_pass
}
2144
2145#endif // STBI_NEON
2146
2147#define STBI__MARKER_none 0xff
2148// if there's a pending marker from the entropy stream, return that
2149// otherwise, fetch from the stream and get a marker. if there's no
2150// marker, return 0xff, which is never a valid marker value
2151static stbi_uc stbi__get_marker(stbi__jpeg *j)
2152{
2153 stbi_uc x;
2154 if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; }
2155 x = stbi__get8(j->s);
2156 if (x != 0xff) return STBI__MARKER_none;
2157 while (x == 0xff)
2158 x = stbi__get8(j->s); // consume repeated 0xff fill bytes
2159 return x;
2160}
2161
2162// in each scan, we'll have scan_n components, and the order
2163// of the components is specified by order[]
2164#define STBI__RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7)
2165
2166// after a restart interval, stbi__jpeg_reset the entropy decoder and
2167// the dc prediction
2168static void stbi__jpeg_reset(stbi__jpeg *j)
2169{
2170 j->code_bits = 0;
2171 j->code_buffer = 0;
2172 j->nomore = 0;
2173 j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0;
2174 j->marker = STBI__MARKER_none;
2175 j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
2176 j->eob_run = 0;
2177 // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
2178 // since we don't even allow 1<<30 pixels
2179}
2180
2181static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
2182{
2183 stbi__jpeg_reset(z);
2184 if (!z->progressive) {
2185 if (z->scan_n == 1) {
2186 int i,j;
2187 STBI_SIMD_ALIGN(short, data[64]);
2188 int n = z->order[0];
2189 // non-interleaved data, we just need to process one block at a time,
2190 // in trivial scanline order
2191 // number of blocks to do just depends on how many actual "pixels" this
2192 // component has, independent of interleaved MCU blocking and such
2193 int w = (z->img_comp[n].x+7) >> 3;
2194 int h = (z->img_comp[n].y+7) >> 3;
2195 for (j=0; j < h; ++j) {
2196 for (i=0; i < w; ++i) {
2197 int ha = z->img_comp[n].ha;
2198 if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
2199 z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
2200 // every data block is an MCU, so countdown the restart interval
2201 if (--z->todo <= 0) {
2202 if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2203 // if it's NOT a restart, then just bail, so we get corrupt data
2204 // rather than no data
2205 if (!STBI__RESTART(z->marker)) return 1;
2206 stbi__jpeg_reset(z);
2207 }
2208 }
2209 }
2210 return 1;
2211 } else { // interleaved
2212 int i,j,k,x,y;
2213 STBI_SIMD_ALIGN(short, data[64]);
2214 for (j=0; j < z->img_mcu_y; ++j) {
2215 for (i=0; i < z->img_mcu_x; ++i) {
2216 // scan an interleaved mcu... process scan_n components in order
2217 for (k=0; k < z->scan_n; ++k) {
2218 int n = z->order[k];
2219 // scan out an mcu's worth of this component; that's just determined
2220 // by the basic H and V specified for the component
2221 for (y=0; y < z->img_comp[n].v; ++y) {
2222 for (x=0; x < z->img_comp[n].h; ++x) {
2223 int x2 = (i*z->img_comp[n].h + x)*8;
2224 int y2 = (j*z->img_comp[n].v + y)*8;
2225 int ha = z->img_comp[n].ha;
2226 if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
2227 z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data);
2228 }
2229 }
2230 }
2231 // after all interleaved components, that's an interleaved MCU,
2232 // so now count down the restart interval
2233 if (--z->todo <= 0) {
2234 if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2235 if (!STBI__RESTART(z->marker)) return 1;
2236 stbi__jpeg_reset(z);
2237 }
2238 }
2239 }
2240 return 1;
2241 }
2242 } else {
2243 if (z->scan_n == 1) {
2244 int i,j;
2245 int n = z->order[0];
2246 // non-interleaved data, we just need to process one block at a time,
2247 // in trivial scanline order
2248 // number of blocks to do just depends on how many actual "pixels" this
2249 // component has, independent of interleaved MCU blocking and such
2250 int w = (z->img_comp[n].x+7) >> 3;
2251 int h = (z->img_comp[n].y+7) >> 3;
2252 for (j=0; j < h; ++j) {
2253 for (i=0; i < w; ++i) {
2254 short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
2255 if (z->spec_start == 0) {
2256 if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
2257 return 0;
2258 } else {
2259 int ha = z->img_comp[n].ha;
2260 if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha]))
2261 return 0;
2262 }
2263 // every data block is an MCU, so countdown the restart interval
2264 if (--z->todo <= 0) {
2265 if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2266 if (!STBI__RESTART(z->marker)) return 1;
2267 stbi__jpeg_reset(z);
2268 }
2269 }
2270 }
2271 return 1;
2272 } else { // interleaved
2273 int i,j,k,x,y;
2274 for (j=0; j < z->img_mcu_y; ++j) {
2275 for (i=0; i < z->img_mcu_x; ++i) {
2276 // scan an interleaved mcu... process scan_n components in order
2277 for (k=0; k < z->scan_n; ++k) {
2278 int n = z->order[k];
2279 // scan out an mcu's worth of this component; that's just determined
2280 // by the basic H and V specified for the component
2281 for (y=0; y < z->img_comp[n].v; ++y) {
2282 for (x=0; x < z->img_comp[n].h; ++x) {
2283 int x2 = (i*z->img_comp[n].h + x);
2284 int y2 = (j*z->img_comp[n].v + y);
2285 short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w);
2286 if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
2287 return 0;
2288 }
2289 }
2290 }
2291 // after all interleaved components, that's an interleaved MCU,
2292 // so now count down the restart interval
2293 if (--z->todo <= 0) {
2294 if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2295 if (!STBI__RESTART(z->marker)) return 1;
2296 stbi__jpeg_reset(z);
2297 }
2298 }
2299 }
2300 return 1;
2301 }
2302 }
2303}
2304
2305static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant)
2306{
2307 int i;
2308 for (i=0; i < 64; ++i)
2309 data[i] *= dequant[i];
2310}
2311
2312static void stbi__jpeg_finish(stbi__jpeg *z)
2313{
2314 if (z->progressive) {
2315 // dequantize and idct the data
2316 int i,j,n;
2317 for (n=0; n < z->s->img_n; ++n) {
2318 int w = (z->img_comp[n].x+7) >> 3;
2319 int h = (z->img_comp[n].y+7) >> 3;
2320 for (j=0; j < h; ++j) {
2321 for (i=0; i < w; ++i) {
2322 short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
2323 stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]);
2324 z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
2325 }
2326 }
2327 }
2328 }
2329}
2330
2331static int stbi__process_marker(stbi__jpeg *z, int m)
2332{
2333 int L;
2334 switch (m) {
2335 case STBI__MARKER_none: // no marker found
2336 return stbi__err("expected marker","Corrupt JPEG");
2337
2338 case 0xDD: // DRI - specify restart interval
2339 if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG");
2340 z->restart_interval = stbi__get16be(z->s);
2341 return 1;
2342
2343 case 0xDB: // DQT - define quantization table
2344 L = stbi__get16be(z->s)-2;
2345 while (L > 0) {
2346 int q = stbi__get8(z->s);
2347 int p = q >> 4, sixteen = (p != 0);
2348 int t = q & 15,i;
2349 if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG");
2350 if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG");
2351
2352 for (i=0; i < 64; ++i)
2353 z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s));
2354 L -= (sixteen ? 129 : 65);
2355 }
2356 return L==0;
2357
2358 case 0xC4: // DHT - define huffman table
2359 L = stbi__get16be(z->s)-2;
2360 while (L > 0) {
2361 stbi_uc *v;
2362 int sizes[16],i,n=0;
2363 int q = stbi__get8(z->s);
2364 int tc = q >> 4;
2365 int th = q & 15;
2366 if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG");
2367 for (i=0; i < 16; ++i) {
2368 sizes[i] = stbi__get8(z->s);
2369 n += sizes[i];
2370 }
2371 L -= 17;
2372 if (tc == 0) {
2373 if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0;
2374 v = z->huff_dc[th].values;
2375 } else {
2376 if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0;
2377 v = z->huff_ac[th].values;
2378 }
2379 for (i=0; i < n; ++i)
2380 v[i] = stbi__get8(z->s);
2381 if (tc != 0)
2382 stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
2383 L -= n;
2384 }
2385 return L==0;
2386 }
2387
2388 // check for comment block or APP blocks
2389 if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
2390 L = stbi__get16be(z->s);
2391 if (L < 2) {
2392 if (m == 0xFE)
2393 return stbi__err("bad COM len","Corrupt JPEG");
2394 else
2395 return stbi__err("bad APP len","Corrupt JPEG");
2396 }
2397 L -= 2;
2398
2399 if (m == 0xE0 && L >= 5) { // JFIF APP0 segment
2400 static const unsigned char tag[5] = {'J','F','I','F','\0'};
2401 int ok = 1;
2402 int i;
2403 for (i=0; i < 5; ++i)
2404 if (stbi__get8(z->s) != tag[i])
2405 ok = 0;
2406 L -= 5;
2407 if (ok)
2408 z->jfif = 1;
2409 } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment
2410 static const unsigned char tag[6] = {'A','d','o','b','e','\0'};
2411 int ok = 1;
2412 int i;
2413 for (i=0; i < 6; ++i)
2414 if (stbi__get8(z->s) != tag[i])
2415 ok = 0;
2416 L -= 6;
2417 if (ok) {
2418 stbi__get8(z->s); // version
2419 stbi__get16be(z->s); // flags0
2420 stbi__get16be(z->s); // flags1
2421 z->app14_color_transform = stbi__get8(z->s); // color transform
2422 L -= 6;
2423 }
2424 }
2425
2426 stbi__skip(z->s, L);
2427 return 1;
2428 }
2429
2430 return stbi__err("unknown marker","Corrupt JPEG");
2431}
2432
2433// after we see SOS
2434static int stbi__process_scan_header(stbi__jpeg *z)
2435{
2436 int i;
2437 int Ls = stbi__get16be(z->s);
2438 z->scan_n = stbi__get8(z->s);
2439 if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG");
2440 if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG");
2441 for (i=0; i < z->scan_n; ++i) {
2442 int id = stbi__get8(z->s), which;
2443 int q = stbi__get8(z->s);
2444 for (which = 0; which < z->s->img_n; ++which)
2445 if (z->img_comp[which].id == id)
2446 break;
2447 if (which == z->s->img_n) return 0; // no match
2448 z->img_comp[which].hd = q >> 4; if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG");
2449 z->img_comp[which].ha = q & 15; if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG");
2450 z->order[i] = which;
2451 }
2452
2453 {
2454 int aa;
2455 z->spec_start = stbi__get8(z->s);
2456 z->spec_end = stbi__get8(z->s); // should be 63, but might be 0
2457 aa = stbi__get8(z->s);
2458 z->succ_high = (aa >> 4);
2459 z->succ_low = (aa & 15);
2460 if (z->progressive) {
2461 if (z->spec_start > 63 || z->spec_end > 63 || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
2462 return stbi__err("bad SOS", "Corrupt JPEG");
2463 } else {
2464 if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG");
2465 if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG");
2466 z->spec_end = 63;
2467 }
2468 }
2469
2470 return 1;
2471}
2472
2473static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why)
2474{
2475 int i;
2476 for (i=0; i < ncomp; ++i) {
2477 if (z->img_comp[i].raw_data) {
2478 STBI_FREE(z->img_comp[i].raw_data);
2479 z->img_comp[i].raw_data = NULL;
2480 z->img_comp[i].data = NULL;
2481 }
2482 if (z->img_comp[i].raw_coeff) {
2483 STBI_FREE(z->img_comp[i].raw_coeff);
2484 z->img_comp[i].raw_coeff = 0;
2485 z->img_comp[i].coeff = 0;
2486 }
2487 if (z->img_comp[i].linebuf) {
2488 STBI_FREE(z->img_comp[i].linebuf);
2489 z->img_comp[i].linebuf = NULL;
2490 }
2491 }
2492 return why;
2493}
2494
2495static int stbi__process_frame_header(stbi__jpeg *z, int scan)
2496{
2497 stbi__context *s = z->s;
2498 int Lf,p,i,q, h_max=1,v_max=1,c;
2499 Lf = stbi__get16be(s); if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG
2500 p = stbi__get8(s); if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
2501 s->img_y = stbi__get16be(s); if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
2502 s->img_x = stbi__get16be(s); if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires
2503 c = stbi__get8(s);
2504 if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG");
2505 s->img_n = c;
2506 for (i=0; i < c; ++i) {
2507 z->img_comp[i].data = NULL;
2508 z->img_comp[i].linebuf = NULL;
2509 }
2510
2511 if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG");
2512
2513 z->rgb = 0;
2514 for (i=0; i < s->img_n; ++i) {
2515 static unsigned char rgb[3] = { 'R', 'G', 'B' };
2516 z->img_comp[i].id = stbi__get8(s);
2517 if (s->img_n == 3 && z->img_comp[i].id == rgb[i])
2518 ++z->rgb;
2519 q = stbi__get8(s);
2520 z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG");
2521 z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG");
2522 z->img_comp[i].tq = stbi__get8(s); if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG");
2523 }
2524
2525 if (scan != STBI__SCAN_load) return 1;
2526
2527 if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode");
2528
2529 for (i=0; i < s->img_n; ++i) {
2530 if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
2531 if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
2532 }
2533
2534 // compute interleaved mcu info
2535 z->img_h_max = h_max;
2536 z->img_v_max = v_max;
2537 z->img_mcu_w = h_max * 8;
2538 z->img_mcu_h = v_max * 8;
2539 // these sizes can't be more than 17 bits
2540 z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
2541 z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;
2542
2543 for (i=0; i < s->img_n; ++i) {
2544 // number of effective pixels (e.g. for non-interleaved MCU)
2545 z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max;
2546 z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max;
2547 // to simplify generation, we'll allocate enough memory to decode
2548 // the bogus oversized data from using interleaved MCUs and their
2549 // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
2550 // discard the extra data until colorspace conversion
2551 //
2552 // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier)
2553 // so these muls can't overflow with 32-bit ints (which we require)
2554 z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
2555 z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
2556 z->img_comp[i].coeff = 0;
2557 z->img_comp[i].raw_coeff = 0;
2558 z->img_comp[i].linebuf = NULL;
2559 z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15);
2560 if (z->img_comp[i].raw_data == NULL)
2561 return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
2562 // align blocks for idct using mmx/sse
2563 z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);
2564 if (z->progressive) {
2565 // w2, h2 are multiples of 8 (see above)
2566 z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
2567 z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
2568 z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15);
2569 if (z->img_comp[i].raw_coeff == NULL)
2570 return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
2571 z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15);
2572 }
2573 }
2574
2575 return 1;
2576}
2577
2578// use comparisons since in some cases we handle more than one case (e.g. SOF)
2579#define stbi__DNL(x) ((x) == 0xdc)
2580#define stbi__SOI(x) ((x) == 0xd8)
2581#define stbi__EOI(x) ((x) == 0xd9)
2582#define stbi__SOF(x) ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2)
2583#define stbi__SOS(x) ((x) == 0xda)
2584
2585#define stbi__SOF_progressive(x) ((x) == 0xc2)
2586
2587static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
2588{
2589 int m;
2590 z->jfif = 0;
2591 z->app14_color_transform = -1; // valid values are 0,1,2
2592 z->marker = STBI__MARKER_none; // initialize cached marker to empty
2593 m = stbi__get_marker(z);
2594 if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG");
2595 if (scan == STBI__SCAN_type) return 1;
2596 m = stbi__get_marker(z);
2597 while (!stbi__SOF(m)) {
2598 if (!stbi__process_marker(z,m)) return 0;
2599 m = stbi__get_marker(z);
2600 while (m == STBI__MARKER_none) {
2601 // some files have extra padding after their blocks, so ok, we'll scan
2602 if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG");
2603 m = stbi__get_marker(z);
2604 }
2605 }
2606 z->progressive = stbi__SOF_progressive(m);
2607 if (!stbi__process_frame_header(z, scan)) return 0;
2608 return 1;
2609}
2610
2611// decode image to YCbCr format
2612static int stbi__decode_jpeg_image(stbi__jpeg *j)
2613{
2614 int m;
2615 for (m = 0; m < 4; m++) {
2616 j->img_comp[m].raw_data = NULL;
2617 j->img_comp[m].raw_coeff = NULL;
2618 }
2619 j->restart_interval = 0;
2620 if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0;
2621 m = stbi__get_marker(j);
2622 while (!stbi__EOI(m)) {
2623 if (stbi__SOS(m)) {
2624 if (!stbi__process_scan_header(j)) return 0;
2625 if (!stbi__parse_entropy_coded_data(j)) return 0;
2626 if (j->marker == STBI__MARKER_none ) {
2627 // handle 0s at the end of image data from IP Kamera 9060
2628 while (!stbi__at_eof(j->s)) {
2629 int x = stbi__get8(j->s);
2630 if (x == 255) {
2631 j->marker = stbi__get8(j->s);
2632 break;
2633 }
2634 }
2635 // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0
2636 }
2637 } else if (stbi__DNL(m)) {
2638 int Ld = stbi__get16be(j->s);
2639 stbi__uint32 NL = stbi__get16be(j->s);
2640 if (Ld != 4) stbi__err("bad DNL len", "Corrupt JPEG");
2641 if (NL != j->s->img_y) stbi__err("bad DNL height", "Corrupt JPEG");
2642 } else {
2643 if (!stbi__process_marker(j, m)) return 0;
2644 }
2645 m = stbi__get_marker(j);
2646 }
2647 if (j->progressive)
2648 stbi__jpeg_finish(j);
2649 return 1;
2650}
2651
2652// static jfif-centered resampling (across block boundaries)
2653
2654typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1,
2655 int w, int hs);
2656
2657#define stbi__div4(x) ((stbi_uc) ((x) >> 2))
2658
2659static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
2660{
2661 STBI_NOTUSED(out);
2662 STBI_NOTUSED(in_far);
2663 STBI_NOTUSED(w);
2664 STBI_NOTUSED(hs);
2665 return in_near;
2666}
2667
2668static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
2669{
2670 // need to generate two samples vertically for every one in input
2671 int i;
2672 STBI_NOTUSED(hs);
2673 for (i=0; i < w; ++i)
2674 out[i] = stbi__div4(3*in_near[i] + in_far[i] + 2);
2675 return out;
2676}
2677
2678static stbi_uc* stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
2679{
2680 // need to generate two samples horizontally for every one in input
2681 int i;
2682 stbi_uc *input = in_near;
2683
2684 if (w == 1) {
2685 // if only one sample, can't do any interpolation
2686 out[0] = out[1] = input[0];
2687 return out;
2688 }
2689
2690 out[0] = input[0];
2691 out[1] = stbi__div4(input[0]*3 + input[1] + 2);
2692 for (i=1; i < w-1; ++i) {
2693 int n = 3*input[i]+2;
2694 out[i*2+0] = stbi__div4(n+input[i-1]);
2695 out[i*2+1] = stbi__div4(n+input[i+1]);
2696 }
2697 out[i*2+0] = stbi__div4(input[w-2]*3 + input[w-1] + 2);
2698 out[i*2+1] = input[w-1];
2699
2700 STBI_NOTUSED(in_far);
2701 STBI_NOTUSED(hs);
2702
2703 return out;
2704}
2705
2706#define stbi__div16(x) ((stbi_uc) ((x) >> 4))
2707
2708static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
2709{
2710 // need to generate 2x2 samples for every one in input
2711 int i,t0,t1;
2712 if (w == 1) {
2713 out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
2714 return out;
2715 }
2716
2717 t1 = 3*in_near[0] + in_far[0];
2718 out[0] = stbi__div4(t1+2);
2719 for (i=1; i < w; ++i) {
2720 t0 = t1;
2721 t1 = 3*in_near[i]+in_far[i];
2722 out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
2723 out[i*2 ] = stbi__div16(3*t1 + t0 + 8);
2724 }
2725 out[w*2-1] = stbi__div4(t1+2);
2726
2727 STBI_NOTUSED(hs);
2728
2729 return out;
2730}
2731
#if defined(STBI_SSE2) || defined(STBI_NEON)
// SIMD (SSE2 or NEON) version of the 2x2 upsampler: writes 2*w samples to
// out from w samples each of in_near and in_far, and returns out.
// Groups of 8 input pixels are handled by the vector loop; the remaining
// pixels, including the last one with its edge handling, are done by the
// scalar code that follows the loop.
static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   // need to generate 2x2 samples for every one in input
   int i=0,t0,t1;

   if (w == 1) {
      out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
      return out;
   }

   // t1 always holds the vertically filtered value of the pixel to the left
   // of the next one to be processed
   t1 = 3*in_near[0] + in_far[0];
   // process groups of 8 pixels for as long as we can.
   // note we can't handle the last pixel in a row in this loop
   // because we need to handle the filter boundary conditions.
   // (the bound also keeps the in_near[i+8] / in_far[i+8] reads below
   // inside the row: i+8 <= w-1 on every iteration)
   for (; i < ((w-1) & ~7); i += 8) {
#if defined(STBI_SSE2)
      // load and perform the vertical filtering pass
      // this uses 3*x + y = 4*x + (y - x)
      __m128i zero = _mm_setzero_si128();
      __m128i farb = _mm_loadl_epi64((__m128i *) (in_far + i));
      __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i));
      __m128i farw = _mm_unpacklo_epi8(farb, zero);
      __m128i nearw = _mm_unpacklo_epi8(nearb, zero);
      __m128i diff = _mm_sub_epi16(farw, nearw);
      __m128i nears = _mm_slli_epi16(nearw, 2);
      __m128i curr = _mm_add_epi16(nears, diff); // current row

      // horizontal filter works the same based on shifted vers of current
      // row. "prev" is current row shifted right by 1 pixel; we need to
      // insert the previous pixel value (from t1).
      // "next" is current row shifted left by 1 pixel, with first pixel
      // of next block of 8 pixels added in.
      __m128i prv0 = _mm_slli_si128(curr, 2);
      __m128i nxt0 = _mm_srli_si128(curr, 2);
      __m128i prev = _mm_insert_epi16(prv0, t1, 0);
      __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7);

      // horizontal filter, polyphase implementation since it's convenient:
      // even pixels = 3*cur + prev = cur*4 + (prev - cur)
      // odd pixels = 3*cur + next = cur*4 + (next - cur)
      // note the shared term.
      __m128i bias = _mm_set1_epi16(8);
      __m128i curs = _mm_slli_epi16(curr, 2);
      __m128i prvd = _mm_sub_epi16(prev, curr);
      __m128i nxtd = _mm_sub_epi16(next, curr);
      __m128i curb = _mm_add_epi16(curs, bias);
      __m128i even = _mm_add_epi16(prvd, curb);
      __m128i odd = _mm_add_epi16(nxtd, curb);

      // interleave even and odd pixels, then undo scaling.
      __m128i int0 = _mm_unpacklo_epi16(even, odd);
      __m128i int1 = _mm_unpackhi_epi16(even, odd);
      __m128i de0 = _mm_srli_epi16(int0, 4);
      __m128i de1 = _mm_srli_epi16(int1, 4);

      // pack and write output
      // (16 output bytes for the 8 input pixels, unaligned store)
      __m128i outv = _mm_packus_epi16(de0, de1);
      _mm_storeu_si128((__m128i *) (out + i*2), outv);
#elif defined(STBI_NEON)
      // load and perform the vertical filtering pass
      // this uses 3*x + y = 4*x + (y - x)
      uint8x8_t farb = vld1_u8(in_far + i);
      uint8x8_t nearb = vld1_u8(in_near + i);
      int16x8_t diff = vreinterpretq_s16_u16(vsubl_u8(farb, nearb));
      int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2));
      int16x8_t curr = vaddq_s16(nears, diff); // current row

      // horizontal filter works the same based on shifted vers of current
      // row. "prev" is current row shifted right by 1 pixel; we need to
      // insert the previous pixel value (from t1).
      // "next" is current row shifted left by 1 pixel, with first pixel
      // of next block of 8 pixels added in.
      int16x8_t prv0 = vextq_s16(curr, curr, 7);
      int16x8_t nxt0 = vextq_s16(curr, curr, 1);
      int16x8_t prev = vsetq_lane_s16(t1, prv0, 0);
      int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7);

      // horizontal filter, polyphase implementation since it's convenient:
      // even pixels = 3*cur + prev = cur*4 + (prev - cur)
      // odd pixels = 3*cur + next = cur*4 + (next - cur)
      // note the shared term.
      // (no explicit +8 bias here, unlike the SSE2 path: the rounding is
      // done by the rounding narrowing shift below)
      int16x8_t curs = vshlq_n_s16(curr, 2);
      int16x8_t prvd = vsubq_s16(prev, curr);
      int16x8_t nxtd = vsubq_s16(next, curr);
      int16x8_t even = vaddq_s16(curs, prvd);
      int16x8_t odd = vaddq_s16(curs, nxtd);

      // undo scaling and round, then store with even/odd phases interleaved
      uint8x8x2_t o;
      o.val[0] = vqrshrun_n_s16(even, 4);
      o.val[1] = vqrshrun_n_s16(odd, 4);
      vst2_u8(out + i*2, o);
#endif

      // "previous" value for next iter
      t1 = 3*in_near[i+7] + in_far[i+7];
   }

   // scalar tail. the first remaining pixel only produces its even output
   // here; the odd output to its left (or out[0]'s neighbor) is not written
   // by this statement.
   t0 = t1;
   t1 = 3*in_near[i] + in_far[i];
   out[i*2] = stbi__div16(3*t1 + t0 + 8);

   // same recurrence as the scalar stbi__resample_row_hv_2 loop
   for (++i; i < w; ++i) {
      t0 = t1;
      t1 = 3*in_near[i]+in_far[i];
      out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
      out[i*2 ] = stbi__div16(3*t1 + t0 + 8);
   }
   // right edge: vertical blend only
   out[w*2-1] = stbi__div4(t1+2);

   STBI_NOTUSED(hs);

   return out;
}
#endif
2848
2849static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
2850{
2851 // resample with nearest-neighbor
2852 int i,j;
2853 STBI_NOTUSED(in_far);
2854 for (i=0; i < w; ++i)
2855 for (j=0; j < hs; ++j)
2856 out[i*hs+j] = in_near[i];
2857 return out;
2858}
2859
2860// this is a reduced-precision calculation of YCbCr-to-RGB introduced
2861// to make sure the code produces the same results in both SIMD and scalar
2862#define stbi__float2fixed(x) (((int) ((x) * 4096.0f + 0.5f)) << 8)
2863static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
2864{
2865 int i;
2866 for (i=0; i < count; ++i) {
2867 int y_fixed = (y[i] << 20) + (1<<19); // rounding
2868 int r,g,b;
2869 int cr = pcr[i] - 128;
2870 int cb = pcb[i] - 128;
2871 r = y_fixed + cr* stbi__float2fixed(1.40200f);
2872 g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
2873 b = y_fixed + cb* stbi__float2fixed(1.77200f);
2874 r >>= 20;
2875 g >>= 20;
2876 b >>= 20;
2877 if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
2878 if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
2879 if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
2880 out[0] = (stbi_uc)r;
2881 out[1] = (stbi_uc)g;
2882 out[2] = (stbi_uc)b;
2883 out[3] = 255;
2884 out += step;
2885 }
2886}
2887
2888#if defined(STBI_SSE2) || defined(STBI_NEON)
// SIMD-accelerated YCbCr -> RGBA conversion of one row of `count` pixels.
// Both vector paths are only taken when step == 4; they consume 8 pixels per
// iteration and write 32 output bytes. Whatever is left over (and every pixel
// when step != 4) is handled by the scalar loop at the end, which uses the
// same fixed-point expressions as stbi__YCbCr_to_RGB_row.
2889 static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step)
2890 {
2891 int i = 0;
2892
2893 #ifdef STBI_SSE2
2894 // step == 3 is pretty ugly on the final interleave, and i'm not convinced
2895 // it's useful in practice (you wouldn't use it for textures, for example).
2896 // so just accelerate step == 4 case.
2897 if (step == 4) {
2898 // this is a fairly straightforward implementation and not super-optimized.
2899 __m128i signflip = _mm_set1_epi8(-0x80);
2900 __m128i cr_const0 = _mm_set1_epi16( (short) ( 1.40200f*4096.0f+0.5f));
2901 __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f));
2902 __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f));
2903 __m128i cb_const1 = _mm_set1_epi16( (short) ( 1.77200f*4096.0f+0.5f));
2904 __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128);
2905 __m128i xw = _mm_set1_epi16(255); // alpha channel
2906
// 8 pixels per iteration; stops when fewer than 8 remain
2907 for (; i+7 < count; i += 8) {
2908 // load
2909 __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i));
2910 __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i));
2911 __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i));
2912 __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128
2913 __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128
2914
2915 // unpack to short (and left-shift cr, cb by 8)
2916 __m128i yw = _mm_unpacklo_epi8(y_bias, y_bytes);
2917 __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased);
2918 __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased);
2919
2920 // color transform
2921 __m128i yws = _mm_srli_epi16(yw, 4);
2922 __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw);
2923 __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw);
2924 __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1);
2925 __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1);
2926 __m128i rws = _mm_add_epi16(cr0, yws);
2927 __m128i gwt = _mm_add_epi16(cb0, yws);
2928 __m128i bws = _mm_add_epi16(yws, cb1);
2929 __m128i gws = _mm_add_epi16(gwt, cr1);
2930
2931 // descale
2932 __m128i rw = _mm_srai_epi16(rws, 4);
2933 __m128i bw = _mm_srai_epi16(bws, 4);
2934 __m128i gw = _mm_srai_epi16(gws, 4);
2935
2936 // back to byte, set up for transpose
2937 __m128i brb = _mm_packus_epi16(rw, bw);
2938 __m128i gxb = _mm_packus_epi16(gw, xw);
2939
2940 // transpose to interleave channels
2941 __m128i t0 = _mm_unpacklo_epi8(brb, gxb);
2942 __m128i t1 = _mm_unpackhi_epi8(brb, gxb);
2943 __m128i o0 = _mm_unpacklo_epi16(t0, t1);
2944 __m128i o1 = _mm_unpackhi_epi16(t0, t1);
2945
2946 // store
2947 _mm_storeu_si128((__m128i *) (out + 0), o0);
2948 _mm_storeu_si128((__m128i *) (out + 16), o1);
2949 out += 32;
2950 }
2951 }
2952 #endif
2953
2954 #ifdef STBI_NEON
2955 // in this version, step=3 support would be easy to add. but is there demand?
2956 if (step == 4) {
2957 // this is a fairly straightforward implementation and not super-optimized.
2958 uint8x8_t signflip = vdup_n_u8(0x80);
2959 int16x8_t cr_const0 = vdupq_n_s16( (short) ( 1.40200f*4096.0f+0.5f));
2960 int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f));
2961 int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f));
2962 int16x8_t cb_const1 = vdupq_n_s16( (short) ( 1.77200f*4096.0f+0.5f));
2963
// 8 pixels per iteration; stops when fewer than 8 remain
2964 for (; i+7 < count; i += 8) {
2965 // load
2966 uint8x8_t y_bytes = vld1_u8(y + i);
2967 uint8x8_t cr_bytes = vld1_u8(pcr + i);
2968 uint8x8_t cb_bytes = vld1_u8(pcb + i);
2969 int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));
2970 int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));
2971
2972 // expand to s16
2973 int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));
2974 int16x8_t crw = vshll_n_s8(cr_biased, 7);
2975 int16x8_t cbw = vshll_n_s8(cb_biased, 7);
2976
2977 // color transform
2978 int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);
2979 int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);
2980 int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);
2981 int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);
2982 int16x8_t rws = vaddq_s16(yws, cr0);
2983 int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);
2984 int16x8_t bws = vaddq_s16(yws, cb1);
2985
2986 // undo scaling, round, convert to byte
2987 uint8x8x4_t o;
2988 o.val[0] = vqrshrun_n_s16(rws, 4);
2989 o.val[1] = vqrshrun_n_s16(gws, 4);
2990 o.val[2] = vqrshrun_n_s16(bws, 4);
2991 o.val[3] = vdup_n_u8(255);
2992
2993 // store, interleaving r/g/b/a
2994 vst4_u8(out, o);
2995 out += 8*4;
2996 }
2997 }
2998 #endif
2999
// scalar path: pixels the vector loops did not consume (all of them when
// step != 4); writes R, G, B, 255 and then advances out by step
3000 for (; i < count; ++i) {
3001 int y_fixed = (y[i] << 20) + (1<<19); // rounding
3002 int r,g,b;
3003 int cr = pcr[i] - 128;
3004 int cb = pcb[i] - 128;
3005 r = y_fixed + cr* stbi__float2fixed(1.40200f);
3006 g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
3007 b = y_fixed + cb* stbi__float2fixed(1.77200f);
3008 r >>= 20;
3009 g >>= 20;
3010 b >>= 20;
// clamp each channel to 0..255
3011 if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
3012 if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
3013 if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
3014 out[0] = (stbi_uc)r;
3015 out[1] = (stbi_uc)g;
3016 out[2] = (stbi_uc)b;
3017 out[3] = 255;
3018 out += step;
3019 }
3020 }
3021#endif
3022
3023// set up the kernels
3024static void stbi__setup_jpeg(stbi__jpeg *j)
3025{
3026 j->idct_block_kernel = stbi__idct_block;
3027 j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
3028 j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;
3029
3030#ifdef STBI_SSE2
3031 if (stbi__sse2_available()) {
3032 j->idct_block_kernel = stbi__idct_simd;
3033 j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3034 j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3035 }
3036#endif
3037
3038#ifdef STBI_NEON
3039 j->idct_block_kernel = stbi__idct_simd;
3040 j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3041 j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3042#endif
3043}
3044
3045// clean up the temporary component buffers
3046static void stbi__cleanup_jpeg(stbi__jpeg *j)
3047{
3048 stbi__free_jpeg_components(j, j->s->img_n, 0);
3049}
3050
3051typedef struct
3052{
3053 resample_row_func resample;
3054 stbi_uc *line0,*line1;
3055 int hs,vs; // expansion factor in each axis
3056 int w_lores; // horizontal pixels pre-expansion
3057 int ystep; // how far through vertical expansion we are
3058 int ypos; // which pre-expansion row we're on
3059} stbi__resample;
3060
3061// fast 0..255 * 0..255 => 0..255 rounded multiplication
3062static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y)
3063{
3064 unsigned int t = x*y + 128;
3065 return (stbi_uc) ((t + (t >>8)) >> 8);
3066}
3067
// Decode the JPEG behind z->s and return a newly allocated interleaved 8-bit
// image, or NULL on failure.
//   out_x, out_y: receive the image width and height
//   comp:         if non-NULL, receives 3 when the file has >= 3 components,
//                 otherwise 1 (the file's layout, not the returned one)
//   req_comp:     number of output channels wanted; 0 selects 3 or 1 based on
//                 the file; values outside 0..4 are rejected
// Each component is upsampled one row at a time into its line buffer and the
// rows are then colour-converted into the output. Once decoding has been
// attempted, every exit path calls stbi__cleanup_jpeg.
3068 static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
3069 {
3070 int n, decode_n, is_rgb;
3071 z->s->img_n = 0; // make stbi__cleanup_jpeg safe
3072
3073 // validate req_comp
3074 if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
3075
3076 // load a jpeg image from whichever source, but leave in YCbCr format
3077 if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }
3078
3079 // determine actual number of components to generate
3080 n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1;
3081
// three-component data is taken as already RGB when z->rgb == 3, or when the
// app14 transform is 0 and no JFIF marker was seen
3082 is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif));
3083
// grey output from a YCbCr file only needs the first (Y) component
3084 if (z->s->img_n == 3 && n < 3 && !is_rgb)
3085 decode_n = 1;
3086 else
3087 decode_n = z->s->img_n;
3088
3089 // resample and color-convert
3090 {
3091 int k;
3092 unsigned int i,j;
3093 stbi_uc *output;
3094 stbi_uc *coutput[4];
3095
3096 stbi__resample res_comp[4];
3097
3098 for (k=0; k < decode_n; ++k) {
3099 stbi__resample *r = &res_comp[k];
3100
3101 // allocate line buffer big enough for upsampling off the edges
3102 // with upsample factor of 4
3103 z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3);
3104 if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
3105
3106 r->hs = z->img_h_max / z->img_comp[k].h;
3107 r->vs = z->img_v_max / z->img_comp[k].v;
3108 r->ystep = r->vs >> 1;
3109 r->w_lores = (z->s->img_x + r->hs-1) / r->hs;
3110 r->ypos = 0;
3111 r->line0 = r->line1 = z->img_comp[k].data;
3112
// pick the row resampler that matches this component's expansion factors
3113 if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
3114 else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2;
3115 else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2;
3116 else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
3117 else r->resample = stbi__resample_row_generic;
3118 }
3119
3120 // allocate the interleaved output image; this allocation can still fail
3121 output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);
3122 if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
3123
3124 // now go ahead and resample
3125 for (j=0; j < z->s->img_y; ++j) {
3126 stbi_uc *out = output + n * z->s->img_x * j;
3127 for (k=0; k < decode_n; ++k) {
3128 stbi__resample *r = &res_comp[k];
// y_bot decides which of line0/line1 is passed as the "near" row
3129 int y_bot = r->ystep >= (r->vs >> 1);
3130 coutput[k] = r->resample(z->img_comp[k].linebuf,
3131 y_bot ? r->line1 : r->line0,
3132 y_bot ? r->line0 : r->line1,
3133 r->w_lores, r->hs);
// after vs output rows, move on to the next source row (line1 stops advancing
// at the component's last row)
3134 if (++r->ystep >= r->vs) {
3135 r->ystep = 0;
3136 r->line0 = r->line1;
3137 if (++r->ypos < z->img_comp[k].y)
3138 r->line1 += z->img_comp[k].w2;
3139 }
3140 }
// colour (3 or 4 channel) output
3141 if (n >= 3) {
3142 stbi_uc *y = coutput[0];
3143 if (z->s->img_n == 3) {
3144 if (is_rgb) {
3145 for (i=0; i < z->s->img_x; ++i) {
3146 out[0] = y[i];
3147 out[1] = coutput[1][i];
3148 out[2] = coutput[2][i];
3149 out[3] = 255;
3150 out += n;
3151 }
3152 } else {
3153 z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
3154 }
3155 } else if (z->s->img_n == 4) {
3156 if (z->app14_color_transform == 0) { // CMYK
3157 for (i=0; i < z->s->img_x; ++i) {
3158 stbi_uc m = coutput[3][i];
3159 out[0] = stbi__blinn_8x8(coutput[0][i], m);
3160 out[1] = stbi__blinn_8x8(coutput[1][i], m);
3161 out[2] = stbi__blinn_8x8(coutput[2][i], m);
3162 out[3] = 255;
3163 out += n;
3164 }
3165 } else if (z->app14_color_transform == 2) { // YCCK
3166 z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
3167 for (i=0; i < z->s->img_x; ++i) {
3168 stbi_uc m = coutput[3][i];
3169 out[0] = stbi__blinn_8x8(255 - out[0], m);
3170 out[1] = stbi__blinn_8x8(255 - out[1], m);
3171 out[2] = stbi__blinn_8x8(255 - out[2], m);
3172 out += n;
3173 }
3174 } else { // YCbCr + alpha? Ignore the fourth channel for now
3175 z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
3176 }
3177 } else
3178 for (i=0; i < z->s->img_x; ++i) {
3179 out[0] = out[1] = out[2] = y[i];
3180 out[3] = 255; // not used if n==3
3181 out += n;
3182 }
// grey (1 or 2 channel) output
3183 } else {
3184 if (is_rgb) {
3185 if (n == 1)
3186 for (i=0; i < z->s->img_x; ++i)
3187 *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
3188 else {
3189 for (i=0; i < z->s->img_x; ++i, out += 2) {
3190 out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
3191 out[1] = 255;
3192 }
3193 }
3194 } else if (z->s->img_n == 4 && z->app14_color_transform == 0) {
3195 for (i=0; i < z->s->img_x; ++i) {
3196 stbi_uc m = coutput[3][i];
3197 stbi_uc r = stbi__blinn_8x8(coutput[0][i], m);
3198 stbi_uc g = stbi__blinn_8x8(coutput[1][i], m);
3199 stbi_uc b = stbi__blinn_8x8(coutput[2][i], m);
3200 out[0] = stbi__compute_y(r, g, b);
3201 out[1] = 255;
3202 out += n;
3203 }
3204 } else if (z->s->img_n == 4 && z->app14_color_transform == 2) {
3205 for (i=0; i < z->s->img_x; ++i) {
3206 out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]);
3207 out[1] = 255;
3208 out += n;
3209 }
3210 } else {
3211 stbi_uc *y = coutput[0];
3212 if (n == 1)
3213 for (i=0; i < z->s->img_x; ++i) out[i] = y[i];
3214 else
3215 for (i=0; i < z->s->img_x; ++i) *out++ = y[i], *out++ = 255;
3216 }
3217 }
3218 }
3219 stbi__cleanup_jpeg(z);
3220 *out_x = z->s->img_x;
3221 *out_y = z->s->img_y;
3222 if (comp) *comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output
3223 return output;
3224 }
3225 }
3226
3227static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
3228{
3229 unsigned char* result;
3230 stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg));
3231 STBI_NOTUSED(ri);
3232 j->s = s;
3233 stbi__setup_jpeg(j);
3234 result = load_jpeg_image(j, x,y,comp,req_comp);
3235 STBI_FREE(j);
3236 return result;
3237}
3238
3239static int stbi__jpeg_test(stbi__context *s)
3240{
3241 int r;
3242 stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
3243 j->s = s;
3244 stbi__setup_jpeg(j);
3245 r = stbi__decode_jpeg_header(j, STBI__SCAN_type);
3246 stbi__rewind(s);
3247 STBI_FREE(j);
3248 return r;
3249}
3250
3251static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
3252{
3253 if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) {
3254 stbi__rewind( j->s );
3255 return 0;
3256 }
3257 if (x) *x = j->s->img_x;
3258 if (y) *y = j->s->img_y;
3259 if (comp) *comp = j->s->img_n >= 3 ? 3 : 1;
3260 return 1;
3261}
3262
3263static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
3264{
3265 int result;
3266 stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg)));
3267 j->s = s;
3268 result = stbi__jpeg_info_raw(j, x, y, comp);
3269 STBI_FREE(j);
3270 return result;
3271}
3272#endif
3273
3274// public domain zlib decode v0.2 Sean Barrett 2006-11-18
3275// simple implementation
3276// - all input must be provided in an upfront buffer
3277// - all output is written to a single output buffer (can malloc/realloc)
3278// performance
3279// - fast huffman
3280
3281#ifndef STBI_NO_ZLIB
3282
3283 // fast-way is faster to check than jpeg huffman, but slow way is slower
3284 #define STBI__ZFAST_BITS 9 // accelerate all cases in default tables
3285 #define STBI__ZFAST_MASK ((1 << STBI__ZFAST_BITS) - 1)
3286
3287 // zlib-style huffman decoding table
3288 // (JPEG packs bits from the left, zlib from the right, so the code can't be shared)
3289 typedef struct
3290 {
3291 stbi__uint16 fast[1 << STBI__ZFAST_BITS]; // direct lookup for codes of at most STBI__ZFAST_BITS bits: (length << 9) | symbol, 0 if none
3292 stbi__uint16 firstcode[16]; // first code of each code length
3293 int maxcode[17]; // end of each length's code range, pre-shifted to 16 bits; [16] is a sentinel
3294 stbi__uint16 firstsymbol[16]; // index into size[]/value[] of the first symbol of each length
3295 stbi_uc size[288]; // code length of each table entry
3296 stbi__uint16 value[288]; // symbol of each table entry
3297 } stbi__zhuffman;
3298
3299stbi_inline static int stbi__bitreverse16(int n)
3300{
3301 n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1);
3302 n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2);
3303 n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4);
3304 n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8);
3305 return n;
3306}
3307
3308stbi_inline static int stbi__bit_reverse(int v, int bits)
3309{
3310 STBI_ASSERT(bits <= 16);
3311 // to bit reverse n bits, reverse 16 and shift
3312 // e.g. 11 bits, bit reverse and shift away 5
3313 return stbi__bitreverse16(v) >> (16-bits);
3314}
3315
3316static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num)
3317{
3318 int i,k=0;
3319 int code, next_code[16], sizes[17];
3320
3321 // DEFLATE spec for generating codes
3322 memset(sizes, 0, sizeof(sizes));
3323 memset(z->fast, 0, sizeof(z->fast));
3324 for (i=0; i < num; ++i)
3325 ++sizes[sizelist[i]];
3326 sizes[0] = 0;
3327 for (i=1; i < 16; ++i)
3328 if (sizes[i] > (1 << i))
3329 return stbi__err("bad sizes", "Corrupt PNG");
3330 code = 0;
3331 for (i=1; i < 16; ++i) {
3332 next_code[i] = code;
3333 z->firstcode[i] = (stbi__uint16) code;
3334 z->firstsymbol[i] = (stbi__uint16) k;
3335 code = (code + sizes[i]);
3336 if (sizes[i])
3337 if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG");
3338 z->maxcode[i] = code << (16-i); // preshift for inner loop
3339 code <<= 1;
3340 k += sizes[i];
3341 }
3342 z->maxcode[16] = 0x10000; // sentinel
3343 for (i=0; i < num; ++i) {
3344 int s = sizelist[i];
3345 if (s) {
3346 int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
3347 stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i);
3348 z->size [c] = (stbi_uc ) s;
3349 z->value[c] = (stbi__uint16) i;
3350 if (s <= STBI__ZFAST_BITS) {
3351 int j = stbi__bit_reverse(next_code[s],s);
3352 while (j < (1 << STBI__ZFAST_BITS)) {
3353 z->fast[j] = fastv;
3354 j += (1 << s);
3355 }
3356 }
3357 ++next_code[s];
3358 }
3359 }
3360 return 1;
3361}
3362
3363// zlib-from-memory implementation for PNG reading
3364// because PNG allows splitting the zlib stream arbitrarily,
3365// and it's annoying structurally to have PNG call ZLIB call PNG,
3366// we require PNG read all the IDATs and combine them into a single
3367// memory buffer
3368
// State of one zlib/DEFLATE decode: input cursor, bit accumulator, output
// buffer and the two huffman tables of the current block.
3369 typedef struct
3370 {
3371 stbi_uc *zbuffer, *zbuffer_end; // next input byte / one past the last input byte
3372 int num_bits; // number of valid bits currently held in code_buffer
3373 stbi__uint32 code_buffer; // bit accumulator; new bytes are OR'ed in above the valid bits, bits are consumed from the low end
3374
3375 char *zout; // next output position
3376 char *zout_start; // start of the output buffer
3377 char *zout_end; // one past the end of the output buffer
3378 int z_expandable; // nonzero if stbi__zexpand may grow the output buffer
3379
3380 stbi__zhuffman z_length, z_distance; // literal/length and distance tables
3381 } stbi__zbuf;
3382
3383stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
3384{
3385 if (z->zbuffer >= z->zbuffer_end) return 0;
3386 return *z->zbuffer++;
3387}
3388
3389static void stbi__fill_bits(stbi__zbuf *z)
3390{
3391 do {
3392 STBI_ASSERT(z->code_buffer < (1U << z->num_bits));
3393 z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits;
3394 z->num_bits += 8;
3395 } while (z->num_bits <= 24);
3396}
3397
3398stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n)
3399{
3400 unsigned int k;
3401 if (z->num_bits < n) stbi__fill_bits(z);
3402 k = z->code_buffer & ((1 << n) - 1);
3403 z->code_buffer >>= n;
3404 z->num_bits -= n;
3405 return k;
3406}
3407
3408static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
3409{
3410 int b,s,k;
3411 // not resolved by fast table, so compute it the slow way
3412 // use jpeg approach, which requires MSbits at top
3413 k = stbi__bit_reverse(a->code_buffer, 16);
3414 for (s=STBI__ZFAST_BITS+1; ; ++s)
3415 if (k < z->maxcode[s])
3416 break;
3417 if (s == 16) return -1; // invalid code!
3418 // code size is s, so:
3419 b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
3420 STBI_ASSERT(z->size[b] == s);
3421 a->code_buffer >>= s;
3422 a->num_bits -= s;
3423 return z->value[b];
3424}
3425
3426stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
3427{
3428 int b,s;
3429 if (a->num_bits < 16) stbi__fill_bits(a);
3430 b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
3431 if (b) {
3432 s = b >> 9;
3433 a->code_buffer >>= s;
3434 a->num_bits -= s;
3435 return b & 511;
3436 }
3437 return stbi__zhuffman_decode_slowpath(a, z);
3438}
3439
3440static int stbi__zexpand(stbi__zbuf *z, char *zout, int n) // need to make room for n bytes
3441{
3442 char *q;
3443 int cur, limit, old_limit;
3444 z->zout = zout;
3445 if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG");
3446 cur = (int) (z->zout - z->zout_start);
3447 limit = old_limit = (int) (z->zout_end - z->zout_start);
3448 while (cur + n > limit)
3449 limit *= 2;
3450 q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);
3451 STBI_NOTUSED(old_limit);
3452 if (q == NULL) return stbi__err("outofmem", "Out of memory");
3453 z->zout_start = q;
3454 z->zout = q + cur;
3455 z->zout_end = q + limit;
3456 return 1;
3457}
3458
// DEFLATE length codes 257..287, indexed by (code - 257): base match length
// and number of extra bits to read. The last entries are zero padding so
// that every decodable symbol has a slot.
static const int stbi__zlength_base[31] = {
   3,4,5,6,7,8,9,10,11,13,
   15,17,19,23,27,31,35,43,51,59,
   67,83,99,115,131,163,195,227,258,0,0 };

static const int stbi__zlength_extra[31]=
{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };

// DEFLATE distance codes 0..31: base distance and number of extra bits.
// Entries 30 and 31 are zero padding.
static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0};

static const int stbi__zdist_extra[32] =
{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13,0,0};
3472
3473static int stbi__parse_huffman_block(stbi__zbuf *a)
3474{
3475 char *zout = a->zout;
3476 for(;;) {
3477 int z = stbi__zhuffman_decode(a, &a->z_length);
3478 if (z < 256) {
3479 if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes
3480 if (zout >= a->zout_end) {
3481 if (!stbi__zexpand(a, zout, 1)) return 0;
3482 zout = a->zout;
3483 }
3484 *zout++ = (char) z;
3485 } else {
3486 stbi_uc *p;
3487 int len,dist;
3488 if (z == 256) {
3489 a->zout = zout;
3490 return 1;
3491 }
3492 z -= 257;
3493 len = stbi__zlength_base[z];
3494 if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
3495 z = stbi__zhuffman_decode(a, &a->z_distance);
3496 if (z < 0) return stbi__err("bad huffman code","Corrupt PNG");
3497 dist = stbi__zdist_base[z];
3498 if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
3499 if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG");
3500 if (zout + len > a->zout_end) {
3501 if (!stbi__zexpand(a, zout, len)) return 0;
3502 zout = a->zout;
3503 }
3504 p = (stbi_uc *) (zout - dist);
3505 if (dist == 1) { // run of one byte; common in images.
3506 stbi_uc v = *p;
3507 if (len) { do *zout++ = v; while (--len); }
3508 } else {
3509 if (len) { do *zout++ = *p++; while (--len); }
3510 }
3511 }
3512 }
3513}
3514
3515static int stbi__compute_huffman_codes(stbi__zbuf *a)
3516{
3517 static stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
3518 stbi__zhuffman z_codelength;
3519 stbi_uc lencodes[286+32+137];//padding for maximum single op
3520 stbi_uc codelength_sizes[19];
3521 int i,n;
3522
3523 int hlit = stbi__zreceive(a,5) + 257;
3524 int hdist = stbi__zreceive(a,5) + 1;
3525 int hclen = stbi__zreceive(a,4) + 4;
3526 int ntot = hlit + hdist;
3527
3528 memset(codelength_sizes, 0, sizeof(codelength_sizes));
3529 for (i=0; i < hclen; ++i) {
3530 int s = stbi__zreceive(a,3);
3531 codelength_sizes[length_dezigzag[i]] = (stbi_uc) s;
3532 }
3533 if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;
3534
3535 n = 0;
3536 while (n < ntot) {
3537 int c = stbi__zhuffman_decode(a, &z_codelength);
3538 if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG");
3539 if (c < 16)
3540 lencodes[n++] = (stbi_uc) c;
3541 else {
3542 stbi_uc fill = 0;
3543 if (c == 16) {
3544 c = stbi__zreceive(a,2)+3;
3545 if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG");
3546 fill = lencodes[n-1];
3547 } else if (c == 17)
3548 c = stbi__zreceive(a,3)+3;
3549 else {
3550 STBI_ASSERT(c == 18);
3551 c = stbi__zreceive(a,7)+11;
3552 }
3553 if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG");
3554 memset(lencodes+n, fill, c);
3555 n += c;
3556 }
3557 }
3558 if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG");
3559 if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
3560 if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0;
3561 return 1;
3562}
3563
3564static int stbi__parse_uncompressed_block(stbi__zbuf *a)
3565{
3566 stbi_uc header[4];
3567 int len,nlen,k;
3568 if (a->num_bits & 7)
3569 stbi__zreceive(a, a->num_bits & 7); // discard
3570 // drain the bit-packed data into header
3571 k = 0;
3572 while (a->num_bits > 0) {
3573 header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check
3574 a->code_buffer >>= 8;
3575 a->num_bits -= 8;
3576 }
3577 STBI_ASSERT(a->num_bits == 0);
3578 // now fill header the normal way
3579 while (k < 4)
3580 header[k++] = stbi__zget8(a);
3581 len = header[1] * 256 + header[0];
3582 nlen = header[3] * 256 + header[2];
3583 if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG");
3584 if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG");
3585 if (a->zout + len > a->zout_end)
3586 if (!stbi__zexpand(a, a->zout, len)) return 0;
3587 memcpy(a->zout, a->zbuffer, len);
3588 a->zbuffer += len;
3589 a->zout += len;
3590 return 1;
3591}
3592
3593static int stbi__parse_zlib_header(stbi__zbuf *a)
3594{
3595 int cmf = stbi__zget8(a);
3596 int cm = cmf & 15;
3597 /* int cinfo = cmf >> 4; */
3598 int flg = stbi__zget8(a);
3599 if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec
3600 if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png
3601 if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png
3602 // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
3603 return 1;
3604}
3605
// Code lengths of the fixed (type 1) DEFLATE huffman tables: 288
// literal/length symbols and 32 distance symbols. The commented-out loop
// below shows how the values are derived.
3606 static const stbi_uc stbi__zdefault_length[288] =
3607 {
3608 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
3609 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
3610 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
3611 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
3612 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
3613 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
3614 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
3615 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
3616 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8
3617 };
// every fixed distance code is 5 bits long
3618 static const stbi_uc stbi__zdefault_distance[32] =
3619 {
3620 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
3621 };
3622 /*
3623 Init algorithm:
3624 {
3625 int i; // use <= to match clearly with spec
3626 for (i=0; i <= 143; ++i) stbi__zdefault_length[i] = 8;
3627 for ( ; i <= 255; ++i) stbi__zdefault_length[i] = 9;
3628 for ( ; i <= 279; ++i) stbi__zdefault_length[i] = 7;
3629 for ( ; i <= 287; ++i) stbi__zdefault_length[i] = 8;
3630 for (i=0; i <= 31; ++i) stbi__zdefault_distance[i] = 5;
3631 }
3632 */
3633
3634static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
3635{
3636 int final, type;
3637 if (parse_header)
3638 if (!stbi__parse_zlib_header(a)) return 0;
3639 a->num_bits = 0;
3640 a->code_buffer = 0;
3641 do {
3642 final = stbi__zreceive(a,1);
3643 type = stbi__zreceive(a,2);
3644 if (type == 0) {
3645 if (!stbi__parse_uncompressed_block(a)) return 0;
3646 } else if (type == 3) {
3647 return 0;
3648 } else {
3649 if (type == 1) {
3650 // use fixed code lengths
3651 if (!stbi__zbuild_huffman(&a->z_length , stbi__zdefault_length , 288)) return 0;
3652 if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32)) return 0;
3653 } else {
3654 if (!stbi__compute_huffman_codes(a)) return 0;
3655 }
3656 if (!stbi__parse_huffman_block(a)) return 0;
3657 }
3658 } while (!final);
3659 return 1;
3660}
3661
3662static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header)
3663{
3664 a->zout_start = obuf;
3665 a->zout = obuf;
3666 a->zout_end = obuf + olen;
3667 a->z_expandable = exp;
3668
3669 return stbi__parse_zlib(a, parse_header);
3670}
3671
3672STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
3673{
3674 stbi__zbuf a;
3675 char *p = (char *) stbi__malloc(initial_size);
3676 if (p == NULL) return NULL;
3677 a.zbuffer = (stbi_uc *) buffer;
3678 a.zbuffer_end = (stbi_uc *) buffer + len;
3679 if (stbi__do_zlib(&a, p, initial_size, 1, 1)) {
3680 if (outlen) *outlen = (int) (a.zout - a.zout_start);
3681 return a.zout_start;
3682 } else {
3683 STBI_FREE(a.zout_start);
3684 return NULL;
3685 }
3686}
3687
3688STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
3689{
3690 return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
3691}
3692
3693STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
3694{
3695 stbi__zbuf a;
3696 char *p = (char *) stbi__malloc(initial_size);
3697 if (p == NULL) return NULL;
3698 a.zbuffer = (stbi_uc *) buffer;
3699 a.zbuffer_end = (stbi_uc *) buffer + len;
3700 if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) {
3701 if (outlen) *outlen = (int) (a.zout - a.zout_start);
3702 return a.zout_start;
3703 } else {
3704 STBI_FREE(a.zout_start);
3705 return NULL;
3706 }
3707}
3708
3709STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
3710{
3711 stbi__zbuf a;
3712 a.zbuffer = (stbi_uc *) ibuffer;
3713 a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
3714 if (stbi__do_zlib(&a, obuffer, olen, 0, 1))
3715 return (int) (a.zout - a.zout_start);
3716 else
3717 return -1;
3718}
3719
3720STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
3721{
3722 stbi__zbuf a;
3723 char *p = (char *) stbi__malloc(16384);
3724 if (p == NULL) return NULL;
3725 a.zbuffer = (stbi_uc *) buffer;
3726 a.zbuffer_end = (stbi_uc *) buffer+len;
3727 if (stbi__do_zlib(&a, p, 16384, 1, 0)) {
3728 if (outlen) *outlen = (int) (a.zout - a.zout_start);
3729 return a.zout_start;
3730 } else {
3731 STBI_FREE(a.zout_start);
3732 return NULL;
3733 }
3734}
3735
3736STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
3737{
3738 stbi__zbuf a;
3739 a.zbuffer = (stbi_uc *) ibuffer;
3740 a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
3741 if (stbi__do_zlib(&a, obuffer, olen, 0, 0))
3742 return (int) (a.zout - a.zout_start);
3743 else
3744 return -1;
3745}
3746#endif
3747
3748// public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18
3749// simple implementation
3750// - only 8-bit samples
3751// - no CRC checking
3752// - allocates lots of intermediate memory
3753// - avoids problem of streaming data between subsystems
3754// - avoids explicit window management
3755// performance
3756// - uses stb_zlib, a PD zlib implementation with fast huffman decoding
3757
3758#ifndef STBI_NO_PNG
// Header of one PNG chunk as read by stbi__get_chunk_header.
3759 typedef struct
3760 {
3761 stbi__uint32 length; // first big-endian 32-bit word of the chunk header
3762 stbi__uint32 type; // second big-endian 32-bit word of the chunk header
3763 } stbi__pngchunk;
3764
3765static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
3766{
3767 stbi__pngchunk c;
3768 c.length = stbi__get32be(s);
3769 c.type = stbi__get32be(s);
3770 return c;
3771}
3772
3773static int stbi__check_png_header(stbi__context *s)
3774{
3775 static stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 };
3776 int i;
3777 for (i=0; i < 8; ++i)
3778 if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG");
3779 return 1;
3780}
3781
// State of one PNG decode.
typedef struct
{
   stbi__context *s;                  // input stream plus image dimensions / channel counts
   stbi_uc *idata, *expanded, *out;   // idata: concatenated IDAT payloads; expanded: zlib-decoded scanlines;
                                      // out: the decoded image (all three are released by stbi__do_png)
   int depth;                         // bits per sample, read from IHDR (1, 2, 4, 8 or 16)
} stbi__png;
3788
3789
// Scanline filter types. Values 0..4 are the filter bytes that can appear at
// the start of a scanline (anything above 4 is rejected by the decoder).
enum {
   STBI__F_none=0,
   STBI__F_sub=1,
   STBI__F_up=2,
   STBI__F_avg=3,
   STBI__F_paeth=4,
   // synthetic filters used for first scanline to avoid needing a dummy row of 0s
   STBI__F_avg_first,
   STBI__F_paeth_first
};
3800
// Indexed by the filter byte (0..4) of the first scanline; gives the filter to
// actually apply so that no previous row is sampled: "up" degenerates to
// "none", and "avg"/"paeth" map to their synthetic *_first variants.
static stbi_uc first_row_filter[5] =
{
   STBI__F_none,
   STBI__F_sub,
   STBI__F_none,
   STBI__F_avg_first,
   STBI__F_paeth_first
};
3809
// Paeth predictor: of the three neighbours a, b and c, return the one closest
// to the estimate a + b - c. Ties are resolved in the order a, then b, then c.
static int stbi__paeth(int a, int b, int c)
{
   int estimate = a + b - c;
   int dist_a = abs(estimate - a);
   int dist_b = abs(estimate - b);
   int dist_c = abs(estimate - c);
   int best = c;

   if (dist_a <= dist_b && dist_a <= dist_c)
      best = a;
   else if (dist_b <= dist_c)
      best = b;

   return best;
}
3820
// Indexed by bit depth: the multiplier that stretches a sample's maximum value
// to 255 (1-bit: 1*0xff, 2-bit: 3*0x55, 4-bit: 15*0x11, 8-bit: 255*0x01).
// Entries for unsupported depths are 0.
static stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };
3822
// create the png data from post-deflated data
//
// Undoes the per-scanline filters of the data in 'raw' and stores the result
// in a freshly allocated a->out.
//   a        decoder state; a->s->img_n is read as the number of source
//            channels, a->out receives the allocation
//   raw      filtered scanlines, each preceded by one filter byte
//   raw_len  number of bytes available at 'raw'
//   out_n    channels per output pixel; asserted to be img_n or img_n+1. When
//            it is img_n+1 the extra channel is filled with 255 (both bytes
//            for 16-bit data)
//   x, y     width and height of the image (or interlace pass) to decode
//   depth    bits per sample; 16 means two bytes per sample, values below 8
//            mean packed samples that are expanded to one byte each
//   color    when 0, sub-8-bit samples are scaled via stbi__depth_scale_table
// returns 1 on success; on failure returns the result of stbi__err(). a->out
// is not released here on failure.
static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
{
   int bytes = (depth == 16? 2 : 1);
   stbi__context *s = a->s;
   stbi__uint32 i,j,stride = x*out_n*bytes;
   stbi__uint32 img_len, img_width_bytes;
   int k;
   int img_n = s->img_n; // copy it into a local for later

   int output_bytes = out_n*bytes;
   int filter_bytes = img_n*bytes;
   int width = x;

   STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1);
   a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into
   if (!a->out) return stbi__err("outofmem", "Out of memory");

   // bytes per filtered scanline (rounded up to whole bytes), plus one filter byte per row
   img_width_bytes = (((img_n * x * depth) + 7) >> 3);
   img_len = (img_width_bytes + 1) * y;
   // we used to check for exact match between raw_len and img_len on non-interlaced PNGs,
   // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros),
   // so just check for raw_len < img_len always.
   if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG");

   for (j=0; j < y; ++j) {
      stbi_uc *cur = a->out + stride*j;
      stbi_uc *prior;
      int filter = *raw++;

      if (filter > 4)
         return stbi__err("invalid filter","Corrupt PNG");

      if (depth < 8) {
         STBI_ASSERT(img_width_bytes <= x);
         cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place
         filter_bytes = 1;
         width = img_width_bytes;
      }
      prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above

      // if first row, use special filter that doesn't sample previous row
      if (j == 0) filter = first_row_filter[filter];

      // handle first byte explicitly
      // (the first pixel has no left neighbour, so "sub" and the *_first filters copy the raw byte)
      for (k=0; k < filter_bytes; ++k) {
         switch (filter) {
            case STBI__F_none       : cur[k] = raw[k]; break;
            case STBI__F_sub        : cur[k] = raw[k]; break;
            case STBI__F_up         : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
            case STBI__F_avg        : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break;
            case STBI__F_paeth      : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break;
            case STBI__F_avg_first  : cur[k] = raw[k]; break;
            case STBI__F_paeth_first: cur[k] = raw[k]; break;
         }
      }

      // step past the first pixel (or first packed byte); when an extra channel
      // is being added, fill it with 255 for that pixel
      if (depth == 8) {
         if (img_n != out_n)
            cur[img_n] = 255; // first pixel
         raw += img_n;
         cur += out_n;
         prior += out_n;
      } else if (depth == 16) {
         if (img_n != out_n) {
            cur[filter_bytes]   = 255; // first pixel top byte
            cur[filter_bytes+1] = 255; // first pixel bottom byte
         }
         raw += filter_bytes;
         cur += output_bytes;
         prior += output_bytes;
      } else {
         raw += 1;
         cur += 1;
         prior += 1;
      }

      // this is a little gross, so that we don't switch per-pixel or per-component
      if (depth < 8 || img_n == out_n) {
         // input and output have the same layout: process the rest of the row as a flat byte run
         int nk = (width - 1)*filter_bytes;
         #define STBI__CASE(f) \
             case f:     \
                for (k=0; k < nk; ++k)
         switch (filter) {
            // "none" filter turns into a memcpy here; make that explicit.
            case STBI__F_none:         memcpy(cur, raw, nk); break;
            STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break;
            STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
            STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break;
            STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break;
            STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break;
            STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break;
         }
         #undef STBI__CASE
         raw += nk;
      } else {
         // output has one more channel than the input: walk pixel by pixel, reading
         // filter_bytes per pixel, advancing output_bytes, and writing 255 into the extra channel
         STBI_ASSERT(img_n+1 == out_n);
         #define STBI__CASE(f) \
             case f:     \
                for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
                   for (k=0; k < filter_bytes; ++k)
         switch (filter) {
            STBI__CASE(STBI__F_none)         { cur[k] = raw[k]; } break;
            STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break;
            STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
            STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break;
            STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break;
            STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break;
            STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break;
         }
         #undef STBI__CASE

         // the loop above sets the high byte of the pixels' alpha, but for
         // 16 bit png files we also need the low byte set. we'll do that here.
         if (depth == 16) {
            cur = a->out + stride*j; // start at the beginning of the row again
            for (i=0; i < x; ++i,cur+=output_bytes) {
               cur[filter_bytes+1] = 255;
            }
         }
      }
   }

   // we make a separate pass to expand bits to pixels; for performance,
   // this could run two scanlines behind the above code, so it won't
   // interfere with filtering but will still be in the cache.
   if (depth < 8) {
      for (j=0; j < y; ++j) {
         stbi_uc *cur = a->out + stride*j;
         // packed samples were stored right-aligned in the row by the filtering pass above
         stbi_uc *in  = a->out + stride*j + x*out_n - img_width_bytes;
         // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit
         // png guarantees byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop
         stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range

         // note that the final byte might overshoot and write more data than desired.
         // we can allocate enough data that this never writes out of memory, but it
         // could also overwrite the next scanline. can it overwrite non-empty data
         // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel.
         // so we need to explicitly clamp the final ones

         if (depth == 4) {
            for (k=x*img_n; k >= 2; k-=2, ++in) {
               *cur++ = scale * ((*in >> 4)       );
               *cur++ = scale * ((*in     ) & 0x0f);
            }
            if (k > 0) *cur++ = scale * ((*in >> 4)       );
         } else if (depth == 2) {
            for (k=x*img_n; k >= 4; k-=4, ++in) {
               *cur++ = scale * ((*in >> 6)       );
               *cur++ = scale * ((*in >> 4) & 0x03);
               *cur++ = scale * ((*in >> 2) & 0x03);
               *cur++ = scale * ((*in     ) & 0x03);
            }
            if (k > 0) *cur++ = scale * ((*in >> 6)       );
            if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03);
            if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03);
         } else if (depth == 1) {
            for (k=x*img_n; k >= 8; k-=8, ++in) {
               *cur++ = scale * ((*in >> 7)       );
               *cur++ = scale * ((*in >> 6) & 0x01);
               *cur++ = scale * ((*in >> 5) & 0x01);
               *cur++ = scale * ((*in >> 4) & 0x01);
               *cur++ = scale * ((*in >> 3) & 0x01);
               *cur++ = scale * ((*in >> 2) & 0x01);
               *cur++ = scale * ((*in >> 1) & 0x01);
               *cur++ = scale * ((*in     ) & 0x01);
            }
            if (k > 0) *cur++ = scale * ((*in >> 7)       );
            if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01);
            if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01);
            if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01);
            if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01);
            if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01);
            if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01);
         }
         if (img_n != out_n) {
            int q;
            // insert alpha = 255
            // (walks from the last pixel to the first so the in-place widening never
            // overwrites a source sample before it has been moved)
            cur = a->out + stride*j;
            if (img_n == 1) {
               for (q=x-1; q >= 0; --q) {
                  cur[q*2+1] = 255;
                  cur[q*2+0] = cur[q];
               }
            } else {
               STBI_ASSERT(img_n == 3);
               for (q=x-1; q >= 0; --q) {
                  cur[q*4+3] = 255;
                  cur[q*4+2] = cur[q*3+2];
                  cur[q*4+1] = cur[q*3+1];
                  cur[q*4+0] = cur[q*3+0];
               }
            }
         }
      }
   } else if (depth == 16) {
      // force the image data from big-endian to platform-native.
      // this is done in a separate pass due to the decoding relying
      // on the data being untouched, but could probably be done
      // per-line during decode if care is taken.
      stbi_uc *cur = a->out;
      stbi__uint16 *cur16 = (stbi__uint16*)cur;

      for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) {
         *cur16 = (cur[0] << 8) | cur[1];
      }
   }

   return 1;
}
4033
4034static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced)
4035{
4036 int bytes = (depth == 16 ? 2 : 1);
4037 int out_bytes = out_n * bytes;
4038 stbi_uc *final;
4039 int p;
4040 if (!interlaced)
4041 return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);
4042
4043 // de-interlacing
4044 final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
4045 for (p=0; p < 7; ++p) {
4046 int xorig[] = { 0,4,0,2,0,1,0 };
4047 int yorig[] = { 0,0,4,0,2,0,1 };
4048 int xspc[] = { 8,8,4,4,2,2,1 };
4049 int yspc[] = { 8,8,8,4,4,2,2 };
4050 int i,j,x,y;
4051 // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
4052 x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p];
4053 y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p];
4054 if (x && y) {
4055 stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;
4056 if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) {
4057 STBI_FREE(final);
4058 return 0;
4059 }
4060 for (j=0; j < y; ++j) {
4061 for (i=0; i < x; ++i) {
4062 int out_y = j*yspc[p]+yorig[p];
4063 int out_x = i*xspc[p]+xorig[p];
4064 memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes,
4065 a->out + (j*x+i)*out_bytes, out_bytes);
4066 }
4067 }
4068 STBI_FREE(a->out);
4069 image_data += img_len;
4070 image_data_len -= img_len;
4071 }
4072 }
4073 a->out = final;
4074
4075 return 1;
4076}
4077
4078static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n)
4079{
4080 stbi__context *s = z->s;
4081 stbi__uint32 i, pixel_count = s->img_x * s->img_y;
4082 stbi_uc *p = z->out;
4083
4084 // compute color-based transparency, assuming we've
4085 // already got 255 as the alpha value in the output
4086 STBI_ASSERT(out_n == 2 || out_n == 4);
4087
4088 if (out_n == 2) {
4089 for (i=0; i < pixel_count; ++i) {
4090 p[1] = (p[0] == tc[0] ? 0 : 255);
4091 p += 2;
4092 }
4093 } else {
4094 for (i=0; i < pixel_count; ++i) {
4095 if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
4096 p[3] = 0;
4097 p += 4;
4098 }
4099 }
4100 return 1;
4101}
4102
4103static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n)
4104{
4105 stbi__context *s = z->s;
4106 stbi__uint32 i, pixel_count = s->img_x * s->img_y;
4107 stbi__uint16 *p = (stbi__uint16*) z->out;
4108
4109 // compute color-based transparency, assuming we've
4110 // already got 65535 as the alpha value in the output
4111 STBI_ASSERT(out_n == 2 || out_n == 4);
4112
4113 if (out_n == 2) {
4114 for (i = 0; i < pixel_count; ++i) {
4115 p[1] = (p[0] == tc[0] ? 0 : 65535);
4116 p += 2;
4117 }
4118 } else {
4119 for (i = 0; i < pixel_count; ++i) {
4120 if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
4121 p[3] = 0;
4122 p += 4;
4123 }
4124 }
4125 return 1;
4126}
4127
4128static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n)
4129{
4130 stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y;
4131 stbi_uc *p, *temp_out, *orig = a->out;
4132
4133 p = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0);
4134 if (p == NULL) return stbi__err("outofmem", "Out of memory");
4135
4136 // between here and free(out) below, exitting would leak
4137 temp_out = p;
4138
4139 if (pal_img_n == 3) {
4140 for (i=0; i < pixel_count; ++i) {
4141 int n = orig[i]*4;
4142 p[0] = palette[n ];
4143 p[1] = palette[n+1];
4144 p[2] = palette[n+2];
4145 p += 3;
4146 }
4147 } else {
4148 for (i=0; i < pixel_count; ++i) {
4149 int n = orig[i]*4;
4150 p[0] = palette[n ];
4151 p[1] = palette[n+1];
4152 p[2] = palette[n+2];
4153 p[3] = palette[n+3];
4154 p += 4;
4155 }
4156 }
4157 STBI_FREE(a->out);
4158 a->out = temp_out;
4159
4160 STBI_NOTUSED(len);
4161
4162 return 1;
4163}
4164
// Global (not per-decode) options, both off by default.
// stbi__unpremultiply_on_load: consulted by stbi__de_iphone() to divide color by alpha for 4-channel output.
// stbi__de_iphone_flag: consulted when a CgBI chunk was seen, to decide whether stbi__de_iphone() runs at all.
static int stbi__unpremultiply_on_load = 0;
static int stbi__de_iphone_flag = 0;
4167
// Set the global stbi__unpremultiply_on_load flag. The value is stored as
// given and later tested for non-zero.
STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
{
   stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply;
}
4172
// Set the global stbi__de_iphone_flag. The value is stored as given and later
// tested for non-zero.
STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
{
   stbi__de_iphone_flag = flag_true_if_should_convert;
}
4177
4178static void stbi__de_iphone(stbi__png *z)
4179{
4180 stbi__context *s = z->s;
4181 stbi__uint32 i, pixel_count = s->img_x * s->img_y;
4182 stbi_uc *p = z->out;
4183
4184 if (s->img_out_n == 3) { // convert bgr to rgb
4185 for (i=0; i < pixel_count; ++i) {
4186 stbi_uc t = p[0];
4187 p[0] = p[2];
4188 p[2] = t;
4189 p += 3;
4190 }
4191 } else {
4192 STBI_ASSERT(s->img_out_n == 4);
4193 if (stbi__unpremultiply_on_load) {
4194 // convert bgr to rgb and unpremultiply
4195 for (i=0; i < pixel_count; ++i) {
4196 stbi_uc a = p[3];
4197 stbi_uc t = p[0];
4198 if (a) {
4199 stbi_uc half = a / 2;
4200 p[0] = (p[2] * 255 + half) / a;
4201 p[1] = (p[1] * 255 + half) / a;
4202 p[2] = ( t * 255 + half) / a;
4203 } else {
4204 p[0] = p[2];
4205 p[2] = t;
4206 }
4207 p += 4;
4208 }
4209 } else {
4210 // convert bgr to rgb
4211 for (i=0; i < pixel_count; ++i) {
4212 stbi_uc t = p[0];
4213 p[0] = p[2];
4214 p[2] = t;
4215 p += 4;
4216 }
4217 }
4218 }
4219}
4220
// Pack four characters into one 32-bit value, first character in the most
// significant byte, so it can be compared with a chunk type read by stbi__get32be().
#define STBI__PNG_TYPE(a,b,c,d)  (((a) << 24) + ((b) << 16) + ((c) << 8) + (d))
4222
// Walk the chunks of the PNG stream in z->s and, depending on 'scan', stop
// after the signature, after the header information is known, or decode the
// whole image.
//   z         decoder state; idata/expanded/out are reset to NULL on entry and
//             z->out holds the decoded image after a successful full load
//   scan      STBI__SCAN_type:   only verify the signature
//             STBI__SCAN_header: fill in s->img_x, s->img_y, s->img_n and return
//             STBI__SCAN_load:   decode the image
//   req_comp  requested number of output channels (0 for "as stored")
// returns 1 on success, 0 (or the result of stbi__err()) on failure. Buffers
// held in 'z' are not released on the failure paths here; stbi__do_png() frees them.
static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
{
   stbi_uc palette[1024], pal_img_n=0;   // palette: up to 256 entries of 4 bytes (r,g,b,a); pal_img_n: 0 unless paletted, else 3 or 4
   stbi_uc has_trans=0, tc[3];           // has_trans/tc: color-key transparency from tRNS (8-bit output)
   stbi__uint16 tc16[3];                 // color key for 16-bit images
   stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0;   // ioff: bytes of IDAT data collected; idata_limit: capacity of z->idata
   int first=1,k,interlace=0, color=0, is_iphone=0;    // first: no IHDR seen yet
   stbi__context *s = z->s;

   z->expanded = NULL;
   z->idata = NULL;
   z->out = NULL;

   if (!stbi__check_png_header(s)) return 0;

   if (scan == STBI__SCAN_type) return 1;

   for (;;) {
      stbi__pngchunk c = stbi__get_chunk_header(s);
      switch (c.type) {
         case STBI__PNG_TYPE('C','g','B','I'):
            // remembered so the image can later be run through stbi__de_iphone()
            // and so the data is inflated without expecting a zlib header
            is_iphone = 1;
            stbi__skip(s, c.length);
            break;
         case STBI__PNG_TYPE('I','H','D','R'): {
            int comp,filter;
            if (!first) return stbi__err("multiple IHDR","Corrupt PNG");
            first = 0;
            if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG");
            s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
            s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
            z->depth = stbi__get8(s);  if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16)  return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only");
            color = stbi__get8(s);  if (color > 6)         return stbi__err("bad ctype","Corrupt PNG");
            if (color == 3 && z->depth == 16)                  return stbi__err("bad ctype","Corrupt PNG");
            // color type 3 is paletted; of the remaining types only the even ones are accepted
            if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG");
            comp  = stbi__get8(s);  if (comp) return stbi__err("bad comp method","Corrupt PNG");
            filter= stbi__get8(s);  if (filter) return stbi__err("bad filter method","Corrupt PNG");
            interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG");
            if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG");
            if (!pal_img_n) {
               // bit 1 of the color type selects rgb vs. gray, bit 2 adds an alpha channel
               s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
               if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
               if (scan == STBI__SCAN_header) return 1;
            } else {
               // if paletted, then pal_n is our final components, and
               // img_n is # components to decompress/filter.
               s->img_n = 1;
               if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG");
               // if SCAN_header, have to scan to see if we have a tRNS
            }
            break;
         }

         case STBI__PNG_TYPE('P','L','T','E'):  {
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG");
            pal_len = c.length / 3;
            if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG");
            // store each 3-byte entry as 4 bytes with an opaque alpha; tRNS may overwrite the alpha later
            for (i=0; i < pal_len; ++i) {
               palette[i*4+0] = stbi__get8(s);
               palette[i*4+1] = stbi__get8(s);
               palette[i*4+2] = stbi__get8(s);
               palette[i*4+3] = 255;
            }
            break;
         }

         case STBI__PNG_TYPE('t','R','N','S'): {
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG");
            if (pal_img_n) {
               // paletted image: tRNS supplies per-entry alpha values
               if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; }
               if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG");
               if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG");
               pal_img_n = 4;
               for (i=0; i < c.length; ++i)
                  palette[i*4+3] = stbi__get8(s);
            } else {
               // non-paletted image: tRNS supplies one 16-bit key value per channel
               if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG");
               if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG");
               has_trans = 1;
               if (z->depth == 16) {
                  for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is
               } else {
                  for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger
               }
            }
            break;
         }

         case STBI__PNG_TYPE('I','D','A','T'): {
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG");
            if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; }
            // reject totals that would not fit in a non-negative int
            if ((int)(ioff + c.length) < (int)ioff) return 0;
            if (ioff + c.length > idata_limit) {
               // grow z->idata geometrically until this chunk's payload fits
               stbi__uint32 idata_limit_old = idata_limit;
               stbi_uc *p;
               if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
               while (ioff + c.length > idata_limit)
                  idata_limit *= 2;
               STBI_NOTUSED(idata_limit_old);
               p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory");
               z->idata = p;
            }
            if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG");
            ioff += c.length;
            break;
         }

         case STBI__PNG_TYPE('I','E','N','D'): {
            stbi__uint32 raw_len, bpl;
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (scan != STBI__SCAN_load) return 1;
            if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG");
            // initial guess for decoded data size to avoid unnecessary reallocs
            bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component
            raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
            // raw_len is passed in as the size guess and overwritten with the actual decoded length
            z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone);
            if (z->expanded == NULL) return 0; // zlib should set error
            STBI_FREE(z->idata); z->idata = NULL;
            // decode with one extra (alpha) channel when the caller asked for exactly
            // img_n+1 channels on a non-paletted image, or when a color key is present
            if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
               s->img_out_n = s->img_n+1;
            else
               s->img_out_n = s->img_n;
            if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0;
            if (has_trans) {
               if (z->depth == 16) {
                  if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0;
               } else {
                  if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0;
               }
            }
            if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
               stbi__de_iphone(z);
            if (pal_img_n) {
               // pal_img_n == 3 or 4
               s->img_n = pal_img_n; // record the actual colors we had
               s->img_out_n = pal_img_n;
               if (req_comp >= 3) s->img_out_n = req_comp;
               if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
                  return 0;
            } else if (has_trans) {
               // non-paletted image with tRNS -> source image has (constant) alpha
               ++s->img_n;
            }
            STBI_FREE(z->expanded); z->expanded = NULL;
            return 1;
         }

         default:
            // if critical, fail
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            // bit 29 of the type is bit 5 of its first character; when clear the chunk is treated as critical
            if ((c.type & (1 << 29)) == 0) {
               #ifndef STBI_NO_FAILURE_STRINGS
               // not threadsafe
               static char invalid_chunk[] = "XXXX PNG chunk not known";
               invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
               invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
               invalid_chunk[2] = STBI__BYTECAST(c.type >>  8);
               invalid_chunk[3] = STBI__BYTECAST(c.type >>  0);
               #endif
               return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type");
            }
            stbi__skip(s, c.length);
            break;
      }
      // end of PNG chunk, read and skip CRC
      stbi__get32be(s);
   }
}
4394
4395static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri)
4396{
4397 void *result=NULL;
4398 if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
4399 if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
4400 if (p->depth < 8)
4401 ri->bits_per_channel = 8;
4402 else
4403 ri->bits_per_channel = p->depth;
4404 result = p->out;
4405 p->out = NULL;
4406 if (req_comp && req_comp != p->s->img_out_n) {
4407 if (ri->bits_per_channel == 8)
4408 result = stbi__convert_format((unsigned char *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
4409 else
4410 result = stbi__convert_format16((stbi__uint16 *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
4411 p->s->img_out_n = req_comp;
4412 if (result == NULL) return result;
4413 }
4414 *x = p->s->img_x;
4415 *y = p->s->img_y;
4416 if (n) *n = p->s->img_n;
4417 }
4418 STBI_FREE(p->out); p->out = NULL;
4419 STBI_FREE(p->expanded); p->expanded = NULL;
4420 STBI_FREE(p->idata); p->idata = NULL;
4421
4422 return result;
4423}
4424
4425static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
4426{
4427 stbi__png p;
4428 p.s = s;
4429 return stbi__do_png(&p, x,y,comp,req_comp, ri);
4430}
4431
4432static int stbi__png_test(stbi__context *s)
4433{
4434 int r;
4435 r = stbi__check_png_header(s);
4436 stbi__rewind(s);
4437 return r;
4438}
4439
4440static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
4441{
4442 if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) {
4443 stbi__rewind( p->s );
4444 return 0;
4445 }
4446 if (x) *x = p->s->img_x;
4447 if (y) *y = p->s->img_y;
4448 if (comp) *comp = p->s->img_n;
4449 return 1;
4450}
4451
4452static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
4453{
4454 stbi__png p;
4455 p.s = s;
4456 return stbi__png_info_raw(&p, x, y, comp);
4457}
4458#endif
4459
4460// Microsoft/Windows BMP image
4461
4462#ifndef STBI_NO_BMP
4463static int stbi__bmp_test_raw(stbi__context *s)
4464{
4465 int r;
4466 int sz;
4467 if (stbi__get8(s) != 'B') return 0;
4468 if (stbi__get8(s) != 'M') return 0;
4469 stbi__get32le(s); // discard filesize
4470 stbi__get16le(s); // discard reserved
4471 stbi__get16le(s); // discard reserved
4472 stbi__get32le(s); // discard data offset
4473 sz = stbi__get32le(s);
4474 r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124);
4475 return r;
4476}
4477
4478static int stbi__bmp_test(stbi__context *s)
4479{
4480 int r = stbi__bmp_test_raw(s);
4481 stbi__rewind(s);
4482 return r;
4483}
4484
4485
// returns 0..31 for the highest set bit, or -1 when z is 0
static int stbi__high_bit(unsigned int z)
{
   int bit = 0;
   int step;

   if (z == 0) return -1;

   // binary search: test the upper half of the remaining range, 16/8/4/2/1 bits at a time
   for (step = 16; step >= 1; step >>= 1) {
      if ((z >> step) != 0) {
         bit += step;
         z >>= step;
      }
   }
   return bit;
}
4498
// Count the bits set in the low 32 bits of 'a'.
static int stbi__bitcount(unsigned int a)
{
   int count = 0;

   a &= 0xffffffffu;   // only the low 32 bits take part in the count
   while (a != 0) {
      a &= a - 1;      // clears the lowest set bit
      ++count;
   }
   return count;
}
4508
// Turn a masked color channel into a 0..255 value.
//   v      pixel value already ANDed with the channel mask. Callers pass an
//          unsigned value, so when the mask covers bit 31 this arrives as a
//          negative int; it is therefore reinterpreted as unsigned before any
//          shifting (a signed right shift would smear the sign bit into the
//          result, and a signed left shift of a negative value is undefined).
//   shift  amount that moves the mask's highest bit to bit 7: shifted right
//          when positive, left when negative. Must be smaller in magnitude
//          than the width of unsigned int.
//   bits   number of bits set in the mask
// Channels narrower than 8 bits are widened by repeating their bit pattern
// into the low-order bits, so a channel's maximum value maps to 255.
// A non-positive 'bits' performs no replication (the original looped forever
// on bits == 0).
static int stbi__shiftsigned(int v, int shift, int bits)
{
   unsigned int chan = (unsigned int) v;
   unsigned int result;
   int z;

   if (shift < 0) chan <<= -shift;
   else chan >>= shift;
   result = chan;

   if (bits > 0) {
      for (z = bits; z < 8; z += bits)
         result += chan >> z;
   }
   return (int) result;
}
4525
// Values gathered from a BMP header by stbi__bmp_parse_header().
typedef struct
{
   int bpp, offset, hsz;              // bits per pixel; 32-bit value read just before the header size
                                      // (labelled "data offset" in stbi__bmp_test_raw); header size in bytes
   unsigned int mr,mg,mb,ma, all_a;   // red/green/blue/alpha channel bit masks; all_a is only written by the
                                      // parser for uncompressed 32-bpp files (set to 0 there)
} stbi__bmp_data;
4531
// Read the BMP file header and info header from 's'.
// Fills s->img_x and s->img_y (img_y is stored as read; the caller interprets
// its sign) and info->offset, info->hsz, info->bpp and the channel masks.
// info->all_a is only written for uncompressed 32-bpp files with a 40- or
// 56-byte header, so the caller must initialise it beforehand.
// returns a non-NULL dummy pointer ((void *) 1) on success, or the result of
// stbi__errpuc() on failure.
static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
{
   int hsz;
   if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP");
   stbi__get32le(s); // discard filesize
   stbi__get16le(s); // discard reserved
   stbi__get16le(s); // discard reserved
   info->offset = stbi__get32le(s);
   info->hsz = hsz = stbi__get32le(s);
   info->mr = info->mg = info->mb = info->ma = 0;

   if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
   // the 12-byte header stores the dimensions as 16-bit values, all others as 32-bit
   if (hsz == 12) {
      s->img_x = stbi__get16le(s);
      s->img_y = stbi__get16le(s);
   } else {
      s->img_x = stbi__get32le(s);
      s->img_y = stbi__get32le(s);
   }
   // the next 16-bit field must be 1
   if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP");
   info->bpp = stbi__get16le(s);
   if (info->bpp == 1) return stbi__errpuc("monochrome", "BMP type not supported: 1-bit");
   if (hsz != 12) {
      int compress = stbi__get32le(s);
      if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
      stbi__get32le(s); // discard sizeof
      stbi__get32le(s); // discard hres
      stbi__get32le(s); // discard vres
      stbi__get32le(s); // discard colorsused
      stbi__get32le(s); // discard max important
      if (hsz == 40 || hsz == 56) {
         if (hsz == 56) {
            // four 32-bit values are read and ignored for the 56-byte header
            stbi__get32le(s);
            stbi__get32le(s);
            stbi__get32le(s);
            stbi__get32le(s);
         }
         if (info->bpp == 16 || info->bpp == 32) {
            if (compress == 0) {
               // no masks in the file: use fixed default masks for the bit depth
               if (info->bpp == 32) {
                  info->mr = 0xffu << 16;
                  info->mg = 0xffu <<  8;
                  info->mb = 0xffu <<  0;
                  info->ma = 0xffu << 24;
                  info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
               } else {
                  info->mr = 31u << 10;
                  info->mg = 31u <<  5;
                  info->mb = 31u <<  0;
               }
            } else if (compress == 3) {
               // compression value 3: the three color masks follow in the file
               info->mr = stbi__get32le(s);
               info->mg = stbi__get32le(s);
               info->mb = stbi__get32le(s);
               // not documented, but generated by photoshop and handled by mspaint
               if (info->mr == info->mg && info->mg == info->mb) {
                  // ?!?!?
                  return stbi__errpuc("bad BMP", "bad BMP");
               }
            } else
               return stbi__errpuc("bad BMP", "bad BMP");
         }
      } else {
         int i;
         if (hsz != 108 && hsz != 124)
            return stbi__errpuc("bad BMP", "bad BMP");
         // the 108- and 124-byte headers always carry all four masks
         info->mr = stbi__get32le(s);
         info->mg = stbi__get32le(s);
         info->mb = stbi__get32le(s);
         info->ma = stbi__get32le(s);
         stbi__get32le(s); // discard color space
         for (i=0; i < 12; ++i)
            stbi__get32le(s); // discard color space parameters
         if (hsz == 124) {
            stbi__get32le(s); // discard rendering intent
            stbi__get32le(s); // discard offset of profile data
            stbi__get32le(s); // discard size of profile data
            stbi__get32le(s); // discard reserved
         }
      }
   }
   return (void *) 1;
}
4615
4616
4617static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
4618{
4619 stbi_uc *out;
4620 unsigned int mr=0,mg=0,mb=0,ma=0, all_a;
4621 stbi_uc pal[256][4];
4622 int psize=0,i,j,width;
4623 int flip_vertically, pad, target;
4624 stbi__bmp_data info;
4625 STBI_NOTUSED(ri);
4626
4627 info.all_a = 255;
4628 if (stbi__bmp_parse_header(s, &info) == NULL)
4629 return NULL; // error code already set
4630
4631 flip_vertically = ((int) s->img_y) > 0;
4632 s->img_y = abs((int) s->img_y);
4633
4634 mr = info.mr;
4635 mg = info.mg;
4636 mb = info.mb;
4637 ma = info.ma;
4638 all_a = info.all_a;
4639
4640 if (info.hsz == 12) {
4641 if (info.bpp < 24)
4642 psize = (info.offset - 14 - 24) / 3;
4643 } else {
4644 if (info.bpp < 16)
4645 psize = (info.offset - 14 - info.hsz) >> 2;
4646 }
4647
4648 s->img_n = ma ? 4 : 3;
4649 if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
4650 target = req_comp;
4651 else
4652 target = s->img_n; // if they want monochrome, we'll post-convert
4653
4654 // sanity-check size
4655 if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0))
4656 return stbi__errpuc("too large", "Corrupt BMP");
4657
4658 out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
4659 if (!out) return stbi__errpuc("outofmem", "Out of memory");
4660 if (info.bpp < 16) {
4661 int z=0;
4662 if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); }
4663 for (i=0; i < psize; ++i) {
4664 pal[i][2] = stbi__get8(s);
4665 pal[i][1] = stbi__get8(s);
4666 pal[i][0] = stbi__get8(s);
4667 if (info.hsz != 12) stbi__get8(s);
4668 pal[i][3] = 255;
4669 }
4670 stbi__skip(s, info.offset - 14 - info.hsz - psize * (info.hsz == 12 ? 3 : 4));
4671 if (info.bpp == 4) width = (s->img_x + 1) >> 1;
4672 else if (info.bpp == 8) width = s->img_x;
4673 else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
4674 pad = (-width)&3;
4675 for (j=0; j < (int) s->img_y; ++j) {
4676 for (i=0; i < (int) s->img_x; i += 2) {
4677 int v=stbi__get8(s),v2=0;
4678 if (info.bpp == 4) {
4679 v2 = v & 15;
4680 v >>= 4;
4681 }
4682 out[z++] = pal[v][0];
4683 out[z++] = pal[v][1];
4684 out[z++] = pal[v][2];
4685 if (target == 4) out[z++] = 255;
4686 if (i+1 == (int) s->img_x) break;
4687 v = (info.bpp == 8) ? stbi__get8(s) : v2;
4688 out[z++] = pal[v][0];
4689 out[z++] = pal[v][1];
4690 out[z++] = pal[v][2];
4691 if (target == 4) out[z++] = 255;
4692 }
4693 stbi__skip(s, pad);
4694 }
4695 } else {
4696 int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
4697 int z = 0;
4698 int easy=0;
4699 stbi__skip(s, info.offset - 14 - info.hsz);
4700 if (info.bpp == 24) width = 3 * s->img_x;
4701 else if (info.bpp == 16) width = 2*s->img_x;
4702 else /* bpp = 32 and pad = 0 */ width=0;
4703 pad = (-width) & 3;
4704 if (info.bpp == 24) {
4705 easy = 1;
4706 } else if (info.bpp == 32) {
4707 if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
4708 easy = 2;
4709 }
4710 if (!easy) {
4711 if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
4712 // right shift amt to put high bit in position #7
4713 rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr);
4714 gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg);
4715 bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb);
4716 ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma);
4717 }
4718 for (j=0; j < (int) s->img_y; ++j) {
4719 if (easy) {
4720 for (i=0; i < (int) s->img_x; ++i) {
4721 unsigned char a;
4722 out[z+2] = stbi__get8(s);
4723 out[z+1] = stbi__get8(s);
4724 out[z+0] = stbi__get8(s);
4725 z += 3;
4726 a = (easy == 2 ? stbi__get8(s) : 255);
4727 all_a |= a;
4728 if (target == 4) out[z++] = a;
4729 }
4730 } else {
4731 int bpp = info.bpp;
4732 for (i=0; i < (int) s->img_x; ++i) {
4733 stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s));
4734 int a;
4735 out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
4736 out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
4737 out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
4738 a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
4739 all_a |= a;
4740 if (target == 4) out[z++] = STBI__BYTECAST(a);
4741 }
4742 }
4743 stbi__skip(s, pad);
4744 }
4745 }
4746
4747 // if alpha channel is all 0s, replace with all 255s
4748 if (target == 4 && all_a == 0)
4749 for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4)
4750 out[i] = 255;
4751
4752 if (flip_vertically) {
4753 stbi_uc t;
4754 for (j=0; j < (int) s->img_y>>1; ++j) {
4755 stbi_uc *p1 = out + j *s->img_x*target;
4756 stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
4757 for (i=0; i < (int) s->img_x*target; ++i) {
4758 t = p1[i], p1[i] = p2[i], p2[i] = t;
4759 }
4760 }
4761 }
4762
4763 if (req_comp && req_comp != target) {
4764 out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
4765 if (out == NULL) return out; // stbi__convert_format frees input on failure
4766 }
4767
4768 *x = s->img_x;
4769 *y = s->img_y;
4770 if (comp) *comp = s->img_n;
4771 return out;
4772}
4773#endif
4774
4775// Targa Truevision - TGA
4776// by Jonathan Dummer
4777#ifndef STBI_NO_TGA
4778// returns STBI_rgb or whatever, 0 on error
4779static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16)
4780{
4781 // only RGB or RGBA (incl. 16bit) or grey allowed
4782 if(is_rgb16) *is_rgb16 = 0;
4783 switch(bits_per_pixel) {
4784 case 8: return STBI_grey;
4785 case 16: if(is_grey) return STBI_grey_alpha;
4786 // else: fall-through
4787 case 15: if(is_rgb16) *is_rgb16 = 1;
4788 return STBI_rgb;
4789 case 24: // fall-through
4790 case 32: return bits_per_pixel/8;
4791 default: return 0;
4792 }
4793}
4794
4795static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp)
4796{
4797 int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel, tga_colormap_bpp;
4798 int sz, tga_colormap_type;
4799 stbi__get8(s); // discard Offset
4800 tga_colormap_type = stbi__get8(s); // colormap type
4801 if( tga_colormap_type > 1 ) {
4802 stbi__rewind(s);
4803 return 0; // only RGB or indexed allowed
4804 }
4805 tga_image_type = stbi__get8(s); // image type
4806 if ( tga_colormap_type == 1 ) { // colormapped (paletted) image
4807 if (tga_image_type != 1 && tga_image_type != 9) {
4808 stbi__rewind(s);
4809 return 0;
4810 }
4811 stbi__skip(s,4); // skip index of first colormap entry and number of entries
4812 sz = stbi__get8(s); // check bits per palette color entry
4813 if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) {
4814 stbi__rewind(s);
4815 return 0;
4816 }
4817 stbi__skip(s,4); // skip image x and y origin
4818 tga_colormap_bpp = sz;
4819 } else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE
4820 if ( (tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) && (tga_image_type != 11) ) {
4821 stbi__rewind(s);
4822 return 0; // only RGB or grey allowed, +/- RLE
4823 }
4824 stbi__skip(s,9); // skip colormap specification and image x/y origin
4825 tga_colormap_bpp = 0;
4826 }
4827 tga_w = stbi__get16le(s);
4828 if( tga_w < 1 ) {
4829 stbi__rewind(s);
4830 return 0; // test width
4831 }
4832 tga_h = stbi__get16le(s);
4833 if( tga_h < 1 ) {
4834 stbi__rewind(s);
4835 return 0; // test height
4836 }
4837 tga_bits_per_pixel = stbi__get8(s); // bits per pixel
4838 stbi__get8(s); // ignore alpha bits
4839 if (tga_colormap_bpp != 0) {
4840 if((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) {
4841 // when using a colormap, tga_bits_per_pixel is the size of the indexes
4842 // I don't think anything but 8 or 16bit indexes makes sense
4843 stbi__rewind(s);
4844 return 0;
4845 }
4846 tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL);
4847 } else {
4848 tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11), NULL);
4849 }
4850 if(!tga_comp) {
4851 stbi__rewind(s);
4852 return 0;
4853 }
4854 if (x) *x = tga_w;
4855 if (y) *y = tga_h;
4856 if (comp) *comp = tga_comp;
4857 return 1; // seems to have passed everything
4858}
4859
4860static int stbi__tga_test(stbi__context *s)
4861{
4862 int res = 0;
4863 int sz, tga_color_type;
4864 stbi__get8(s); // discard Offset
4865 tga_color_type = stbi__get8(s); // color type
4866 if ( tga_color_type > 1 ) goto errorEnd; // only RGB or indexed allowed
4867 sz = stbi__get8(s); // image type
4868 if ( tga_color_type == 1 ) { // colormapped (paletted) image
4869 if (sz != 1 && sz != 9) goto errorEnd; // colortype 1 demands image type 1 or 9
4870 stbi__skip(s,4); // skip index of first colormap entry and number of entries
4871 sz = stbi__get8(s); // check bits per palette color entry
4872 if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd;
4873 stbi__skip(s,4); // skip image x and y origin
4874 } else { // "normal" image w/o colormap
4875 if ( (sz != 2) && (sz != 3) && (sz != 10) && (sz != 11) ) goto errorEnd; // only RGB or grey allowed, +/- RLE
4876 stbi__skip(s,9); // skip colormap specification and image x/y origin
4877 }
4878 if ( stbi__get16le(s) < 1 ) goto errorEnd; // test width
4879 if ( stbi__get16le(s) < 1 ) goto errorEnd; // test height
4880 sz = stbi__get8(s); // bits per pixel
4881 if ( (tga_color_type == 1) && (sz != 8) && (sz != 16) ) goto errorEnd; // for colormapped images, bpp is size of an index
4882 if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd;
4883
4884 res = 1; // if we got this far, everything's good and we can return 1 instead of 0
4885
4886errorEnd:
4887 stbi__rewind(s);
4888 return res;
4889}
4890
4891// read 16bit value and convert to 24bit RGB
4892static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
4893{
4894 stbi__uint16 px = (stbi__uint16)stbi__get16le(s);
4895 stbi__uint16 fiveBitMask = 31;
4896 // we have 3 channels with 5bits each
4897 int r = (px >> 10) & fiveBitMask;
4898 int g = (px >> 5) & fiveBitMask;
4899 int b = px & fiveBitMask;
4900 // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later
4901 out[0] = (stbi_uc)((r * 255)/31);
4902 out[1] = (stbi_uc)((g * 255)/31);
4903 out[2] = (stbi_uc)((b * 255)/31);
4904
4905 // some people claim that the most significant bit might be used for alpha
4906 // (possibly if an alpha-bit is set in the "image descriptor byte")
4907 // but that only made 16bit test images completely translucent..
4908 // so let's treat all 15 and 16bit TGAs as RGB with no alpha.
4909}
4910
4911static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
4912{
4913 // read in the TGA header stuff
4914 int tga_offset = stbi__get8(s);
4915 int tga_indexed = stbi__get8(s);
4916 int tga_image_type = stbi__get8(s);
4917 int tga_is_RLE = 0;
4918 int tga_palette_start = stbi__get16le(s);
4919 int tga_palette_len = stbi__get16le(s);
4920 int tga_palette_bits = stbi__get8(s);
4921 int tga_x_origin = stbi__get16le(s);
4922 int tga_y_origin = stbi__get16le(s);
4923 int tga_width = stbi__get16le(s);
4924 int tga_height = stbi__get16le(s);
4925 int tga_bits_per_pixel = stbi__get8(s);
4926 int tga_comp, tga_rgb16=0;
4927 int tga_inverted = stbi__get8(s);
4928 // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?)
4929 // image data
4930 unsigned char *tga_data;
4931 unsigned char *tga_palette = NULL;
4932 int i, j;
4933 unsigned char raw_data[4] = {0};
4934 int RLE_count = 0;
4935 int RLE_repeating = 0;
4936 int read_next_pixel = 1;
4937 STBI_NOTUSED(ri);
4938
4939 // do a tiny bit of precessing
4940 if ( tga_image_type >= 8 )
4941 {
4942 tga_image_type -= 8;
4943 tga_is_RLE = 1;
4944 }
4945 tga_inverted = 1 - ((tga_inverted >> 5) & 1);
4946
4947 // If I'm paletted, then I'll use the number of bits from the palette
4948 if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16);
4949 else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16);
4950
4951 if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency
4952 return stbi__errpuc("bad format", "Can't find out TGA pixelformat");
4953
4954 // tga info
4955 *x = tga_width;
4956 *y = tga_height;
4957 if (comp) *comp = tga_comp;
4958
4959 if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0))
4960 return stbi__errpuc("too large", "Corrupt TGA");
4961
4962 tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
4963 if (!tga_data) return stbi__errpuc("outofmem", "Out of memory");
4964
4965 // skip to the data's starting position (offset usually = 0)
4966 stbi__skip(s, tga_offset );
4967
4968 if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) {
4969 for (i=0; i < tga_height; ++i) {
4970 int row = tga_inverted ? tga_height -i - 1 : i;
4971 stbi_uc *tga_row = tga_data + row*tga_width*tga_comp;
4972 stbi__getn(s, tga_row, tga_width * tga_comp);
4973 }
4974 } else {
4975 // do I need to load a palette?
4976 if ( tga_indexed)
4977 {
4978 // any data to skip? (offset usually = 0)
4979 stbi__skip(s, tga_palette_start );
4980 // load the palette
4981 tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0);
4982 if (!tga_palette) {
4983 STBI_FREE(tga_data);
4984 return stbi__errpuc("outofmem", "Out of memory");
4985 }
4986 if (tga_rgb16) {
4987 stbi_uc *pal_entry = tga_palette;
4988 STBI_ASSERT(tga_comp == STBI_rgb);
4989 for (i=0; i < tga_palette_len; ++i) {
4990 stbi__tga_read_rgb16(s, pal_entry);
4991 pal_entry += tga_comp;
4992 }
4993 } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) {
4994 STBI_FREE(tga_data);
4995 STBI_FREE(tga_palette);
4996 return stbi__errpuc("bad palette", "Corrupt TGA");
4997 }
4998 }
4999 // load the data
5000 for (i=0; i < tga_width * tga_height; ++i)
5001 {
5002 // if I'm in RLE mode, do I need to get a RLE stbi__pngchunk?
5003 if ( tga_is_RLE )
5004 {
5005 if ( RLE_count == 0 )
5006 {
5007 // yep, get the next byte as a RLE command
5008 int RLE_cmd = stbi__get8(s);
5009 RLE_count = 1 + (RLE_cmd & 127);
5010 RLE_repeating = RLE_cmd >> 7;
5011 read_next_pixel = 1;
5012 } else if ( !RLE_repeating )
5013 {
5014 read_next_pixel = 1;
5015 }
5016 } else
5017 {
5018 read_next_pixel = 1;
5019 }
5020 // OK, if I need to read a pixel, do it now
5021 if ( read_next_pixel )
5022 {
5023 // load however much data we did have
5024 if ( tga_indexed )
5025 {
5026 // read in index, then perform the lookup
5027 int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s);
5028 if ( pal_idx >= tga_palette_len ) {
5029 // invalid index
5030 pal_idx = 0;
5031 }
5032 pal_idx *= tga_comp;
5033 for (j = 0; j < tga_comp; ++j) {
5034 raw_data[j] = tga_palette[pal_idx+j];
5035 }
5036 } else if(tga_rgb16) {
5037 STBI_ASSERT(tga_comp == STBI_rgb);
5038 stbi__tga_read_rgb16(s, raw_data);
5039 } else {
5040 // read in the data raw
5041 for (j = 0; j < tga_comp; ++j) {
5042 raw_data[j] = stbi__get8(s);
5043 }
5044 }
5045 // clear the reading flag for the next pixel
5046 read_next_pixel = 0;
5047 } // end of reading a pixel
5048
5049 // copy data
5050 for (j = 0; j < tga_comp; ++j)
5051 tga_data[i*tga_comp+j] = raw_data[j];
5052
5053 // in case we're in RLE mode, keep counting down
5054 --RLE_count;
5055 }
5056 // do I need to invert the image?
5057 if ( tga_inverted )
5058 {
5059 for (j = 0; j*2 < tga_height; ++j)
5060 {
5061 int index1 = j * tga_width * tga_comp;
5062 int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
5063 for (i = tga_width * tga_comp; i > 0; --i)
5064 {
5065 unsigned char temp = tga_data[index1];
5066 tga_data[index1] = tga_data[index2];
5067 tga_data[index2] = temp;
5068 ++index1;
5069 ++index2;
5070 }
5071 }
5072 }
5073 // clear my palette, if I had one
5074 if ( tga_palette != NULL )
5075 {
5076 STBI_FREE( tga_palette );
5077 }
5078 }
5079
5080 // swap RGB - if the source data was RGB16, it already is in the right order
5081 if (tga_comp >= 3 && !tga_rgb16)
5082 {
5083 unsigned char* tga_pixel = tga_data;
5084 for (i=0; i < tga_width * tga_height; ++i)
5085 {
5086 unsigned char temp = tga_pixel[0];
5087 tga_pixel[0] = tga_pixel[2];
5088 tga_pixel[2] = temp;
5089 tga_pixel += tga_comp;
5090 }
5091 }
5092
5093 // convert to target component count
5094 if (req_comp && req_comp != tga_comp)
5095 tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height);
5096
5097 // the things I do to get rid of an error message, and yet keep
5098 // Microsoft's C compilers happy... [8^(
5099 tga_palette_start = tga_palette_len = tga_palette_bits =
5100 tga_x_origin = tga_y_origin = 0;
5101 // OK, done
5102 return tga_data;
5103}
5104#endif
5105
5106// *************************************************************************************************
5107// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB
5108
5109#ifndef STBI_NO_PSD
5110static int stbi__psd_test(stbi__context *s)
5111{
5112 int r = (stbi__get32be(s) == 0x38425053);
5113 stbi__rewind(s);
5114 return r;
5115}
5116
5117static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount)
5118{
5119 int count, nleft, len;
5120
5121 count = 0;
5122 while ((nleft = pixelCount - count) > 0) {
5123 len = stbi__get8(s);
5124 if (len == 128) {
5125 // No-op.
5126 } else if (len < 128) {
5127 // Copy next len+1 bytes literally.
5128 len++;
5129 if (len > nleft) return 0; // corrupt data
5130 count += len;
5131 while (len) {
5132 *p = stbi__get8(s);
5133 p += 4;
5134 len--;
5135 }
5136 } else if (len > 128) {
5137 stbi_uc val;
5138 // Next -len+1 bytes in the dest are replicated from next source byte.
5139 // (Interpret len as a negative 8-bit int.)
5140 len = 257 - len;
5141 if (len > nleft) return 0; // corrupt data
5142 val = stbi__get8(s);
5143 count += len;
5144 while (len) {
5145 *p = val;
5146 p += 4;
5147 len--;
5148 }
5149 }
5150 }
5151
5152 return 1;
5153}
5154
// Decodes the image data section of an RGB-mode PSD into an interleaved
// 4-channel buffer.
//
//   x, y      receive the image width and height on success
//   comp      optional; always receives 4 on success
//   req_comp  0 or 4 to keep four channels, otherwise the buffer is converted
//             to this many channels before returning
//   ri        ri->bits_per_channel is set to 16 when a 16-bit buffer is produced
//   bpc       bits per channel the caller wants; a 16-bit buffer is only
//             produced when bpc == 16, the file is 16-bit and uncompressed
//
// Returns the decoded buffer, or NULL via stbi__errpuc on any header,
// size, allocation or RLE error.
static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
{
   int pixelCount;
   int channelCount, compression;
   int channel, i;
   int bitdepth;
   int w,h;
   stbi_uc *out;
   STBI_NOTUSED(ri);   // ri is in fact read and written further down

   // Check identifier
   if (stbi__get32be(s) != 0x38425053)   // "8BPS"
      return stbi__errpuc("not PSD", "Corrupt PSD image");

   // Check file type version.
   if (stbi__get16be(s) != 1)
      return stbi__errpuc("wrong version", "Unsupported version of PSD image");

   // Skip 6 reserved bytes.
   stbi__skip(s, 6 );

   // Read the number of channels (R, G, B, A, etc).
   channelCount = stbi__get16be(s);
   if (channelCount < 0 || channelCount > 16)
      return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image");

   // Read the rows and columns of the image.
   h = stbi__get32be(s);
   w = stbi__get32be(s);

   // Make sure the depth is 8 or 16 bits per channel.
   bitdepth = stbi__get16be(s);
   if (bitdepth != 8 && bitdepth != 16)
      return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit");

   // Make sure the color mode is RGB.
   // Valid options are:
   //   0: Bitmap
   //   1: Grayscale
   //   2: Indexed color
   //   3: RGB color
   //   4: CMYK color
   //   7: Multichannel
   //   8: Duotone
   //   9: Lab color
   if (stbi__get16be(s) != 3)
      return stbi__errpuc("wrong color format", "PSD is not in RGB color format");

   // Each of the next three sections starts with a 32-bit length, which is
   // read and then used as the number of bytes to skip.

   // Skip the Mode Data.  (It's the palette for indexed color; other info for other modes.)
   stbi__skip(s,stbi__get32be(s) );

   // Skip the image resources.  (resolution, pen tool paths, etc)
   stbi__skip(s, stbi__get32be(s) );

   // Skip the reserved data.
   stbi__skip(s, stbi__get32be(s) );

   // Find out if the data is compressed.
   // Known values:
   //   0: no compression
   //   1: RLE compressed
   compression = stbi__get16be(s);
   if (compression > 1)
      return stbi__errpuc("bad compression", "PSD has an unknown compression format");

   // Check size (4 bytes per pixel must not overflow)
   if (!stbi__mad3sizes_valid(4, w, h, 0))
      return stbi__errpuc("too large", "Corrupt PSD");

   // Create the destination image.

   if (!compression && bitdepth == 16 && bpc == 16) {
      // 16-bit output: 4 channels * 2 bytes per pixel
      out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0);
      ri->bits_per_channel = 16;
   } else
      out = (stbi_uc *) stbi__malloc(4 * w*h);

   if (!out) return stbi__errpuc("outofmem", "Out of memory");
   pixelCount = w*h;

   // Initialize the data to zero.
   //memset( out, 0, pixelCount * 4 );

   // Finally, the image data.
   if (compression) {
      // RLE as used by .PSD and .TIFF
      // Loop until you get the number of unpacked bytes you are expecting:
      //     Read the next source byte into n.
      //     If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
      //     Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
      //     Else if n is 128, noop.
      // Endloop

      // The RLE-compressed data is preceded by a 2-byte data count for each row in the data,
      // which we're going to just skip.
      stbi__skip(s, h * channelCount * 2 );

      // Read the RLE data by channel.
      for (channel = 0; channel < 4; channel++) {
         stbi_uc *p;

         p = out+channel;
         if (channel >= channelCount) {
            // Fill this channel with default data: opaque alpha, zero colour.
            for (i = 0; i < pixelCount; i++, p += 4)
               *p = (channel == 3 ? 255 : 0);
         } else {
            // Read the RLE data.
            if (!stbi__psd_decode_rle(s, p, pixelCount)) {
               STBI_FREE(out);
               return stbi__errpuc("corrupt", "bad RLE data");
            }
         }
      }

   } else {
      // We're at the raw image data.  It's each channel in order (Red, Green, Blue, Alpha, ...)
      // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image.

      // Read the data by channel.
      for (channel = 0; channel < 4; channel++) {
         if (channel >= channelCount) {
            // Fill this channel with default data.
            if (bitdepth == 16 && bpc == 16) {
               stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
               stbi__uint16 val = channel == 3 ? 65535 : 0;
               for (i = 0; i < pixelCount; i++, q += 4)
                  *q = val;
            } else {
               stbi_uc *p = out+channel;
               stbi_uc val = channel == 3 ? 255 : 0;
               for (i = 0; i < pixelCount; i++, p += 4)
                  *p = val;
            }
         } else {
            // NOTE(review): when the 16-bit branch above was not taken, this reads
            // whatever the caller stored in ri->bits_per_channel -- presumably 8; confirm.
            if (ri->bits_per_channel == 16) {    // output bpc
               stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
               for (i = 0; i < pixelCount; i++, q += 4)
                  *q = (stbi__uint16) stbi__get16be(s);
            } else {
               stbi_uc *p = out+channel;
               if (bitdepth == 16) {  // input bpc
                  // keep only the high byte of each 16-bit sample
                  for (i = 0; i < pixelCount; i++, p += 4)
                     *p = (stbi_uc) (stbi__get16be(s) >> 8);
               } else {
                  for (i = 0; i < pixelCount; i++, p += 4)
                     *p = stbi__get8(s);
               }
            }
         }
      }
   }

   // remove weird white matte from PSD
   // For partially transparent pixels each colour value c becomes
   // c/a + max*(1 - 1/a), where a is alpha scaled to 0..1.
   if (channelCount >= 4) {
      if (ri->bits_per_channel == 16) {
         for (i=0; i < w*h; ++i) {
            stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i;
            if (pixel[3] != 0 && pixel[3] != 65535) {
               float a = pixel[3] / 65535.0f;
               float ra = 1.0f / a;
               float inv_a = 65535.0f * (1 - ra);
               pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a);
               pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a);
               pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a);
            }
         }
      } else {
         for (i=0; i < w*h; ++i) {
            unsigned char *pixel = out + 4*i;
            if (pixel[3] != 0 && pixel[3] != 255) {
               float a = pixel[3] / 255.0f;
               float ra = 1.0f / a;
               float inv_a = 255.0f * (1 - ra);
               pixel[0] = (unsigned char) (pixel[0]*ra + inv_a);
               pixel[1] = (unsigned char) (pixel[1]*ra + inv_a);
               pixel[2] = (unsigned char) (pixel[2]*ra + inv_a);
            }
         }
      }
   }

   // convert to desired output format
   if (req_comp && req_comp != 4) {
      if (ri->bits_per_channel == 16)
         out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h);
      else
         out = stbi__convert_format(out, 4, req_comp, w, h);
      if (out == NULL) return out; // stbi__convert_format frees input on failure
   }

   if (comp) *comp = 4;
   *y = h;
   *x = w;

   return out;
}
5352#endif
5353
5354// *************************************************************************************************
5355// Softimage PIC loader
5356// by Tom Seddon
5357//
5358// See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
5359// See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
5360
5361#ifndef STBI_NO_PIC
5362static int stbi__pic_is4(stbi__context *s,const char *str)
5363{
5364 int i;
5365 for (i=0; i<4; ++i)
5366 if (stbi__get8(s) != (stbi_uc)str[i])
5367 return 0;
5368
5369 return 1;
5370}
5371
5372static int stbi__pic_test_core(stbi__context *s)
5373{
5374 int i;
5375
5376 if (!stbi__pic_is4(s,"\x53\x80\xF6\x34"))
5377 return 0;
5378
5379 for(i=0;i<84;++i)
5380 stbi__get8(s);
5381
5382 if (!stbi__pic_is4(s,"PICT"))
5383 return 0;
5384
5385 return 1;
5386}
5387
5388typedef struct
5389{
5390 stbi_uc size,type,channel;
5391} stbi__pic_packet;
5392
5393static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest)
5394{
5395 int mask=0x80, i;
5396
5397 for (i=0; i<4; ++i, mask>>=1) {
5398 if (channel & mask) {
5399 if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short");
5400 dest[i]=stbi__get8(s);
5401 }
5402 }
5403
5404 return dest;
5405}
5406
5407static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src)
5408{
5409 int mask=0x80,i;
5410
5411 for (i=0;i<4; ++i, mask>>=1)
5412 if (channel&mask)
5413 dest[i]=src[i];
5414}
5415
// Reads the PIC packet descriptors and then decodes the pixel rows into `result`.
//
//   width, height  image dimensions
//   comp           receives 4 if any packet's channel mask has bit 0x10 set
//                  (alpha), otherwise 3; only written once all packets are read
//   result         caller-provided buffer of width*height*4 bytes (4 bytes per pixel)
//
// Returns `result` on success, or NULL/0 after recording a failure reason.
static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result)
{
   int act_comp=0,num_packets=0,y,chained;
   stbi__pic_packet packets[10];

   // this will (should...) cater for even some bizarre stuff like having data
    // for the same channel in multiple packets.
   do {
      stbi__pic_packet *packet;

      // refuse to read more descriptors than the fixed array can hold
      if (num_packets==sizeof(packets)/sizeof(packets[0]))
         return stbi__errpuc("bad format","too many packets");

      packet = &packets[num_packets++];

      // each descriptor is 4 bytes: chained flag, size, type, channel mask
      chained = stbi__get8(s);
      packet->size    = stbi__get8(s);
      packet->type    = stbi__get8(s);
      packet->channel = stbi__get8(s);

      act_comp |= packet->channel;   // union of all channel masks seen so far

      if (stbi__at_eof(s))          return stbi__errpuc("bad file","file too short (reading packets)");
      if (packet->size != 8)  return stbi__errpuc("bad format","packet isn't 8bpp");
   } while (chained);   // a non-zero chained byte means another descriptor follows

   *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel?

   // every row stores the data of each packet in turn
   for(y=0; y<height; ++y) {
      int packet_idx;

      for(packet_idx=0; packet_idx < num_packets; ++packet_idx) {
         stbi__pic_packet *packet = &packets[packet_idx];
         stbi_uc *dest = result+y*width*4;   // start of row y

         switch (packet->type) {
            default:
               return stbi__errpuc("bad format","packet has bad compression type");

            case 0: {//uncompressed
               int x;

               for(x=0;x<width;++x, dest+=4)
                  if (!stbi__readval(s,packet->channel,dest))
                     return 0;
               break;
            }

            case 1://Pure RLE
               {
                  int left=width, i;

                  while (left>0) {
                     stbi_uc count,value[4];

                     count=stbi__get8(s);
                     if (stbi__at_eof(s))   return stbi__errpuc("bad file","file too short (pure read count)");

                     // a run longer than the rest of the row is clamped, not rejected
                     if (count > left)
                        count = (stbi_uc) left;

                     if (!stbi__readval(s,packet->channel,value))  return 0;

                     for(i=0; i<count; ++i,dest+=4)
                        stbi__copyval(packet->channel,dest,value);
                     left -= count;
                  }
               }
               break;

            case 2: {//Mixed RLE
               int left=width;
               while (left>0) {
                  int count = stbi__get8(s), i;
                  if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (mixed read count)");

                  if (count >= 128) { // Repeated
                     stbi_uc value[4];

                     // 128 means the run length follows as a 16-bit big-endian value;
                     // 129..255 encode run lengths 2..128
                     if (count==128)
                        count = stbi__get16be(s);
                     else
                        count -= 127;
                     if (count > left)
                        return stbi__errpuc("bad file","scanline overrun");

                     if (!stbi__readval(s,packet->channel,value))
                        return 0;

                     for(i=0;i<count;++i, dest += 4)
                        stbi__copyval(packet->channel,dest,value);
                  } else { // Raw
                     // 0..127 means that many + 1 literal pixels follow
                     ++count;
                     if (count>left) return stbi__errpuc("bad file","scanline overrun");

                     for(i=0;i<count;++i, dest+=4)
                        if (!stbi__readval(s,packet->channel,dest))
                           return 0;
                  }
                  left-=count;
               }
               break;
            }
         }
      }
   }

   return result;
}
5525
5526static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri)
5527{
5528 stbi_uc *result;
5529 int i, x,y, internal_comp;
5530 STBI_NOTUSED(ri);
5531
5532 if (!comp) comp = &internal_comp;
5533
5534 for (i=0; i<92; ++i)
5535 stbi__get8(s);
5536
5537 x = stbi__get16be(s);
5538 y = stbi__get16be(s);
5539 if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pic header)");
5540 if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode");
5541
5542 stbi__get32be(s); //skip `ratio'
5543 stbi__get16be(s); //skip `fields'
5544 stbi__get16be(s); //skip `pad'
5545
5546 // intermediate buffer is RGBA
5547 result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0);
5548 memset(result, 0xff, x*y*4);
5549
5550 if (!stbi__pic_load_core(s,x,y,comp, result)) {
5551 STBI_FREE(result);
5552 result=0;
5553 }
5554 *px = x;
5555 *py = y;
5556 if (req_comp == 0) req_comp = *comp;
5557 result=stbi__convert_format(result,4,req_comp,x,y);
5558
5559 return result;
5560}
5561
5562static int stbi__pic_test(stbi__context *s)
5563{
5564 int r = stbi__pic_test_core(s);
5565 stbi__rewind(s);
5566 return r;
5567}
5568#endif
5569
5570// *************************************************************************************************
5571// GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb
5572
5573#ifndef STBI_NO_GIF
// One entry of the GIF LZW code table. Each entry stands for a string of
// colour indices, stored as a link to a shorter string plus one more index.
typedef struct
{
   stbi__int16 prefix;   // code of the preceding string, or -1 for a single-index root code
   stbi_uc first;        // first colour index of the whole string
   stbi_uc suffix;       // last colour index of the string (the one this entry adds)
} stbi__gif_lzw;
5580
5581typedef struct
5582{
5583 int w,h;
5584 stbi_uc *out, *old_out; // output buffer (always 4 components)
5585 int flags, bgindex, ratio, transparent, eflags, delay;
5586 stbi_uc pal[256][4];
5587 stbi_uc lpal[256][4];
5588 stbi__gif_lzw codes[4096];
5589 stbi_uc *color_table;
5590 int parse, step;
5591 int lflags;
5592 int start_x, start_y;
5593 int max_x, max_y;
5594 int cur_x, cur_y;
5595 int line_size;
5596} stbi__gif;
5597
5598static int stbi__gif_test_raw(stbi__context *s)
5599{
5600 int sz;
5601 if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') return 0;
5602 sz = stbi__get8(s);
5603 if (sz != '9' && sz != '7') return 0;
5604 if (stbi__get8(s) != 'a') return 0;
5605 return 1;
5606}
5607
5608static int stbi__gif_test(stbi__context *s)
5609{
5610 int r = stbi__gif_test_raw(s);
5611 stbi__rewind(s);
5612 return r;
5613}
5614
5615static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp)
5616{
5617 int i;
5618 for (i=0; i < num_entries; ++i) {
5619 pal[i][2] = stbi__get8(s);
5620 pal[i][1] = stbi__get8(s);
5621 pal[i][0] = stbi__get8(s);
5622 pal[i][3] = transp == i ? 0 : 255;
5623 }
5624}
5625
5626static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info)
5627{
5628 stbi_uc version;
5629 if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8')
5630 return stbi__err("not GIF", "Corrupt GIF");
5631
5632 version = stbi__get8(s);
5633 if (version != '7' && version != '9') return stbi__err("not GIF", "Corrupt GIF");
5634 if (stbi__get8(s) != 'a') return stbi__err("not GIF", "Corrupt GIF");
5635
5636 stbi__g_failure_reason = "";
5637 g->w = stbi__get16le(s);
5638 g->h = stbi__get16le(s);
5639 g->flags = stbi__get8(s);
5640 g->bgindex = stbi__get8(s);
5641 g->ratio = stbi__get8(s);
5642 g->transparent = -1;
5643
5644 if (comp != 0) *comp = 4; // can't actually tell whether it's 3 or 4 until we parse the comments
5645
5646 if (is_info) return 1;
5647
5648 if (g->flags & 0x80)
5649 stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1);
5650
5651 return 1;
5652}
5653
5654static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
5655{
5656 stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
5657 if (!stbi__gif_header(s, g, comp, 1)) {
5658 STBI_FREE(g);
5659 stbi__rewind( s );
5660 return 0;
5661 }
5662 if (x) *x = g->w;
5663 if (y) *y = g->h;
5664 STBI_FREE(g);
5665 return 1;
5666}
5667
5668static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
5669{
5670 stbi_uc *p, *c;
5671
5672 // recurse to decode the prefixes, since the linked-list is backwards,
5673 // and working backwards through an interleaved image would be nasty
5674 if (g->codes[code].prefix >= 0)
5675 stbi__out_gif_code(g, g->codes[code].prefix);
5676
5677 if (g->cur_y >= g->max_y) return;
5678
5679 p = &g->out[g->cur_x + g->cur_y];
5680 c = &g->color_table[g->codes[code].suffix * 4];
5681
5682 if (c[3] >= 128) {
5683 p[0] = c[2];
5684 p[1] = c[1];
5685 p[2] = c[0];
5686 p[3] = c[3];
5687 }
5688 g->cur_x += 4;
5689
5690 if (g->cur_x >= g->max_x) {
5691 g->cur_x = g->start_x;
5692 g->cur_y += g->step;
5693
5694 while (g->cur_y >= g->max_y && g->parse > 0) {
5695 g->step = (1 << g->parse) * g->line_size;
5696 g->cur_y = g->start_y + (g->step >> 1);
5697 --g->parse;
5698 }
5699 }
5700}
5701
// Decodes the LZW-compressed raster data of one GIF image, emitting pixels
// through stbi__out_gif_code.
// Returns g->out when the data ends (block terminator or end-of-stream code),
// or NULL when the minimum code size is above 12 or the code stream is corrupt.
static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
{
   stbi_uc lzw_cs;
   stbi__int32 len, init_code;
   stbi__uint32 first;
   stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
   stbi__gif_lzw *p;

   lzw_cs = stbi__get8(s);          // LZW minimum code size
   if (lzw_cs > 12) return NULL;
   clear = 1 << lzw_cs;             // the clear code; clear+1 is end-of-stream
   first = 1;                       // stays 1 until a clear code has been seen
   codesize = lzw_cs + 1;
   codemask = (1 << codesize) - 1;
   bits = 0;                        // bit accumulator, filled from the low end
   valid_bits = 0;                  // number of unread bits in `bits`
   // root codes: each one stands for the single colour index equal to the code
   for (init_code = 0; init_code < clear; init_code++) {
      g->codes[init_code].prefix = -1;
      g->codes[init_code].first = (stbi_uc) init_code;
      g->codes[init_code].suffix = (stbi_uc) init_code;
   }

   // support no starting clear code
   avail = clear+2;                 // next free slot in the code table
   oldcode = -1;                    // previously decoded code, -1 if none

   len = 0;                         // bytes left in the current data sub-block
   for(;;) {
      if (valid_bits < codesize) {
         // not enough bits for a code: pull one more byte from the sub-blocks
         if (len == 0) {
            len = stbi__get8(s); // start new block
            if (len == 0)
               return g->out;    // zero-length block ends the data
         }
         --len;
         bits |= (stbi__int32) stbi__get8(s) << valid_bits;
         valid_bits += 8;
      } else {
         stbi__int32 code = bits & codemask;
         bits >>= codesize;
         valid_bits -= codesize;
         // @OPTIMIZE: is there some way we can accelerate the non-clear path?
         if (code == clear) {  // clear code
            // reset the table to just the root codes
            codesize = lzw_cs + 1;
            codemask = (1 << codesize) - 1;
            avail = clear + 2;
            oldcode = -1;
            first = 0;
         } else if (code == clear + 1) { // end of stream code
            // skip the rest of this sub-block and all following ones
            stbi__skip(s, len);
            while ((len = stbi__get8(s)) > 0)
               stbi__skip(s,len);
            return g->out;
         } else if (code <= avail) {
            if (first) return stbi__errpuc("no clear code", "Corrupt GIF");

            if (oldcode >= 0) {
               // add a new entry: the previous string plus one more index
               p = &g->codes[avail++];
               if (avail > 4096)        return stbi__errpuc("too many codes", "Corrupt GIF");
               p->prefix = (stbi__int16) oldcode;
               p->first = g->codes[oldcode].first;
               // code == avail refers to the entry being defined right now, whose
               // last index equals its own first index
               p->suffix = (code == avail) ? p->first : g->codes[code].first;
            } else if (code == avail)
               return stbi__errpuc("illegal code in raster", "Corrupt GIF");

            stbi__out_gif_code(g, (stbi__uint16) code);

            // widen the code size when the table reaches the next power of two
            if ((avail & codemask) == 0 && avail <= 0x0FFF) {
               codesize++;
               codemask = (1 << codesize) - 1;
            }

            oldcode = code;
         } else {
            return stbi__errpuc("illegal code in raster", "Corrupt GIF");
         }
      }
   }
}
5781
5782static void stbi__fill_gif_background(stbi__gif *g, int x0, int y0, int x1, int y1)
5783{
5784 int x, y;
5785 stbi_uc *c = g->pal[g->bgindex];
5786 for (y = y0; y < y1; y += 4 * g->w) {
5787 for (x = x0; x < x1; x += 4) {
5788 stbi_uc *p = &g->out[y + x];
5789 p[0] = c[2];
5790 p[1] = c[1];
5791 p[2] = c[0];
5792 p[3] = 0;
5793 }
5794 }
5795}
5796
5797// this function is designed to support animated gifs, although stb_image doesn't support it
5798static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp)
5799{
5800 int i;
5801 stbi_uc *prev_out = 0;
5802
5803 if (g->out == 0 && !stbi__gif_header(s, g, comp,0))
5804 return 0; // stbi__g_failure_reason set by stbi__gif_header
5805
5806 if (!stbi__mad3sizes_valid(g->w, g->h, 4, 0))
5807 return stbi__errpuc("too large", "GIF too large");
5808
5809 prev_out = g->out;
5810 g->out = (stbi_uc *) stbi__malloc_mad3(4, g->w, g->h, 0);
5811 if (g->out == 0) return stbi__errpuc("outofmem", "Out of memory");
5812
5813 switch ((g->eflags & 0x1C) >> 2) {
5814 case 0: // unspecified (also always used on 1st frame)
5815 stbi__fill_gif_background(g, 0, 0, 4 * g->w, 4 * g->w * g->h);
5816 break;
5817 case 1: // do not dispose
5818 if (prev_out) memcpy(g->out, prev_out, 4 * g->w * g->h);
5819 g->old_out = prev_out;
5820 break;
5821 case 2: // dispose to background
5822 if (prev_out) memcpy(g->out, prev_out, 4 * g->w * g->h);
5823 stbi__fill_gif_background(g, g->start_x, g->start_y, g->max_x, g->max_y);
5824 break;
5825 case 3: // dispose to previous
5826 if (g->old_out) {
5827 for (i = g->start_y; i < g->max_y; i += 4 * g->w)
5828 memcpy(&g->out[i + g->start_x], &g->old_out[i + g->start_x], g->max_x - g->start_x);
5829 }
5830 break;
5831 }
5832
5833 for (;;) {
5834 switch (stbi__get8(s)) {
5835 case 0x2C: /* Image Descriptor */
5836 {
5837 int prev_trans = -1;
5838 stbi__int32 x, y, w, h;
5839 stbi_uc *o;
5840
5841 x = stbi__get16le(s);
5842 y = stbi__get16le(s);
5843 w = stbi__get16le(s);
5844 h = stbi__get16le(s);
5845 if (((x + w) > (g->w)) || ((y + h) > (g->h)))
5846 return stbi__errpuc("bad Image Descriptor", "Corrupt GIF");
5847
5848 g->line_size = g->w * 4;
5849 g->start_x = x * 4;
5850 g->start_y = y * g->line_size;
5851 g->max_x = g->start_x + w * 4;
5852 g->max_y = g->start_y + h * g->line_size;
5853 g->cur_x = g->start_x;
5854 g->cur_y = g->start_y;
5855
5856 g->lflags = stbi__get8(s);
5857
5858 if (g->lflags & 0x40) {
5859 g->step = 8 * g->line_size; // first interlaced spacing
5860 g->parse = 3;
5861 } else {
5862 g->step = g->line_size;
5863 g->parse = 0;
5864 }
5865
5866 if (g->lflags & 0x80) {
5867 stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
5868 g->color_table = (stbi_uc *) g->lpal;
5869 } else if (g->flags & 0x80) {
5870 if (g->transparent >= 0 && (g->eflags & 0x01)) {
5871 prev_trans = g->pal[g->transparent][3];
5872 g->pal[g->transparent][3] = 0;
5873 }
5874 g->color_table = (stbi_uc *) g->pal;
5875 } else
5876 return stbi__errpuc("missing color table", "Corrupt GIF");
5877
5878 o = stbi__process_gif_raster(s, g);
5879 if (o == NULL) return NULL;
5880
5881 if (prev_trans != -1)
5882 g->pal[g->transparent][3] = (stbi_uc) prev_trans;
5883
5884 return o;
5885 }
5886
5887 case 0x21: // Comment Extension.
5888 {
5889 int len;
5890 if (stbi__get8(s) == 0xF9) { // Graphic Control Extension.
5891 len = stbi__get8(s);
5892 if (len == 4) {
5893 g->eflags = stbi__get8(s);
5894 g->delay = stbi__get16le(s);
5895 g->transparent = stbi__get8(s);
5896 } else {
5897 stbi__skip(s, len);
5898 break;
5899 }
5900 }
5901 while ((len = stbi__get8(s)) != 0)
5902 stbi__skip(s, len);
5903 break;
5904 }
5905
5906 case 0x3B: // gif stream termination code
5907 return (stbi_uc *) s; // using '1' causes warning on some compilers
5908
5909 default:
5910 return stbi__errpuc("unknown code", "Corrupt GIF");
5911 }
5912 }
5913
5914 STBI_NOTUSED(req_comp);
5915}
5916
5917static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
5918{
5919 stbi_uc *u = 0;
5920 stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
5921 memset(g, 0, sizeof(*g));
5922 STBI_NOTUSED(ri);
5923
5924 u = stbi__gif_load_next(s, g, comp, req_comp);
5925 if (u == (stbi_uc *) s) u = 0; // end of animated gif marker
5926 if (u) {
5927 *x = g->w;
5928 *y = g->h;
5929 if (req_comp && req_comp != 4)
5930 u = stbi__convert_format(u, 4, req_comp, g->w, g->h);
5931 }
5932 else if (g->out)
5933 STBI_FREE(g->out);
5934 STBI_FREE(g);
5935 return u;
5936}
5937
5938static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
5939{
5940 return stbi__gif_info_raw(s,x,y,comp);
5941}
5942#endif
5943
5944// *************************************************************************************************
5945// Radiance RGBE HDR loader
5946// originally by Nicolas Schulz
5947#ifndef STBI_NO_HDR
5948static int stbi__hdr_test_core(stbi__context *s, const char *signature)
5949{
5950 int i;
5951 for (i=0; signature[i]; ++i)
5952 if (stbi__get8(s) != signature[i])
5953 return 0;
5954 stbi__rewind(s);
5955 return 1;
5956}
5957
5958static int stbi__hdr_test(stbi__context* s)
5959{
5960 int r = stbi__hdr_test_core(s, "#?RADIANCE\n");
5961 stbi__rewind(s);
5962 if(!r) {
5963 r = stbi__hdr_test_core(s, "#?RGBE\n");
5964 stbi__rewind(s);
5965 }
5966 return r;
5967}
5968
5969#define STBI__HDR_BUFLEN 1024
5970static char *stbi__hdr_gettoken(stbi__context *z, char *buffer)
5971{
5972 int len=0;
5973 char c = '\0';
5974
5975 c = (char) stbi__get8(z);
5976
5977 while (!stbi__at_eof(z) && c != '\n') {
5978 buffer[len++] = c;
5979 if (len == STBI__HDR_BUFLEN-1) {
5980 // flush to end of line
5981 while (!stbi__at_eof(z) && stbi__get8(z) != '\n')
5982 ;
5983 break;
5984 }
5985 c = (char) stbi__get8(z);
5986 }
5987
5988 buffer[len] = 0;
5989 return buffer;
5990}
5991
5992static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp)
5993{
5994 if ( input[3] != 0 ) {
5995 float f1;
5996 // Exponent
5997 f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8));
5998 if (req_comp <= 2)
5999 output[0] = (input[0] + input[1] + input[2]) * f1 / 3;
6000 else {
6001 output[0] = input[0] * f1;
6002 output[1] = input[1] * f1;
6003 output[2] = input[2] * f1;
6004 }
6005 if (req_comp == 2) output[1] = 1;
6006 if (req_comp == 4) output[3] = 1;
6007 } else {
6008 switch (req_comp) {
6009 case 4: output[3] = 1; /* fallthrough */
6010 case 3: output[0] = output[1] = output[2] = 0;
6011 break;
6012 case 2: output[1] = 1; /* fallthrough */
6013 case 1: output[0] = 0;
6014 break;
6015 }
6016 }
6017}
6018
6019static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
6020{
6021 char buffer[STBI__HDR_BUFLEN];
6022 char *token;
6023 int valid = 0;
6024 int width, height;
6025 stbi_uc *scanline;
6026 float *hdr_data;
6027 int len;
6028 unsigned char count, value;
6029 int i, j, k, c1,c2, z;
6030 const char *headerToken;
6031 STBI_NOTUSED(ri);
6032
6033 // Check identifier
6034 headerToken = stbi__hdr_gettoken(s,buffer);
6035 if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0)
6036 return stbi__errpf("not HDR", "Corrupt HDR image");
6037
6038 // Parse header
6039 for(;;) {
6040 token = stbi__hdr_gettoken(s,buffer);
6041 if (token[0] == 0) break;
6042 if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
6043 }
6044
6045 if (!valid) return stbi__errpf("unsupported format", "Unsupported HDR format");
6046
6047 // Parse width and height
6048 // can't use sscanf() if we're not using stdio!
6049 token = stbi__hdr_gettoken(s,buffer);
6050 if (strncmp(token, "-Y ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format");
6051 token += 3;
6052 height = (int) strtol(token, &token, 10);
6053 while (*token == ' ') ++token;
6054 if (strncmp(token, "+X ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format");
6055 token += 3;
6056 width = (int) strtol(token, NULL, 10);
6057
6058 *x = width;
6059 *y = height;
6060
6061 if (comp) *comp = 3;
6062 if (req_comp == 0) req_comp = 3;
6063
6064 if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0))
6065 return stbi__errpf("too large", "HDR image is too large");
6066
6067 // Read data
6068 hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0);
6069 if (!hdr_data)
6070 return stbi__errpf("outofmem", "Out of memory");
6071
6072 // Load image data
6073 // image data is stored as some number of sca
6074 if ( width < 8 || width >= 32768) {
6075 // Read flat data
6076 for (j=0; j < height; ++j) {
6077 for (i=0; i < width; ++i) {
6078 stbi_uc rgbe[4];
6079 main_decode_loop:
6080 stbi__getn(s, rgbe, 4);
6081 stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
6082 }
6083 }
6084 } else {
6085 // Read RLE-encoded data
6086 scanline = NULL;
6087
6088 for (j = 0; j < height; ++j) {
6089 c1 = stbi__get8(s);
6090 c2 = stbi__get8(s);
6091 len = stbi__get8(s);
6092 if (c1 != 2 || c2 != 2 || (len & 0x80)) {
6093 // not run-length encoded, so we have to actually use THIS data as a decoded
6094 // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
6095 stbi_uc rgbe[4];
6096 rgbe[0] = (stbi_uc) c1;
6097 rgbe[1] = (stbi_uc) c2;
6098 rgbe[2] = (stbi_uc) len;
6099 rgbe[3] = (stbi_uc) stbi__get8(s);
6100 stbi__hdr_convert(hdr_data, rgbe, req_comp);
6101 i = 1;
6102 j = 0;
6103 STBI_FREE(scanline);
6104 goto main_decode_loop; // yes, this makes no sense
6105 }
6106 len <<= 8;
6107 len |= stbi__get8(s);
6108 if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
6109 if (scanline == NULL) {
6110 scanline = (stbi_uc *) stbi__malloc_mad2(width, 4, 0);
6111 if (!scanline) {
6112 STBI_FREE(hdr_data);
6113 return stbi__errpf("outofmem", "Out of memory");
6114 }
6115 }
6116
6117 for (k = 0; k < 4; ++k) {
6118 int nleft;
6119 i = 0;
6120 while ((nleft = width - i) > 0) {
6121 count = stbi__get8(s);
6122 if (count > 128) {
6123 // Run
6124 value = stbi__get8(s);
6125 count -= 128;
6126 if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
6127 for (z = 0; z < count; ++z)
6128 scanline[i++ * 4 + k] = value;
6129 } else {
6130 // Dump
6131 if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
6132 for (z = 0; z < count; ++z)
6133 scanline[i++ * 4 + k] = stbi__get8(s);
6134 }
6135 }
6136 }
6137 for (i=0; i < width; ++i)
6138 stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
6139 }
6140 if (scanline)
6141 STBI_FREE(scanline);
6142 }
6143
6144 return hdr_data;
6145}
6146
6147static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp)
6148{
6149 char buffer[STBI__HDR_BUFLEN];
6150 char *token;
6151 int valid = 0;
6152 int dummy;
6153
6154 if (!x) x = &dummy;
6155 if (!y) y = &dummy;
6156 if (!comp) comp = &dummy;
6157
6158 if (stbi__hdr_test(s) == 0) {
6159 stbi__rewind( s );
6160 return 0;
6161 }
6162
6163 for(;;) {
6164 token = stbi__hdr_gettoken(s,buffer);
6165 if (token[0] == 0) break;
6166 if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
6167 }
6168
6169 if (!valid) {
6170 stbi__rewind( s );
6171 return 0;
6172 }
6173 token = stbi__hdr_gettoken(s,buffer);
6174 if (strncmp(token, "-Y ", 3)) {
6175 stbi__rewind( s );
6176 return 0;
6177 }
6178 token += 3;
6179 *y = (int) strtol(token, &token, 10);
6180 while (*token == ' ') ++token;
6181 if (strncmp(token, "+X ", 3)) {
6182 stbi__rewind( s );
6183 return 0;
6184 }
6185 token += 3;
6186 *x = (int) strtol(token, NULL, 10);
6187 *comp = 3;
6188 return 1;
6189}
6190#endif // STBI_NO_HDR
6191
6192#ifndef STBI_NO_BMP
6193static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
6194{
6195 void *p;
6196 stbi__bmp_data info;
6197
6198 info.all_a = 255;
6199 p = stbi__bmp_parse_header(s, &info);
6200 stbi__rewind( s );
6201 if (p == NULL)
6202 return 0;
6203 if (x) *x = s->img_x;
6204 if (y) *y = s->img_y;
6205 if (comp) *comp = info.ma ? 4 : 3;
6206 return 1;
6207}
6208#endif
6209
6210#ifndef STBI_NO_PSD
6211static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
6212{
6213 int channelCount, dummy;
6214 if (!x) x = &dummy;
6215 if (!y) y = &dummy;
6216 if (!comp) comp = &dummy;
6217 if (stbi__get32be(s) != 0x38425053) {
6218 stbi__rewind( s );
6219 return 0;
6220 }
6221 if (stbi__get16be(s) != 1) {
6222 stbi__rewind( s );
6223 return 0;
6224 }
6225 stbi__skip(s, 6);
6226 channelCount = stbi__get16be(s);
6227 if (channelCount < 0 || channelCount > 16) {
6228 stbi__rewind( s );
6229 return 0;
6230 }
6231 *y = stbi__get32be(s);
6232 *x = stbi__get32be(s);
6233 if (stbi__get16be(s) != 8) {
6234 stbi__rewind( s );
6235 return 0;
6236 }
6237 if (stbi__get16be(s) != 3) {
6238 stbi__rewind( s );
6239 return 0;
6240 }
6241 *comp = 4;
6242 return 1;
6243}
6244#endif
6245
6246#ifndef STBI_NO_PIC
6247static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
6248{
6249 int act_comp=0,num_packets=0,chained,dummy;
6250 stbi__pic_packet packets[10];
6251
6252 if (!x) x = &dummy;
6253 if (!y) y = &dummy;
6254 if (!comp) comp = &dummy;
6255
6256 if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) {
6257 stbi__rewind(s);
6258 return 0;
6259 }
6260
6261 stbi__skip(s, 88);
6262
6263 *x = stbi__get16be(s);
6264 *y = stbi__get16be(s);
6265 if (stbi__at_eof(s)) {
6266 stbi__rewind( s);
6267 return 0;
6268 }
6269 if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) {
6270 stbi__rewind( s );
6271 return 0;
6272 }
6273
6274 stbi__skip(s, 8);
6275
6276 do {
6277 stbi__pic_packet *packet;
6278
6279 if (num_packets==sizeof(packets)/sizeof(packets[0]))
6280 return 0;
6281
6282 packet = &packets[num_packets++];
6283 chained = stbi__get8(s);
6284 packet->size = stbi__get8(s);
6285 packet->type = stbi__get8(s);
6286 packet->channel = stbi__get8(s);
6287 act_comp |= packet->channel;
6288
6289 if (stbi__at_eof(s)) {
6290 stbi__rewind( s );
6291 return 0;
6292 }
6293 if (packet->size != 8) {
6294 stbi__rewind( s );
6295 return 0;
6296 }
6297 } while (chained);
6298
6299 *comp = (act_comp & 0x10 ? 4 : 3);
6300
6301 return 1;
6302}
6303#endif
6304
6305// *************************************************************************************************
6306// Portable Gray Map and Portable Pixel Map loader
6307// by Ken Miller
6308//
6309// PGM: http://netpbm.sourceforge.net/doc/pgm.html
6310// PPM: http://netpbm.sourceforge.net/doc/ppm.html
6311//
6312// Known limitations:
6313 // '#' comments in the header are only recognized before the width, height and max-value fields
6314// Does not support ASCII image data (formats P2 and P3)
6315// Does not support 16-bit-per-channel
6316
6317#ifndef STBI_NO_PNM
6318
6319static int stbi__pnm_test(stbi__context *s)
6320{
6321 char p, t;
6322 p = (char) stbi__get8(s);
6323 t = (char) stbi__get8(s);
6324 if (p != 'P' || (t != '5' && t != '6')) {
6325 stbi__rewind( s );
6326 return 0;
6327 }
6328 return 1;
6329}
6330
6331static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
6332{
6333 stbi_uc *out;
6334 STBI_NOTUSED(ri);
6335
6336 if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n))
6337 return 0;
6338
6339 *x = s->img_x;
6340 *y = s->img_y;
6341 if (comp) *comp = s->img_n;
6342
6343 if (!stbi__mad3sizes_valid(s->img_n, s->img_x, s->img_y, 0))
6344 return stbi__errpuc("too large", "PNM too large");
6345
6346 out = (stbi_uc *) stbi__malloc_mad3(s->img_n, s->img_x, s->img_y, 0);
6347 if (!out) return stbi__errpuc("outofmem", "Out of memory");
6348 stbi__getn(s, out, s->img_n * s->img_x * s->img_y);
6349
6350 if (req_comp && req_comp != s->img_n) {
6351 out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);
6352 if (out == NULL) return out; // stbi__convert_format frees input on failure
6353 }
6354 return out;
6355}
6356
// Returns 1 for the six whitespace characters accepted in a PNM header
// (space, tab, newline, vertical tab, form feed, carriage return), else 0.
static int      stbi__pnm_isspace(char c)
{
   switch (c) {
      case ' ':
      case '\t':
      case '\n':
      case '\v':
      case '\f':
      case '\r':
         return 1;
      default:
         return 0;
   }
}
6361
6362static void stbi__pnm_skip_whitespace(stbi__context *s, char *c)
6363{
6364 for (;;) {
6365 while (!stbi__at_eof(s) && stbi__pnm_isspace(*c))
6366 *c = (char) stbi__get8(s);
6367
6368 if (stbi__at_eof(s) || *c != '#')
6369 break;
6370
6371 while (!stbi__at_eof(s) && *c != '\n' && *c != '\r' )
6372 *c = (char) stbi__get8(s);
6373 }
6374}
6375
// Returns 1 when c is an ASCII decimal digit '0'..'9', else 0.
static int      stbi__pnm_isdigit(char c)
{
   if (c < '0') return 0;
   if (c > '9') return 0;
   return 1;
}
6380
6381static int stbi__pnm_getinteger(stbi__context *s, char *c)
6382{
6383 int value = 0;
6384
6385 while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) {
6386 value = value*10 + (*c - '0');
6387 *c = (char) stbi__get8(s);
6388 }
6389
6390 return value;
6391}
6392
6393static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
6394{
6395 int maxv, dummy;
6396 char c, p, t;
6397
6398 if (!x) x = &dummy;
6399 if (!y) y = &dummy;
6400 if (!comp) comp = &dummy;
6401
6402 stbi__rewind(s);
6403
6404 // Get identifier
6405 p = (char) stbi__get8(s);
6406 t = (char) stbi__get8(s);
6407 if (p != 'P' || (t != '5' && t != '6')) {
6408 stbi__rewind(s);
6409 return 0;
6410 }
6411
6412 *comp = (t == '6') ? 3 : 1; // '5' is 1-component .pgm; '6' is 3-component .ppm
6413
6414 c = (char) stbi__get8(s);
6415 stbi__pnm_skip_whitespace(s, &c);
6416
6417 *x = stbi__pnm_getinteger(s, &c); // read width
6418 stbi__pnm_skip_whitespace(s, &c);
6419
6420 *y = stbi__pnm_getinteger(s, &c); // read height
6421 stbi__pnm_skip_whitespace(s, &c);
6422
6423 maxv = stbi__pnm_getinteger(s, &c); // read max value
6424
6425 if (maxv > 255)
6426 return stbi__err("max value > 255", "PPM image not 8-bit");
6427 else
6428 return 1;
6429}
6430#endif
6431
6432static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp)
6433{
6434 #ifndef STBI_NO_JPEG
6435 if (stbi__jpeg_info(s, x, y, comp)) return 1;
6436 #endif
6437
6438 #ifndef STBI_NO_PNG
6439 if (stbi__png_info(s, x, y, comp)) return 1;
6440 #endif
6441
6442 #ifndef STBI_NO_GIF
6443 if (stbi__gif_info(s, x, y, comp)) return 1;
6444 #endif
6445
6446 #ifndef STBI_NO_BMP
6447 if (stbi__bmp_info(s, x, y, comp)) return 1;
6448 #endif
6449
6450 #ifndef STBI_NO_PSD
6451 if (stbi__psd_info(s, x, y, comp)) return 1;
6452 #endif
6453
6454 #ifndef STBI_NO_PIC
6455 if (stbi__pic_info(s, x, y, comp)) return 1;
6456 #endif
6457
6458 #ifndef STBI_NO_PNM
6459 if (stbi__pnm_info(s, x, y, comp)) return 1;
6460 #endif
6461
6462 #ifndef STBI_NO_HDR
6463 if (stbi__hdr_info(s, x, y, comp)) return 1;
6464 #endif
6465
6466 // test tga last because it's a crappy test!
6467 #ifndef STBI_NO_TGA
6468 if (stbi__tga_info(s, x, y, comp))
6469 return 1;
6470 #endif
6471 return stbi__err("unknown image type", "Image not of any known type, or corrupt");
6472}
6473
6474#ifndef STBI_NO_STDIO
6475STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
6476{
6477 FILE *f = stbi__fopen(filename, "rb");
6478 int result;
6479 if (!f) return stbi__err("can't fopen", "Unable to open file");
6480 result = stbi_info_from_file(f, x, y, comp);
6481 fclose(f);
6482 return result;
6483}
6484
6485STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
6486{
6487 int r;
6488 stbi__context s;
6489 long pos = ftell(f);
6490 stbi__start_file(&s, f);
6491 r = stbi__info_main(&s,x,y,comp);
6492 fseek(f,pos,SEEK_SET);
6493 return r;
6494}
6495#endif // !STBI_NO_STDIO
6496
6497STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
6498{
6499 stbi__context s;
6500 stbi__start_mem(&s,buffer,len);
6501 return stbi__info_main(&s,x,y,comp);
6502}
6503
6504STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp)
6505{
6506 stbi__context s;
6507 stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user);
6508 return stbi__info_main(&s,x,y,comp);
6509}
6510
6511#endif // STB_IMAGE_IMPLEMENTATION
6512
6513/*
6514 revision history:
6515 2.16 (2017-07-23) all functions have 16-bit variants;
6516 STBI_NO_STDIO works again;
6517 compilation fixes;
6518 fix rounding in unpremultiply;
6519 optimize vertical flip;
6520 disable raw_len validation;
6521 documentation fixes
6522 2.15 (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode;
6523 warning fixes; disable run-time SSE detection on gcc;
6524 uniform handling of optional "return" values;
6525 thread-safe initialization of zlib tables
6526 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
6527 2.13 (2016-11-29) add 16-bit API, only supported for PNG right now
6528 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
6529 2.11 (2016-04-02) allocate large structures on the stack
6530 remove white matting for transparent PSD
6531 fix reported channel count for PNG & BMP
6532 re-enable SSE2 in non-gcc 64-bit
6533 support RGB-formatted JPEG
6534 read 16-bit PNGs (only as 8-bit)
6535 2.10 (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED
6536 2.09 (2016-01-16) allow comments in PNM files
6537 16-bit-per-pixel TGA (not bit-per-component)
6538 info() for TGA could break due to .hdr handling
6539 info() for BMP now shares code instead of sloppy parse
6540 can use STBI_REALLOC_SIZED if allocator doesn't support realloc
6541 code cleanup
6542 2.08 (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA
6543 2.07 (2015-09-13) fix compiler warnings
6544 partial animated GIF support
6545 limited 16-bpc PSD support
6546 #ifdef unused functions
6547 bug with < 92 byte PIC,PNM,HDR,TGA
6548 2.06 (2015-04-19) fix bug where PSD returns wrong '*comp' value
6549 2.05 (2015-04-19) fix bug in progressive JPEG handling, fix warning
6550 2.04 (2015-04-15) try to re-enable SIMD on MinGW 64-bit
6551 2.03 (2015-04-12) extra corruption checking (mmozeiko)
6552 stbi_set_flip_vertically_on_load (nguillemot)
6553 fix NEON support; fix mingw support
6554 2.02 (2015-01-19) fix incorrect assert, fix warning
6555 2.01 (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2
6556 2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
6557 2.00 (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg)
6558 progressive JPEG (stb)
6559 PGM/PPM support (Ken Miller)
6560 STBI_MALLOC,STBI_REALLOC,STBI_FREE
6561 GIF bugfix -- seemingly never worked
6562 STBI_NO_*, STBI_ONLY_*
6563 1.48 (2014-12-14) fix incorrectly-named assert()
6564 1.47 (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb)
6565 optimize PNG (ryg)
6566 fix bug in interlaced PNG with user-specified channel count (stb)
6567 1.46 (2014-08-26)
6568 fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG
6569 1.45 (2014-08-16)
6570 fix MSVC-ARM internal compiler error by wrapping malloc
6571 1.44 (2014-08-07)
6572 various warning fixes from Ronny Chevalier
6573 1.43 (2014-07-15)
6574 fix MSVC-only compiler problem in code changed in 1.42
6575 1.42 (2014-07-09)
6576 don't define _CRT_SECURE_NO_WARNINGS (affects user code)
6577 fixes to stbi__cleanup_jpeg path
6578 added STBI_ASSERT to avoid requiring assert.h
6579 1.41 (2014-06-25)
6580 fix search&replace from 1.36 that messed up comments/error messages
6581 1.40 (2014-06-22)
6582 fix gcc struct-initialization warning
6583 1.39 (2014-06-15)
6584 fix to TGA optimization when req_comp != number of components in TGA;
6585 fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite)
6586 add support for BMP version 5 (more ignored fields)
6587 1.38 (2014-06-06)
6588 suppress MSVC warnings on integer casts truncating values
6589 fix accidental rename of 'skip' field of I/O
6590 1.37 (2014-06-04)
6591 remove duplicate typedef
6592 1.36 (2014-06-03)
6593 convert to header file single-file library
6594 if de-iphone isn't set, load iphone images color-swapped instead of returning NULL
6595 1.35 (2014-05-27)
6596 various warnings
6597 fix broken STBI_SIMD path
6598 fix bug where stbi_load_from_file no longer left file pointer in correct place
6599 fix broken non-easy path for 32-bit BMP (possibly never used)
6600 TGA optimization by Arseny Kapoulkine
6601 1.34 (unknown)
6602 use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case
6603 1.33 (2011-07-14)
6604 make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements
6605 1.32 (2011-07-13)
6606 support for "info" function for all supported filetypes (SpartanJ)
6607 1.31 (2011-06-20)
6608 a few more leak fixes, bug in PNG handling (SpartanJ)
6609 1.30 (2011-06-11)
6610 added ability to load files via callbacks to accommodate custom input streams (Ben Wenger)
6611 removed deprecated format-specific test/load functions
6612 removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway
6613 error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha)
6614 fix inefficiency in decoding 32-bit BMP (David Woo)
6615 1.29 (2010-08-16)
6616 various warning fixes from Aurelien Pocheville
6617 1.28 (2010-08-01)
6618 fix bug in GIF palette transparency (SpartanJ)
6619 1.27 (2010-08-01)
6620 cast-to-stbi_uc to fix warnings
6621 1.26 (2010-07-24)
6622 fix bug in file buffering for PNG reported by SpartanJ
6623 1.25 (2010-07-17)
6624 refix trans_data warning (Won Chun)
6625 1.24 (2010-07-12)
6626 perf improvements reading from files on platforms with lock-heavy fgetc()
6627 minor perf improvements for jpeg
6628 deprecated type-specific functions so we'll get feedback if they're needed
6629 attempt to fix trans_data warning (Won Chun)
6630 1.23 fixed bug in iPhone support
6631 1.22 (2010-07-10)
6632 removed image *writing* support
6633 stbi_info support from Jetro Lauha
6634 GIF support from Jean-Marc Lienher
6635 iPhone PNG-extensions from James Brown
6636 warning-fixes from Nicolas Schulz and Janez Zemva (i.e. Janez (U+017D)emva)
6637 1.21 fix use of 'stbi_uc' in header (reported by jon blow)
6638 1.20 added support for Softimage PIC, by Tom Seddon
6639 1.19 bug in interlaced PNG corruption check (found by ryg)
6640 1.18 (2008-08-02)
6641 fix a threading bug (local mutable static)
6642 1.17 support interlaced PNG
6643 1.16 major bugfix - stbi__convert_format converted one too many pixels
6644 1.15 initialize some fields for thread safety
6645 1.14 fix threadsafe conversion bug
6646 header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
6647 1.13 threadsafe
6648 1.12 const qualifiers in the API
6649 1.11 Support installable IDCT, colorspace conversion routines
6650 1.10 Fixes for 64-bit (don't use "unsigned long")
6651 optimized upsampling by Fabian "ryg" Giesen
6652 1.09 Fix format-conversion for PSD code (bad global variables!)
6653 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz
6654 1.07 attempt to fix C++ warning/errors again
6655 1.06 attempt to fix C++ warning/errors again
6656 1.05 fix TGA loading to return correct *comp and use good luminance calc
6657 1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free
6658 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR
6659 1.02 support for (subset of) HDR files, float interface for preferred access to them
6660 1.01 fix bug: possible bug in handling right-side up bmps... not sure
6661 fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all
6662 1.00 interface to zlib that skips zlib header
6663 0.99 correct handling of alpha in palette
6664 0.98 TGA loader by lonesock; dynamically add loaders (untested)
6665 0.97 jpeg errors on too large a file; also catch another malloc failure
6666 0.96 fix detection of invalid v value - particleman@mollyrocket forum
6667 0.95 during header scan, seek to markers in case of padding
6668 0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same
6669 0.93 handle jpegtran output; verbose errors
6670 0.92 read 4,8,16,24,32-bit BMP files of several formats
6671 0.91 output 24-bit Windows 3.0 BMP files
6672 0.90 fix a few more warnings; bump version number to approach 1.0
6673 0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd
6674 0.60 fix compiling as c++
6675 0.59 fix warnings: merge Dave Moore's -Wall fixes
6676 0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian
6677 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available
6678 0.56 fix bug: zlib uncompressed mode len vs. nlen
6679 0.55 fix bug: restart_interval not initialized to 0
6680 0.54 allow NULL for 'int *comp'
6681 0.53 fix bug in png 3->4; speedup png decoding
6682 0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments
6683 0.51 obey req_comp requests, 1-component jpegs return as 1-component,
6684 on 'test' only check type, not whether we support this variant
6685 0.50 (2006-11-19)
6686 first released version
6687*/
6688
6689
6690/*
6691------------------------------------------------------------------------------
6692This software is available under 2 licenses -- choose whichever you prefer.
6693------------------------------------------------------------------------------
6694ALTERNATIVE A - MIT License
6695Copyright (c) 2017 Sean Barrett
6696Permission is hereby granted, free of charge, to any person obtaining a copy of
6697this software and associated documentation files (the "Software"), to deal in
6698the Software without restriction, including without limitation the rights to
6699use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
6700of the Software, and to permit persons to whom the Software is furnished to do
6701so, subject to the following conditions:
6702The above copyright notice and this permission notice shall be included in all
6703copies or substantial portions of the Software.
6704THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
6705IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
6706FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
6707AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
6708LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
6709OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
6710SOFTWARE.
6711------------------------------------------------------------------------------
6712ALTERNATIVE B - Public Domain (www.unlicense.org)
6713This is free and unencumbered software released into the public domain.
6714Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
6715software, either in source code form or as a compiled binary, for any purpose,
6716commercial or non-commercial, and by any means.
6717In jurisdictions that recognize copyright laws, the author or authors of this
6718software dedicate any and all copyright interest in the software to the public
6719domain. We make this dedication for the benefit of the public at large and to
6720the detriment of our heirs and successors. We intend this dedication to be an
6721overt act of relinquishment in perpetuity of all present and future rights to
6722this software under copyright law.
6723THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
6724IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
6725FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
6726AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
6727ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
6728WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
6729------------------------------------------------------------------------------
6730*/
Note: See TracBrowser for help on using the repository browser.