Merge pull request #59 from sz3/bugfix-misc

Misc bugfixes + dependency upgrades
sz3 2022-02-19 19:48:46 -06:00 committed by GitHub
commit 729eb7ebcb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
26 changed files with 7450 additions and 5883 deletions

View File

@ -16,7 +16,7 @@ if("${CMAKE_BUILD_TYPE}" STREQUAL "")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-Wall -g -O2 -fPIC")
endif()
if(DEFINED USE_WASM)
if(DEFINED USE_WASM) # wasm build needs OPENCV_DIR defined
set(DISABLE_TESTS true)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGF256_TARGET_MOBILE")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-Os")
@ -28,10 +28,13 @@ if(DEFINED USE_WASM)
${OPENCV_DIR}/opencv-build-wasm/build_wasm/
${opencv_include_modules}
)
else() # if not wasm, go find opencv. 3 or 4 should both work
find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS})
endif()
if(NOT DEFINED OPENCV_LIBS)
set(OPENCV_LIBS "opencv_core" "opencv_imgcodecs" "opencv_imgproc" "opencv_photo")
set(OPENCV_LIBS "opencv_calib3d" "opencv_imgcodecs" "opencv_imgproc" "opencv_photo" "opencv_core")
endif()
if(NOT DEFINED CPPFILESYSTEM)

View File

@ -70,7 +70,7 @@ int main(int argc, char** argv)
if (!initialize_GL(window_size, window_size))
{
std::cerr << "failed to create window :(" << std::endl;
return 50;
return 70;
}
configure(colorBits, ecc, compressionLevel);

View File

@ -1,8 +1,9 @@
/* This code is subject to the terms of the Mozilla Public License, v.2.0. http://mozilla.org/MPL/2.0/. */
#pragma once
#include <vector>
#include <cstddef>
#include <utility>
#include <vector>
class CellPositions
{

View File

@ -32,7 +32,7 @@ public:
size_t compressedBytes = ZSTD_compressCCtx(_cctx, _compBuff.data(), _compBuff.size(), data, writeLen, _compressionLevel);
if (ZSTD_isError(compressedBytes))
{
std::cout << "error? " << ZSTD_getErrorName(compressedBytes) << std::endl;
std::cerr << "error? " << ZSTD_getErrorName(compressedBytes) << std::endl;
return false;
}
STREAM::write(_compBuff.data(), compressedBytes);
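
Note: the stdout-to-stderr change above routes the failure message to the right stream; zstd reports errors in-band, so the size_t return is either a byte count or an error code that ZSTD_isError distinguishes. A standalone sketch of the idiom (the ZSTD_compressBound sizing is this sketch's assumption, not the file's exact buffering):

#include <zstd.h>
#include <cstdio>
#include <vector>

// Compress one buffer with a reusable context; returns 0 on failure.
size_t compress_once(ZSTD_CCtx* cctx, const char* src, size_t len,
                     std::vector<char>& out, int level)
{
    out.resize(ZSTD_compressBound(len)); // worst-case compressed size
    size_t n = ZSTD_compressCCtx(cctx, out.data(), out.size(), src, len, level);
    if (ZSTD_isError(n)) // in-band error code, not a byte count
    {
        std::fprintf(stderr, "error? %s\n", ZSTD_getErrorName(n));
        return 0;
    }
    return n;
}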

View File

@ -56,5 +56,8 @@ TEST_CASE( "DecoderTest/testDecode.Sample", "[unit]" )
unsigned bytesDecoded = dec.decode(TestCimbar::getSample("6bit/4_30_f0_627_extract.jpg"), decodedFile);
assertEquals( 9300, bytesDecoded );
assertEquals( "3de927c8aa0221807a2784210160cdc17567eb587bf01233d166900aadf14bf5", get_hash(decodedFile) );
if (CV_VERSION_MAJOR == 3)
assertEquals( "3de927c8aa0221807a2784210160cdc17567eb587bf01233d166900aadf14bf5", get_hash(decodedFile) );
else // # cv4
assertEquals( "59ddb2516b4ff5a528aebe538a22b736a6714263a454d20e146e1ffbba36c5ae", get_hash(decodedFile) );
}
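
Note: the expected hash is now keyed to CV_VERSION_MAJOR, presumably because OpenCV 3 and 4 decode the sample JPEG slightly differently, so the reassembled payload (and therefore its digest) differs by major version even though the byte count matches. A minimal sketch of a get_hash-style helper, assuming it wraps the vendored picosha2 (the sha256 file patched below); the name and signature are illustrative, not the repo's exact code:

#include "picosha2.h"
#include <string>

// sha256 of a file's contents, as 64 lowercase hex chars
std::string get_hash(const std::string& contents)
{
    return picosha2::hash256_hex_string(contents);
}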

View File

@ -15,7 +15,7 @@ public:
}
public:
FountainMetadata(uint64_t id)
FountainMetadata(uint32_t id)
: _data(id)
{
}
@ -33,7 +33,7 @@ public:
to_uint8_arr(encode_id, size, d);
}
unsigned id() const
uint32_t id() const
{
return _data;
}
@ -65,5 +65,5 @@ protected:
}
protected:
uint64_t _data; // might invert this and only generate the uint64_t when we need it
uint32_t _data; // might invert this and only generate the uint32_t when we need it
};
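
Note: narrowing _data from uint64_t to uint32_t matches what the metadata actually holds. A sketch of the packing, assuming the (encode_id, size) combo named in the comments: one byte of encode_id plus a 24-bit size fits exactly in 32 bits (the byte order here is illustrative, not necessarily the repo's exact layout):

#include <cstdint>

uint32_t pack_id(uint8_t encode_id, uint32_t size)
{
    return (uint32_t(encode_id) << 24) | (size & 0xFFFFFFu); // 8 + 24 bits
}

uint8_t slot_of(uint32_t id)
{
    return uint8_t(id >> 24) & 0x7; // cf. stream_slot(): low 3 bits of encode_id
}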

View File

@ -39,10 +39,10 @@ public:
return true;
}
void mark_done(uint64_t id)
void mark_done(const FountainMetadata& md)
{
_done.insert(id);
auto it = _streams.find(id);
_done.insert(md.id());
auto it = _streams.find(stream_slot(md));
if (it != _streams.end())
_streams.erase(it);
}
@ -60,7 +60,7 @@ public:
std::vector<std::string> get_done() const
{
std::vector<std::string> done;
for (uint64_t id : _done)
for (uint32_t id : _done)
done.push_back( get_filename(FountainMetadata(id)) );
return done;
}
@ -77,7 +77,7 @@ public:
return progress;
}
bool is_done(uint64_t id) const
bool is_done(uint32_t id) const
{
return _done.find(id) != _done.end();
}
@ -106,7 +106,7 @@ public:
return false;
if (store(md, *finished))
mark_done(md.id());
mark_done(md);
return true;
}
@ -122,7 +122,7 @@ public:
}
protected:
// streams is limited to at most 8 decoders at a time. Current, we just use the lower bits of the encode_id.
// streams is limited to at most 8 decoders at a time. Currently, we just use the lower bits of the encode_id.
uint8_t stream_slot(const FountainMetadata& md) const
{
return md.encode_id() & 0x7;
@ -137,7 +137,10 @@ protected:
std::string _dataDir;
unsigned _chunkSize;
// maybe instead of unordered_map+set, something where we can "age out" old streams?
// e.g. most recent 16/8, or something?
// question is what happens to _done/_streams when we wrap for continuous data streaming...
std::unordered_map<uint8_t, fountain_decoder_stream> _streams;
// track the uint64_t combo of (encode_id,size) to avoid redundant work
std::set<uint64_t> _done;
// track the uint32_t combo of (encode_id,size) to avoid redundant work
std::set<uint32_t> _done;
};
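
Note: mark_done() now takes the whole FountainMetadata because the sink keeps two indexes with different keys; the old code erased from _streams by id, which is not the slot the stream was stored under. A simplified sketch of the bookkeeping (Stream stands in for fountain_decoder_stream; types are reduced for illustration):

#include <cstdint>
#include <set>
#include <unordered_map>

struct Stream {};

struct Sink
{
    std::set<uint32_t> done;                     // finished (encode_id,size) ids
    std::unordered_map<uint8_t, Stream> streams; // at most 8 in-flight decoders

    void mark_done(uint32_t id, uint8_t slot)
    {
        done.insert(id);     // queried by is_done()/get_done()
        streams.erase(slot); // live decoders are keyed by slot, not id
    }
};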

View File

@ -2,7 +2,7 @@
#pragma once
#include "bit_extractor.h"
#include "intx/int128.hpp"
#include "intx/intx.hpp"
#include <array>
#include <iostream>
#include <utility>

View File

@ -6,7 +6,7 @@
#include "bit_file/bitmatrix.h"
#include "cimb_translator/Cell.h"
#include "intx/int128.hpp"
#include "intx/intx.hpp"
#include <opencv2/opencv.hpp>
#include <array>

View File

@ -2,7 +2,7 @@
#include "unittest.h"
#include "bit_extractor.h"
#include "intx/int128.hpp"
#include "intx/intx.hpp"
#include <bitset>
#include <iostream>
@ -36,7 +36,9 @@ TEST_CASE( "bitExtractorTest/testLargerValue.1", "[unit]" )
TEST_CASE( "bitExtractorTest/testLargerValue.2", "[unit]" )
{
intx::uint128 bits{0xFFBFCFE3FULL, 0xF83C0E030080000ULL};
intx::uint128 bits{0xF83C0E030080000ULL, 0xFFBFCFE3FULL};
assertEquals( "ffbfcfe3f0f83c0e030080000", intx::hex(bits) ); // sanity check
bit_extractor<intx::uint128, 100> be(bits);
uint64_t res = be.extract(1, 11, 21, 31);
assertEquals( 0xfffefcf8, res );
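
Note: the swapped constructor arguments track the intx upgrade: the old vendored int128.hpp (deleted below) declared uint(high, low), while the consolidated intx.hpp takes the least-significant word first, as the unchanged hex sanity check confirms. A quick sketch of the new word order, assuming intx::hex as used above:

#include "intx/intx.hpp"
#include <cassert>

void check_word_order()
{
    intx::uint128 v{0x0ULL, 0x1ULL};             // (low, high) => value 2^64
    assert(intx::hex(v) == "10000000000000000"); // 1 followed by 16 zeros
}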

View File

@ -93,7 +93,7 @@ void hash256_block(RaIter1 message_digest, RaIter2 first, RaIter2 last) {
assert(first + 64 == last);
static_cast<void>(last); // for avoiding unused-variable warning
word_t w[64];
std::fill(w, w + 64, 0);
std::fill(w, w + 64, word_t(0));
for (std::size_t i = 0; i < 16; ++i) {
w[i] = (static_cast<word_t>(mask_8bit(*(first + i * 4))) << 24) |
(static_cast<word_t>(mask_8bit(*(first + i * 4 + 1))) << 16) |
@ -185,7 +185,7 @@ class hash256_one_by_one {
void init() {
buffer_.clear();
std::fill(data_length_digits_, data_length_digits_ + 4, 0);
std::fill(data_length_digits_, data_length_digits_ + 4, word_t(0));
std::copy(detail::initial_message_digest,
detail::initial_message_digest + 8, h_);
}
@ -204,17 +204,17 @@ class hash256_one_by_one {
void finish() {
byte_t temp[64];
std::fill(temp, temp + 64, 0);
std::fill(temp, temp + 64, byte_t(0));
std::size_t remains = buffer_.size();
std::copy(buffer_.begin(), buffer_.end(), temp);
temp[remains] = 0x80;
if (remains > 55) {
std::fill(temp + remains + 1, temp + 64, 0);
std::fill(temp + remains + 1, temp + 64, byte_t(0));
detail::hash256_block(h_, temp, temp + 64);
std::fill(temp, temp + 64 - 4, 0);
std::fill(temp, temp + 64 - 4, byte_t(0));
} else {
std::fill(temp + remains + 1, temp + 64 - 4, 0);
std::fill(temp + remains + 1, temp + 64 - 4, byte_t(0));
}
write_data_bit_length(&(temp[56]));
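
Note: these fills look cosmetic, but the literal 0 makes std::fill deduce int as its value type, so every store into the byte_t/word_t buffer is an implicit narrowing conversion; presumably the casts are there to silence conversion warnings. A minimal sketch of the pattern under that assumption:

#include <algorithm>
#include <cstddef>
#include <cstdint>

void zero_bytes(uint8_t* buf, std::size_t n)
{
    std::fill(buf, buf + n, uint8_t(0)); // value type matches element type
}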

View File

@ -1,6 +1,7 @@
This license file applies to everything in this repository except that which
is explicitly annotated as being written by other authors, i.e. the Boost
queue (included in the benchmarks for comparison), Intel's TBB library (ditto),
dlib::pipe (ditto),
the CDSChecker tool (used for verification), the Relacy model checker (ditto),
and Jeff Preshing's semaphore implementation (used in the blocking queue) which
has a zlib license (embedded in lightweightsemaphore.h).

View File

@ -1688,7 +1688,7 @@ private:
{
}
virtual ~ProducerBase() { };
virtual ~ProducerBase() { }
template<typename U>
inline bool dequeue(U& element)
@ -1897,7 +1897,7 @@ private:
++pr_blockIndexSlotsUsed;
}
MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new ((T*)nullptr) T(std::forward<U>(element)))) {
MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) {
// The constructor may throw. We want the element not to appear in the queue in
// that case (without corrupting the queue):
MOODYCAMEL_TRY {
@ -1923,7 +1923,7 @@ private:
blockIndex.load(std::memory_order_relaxed)->front.store(pr_blockIndexFront, std::memory_order_release);
pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);
MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new ((T*)nullptr) T(std::forward<U>(element)))) {
MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) {
this->tailIndex.store(newTailIndex, std::memory_order_release);
return true;
}
@ -2139,7 +2139,7 @@ private:
block = block->next;
}
MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new ((T*)nullptr) T(details::deref_noexcept(itemFirst)))) {
MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) {
blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release);
}
}
@ -2158,7 +2158,7 @@ private:
if (details::circular_less_than<index_t>(newTailIndex, stopIndex)) {
stopIndex = newTailIndex;
}
MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new ((T*)nullptr) T(details::deref_noexcept(itemFirst)))) {
MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) {
while (currentTailIndex != stopIndex) {
new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++);
}
@ -2173,7 +2173,7 @@ private:
// may only define a (noexcept) move constructor, and so calls to the
// cctor will not compile, even if they are in an if branch that will never
// be executed
new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<(bool)!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new ((T*)nullptr) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst));
new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst));
++currentTailIndex;
++itemFirst;
}
@ -2220,7 +2220,7 @@ private:
this->tailBlock = this->tailBlock->next;
}
MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new ((T*)nullptr) T(details::deref_noexcept(itemFirst)))) {
MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) {
if (firstAllocatedBlock != nullptr)
blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release);
}
@ -2239,7 +2239,7 @@ private:
desiredCount = desiredCount < max ? desiredCount : max;
std::atomic_thread_fence(std::memory_order_acquire);
auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed);;
auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed);
tail = this->tailIndex.load(std::memory_order_acquire);
auto actualCount = static_cast<size_t>(tail - (myDequeueCount - overcommit));
@ -2501,7 +2501,7 @@ private:
#endif
newBlock->ConcurrentQueue::Block::template reset_empty<implicit_context>();
MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new ((T*)nullptr) T(std::forward<U>(element)))) {
MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) {
// May throw, try to insert now before we publish the fact that we have this new block
MOODYCAMEL_TRY {
new ((*newBlock)[currentTailIndex]) T(std::forward<U>(element));
@ -2519,7 +2519,7 @@ private:
this->tailBlock = newBlock;
MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new ((T*)nullptr) T(std::forward<U>(element)))) {
MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) {
this->tailIndex.store(newTailIndex, std::memory_order_release);
return true;
}
@ -2697,7 +2697,7 @@ private:
if (details::circular_less_than<index_t>(newTailIndex, stopIndex)) {
stopIndex = newTailIndex;
}
MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new ((T*)nullptr) T(details::deref_noexcept(itemFirst)))) {
MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) {
while (currentTailIndex != stopIndex) {
new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++);
}
@ -2705,7 +2705,7 @@ private:
else {
MOODYCAMEL_TRY {
while (currentTailIndex != stopIndex) {
new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<(bool)!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new ((T*)nullptr) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst));
new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst));
++currentTailIndex;
++itemFirst;
}
@ -3459,7 +3459,7 @@ private:
}
auto newHash = new (raw) ImplicitProducerHash;
newHash->capacity = (size_t)newCapacity;
newHash->capacity = static_cast<size_t>(newCapacity);
newHash->entries = reinterpret_cast<ImplicitProducerKVP*>(details::align_for<ImplicitProducerKVP>(raw + sizeof(ImplicitProducerHash)));
for (size_t i = 0; i != newCapacity; ++i) {
new (newHash->entries + i) ImplicitProducerKVP;
@ -3698,7 +3698,7 @@ ConsumerToken::ConsumerToken(ConcurrentQueue<T, Traits>& queue)
: itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr)
{
initialOffset = queue.nextExplicitConsumerId.fetch_add(1, std::memory_order_release);
lastKnownGlobalOffset = (std::uint32_t)-1;
lastKnownGlobalOffset = static_cast<std::uint32_t>(-1);
}
template<typename T, typename Traits>
@ -3706,7 +3706,7 @@ ConsumerToken::ConsumerToken(BlockingConcurrentQueue<T, Traits>& queue)
: itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr)
{
initialOffset = reinterpret_cast<ConcurrentQueue<T, Traits>*>(&queue)->nextExplicitConsumerId.fetch_add(1, std::memory_order_release);
lastKnownGlobalOffset = (std::uint32_t)-1;
lastKnownGlobalOffset = static_cast<std::uint32_t>(-1);
}
template<typename T, typename Traits>
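
Note: the recurring edit in this file swaps C-style (T*)nullptr casts for static_cast inside MOODYCAMEL_NOEXCEPT_CTOR. The placement-new expression is never executed; it sits in an unevaluated noexcept() operand that only asks the compiler whether constructing T from that argument can throw. A sketch of the idiom (nothrow_ctor is a stand-in name, not the macro's real expansion):

#include <new>
#include <utility>

template <typename T, typename U>
constexpr bool nothrow_ctor =
    noexcept(new (static_cast<T*>(nullptr)) T(std::declval<U>()));

static_assert(nothrow_ctor<int, int>, "copy-initializing an int never throws");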

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.10)
set(SOURCES
int128.hpp
intx.hpp
)
add_library(intx INTERFACE)

View File

@ -1,885 +0,0 @@
// intx: extended precision integer library.
// Copyright 2019-2020 Pawel Bylica.
// Licensed under the Apache License, Version 2.0.
#pragma once
#include <algorithm>
#include <climits>
#include <cstdint>
#include <limits>
#include <stdexcept>
#include <string>
#include <tuple>
#include <type_traits>
#ifdef _MSC_VER
#include <intrin.h>
#endif
#ifdef _MSC_VER
#define INTX_UNREACHABLE __assume(0)
#else
#define INTX_UNREACHABLE __builtin_unreachable()
#endif
#ifdef _MSC_VER
#define INTX_UNLIKELY(EXPR) (bool{EXPR})
#else
#define INTX_UNLIKELY(EXPR) __builtin_expect(bool{EXPR}, false)
#endif
#ifdef NDEBUG
#define INTX_REQUIRE(X) (X) ? (void)0 : INTX_UNREACHABLE
#else
#include <cassert>
#define INTX_REQUIRE assert
#endif
namespace intx
{
template <unsigned N>
struct uint;
/// The 128-bit unsigned integer.
///
/// This type is defined as a specialization of uint<> for easier integration with the full intx
/// package; however, uint128 may be used independently.
template <>
struct uint<128>
{
static constexpr unsigned num_bits = 128;
uint64_t lo = 0;
uint64_t hi = 0;
constexpr uint() noexcept = default;
constexpr uint(uint64_t high, uint64_t low) noexcept : lo{low}, hi{high} {}
template <typename T,
typename = typename std::enable_if_t<std::is_convertible<T, uint64_t>::value>>
constexpr uint(T x) noexcept : lo(static_cast<uint64_t>(x)) // NOLINT
{}
#ifdef __SIZEOF_INT128__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
constexpr uint(unsigned __int128 x) noexcept // NOLINT
: lo{uint64_t(x)}, hi{uint64_t(x >> 64)}
{}
constexpr explicit operator unsigned __int128() const noexcept
{
return (static_cast<unsigned __int128>(hi) << 64) | lo;
}
#pragma GCC diagnostic pop
#endif
constexpr explicit operator bool() const noexcept { return hi | lo; }
/// Explicit converting operator for all builtin integral types.
template <typename Int, typename = typename std::enable_if<std::is_integral<Int>::value>::type>
constexpr explicit operator Int() const noexcept
{
return static_cast<Int>(lo);
}
};
using uint128 = uint<128>;
/// Contains result of add/sub/etc with a carry flag.
template <typename T>
struct result_with_carry
{
T value;
bool carry;
/// Conversion to tuple of references, to allow usage with std::tie().
constexpr operator std::tuple<T&, bool&>() noexcept { return {value, carry}; }
};
/// Linear arithmetic operators.
/// @{
constexpr inline result_with_carry<uint64_t> add_with_carry(
uint64_t x, uint64_t y, bool carry = false) noexcept
{
const auto s = x + y;
const auto carry1 = s < x;
const auto t = s + carry;
const auto carry2 = t < s;
return {t, carry1 || carry2};
}
template <unsigned N>
constexpr result_with_carry<uint<N>> add_with_carry(
const uint<N>& a, const uint<N>& b, bool carry = false) noexcept
{
const auto lo = add_with_carry(a.lo, b.lo, carry);
const auto hi = add_with_carry(a.hi, b.hi, lo.carry);
return {{hi.value, lo.value}, hi.carry};
}
constexpr inline uint128 operator+(uint128 x, uint128 y) noexcept
{
return add_with_carry(x, y).value;
}
constexpr inline uint128 operator+(uint128 x) noexcept
{
return x;
}
constexpr inline result_with_carry<uint64_t> sub_with_carry(
uint64_t x, uint64_t y, bool carry = false) noexcept
{
const auto d = x - y;
const auto carry1 = d > x;
const auto e = d - carry;
const auto carry2 = e > d;
return {e, carry1 || carry2};
}
/// Performs subtraction of two unsigned numbers and returns the difference
/// and the carry bit (aka borrow, overflow).
template <unsigned N>
constexpr inline result_with_carry<uint<N>> sub_with_carry(
const uint<N>& a, const uint<N>& b, bool carry = false) noexcept
{
const auto lo = sub_with_carry(a.lo, b.lo, carry);
const auto hi = sub_with_carry(a.hi, b.hi, lo.carry);
return {{hi.value, lo.value}, hi.carry};
}
constexpr inline uint128 operator-(uint128 x, uint128 y) noexcept
{
return sub_with_carry(x, y).value;
}
constexpr inline uint128 operator-(uint128 x) noexcept
{
// Implementing as subtraction is better than ~x + 1.
// Clang9: Perfect.
// GCC8: Does something weird.
return 0 - x;
}
inline uint128& operator++(uint128& x) noexcept
{
return x = x + 1;
}
inline uint128& operator--(uint128& x) noexcept
{
return x = x - 1;
}
inline uint128 operator++(uint128& x, int) noexcept
{
auto ret = x;
++x;
return ret;
}
inline uint128 operator--(uint128& x, int) noexcept
{
auto ret = x;
--x;
return ret;
}
/// Optimized addition.
///
/// This keeps the multiprecision addition until CodeGen so the pattern is not
/// broken during other optimizations.
constexpr uint128 fast_add(uint128 x, uint128 y) noexcept
{
#ifdef __SIZEOF_INT128__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
using uint128_native = unsigned __int128;
return uint128_native{x} + uint128_native{y};
#pragma GCC diagnostic pop
#else
// Fallback to regular addition.
return x + y;
#endif
}
/// @}
/// Comparison operators.
///
/// In all implementations bitwise operators are used instead of logical ones
/// to avoid branching.
///
/// @{
constexpr bool operator==(uint128 x, uint128 y) noexcept
{
// Clang7: generates perfect xor based code,
// much better than __int128 where it uses vector instructions.
// GCC8: generates a bit worse cmp based code
// although it generates the xor based one for __int128.
return (x.lo == y.lo) & (x.hi == y.hi);
}
constexpr bool operator!=(uint128 x, uint128 y) noexcept
{
// Analogous to ==, but == not used directly, because that confuses GCC 8-9.
return (x.lo != y.lo) | (x.hi != y.hi);
}
constexpr bool operator<(uint128 x, uint128 y) noexcept
{
// OPT: This should be implemented by checking the borrow of x - y,
// but compilers (GCC8, Clang7)
// have problem with properly optimizing subtraction.
return (x.hi < y.hi) | ((x.hi == y.hi) & (x.lo < y.lo));
}
constexpr bool operator<=(uint128 x, uint128 y) noexcept
{
return !(y < x);
}
constexpr bool operator>(uint128 x, uint128 y) noexcept
{
return y < x;
}
constexpr bool operator>=(uint128 x, uint128 y) noexcept
{
return !(x < y);
}
/// @}
/// Bitwise operators.
/// @{
constexpr uint128 operator~(uint128 x) noexcept
{
return {~x.hi, ~x.lo};
}
constexpr uint128 operator|(uint128 x, uint128 y) noexcept
{
// Clang7: perfect.
// GCC8: stupidly uses a vector instruction in all bitwise operators.
return {x.hi | y.hi, x.lo | y.lo};
}
constexpr uint128 operator&(uint128 x, uint128 y) noexcept
{
return {x.hi & y.hi, x.lo & y.lo};
}
constexpr uint128 operator^(uint128 x, uint128 y) noexcept
{
return {x.hi ^ y.hi, x.lo ^ y.lo};
}
constexpr uint128 operator<<(uint128 x, unsigned shift) noexcept
{
return (shift < 64) ?
// Find the part moved from lo to hi.
// For shift == 0 right shift by (64 - shift) is invalid so
// split it into 2 shifts by 1 and (63 - shift).
uint128{(x.hi << shift) | ((x.lo >> 1) >> (63 - shift)), x.lo << shift} :
// Guarantee "defined" behavior for shifts larger than 128.
(shift < 128) ? uint128{x.lo << (shift - 64), 0} : 0;
}
constexpr uint128 operator<<(uint128 x, uint128 shift) noexcept
{
if (shift < 128)
return x << unsigned(shift);
return 0;
}
constexpr uint128 operator>>(uint128 x, unsigned shift) noexcept
{
return (shift < 64) ?
// Find the part moved from lo to hi.
// For shift == 0 left shift by (64 - shift) is invalid so
// split it into 2 shifts by 1 and (63 - shift).
uint128{x.hi >> shift, (x.lo >> shift) | ((x.hi << 1) << (63 - shift))} :
// Guarantee "defined" behavior for shifts larger than 128.
(shift < 128) ? uint128{0, x.hi >> (shift - 64)} : 0;
}
constexpr uint128 operator>>(uint128 x, uint128 shift) noexcept
{
if (shift < 128)
return x >> unsigned(shift);
return 0;
}
/// @}
/// Multiplication
/// @{
/// Portable full unsigned multiplication 64 x 64 -> 128.
constexpr uint128 constexpr_umul(uint64_t x, uint64_t y) noexcept
{
uint64_t xl = x & 0xffffffff;
uint64_t xh = x >> 32;
uint64_t yl = y & 0xffffffff;
uint64_t yh = y >> 32;
uint64_t t0 = xl * yl;
uint64_t t1 = xh * yl;
uint64_t t2 = xl * yh;
uint64_t t3 = xh * yh;
uint64_t u1 = t1 + (t0 >> 32);
uint64_t u2 = t2 + (u1 & 0xffffffff);
uint64_t lo = (u2 << 32) | (t0 & 0xffffffff);
uint64_t hi = t3 + (u2 >> 32) + (u1 >> 32);
return {hi, lo};
}
/// Full unsigned multiplication 64 x 64 -> 128.
inline uint128 umul(uint64_t x, uint64_t y) noexcept
{
#if defined(__SIZEOF_INT128__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
const auto p = static_cast<unsigned __int128>(x) * y;
return {uint64_t(p >> 64), uint64_t(p)};
#pragma GCC diagnostic pop
#elif defined(_MSC_VER)
unsigned __int64 hi;
const auto lo = _umul128(x, y, &hi);
return {hi, lo};
#else
return constexpr_umul(x, y);
#endif
}
inline uint128 operator*(uint128 x, uint128 y) noexcept
{
auto p = umul(x.lo, y.lo);
p.hi += (x.lo * y.hi) + (x.hi * y.lo);
return {p.hi, p.lo};
}
constexpr uint128 constexpr_mul(uint128 x, uint128 y) noexcept
{
auto p = constexpr_umul(x.lo, y.lo);
p.hi += (x.lo * y.hi) + (x.hi * y.lo);
return {p.hi, p.lo};
}
/// @}
/// Assignment operators.
/// @{
constexpr uint128& operator+=(uint128& x, uint128 y) noexcept
{
return x = x + y;
}
constexpr uint128& operator-=(uint128& x, uint128 y) noexcept
{
return x = x - y;
}
inline uint128& operator*=(uint128& x, uint128 y) noexcept
{
return x = x * y;
}
constexpr uint128& operator|=(uint128& x, uint128 y) noexcept
{
return x = x | y;
}
constexpr uint128& operator&=(uint128& x, uint128 y) noexcept
{
return x = x & y;
}
constexpr uint128& operator^=(uint128& x, uint128 y) noexcept
{
return x = x ^ y;
}
constexpr uint128& operator<<=(uint128& x, unsigned shift) noexcept
{
return x = x << shift;
}
constexpr uint128& operator>>=(uint128& x, unsigned shift) noexcept
{
return x = x >> shift;
}
/// @}
constexpr unsigned clz_generic(uint32_t x) noexcept
{
unsigned n = 32;
for (int i = 4; i >= 0; --i)
{
const auto s = unsigned{1} << i;
const auto hi = x >> s;
if (hi != 0)
{
n -= s;
x = hi;
}
}
return n - x;
}
constexpr unsigned clz_generic(uint64_t x) noexcept
{
unsigned n = 64;
for (int i = 5; i >= 0; --i)
{
const auto s = unsigned{1} << i;
const auto hi = x >> s;
if (hi != 0)
{
n -= s;
x = hi;
}
}
return n - static_cast<unsigned>(x);
}
constexpr inline unsigned clz(uint32_t x) noexcept
{
#ifdef _MSC_VER
return clz_generic(x);
#else
return x != 0 ? unsigned(__builtin_clz(x)) : 32;
#endif
}
constexpr inline unsigned clz(uint64_t x) noexcept
{
#ifdef _MSC_VER
return clz_generic(x);
#else
return x != 0 ? unsigned(__builtin_clzll(x)) : 64;
#endif
}
constexpr inline unsigned clz(uint128 x) noexcept
{
// Checking `hi == 0` first generates fewer instructions than checking `hi != 0`.
return x.hi == 0 ? clz(x.lo) + 64 : clz(x.hi);
}
inline uint64_t bswap(uint64_t x) noexcept
{
#ifdef _MSC_VER
return _byteswap_uint64(x);
#else
return __builtin_bswap64(x);
#endif
}
inline uint128 bswap(uint128 x) noexcept
{
return {bswap(x.lo), bswap(x.hi)};
}
/// Division.
/// @{
template <typename QuotT, typename RemT = QuotT>
struct div_result
{
QuotT quot;
RemT rem;
/// Conversion to tuple of references, to allow usage with std::tie().
constexpr operator std::tuple<QuotT&, RemT&>() noexcept { return {quot, rem}; }
};
namespace internal
{
constexpr uint16_t reciprocal_table_item(uint8_t d9) noexcept
{
return uint16_t(0x7fd00 / (0x100 | d9));
}
#define REPEAT4(x) \
reciprocal_table_item((x) + 0), reciprocal_table_item((x) + 1), \
reciprocal_table_item((x) + 2), reciprocal_table_item((x) + 3)
#define REPEAT32(x) \
REPEAT4((x) + 4 * 0), REPEAT4((x) + 4 * 1), REPEAT4((x) + 4 * 2), REPEAT4((x) + 4 * 3), \
REPEAT4((x) + 4 * 4), REPEAT4((x) + 4 * 5), REPEAT4((x) + 4 * 6), REPEAT4((x) + 4 * 7)
#define REPEAT256() \
REPEAT32(32 * 0), REPEAT32(32 * 1), REPEAT32(32 * 2), REPEAT32(32 * 3), REPEAT32(32 * 4), \
REPEAT32(32 * 5), REPEAT32(32 * 6), REPEAT32(32 * 7)
/// Reciprocal lookup table.
constexpr uint16_t reciprocal_table[] = {REPEAT256()};
#undef REPEAT4
#undef REPEAT32
#undef REPEAT256
} // namespace internal
/// Computes the reciprocal (2^128 - 1) / d - 2^64 for normalized d.
///
/// Based on Algorithm 2 from "Improved division by invariant integers".
inline uint64_t reciprocal_2by1(uint64_t d) noexcept
{
INTX_REQUIRE(d & 0x8000000000000000); // Must be normalized.
const uint64_t d9 = d >> 55;
const uint32_t v0 = internal::reciprocal_table[d9 - 256];
const uint64_t d40 = (d >> 24) + 1;
const uint64_t v1 = (v0 << 11) - uint32_t(v0 * v0 * d40 >> 40) - 1;
const uint64_t v2 = (v1 << 13) + (v1 * (0x1000000000000000 - v1 * d40) >> 47);
const uint64_t d0 = d & 1;
const uint64_t d63 = (d >> 1) + d0; // ceil(d/2)
const uint64_t e = ((v2 >> 1) & (0 - d0)) - v2 * d63;
const uint64_t v3 = (umul(v2, e).hi >> 1) + (v2 << 31);
const uint64_t v4 = v3 - (umul(v3, d) + d).hi - d;
return v4;
}
inline uint64_t reciprocal_3by2(uint128 d) noexcept
{
auto v = reciprocal_2by1(d.hi);
auto p = d.hi * v;
p += d.lo;
if (p < d.lo)
{
--v;
if (p >= d.hi)
{
--v;
p -= d.hi;
}
p -= d.hi;
}
const auto t = umul(v, d.lo);
p += t.hi;
if (p < t.hi)
{
--v;
if (p >= d.hi)
{
if (p > d.hi || t.lo >= d.lo)
--v;
}
}
return v;
}
inline div_result<uint64_t> udivrem_2by1(uint128 u, uint64_t d, uint64_t v) noexcept
{
auto q = umul(v, u.hi);
q = fast_add(q, u);
++q.hi;
auto r = u.lo - q.hi * d;
if (r > q.lo)
{
--q.hi;
r += d;
}
if (r >= d)
{
++q.hi;
r -= d;
}
return {q.hi, r};
}
inline div_result<uint64_t, uint128> udivrem_3by2(
uint64_t u2, uint64_t u1, uint64_t u0, uint128 d, uint64_t v) noexcept
{
auto q = umul(v, u2);
q = fast_add(q, {u2, u1});
auto r1 = u1 - q.hi * d.hi;
auto t = umul(d.lo, q.hi);
auto r = uint128{r1, u0} - t - d;
r1 = r.hi;
++q.hi;
if (r1 >= q.lo)
{
--q.hi;
r += d;
}
if (r >= d)
{
++q.hi;
r -= d;
}
return {q.hi, r};
}
inline div_result<uint128> udivrem(uint128 x, uint128 y) noexcept
{
if (y.hi == 0)
{
INTX_REQUIRE(y.lo != 0); // Division by 0.
const auto lsh = clz(y.lo);
const auto rsh = (64 - lsh) % 64;
const auto rsh_mask = uint64_t{lsh == 0} - 1;
const auto yn = y.lo << lsh;
const auto xn_lo = x.lo << lsh;
const auto xn_hi = (x.hi << lsh) | ((x.lo >> rsh) & rsh_mask);
const auto xn_ex = (x.hi >> rsh) & rsh_mask;
const auto v = reciprocal_2by1(yn);
const auto res1 = udivrem_2by1({xn_ex, xn_hi}, yn, v);
const auto res2 = udivrem_2by1({res1.rem, xn_lo}, yn, v);
return {{res1.quot, res2.quot}, res2.rem >> lsh};
}
if (y.hi > x.hi)
return {0, x};
const auto lsh = clz(y.hi);
if (lsh == 0)
{
const auto q = unsigned{y.hi < x.hi} | unsigned{y.lo <= x.lo};
return {q, x - (q ? y : 0)};
}
const auto rsh = 64 - lsh;
const auto yn_lo = y.lo << lsh;
const auto yn_hi = (y.hi << lsh) | (y.lo >> rsh);
const auto xn_lo = x.lo << lsh;
const auto xn_hi = (x.hi << lsh) | (x.lo >> rsh);
const auto xn_ex = x.hi >> rsh;
const auto v = reciprocal_3by2({yn_hi, yn_lo});
const auto res = udivrem_3by2(xn_ex, xn_hi, xn_lo, {yn_hi, yn_lo}, v);
return {res.quot, res.rem >> lsh};
}
inline div_result<uint128> sdivrem(uint128 x, uint128 y) noexcept
{
constexpr auto sign_mask = uint128{1} << 127;
const auto x_is_neg = (x & sign_mask) != 0;
const auto y_is_neg = (y & sign_mask) != 0;
const auto x_abs = x_is_neg ? -x : x;
const auto y_abs = y_is_neg ? -y : y;
const auto q_is_neg = x_is_neg ^ y_is_neg;
const auto res = udivrem(x_abs, y_abs);
return {q_is_neg ? -res.quot : res.quot, x_is_neg ? -res.rem : res.rem};
}
inline uint128 operator/(uint128 x, uint128 y) noexcept
{
return udivrem(x, y).quot;
}
inline uint128 operator%(uint128 x, uint128 y) noexcept
{
return udivrem(x, y).rem;
}
inline uint128& operator/=(uint128& x, uint128 y) noexcept
{
return x = x / y;
}
inline uint128& operator%=(uint128& x, uint128 y) noexcept
{
return x = x % y;
}
/// @}
} // namespace intx
namespace std
{
template <unsigned N>
struct numeric_limits<intx::uint<N>>
{
using type = intx::uint<N>;
static constexpr bool is_specialized = true;
static constexpr bool is_integer = true;
static constexpr bool is_signed = false;
static constexpr bool is_exact = true;
static constexpr bool has_infinity = false;
static constexpr bool has_quiet_NaN = false;
static constexpr bool has_signaling_NaN = false;
static constexpr float_denorm_style has_denorm = denorm_absent;
static constexpr bool has_denorm_loss = false;
static constexpr float_round_style round_style = round_toward_zero;
static constexpr bool is_iec559 = false;
static constexpr bool is_bounded = true;
static constexpr bool is_modulo = true;
static constexpr int digits = CHAR_BIT * sizeof(type);
static constexpr int digits10 = int(0.3010299956639812 * digits);
static constexpr int max_digits10 = 0;
static constexpr int radix = 2;
static constexpr int min_exponent = 0;
static constexpr int min_exponent10 = 0;
static constexpr int max_exponent = 0;
static constexpr int max_exponent10 = 0;
static constexpr bool traps = std::numeric_limits<unsigned>::traps;
static constexpr bool tinyness_before = false;
static constexpr type min() noexcept { return 0; }
static constexpr type lowest() noexcept { return min(); }
static constexpr type max() noexcept { return ~type{0}; }
static constexpr type epsilon() noexcept { return 0; }
static constexpr type round_error() noexcept { return 0; }
static constexpr type infinity() noexcept { return 0; }
static constexpr type quiet_NaN() noexcept { return 0; }
static constexpr type signaling_NaN() noexcept { return 0; }
static constexpr type denorm_min() noexcept { return 0; }
};
} // namespace std
namespace intx
{
template <typename T>
[[noreturn]] inline void throw_(const char* what)
{
#if __cpp_exceptions
throw T{what};
#else
std::fputs(what, stderr);
std::abort();
#endif
}
constexpr inline int from_dec_digit(char c)
{
if (c < '0' || c > '9')
throw_<std::invalid_argument>("invalid digit");
return c - '0';
}
constexpr inline int from_hex_digit(char c)
{
if (c >= 'a' && c <= 'f')
return c - ('a' - 10);
if (c >= 'A' && c <= 'F')
return c - ('A' - 10);
return from_dec_digit(c);
}
template <typename Int>
constexpr Int from_string(const char* str)
{
auto s = str;
auto x = Int{};
int num_digits = 0;
if (s[0] == '0' && s[1] == 'x')
{
s += 2;
while (const auto c = *s++)
{
if (++num_digits > int{sizeof(x) * 2})
throw_<std::out_of_range>(str);
x = (x << 4) | from_hex_digit(c);
}
return x;
}
while (const auto c = *s++)
{
if (num_digits++ > std::numeric_limits<Int>::digits10)
throw_<std::out_of_range>(str);
const auto d = from_dec_digit(c);
x = constexpr_mul(x, Int{10}) + d;
if (x < d)
throw_<std::out_of_range>(str);
}
return x;
}
template <typename Int>
constexpr Int from_string(const std::string& s)
{
return from_string<Int>(s.c_str());
}
constexpr uint128 operator""_u128(const char* s)
{
return from_string<uint128>(s);
}
template <unsigned N>
inline std::string to_string(uint<N> x, int base = 10)
{
if (base < 2 || base > 36)
throw_<std::invalid_argument>("invalid base");
if (x == 0)
return "0";
auto s = std::string{};
while (x != 0)
{
// TODO: Use constexpr udivrem_1?
const auto res = udivrem(x, uint<N>{base});
const auto d = int(res.rem);
const auto c = d < 10 ? '0' + d : 'a' + d - 10;
s.push_back(char(c));
x = res.quot;
}
std::reverse(s.begin(), s.end());
return s;
}
template <unsigned N>
inline std::string hex(uint<N> x)
{
return to_string(x, 16);
}
} // namespace intx
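
Note: this entire vendored int128.hpp is deleted in favor of the consolidated intx.hpp (see the CMakeLists and #include changes earlier in this diff). A usage sketch, assuming the 128-bit API (from_string, udivrem, to_string) carries over to the new header as the test changes suggest:

#include "intx/intx.hpp"
#include <cassert>

int main()
{
    using intx::uint128;
    uint128 x = intx::from_string<uint128>("0xffffffffffffffffffffffffffffffff");
    auto qr = intx::udivrem(x, uint128{10}); // full 128-bit divmod
    assert(intx::to_string(qr.rem) == "5");  // (2^128 - 1) mod 10 == 5
    return 0;
}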

File diff suppressed because it is too large

View File

@ -1,4 +1,4 @@
/* stb_image - v2.26 - public domain image loader - http://nothings.org/stb
/* stb_image - v2.27 - public domain image loader - http://nothings.org/stb
no warranty implied; use at your own risk
Do this:
@ -48,6 +48,7 @@ LICENSE
RECENT REVISION HISTORY:
2.27 (2021-07-11) document stbi_info better, 16-bit PNM support, bug fixes
2.26 (2020-07-13) many minor fixes
2.25 (2020-02-02) fix warnings
2.24 (2020-02-02) fix warnings; thread-local failure_reason and flip_vertically
@ -89,7 +90,7 @@ RECENT REVISION HISTORY:
Jeremy Sawicki (handle all ImageNet JPGs)
Optimizations & bugfixes Mikhail Morozov (1-bit BMP)
Fabian "ryg" Giesen Anael Seghezzi (is-16-bit query)
Arseny Kapoulkine
Arseny Kapoulkine Simon Breuss (16-bit PNM)
John-Mark Allen
Carmelo J Fdez-Aguera
@ -102,7 +103,7 @@ RECENT REVISION HISTORY:
Thomas Ruf Ronny Chevalier github:rlyeh
Janez Zemva John Bartholomew Michal Cichon github:romigrou
Jonathan Blow Ken Hamada Tero Hanninen github:svdijk
Laurent Gomila Cort Stratton github:snagar
Eugene Golushkov Laurent Gomila Cort Stratton github:snagar
Aruelien Pocheville Sergio Gonzalez Thibault Reuille github:Zelex
Cass Everitt Ryamond Barbiero github:grim210
Paul Du Bois Engin Manap Aldo Culquicondor github:sammyhw
@ -110,11 +111,13 @@ RECENT REVISION HISTORY:
Josh Tobin Matthew Gregan github:poppolopoppo
Julian Raschke Gregory Mullen Christian Floisand github:darealshinji
Baldur Karlsson Kevin Schmidt JR Smith github:Michaelangel007
Brad Weinberger Matvey Cherevko [reserved]
Brad Weinberger Matvey Cherevko github:mosra
Luca Sas Alexander Veselov Zack Middleton [reserved]
Ryan C. Gordon [reserved] [reserved]
DO NOT ADD YOUR NAME HERE
Jacko Dirks
To add your name to the credits, pick a random blank space in the middle and fill it.
80% of merge conflicts on stb PRs are due to people adding their name at the end
of the credits.
@ -176,6 +179,32 @@ RECENT REVISION HISTORY:
//
// Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
//
// To query the width, height and component count of an image without having to
// decode the full file, you can use the stbi_info family of functions:
//
// int x,y,n,ok;
// ok = stbi_info(filename, &x, &y, &n);
// // returns ok=1 and sets x, y, n if image is a supported format,
// // 0 otherwise.
//
// Note that stb_image pervasively uses ints in its public API for sizes,
// including sizes of memory buffers. This is now part of the API and thus
// hard to change without causing breakage. As a result, the various image
// loaders all have certain limits on image size; these differ somewhat
// by format but generally boil down to either just under 2GB or just under
// 1GB. When the decoded image would be larger than this, stb_image decoding
// will fail.
//
// Additionally, stb_image will reject image files that have any of their
// dimensions set to a larger value than the configurable STBI_MAX_DIMENSIONS,
// which defaults to 2**24 = 16777216 pixels. Due to the above memory limit,
// the only way to have an image with such dimensions load correctly
// is for it to have a rather extreme aspect ratio. Either way, the
// assumption here is that such larger images are likely to be malformed
// or malicious. If you do need to load an image with individual dimensions
// larger than that, and it still fits in the overall size limit, you can
// #define STBI_MAX_DIMENSIONS on your own to be something larger.
//
// ===========================================================================
//
// UNICODE:
@ -281,11 +310,10 @@ RECENT REVISION HISTORY:
//
// iPhone PNG support:
//
// By default we convert iphone-formatted PNGs back to RGB, even though
// they are internally encoded differently. You can disable this conversion
// by calling stbi_convert_iphone_png_to_rgb(0), in which case
// you will always just get the native iphone "format" through (which
// is BGR stored in RGB).
// We optionally support converting iPhone-formatted PNGs (which store
// premultiplied BGRA) back to RGB, even though they're internally encoded
// differently. To enable this conversion, call
// stbi_convert_iphone_png_to_rgb(1).
//
// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
// pixel to remove any premultiplied alpha *only* if the image file explicitly
@ -489,6 +517,8 @@ STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);
// as above, but only applies to images loaded on the thread that calls the function
// this function is only available if your compiler supports thread-local variables;
// calling it will fail to link if your compiler doesn't
STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply);
STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert);
STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip);
// ZLIB client - used by PNG, available for other purposes
@ -634,7 +664,7 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
#ifdef STBI_HAS_LROTL
#define stbi_lrot(x,y) _lrotl(x,y)
#else
#define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (32 - (y))))
#define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (-(y) & 31)))
#endif
#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
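
Note: the stbi_lrot change fixes undefined behavior on a zero rotate: (x) >> (32 - (y)) shifts a 32-bit value by 32 when y == 0, which is UB, while (-(y) & 31) maps y == 0 to a shift of 0 and any other y to 32 - y. A self-contained sketch of the safe rotate, assuming 32-bit x and 0 <= y < 32:

#include <cstdint>

static uint32_t rotl32(uint32_t x, unsigned y)
{
    return (x << (y & 31)) | (x >> (-y & 31)); // well-defined for every y
}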
@ -748,9 +778,12 @@ static int stbi__sse2_available(void)
#ifdef STBI_NEON
#include <arm_neon.h>
// assume GCC or Clang on ARM targets
#ifdef _MSC_VER
#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
#else
#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
#endif
#endif
#ifndef STBI_SIMD_ALIGN
#define STBI_SIMD_ALIGN(type, name) type name
@ -924,6 +957,7 @@ static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
static int stbi__pnm_test(stbi__context *s);
static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
static int stbi__pnm_is16(stbi__context *s);
#endif
static
@ -998,7 +1032,7 @@ static int stbi__mad3sizes_valid(int a, int b, int c, int add)
}
// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow
#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM)
static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
{
return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) &&
@ -1021,7 +1055,7 @@ static void *stbi__malloc_mad3(int a, int b, int c, int add)
return stbi__malloc(a*b*c + add);
}
#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM)
static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
{
if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL;
@ -1087,9 +1121,8 @@ static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int re
ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
ri->num_channels = 0;
#ifndef STBI_NO_JPEG
if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri);
#endif
// test the formats with a very explicit header first (at least a FOURCC
// or distinctive magic number first)
#ifndef STBI_NO_PNG
if (stbi__png_test(s)) return stbi__png_load(s,x,y,comp,req_comp, ri);
#endif
@ -1107,6 +1140,13 @@ static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int re
#ifndef STBI_NO_PIC
if (stbi__pic_test(s)) return stbi__pic_load(s,x,y,comp,req_comp, ri);
#endif
// then the formats that can end up attempting to load with just 1 or 2
// bytes matching expectations; these are prone to false positives, so
// try them later
#ifndef STBI_NO_JPEG
if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri);
#endif
#ifndef STBI_NO_PNM
if (stbi__pnm_test(s)) return stbi__pnm_load(s,x,y,comp,req_comp, ri);
#endif
@ -1262,12 +1302,12 @@ static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, in
#ifndef STBI_NO_STDIO
#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)
#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
STBI_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide);
STBI_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default);
#endif
#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)
#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input)
{
return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int) bufferlen, NULL, NULL);
@ -1277,16 +1317,16 @@ STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wch
static FILE *stbi__fopen(char const *filename, char const *mode)
{
FILE *f;
#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)
#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
wchar_t wMode[64];
wchar_t wFilename[1024];
if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)))
if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)/sizeof(*wFilename)))
return 0;
if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)))
if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)/sizeof(*wMode)))
return 0;
#if _MSC_VER >= 1400
#if defined(_MSC_VER) && _MSC_VER >= 1400
if (0 != _wfopen_s(&f, wFilename, wMode))
f = 0;
#else
@ -1662,7 +1702,8 @@ static int stbi__get16le(stbi__context *s)
static stbi__uint32 stbi__get32le(stbi__context *s)
{
stbi__uint32 z = stbi__get16le(s);
return z + (stbi__get16le(s) << 16);
z += (stbi__uint32)stbi__get16le(s) << 16;
return z;
}
#endif
@ -2090,13 +2131,12 @@ stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
int sgn;
if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB
sgn = j->code_buffer >> 31; // sign bit always in MSB; 0 if MSB clear (positive), 1 if MSB set (negative)
k = stbi_lrot(j->code_buffer, n);
if (n < 0 || n >= (int) (sizeof(stbi__bmask)/sizeof(*stbi__bmask))) return 0;
j->code_buffer = k & ~stbi__bmask[n];
k &= stbi__bmask[n];
j->code_bits -= n;
return k + (stbi__jbias[n] & ~sgn);
return k + (stbi__jbias[n] & (sgn - 1));
}
// get some unsigned bits
@ -2146,7 +2186,7 @@ static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman
if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
t = stbi__jpeg_huff_decode(j, hdc);
if (t < 0) return stbi__err("bad huffman code","Corrupt JPEG");
if (t < 0 || t > 15) return stbi__err("bad huffman code","Corrupt JPEG");
// 0 all the ac values now so we can do it 32-bits at a time
memset(data,0,64*sizeof(data[0]));
@ -2203,12 +2243,12 @@ static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__
// first scan for DC coefficient, must be first
memset(data,0,64*sizeof(data[0])); // 0 all the ac values now
t = stbi__jpeg_huff_decode(j, hdc);
if (t == -1) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
if (t < 0 || t > 15) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
diff = t ? stbi__extend_receive(j, t) : 0;
dc = j->img_comp[b].dc_pred + diff;
j->img_comp[b].dc_pred = dc;
data[0] = (short) (dc << j->succ_low);
data[0] = (short) (dc * (1 << j->succ_low));
} else {
// refinement scan for DC coefficient
if (stbi__jpeg_get_bit(j))
@ -2245,7 +2285,7 @@ static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__
j->code_buffer <<= s;
j->code_bits -= s;
zig = stbi__jpeg_dezigzag[k++];
data[zig] = (short) ((r >> 8) << shift);
data[zig] = (short) ((r >> 8) * (1 << shift));
} else {
int rs = stbi__jpeg_huff_decode(j, hac);
if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
@ -2263,7 +2303,7 @@ static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__
} else {
k += r;
zig = stbi__jpeg_dezigzag[k++];
data[zig] = (short) (stbi__extend_receive(j,s) << shift);
data[zig] = (short) (stbi__extend_receive(j,s) * (1 << shift));
}
}
} while (k <= j->spec_end);
@ -3227,6 +3267,13 @@ static int stbi__process_frame_header(stbi__jpeg *z, int scan)
if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
}
// check that plane subsampling factors are integer ratios; our resamplers can't deal with fractional ratios
// and I've never seen a non-corrupted JPEG file actually use them
for (i=0; i < s->img_n; ++i) {
if (h_max % z->img_comp[i].h != 0) return stbi__err("bad H","Corrupt JPEG");
if (v_max % z->img_comp[i].v != 0) return stbi__err("bad V","Corrupt JPEG");
}
// compute interleaved mcu info
z->img_h_max = h_max;
z->img_v_max = v_max;
@ -3782,6 +3829,10 @@ static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp
else
decode_n = z->s->img_n;
// nothing to do if no components requested; check this now to avoid
// accessing uninitialized coutput[0] later
if (decode_n <= 0) { stbi__cleanup_jpeg(z); return NULL; }
// resample and color-convert
{
int k;
@ -3924,6 +3975,7 @@ static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int re
{
unsigned char* result;
stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg));
if (!j) return stbi__errpuc("outofmem", "Out of memory");
STBI_NOTUSED(ri);
j->s = s;
stbi__setup_jpeg(j);
@ -3936,6 +3988,7 @@ static int stbi__jpeg_test(stbi__context *s)
{
int r;
stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
if (!j) return stbi__err("outofmem", "Out of memory");
j->s = s;
stbi__setup_jpeg(j);
r = stbi__decode_jpeg_header(j, STBI__SCAN_type);
@ -3960,6 +4013,7 @@ static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
{
int result;
stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg)));
if (!j) return stbi__err("outofmem", "Out of memory");
j->s = s;
result = stbi__jpeg_info_raw(j, x, y, comp);
STBI_FREE(j);
@ -3979,6 +4033,7 @@ static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
// fast-way is faster to check than jpeg huffman, but slow way is slower
#define STBI__ZFAST_BITS 9 // accelerate all cases in default tables
#define STBI__ZFAST_MASK ((1 << STBI__ZFAST_BITS) - 1)
#define STBI__ZNSYMS 288 // number of symbols in literal/length alphabet
// zlib-style huffman encoding
// (jpegs packs from left, zlib from right, so can't share code)
@ -3988,8 +4043,8 @@ typedef struct
stbi__uint16 firstcode[16];
int maxcode[17];
stbi__uint16 firstsymbol[16];
stbi_uc size[288];
stbi__uint16 value[288];
stbi_uc size[STBI__ZNSYMS];
stbi__uint16 value[STBI__ZNSYMS];
} stbi__zhuffman;
stbi_inline static int stbi__bitreverse16(int n)
@ -4120,7 +4175,7 @@ static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
if (s >= 16) return -1; // invalid code!
// code size is s, so:
b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
if (b >= sizeof (z->size)) return -1; // some data was corrupt somewhere!
if (b >= STBI__ZNSYMS) return -1; // some data was corrupt somewhere!
if (z->size[b] != s) return -1; // was originally an assert, but report failure instead.
a->code_buffer >>= s;
a->num_bits -= s;
@ -4317,7 +4372,7 @@ static int stbi__parse_zlib_header(stbi__zbuf *a)
return 1;
}
static const stbi_uc stbi__zdefault_length[288] =
static const stbi_uc stbi__zdefault_length[STBI__ZNSYMS] =
{
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
@ -4363,7 +4418,7 @@ static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
} else {
if (type == 1) {
// use fixed code lengths
if (!stbi__zbuild_huffman(&a->z_length , stbi__zdefault_length , 288)) return 0;
if (!stbi__zbuild_huffman(&a->z_length , stbi__zdefault_length , STBI__ZNSYMS)) return 0;
if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32)) return 0;
} else {
if (!stbi__compute_huffman_codes(a)) return 0;
@ -4759,6 +4814,7 @@ static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint3
// de-interlacing
final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
if (!final) return stbi__err("outofmem", "Out of memory");
for (p=0; p < 7; ++p) {
int xorig[] = { 0,4,0,2,0,1,0 };
int yorig[] = { 0,0,4,0,2,0,1 };
@ -4879,19 +4935,46 @@ static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int
return 1;
}
static int stbi__unpremultiply_on_load = 0;
static int stbi__de_iphone_flag = 0;
static int stbi__unpremultiply_on_load_global = 0;
static int stbi__de_iphone_flag_global = 0;
STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
{
stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply;
stbi__unpremultiply_on_load_global = flag_true_if_should_unpremultiply;
}
STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
{
stbi__de_iphone_flag = flag_true_if_should_convert;
stbi__de_iphone_flag_global = flag_true_if_should_convert;
}
#ifndef STBI_THREAD_LOCAL
#define stbi__unpremultiply_on_load stbi__unpremultiply_on_load_global
#define stbi__de_iphone_flag stbi__de_iphone_flag_global
#else
static STBI_THREAD_LOCAL int stbi__unpremultiply_on_load_local, stbi__unpremultiply_on_load_set;
static STBI_THREAD_LOCAL int stbi__de_iphone_flag_local, stbi__de_iphone_flag_set;
STBIDEF void stbi__unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply)
{
stbi__unpremultiply_on_load_local = flag_true_if_should_unpremultiply;
stbi__unpremultiply_on_load_set = 1;
}
STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert)
{
stbi__de_iphone_flag_local = flag_true_if_should_convert;
stbi__de_iphone_flag_set = 1;
}
#define stbi__unpremultiply_on_load (stbi__unpremultiply_on_load_set \
? stbi__unpremultiply_on_load_local \
: stbi__unpremultiply_on_load_global)
#define stbi__de_iphone_flag (stbi__de_iphone_flag_set \
? stbi__de_iphone_flag_local \
: stbi__de_iphone_flag_global)
#endif // STBI_THREAD_LOCAL
static void stbi__de_iphone(stbi__png *z)
{
stbi__context *s = z->s;
@ -5272,6 +5355,32 @@ typedef struct
int extra_read;
} stbi__bmp_data;
static int stbi__bmp_set_mask_defaults(stbi__bmp_data *info, int compress)
{
// BI_BITFIELDS specifies masks explicitly, don't override
if (compress == 3)
return 1;
if (compress == 0) {
if (info->bpp == 16) {
info->mr = 31u << 10;
info->mg = 31u << 5;
info->mb = 31u << 0;
} else if (info->bpp == 32) {
info->mr = 0xffu << 16;
info->mg = 0xffu << 8;
info->mb = 0xffu << 0;
info->ma = 0xffu << 24;
info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
} else {
// otherwise, use defaults, which is all-0
info->mr = info->mg = info->mb = info->ma = 0;
}
return 1;
}
return 0; // error
}
static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
{
int hsz;
@ -5299,6 +5408,8 @@ static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
if (hsz != 12) {
int compress = stbi__get32le(s);
if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
if (compress >= 4) return stbi__errpuc("BMP JPEG/PNG", "BMP type not supported: unsupported compression"); // this includes PNG/JPEG modes
if (compress == 3 && info->bpp != 16 && info->bpp != 32) return stbi__errpuc("bad BMP", "bad BMP"); // bitfields requires 16 or 32 bits/pixel
stbi__get32le(s); // discard sizeof
stbi__get32le(s); // discard hres
stbi__get32le(s); // discard vres
@ -5313,17 +5424,7 @@ static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
}
if (info->bpp == 16 || info->bpp == 32) {
if (compress == 0) {
if (info->bpp == 32) {
info->mr = 0xffu << 16;
info->mg = 0xffu << 8;
info->mb = 0xffu << 0;
info->ma = 0xffu << 24;
info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
} else {
info->mr = 31u << 10;
info->mg = 31u << 5;
info->mb = 31u << 0;
}
stbi__bmp_set_mask_defaults(info, compress);
} else if (compress == 3) {
info->mr = stbi__get32le(s);
info->mg = stbi__get32le(s);
@ -5338,6 +5439,7 @@ static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
return stbi__errpuc("bad BMP", "bad BMP");
}
} else {
// V4/V5 header
int i;
if (hsz != 108 && hsz != 124)
return stbi__errpuc("bad BMP", "bad BMP");
@ -5345,6 +5447,8 @@ static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
info->mg = stbi__get32le(s);
info->mb = stbi__get32le(s);
info->ma = stbi__get32le(s);
if (compress != 3) // override mr/mg/mb unless in BI_BITFIELDS mode, as per docs
stbi__bmp_set_mask_defaults(info, compress);
stbi__get32le(s); // discard color space
for (i=0; i < 12; ++i)
stbi__get32le(s); // discard color space parameters
@ -5394,8 +5498,7 @@ static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req
psize = (info.offset - info.extra_read - info.hsz) >> 2;
}
if (psize == 0) {
STBI_ASSERT(info.offset == s->callback_already_read + (int) (s->img_buffer - s->img_buffer_original));
if (info.offset != s->callback_already_read + (s->img_buffer - s->buffer_start)) {
if (info.offset != s->callback_already_read + (s->img_buffer - s->img_buffer_original)) {
return stbi__errpuc("bad offset", "Corrupt BMP");
}
}
@ -6342,6 +6445,7 @@ static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_c
// intermediate buffer is RGBA
result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0);
if (!result) return stbi__errpuc("outofmem", "Out of memory");
memset(result, 0xff, x*y*4);
if (!stbi__pic_load_core(s,x,y,comp, result)) {
@ -6457,6 +6561,7 @@ static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_in
static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
{
stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
if (!g) return stbi__err("outofmem", "Out of memory");
if (!stbi__gif_header(s, g, comp, 1)) {
STBI_FREE(g);
stbi__rewind( s );
@ -6766,6 +6871,17 @@ static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, i
}
}
static void *stbi__load_gif_main_outofmem(stbi__gif *g, stbi_uc *out, int **delays)
{
STBI_FREE(g->out);
STBI_FREE(g->history);
STBI_FREE(g->background);
if (out) STBI_FREE(out);
if (delays && *delays) STBI_FREE(*delays);
return stbi__errpuc("outofmem", "Out of memory");
}
static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
{
if (stbi__gif_test(s)) {
@ -6777,6 +6893,10 @@ static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y,
int stride;
int out_size = 0;
int delays_size = 0;
STBI_NOTUSED(out_size);
STBI_NOTUSED(delays_size);
memset(&g, 0, sizeof(g));
if (delays) {
*delays = 0;
@ -6794,26 +6914,29 @@ static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y,
if (out) {
void *tmp = (stbi_uc*) STBI_REALLOC_SIZED( out, out_size, layers * stride );
if (NULL == tmp) {
STBI_FREE(g.out);
STBI_FREE(g.history);
STBI_FREE(g.background);
return stbi__errpuc("outofmem", "Out of memory");
}
if (!tmp)
return stbi__load_gif_main_outofmem(&g, out, delays);
else {
out = (stbi_uc*) tmp;
out_size = layers * stride;
}
if (delays) {
*delays = (int*) STBI_REALLOC_SIZED( *delays, delays_size, sizeof(int) * layers );
int *new_delays = (int*) STBI_REALLOC_SIZED( *delays, delays_size, sizeof(int) * layers );
if (!new_delays)
return stbi__load_gif_main_outofmem(&g, out, delays);
*delays = new_delays;
delays_size = layers * sizeof(int);
}
} else {
out = (stbi_uc*)stbi__malloc( layers * stride );
if (!out)
return stbi__load_gif_main_outofmem(&g, out, delays);
out_size = layers * stride;
if (delays) {
*delays = (int*) stbi__malloc( layers * sizeof(int) );
if (!*delays)
return stbi__load_gif_main_outofmem(&g, out, delays);
delays_size = layers * sizeof(int);
}
}
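All of the out-of-memory paths above now funnel through stbi__load_gif_main_outofmem, and the delays reallocation follows the safe realloc idiom: stash the result in a temporary first, because assigning it straight back to *delays would drop the only pointer to the old block when realloc fails. A minimal sketch of the idiom, outside the stb context:

#include <stdlib.h>

/* Grows an int buffer; frees the old block and returns NULL on failure. */
static int *grow_buffer(int *p, size_t new_count)
{
   int *tmp = (int *) realloc(p, new_count * sizeof(int));
   if (!tmp) {
      free(p);     /* old block is still valid here; release it exactly once */
      return NULL;
   }
   return tmp;     /* caller overwrites its pointer only after success */
}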
@ -7138,9 +7261,10 @@ static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
info.all_a = 255;
p = stbi__bmp_parse_header(s, &info);
stbi__rewind( s );
if (p == NULL)
if (p == NULL) {
stbi__rewind( s );
return 0;
}
if (x) *x = s->img_x;
if (y) *y = s->img_y;
if (comp) {
@ -7206,8 +7330,8 @@ static int stbi__psd_is16(stbi__context *s)
stbi__rewind( s );
return 0;
}
(void) stbi__get32be(s);
(void) stbi__get32be(s);
STBI_NOTUSED(stbi__get32be(s));
STBI_NOTUSED(stbi__get32be(s));
depth = stbi__get16be(s);
if (depth != 16) {
stbi__rewind( s );
@ -7286,7 +7410,6 @@ static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
// Known limitations:
// Does not support comments in the header section
// Does not support ASCII image data (formats P2 and P3)
// Does not support 16-bit-per-channel
#ifndef STBI_NO_PNM
@ -7307,7 +7430,8 @@ static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req
stbi_uc *out;
STBI_NOTUSED(ri);
if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n))
ri->bits_per_channel = stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n);
if (ri->bits_per_channel == 0)
return 0;
if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
@ -7317,12 +7441,12 @@ static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req
*y = s->img_y;
if (comp) *comp = s->img_n;
if (!stbi__mad3sizes_valid(s->img_n, s->img_x, s->img_y, 0))
if (!stbi__mad4sizes_valid(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0))
return stbi__errpuc("too large", "PNM too large");
out = (stbi_uc *) stbi__malloc_mad3(s->img_n, s->img_x, s->img_y, 0);
out = (stbi_uc *) stbi__malloc_mad4(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0);
if (!out) return stbi__errpuc("outofmem", "Out of memory");
stbi__getn(s, out, s->img_n * s->img_x * s->img_y);
stbi__getn(s, out, s->img_n * s->img_x * s->img_y * (ri->bits_per_channel / 8));
if (req_comp && req_comp != s->img_n) {
out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);
@ -7398,11 +7522,19 @@ static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
stbi__pnm_skip_whitespace(s, &c);
maxv = stbi__pnm_getinteger(s, &c); // read max value
if (maxv > 255)
return stbi__err("max value > 255", "PPM image not 8-bit");
if (maxv > 65535)
return stbi__err("max value > 65535", "PPM image supports only 8-bit and 16-bit images");
else if (maxv > 255)
return 16;
else
return 1;
return 8;
}
static int stbi__pnm_is16(stbi__context *s)
{
if (stbi__pnm_info(s, NULL, NULL, NULL) == 16)
return 1;
return 0;
}
#endif
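With stbi__pnm_info now reporting bits per channel (8 or 16, 0 on error) instead of a boolean, 16-bit PGM/PPM files plug into stb_image's existing 16-bit public API. A minimal usage sketch (the file name is made up):

int w, h, n;
if (stbi_is_16_bit("scan.ppm")) {
   stbi_us *pix16 = stbi_load_16("scan.ppm", &w, &h, &n, 0); // 16 bits per channel
   // ... process ...
   stbi_image_free(pix16);
} else {
   stbi_uc *pix8 = stbi_load("scan.ppm", &w, &h, &n, 0);     // usual 8-bit path
   // ... process ...
   stbi_image_free(pix8);
}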
@ -7458,6 +7590,9 @@ static int stbi__is_16_main(stbi__context *s)
if (stbi__psd_is16(s)) return 1;
#endif
#ifndef STBI_NO_PNM
if (stbi__pnm_is16(s)) return 1;
#endif
return 0;
}
@ -7760,4 +7895,3 @@ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
------------------------------------------------------------------------------
*/

View File

@ -61,9 +61,6 @@ endif()
if(MSVC)
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_NATIVE}")
if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
add_definitions(-DLINUX_ARM=1)
endif()
endif()
add_library(wirehair ${LIB_SOURCE_FILES})

View File

@ -30,6 +30,7 @@
#include "WirehairTools.h"
#include <cmath>
#include <cstdlib>
#ifdef _MSC_VER
#include <intrin.h> // _BitScanReverse

View File

@ -200,7 +200,7 @@ static bool gf256_self_test()
#endif
#if defined(GF256_TRY_NEON)
# if defined(IOS) && defined(__ARM_NEON__)
# if defined(IOS) && (defined(__ARM_NEON) || defined(__ARM_NEON__))
// Requires iPhone 5S or newer
static const bool CpuHasNeon = true;
static const bool CpuHasNeon64 = true;
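This accounts for the macro rename in the ARM C Language Extensions: current compilers define __ARM_NEON, while older GCC/Clang releases only defined __ARM_NEON__. A portable guard checks both:

#if defined(__ARM_NEON) || defined(__ARM_NEON__)
#include <arm_neon.h>                      // NEON intrinsics are available
static const bool CpuHasNeonSketch = true;
#else
static const bool CpuHasNeonSketch = false;
#endif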

View File

@ -53,11 +53,17 @@
//------------------------------------------------------------------------------
// Platform/Architecture
#if defined(__ARM_ARCH) || defined(__ARM_NEON) || defined(__ARM_NEON__)
#if !defined IOS
#define LINUX_ARM
#endif
#endif
#if defined(ANDROID) || defined(IOS) || defined(LINUX_ARM) || defined(__powerpc__) || defined(__s390__)
#define GF256_TARGET_MOBILE
#endif // ANDROID
#if defined(__AVX2__) || (defined (_MSC_VER) && _MSC_VER >= 1900)
#if defined(__AVX2__) && (!defined (_MSC_VER) || _MSC_VER >= 1900)
#define GF256_TRY_AVX2 /* 256-bit */
#include <immintrin.h>
#define GF256_ALIGN_BYTES 32
@ -66,36 +72,28 @@
#endif // __AVX2__
#if !defined(GF256_TARGET_MOBILE)
// Note: MSVC currently only supports SSSE3 but not AVX2
#include <tmmintrin.h> // SSSE3: _mm_shuffle_epi8
#include <emmintrin.h> // SSE2
#endif // GF256_TARGET_MOBILE
#if defined(HAVE_ARM_NEON_H)
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
#include <arm_neon.h>
#endif // HAVE_ARM_NEON_H
#if defined(GF256_TARGET_MOBILE)
#define GF256_ALIGNED_ACCESSES /* Inputs must be aligned to GF256_ALIGN_BYTES */
# if defined(HAVE_ARM_NEON_H)
// Compiler-specific 128-bit SIMD register keyword
#define GF256_M128 uint8x16_t
#define GF256_TRY_NEON
#endif
// Compiler-specific 128-bit SIMD register keyword
#if defined(GF256_TARGET_MOBILE)
#if defined(GF256_TRY_NEON)
#define GF256_M128 uint8x16_t
#else
#define GF256_M128 uint64_t
# endif
#endif // GF256_TRY_NEON
#else // GF256_TARGET_MOBILE
// Compiler-specific 128-bit SIMD register keyword
#define GF256_M128 __m128i
#endif // GF256_TARGET_MOBILE
// Compiler-specific 256-bit SIMD register keyword
#ifdef GF256_TRY_AVX2
// Compiler-specific 256-bit SIMD register keyword
#define GF256_M256 __m256i
#endif
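The interleaved old/new lines above are hard to follow as a diff; as far as the visible lines allow reconstructing it, the GF256_M128 selection that results is roughly:

#if defined(GF256_TARGET_MOBILE)
# if defined(GF256_TRY_NEON)
#  define GF256_M128 uint8x16_t   // ARM NEON 128-bit vector
# else
#  define GF256_M128 uint64_t     // scalar fallback, no SIMD
# endif
#else
# define GF256_M128 __m128i       // SSE2/SSSE3 on x86
#endif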
@ -272,10 +270,6 @@ static GF256_FORCE_INLINE void gf256_div_mem(void * GF256_RESTRICT vz,
//------------------------------------------------------------------------------
// Misc Operations
/// Swap two memory buffers in-place
extern void gf256_memswap(void * GF256_RESTRICT vx, void * GF256_RESTRICT vy, int bytes);
#ifdef __cplusplus
}
#endif // __cplusplus

View File

@ -153,7 +153,7 @@ WIREHAIR_EXPORT WirehairCodec wirehair_decoder_create(
)
{
// If input is invalid:
if (messageBytes < 1 || blockBytes < 1) {
if (!m_init || messageBytes < 1 || blockBytes < 1) {
return nullptr;
}
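The added m_init test makes wirehair_decoder_create fail cleanly when the library was never initialized, instead of proceeding with uninitialized tables. A usage sketch against wirehair's public API (the header path is an assumption, and error handling is elided):

#include "wirehair/wirehair.h"
#include <cstdint>

WirehairCodec make_decoder(uint64_t messageBytes, uint32_t blockBytes)
{
    if (wirehair_init() != Wirehair_Success)  // must succeed before any codec call
        return nullptr;
    // now returns nullptr for bad sizes *or* a missing init
    return wirehair_decoder_create(nullptr, messageBytes, blockBytes);
}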

File diff suppressed because it is too large