mirror of https://github.com/sz3/libcimbar
Merge pull request #59 from sz3/bugfix-misc
Misc bugfixes + dependency upgrades
This commit is contained in:
commit
729eb7ebcb
|
@ -16,7 +16,7 @@ if("${CMAKE_BUILD_TYPE}" STREQUAL "")
|
|||
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-Wall -g -O2 -fPIC")
|
||||
endif()
|
||||
|
||||
if(DEFINED USE_WASM)
|
||||
if(DEFINED USE_WASM) # wasm build needs OPENCV_DIR defined
|
||||
set(DISABLE_TESTS true)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGF256_TARGET_MOBILE")
|
||||
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-Os")
|
||||
|
@ -28,10 +28,13 @@ if(DEFINED USE_WASM)
|
|||
${OPENCV_DIR}/opencv-build-wasm/build_wasm/
|
||||
${opencv_include_modules}
|
||||
)
|
||||
else() # if not wasm, go find opencv. 3 or 4 should both work
|
||||
find_package(OpenCV REQUIRED)
|
||||
include_directories(${OpenCV_INCLUDE_DIRS})
|
||||
endif()
|
||||
|
||||
if(NOT DEFINED OPENCV_LIBS)
|
||||
set(OPENCV_LIBS "opencv_core" "opencv_imgcodecs" "opencv_imgproc" "opencv_photo")
|
||||
set(OPENCV_LIBS "opencv_calib3d" "opencv_imgcodecs" "opencv_imgproc" "opencv_photo" "opencv_core")
|
||||
endif()
|
||||
|
||||
if(NOT DEFINED CPPFILESYSTEM)
|
||||
|
|
|
@ -70,7 +70,7 @@ int main(int argc, char** argv)
|
|||
if (!initialize_GL(window_size, window_size))
|
||||
{
|
||||
std::cerr << "failed to create window :(" << std::endl;
|
||||
return 50;
|
||||
return 70;
|
||||
}
|
||||
|
||||
configure(colorBits, ecc, compressionLevel);
|
||||
|
|
|
@ -1,8 +1,9 @@
|
|||
/* This code is subject to the terms of the Mozilla Public License, v.2.0. http://mozilla.org/MPL/2.0/. */
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <cstddef>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
class CellPositions
|
||||
{
|
||||
|
|
|
@ -32,7 +32,7 @@ public:
|
|||
size_t compressedBytes = ZSTD_compressCCtx(_cctx, _compBuff.data(), _compBuff.size(), data, writeLen, _compressionLevel);
|
||||
if (ZSTD_isError(compressedBytes))
|
||||
{
|
||||
std::cout << "error? " << ZSTD_getErrorName(compressedBytes) << std::endl;
|
||||
std::cerr << "error? " << ZSTD_getErrorName(compressedBytes) << std::endl;
|
||||
return false;
|
||||
}
|
||||
STREAM::write(_compBuff.data(), compressedBytes);
|
||||
|
|
|
@ -56,5 +56,8 @@ TEST_CASE( "DecoderTest/testDecode.Sample", "[unit]" )
|
|||
unsigned bytesDecoded = dec.decode(TestCimbar::getSample("6bit/4_30_f0_627_extract.jpg"), decodedFile);
|
||||
assertEquals( 9300, bytesDecoded );
|
||||
|
||||
assertEquals( "3de927c8aa0221807a2784210160cdc17567eb587bf01233d166900aadf14bf5", get_hash(decodedFile) );
|
||||
if (CV_VERSION_MAJOR == 3)
|
||||
assertEquals( "3de927c8aa0221807a2784210160cdc17567eb587bf01233d166900aadf14bf5", get_hash(decodedFile) );
|
||||
else // # cv4
|
||||
assertEquals( "59ddb2516b4ff5a528aebe538a22b736a6714263a454d20e146e1ffbba36c5ae", get_hash(decodedFile) );
|
||||
}
|
||||
|
|
|
@ -15,7 +15,7 @@ public:
|
|||
}
|
||||
|
||||
public:
|
||||
FountainMetadata(uint64_t id)
|
||||
FountainMetadata(uint32_t id)
|
||||
: _data(id)
|
||||
{
|
||||
}
|
||||
|
@ -33,7 +33,7 @@ public:
|
|||
to_uint8_arr(encode_id, size, d);
|
||||
}
|
||||
|
||||
unsigned id() const
|
||||
uint32_t id() const
|
||||
{
|
||||
return _data;
|
||||
}
|
||||
|
@ -65,5 +65,5 @@ protected:
|
|||
}
|
||||
|
||||
protected:
|
||||
uint64_t _data; // might invert this and only generate the uint64_t when we need it
|
||||
uint32_t _data; // might invert this and only generate the uint32_t when we need it
|
||||
};
|
||||
|
|
|
@ -39,10 +39,10 @@ public:
|
|||
return true;
|
||||
}
|
||||
|
||||
void mark_done(uint64_t id)
|
||||
void mark_done(const FountainMetadata& md)
|
||||
{
|
||||
_done.insert(id);
|
||||
auto it = _streams.find(id);
|
||||
_done.insert(md.id());
|
||||
auto it = _streams.find(stream_slot(md));
|
||||
if (it != _streams.end())
|
||||
_streams.erase(it);
|
||||
}
|
||||
|
@ -60,7 +60,7 @@ public:
|
|||
std::vector<std::string> get_done() const
|
||||
{
|
||||
std::vector<std::string> done;
|
||||
for (uint64_t id : _done)
|
||||
for (uint32_t id : _done)
|
||||
done.push_back( get_filename(FountainMetadata(id)) );
|
||||
return done;
|
||||
}
|
||||
|
@ -77,7 +77,7 @@ public:
|
|||
return progress;
|
||||
}
|
||||
|
||||
bool is_done(uint64_t id) const
|
||||
bool is_done(uint32_t id) const
|
||||
{
|
||||
return _done.find(id) != _done.end();
|
||||
}
|
||||
|
@ -106,7 +106,7 @@ public:
|
|||
return false;
|
||||
|
||||
if (store(md, *finished))
|
||||
mark_done(md.id());
|
||||
mark_done(md);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -122,7 +122,7 @@ public:
|
|||
}
|
||||
|
||||
protected:
|
||||
// streams is limited to at most 8 decoders at a time. Current, we just use the lower bits of the encode_id.
|
||||
// streams is limited to at most 8 decoders at a time. Currently, we just use the lower bits of the encode_id.
|
||||
uint8_t stream_slot(const FountainMetadata& md) const
|
||||
{
|
||||
return md.encode_id() & 0x7;
|
||||
|
@ -137,7 +137,10 @@ protected:
|
|||
std::string _dataDir;
|
||||
unsigned _chunkSize;
|
||||
|
||||
// maybe instead of unordered_map+set, something where we can "age out" old streams?
|
||||
// e.g. most recent 16/8, or something?
|
||||
// question is what happens to _done/_streams when we wrap for continuous data streaming...
|
||||
std::unordered_map<uint8_t, fountain_decoder_stream> _streams;
|
||||
// track the uint64_t combo of (encode_id,size) to avoid redundant work
|
||||
std::set<uint64_t> _done;
|
||||
// track the uint32_t combo of (encode_id,size) to avoid redundant work
|
||||
std::set<uint32_t> _done;
|
||||
};
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
#pragma once
|
||||
|
||||
#include "bit_extractor.h"
|
||||
#include "intx/int128.hpp"
|
||||
#include "intx/intx.hpp"
|
||||
#include <array>
|
||||
#include <iostream>
|
||||
#include <utility>
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
#include "bit_file/bitmatrix.h"
|
||||
#include "cimb_translator/Cell.h"
|
||||
|
||||
#include "intx/int128.hpp"
|
||||
#include "intx/intx.hpp"
|
||||
#include <opencv2/opencv.hpp>
|
||||
|
||||
#include <array>
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
#include "unittest.h"
|
||||
|
||||
#include "bit_extractor.h"
|
||||
#include "intx/int128.hpp"
|
||||
#include "intx/intx.hpp"
|
||||
|
||||
#include <bitset>
|
||||
#include <iostream>
|
||||
|
@ -36,7 +36,9 @@ TEST_CASE( "bitExtractorTest/testLargerValue.1", "[unit]" )
|
|||
|
||||
TEST_CASE( "bitExtractorTest/testLargerValue.2", "[unit]" )
|
||||
{
|
||||
intx::uint128 bits{0xFFBFCFE3FULL, 0xF83C0E030080000ULL};
|
||||
intx::uint128 bits{0xF83C0E030080000ULL, 0xFFBFCFE3FULL};
|
||||
assertEquals( "ffbfcfe3f0f83c0e030080000", intx::hex(bits) ); // sanity check
|
||||
|
||||
bit_extractor<intx::uint128, 100> be(bits);
|
||||
uint64_t res = be.extract(1, 11, 21, 31);
|
||||
assertEquals( 0xfffefcf8, res );
|
||||
|
|
|
@ -93,7 +93,7 @@ void hash256_block(RaIter1 message_digest, RaIter2 first, RaIter2 last) {
|
|||
assert(first + 64 == last);
|
||||
static_cast<void>(last); // for avoiding unused-variable warning
|
||||
word_t w[64];
|
||||
std::fill(w, w + 64, 0);
|
||||
std::fill(w, w + 64, word_t(0));
|
||||
for (std::size_t i = 0; i < 16; ++i) {
|
||||
w[i] = (static_cast<word_t>(mask_8bit(*(first + i * 4))) << 24) |
|
||||
(static_cast<word_t>(mask_8bit(*(first + i * 4 + 1))) << 16) |
|
||||
|
@ -185,7 +185,7 @@ class hash256_one_by_one {
|
|||
|
||||
void init() {
|
||||
buffer_.clear();
|
||||
std::fill(data_length_digits_, data_length_digits_ + 4, 0);
|
||||
std::fill(data_length_digits_, data_length_digits_ + 4, word_t(0));
|
||||
std::copy(detail::initial_message_digest,
|
||||
detail::initial_message_digest + 8, h_);
|
||||
}
|
||||
|
@ -204,17 +204,17 @@ class hash256_one_by_one {
|
|||
|
||||
void finish() {
|
||||
byte_t temp[64];
|
||||
std::fill(temp, temp + 64, 0);
|
||||
std::fill(temp, temp + 64, byte_t(0));
|
||||
std::size_t remains = buffer_.size();
|
||||
std::copy(buffer_.begin(), buffer_.end(), temp);
|
||||
temp[remains] = 0x80;
|
||||
|
||||
if (remains > 55) {
|
||||
std::fill(temp + remains + 1, temp + 64, 0);
|
||||
std::fill(temp + remains + 1, temp + 64, byte_t(0));
|
||||
detail::hash256_block(h_, temp, temp + 64);
|
||||
std::fill(temp, temp + 64 - 4, 0);
|
||||
std::fill(temp, temp + 64 - 4, byte_t(0));
|
||||
} else {
|
||||
std::fill(temp + remains + 1, temp + 64 - 4, 0);
|
||||
std::fill(temp + remains + 1, temp + 64 - 4, byte_t(0));
|
||||
}
|
||||
|
||||
write_data_bit_length(&(temp[56]));
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
This license file applies to everything in this repository except that which
|
||||
is explicitly annotated as being written by other authors, i.e. the Boost
|
||||
queue (included in the benchmarks for comparison), Intel's TBB library (ditto),
|
||||
dlib::pipe (ditto),
|
||||
the CDSChecker tool (used for verification), the Relacy model checker (ditto),
|
||||
and Jeff Preshing's semaphore implementation (used in the blocking queue) which
|
||||
has a zlib license (embedded in lightweightsempahore.h).
|
||||
|
|
|
@ -1688,7 +1688,7 @@ private:
|
|||
{
|
||||
}
|
||||
|
||||
virtual ~ProducerBase() { };
|
||||
virtual ~ProducerBase() { }
|
||||
|
||||
template<typename U>
|
||||
inline bool dequeue(U& element)
|
||||
|
@ -1897,7 +1897,7 @@ private:
|
|||
++pr_blockIndexSlotsUsed;
|
||||
}
|
||||
|
||||
MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new ((T*)nullptr) T(std::forward<U>(element)))) {
|
||||
MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) {
|
||||
// The constructor may throw. We want the element not to appear in the queue in
|
||||
// that case (without corrupting the queue):
|
||||
MOODYCAMEL_TRY {
|
||||
|
@ -1923,7 +1923,7 @@ private:
|
|||
blockIndex.load(std::memory_order_relaxed)->front.store(pr_blockIndexFront, std::memory_order_release);
|
||||
pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);
|
||||
|
||||
MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new ((T*)nullptr) T(std::forward<U>(element)))) {
|
||||
MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) {
|
||||
this->tailIndex.store(newTailIndex, std::memory_order_release);
|
||||
return true;
|
||||
}
|
||||
|
@ -2139,7 +2139,7 @@ private:
|
|||
block = block->next;
|
||||
}
|
||||
|
||||
MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new ((T*)nullptr) T(details::deref_noexcept(itemFirst)))) {
|
||||
MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) {
|
||||
blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release);
|
||||
}
|
||||
}
|
||||
|
@ -2158,7 +2158,7 @@ private:
|
|||
if (details::circular_less_than<index_t>(newTailIndex, stopIndex)) {
|
||||
stopIndex = newTailIndex;
|
||||
}
|
||||
MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new ((T*)nullptr) T(details::deref_noexcept(itemFirst)))) {
|
||||
MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) {
|
||||
while (currentTailIndex != stopIndex) {
|
||||
new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++);
|
||||
}
|
||||
|
@ -2173,7 +2173,7 @@ private:
|
|||
// may only define a (noexcept) move constructor, and so calls to the
|
||||
// cctor will not compile, even if they are in an if branch that will never
|
||||
// be executed
|
||||
new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<(bool)!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new ((T*)nullptr) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst));
|
||||
new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst));
|
||||
++currentTailIndex;
|
||||
++itemFirst;
|
||||
}
|
||||
|
@ -2220,7 +2220,7 @@ private:
|
|||
this->tailBlock = this->tailBlock->next;
|
||||
}
|
||||
|
||||
MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new ((T*)nullptr) T(details::deref_noexcept(itemFirst)))) {
|
||||
MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) {
|
||||
if (firstAllocatedBlock != nullptr)
|
||||
blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release);
|
||||
}
|
||||
|
@ -2239,7 +2239,7 @@ private:
|
|||
desiredCount = desiredCount < max ? desiredCount : max;
|
||||
std::atomic_thread_fence(std::memory_order_acquire);
|
||||
|
||||
auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed);;
|
||||
auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed);
|
||||
|
||||
tail = this->tailIndex.load(std::memory_order_acquire);
|
||||
auto actualCount = static_cast<size_t>(tail - (myDequeueCount - overcommit));
|
||||
|
@ -2501,7 +2501,7 @@ private:
|
|||
#endif
|
||||
newBlock->ConcurrentQueue::Block::template reset_empty<implicit_context>();
|
||||
|
||||
MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new ((T*)nullptr) T(std::forward<U>(element)))) {
|
||||
MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) {
|
||||
// May throw, try to insert now before we publish the fact that we have this new block
|
||||
MOODYCAMEL_TRY {
|
||||
new ((*newBlock)[currentTailIndex]) T(std::forward<U>(element));
|
||||
|
@ -2519,7 +2519,7 @@ private:
|
|||
|
||||
this->tailBlock = newBlock;
|
||||
|
||||
MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new ((T*)nullptr) T(std::forward<U>(element)))) {
|
||||
MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) {
|
||||
this->tailIndex.store(newTailIndex, std::memory_order_release);
|
||||
return true;
|
||||
}
|
||||
|
@ -2697,7 +2697,7 @@ private:
|
|||
if (details::circular_less_than<index_t>(newTailIndex, stopIndex)) {
|
||||
stopIndex = newTailIndex;
|
||||
}
|
||||
MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new ((T*)nullptr) T(details::deref_noexcept(itemFirst)))) {
|
||||
MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) {
|
||||
while (currentTailIndex != stopIndex) {
|
||||
new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++);
|
||||
}
|
||||
|
@ -2705,7 +2705,7 @@ private:
|
|||
else {
|
||||
MOODYCAMEL_TRY {
|
||||
while (currentTailIndex != stopIndex) {
|
||||
new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<(bool)!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new ((T*)nullptr) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst));
|
||||
new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst));
|
||||
++currentTailIndex;
|
||||
++itemFirst;
|
||||
}
|
||||
|
@ -3459,7 +3459,7 @@ private:
|
|||
}
|
||||
|
||||
auto newHash = new (raw) ImplicitProducerHash;
|
||||
newHash->capacity = (size_t)newCapacity;
|
||||
newHash->capacity = static_cast<size_t>(newCapacity);
|
||||
newHash->entries = reinterpret_cast<ImplicitProducerKVP*>(details::align_for<ImplicitProducerKVP>(raw + sizeof(ImplicitProducerHash)));
|
||||
for (size_t i = 0; i != newCapacity; ++i) {
|
||||
new (newHash->entries + i) ImplicitProducerKVP;
|
||||
|
@ -3698,7 +3698,7 @@ ConsumerToken::ConsumerToken(ConcurrentQueue<T, Traits>& queue)
|
|||
: itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr)
|
||||
{
|
||||
initialOffset = queue.nextExplicitConsumerId.fetch_add(1, std::memory_order_release);
|
||||
lastKnownGlobalOffset = (std::uint32_t)-1;
|
||||
lastKnownGlobalOffset = static_cast<std::uint32_t>(-1);
|
||||
}
|
||||
|
||||
template<typename T, typename Traits>
|
||||
|
@ -3706,7 +3706,7 @@ ConsumerToken::ConsumerToken(BlockingConcurrentQueue<T, Traits>& queue)
|
|||
: itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr)
|
||||
{
|
||||
initialOffset = reinterpret_cast<ConcurrentQueue<T, Traits>*>(&queue)->nextExplicitConsumerId.fetch_add(1, std::memory_order_release);
|
||||
lastKnownGlobalOffset = (std::uint32_t)-1;
|
||||
lastKnownGlobalOffset = static_cast<std::uint32_t>(-1);
|
||||
}
|
||||
|
||||
template<typename T, typename Traits>
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,7 +1,7 @@
|
|||
cmake_minimum_required(VERSION 3.10)
|
||||
|
||||
set(SOURCES
|
||||
int128.hpp
|
||||
intx.hpp
|
||||
)
|
||||
|
||||
add_library(intx INTERFACE)
|
||||
|
|
|
@ -1,885 +0,0 @@
|
|||
// intx: extended precision integer library.
|
||||
// Copyright 2019-2020 Pawel Bylica.
|
||||
// Licensed under the Apache License, Version 2.0.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <climits>
|
||||
#include <cstdint>
|
||||
#include <limits>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <type_traits>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define INTX_UNREACHABLE __assume(0)
|
||||
#else
|
||||
#define INTX_UNREACHABLE __builtin_unreachable()
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define INTX_UNLIKELY(EXPR) (bool{EXPR})
|
||||
#else
|
||||
#define INTX_UNLIKELY(EXPR) __builtin_expect(bool{EXPR}, false)
|
||||
#endif
|
||||
|
||||
#ifdef NDEBUG
|
||||
#define INTX_REQUIRE(X) (X) ? (void)0 : INTX_UNREACHABLE
|
||||
#else
|
||||
#include <cassert>
|
||||
#define INTX_REQUIRE assert
|
||||
#endif
|
||||
|
||||
namespace intx
|
||||
{
|
||||
template <unsigned N>
|
||||
struct uint;
|
||||
|
||||
/// The 128-bit unsigned integer.
|
||||
///
|
||||
/// This type is defined as a specialization of uint<> to easier integration with full intx package,
|
||||
/// however, uint128 may be used independently.
|
||||
template <>
|
||||
struct uint<128>
|
||||
{
|
||||
static constexpr unsigned num_bits = 128;
|
||||
|
||||
uint64_t lo = 0;
|
||||
uint64_t hi = 0;
|
||||
|
||||
constexpr uint() noexcept = default;
|
||||
|
||||
constexpr uint(uint64_t high, uint64_t low) noexcept : lo{low}, hi{high} {}
|
||||
|
||||
template <typename T,
|
||||
typename = typename std::enable_if_t<std::is_convertible<T, uint64_t>::value>>
|
||||
constexpr uint(T x) noexcept : lo(static_cast<uint64_t>(x)) // NOLINT
|
||||
{}
|
||||
|
||||
#ifdef __SIZEOF_INT128__
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wpedantic"
|
||||
constexpr uint(unsigned __int128 x) noexcept // NOLINT
|
||||
: lo{uint64_t(x)}, hi{uint64_t(x >> 64)}
|
||||
{}
|
||||
|
||||
constexpr explicit operator unsigned __int128() const noexcept
|
||||
{
|
||||
return (static_cast<unsigned __int128>(hi) << 64) | lo;
|
||||
}
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
constexpr explicit operator bool() const noexcept { return hi | lo; }
|
||||
|
||||
/// Explicit converting operator for all builtin integral types.
|
||||
template <typename Int, typename = typename std::enable_if<std::is_integral<Int>::value>::type>
|
||||
constexpr explicit operator Int() const noexcept
|
||||
{
|
||||
return static_cast<Int>(lo);
|
||||
}
|
||||
};
|
||||
|
||||
using uint128 = uint<128>;
|
||||
|
||||
|
||||
/// Contains result of add/sub/etc with a carry flag.
|
||||
template <typename T>
|
||||
struct result_with_carry
|
||||
{
|
||||
T value;
|
||||
bool carry;
|
||||
|
||||
/// Conversion to tuple of references, to allow usage with std::tie().
|
||||
constexpr operator std::tuple<T&, bool&>() noexcept { return {value, carry}; }
|
||||
};
|
||||
|
||||
|
||||
/// Linear arithmetic operators.
|
||||
/// @{
|
||||
|
||||
constexpr inline result_with_carry<uint64_t> add_with_carry(
|
||||
uint64_t x, uint64_t y, bool carry = false) noexcept
|
||||
{
|
||||
const auto s = x + y;
|
||||
const auto carry1 = s < x;
|
||||
const auto t = s + carry;
|
||||
const auto carry2 = t < s;
|
||||
return {t, carry1 || carry2};
|
||||
}
|
||||
|
||||
template <unsigned N>
|
||||
constexpr result_with_carry<uint<N>> add_with_carry(
|
||||
const uint<N>& a, const uint<N>& b, bool carry = false) noexcept
|
||||
{
|
||||
const auto lo = add_with_carry(a.lo, b.lo, carry);
|
||||
const auto hi = add_with_carry(a.hi, b.hi, lo.carry);
|
||||
return {{hi.value, lo.value}, hi.carry};
|
||||
}
|
||||
|
||||
constexpr inline uint128 operator+(uint128 x, uint128 y) noexcept
|
||||
{
|
||||
return add_with_carry(x, y).value;
|
||||
}
|
||||
|
||||
constexpr inline uint128 operator+(uint128 x) noexcept
|
||||
{
|
||||
return x;
|
||||
}
|
||||
|
||||
constexpr inline result_with_carry<uint64_t> sub_with_carry(
|
||||
uint64_t x, uint64_t y, bool carry = false) noexcept
|
||||
{
|
||||
const auto d = x - y;
|
||||
const auto carry1 = d > x;
|
||||
const auto e = d - carry;
|
||||
const auto carry2 = e > d;
|
||||
return {e, carry1 || carry2};
|
||||
}
|
||||
|
||||
/// Performs subtraction of two unsigned numbers and returns the difference
|
||||
/// and the carry bit (aka borrow, overflow).
|
||||
template <unsigned N>
|
||||
constexpr inline result_with_carry<uint<N>> sub_with_carry(
|
||||
const uint<N>& a, const uint<N>& b, bool carry = false) noexcept
|
||||
{
|
||||
const auto lo = sub_with_carry(a.lo, b.lo, carry);
|
||||
const auto hi = sub_with_carry(a.hi, b.hi, lo.carry);
|
||||
return {{hi.value, lo.value}, hi.carry};
|
||||
}
|
||||
|
||||
constexpr inline uint128 operator-(uint128 x, uint128 y) noexcept
|
||||
{
|
||||
return sub_with_carry(x, y).value;
|
||||
}
|
||||
|
||||
constexpr inline uint128 operator-(uint128 x) noexcept
|
||||
{
|
||||
// Implementing as subtraction is better than ~x + 1.
|
||||
// Clang9: Perfect.
|
||||
// GCC8: Does something weird.
|
||||
return 0 - x;
|
||||
}
|
||||
|
||||
inline uint128& operator++(uint128& x) noexcept
|
||||
{
|
||||
return x = x + 1;
|
||||
}
|
||||
|
||||
inline uint128& operator--(uint128& x) noexcept
|
||||
{
|
||||
return x = x - 1;
|
||||
}
|
||||
|
||||
inline uint128 operator++(uint128& x, int) noexcept
|
||||
{
|
||||
auto ret = x;
|
||||
++x;
|
||||
return ret;
|
||||
}
|
||||
|
||||
inline uint128 operator--(uint128& x, int) noexcept
|
||||
{
|
||||
auto ret = x;
|
||||
--x;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/// Optimized addition.
|
||||
///
|
||||
/// This keeps the multiprecision addition until CodeGen so the pattern is not
|
||||
/// broken during other optimizations.
|
||||
constexpr uint128 fast_add(uint128 x, uint128 y) noexcept
|
||||
{
|
||||
#ifdef __SIZEOF_INT128__
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wpedantic"
|
||||
using uint128_native = unsigned __int128;
|
||||
return uint128_native{x} + uint128_native{y};
|
||||
#pragma GCC diagnostic pop
|
||||
#else
|
||||
// Fallback to regular addition.
|
||||
return x + y;
|
||||
#endif
|
||||
}
|
||||
|
||||
/// @}
|
||||
|
||||
|
||||
/// Comparison operators.
|
||||
///
|
||||
/// In all implementations bitwise operators are used instead of logical ones
|
||||
/// to avoid branching.
|
||||
///
|
||||
/// @{
|
||||
|
||||
constexpr bool operator==(uint128 x, uint128 y) noexcept
|
||||
{
|
||||
// Clang7: generates perfect xor based code,
|
||||
// much better than __int128 where it uses vector instructions.
|
||||
// GCC8: generates a bit worse cmp based code
|
||||
// although it generates the xor based one for __int128.
|
||||
return (x.lo == y.lo) & (x.hi == y.hi);
|
||||
}
|
||||
|
||||
constexpr bool operator!=(uint128 x, uint128 y) noexcept
|
||||
{
|
||||
// Analogous to ==, but == not used directly, because that confuses GCC 8-9.
|
||||
return (x.lo != y.lo) | (x.hi != y.hi);
|
||||
}
|
||||
|
||||
constexpr bool operator<(uint128 x, uint128 y) noexcept
|
||||
{
|
||||
// OPT: This should be implemented by checking the borrow of x - y,
|
||||
// but compilers (GCC8, Clang7)
|
||||
// have problem with properly optimizing subtraction.
|
||||
return (x.hi < y.hi) | ((x.hi == y.hi) & (x.lo < y.lo));
|
||||
}
|
||||
|
||||
constexpr bool operator<=(uint128 x, uint128 y) noexcept
|
||||
{
|
||||
return !(y < x);
|
||||
}
|
||||
|
||||
constexpr bool operator>(uint128 x, uint128 y) noexcept
|
||||
{
|
||||
return y < x;
|
||||
}
|
||||
|
||||
constexpr bool operator>=(uint128 x, uint128 y) noexcept
|
||||
{
|
||||
return !(x < y);
|
||||
}
|
||||
|
||||
/// @}
|
||||
|
||||
|
||||
/// Bitwise operators.
|
||||
/// @{
|
||||
|
||||
constexpr uint128 operator~(uint128 x) noexcept
|
||||
{
|
||||
return {~x.hi, ~x.lo};
|
||||
}
|
||||
|
||||
constexpr uint128 operator|(uint128 x, uint128 y) noexcept
|
||||
{
|
||||
// Clang7: perfect.
|
||||
// GCC8: stupidly uses a vector instruction in all bitwise operators.
|
||||
return {x.hi | y.hi, x.lo | y.lo};
|
||||
}
|
||||
|
||||
constexpr uint128 operator&(uint128 x, uint128 y) noexcept
|
||||
{
|
||||
return {x.hi & y.hi, x.lo & y.lo};
|
||||
}
|
||||
|
||||
constexpr uint128 operator^(uint128 x, uint128 y) noexcept
|
||||
{
|
||||
return {x.hi ^ y.hi, x.lo ^ y.lo};
|
||||
}
|
||||
|
||||
constexpr uint128 operator<<(uint128 x, unsigned shift) noexcept
|
||||
{
|
||||
return (shift < 64) ?
|
||||
// Find the part moved from lo to hi.
|
||||
// For shift == 0 right shift by (64 - shift) is invalid so
|
||||
// split it into 2 shifts by 1 and (63 - shift).
|
||||
uint128{(x.hi << shift) | ((x.lo >> 1) >> (63 - shift)), x.lo << shift} :
|
||||
|
||||
// Guarantee "defined" behavior for shifts larger than 128.
|
||||
(shift < 128) ? uint128{x.lo << (shift - 64), 0} : 0;
|
||||
}
|
||||
|
||||
constexpr uint128 operator<<(uint128 x, uint128 shift) noexcept
|
||||
{
|
||||
if (shift < 128)
|
||||
return x << unsigned(shift);
|
||||
return 0;
|
||||
}
|
||||
|
||||
constexpr uint128 operator>>(uint128 x, unsigned shift) noexcept
|
||||
{
|
||||
return (shift < 64) ?
|
||||
// Find the part moved from lo to hi.
|
||||
// For shift == 0 left shift by (64 - shift) is invalid so
|
||||
// split it into 2 shifts by 1 and (63 - shift).
|
||||
uint128{x.hi >> shift, (x.lo >> shift) | ((x.hi << 1) << (63 - shift))} :
|
||||
|
||||
// Guarantee "defined" behavior for shifts larger than 128.
|
||||
(shift < 128) ? uint128{0, x.hi >> (shift - 64)} : 0;
|
||||
}
|
||||
|
||||
constexpr uint128 operator>>(uint128 x, uint128 shift) noexcept
|
||||
{
|
||||
if (shift < 128)
|
||||
return x >> unsigned(shift);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/// @}
|
||||
|
||||
|
||||
/// Multiplication
|
||||
/// @{
|
||||
|
||||
/// Portable full unsigned multiplication 64 x 64 -> 128.
|
||||
constexpr uint128 constexpr_umul(uint64_t x, uint64_t y) noexcept
|
||||
{
|
||||
uint64_t xl = x & 0xffffffff;
|
||||
uint64_t xh = x >> 32;
|
||||
uint64_t yl = y & 0xffffffff;
|
||||
uint64_t yh = y >> 32;
|
||||
|
||||
uint64_t t0 = xl * yl;
|
||||
uint64_t t1 = xh * yl;
|
||||
uint64_t t2 = xl * yh;
|
||||
uint64_t t3 = xh * yh;
|
||||
|
||||
uint64_t u1 = t1 + (t0 >> 32);
|
||||
uint64_t u2 = t2 + (u1 & 0xffffffff);
|
||||
|
||||
uint64_t lo = (u2 << 32) | (t0 & 0xffffffff);
|
||||
uint64_t hi = t3 + (u2 >> 32) + (u1 >> 32);
|
||||
return {hi, lo};
|
||||
}
|
||||
|
||||
/// Full unsigned multiplication 64 x 64 -> 128.
|
||||
inline uint128 umul(uint64_t x, uint64_t y) noexcept
|
||||
{
|
||||
#if defined(__SIZEOF_INT128__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wpedantic"
|
||||
const auto p = static_cast<unsigned __int128>(x) * y;
|
||||
return {uint64_t(p >> 64), uint64_t(p)};
|
||||
#pragma GCC diagnostic pop
|
||||
#elif defined(_MSC_VER)
|
||||
unsigned __int64 hi;
|
||||
const auto lo = _umul128(x, y, &hi);
|
||||
return {hi, lo};
|
||||
#else
|
||||
return constexpr_umul(x, y);
|
||||
#endif
|
||||
}
|
||||
|
||||
inline uint128 operator*(uint128 x, uint128 y) noexcept
|
||||
{
|
||||
auto p = umul(x.lo, y.lo);
|
||||
p.hi += (x.lo * y.hi) + (x.hi * y.lo);
|
||||
return {p.hi, p.lo};
|
||||
}
|
||||
|
||||
constexpr uint128 constexpr_mul(uint128 x, uint128 y) noexcept
|
||||
{
|
||||
auto p = constexpr_umul(x.lo, y.lo);
|
||||
p.hi += (x.lo * y.hi) + (x.hi * y.lo);
|
||||
return {p.hi, p.lo};
|
||||
}
|
||||
|
||||
/// @}
|
||||
|
||||
|
||||
/// Assignment operators.
|
||||
/// @{
|
||||
|
||||
constexpr uint128& operator+=(uint128& x, uint128 y) noexcept
|
||||
{
|
||||
return x = x + y;
|
||||
}
|
||||
|
||||
constexpr uint128& operator-=(uint128& x, uint128 y) noexcept
|
||||
{
|
||||
return x = x - y;
|
||||
}
|
||||
|
||||
inline uint128& operator*=(uint128& x, uint128 y) noexcept
|
||||
{
|
||||
return x = x * y;
|
||||
}
|
||||
|
||||
constexpr uint128& operator|=(uint128& x, uint128 y) noexcept
|
||||
{
|
||||
return x = x | y;
|
||||
}
|
||||
|
||||
constexpr uint128& operator&=(uint128& x, uint128 y) noexcept
|
||||
{
|
||||
return x = x & y;
|
||||
}
|
||||
|
||||
constexpr uint128& operator^=(uint128& x, uint128 y) noexcept
|
||||
{
|
||||
return x = x ^ y;
|
||||
}
|
||||
|
||||
constexpr uint128& operator<<=(uint128& x, unsigned shift) noexcept
|
||||
{
|
||||
return x = x << shift;
|
||||
}
|
||||
|
||||
constexpr uint128& operator>>=(uint128& x, unsigned shift) noexcept
|
||||
{
|
||||
return x = x >> shift;
|
||||
}
|
||||
|
||||
/// @}
|
||||
|
||||
|
||||
constexpr unsigned clz_generic(uint32_t x) noexcept
|
||||
{
|
||||
unsigned n = 32;
|
||||
for (int i = 4; i >= 0; --i)
|
||||
{
|
||||
const auto s = unsigned{1} << i;
|
||||
const auto hi = x >> s;
|
||||
if (hi != 0)
|
||||
{
|
||||
n -= s;
|
||||
x = hi;
|
||||
}
|
||||
}
|
||||
return n - x;
|
||||
}
|
||||
|
||||
constexpr unsigned clz_generic(uint64_t x) noexcept
|
||||
{
|
||||
unsigned n = 64;
|
||||
for (int i = 5; i >= 0; --i)
|
||||
{
|
||||
const auto s = unsigned{1} << i;
|
||||
const auto hi = x >> s;
|
||||
if (hi != 0)
|
||||
{
|
||||
n -= s;
|
||||
x = hi;
|
||||
}
|
||||
}
|
||||
return n - static_cast<unsigned>(x);
|
||||
}
|
||||
|
||||
constexpr inline unsigned clz(uint32_t x) noexcept
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
return clz_generic(x);
|
||||
#else
|
||||
return x != 0 ? unsigned(__builtin_clz(x)) : 32;
|
||||
#endif
|
||||
}
|
||||
|
||||
constexpr inline unsigned clz(uint64_t x) noexcept
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
return clz_generic(x);
|
||||
#else
|
||||
return x != 0 ? unsigned(__builtin_clzll(x)) : 64;
|
||||
#endif
|
||||
}
|
||||
|
||||
constexpr inline unsigned clz(uint128 x) noexcept
|
||||
{
|
||||
// In this order `h == 0` we get less instructions than in case of `h != 0`.
|
||||
return x.hi == 0 ? clz(x.lo) + 64 : clz(x.hi);
|
||||
}
|
||||
|
||||
|
||||
inline uint64_t bswap(uint64_t x) noexcept
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
return _byteswap_uint64(x);
|
||||
#else
|
||||
return __builtin_bswap64(x);
|
||||
#endif
|
||||
}
|
||||
|
||||
inline uint128 bswap(uint128 x) noexcept
|
||||
{
|
||||
return {bswap(x.lo), bswap(x.hi)};
|
||||
}
|
||||
|
||||
|
||||
/// Division.
|
||||
/// @{
|
||||
|
||||
template <typename QuotT, typename RemT = QuotT>
|
||||
struct div_result
|
||||
{
|
||||
QuotT quot;
|
||||
RemT rem;
|
||||
|
||||
/// Conversion to tuple of references, to allow usage with std::tie().
|
||||
constexpr operator std::tuple<QuotT&, RemT&>() noexcept { return {quot, rem}; }
|
||||
};
|
||||
|
||||
namespace internal
{
/// One entry of the reciprocal lookup table: a 16-bit approximation of the
/// reciprocal of a normalized divisor whose top 9 bits are (0x100 | d9).
constexpr uint16_t reciprocal_table_item(uint8_t d9) noexcept
{
    return uint16_t(0x7fd00 / (0x100 | d9));
}

// Helper macros that expand to 4 / 32 / 256 consecutive table entries,
// so the whole table can be built at compile time below.
#define REPEAT4(x) \
    reciprocal_table_item((x) + 0), reciprocal_table_item((x) + 1), \
        reciprocal_table_item((x) + 2), reciprocal_table_item((x) + 3)

#define REPEAT32(x) \
    REPEAT4((x) + 4 * 0), REPEAT4((x) + 4 * 1), REPEAT4((x) + 4 * 2), REPEAT4((x) + 4 * 3), \
        REPEAT4((x) + 4 * 4), REPEAT4((x) + 4 * 5), REPEAT4((x) + 4 * 6), REPEAT4((x) + 4 * 7)

#define REPEAT256() \
    REPEAT32(32 * 0), REPEAT32(32 * 1), REPEAT32(32 * 2), REPEAT32(32 * 3), REPEAT32(32 * 4), \
        REPEAT32(32 * 5), REPEAT32(32 * 6), REPEAT32(32 * 7)

/// Reciprocal lookup table.
constexpr uint16_t reciprocal_table[] = {REPEAT256()};

#undef REPEAT4
#undef REPEAT32
#undef REPEAT256
}  // namespace internal
|
||||
|
||||
/// Computes the reciprocal (2^128 - 1) / d - 2^64 for normalized d.
///
/// Based on Algorithm 2 from "Improved division by invariant integers".
inline uint64_t reciprocal_2by1(uint64_t d) noexcept
{
    INTX_REQUIRE(d & 0x8000000000000000);  // Must be normalized.

    // Initial ~11-bit approximation from the table, indexed by the top 9 bits
    // of d with the implicit leading 1 stripped (hence the -256).
    const uint64_t d9 = d >> 55;
    const uint32_t v0 = internal::reciprocal_table[d9 - 256];

    // Two refinement steps widen the approximation's precision,
    // working against d40, the top 40 bits of d rounded up.
    const uint64_t d40 = (d >> 24) + 1;
    const uint64_t v1 = (v0 << 11) - uint32_t(v0 * v0 * d40 >> 40) - 1;

    const uint64_t v2 = (v1 << 13) + (v1 * (0x1000000000000000 - v1 * d40) >> 47);

    // Final step uses the full divisor, split into d63 = ceil(d/2) and the
    // low bit d0; the (0 - d0) mask applies the half-bit correction only
    // when d is odd.
    const uint64_t d0 = d & 1;
    const uint64_t d63 = (d >> 1) + d0;  // ceil(d/2)
    const uint64_t e = ((v2 >> 1) & (0 - d0)) - v2 * d63;
    const uint64_t v3 = (umul(v2, e).hi >> 1) + (v2 << 31);

    // One last adjustment subtracts the residual error of v3.
    const uint64_t v4 = v3 - (umul(v3, d) + d).hi - d;
    return v4;
}
|
||||
|
||||
/// Computes the reciprocal of a normalized 128-bit divisor for use by the
/// 3-by-2 division step (udivrem_3by2).
inline uint64_t reciprocal_3by2(uint128 d) noexcept
{
    // Start from the 2-by-1 reciprocal of the high word, then adjust it
    // downward to account for the low word of the divisor.
    auto v = reciprocal_2by1(d.hi);
    auto p = d.hi * v;
    p += d.lo;
    if (p < d.lo)  // p overflowed when adding d.lo.
    {
        --v;
        if (p >= d.hi)
        {
            --v;
            p -= d.hi;
        }
        p -= d.hi;
    }

    const auto t = umul(v, d.lo);

    p += t.hi;
    if (p < t.hi)  // p overflowed again; at most two more decrements needed.
    {
        --v;
        if (p >= d.hi)
        {
            if (p > d.hi || t.lo >= d.lo)
                --v;
        }
    }
    return v;
}
|
||||
|
||||
/// Divides a 128-bit dividend by a normalized 64-bit divisor d, using the
/// precomputed reciprocal v (see reciprocal_2by1). Returns the 64-bit
/// quotient and remainder.
inline div_result<uint64_t> udivrem_2by1(uint128 u, uint64_t d, uint64_t v) noexcept
{
    // Candidate quotient in q.hi: high part of v*u.hi plus the dividend,
    // then incremented by one.
    auto q = umul(v, u.hi);
    q = fast_add(q, u);

    ++q.hi;

    // Candidate remainder.
    auto r = u.lo - q.hi * d;

    // At most one correction in each direction brings r into [0, d).
    if (r > q.lo)
    {
        --q.hi;
        r += d;
    }

    if (r >= d)
    {
        ++q.hi;
        r -= d;
    }

    return {q.hi, r};
}
|
||||
|
||||
/// Divides the 192-bit value (u2:u1:u0) by a normalized 128-bit divisor d,
/// using the precomputed reciprocal v (see reciprocal_3by2). Returns a
/// 64-bit quotient and a 128-bit remainder.
inline div_result<uint64_t, uint128> udivrem_3by2(
    uint64_t u2, uint64_t u1, uint64_t u0, uint128 d, uint64_t v) noexcept
{
    // Candidate quotient from the top two dividend words.
    auto q = umul(v, u2);
    q = fast_add(q, {u2, u1});

    auto r1 = u1 - q.hi * d.hi;

    auto t = umul(d.lo, q.hi);

    // Candidate 128-bit remainder.
    auto r = uint128{r1, u0} - t - d;
    r1 = r.hi;

    ++q.hi;

    // At most one correction in each direction brings r into [0, d).
    if (r1 >= q.lo)
    {
        --q.hi;
        r += d;
    }

    if (r >= d)
    {
        ++q.hi;
        r -= d;
    }

    return {q.hi, r};
}
|
||||
|
||||
/// Unsigned 128-bit division returning quotient and remainder.
/// Dispatches on the divisor's size: 64-bit divisor, divisor larger than
/// the dividend, already-normalized 128-bit divisor, or the general case.
inline div_result<uint128> udivrem(uint128 x, uint128 y) noexcept
{
    if (y.hi == 0)  // 128-by-64 division.
    {
        INTX_REQUIRE(y.lo != 0);  // Division by 0.

        // Normalize the divisor so its top bit is set. rsh_mask is all-ones
        // unless lsh == 0, in which case it zeroes the complementary-shift
        // terms (a 64-bit shift by 64 would be undefined behavior).
        const auto lsh = clz(y.lo);
        const auto rsh = (64 - lsh) % 64;
        const auto rsh_mask = uint64_t{lsh == 0} - 1;

        const auto yn = y.lo << lsh;
        const auto xn_lo = x.lo << lsh;
        const auto xn_hi = (x.hi << lsh) | ((x.lo >> rsh) & rsh_mask);
        const auto xn_ex = (x.hi >> rsh) & rsh_mask;

        // Two chained 2-by-1 steps produce the high and low quotient words.
        const auto v = reciprocal_2by1(yn);
        const auto res1 = udivrem_2by1({xn_ex, xn_hi}, yn, v);
        const auto res2 = udivrem_2by1({res1.rem, xn_lo}, yn, v);
        // Denormalize the remainder before returning.
        return {{res1.quot, res2.quot}, res2.rem >> lsh};
    }

    if (y.hi > x.hi)  // Divisor is larger than the dividend.
        return {0, x};

    const auto lsh = clz(y.hi);
    if (lsh == 0)  // Divisor already normalized: quotient is 0 or 1.
    {
        const auto q = unsigned{y.hi < x.hi} | unsigned{y.lo <= x.lo};
        return {q, x - (q ? y : 0)};
    }

    // General case: normalize both operands and do one 3-by-2 step.
    const auto rsh = 64 - lsh;

    const auto yn_lo = y.lo << lsh;
    const auto yn_hi = (y.hi << lsh) | (y.lo >> rsh);
    const auto xn_lo = x.lo << lsh;
    const auto xn_hi = (x.hi << lsh) | (x.lo >> rsh);
    const auto xn_ex = x.hi >> rsh;

    const auto v = reciprocal_3by2({yn_hi, yn_lo});
    const auto res = udivrem_3by2(xn_ex, xn_hi, xn_lo, {yn_hi, yn_lo}, v);

    return {res.quot, res.rem >> lsh};
}
|
||||
|
||||
/// Signed 128-bit division (two's complement): divides the magnitudes and
/// restores signs — the quotient is negative iff operand signs differ,
/// the remainder takes the dividend's sign.
inline div_result<uint128> sdivrem(uint128 x, uint128 y) noexcept
{
    constexpr auto sign_mask = uint128{1} << 127;
    const auto x_neg = (x & sign_mask) != 0;
    const auto y_neg = (y & sign_mask) != 0;

    const auto abs_x = x_neg ? -x : x;
    const auto abs_y = y_neg ? -y : y;

    const auto res = udivrem(abs_x, abs_y);

    const auto quot = (x_neg != y_neg) ? -res.quot : res.quot;
    const auto rem = x_neg ? -res.rem : res.rem;
    return {quot, rem};
}
|
||||
|
||||
/// Unsigned division: the quotient part of udivrem.
inline uint128 operator/(uint128 x, uint128 y) noexcept
{
    const auto res = udivrem(x, y);
    return res.quot;
}
|
||||
|
||||
/// Unsigned modulo: the remainder part of udivrem.
inline uint128 operator%(uint128 x, uint128 y) noexcept
{
    const auto res = udivrem(x, y);
    return res.rem;
}
|
||||
|
||||
/// In-place division.
inline uint128& operator/=(uint128& x, uint128 y) noexcept
{
    x = x / y;
    return x;
}
|
||||
|
||||
/// In-place modulo.
inline uint128& operator%=(uint128& x, uint128 y) noexcept
{
    x = x % y;
    return x;
}
|
||||
|
||||
/// @}
|
||||
|
||||
} // namespace intx
|
||||
|
||||
|
||||
namespace std
{
/// std::numeric_limits specialization for intx::uint<N>:
/// an exact, unsigned, bounded, modulo-2^N integer type.
template <unsigned N>
struct numeric_limits<intx::uint<N>>
{
    using type = intx::uint<N>;

    static constexpr bool is_specialized = true;
    static constexpr bool is_integer = true;
    static constexpr bool is_signed = false;
    static constexpr bool is_exact = true;
    static constexpr bool has_infinity = false;
    static constexpr bool has_quiet_NaN = false;
    static constexpr bool has_signaling_NaN = false;
    static constexpr float_denorm_style has_denorm = denorm_absent;
    static constexpr bool has_denorm_loss = false;
    static constexpr float_round_style round_style = round_toward_zero;
    static constexpr bool is_iec559 = false;
    static constexpr bool is_bounded = true;
    static constexpr bool is_modulo = true;
    // digits10 uses log10(2) ~= 0.30103 to convert the bit count.
    static constexpr int digits = CHAR_BIT * sizeof(type);
    static constexpr int digits10 = int(0.3010299956639812 * digits);
    static constexpr int max_digits10 = 0;
    static constexpr int radix = 2;
    static constexpr int min_exponent = 0;
    static constexpr int min_exponent10 = 0;
    static constexpr int max_exponent = 0;
    static constexpr int max_exponent10 = 0;
    static constexpr bool traps = std::numeric_limits<unsigned>::traps;
    static constexpr bool tinyness_before = false;

    static constexpr type min() noexcept { return 0; }
    static constexpr type lowest() noexcept { return min(); }
    static constexpr type max() noexcept { return ~type{0}; }
    static constexpr type epsilon() noexcept { return 0; }
    static constexpr type round_error() noexcept { return 0; }
    static constexpr type infinity() noexcept { return 0; }
    static constexpr type quiet_NaN() noexcept { return 0; }
    static constexpr type signaling_NaN() noexcept { return 0; }
    static constexpr type denorm_min() noexcept { return 0; }
};
}  // namespace std
|
||||
|
||||
namespace intx
|
||||
{
|
||||
/// Reports a fatal error: throws an exception of type T when exceptions are
/// enabled, otherwise prints the message to stderr and aborts.
template <typename T>
[[noreturn]] inline void throw_(const char* what)
{
#if __cpp_exceptions
    throw T{what};
#else
    std::fputs(what, stderr);
    std::abort();
#endif
}
|
||||
|
||||
constexpr inline int from_dec_digit(char c)
|
||||
{
|
||||
if (c < '0' || c > '9')
|
||||
throw_<std::invalid_argument>("invalid digit");
|
||||
return c - '0';
|
||||
}
|
||||
|
||||
constexpr inline int from_hex_digit(char c)
|
||||
{
|
||||
if (c >= 'a' && c <= 'f')
|
||||
return c - ('a' - 10);
|
||||
if (c >= 'A' && c <= 'F')
|
||||
return c - ('A' - 10);
|
||||
return from_dec_digit(c);
|
||||
}
|
||||
|
||||
/// Parses an unsigned integer of type Int from a NUL-terminated string.
/// Accepts decimal, or hexadecimal with a "0x" prefix.
/// Reports std::invalid_argument on a bad digit and std::out_of_range
/// when the value does not fit in Int.
template <typename Int>
constexpr Int from_string(const char* str)
{
    auto s = str;
    auto x = Int{};
    int num_digits = 0;

    if (s[0] == '0' && s[1] == 'x')
    {
        s += 2;
        while (const auto c = *s++)
        {
            // Each hex digit contributes 4 bits, so more than
            // 2 * sizeof(Int) digits cannot fit.
            if (++num_digits > int{sizeof(x) * 2})
                throw_<std::out_of_range>(str);
            x = (x << 4) | from_hex_digit(c);
        }
        return x;
    }

    while (const auto c = *s++)
    {
        // Cheap digit-count pre-check; the wrap-around check below
        // covers the boundary cases it lets through.
        if (num_digits++ > std::numeric_limits<Int>::digits10)
            throw_<std::out_of_range>(str);

        const auto d = from_dec_digit(c);
        x = constexpr_mul(x, Int{10}) + d;
        // If the multiply-add wrapped, the result is smaller than the
        // digit just added.
        if (x < d)
            throw_<std::out_of_range>(str);
    }
    return x;
}
|
||||
|
||||
/// Parses an unsigned integer of type Int from a std::string
/// by delegating to the C-string overload.
template <typename Int>
constexpr Int from_string(const std::string& s)
{
    const char* cstr = s.c_str();
    return from_string<Int>(cstr);
}
|
||||
|
||||
/// User-defined literal suffix building a uint128 from its decimal
/// or 0x-prefixed hexadecimal spelling (parsed by from_string).
constexpr uint128 operator""_u128(const char* s)
{
    return from_string<uint128>(s);
}
|
||||
|
||||
template <unsigned N>
|
||||
inline std::string to_string(uint<N> x, int base = 10)
|
||||
{
|
||||
if (base < 2 || base > 36)
|
||||
throw_<std::invalid_argument>("invalid base");
|
||||
|
||||
if (x == 0)
|
||||
return "0";
|
||||
|
||||
auto s = std::string{};
|
||||
while (x != 0)
|
||||
{
|
||||
// TODO: Use constexpr udivrem_1?
|
||||
const auto res = udivrem(x, uint<N>{base});
|
||||
const auto d = int(res.rem);
|
||||
const auto c = d < 10 ? '0' + d : 'a' + d - 10;
|
||||
s.push_back(char(c));
|
||||
x = res.quot;
|
||||
}
|
||||
std::reverse(s.begin(), s.end());
|
||||
return s;
|
||||
}
|
||||
|
||||
/// Renders x in hexadecimal (base-16 to_string, lowercase, no prefix).
template <unsigned N>
inline std::string hex(uint<N> x)
{
    return to_string(x, 16);
}
|
||||
} // namespace intx
|
File diff suppressed because it is too large
Load Diff
|
@ -1,4 +1,4 @@
|
|||
/* stb_image - v2.26 - public domain image loader - http://nothings.org/stb
|
||||
/* stb_image - v2.27 - public domain image loader - http://nothings.org/stb
|
||||
no warranty implied; use at your own risk
|
||||
|
||||
Do this:
|
||||
|
@ -48,6 +48,7 @@ LICENSE
|
|||
|
||||
RECENT REVISION HISTORY:
|
||||
|
||||
2.27 (2021-07-11) document stbi_info better, 16-bit PNM support, bug fixes
|
||||
2.26 (2020-07-13) many minor fixes
|
||||
2.25 (2020-02-02) fix warnings
|
||||
2.24 (2020-02-02) fix warnings; thread-local failure_reason and flip_vertically
|
||||
|
@ -89,7 +90,7 @@ RECENT REVISION HISTORY:
|
|||
Jeremy Sawicki (handle all ImageNet JPGs)
|
||||
Optimizations & bugfixes Mikhail Morozov (1-bit BMP)
|
||||
Fabian "ryg" Giesen Anael Seghezzi (is-16-bit query)
|
||||
Arseny Kapoulkine
|
||||
Arseny Kapoulkine Simon Breuss (16-bit PNM)
|
||||
John-Mark Allen
|
||||
Carmelo J Fdez-Aguera
|
||||
|
||||
|
@ -102,7 +103,7 @@ RECENT REVISION HISTORY:
|
|||
Thomas Ruf Ronny Chevalier github:rlyeh
|
||||
Janez Zemva John Bartholomew Michal Cichon github:romigrou
|
||||
Jonathan Blow Ken Hamada Tero Hanninen github:svdijk
|
||||
Laurent Gomila Cort Stratton github:snagar
|
||||
Eugene Golushkov Laurent Gomila Cort Stratton github:snagar
|
||||
Aruelien Pocheville Sergio Gonzalez Thibault Reuille github:Zelex
|
||||
Cass Everitt Ryamond Barbiero github:grim210
|
||||
Paul Du Bois Engin Manap Aldo Culquicondor github:sammyhw
|
||||
|
@ -110,11 +111,13 @@ RECENT REVISION HISTORY:
|
|||
Josh Tobin Matthew Gregan github:poppolopoppo
|
||||
Julian Raschke Gregory Mullen Christian Floisand github:darealshinji
|
||||
Baldur Karlsson Kevin Schmidt JR Smith github:Michaelangel007
|
||||
Brad Weinberger Matvey Cherevko [reserved]
|
||||
Brad Weinberger Matvey Cherevko github:mosra
|
||||
Luca Sas Alexander Veselov Zack Middleton [reserved]
|
||||
Ryan C. Gordon [reserved] [reserved]
|
||||
DO NOT ADD YOUR NAME HERE
|
||||
|
||||
Jacko Dirks
|
||||
|
||||
To add your name to the credits, pick a random blank space in the middle and fill it.
|
||||
80% of merge conflicts on stb PRs are due to people adding their name at the end
|
||||
of the credits.
|
||||
|
@ -176,6 +179,32 @@ RECENT REVISION HISTORY:
|
|||
//
|
||||
// Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
|
||||
//
|
||||
// To query the width, height and component count of an image without having to
|
||||
// decode the full file, you can use the stbi_info family of functions:
|
||||
//
|
||||
// int x,y,n,ok;
|
||||
// ok = stbi_info(filename, &x, &y, &n);
|
||||
// // returns ok=1 and sets x, y, n if image is a supported format,
|
||||
// // 0 otherwise.
|
||||
//
|
||||
// Note that stb_image pervasively uses ints in its public API for sizes,
|
||||
// including sizes of memory buffers. This is now part of the API and thus
|
||||
// hard to change without causing breakage. As a result, the various image
|
||||
// loaders all have certain limits on image size; these differ somewhat
|
||||
// by format but generally boil down to either just under 2GB or just under
|
||||
// 1GB. When the decoded image would be larger than this, stb_image decoding
|
||||
// will fail.
|
||||
//
|
||||
// Additionally, stb_image will reject image files that have any of their
|
||||
// dimensions set to a larger value than the configurable STBI_MAX_DIMENSIONS,
|
||||
// which defaults to 2**24 = 16777216 pixels. Due to the above memory limit,
|
||||
// the only way to have an image with such dimensions load correctly
|
||||
// is for it to have a rather extreme aspect ratio. Either way, the
|
||||
// assumption here is that such larger images are likely to be malformed
|
||||
// or malicious. If you do need to load an image with individual dimensions
|
||||
// larger than that, and it still fits in the overall size limit, you can
|
||||
// #define STBI_MAX_DIMENSIONS on your own to be something larger.
|
||||
//
|
||||
// ===========================================================================
|
||||
//
|
||||
// UNICODE:
|
||||
|
@ -281,11 +310,10 @@ RECENT REVISION HISTORY:
|
|||
//
|
||||
// iPhone PNG support:
|
||||
//
|
||||
// By default we convert iphone-formatted PNGs back to RGB, even though
|
||||
// they are internally encoded differently. You can disable this conversion
|
||||
// by calling stbi_convert_iphone_png_to_rgb(0), in which case
|
||||
// you will always just get the native iphone "format" through (which
|
||||
// is BGR stored in RGB).
|
||||
// We optionally support converting iPhone-formatted PNGs (which store
|
||||
// premultiplied BGRA) back to RGB, even though they're internally encoded
|
||||
// differently. To enable this conversion, call
|
||||
// stbi_convert_iphone_png_to_rgb(1).
|
||||
//
|
||||
// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
|
||||
// pixel to remove any premultiplied alpha *only* if the image file explicitly
|
||||
|
@ -489,6 +517,8 @@ STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);
|
|||
// as above, but only applies to images loaded on the thread that calls the function
|
||||
// this function is only available if your compiler supports thread-local variables;
|
||||
// calling it will fail to link if your compiler doesn't
|
||||
STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply);
|
||||
STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert);
|
||||
STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip);
|
||||
|
||||
// ZLIB client - used by PNG, available for other purposes
|
||||
|
@ -634,7 +664,7 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
|
|||
#ifdef STBI_HAS_LROTL
|
||||
#define stbi_lrot(x,y) _lrotl(x,y)
|
||||
#else
|
||||
#define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (32 - (y))))
|
||||
#define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (-(y) & 31)))
|
||||
#endif
|
||||
|
||||
#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
|
||||
|
@ -748,9 +778,12 @@ static int stbi__sse2_available(void)
|
|||
|
||||
#ifdef STBI_NEON
|
||||
#include <arm_neon.h>
|
||||
// assume GCC or Clang on ARM targets
|
||||
#ifdef _MSC_VER
|
||||
#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
|
||||
#else
|
||||
#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef STBI_SIMD_ALIGN
|
||||
#define STBI_SIMD_ALIGN(type, name) type name
|
||||
|
@ -924,6 +957,7 @@ static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
|
|||
static int stbi__pnm_test(stbi__context *s);
|
||||
static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
|
||||
static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
|
||||
static int stbi__pnm_is16(stbi__context *s);
|
||||
#endif
|
||||
|
||||
static
|
||||
|
@ -998,7 +1032,7 @@ static int stbi__mad3sizes_valid(int a, int b, int c, int add)
|
|||
}
|
||||
|
||||
// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow
|
||||
#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
|
||||
#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM)
|
||||
static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
|
||||
{
|
||||
return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) &&
|
||||
|
@ -1021,7 +1055,7 @@ static void *stbi__malloc_mad3(int a, int b, int c, int add)
|
|||
return stbi__malloc(a*b*c + add);
|
||||
}
|
||||
|
||||
#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
|
||||
#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM)
|
||||
static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
|
||||
{
|
||||
if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL;
|
||||
|
@ -1087,9 +1121,8 @@ static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int re
|
|||
ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
|
||||
ri->num_channels = 0;
|
||||
|
||||
#ifndef STBI_NO_JPEG
|
||||
if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri);
|
||||
#endif
|
||||
// test the formats with a very explicit header first (at least a FOURCC
|
||||
// or distinctive magic number first)
|
||||
#ifndef STBI_NO_PNG
|
||||
if (stbi__png_test(s)) return stbi__png_load(s,x,y,comp,req_comp, ri);
|
||||
#endif
|
||||
|
@ -1107,6 +1140,13 @@ static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int re
|
|||
#ifndef STBI_NO_PIC
|
||||
if (stbi__pic_test(s)) return stbi__pic_load(s,x,y,comp,req_comp, ri);
|
||||
#endif
|
||||
|
||||
// then the formats that can end up attempting to load with just 1 or 2
|
||||
// bytes matching expectations; these are prone to false positives, so
|
||||
// try them later
|
||||
#ifndef STBI_NO_JPEG
|
||||
if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri);
|
||||
#endif
|
||||
#ifndef STBI_NO_PNM
|
||||
if (stbi__pnm_test(s)) return stbi__pnm_load(s,x,y,comp,req_comp, ri);
|
||||
#endif
|
||||
|
@ -1262,12 +1302,12 @@ static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, in
|
|||
|
||||
#ifndef STBI_NO_STDIO
|
||||
|
||||
#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)
|
||||
#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
|
||||
STBI_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide);
|
||||
STBI_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default);
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)
|
||||
#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
|
||||
STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input)
|
||||
{
|
||||
return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int) bufferlen, NULL, NULL);
|
||||
|
@ -1277,16 +1317,16 @@ STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wch
|
|||
static FILE *stbi__fopen(char const *filename, char const *mode)
|
||||
{
|
||||
FILE *f;
|
||||
#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)
|
||||
#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
|
||||
wchar_t wMode[64];
|
||||
wchar_t wFilename[1024];
|
||||
if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)))
|
||||
if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)/sizeof(*wFilename)))
|
||||
return 0;
|
||||
|
||||
if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)))
|
||||
if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)/sizeof(*wMode)))
|
||||
return 0;
|
||||
|
||||
#if _MSC_VER >= 1400
|
||||
#if defined(_MSC_VER) && _MSC_VER >= 1400
|
||||
if (0 != _wfopen_s(&f, wFilename, wMode))
|
||||
f = 0;
|
||||
#else
|
||||
|
@ -1662,7 +1702,8 @@ static int stbi__get16le(stbi__context *s)
|
|||
static stbi__uint32 stbi__get32le(stbi__context *s)
|
||||
{
|
||||
stbi__uint32 z = stbi__get16le(s);
|
||||
return z + (stbi__get16le(s) << 16);
|
||||
z += (stbi__uint32)stbi__get16le(s) << 16;
|
||||
return z;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -2090,13 +2131,12 @@ stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
|
|||
int sgn;
|
||||
if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
|
||||
|
||||
sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB
|
||||
sgn = j->code_buffer >> 31; // sign bit always in MSB; 0 if MSB clear (positive), 1 if MSB set (negative)
|
||||
k = stbi_lrot(j->code_buffer, n);
|
||||
if (n < 0 || n >= (int) (sizeof(stbi__bmask)/sizeof(*stbi__bmask))) return 0;
|
||||
j->code_buffer = k & ~stbi__bmask[n];
|
||||
k &= stbi__bmask[n];
|
||||
j->code_bits -= n;
|
||||
return k + (stbi__jbias[n] & ~sgn);
|
||||
return k + (stbi__jbias[n] & (sgn - 1));
|
||||
}
|
||||
|
||||
// get some unsigned bits
|
||||
|
@ -2146,7 +2186,7 @@ static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman
|
|||
|
||||
if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
|
||||
t = stbi__jpeg_huff_decode(j, hdc);
|
||||
if (t < 0) return stbi__err("bad huffman code","Corrupt JPEG");
|
||||
if (t < 0 || t > 15) return stbi__err("bad huffman code","Corrupt JPEG");
|
||||
|
||||
// 0 all the ac values now so we can do it 32-bits at a time
|
||||
memset(data,0,64*sizeof(data[0]));
|
||||
|
@ -2203,12 +2243,12 @@ static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__
|
|||
// first scan for DC coefficient, must be first
|
||||
memset(data,0,64*sizeof(data[0])); // 0 all the ac values now
|
||||
t = stbi__jpeg_huff_decode(j, hdc);
|
||||
if (t == -1) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
|
||||
if (t < 0 || t > 15) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
|
||||
diff = t ? stbi__extend_receive(j, t) : 0;
|
||||
|
||||
dc = j->img_comp[b].dc_pred + diff;
|
||||
j->img_comp[b].dc_pred = dc;
|
||||
data[0] = (short) (dc << j->succ_low);
|
||||
data[0] = (short) (dc * (1 << j->succ_low));
|
||||
} else {
|
||||
// refinement scan for DC coefficient
|
||||
if (stbi__jpeg_get_bit(j))
|
||||
|
@ -2245,7 +2285,7 @@ static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__
|
|||
j->code_buffer <<= s;
|
||||
j->code_bits -= s;
|
||||
zig = stbi__jpeg_dezigzag[k++];
|
||||
data[zig] = (short) ((r >> 8) << shift);
|
||||
data[zig] = (short) ((r >> 8) * (1 << shift));
|
||||
} else {
|
||||
int rs = stbi__jpeg_huff_decode(j, hac);
|
||||
if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
|
||||
|
@ -2263,7 +2303,7 @@ static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__
|
|||
} else {
|
||||
k += r;
|
||||
zig = stbi__jpeg_dezigzag[k++];
|
||||
data[zig] = (short) (stbi__extend_receive(j,s) << shift);
|
||||
data[zig] = (short) (stbi__extend_receive(j,s) * (1 << shift));
|
||||
}
|
||||
}
|
||||
} while (k <= j->spec_end);
|
||||
|
@ -3227,6 +3267,13 @@ static int stbi__process_frame_header(stbi__jpeg *z, int scan)
|
|||
if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
|
||||
}
|
||||
|
||||
// check that plane subsampling factors are integer ratios; our resamplers can't deal with fractional ratios
|
||||
// and I've never seen a non-corrupted JPEG file actually use them
|
||||
for (i=0; i < s->img_n; ++i) {
|
||||
if (h_max % z->img_comp[i].h != 0) return stbi__err("bad H","Corrupt JPEG");
|
||||
if (v_max % z->img_comp[i].v != 0) return stbi__err("bad V","Corrupt JPEG");
|
||||
}
|
||||
|
||||
// compute interleaved mcu info
|
||||
z->img_h_max = h_max;
|
||||
z->img_v_max = v_max;
|
||||
|
@ -3782,6 +3829,10 @@ static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp
|
|||
else
|
||||
decode_n = z->s->img_n;
|
||||
|
||||
// nothing to do if no components requested; check this now to avoid
|
||||
// accessing uninitialized coutput[0] later
|
||||
if (decode_n <= 0) { stbi__cleanup_jpeg(z); return NULL; }
|
||||
|
||||
// resample and color-convert
|
||||
{
|
||||
int k;
|
||||
|
@ -3924,6 +3975,7 @@ static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int re
|
|||
{
|
||||
unsigned char* result;
|
||||
stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg));
|
||||
if (!j) return stbi__errpuc("outofmem", "Out of memory");
|
||||
STBI_NOTUSED(ri);
|
||||
j->s = s;
|
||||
stbi__setup_jpeg(j);
|
||||
|
@ -3936,6 +3988,7 @@ static int stbi__jpeg_test(stbi__context *s)
|
|||
{
|
||||
int r;
|
||||
stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
|
||||
if (!j) return stbi__err("outofmem", "Out of memory");
|
||||
j->s = s;
|
||||
stbi__setup_jpeg(j);
|
||||
r = stbi__decode_jpeg_header(j, STBI__SCAN_type);
|
||||
|
@ -3960,6 +4013,7 @@ static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
|
|||
{
|
||||
int result;
|
||||
stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg)));
|
||||
if (!j) return stbi__err("outofmem", "Out of memory");
|
||||
j->s = s;
|
||||
result = stbi__jpeg_info_raw(j, x, y, comp);
|
||||
STBI_FREE(j);
|
||||
|
@ -3979,6 +4033,7 @@ static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
|
|||
// fast-way is faster to check than jpeg huffman, but slow way is slower
|
||||
#define STBI__ZFAST_BITS 9 // accelerate all cases in default tables
|
||||
#define STBI__ZFAST_MASK ((1 << STBI__ZFAST_BITS) - 1)
|
||||
#define STBI__ZNSYMS 288 // number of symbols in literal/length alphabet
|
||||
|
||||
// zlib-style huffman encoding
|
||||
// (jpegs packs from left, zlib from right, so can't share code)
|
||||
|
@ -3988,8 +4043,8 @@ typedef struct
|
|||
stbi__uint16 firstcode[16];
|
||||
int maxcode[17];
|
||||
stbi__uint16 firstsymbol[16];
|
||||
stbi_uc size[288];
|
||||
stbi__uint16 value[288];
|
||||
stbi_uc size[STBI__ZNSYMS];
|
||||
stbi__uint16 value[STBI__ZNSYMS];
|
||||
} stbi__zhuffman;
|
||||
|
||||
stbi_inline static int stbi__bitreverse16(int n)
|
||||
|
@ -4120,7 +4175,7 @@ static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
|
|||
if (s >= 16) return -1; // invalid code!
|
||||
// code size is s, so:
|
||||
b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
|
||||
if (b >= sizeof (z->size)) return -1; // some data was corrupt somewhere!
|
||||
if (b >= STBI__ZNSYMS) return -1; // some data was corrupt somewhere!
|
||||
if (z->size[b] != s) return -1; // was originally an assert, but report failure instead.
|
||||
a->code_buffer >>= s;
|
||||
a->num_bits -= s;
|
||||
|
@ -4317,7 +4372,7 @@ static int stbi__parse_zlib_header(stbi__zbuf *a)
|
|||
return 1;
|
||||
}
|
||||
|
||||
static const stbi_uc stbi__zdefault_length[288] =
|
||||
static const stbi_uc stbi__zdefault_length[STBI__ZNSYMS] =
|
||||
{
|
||||
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
|
||||
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
|
||||
|
@ -4363,7 +4418,7 @@ static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
|
|||
} else {
|
||||
if (type == 1) {
|
||||
// use fixed code lengths
|
||||
if (!stbi__zbuild_huffman(&a->z_length , stbi__zdefault_length , 288)) return 0;
|
||||
if (!stbi__zbuild_huffman(&a->z_length , stbi__zdefault_length , STBI__ZNSYMS)) return 0;
|
||||
if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32)) return 0;
|
||||
} else {
|
||||
if (!stbi__compute_huffman_codes(a)) return 0;
|
||||
|
@ -4759,6 +4814,7 @@ static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint3
|
|||
|
||||
// de-interlacing
|
||||
final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
|
||||
if (!final) return stbi__err("outofmem", "Out of memory");
|
||||
for (p=0; p < 7; ++p) {
|
||||
int xorig[] = { 0,4,0,2,0,1,0 };
|
||||
int yorig[] = { 0,0,4,0,2,0,1 };
|
||||
|
@ -4879,19 +4935,46 @@ static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int
|
|||
return 1;
|
||||
}
|
||||
|
||||
static int stbi__unpremultiply_on_load = 0;
|
||||
static int stbi__de_iphone_flag = 0;
|
||||
static int stbi__unpremultiply_on_load_global = 0;
|
||||
static int stbi__de_iphone_flag_global = 0;
|
||||
|
||||
STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
|
||||
{
|
||||
stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply;
|
||||
stbi__unpremultiply_on_load_global = flag_true_if_should_unpremultiply;
|
||||
}
|
||||
|
||||
STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
|
||||
{
|
||||
stbi__de_iphone_flag = flag_true_if_should_convert;
|
||||
stbi__de_iphone_flag_global = flag_true_if_should_convert;
|
||||
}
|
||||
|
||||
#ifndef STBI_THREAD_LOCAL
|
||||
#define stbi__unpremultiply_on_load stbi__unpremultiply_on_load_global
|
||||
#define stbi__de_iphone_flag stbi__de_iphone_flag_global
|
||||
#else
|
||||
static STBI_THREAD_LOCAL int stbi__unpremultiply_on_load_local, stbi__unpremultiply_on_load_set;
|
||||
static STBI_THREAD_LOCAL int stbi__de_iphone_flag_local, stbi__de_iphone_flag_set;
|
||||
|
||||
STBIDEF void stbi__unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply)
|
||||
{
|
||||
stbi__unpremultiply_on_load_local = flag_true_if_should_unpremultiply;
|
||||
stbi__unpremultiply_on_load_set = 1;
|
||||
}
|
||||
|
||||
STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert)
|
||||
{
|
||||
stbi__de_iphone_flag_local = flag_true_if_should_convert;
|
||||
stbi__de_iphone_flag_set = 1;
|
||||
}
|
||||
|
||||
#define stbi__unpremultiply_on_load (stbi__unpremultiply_on_load_set \
|
||||
? stbi__unpremultiply_on_load_local \
|
||||
: stbi__unpremultiply_on_load_global)
|
||||
#define stbi__de_iphone_flag (stbi__de_iphone_flag_set \
|
||||
? stbi__de_iphone_flag_local \
|
||||
: stbi__de_iphone_flag_global)
|
||||
#endif // STBI_THREAD_LOCAL
|
||||
|
||||
static void stbi__de_iphone(stbi__png *z)
|
||||
{
|
||||
stbi__context *s = z->s;
|
||||
|
@ -5272,6 +5355,32 @@ typedef struct
|
|||
int extra_read;
|
||||
} stbi__bmp_data;
|
||||
|
||||
static int stbi__bmp_set_mask_defaults(stbi__bmp_data *info, int compress)
|
||||
{
|
||||
// BI_BITFIELDS specifies masks explicitly, don't override
|
||||
if (compress == 3)
|
||||
return 1;
|
||||
|
||||
if (compress == 0) {
|
||||
if (info->bpp == 16) {
|
||||
info->mr = 31u << 10;
|
||||
info->mg = 31u << 5;
|
||||
info->mb = 31u << 0;
|
||||
} else if (info->bpp == 32) {
|
||||
info->mr = 0xffu << 16;
|
||||
info->mg = 0xffu << 8;
|
||||
info->mb = 0xffu << 0;
|
||||
info->ma = 0xffu << 24;
|
||||
info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
|
||||
} else {
|
||||
// otherwise, use defaults, which is all-0
|
||||
info->mr = info->mg = info->mb = info->ma = 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
return 0; // error
|
||||
}
|
||||
|
||||
static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
|
||||
{
|
||||
int hsz;
|
||||
|
@ -5299,6 +5408,8 @@ static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
|
|||
if (hsz != 12) {
|
||||
int compress = stbi__get32le(s);
|
||||
if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
|
||||
if (compress >= 4) return stbi__errpuc("BMP JPEG/PNG", "BMP type not supported: unsupported compression"); // this includes PNG/JPEG modes
|
||||
if (compress == 3 && info->bpp != 16 && info->bpp != 32) return stbi__errpuc("bad BMP", "bad BMP"); // bitfields requires 16 or 32 bits/pixel
|
||||
stbi__get32le(s); // discard sizeof
|
||||
stbi__get32le(s); // discard hres
|
||||
stbi__get32le(s); // discard vres
|
||||
|
@ -5313,17 +5424,7 @@ static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
|
|||
}
|
||||
if (info->bpp == 16 || info->bpp == 32) {
|
||||
if (compress == 0) {
|
||||
if (info->bpp == 32) {
|
||||
info->mr = 0xffu << 16;
|
||||
info->mg = 0xffu << 8;
|
||||
info->mb = 0xffu << 0;
|
||||
info->ma = 0xffu << 24;
|
||||
info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
|
||||
} else {
|
||||
info->mr = 31u << 10;
|
||||
info->mg = 31u << 5;
|
||||
info->mb = 31u << 0;
|
||||
}
|
||||
stbi__bmp_set_mask_defaults(info, compress);
|
||||
} else if (compress == 3) {
|
||||
info->mr = stbi__get32le(s);
|
||||
info->mg = stbi__get32le(s);
|
||||
|
@ -5338,6 +5439,7 @@ static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
|
|||
return stbi__errpuc("bad BMP", "bad BMP");
|
||||
}
|
||||
} else {
|
||||
// V4/V5 header
|
||||
int i;
|
||||
if (hsz != 108 && hsz != 124)
|
||||
return stbi__errpuc("bad BMP", "bad BMP");
|
||||
|
@ -5345,6 +5447,8 @@ static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
|
|||
info->mg = stbi__get32le(s);
|
||||
info->mb = stbi__get32le(s);
|
||||
info->ma = stbi__get32le(s);
|
||||
if (compress != 3) // override mr/mg/mb unless in BI_BITFIELDS mode, as per docs
|
||||
stbi__bmp_set_mask_defaults(info, compress);
|
||||
stbi__get32le(s); // discard color space
|
||||
for (i=0; i < 12; ++i)
|
||||
stbi__get32le(s); // discard color space parameters
|
||||
|
@ -5394,8 +5498,7 @@ static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req
|
|||
psize = (info.offset - info.extra_read - info.hsz) >> 2;
|
||||
}
|
||||
if (psize == 0) {
|
||||
STBI_ASSERT(info.offset == s->callback_already_read + (int) (s->img_buffer - s->img_buffer_original));
|
||||
if (info.offset != s->callback_already_read + (s->img_buffer - s->buffer_start)) {
|
||||
if (info.offset != s->callback_already_read + (s->img_buffer - s->img_buffer_original)) {
|
||||
return stbi__errpuc("bad offset", "Corrupt BMP");
|
||||
}
|
||||
}
|
||||
|
@ -6342,6 +6445,7 @@ static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_c
|
|||
|
||||
// intermediate buffer is RGBA
|
||||
result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0);
|
||||
if (!result) return stbi__errpuc("outofmem", "Out of memory");
|
||||
memset(result, 0xff, x*y*4);
|
||||
|
||||
if (!stbi__pic_load_core(s,x,y,comp, result)) {
|
||||
|
@ -6457,6 +6561,7 @@ static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_in
|
|||
static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
|
||||
{
|
||||
stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
|
||||
if (!g) return stbi__err("outofmem", "Out of memory");
|
||||
if (!stbi__gif_header(s, g, comp, 1)) {
|
||||
STBI_FREE(g);
|
||||
stbi__rewind( s );
|
||||
|
@ -6766,6 +6871,17 @@ static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, i
|
|||
}
|
||||
}
|
||||
|
||||
static void *stbi__load_gif_main_outofmem(stbi__gif *g, stbi_uc *out, int **delays)
|
||||
{
|
||||
STBI_FREE(g->out);
|
||||
STBI_FREE(g->history);
|
||||
STBI_FREE(g->background);
|
||||
|
||||
if (out) STBI_FREE(out);
|
||||
if (delays && *delays) STBI_FREE(*delays);
|
||||
return stbi__errpuc("outofmem", "Out of memory");
|
||||
}
|
||||
|
||||
static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
|
||||
{
|
||||
if (stbi__gif_test(s)) {
|
||||
|
@ -6777,6 +6893,10 @@ static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y,
|
|||
int stride;
|
||||
int out_size = 0;
|
||||
int delays_size = 0;
|
||||
|
||||
STBI_NOTUSED(out_size);
|
||||
STBI_NOTUSED(delays_size);
|
||||
|
||||
memset(&g, 0, sizeof(g));
|
||||
if (delays) {
|
||||
*delays = 0;
|
||||
|
@ -6794,26 +6914,29 @@ static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y,
|
|||
|
||||
if (out) {
|
||||
void *tmp = (stbi_uc*) STBI_REALLOC_SIZED( out, out_size, layers * stride );
|
||||
if (NULL == tmp) {
|
||||
STBI_FREE(g.out);
|
||||
STBI_FREE(g.history);
|
||||
STBI_FREE(g.background);
|
||||
return stbi__errpuc("outofmem", "Out of memory");
|
||||
}
|
||||
if (!tmp)
|
||||
return stbi__load_gif_main_outofmem(&g, out, delays);
|
||||
else {
|
||||
out = (stbi_uc*) tmp;
|
||||
out_size = layers * stride;
|
||||
}
|
||||
|
||||
if (delays) {
|
||||
*delays = (int*) STBI_REALLOC_SIZED( *delays, delays_size, sizeof(int) * layers );
|
||||
int *new_delays = (int*) STBI_REALLOC_SIZED( *delays, delays_size, sizeof(int) * layers );
|
||||
if (!new_delays)
|
||||
return stbi__load_gif_main_outofmem(&g, out, delays);
|
||||
*delays = new_delays;
|
||||
delays_size = layers * sizeof(int);
|
||||
}
|
||||
} else {
|
||||
out = (stbi_uc*)stbi__malloc( layers * stride );
|
||||
if (!out)
|
||||
return stbi__load_gif_main_outofmem(&g, out, delays);
|
||||
out_size = layers * stride;
|
||||
if (delays) {
|
||||
*delays = (int*) stbi__malloc( layers * sizeof(int) );
|
||||
if (!*delays)
|
||||
return stbi__load_gif_main_outofmem(&g, out, delays);
|
||||
delays_size = layers * sizeof(int);
|
||||
}
|
||||
}
|
||||
|
@ -7138,9 +7261,10 @@ static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
|
|||
|
||||
info.all_a = 255;
|
||||
p = stbi__bmp_parse_header(s, &info);
|
||||
stbi__rewind( s );
|
||||
if (p == NULL)
|
||||
if (p == NULL) {
|
||||
stbi__rewind( s );
|
||||
return 0;
|
||||
}
|
||||
if (x) *x = s->img_x;
|
||||
if (y) *y = s->img_y;
|
||||
if (comp) {
|
||||
|
@ -7206,8 +7330,8 @@ static int stbi__psd_is16(stbi__context *s)
|
|||
stbi__rewind( s );
|
||||
return 0;
|
||||
}
|
||||
(void) stbi__get32be(s);
|
||||
(void) stbi__get32be(s);
|
||||
STBI_NOTUSED(stbi__get32be(s));
|
||||
STBI_NOTUSED(stbi__get32be(s));
|
||||
depth = stbi__get16be(s);
|
||||
if (depth != 16) {
|
||||
stbi__rewind( s );
|
||||
|
@ -7286,7 +7410,6 @@ static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
|
|||
// Known limitations:
|
||||
// Does not support comments in the header section
|
||||
// Does not support ASCII image data (formats P2 and P3)
|
||||
// Does not support 16-bit-per-channel
|
||||
|
||||
#ifndef STBI_NO_PNM
|
||||
|
||||
|
@ -7307,7 +7430,8 @@ static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req
|
|||
stbi_uc *out;
|
||||
STBI_NOTUSED(ri);
|
||||
|
||||
if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n))
|
||||
ri->bits_per_channel = stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n);
|
||||
if (ri->bits_per_channel == 0)
|
||||
return 0;
|
||||
|
||||
if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
|
||||
|
@ -7317,12 +7441,12 @@ static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req
|
|||
*y = s->img_y;
|
||||
if (comp) *comp = s->img_n;
|
||||
|
||||
if (!stbi__mad3sizes_valid(s->img_n, s->img_x, s->img_y, 0))
|
||||
if (!stbi__mad4sizes_valid(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0))
|
||||
return stbi__errpuc("too large", "PNM too large");
|
||||
|
||||
out = (stbi_uc *) stbi__malloc_mad3(s->img_n, s->img_x, s->img_y, 0);
|
||||
out = (stbi_uc *) stbi__malloc_mad4(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0);
|
||||
if (!out) return stbi__errpuc("outofmem", "Out of memory");
|
||||
stbi__getn(s, out, s->img_n * s->img_x * s->img_y);
|
||||
stbi__getn(s, out, s->img_n * s->img_x * s->img_y * (ri->bits_per_channel / 8));
|
||||
|
||||
if (req_comp && req_comp != s->img_n) {
|
||||
out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);
|
||||
|
@ -7398,11 +7522,19 @@ static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
|
|||
stbi__pnm_skip_whitespace(s, &c);
|
||||
|
||||
maxv = stbi__pnm_getinteger(s, &c); // read max value
|
||||
|
||||
if (maxv > 255)
|
||||
return stbi__err("max value > 255", "PPM image not 8-bit");
|
||||
if (maxv > 65535)
|
||||
return stbi__err("max value > 65535", "PPM image supports only 8-bit and 16-bit images");
|
||||
else if (maxv > 255)
|
||||
return 16;
|
||||
else
|
||||
return 1;
|
||||
return 8;
|
||||
}
|
||||
|
||||
static int stbi__pnm_is16(stbi__context *s)
|
||||
{
|
||||
if (stbi__pnm_info(s, NULL, NULL, NULL) == 16)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -7458,6 +7590,9 @@ static int stbi__is_16_main(stbi__context *s)
|
|||
if (stbi__psd_is16(s)) return 1;
|
||||
#endif
|
||||
|
||||
#ifndef STBI_NO_PNM
|
||||
if (stbi__pnm_is16(s)) return 1;
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -7760,4 +7895,3 @@ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
|
|
@ -61,9 +61,6 @@ endif()
|
|||
if(MSVC)
|
||||
else()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_NATIVE}")
|
||||
if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
|
||||
add_definitions(-DLINUX_ARM=1)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
add_library(wirehair ${LIB_SOURCE_FILES})
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#include "WirehairTools.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h> // _BitScanReverse
|
||||
|
|
|
@ -200,7 +200,7 @@ static bool gf256_self_test()
|
|||
#endif
|
||||
|
||||
#if defined(GF256_TRY_NEON)
|
||||
# if defined(IOS) && defined(__ARM_NEON__)
|
||||
# if defined(IOS) && (defined(__ARM_NEON) || defined(__ARM_NEON__))
|
||||
// Requires iPhone 5S or newer
|
||||
static const bool CpuHasNeon = true;
|
||||
static const bool CpuHasNeon64 = true;
|
||||
|
|
|
@ -53,11 +53,17 @@
|
|||
//------------------------------------------------------------------------------
|
||||
// Platform/Architecture
|
||||
|
||||
#if defined(__ARM_ARCH) || defined(__ARM_NEON) || defined(__ARM_NEON__)
|
||||
#if !defined IOS
|
||||
#define LINUX_ARM
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(ANDROID) || defined(IOS) || defined(LINUX_ARM) || defined(__powerpc__) || defined(__s390__)
|
||||
#define GF256_TARGET_MOBILE
|
||||
#endif // ANDROID
|
||||
|
||||
#if defined(__AVX2__) || (defined (_MSC_VER) && _MSC_VER >= 1900)
|
||||
#if defined(__AVX2__) && (!defined (_MSC_VER) || _MSC_VER >= 1900)
|
||||
#define GF256_TRY_AVX2 /* 256-bit */
|
||||
#include <immintrin.h>
|
||||
#define GF256_ALIGN_BYTES 32
|
||||
|
@ -66,36 +72,28 @@
|
|||
#endif // __AVX2__
|
||||
|
||||
#if !defined(GF256_TARGET_MOBILE)
|
||||
// Note: MSVC currently only supports SSSE3 but not AVX2
|
||||
#include <tmmintrin.h> // SSSE3: _mm_shuffle_epi8
|
||||
#include <emmintrin.h> // SSE2
|
||||
#endif // GF256_TARGET_MOBILE
|
||||
|
||||
#if defined(HAVE_ARM_NEON_H)
|
||||
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
|
||||
#include <arm_neon.h>
|
||||
#endif // HAVE_ARM_NEON_H
|
||||
|
||||
#if defined(GF256_TARGET_MOBILE)
|
||||
|
||||
#define GF256_ALIGNED_ACCESSES /* Inputs must be aligned to GF256_ALIGN_BYTES */
|
||||
|
||||
# if defined(HAVE_ARM_NEON_H)
|
||||
// Compiler-specific 128-bit SIMD register keyword
|
||||
#define GF256_M128 uint8x16_t
|
||||
#define GF256_TRY_NEON
|
||||
#endif
|
||||
|
||||
// Compiler-specific 128-bit SIMD register keyword
|
||||
#if defined(GF256_TARGET_MOBILE)
|
||||
#if defined(GF256_TRY_NEON)
|
||||
#define GF256_M128 uint8x16_t
|
||||
#else
|
||||
#define GF256_M128 uint64_t
|
||||
# endif
|
||||
|
||||
#endif // GF256_TRY_NEON
|
||||
#else // GF256_TARGET_MOBILE
|
||||
|
||||
// Compiler-specific 128-bit SIMD register keyword
|
||||
#define GF256_M128 __m128i
|
||||
|
||||
#endif // GF256_TARGET_MOBILE
|
||||
|
||||
// Compiler-specific 256-bit SIMD register keyword
|
||||
#ifdef GF256_TRY_AVX2
|
||||
// Compiler-specific 256-bit SIMD register keyword
|
||||
#define GF256_M256 __m256i
|
||||
#endif
|
||||
|
||||
|
@ -272,10 +270,6 @@ static GF256_FORCE_INLINE void gf256_div_mem(void * GF256_RESTRICT vz,
|
|||
//------------------------------------------------------------------------------
|
||||
// Misc Operations
|
||||
|
||||
/// Swap two memory buffers in-place
|
||||
extern void gf256_memswap(void * GF256_RESTRICT vx, void * GF256_RESTRICT vy, int bytes);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif // __cplusplus
|
||||
|
|
|
@ -153,7 +153,7 @@ WIREHAIR_EXPORT WirehairCodec wirehair_decoder_create(
|
|||
)
|
||||
{
|
||||
// If input is invalid:
|
||||
if (messageBytes < 1 || blockBytes < 1) {
|
||||
if (!m_init || messageBytes < 1 || blockBytes < 1) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
|
573
test/catch.hpp
573
test/catch.hpp
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue