Thanks for using Compiler Explorer
Sponsors
C++
LLVM IR
Cppx
Cppx-Gold
Cppx-Blue
C
Rust
D
Go
ispc
Haskell
OCaml
Python
Swift
Pascal
Fortran
Assembly
Analysis
CUDA
Zig
Clean
Ada
Nim
c++ source #1
Output
Compile to binary
Run the compiled output
Intel asm syntax
Demangle identifiers
Filters
Unused labels
Library functions
Directives
Comments
Horizontal whitespace
Compiler
ARM gcc 10.2 (linux)
ARM gcc 10.2.1 (none)
ARM gcc 10.3 (linux)
ARM gcc 4.5.4 (linux)
ARM gcc 4.6.4 (linux)
ARM gcc 5.4 (linux)
ARM gcc 5.4.1 (none)
ARM gcc 6.3.0 (linux)
ARM gcc 6.4 (linux)
ARM gcc 7.2.1 (none)
ARM gcc 7.3 (linux)
ARM gcc 8.2 (WinCE)
ARM gcc 8.2 (linux)
ARM gcc 8.3.1 (none)
ARM gcc 9.2.1 (none)
ARM gcc 9.3 (linux)
ARM gcc trunk (linux)
ARM msvc v19.0 (WINE)
ARM msvc v19.10 (WINE)
ARM msvc v19.14 (WINE)
ARM64 gcc 10.2
ARM64 gcc 10.3
ARM64 gcc 5.4
ARM64 gcc 6.3
ARM64 gcc 6.4
ARM64 gcc 7.3
ARM64 gcc 8.2
ARM64 gcc 9.3
ARM64 gcc trunk
ARM64 msvc v19.14 (WINE)
AVR gcc 4.5.4
AVR gcc 4.6.4
AVR gcc 5.4.0
AVR gcc 9.2.0
Arduino Mega (1.8.9)
Arduino Uno (1.8.9)
FRC 2019
FRC 2020
KVX gcc 7.5 (ACB 4.1.0)
KVX gcc 7.5 (ACB 4.1.0-cd1)
KVX gcc 7.5 (ACB 4.2.0)
KVX gcc 7.5 (ACB 4.3.0)
KVX gcc 7.5 (ACB 4.4.0)
MIPS gcc 5.4
MIPS gcc 5.4 (el)
MIPS64 gcc 5.4
MIPS64 gcc 5.4 (el)
MSP430 gcc 4.5.3
MSP430 gcc 5.3.0
MSP430 gcc 6.2.1
PowerPC gcc 4.8.5
RISC-V rv32gc clang (trunk)
RISC-V rv32gc clang 10.0.0
RISC-V rv32gc clang 10.0.1
RISC-V rv32gc clang 11.0.0
RISC-V rv32gc clang 11.0.1
RISC-V rv32gc clang 9.0.0
RISC-V rv32gc clang 9.0.1
RISC-V rv32gc gcc 10.2.0
RISC-V rv32gc gcc 8.2.0
RISC-V rv64gc clang (trunk)
RISC-V rv64gc clang 10.0.0
RISC-V rv64gc clang 10.0.1
RISC-V rv64gc clang 11.0.0
RISC-V rv64gc clang 11.0.1
RISC-V rv64gc clang 9.0.0
RISC-V rv64gc clang 9.0.1
RISC-V rv64gc gcc 10.2.0
RISC-V rv64gc gcc 8.2.0
Raspbian Buster
Raspbian Stretch
WebAssembly clang (trunk)
arm64 msvc v19.28 (VS16.9)
arm64 msvc v19.latest
armv7-a clang (trunk)
armv7-a clang 10.0.0
armv7-a clang 10.0.1
armv7-a clang 11.0.0
armv7-a clang 11.0.1
armv7-a clang 9.0.0
armv7-a clang 9.0.1
armv8-a clang (trunk)
armv8-a clang (trunk, all architectural features)
armv8-a clang 10.0.0
armv8-a clang 10.0.1
armv8-a clang 11.0.0
armv8-a clang 11.0.1
armv8-a clang 9.0.0
armv8-a clang 9.0.1
ellcc 0.1.33
ellcc 0.1.34
ellcc 2017-07-16
power64 AT12.0
power64 AT13.0
power64le AT12.0
power64le AT13.0
power64le clang (trunk)
power64le gcc 6.3.0
powerpc64 clang (trunk)
x64 msvc v19.0 (WINE)
x64 msvc v19.10 (WINE)
x64 msvc v19.14
x64 msvc v19.14 (WINE)
x64 msvc v19.15
x64 msvc v19.16
x64 msvc v19.20
x64 msvc v19.21
x64 msvc v19.22
x64 msvc v19.23
x64 msvc v19.24
x64 msvc v19.25
x64 msvc v19.26
x64 msvc v19.27
x64 msvc v19.28
x64 msvc v19.28 (VS16.9)
x64 msvc v19.latest
x86 djgpp 4.9.4
x86 djgpp 5.5.0
x86 djgpp 6.4.0
x86 djgpp 7.2.0
x86 msvc v19.0 (WINE)
x86 msvc v19.10 (WINE)
x86 msvc v19.14
x86 msvc v19.14 (WINE)
x86 msvc v19.15
x86 msvc v19.16
x86 msvc v19.20
x86 msvc v19.21
x86 msvc v19.22
x86 msvc v19.23
x86 msvc v19.24
x86 msvc v19.25
x86 msvc v19.26
x86 msvc v19.27
x86 msvc v19.28
x86 msvc v19.28 (VS16.9)
x86 msvc v19.latest
x86-64 Zapcc 190308
x86-64 clang (assertions trunk)
x86-64 clang (experimental -Wlifetime)
x86-64 clang (experimental P1144)
x86-64 clang (experimental P1221)
x86-64 clang (experimental auto NSDMI)
x86-64 clang (experimental pattern matching)
x86-64 clang (old concepts branch)
x86-64 clang (std::embed)
x86-64 clang (trunk)
x86-64 clang 10.0.0
x86-64 clang 10.0.1
x86-64 clang 11.0.0
x86-64 clang 11.0.1
x86-64 clang 12.0.0
x86-64 clang 3.0.0
x86-64 clang 3.1
x86-64 clang 3.2
x86-64 clang 3.3
x86-64 clang 3.4.1
x86-64 clang 3.5
x86-64 clang 3.5.1
x86-64 clang 3.5.2
x86-64 clang 3.6
x86-64 clang 3.7
x86-64 clang 3.7.1
x86-64 clang 3.8
x86-64 clang 3.8.1
x86-64 clang 3.9.0
x86-64 clang 3.9.1
x86-64 clang 4.0.0
x86-64 clang 4.0.1
x86-64 clang 5.0.0
x86-64 clang 5.0.1
x86-64 clang 6.0.0
x86-64 clang 6.0.1
x86-64 clang 7.0.0
x86-64 clang 7.0.1
x86-64 clang 7.1.0
x86-64 clang 8.0.0
x86-64 clang 8.0.1
x86-64 clang 9.0.0
x86-64 clang 9.0.1
x86-64 gcc (contract labels)
x86-64 gcc (contracts)
x86-64 gcc (coroutines)
x86-64 gcc (modules)
x86-64 gcc (static analysis)
x86-64 gcc (trunk)
x86-64 gcc 10.1
x86-64 gcc 10.2
x86-64 gcc 10.3
x86-64 gcc 4.1.2
x86-64 gcc 4.4.7
x86-64 gcc 4.5.3
x86-64 gcc 4.6.4
x86-64 gcc 4.7.1
x86-64 gcc 4.7.2
x86-64 gcc 4.7.3
x86-64 gcc 4.7.4
x86-64 gcc 4.8.1
x86-64 gcc 4.8.2
x86-64 gcc 4.8.3
x86-64 gcc 4.8.4
x86-64 gcc 4.8.5
x86-64 gcc 4.9.0
x86-64 gcc 4.9.1
x86-64 gcc 4.9.2
x86-64 gcc 4.9.3
x86-64 gcc 4.9.4
x86-64 gcc 5.1
x86-64 gcc 5.2
x86-64 gcc 5.3
x86-64 gcc 5.4
x86-64 gcc 5.5
x86-64 gcc 6.1
x86-64 gcc 6.2
x86-64 gcc 6.3
x86-64 gcc 6.4
x86-64 gcc 7.1
x86-64 gcc 7.2
x86-64 gcc 7.3
x86-64 gcc 7.4
x86-64 gcc 7.5
x86-64 gcc 8.1
x86-64 gcc 8.2
x86-64 gcc 8.3
x86-64 gcc 9.1
x86-64 gcc 9.2
x86-64 gcc 9.3
x86-64 icc 13.0.1
x86-64 icc 16.0.3
x86-64 icc 17.0.0
x86-64 icc 18.0.0
x86-64 icc 19.0.0
x86-64 icc 19.0.1
x86-64 icc 2021.1.2
x86-64 icc 2021.2.0
x86-64 icx 2021.1.2
x86-64 icx 2021.2.0
Options
Source code
#include "https://raw.githubusercontent.com/DenisYaroshevskiy/algorithm_dumpster/master/single_headers/pack_single_header.h"

#include <cstddef>
#include <cstdint>
#include <iterator>
#include <type_traits>
#include <utility>

namespace unsq {
namespace _drill_down {

// Returned by `equivalent()` when no substitute type exists. It has no nested
// `type`, so `unsq::equivalent<T>` fails to compile for such T.
struct error_t {};

// Carries a type as a value so `equivalent()` can "return" a type.
template <typename T>
struct type_t {
  using type = T;
};

// Maps T to a plain integer type of the same size:
//   integral  -> T itself
//   enum      -> its underlying integer type
//   pointer   -> std::int64_t (only 64-bit pointers are accepted)
//   otherwise -> an unsigned integer of matching size (1, 2, 4 or 8 bytes)
template <typename T>
constexpr auto equivalent() {
  if constexpr (std::is_integral_v<T>)
    return type_t<T>{};
  else if constexpr (std::is_enum_v<T>)
    // `underlying_type_t`, not `underlying_type`: we want the integer type,
    // not the trait struct that names it.
    return type_t<std::underlying_type_t<T>>{};
  else if constexpr (std::is_pointer_v<T>) {
    static_assert(sizeof(T) == sizeof(std::int64_t));
    return type_t<std::int64_t>{};
  }
  // you asked for this
  else if constexpr (sizeof(T) == 1)
    return type_t<std::uint8_t>{};
  else if constexpr (sizeof(T) == 2)
    return type_t<std::uint16_t>{};
  else if constexpr (sizeof(T) == 4)
    return type_t<std::uint32_t>{};
  else if constexpr (sizeof(T) == 8)
    return type_t<std::uint64_t>{};
  else
    return error_t{};
}

// True only for pointers to const.
template <typename>
struct is_const_pointer : std::false_type {};

template <typename T>
struct is_const_pointer<const T*> : std::true_type {};

}  // namespace _drill_down

template <typename I>
using ValueType = typename std::iterator_traits<I>::value_type;

// Integer type that stands in for T (see `_drill_down::equivalent`).
template <typename T>
using equivalent = typename decltype(_drill_down::equivalent<T>())::type;

// Converts an iterator into a raw pointer to the equivalent integer type,
// preserving the constness of the pointee.
// The iterator must be dereferenceable: `&*_it` is evaluated.
template <typename I>
auto* drill_down(I _it) {
  using T = equivalent<ValueType<I>>;
  auto* res = &*_it;

  if constexpr (_drill_down::is_const_pointer<decltype(res)>::value) {
    return reinterpret_cast<const T*>(res);
  } else {
    return reinterpret_cast<T*>(res);
  }
}

// Drills down a whole range; `l` is computed from the distance so that `_l`
// (possibly one-past-the-end) is never dereferenced.
template <typename I>
// require ContiguousIterator<I>
auto drill_down_range(I _f, I _l) {
  auto* f = drill_down(_f);
  auto* l = f + (_l - _f);
  return std::pair{f, l};
}

// Inverse of `drill_down`: maps raw pointer `f` back to an iterator of the
// range that starts at `_f`.
template <typename I, typename T>
I undo_drill_down(I _f, T* f) {
  return _f + (f - drill_down(_f));
}

}  // namespace unsq

namespace unsq {
namespace _remove {

// Figure out a safe load.
// (| - page boundary)
// [f | l]  => Ok from f, Ok from l - width
// [|f l]   => Ok from f, Not Ok from l - width
// [f, l|]  => Not ok from f, Ok from l - width
//
// Returns the address to load a full `Pack` from, together with a mask that
// (judging by the helper names - confirm against the simd header) ignores the
// lanes falling outside [f, l).
// Intended for the tail of a range, i.e. l - f < width.
template <typename Pack, typename T>
std::pair<T*, simd::top_bits<simd::vbool_t<Pack>>> figure_out_safe_load(T* f,
                                                                        T* l) {
  using vbool = simd::vbool_t<Pack>;
  constexpr std::ptrdiff_t width = simd::size_v<Pack>;

  T* page_boundary = simd::end_of_page(f);

  // Fewer than `width` elements are left before the page boundary: load the
  // block that ends at `l` instead and ignore the lanes in front of `f`.
  if (page_boundary - f < width) {
    T* safe = l - width;
    return {safe, simd::ignore_first_n_mask<vbool>(f - safe)};
  }

  // A full-width load from `f` stays on the page; ignore the lanes past `l`.
  return {f, simd::ignore_last_n_mask<vbool>(f + width - l)};
}

}  // namespace _remove

// Removes the elements of [_f, _l) for which the vector predicate `p` reports
// true, compacting the kept elements to the front.
// Returns the end of the kept elements.
template <std::size_t width, typename I, typename PV>
// require ContiguousIterator<I> && VectorPredicate<PV, equivalent<ValueType<I>>>
I remove_if(I _f, I _l, PV p) {
  using T = equivalent<ValueType<I>>;
  auto [f, l] = unsq::drill_down_range(_f, _l);

  T* o = f;

  using pack = simd::pack<T, width>;
  using vbool = simd::vbool_t<pack>;

  // There is a tradeoff between doing aligned reads and more compresses
  // and unaligned with less compresses.
  // For now I do unaligned.

  auto get_mmask = [&p](const pack& ts) {
    const vbool test = p(ts);
    return ~get_top_bits(test);  // p marks trues to remove
  };

  // Main loop: whole packs.
  while ((l - f) >= static_cast<std::ptrdiff_t>(width)) {
    const pack ts = simd::load_unaligned<pack>(f);
    const auto mmask = get_mmask(ts);
    o = simd::compress_store_unsafe(o, ts, mmask);
    f += width;
  }

  // Nothing left (this includes an empty input range): skip the tail load
  // completely so that no memory is read when there are no elements to look
  // at - e.g. for an empty range whose pointers are null.
  if (f == l) {
    return unsq::undo_drill_down(_f, o);
  }

  // Tail: one more full-width load from a safe address, with the lanes
  // outside [f, l) filtered out of the mask.
  auto [safe, mmask_filter] = _remove::figure_out_safe_load<pack>(f, l);

  const pack ts = simd::load_unaligned<pack>(safe);
  auto mmask = get_mmask(ts);
  mmask &= mmask_filter;
  o = simd::compress_store_masked(o, ts, mmask);

  return unsq::undo_drill_down(_f, o);
}

// Removes every element of [f, l) that compares equal to `x` (after `x` has
// been converted to the equivalent integer type of the range's value type).
// Returns the end of the kept elements.
template <std::size_t width, typename I, typename T>
I remove(I f, I l, const T& x) {
  using U = equivalent<ValueType<I>>;
  using pack = simd::pack<U, width>;

  const auto xs = simd::set_all<pack>(static_cast<U>(x));

  return unsq::remove_if<width>(
      f, l, [&](const pack& read) { return simd::equal_pairwise(read, xs); });
}

}  // namespace unsq

using type = char;
constexpr std::size_t width = 16;

// Entry point for inspecting the generated code: removes all zero elements
// from [f, l) and returns the new end.
type* remove_zeroes(type* f, type* l) {
  return unsq::remove<width>(f, l, 0);
}
Become a Patron
Sponsor on GitHub
Donate via PayPal
Source on GitHub
Mailing list
Installed libraries
Wiki
Report an issue
How it works
Contact the author
About the author
Changelog
Version tree