GF2++
Loading...
Searching...
No Matches
BitVec.h
Go to the documentation of this file.
1#pragma once
2// SPDX-FileCopyrightText: 2025 Nessan Fitzmaurice <nzznfitz+gh@icloud.com>
3// SPDX-License-Identifier: MIT
4
8
9#include <gf2/BitStore.h>
10#include <gf2/BitSpan.h>
11
12#include <algorithm>
13#include <array>
14#include <bitset>
15#include <charconv>
16#include <cmath>
17#include <concepts>
18#include <cctype>
19#include <cstdint>
20#include <format>
21#include <optional>
22#include <ranges>
23#include <string>
24#include <string_view>
25#include <vector>
26
27namespace gf2 {
28
36template<unsigned_word Word = usize>
37class BitVec : public BitStore<BitVec<Word>> {
38private:
39 // The number of bit elements in the bit-vector.
40 usize m_size;
41
42 // The bit elements are packed compactly into this standard vector of unsigned words
43 std::vector<Word> m_data;
44
45public:
47 using word_type = Word;
48
50 static constexpr u8 bits_per_word = BITS<Word>;
51
54
64 constexpr usize size() const { return m_size; }
65
77 constexpr usize words() const { return m_data.size(); }
78
93 constexpr Word word(usize i) const {
94 gf2_debug_assert(i < words(), "Index {} is too large for a bit-vector with {} words.", i, words());
95 return m_data[i];
96 }
97
112 constexpr void set_word(usize i, Word word) {
113 gf2_debug_assert(i < words(), "Index {} is too large for a bit-vector with {} words.", i, words());
114
115 // Set the value and if it's the last word, make sure it is kept clean
116 m_data[i] = word;
117 if (i == words() - 1) clean();
118 }
119
128 constexpr const Word* data() const { return m_data.data(); }
129
140 constexpr Word* data() { return m_data.data(); }
141
145 constexpr u8 offset() const { return 0; }
146
150
166 explicit constexpr BitVec(usize len = 0) : m_size(len), m_data(gf2::words_needed<Word>(len)) {
167 // Empty body -- we now have an underlying vector of words all initialized to 0.
168 // Note how we avoided using uniform initialization on the `std::vector` data member.
169 }
170
182 explicit constexpr BitVec(usize len, Word word) : m_size(len), m_data(gf2::words_needed<Word>(len), word) {
183 // Make sure any excess bits are set to 0.
184 clean();
185 }
186
190
199 static constexpr BitVec with_capacity(usize capacity) {
200 auto result = BitVec<Word>{};
201 result.m_data.reserve(gf2::words_needed<Word>(capacity));
202 return result;
203 }
204
212 static constexpr BitVec zeros(usize n) { return BitVec{n}; }
213
220 static constexpr BitVec ones(usize n) { return BitVec{n, MAX<Word>}; }
221
231 static constexpr BitVec constant(usize n, bool value) { return BitVec{n, value ? MAX<Word> : Word{0}}; }
232
242 static constexpr BitVec unit(usize n, usize i) {
243 gf2_assert(i < n, "Unit axis i = {} should be less than the bit-vector size n = {}", i, n);
244 BitVec result{n};
245 result.set(i);
246 return result;
247 }
248
255 static constexpr BitVec alternating(usize n) { return BitVec{n, gf2::ALTERNATING<Word>}; }
256
276 template<typename Src>
277 static constexpr BitVec from(const BitStore<Src>& src) {
278 BitVec result{src.size()};
279 result.copy(src);
280 return result;
281 }
282
293 template<usize N>
294 static constexpr BitVec from(const std::bitset<N>& src) {
295 BitVec result{N};
296 result.copy(src);
297 return result;
298 }
299
311 static constexpr BitVec from(usize len, std::invocable<usize> auto f) {
312 BitVec result{len};
313 result.fill(f);
314 return result;
315 }
316
320
339 static BitVec random(usize len, double p = 0.5, u64 seed = 0) {
340 BitVec result{len};
341 result.random_fill(p, seed);
342 return result;
343 }
344
362 static BitVec seeded_random(usize len, u64 seed) { return random(len, 0.5, seed); }
363
376 static BitVec biased_random(usize len, double p) { return random(len, p, 0); }
377
381
402 static std::optional<BitVec> from_string(std::string_view sv) {
403 // Edge case ...
404 if (sv.empty()) return BitVec<Word>{};
405
406 // Remove any whitespace, commas, single quotes, or underscores characters.
407 std::string s{sv};
408 std::erase_if(s, [](char c) { return c == ' ' || c == ',' || c == '\'' || c == '_'; });
409 bool no_punctuation = true;
410
411 // Check for a binary prefix.
412 if (s.starts_with("0b")) return from_binary_string(s, no_punctuation);
413
414 // Check for a hex prefix.
415 if (s.starts_with("0x") || s.starts_with("0X")) return from_hex_string(s, no_punctuation);
416
417 // No prefix, but perhaps the string only contains '0' and '1' characters.
418 if (std::ranges::all_of(s, [](char c) { return c == '0' || c == '1'; })) {
419 return BitVec::from_binary_string(s, no_punctuation);
420 }
421
422 // Last gasp -- try hex ...
423 return BitVec::from_hex_string(s, no_punctuation);
424 }
425
439 static std::optional<BitVec> from_binary_string(std::string_view sv, bool no_punctuation = false) {
440 // Edge case ...
441 if (sv.empty()) return BitVec<Word>{};
442
443 // Convert to a string & remove the "0b" prefix if it exists.
444 std::string s{sv};
445 if (s.starts_with("0b")) s = s.substr(2);
446
447 // If necessary, also remove any whitespace, commas, single quotes, or underscores.
448 if (!no_punctuation) std::erase_if(s, [](char c) { return c == ' ' || c == ',' || c == '\'' || c == '_'; });
449
450 // The string should now be a sequence of 0's and 1's.
451 if (std::ranges::any_of(s, [](char c) { return c != '0' && c != '1'; })) return std::nullopt;
452
453 // Construct the bit-vector.
454 auto result = BitVec::zeros(s.size());
455 for (auto i = 0uz; i < s.size(); ++i)
456 if (s[i] == '1') result.set(i);
457 return result;
458 }
459
480 static std::optional<BitVec> from_hex_string(std::string_view sv, bool no_punctuation = false) {
481 // Edge case ...
482 if (sv.empty()) return BitVec<Word>{};
483
484 // Convert to a string and remove the optional "0x" prefix if it exists.
485 std::string s{sv};
486 if (s.starts_with("0x") || s.starts_with("0X")) s = s.substr(2);
487
488 // By default, the base of the last digit is 16 just like all the others.
489 // However, there may be a suffix of the form ".base" where `base` is one of 2, 4 or 8.
490 int last_digit_base = 16;
491 if (s.ends_with(".2"))
492 last_digit_base = 2;
493 else if (s.ends_with(".4"))
494 last_digit_base = 4;
495 else if (s.ends_with(".8"))
496 last_digit_base = 8;
497
498 // Remove the suffix if it exists.
499 if (last_digit_base != 16) s = s.substr(0, s.size() - 2);
500
501 // If necessary, also remove any whitespace, commas, single quotes, or underscores.
502 if (!no_punctuation) std::erase_if(s, [](char c) { return c == ' ' || c == ',' || c == '_'; });
503
504 // The string should now be a sequence of 0-9, A-F, a-f.
505 if (std::ranges::any_of(s, [](char c) { return !std::isxdigit(c); })) return std::nullopt;
506
507 // Construct the bit-vector where we allow all the characters to be hex digits.
508 auto result = BitVec::with_capacity(s.size() * 4);
509
510 // Push all but the last character -- those are hex digits for sure.
511 for (auto i = 0uz; i < s.size() - 1; ++i) result.append_hex_digit(s[i]);
512
513 // Push the last character -- it may be a hex digit or a base indicator.
514 result.append_digit(s[s.size() - 1], last_digit_base);
515
516 return result;
517 }
518
522
535 constexpr usize capacity() const { return bits_per_word * m_data.capacity(); }
536
548 constexpr BitVec& shrink_to_fit() {
550 m_data.shrink_to_fit();
551 return *this;
552 }
553
557 constexpr BitVec& clear() {
558 m_data.clear();
559 m_size = 0;
560 return *this;
561 }
562
576 constexpr BitVec& resize(usize n) {
577 if (n != size()) {
578 m_data.resize(gf2::words_needed<Word>(n), 0);
579 auto old_size = size();
580 m_size = n;
581
582 // If we truncated, we need to clean up the last occupied word if necessary.
583 if (n < old_size) clean();
584 }
585 return *this;
586 }
587
591 constexpr void clean() {
592 // NOTE: This works fine even if `size() == 0`.
593 auto shift = m_size % bits_per_word;
594 if (shift != 0) m_data[words() - 1] &= Word(~(MAX<Word> << shift));
595 }
596
600
613 constexpr BitVec& push(bool b) {
614 resize(size() + 1);
615 this->set(size() - 1, b);
616 return *this;
617 }
618
633 template<unsigned_word Src>
634 auto append(Src src) {
635 auto old_size = size();
636 resize(old_size + BITS<Src>);
637 this->span(old_size, size()).copy(src);
638 return *this;
639 }
640
655 template<typename Src>
656 constexpr BitVec& append(const BitStore<Src>& src) {
657 auto old_size = size();
658 resize(old_size + src.size());
659 this->span(old_size, size()).copy(src);
660 return *this;
661 }
662
672 template<usize N>
673 auto append(const std::bitset<N>& src) {
674 auto old_size = size();
675 resize(old_size + src.size());
676 this->span(old_size, size()).copy(src);
677 return *this;
678 }
679
700 constexpr BitVec& append_digit(char c, int base) {
701 // The known bases are 2, 4, 8, and 16.
702 static constexpr std::array<int, 4> known_bases = {2, 4, 8, 16};
703 if (std::ranges::contains(known_bases, base)) {
704 // Try to convert the character to an integer.
705 usize x;
706 if (std::from_chars(&c, &c + 1, x, base).ec == std::errc{}) {
707 // Success! Push the digits onto the bit-vector.
708 auto digits = static_cast<usize>(std::log2(base));
709 auto old_len = size();
710 resize(old_len + digits);
711 for (auto i = 0uz; i < digits; ++i) this->set(old_len + i, (x >> (digits - i - 1)) & 1);
712 }
713 }
714 return *this;
715 }
716
734 constexpr BitVec& append_hex_digit(char c) {
735 // Try to convert the hex digit to an integer.
736 int x;
737 if (std::from_chars(&c, &c + 1, x, 16).ec == std::errc{}) {
738 // Success! Push the digits onto the bit-vector.
739 auto old_len = size();
740 resize(old_len + 4);
741 for (auto i = 0uz; i < 4; ++i) this->set(old_len + i, (x >> (3 - i)) & 1);
742 }
743 return *this;
744 }
745
749
767 constexpr std::optional<bool> pop() {
768 if (m_size == 0) return std::nullopt;
769 auto b = this->get(m_size - 1);
770 resize(m_size - 1);
771 return b;
772 }
773
789 constexpr void split_off(usize at, BitVec<Word>& dst) {
790 gf2_assert(at <= m_size, "split point {} is beyond the end of the bit-vector", at);
791 dst.clear();
792 dst.append(this->span(at, m_size));
793 resize(at);
794 }
795
811 BitVec result;
812 split_off(at, result);
813 return result;
814 }
815
839 template<unsigned_word Dst = Word>
840 constexpr std::optional<Dst> split_off_unsigned() {
841 // Edge case ...
842 if (this->size() == 0) return std::nullopt;
843
844 // Perhaps we can safely cast an `Word` into a `Dst` without any loss of information.
845 constexpr usize bits_per_dst = BITS<Dst>;
846 if constexpr (bits_per_dst <= bits_per_word) {
847 // Easy cases when we can safely cast an `Word` into a `Dst` without any loss of information.
848 // 1. Just one word in the store & we know that all unused bits in that word are zeros.
849 // 2. More than one word in the store but the last word is fully occupied.
850 auto n_words = this->words();
851 auto shift = this->size() % bits_per_word;
852 if (n_words == 1 || shift == 0) {
853 auto result = static_cast<Dst>(m_data[n_words - 1]);
854 resize(this->size() - bits_per_dst);
855 return result;
856 }
857
858 // The last word is not fully occupied so we need to combine it with the next to last word to form a
859 // value that can be cast to a `Dst`.
860 auto lo = m_data[n_words - 2] >> shift;
861 auto hi = m_data[n_words - 1] << (bits_per_word - shift);
862 auto result = static_cast<Dst>(lo | hi);
863 resize(this->size() - bits_per_dst);
864 return result;
865 } else {
866 // `Dst` is larger than `Word` -- it should be an integer multiple of `bits_per_word` is size.
867 static_assert(bits_per_dst % bits_per_word == 0, "sizeof(Dst) % sizeof(Word) != 0");
868
869 // Pop an appropriate number of `Word` sized chunks off the end of the bit-vector.
870 auto words_per_dst = bits_per_dst / bits_per_word;
871 std::vector<Word> chunks(words_per_dst);
872 for (auto i = 0uz; i < words_per_dst; ++i) chunks[i] = *split_off_unsigned<Word>();
873
874 // Combine the chunks into a `Dst` value.
875 Dst result = 0;
876 for (auto i = words_per_dst; i-- > 0;) result |= static_cast<Dst>(chunks[i] << (i * bits_per_word));
877
878 return result;
879 }
880 }
881
883};
884
885} // namespace gf2
886
887// --------------------------------------------------------------------------------------------------------------------
888// Specialises `std::formatter` for `BitVec` -- defers to the version for `BitStore` ...
889// -------------------------------------------------------------------------------------------------------------------
890template<gf2::unsigned_word Word>
891struct std::formatter<gf2::BitVec<Word>> : std::formatter<gf2::BitStore<gf2::BitVec<Word>>> {
892 // Inherits all functionality from `BitStore` formatter
893};
Non-owning views of a contiguous range of bits from some underlying store of unsigned words....
The base class for the vector-like types in the gf2 library. See the BitStore page for more details...
#define gf2_debug_assert(cond,...)
A confirmation macro that checks a boolean condition cond. On failure, the confirmation prints an e...
Definition assert.h:68
#define gf2_assert(cond,...)
A confirmation macro that checks a boolean condition cond. On failure, the confirmation prints an e...
Definition assert.h:98
constexpr auto span(usize begin, usize end) const
Definition BitStore.h:1035
auto fill(std::invocable< usize > auto f)
Fill the store by repeatedly calling f(i) and returns a reference to this for chaining.
Definition BitStore.h:536
constexpr bool get(usize i) const
Definition BitStore.h:123
constexpr usize size() const
Returns the number of bit elements in the store.
Definition BitStore.h:63
auto set(usize index, bool value=true)
Sets the bit-element at the given index to the specified boolean value & returns this for chaining....
Definition BitStore.h:191
auto copy(Src src)
Copies the bits from an unsigned integral src value and returns a reference to this for chaining.
Definition BitStore.h:412
auto random_fill(double p=0.5, u64 seed=0)
Fill the store with random bits and returns a reference to this for chaining.
Definition BitStore.h:560
static std::optional< BitVec > from_hex_string(std::string_view sv, bool no_punctuation=false)
Factory method to construct a bit-vector from a hex string, returning std::nullopt on failure.
Definition BitVec.h:480
constexpr BitVec(usize len=0)
Constructs a bit-vector of length n with all the bit elements set to 0.
Definition BitVec.h:166
static BitVec biased_random(usize len, double p)
Factory method to generate a bit-vector of size len where the elements are from independent fair coin...
Definition BitVec.h:376
static std::optional< BitVec > from_string(std::string_view sv)
Factory method to construct a bit-vector from a string s, returning std::nullopt on failure.
Definition BitVec.h:402
constexpr Word word(usize i) const
Returns word i from the bit-vector's underlying word store.
Definition BitVec.h:93
static constexpr BitVec from(const std::bitset< N > &src)
Factory method to construct a bit-vector from the bits of a std::bitset.
Definition BitVec.h:294
constexpr void clean()
Sets any unused bits in the last occupied word to 0.
Definition BitVec.h:591
auto append(const std::bitset< N > &src)
Appends all the bits from a std::bitset onto the end of the bit-vector and returns this for chaining.
Definition BitVec.h:673
constexpr BitVec & append_digit(char c, int base)
Appends a single character c onto the end of bit-vector and returns this for chaining.
Definition BitVec.h:700
constexpr const Word * data() const
Returns a const pointer to the underlying store of words .
Definition BitVec.h:128
constexpr usize size() const
Returns the number of bit-elements in the bit-vector.
Definition BitVec.h:64
constexpr BitVec & push(bool b)
Pushes a single bit b onto the bit-vector.
Definition BitVec.h:613
constexpr u8 offset() const
Returns the offset (in bits) of the first bit in the store within the first word.
Definition BitVec.h:145
constexpr void split_off(usize at, BitVec< Word > &dst)
Splits a bit-vector into two at the given index, returning the second part in dst.
Definition BitVec.h:789
constexpr usize capacity() const
Definition BitVec.h:535
constexpr BitVec(usize len, Word word)
Constructs a bit-vector with len elements by repeatedly copying all the bits from word.
Definition BitVec.h:182
static constexpr BitVec from(usize len, std::invocable< usize > auto f)
Factory method to construct a bit-vector by repeatedly calling f(i) for i in [0, len).
Definition BitVec.h:311
static constexpr BitVec constant(usize n, bool value)
Factory method to generate a bit-vector of length n where the elements are set to value.
Definition BitVec.h:231
constexpr std::optional< bool > pop()
Removes the last bit from the bit-vector and returns it or std::nullopt if the bit-vector is empty.
Definition BitVec.h:767
constexpr BitVec & shrink_to_fit()
Shrinks the bit-vector's capacity as much as possible.
Definition BitVec.h:548
constexpr usize words() const
Returns the number of words in the bit-vector's underlying word store.
Definition BitVec.h:77
static constexpr BitVec with_capacity(usize capacity)
Factory method to construct an empty bit-vector with at least the specified capacity.
Definition BitVec.h:199
static constexpr BitVec zeros(usize n)
Factory method to generate a bit-vector of length n where the elements are all 0.
Definition BitVec.h:212
static constexpr BitVec from(const BitStore< Src > &src)
Factory method to construct a bit-vector by copying all the bits from any BitStore instance.
Definition BitVec.h:277
constexpr BitVec< Word > split_off(usize at)
Splits a bit-vector into two at the given index, returning a new BitVec.
Definition BitVec.h:810
static constexpr BitVec alternating(usize n)
Factory method to generate a bit-vector of length n looking like 101010....
Definition BitVec.h:255
constexpr Word * data()
Returns a pointer to the underlying store of words.
Definition BitVec.h:140
static BitVec random(usize len, double p=0.5, u64 seed=0)
Factory method to generate a bit-vector of size len where the elements are picked at random.
Definition BitVec.h:339
constexpr BitVec & resize(usize n)
Resize the bit-vector so that its size() is n.
Definition BitVec.h:576
static constexpr BitVec ones(usize n)
Factory method to generate a bit-vector of length n where the elements are all 1.
Definition BitVec.h:220
static BitVec seeded_random(usize len, u64 seed)
Factory method to generate a bit-vector of size len where the elements are from independent fair coin...
Definition BitVec.h:362
constexpr BitVec & clear()
Removes all elements from the bit-vector so size()==0.
Definition BitVec.h:557
static std::optional< BitVec > from_binary_string(std::string_view sv, bool no_punctuation=false)
Factory method to construct a bit-vector from a binary string, returning std::nullopt on failure.
Definition BitVec.h:439
constexpr void set_word(usize i, Word word)
Sets word i in the bit-vector's underlying word store to value (masked if necessary).
Definition BitVec.h:112
Word word_type
Definition BitVec.h:47
constexpr std::optional< Dst > split_off_unsigned()
Split off a single arbitrary sized unsigned integer off the end of the bit-vector and returns it or s...
Definition BitVec.h:840
constexpr BitVec & append_hex_digit(char c)
Appends a single hex digit character c onto the end of bit-vector and returns this for chaining.
Definition BitVec.h:734
static constexpr u8 bits_per_word
Definition BitVec.h:50
constexpr BitVec & append(const BitStore< Src > &src)
Appends all the bits from any BitStore src onto the end of the bit-vector and returns this for chaini...
Definition BitVec.h:656
static constexpr BitVec unit(usize n, usize i)
Factory method to generate a "unit" bit-vector of length n where only element i is set.
Definition BitVec.h:242
auto append(Src src)
Appends all the bits from any unsigned integral src value and returns a reference to this for chainin...
Definition BitVec.h:634
The namespace for the gf2 library.
Definition assert.h:119
constexpr Word ALTERNATING
The unsigned_word instance with alternating bits set to 0 and 1.
Definition unsigned_word.h:90
std::uint64_t u64
Word type alias for a 64-bit unsigned integer.
Definition unsigned_word.h:38
std::uint8_t u8
Word type alias for an 8-bit unsigned integer.
Definition unsigned_word.h:29
std::size_t usize
Word type alias for the platform's "native"-sized unsigned integer.
Definition unsigned_word.h:41
constexpr Word MAX
The unsigned_word instance with all its bits set to 1.
Definition unsigned_word.h:81
constexpr u8 BITS
The number of bits in an unsigned_word returned as a u8.
Definition unsigned_word.h:54
constexpr usize words_needed(usize n_bits)
Returns the number of unsigned_words needed to store n_bits bits.
Definition unsigned_word.h:522