Irgendwo im Net kam vor ein paar Wochen die Frage nach einem C++ Caesar Chiffre ohne Verwendung von Schleifen. Da ich lange nix mit C++ gemacht hatte und mit C++20 erst recht nicht, hatte ich das über Weihnachten zum Anlass genommen, mich mit ein paar neuen Features wie Concepts, Ranges und Spans vertraut zu machen. Natürlich ist ein Caesar Chiffre den Aufwand nicht Wert, aber da ich es nun schon mal geschrieben habe ... Vielleicht holt sich der interessierte Leser die eine oder andere Anregung, keine Ahnung
caesar_cipher.hpp:
/*
Copyright (c) 2022 Steffen Illhardt
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/// @file caesar_cipher.hpp
/// @brief A C++ interface of a recursive Caesar Cipher algorithm.
/// @details The important elements of the public interface are:
/// - function `caesar::encipher`
/// - function `caesar::decipher`
/// - class `caesar::alphabet`
/// - type `caesar::shift_t`
///
/// @version 1.0
/// @author Steffen Illhardt
/// @date 2022
/// @copyright MIT License
/// @pre Requires compiler support for at least C++20.
/// @note The term "character" is used to express "single integral value which specifies a code unit" in this source code.
/// @warning Encryption (decryption, too) is performed at code-unit level. The output may or may not meet the original encoding requirements
/// anymore (e.g. code units of UTF-8 or UTF-16 may be substituted by code units which result in invalid code-point representations).
// clang-format off
#ifndef CAESAR_CIPHER_HPP_INCLUDED__
#define CAESAR_CIPHER_HPP_INCLUDED__
#include <climits> // CHAR_BIT
#include <concepts> // same_as<T, U>
#include <cstdint> // uint32_t, int_least64_t
#include <exception> // exception
#include <iterator> // output_iterator<I, T>, contiguous_iterator<I>, iter_value_t<I>
#include <ranges> // range<R>, iterator_t<R>
#include <span> // span<T, size_t N>
#include <type_traits> // make_unsigned_t<T>
#include <utility> // pair<T, U>
/// @brief Scope of the interface for encrypting and decrypting of strings using a Caesar Cipher. <br>
namespace caesar
{
static_assert(CHAR_BIT == 8 && CHAR_BIT * sizeof(char16_t) == 16 && CHAR_BIT * sizeof(char32_t) == 32, "Padded character types are not supported.");
/// @brief Only fundamental types of characters shall be used for strings and alphabets in the Caesar Cipher interface.
template<class T>
concept CharacterType = std::same_as<T, char> || std::same_as<T, signed char> || std::same_as<T, unsigned char> || std::same_as<T, wchar_t> || std::same_as<T, char8_t> || std::same_as<T, char16_t> || std::same_as<T, char32_t>;
/// @brief Only iterators which are both mutable and contiguous iterators in ranges of `caesar::CharacterType` values shall be used in the Caesar Cipher interface.
template<class I>
concept MutableContiguousCharacterIterator = std::output_iterator<I, std::iter_value_t<I>> && std::contiguous_iterator<I> && CharacterType<std::iter_value_t<I>>;
/// @brief Only spans which are both mutable and contiguous ranges of `caesar::CharacterType` values shall be used in the Caesar Cipher interface.
template<class R>
concept MutableContiguousCharacterRange = std::ranges::range<R> && MutableContiguousCharacterIterator<std::ranges::iterator_t<R>>;
#if defined(__clang__)
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wweak-vtables" // vtable emitted in every translation unit because no out-of-line virtual method implemented
#endif
/// @brief Exception class thrown when the initialization of a `caesar::alphabet` object fails.
class bad_alphabet_initialization : public std::exception
{
};
#if defined(__clang__)
# pragma clang diagnostic pop
#endif
#if defined(_MSC_VER) && !defined(__clang__)
# pragma warning(push)
# pragma warning(disable : 26472 26821) // don't use static_cast (use gsl::narrow_cast), don't use std::span (use gsl::span)
#endif
/// @brief Class specifying an alphabet by its first and last characters.
///
/// @tparam CharT Value type of the characters in the alphabet. Requires to meet the `caesar::CharacterType` concept.
template<CharacterType CharT>
class alphabet
{
public:
/// @brief Type of the elements in the represented alphabet.
using element_type = CharT;
/// @brief Unsigned value type with the same size as CharT.
using uvalue_type = std::make_unsigned_t<CharT>;
/// @brief Unsigned type to represent the size of the alphabet.
using size_type = std::uint32_t;
/// @brief Create a `caesar::alphabet` object with the value of both the first and last characters set to `0`.
alphabet() = default;
/// @brief Create a `caesar::alphabet` object from its first and last characters. <br>
/// Code units `0 <= alphabetFirst <= alphabetLast <= 0x10ffff` are deemed valid to initialize a `caesar::alphabet` object, where `0x10ffff` is the upper limit defined by the Unicode standard. <br>
/// A `caesar::bad_alphabet_initialization` exception is thrown in the event of a violation.
///
/// @param alphabetFirst First character of the alphabet.
/// @param alphabetLast Last character of the alphabet.
constexpr alphabet(const element_type alphabetFirst, const element_type alphabetLast) :
_alphabetAttr{ _check_valid_range(alphabetFirst, alphabetLast) }
{
}
/// @brief Get the first character of the alphabet, converted to its unsigned counterpart.
///
/// @return The first character of the alphabet.
[[nodiscard]] constexpr auto ufirst() const noexcept
{
return _alphabetAttr.first.first;
}
/// @brief Get the last character of the alphabet, converted to its unsigned counterpart.
///
/// @return The last character of the alphabet.
[[nodiscard]] constexpr auto ulast() const noexcept
{
return _alphabetAttr.first.second;
}
/// @brief Get the number of characters in the alphabet. The value is guaranteed to be `>0`.
///
/// @return The number of characters in the alphabet.
[[nodiscard]] constexpr auto size() const noexcept
{
return _alphabetAttr.second;
}
private:
/// @brief Tuple-like type for the values specifying the alphabet. (The `std::pair` class is chosen because member access doesn't require a bound check and it compiles without overhead.)
using _attr_type = std::pair<std::pair<uvalue_type, uvalue_type>, size_type>;
/// @brief Throw when the requirements described in the comments of the constructor are not met.
///
/// @param alphabetFirst First character of the alphabet.
/// @param alphabetLast Last character of the alphabet.
///
/// @return The object used to initialize the `_alphabetAttr` member.
[[nodiscard]] constexpr _attr_type _check_valid_range(const element_type alphabetFirst, const element_type alphabetLast) const
{
const auto uAlphabetFirst{ static_cast<uvalue_type>(alphabetFirst) }; // casts to unsigned, keeping type width and bit patterns of the values
const auto uAlphabetLast{ static_cast<uvalue_type>(alphabetLast) };
if constexpr (sizeof(uAlphabetLast) == sizeof(char32_t)) // the character type has a width of 32 bits where the maximum value of a valid Unicode code point matters
{
if (constexpr auto maxCodePoint{ U'\x10ffff' };
uAlphabetFirst > uAlphabetLast || uAlphabetLast > maxCodePoint)
throw bad_alphabet_initialization{};
}
else // the upper bound of uAlphabetLast is already limited by its type size
{
if (uAlphabetFirst > uAlphabetLast)
throw bad_alphabet_initialization{};
}
const auto alphabetSize{ size_type{ 1 } + uAlphabetLast - uAlphabetFirst };
return { { uAlphabetFirst, uAlphabetLast }, alphabetSize };
}
/// @brief Contains the first and last characters of the alphabet, both of which have been converted to their unsigned counterparts. Also the size of the alphabet. <br>
/// The alphabet is overdetermined, two of the three values would have been enough. However, this ensures that the third value is only calculated once.
_attr_type _alphabetAttr{ {}, 1 };
};
/// @brief Deduction guide for the `caesar::alphabet` constructor.
template<class CharT>
alphabet(const CharT alphabetFirst, const CharT alphabetLast) -> alphabet<CharT>;
/// @brief Signed integral type to represent the number of places to be shifted in the alphabet.
using shift_t = std::int_least64_t;
/// @brief Scope of internal implementation details which are not part of the public interface.
namespace details
{
/// @brief Internal Caesar substitution implementation, performing the core task. This function is recursively called for every character in the string.
///
/// @tparam IterT Type of the string iterator.
///
/// @param currentIter Iterator initially pointing to the begin of the string, and being incremented in each recursion.
/// @param endIter Iterator pointing to the past-the-last character of the string.
/// @param alphabetSpec Alphabet to be used.
/// @param shift Number of places to be shifted in the alphabet. The function relies on `shift >= 0`.
template<MutableContiguousCharacterIterator IterT>
constexpr void _substitute(IterT currentIter, const IterT endIter, const alphabet<std::iter_value_t<IterT>> alphabetSpec, const shift_t shift) noexcept
{
if (currentIter == endIter) // no characters leftover
return;
using uvalue_type = typename decltype(alphabetSpec)::uvalue_type;
if (const auto uCurrentChar{ static_cast<uvalue_type>(*currentIter) }; // the signedness of char and wchar_t is implementation-defined, however we always need an unsigned value; type width and bit pattern of the value are kept
alphabetSpec.ufirst() <= uCurrentChar && uCurrentChar <= alphabetSpec.ulast()) // if inside of the alphabet range ...
{
const auto shiftedVal{ (shift + uCurrentChar - alphabetSpec.ufirst()) % alphabetSpec.size() + alphabetSpec.ufirst() }; // ... perform rotary shifting in the alphabet ...
*currentIter = static_cast<std::iter_value_t<IterT>>(shiftedVal); // ... and substitute the old value with the shifted value (the narrowing cast removes unused high-order 0 bits)
}
_substitute(++currentIter, endIter, alphabetSpec, shift); // call the function recursively with the incremented iterator (some compilers achieve a better TCO by using incrementation rather than addition of 1)
}
/// @brief Symbolic direction of shifting in the alphabet, where `forward` is for encryption and `back` is for decryption.
enum class _direction
{
forward,
back
};
/// @brief Internal routine that combines encryption and decryption because they are the same thing in a Caesar Cipher.
///
/// @tparam ShiftDir A `detail::_direction` value specifying the symbolic direction of shifting.
/// @tparam CharT The `element_type` of both the `std::span` class and the `caesar::alphabet` class.
/// @tparam Extent Number of elements in the span sequence, or `std::dynamic_extent` if dynamic.
///
/// @param stringProxy Span of the string which is to be encrypted or decrypted. The characters in the range of the alphabet are substituted.
/// @param alphabetSpec A `caesar::alphabet` object specifying the alphabet to be used.
/// @param shift Number of places to be shifted in the alphabet.
template<_direction ShiftDir = _direction::forward, class CharT, std::size_t Extent>
constexpr void _cipher(const std::span<CharT, Extent> stringProxy, const alphabet<CharT> alphabetSpec, shift_t shift) noexcept
requires MutableContiguousCharacterRange<decltype(stringProxy)>
{
shift %= alphabetSpec.size(); // remove full alphabet turns (if absolute value of shift >= number of characters in the alphabet)
if (shift == 0) // no shifting to be performed
return;
if constexpr (ShiftDir == _direction::back) // reverse the sign for decrypting; to be done after the first modulo to avoid corner case -(minimum value of shift_t)
shift = -shift;
shift = (shift + alphabetSpec.size()) % alphabetSpec.size(); // yields a positive value in range 1..(alphabet size - 1) which is equivalent to the original value
_substitute(stringProxy.begin(), stringProxy.end(), alphabetSpec, shift); // perform the character substitution
}
} // namespace caesar::details
/// @brief Perform encryption in a Caesar Cipher. <br>
///
/// @tparam CharT The `element_type` of both the `std::span` class and the `caesar::alphabet` class.
/// @tparam Extent Number of elements in the span sequence, or `std::dynamic_extent` if dynamic.
///
/// @param stringProxy Span of the string which is to be encrypted. The characters in the range of the alphabet are substituted by the encrypted characters.
/// @param alphabetSpec A `caesar::alphabet` object specifying the alphabet to be used.
/// @param shift Number of places to be shifted in the alphabet. Shifting backwards is supported.
template<class CharT, std::size_t Extent>
constexpr void encipher(const std::span<CharT, Extent> stringProxy, const alphabet<CharT> alphabetSpec, const shift_t shift) noexcept
requires MutableContiguousCharacterRange<decltype(stringProxy)>
{
details::_cipher(stringProxy, alphabetSpec, shift);
}
/// @brief Perform decryption in a Caesar Cipher.
///
/// @tparam CharT The `element_type` of both the `std::span` class and the `caesar::alphabet` class.
/// @tparam Extent Number of elements in the span sequence, or `std::dynamic_extent` if dynamic.
///
/// @param stringProxy Span of the string which is to be decrypted. The characters in the range of the alphabet are substituted by the decrypted characters.
/// @param alphabetSpec A `caesar::alphabet` object specifying the alphabet to be used.
/// @param shift Number of places previously shifted to encrypt the string.
template<class CharT, std::size_t Extent>
constexpr void decipher(const std::span<CharT, Extent> stringProxy, const alphabet<CharT> alphabetSpec, const shift_t shift) noexcept
requires MutableContiguousCharacterRange<decltype(stringProxy)>
{
details::_cipher<details::_direction::back>(stringProxy, alphabetSpec, shift);
}
#if defined(_MSC_VER) && !defined(__clang__)
# pragma warning(pop)
#endif
} // namespace caesar
#endif // CAESAR_CIPHER_HPP_INCLUDED__
Testcode:
/// @file
/// @brief Contains an example of how to use the Caesar Cipher algorithm. Requires compiler support for at least C++20.
#include "caesar_cipher.hpp"
#include <iostream>
#include <span>
#include <string>
/// @brief Simple test code.
///
/// @return Always zero.
int main() {
// In our example, we want to encrypt the upper and lower case letters of the Latin alphabet. Caesar Cipher is a mono-alphabetic substitution cipher.
// Since both A..Z and a..z are separate alphabets, it would be required to either have an additional branching in the cipher algorithm, or to use
// a multi-stage encryption. I chose the latter to keep the algorithm able to be used with any range of characters. This way the example can easily
// be extended to incorporate digits as a third alphabet (just uncomment the related lines below). Moreover, both encipher and decipher are function
// templates. Hence they can be used with a string of any appropriate character type (e.g. `std::wstring`).
static std::string testString{
"In 814 BC Carthage was founded by Phoenician settlers."}; // the Caesar Cipher algorithm performs a character *substitution*, this means the string must be mutable
// static char testString[]{ "In 814 BC Carthage was founded by Phoenician settlers." };
// ^^^^ alternate test using a mutable C-string, just to demonstrate why `std::span` is used for the interface:
// it is constructible from all kind of string-like objects including a `std::array` or `std::vector` of characters
// the API can be used like that: caesar::encipher(std::span{ testString }, { 'A', 'Z' }, 4);
// however, ...
// 1) we want to reuse the values in both the encipher and decipher functions, and
// 2) constexpr - if applicable - has the advantage that a throwing `caesar::alphabet` construction would fail at compile-time already
const std::span strProxy{testString}; // non-owning representation of the string; unlike `std::string_view` a `std::span` grants mutable access to the referenced string buffer
constexpr caesar::alphabet upper{'A', 'Z'};
constexpr caesar::alphabet lower{'a', 'z'};
// constexpr caesar::alphabet digit{ '0', '9' };
constexpr caesar::shift_t shift{4};
caesar::encipher(strProxy, upper, shift);
caesar::encipher(strProxy, lower, shift);
// caesar::encipher(strProxy, digit, shift);
// `std::data` used only to *explicitly* decay the C-string array in the alternate test; using it for a `std::string` works here but the size information is lost (don't do in real world!)
std::cout << std::data(testString) << std::endl;
caesar::decipher(strProxy, upper, shift);
caesar::decipher(strProxy, lower, shift);
// caesar::decipher(strProxy, digit, shift);
std::cout << std::data(testString) << std::endl;
return 0;
}