// Version macros: the three numeric components (major, minor, patch) and the
// matching "major.minor.patch" string literal.
// NOTE(review): presumably the version of this library rather than of the NPY
// file format — confirm.
32#define NPY_VERSION_MAJOR 2
33#define NPY_VERSION_MINOR 0
34#define NPY_VERSION_PATCH 0
35#define NPY_VERSION_STRING "2.0.0"
56 } endian_test = {0x01020304};
// Looks up the (data type, endianness) pair for an NPY dtype string and
// returns it by const reference.
// NOTE(review): presumably the inverse of to_dtype; the definition is not
// visible here, so the behaviour for unrecognised strings is unknown — confirm.
103const std::pair<data_type_t, endian_t> &
from_dtype(
const std::string &dtype);
117 const std::vector<size_t> &
shape);
146template <
typename CHAR>
148 const std::string &dtype,
bool fortran_order,
149 const std::vector<size_t> &shape) {
150 std::ostringstream buff;
151 buff <<
"{'descr': '" << dtype;
152 buff <<
"', 'fortran_order': " << (fortran_order ?
"True" :
"False");
153 buff <<
", 'shape': (";
154 for (
auto dim = shape.begin(); dim < shape.end(); ++dim) {
156 if (dim < shape.end() - 1) {
161 if (shape.size() == 1) {
166 std::string dictionary = buff.str();
167 auto dict_length = dictionary.size() + 1;
168 std::string end =
"\n";
170 if (header_length % 64 != 0) {
171 header_length = ((header_length / 64) + 1) * 64;
173 end = std::string(dict_length - dictionary.length(),
' ');
178 static_cast<char>(0x93),
'N',
'U',
'M',
'P',
'Y', 0x01, 0x00,
179 static_cast<char>(dict_length), 0x00};
181 output.write(
reinterpret_cast<const CHAR *
>(dictionary.data()),
182 dictionary.length());
183 output.write(
reinterpret_cast<const CHAR *
>(end.data()), end.length());
193template <
typename T,
typename CHAR>
195 size_t num_elements,
endian_t endianness);
203template <
typename T,
typename CHAR>
206 std::vector<size_t> shape;
223template <
typename T,
template <
typename>
class TENSOR,
typename CHAR>
224void save(std::basic_ostream<CHAR> &output,
const TENSOR<T> &
tensor,
226 save<TENSOR<T>, CHAR>(output,
tensor, endianness);
237 std::ofstream output(path, std::ios::out | std::ios::binary);
238 if (!output.is_open()) {
239 throw std::invalid_argument(
"path");
242 save<T>(output,
tensor, endianness);
251template <
typename T,
template <
typename>
class TENSOR>
254 save<TENSOR<T>>(path,
tensor, endianness);
260template <
typename CHAR>
264 assert(header[0] == 0x93);
265 assert(header[1] ==
'N');
266 assert(header[2] ==
'U');
267 assert(header[3] ==
'M');
268 assert(header[4] ==
'P');
269 assert(header[5] ==
'Y');
270 size_t dict_length = 0;
271 if (header[6] == 0x01 && header[7] == 0x00) {
272 dict_length = header[8] | (header[9] << 8);
273 }
else if (header[6] == 0x02 && header[7] == 0x00) {
274 std::uint8_t extra[2];
275 input.read(
reinterpret_cast<CHAR *
>(extra), 2);
277 header[8] | (header[9] << 8) | (extra[0] << 16) | (extra[1] << 24);
280 std::vector<CHAR> buffer(dict_length);
281 input.read(buffer.data(), dict_length);
282 std::string dictionary(buffer.begin(), buffer.end());
293template <
typename T,
typename CHAR>
297template <
typename T,
typename CHAR> T
load(std::basic_istream<CHAR> &input) {
299 return T::load(input, info);
308template <
typename T> T
load(
const std::string &path) {
309 std::ifstream input(path, std::ios::in | std::ios::binary);
310 if (!input.is_open()) {
311 throw std::invalid_argument(
"path");
314 return load<T>(input);
323template <
typename T,
template <
typename>
class TENSOR>
324TENSOR<T>
load(
const std::string &path) {
325 return load<TENSOR<T>>(path);
396 template <
typename T>
399 throw std::runtime_error(
"Stream is closed");
402 std::ostringstream output;
403 save<T>(output,
tensor, m_endianness);
405 std::string suffix =
".npy";
406 std::string name = filename;
407 if (name.size() < 4 ||
408 !std::equal(suffix.rbegin(), suffix.rend(), name.rbegin())) {
412 write_file(name, output.str());
419 void write_file(
const std::string &filename, std::string &&bytes);
422 std::ostringstream m_output;
425 std::vector<file_entry> m_entries;
470 template <
typename T>
473 throw std::runtime_error(
"Stream is closed");
476 std::ostringstream output;
477 save<T>(output,
tensor, m_endianness);
479 std::string suffix =
".npy";
480 std::string name = filename;
481 if (name.size() < 4 ||
482 !std::equal(suffix.rbegin(), suffix.rend(), name.rbegin())) {
486 write_file(name, output.str());
493 void write_file(
const std::string &filename, std::string &&bytes);
496 std::ofstream m_output;
499 std::vector<file_entry> m_entries;
514 const std::vector<std::string> &
keys()
const;
534 template <
typename T> T
read(
const std::string &filename) {
535 std::istringstream stream(read_file(filename));
536 return load<T>(stream);
539 template <
typename T,
template <
typename>
class TENSOR>
540 TENSOR<T>
read(
const std::string &filename) {
541 return read<TENSOR<T>>(filename);
548 std::string read_file(
const std::string &filename);
553 std::istringstream m_input;
554 std::map<std::string, file_entry> m_entries;
555 std::vector<std::string> m_keys;
580 const std::vector<std::string> &
keys()
const;
599 template <
typename T> T
read(
const std::string &filename) {
600 std::istringstream stream(read_file(filename));
601 return load<T>(stream);
/**
 * Read a tensor from the archive.
 *
 * Convenience overload for tensor class templates: forwards to
 * read<TENSOR<T>>(filename) and hands back its result.
 *
 * \tparam T the element type of the tensor
 * \tparam TENSOR the tensor class template
 * \param filename the name of the tensor in the archive
 * \return the tensor produced by read<TENSOR<T>>(filename)
 */
template <typename T, template <typename> class TENSOR>
TENSOR<T> read(const std::string &filename) {
  // The result has to be returned explicitly: flowing off the end of a
  // non-void function is undefined behaviour.
  return read<TENSOR<T>>(filename);
}
621 std::string read_file(
const std::string &filename);
626 std::ifstream m_input;
627 std::map<std::string, file_entry> m_entries;
628 std::vector<std::string> m_keys;
682 : m_shape(other.m_shape), m_ravel_strides(other.m_ravel_strides),
683 m_fortran_order(other.m_fortran_order), m_dtype(other.m_dtype),
684 m_values(other.m_values) {}
688 : m_shape(std::move(other.m_shape)),
689 m_ravel_strides(std::move(other.m_ravel_strides)),
690 m_fortran_order(other.m_fortran_order), m_dtype(other.m_dtype),
691 m_values(std::move(other.m_values)) {}
695 return npy::load<tensor<T>>(path);
712 throw std::runtime_error(
"requested dtype does not match stream's dtype");
715 read_values(input, result.m_values.data(), result.m_values.size(), info);
728 void save(std::basic_ostream<char> &output,
endian_t endianness)
const {
729 write_values(output, m_values.data(), m_values.size(), endianness);
736 template <
typename... Indices>
const T &
operator()(Indices... index)
const {
737 return m_values[
ravel(std::vector<std::int32_t>({index...}))];
743 const T &
operator()(
const std::vector<std::size_t> &multi_index)
const {
744 return m_values[
ravel(multi_index)];
751 template <
typename... Indices> T &
operator()(Indices... index) {
752 return m_values[
ravel(std::vector<std::int32_t>({index...}))];
759 return m_values[
ravel(multi_index)];
763 typename std::vector<T>::iterator
begin() {
return m_values.begin(); }
766 typename std::vector<T>::const_iterator
begin()
const {
767 return m_values.begin();
771 typename std::vector<T>::iterator
end() {
return m_values.end(); }
774 typename std::vector<T>::const_iterator
end()
const {
return m_values.end(); }
779 void set(
const std::vector<std::int32_t> &multi_index,
const T &value) {
780 m_values[
ravel(multi_index)] = value;
786 const T &
get(
const std::vector<std::int32_t> &multi_index)
const {
787 return m_values[
ravel(multi_index)];
792 return to_dtype(m_dtype, endianness);
802 const std::vector<T> &
values()
const {
return m_values; }
808 if (nitems !=
size()) {
809 throw std::invalid_argument(
"nitems");
812 std::copy(source, source + nitems, m_values.begin());
818 if (source.size() !=
size()) {
819 throw std::invalid_argument(
"source.size");
822 std::copy(source.begin(), source.end(), m_values.begin());
828 if (source.size() !=
size()) {
829 throw std::invalid_argument(
"source.size");
832 m_values = std::move(source);
836 T *
data() {
return m_values.data(); }
839 const T *
data()
const {
return m_values.data(); }
842 size_t size()
const {
return m_values.size(); }
846 const std::vector<size_t> &
shape()
const {
return m_shape; }
853 size_t shape(
int index)
const {
return m_shape[index]; }
859 size_t ndim()
const {
return m_shape.size(); }
870 m_shape = other.m_shape;
871 m_ravel_strides = other.m_ravel_strides;
872 m_fortran_order = other.m_fortran_order;
873 m_dtype = other.m_dtype;
874 m_values = other.m_values;
880 m_shape = std::move(other.m_shape);
881 m_ravel_strides = std::move(other.m_ravel_strides);
882 m_fortran_order = other.m_fortran_order;
883 m_dtype = other.m_dtype;
884 m_values = std::move(other.m_values);
891 void save(
const std::string &path,
902 template <
class INDEX_IT,
class SHAPE_IT>
904 std::size_t
ravel = 0;
905 for (
auto stride = m_ravel_strides.begin(); stride < m_ravel_strides.end();
906 ++index, ++
shape, ++stride) {
907 if (*index >= *
shape) {
908 throw std::invalid_argument(
"multi_index");
911 ravel += *index * *stride;
920 size_t ravel(
const std::vector<std::int32_t> &multi_index)
const {
921 if (multi_index.size() != m_shape.size()) {
922 throw std::invalid_argument(
"multi_index");
925 std::vector<std::size_t> abs_multi_index(multi_index.size());
926 std::transform(multi_index.begin(), multi_index.end(), m_shape.begin(),
927 abs_multi_index.begin(),
928 [](std::int32_t index, std::size_t
shape) -> std::size_t {
930 return static_cast<std::size_t>(shape + index);
933 return static_cast<std::size_t
>(index);
936 return ravel(abs_multi_index);
942 size_t ravel(
const std::vector<std::size_t> &abs_multi_index)
const {
943 if (m_fortran_order) {
944 return ravel(abs_multi_index.rbegin(), m_shape.rbegin());
947 return ravel(abs_multi_index.begin(), m_shape.begin());
951 std::vector<size_t> m_shape;
952 std::vector<size_t> m_ravel_strides;
953 bool m_fortran_order;
955 std::vector<T> m_values;
961 static size_t get_size(
const std::vector<size_t> &shape) {
963 for (
auto &dim : shape) {
971 static std::vector<size_t> get_ravel_strides(
const std::vector<size_t> &shape,
972 bool fortran_order) {
973 std::vector<size_t> ravel_strides(shape.size());
975 auto ravel = ravel_strides.rbegin();
977 for (
auto max_index = shape.begin(); max_index < shape.end();
978 ++max_index, ++ravel) {
980 stride *= *max_index;
983 for (
auto max_index = shape.rbegin(); max_index < shape.rend();
984 ++max_index, ++ravel) {
986 stride *= *max_index;
990 return ravel_strides;
997 std::size_t max_length = 0;
998 for (
const auto &element : m_values) {
999 if (element.size() > max_length) {
1000 max_length = element.
size();
1009 return "<U" + std::to_string(max_length);
1012 return ">U" + std::to_string(max_length);
Class handling reading of an NPZ from a file on disk.
Definition npy.h:559
const std::vector< std::string > & keys() const
The keys of the tensors in the NPZ.
TENSOR< T > read(const std::string &filename)
Read a tensor from the archive.
Definition npy.h:613
bool contains(const std::string &filename)
Returns whether this NPZ contains the specified tensor.
npzfilereader(const std::filesystem::path &path)
Constructor.
void close()
Closes the NPZ file.
T read(const std::string &filename)
Read a tensor from the archive.
Definition npy.h:599
npzfilereader(const char *path)
Constructor.
header_info peek(const std::string &filename)
Returns the header for a specified tensor.
npzfilereader(const std::string &path)
Constructor.
bool is_open() const
Whether the NPZ file is open.
Class which handles writing of an NPZ archive to disk.
Definition npy.h:429
bool is_open() const
Returns whether the NPZ file is open.
npzfilewriter(const std::string &path, compression_method_t compression=compression_method_t::STORED, endian_t endianness=npy::endian_t::NATIVE)
Constructor.
~npzfilewriter()
Destructor. This will call npy::npzfilewriter::close, if it has not been called already.
npzfilewriter(const char *path, compression_method_t compression=compression_method_t::STORED, endian_t endianness=npy::endian_t::NATIVE)
Constructor.
void write(const std::string &filename, const T &tensor)
Write a tensor to the NPZ archive.
Definition npy.h:471
npzfilewriter(const std::filesystem::path &path, compression_method_t compression=compression_method_t::STORED, endian_t endianness=npy::endian_t::NATIVE)
Constructor.
void close()
Writes the directory and end-matter of the NPZ file, and closes the file. Further writes will fail.
Class handling reading of an NPZ from an in-memory string stream.
Definition npy.h:503
TENSOR< T > read(const std::string &filename)
Definition npy.h:540
const std::vector< std::string > & keys() const
The keys of the tensors in the NPZ.
T read(const std::string &filename)
Read a tensor from the archive.
Definition npy.h:534
npzstringreader(const std::string &bytes)
Constructor.
npzstringreader(std::string &&bytes)
Constructor.
header_info peek(const std::string &filename)
Returns the header for a specified tensor.
bool contains(const std::string &filename)
Returns whether this NPZ contains the specified tensor.
Class which handles writing of an NPZ to an in-memory string stream.
Definition npy.h:371
void close()
Writes the directory and end-matter of the NPZ file. Further writes will fail.
void write(const std::string &filename, const T &tensor)
Write a tensor to the NPZ archive.
Definition npy.h:397
~npzstringwriter()
Destructor. This will call npy::npzstringwriter::close, if it has not been called already.
std::string str() const
Returns the contents of the string stream as a string.
npzstringwriter(compression_method_t compression=compression_method_t::STORED, endian_t endianness=npy::endian_t::NATIVE)
Constructor.
The default tensor class.
Definition npy.h:650
T * data()
A pointer to the start of the underlying values buffer.
Definition npy.h:836
void copy_from(const T *source, size_t nitems)
Copy values from the source to this tensor.
Definition npy.h:807
const T & get(const std::vector< std::int32_t > &multi_index) const
Gets the value at the provided index.
Definition npy.h:786
void save(std::basic_ostream< char > &output, endian_t endianness) const
Save the tensor to the provided stream.
Definition npy.h:728
const value_type & const_reference
The const reference type of the tensor.
Definition npy.h:657
std::vector< T >::iterator end()
Iterator pointing at the end of the tensor in memory.
Definition npy.h:771
data_type_t dtype() const
The data type of the tensor.
Definition npy.h:799
const T & operator()(Indices... index) const
Variable parameter index function.
Definition npy.h:736
const value_type * const_pointer
The const pointer type of the tensor.
Definition npy.h:661
size_t ravel(const std::vector< std::size_t > &abs_multi_index) const
Ravels a multi-index into a single value indexing the buffer.
Definition npy.h:942
T & operator()(const std::vector< std::size_t > &multi_index)
Index function.
Definition npy.h:758
std::vector< T >::const_iterator begin() const
Iterator pointing at the beginning of the tensor in memory.
Definition npy.h:766
value_type * pointer
The pointer type of the tensor.
Definition npy.h:659
void copy_from(const std::vector< T > &source)
Copy values from the provided vector.
Definition npy.h:817
bool fortran_order() const
Whether the tensor data is stored in FORTRAN, or column-major, order.
Definition npy.h:866
static tensor< T > from_file(const std::string &path)
Load a tensor from the specified location on disk.
Definition npy.h:694
void set(const std::vector< std::int32_t > &multi_index, const T &value)
Sets the value at the provided index.
Definition npy.h:779
tensor< T > & operator=(const tensor< T > &other)
Copy assignment operator.
Definition npy.h:869
size_t ravel(const std::vector< std::int32_t > &multi_index) const
Ravels a multi-index into a single value indexing the buffer.
Definition npy.h:920
tensor(const std::vector< size_t > &shape, bool fortran_order)
Constructor.
Definition npy.h:674
T & operator()(Indices... index)
Variable parameter index function.
Definition npy.h:751
void move_from(std::vector< T > &&source)
Move values from the provided vector.
Definition npy.h:827
tensor(const std::vector< size_t > &shape)
Constructor.
Definition npy.h:667
std::string dtype(endian_t endianness) const
The data type of the tensor.
Definition npy.h:791
tensor< T > & operator=(tensor< T > &&other)
Move assignment operator.
Definition npy.h:879
const T & operator()(const std::vector< std::size_t > &multi_index) const
Index function.
Definition npy.h:743
tensor(tensor< T > &&other)
Move constructor.
Definition npy.h:687
const T * data() const
A pointer to the start of the underlying values buffer.
Definition npy.h:839
T value_type
The value type of the tensor.
Definition npy.h:653
const std::vector< size_t > & shape() const
The shape of the tensor. Each element is the size of the corresponding dimension.
Definition npy.h:846
std::vector< T >::iterator begin()
Iterator pointing at the beginning of the tensor in memory.
Definition npy.h:763
size_t ndim() const
The number of dimensions of the tensor.
Definition npy.h:859
size_t ravel(INDEX_IT index, SHAPE_IT shape) const
Ravels a multi-index into a single value indexing the buffer.
Definition npy.h:903
void save(const std::string &path, endian_t endianness=npy::endian_t::NATIVE)
Save this tensor to the provided location on disk.
Definition npy.h:891
size_t shape(int index) const
Returns the size of the tensor along the dimension at the specified index.
Definition npy.h:853
std::vector< T >::const_iterator end() const
Iterator pointing at the end of the tensor in memory.
Definition npy.h:774
value_type & reference
The reference type of the tensor.
Definition npy.h:655
static tensor< T > load(std::basic_istream< char > &input, const header_info &info)
Load a tensor from the provided stream.
Definition npy.h:708
const std::vector< T > & values() const
The underlying values buffer.
Definition npy.h:802
size_t size() const
The number of elements in the tensor.
Definition npy.h:842
tensor(const tensor< T > &other)
Copy constructor.
Definition npy.h:681
void write_npy_header(std::basic_ostream< CHAR > &output, const std::string &dtype, bool fortran_order, const std::vector< size_t > &shape)
Writes an NPY header to the provided stream.
Definition npy.h:147
endian_t native_endian()
This function will return the endianness of the current hardware.
Definition npy.h:52
void write_values(std::basic_ostream< CHAR > &output, const T *data_ptr, size_t num_elements, endian_t endianness)
Write values to the provided stream.
header_info peek(std::basic_istream< CHAR > &input)
Return the header information for an NPY file.
Definition npy.h:331
const std::pair< data_type_t, endian_t > & from_dtype(const std::string &dtype)
endian_t
Enumeration which represents a type of endianness.
Definition npy.h:41
@ LITTLE
Indicates the use of little-endian encoding.
@ BIG
Indicates the use of big-endian encoding.
compression_method_t
Enumeration indicating the compression method to use for data in the NPZ archive.
Definition npy.h:342
@ STORED
Store the data with no compression.
@ DEFLATED
Use the DEFLATE algorithm to compress the data.
void read_values(std::basic_istream< CHAR > &input, T *data_ptr, size_t num_elements, const header_info &info)
Read values from the provided stream.
void save(std::basic_ostream< CHAR > &output, const T &tensor, endian_t endianness=npy::endian_t::NATIVE)
Saves a tensor to the provided stream.
Definition npy.h:204
header_info read_npy_header(std::basic_istream< CHAR > &input)
Read an NPY header from the provided stream.
Definition npy.h:261
const std::string & to_dtype(data_type_t dtype, endian_t endian=endian_t::NATIVE)
Convert a data type and endianness to a NPY dtype string.
data_type_t
This enum represents the different types of tensor data that can be stored.
Definition npy.h:63
@ UNICODE_STRING
Unicode string (std::wstring)
@ FLOAT64
64-bit floating-point value (double)
@ UINT32
32-bit unsigned integer (uint)
@ COMPLEX128
128-bit complex number (std::complex<double>)
@ UINT16
16-bit unsigned integer (ushort)
@ INT64
64-bit signed integer (long)
@ COMPLEX64
64-bit complex number (std::complex<float>)
@ INT16
16-bit signed integer (short)
@ INT32
32-bit signed integer (int)
@ UINT64
64-bit unsigned integer (ulong)
@ FLOAT32
32-bit floating-point value (float)
@ UINT8
8-bit unsigned integer
@ INT8
8-bit signed integer
std::ostream & operator<<(std::ostream &os, const endian_t &obj)
T load(std::basic_istream< CHAR > &input)
Definition npy.h:297
const int STATIC_HEADER_LENGTH
Definition npy.h:37
Struct representing a file in the NPZ archive.
Definition npy.h:350
std::string filename
The name of the file.
Definition npy.h:352
std::uint32_t crc32
The CRC32 checksum of the uncompressed data.
Definition npy.h:354
bool check(const file_entry &other) const
std::uint64_t offset
The offset of the file in the archive.
Definition npy.h:362
std::uint16_t compression_method
The method used to compress the data.
Definition npy.h:360
std::uint64_t compressed_size
The size of the compressed data.
Definition npy.h:356
std::uint64_t uncompressed_size
The size of the uncompressed data.
Definition npy.h:358