6 #ifndef TURI_ML2_COLUMN_STATISTICS_H_ 7 #define TURI_ML2_COLUMN_STATISTICS_H_ 9 #include <core/data/flexible_type/flexible_type.hpp> 10 #include <core/logging/assertions.hpp> 11 #include <core/storage/serialization/serialization_includes.hpp> 12 #include <toolkits/ml_data_2/ml_data_column_modes.hpp> 13 #include <model_server/lib/variant.hpp> 15 namespace turi {
namespace v2 {
namespace ml_data_internal {
/**
 * Default implementation of the per-index count query.
 *
 * This version ignores `index` and unconditionally returns size_t(-1),
 * i.e. the largest value representable in size_t.
 * NOTE(review): presumably a "not available" sentinel that concrete
 * statistics classes override -- confirm against the subclasses.
 *
 * \param index  Index being queried; not used by this default.
 * \return size_t(-1) for every index.
 */
54 virtual size_t count(
size_t index)
const {
return size_t(-1); }
/**
 * Default implementation of the per-index mean query.
 *
 * This version ignores `index` and unconditionally returns NAN.
 * NOTE(review): presumably signals "no mean available" until a concrete
 * statistics class overrides it -- confirm against the subclasses.
 * Callers must test the result with std::isnan(); NAN never compares
 * equal to anything, including itself.
 *
 * \param index  Index being queried; not used by this default.
 * \return NAN for every index.
 */
59 virtual double mean(
size_t index)
const {
return NAN; }
/**
 * Default implementation of the per-index standard deviation query.
 *
 * This version ignores `index` and unconditionally returns NAN.
 * NOTE(review): presumably signals "no standard deviation available"
 * until a concrete statistics class overrides it -- confirm against the
 * subclasses.
 *
 * \param index  Index being queried; not used by this default.
 * \return NAN for every index.
 */
64 virtual double stdev(
size_t index)
const {
return NAN; }
/**
 * Default implementation of the per-index "number of positive values"
 * query.
 *
 * This version ignores `index` and unconditionally returns size_t(-1),
 * i.e. the largest value representable in size_t.
 * NOTE(review): presumably a "not available" sentinel, and "positive"
 * presumably means strictly greater than zero -- confirm both against
 * the overriding implementations.
 *
 * \param index  Index being queried; not used by this default.
 * \return size_t(-1) for every index.
 */
69 virtual size_t n_positive(
size_t index)
const {
return size_t(-1); }
81 size_t thread_idx,
const std::vector<size_t>& cat_index_vect) = 0;
85 size_t thread_idx,
const std::vector<double>& value_vect) = 0;
89 size_t thread_idx,
const std::vector<std::pair<size_t, double> >& dict) = 0;
114 const std::map<std::string, variant_type>& creation_options);
116 const std::map<std::string, variant_type>& get_serialization_parameters()
const {
117 return creation_options;
/**
 * Hook for supplying data to the statistics object as a map of named
 * variant values.
 *
 * The default body is empty: `params` is accepted and discarded, so a
 * class that does not override this method is unaffected by the call.
 * NOTE(review): the expected keys of `params` are not visible from this
 * declaration -- check the overriding classes before relying on any.
 *
 * \param params  Named values to apply; ignored by this default.
 */
122 virtual void set_data(
const std::map<std::string, variant_type>& params) {}
132 std::map<std::string, variant_type> creation_options;
139 std::string column_name;
142 std::map<std::string, flexible_type> options;
158 size_t version = m->get_version();
162 std::map<std::string, variant_type> serialization_parameters =
163 m->get_serialization_parameters();
166 serialization_parameters[
"version"] =
to_variant(m->get_version());
173 } END_OUT_OF_PLACE_SAVE()
178 arc >> is_not_nullptr;
184 std::map<std::string, variant_type> creation_options;
189 m->load_version(arc, version);
192 m = std::shared_ptr<v2::ml_data_internal::column_statistics>(
nullptr);
194 } END_OUT_OF_PLACE_LOAD()
#define BEGIN_OUT_OF_PLACE_LOAD(arc, tname, tval)
Macro to make it easy to define out-of-place loads.
The serialization input archive object which, provided with a reference to an istream, will read from the istream, providing deserialization capabilities.
virtual void update_dict_statistics(size_t thread_idx, const std::vector< std::pair< size_t, double > > &dict)=0
Update statistics after observing a dictionary.
void variant_deep_load(variant_type &v, iarchive &iarc)
virtual size_t get_version() const =0
virtual void save_impl(turi::oarchive &oarc) const =0
virtual void initialize()=0
Initialize the statistics – counting, mean, and stdev.
void variant_deep_save(const variant_type &v, oarchive &oarc)
virtual void load_version(turi::iarchive &iarc, size_t version)=0
static std::shared_ptr< column_statistics > factory_create(const std::map< std::string, variant_type > &creation_options)
virtual void finalize()=0
virtual std::shared_ptr< column_statistics > create_cleared_copy() const =0
virtual size_t num_observations() const
bool operator!=(const column_statistics &other) const
virtual void update_categorical_statistics(size_t thread_idx, const std::vector< size_t > &cat_index_vect)=0
Update categorical statistics for a batch of categorical indices.
virtual void update_numeric_statistics(size_t thread_idx, const std::vector< double > &value_vect)=0
Update numeric statistics for a batch of real values.
virtual void set_data(const std::map< std::string, variant_type > &params)
virtual bool is_equal(const column_statistics *other) const =0
variant_type to_variant(const T &f)
The serialization output archive object which, provided with a reference to an ostream, will write to the ostream, providing serialization capabilities.
bool operator==(const column_statistics &other) const
#define BEGIN_OUT_OF_PLACE_SAVE(arc, tname, tval)
Macro to make it easy to define out-of-place saves.