6 #ifndef TURI_STANDARDIZATION_H_ 7 #define TURI_STANDARDIZATION_H_ 10 #include <core/data/flexible_type/flexible_type.hpp> 14 #include <Eigen/SparseCore> 17 #include <ml/optimization/optimization_interface.hpp> 20 #include <toolkits/ml_data_2/ml_data.hpp> 21 #include <toolkits/ml_data_2/metadata.hpp> 121 virtual void transform(DenseVector &point)
const = 0;
150 virtual void transform(SparseVector &point)
const = 0;
241 const std::shared_ptr<v2::ml_metadata> &
ml_metadata,
242 bool _use_reference =
true){
245 use_reference = _use_reference;
247 for(
size_t i = 0; i < ml_metadata->num_columns(); i++){
248 if (ml_metadata->is_categorical(i)) {
249 total_size += ml_metadata->index_size(i) - use_reference;
260 for(
size_t i = 0; i < ml_metadata->num_columns(); i++){
262 const auto& stats = ml_metadata->statistics(i);
273 switch(ml_metadata->column_mode(i)) {
276 case v2::ml_column_mode::NUMERIC: {
277 scale(idx) = stats->mean(0) * stats->mean(0) +
278 stats->stdev(0) * stats->stdev(0);
284 case v2::ml_column_mode::CATEGORICAL: {
285 for (
size_t c = 0; c < ml_metadata->index_size(i); c++){
286 if(c >= use_reference){
287 scale(idx) = stats->mean(c) * stats->mean(c) +
288 stats->stdev(c) * stats->stdev(c);
296 case v2::ml_column_mode::NUMERIC_VECTOR: {
297 for (
size_t c = 0; c < ml_metadata->index_size(i); c++){
298 scale(idx) = stats->mean(c) * stats->mean(c) +
299 stats->stdev(c) * stats->stdev(c);
306 case v2::ml_column_mode::CATEGORICAL_VECTOR: {
307 for (
size_t c = 0; c < ml_metadata->index_size(i); c++){
308 if(c >= use_reference){
309 scale(idx) = stats->mean(c) * stats->mean(c) +
310 stats->stdev(c) * stats->stdev(c);
318 case v2::ml_column_mode::DICTIONARY: {
319 for(
size_t k = 0; k < ml_metadata->index_size(i); ++k) {
320 scale(idx) = stats->mean(k) * stats->mean(k) +
321 stats->stdev(k) * stats->stdev(k);
327 case v2::ml_column_mode::UNTRANSLATED: {
332 std::cerr <<
"Unsupported ml_column_mode for L2 rescaling" << std::endl;
333 ASSERT_UNREACHABLE();
340 scale = scale.array().pow(0.5);
356 point = point.cwiseQuotient(scale);
368 for (
size_t i = 0; i < size_t(points.rows()); i++) {
369 points.row(i) = points.row(i).cwiseQuotient(scale.transpose());
381 point = point.cwiseProduct(scale);
395 for (SparseVector::InnerIterator i(point); i; ++i){
396 i.valueRef() = i.value() * scale(i.index());
409 for (SparseVector::InnerIterator i(point); i; ++i){
410 i.valueRef() = i.value() / scale(i.index());
void load(turi::iarchive &iarc)
virtual void load(turi::iarchive &iarc)=0
The serialization input archive object which, provided with a reference to an istream, will read from the istream, providing deserialization capabilities.
const double OPTIMIZATION_ZERO
Optimization method zero.
virtual void transform(DenseVector &point) const =0
virtual void inverse_transform(DenseVector &point) const =0
void transform(DenseMatrix &points) const
void transform(DenseVector &point) const
standardization_interface()
void save(turi::oarchive &oarc) const
virtual void save(turi::oarchive &oarc) const =0
virtual ~standardization_interface()
l2_rescaling(const std::shared_ptr< v2::ml_metadata > &ml_metadata, bool _use_reference=true)
void transform(SparseVector &point) const
size_t get_total_size() const
The serialization output archive object which, provided with a reference to an ostream, will write to the ostream, providing serialization capabilities.
void inverse_transform(DenseVector &point) const
void inverse_transform(SparseVector &point) const