Turi Create  4.0
sframe_rows.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef TURI_SFRAME_sframe_rows_HPP
7 #define TURI_SFRAME_sframe_rows_HPP
8 #include <vector>
9 #include <map>
10 #include <core/data/flexible_type/flexible_type.hpp>
11 namespace turi {
12 class oarchive;
13 class iarchive;
14 
15 
16 /**
17  * \ingroup sframe_physical
18  * \addtogroup sframe_main Main SFrame Objects
19  * \{
20  */
21 
22 /**
23  *
24  * sframe-rows is a semi-opaque wrapper around a collection of columns of
25  * flexible_type (i.e. from an SFrame / SArray). The objective is to allow the
26  * underlying representation to be column-wise, while maintaining a row-wise
27  * iterator interface.
28  *
29  * sframe_rows are fast and cheap to copy, and also allow values to be modified.
30  * Internally, sframe_rows are built on a copy-on-write architecture thus
31  * allowing for safe mutation. Most accessor methods have a "constant" version
32  * which should be used if no value modifications are to be made. For instance:
33  * - sframe_rows::begin() vs sframe_rows::cbegin()
34  * - sframe_rows::get_columns() vs sframe_rows::cget_columns()
35  *
36  * The sframe_rows object is a relatively shallow wrapper over an
37  *
38  * vector<shared_ptr<vector<flexible_type>>>
39  *
40  * where each shared_ptr<vector<flexible_type>> represents a single column.
41  * The column set can be directly accessed and modified using
42  * sframe_rows::get_columns() (returns a reference to the underlying vector)
43  * or sframe_rows::cget_columns()
44  *
45  * \TODO: We *could* templatize this around the column type, allowing this to
46  * be used for anything.
47  */
48 class sframe_rows {
49  public:
50  /// The data type of decoded column (block_contents::DECODED_COLUMN)
51  typedef std::vector<flexible_type> decoded_column_type;
52  typedef std::shared_ptr<decoded_column_type> ptr_to_decoded_column_type;
53 
54  /**
55  * Constructor
56  */
57  sframe_rows() = default;
58 
59  /**
60  * Copy constructor. The copy constructor is fast as only pointers
61  * are copied in a copy-on-write fashion.
62  */
63  sframe_rows(const sframe_rows& other) {
64  m_decoded_columns = other.m_decoded_columns;
65  m_is_unique = false;
66  other.m_is_unique = false;
67  }
68  /**
69  * Move constructor.
70  */
71  sframe_rows(sframe_rows&&) = default;
72 
73  /**
74  * Assignment operator. The assignment operator is fast as only
75  * pointers are copied in a copy on write fashion.
76  */
78  m_decoded_columns = other.m_decoded_columns;
79  m_is_unique = false;
80  other.m_is_unique = false;
81  return *this;
82  }
83 
84  /**
85  * Move assignment
86  */
87  sframe_rows& operator=(sframe_rows&&) = default;
88 
89  /// Returns the number of columns
90  inline size_t num_columns() const {
91  return m_decoded_columns.size();
92  }
93 
94  /// Returns the number of rows
95  inline size_t num_rows() const {
96  if (m_decoded_columns.empty()) return 0;
97  else if (m_decoded_columns[0] == nullptr) return 0;
98  else return m_decoded_columns[0]->size();
99  }
100 
101  /**
102  * Clears the contents of the sframe_rows datastructure.
103  */
104  void clear();
105 
106  /**
107  * Sets the size of sframe_rows. If num_rows == -1, columns are not resized.
108  *
109  * \note sframe_rows is a copy-on-write datastructure. This may trigger
110  * a full copy of the contents of sframe_rows.
111  */
112  void resize(size_t num_cols, ssize_t num_rows = -1);
113 
114  /**
115  * Adds a decoded column to the right of the sframe_rows.
116  *
117  * \code
118  * add_decoded_column(std::move(source))
119  * \endcode
120  */
121  void add_decoded_column(const ptr_to_decoded_column_type& decoded_column);
122 
123 
124  /**
125  * Returns a modifiable reference to the set of column groups
126  *
127  * \note sframe_rows is a copy-on-write datastructure. This may trigger
128  * a full copy of the contents of sframe_rows.
129  */
130  inline std::vector<ptr_to_decoded_column_type>& get_columns() {
131  if (!m_is_unique) ensure_unique();
132  return m_decoded_columns;
133  }
134 
135  /**
136  * Returns a const reference to the set of column groups
137  */
138  inline const std::vector<ptr_to_decoded_column_type>& get_columns() const {
139  return m_decoded_columns;
140  }
141 
142  /**
143  * Returns a const reference to the set of column groups
144  */
145  inline const std::vector<ptr_to_decoded_column_type>& cget_columns() const {
146  return m_decoded_columns;
147  }
148 
149  /**
150  * Serializer
151  */
152  void save(oarchive& oarc) const;
153 
154  /**
155  * Deserializer
156  */
157  void load(iarchive& oarc);
158 
159  struct iterator;
160  struct const_iterator;
161 
162  /**
163  * An row object which refererences a row of the sframe_rows
164  * and mimics a std::vector<flexible_type>.
165  *
166  * \code
167  * sframe_rows::row& r = rows[5];
168  *
169  * // assigns row 5 in the sframe_rows object "rows" to be the same as row 10
170  * r = rows[10];
171  *
172  * // assigns row 5 in the sframe_rows object "rows" to be the same as row 7
173  * // in some other sframe_rows object
174  * r = some_other_rows[7]
175  * \endcode
176  */
177  struct row {
178  private:
179  /// Creates a new row reference which references the same row as other.
180  inline row(const row& other) = default;
181 
182  /// Creates a new row reference which references the same row as other.
183  inline row(row&& other) = default;
184 
185  public:
186  inline row() = default;
187 
188 
189  /**
190  * Makes the current row object have the same reference as another
191  * row object.
192  */
193  void copy_reference(const row& other) {
194  m_source = other.m_source;
195  m_current_row_number = other.m_current_row_number;
196  }
197 
198  /**
199  * Assigns the value of this row. Modifies the row this row references
200  * to have the same values as another row.
201  */
202  row& operator=(const row& other) {
203  ASSERT_EQ(size(), other.size());
204  for (size_t i = 0;i < size(); ++i) {
205  (*this)[i] = other[i];
206  }
207  return *this;
208  }
209 
210  /**
211  * Moves another row value to this row. Modifies the row this row references
212  * to have the same values as another row.
213  */
214  row& operator=(row&& other) {
215  ASSERT_EQ(size(), other.size());
216  for (size_t i = 0;i < size(); ++i) {
217  (*this)[i] = std::move(other[i]);
218  }
219  return *this;
220  }
221 
222  inline row(const sframe_rows* source, size_t current_row_number):
223  m_source(source), m_current_row_number(current_row_number) { }
224 
225  /**
226  * Implicit cast to std::vector<flexible_type>
227  */
228  inline operator std::vector<flexible_type>() const {
229  std::vector<flexible_type> ret(size());
230  for (size_t i = 0;i < ret.size(); ++i) ret[i] = (*this)[i];
231  return ret;
232  }
233 
234  /**
235  * Equivalent to operator[] but performs bounds checking
236  */
237  inline const flexible_type& at(size_t i) const {
238  if (i < size()) return (*this)[i];
239  else throw "Index out of bounds";
240  }
241 
242  /**
243  * Equivalent to operator[] but performs bounds checking
244  */
245  inline flexible_type& at(size_t i) {
246  if (i < size()) return (*this)[i];
247  else throw "Index out of bounds";
248  }
249 
250 
251  /**
252  * Directly index column i of this row
253  */
254  inline const flexible_type& fast_at(size_t i) const {
255  const auto& column = (*(m_source->m_decoded_columns[i]));
256  return column[m_current_row_number];
257  }
258 
259  /**
260  * Directly index column i of this row
261  */
262  inline const flexible_type& operator[](size_t i) const{
263  const auto& column = (*(m_source->m_decoded_columns[i]));
264  return column[m_current_row_number];
265  }
266 
267  /**
268  * Directly index column i of this row
269  */
270  inline flexible_type& operator[](size_t i) {
271  auto& column = (*(m_source->m_decoded_columns[i]));
272  return column[m_current_row_number];
273  }
274 
275  /**
276  * Returns the number of columns in this row.
277  */
278  inline size_t size() const {
279  return m_source->num_columns();
280  }
281 
282  const sframe_rows* m_source = NULL;
283  ssize_t m_current_row_number = 0;
284  friend struct iterator;
285  friend struct const_iterator;
286  friend class sframe_rows;
287 
288  /**
289  * Iterator over values of a row
290  */
292  public boost::iterator_facade<const_iterator,
293  const flexible_type,
294  boost::random_access_traversal_tag> {
295  /// Pointer to the input range. NULL if end iterator.
296  const row* m_source = nullptr;
297  size_t m_current_idx = 0;
298  const_iterator() { }
299  const_iterator(const const_iterator&) = default;
300  const_iterator(const_iterator&&) = default;
301  const_iterator& operator=(const const_iterator&) = default;
302  const_iterator& operator=(const_iterator&&) = default;
303  /// default constructor
304  explicit const_iterator(const sframe_rows::row& source, size_t current_idx = 0):
305  m_source(&source), m_current_idx(current_idx) { };
306 
307  private:
308  friend class boost::iterator_core_access;
309  /// advances the iterator. See boost::iterator_facade
310  inline void increment() {
311  ++m_current_idx;
312  }
313  /// advances the iterator. See boost::iterator_facade
314  inline void advance(size_t n) {
315  m_current_idx += n;
316  }
317 
318  /// Tests for iterator equality. See boost::iterator_facade
319  inline bool equal(const const_iterator& other) const {
320  return this->m_source == other.m_source &&
321  this->m_current_idx == other.m_current_idx;
322  }
323 
324  /// Dereference. See boost::iterator_facade
325  inline const flexible_type& dereference() const {
326  return m_source->fast_at(m_current_idx);
327  }
328 
329  /// Dereference. See boost::iterator_facade
330  const ssize_t distance_to(const const_iterator& other) const {
331  return other.m_current_idx - m_current_idx;
332  }
333  };
334  /**
335  * Gets a constant iterator to the first element of the row.
336  */
337  inline const_iterator begin() const {
338  return const_iterator(*this, 0);
339  }
340 
341  /**
342  * Gets a constant iterator to the last element of the row
343  */
344  inline const_iterator end() const {
345  return const_iterator(*this, size());
346  }
347 
348 
349  };
350 
351  /**
352  * A constant iterator across rows of sframe_rows
353  */
355  public boost::iterator_facade<const_iterator,
356  const row,
357  boost::random_access_traversal_tag> {
358  /// Pointer to the input range. NULL if end iterator.
359  const sframe_rows* m_source = NULL;
360  row m_row;
361  /// default constructor
363  const_iterator(const const_iterator& other) {
364  m_source = other.m_source;
365  m_row.copy_reference(other.m_row);
366  }
367  const_iterator(const_iterator&& other) {
368  m_source = other.m_source;
369  m_row.copy_reference(other.m_row);
370  }
371  const_iterator& operator=(const const_iterator& other) {
372  m_source = other.m_source;
373  m_row.copy_reference(other.m_row);
374  return *this;
375  }
376  const_iterator& operator=(const_iterator&& other) {
377  m_source = other.m_source;
378  m_row.copy_reference(other.m_row);
379  return *this;
380  }
381  explicit const_iterator(const sframe_rows* source, size_t current_row_number = 0):
382  m_row(source, current_row_number) { };
383  private:
384  friend class boost::iterator_core_access;
385  /// advances the iterator. See boost::iterator_facade
386  inline void increment() {
387  ++m_row.m_current_row_number;
388  }
389  /// advances the iterator. See boost::iterator_facade
390  inline void advance(size_t n) {
391  m_row.m_current_row_number += n;
392  }
393 
394  /// Tests for iterator equality. See boost::iterator_facade
395  inline bool equal(const const_iterator& other) const {
396  return this->m_source == other.m_source &&
397  m_row.m_current_row_number == other.m_row.m_current_row_number;
398  }
399 
400  /// Dereference. See boost::iterator_facade
401  inline const row& dereference() const {
402  return m_row;
403  }
404 
405  /// Dereference. See boost::iterator_facade
406  const ssize_t distance_to(const const_iterator& other) const {
407  return other.m_row.m_current_row_number - m_row.m_current_row_number;
408  }
409  };
410 
411  /**
412  * A non-constant interator over rows of sframe_rows
413  */
414  struct iterator:
415  public boost::iterator_facade<iterator,
416  row,
417  boost::random_access_traversal_tag> {
418  /// Pointer to the input range. NULL if end iterator.
419  const sframe_rows* m_source = NULL;
420  mutable row m_row;
421  /// default constructor
422  iterator() {}
423  iterator(const iterator& other) {
424  m_source = other.m_source;
425  m_row.copy_reference(other.m_row);
426  }
427  iterator(iterator&& other) {
428  m_source = std::move(other.m_source);
429  m_row.copy_reference(other.m_row);
430  }
431  iterator& operator=(const iterator& other) {
432  m_source = other.m_source;
433  m_row.copy_reference(other.m_row);
434  return *this;
435  }
436  iterator& operator=(iterator&& other) {
437  m_source = std::move(other.m_source);
438  m_row.copy_reference(other.m_row);
439  return *this;
440  }
441  explicit iterator(sframe_rows* source, size_t current_row_number = 0):
442  m_row(source, current_row_number) { };
443  private:
444  friend class boost::iterator_core_access;
445  /// advances the iterator. See boost::iterator_facade
446  inline void increment() {
447  ++m_row.m_current_row_number;
448  }
449  /// advances the iterator. See boost::iterator_facade
450  inline void advance(size_t n) {
451  m_row.m_current_row_number += n;
452  }
453 
454  /// Tests for iterator equality. See boost::iterator_facade
455  inline bool equal(const iterator& other) const {
456  return this->m_source == other.m_source &&
457  m_row.m_current_row_number == other.m_row.m_current_row_number;
458  }
459 
460  /// Dereference. See boost::iterator_facade
461  inline row& dereference() const {
462  return m_row;
463  }
464 
465  /// Dereference. See boost::iterator_facade
466  const ssize_t distance_to(const iterator& other) const {
467  return other.m_row.m_current_row_number - m_row.m_current_row_number;
468  }
469  };
470 
471  /**
472  * Gets a constant iterator to the first row of the sframe_rows.
473  */
474  inline const_iterator begin() const {
475  return const_iterator(this, 0);
476  }
477 
478  /**
479  * Gets a constant iterator to the end of the sframe_rows.
480  */
481  inline const_iterator end() const {
482  return const_iterator(this, num_rows());
483  }
484 
485  /**
486  * Gets a constant iterator to the first row of the sframe_rows.
487  */
488  inline const_iterator cbegin() const {
489  return const_iterator(this, 0);
490  }
491 
492  /**
493  * Gets a constant iterator to the end of the sframe_rows.
494  */
495  inline const_iterator cend() const {
496  return const_iterator(this, num_rows());
497  }
498 
499  /**
500  * Gets a mutable iterator to the first row of the sframe_rows.
501  *
502  * \note sframe_rows is a copy-on-write datastructure. This may trigger
503  * a full copy of the contents of sframe_rows.
504  */
505  inline iterator begin() {
506  if (!m_is_unique) ensure_unique();
507  return iterator(this, 0);
508  }
509 
510  /**
511  * Gets a mutable iterator to the first row of the sframe_rows.
512  *
513  * \note sframe_rows is a copy-on-write datastructure. This may trigger
514  * a full copy of the contents of sframe_rows.
515  */
516  inline iterator end() {
517  if (!m_is_unique) ensure_unique();
518  return iterator(this, num_rows());
519  }
520 
521  /**
522  * Reads a particular row of the sframe_rows object.
523  */
524  inline const row operator[](size_t i) const {
525  return row(this, i);
526  }
527 
528  /**
529  * gets a mutable reference to a particular row of the sframe_rows object
530  */
531  inline row operator[](size_t i) {
532  if (!m_is_unique) ensure_unique();
533  return row(this, i);
534  }
535 
536  /**
537  * Ensures that this is a unique copy
538  */
539  void ensure_unique();
540 
541  /**
542  * Modifies the SFrame Rows inplace to enforce typing.
543  * \see type_check
544  */
545  void type_check_inplace(const std::vector<flex_type_enum>& typelist);
546 
547  /**
548  * Returns a new sframe rows where each column has the set of types enforced
549  * \see type_check_inplace
550  */
551  sframe_rows type_check(const std::vector<flex_type_enum>& typelist) const;
552 
553  private:
554  std::vector<ptr_to_decoded_column_type> m_decoded_columns;
555  mutable bool m_is_unique = true;
556  }; // class sframe_rows
557 
558 /// \}
559 } // namespace turi
560 #endif
const flexible_type & fast_at(size_t i) const
const_iterator end() const
size_t num_rows() const
Returns the number of rows.
Definition: sframe_rows.hpp:95
sframe_rows & operator=(const sframe_rows &other)
Definition: sframe_rows.hpp:77
const row * m_source
Pointer to the input range. NULL if end iterator.
row & operator=(const row &other)
const_iterator begin() const
void save(oarchive &oarc) const
sframe_rows(const sframe_rows &other)
Definition: sframe_rows.hpp:63
The serialization input archive object which, provided with a reference to an istream, will read from the istream, providing deserialization capabilities.
Definition: iarchive.hpp:60
size_t num_columns() const
Returns the number of columns.
Definition: sframe_rows.hpp:90
const std::vector< ptr_to_decoded_column_type > & get_columns() const
void load(iarchive &oarc)
const std::vector< ptr_to_decoded_column_type > & cget_columns() const
const_iterator cbegin() const
const row operator[](size_t i) const
const flexible_type & operator[](size_t i) const
row operator[](size_t i)
const_iterator begin() const
const_iterator cend() const
const_iterator(const sframe_rows::row &source, size_t current_idx=0)
default constructor
sframe_rows()=default
const sframe_rows * m_source
Pointer to the input range. NULL if end iterator.
std::vector< ptr_to_decoded_column_type > & get_columns()
const_iterator()
default constructor
iterator()
default constructor
sframe_rows type_check(const std::vector< flex_type_enum > &typelist) const
row & operator=(row &&other)
flexible_type & at(size_t i)
const sframe_rows * m_source
Pointer to the input range. NULL if end iterator.
flexible_type & operator[](size_t i)
void resize(size_t num_cols, ssize_t num_rows=-1)
const_iterator end() const
The serialization output archive object which, provided with a reference to an ostream, will write to the ostream, providing serialization capabilities.
Definition: oarchive.hpp:80
std::vector< flexible_type > decoded_column_type
The data type of decoded column (block_contents::DECODED_COLUMN)
Definition: sframe_rows.hpp:51
void add_decoded_column(const ptr_to_decoded_column_type &decoded_column)
void copy_reference(const row &other)
const flexible_type & at(size_t i) const
void type_check_inplace(const std::vector< flex_type_enum > &typelist)