Turi Create  4.0
csv_writer.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef TURI_SFRAME_CSV_WRITER_HPP
7 #define TURI_SFRAME_CSV_WRITER_HPP
8 #include <string>
9 #include <vector>
10 #include <iostream>
11 #include <core/data/flexible_type/flexible_type.hpp>
12 namespace turi {
13 
14 
15 /**
16  * \ingroup sframe_physical
17  * \addtogroup csv_utils CSV Parsing and Writing
18  * \{
19  */
20 
21 /**
22  * CSV Writer: holds the CSV formatting options and writes rows of values to an output stream.
23  */
24 class csv_writer {
25  public:
26 
27  // the ordering is slightly odd. But this is compatible with the python csv
28  // quote level
29  enum class csv_quote_level {
30  QUOTE_MINIMAL, ///< NOT IMPLEMENTED. Equivalent to QUOTE_NONNUMERIC
31  QUOTE_ALL, ///< Quotes all fields
32  QUOTE_NONNUMERIC, ///< Equivalent to python csv.QUOTE_NONNUMERIC. Numbers are not quoted
33  QUOTE_NONE, ///< Equivalent to python csv.QUOTE_NONE. No quoting is performed
34  };
35 
36  /**
37  * The delimiter string used to separate fields (Default ",")
38  */
39  std::string delimiter = ",";
40 
41  /**
42  * The character to use to identify the beginning of a C escape sequence
43  * (Default '\'). i.e. "\n" will be converted to the '\n' character, "\\"
44  * will be converted to "\", etc. Note that only the single character
45  * escapes are converted. unicode (\Unnnn), octal (\nnn), hexadecimal (\xnn)
46  * are not interpreted.
47  */
48  char escape_char = '\\';
49 
50  /**
51  * If true, escape characters will not be used at all. Note that enabling
52  * this may result in non-parseable CSVs. NOTE(review): given the member name, "If true" looks inverted (likely "If false") - confirm against the implementation.
53  */
54  bool use_escape_char = true;
55 
56  /**
57  * If set to true, pairs of quote characters in a quoted string
58  * are interpreted as a single quote (Default true).
59  * For instance, if set to true, the 2nd field of the 2nd line is read as
60  * \b "hello "world""
61  * \verbatim
62  * user, message
63  * 123, "hello ""world"""
64  * \endverbatim
65  */
66  bool double_quote = true;
67 
68  /**
69  * The quote character to use (Default '\"')
70  */
71  char quote_char = '\"';
72 
73  /**
74  * new line terminator. Defaults to "\n"
75  */
76  std::string line_terminator= "\n";
77 
78  /**
79  * Whether the header is written
80  */
81  bool header = true;
82 
83  /**
84  * The quoting level. Defaults to quoting everything except for numbers
85  */
86  csv_quote_level quote_level = csv_quote_level::QUOTE_NONNUMERIC;
87 
88  /**
89  * String to emit for missing values
90  */
91  std::string na_value= "";
92 
93  /**
94  * Writes an array of strings as a row, verbatim without escaping /
95  * modifications. (only inserting delimiter characters).
96  * Not safe to use in parallel.
97  */
98  void write_verbatim(std::ostream& out, const std::vector<std::string>& row);
99 
100  /**
101  * Writes an array of values as a row, making the appropriate formatting
102  * changes. Not safe to use in parallel.
103  */
104  void write(std::ostream& out, const std::vector<flexible_type>& row);
105 
106  /**
107  * Converts one value to a string.
108  * \param out The stream to write to
109  * \param val The value to emit
110  * \param allow_empty_output This parameter is a little tricky to interpret.
111  * If this flag is set to true (default), some inputs may result in
112  * completely empty outputs. (For instance empty string, or missing value
113  * where na_value is the empty string). This can cause issues in some
114  * situations. For instance, in a csv file with only a single column, some
115  * parsers may skip empty lines. If this flag is set to false, the completely
116  * empty output will never be emitted and instead
117  * out << quote_char << quote_char
118  * will be generated.
119  */
120  void csv_print(std::ostream& out,
121  const flexible_type& val,
122  bool allow_empty_output=true);
123 
124  private:
125 
126  /**
127  * Converts one value, appending it to a string.
128  * minimal quoting is performed: only strings are quoted.
129  * This is used for recursive prints (ex: printing a list)
130  */
131  void csv_print_internal(std::string& out, const flexible_type& val);
132 
133  /*
134  * These are basically some optimizations to csv_print / csv_print_internal
135  * to avoid allocating additional strings every time. We just
136  * repeatedly make use of the same set of buffers.
137  * This does mean that csv_print is *not* thread safe. But that's alright.
138  */
139 
140  /** The buffer used by csv_print to handle additional quoting required.
141  * Specifically, csv_print calls csv_print_internal in some cases
142  * (like dictionary) into this buffer. Then calls escape_string on this
143  * buffer to generate a dictionary in string form.
144  */
145  std::string m_complex_type_temporary;
146  std::string m_complex_type_escape_buffer;
147  size_t m_complex_type_escape_buffer_len = 0;
148 
149  /**
150  * Temporary storage used by csv_print, and csv_print_internal to escape
151  * flexible_types containing strings.
152  */
153  std::string m_string_escape_buffer;
154  size_t m_string_escape_buffer_len = 0;
155 };
156 
157 /// \}
158 
159 } // namespace turi
160 #endif
void write_verbatim(std::ostream &out, const std::vector< std::string > &row)
NOT IMPLEMENTED. Equivalent to QUOTE_NONNUMERIC.
void csv_print(std::ostream &out, const flexible_type &val, bool allow_empty_output=true)
void write(std::ostream &out, const std::vector< flexible_type > &row)
csv_quote_level quote_level
Definition: csv_writer.hpp:86
Equivalent to python csv.QUOTE_NONE. No quoting is performed.
std::string delimiter
Definition: csv_writer.hpp:39
std::string na_value
Definition: csv_writer.hpp:91
std::string line_terminator
Definition: csv_writer.hpp:76