6 #ifndef TURI_UNITY_LIB_SFRAME_CSV_LINE_TOKENIZER_HPP 7 #define TURI_UNITY_LIB_SFRAME_CSV_LINE_TOKENIZER_HPP 13 #include <core/data/flexible_type/flexible_type.hpp> 14 #include <core/parallel/mutex.hpp> 19 class flexible_type_parser;
166 std::vector<std::string>& output);
203 std::function<
bool (std::string&,
size_t)> fn);
279 std::vector<flexible_type>& output,
280 bool permit_undefined,
281 const std::vector<size_t>* output_order =
nullptr);
295 bool parse_as(
char** buf,
size_t len,
296 const char* raw,
size_t rawlen,
309 std::string field_buffer;
311 size_t field_buffer_len = 0;
314 std::string parse_error;
317 std::string tokenizer_impl_error;
318 ssize_t tokenizer_impl_fail_pos = -1;
321 enum class tokenizer_state {
322 START_FIELD, IN_FIELD, IN_QUOTED_FIELD
342 template <
typename Fn,
typename Fn2,
typename Fn3>
343 bool tokenize_line_impl(
char* str,
size_t len,
348 std::shared_ptr<flexible_type_parser> parser;
352 bool delimiter_is_new_line =
false;
353 bool delimiter_is_space_but_not_tab =
false;
354 char delimiter_first_character;
355 bool delimiter_is_singlechar =
false;
356 bool delimiter_is_not_empty =
true;
357 bool empty_string_in_na_values =
false;
358 bool is_regular_line_terminator =
true;
365 bool check_substitutions(
const char* buf,
size_t len,
flexible_type& out);
377 <<
"delimiter=\"" << t.
delimiter <<
"\", " 385 for (
size_t i = 0; i < t.
na_values.size(); ++i) {
std::unordered_set< std::string > false_values
const std::string & get_last_parse_error_diagnosis() const
bool tokenize_line(const char *str, size_t len, std::vector< std::string > &output)
std::unordered_set< std::string > true_values
std::vector< std::string > na_values
bool parse_as(char **buf, size_t len, const char *raw, size_t rawlen, flexible_type &out, bool recursive_parse=false)
bool only_raw_string_substitutions
std::string line_terminator