6 #ifndef TURI_FLEXIBLE_TYPE_STRING_PARSER_HPP 7 #define TURI_FLEXIBLE_TYPE_STRING_PARSER_HPP 8 #include <boost/algorithm/string.hpp> 9 #include <boost/spirit/include/qi.hpp> 10 #include <core/data/flexible_type/flexible_type.hpp> 11 #include <core/data/flexible_type/string_escape.hpp> 41 std::unordered_set<std::string> na_val;
// Set of strings that the string_parser setup code iterates over (alongside
// na_val and false_val). Presumably each entry is registered in map_vals as a
// "true" value -- the loop body is not visible here, confirm.
42 std::unordered_set<std::string> true_val;
// Counterpart set, iterated the same way; presumably registered as "false"
// values -- confirm against the loop body.
43 std::unordered_set<std::string> false_val;
// Declares the `restricted_string` Spirit terminal. The use_terminal and
// make_primitive specializations in this file refer to it through
// parser_impl::tag::restricted_string.
53 BOOST_SPIRIT_TERMINAL_EX(restricted_string);
// Specialization of use_terminal for the one-argument form of the
// restricted_string terminal in the qi domain. The base class / body is not
// visible in this excerpt (presumably it enables the terminal -- confirm).
61 template <
typename T1>
62 struct use_terminal<qi::domain,
63 terminal_ex<parser_impl::tag::restricted_string, fusion::vector1<T1> > >
// Capacity, in characters, of the fixed-size array `buf` declared below.
79 constexpr
static size_t STACK_BUF_SIZE = 128;
// Fixed-size character storage; get_string() reads its first `pos` characters.
80 char buf[STACK_BUF_SIZE];
// Appends one character. The behaviour branches on `pos` relative to
// STACK_BUF_SIZE; several statements inside the branches are not visible in
// this excerpt.
83 inline void add_char(
char c) {
// Case 1: `pos` is still below the capacity of `buf`.
84 if (pos < STACK_BUF_SIZE) {
87 }
// Case 2: `buf` is exactly full -- copy all STACK_BUF_SIZE characters into
// the std::string `altbuf`.
else if (pos == STACK_BUF_SIZE) {
89 altbuf = std::string(buf, STACK_BUF_SIZE);
// Returns a mutable std::string reference (presumably `altbuf`; the return
// statement is not visible here).
97 std::string& get_string() {
// While `pos` has not gone past the capacity, `altbuf` is (re)built from the
// first `pos` characters of `buf`.
98 if (pos <= STACK_BUF_SIZE) altbuf = std::string(buf, pos);
// string_parser derives from qi::primitive_parser, passing itself as the
// template argument.
117 : boost::spirit::qi::primitive_parser<string_parser> {
// The enclosing declaration of this template is not visible in this excerpt
// (presumably the parser's nested `attribute` metafunction -- confirm).
119 template <
typename Context,
typename Iterator>
// Exposed type is turi::flexible_type; parse() assigns either a map_vals entry
// or the parsed std::string to its attribute.
121 typedef ::turi::flexible_type type;
// True when config.delimiter is non-empty (assigned below).
126 bool has_delimiter =
false;
// First character of config.delimiter.
// NOTE(review): no initializer here, and the only assignment visible is guarded
// by has_delimiter, so the value is indeterminate when the delimiter is empty.
// parse() compares it against the current character -- confirm that read is
// always guarded by has_delimiter (the guarding line is not visible).
127 char delimiter_first_char;
// True when config.delimiter is exactly one character long; parse() uses this
// to skip the multi-character test_is_delimiter() comparison.
128 bool delimiter_is_singlechar =
false;
// Lookup table from a string to a replacement flexible_type; parse() returns
// the mapped value when the parsed text matches a key.
129 std::unordered_map<std::string, turi::flexible_type> map_vals;
// Derive the delimiter flags from the configured delimiter string.
134 has_delimiter = config.
delimiter.length() > 0;
135 delimiter_is_singlechar = config.
delimiter.length() == 1;
// Only read delimiter[0] when the delimiter is non-empty.
136 if (has_delimiter) delimiter_first_char = config.
delimiter[0];
// Three loops over the configured NA / true / false string sets. The loop
// bodies are not visible in this excerpt (presumably each inserts an entry
// into map_vals -- confirm).
// NOTE(review): `auto s` copies every std::string per iteration; if the bodies
// only read `s`, `const auto& s` would avoid the copies.
137 for (
auto s: config.na_val) {
140 for (
auto s: config.true_val) {
143 for (
auto s: config.false_val) {
// States of the field tokenizer driven by parse(): START_FIELD is the initial
// state, IN_QUOTED_FIELD is entered when the first character is a single or
// double quote, and IN_FIELD otherwise. Any further enumerators are not
// visible in this excerpt.
149 enum class tokenizer_state {
150 START_FIELD, IN_FIELD, IN_QUOTED_FIELD,
// Tests whether the characters starting at `c` match the delimiter
// [delimiter, delimiter_end). `end` marks the end of the readable input.
// The tail of the loop and the final return are not visible in this excerpt
// (presumably both pointers advance and the function returns true once the
// whole delimiter has matched -- confirm).
153 static inline bool test_is_delimiter(
const char* c,
const char* end,
154 const char*
delimiter,
const char* delimiter_end) {
// The delimiter is longer than the remaining input: it cannot match.
157 if (delimiter_end - delimiter > end - c)
return false;
// Compare character by character, failing on the first mismatch.
158 while (delimiter != delimiter_end) {
159 if ((*c) != (*delimiter))
return false;
164 #define PUSH_CHAR(c) ret.add_char(c); escape_sequence = config.use_escape_char && (c == config.escape_char); 169 template <
typename Iterator,
typename Context,
typename Skipper,
typename Attribute>
// Qi parse entry point. Consumes one field from [first, last) and stores the
// result in `attr`: either a value looked up in map_vals or the parsed text as
// a std::string. Returns false when no character was consumed after skipping.
// Many lines of the body (declarations of `c`, `ret`, `quote_char`, the switch
// header, the cursor advance, the return statements) are not visible in this
// excerpt; comments below describe only the visible statements.
170 bool parse(Iterator& first, Iterator
const& last,
171 Context&, Skipper
const& skipper, Attribute& attr)
const {
// Let the skipper consume leading skippable input before tokenizing.
172 boost::spirit::qi::skip_over(first, last, skipper);
173 Iterator cur = first;
// Delimiter as a [begin, end) character range, for test_is_delimiter().
175 const char* delimiter_begin = config.
delimiter.c_str();
176 const char* delimiter_end = delimiter_begin + config.
delimiter.length();
178 tokenizer_state state = tokenizer_state::START_FIELD;
179 bool keep_parsing =
true;
// Start of the raw (unprocessed) field text; set on entering START_FIELD and
// used later for the raw-string map_vals lookup.
181 const char* raw_field_begin =
nullptr;
// Set by the PUSH_CHAR macro when the pushed character is the configured
// escape character and escaping is enabled.
184 bool escape_sequence =
false;
185 while(keep_parsing && cur != last) {
// Snapshot of the flag at the start of this iteration; if it was already set,
// it is cleared at the end of the iteration (see the matching `if` below), so
// an escape covers only the character that follows it.
189 bool reset_escape_sequence = escape_sequence;
// Outside a quoted field, any character listed in config.restrictions ends
// the field.
193 if(state != tokenizer_state::IN_QUOTED_FIELD &&
194 config.
restrictions.find(c) != std::string::npos)
break;
// Delimiter test (the start of this expression is not visible): only outside
// quoted fields, and only when `c` equals the delimiter's first character;
// multi-character delimiters need the full test_is_delimiter() comparison.
199 (state != tokenizer_state::IN_QUOTED_FIELD) &&
205 delimiter_first_char == c &&
206 (delimiter_is_singlechar ||
207 test_is_delimiter(cur, last, delimiter_begin, delimiter_end));
// A delimiter ends the field.
209 if (is_delimiter)
break;
213 case tokenizer_state::START_FIELD:
// `cur - 1` implies the cursor has already been advanced past `c` at this
// point (the increment itself is not visible in this excerpt).
214 raw_field_begin = cur-1;
// A leading single or double quote opens a quoted field.
215 if (c ==
'\'' || c ==
'\"') {
217 state = tokenizer_state::IN_QUOTED_FIELD;
221 state = tokenizer_state::IN_FIELD;
225 case tokenizer_state::IN_FIELD:
230 case tokenizer_state::IN_QUOTED_FIELD:
// An unescaped occurrence of the opening quote character.
232 if (c == quote_char && !escape_sequence) {
// Next character is the same quote character (doubled quote).
// NOTE(review): if `cur` already points at the character after `c`, the bound
// `cur + 1 < last` also requires one more character beyond the doubled quote;
// confirm whether `cur < last` was intended.
236 if (cur + 1 < last && *cur == quote_char) {
// Stop the main loop.
243 keep_parsing =
false;
// Clear the escape flag if it was already set when this iteration began.
251 if (reset_escape_sequence) escape_sequence =
false;
// Nothing was consumed: report a parse failure.
253 if (cur == first)
return false;
// Raw-substitution mode: look up the untouched field text (right-trimmed)
// in map_vals before any further string processing.
256 if (only_raw_string_substitutions ==
true && raw_field_begin !=
nullptr) {
257 std::string raw_str = std::string(raw_field_begin, cur - raw_field_begin);
258 boost::algorithm::trim_right(raw_str);
259 auto map_val_iter = map_vals.find(raw_str);
260 if (map_val_iter != map_vals.end()) {
261 attr = map_val_iter->second;
// get_string() returns an lvalue reference, so this moves the accumulated text
// out of the buffer object `ret`.
266 std::string final_str = std::move(ret.get_string());
// Unquoted fields are right-trimmed.
267 if (!quote_char) boost::algorithm::trim_right(final_str);
// NOTE(review): the condition on this `else if` is always true when reached.
268 else if (quote_char) {
// Otherwise the lookup in map_vals is done on the processed string.
275 if (only_raw_string_substitutions ==
false) {
276 auto map_val_iter = map_vals.find(final_str);
277 if (map_val_iter != map_vals.end()) {
278 attr = map_val_iter->second;
// No substitution applied here: the attribute receives the parsed string.
282 attr = std::move(final_str);
// Returns a boost::spirit::info naming this parser as "string_parser".
// The Context argument is unused.
290 template <
typename Context>
291 boost::spirit::info what(Context&)
const {
292 return boost::spirit::info(
"string_parser");
// make_primitive specialization for the one-argument restricted_string
// terminal: it produces a parser_impl::string_parser.
303 template <
typename Modifiers,
typename T1>
304 struct make_primitive<terminal_ex<
parser_impl::tag::restricted_string, fusion::vector1<T1>>, Modifiers> {
305 typedef parser_impl::string_parser result_type;
307 template <
typename Terminal>
// Builds the parser from the terminal's first (index 0) argument; the second
// parameter is unused.
308 result_type operator()(
const Terminal& term, unused_type)
const {
309 return result_type(fusion::at_c<0>(term.args));
bool only_raw_string_substitutions
char escape_char
The character to use as the escape character.
void unescape_string(std::string &cal, bool use_escape_char, char escape_char, char quote_char, bool double_quote)
bool use_escape_char
Whether escape char should be used.