libstdc++
|
00001 // class template regex -*- C++ -*- 00002 00003 // Copyright (C) 2013-2017 Free Software Foundation, Inc. 00004 // 00005 // This file is part of the GNU ISO C++ Library. This library is free 00006 // software; you can redistribute it and/or modify it under the 00007 // terms of the GNU General Public License as published by the 00008 // Free Software Foundation; either version 3, or (at your option) 00009 // any later version. 00010 00011 // This library is distributed in the hope that it will be useful, 00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 // GNU General Public License for more details. 00015 00016 // Under Section 7 of GPL version 3, you are granted additional 00017 // permissions described in the GCC Runtime Library Exception, version 00018 // 3.1, as published by the Free Software Foundation. 00019 00020 // You should have received a copy of the GNU General Public License and 00021 // a copy of the GCC Runtime Library Exception along with this program; 00022 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 00023 // <http://www.gnu.org/licenses/>. 00024 00025 /** 00026 * @file bits/regex_executor.h 00027 * This is an internal header file, included by other library headers. 00028 * Do not attempt to use it directly. @headername{regex} 00029 */ 00030 00031 // FIXME convert comments to doxygen format. 00032 00033 namespace std _GLIBCXX_VISIBILITY(default) 00034 { 00035 namespace __detail 00036 { 00037 _GLIBCXX_BEGIN_NAMESPACE_VERSION 00038 00039 /** 00040 * @addtogroup regex-detail 00041 * @{ 00042 */ 00043 00044 /** 00045 * @brief Takes a regex and an input string and does the matching. 00046 * 00047 * The %_Executor class has two modes: DFS mode and BFS mode, controlled 00048 * by the template parameter %__dfs_mode. 00049 */ 00050 template<typename _BiIter, typename _Alloc, typename _TraitsT, 00051 bool __dfs_mode> 00052 class _Executor 00053 { 00054 using __search_mode = integral_constant<bool, __dfs_mode>; 00055 using __dfs = true_type; 00056 using __bfs = false_type; 00057 00058 enum class _Match_mode : unsigned char { _Exact, _Prefix }; 00059 00060 public: 00061 typedef typename iterator_traits<_BiIter>::value_type _CharT; 00062 typedef basic_regex<_CharT, _TraitsT> _RegexT; 00063 typedef std::vector<sub_match<_BiIter>, _Alloc> _ResultsVec; 00064 typedef regex_constants::match_flag_type _FlagT; 00065 typedef typename _TraitsT::char_class_type _ClassT; 00066 typedef _NFA<_TraitsT> _NFAT; 00067 00068 public: 00069 _Executor(_BiIter __begin, 00070 _BiIter __end, 00071 _ResultsVec& __results, 00072 const _RegexT& __re, 00073 _FlagT __flags) 00074 : _M_begin(__begin), 00075 _M_end(__end), 00076 _M_re(__re), 00077 _M_nfa(*__re._M_automaton), 00078 _M_results(__results), 00079 _M_rep_count(_M_nfa.size()), 00080 _M_states(_M_nfa._M_start(), _M_nfa.size()), 00081 _M_flags((__flags & regex_constants::match_prev_avail) 00082 ? (__flags 00083 & ~regex_constants::match_not_bol 00084 & ~regex_constants::match_not_bow) 00085 : __flags) 00086 { } 00087 00088 // Set matched when string exactly matches the pattern. 00089 bool 00090 _M_match() 00091 { 00092 _M_current = _M_begin; 00093 return _M_main(_Match_mode::_Exact); 00094 } 00095 00096 // Set matched when some prefix of the string matches the pattern. 00097 bool 00098 _M_search_from_first() 00099 { 00100 _M_current = _M_begin; 00101 return _M_main(_Match_mode::_Prefix); 00102 } 00103 00104 bool 00105 _M_search(); 00106 00107 private: 00108 void 00109 _M_rep_once_more(_Match_mode __match_mode, _StateIdT); 00110 00111 void 00112 _M_handle_repeat(_Match_mode, _StateIdT); 00113 00114 void 00115 _M_handle_subexpr_begin(_Match_mode, _StateIdT); 00116 00117 void 00118 _M_handle_subexpr_end(_Match_mode, _StateIdT); 00119 00120 void 00121 _M_handle_line_begin_assertion(_Match_mode, _StateIdT); 00122 00123 void 00124 _M_handle_line_end_assertion(_Match_mode, _StateIdT); 00125 00126 void 00127 _M_handle_word_boundary(_Match_mode, _StateIdT); 00128 00129 void 00130 _M_handle_subexpr_lookahead(_Match_mode, _StateIdT); 00131 00132 void 00133 _M_handle_match(_Match_mode, _StateIdT); 00134 00135 void 00136 _M_handle_backref(_Match_mode, _StateIdT); 00137 00138 void 00139 _M_handle_accept(_Match_mode, _StateIdT); 00140 00141 void 00142 _M_handle_alternative(_Match_mode, _StateIdT); 00143 00144 void 00145 _M_dfs(_Match_mode __match_mode, _StateIdT __start); 00146 00147 bool 00148 _M_main(_Match_mode __match_mode) 00149 { return _M_main_dispatch(__match_mode, __search_mode{}); } 00150 00151 bool 00152 _M_main_dispatch(_Match_mode __match_mode, __dfs); 00153 00154 bool 00155 _M_main_dispatch(_Match_mode __match_mode, __bfs); 00156 00157 bool 00158 _M_is_word(_CharT __ch) const 00159 { 00160 static const _CharT __s[2] = { 'w' }; 00161 return _M_re._M_automaton->_M_traits.isctype 00162 (__ch, _M_re._M_automaton->_M_traits.lookup_classname(__s, __s+1)); 00163 } 00164 00165 bool 00166 _M_at_begin() const 00167 { 00168 return _M_current == _M_begin 00169 && !(_M_flags & (regex_constants::match_not_bol 00170 | regex_constants::match_prev_avail)); 00171 } 00172 00173 bool 00174 _M_at_end() const 00175 { 00176 return _M_current == _M_end 00177 && !(_M_flags & regex_constants::match_not_eol); 00178 } 00179 00180 bool 00181 _M_word_boundary() const; 00182 00183 bool 00184 _M_lookahead(_StateIdT __next); 00185 00186 // Holds additional information used in BFS-mode. 00187 template<typename _SearchMode, typename _ResultsVec> 00188 struct _State_info; 00189 00190 template<typename _ResultsVec> 00191 struct _State_info<__bfs, _ResultsVec> 00192 { 00193 explicit 00194 _State_info(_StateIdT __start, size_t __n) 00195 : _M_visited_states(new bool[__n]()), _M_start(__start) 00196 { } 00197 00198 bool _M_visited(_StateIdT __i) 00199 { 00200 if (_M_visited_states[__i]) 00201 return true; 00202 _M_visited_states[__i] = true; 00203 return false; 00204 } 00205 00206 void _M_queue(_StateIdT __i, const _ResultsVec& __res) 00207 { _M_match_queue.emplace_back(__i, __res); } 00208 00209 // Dummy implementations for BFS mode. 00210 _BiIter* _M_get_sol_pos() { return nullptr; } 00211 00212 // Saves states that need to be considered for the next character. 00213 vector<pair<_StateIdT, _ResultsVec>> _M_match_queue; 00214 // Indicates which states are already visited. 00215 unique_ptr<bool[]> _M_visited_states; 00216 // To record current solution. 00217 _StateIdT _M_start; 00218 }; 00219 00220 template<typename _ResultsVec> 00221 struct _State_info<__dfs, _ResultsVec> 00222 { 00223 explicit 00224 _State_info(_StateIdT __start, size_t) : _M_start(__start) 00225 { } 00226 00227 // Dummy implementations for DFS mode. 00228 bool _M_visited(_StateIdT) const { return false; } 00229 void _M_queue(_StateIdT, const _ResultsVec&) { } 00230 00231 _BiIter* _M_get_sol_pos() { return &_M_sol_pos; } 00232 00233 // To record current solution. 00234 _StateIdT _M_start; 00235 _BiIter _M_sol_pos; 00236 }; 00237 00238 public: 00239 _ResultsVec _M_cur_results; 00240 _BiIter _M_current; 00241 _BiIter _M_begin; 00242 const _BiIter _M_end; 00243 const _RegexT& _M_re; 00244 const _NFAT& _M_nfa; 00245 _ResultsVec& _M_results; 00246 vector<pair<_BiIter, int>> _M_rep_count; 00247 _State_info<__search_mode, _ResultsVec> _M_states; 00248 _FlagT _M_flags; 00249 // Do we have a solution so far? 00250 bool _M_has_sol; 00251 }; 00252 00253 //@} regex-detail 00254 _GLIBCXX_END_NAMESPACE_VERSION 00255 } // namespace __detail 00256 } // namespace std 00257 00258 #include <bits/regex_executor.tcc>