Caffe2 - Python API
A deep learning, cross platform ML framework
parser.py
1 # Copyright (c) 2016-present, Facebook, Inc.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14 ##############################################################################
15 
16 ## @package parser
17 # Module caffe2.python.docs.parser
18 from __future__ import absolute_import
19 from __future__ import division
20 from __future__ import print_function
21 from __future__ import unicode_literals
22 import re
23 
24 
class Parser(object):
    """Rewrite marked-up text by routing regex matches through a formatter.

    The parser walks ``regexes`` in order. For every match it clones the
    formatter, lets the rule's handler write into the clone, and splices the
    clone's ``dump()`` output back in place of the matched text.

    The formatter object must provide ``clone()`` and ``dump()`` plus the
    ``addCode``, ``addRaw``, ``addLink`` and ``addEmphasis`` methods that the
    handlers below call.
    """

    # List of tuples (regex_str, lambda(regex_match, formatter)).
    # If a lambda returns a true value, its output is merged back into the
    # surrounding text and that text is scanned again with the same regex.
    # Otherwise the output is frozen and only the text around the match stays
    # eligible for further parsing.
    # Patterns are raw strings so regex escapes such as \S and \* are not
    # treated as (invalid) Python string escapes.
    regexes = [
        # Code blocks of various formats
        (r'````(.+?)````',
         lambda m, f: f.addCode(m.group(1))
         ),
        (r'```(.+?)```',
         lambda m, f: f.addCode(m.group(1))
         ),
        # Runs of lines indented by a multiple of two spaces
        (r'((( {2})+)(\S.*)(\n\s*\n|\n))+',
         lambda m, f: f.addCode(m.group(0))
         ),
        # A newline not preceded by a period becomes a space. `or True`
        # requests the merge behaviour so the joined text is scanned again.
        (r'([^\.])\n',
         lambda m, f: f.addRaw('{c} '.format(c=m.group(1))) or True
         ),
        (r'`(.+?)`',
         lambda m, f: f.addCode(m.group(1), True)
         ),
        # Make links clickable
        (r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]'
         r'|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
         lambda m, f: f.addLink(m.group(0), m.group(0))
         ),
        # Double asterisks must be tried before single asterisks
        (r'\*\*(.+?)\*\*',
         lambda m, f: f.addEmphasis(m.group(1), 2)
         ),
        (r'\*(.+?)\*',
         lambda m, f: f.addEmphasis(m.group(1), 1)
         ),
    ]

    def __init__(self, text, formatter):
        """Store the text to parse and the formatter used to render matches.

        Args:
            text: the raw text to be parsed.
            formatter: object that is cloned for every match; see the class
                docstring for the methods it must provide.
        """
        self.text = text
        self.lines = []
        self.formatter = formatter

    def parseText(self):
        """Apply every rule in ``regexes`` to ``self.text``.

        The text is kept as an ordered list of (label, fragment) pairs.
        Fragments labelled PARSED are formatter output and are never scanned
        again; UNPARSED fragments remain candidates for the current and all
        later rules.

        On completion the fragments are appended to ``self.lines`` and
        ``self.text`` is replaced by ``self.lines`` joined with single spaces.
        """
        UNPARSED = 0
        PARSED = 1
        parsed_block = [(UNPARSED, self.text)]
        for regex, func in self.regexes:
            index = 0
            while index < len(parsed_block):
                label, text = parsed_block[index]

                # Already been parsed
                if label == PARSED:
                    index += 1
                    continue

                match = re.search(regex, text)
                if not match:
                    index += 1
                    continue

                parsed_block.pop(index)
                start = match.start(0)
                end = match.end(0)

                f = self.formatter.clone()
                merge = func(match, f)

                if merge:
                    # Put the rewritten text back at the same position and do
                    # not advance, so it is scanned again with this regex.
                    # The handler's output must no longer match the regex,
                    # otherwise this loop would never move forward.
                    merged = text[:start] + f.dump() + text[end:]
                    parsed_block.insert(index, (UNPARSED, merged))
                    continue

                # Replace the popped fragment by: prefix, frozen output, tail.
                # `index` only moves past fragments that were really inserted,
                # so a match at offset 0 keeps the output in its original
                # position and the tail is still scanned with this regex.
                if text[:start]:
                    parsed_block.insert(index, (UNPARSED, text[:start]))
                    index += 1

                parsed_block.insert(index, (PARSED, f.dump()))
                index += 1

                # The tail, if any, now sits at `index` and is examined on
                # the next loop iteration.
                if text[end:]:
                    parsed_block.insert(index, (UNPARSED, text[end:]))

        self.lines += [i for _, i in parsed_block]
        self.text = ' '.join(self.lines)

    def parse(self):
        """Run ``parseText`` and return the resulting ``self.text``."""
        self.parseText()
        return self.text
list regexes
Definition: parser.py:29
def parseText(self)
Definition: parser.py:64