## @package parser
# Module caffe2.python.docs.parser
import re
class Parser(object):
    """Apply a fixed list of markup rules to a text and render it.

    The formatter passed to the constructor must provide ``clone()`` and
    ``dump()`` plus the ``addCode``, ``addRaw``, ``addLink`` and
    ``addEmphasis`` methods invoked by the handlers in ``regexes``.
    """

    # List of tuples (regex_str, lambda(regex_match, formatter))
    # If a lambda returns True it will be called repeatedly with replacement
    # otherwise it will only be called on text that hasn't been parsed yet.
    regexes = [
        # Code blocks of various formats
        ('````(.+?)````',
         lambda m, f: f.addCode(m.group(1))
         ),
        ('```(.+?)```',
         lambda m, f: f.addCode(m.group(1))
         ),
        # Runs of text introduced by two-space indentation
        (r'((( {2})+)(\S.*)(\n\s*\n|\n))+',
         lambda m, f: f.addCode(m.group(0))
         ),
        # A newline that does not follow a period becomes a space. The
        # handler returns True, so the result is merged back and re-scanned.
        (r'([^\.])\n',
         lambda m, f: f.addRaw('{c} '.format(c=m.group(1))) or True
         ),
        # Single-backtick code (second argument presumably marks it inline;
        # confirm against the formatter implementation)
        ('`(.+?)`',
         lambda m, f: f.addCode(m.group(1), True)
         ),
        # Make links clickable
        ('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]'
         r'|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
         lambda m, f: f.addLink(m.group(0), m.group(0))
         ),
        # Emphasis: double asterisks first so they are not consumed by the
        # single-asterisk rule below.
        (r'\*\*(.+?)\*\*',
         lambda m, f: f.addEmphasis(m.group(1), 2)
         ),
        (r'\*(.+?)\*',
         lambda m, f: f.addEmphasis(m.group(1), 1)
         ),
    ]

    def __init__(self, text, formatter):
        """Store the text to parse and the formatter used to render it."""
        self.text = text
        self.lines = []
        self.formatter = formatter

    def parseText(self):
        """Run every rule in ``regexes`` over ``self.text``.

        The text is kept as an ordered list of (label, text) segments.
        Each rule is applied, in order, to the segments still labelled
        UNPARSED; segments produced by a non-merging handler are labelled
        PARSED and never touched again. The resulting segment texts are
        appended to ``self.lines`` (which accumulates across calls) and
        ``self.text`` is replaced by those lines joined with single spaces.
        """
        UNPARSED = 0
        PARSED = 1
        parsed_block = [(UNPARSED, self.text)]
        for regex, func in self.regexes:
            index = 0
            while index < len(parsed_block):
                label, text = parsed_block[index]

                # Already been parsed
                if label == PARSED:
                    index += 1
                    continue

                match = re.search(regex, text)
                if not match:
                    index += 1
                    continue

                parsed_block.pop(index)
                start = match.start(0)
                end = match.end(0)

                f = self.formatter.clone()
                merge = func(match, f)

                if merge:
                    # Splice the rendered text back in and leave index in
                    # place so the merged segment is scanned again.
                    merged = text[:start] + f.dump() + text[end:]
                    parsed_block.insert(index, (UNPARSED, merged))
                    continue

                head = text[:start]
                tail = text[end:]
                if head:
                    parsed_block.insert(index, (UNPARSED, head))
                    # Advance only when a head segment was really inserted;
                    # otherwise the parsed segment would land one slot too
                    # far and the tail would be skipped for this rule.
                    index += 1

                parsed_block.insert(index, (PARSED, f.dump()))
                index += 1

                if tail:
                    # index now rests on the tail so the same rule can find
                    # further matches in it.
                    parsed_block.insert(index, (UNPARSED, tail))

        self.lines += [segment for _, segment in parsed_block]
        self.text = ' '.join(self.lines)

    def parse(self):
        """Parse the stored text and return the rendered result."""
        self.parseText()
        return self.text