210 lines
7.1 KiB
Python
210 lines
7.1 KiB
Python
# This Source Code Form is subject to the terms of the Mozilla Public
|
|
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
|
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
import re
|
|
|
|
|
|
def _tokens2re(**tokens):
|
|
# Create a pattern for non-escaped tokens, in the form:
|
|
# (?<!\\)(?:a|b|c...)
|
|
# This is meant to match patterns a, b, or c, or ... if they are not
|
|
# preceded by a backslash.
|
|
# where a, b, c... are in the form
|
|
# (?P<name>pattern)
|
|
# which matches the pattern and captures it in a named match group.
|
|
# The group names and patterns are given as arguments.
|
|
all_tokens = '|'.join('(?P<%s>%s)' % (name, value)
|
|
for name, value in tokens.iteritems())
|
|
nonescaped = r'(?<!\\)(?:%s)' % all_tokens
|
|
|
|
# The final pattern matches either the above pattern, or an escaped
|
|
# backslash, captured in the "escape" match group.
|
|
return re.compile('(?:%s|%s)' % (nonescaped, r'(?P<escape>\\\\)'))
|
|
|
|
UNQUOTED_TOKENS_RE = _tokens2re(
|
|
whitespace=r'[\t\r\n ]+',
|
|
quote=r'[\'"]',
|
|
comment='#',
|
|
special=r'[<>&|`~(){}$;\*\?]',
|
|
backslashed=r'\\[^\\]',
|
|
)
|
|
|
|
DOUBLY_QUOTED_TOKENS_RE = _tokens2re(
|
|
quote='"',
|
|
backslashedquote=r'\\"',
|
|
special='\$',
|
|
backslashed=r'\\[^\\"]',
|
|
)
|
|
|
|
ESCAPED_NEWLINES_RE = re.compile(r'\\\n')
|
|
|
|
# This regexp contains the same characters as all those listed in
|
|
# UNQUOTED_TOKENS_RE. Please keep in sync.
|
|
SHELL_QUOTE_RE = re.compile(r'[\\\t\r\n \'\"#<>&|`~(){}$;\*\?]')
|
|
|
|
|
|
class MetaCharacterException(Exception):
|
|
def __init__(self, char):
|
|
self.char = char
|
|
|
|
|
|
class _ClineSplitter(object):
|
|
'''
|
|
Parses a given command line string and creates a list of command
|
|
and arguments, with wildcard expansion.
|
|
'''
|
|
def __init__(self, cline):
|
|
self.arg = None
|
|
self.cline = cline
|
|
self.result = []
|
|
self._parse_unquoted()
|
|
|
|
def _push(self, str):
|
|
'''
|
|
Push the given string as part of the current argument
|
|
'''
|
|
if self.arg is None:
|
|
self.arg = ''
|
|
self.arg += str
|
|
|
|
def _next(self):
|
|
'''
|
|
Finalize current argument, effectively adding it to the list.
|
|
'''
|
|
if self.arg is None:
|
|
return
|
|
self.result.append(self.arg)
|
|
self.arg = None
|
|
|
|
def _parse_unquoted(self):
|
|
'''
|
|
Parse command line remainder in the context of an unquoted string.
|
|
'''
|
|
while self.cline:
|
|
# Find the next token
|
|
m = UNQUOTED_TOKENS_RE.search(self.cline)
|
|
# If we find none, the remainder of the string can be pushed to
|
|
# the current argument and the argument finalized
|
|
if not m:
|
|
self._push(self.cline)
|
|
break
|
|
# The beginning of the string, up to the found token, is part of
|
|
# the current argument
|
|
if m.start():
|
|
self._push(self.cline[:m.start()])
|
|
self.cline = self.cline[m.end():]
|
|
|
|
match = {name: value
|
|
for name, value in m.groupdict().items() if value}
|
|
if 'quote' in match:
|
|
# " or ' start a quoted string
|
|
if match['quote'] == '"':
|
|
self._parse_doubly_quoted()
|
|
else:
|
|
self._parse_quoted()
|
|
elif 'comment' in match:
|
|
# Comments are ignored. The current argument can be finalized,
|
|
# and parsing stopped.
|
|
break
|
|
elif 'special' in match:
|
|
# Unquoted, non-escaped special characters need to be sent to a
|
|
# shell.
|
|
raise MetaCharacterException(match['special'])
|
|
elif 'whitespace' in match:
|
|
# Whitespaces terminate current argument.
|
|
self._next()
|
|
elif 'escape' in match:
|
|
# Escaped backslashes turn into a single backslash
|
|
self._push('\\')
|
|
elif 'backslashed' in match:
|
|
# Backslashed characters are unbackslashed
|
|
# e.g. echo \a -> a
|
|
self._push(match['backslashed'][1])
|
|
else:
|
|
raise Exception("Shouldn't reach here")
|
|
if self.arg:
|
|
self._next()
|
|
|
|
def _parse_quoted(self):
|
|
# Single quoted strings are preserved, except for the final quote
|
|
index = self.cline.find("'")
|
|
if index == -1:
|
|
raise Exception('Unterminated quoted string in command')
|
|
self._push(self.cline[:index])
|
|
self.cline = self.cline[index+1:]
|
|
|
|
def _parse_doubly_quoted(self):
|
|
if not self.cline:
|
|
raise Exception('Unterminated quoted string in command')
|
|
while self.cline:
|
|
m = DOUBLY_QUOTED_TOKENS_RE.search(self.cline)
|
|
if not m:
|
|
raise Exception('Unterminated quoted string in command')
|
|
self._push(self.cline[:m.start()])
|
|
self.cline = self.cline[m.end():]
|
|
match = {name: value
|
|
for name, value in m.groupdict().items() if value}
|
|
if 'quote' in match:
|
|
# a double quote ends the quoted string, so go back to
|
|
# unquoted parsing
|
|
return
|
|
elif 'special' in match:
|
|
# Unquoted, non-escaped special characters in a doubly quoted
|
|
# string still have a special meaning and need to be sent to a
|
|
# shell.
|
|
raise MetaCharacterException(match['special'])
|
|
elif 'escape' in match:
|
|
# Escaped backslashes turn into a single backslash
|
|
self._push('\\')
|
|
elif 'backslashedquote' in match:
|
|
# Backslashed double quotes are un-backslashed
|
|
self._push('"')
|
|
elif 'backslashed' in match:
|
|
# Backslashed characters are kept backslashed
|
|
self._push(match['backslashed'])
|
|
|
|
|
|
def split(cline):
|
|
'''
|
|
Split the given command line string.
|
|
'''
|
|
s = ESCAPED_NEWLINES_RE.sub('', cline)
|
|
return _ClineSplitter(s).result
|
|
|
|
|
|
def _quote(s):
|
|
'''Given a string, returns a version that can be used literally on a shell
|
|
command line, enclosing it with single quotes if necessary.
|
|
|
|
As a special case, if given an int, returns a string containing the int,
|
|
not enclosed in quotes.
|
|
'''
|
|
if type(s) == int:
|
|
return '%d' % s
|
|
|
|
# Empty strings need to be quoted to have any significance
|
|
if s and not SHELL_QUOTE_RE.search(s):
|
|
return s
|
|
|
|
# Single quoted strings can contain any characters unescaped except the
|
|
# single quote itself, which can't even be escaped, so the string needs to
|
|
# be closed, an escaped single quote added, and reopened.
|
|
t = type(s)
|
|
return t("'%s'") % s.replace(t("'"), t("'\\''"))
|
|
|
|
|
|
def quote(*strings):
|
|
'''Given one or more strings, returns a quoted string that can be used
|
|
literally on a shell command line.
|
|
|
|
>>> quote('a', 'b')
|
|
"a b"
|
|
>>> quote('a b', 'c')
|
|
"'a b' c"
|
|
'''
|
|
return ' '.join(_quote(s) for s in strings)
|
|
|
|
|
|
__all__ = ['MetaCharacterException', 'split', 'quote']
|