CommandLineParser
WeihanLi opened this issue · comments
Weihan Li commented
A simple command-line parser that parses command-line input and avoids this issue:
dotnet/command-line-api#1385
Possible implementation:
class CommandLineParer
{
    /// <summary>
    /// Lazily splits a command line into tokens. Whitespace outside double quotes separates
    /// tokens, while whitespace inside double quotes is kept as part of the token. Leading and
    /// trailing double quotes are trimmed from every token that is returned.
    /// </summary>
    /// <remarks>
    /// A token that is opened by a double quote which is never closed is not returned.
    /// </remarks>
    /// <param name="commandLine">The raw command line text to split.</param>
    /// <returns>The tokens in the order they appear in <paramref name="commandLine"/>.</returns>
    public static IEnumerable<string> SplitCommandLine(string commandLine)
    {
        var input = commandLine.AsMemory();
        var tokenStart = 0;
        var index = 0;
        var wordState = Boundary.TokenStart;
        var quoteState = Boundary.QuoteStart;

        for (; index < input.Length; index++)
        {
            var current = input.Span[index];

            // Snapshot of the state on entry; each branch reads these before it mutates anything.
            var outsideQuotes = quoteState == Boundary.QuoteStart;
            var betweenTokens = wordState == Boundary.TokenStart;

            if (char.IsWhiteSpace(current))
            {
                // Whitespace inside quotes belongs to the token and changes nothing.
                if (outsideQuotes)
                {
                    if (!betweenTokens)
                    {
                        // Whitespace terminates the bare word that is in progress.
                        yield return TokenEndingAt(index);
                        wordState = Boundary.TokenStart;
                    }

                    tokenStart = index;
                }
            }
            else if (current == '"')
            {
                if (!betweenTokens)
                {
                    // A quote in the middle of a word only toggles the quoting state.
                    quoteState = outsideQuotes ? Boundary.QuoteEnd : Boundary.QuoteStart;
                }
                else if (outsideQuotes)
                {
                    // Opening quote of a quoted token: its content begins after the quote.
                    tokenStart = index + 1;
                    quoteState = Boundary.QuoteEnd;
                }
                else
                {
                    // Closing quote of a quoted token: emit everything since the opening quote.
                    yield return TokenEndingAt(index);
                    tokenStart = index;
                    quoteState = Boundary.QuoteStart;
                }
            }
            else if (betweenTokens && outsideQuotes)
            {
                // First character of a bare word.
                wordState = Boundary.WordEnd;
                tokenStart = index;
            }
        }

        // Flush the word that was still in progress when the input ran out.
        if (wordState != Boundary.TokenStart)
        {
            yield return TokenEndingAt(index);
        }

        // Materializes the text from the current token start up to (excluding) the given index.
        string TokenEndingAt(int end)
        {
            var length = end - tokenStart;
            var raw = input.Slice(tokenStart, length).ToString();
            return raw.Trim('"');
        }
    }

    // States of the splitter: the first two track word boundaries, the last two track quoting.
    private enum Boundary
    {
        TokenStart,
        WordEnd,
        QuoteStart,
        QuoteEnd
    }
}
Weihan Li commented
/// <summary>
/// Splits a line into space-separated tokens. A token that starts with a single quote may
/// contain spaces: it runs until a single quote that is followed by a space, or until the end
/// of the line. Inside such a token, two consecutive single quotes stand for one literal
/// single quote.
/// </summary>
/// <remarks>
/// The sequence is produced lazily, so the exception below surfaces during enumeration.
/// Empty tokens are never returned.
/// </remarks>
/// <param name="line">The text to split; <c>null</c> or an empty string yields no tokens.</param>
/// <returns>The non-empty tokens in order, without their delimiting single quotes.</returns>
/// <exception cref="ArgumentException">
/// A single quote inside a quoted token is followed by a character that is neither a space
/// nor another single quote.
/// </exception>
public static IEnumerable<string> ParseLine(string line)
{
    if (string.IsNullOrEmpty(line))
    {
        yield break;
    }

    var tokenBuilder = new StringBuilder();
    var inToken = false;
    var inQuotes = false;

    // Iterate through every character in the line
    for (var i = 0; i < line.Length; i++)
    {
        var character = line[i];

        // If we are not currently inside a token, this character starts one
        if (!inToken)
        {
            inToken = true;

            // A leading quote means the token value is contained within quotes; the quote
            // itself is not part of the value
            if (character == '\'')
            {
                inQuotes = true;
                continue;
            }
        }

        // If we are in between quotes
        if (inQuotes)
        {
            if (i + 1 == line.Length)
            {
                // Last character of the line: a quote here closes the token and is dropped.
                // Anything else belongs to an unterminated quoted token and must be kept,
                // otherwise the final character of the input would be silently lost.
                if (character != '\'')
                {
                    tokenBuilder.Append(character);
                }

                break;
            }

            if (character == '\'' && line[i + 1] == ' ') // quotes end
            {
                inQuotes = false;
                inToken = false;
                i++; // skip the separating space
            }
            else if (character == '\'' && line[i + 1] == '\'') // escaped quote
            {
                i++; // skip the second quote; a single quote is appended below
            }
            else if (character == '\'')
            {
                throw new ArgumentException($"unable to escape {line}");
            }
        }
        else if (character == ' ')
        {
            inToken = false;
        }

        // If the token just ended, emit it and reset the builder; otherwise keep collecting
        if (!inToken)
        {
            if (tokenBuilder.Length > 0)
            {
                yield return tokenBuilder.ToString();
                tokenBuilder.Clear();
            }
        }
        else
        {
            tokenBuilder.Append(character);
        }
    }

    // Emit whatever token was still being collected when the line ended
    if (tokenBuilder.Length > 0)
    {
        yield return tokenBuilder.ToString();
        tokenBuilder.Clear();
    }
}