
Grammar of Starlark
==================

File = {Statement | newline} eof .

Statement = DefStmt | IfStmt | ForStmt | WhileStmt | SimpleStmt .

DefStmt = 'def' identifier '(' [Parameters [',']] ')' ':' Suite .

Parameters = Parameter {',' Parameter}.

Parameter = identifier | identifier '=' Test | '*' | '*' identifier | '**' identifier .

IfStmt = 'if' Test ':' Suite {'elif' Test ':' Suite} ['else' ':' Suite] .

ForStmt = 'for' LoopVariables 'in' Expression ':' Suite .

WhileStmt = 'while' Test ':' Suite .

Suite = [newline indent {Statement} outdent] | SimpleStmt .

SimpleStmt = SmallStmt {';' SmallStmt} [';'] '\n' .
# NOTE: '\n' optional at EOF

SmallStmt = ReturnStmt
          | BreakStmt | ContinueStmt | PassStmt
          | AssignStmt
          | ExprStmt
          | LoadStmt
          .

ReturnStmt   = 'return' [Expression] .
BreakStmt    = 'break' .
ContinueStmt = 'continue' .
PassStmt     = 'pass' .
AssignStmt   = Expression ('=' | '+=' | '-=' | '*=' | '/=' | '//=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=') Expression .
ExprStmt     = Expression .

LoadStmt = 'load' '(' string {',' [identifier '='] string} [','] ')' .

Test = LambdaExpr
     | IfExpr
     | PrimaryExpr
     | UnaryExpr
     | BinaryExpr
     .

LambdaExpr = 'lambda' [Parameters] ':' Test .

IfExpr = Test 'if' Test 'else' Test .

PrimaryExpr = Operand
            | PrimaryExpr DotSuffix
            | PrimaryExpr CallSuffix
            | PrimaryExpr SliceSuffix
            .

Operand = identifier
        | int | float | string
        | ListExpr | ListComp
        | DictExpr | DictComp
        | '(' [Expression [',']] ')'
        | ('-' | '+') PrimaryExpr
        .

DotSuffix   = '.' identifier .
CallSuffix  = '(' [Arguments [',']] ')' .
SliceSuffix = '[' [Expression] [':' Test [':' Test]] ']' .

Arguments = Argument {',' Argument} .
Argument  = Test | identifier '=' Test | '*' Test | '**' Test .

ListExpr = '[' [Expression [',']] ']' .
ListComp = '[' Test {CompClause} ']'.

DictExpr = '{' [Entries [',']] '}' .
DictComp = '{' Entry {CompClause} '}' .
Entries  = Entry {',' Entry} .
Entry    = Test ':' Test .

CompClause = 'for' LoopVariables 'in' Test | 'if' Test .

UnaryExpr = 'not' Test .

BinaryExpr = Test {Binop Test} .

Binop = 'or'
      | 'and'
      | '==' | '!=' | '<' | '>' | '<=' | '>=' | 'in' | 'not' 'in'
      | '|'
      | '^'
      | '&'
      | '-' | '+'
      | '*' | '%' | '/' | '//'
      .

Expression = Test {',' Test} .
# NOTE: trailing comma permitted only when within [...] or (...).

LoopVariables = PrimaryExpr {',' PrimaryExpr} .


# Notation (similar to Go spec):
- lowercase and 'quoted' items are lexical tokens.
- Capitalized names denote grammar productions.
- (...) implies grouping
- x | y means either x or y.
- [x] means x is optional
- {x} means x is repeated zero or more times
- The end of each declaration is marked with a period.

# Tokens
- spaces: newline, eof, indent, outdent.
- identifier.
- literals: string, int, float.
- plus all quoted tokens such as '+=', 'return'.

# Notes:
- Ambiguity is resolved using operator precedence.
- The grammar does not enforce the legal order of params and args,
  nor that the first compclause must be a 'for'.

TODO:
- explain how the lexer generates indent, outdent, and newline tokens.
- why is unary NOT separated from unary - and +?
- the grammar is (mostly) in LL(1) style so, for example,
  dot expressions are formed suffixes, not complete expressions,
  which makes the spec harder to read.  Reorganize into non-LL(1) form?
