Skip to content

Change fewerbraces to always use a colon, even before lambdas #15273

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
May 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion compiler/src/dotty/tools/dotc/parsing/JavaScanners.scala
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ object JavaScanners {
nextChar()

case ':' =>
token = COLON
token = COLONop
nextChar()

case '@' =>
Expand Down
282 changes: 177 additions & 105 deletions compiler/src/dotty/tools/dotc/parsing/Parsers.scala

Large diffs are not rendered by default.

77 changes: 47 additions & 30 deletions compiler/src/dotty/tools/dotc/parsing/Scanners.scala
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,8 @@ object Scanners {
def isNestedStart = token == LBRACE || token == INDENT
def isNestedEnd = token == RBRACE || token == OUTDENT

/** Is token a COLON, after having converted COLONEOL to COLON?
* The conversion means that indentation is not significant after `:`
* anymore. So, warning: this is a side-effecting operation.
*/
def isColon() =
if token == COLONEOL then token = COLON
token == COLON
def isColon =
token == COLONop || token == COLONfollow || token == COLONeol

/** Is current token first one after a newline? */
def isAfterLineEnd: Boolean = lineOffset >= 0
Expand Down Expand Up @@ -189,7 +184,10 @@ object Scanners {
val indentSyntax =
((if (Config.defaultIndent) !noindentSyntax else ctx.settings.indent.value)
|| rewriteNoIndent)
&& !isInstanceOf[LookaheadScanner]
&& { this match
case self: LookaheadScanner => self.allowIndent
case _ => true
}

if (rewrite) {
val s = ctx.settings
Expand All @@ -206,12 +204,22 @@ object Scanners {
def featureEnabled(name: TermName) = Feature.enabled(name)(using languageImportContext)
def erasedEnabled = featureEnabled(Feature.erasedDefinitions)

private inline val fewerBracesByDefault = false
// turn on to study the impact on the codebase if `fewerBraces` were the default

private var fewerBracesEnabledCache = false
private var fewerBracesEnabledCtx: Context = NoContext

def fewerBracesEnabled =
if fewerBracesEnabledCtx ne myLanguageImportContext then
fewerBracesEnabledCache = featureEnabled(Feature.fewerBraces)
fewerBracesEnabledCache =
featureEnabled(Feature.fewerBraces)
|| fewerBracesByDefault && indentSyntax && !migrateTo3
// ensure that fewer braces is not the default for 3.0-migration since
// { x: T =>
// expr
// }
// would be ambiguous
fewerBracesEnabledCtx = myLanguageImportContext
fewerBracesEnabledCache

Expand Down Expand Up @@ -386,10 +394,11 @@ object Scanners {
*/
def nextToken(): Unit =
val lastToken = token
val lastName = name
adjustSepRegions(lastToken)
getNextToken(lastToken)
if isAfterLineEnd then handleNewLine(lastToken)
postProcessToken()
postProcessToken(lastToken, lastName)
printState()

final def printState() =
Expand Down Expand Up @@ -420,7 +429,7 @@ object Scanners {
&& {
// Is current lexeme assumed to start an expression?
// This is the case if the lexeme is one of the tokens that
// starts an expression or it is a COLONEOL. Furthermore, if
// starts an expression or it is a COLONeol. Furthermore, if
// the previous token is in backticks, the lexeme may not be a binary operator.
// I.e. in
//
Expand All @@ -431,7 +440,7 @@ object Scanners {
// in backticks and is a binary operator. Hence, `x` is not classified as a
// leading infix operator.
def assumeStartsExpr(lexeme: TokenData) =
(canStartExprTokens.contains(lexeme.token) || lexeme.token == COLONEOL)
(canStartExprTokens.contains(lexeme.token) || lexeme.token == COLONeol)
&& (!lexeme.isOperator || nme.raw.isUnary(lexeme.name))
val lookahead = LookaheadScanner()
lookahead.allowLeadingInfixOperators = false
Expand Down Expand Up @@ -607,12 +616,11 @@ object Scanners {
currentRegion match
case r: Indented =>
insert(OUTDENT, offset)
if next.token != COLON then
handleNewIndentWidth(r.enclosing, ir =>
errorButContinue(
i"""The start of this line does not match any of the previous indentation widths.
|Indentation width of current line : $nextWidth
|This falls between previous widths: ${ir.width} and $lastWidth"""))
handleNewIndentWidth(r.enclosing, ir =>
errorButContinue(
i"""The start of this line does not match any of the previous indentation widths.
|Indentation width of current line : $nextWidth
|This falls between previous widths: ${ir.width} and $lastWidth"""))
case r =>
if skipping then
if r.enclosing.isClosedByUndentAt(nextWidth) then
Expand All @@ -629,7 +637,7 @@ object Scanners {
currentRegion.knownWidth = nextWidth
else if (lastWidth != nextWidth)
errorButContinue(spaceTabMismatchMsg(lastWidth, nextWidth))
if token != OUTDENT || next.token == COLON then
if token != OUTDENT then
handleNewIndentWidth(currentRegion, _.otherIndentWidths += nextWidth)
end handleNewLine

Expand All @@ -638,19 +646,24 @@ object Scanners {
|Previous indent : $lastWidth
|Latest indent : $nextWidth"""

def observeColonEOL(): Unit =
if token == COLON then
def observeColonEOL(inTemplate: Boolean): Unit =
val enabled =
if token == COLONop && inTemplate then
report.deprecationWarning(em"`:` after symbolic operator is deprecated; use backticks around operator instead", sourcePos(offset))
true
else token == COLONfollow && (inTemplate || fewerBracesEnabled)
if enabled then
lookAhead()
val atEOL = isAfterLineEnd || token == EOF
reset()
if atEOL then token = COLONEOL
if atEOL then token = COLONeol

def observeIndented(): Unit =
if indentSyntax && isNewLine then
val nextWidth = indentWidth(next.offset)
val lastWidth = currentRegion.indentWidth
if lastWidth < nextWidth then
currentRegion = Indented(nextWidth, COLONEOL, currentRegion)
currentRegion = Indented(nextWidth, COLONeol, currentRegion)
offset = next.offset
token = INDENT
end observeIndented
Expand Down Expand Up @@ -683,10 +696,10 @@ object Scanners {
case _ =>

/** - Join CASE + CLASS => CASECLASS, CASE + OBJECT => CASEOBJECT
* SEMI + ELSE => ELSE, COLON + <EOL> => COLONEOL
* SEMI + ELSE => ELSE, COLON following id/)/] => COLONfollow
* - Insert missing OUTDENTs at EOF
*/
def postProcessToken(): Unit = {
def postProcessToken(lastToken: Token, lastName: SimpleName): Unit = {
def fuse(tok: Int) = {
token = tok
offset = prev.offset
Expand Down Expand Up @@ -721,8 +734,10 @@ object Scanners {
reset()
case END =>
if !isEndMarker then token = IDENTIFIER
case COLON =>
if fewerBracesEnabled then observeColonEOL()
case COLONop =>
if lastToken == IDENTIFIER && lastName != null && isIdentifierStart(lastName.head)
|| colonEOLPredecessors.contains(lastToken)
then token = COLONfollow
case RBRACE | RPAREN | RBRACKET =>
closeIndented()
case EOF =>
Expand Down Expand Up @@ -1067,7 +1082,7 @@ object Scanners {
reset()
next

class LookaheadScanner() extends Scanner(source, offset) {
class LookaheadScanner(val allowIndent: Boolean = false) extends Scanner(source, offset) {
override def languageImportContext = Scanner.this.languageImportContext
}

Expand Down Expand Up @@ -1179,7 +1194,7 @@ object Scanners {
isSoftModifier && inModifierPosition()

def isSoftModifierInParamModifierPosition: Boolean =
isSoftModifier && lookahead.token != COLON
isSoftModifier && !lookahead.isColon

def isErased: Boolean = isIdent(nme.erased) && erasedEnabled

Expand Down Expand Up @@ -1518,7 +1533,9 @@ object Scanners {
case NEWLINE => ";"
case NEWLINES => ";;"
case COMMA => ","
case _ => showToken(token)
case COLONfollow | COLONeol => "':'"
case _ =>
if debugTokenStream then showTokenDetailed(token) else showToken(token)
}

/* Resume normal scanning after XML */
Expand Down
29 changes: 17 additions & 12 deletions compiler/src/dotty/tools/dotc/parsing/Tokens.scala
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,6 @@ abstract class TokensCommon {

def tokenRange(lo: Int, hi: Int): TokenSet = BitSet(lo to hi: _*)

def showTokenDetailed(token: Int): String = debugString(token)

def showToken(token: Int): String = {
val str = tokenString(token)
if (isKeyword(token)) s"'$str'" else str
}

val tokenString, debugString: Array[String] = new Array[String](maxToken + 1)

def enter(token: Int, str: String, debugStr: String = ""): Unit = {
Expand Down Expand Up @@ -107,7 +100,7 @@ abstract class TokensCommon {

/** special keywords */
//inline val USCORE = 73; enter(USCORE, "_")
inline val COLON = 74; enter(COLON, ":")
inline val COLONop = 74; enter(COLONop, ":") // a stand-alone `:`, see also COLONfollow
inline val EQUALS = 75; enter(EQUALS, "=")
//inline val LARROW = 76; enter(LARROW, "<-")
//inline val ARROW = 77; enter(ARROW, "=>")
Expand Down Expand Up @@ -204,8 +197,11 @@ object Tokens extends TokensCommon {

inline val QUOTE = 87; enter(QUOTE, "'")

inline val COLONEOL = 88; enter(COLONEOL, ":", ": at eol")
inline val SELFARROW = 89; enter(SELFARROW, "=>") // reclassified ARROW following self-type
inline val COLONfollow = 88; enter(COLONfollow, ":")
// A `:` following an alphanumeric identifier or one of the tokens in colonEOLPredecessors
inline val COLONeol = 89; enter(COLONeol, ":", ": at eol")
// A `:` recognized as starting an indentation block
inline val SELFARROW = 90; enter(SELFARROW, "=>") // reclassified ARROW following self-type

/** XML mode */
inline val XMLSTART = 99; enter(XMLSTART, "$XMLSTART$<") // TODO: deprecate
Expand Down Expand Up @@ -233,7 +229,7 @@ object Tokens extends TokensCommon {
final val canStartExprTokens2: TokenSet = canStartExprTokens3 | BitSet(DO)

final val canStartTypeTokens: TokenSet = literalTokens | identifierTokens | BitSet(
THIS, SUPER, USCORE, LPAREN, AT)
THIS, SUPER, USCORE, LPAREN, LBRACE, AT)

final val templateIntroTokens: TokenSet = BitSet(CLASS, TRAIT, OBJECT, ENUM, CASECLASS, CASEOBJECT)

Expand Down Expand Up @@ -276,7 +272,7 @@ object Tokens extends TokensCommon {
final val closingRegionTokens = BitSet(RBRACE, RPAREN, RBRACKET, CASE) | statCtdTokens

final val canStartIndentTokens: BitSet =
statCtdTokens | BitSet(COLONEOL, WITH, EQUALS, ARROW, CTXARROW, LARROW, WHILE, TRY, FOR, IF, THROW, RETURN)
statCtdTokens | BitSet(COLONeol, WITH, EQUALS, ARROW, CTXARROW, LARROW, WHILE, TRY, FOR, IF, THROW, RETURN)

/** Faced with the choice between a type and a formal parameter, the following
* tokens determine it's a formal parameter.
Expand All @@ -287,7 +283,16 @@ object Tokens extends TokensCommon {

final val endMarkerTokens = identifierTokens | BitSet(IF, WHILE, FOR, MATCH, TRY, NEW, THROW, GIVEN, VAL, THIS)

final val colonEOLPredecessors = BitSet(RPAREN, RBRACKET, BACKQUOTED_IDENT, THIS, SUPER, NEW)

final val closingParens = BitSet(RPAREN, RBRACKET, RBRACE)

final val softModifierNames = Set(nme.inline, nme.opaque, nme.open, nme.transparent, nme.infix)

def showTokenDetailed(token: Int): String = debugString(token)

def showToken(token: Int): String = {
val str = tokenString(token)
if isKeyword(token) || token == COLONfollow || token == COLONeol then s"'$str'" else str
}
}
44 changes: 31 additions & 13 deletions docs/_docs/internals/syntax.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,20 @@ layout: doc-page
title: "Scala 3 Syntax Summary"
---

<!--

This page has a companion page at _docs/reference/syntax.md.

!! Make sure to edit both pages in sync. !!

reference/syntax.md shows the official Scala 3 syntax, without deprecated or experimental features.

internals/syntax.md shows the Scala 3 syntax as supported by the parser, including
deprecated and experimental features. It also gives some indication of how
productions map to AST nodes.

-->

The following description of Scala tokens uses literal characters `‘c’` when
referring to the ASCII fragment `\u0000` – `\u007F`.

Expand Down Expand Up @@ -88,22 +102,24 @@ nl ::= “new line character”
semi ::= ‘;’ | nl {nl}
```


## Optional Braces

The lexical analyzer also inserts `indent` and `outdent` tokens that represent regions of indented code [at certain points](../reference/other-new-features/indentation.md).

In the context-free productions below we use the notation `<<< ts >>>`
to indicate a token sequence `ts` that is either enclosed in a pair of braces `{ ts }` or that constitutes an indented region `indent ts outdent`. Analogously, the
notation `:<<< ts >>>` indicates a token sequence `ts` that is either enclosed in a pair of braces `{ ts }` or that constitutes an indented region `indent ts outdent` that follows
a `:` at the end of a line.
a `colon` token.

A `colon` token reads as the standard colon "`:`". It is generated in place of the standard colon only where `colon` is legal according to the context-free syntax and the previous token
is an alphanumeric identifier, a backticked identifier, or one of the tokens `this`, `super`, `new`, "`)`", and "`]`".

```
colon ::= ':' -- with side conditions explained above
<<< ts >>> ::= ‘{’ ts ‘}’
| indent ts outdent
:<<< ts >>> ::= [nl] ‘{’ ts ‘}’
| `:` indent ts outdent
| colon indent ts outdent
```

## Keywords
Expand All @@ -124,7 +140,7 @@ type val var while with yield
### Soft keywords

```
as derives end extension infix inline opaque open transparent using | * + -
as derives end extension infix inline opaque open throws transparent using | * + -
```

See the [separate section on soft keywords](../reference/soft-modifier.md) for additional
Expand Down Expand Up @@ -197,7 +213,7 @@ FunArgTypes ::= FunArgType { ‘,’ FunArgType }
ParamType ::= [‘=>’] ParamValueType
ParamValueType ::= Type [‘*’] PostfixOp(t, "*")
TypeArgs ::= ‘[’ Types ‘]’ ts
Refinement ::= ‘{’ [RefineDcl] {semi [RefineDcl]} ‘}’ ds
Refinement ::= :<<< [RefineDcl] {semi [RefineDcl]} >>> ds
TypeBounds ::= [‘>:’ Type] [‘<:’ Type] TypeBoundsTree(lo, hi)
TypeParamBounds ::= TypeBounds {‘:’ Type} ContextBounds(typeBounds, tps)
Types ::= Type {‘,’ Type}
Expand Down Expand Up @@ -231,13 +247,13 @@ Expr1 ::= [‘inline’] ‘if’ ‘(’ Expr ‘)’ {nl} Expr [[
Ascription ::= ‘:’ InfixType Typed(expr, tp)
| ‘:’ Annotation {Annotation} Typed(expr, Annotated(EmptyTree, annot)*)
Catches ::= ‘catch’ (Expr | ExprCaseClause)
PostfixExpr ::= InfixExpr [id] PostfixOp(expr, op)
PostfixExpr ::= InfixExpr [id] PostfixOp(expr, op) -- only if language.postfixOperators is enabled
InfixExpr ::= PrefixExpr
| InfixExpr id [nl] InfixExpr InfixOp(expr, op, expr)
| InfixExpr id ‘:’ IndentedExpr
| InfixExpr id ColonArgument
| InfixExpr MatchClause
MatchClause ::= ‘match’ <<< CaseClauses >>> Match(expr, cases)
PrefixExpr ::= [PrefixOperator] SimpleExpr PrefixOp(expr, op)
PrefixExpr ::= [PrefixOperator] SimpleExpr PrefixOp(expr, op)
PrefixOperator ::= ‘-’ | ‘+’ | ‘~’ | ‘!’
SimpleExpr ::= SimpleRef
| Literal
Expand All @@ -253,11 +269,13 @@ SimpleExpr ::= SimpleRef
| SimpleExpr ‘.’ MatchClause
| SimpleExpr TypeArgs TypeApply(expr, args)
| SimpleExpr ArgumentExprs Apply(expr, args)
| SimpleExpr ‘:’ IndentedExpr -- under language.experimental.fewerBraces
| SimpleExpr FunParams (‘=>’ | ‘?=>’) IndentedExpr -- under language.experimental.fewerBraces
| SimpleExpr ColonArgument -- under language.experimental.fewerBraces
| SimpleExpr ‘_’ PostfixOp(expr, _) (to be dropped)
| XmlExpr -- to be dropped
IndentedExpr ::= indent CaseClauses | Block outdent
| XmlExpr -- to be dropped
ColonArgument ::= colon [LambdaStart]
indent (CaseClauses | Block) outdent
LambdaStart ::= FunParams (‘=>’ | ‘?=>’)
| HkTypeParamClause ‘=>’
Quoted ::= ‘'’ ‘{’ Block ‘}’
| ‘'’ ‘[’ Type ‘]’
ExprSplice ::= spliceId -- if inside quoted block
Expand Down Expand Up @@ -300,7 +318,7 @@ TypeCaseClause ::= ‘case’ (InfixType | ‘_’) ‘=>’ Type [semi]

Pattern ::= Pattern1 { ‘|’ Pattern1 } Alternative(pats)
Pattern1 ::= Pattern2 [‘:’ RefinedType] Bind(name, Typed(Ident(wildcard), tpe))
Pattern2 ::= [id ‘@’] InfixPattern Bind(name, pat)
Pattern2 ::= [id ‘@’] InfixPattern [‘*’] Bind(name, pat)
InfixPattern ::= SimplePattern { id [nl] SimplePattern } InfixOp(pat, op, pat)
SimplePattern ::= PatVar Ident(wildcard)
| Literal Bind(name, Ident(wildcard))
Expand Down
Loading