Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 74 additions & 17 deletions fyi/semgrep-grammars/src/semgrep-python/grammar.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,41 +10,98 @@ module.exports = grammar(base_grammar, {
name: 'python',

conflicts: ($, previous) => previous.concat([
// Every conflict declared by the base grammar is kept; the single
// pair below is appended to that list.
//
// `...` inside a dict literal is ambiguous between a
// `semgrep_ellipsis` element (LANG-465) and an `ellipsis` that
// would otherwise start a `pair` key. Real Semgrep patterns
// never write `... :` as a key, but tree-sitter still needs an
// explicit conflict declaration to resolve the lookahead.
[$.ellipsis, $.semgrep_ellipsis],
]),

/*
Support for semgrep ellipsis ('...') and metavariables ('$FOO'),
if they're not already part of the base grammar.
*/
rules: {
/*
semgrep_ellipsis: $ => '...',

_expression: ($, previous) => choice(
$.semgrep_ellipsis,
...previous.members
),
*/
// Metavariables

// Rather than creating a separate metavariable term
// and adding it to identifiers, this instead overrides the
// regex that is defined in the original tree-sitter grammar.
// this is needed since currently in the original tree-sitter grammar,
// identifier is a terminal, and thus can't do
// the usual choice/previous shadowing definition.

//
// Rather than creating a separate metavariable term and adding it
// to identifiers, this overrides the regex defined in the original
// tree-sitter grammar. This is needed because in the upstream
// grammar `identifier` is a terminal and cannot use the usual
// choice/previous shadowing definition.
// One token for both plain identifiers and metavariables: an optional
// leading `$`, then `_` or an XID_Start character, then any number of
// `_` / XID_Continue characters. Because the `$` is optional, rules
// that reference `$.identifier` cannot tell the two apart.
identifier: $ => /\$?[_\p{XID_Start}][_\p{XID_Continue}]*/,

// Allow '...' in the attribute position of a dot-access expression,
// so that patterns like `a. ... .d` work for matching call chains.
// This mirrors the Java grammar's field_access override.
// PREC.call = 22 in the base Python grammar.
// The precedence is hard-coded as 22; it has to be kept in sync by hand
// with the value the base Python grammar uses for call/attribute access.
attribute: $ => prec(22, seq(
field('object', $.primary_expression),
'.',
// The ellipsis here is the anonymous '...' literal, not the named
// `semgrep_ellipsis` rule defined further down.
field('attribute', choice($.identifier, '...')),
)),

// Shared semgrep ellipsis token. Used by parameter lists, match-case
// pattern lists, and dictionary literals.
// Named rule wrapping the literal '...' so other rules can reference it
// as `$.semgrep_ellipsis`.
semgrep_ellipsis: $ => '...',

// Variadic metavariable (e.g. `$...ARGS`) used in argument lists.
// A single token: `$...` immediately followed by a name that starts with
// an uppercase ASCII letter or `_` and continues with uppercase ASCII
// letters, digits or `_`. Lowercase names (e.g. `$...args`) do not match.
semgrep_ellipsis_metavar: $ => token(/\$\.\.\.[A-Z_][A-Z_0-9]*/),

// LANG-460: allow `...` in a function's parameter list, so patterns
// like `def $F(...): ...` parse cleanly.
// `semgrep_ellipsis` is placed first; every alternative of the base
// `parameter` rule is then re-spread unchanged. Reading
// `previous.members` directly assumes the base rule is a bare `choice`.
parameter: ($, previous) => choice(
$.semgrep_ellipsis,
...previous.members,
),

// LANG-461 / LANG-463: accept `$...ARGS` and the typed metavariable
// `($X : T)` as primary expressions. This makes them usable through
// any rule that already takes an `expression`, including
// `argument_list`.
// Adds two Semgrep-only alternatives ahead of the base ones: the variadic
// metavariable token and the typed metavariable `($X : T)`. Reading
// `previous.members` directly assumes the base rule is a bare `choice`.
primary_expression: ($, previous) => choice(
$.semgrep_ellipsis_metavar,
$.typed_metavar,
...previous.members,
),

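// LANG-463: typed metavariable `($X : T)`. The leading identifier is
// meant to be a metavariable (start with `$`) to disambiguate from a
// regular `parenthesized_expression`, but the grammar does not enforce
// this: `$.identifier` also matches plain names, so `(x : T)` parses
// as a `typed_metavar` too. The dynamic precedence bump makes the
// parser prefer `typed_metavar` whenever both parses are possible,
// whether or not the inner identifier starts with `$`.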
// Shape: `(` identifier `:` type `)`, with the type exposed as field
// `type`. The dynamic precedence of 1 is attached to the whole sequence.
typed_metavar: $ => prec.dynamic(1, seq(
'(',
// NOTE(review): `identifier` makes the leading `$` optional, so a plain
// name such as `x` is accepted here as well as `$X`.
$.identifier,
':',
field('type', $.type),
')',
)),

// LANG-464: allow `...` as a sub-pattern inside class/list/tuple/
// dict patterns of a `match` statement. `case_pattern` is the
// shared choice used by every pattern container, so a single
// override lights up ellipsis everywhere.
// `previous.content.members` reaches through one wrapper node around the
// base rule to get at its `choice` alternatives; the wrapper is rebuilt
// here as `prec(1, ...)`.
// NOTE(review): presumably this mirrors the precedence wrapper of the
// upstream `case_pattern` rule -- confirm against the base grammar when
// it is updated.
case_pattern: ($, previous) => prec(1, choice(
$.semgrep_ellipsis,
...previous.content.members,
)),

// LANG-465: allow `...` as an element of a dict literal alongside
// `pair` and `dictionary_splat`. The base rule is a `seq(...)`
// not a `choice(...)`, so we restate it.
// Full restatement of the rule (it does not use `previous`), so it will
// not pick up later changes to the base `dictionary` rule automatically.
dictionary: $ => seq(
'{',
// Zero or one comma-separated run of elements; `commaSep1` is the
// helper defined at the bottom of this file.
optional(commaSep1(choice(
$.pair,
$.dictionary_splat,
$.semgrep_ellipsis,
))),
// Optional trailing comma before the closing brace.
optional(','),
'}',
),
}
});

/**
 * Build a rule that matches one or more occurrences of `rule`, separated
 * by commas. A trailing comma is not matched here.
 *
 * @param rule - the grammar rule for a single element
 * @returns the result of `seq(rule, repeat(seq(',', rule)))`
 */
function commaSep1(rule) {
  const commaThenRule = seq(',', rule);
  const remainingElements = repeat(commaThenRule);
  return seq(rule, remainingElements);
}
38 changes: 26 additions & 12 deletions fyi/versions
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,35 @@ Last change in file:
feat: add fields in `except_clause`
---
File: semgrep-grammars/src/semgrep-python/grammar.js
Git repo name: ocaml-tree-sitter-semgrep
Latest commit in repo: 58a4c3c46ea65e6208fc2a74143dc87bd4261f08
Git repo name: ocaml-tree-sitter-semgrep
Latest commit in repo: 246f2765f42fe7bb0298764c275e36a11257a449
Last change in file:
commit 58a4c3c46ea65e6208fc2a74143dc87bd4261f08
Author: brandonspark <wu.brandonj@gmail.com>
Date: Tue Mar 3 16:12:17 2026 -0800
commit 246f2765f42fe7bb0298764c275e36a11257a449
Author: brandonspark <brandon@semgrep.com>
Date: Wed Apr 29 17:30:27 2026 -0700

feat(python): support ellipsis in dot-access chain patterns
fix(python): augment grammar for canonical Semgrep pattern idioms

Override the `attribute` rule to accept `'...'` as an alternative to
`identifier` in the attribute field. This lets semgrep patterns like
`a. ... .d` parse correctly for matching call chains, matching the
existing Java `field_access` behavior.
Adds grammar overrides so common Semgrep pattern idioms parse cleanly
in Python:

Fixes https://github.com/semgrep/semgrep/issues/11545
- LANG-460: `...` in a function parameter list (e.g. `def $F(...): ...`)
- LANG-461: `$...ARGS` variadic metavariable in argument lists
- LANG-463: typed metavariable `($X : T)`
- LANG-464: `...` in match-case class/list/tuple/dict patterns
- LANG-465: `...` as a dict-literal element

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Introduces shared `semgrep_ellipsis` and `semgrep_ellipsis_metavar`
terminals, plus `typed_metavar`, and extends `parameter`,
`primary_expression`, `case_pattern`, and `dictionary` to accept them.
Adds a single `[ellipsis, semgrep_ellipsis]` conflict to resolve the
1-lookahead ambiguity between an `ellipsis` pair-key and a
`semgrep_ellipsis` element inside a dict literal.

LANG-462 is intentionally deferred (deep ellipsis -- needs scanner
work, tracked separately).

Corpus tests added for each fix; `make test` passes 123/123.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
57 changes: 41 additions & 16 deletions lib/Boilerplate.ml
Original file line number Diff line number Diff line change
Expand Up @@ -19,38 +19,38 @@ let token (env : env) (tok : Tree_sitter_run.Token.t) =
let blank (env : env) () =
R.Tuple []

let map_semgrep_ellipsis_metavar (env : env) (tok : CST.semgrep_ellipsis_metavar) =
(* semgrep_ellipsis_metavar *) token env tok

let map_import_prefix (env : env) (xs : CST.import_prefix) =
R.List (List.map (token env (* "." *)) xs)

let map_type_conversion (env : env) (tok : CST.type_conversion) =
(* pattern ![a-z] *) token env tok

let map_dedent (env : env) (tok : CST.dedent) =
(* dedent *) token env tok

let map_string_content_ (env : env) (tok : CST.string_content_) =
(* string_content_ *) token env tok

let map_float_ (env : env) (tok : CST.float_) =
(* float *) token env tok

let map_string_end (env : env) (tok : CST.string_end) =
(* string_end *) token env tok

let map_escape_interpolation (env : env) (tok : CST.escape_interpolation) =
(* escape_interpolation *) token env tok

let map_indent (env : env) (tok : CST.indent) =
(* indent *) token env tok

let map_tok_prec_p1_pat_a2d1fce (env : env) (tok : CST.tok_prec_p1_pat_a2d1fce) =
(* tok_prec_p1_pat_a2d1fce *) token env tok
let map_string_start (env : env) (tok : CST.string_start) =
(* string_start *) token env tok

let map_string_end (env : env) (tok : CST.string_end) =
(* string_end *) token env tok

let map_identifier (env : env) (tok : CST.identifier) =
(* pattern \$?[_\p{XID_Start}][_\p{XID_Continue}]* *) token env tok

let map_string_start (env : env) (tok : CST.string_start) =
(* string_start *) token env tok
let map_tok_prec_p1_pat_a2d1fce (env : env) (tok : CST.tok_prec_p1_pat_a2d1fce) =
(* tok_prec_p1_pat_a2d1fce *) token env tok

let map_string_content_ (env : env) (tok : CST.string_content_) =
(* string_content_ *) token env tok

let map_is_not (env : env) ((v1, v2) : CST.is_not) =
let v1 = (* "is" *) token env v1 in
Expand All @@ -60,6 +60,9 @@ let map_is_not (env : env) ((v1, v2) : CST.is_not) =
let map_integer (env : env) (tok : CST.integer) =
(* integer *) token env tok

let map_type_conversion (env : env) (tok : CST.type_conversion) =
(* pattern ![a-z] *) token env tok

let map_newline (env : env) (tok : CST.newline) =
(* newline *) token env tok

Expand Down Expand Up @@ -283,14 +286,17 @@ and map_anon_choice_id_9e93682 (env : env) (x : CST.anon_choice_id_9e93682) =
)
)

and map_anon_choice_pair_002ffed (env : env) (x : CST.anon_choice_pair_002ffed) =
and map_anon_choice_pair_a7b6116 (env : env) (x : CST.anon_choice_pair_a7b6116) =
(match x with
| `Pair x -> R.Case ("Pair",
map_pair env x
)
| `Dict_splat x -> R.Case ("Dict_splat",
map_dictionary_splat env x
)
| `Semg_ellips tok -> R.Case ("Semg_ellips",
(* "..." *) token env tok
)
)

and map_argument_list (env : env) ((v1, v2, v3, v4) : CST.argument_list) =
Expand Down Expand Up @@ -765,6 +771,9 @@ and map_pair (env : env) ((v1, v2, v3) : CST.pair) =

and map_parameter (env : env) (x : CST.parameter) =
(match x with
| `Semg_ellips tok -> R.Case ("Semg_ellips",
(* "..." *) token env tok
)
| `Id tok -> R.Case ("Id",
(* pattern \$?[_\p{XID_Start}][_\p{XID_Continue}]* *) token env tok
)
Expand Down Expand Up @@ -943,6 +952,19 @@ and map_patterns (env : env) ((v1, v2, v3) : CST.patterns) =

and map_primary_expression (env : env) (x : CST.primary_expression) =
(match x with
| `Semg_ellips_meta tok -> R.Case ("Semg_ellips_meta",
(* semgrep_ellipsis_metavar *) token env tok
)
| `Typed_meta (v1, v2, v3, v4, v5) -> R.Case ("Typed_meta",
let v1 = (* "(" *) token env v1 in
let v2 =
(* pattern \$?[_\p{XID_Start}][_\p{XID_Continue}]* *) token env v2
in
let v3 = (* ":" *) token env v3 in
let v4 = map_type_ env v4 in
let v5 = (* ")" *) token env v5 in
R.Tuple [v1; v2; v3; v4; v5]
)
| `Await (v1, v2) -> R.Case ("Await",
let v1 = (* "await" *) token env v1 in
let v2 = map_primary_expression env v2 in
Expand Down Expand Up @@ -1039,11 +1061,11 @@ and map_primary_expression (env : env) (x : CST.primary_expression) =
let v2 =
(match v2 with
| Some (v1, v2) -> R.Option (Some (
let v1 = map_anon_choice_pair_002ffed env v1 in
let v1 = map_anon_choice_pair_a7b6116 env v1 in
let v2 =
R.List (List.map (fun (v1, v2) ->
let v1 = (* "," *) token env v1 in
let v2 = map_anon_choice_pair_002ffed env v2 in
let v2 = map_anon_choice_pair_a7b6116 env v2 in
R.Tuple [v1; v2]
) v2)
in
Expand Down Expand Up @@ -1311,6 +1333,9 @@ and map_anon_choice_key_value_pat_9cde426 (env : env) (x : CST.anon_choice_key_v

and map_case_pattern (env : env) (x : CST.case_pattern) =
(match x with
| `Semg_ellips tok -> R.Case ("Semg_ellips",
(* "..." *) token env tok
)
| `As_pat (v1, v2, v3) -> R.Case ("As_pat",
let v1 = map_case_pattern env v1 in
let v2 = (* "as" *) token env v2 in
Expand Down
Loading