diff --git a/Cargo.toml b/Cargo.toml index 46180a77..a4521758 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,14 +12,18 @@ include = ["LICENSE", "Cargo.toml", "src/**/*.rs"] resolver = "2" members = [ "ast", "core", "literal", "parser", + "ruff_text_size", "ruff_source_location", ] [workspace.dependencies] +rustpython-ast = { path = "ast", version = "0.2.0" } +rustpython-parser-core = { path = "core", version = "0.2.0" } +rustpython-literal = { path = "literal", version = "0.2.0" } +ruff_text_size = { path = "ruff_text_size" } +ruff_source_location = { path = "ruff_source_location" } + ahash = "0.7.6" anyhow = "1.0.45" -ascii = "1.0" -bitflags = "1.3.2" -bstr = "0.2.17" cfg-if = "1.0" insta = "1.14.0" itertools = "0.10.3" @@ -30,6 +34,7 @@ num-traits = "0.2" rand = "0.8.5" serde = "1.0" static_assertions = "1.1" +once_cell = "1.17.1" unicode_names2 = { version = "0.6.0", git = "https://github.com/youknowone/unicode_names2.git", rev = "4ce16aa85cbcdd9cc830410f1a72ef9a235f2fde" } [profile.dev.package."*"] diff --git a/ast/Cargo.toml b/ast/Cargo.toml index 5de211dd..a7fb168c 100644 --- a/ast/Cargo.toml +++ b/ast/Cargo.toml @@ -8,13 +8,14 @@ repository = "https://github.com/RustPython/RustPython" license = "MIT" [features] -default = ["constant-optimization", "fold"] +default = ["constant-optimization", "fold", "source-code"] constant-optimization = ["fold"] +source-code = ["fold"] fold = [] unparse = ["rustpython-literal"] [dependencies] -rustpython-compiler-core = { path = "../core", version = "0.2.0" } -rustpython-literal = { path = "../literal", version = "0.2.0", optional = true } +rustpython-parser-core = { workspace = true } +rustpython-literal = { workspace = true, optional = true } num-bigint = { workspace = true } diff --git a/ast/asdl_rs.py b/ast/asdl_rs.py index d74d730b..b8f5c9a1 100755 --- a/ast/asdl_rs.py +++ b/ast/asdl_rs.py @@ -7,6 +7,7 @@ from argparse import ArgumentParser from pathlib import Path +from typing import Optional, Dict import asdl @@ -16,7 +17,7 @@ builtin_type_mapping = { "identifier": "Ident", "string": "String", - "int": "usize", + "int": "u32", "constant": "Constant", } assert builtin_type_mapping.keys() == asdl.builtin_types @@ -62,38 +63,62 @@ def asdl_of(name, obj): return "{} = {}".format(name, types) -class EmitVisitor(asdl.VisitorBase): - """Visit that emits lines""" - - def __init__(self, file): - self.file = file - self.identifiers = set() - super(EmitVisitor, self).__init__() - - def emit_identifier(self, name): - name = str(name) - if name in self.identifiers: - return - self.emit("_Py_IDENTIFIER(%s);" % name, 0) - self.identifiers.add(name) - - def emit(self, line, depth): - if line: - line = (" " * TABSIZE * depth) + line - self.file.write(line + "\n") - - class TypeInfo: + name: str + enum_name: Optional[str] + has_userdata: Optional[bool] + has_attributes: bool + children: set + boxed: bool + product: bool + has_expr: bool = False + def __init__(self, name): self.name = name + self.enum_name = None self.has_userdata = None + self.has_attributes = False self.children = set() self.boxed = False self.product = False + self.product_has_expr = False def __repr__(self): return f"" + @property + def rust_name(self): + return get_rust_type(self.name) + + @property + def sum_name(self): + if self.enum_name is None: + return self.name + else: + return f"{self.enum_name}_{self.name}" + + @property + def rust_sum_name(self): + rust_name = get_rust_type(self.name) + if self.enum_name is None: + return rust_name + else: + name = get_rust_type(self.enum_name) + rust_name + return name + + @property + def rust_suffix(self): + if self.product: + if self.has_attributes: + return "Data" + else: + return "" + else: + if self.has_attributes: + return "Kind" + else: + return "" + def determine_userdata(self, typeinfo, stack): if self.name in stack: return None @@ -110,6 +135,41 @@ def determine_userdata(self, typeinfo, stack): return self.has_userdata +class TypeInfoMixin: + typeinfo: Dict[str, TypeInfo] + + def has_userdata(self, typ): + return self.typeinfo[typ].has_userdata + + def get_generics(self, typ, *generics): + if self.has_userdata(typ): + return [f"<{g}>" for g in generics] + else: + return ["" for g in generics] + + +class EmitVisitor(asdl.VisitorBase, TypeInfoMixin): + """Visit that emits lines""" + + def __init__(self, file, typeinfo): + self.file = file + self.typeinfo = typeinfo + self.identifiers = set() + super(EmitVisitor, self).__init__() + + def emit_identifier(self, name): + name = str(name) + if name in self.identifiers: + return + self.emit("_Py_IDENTIFIER(%s);" % name, 0) + self.identifiers.add(name) + + def emit(self, line, depth): + if line: + line = (" " * TABSIZE * depth) + line + self.file.write(line + "\n") + + class FindUserdataTypesVisitor(asdl.VisitorBase): def __init__(self, typeinfo): self.typeinfo = typeinfo @@ -132,21 +192,29 @@ def visitSum(self, sum, name): info.has_userdata = False else: for t in sum.types: - self.typeinfo[t.name] = TypeInfo(t.name) + if not t.fields: + continue + t_info = TypeInfo(t.name) + t_info.enum_name = name + self.typeinfo[t.name] = t_info self.add_children(t.name, t.fields) if len(sum.types) > 1: info.boxed = True if sum.attributes: - # attributes means Located, which has the `custom: U` field + # attributes means located, which has the `custom: U` field info.has_userdata = True + info.has_attributes = True + for variant in sum.types: self.add_children(name, variant.fields) def visitProduct(self, product, name): info = self.typeinfo[name] if product.attributes: - # attributes means Located, which has the `custom: U` field + # attributes means located, which has the `custom: U` field info.has_userdata = True + info.has_attributes = True + info.has_expr = product_has_expr(product) if len(product.fields) > 2: info.boxed = True info.product = True @@ -163,24 +231,17 @@ def rust_field(field_name): return field_name -class TypeInfoEmitVisitor(EmitVisitor): - def __init__(self, file, typeinfo): - self.typeinfo = typeinfo - super().__init__(file) - - def has_userdata(self, typ): - return self.typeinfo[typ].has_userdata - - def get_generics(self, typ, *generics): - if self.has_userdata(typ): - return [f"<{g}>" for g in generics] - else: - return ["" for g in generics] +def product_has_expr(product): + return any(f.type != "identifier" for f in product.fields) -class StructVisitor(TypeInfoEmitVisitor): +class StructVisitor(EmitVisitor): """Visitor to generate typedefs for AST.""" + def __init__(self, *args, **kw): + super().__init__(*args, **kw) + self.rust_type_defs = [] + def visitModule(self, mod): for dfn in mod.dfns: self.visit(dfn) @@ -208,57 +269,56 @@ def simple_sum(self, sum, name, depth): def sum_with_constructors(self, sum, name, depth): typeinfo = self.typeinfo[name] - enumname = rustname = get_rust_type(name) + suffix = typeinfo.rust_suffix + rustname = get_rust_type(name) # all the attributes right now are for location, so if it has attrs we - # can just wrap it in Located<> - if sum.attributes: - enumname = rustname + "Kind" + # can just wrap it in Attributed<> for t in sum.types: if not t.fields: continue - self.emit_attrs(depth) - self.typeinfo[t] = TypeInfo(t) - t_generics, t_generics_applied = self.get_generics(t.name, "U = ()", "U") - payload_name = f"{rustname}{t.name}" - self.emit(f"pub struct {payload_name}{t_generics} {{", depth) - for f in t.fields: - self.visit(f, typeinfo, "pub ", depth + 1, t.name) - self.emit("}", depth) - self.emit( - textwrap.dedent( - f""" - impl{t_generics_applied} From<{payload_name}{t_generics_applied}> for {enumname}{t_generics_applied} {{ - fn from(payload: {payload_name}{t_generics_applied}) -> Self {{ - {enumname}::{t.name}(payload) - }} - }} - """ - ), - depth, - ) + self.sum_subtype_struct(typeinfo, t, rustname, depth) generics, generics_applied = self.get_generics(name, "U = ()", "U") self.emit_attrs(depth) - self.emit(f"pub enum {enumname}{generics} {{", depth) + self.emit(f"pub enum {rustname}{suffix}{generics} {{", depth) for t in sum.types: if t.fields: - t_generics, t_generics_applied = self.get_generics( - t.name, "U = ()", "U" - ) + (t_generics_applied,) = self.get_generics(t.name, "U") self.emit( f"{t.name}({rustname}{t.name}{t_generics_applied}),", depth + 1 ) else: self.emit(f"{t.name},", depth + 1) self.emit("}", depth) - if sum.attributes: + if typeinfo.has_attributes: self.emit( - f"pub type {rustname} = Located<{enumname}{generics_applied}, U>;", + f"pub type {rustname} = Attributed<{rustname}{suffix}{generics_applied}, U>;", depth, ) self.emit("", depth) + def sum_subtype_struct(self, sum_typeinfo, t, rustname, depth): + self.emit_attrs(depth) + generics, generics_applied = self.get_generics(t.name, "U = ()", "U") + payload_name = f"{rustname}{t.name}" + self.emit(f"pub struct {payload_name}{generics} {{", depth) + for f in t.fields: + self.visit(f, sum_typeinfo, "pub ", depth + 1, t.name) + self.emit("}", depth) + self.emit( + textwrap.dedent( + f""" + impl{generics_applied} From<{payload_name}{generics_applied}> for {rustname}{sum_typeinfo.rust_suffix}{generics_applied} {{ + fn from(payload: {payload_name}{generics_applied}) -> Self {{ + {rustname}{sum_typeinfo.rust_suffix}::{t.name}(payload) + }} + }} + """ + ), + depth, + ) + def visitConstructor(self, cons, parent, depth): if cons.fields: self.emit(f"{cons.name} {{", depth) @@ -300,7 +360,7 @@ def visitProduct(self, product, name, depth): if product.attributes: dataname = rustname + "Data" self.emit_attrs(depth) - has_expr = any(f.type != "identifier" for f in product.fields) + has_expr = product_has_expr(product) if has_expr: datadef = f"{dataname}{generics}" else: @@ -314,20 +374,35 @@ def visitProduct(self, product, name, depth): if not has_expr: generics_applied = "" self.emit( - f"pub type {rustname} = Located<{dataname}{generics_applied}, U>;", + f"pub type {rustname} = Attributed<{dataname}{generics_applied}, U>;", depth, ) self.emit("", depth) -class FoldTraitDefVisitor(TypeInfoEmitVisitor): +class FoldTraitDefVisitor(EmitVisitor): def visitModule(self, mod, depth): self.emit("pub trait Fold {", depth) self.emit("type TargetU;", depth + 1) self.emit("type Error;", depth + 1) self.emit( "fn map_user(&mut self, user: U) -> Result;", - depth + 2, + depth + 1, + ) + self.emit( + """ + fn map_attributed(&mut self, attributed: Attributed) -> Result, Self::Error> { + let custom = self.map_user(attributed.custom)?; + Ok(Attributed { range: attributed.range, custom, node: attributed.node }) + }""", + depth + 1, + ) + self.emit( + """ + fn fold>(&mut self, node: X) -> Result { + node.fold(self) + }""", + depth + 1, ) for dfn in mod.dfns: self.visit(dfn, depth + 2) @@ -345,14 +420,14 @@ def visitType(self, type, depth): self.emit("}", depth) -class FoldImplVisitor(TypeInfoEmitVisitor): +class FoldImplVisitor(EmitVisitor): def visitModule(self, mod, depth): self.emit( - "fn fold_located + ?Sized, T, MT>(folder: &mut F, node: Located, f: impl FnOnce(&mut F, T) -> Result) -> Result, F::Error> {", + "fn fold_attributed + ?Sized, T, MT>(folder: &mut F, node: Attributed, f: impl FnOnce(&mut F, T) -> Result) -> Result, F::Error> {", depth, ) self.emit( - "Ok(Located { custom: folder.map_user(node.custom)?, location: node.location, end_location: node.end_location, node: f(folder, node.node)? })", + "let node = folder.map_attributed(node)?; Ok(Attributed { custom: node.custom, range: node.range, node: f(folder, node.node)? })", depth + 1, ) self.emit("}", depth) @@ -363,11 +438,11 @@ def visitType(self, type, depth=0): self.visit(type.value, type.name, depth) def visitSum(self, sum, name, depth): + typeinfo = self.typeinfo[name] apply_t, apply_u, apply_target_u = self.get_generics( name, "T", "U", "F::TargetU" ) enumname = get_rust_type(name) - is_located = bool(sum.attributes) self.emit(f"impl Foldable for {enumname}{apply_t} {{", depth) self.emit(f"type Mapped = {enumname}{apply_u};", depth + 1) @@ -383,15 +458,13 @@ def visitSum(self, sum, name, depth): f"pub fn fold_{name} + ?Sized>(#[allow(unused)] folder: &mut F, node: {enumname}{apply_u}) -> Result<{enumname}{apply_target_u}, F::Error> {{", depth, ) - if is_located: - self.emit("fold_located(folder, node, |folder, node| {", depth) - rustname = enumname + "Kind" - else: - rustname = enumname + if typeinfo.has_attributes: + self.emit("fold_attributed(folder, node, |folder, node| {", depth) + self.emit("match node {", depth + 1) for cons in sum.types: fields_pattern = self.make_pattern( - enumname, rustname, cons.name, cons.fields + enumname, typeinfo.rust_suffix, cons.name, cons.fields ) self.emit( f"{fields_pattern[0]} {{ {fields_pattern[1]} }} {fields_pattern[2]} => {{", @@ -402,7 +475,7 @@ def visitSum(self, sum, name, depth): ) self.emit("}", depth + 2) self.emit("}", depth + 1) - if is_located: + if typeinfo.has_attributes: self.emit("})", depth) self.emit("}", depth) @@ -411,7 +484,7 @@ def visitProduct(self, product, name, depth): name, "T", "U", "F::TargetU" ) structname = get_rust_type(name) - is_located = bool(product.attributes) + has_attributes = bool(product.attributes) self.emit(f"impl Foldable for {structname}{apply_t} {{", depth) self.emit(f"type Mapped = {structname}{apply_u};", depth + 1) @@ -427,24 +500,24 @@ def visitProduct(self, product, name, depth): f"pub fn fold_{name} + ?Sized>(#[allow(unused)] folder: &mut F, node: {structname}{apply_u}) -> Result<{structname}{apply_target_u}, F::Error> {{", depth, ) - if is_located: - self.emit("fold_located(folder, node, |folder, node| {", depth) + if has_attributes: + self.emit("fold_attributed(folder, node, |folder, node| {", depth) rustname = structname + "Data" else: rustname = structname fields_pattern = self.make_pattern(rustname, structname, None, product.fields) self.emit(f"let {rustname} {{ {fields_pattern[1]} }} = node;", depth + 1) self.gen_construction(rustname, product.fields, "", depth + 1) - if is_located: + if has_attributes: self.emit("})", depth) self.emit("}", depth) - def make_pattern(self, rustname, pyname, fieldname, fields): + def make_pattern(self, rustname, suffix, fieldname, fields): if fields: - header = f"{pyname}::{fieldname}({rustname}{fieldname}" + header = f"{rustname}{suffix}::{fieldname}({rustname}{fieldname}" footer = ")" else: - header = f"{pyname}::{fieldname}" + header = f"{rustname}{suffix}::{fieldname}" footer = "" body = ",".join(rust_field(f.name) for f in fields) @@ -458,7 +531,7 @@ def gen_construction(self, header, fields, footer, depth): self.emit(f"}}{footer})", depth) -class FoldModuleVisitor(TypeInfoEmitVisitor): +class FoldModuleVisitor(EmitVisitor): def visitModule(self, mod): depth = 0 self.emit('#[cfg(feature = "fold")]', depth) @@ -576,10 +649,10 @@ def visitSum(self, sum, name, depth): if sum.attributes: rustname = enumname + "Kind" - self.emit(f"impl NamedNode for ast::{rustname} {{", depth) + self.emit(f"impl NamedNode for ast::located::{rustname} {{", depth) self.emit(f"const NAME: &'static str = {json.dumps(name)};", depth + 1) self.emit("}", depth) - self.emit(f"impl Node for ast::{rustname} {{", depth) + self.emit(f"impl Node for ast::located::{rustname} {{", depth) self.emit( "fn ast_to_object(self, _vm: &VirtualMachine) -> PyObjectRef {", depth + 1 ) @@ -597,10 +670,12 @@ def visitSum(self, sum, name, depth): self.emit("}", depth) def constructor_to_object(self, cons, enumname, rustname, depth): - self.emit(f"ast::{rustname}::{cons.name}", depth) + self.emit(f"ast::located::{rustname}::{cons.name}", depth) if cons.fields: fields_pattern = self.make_pattern(cons.fields) - self.emit(f"( ast::{enumname}{cons.name} {{ {fields_pattern} }} )", depth) + self.emit( + f"( ast::located::{enumname}{cons.name} {{ {fields_pattern} }} )", depth + ) self.emit(" => {", depth) self.make_node(cons.name, cons.fields, depth + 1) self.emit("}", depth) @@ -610,15 +685,17 @@ def visitProduct(self, product, name, depth): if product.attributes: structname += "Data" - self.emit(f"impl NamedNode for ast::{structname} {{", depth) + self.emit(f"impl NamedNode for ast::located::{structname} {{", depth) self.emit(f"const NAME: &'static str = {json.dumps(name)};", depth + 1) self.emit("}", depth) - self.emit(f"impl Node for ast::{structname} {{", depth) + self.emit(f"impl Node for ast::located::{structname} {{", depth) self.emit( "fn ast_to_object(self, _vm: &VirtualMachine) -> PyObjectRef {", depth + 1 ) fields_pattern = self.make_pattern(product.fields) - self.emit(f"let ast::{structname} {{ {fields_pattern} }} = self;", depth + 2) + self.emit( + f"let ast::located::{structname} {{ {fields_pattern} }} = self;", depth + 2 + ) self.make_node(name, product.fields, depth + 2) self.emit("}", depth + 1) self.emit( @@ -648,19 +725,22 @@ def make_pattern(self, fields): return ",".join(rust_field(f.name) for f in fields) def gen_sum_fromobj(self, sum, sumname, enumname, rustname, depth): - if sum.attributes: - self.extract_location(sumname, depth) + # if sum.attributes: + # self.extract_location(sumname, depth) self.emit("let _cls = _object.class();", depth) self.emit("Ok(", depth) for cons in sum.types: self.emit(f"if _cls.is(Node{cons.name}::static_type()) {{", depth) if cons.fields: - self.emit(f"ast::{rustname}::{cons.name} (ast::{enumname}{cons.name} {{", depth + 1) + self.emit( + f"ast::located::{rustname}::{cons.name} (ast::located::{enumname}{cons.name} {{", + depth + 1, + ) self.gen_construction_fields(cons, sumname, depth + 1) self.emit("})", depth + 1) else: - self.emit(f"ast::{rustname}::{cons.name}", depth + 1) + self.emit(f"ast::located::{rustname}::{cons.name}", depth + 1) self.emit("} else", depth) self.emit("{", depth) @@ -669,8 +749,8 @@ def gen_sum_fromobj(self, sum, sumname, enumname, rustname, depth): self.emit("})", depth) def gen_product_fromobj(self, product, prodname, structname, depth): - if product.attributes: - self.extract_location(prodname, depth) + # if product.attributes: + # self.extract_location(prodname, depth) self.emit("Ok(", depth) self.gen_construction(structname, product, prodname, depth + 1) @@ -684,14 +764,22 @@ def gen_construction_fields(self, cons, name, depth): ) def gen_construction(self, cons_path, cons, name, depth): - self.emit(f"ast::{cons_path} {{", depth) + self.emit(f"ast::located::{cons_path} {{", depth) self.gen_construction_fields(cons, name, depth + 1) self.emit("}", depth) def extract_location(self, typename, depth): row = self.decode_field(asdl.Field("int", "lineno"), typename) column = self.decode_field(asdl.Field("int", "col_offset"), typename) - self.emit(f"let _location = ast::Location::new({row}, {column});", depth) + self.emit( + f""" + let _location = {{ + let row = {row}; + let column = {column}; + try_location(row, column) + }};""", + depth, + ) def decode_field(self, field, typename): name = json.dumps(field.name) @@ -711,81 +799,75 @@ def visit(self, object): v.emit("", 0) -def write_ast_def(mod, typeinfo, f): +def write_generic_def(mod, typeinfo, f): f.write( textwrap.dedent( """ - #![allow(clippy::derive_partial_eq_without_eq)] - - pub use crate::constant::*; - pub use crate::Location; + pub use crate::{Attributed, constant::*}; type Ident = String; \n - """ + """ ) ) - StructVisitor(f, typeinfo).emit_attrs(0) + + c = ChainOfVisitors(StructVisitor(f, typeinfo), FoldModuleVisitor(f, typeinfo)) + c.visit(mod) + + +def write_located_def(typeinfo, f): f.write( textwrap.dedent( """ - pub struct Located { - pub location: Location, - pub end_location: Option, - pub custom: U, - pub node: T, - } - - impl Located { - pub fn new(location: Location, end_location: Location, node: T) -> Self { - Self { location, end_location: Some(end_location), custom: (), node } - } - - pub const fn start(&self) -> Location { - self.location - } - - /// Returns the node's [`end_location`](Located::end_location) or [`location`](Located::start) if - /// [`end_location`](Located::end_location) is `None`. - pub fn end(&self) -> Location { - self.end_location.unwrap_or(self.location) - } - } - - impl std::ops::Deref for Located { - type Target = T; - - fn deref(&self) -> &Self::Target { - &self.node - } - } - \n - """.lstrip() + use rustpython_parser_core::source_code::SourceRange; + + pub type Located = super::generic::Attributed; + """ ) ) - - c = ChainOfVisitors(StructVisitor(f, typeinfo), FoldModuleVisitor(f, typeinfo)) - c.visit(mod) + for info in typeinfo.values(): + if info.has_userdata: + generics = "::" + else: + generics = "" + f.write( + f"pub type {info.rust_sum_name} = super::generic::{info.rust_sum_name}{generics};\n" + ) + if info.rust_suffix: + if info.rust_suffix == "Data" and not info.has_expr: + generics = "" + f.write( + f"pub type {info.rust_sum_name}{info.rust_suffix} = super::generic::{info.rust_sum_name}{info.rust_suffix}{generics};\n" + ) -def write_ast_mod(mod, f): +def write_ast_mod(mod, typeinfo, f): f.write( textwrap.dedent( """ - #![allow(clippy::all)] - - use super::*; - use crate::common::ascii; + #![allow(clippy::all)] - """ + use super::*; + use crate::common::ascii; + """ ) ) - c = ChainOfVisitors(ClassDefVisitor(f), TraitImplVisitor(f), ExtendModuleVisitor(f)) + c = ChainOfVisitors( + ClassDefVisitor(f, typeinfo), + TraitImplVisitor(f, typeinfo), + ExtendModuleVisitor(f, typeinfo), + ) c.visit(mod) -def main(input_filename, ast_mod_filename, ast_def_filename, dump_module=False): +def main( + input_filename, + generic_filename, + located_filename, + module_filename, + dump_module=False, +): auto_gen_msg = AUTOGEN_MESSAGE.format("/".join(Path(__file__).parts[-2:])) mod = asdl.parse(input_filename) if dump_module: @@ -797,22 +879,34 @@ def main(input_filename, ast_mod_filename, ast_def_filename, dump_module=False): typeinfo = {} FindUserdataTypesVisitor(typeinfo).visit(mod) - with ast_def_filename.open("w") as def_file, ast_mod_filename.open("w") as mod_file: - def_file.write(auto_gen_msg) - write_ast_def(mod, typeinfo, def_file) + with generic_filename.open("w") as generic_file, located_filename.open( + "w" + ) as located_file: + generic_file.write(auto_gen_msg) + write_generic_def(mod, typeinfo, generic_file) + located_file.write(auto_gen_msg) + write_located_def(typeinfo, located_file) - mod_file.write(auto_gen_msg) - write_ast_mod(mod, mod_file) + with module_filename.open("w") as module_file: + module_file.write(auto_gen_msg) + write_ast_mod(mod, typeinfo, module_file) - print(f"{ast_def_filename}, {ast_mod_filename} regenerated.") + print(f"{generic_filename}, {located_filename}, {module_filename} regenerated.") if __name__ == "__main__": parser = ArgumentParser() parser.add_argument("input_file", type=Path) - parser.add_argument("-M", "--mod-file", type=Path, required=True) - parser.add_argument("-D", "--def-file", type=Path, required=True) + parser.add_argument("-G", "--generic-file", type=Path, required=True) + parser.add_argument("-L", "--located-file", type=Path, required=True) + parser.add_argument("-M", "--module-file", type=Path, required=True) parser.add_argument("-d", "--dump-module", action="store_true") args = parser.parse_args() - main(args.input_file, args.mod_file, args.def_file, args.dump_module) + main( + args.input_file, + args.generic_file, + args.located_file, + args.module_file, + args.dump_module, + ) diff --git a/ast/src/attributed.rs b/ast/src/attributed.rs new file mode 100644 index 00000000..234b965d --- /dev/null +++ b/ast/src/attributed.rs @@ -0,0 +1,76 @@ +use rustpython_parser_core::{ + source_code::{SourceLocation, SourceRange}, + text_size::{TextRange, TextSize}, +}; + +#[derive(Clone, Debug, PartialEq)] +pub struct Attributed { + pub range: TextRange, + pub custom: U, + pub node: T, +} + +impl Attributed { + /// Returns the node + #[inline] + pub fn node(&self) -> &T { + &self.node + } + + /// Returns the `range` of the node. The range offsets are absolute to the start of the document. + #[inline] + pub const fn range(&self) -> TextRange { + self.range + } + + /// Returns the absolute start position of the node from the beginning of the document. + #[inline] + pub const fn start(&self) -> TextSize { + self.range.start() + } + + /// Returns the absolute position at which the node ends in the source document. + #[inline] + pub const fn end(&self) -> TextSize { + self.range.end() + } +} + +impl Attributed { + /// Creates a new node that spans the position specified by `range`. + pub fn new(range: impl Into, node: T) -> Self { + Self { + range: range.into(), + custom: (), + node, + } + } + + /// Consumes self and returns the node. + #[inline] + pub fn into_node(self) -> T { + self.node + } +} + +impl Attributed { + /// Returns the absolute start position of the node from the beginning of the document. + #[inline] + pub const fn location(&self) -> SourceLocation { + self.custom.start + } + + /// Returns the absolute position at which the node ends in the source document. + #[inline] + pub const fn end_location(&self) -> Option { + self.custom.end + } +} + +impl std::ops::Deref for Attributed { + type Target = T; + + fn deref(&self) -> &Self::Target { + &self.node + } +} diff --git a/ast/src/constant.rs b/ast/src/constant.rs index 0b9a9958..d6bacbc2 100644 --- a/ast/src/constant.rs +++ b/ast/src/constant.rs @@ -1,5 +1,4 @@ use num_bigint::BigInt; -pub use rustpython_compiler_core::ConversionFlag; #[derive(Clone, Debug, PartialEq)] pub enum Constant { @@ -126,8 +125,7 @@ impl crate::fold::Fold for ConstantOptimizer { Ok(crate::Expr { node: expr, custom: node.custom, - location: node.location, - end_location: node.end_location, + range: node.range, }) } _ => crate::fold::fold_expr(self, node), @@ -138,25 +136,24 @@ impl crate::fold::Fold for ConstantOptimizer { #[cfg(test)] mod tests { use super::*; + use rustpython_parser_core::text_size::TextRange; #[cfg(feature = "constant-optimization")] #[test] fn test_constant_opt() { use crate::{fold::Fold, *}; - let start = Default::default(); - let end = None; + let range = TextRange::default(); + #[allow(clippy::let_unit_value)] let custom = (); - let ast = Located { - location: start, - end_location: end, + let ast = Attributed { + range, custom, node: ExprTuple { ctx: ExprContext::Load, elts: vec![ - Located { - location: start, - end_location: end, + Attributed { + range, custom, node: ExprConstant { value: BigInt::from(1).into(), @@ -164,9 +161,8 @@ mod tests { } .into(), }, - Located { - location: start, - end_location: end, + Attributed { + range, custom, node: ExprConstant { value: BigInt::from(2).into(), @@ -174,16 +170,14 @@ mod tests { } .into(), }, - Located { - location: start, - end_location: end, + Attributed { + range, custom, node: ExprTuple { ctx: ExprContext::Load, elts: vec![ - Located { - location: start, - end_location: end, + Attributed { + range, custom, node: ExprConstant { value: BigInt::from(3).into(), @@ -191,9 +185,8 @@ mod tests { } .into(), }, - Located { - location: start, - end_location: end, + Attributed { + range, custom, node: ExprConstant { value: BigInt::from(4).into(), @@ -201,9 +194,8 @@ mod tests { } .into(), }, - Located { - location: start, - end_location: end, + Attributed { + range, custom, node: ExprConstant { value: BigInt::from(5).into(), @@ -224,9 +216,8 @@ mod tests { .unwrap_or_else(|e| match e {}); assert_eq!( new_ast, - Located { - location: start, - end_location: end, + Attributed { + range, custom, node: ExprConstant { value: Constant::Tuple(vec![ diff --git a/ast/src/fold_helpers.rs b/ast/src/fold_helpers.rs index 969ea4e5..773d5c84 100644 --- a/ast/src/fold_helpers.rs +++ b/ast/src/fold_helpers.rs @@ -1,6 +1,6 @@ use crate::{constant, fold::Fold}; -pub(crate) trait Foldable { +pub trait Foldable { type Mapped; fn fold + ?Sized>( self, @@ -62,4 +62,4 @@ macro_rules! simple_fold { }; } -simple_fold!(usize, String, bool, constant::Constant); +simple_fold!(u32, String, bool, constant::Constant); diff --git a/ast/src/ast_gen.rs b/ast/src/gen/generic.rs similarity index 96% rename from ast/src/ast_gen.rs rename to ast/src/gen/generic.rs index 5e22f95e..be488361 100644 --- a/ast/src/ast_gen.rs +++ b/ast/src/gen/generic.rs @@ -1,49 +1,9 @@ // File automatically generated by ast/asdl_rs.py. -#![allow(clippy::derive_partial_eq_without_eq)] - -pub use crate::constant::*; -pub use crate::Location; +pub use crate::{constant::*, Attributed}; type Ident = String; -#[derive(Clone, Debug, PartialEq)] -pub struct Located { - pub location: Location, - pub end_location: Option, - pub custom: U, - pub node: T, -} - -impl Located { - pub fn new(location: Location, end_location: Location, node: T) -> Self { - Self { - location, - end_location: Some(end_location), - custom: (), - node, - } - } - - pub const fn start(&self) -> Location { - self.location - } - - /// Returns the node's [`end_location`](Located::end_location) or [`location`](Located::start) if - /// [`end_location`](Located::end_location) is `None`. - pub fn end(&self) -> Location { - self.end_location.unwrap_or(self.location) - } -} - -impl std::ops::Deref for Located { - type Target = T; - - fn deref(&self) -> &Self::Target { - &self.node - } -} - #[derive(Clone, Debug, PartialEq)] pub struct ModModule { pub body: Vec>, @@ -198,7 +158,7 @@ pub struct StmtAnnAssign { pub target: Box>, pub annotation: Box>, pub value: Option>>, - pub simple: usize, + pub simple: u32, } impl From> for StmtKind { @@ -368,7 +328,7 @@ impl From> for StmtKind { pub struct StmtImportFrom { pub module: Option, pub names: Vec>, - pub level: Option, + pub level: Option, } impl From> for StmtKind { @@ -440,7 +400,7 @@ pub enum StmtKind { Break, Continue, } -pub type Stmt = Located, U>; +pub type Stmt = Attributed, U>; #[derive(Clone, Debug, PartialEq)] pub struct ExprBoolOp { @@ -650,7 +610,7 @@ impl From> for ExprKind { #[derive(Clone, Debug, PartialEq)] pub struct ExprFormattedValue { pub value: Box>, - pub conversion: usize, + pub conversion: u32, pub format_spec: Option>>, } @@ -800,7 +760,7 @@ pub enum ExprKind { Tuple(ExprTuple), Slice(ExprSlice), } -pub type Expr = Located, U>; +pub type Expr = Attributed, U>; #[derive(Clone, Debug, PartialEq)] pub enum ExprContext { @@ -859,7 +819,7 @@ pub struct Comprehension { pub target: Expr, pub iter: Expr, pub ifs: Vec>, - pub is_async: usize, + pub is_async: u32, } #[derive(Clone, Debug, PartialEq)] @@ -879,7 +839,7 @@ impl From> for ExcepthandlerKind { pub enum ExcepthandlerKind { ExceptHandler(ExcepthandlerExceptHandler), } -pub type Excepthandler = Located, U>; +pub type Excepthandler = Attributed, U>; #[derive(Clone, Debug, PartialEq)] pub struct Arguments { @@ -898,21 +858,21 @@ pub struct ArgData { pub annotation: Option>>, pub type_comment: Option, } -pub type Arg = Located, U>; +pub type Arg = Attributed, U>; #[derive(Clone, Debug, PartialEq)] pub struct KeywordData { pub arg: Option, pub value: Expr, } -pub type Keyword = Located, U>; +pub type Keyword = Attributed, U>; #[derive(Clone, Debug, PartialEq)] pub struct AliasData { pub name: Ident, pub asname: Option, } -pub type Alias = Located; +pub type Alias = Attributed; #[derive(Clone, Debug, PartialEq)] pub struct Withitem { @@ -1032,11 +992,11 @@ pub enum PatternKind { MatchAs(PatternMatchAs), MatchOr(PatternMatchOr), } -pub type Pattern = Located, U>; +pub type Pattern = Attributed, U>; #[derive(Clone, Debug, PartialEq)] pub struct TypeIgnoreTypeIgnore { - pub lineno: usize, + pub lineno: u32, pub tag: String, } @@ -1059,6 +1019,25 @@ pub mod fold { type TargetU; type Error; fn map_user(&mut self, user: U) -> Result; + + fn map_attributed( + &mut self, + attributed: Attributed, + ) -> Result, Self::Error> { + let custom = self.map_user(attributed.custom)?; + Ok(Attributed { + range: attributed.range, + custom, + node: attributed.node, + }) + } + + fn fold>( + &mut self, + node: X, + ) -> Result { + node.fold(self) + } fn fold_mod(&mut self, node: Mod) -> Result, Self::Error> { fold_mod(self, node) } @@ -1135,15 +1114,15 @@ pub mod fold { fold_type_ignore(self, node) } } - fn fold_located + ?Sized, T, MT>( + fn fold_attributed + ?Sized, T, MT>( folder: &mut F, - node: Located, + node: Attributed, f: impl FnOnce(&mut F, T) -> Result, - ) -> Result, F::Error> { - Ok(Located { - custom: folder.map_user(node.custom)?, - location: node.location, - end_location: node.end_location, + ) -> Result, F::Error> { + let node = folder.map_attributed(node)?; + Ok(Attributed { + custom: node.custom, + range: node.range, node: f(folder, node.node)?, }) } @@ -1192,7 +1171,7 @@ pub mod fold { #[allow(unused)] folder: &mut F, node: Stmt, ) -> Result, F::Error> { - fold_located(folder, node, |folder, node| match node { + fold_attributed(folder, node, |folder, node| match node { StmtKind::FunctionDef(StmtFunctionDef { name, args, @@ -1396,7 +1375,7 @@ pub mod fold { #[allow(unused)] folder: &mut F, node: Expr, ) -> Result, F::Error> { - fold_located(folder, node, |folder, node| match node { + fold_attributed(folder, node, |folder, node| match node { ExprKind::BoolOp(ExprBoolOp { op, values }) => Ok(ExprKind::BoolOp(ExprBoolOp { op: Foldable::fold(op, folder)?, values: Foldable::fold(values, folder)?, @@ -1696,7 +1675,7 @@ pub mod fold { #[allow(unused)] folder: &mut F, node: Excepthandler, ) -> Result, F::Error> { - fold_located(folder, node, |folder, node| match node { + fold_attributed(folder, node, |folder, node| match node { ExcepthandlerKind::ExceptHandler(ExcepthandlerExceptHandler { type_, name, body }) => { Ok(ExcepthandlerKind::ExceptHandler( ExcepthandlerExceptHandler { @@ -1753,7 +1732,7 @@ pub mod fold { #[allow(unused)] folder: &mut F, node: Arg, ) -> Result, F::Error> { - fold_located(folder, node, |folder, node| { + fold_attributed(folder, node, |folder, node| { let ArgData { arg, annotation, @@ -1779,7 +1758,7 @@ pub mod fold { #[allow(unused)] folder: &mut F, node: Keyword, ) -> Result, F::Error> { - fold_located(folder, node, |folder, node| { + fold_attributed(folder, node, |folder, node| { let KeywordData { arg, value } = node; Ok(KeywordData { arg: Foldable::fold(arg, folder)?, @@ -1800,7 +1779,7 @@ pub mod fold { #[allow(unused)] folder: &mut F, node: Alias, ) -> Result, F::Error> { - fold_located(folder, node, |folder, node| { + fold_attributed(folder, node, |folder, node| { let AliasData { name, asname } = node; Ok(AliasData { name: Foldable::fold(name, folder)?, @@ -1867,7 +1846,7 @@ pub mod fold { #[allow(unused)] folder: &mut F, node: Pattern, ) -> Result, F::Error> { - fold_located(folder, node, |folder, node| match node { + fold_attributed(folder, node, |folder, node| match node { PatternKind::MatchValue(PatternMatchValue { value }) => { Ok(PatternKind::MatchValue(PatternMatchValue { value: Foldable::fold(value, folder)?, diff --git a/ast/src/gen/located.rs b/ast/src/gen/located.rs new file mode 100644 index 00000000..0e71a3c8 --- /dev/null +++ b/ast/src/gen/located.rs @@ -0,0 +1,95 @@ +// File automatically generated by ast/asdl_rs.py. + +use rustpython_parser_core::source_code::SourceRange; + +pub type Located = super::generic::Attributed; +pub type Mod = super::generic::Mod; +pub type ModModule = super::generic::ModModule; +pub type ModInteractive = super::generic::ModInteractive; +pub type ModExpression = super::generic::ModExpression; +pub type ModFunctionType = super::generic::ModFunctionType; +pub type Stmt = super::generic::Stmt; +pub type StmtKind = super::generic::StmtKind; +pub type StmtFunctionDef = super::generic::StmtFunctionDef; +pub type StmtAsyncFunctionDef = super::generic::StmtAsyncFunctionDef; +pub type StmtClassDef = super::generic::StmtClassDef; +pub type StmtReturn = super::generic::StmtReturn; +pub type StmtDelete = super::generic::StmtDelete; +pub type StmtAssign = super::generic::StmtAssign; +pub type StmtAugAssign = super::generic::StmtAugAssign; +pub type StmtAnnAssign = super::generic::StmtAnnAssign; +pub type StmtFor = super::generic::StmtFor; +pub type StmtAsyncFor = super::generic::StmtAsyncFor; +pub type StmtWhile = super::generic::StmtWhile; +pub type StmtIf = super::generic::StmtIf; +pub type StmtWith = super::generic::StmtWith; +pub type StmtAsyncWith = super::generic::StmtAsyncWith; +pub type StmtMatch = super::generic::StmtMatch; +pub type StmtRaise = super::generic::StmtRaise; +pub type StmtTry = super::generic::StmtTry; +pub type StmtTryStar = super::generic::StmtTryStar; +pub type StmtAssert = super::generic::StmtAssert; +pub type StmtImport = super::generic::StmtImport; +pub type StmtImportFrom = super::generic::StmtImportFrom; +pub type StmtGlobal = super::generic::StmtGlobal; +pub type StmtNonlocal = super::generic::StmtNonlocal; +pub type StmtExpr = super::generic::StmtExpr; +pub type Expr = super::generic::Expr; +pub type ExprKind = super::generic::ExprKind; +pub type ExprBoolOp = super::generic::ExprBoolOp; +pub type ExprNamedExpr = super::generic::ExprNamedExpr; +pub type ExprBinOp = super::generic::ExprBinOp; +pub type ExprUnaryOp = super::generic::ExprUnaryOp; +pub type ExprLambda = super::generic::ExprLambda; +pub type ExprIfExp = super::generic::ExprIfExp; +pub type ExprDict = super::generic::ExprDict; +pub type ExprSet = super::generic::ExprSet; +pub type ExprListComp = super::generic::ExprListComp; +pub type ExprSetComp = super::generic::ExprSetComp; +pub type ExprDictComp = super::generic::ExprDictComp; +pub type ExprGeneratorExp = super::generic::ExprGeneratorExp; +pub type ExprAwait = super::generic::ExprAwait; +pub type ExprYield = super::generic::ExprYield; +pub type ExprYieldFrom = super::generic::ExprYieldFrom; +pub type ExprCompare = super::generic::ExprCompare; +pub type ExprCall = super::generic::ExprCall; +pub type ExprFormattedValue = super::generic::ExprFormattedValue; +pub type ExprJoinedStr = super::generic::ExprJoinedStr; +pub type ExprConstant = super::generic::ExprConstant; +pub type ExprAttribute = super::generic::ExprAttribute; +pub type ExprSubscript = super::generic::ExprSubscript; +pub type ExprStarred = super::generic::ExprStarred; +pub type ExprName = super::generic::ExprName; +pub type ExprList = super::generic::ExprList; +pub type ExprTuple = super::generic::ExprTuple; +pub type ExprSlice = super::generic::ExprSlice; +pub type ExprContext = super::generic::ExprContext; +pub type Boolop = super::generic::Boolop; +pub type Operator = super::generic::Operator; +pub type Unaryop = super::generic::Unaryop; +pub type Cmpop = super::generic::Cmpop; +pub type Comprehension = super::generic::Comprehension; +pub type Excepthandler = super::generic::Excepthandler; +pub type ExcepthandlerKind = super::generic::ExcepthandlerKind; +pub type ExcepthandlerExceptHandler = super::generic::ExcepthandlerExceptHandler; +pub type Arguments = super::generic::Arguments; +pub type Arg = super::generic::Arg; +pub type ArgData = super::generic::ArgData; +pub type Keyword = super::generic::Keyword; +pub type KeywordData = super::generic::KeywordData; +pub type Alias = super::generic::Alias; +pub type AliasData = super::generic::AliasData; +pub type Withitem = super::generic::Withitem; +pub type MatchCase = super::generic::MatchCase; +pub type Pattern = super::generic::Pattern; +pub type PatternKind = super::generic::PatternKind; +pub type PatternMatchValue = super::generic::PatternMatchValue; +pub type PatternMatchSingleton = super::generic::PatternMatchSingleton; +pub type PatternMatchSequence = super::generic::PatternMatchSequence; +pub type PatternMatchMapping = super::generic::PatternMatchMapping; +pub type PatternMatchClass = super::generic::PatternMatchClass; +pub type PatternMatchStar = super::generic::PatternMatchStar; +pub type PatternMatchAs = super::generic::PatternMatchAs; +pub type PatternMatchOr = super::generic::PatternMatchOr; +pub type TypeIgnore = super::generic::TypeIgnore; +pub type TypeIgnoreTypeIgnore = super::generic::TypeIgnoreTypeIgnore; diff --git a/ast/src/lib.rs b/ast/src/lib.rs index d668bede..683698c0 100644 --- a/ast/src/lib.rs +++ b/ast/src/lib.rs @@ -1,12 +1,27 @@ -mod ast_gen; +mod attributed; mod constant; #[cfg(feature = "fold")] mod fold_helpers; +mod generic { + #![allow(clippy::derive_partial_eq_without_eq)] + include!("gen/generic.rs"); +} mod impls; +#[cfg(feature = "source-code")] +mod source_locator; #[cfg(feature = "unparse")] mod unparse; -pub use ast_gen::*; -pub use rustpython_compiler_core::Location; +pub use attributed::Attributed; +pub use constant::Constant; +pub use generic::*; +pub use rustpython_parser_core::{text_size, ConversionFlag}; pub type Suite = Vec>; + +#[cfg(feature = "source-code")] +pub mod located { + include!("gen/located.rs"); +} + +pub use rustpython_parser_core::source_code; diff --git a/ast/src/source_locator.rs b/ast/src/source_locator.rs new file mode 100644 index 00000000..8a81e29c --- /dev/null +++ b/ast/src/source_locator.rs @@ -0,0 +1,25 @@ +use crate::attributed::Attributed; +use rustpython_parser_core::source_code::{SourceLocator, SourceRange}; + +impl crate::fold::Fold<()> for SourceLocator<'_> { + type TargetU = SourceRange; + type Error = std::convert::Infallible; + + #[cold] + fn map_user(&mut self, _user: ()) -> Result { + unreachable!("implemented map_attributed"); + } + + fn map_attributed( + &mut self, + node: Attributed, + ) -> Result, Self::Error> { + let start = self.locate(node.range.start()); + let end = self.locate(node.range.end()); + Ok(Attributed { + range: node.range, + custom: (start..end).into(), + node: node.node, + }) + } +} diff --git a/ast/src/unparse.rs b/ast/src/unparse.rs index 807b0f16..575cf40d 100644 --- a/ast/src/unparse.rs +++ b/ast/src/unparse.rs @@ -1,7 +1,5 @@ -use crate::{ - Arg, Arguments, Boolop, Cmpop, Comprehension, Constant, ConversionFlag, Expr, ExprKind, - Operator, -}; +use crate::ConversionFlag; +use crate::{Arg, Arguments, Boolop, Cmpop, Comprehension, Constant, Expr, ExprKind, Operator}; use std::fmt; mod precedence { @@ -452,7 +450,7 @@ impl<'a> Unparser<'a> { fn unparse_formatted( &mut self, val: &Expr, - conversion: usize, + conversion: u32, spec: Option<&Expr>, ) -> fmt::Result { let buffered = to_string_fmt(|f| Unparser::new(f).unparse_expr(val, precedence::TEST + 1)); @@ -466,7 +464,7 @@ impl<'a> Unparser<'a> { self.p(&buffered)?; drop(buffered); - if conversion != ConversionFlag::None as usize { + if conversion != ConversionFlag::None as u32 { self.p("!")?; let buf = &[conversion as u8]; let c = std::str::from_utf8(buf).unwrap(); diff --git a/core/Cargo.toml b/core/Cargo.toml index 79622a95..8269c440 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -1,6 +1,6 @@ [package] -name = "rustpython-compiler-core" -description = "RustPython specific bytecode." +name = "rustpython-parser-core" +description = "RustPython parser data types." version = "0.2.0" authors = ["RustPython Team"] edition = "2021" @@ -8,11 +8,17 @@ repository = "https://github.com/RustPython/RustPython" license = "MIT" [dependencies] -bitflags = { workspace = true } -bstr = { workspace = true } itertools = { workspace = true } num-bigint = { workspace = true } num-complex = { workspace = true } -serde = { version = "1.0.133", optional = true, default-features = false, features = ["derive"] } +# ruff dependency shouldn't be placed out of this crate +ruff_text_size = { path = "../ruff_text_size" } +ruff_source_location = { path = "../ruff_source_location", optional = true } + +serde = { version = "1.0.133", optional = true, default-features = false, features = ["derive"] } lz4_flex = "0.9.2" + +[features] +default = ["source-code"] +source-code = ["ruff_source_location"] diff --git a/core/src/bytecode.rs b/core/src/bytecode.rs deleted file mode 100644 index a522d3fb..00000000 --- a/core/src/bytecode.rs +++ /dev/null @@ -1,1610 +0,0 @@ -//! Implement python as a virtual machine with bytecode. This module -//! implements bytecode structure. - -use crate::{marshal, Location}; -use bitflags::bitflags; -use itertools::Itertools; -use num_bigint::BigInt; -use num_complex::Complex64; -use std::marker::PhantomData; -use std::{collections::BTreeSet, fmt, hash, mem}; - -pub trait Constant: Sized { - type Name: AsRef; - - /// Transforms the given Constant to a BorrowedConstant - fn borrow_constant(&self) -> BorrowedConstant; -} - -impl Constant for ConstantData { - type Name = String; - fn borrow_constant(&self) -> BorrowedConstant { - use BorrowedConstant::*; - match self { - ConstantData::Integer { value } => Integer { value }, - ConstantData::Float { value } => Float { value: *value }, - ConstantData::Complex { value } => Complex { value: *value }, - ConstantData::Boolean { value } => Boolean { value: *value }, - ConstantData::Str { value } => Str { value }, - ConstantData::Bytes { value } => Bytes { value }, - ConstantData::Code { code } => Code { code }, - ConstantData::Tuple { elements } => Tuple { elements }, - ConstantData::None => None, - ConstantData::Ellipsis => Ellipsis, - } - } -} - -/// A Constant Bag -pub trait ConstantBag: Sized + Copy { - type Constant: Constant; - fn make_constant(&self, constant: BorrowedConstant) -> Self::Constant; - fn make_int(&self, value: BigInt) -> Self::Constant; - fn make_tuple(&self, elements: impl Iterator) -> Self::Constant; - fn make_code(&self, code: CodeObject) -> Self::Constant; - fn make_name(&self, name: &str) -> ::Name; -} - -pub trait AsBag { - type Bag: ConstantBag; - #[allow(clippy::wrong_self_convention)] - fn as_bag(self) -> Self::Bag; -} - -impl AsBag for Bag { - type Bag = Self; - fn as_bag(self) -> Self { - self - } -} - -#[derive(Clone, Copy)] -pub struct BasicBag; - -impl ConstantBag for BasicBag { - type Constant = ConstantData; - fn make_constant(&self, constant: BorrowedConstant) -> Self::Constant { - constant.to_owned() - } - fn make_int(&self, value: BigInt) -> Self::Constant { - ConstantData::Integer { value } - } - fn make_tuple(&self, elements: impl Iterator) -> Self::Constant { - ConstantData::Tuple { - elements: elements.collect(), - } - } - fn make_code(&self, code: CodeObject) -> Self::Constant { - ConstantData::Code { - code: Box::new(code), - } - } - fn make_name(&self, name: &str) -> ::Name { - name.to_owned() - } -} - -/// Primary container of a single code object. Each python function has -/// a code object. Also a module has a code object. -#[derive(Clone)] -pub struct CodeObject { - pub instructions: Box<[CodeUnit]>, - pub locations: Box<[Location]>, - pub flags: CodeFlags, - pub posonlyarg_count: u32, - // Number of positional-only arguments - pub arg_count: u32, - pub kwonlyarg_count: u32, - pub source_path: C::Name, - pub first_line_number: u32, - pub max_stackdepth: u32, - pub obj_name: C::Name, - // Name of the object that created this code object - pub cell2arg: Option>, - pub constants: Box<[C]>, - pub names: Box<[C::Name]>, - pub varnames: Box<[C::Name]>, - pub cellvars: Box<[C::Name]>, - pub freevars: Box<[C::Name]>, -} - -bitflags! { - pub struct CodeFlags: u16 { - const NEW_LOCALS = 0x01; - const IS_GENERATOR = 0x02; - const IS_COROUTINE = 0x04; - const HAS_VARARGS = 0x08; - const HAS_VARKEYWORDS = 0x10; - const IS_OPTIMIZED = 0x20; - } -} - -impl CodeFlags { - pub const NAME_MAPPING: &'static [(&'static str, CodeFlags)] = &[ - ("GENERATOR", CodeFlags::IS_GENERATOR), - ("COROUTINE", CodeFlags::IS_COROUTINE), - ( - "ASYNC_GENERATOR", - Self::from_bits_truncate(Self::IS_GENERATOR.bits | Self::IS_COROUTINE.bits), - ), - ("VARARGS", CodeFlags::HAS_VARARGS), - ("VARKEYWORDS", CodeFlags::HAS_VARKEYWORDS), - ]; -} - -/// an opcode argument that may be extended by a prior ExtendedArg -#[derive(Copy, Clone, PartialEq, Eq)] -#[repr(transparent)] -pub struct OpArgByte(pub u8); -impl OpArgByte { - pub const fn null() -> Self { - OpArgByte(0) - } -} -impl fmt::Debug for OpArgByte { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.0.fmt(f) - } -} - -/// a full 32-bit op_arg, including any possible ExtendedArg extension -#[derive(Copy, Clone, Debug)] -#[repr(transparent)] -pub struct OpArg(pub u32); -impl OpArg { - pub const fn null() -> Self { - OpArg(0) - } - - /// Returns how many CodeUnits a instruction with this op_arg will be encoded as - #[inline] - pub fn instr_size(self) -> usize { - (self.0 > 0xff) as usize + (self.0 > 0xff_ff) as usize + (self.0 > 0xff_ff_ff) as usize + 1 - } - - /// returns the arg split into any necessary ExtendedArg components (in big-endian order) and - /// the arg for the real opcode itself - #[inline(always)] - pub fn split(self) -> (impl ExactSizeIterator, OpArgByte) { - let mut it = self - .0 - .to_le_bytes() - .map(OpArgByte) - .into_iter() - .take(self.instr_size()); - let lo = it.next().unwrap(); - (it.rev(), lo) - } -} - -#[derive(Default, Copy, Clone)] -#[repr(transparent)] -pub struct OpArgState { - state: u32, -} - -impl OpArgState { - #[inline(always)] - pub fn get(&mut self, ins: CodeUnit) -> (Instruction, OpArg) { - let arg = self.extend(ins.arg); - if ins.op != Instruction::ExtendedArg { - self.reset(); - } - (ins.op, arg) - } - #[inline(always)] - pub fn extend(&mut self, arg: OpArgByte) -> OpArg { - self.state = self.state << 8 | u32::from(arg.0); - OpArg(self.state) - } - #[inline(always)] - pub fn reset(&mut self) { - self.state = 0 - } -} - -pub trait OpArgType: Copy { - fn from_op_arg(x: u32) -> Option; - fn to_op_arg(self) -> u32; -} - -impl OpArgType for u32 { - #[inline(always)] - fn from_op_arg(x: u32) -> Option { - Some(x) - } - #[inline(always)] - fn to_op_arg(self) -> u32 { - self - } -} - -impl OpArgType for bool { - #[inline(always)] - fn from_op_arg(x: u32) -> Option { - Some(x != 0) - } - #[inline(always)] - fn to_op_arg(self) -> u32 { - self as u32 - } -} - -macro_rules! op_arg_enum { - ($(#[$attr:meta])* $vis:vis enum $name:ident { $($(#[$var_attr:meta])* $var:ident = $value:literal,)* }) => { - $(#[$attr])* - $vis enum $name { - $($(#[$var_attr])* $var = $value,)* - } - - impl OpArgType for $name { - fn to_op_arg(self) -> u32 { - self as u32 - } - fn from_op_arg(x: u32) -> Option { - Some(match u8::try_from(x).ok()? { - $($value => Self::$var,)* - _ => return None, - }) - } - } - }; -} - -#[derive(Copy, Clone)] -pub struct Arg(PhantomData); - -impl Arg { - #[inline] - pub fn marker() -> Self { - Arg(PhantomData) - } - #[inline] - pub fn new(arg: T) -> (Self, OpArg) { - (Self(PhantomData), OpArg(arg.to_op_arg())) - } - #[inline] - pub fn new_single(arg: T) -> (Self, OpArgByte) - where - T: Into, - { - (Self(PhantomData), OpArgByte(arg.into())) - } - #[inline(always)] - pub fn get(self, arg: OpArg) -> T { - self.try_get(arg).unwrap() - } - #[inline(always)] - pub fn try_get(self, arg: OpArg) -> Option { - T::from_op_arg(arg.0) - } - #[inline(always)] - /// # Safety - /// T::from_op_arg(self) must succeed - pub unsafe fn get_unchecked(self, arg: OpArg) -> T { - match T::from_op_arg(arg.0) { - Some(t) => t, - None => std::hint::unreachable_unchecked(), - } - } -} - -impl PartialEq for Arg { - fn eq(&self, _: &Self) -> bool { - true - } -} -impl Eq for Arg {} - -impl fmt::Debug for Arg { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "Arg<{}>", std::any::type_name::()) - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)] -#[repr(transparent)] -// XXX: if you add a new instruction that stores a Label, make sure to add it in -// Instruction::label_arg -pub struct Label(pub u32); - -impl OpArgType for Label { - #[inline(always)] - fn from_op_arg(x: u32) -> Option { - Some(Label(x)) - } - #[inline(always)] - fn to_op_arg(self) -> u32 { - self.0 - } -} - -impl fmt::Display for Label { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - self.0.fmt(f) - } -} - -op_arg_enum!( - /// Transforms a value prior to formatting it. - #[derive(Copy, Clone, Debug, PartialEq, Eq)] - #[repr(u8)] - pub enum ConversionFlag { - /// No conversion - None = 0, // CPython uses -1 but not pleasure for us - /// Converts by calling `str()`. - Str = b's', - /// Converts by calling `ascii()`. - Ascii = b'a', - /// Converts by calling `repr()`. - Repr = b'r', - } -); - -impl TryFrom for ConversionFlag { - type Error = usize; - fn try_from(b: usize) -> Result { - u32::try_from(b).ok().and_then(Self::from_op_arg).ok_or(b) - } -} - -op_arg_enum!( - /// The kind of Raise that occurred. - #[derive(Copy, Clone, Debug, PartialEq, Eq)] - #[repr(u8)] - pub enum RaiseKind { - Reraise = 0, - Raise = 1, - RaiseCause = 2, - } -); - -pub type NameIdx = u32; - -/// A Single bytecode instruction. -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -#[repr(u8)] -pub enum Instruction { - /// Importing by name - ImportName { - idx: Arg, - }, - /// Importing without name - ImportNameless, - /// Import * - ImportStar, - /// from ... import ... - ImportFrom { - idx: Arg, - }, - LoadFast(Arg), - LoadNameAny(Arg), - LoadGlobal(Arg), - LoadDeref(Arg), - LoadClassDeref(Arg), - StoreFast(Arg), - StoreLocal(Arg), - StoreGlobal(Arg), - StoreDeref(Arg), - DeleteFast(Arg), - DeleteLocal(Arg), - DeleteGlobal(Arg), - DeleteDeref(Arg), - LoadClosure(Arg), - Subscript, - StoreSubscript, - DeleteSubscript, - StoreAttr { - idx: Arg, - }, - DeleteAttr { - idx: Arg, - }, - LoadConst { - /// index into constants vec - idx: Arg, - }, - UnaryOperation { - op: Arg, - }, - BinaryOperation { - op: Arg, - }, - BinaryOperationInplace { - op: Arg, - }, - LoadAttr { - idx: Arg, - }, - TestOperation { - op: Arg, - }, - CompareOperation { - op: Arg, - }, - Pop, - Rotate2, - Rotate3, - Duplicate, - Duplicate2, - GetIter, - Continue { - target: Arg