diff --git a/pyproject.toml b/pyproject.toml
index 1fbc250..4d629ee 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -47,14 +47,15 @@ dev = [
[tool.setuptools]
package-dir = {"" = "src"}
-py-modules = ["cedarscript_editor"]
+py-modules = ["cedarscript_editor", "text_manipulation"]
[tool.setuptools.dynamic]
version = {attr = "cedarscript_editor.__version__"}
[tool.setuptools.packages.find]
where = ["src"]
-include = ["cedarscript_editor*", "text_editor*"]
+include = ["cedarscript_editor*", "text_editor*", "identifier_selector*", "*identifier_finder*",
+"indentation_*", "range_*"]
namespaces = false
[tool.setuptools.package-data]
diff --git a/src/cedarscript_editor/__init__.py b/src/cedarscript_editor/__init__.py
index 9d6c692..6f0f36e 100644
--- a/src/cedarscript_editor/__init__.py
+++ b/src/cedarscript_editor/__init__.py
@@ -1,7 +1,6 @@
-from .cedarscript_editor_java import JavaCEDARScriptEditor
-from .cedarscript_editor_kotlin import KotlinCEDARScriptEditor
-from .cedarscript_editor_python import PythonCEDARScriptEditor
+from cedarscript_editor.cedarscript_editor import CEDARScriptEditor
-__version__ = "0.1.10"
+__version__ = "0.2.0"
+
+__all__ = ["CEDARScriptEditor"]
-__all__ = ["PythonCEDARScriptEditor"]
diff --git a/src/cedarscript_editor/cedarscript_editor.py b/src/cedarscript_editor/cedarscript_editor.py
new file mode 100644
index 0000000..39eeebe
--- /dev/null
+++ b/src/cedarscript_editor/cedarscript_editor.py
@@ -0,0 +1,349 @@
+import os
+from collections.abc import Sequence
+from typing import Callable
+
+from cedarscript_ast_parser import Command, RmFileCommand, MvFileCommand, UpdateCommand, \
+ SelectCommand, IdentifierFromFile, Segment, Marker, MoveClause, DeleteClause, \
+ InsertClause, ReplaceClause, EditingAction, BodyOrWhole, RegionClause, MarkerType
+from cedarscript_ast_parser.cedarscript_ast_parser import MarkerCompatible, RelativeMarker, \
+ RelativePositionType
+from text_manipulation.indentation_kit import IndentationInfo
+from text_manipulation.range_spec import IdentifierBoundaries, RangeSpec
+from text_manipulation.text_editor_kit import read_file, write_file, bow_to_search_range
+
+from .identifier_selector import select_finder
+
+
+class CEDARScriptEditorException(Exception):
+    """Raised when a CEDARScript command fails to apply.
+
+    The message is built as guidance for whoever issued the commands: it names
+    the failing command, states which earlier commands were already applied,
+    carries the original error description and ends with recovery instructions.
+    """
+
+    def __init__(self, command_ordinal: int, description: str):
+        """Build the guidance message.
+
+        :param command_ordinal: 1-based position of the command that failed.
+        :param description: Text describing the underlying failure.
+        """
+        # Ordinals of the commands that ran before the failing one.
+        applied = [f"#{i}" for i in range(1, command_ordinal)]
+        if len(applied) > 1:
+            items = f"{', '.join(applied[:-1])} and {applied[-1]}"
+        else:
+            items = "".join(applied)
+
+        if command_ordinal <= 1:
+            # Nothing was applied before the failure, so there is nothing to warn about.
+            note = ''
+            previous_cmd_notes = ''
+        else:
+            plural_indicator = 's' if len(applied) > 1 else ''
+            previous_cmd_notes = (
+                f", bearing in mind the file was updated and now contains all changes expressed in "
+                f"command{plural_indicator} {items}"
+            )
+            if 'syntax' in description.casefold():
+                probability_indicator = "most probably"
+            else:
+                probability_indicator = "might have"
+
+            note = (
+                f"*ALL* commands *before* command #{command_ordinal} were applied and *their changes are already committed*. "
+                f"Re-read the file to catch up with the applied changes. "
+                f"ATTENTION: The previous command (#{command_ordinal - 1}) {probability_indicator} caused command #{command_ordinal} to fail "
+                f"due to changes that left the file in an invalid state (check that by re-analyzing the file!)"
+            )
+
+        instructions = (
+            "NEVER apologize; just relax, take a deep breath, think step-by-step and write an in-depth analysis of what went wrong "
+            "(specifying which command ordinal failed), then acknowledge which commands were already applied and concisely describe the state at which the file was left "
+            "(saying what needs to be done now), "
+            f"then write new commands that will fix the problem{previous_cmd_notes} "
+            "(you'll get a one-million dollar tip if you get it right!) "
+            "Use descriptive comment before each command."
+        )
+        # One section per line so the header, note, description and instructions
+        # never run into each other, whatever punctuation the description ends with.
+        sections = (f"COMMAND #{command_ordinal}", note, description, instructions)
+        super().__init__("\n".join(section for section in sections if section))
+
+
+class CEDARScriptEditor:
+    """Applies parsed CEDARScript commands to files located under a root directory."""
+
+    def __init__(self, root_path):
+        """Store the absolute form of ``root_path``; command file paths are joined onto it."""
+        self.root_path = os.path.abspath(root_path)
+        print(f'[{self.__class__}] root: {self.root_path}')
+
+    # TODO Add 'target_search_range: RangeSpec' parameter
+    def find_identifier(self, source_info: tuple[str, str | Sequence[str]], marker: Marker) -> IdentifierBoundaries:
+        """Locate the identifier described by ``marker``.
+
+        :param source_info: ``(file_path, source)``; a non-string ``source`` is joined with newlines.
+        :param marker: Identifier to look for.
+        :return: Whatever the finder picked by ``select_finder`` returns for that identifier.
+        """
+        file_path = source_info[0]
+        source = source_info[1]
+        if not isinstance(source, str):
+            source = '\n'.join(source)
+        finder = select_finder(self.root_path, file_path, source)
+        return finder(self.root_path, file_path, source, marker)
+
+    def apply_commands(self, commands: Sequence[Command]):
+        """Apply ``commands`` in order and return the list of per-command results.
+
+        :raises CEDARScriptEditorException: On the first command that fails; it carries
+            the 1-based ordinal of that command and is chained to the original error.
+        """
+        result = []
+        for i, command in enumerate(commands):
+            try:
+                match command:
+                    case UpdateCommand() as cmd:
+                        result.append(self._update_command(cmd))
+                    # case CreateCommand() as cmd:
+                    #     result.append(self._create_command(cmd))
+                    case RmFileCommand() as cmd:
+                        result.append(self._rm_command(cmd))
+                    case MvFileCommand():
+                        raise ValueError('Not implemented: MV')
+                    case SelectCommand():
+                        raise ValueError('Not implemented: SELECT')
+                    case _ as invalid:
+                        raise ValueError(f"Unknown command '{type(invalid)}'")
+            except Exception as e:
+                print(f'[apply_commands] (command #{i+1}) Failed: {command}')
+                if isinstance(command, UpdateCommand):
+                    print(f'CMD CONTENT: ***{command.content}***')
+                raise CEDARScriptEditorException(i + 1, str(e)) from e
+        return result
+
+    def _update_command(self, cmd: UpdateCommand):
+        """Apply one UPDATE command: read the file, edit its lines and write it back.
+
+        :return: A short human-readable description of what was updated.
+        :raises ValueError: When the target identifier or marker cannot be found, or when
+            the command content does not fit the action.
+        """
+        action: EditingAction = cmd.action
+        target = cmd.target
+        content = cmd.content or []
+        file_path = os.path.join(self.root_path, target.file_path)
+
+        # Example 1:
+        # UPDATE FILE "tmp.benchmarks/2024-10-04-22-59-58--CEDARScript-Gemini-small/bowling/bowling.py"
+        # INSERT INSIDE FUNCTION "__init__" TOP
+        # WITH CONTENT '''
+        # @0:print("This line will be inserted at the top")
+        # ''';
+        # After parsing ->
+        # UpdateCommand(
+        # type='update',
+        # target=SingleFileClause(file_path='tmp.benchmarks/2024-10-04-22-59-58--CEDARScript-Gemini-small/bowling/bowling.py'),
+        # action=InsertClause(insert_position=RelativeMarker(type=, value='__init__', offset=None)),
+        # content='\n @0:print("This line will be inserted at the top")\n '
+        # )
+
+        # Example 2:
+        # UPDATE FUNCTION
+        # FROM FILE "tmp.benchmarks/2024-10-04-22-59-58--CEDARScript-Gemini-small/bowling/bowling.py"
+        # WHERE NAME = "__init__"
+        # REPLACE SEGMENT
+        # STARTING AFTER LINE "def __init__(self):"
+        # ENDING AFTER LINE "def __init__(self):"
+        # WITH CONTENT '''
+        # @0:print("This line will be inserted at the top")
+        # ''';
+        # After parsing ->
+        # UpdateCommand(
+        # type='update',
+        # target=IdentifierFromFile(file_path='bowling.py',
+        # where_clause=WhereClause(field='NAME', operator='=', value='__init__'),
+        # identifier_type='FUNCTION', offset=None
+        # ),
+        # action=ReplaceClause(
+        # region=Segment(
+        # start=RelativeMarker(type=, value='def __init__(self):', offset=None),
+        # end=RelativeMarker(type=, value='def __init__(self):', offset=None)
+        # )),
+        # content='\n @0:print("This line will be inserted at the top")\n '
+        # )
+
+        src = read_file(file_path)
+        lines = src.splitlines()
+
+        source_info: tuple[str, str | Sequence[str]] = (file_path, src)
+
+        def identifier_resolver(m: Marker):
+            return self.find_identifier(source_info, m)
+
+        match action:
+            case MoveClause():
+                # (Check parse_update_command)
+                # when action=MoveClause example (MOVE roll TO AFTER score):
+                # action.deleteclause.region=WHOLE
+                # action.as_marker = action.insertclause.as_marker
+                # action.insertclause.insert_position=FUNCTION(score)
+                # target.as_marker = FUNCTION(roll) (the one to delete)
+                search_range = RangeSpec.EMPTY
+                move_src_range = restrict_search_range(action, target, identifier_resolver)
+            case _:
+                move_src_range = None
+                # Set range_spec to cover the identifier
+                search_range = restrict_search_range(action, target, identifier_resolver)
+
+        marker, search_range = find_marker_or_segment(action, lines, search_range)
+
+        search_range = restrict_search_range_for_marker(
+            marker, action, lines, search_range, identifier_resolver
+        )
+
+        match content:
+            case str() | [str(), *_] | (str(), *_):
+                # Literal text: used as-is.
+                pass
+            case (region, relindent):
+                # Content is taken from another region of the same file and re-indented.
+                dest_indent = search_range.indent
+                content_range = restrict_search_range_for_marker(
+                    region, action, lines, RangeSpec.EMPTY, identifier_resolver
+                )
+                content = content_range.read(lines)
+                count = dest_indent + (relindent or 0)
+                content = IndentationInfo.from_content(content).shift_indentation(
+                    content, count
+                )
+                content = (region, content)
+            case _:
+                match action:
+                    case MoveClause(insert_position=region, relative_indentation=relindent):
+                        dest_range = restrict_search_range_for_marker(
+                            region, action, lines, RangeSpec.EMPTY, identifier_resolver
+                        )
+                        dest_indent = dest_range.indent
+                        content = move_src_range.read(lines)
+                        count = dest_indent + (relindent or 0)
+                        content = IndentationInfo.from_content(content).shift_indentation(
+                            content, count
+                        )
+                    case DeleteClause():
+                        # A DELETE carries no content, so there is nothing to prepare.
+                        # Checked after MoveClause so a move is never treated as a plain delete.
+                        pass
+                    case _:
+                        raise ValueError(f'Invalid content: {content}')
+
+        self._apply_action(action, lines, search_range, content)
+
+        write_file(file_path, lines)
+
+        return f"Updated {target if target else 'file'} in {file_path}\n -> {action}"
+
+    def _apply_action(self, action: EditingAction, lines: Sequence[str], range_spec: RangeSpec, content: str | None = None):
+        """Apply ``action`` to ``lines`` at ``range_spec``.
+
+        ``content`` is either text or a ``(region, processed_content)`` pair prepared by
+        ``_update_command``.
+
+        :raises ValueError: For an action type that is not handled here.
+        """
+        match action:
+
+            case MoveClause(insert_position=insert_position, to_other_file=other_file, relative_indentation=relindent):
+                # TODO Move from 'lines' to the same file or to 'other_file'
+                range_spec.write(content, lines)
+
+            case DeleteClause():
+                range_spec.delete(lines)
+
+            case ReplaceClause() | InsertClause():
+                match content:
+                    case (region, processed_content):
+                        content = processed_content
+                    case str():
+                        content = IndentationInfo.from_content(lines).apply_relative_indents(
+                            content, range_spec.indent
+                        )
+
+                range_spec.write(content, lines)
+
+            case _ as invalid:
+                raise ValueError(f"Unsupported action type: {type(invalid)}")
+
+    def _rm_command(self, cmd: RmFileCommand):
+        """Stub: only computes the target path; nothing is removed and ``None`` is returned."""
+        file_path = os.path.join(self.root_path, cmd.file_path)
+
+    def _delete_function(self, cmd):  # TODO
+        """Stub: only computes the target path."""
+        file_path = os.path.join(self.root_path, cmd.file_path)
+
+    # def _create_command(self, cmd: CreateCommand):
+    #     file_path = os.path.join(self.root_path, cmd.file_path)
+    #
+    #     os.makedirs(os.path.dirname(file_path), exist_ok=False)
+    #     with open(file_path, 'w') as file:
+    #         file.write(content)
+    #
+    #     return f"Created file: {command['file']}"
+
+    def find_index_range_for_region(
+            self,
+            region: BodyOrWhole | Marker | Segment | RelativeMarker,
+            lines: Sequence[str],
+            identifier_resolver: Callable[[Marker], IdentifierBoundaries],
+            search_range: RangeSpec | IdentifierBoundaries | None = None,
+    ) -> RangeSpec:
+        """Translate ``region`` into a range over ``lines``.
+
+        :raises ValueError: When ``region`` is none of the handled kinds.
+        """
+        # BodyOrWhole | RelativeMarker | MarkerOrSegment
+        # marker_or_segment_to_index_range_impl
+        # IdentifierBoundaries.location_to_search_range(self, location: BodyOrWhole | RelativePositionType) -> RangeSpec
+        match region:
+            case BodyOrWhole() as bow:
+                # TODO Set indent char count
+                index_range = bow_to_search_range(bow, search_range)
+            case Marker() | Segment() as mos:
+                if isinstance(search_range, IdentifierBoundaries):
+                    search_range = search_range.whole
+                match mos:
+                    case Marker(type=marker_type):
+                        match marker_type:
+                            case MarkerType.LINE:
+                                pass
+                            case _:
+                                # TODO transform to RangeSpec
+                                mos = self.find_identifier(("find_index_range_for_region", lines), mos).body
+                index_range = mos.to_search_range(
+                    lines,
+                    search_range.start if search_range else 0,
+                    search_range.end if search_range else -1,
+                )
+            case _ as invalid:
+                raise ValueError(f"Invalid: {invalid}")
+        return index_range
+
+
+def find_marker_or_segment(action: EditingAction, lines: Sequence[str], search_range: RangeSpec) -> tuple[Marker, RangeSpec]:
+    """Extract the marker an action refers to, or narrow the range for a segment.
+
+    Returns ``(marker, search_range)``. The marker comes from the action itself or
+    from its region when either is marker-compatible. For a segment region the
+    marker is ``None`` and the range is the one the segment computes. In every
+    other case the result is ``(None, search_range)`` with the range untouched.
+    """
+    match action:
+        case MarkerCompatible():
+            return action.as_marker, search_range
+        case RegionClause(region=MarkerCompatible() as compatible_region):
+            return compatible_region.as_marker, search_range
+        case RegionClause(region=Segment() as segment):
+            # TODO Handle segment's start and end as a marker and support identifier markers
+            return None, segment.to_search_range(lines, search_range)
+    return None, search_range
+
+
+def restrict_search_range(action, target, identifier_resolver: Callable[[Marker], IdentifierBoundaries]) -> RangeSpec:
+ # Compute the range an action should be confined to, based on the command target.
+ # Starts from RangeSpec.EMPTY and returns it unchanged unless a case below assigns a new range.
+ # Raises ValueError when `identifier_resolver` returns a falsy result for the target's marker.
+ search_range = RangeSpec.EMPTY
+ match target:
+ case IdentifierFromFile() as identifier_from_file:
+ # Resolve the targeted identifier to its boundaries through the supplied resolver.
+ identifier_marker = identifier_from_file.as_marker
+ identifier_boundaries = identifier_resolver(identifier_marker)
+ if not identifier_boundaries:
+ raise ValueError(f"'{identifier_marker}' not found")
+ match action:
+ case RegionClause(region=region):
+ match region: # BodyOrWhole | Marker | Segment
+ case BodyOrWhole():
+ # The region itself says which part of the identifier to use.
+ search_range = identifier_boundaries.location_to_search_range(region)
+ # NOTE(review): indentation is not preserved in this patch text, so it cannot be told here
+ # whether this fallback belongs to `match region` or to `match action` - confirm against the real file.
+ case _:
+ search_range = identifier_boundaries.location_to_search_range(BodyOrWhole.WHOLE)
+ return search_range
+
+
+def restrict_search_range_for_marker(
+        marker: Marker,
+        action: EditingAction,
+        lines: Sequence[str],
+        search_range: RangeSpec,
+        identifier_resolver: Callable[[Marker], IdentifierBoundaries]
+) -> RangeSpec:
+    """Narrow ``search_range`` to the location that ``marker`` points at.
+
+    ``None`` and anything that is not a ``Marker`` (segments are still a TODO)
+    leave the range unchanged. A line marker is looked up in ``lines`` and then
+    adjusted for insert and delete actions. Any other marker is resolved as an
+    identifier, raising ``ValueError`` when it cannot be found.
+    """
+    if marker is None or not isinstance(marker, Marker):
+        return search_range
+
+    if marker.type == MarkerType.LINE:
+        line_range = marker.to_search_range(lines, search_range)
+        if isinstance(action, InsertClause):
+            if action.insert_position.qualifier == RelativePositionType.BEFORE:
+                line_range = line_range.inc()
+        elif isinstance(action, DeleteClause):
+            line_range = line_range.set_length(1)
+        return line_range
+
+    boundaries = identifier_resolver(marker)
+    if not boundaries:
+        raise ValueError(f"'{marker}' not found")
+    # Plain markers carry no qualifier of their own, so they are treated as AT.
+    if isinstance(marker, RelativeMarker):
+        position: RelativePositionType = marker.qualifier
+    else:
+        position = RelativePositionType.AT
+    return boundaries.location_to_search_range(position)
diff --git a/src/cedarscript_editor/cedarscript_editor_base.py b/src/cedarscript_editor/cedarscript_editor_base.py
deleted file mode 100644
index 8e3321c..0000000
--- a/src/cedarscript_editor/cedarscript_editor_base.py
+++ /dev/null
@@ -1,200 +0,0 @@
-import os
-from abc import ABC, abstractmethod
-
-from cedarscript_ast_parser import Command, CreateCommand, RmFileCommand, MvFileCommand, UpdateCommand, \
- SelectCommand, IdentifierFromFile, SingleFileClause, Segment, Marker, MoveClause, DeleteClause, \
- InsertClause, ReplaceClause, EditingAction, Region, BodyOrWhole, WhereClause, RegionClause
-from .text_editor_kit import \
- normalize_indent, write_file, read_file, bow_to_search_range, \
- FunctionBoundaries, SearchRange, analyze_indentation, IndentationInfo
-
-class CEDARScriptEditorException(Exception):
- def __init__(self, command_ordinal: int, description: str):
- match command_ordinal:
- case 0 | 1:
- items = ''
- case 2:
- items = "#1"
- case 3:
- items = "#1 and #2"
- case _:
- sequence = ", ".join(f'#{i}' for i in range(1, command_ordinal - 1))
- items = f"{sequence} and #{command_ordinal - 1}"
- if command_ordinal <= 1:
- note = ''
- plural_indicator=''
- previous_cmd_notes = ''
- else:
-
- plural_indicator='s'
- previous_cmd_notes = f", bearing in mind the file was updated and now contains all changes expressed in command{plural_indicator} {items}"
- if 'syntax' in description.casefold():
- probability_indicator = "most probably"
- else:
- probability_indicator= "might have"
-
- note = (
- f"*ALL* commands *before* command #{command_ordinal} were applied and *their changes are already committed*. "
- f"Re-read the file to catch up with the applied changes."
- f"ATTENTION: The previous command (#{command_ordinal - 1}) {probability_indicator} caused command #{command_ordinal} to fail "
- f"due to changes that left the file in an invalid state (check that by re-analyzing the file!)"
- )
- super().__init__(
- f"COMMAND #{command_ordinal}{note}"
- f"{description}"
- "NEVER apologize; just relax, take a deep breath, think step-by-step and write an in-depth analysis of what went wrong "
- "(specifying which command ordinal failed), then acknowledge which commands were already applied and concisely describe the state at which the file was left "
- "(saying what needs to be done now), "
- f"then write new commands that will fix the problem{previous_cmd_notes} "
- "(you'll get a one-million dollar tip if you get it right!) "
- "Use descriptive comment before each command."
- )
-
-
-class CEDARScriptEditorBase(ABC):
- def __init__(self, root_path):
- self.root_path = os.path.abspath(root_path)
- print(f'[{self.__class__}] root: {self.root_path}')
-
- # TODO Add search_range: SearchRange parameter
- def find_function(self, source: str | list[str], file_name: str, function_name: str, offset: int | None = None) -> FunctionBoundaries:
- if not isinstance(source, str):
- source = '\n'.join(source)
- return self._find_function(source, file_name, function_name, offset)
-
- @abstractmethod
- def _find_function(self, source: str, file_name: str, function_name: str, offset: int | None = None) -> FunctionBoundaries | None:
- pass
-
- def apply_commands(self, commands: list[Command]):
- result = []
- for i, command in enumerate(commands):
- try:
- match command:
- case UpdateCommand() as cmd:
- result.append(self._update_command(cmd))
- case CreateCommand() as cmd:
- result.append(self._create_command(cmd))
- case RmFileCommand() as cmd:
- result.append(self._rm_command(cmd))
- case MvFileCommand() as cmd:
- raise ValueError('Noy implemented: MV')
- case SelectCommand() as cmd:
- raise ValueError('Noy implemented: SELECT')
- case _ as invalid:
- raise ValueError(f"Unknown command '{type(invalid)}'")
- except Exception as e:
- print(f'[apply_commands] (command #{i+1}) Failed: {command}')
- if isinstance(command, UpdateCommand):
- print(f'CMD CONTENT: ***{command.content}***')
- raise CEDARScriptEditorException(i + 1, str(e)) from e
- return result
-
- def _update_command(self, cmd: UpdateCommand):
- file_path = os.path.join(self.root_path, cmd.target.file_path)
- content = cmd.content or []
-
- match cmd.target:
-
- case IdentifierFromFile(
- identifier_type='FUNCTION', where_clause=WhereClause(field='NAME', operator='=', value=function_name)
- ):
- try:
- return self._update_content(file_path, cmd.action, content, function_name=function_name, offset = cmd.target.offset)
- except IOError as e:
- msg = f"function `{function_name}` in `{cmd.target.file_path}`"
- raise IOError(f"Error updating {msg}: {e}")
-
- case SingleFileClause():
- try:
- return self._update_content(file_path, cmd.action, content)
- except IOError as e:
- msg = f"file `{cmd.target.file_path}`"
- raise IOError(f"Error updating {msg}: {e}")
-
- case _ as invalid:
- raise ValueError(f"Not implemented: {invalid}")
-
- def _update_content(self, file_path: str, action: EditingAction, content: str | None,
- search_range: SearchRange | None = None, function_name: str | None = None, offset: int | None = None) -> str:
- src = read_file(file_path)
- lines = src.splitlines()
-
- if function_name:
- function_boundaries = self.find_function(src, file_path, function_name, offset)
- if not function_boundaries:
- raise ValueError(f"Function '{function_name}' not found in {file_path}")
- if search_range:
- print(f'Discarding search range to use function range...')
- search_range = _get_index_range(action, lines, function_boundaries)
- else:
- search_range = _get_index_range(action, lines)
-
- self._apply_action(action, lines, search_range, content)
-
- write_file(file_path, lines)
-
- return f"Updated {'function ' + function_name if function_name else 'file'} in {file_path}\n -> {action}"
-
- def _apply_action(self, action: EditingAction, lines: list[str], search_range: SearchRange, content: str | None = None):
- index_start, index_end, reference_indent = search_range
-
- match action:
-
- case MoveClause(insert_position=insert_position, to_other_file=other_file, relative_indentation=relindent):
- saved_content = lines[index_start:index_end]
- lines[index_start:index_end] = []
- # TODO Move from 'lines' to the same file or to 'other_file'
- dest_range = _get_index_range(InsertClause(insert_position), lines)
- indentation_info: IndentationInfo = analyze_indentation(saved_content)
- lines[dest_range.start:dest_range.end] = indentation_info.adjust_indentation(saved_content, dest_range.indent + (relindent or 0))
-
- case DeleteClause():
- lines[index_start:index_end] = []
-
- case ReplaceClause() | InsertClause():
- indentation_info: IndentationInfo = analyze_indentation(lines)
- lines[index_start:index_end] = normalize_indent(content, reference_indent, indentation_info)
-
- case _ as invalid:
- raise ValueError(f"Unsupported action type: {type(invalid)}")
-
- def _rm_command(self, cmd: RmFileCommand):
- file_path = os.path.join(self.root_path, cmd.file_path)
-
- def _delete_function(self, cmd): # TODO
- file_path = os.path.join(self.root_path, cmd.file_path)
-
- # def _create_command(self, cmd: CreateCommand):
- # file_path = os.path.join(self.root_path, cmd.file_path)
- #
- # os.makedirs(os.path.dirname(file_path), exist_ok=False)
- # with open(file_path, 'w') as file:
- # file.write(content)
- #
- # return f"Created file: {command['file']}"
-
-
-def _get_index_range(action: EditingAction, lines: list[str], search_range: SearchRange | FunctionBoundaries | None = None) -> SearchRange:
- match action:
- case RegionClause(region=r) | InsertClause(insert_position=r):
- return find_index_range_for_region(r, lines, search_range)
- case _ as invalid:
- raise ValueError(f"Unsupported action type: {type(invalid)}")
-
-def find_index_range_for_region(region: Region, lines: list[str], search_range: SearchRange | FunctionBoundaries | None = None) -> SearchRange:
- match region:
- case BodyOrWhole() as bow:
- # TODO Set indent char count
- index_range = bow_to_search_range(bow, search_range)
- case Marker() | Segment() as mos:
- if isinstance(search_range, FunctionBoundaries):
- search_range = search_range.whole
- index_range = mos.marker_or_segment_to_index_range(
- lines,
- search_range.start if search_range else 0,
- search_range.end if search_range else -1,
- )
- case _ as invalid:
- raise ValueError(f"Invalid: {invalid}")
- return index_range
diff --git a/src/cedarscript_editor/cedarscript_editor_java.py b/src/cedarscript_editor/cedarscript_editor_java.py
deleted file mode 100644
index fe2da8e..0000000
--- a/src/cedarscript_editor/cedarscript_editor_java.py
+++ /dev/null
@@ -1,56 +0,0 @@
-import re
-import os
-from .cedarscript_editor_base import CEDARScriptEditorBase
-
-class JavaCEDARScriptEditor(CEDARScriptEditorBase):
- def _find_function(self, lines, function_name):
- # Java method pattern: [modifiers] [return type] methodName(
- pattern = re.compile(rf'^\s*(public|protected|private|static|\s) +[\w<>\[\]]+\s+{re.escape(function_name)}\s*\(')
- for i, line in enumerate(lines):
- if pattern.search(line):
- return i
- return None
-
- def _find_function_end(self, lines, start_index):
- brace_count = 0
- in_string = False
- string_delimiter = None
- for i in range(start_index, len(lines)):
- for char in lines[i]:
- if char in ['"', "'"]:
- if not in_string:
- in_string = True
- string_delimiter = char
- elif string_delimiter == char:
- in_string = False
- string_delimiter = None
- elif not in_string:
- if char == '{':
- brace_count += 1
- elif char == '}':
- brace_count -= 1
- if brace_count == 0:
- return i + 1
- return len(lines)
-
- def _create_command(self, command):
- file_path = os.path.join(self.root_path, command['file_path'])
- insert_position = command['insert_position']
- content = command['content']
-
- with open(file_path, 'r') as file:
- lines = file.readlines()
-
- marker = insert_position.split('"')[1]
- for i, line in enumerate(lines):
- if marker in line:
- # In Java, we typically want to insert methods inside a class
- class_indent = len(line) - len(line.lstrip())
- indented_content = '\n'.join(' ' * (class_indent + 4) + l for l in content.split('\n'))
- lines.insert(i + 1, indented_content + '\n\n')
- break
-
- with open(file_path, 'w') as file:
- file.writelines(lines)
-
- return f"Created method in {command['file_path']}"
diff --git a/src/cedarscript_editor/cedarscript_editor_kotlin.py b/src/cedarscript_editor/cedarscript_editor_kotlin.py
deleted file mode 100644
index 2b0a48a..0000000
--- a/src/cedarscript_editor/cedarscript_editor_kotlin.py
+++ /dev/null
@@ -1,32 +0,0 @@
-import re
-from .cedarscript_editor_base import CEDARScriptEditorBase
-
-class KotlinCEDARScriptEditor(CEDARScriptEditorBase):
- def _find_function(self, lines, function_name):
- pattern = re.compile(rf'^\s*fun\s+{re.escape(function_name)}\s*[\(<]')
- for i, line in enumerate(lines):
- if pattern.match(line):
- return i
- return None
-
- def _find_function_end(self, lines, start_index):
- brace_count = 0
- in_string = False
- string_delimiter = None
- for i in range(start_index, len(lines)):
- for char in lines[i]:
- if char in ['"', "'"]:
- if not in_string:
- in_string = True
- string_delimiter = char
- elif string_delimiter == char:
- in_string = False
- string_delimiter = None
- elif not in_string:
- if char == '{':
- brace_count += 1
- elif char == '}':
- brace_count -= 1
- if brace_count == 0:
- return i + 1
- return len(lines)
diff --git a/src/cedarscript_editor/cedarscript_editor_python.py b/src/cedarscript_editor/cedarscript_editor_python.py
deleted file mode 100644
index d9bcb9e..0000000
--- a/src/cedarscript_editor/cedarscript_editor_python.py
+++ /dev/null
@@ -1,68 +0,0 @@
-from .cedarscript_editor_base import CEDARScriptEditorBase, FunctionBoundaries
-import rope.base.project
-from rope.base import libutils, ast
-
-from .text_editor_kit import SearchRange, get_line_indent_count
-
-
-def get_by_offset(obj: list, offset: int):
- if 0 <= offset < len(obj):
- return obj[offset]
- return None
-
-class PythonCEDARScriptEditor(CEDARScriptEditorBase):
- """
- A class to handle Python code editing operations.
- """
-
- # TODO Support search_start_line, search_end_line
- def _find_function(self, source: str, file_name: str, function_name: str, offset: int | None = None) -> FunctionBoundaries | None:
- """
- Find the starting line index of a specified function in the given lines.
-
- :param source: Source code.
- :param function_name: Name of the function to find.
- :param offset: how many functions to skip. TODO: If `None` when there are 2 or more functions with the same name, raise exception.
- :return: FunctionBoundaries with function start, body start, and end lines of the function or None if not found.
- """
- project = rope.base.project.Project(self.root_path)
- resource = libutils.path_to_resource(project, file_name)
- pymodule = libutils.get_string_module(project, source, resource=resource)
-
- candidates: list[FunctionBoundaries] = []
- lines = source.splitlines()
- # Use rope's AST to find the function
- for node in ast.walk(pymodule.get_ast()):
- if not isinstance(node, ast.FunctionDef) or node.name != function_name:
- continue
- start_line = node.lineno
- body_start_line = node.body[0].lineno if node.body else start_line
- # Find the last line by traversing all child nodes
- end_line = start_line
- for child in ast.walk(node):
- if hasattr(child, 'lineno'):
- end_line = max(end_line, child.lineno)
- # TODO Set indentation for all 3 lines
- candidates.append(FunctionBoundaries(
- SearchRange(start_line - 1, end_line, get_line_indent_count(lines[start_line - 1])),
- SearchRange(body_start_line - 1, end_line, get_line_indent_count(lines[body_start_line - 1]))
- ))
-
- candidate_count = len(candidates)
- if not candidate_count:
- return None
- if candidate_count > 1 and offset is None:
- raise ValueError(
- f"There are {candidate_count} functions named `{function_name}` in file `{file_name}`. "
- f"Use `OFFSET <0..{candidate_count - 1}>` to determine how many to skip. "
- f"Example to reference the *last* `{function_name}`: `OFFSET {candidate_count - 1}`"
- )
- if offset and offset >= candidate_count:
- raise ValueError(
- f"There are only {candidate_count} functions named `{function_name} in file `{file_name}`, "
- f"but 'offset' was set to {offset} (you can only skip {candidate_count - 1} functions)"
- )
- candidates.sort(key=lambda x: x.start_line)
- return get_by_offset(candidates, offset or 0)
-
-
diff --git a/src/cedarscript_editor/identifier_selector.py b/src/cedarscript_editor/identifier_selector.py
new file mode 100644
index 0000000..f096a51
--- /dev/null
+++ b/src/cedarscript_editor/identifier_selector.py
@@ -0,0 +1,18 @@
+from typing import Callable
+
+from cedarscript_ast_parser import Marker
+
+import logging
+
+from cedarscript_editor.python_identifier_finder import find_python_identifier
+from text_manipulation.range_spec import IdentifierBoundaries
+
+_log = logging.getLogger(__name__)
+
+
+def select_finder(
+        root_path: str, file_name: str, source: str
+) -> Callable[[str, str, str, Marker], IdentifierBoundaries | None]:
+    """Return the identifier finder to use for the given file.
+
+    The arguments are not inspected yet: the Python finder is always returned.
+    """
+    # TODO
+    finder = find_python_identifier
+    _log.info("[select_finder] Python selected")
+    return finder
diff --git a/src/cedarscript_editor/python_identifier_finder.py b/src/cedarscript_editor/python_identifier_finder.py
new file mode 100644
index 0000000..77e0c46
--- /dev/null
+++ b/src/cedarscript_editor/python_identifier_finder.py
@@ -0,0 +1,74 @@
+import rope
+from cedarscript_ast_parser import Marker, MarkerType
+from rope.base import ast, libutils
+from collections.abc import Sequence
+
+from text_manipulation.range_spec import IdentifierBoundaries, RangeSpec
+from text_manipulation.indentation_kit import get_line_indent_count
+
+
+def get_by_offset(obj: Sequence, offset: int):
+    """Return ``obj[offset]``, or ``None`` when ``offset`` is negative or past the end."""
+    in_bounds = 0 <= offset < len(obj)
+    return obj[offset] if in_bounds else None
+
+
+def find_python_identifier(root_path: str, file_name: str, source: str, marker: Marker) -> IdentifierBoundaries | None:
+    """
+    Find the boundaries of a function or class in Python source code.
+
+    :param root_path: Directory used as the rope project root.
+    :param file_name: Path of the file the source belongs to; handed to rope to resolve the resource.
+    :param source: Source code.
+    :param marker: Type, name and offset of the identifier to find.
+    :return: IdentifierBoundaries with identifier start, body start, and end lines of the identifier
+    or None if not found.
+    :raises ValueError: If the marker type is neither FUNCTION nor CLASS, if several identifiers share
+    the name and no offset was given, or if the offset skips more identifiers than exist.
+    """
+    # Validate the marker before creating a rope project that would then have to be torn down.
+    match marker.type:
+        case MarkerType.FUNCTION:
+            ast_type = ast.FunctionDef
+            kind_plural = 'functions'
+        case MarkerType.CLASS:
+            ast_type = ast.ClassDef
+            kind_plural = 'classes'
+        case _:
+            raise ValueError(f'Invalid identifier type: {marker.type}')
+
+    project = rope.base.project.Project(root_path)
+    try:
+        resource = libutils.path_to_resource(project, file_name)
+        pymodule = libutils.get_string_module(project, source, resource=resource)
+
+        candidates: list[IdentifierBoundaries] = []
+        lines = source.splitlines()
+        # Use rope's AST to find the identifier
+        for node in ast.walk(pymodule.get_ast()):
+            if not isinstance(node, ast_type) or node.name != marker.value:
+                continue
+            start_line = node.lineno
+            body_start_line = node.body[0].lineno if node.body else start_line
+            # Prefer the parser's own end position: the largest child `lineno` stops short
+            # when the last statement spans several lines (closing brackets, multi-line strings).
+            end_line = getattr(node, 'end_lineno', None)
+            if end_line is None:
+                end_line = max(
+                    (child.lineno for child in ast.walk(node) if hasattr(child, 'lineno')),
+                    default=start_line,
+                )
+            # TODO Set indentation for all 3 lines
+            candidates.append(IdentifierBoundaries(
+                RangeSpec(start_line - 1, end_line, get_line_indent_count(lines[start_line - 1])),
+                RangeSpec(body_start_line - 1, end_line, get_line_indent_count(lines[body_start_line - 1]))
+            ))
+    finally:
+        # Release the rope project whether or not the lookup succeeded.
+        project.close()
+
+    candidate_count = len(candidates)
+    if not candidate_count:
+        return None
+    if candidate_count > 1 and marker.offset is None:
+        raise ValueError(
+            f"There are {candidate_count} {kind_plural} named `{marker.value}` in file `{file_name}`. "
+            f"Use `OFFSET <0..{candidate_count - 1}>` to determine how many to skip. "
+            f"Example to reference the *last* `{marker.value}`: `OFFSET {candidate_count - 1}`"
+        )
+    if marker.offset and marker.offset >= candidate_count:
+        raise ValueError(
+            f"There are only {candidate_count} {kind_plural} named `{marker.value}` in file `{file_name}`, "
+            f"but 'offset' was set to {marker.offset} (you can only skip {candidate_count - 1} {kind_plural})"
+        )
+    # ast.walk does not visit nodes in source order, so order candidates before applying the offset.
+    candidates.sort(key=lambda x: x.start_line)
+    result: IdentifierBoundaries = get_by_offset(candidates, marker.offset or 0)
+    return result
diff --git a/src/cedarscript_editor/text_editor_kit.py b/src/cedarscript_editor/text_editor_kit.py
deleted file mode 100644
index f586ee6..0000000
--- a/src/cedarscript_editor/text_editor_kit.py
+++ /dev/null
@@ -1,348 +0,0 @@
-import re
-from collections import Counter
-from typing import NamedTuple, Protocol, runtime_checkable
-from math import gcd
-
-from cedarscript_ast_parser import Marker, RelativeMarker, RelativePositionType, Segment, MarkerType, BodyOrWhole
-
-MATCH_TYPES = ('exact', 'stripped', 'normalized', 'partial')
-
-class MarkerMatchResult(NamedTuple):
- match_type: str
- index: int
- indent: int
-
- def __str__(self):
- return f"{self.match_type.lower()} @ {self.index} ({self.indent})"
-
-
-class IndexBoundaries(NamedTuple):
- start: MarkerMatchResult
- end: MarkerMatchResult
-
-
-class SearchRange(NamedTuple):
- start: int
- end: int
- indent: int = 0
-
-
-class FunctionBoundaries(NamedTuple):
- whole: SearchRange
- body: SearchRange
- # TODO Derive these 3 attrs from search ranges below
-
- @property
- def start_line(self) -> int:
- return self.whole.start + 1
-
- @property
- def body_start_line(self) -> int:
- return self.body.start + 1
-
- @property
- def end_line(self) -> int:
- return self.whole.end
-
-
-def read_file(file_path: str) -> str:
- with open(file_path, 'r') as file:
- return file.read()
-
-
-def write_file(file_path: str, lines: list[str]):
- with open(file_path, 'w') as file:
- file.writelines([line + '\n' for line in lines])
-
-class IndentationInfo(NamedTuple):
- char_count: int
- char: str
- min_indent_level: int
- consistency: bool = True
- message: str | None = None
-
- def level_difference(self, base_indentation_count: int):
- return self.char_count_to_level(base_indentation_count) - self.min_indent_level
-
- def char_count_to_level(self, char_count: int) -> int:
- return char_count // self.char_count
-
- def level_to_chars(self, level: int) -> str:
- return level * self.char_count * self.char
-
- def adjust_indentation(self, lines: list[str], base_indentation_count: int) -> list[str]:
- line_adjuster = self._adjust_indentation_fun(base_indentation_count)
- # Return the transformed lines
- return [line_adjuster(line) for line in lines]
-
- def _adjust_indentation_fun(self, base_indentation_count: int):
- # Calculate the indentation difference
- level_difference = self.level_difference(base_indentation_count)
-
- def adjust_line(line: str) -> str:
- if not line.strip():
- # Handle empty lines or lines with only whitespace
- return line
-
- current_indent = get_line_indent_count(line)
- current_level = self.char_count_to_level(current_indent)
- new_level = max(0, current_level + level_difference)
- new_indent = self.level_to_chars(new_level)
-
- return new_indent + line.lstrip()
- return adjust_line
-
-def get_line_indent_count(line: str):
- return len(line) - len(line.lstrip())
-
-def count_leading_chars(line: str, char: str) -> int:
- return len(line) - len(line.lstrip(char))
-
-
-def normalize_line(line: str):
- return re.sub(r'[^\w]', '.', line.strip(), flags=re.UNICODE)
-
-
-def bow_to_search_range(bow: BodyOrWhole, searh_range: FunctionBoundaries | SearchRange | None = None, lines: list[str] | None = None) -> SearchRange:
- match searh_range:
-
- case SearchRange() | None:
- return searh_range or SearchRange(0, -1, 0)
-
- case FunctionBoundaries() as function_boundaries:
- match bow:
- case BodyOrWhole.BODY:
- return function_boundaries.body
- case BodyOrWhole.WHOLE:
- return function_boundaries.whole
- case _ as invalid:
- raise ValueError(f"Invalid: {invalid}")
-
- case _ as invalid:
- raise ValueError(f"Invalid: {invalid}")
-
-
-# MarkerOrSegment
-
-# class MarkerOrSegmentProtocol(Protocol):
-# def marker_or_segment_to_index_range(self) -> str:
-# ...
-
-
-@runtime_checkable
-class MarkerOrSegmentProtocol(Protocol):
- def marker_or_segment_to_index_range(
- self,
- lines: list[str],
- search_start_index: int = 0, search_end_index: int = -1
- ) -> SearchRange:
- ...
-
-
-def marker_or_segment_to_index_range_impl(
- self,
- lines: list[str],
- search_start_index: int = 0, search_end_index: int = -1
-) -> SearchRange | None:
- match self:
- case Marker(type=MarkerType.LINE):
- result = find_line_index_and_indent(lines, self, search_start_index, search_end_index)
- assert result, f"Unable to find `{self}`; Try: 1) Double-checking the marker (maybe you specified the the wrong one); or 2) using *exactly* the same characters from source; or 3) using another marker"
- return SearchRange(result.index, result.index + 1, result.indent)
- case Segment(start=s, end=e):
- result = segment_to_indexes(lines, s, e, search_start_index, search_end_index)
- return SearchRange(result.start.index, result.end.index, result.start.indent)
- case _ as invalid:
- raise ValueError(f"Unexpected type: {invalid}")
-
-
-Marker.marker_or_segment_to_index_range = marker_or_segment_to_index_range_impl
-Segment.marker_or_segment_to_index_range = marker_or_segment_to_index_range_impl
-
-
-def find_line_index_and_indent(
- lines: list[str],
- search_term: Marker | RelativeMarker,
- search_start_index: int = 0, search_end_index: int = -1
-) -> MarkerMatchResult | None:
- """
- Find the index of a specified line within a list of strings, considering different match types and an offset.
-
- This function searches for a given line within a list, considering 4 types of matches in order of priority:
- 1. Exact match
- 2. Stripped match (ignoring leading and trailing whitespace)
- 3. Normalized match (ignoring non-alphanumeric characters)
- 4. Partial (Searching for a substring, using `casefold` to ignore upper- and lower-case differences.
-
- The function applies the offset across all match types while maintaining the priority order.
-
- :Args:
- :param lines: The list of strings to search through.
- :param search_term:
- search_marker.value: The line to search for.
- search_marker.offset: The number of matches to skip before returning a result.
- 0 skips no match and returns the first match, 1 returns the second match, and so on.
- :param search_start_index: The index to start the search from. Defaults to 0.
- :param search_end_index: The index to end the search at (exclusive).
- Defaults to -1, which means search to the end of the list.
-
- :returns:
- MarkerMatchResult: The index for the desired line in the 'lines' list.
- Returns None if no match is found or if the offset exceeds the number of matches within each category.
-
- :Example:
- >> lines = ["Hello, world!", " Hello, world! ", "Héllo, wörld?", "Another line", "Hello, world!"]
- >> _find_line_index(lines, "Hello, world!", 1)
- 4 # Returns the index of the second exact match
-
- Note:
- - The function prioritizes match types in the order: exact, stripped, normalized, partial.
- - The offset is considered separately for each type.
- """
- search_line = search_term.value
- assert search_line, "Empty marker"
- assert search_term.type == MarkerType.LINE, f"Invalid marker type: {search_term.type}"
-
- matches = {t: [] for t in MATCH_TYPES}
-
- stripped_search = search_line.strip()
- normalized_search_line = normalize_line(stripped_search)
-
- if search_start_index < 0:
- search_start_index = 0
- if search_end_index < 0:
- search_end_index = len(lines)
-
- assert search_start_index < len(lines), f"search start index ({search_start_index}) must be less than line count ({len(lines)})"
- assert search_end_index <= len(lines), f"search end index ({search_end_index}) must be less than or equal to line count ({len(lines)})"
-
- for i in range(search_start_index, search_end_index):
- line = lines[i]
- reference_indent = get_line_indent_count(line)
-
- # Check for exact match
- if search_line == line:
- matches['exact'].append((i, reference_indent))
-
- # Check for stripped match
- elif stripped_search == line.strip():
- matches['stripped'].append((i, reference_indent))
-
- # Check for normalized match
- elif normalized_search_line == normalize_line(line):
- matches['normalized'].append((i, reference_indent))
-
- # Last resort!
- elif normalized_search_line.casefold() in normalize_line(line).casefold():
- matches['partial'].append((i, reference_indent))
-
- offset = search_term.offset or 0
- for match_type in MATCH_TYPES:
- if offset < len(matches[match_type]):
- index, reference_indent = matches[match_type][offset]
- match match_type:
- case 'normalized':
- print(f'Note: using {match_type} match for {search_term}')
- case 'partial':
- print(f"Note: Won't accept {match_type} match at index {index} for {search_term}")
- continue
- if isinstance(search_term, RelativeMarker):
- match search_term.qualifier:
- case RelativePositionType.BEFORE:
- index += -1
- case RelativePositionType.AFTER:
- index += 1
- case RelativePositionType.AT:
- pass
- case _ as invalid:
- raise ValueError(f"Not implemented: {invalid}")
- return MarkerMatchResult(match_type, index, reference_indent)
-
- return None
-
-
-def segment_to_indexes(
- lines: list[str],
- start_relpos: RelativeMarker, end_relpos: RelativeMarker,
- search_start_index: int = 0, search_end_index: int = -1
-) -> IndexBoundaries:
- assert len(lines), "`lines` is empty"
-
- start_match_result = find_line_index_and_indent(lines, start_relpos, search_start_index, search_end_index)
- assert start_match_result, f"Unable to find segment start \"{start_relpos}\"; Try: 1) Double-checking the marker (maybe you specified the the wrong one); or 2) using *exactly* the same characters from source; or 3) using a marker from above"
-
- end_match_result = find_line_index_and_indent(lines, end_relpos, start_match_result.index, search_end_index)
- if end_match_result:
- if end_match_result.index > -1:
- end_match_result = end_match_result._replace(index=end_match_result.index+1)
- assert end_match_result, f"Unable to find segment end \"{end_relpos}\" - Try: 1) using *exactly* the same characters from source; or 2) using a marker from below"
- return IndexBoundaries(start_match_result, end_match_result)
-
-
-def normalize_indent(content: str, context_indent_count: int = 0, indentation_info: IndentationInfo | None = None) -> list[str]:
- # TODO Always send str?
- lines = [line.lstrip() for line in content.splitlines() if line.strip()] if isinstance(content, str) else content
-
- context_indent_level = indentation_info.char_count_to_level(context_indent_count)
- for i in range(len(lines)):
- line = lines[i]
- parts = line.split(':', 1)
- if len(parts) == 2 and parts[0].startswith('@'):
- relative_indent_level = int(parts[0][1:])
- absolute_indent_level = context_indent_level + relative_indent_level
- assert absolute_indent_level >= 0, f"Final indentation for line `{line.strip()}` cannot be negative ({absolute_indent_level})"
- lines[i] = indentation_info.level_to_chars(absolute_indent_level) + parts[1].lstrip()
- else:
- absolute_indent_level = context_indent_level
- lines[i] = indentation_info.level_to_chars(absolute_indent_level) + line.lstrip()
-
- return lines
-
-def analyze_indentation(lines: list[str]) -> IndentationInfo:
-
- def extract_indentation(line: str) -> str:
- return re.match(r'^\s*', line).group(0)
-
- indentations = [extract_indentation(line) for line in lines if line.strip()]
-
- if not indentations:
- return IndentationInfo(4, ' ', 0, True, "No indentation found. Assuming 4 spaces (PEP 8).")
-
- indent_chars = Counter(indent[0] for indent in indentations if indent)
- dominant_char = ' ' if indent_chars.get(' ', 0) >= indent_chars.get('\t', 0) else '\t'
-
- indent_lengths = [len(indent) for indent in indentations]
-
- if dominant_char == '\t':
- char_count = 1
- else:
- # For spaces, determine the most likely char_count
- space_counts = [len for len in indent_lengths if len % 2 == 0 and len > 0]
- if not space_counts:
- char_count = 2 # Default to 2 if no even space counts
- else:
- # Sort top 5 space counts and find the largest GCD
- sorted_counts = sorted([c[0] for c in Counter(space_counts).most_common(5)], reverse=True)
- char_count = sorted_counts[0]
- for i in range(1, len(sorted_counts)):
- new_gcd = gcd(char_count, sorted_counts[i])
- if new_gcd <= 1:
- break
- char_count = new_gcd
-
- min_indent_chars = min(indent_lengths) if indent_lengths else 0
- min_indent_level = min_indent_chars // char_count
-
- consistency = all(len(indent) % char_count == 0 for indent in indentations if indent)
- match dominant_char:
- case ' ':
- domcharstr = 'space'
- case '\t':
- domcharstr = 'tab'
- case _:
- domcharstr = dominant_char
- message = f"Found {char_count}-{domcharstr} indentation"
- if not consistency:
- message += " (inconsistent)"
-
- return IndentationInfo(char_count, dominant_char, min_indent_level, consistency, message)
diff --git a/src/text_manipulation/__init__.py b/src/text_manipulation/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/text_manipulation/indentation_kit.py b/src/text_manipulation/indentation_kit.py
new file mode 100644
index 0000000..f339dd0
--- /dev/null
+++ b/src/text_manipulation/indentation_kit.py
@@ -0,0 +1,236 @@
+import re
+from collections import Counter
+from collections.abc import Sequence
+from math import gcd
+from typing import NamedTuple
+
+
+def get_line_indent_count(line: str) -> int:
+    return len(line) - len(line.lstrip())  # number of leading whitespace characters in `line`
+
+
+def extract_indentation(line: str) -> str:
+    r"""
+    Return the whitespace that `line` starts with.
+
+    Args:
+        line (str): A single line of text.
+
+    Returns:
+        str: The leading whitespace; the whole line when it is whitespace only.
+
+    Examples:
+        >>> extract_indentation(" Hello")
+        ' '
+        >>> extract_indentation("\t\tWorld")
+        '\t\t'
+        >>> extract_indentation("No indentation")
+        ''
+    """
+    return line.removesuffix(line.lstrip())
+
+
+class IndentationInfo(NamedTuple):
+    """
+    Describes the indentation convention detected in a piece of text.
+
+    An instance records the indentation character (space or tab) and how many
+    of them form one indentation level, and provides helpers that convert
+    between character counts and levels and re-indent lines accordingly.
+
+    Attributes:
+        char_count (int): The number of characters used for each indentation level.
+        char (str): The character used for indentation (' ' for space, '\t' for tab).
+        min_indent_level (int): The minimum indentation level found in the analyzed content.
+        consistency (bool): Whether the indentation is consistent throughout the content.
+        message (str | None): A message describing the indentation analysis results.
+
+    Class Methods:
+        from_content: Analyzes the indentation in the given content and creates an IndentationInfo instance.
+
+    Methods:
+        level_difference: Calculates the difference in indentation levels.
+        char_count_to_level: Converts a character count to an indentation level.
+        level_to_chars: Converts an indentation level to a string of indentation characters.
+        shift_indentation: Adjusts the indentation of a sequence of lines.
+        apply_relative_indents: Applies relative indentation based on annotations in the content.
+    """
+    char_count: int
+    char: str
+    min_indent_level: int
+    consistency: bool = True
+    message: str | None = None
+
+    @classmethod
+    def from_content[T: IndentationInfo, S: Sequence[str]](cls: type[T], content: str | S) -> T:
+        """
+        Analyzes the indentation in the given content and creates an IndentationInfo instance.
+
+        This method examines the indentation patterns in the provided content,
+        determines the dominant indentation character and count, and assesses
+        the consistency of indentation throughout the content.
+
+        Args:
+            content (str | Sequence[str]): The content to analyze. A string is split
+                into lines; a sequence is taken as one line per element.
+
+        Returns:
+            IndentationInfo: An instance of IndentationInfo with the analysis results.
+
+        Note:
+            - If the content has no non-blank line, 4 spaces are assumed (PEP 8).
+            - For space indentation, the width is derived from the GCD of the most
+              common even indentation widths (2 when there is none).
+        """
+        # Leading whitespace must be kept here: it is exactly what is measured below.
+        lines = content.splitlines() if isinstance(content, str) else content
+
+        indentations = [extract_indentation(line) for line in lines if line.strip()]
+
+        if not indentations:
+            return cls(4, ' ', 0, True, "No indentation found. Assuming 4 spaces (PEP 8).")
+
+        indent_chars = Counter(indent[0] for indent in indentations if indent)
+        dominant_char = ' ' if indent_chars.get(' ', 0) >= indent_chars.get('\t', 0) else '\t'
+
+        indent_lengths = [len(indent) for indent in indentations]
+
+        if dominant_char == '\t':
+            char_count = 1
+        else:
+            # For spaces, determine the most likely char_count
+            space_counts = [sc for sc in indent_lengths if sc % 2 == 0 and sc > 0]
+            if not space_counts:
+                char_count = 2  # Default to 2 if no even space counts
+            else:
+                # Sort top 5 space counts and find the largest GCD
+                sorted_counts = sorted([c[0] for c in Counter(space_counts).most_common(5)], reverse=True)
+                char_count = sorted_counts[0]
+                for other_count in sorted_counts[1:]:
+                    new_gcd = gcd(char_count, other_count)
+                    if new_gcd <= 1:
+                        break
+                    char_count = new_gcd
+
+        min_indent_chars = min(indent_lengths) if indent_lengths else 0
+        min_indent_level = min_indent_chars // char_count
+
+        consistency = all(len(indent) % char_count == 0 for indent in indentations if indent)
+        match dominant_char:
+            case ' ':
+                domcharstr = 'space'
+            case '\t':
+                domcharstr = 'tab'
+            case _:
+                domcharstr = dominant_char
+        message = f"Found {char_count}-{domcharstr} indentation"
+        if not consistency:
+            message += " (inconsistent)"
+
+        return cls(char_count, dominant_char, min_indent_level, consistency, message)
+
+    def level_difference(self, base_indentation_count: int) -> int:
+        """Return the level of `base_indentation_count` (given in characters) minus `min_indent_level`."""
+        return self.char_count_to_level(base_indentation_count) - self.min_indent_level
+
+    def char_count_to_level(self, char_count: int) -> int:
+        """Convert a number of indentation characters to a level, rounding down."""
+        return char_count // self.char_count
+
+    def level_to_chars(self, level: int) -> str:
+        """Return the indentation string for `level` (`level * char_count` copies of `char`)."""
+        return level * self.char_count * self.char
+
+    def shift_indentation(self, lines: Sequence[str], target_base_indentation_count: int) -> list[str]:
+        """
+        Shifts the indentation of a sequence of lines based on a base indentation count.
+
+        Every non-blank line is moved by the same number of levels: the level of
+        `target_base_indentation_count` minus this instance's `min_indent_level`.
+        Resulting levels never drop below 0.
+
+        Args:
+            lines (Sequence[str]): A sequence of strings representing the lines to be adjusted.
+            target_base_indentation_count (int): Indentation width, in characters, of the target base level.
+
+        Returns:
+            list[str]: A new list of strings with adjusted indentation.
+
+        Note:
+            - Empty lines and lines with only whitespace are preserved as-is.
+            - The method uses the IndentationInfo of the instance to determine
+              the indentation character and count.
+            - Existing indentation is rounded down to a whole number of levels.
+        """
+        raw_line_adjuster = self._shift_indentation_fun(target_base_indentation_count)
+        # Return the transformed lines
+        return [raw_line_adjuster(line) for line in lines]
+
+    def _shift_indentation_fun(self, target_base_indentation_count: int):
+        """Build the per-line function that `shift_indentation` applies to every line."""
+        # Calculate the indentation difference
+        level_difference = self.level_difference(target_base_indentation_count)
+
+        def adjust_line(line: str) -> str:
+            if not line.strip():
+                # Handle empty lines or lines with only whitespace
+                return line
+
+            current_indent_count = get_line_indent_count(line)
+            current_level = self.char_count_to_level(current_indent_count)
+            new_level = max(0, current_level + level_difference)
+            new_indent = self.level_to_chars(new_level)
+
+            return new_indent + line.lstrip()
+        return adjust_line
+
+    def apply_relative_indents[S: Sequence[str]](self, content: str | S, context_indent_count: int = 0) -> list[str]:
+        """
+        Applies relative indentation based on annotations in the content.
+
+        This method processes the input content, interpreting special annotations
+        to apply relative indentation. It uses '@' followed by a number to indicate
+        relative indentation levels, counted from the level that
+        `context_indent_count` corresponds to.
+
+        Args:
+            content (str | Sequence[str]): The content to process. Can be a string
+                or a sequence of strings. A sequence is copied, never modified.
+            context_indent_count (int, optional): The base indentation count of the
+                context. Defaults to 0.
+
+        Returns:
+            list[str]: A new list of strings with normalized indentation (without the annotations)
+
+        Note:
+            - Lines starting with '@n:' (where n is an integer) are interpreted as
+              having a relative indentation of n levels from the context indent level.
+            - Any other line, including one that merely starts with '@' (such as a
+              decorator containing a colon), is placed at the context indent level.
+            - When `content` is a string, blank lines are removed and each line's
+              own leading whitespace is discarded.
+            - The method uses the IndentationInfo of the instance to determine
+              the indentation character and count.
+
+        Raises:
+            AssertionError: If the calculated indentation level for any line is negative.
+        """
+        # Work on a private list so the caller's sequence is left untouched (and tuples are accepted).
+        lines = [line.lstrip() for line in content.splitlines() if line.strip()] if isinstance(content, str) else list(content)
+
+        context_indent_level = self.char_count_to_level(context_indent_count)
+        for i, line in enumerate(lines):
+            parts = line.split(':', 1)
+            # Only a well-formed '@<int>' prefix is an annotation; int() would raise on anything else.
+            if len(parts) == 2 and re.fullmatch(r'@[+-]?\d+', parts[0]):
+                relative_indent_level = int(parts[0][1:])
+                absolute_indent_level = context_indent_level + relative_indent_level
+                assert absolute_indent_level >= 0, f"Final indentation for line `{line.strip()}` cannot be negative ({absolute_indent_level})"
+                lines[i] = self.level_to_chars(absolute_indent_level) + parts[1].lstrip()
+            else:
+                absolute_indent_level = context_indent_level
+                lines[i] = self.level_to_chars(absolute_indent_level) + line.lstrip()
+
+        return lines
+
+
diff --git a/src/text_manipulation/range_spec.py b/src/text_manipulation/range_spec.py
new file mode 100644
index 0000000..5d882a4
--- /dev/null
+++ b/src/text_manipulation/range_spec.py
@@ -0,0 +1,195 @@
+import re
+from collections.abc import Sequence
+from typing import NamedTuple
+
+from cedarscript_ast_parser import Marker, RelativeMarker, RelativePositionType, MarkerType, BodyOrWhole
+from text_manipulation.indentation_kit import get_line_indent_count
+
+MATCH_TYPES = ('exact', 'stripped', 'normalized', 'partial')  # match categories, highest priority first
+
+
+class RangeSpec(NamedTuple):
+    """
+    A half-open span of line indexes (`start` inclusive, `end` exclusive) plus an indentation width.
+    A span whose `start` equals `end` is empty and denotes a single position.
+    """
+    start: int
+    end: int
+    indent: int = 0
+
+    def __str__(self):
+        return (f'{self.start}:{self.end}' if self.as_index is None else f'%{self.as_index}') + f'@{self.indent}'
+
+    def __len__(self):
+        # len() must never be negative; a span with end < start (e.g. end == -1) would otherwise raise ValueError.
+        return max(0, self.end - self.start)
+
+    @property
+    def as_index(self) -> int | None:
+        """`start` when the span is empty (a single position), otherwise None."""
+        return self.start if self.start == self.end else None
+
+    @property
+    def collapsed(self):
+        """An empty span located at `start`."""
+        return self.set_length(0)
+
+    def set_length(self, range_len: int):
+        """Return a copy spanning `range_len` lines from `start`."""
+        # Not _replace(): namedtuple's _make() checks the field count via len(), which is overridden above.
+        return type(self)(self.start, self.start + range_len, self.indent)
+
+    def inc(self, count: int = 1):
+        """Return a copy moved `count` lines towards higher indexes."""
+        return type(self)(self.start + count, self.end + count, self.indent)
+
+    def dec(self, count: int = 1):
+        """Return a copy moved `count` lines towards lower indexes."""
+        return type(self)(self.start - count, self.end - count, self.indent)
+
+    def read[S: Sequence[str]](self, src: S) -> S:
+        """Return the slice of `src` covered by this span."""
+        return src[self.start:self.end]
+
+    def write[S: Sequence[str]](self, src: S, target: S):
+        """Replace, in place, the part of `target` covered by this span with `src`."""
+        target[self.start:self.end] = src
+
+    def delete[S: Sequence[str]](self, src: S) -> S:
+        """Remove this span from `src` in place and return the removed items."""
+        result = self.read(src)
+        del src[self.start:self.end]
+        return result
+
+    @staticmethod
+    def normalize_line(line: str):
+        """Strip `line` and replace each non-word character with '.'."""
+        return re.sub(r'[^\w]', '.', line.strip(), flags=re.UNICODE)
+
+    @classmethod
+    def from_line_marker[T: RangeSpec](
+            cls: type[T],
+            lines: Sequence[str],
+            search_term: 'Marker',
+            search_range: 'RangeSpec | None' = None
+    ) -> T | None:
+        """
+        Locate the line designated by a LINE marker and return its position as an empty range.
+
+        Candidate lines are classified, in decreasing order of priority, as:
+        1. Exact match
+        2. Stripped match (ignoring leading and trailing whitespace)
+        3. Normalized match (after replacing every non-word character with '.')
+        4. Partial match (case-insensitive substring of the normalized line); reported but never returned
+
+        `search_term.offset` (default 0) is applied to each category separately: the first
+        category holding more than `offset` matches supplies the result.
+
+        :param lines: The lines to search through.
+        :param search_term: LINE marker whose `value` is the text to find. For a RelativeMarker,
+            a BEFORE / AFTER qualifier moves the resulting index by -1 / +1.
+        :param search_range: Restricts the search to indexes [start, end); a negative start means 0
+            and a negative end means "up to the last line". Defaults to all of `lines`.
+        :returns: `cls(index, index, indent)`, `indent` being the leading-whitespace count of the
+            matched line, or None when no acceptable match exists.
+        :raises AssertionError: For an empty marker value, a non-LINE marker or a range outside `lines`.
+        :raises ValueError: For a RelativeMarker qualifier other than BEFORE, AFTER or AT.
+        """
+        search_start_index, search_end_index, _ = search_range if search_range is not None else (0, -1, 0)
+        search_line = search_term.value
+        assert search_line, "Empty marker"
+        assert search_term.type == MarkerType.LINE, f"Invalid marker type: {search_term.type}"
+
+        matches = {t: [] for t in MATCH_TYPES}
+
+        stripped_search = search_line.strip()
+        normalized_search_line = cls.normalize_line(stripped_search)
+
+        search_start_index = max(search_start_index, 0)
+        if search_end_index < 0:
+            search_end_index = len(lines)
+
+        assert search_start_index < len(lines), f"search start index ({search_start_index}) must be less than line count ({len(lines)})"
+        assert search_end_index <= len(lines), f"search end index ({search_end_index}) must be less than or equal to line count ({len(lines)})"
+
+        for i in range(search_start_index, search_end_index):
+            line = lines[i]
+            reference_indent = get_line_indent_count(line)
+
+            # Check for exact match
+            if search_line == line:
+                matches['exact'].append((i, reference_indent))
+            # Check for stripped match
+            elif stripped_search == line.strip():
+                matches['stripped'].append((i, reference_indent))
+            # Check for normalized match
+            elif normalized_search_line == cls.normalize_line(line):
+                matches['normalized'].append((i, reference_indent))
+            # Last resort!
+            elif normalized_search_line.casefold() in cls.normalize_line(line).casefold():
+                matches['partial'].append((i, reference_indent))
+
+        offset = search_term.offset or 0
+        for match_type in MATCH_TYPES:
+            if offset < len(matches[match_type]):
+                index, reference_indent = matches[match_type][offset]
+                match match_type:
+                    case 'normalized':
+                        print(f'Note: using {match_type} match for {search_term}')
+                    case 'partial':
+                        print(f"Note: Won't accept {match_type} match at index {index} for {search_term}")
+                        continue
+                if isinstance(search_term, RelativeMarker):
+                    match search_term.qualifier:
+                        case RelativePositionType.BEFORE:
+                            index += -1
+                        case RelativePositionType.AFTER:
+                            index += 1
+                        case RelativePositionType.AT:
+                            pass
+                        case _ as invalid:
+                            raise ValueError(f"Not implemented: {invalid}")
+                return cls(index, index, reference_indent)
+
+        return None
+
+
+RangeSpec.EMPTY = RangeSpec(0, -1, 0)  # default search range: from_line_marker reads a negative end as "up to the last line"
+
+
+class IdentifierBoundaries(NamedTuple):
+    whole: RangeSpec  # span of the complete identifier
+    body: RangeSpec  # span of the identifier's body only
+
+    def __str__(self):
+        return f'IdentifierBoundaries({self.whole} (BODY: {self.body}) )'
+
+    @property
+    def start_line(self) -> int:
+        return self.whole.start + 1  # `whole.start` shifted by one
+
+    @property
+    def body_start_line(self) -> int:
+        return self.body.start + 1  # `body.start` shifted by one
+
+    @property
+    def end_line(self) -> int:
+        return self.whole.end  # `whole.end`, unchanged
+
+    def location_to_search_range(self, location: BodyOrWhole | RelativePositionType) -> RangeSpec:
+        """Map `location` to a range: BODY / WHOLE / AT give a full span, the others an empty span at an edge."""
+        match location:
+            case BodyOrWhole.WHOLE | RelativePositionType.AT:
+                return self.whole
+            case BodyOrWhole.BODY:
+                return self.body
+            case RelativePositionType.BEFORE:
+                return RangeSpec(start=self.whole.start, end=self.whole.start, indent=self.whole.indent)
+            case RelativePositionType.AFTER:
+                return RangeSpec(start=self.whole.end, end=self.whole.end, indent=self.whole.indent)
+            case RelativePositionType.INSIDE_TOP:
+                return RangeSpec(start=self.body.start, end=self.body.start, indent=self.body.indent)
+            case RelativePositionType.INSIDE_BOTTOM:
+                return RangeSpec(start=self.body.end, end=self.body.end, indent=self.body.indent)
+            case _ as invalid:
+                raise ValueError(f"Invalid: {invalid}")
diff --git a/src/text_manipulation/text_editor_kit.py b/src/text_manipulation/text_editor_kit.py
new file mode 100644
index 0000000..29e43f3
--- /dev/null
+++ b/src/text_manipulation/text_editor_kit.py
@@ -0,0 +1,92 @@
+from collections.abc import Sequence
+from typing import Protocol, runtime_checkable
+
+from cedarscript_ast_parser import Marker, RelativeMarker, RelativePositionType, Segment, MarkerType, BodyOrWhole
+from text_manipulation.range_spec import IdentifierBoundaries, RangeSpec
+
+
+def read_file(file_path: str, encoding: str = 'utf-8') -> str:
+    """Return the entire contents of the text file at *file_path*.
+
+    Args:
+        file_path: Path of the file to read.
+        encoding: Text encoding used to decode the file. Defaults to UTF-8 so the
+            result does not depend on the platform's locale encoding.
+
+    Raises:
+        OSError: if the file cannot be opened.
+        UnicodeDecodeError: if the content is not valid in *encoding*.
+    """
+    with open(file_path, 'r', encoding=encoding) as file:
+        return file.read()
+
+
+def write_file(file_path: str, lines: Sequence[str], encoding: str = 'utf-8') -> None:
+    """Overwrite *file_path* with *lines*, appending a newline to every line.
+
+    Args:
+        file_path: Path of the file to (re)create.
+        lines: Lines to write; each must not already end with a line terminator.
+        encoding: Text encoding used to encode the file. Defaults to UTF-8, matching
+            `read_file`, so a read/modify/write round trip preserves the text.
+
+    Raises:
+        OSError: if the file cannot be opened for writing.
+    """
+    with open(file_path, 'w', encoding=encoding) as file:
+        # A generator is enough here: `writelines` consumes any iterable, so the
+        # content does not have to be duplicated in an intermediate list.
+        file.writelines(line + '\n' for line in lines)
+
+
+# def count_leading_chars(line: str, char: str) -> int:
+# return len(line) - len(line.lstrip(char))
+
+def bow_to_search_range(bow: BodyOrWhole, searh_range: IdentifierBoundaries | RangeSpec | None = None) -> RangeSpec:
+    """Resolve *searh_range* into a concrete ``RangeSpec``.
+
+    Args:
+        bow: Which part of an identifier to select. Only consulted when
+            *searh_range* is an ``IdentifierBoundaries``.
+        searh_range: The range to resolve. (The spelling is kept as-is because the
+            name is part of the function's keyword interface.)
+
+    Returns:
+        ``RangeSpec.EMPTY`` when *searh_range* is ``None``; the argument itself when
+        it is already a ``RangeSpec``; otherwise the result of
+        ``searh_range.location_to_search_range(bow)``.
+
+    Raises:
+        ValueError: if *searh_range* is of any other type.
+    """
+    match searh_range:
+
+        case None:
+            return RangeSpec.EMPTY
+
+        case RangeSpec():
+            # Returned untouched. Absence is detected by the explicit `None` case
+            # above rather than by truthiness, so a range that happens to evaluate
+            # as falsy is never silently swapped for EMPTY.
+            return searh_range
+
+        case IdentifierBoundaries():
+            return searh_range.location_to_search_range(bow)
+
+        case _ as invalid:
+            raise ValueError(f"Invalid: {invalid}")
+
+
+# MarkerOrSegment
+
+# class MarkerOrSegmentProtocol(Protocol):
+# def to_search_range(self) -> str:
+# ...
+
+
+@runtime_checkable
+class MarkerOrSegmentProtocol(Protocol):
+    """Structural type for objects that expose ``marker_or_segment_to_index_range``.
+
+    NOTE(review): in this module ``Marker`` and ``Segment`` are given a method named
+    ``to_search_range`` rather than the one declared here — confirm whether this
+    protocol is still meant to describe them.
+    """
+
+    def marker_or_segment_to_index_range(
+            self,
+            lines: Sequence[str],
+            search_start_index: int = 0,
+            search_end_index: int = -1,
+    ) -> RangeSpec:
+        """Signature-only stub; conforming classes provide the implementation."""
+
+
+def marker_or_segment_to_search_range_impl(
+    self,
+    lines: Sequence[str],
+    search_range: RangeSpec = RangeSpec.EMPTY
+) -> RangeSpec | None:
+    """Locate this marker or segment inside *lines*.
+
+    Written as a free function that takes ``self`` so it can be attached to the
+    ``Marker`` and ``Segment`` classes as a method.
+
+    Args:
+        self: The ``Marker`` (of type ``MarkerType.LINE``) or ``Segment`` to locate.
+        lines: The text to search, one entry per line.
+        search_range: Range of *lines* within which to search.
+
+    Returns:
+        The range produced by ``RangeSpec.from_line_marker`` for a line marker, or by
+        ``segment_to_search_range`` for a segment.
+
+    Raises:
+        AssertionError: if a line marker cannot be found.
+        ValueError: if *self* is neither a LINE marker nor a segment.
+    """
+    if isinstance(self, Marker) and self.type == MarkerType.LINE:
+        found = RangeSpec.from_line_marker(lines, self, search_range)
+        assert found is not None, f"Unable to find `{self}`; Try: 1) Double-checking the marker (maybe you specified the the wrong one); or 2) using *exactly* the same characters from source; or 3) using another marker"
+        # TODO check under which circumstances we should return a 1-line range instead of an empty range
+        return found
+
+    if isinstance(self, Segment):
+        return segment_to_search_range(lines, self.start, self.end, search_range)
+
+    raise ValueError(f"Unexpected type: {self}")
+
+
+# Attach the shared implementation to both AST classes at import time, so that an
+# instance of either can be resolved with `obj.to_search_range(lines, search_range)`.
+# Note that this mutates classes imported from `cedarscript_ast_parser`.
+Marker.to_search_range = marker_or_segment_to_search_range_impl
+Segment.to_search_range = marker_or_segment_to_search_range_impl
+
+
+def segment_to_search_range(
+    lines: Sequence[str],
+    start_relpos: RelativeMarker, end_relpos: RelativeMarker,
+    search_range: RangeSpec = RangeSpec.EMPTY
+) -> RangeSpec:
+    """Resolve a segment, given by its start and end markers, into a range of *lines*.
+
+    Args:
+        lines: The text to search, one entry per line; must not be empty.
+        start_relpos: Marker locating where the segment starts.
+        end_relpos: Marker locating where the segment ends; it is searched for from
+            the start match onwards, up to ``search_range.end``.
+        search_range: Range of *lines* within which the start marker is searched.
+
+    Returns:
+        A ``RangeSpec`` from the start match's index to one past the end match's
+        index (when that index is non-negative), using the start match's indent.
+
+    Raises:
+        AssertionError: if *lines* is empty or either marker cannot be found.
+    """
+    assert len(lines), "`lines` is empty"
+
+    start_match_result = RangeSpec.from_line_marker(lines, start_relpos, search_range)
+    # `from_line_marker` reports "not found" with None, so test for None explicitly
+    # instead of relying on the truthiness of a tuple-like range object.
+    assert start_match_result is not None, f"Unable to find segment start `{start_relpos}`; Try: 1) Double-checking the marker (maybe you specified the the wrong one); or 2) using *exactly* the same characters from source; or 3) using a marker from above"
+
+    start_index_for_end_marker = start_match_result.as_index
+    if start_relpos.qualifier == RelativePositionType.AFTER:
+        # For AFTER the reported index is one past the matched line; step back so the
+        # end-marker search begins at the matched line itself.
+        start_index_for_end_marker -= 1
+    end_search_range = RangeSpec(start_index_for_end_marker, search_range.end, start_match_result.indent)
+    end_match_result = RangeSpec.from_line_marker(lines, end_relpos, end_search_range)
+    assert end_match_result is not None, f"Unable to find segment end `{end_relpos}` - Try: 1) using *exactly* the same characters from source; or 2) using a marker from below"
+    if end_match_result.as_index > -1:
+        # Move the end one past the matched line.
+        one_after_end = end_match_result.as_index + 1
+        end_match_result = RangeSpec(one_after_end, one_after_end, end_match_result.indent)
+    return RangeSpec(
+        start_match_result.as_index, end_match_result.as_index, start_match_result.indent
+    )