# ---- src/smp/__init__.py ----
"""Skaldpress Macro Processor (smp): package root and command-line entry points."""

__version__ = "0.0.1"

import sys

import smp.macro_processor
import smp.builtins

# Written to stderr before the result so it never pollutes the processed output.
_SEPARATOR = "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"


def _process_and_print(text):
    """Run ``text`` through a fresh MacroProcessor and print the result to stdout."""
    # Named `processor` (not `smp`) so the package name is not shadowed.
    processor = smp.macro_processor.MacroProcessor()
    result = processor.process_input(text)
    print(_SEPARATOR, file=sys.stderr)
    print(result)


def repl():
    """Interactive mode placeholder; only prints a banner for now."""
    print("=Skaldpress Macro Processor (REPL)")
    # print(" type \"quit\" to exit");
    print("NOT IMPLEMENTED")
    # Intend to use code.InteractiveConsole or code.InteractiveInterpreter
    # as well as the readline library


def read_stdin():
    """Read all of stdin, process it, and print the expanded text."""
    _process_and_print(sys.stdin.read())


def main():
    """Command-line entry point.

    Usage:
        smp FILE    process FILE
        smp -       process stdin
        smp         process stdin when it is piped, otherwise start the REPL

    An explicit FILE argument takes precedence over piped stdin, so the tool
    also works when launched without a terminal (cron, CI, editors).
    """
    args = sys.argv[1:]

    if args and args[0] != "-":
        with open(args[0], "r") as f:
            file_content = f.read()
        _process_and_print(file_content)
        return

    if args or not sys.stdin.isatty():
        # Either "-" was given explicitly or input is being piped in.
        read_stdin()
        sys.exit(0)

    repl()
    sys.exit(0)
# ---- src/smp/builtins.py ----
"""Built-in macros for the Skaldpress Macro Processor.

Every builtin takes the invoking macro processor as first argument, followed
by the raw (unexpanded) macro arguments as strings.  The processor object is
expected to provide ``macros`` (dict), ``py_global_env`` (dict),
``process_input(text)`` and ``expand_macro(name, args)``.
"""
import datetime
import subprocess


def _branch(macro_processor, condition, iftrue, iffalse):
    """Expand and return the branch selected by ``condition``.

    Returns "" when the condition is false and no else-branch was given.
    """
    if condition:
        return macro_processor.process_input(iftrue)
    if iffalse is not None:
        return macro_processor.process_input(iffalse)
    return ""


def _require_array(macro_processor, array_name):
    """Return the list stored under ``array_name``.

    Raises:
        Exception: if the name is not defined or does not hold a list.
    """
    macros = macro_processor.macros
    if array_name not in macros:
        raise Exception(f"{array_name} is not a macro")
    array = macros[array_name]
    if not isinstance(array, list):
        raise Exception(f"{array_name} is not a array")
    return array


def smp_builtin_define(macro_processor, macro_name, macro_value=None):
    """Define ``macro_name`` as the expanded ``macro_value`` ("" when omitted)."""
    macro_name = macro_processor.process_input(macro_name)
    if macro_value is None:
        macro_processor.macros[macro_name] = ""
    else:
        macro_processor.macros[macro_name] = macro_processor.process_input(macro_value)
    return ""


def smp_builtin_undefine(macro_processor, macro_name):
    """Remove ``macro_name`` from the macro table; unknown names are ignored."""
    # The name is deliberately not expanded: expanding it would yield the
    # macro's value instead of its name.
    macro_processor.macros.pop(macro_name, None)
    return ""


def smp_builtin_define_array(macro_processor, macro_name):
    """Define ``macro_name`` (expanded) as a new empty array."""
    macro_name = macro_processor.process_input(macro_name)
    macro_processor.macros[macro_name] = []
    return ""


def smp_builtin_ifdef(macro_processor, macro_name, iftrue, iffalse=None):
    """Expand ``iftrue`` if ``macro_name`` is defined, else ``iffalse`` (if given)."""
    return _branch(
        macro_processor, macro_name in macro_processor.macros, iftrue, iffalse
    )


def smp_builtin_ifndef(macro_processor, macro_name, iftrue, iffalse=None):
    """Expand ``iftrue`` if ``macro_name`` is NOT defined, else ``iffalse`` (if given)."""
    return _branch(
        macro_processor, macro_name not in macro_processor.macros, iftrue, iffalse
    )


def smp_builtin_ifeq(macro_processor, a, b, iftrue, iffalse=None):
    """Expand ``iftrue`` if the expansions of ``a`` and ``b`` are equal."""
    a = macro_processor.process_input(a)
    b = macro_processor.process_input(b)
    return _branch(macro_processor, a == b, iftrue, iffalse)


def smp_builtin_ifneq(macro_processor, a, b, iftrue, iffalse=None):
    """Expand ``iftrue`` if the expansions of ``a`` and ``b`` differ."""
    a = macro_processor.process_input(a)
    b = macro_processor.process_input(b)
    return _branch(macro_processor, a != b, iftrue, iffalse)


def smp_builtin_include(macro_processor, filename):
    """Return the macro-expanded content of the file named by ``filename``."""
    filename = macro_processor.process_input(filename)
    with open(filename, "r") as f:
        file_content = f.read()
    return macro_processor.process_input(file_content)


def smp_builtin_include_verbatim(macro_processor, filename):
    """Return the content of the file named by ``filename`` without expansion."""
    filename = macro_processor.process_input(filename)
    with open(filename, "r") as f:
        return f.read()


def smp_builtin_shell(macro_processor, cmd_args):
    """Run the expanded ``cmd_args`` through the shell and return its stdout.

    Raises:
        subprocess.CalledProcessError: if the command exits non-zero.
    """
    cmd_args = macro_processor.process_input(cmd_args)
    # NOTE(security): shell=True executes whatever the document contains;
    # only process trusted input.
    return subprocess.check_output(cmd_args, shell=True).decode()


def smp_builtin_eval(macro_processor, expression):
    """Evaluate ``expression`` as Python and return the resulting object.

    The processor's ``py_global_env`` is used as globals and its macro table
    as locals, so macros are visible as variables.
    """
    # NOTE(security): eval() on document content; only process trusted input.
    return eval(expression, macro_processor.py_global_env, macro_processor.macros)


def smp_builtin_array_push(macro_processor, array_name, *values):
    """Append every value in ``values`` to the array ``array_name``."""
    _require_array(macro_processor, array_name).extend(values)
    return ""


def smp_builtin_array_size(macro_processor, array_name):
    """Return the number of elements in ``array_name`` as a string."""
    return str(len(_require_array(macro_processor, array_name)))


def smp_builtin_array_each(macro_processor, array_name, template):
    """Expand the macro ``template`` once per array element and concatenate.

    A string element is passed as the single argument; any other element is
    passed as the argument list itself.
    """
    parts = []
    for el in _require_array(macro_processor, array_name):
        if isinstance(el, str):
            el = [el]
        parts.append(
            macro_processor.process_input(macro_processor.expand_macro(template, el))
        )
    return "".join(parts)


def smp_builtin_explode(macro_processor, array_name, delimiter, input):
    """Split the expanded ``input`` on the expanded ``delimiter`` into ``array_name``."""
    array = _require_array(macro_processor, array_name)
    delimiter = macro_processor.process_input(delimiter)
    array.extend(macro_processor.process_input(input).split(delimiter))
    return ""


def smp_builtin_format_time(macro_processor, format, time):
    """Format the ISO-8601 timestamp ``time`` (expanded) using strftime ``format``.

    Raises:
        ValueError: if the expanded timestamp is not valid ISO format.
    """
    timestamp = macro_processor.process_input(time)
    dobj = datetime.datetime.fromisoformat(timestamp)
    return dobj.strftime(format)


def smp_builtin_html_from_markdown(macro_processor, text, extensions=None):
    """Render ``text`` as HTML using Markdown with GFM autolinks and task lists.

    Args:
        text: Markdown source; it is macro-expanded twice before rendering.
        extensions: optional extra Markdown extensions (a single name or an
            iterable).  The caller's object is never modified.
    """
    # Imported lazily so the other builtins work without these packages.
    import markdown
    from gfm import AutolinkExtension, TaskListExtension

    # Get rid of quoting, I don't remember why, but the rust implementation does it like this.
    for _ in range(2):
        text = macro_processor.process_input(text)

    # Build a fresh list on every call: the previous shared default list grew
    # by two extensions per invocation.
    if extensions is None:
        all_extensions = []
    elif isinstance(extensions, str):
        all_extensions = [extensions]
    else:
        all_extensions = list(extensions)
    all_extensions.append(AutolinkExtension())
    all_extensions.append(TaskListExtension(max_depth=2))
    return markdown.markdown(text, extensions=all_extensions)


def smp_builtin_dumpenv(macro_processor):
    """Return a printable dump of all macros and Python globals."""
    lines = ["━ Macros ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"]
    for key, val in macro_processor.macros.items():
        lines.append(f"{repr(key)}: {repr(val)}\n")
    lines.append("━ Globals ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n")
    for key, val in macro_processor.py_global_env.items():
        if key == "__builtins__":
            # Injected by exec/eval; huge and uninteresting.
            continue
        lines.append(f"{repr(key)}: {repr(val)}\n")
    lines.append("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
    return "".join(lines)


# ---- src/smp/exceptions.py ----
class SMPException(Exception):
    """Base class for all errors raised by the macro processor."""


class MacroArgumentException(SMPException):
    """Raised when a macro is invoked with invalid arguments."""
# ---- src/smp/macro_processor.py ----
"""Character-level macro processor for Skaldpress."""
import inspect
import re
import traceback
from contextlib import redirect_stdout
from enum import Enum
from io import StringIO
from typing import Any, Optional

# Positional placeholders ($0, $1, ...) inside a text macro body.
_PLACEHOLDER_RE = re.compile(r"\$([0-9]+)")

# Names handled by the parser itself instead of being looked up as macros.
_DIRECTIVES = ("SNNL", "DNL")


class ParserState(Enum):
    """States of the process_input state machine."""

    NORMAL = 1
    IN_QUOTES = 2
    IN_MACRO = 3
    IN_MACRO_ARGS = 4
    IN_SPECIAL_MACRO = 5
    IN_SPECIAL_MACRO_EXPRESSION = 6
    IN_CODE = 7
    DNL = 8


def macro_is_whitespace_deleting(s: str) -> bool:
    """Return True if the macro name ends with "_" (whitespace-deleting form)."""
    return s.endswith("_")


def macro_name_clean(macro_name: str) -> str:
    """Return the macro name without its trailing whitespace-deleting "_"."""
    if macro_is_whitespace_deleting(macro_name):
        return macro_name[:-1]
    return macro_name


def _delete_whitespace(output: str, macro_name: str) -> tuple[str, str]:
    """Apply the trailing-underscore rule.

    For a name ending in "_", drop one space from the end of ``output`` (if
    present) and return the cleaned name; otherwise return both unchanged.
    """
    if macro_is_whitespace_deleting(macro_name):
        # endswith() instead of output[-1]: output may still be empty.
        if output.endswith(" "):
            output = output[:-1]
        macro_name = macro_name_clean(macro_name)
    return output, macro_name


class MacroProcessor:
    """Expands macros, quotes and embedded Python code in text."""

    # All currently defined macros in this MacroProcessor
    macros: dict[str, Any]
    # All macro invocations that has happened
    macro_invocations: list[tuple[str, list[str]]]
    # Emitted warnings
    warnings: list[Any]
    # Global environment for python execution
    py_global_env: dict

    special_macros: dict[str, tuple[str, str]]

    def __init__(self, prefix=""):
        """Create a processor with all builtins registered under ``prefix``."""
        self.macros = dict()
        self.special_macros = dict()
        self.macro_invocations = list()
        self.warnings = list()
        self.py_global_env = dict()
        self._define_builtins(prefix=prefix)

    def _define_builtins(self, prefix=""):
        """Register every ``smp_builtin_*`` function as ``<prefix><name>``."""
        from smp import builtins as smp_builtins

        # Order is kept stable because dumpenv lists macros in insertion order.
        builtin_names = (
            "define",
            "undefine",
            "define_array",
            "ifdef",
            "ifndef",
            "ifeq",
            "ifneq",
            "include",
            "include_verbatim",
            "shell",
            "dumpenv",
            "eval",
            "array_push",
            "array_each",
            "array_size",
            "explode",
            "format_time",
            "html_from_markdown",
        )
        for name in builtin_names:
            self.macros[f"{prefix}{name}"] = getattr(
                smp_builtins, f"smp_builtin_{name}"
            )
        self.special_macros["test"] = ("", "")

    def expand_macro(self, macro_name: str, args: Optional[list[str]] = None) -> str:
        """Expand one macro invocation and return the resulting text.

        Args:
            macro_name: name as written in the input; a trailing "_" is ignored.
            args: raw argument strings (default: no arguments).

        Returns:
            * unknown macro: the invocation re-emitted as text, with each
              argument expanded;
            * callable macro: ``str()`` of its return value; the processor is
              passed first when the callable has a parameter named
              ``macro_processor`` or ``smp``;
            * text macro: the body with ``$0``, ``$1``, ... replaced by the
              arguments, then expanded again;
            * anything else (e.g. an array): its ``repr``.
        """
        # Ignore trailing underscore in macro name, the parser will pop a space in front if
        # present, but we should ignore it for finding the macro.
        macro_name = macro_name_clean(macro_name)
        if args is None:
            # Fresh list per call; it is stored in macro_invocations below.
            args = []

        if macro_name not in self.macros:
            if len(args) == 0:
                return macro_name
            expanded_args = ",".join(self.process_input(arg) for arg in args)
            return f"{macro_name}({expanded_args})"

        # Strip surrounding whitespace from arguments
        args = [arg.strip() for arg in args]

        # Log macro invokation
        # The fact that we are here, does not ensure that the macro is actually expanded into
        # something useful, just that it exists, and was invoked
        self.macro_invocations.append((macro_name, args))

        macro = self.macros.get(macro_name)

        if callable(macro):
            signature = inspect.signature(macro)
            macro_args = []
            if (
                "macro_processor" in signature.parameters
                or "smp" in signature.parameters
            ):
                macro_args.append(self)
            macro_args.extend(args)
            return str(macro(*macro_args))

        if isinstance(macro, str):

            def substitute(match):
                index = int(match.group(1))
                # Placeholders without a matching argument are left untouched.
                return args[index] if index < len(args) else match.group(0)

            # Single pass: every placeholder is replaced (not only the last
            # one) and text inserted from an argument is not re-scanned.
            expanded = _PLACEHOLDER_RE.sub(substitute, macro)
            return self.process_input(expanded)

        return f"{repr(macro)}"

    def process_input(self, input: str) -> str:
        """Expand all macros in ``input`` and return the resulting text.

        Syntax handled by the state machine:
            * ``name`` / ``name(arg, ...)``  macro invocation; a trailing "_"
              on the name removes one space preceding the invocation;
            * ``%" ... "%``                  quoted text, copied verbatim
              (nested quotes keep their inner markers);
            * ``%( ... )%``                  Python code run with exec();
              anything it prints is inserted, exceptions are printed as a
              traceback and otherwise ignored;
            * ``DNL``                        discard the rest of the line;
            * ``SNNL``                       skip the next line ending.

        Unterminated quotes or argument lists at end of input are not
        reported; their partial content is dropped.

        I also want to add special syntax for "special blocks",
        I am thinking of two main options, either some macro_names are intercepted, _or_ a special kind of macro can exist like
        These will be on a line-basis, so they simply end on newline
        @if

        @else

        @endif

        @for
        @endfor
        """
        output = ""
        state = ParserState.NORMAL
        macro_name = ""
        macro_args = []
        argument = ""
        py_expr = ""

        skip_next_line_ending = False

        # We should keep track of filename, linenumber, and character number on line here
        # So we can give sensible error messages

        quote_level = 0
        parens_level = 0

        i = 0
        while i < len(input):
            c = input[i]
            peek = None if i + 1 >= len(input) else input[i + 1]

            if state == ParserState.DNL:
                # Swallow everything up to and including the newline.
                if c == "\n":
                    state = ParserState.NORMAL

            elif state == ParserState.NORMAL:
                if skip_next_line_ending and (c == "\n"):
                    skip_next_line_ending = False
                    i += 1
                    continue

                if c == "%" and peek == "(":
                    state = ParserState.IN_CODE
                    i += 2
                    continue

                if c == "%" and peek == '"':
                    state = ParserState.IN_QUOTES
                    quote_level += 1
                    i += 1  # skip the opening quote character as well
                elif c.isalnum():
                    state = ParserState.IN_MACRO
                    macro_name += c
                else:
                    output += c

            elif state == ParserState.IN_QUOTES:
                if c == "%" and peek == '"':
                    # Nested quote: keep the marker in the output.
                    quote_level += 1
                    i += 1
                    output += '%"'
                elif c == '"' and peek == "%":
                    quote_level -= 1
                    if quote_level == 0:
                        state = ParserState.NORMAL
                    else:
                        output += '"%'
                    i += 1
                else:
                    output += c

            elif state == ParserState.IN_MACRO:
                if c.isalnum() or c == "_":
                    macro_name += c
                elif c == "(":
                    parens_level += 1
                    state = ParserState.IN_MACRO_ARGS
                else:
                    # The name ended without an argument list.
                    output, macro_name = _delete_whitespace(output, macro_name)

                    if macro_name == "SNNL":
                        skip_next_line_ending = c != "\n"
                    elif macro_name == "DNL":
                        if c != "\n":
                            state = ParserState.DNL
                            macro_name = ""
                            i += 1
                            continue
                        # A newline directly after DNL is consumed here.
                    else:
                        output += self.expand_macro(macro_name)
                        output += c
                    macro_name = ""
                    state = ParserState.NORMAL

            elif state == ParserState.IN_MACRO_ARGS:
                # Quotes inside arguments are kept as-is; they are resolved
                # when the argument itself is expanded.
                if c == "%" and peek == '"':
                    quote_level += 1
                    i += 2
                    argument += '%"'
                    continue
                elif c == '"' and peek == "%":
                    quote_level -= 1
                    i += 2
                    argument += '"%'
                    continue
                elif quote_level > 0:
                    argument += c
                    i += 1
                    continue

                if (c == ")") and (parens_level == 1):
                    output, macro_name = _delete_whitespace(output, macro_name)
                    parens_level = 0
                    macro_args.append(argument.strip())
                    output += self.expand_macro(macro_name, macro_args)
                    state = ParserState.NORMAL
                    macro_name = ""
                    macro_args = []
                    argument = ""
                elif (c == ",") and (parens_level == 1):
                    macro_args.append(argument.strip())
                    argument = ""
                else:
                    # Nested parentheses belong to the current argument.
                    if c == "(":
                        parens_level += 1
                    if c == ")":
                        parens_level -= 1
                    argument += c

            elif state == ParserState.IN_CODE:
                if c == ")" and peek == "%":
                    try:
                        captured = StringIO()
                        with redirect_stdout(captured):
                            # NOTE(security): executes document content; the
                            # macro table doubles as the local namespace.
                            exec(py_expr, self.py_global_env, self.macros)
                        output += captured.getvalue()
                    except Exception:
                        # Best effort: report and continue with the text.
                        traceback.print_exc()
                    py_expr = ""
                    state = ParserState.NORMAL
                    i += 1
                else:
                    py_expr += c

            i += 1

        # Handle cases where the text ends with a macro without arguments
        if macro_name != "":
            output, macro_name = _delete_whitespace(output, macro_name)
            # Directives at end of input have nothing left to act on and must
            # not leak into the output as text.
            if macro_name not in _DIRECTIVES:
                output += self.expand_macro(macro_name)
        return output