import smp.builtins
import traceback
import inspect
from typing import Any
from enum import Enum
from io import StringIO
from contextlib import redirect_stdout


class ParserState(Enum):
    """States of the character-by-character parser in ``process_input``."""

    NORMAL = 1
    IN_QUOTES = 2
    IN_MACRO = 3
    IN_MACRO_ARGS = 4
    IN_SPECIAL_MACRO = 5
    IN_SPECIAL_MACRO_EXPRESSION = 6
    IN_CODE = 7
    DNL = 8


def macro_is_whitespace_deleting(s: str) -> bool:
    """Return True when the macro name ends with ``_``.

    A trailing underscore asks the parser to remove one space in front of
    the macro invocation. The empty string is never whitespace deleting.
    """
    return s.endswith("_")


def macro_name_clean(macro_name: str) -> str:
    """Return ``macro_name`` without its trailing ``_`` marker, if present."""
    if macro_is_whitespace_deleting(macro_name):
        return macro_name[:-1]
    return macro_name


def seek(input: str, start: int, target: str) -> int | None:
    """Match ``target`` in ``input`` at position ``start``.

    Deprecated: emits a ``DeprecationWarning``; consider ``str.startswith``.

    Returns the index just past the match, or None when ``target`` does not
    fit in or does not match ``input`` at ``start``.
    """
    from warnings import warn

    warn(
        "seek should be considered replaced with str.startswith",
        DeprecationWarning,
        stacklevel=2,
    )
    input_end = len(input)
    target_end = len(target)
    if input_end < start + target_end:
        return None
    i = 0
    while i < len(target):
        if input[start + i] != target[i]:
            return None
        i += 1
    return start + target_end


class MacroProcessor:
    """Expand macros, quotes and embedded python code in text input."""

    source_file_path: str
    # All currently defined macros in this MacroProcessor
    macros: dict[str, Any]
    # All macro invocations that have happened, as (name, arguments)
    macro_invocations: list[tuple[str, list[str]]]
    warnings: list[Any]
    # Global environment for python execution
    py_global_env: dict
    py_local_env_alt: dict
    py_local_env_current: dict
    special_macros: dict[str, tuple[Any, Any]]
    start_quote: str = '%"'
    end_quote: str = '"%'
    prefix: str = ""

    def __init__(self, prefix=""):
        """Create a processor whose builtin macro names start with ``prefix``."""
        self.macros = dict()
        self.macro_invocations = list()
        self.warnings = list()
        self.py_global_env = dict()
        self.py_local_env_alt = dict()
        # Embedded python code runs with the macro table as its local scope.
        self.py_local_env_current = self.macros
        self.indent_level = ""
        self.prefix = prefix
        self._define_builtins(self.macros)
        self._define_builtins(self.py_local_env_alt)

    def _define_builtins(self, env):
        """Populate ``env`` with the builtin macros and default settings."""
        env[f"{self.prefix}macro_processor"] = self
        env[f"{self.prefix}define"] = smp.builtins.smp_builtin_define
        env[f"{self.prefix}undefine"] = smp.builtins.smp_builtin_undefine
        env[f"{self.prefix}define_array"] = smp.builtins.smp_builtin_define_array
        env[f"{self.prefix}ifdef"] = smp.builtins.smp_builtin_ifdef
        env[f"{self.prefix}ifndef"] = smp.builtins.smp_builtin_ifndef
        env[f"{self.prefix}ifeq"] = smp.builtins.smp_builtin_ifeq
        env[f"{self.prefix}ifneq"] = smp.builtins.smp_builtin_ifneq
        env[f"{self.prefix}once"] = smp.builtins.smp_builtin_once
        env[f"{self.prefix}include"] = smp.builtins.smp_builtin_include
        env[f"{self.prefix}include_verbatim"] = (
            smp.builtins.smp_builtin_include_verbatim
        )
        env[f"{self.prefix}shell"] = smp.builtins.smp_builtin_shell
        env[f"{self.prefix}dumpenv"] = smp.builtins.smp_builtin_dumpenv
        env[f"{self.prefix}eval"] = smp.builtins.smp_builtin_eval
        env[f"{self.prefix}array_push"] = smp.builtins.smp_builtin_array_push
        env[f"{self.prefix}array_each"] = smp.builtins.smp_builtin_array_each
        env[f"{self.prefix}array_size"] = smp.builtins.smp_builtin_array_size
        env[f"{self.prefix}explode"] = smp.builtins.smp_builtin_explode
        env[f"{self.prefix}format_time"] = smp.builtins.smp_builtin_format_time
        env[f"{self.prefix}html_from_markdown"] = (
            smp.builtins.smp_builtin_html_from_markdown
        )
        env[f"{self.prefix}wodl"] = smp.builtins.smp_builtin_wodl
        env[f"{self.prefix}template"] = smp.builtins.smp_builtin_template
        env[f"{self.prefix}template_stack"] = []
        # If true, include-macros will parse yaml in beginning of content
        env[f"{self.prefix}parse_file_yaml"] = True
        # If true, some macros will run in a draft-mode,
        # meaning they will skip steps that are slow.
        env[f"{self.prefix}draft"] = False
        env[f"{self.prefix}metadata_prefix"] = "METADATA_"

    def define_macro_string(self, macro_name, macro_value):
        """Define ``macro_name`` as ``str(macro_value)``."""
        self.define_macro(macro_name, str(macro_value))

    def define_macro(self, macro_name, macro_value):
        """Define (or overwrite) ``macro_name`` with ``macro_value``."""
        self.macros[macro_name] = macro_value

    def _define_macro_with_prefix(self, macro_name, macro_value, sub_prefix: str = ""):
        """Define a macro named ``<prefix><sub_prefix><macro_name>``."""
        self.macros[f"{self.prefix}{sub_prefix}{macro_name}"] = macro_value

    def _get_macro_with_prefix(self, macro_name, sub_prefix: str = "", default=None):
        """Look up ``<prefix><sub_prefix><macro_name>``, or return ``default``."""
        return self.macros.get(f"{self.prefix}{sub_prefix}{macro_name}", default)

    def log_warning(self, message):
        """Record a warning message.

        More context (line number, file, ...) should be added here once it
        is available.
        """
        self.warnings.append(message)

    def expand_macro(self, macro_name: str, args: list[str] | None = None) -> str:
        """Expand a single macro invocation and return the resulting text.

        Args:
            macro_name: Name of the macro; a trailing ``_`` is ignored.
            args: Arguments of the invocation. Omitted or None means no
                arguments.

        Returns:
            * Unknown macro: the invocation itself, with each argument run
              through ``process_input``.
            * Callable macro: ``str()`` of its return value. If the callable
              raises, a warning is logged and the invocation text with
              ``repr()``-ed arguments is returned.
            * String macro: the string with ``$0``, ``$1``, ... replaced by
              the arguments, then processed again.
            * Anything else: ``repr()`` of the macro value.
        """
        # None instead of a mutable default list shared between calls.
        if args is None:
            args = []

        # Ignore trailing underscore in macro name, the parser will pop a space in front if
        # present, but we should ignore it for finding the macro.
        macro_name = macro_name_clean(macro_name)

        if macro_name not in self.macros:
            if len(args) == 0:
                return macro_name
            processed = ",".join(self.process_input(arg) for arg in args)
            return f"{macro_name}({processed})"

        # Strip surrounding whitespace from arguments. A new list is built so
        # the stripped values are what gets used and the caller's list is
        # left untouched.
        args = [arg.strip() for arg in args]

        # Log macro invocation.
        # The fact that we are here does not ensure that the macro is actually expanded into
        # something useful, just that it exists, and was invoked.
        self.macro_invocations.append((macro_name, args))

        macro = self.macros.get(macro_name)
        if callable(macro):
            signature = inspect.signature(macro)
            macro_args: list[Any] = []
            # Callables can opt in to receiving the processor as first argument.
            if (
                "macro_processor" in signature.parameters
                or "smp" in signature.parameters
            ):
                macro_args.append(self)
            macro_args.extend(args)
            try:
                return str(macro(*macro_args))
            except Exception as e:
                s = f"{macro_name}({','.join([repr(x) for x in args])})"
                self.log_warning(
                    f"Error expanding macro {s} ({e})\n{traceback.format_exc()}"
                )
                return s

        if isinstance(macro, str):
            expanded = macro
            # Substitute into the running result so every placeholder is
            # applied, highest index first so "$1" cannot clobber "$10".
            for i in reversed(range(len(args))):
                expanded = expanded.replace(f"${i}", args[i])
            return self.process_input(expanded)

        return f"{repr(macro)}"

    def process_input(self, input: str):
        """Process ``input`` and return it with macros, quotes and code expanded.

        Syntax handled by the parser:
            * ``name`` / ``name(arg, ...)``: macro invocation. A name ending
              in ``_`` also removes one space directly in front of it.
            * ``%"`` ... ``"%``: quoted text, copied without expansion. Quotes
              nest; the outermost pair is removed.
            * ``%(`` ... ``)%``: python code, executed with ``exec``. Whatever
              it writes to stdout is inserted into the output.
            * ``DNL``: drop the rest of the line, including the newline.
            * ``SNNL``: skip the next line ending.

        Ideas for later: "special blocks" on a line basis (``@if`` /
        ``@else`` / ``@endif``, ``@for`` / ``@endfor``), and an
        implementation that pushes expanded macros back onto the input the
        way M4 does.
        """
        output = ""
        state = ParserState.NORMAL
        macro_name = ""
        macro_args = []
        argument = ""
        py_expr = ""
        skip_next_line_ending = False
        # We should keep track of filename, linenumber, and character number on line here
        # So we can give sensible error messages
        # Probably add to python stack trace?
        quote_level = 0
        parens_level = 0

        i = 0
        while i < len(input):
            c = input[i]
            peek = None if i + 1 >= len(input) else input[i + 1]

            if state == ParserState.DNL:
                # Discard everything up to and including the newline.
                if c == "\n":
                    state = ParserState.NORMAL

            elif state == ParserState.NORMAL:
                if skip_next_line_ending and (c == "\n"):
                    skip_next_line_ending = False
                    i += 1
                    continue

                if c == "%" and peek == "(":
                    state = ParserState.IN_CODE
                    i += 2
                    continue

                if c == "%" and peek == '"':
                    state = ParserState.IN_QUOTES
                    quote_level += 1
                    i += 1
                elif c.isalnum():
                    state = ParserState.IN_MACRO
                    macro_name += c
                else:
                    output += c

            elif state == ParserState.IN_QUOTES:
                if c == "%" and peek == '"':
                    quote_level += 1
                    i += 1
                    output += '%"'
                elif c == '"' and peek == "%":
                    quote_level -= 1
                    if quote_level == 0:
                        state = ParserState.NORMAL
                    else:
                        output += '"%'
                    i += 1
                else:
                    output += c

            elif state == ParserState.IN_MACRO:
                if c.isalnum() or c == "_":
                    macro_name += c
                elif c == "(":
                    parens_level += 1
                    state = ParserState.IN_MACRO_ARGS
                else:
                    if macro_is_whitespace_deleting(macro_name):
                        # endswith is safe when nothing has been output yet.
                        if output.endswith(" "):
                            output = output[:-1]
                        macro_name = macro_name_clean(macro_name)

                    if macro_name == "SNNL":
                        # A newline right here is the one to skip; otherwise
                        # remember to skip the next one.
                        skip_next_line_ending = c != "\n"
                    elif macro_name == "DNL":
                        if c != "\n":
                            state = ParserState.DNL
                            macro_name = ""
                            i += 1
                            continue
                    else:
                        expanded = self.expand_macro(macro_name)
                        output += expanded
                        output += c
                    macro_name = ""
                    state = ParserState.NORMAL

            elif state == ParserState.IN_MACRO_ARGS:
                # Quoted text inside arguments is kept verbatim, quote
                # markers included, so parentheses and commas in it do not
                # split arguments.
                if c == "%" and peek == '"':
                    quote_level += 1
                    i += 2
                    argument += '%"'
                    continue
                elif c == '"' and peek == "%":
                    quote_level -= 1
                    i += 2
                    argument += '"%'
                    continue
                elif quote_level > 0:
                    argument += c
                    i += 1
                    continue

                if (c == ")") and (parens_level == 1):
                    if macro_is_whitespace_deleting(macro_name):
                        # endswith is safe when nothing has been output yet.
                        if output.endswith(" "):
                            output = output[:-1]
                        macro_name = macro_name_clean(macro_name)
                    parens_level = 0
                    macro_args.append(argument.strip())
                    expanded = self.expand_macro(macro_name, macro_args)
                    output += expanded
                    state = ParserState.NORMAL
                    macro_name = ""
                    macro_args = []
                    argument = ""
                elif (c == ",") and (parens_level == 1):
                    macro_args.append(argument.strip())
                    argument = ""
                else:
                    if c == "(":
                        parens_level += 1
                    if c == ")":
                        parens_level -= 1
                    argument += c

            elif state == ParserState.IN_CODE:
                if c == ")" and peek == "%":
                    try:
                        f = StringIO()
                        with redirect_stdout(f):
                            exec(py_expr, self.py_global_env, self.py_local_env_current)
                        s = f.getvalue()
                        if s != "":
                            output += s
                    except Exception:
                        traceback.print_exc()
                    py_expr = ""
                    state = ParserState.NORMAL
                    i += 1
                else:
                    py_expr += c

            i += 1

        # Handle cases where the text ends with a macro without arguments
        if macro_name != "":
            if macro_is_whitespace_deleting(macro_name):
                if output.endswith(" "):
                    output = output[:-1]
                macro_name = macro_name_clean(macro_name)
            output += self.expand_macro(macro_name)

        return output

    def store(self, **xargs):
        """Save this processor's state in the shared ``macro_processor_state``.

        The entry is keyed by ``self.source_file_path``. ``stored_data``
        holds the macros named by ``METADATA_keep_states`` (a name or a list
        of names) plus every macro whose name starts with ``METADATA_``.
        Without ``METADATA_keep_states`` all macros are stored. Extra keyword
        arguments are merged into the entry.

        Returns:
            The stored entry.
        """
        keep_states = self.macros.get("METADATA_keep_states")
        if keep_states is None:
            requested_keys = list(self.macros)
        else:
            # Normalise a single name before it is treated as a list.
            if isinstance(keep_states, str):
                keep_states = [keep_states]
            # Work on a copy so the macro's own value is not modified.
            requested_keys = list(keep_states)
            for key in self.macros:
                if key.startswith("METADATA_") and key not in requested_keys:
                    requested_keys.append(key)

        needs_recompilation = ("METADATA_keep_states" in self.macros) or any(
            invocation[0] == "all_tagged_by" for invocation in self.macro_invocations
        )
        target_filename = self._get_macro_with_prefix(
            "target_filename", sub_prefix="METADATA_"
        )

        # Also works for a processor not created through MacroProcessorState.
        shared_state = self.py_global_env.setdefault("macro_processor_state", {})
        shared_state[self.source_file_path] = {
            # "content": "",
            "stored_data": {
                k: v for k, v in self.macros.items() if k in requested_keys
            },
            "extension": self._get_macro_with_prefix("target_file_extension"),
            "source_path": self.source_file_path,
            "needs_recompilation": needs_recompilation,
            "target_filename": target_filename,
            **xargs,
        }
        return shared_state[self.source_file_path]


class MacroProcessorState:
    """State shared between the macro processors of several source files."""

    global_state: dict

    def __init__(self):
        self.global_state = dict()

    def macro_processor(self, macro_processor=None):
        """Attach the shared state to ``macro_processor`` and return it.

        A new ``MacroProcessor`` is created when none is given.
        """
        if macro_processor is None:
            macro_processor = MacroProcessor()
        macro_processor.py_global_env["macro_processor_state"] = self.global_state
        return macro_processor

    def print_state(self):
        """Print one line per entry: the last 20 characters of the key, then the value."""
        for key, val in self.global_state.items():
            print(f"{key[-20:]:20} {val}")