Begin rewrite of internals to be more malleable

author: Qrius <[email protected]> 2025-04-11 17:14:32 +0200
committer: Qrius <[email protected]> 2025-04-11 17:14:35 +0200
commit: 15b9c3af6b0d58f8d6bb0729a217dc6d9f4666e6 (patch)
tree: 88ae7f0ba9a1ef3491f44324c0b24036c1d5afff /src/smp
parent: 9a3ce865e64d496cb835ece3e5a84a80361480ab (diff)
download: skaldpress-15b9c3af6b0d58f8d6bb0729a217dc6d9f4666e6.tar.gz
skaldpress-15b9c3af6b0d58f8d6bb0729a217dc6d9f4666e6.zip
3 files changed, 215 insertions, 43 deletions
diff --git a/src/smp/__init__.py b/src/smp/__init__.py
index 22085ae..d6e5d52 100644
--- a/src/smp/__init__.py
+++ b/src/smp/__init__.py
@@ -20,7 +20,8 @@ def read_stdin():
     import sys
 
     data = sys.stdin.read()
-    macro_processor = smp.macro_processor.MacroProcessor()
+    macro_processor_state = smp.macro_processor.MacroProcessorState()
+    macro_processor = macro_processor_state.macro_processor()
     res = macro_processor.process_input(data)
     print("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━", file=sys.stderr)
     print(res)
@@ -44,7 +45,10 @@ def main():
     with open(sys.argv[1], "r") as f:
         file_content = f.read()
 
-    macro_processor = smp.macro_processor.MacroProcessor()
+    macro_processor_state = smp.macro_processor.MacroProcessorState()
+    macro_processor = macro_processor_state.macro_processor()
     res = macro_processor.process_input(file_content)
+    macro_processor.store("", "", "")
+    breakpoint()
     print("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━", file=sys.stderr)
     print(res)
diff --git a/src/smp/builtins.py b/src/smp/builtins.py
index 3ff15c6..0997165 100644
--- a/src/smp/builtins.py
+++ b/src/smp/builtins.py
@@ -1,11 +1,14 @@
 # import smp.exceptions
+import os
 import subprocess
 import urllib.request
 import urllib.error
 import urllib.parse
 import datetime
 import markdown
+from skaldpress.metadata_parser import extract_parse_yaml_metadata
 from gfm import AutolinkExtension, TaskListExtension  # type: ignore
+from typing import Any
 
 
 def smp_builtin_define(macro_processor, macro_name, macro_value=None):
@@ -66,11 +69,38 @@ def smp_builtin_ifneq(macro_processor, a, b, iftrue, iffalse=None):
     return ""
 
 
+def smp_builtin_add_metadata(macro_processor, metadata: dict[str, Any], overwrite=True):
+    """
+    Define each metadata entry as a macro, adding the metadata prefix if missing.
+    Not added to macro_processor as macro
+    """
+    for key, raw in metadata.items():
+        # Re-read per entry: define_macro below may redefine the prefix macro.
+        meta_prefix = macro_processor._get_macro_with_prefix("metadata_prefix")
+        name = key if key.startswith(meta_prefix) else f"{meta_prefix}{key}"
+
+        if isinstance(raw, list):
+            # Stringify each element; values already stored under name go last.
+            new_value = [str(item) for item in raw]
+            new_value.extend(macro_processor.macros.get(name, []))
+        else:
+            new_value = str(raw)
+
+        if overwrite or name not in macro_processor.macros:
+            macro_processor.define_macro(name, new_value)
+
+
 def smp_builtin_include(macro_processor, filename):
-    filename = macro_processor.process_input(filename)
-    with open(filename, "r") as f:
-        file_content = f.read()
-    return macro_processor.process_input(file_content)
+    return smp_builtin_read(macro_processor, filename, template_content=None)
+
+
+def smp_builtin_parse_leading_yaml(macro_processor, content):
+    """
+    Helper only, not registered as a macro on macro_processor.
+    """
+    parsed, remainder = extract_parse_yaml_metadata(content)
+    smp_builtin_add_metadata(macro_processor, parsed, overwrite=True)
+    return remainder
 
 
 def smp_builtin_include_verbatim(macro_processor, filename):
@@ -147,6 +177,51 @@ def smp_builtin_html_from_markdown(macro_processor, text, extensions=list()):
     return markdown.markdown(text, extensions=extensions)
 
 
+def _smp_builtin_template_content(content):
+    def inner(macro_processor):
+        """
+        Return the captured content unchanged; macro_processor is unused.
+        TODO: keep some kind of stack to track which file is being processed.
+        Entering the CONTENT is fine, the open question is how to handle exiting
+        it; a "once" macro appended to the end of the content could do that.
+        """
+        return content
+
+    return inner
+
+
+def smp_builtin_template(macro_processor, template, content):
+    return smp_builtin_read(macro_processor, template, template_content=content)
+
+
+def smp_builtin_read(macro_processor, filename, template_content=None):
+    """Read and expand filename, then pass the result to METADATA_template if set."""
+    with open(filename, "r") as source:
+        raw = source.read()
+
+    if macro_processor._get_macro_with_prefix("parse_file_yaml"):
+        parsed, raw = extract_parse_yaml_metadata(raw)
+        smp_builtin_add_metadata(macro_processor, parsed, overwrite=False)
+
+    suffix = os.path.splitext(filename)[1][1:]
+    macro_processor._define_macro_with_prefix("target_file_extension", suffix)
+
+    if template_content is not None:
+        # Record the template so the check below never applies it to itself.
+        macro_processor._get_macro_with_prefix("template_stack").append(filename)
+        macro_processor.macros["CONTENT"] = template_content
+
+    expanded = macro_processor.process_input(raw)
+    if suffix == "md":
+        expanded = smp_builtin_html_from_markdown(macro_processor, expanded)
+
+    wrapper = macro_processor.macros.get("METADATA_template")
+    stack = macro_processor._get_macro_with_prefix("template_stack")
+    if wrapper is None or wrapper in stack:
+        return expanded
+    return smp_builtin_read(macro_processor, wrapper, expanded)
+
+
 global LINK_CACHE
 LINK_CACHE: dict[str, tuple[bool, int, str]] = dict()
 
@@ -168,14 +243,24 @@ def smp_builtin_wodl(macro_processor, link, timeout_seconds=5):
         working_link = (r.status == 200) and (r.reason == "OK")
         LINK_CACHE[link] = (working_link, r.status, r.reason)
         if not working_link:
-            macro_processor.warnings.append(
-                f"Dead link {link} ({r.status} {r.reason})!"
-            )
+            macro_processor.log_warning(f"Dead link {link} ({r.status} {r.reason})!")
     except urllib.error.URLError as e:
-        macro_processor.warnings.append(f"Dead link {link} ({e})!")
+        macro_processor.log_warning(f"Dead link {link} ({e})!")
     return ""
 
 
+def smp_builtin_once(macro_processor, content):
+    """Expand content on first use; later calls return the cached expansion."""
+    cache = macro_processor._get_macro_with_prefix("once_cache")
+    if cache is None:
+        cache = {}
+        macro_processor._define_macro_with_prefix("once_cache", cache)
+    if content not in cache:
+        # Key on the unexpanded text so identical invocations share one result.
+        cache[content] = macro_processor.process_input(content)
+    return cache[content]
+
+
 def smp_builtin_dumpenv(macro_processor):
     out = ""
     out += "━ Macros ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"
@@ -188,7 +273,3 @@ def smp_builtin_dumpenv(macro_processor):
         out += f"{repr(key)}: {repr(val)}\n"
     out += "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
     return out
-
-
-# TODO Add macro that spawns a interactive shell with the python env. would allow interactive debugging :)
-# needs to have a continue function or something (probably on C-d
diff --git a/src/smp/macro_processor.py b/src/smp/macro_processor.py
index 68fd726..2af26dd 100644
--- a/src/smp/macro_processor.py
+++ b/src/smp/macro_processor.py
@@ -56,15 +56,18 @@ def seek(input: str, start: int, target: str) -> int | None:
 
 
 class MacroProcessor:
-    """All currently defined macros in this MacroProcessor"""
+    source_file_path: str
 
+    """All currently defined macros in this MacroProcessor"""
     macros: dict[str, Any]
     """ All macro invocations that has happened """
     macro_invocations: list[tuple[str, list[str]]]
-    """ Emitted warnings """
     warnings: list[Any]
+
     """ Global environment for python execution """
+
     py_global_env: dict
+
     py_local_env_alt: dict
     py_local_env_current: dict
 
@@ -72,6 +75,7 @@ class MacroProcessor:
 
     start_quote: str = '%"'
     end_quote: str = '"%'
+    prefix: str = ""
 
     def __init__(self, prefix=""):
         self.macros = dict()
@@ -81,31 +85,46 @@ class MacroProcessor:
         self.py_local_env_alt = dict()
         self.py_local_env_current = self.macros
         self.indent_level = ""
-
-        self._define_builtins(self.macros, prefix=prefix)
-        self._define_builtins(self.py_local_env_alt, prefix=prefix)
-
-    def _define_builtins(self, env, prefix=""):
-        env[f"{prefix}macro_processor"] = self
-        env[f"{prefix}define"] = smp.builtins.smp_builtin_define
-        env[f"{prefix}undefine"] = smp.builtins.smp_builtin_undefine
-        env[f"{prefix}define_array"] = smp.builtins.smp_builtin_define_array
-        env[f"{prefix}ifdef"] = smp.builtins.smp_builtin_ifdef
-        env[f"{prefix}ifndef"] = smp.builtins.smp_builtin_ifndef
-        env[f"{prefix}ifeq"] = smp.builtins.smp_builtin_ifeq
-        env[f"{prefix}ifneq"] = smp.builtins.smp_builtin_ifneq
-        env[f"{prefix}include"] = smp.builtins.smp_builtin_include
-        env[f"{prefix}include_verbatim"] = smp.builtins.smp_builtin_include_verbatim
-        env[f"{prefix}shell"] = smp.builtins.smp_builtin_shell
-        env[f"{prefix}dumpenv"] = smp.builtins.smp_builtin_dumpenv
-        env[f"{prefix}eval"] = smp.builtins.smp_builtin_eval
-        env[f"{prefix}array_push"] = smp.builtins.smp_builtin_array_push
-        env[f"{prefix}array_each"] = smp.builtins.smp_builtin_array_each
-        env[f"{prefix}array_size"] = smp.builtins.smp_builtin_array_size
-        env[f"{prefix}explode"] = smp.builtins.smp_builtin_explode
-        env[f"{prefix}format_time"] = smp.builtins.smp_builtin_format_time
-        env[f"{prefix}html_from_markdown"] = smp.builtins.smp_builtin_html_from_markdown
-        env[f"{prefix}wodl"] = smp.builtins.smp_builtin_wodl
+        self.prefix = prefix
+
+        self._define_builtins(self.macros)
+        self._define_builtins(self.py_local_env_alt)
+
+    def _define_builtins(self, env):
+        """Register the prefixed builtin macros and default settings in env."""
+        lib = smp.builtins
+        entries = {
+            "macro_processor": self,
+            "define": lib.smp_builtin_define,
+            "undefine": lib.smp_builtin_undefine,
+            "define_array": lib.smp_builtin_define_array,
+            "ifdef": lib.smp_builtin_ifdef,
+            "ifndef": lib.smp_builtin_ifndef,
+            "ifeq": lib.smp_builtin_ifeq,
+            "ifneq": lib.smp_builtin_ifneq,
+            "once": lib.smp_builtin_once,
+            "include": lib.smp_builtin_include,
+            "include_verbatim": lib.smp_builtin_include_verbatim,
+            "shell": lib.smp_builtin_shell,
+            "dumpenv": lib.smp_builtin_dumpenv,
+            "eval": lib.smp_builtin_eval,
+            "array_push": lib.smp_builtin_array_push,
+            "array_each": lib.smp_builtin_array_each,
+            "array_size": lib.smp_builtin_array_size,
+            "explode": lib.smp_builtin_explode,
+            "format_time": lib.smp_builtin_format_time,
+            "html_from_markdown": lib.smp_builtin_html_from_markdown,
+            "wodl": lib.smp_builtin_wodl,
+            "template": lib.smp_builtin_template,
+            "template_stack": [],
+            # If true, include-macros will parse yaml in beginning of content
+            "parse_file_yaml": True,
+            # If true, some macros run in a draft-mode, skipping slow steps.
+            "draft": False,
+            "metadata_prefix": "METADATA_",
+        }
+        for name, value in entries.items():
+            env[f"{self.prefix}{name}"] = value
 
     def define_macro_string(self, macro_name, macro_value):
         self.define_macro(macro_name, str(macro_value))
@@ -113,6 +132,18 @@ class MacroProcessor:
     def define_macro(self, macro_name, macro_value):
         self.macros[macro_name] = macro_value
 
+    def _define_macro_with_prefix(self, macro_name, macro_value, sub_prefix: str = ""):
+        self.define_macro(f"{self.prefix}{sub_prefix}{macro_name}", macro_value)
+
+    def _get_macro_with_prefix(self, macro_name, sub_prefix: str = "", default=None):
+        return self.macros.get(f"{self.prefix}{sub_prefix}{macro_name}", default)
+
+    def log_warning(self, message):
+        """
+        Append message to self.warnings. TODO: add line number, file etc. when known.
+        """
+        self.warnings.append(message)
+
     def expand_macro(self, macro_name: str, args: list[str] = list()) -> str:
         # Ignore trailing underscore in macro name, the parser will pop a space in front if
         # present, but we should ignore it for finding the macro.
@@ -152,7 +183,9 @@ class MacroProcessor:
                 return str(macro(*macro_args))
             except Exception as e:
                 s = f"{macro_name}({','.join([repr(x) for x in args])})"
-                self.warnings.append(f"Error expanding macro {s} ({e})")
+                self.log_warning(
+                    f"Error expanding macro {s} ({e})\n{traceback.format_exc()}"
+                )
                 return s
         if isinstance(macro, str):
             expanded = macro
@@ -175,6 +208,10 @@ class MacroProcessor:
 
         @for <python-expression>
         @endfor
+
+        Note: Consider a new implementation that works the way M4 does, pushing
+        expanded macros back onto the input string. This may be more confusing,
+        but may also be faster (stream or mutable string).
         """
         output = ""
         state = ParserState.NORMAL
@@ -322,8 +359,58 @@ class MacroProcessor:
         # Handle cases where the text ends with a macro without arguments
         if macro_name != "":
             if macro_is_whitespace_deleting(macro_name):
-                if output[-1] == " ":
+                if len(output) > 0 and output[-1] == " ":
                     output = output[:-1]
                 macro_name = macro_name_clean(macro_name)
             output += self.expand_macro(macro_name)
         return output
+
+    def store(self, **xargs):
+        """
+        Save this file's macro state in the shared macro_processor_state dict.
+        Stores the macros named by METADATA_keep_states (all macros if unset)
+        plus every METADATA_ macro, keyed by source_file_path. Keyword
+        arguments are added to the stored entry, which is also returned.
+        """
+        keep_states = self.macros.get("METADATA_keep_states")
+        if keep_states is None:
+            wanted = set(self.macros)
+        elif isinstance(keep_states, str):
+            # A lone name must not be treated as a sequence of characters.
+            wanted = {keep_states}
+        else:
+            # Copy, so the METADATA_keep_states macro itself is never mutated.
+            wanted = set(keep_states)
+        wanted.update(k for k in self.macros if k.startswith("METADATA_"))
+
+        state = {
+            "stored_data": {k: v for k, v in self.macros.items() if k in wanted},
+            "extension": self._get_macro_with_prefix("target_file_extension"),
+            "source_path": self.source_file_path,
+            "needs_recompilation": "METADATA_keep_states" in self.macros
+            or any(call[0] == "all_tagged_by" for call in self.macro_invocations),
+            "target_filename": self._get_macro_with_prefix(
+                "target_filename", sub_prefix="METADATA_"
+            ),
+            **xargs,
+        }
+        self.py_global_env["macro_processor_state"][self.source_file_path] = state
+        return state
+
+
+class MacroProcessorState:
+    global_state: dict
+
+    def __init__(self):
+        self.global_state = {}
+
+    def macro_processor(self, macro_processor=None):
+        """Attach the shared state to macro_processor (a new one if None)."""
+        processor = MacroProcessor() if macro_processor is None else macro_processor
+        processor.py_global_env["macro_processor_state"] = self.global_state
+        return processor
+
+    def print_state(self):
+        """Print each stored entry, keyed by the last 20 characters of its path."""
+        for path, entry in self.global_state.items():
+            print(f"{path[-20:]:20} {entry}")
author	Qrius <[email protected]>	2025-04-11 17:14:32 +0200
committer	Qrius <[email protected]>	2025-04-11 17:14:35 +0200
commit	15b9c3af6b0d58f8d6bb0729a217dc6d9f4666e6 (patch)
tree	88ae7f0ba9a1ef3491f44324c0b24036c1d5afff /src/smp
parent	9a3ce865e64d496cb835ece3e5a84a80361480ab (diff)
download	skaldpress-15b9c3af6b0d58f8d6bb0729a217dc6d9f4666e6.tar.gz skaldpress-15b9c3af6b0d58f8d6bb0729a217dc6d9f4666e6.zip