""" Variable Wrapper Utility Automatically wraps variables in HTML content with tooltip spans """ import re import html from typing import List, Tuple from .symbol_loader import get_symbol_definitions class VariableWrapper: """Wraps known variables in HTML content with tooltip markup""" def __init__(self): """Initialize variable wrapper with symbol definitions""" self.symbols = get_symbol_definitions() self._build_patterns() def _build_patterns(self) -> None: """Build regex patterns for all known symbols""" # Get all symbols and sort by length (longest first) to avoid partial matches symbols_list = sorted( self.symbols.get_all_symbols(), key=len, reverse=True ) # Single letters that commonly appear in regular text # Only match these in specific mathematical contexts common_words = {'A', 'I', 'V', 'P', 'Q', 'R', 'L', 'C', 'E', 'B', 'G', 'X', 'Y', 'Z', 'f', 'd', 'h'} # Very common English words that need extra-strict matching very_common = {'A', 'I'} self.patterns: List[Tuple[str, str]] = [] self.context_patterns: List[Tuple[str, str]] = [] # Patterns requiring context for symbol in symbols_list: # Escape special regex characters escaped = re.escape(symbol) # For single-letter variables, only match in formula/code contexts if symbol in common_words: if symbol in very_common: # Extra restrictive for A, I - only in clear math context # Must be preceded by =, ×, +, -, /, ( with optional single space # Multiple patterns to handle both "=A" and "= A" cases # Use alternation to avoid variable-width lookbehind pattern = f'(?<=[=×+\\-/\\(])\\s?({escaped})(?=[\\s=+\\-*/()\\[\\]])' self.context_patterns.append((pattern, symbol)) else: # More restrictive pattern - requires mathematical context # Match if preceded by: =, mathematical operators, but NOT punctuation pattern = f'(?<=[=])\\s?({escaped})(?=[\\s=+\\-*/()\\[\\],;<>])|(?<=\\s)({escaped})(?=[\\s=+\\-*/()\\[\\],;<>])' self.context_patterns.append((pattern, symbol)) else: # Normal pattern for multi-character symbols # Use 
word boundaries but allow underscores and subscripts pattern = f'(? str: """ Wrap known variables in HTML content with tooltip spans Args: html_content: HTML content to process Returns: HTML content with variables wrapped in tooltip spans """ # Track which variables were found (for debugging) wrapped_vars = set() # Process normal patterns all_patterns = self.patterns + self.context_patterns for pattern, symbol in all_patterns: tooltip_text = self.symbols.get_tooltip(symbol) if not tooltip_text: continue # Escape for HTML attribute (newlines become ) tooltip_escaped = html.escape(tooltip_text, quote=True).replace('\n', ' ') # Create replacement span with tooltip replacement = ( f'' f'\\1' # Captured group (the symbol itself) f'' ) # Count matches before replacement matches = list(re.finditer(pattern, html_content)) if matches: wrapped_vars.add(symbol) # Replace pattern with wrapped version # Use negative lookahead to avoid wrapping already-wrapped variables pattern_with_check = f'(?)(?)' html_content = re.sub( pattern_with_check, replacement, html_content ) if wrapped_vars: print(f"[VariableWrapper] Wrapped {len(wrapped_vars)} unique variables: {', '.join(sorted(wrapped_vars)[:10])}...") return html_content def wrap_in_context(self, html_content: str) -> str: """ More sophisticated wrapping that parses HTML structure to avoid wrapping in code blocks, headings, etc. Args: html_content: HTML content to process Returns: HTML content with variables wrapped (context-aware) """ # For now, use simple wrapping # TODO: Implement HTML parsing to be more selective # (e.g., skip ,
        #   <code>, <pre>, <h1>-<h6> tags)

        # Simple exclusion: Don't process content inside <code> or <pre>
        code_blocks = []

        def preserve_code(match):
            """Preserve code blocks and replace with placeholder"""
            code_blocks.append(match.group(0))
            return f"___CODE_BLOCK_{len(code_blocks) - 1}___"

        # Temporarily remove code blocks
        html_content = re.sub(
            r'<(code|pre)>(.*?)',
            preserve_code,
            html_content,
            flags=re.DOTALL
        )

        # Wrap variables
        html_content = self.wrap_variables(html_content)

        # Restore code blocks
        for i, code_block in enumerate(code_blocks):
            html_content = html_content.replace(
                f"___CODE_BLOCK_{i}___",
                code_block
            )

        return html_content