You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
159 lines
5.9 KiB
159 lines
5.9 KiB
"""
Variable Wrapper Utility

Automatically wraps known variables in HTML content with tooltip spans.
"""
|
|
|
|
import re
|
|
import html
|
|
from typing import List, Tuple
|
|
from .symbol_loader import get_symbol_definitions
|
|
|
|
|
|
class VariableWrapper:
    """Wraps known variables in HTML content with tooltip markup.

    Symbols and their tooltip texts come from the object stored in
    ``self.symbols``, which must provide ``get_all_symbols()`` and
    ``get_tooltip(symbol)``. Every occurrence of a known symbol in the text
    of an HTML fragment is wrapped as::

        <span class="var-tooltip" data-symbol="SYM" title="TOOLTIP">SYM</span>
    """

    # Single letters that commonly appear in regular text.
    # These are only matched in specific mathematical contexts.
    _CONTEXT_ONLY_SYMBOLS = frozenset({
        'A', 'I', 'V', 'P', 'Q', 'R', 'L', 'C', 'E', 'B', 'G', 'X', 'Y', 'Z',
        'f', 'd', 'h',
    })

    # Very common English words that need extra-strict matching.
    _STRICT_CONTEXT_SYMBOLS = frozenset({'A', 'I'})

    # Regions of the input that must never receive a tooltip span.
    _PROTECTED_RE = re.compile(
        # A tooltip span emitted by an earlier run (tag, content and close).
        r'<span\b[^<>]*\bvar-tooltip\b[^<>]*>.*?</span\s*>'
        # HTML comments.
        r'|<!--.*?-->'
        # Any other tag, including its attribute values.
        r'|</?[A-Za-z][^<>]*>'
        # Character references such as &tau; or &#937;.
        r'|&(?:[A-Za-z][A-Za-z0-9]*|#[0-9]+|#[xX][0-9A-Fa-f]+);',
        re.DOTALL,
    )

    # <code>/<pre> elements, with or without attributes.
    _CODE_BLOCK_RE = re.compile(
        r'<(code|pre)(?:\s[^>]*)?>.*?</\1\s*>',
        re.DOTALL | re.IGNORECASE,
    )

    # Placeholder that stands in for a stashed code block.
    _PLACEHOLDER_RE = re.compile(r'___CODE_BLOCK_([0-9]+)___')

    def __init__(self) -> None:
        """Initialize variable wrapper with symbol definitions"""
        self.symbols = get_symbol_definitions()
        self._build_patterns()

    def _build_patterns(self) -> None:
        """Build regex patterns for all known symbols.

        Fills two lists of ``(pattern, symbol)`` tuples:

        * ``self.patterns`` - multi-character symbols, matched when they are
          not adjacent to a word character.
        * ``self.context_patterns`` - ambiguous single letters, matched only
          when the surrounding characters look like a formula.

        In every pattern the capturing group that takes part in a match
        holds the symbol itself.
        """
        # Longest first, so a long symbol claims its text before any shorter
        # symbol that happens to be a part of it.
        ordered_symbols = sorted(
            self.symbols.get_all_symbols(),
            key=len,
            reverse=True,
        )

        self.patterns: List[Tuple[str, str]] = []
        self.context_patterns: List[Tuple[str, str]] = []  # Patterns requiring context

        for symbol in ordered_symbols:
            # Escape special regex characters
            escaped = re.escape(symbol)

            if symbol in self._STRICT_CONTEXT_SYMBOLS:
                # Extra restrictive for A, I: must directly follow one of
                # = × + - / ( with at most one whitespace character between,
                # and must be followed by whitespace, an operator or a bracket.
                pattern = (
                    r'(?<=[=×+\-/\(])\s?(' + escaped + r')'
                    r'(?=[\s=+\-*/()\[\]])'
                )
                self.context_patterns.append((pattern, symbol))
            elif symbol in self._CONTEXT_ONLY_SYMBOLS:
                # Must follow "=" (optionally separated by one whitespace
                # character) or a whitespace character, and be followed by
                # whitespace, an operator, a bracket or one of , ; < >
                follows = r'(?=[\s=+\-*/()\[\],;<>])'
                pattern = (
                    r'(?<=[=])\s?(' + escaped + r')' + follows
                    + r'|(?<=\s)(' + escaped + r')' + follows
                )
                self.context_patterns.append((pattern, symbol))
            else:
                # Normal pattern for multi-character symbols: not adjacent to
                # a word character (letters, digits, underscore).
                pattern = r'(?<!\w)(' + escaped + r')(?!\w)'
                self.patterns.append((pattern, symbol))

        print(f"[VariableWrapper] Built {len(self.patterns)} normal patterns + {len(self.context_patterns)} context-sensitive patterns")

    def wrap_variables(self, html_content: str) -> str:
        """
        Wrap known variables in HTML content with tooltip spans

        All matches are located in the original text and the result is
        assembled in a single pass. Text inside tags (attribute values),
        comments, character references and existing ``var-tooltip`` spans is
        never wrapped, so inserted markup cannot be corrupted by a later
        symbol and running the method twice gives the same result as
        running it once.

        Args:
            html_content: HTML content to process

        Returns:
            HTML content with variables wrapped in tooltip spans
        """
        if not html_content:
            return html_content

        # One flag per character: non-zero means "already claimed".
        taken = bytearray(len(html_content))
        for region in self._PROTECTED_RE.finditer(html_content):
            taken[region.start():region.end()] = b'\x01' * (region.end() - region.start())

        # Track which variables were found (for debugging)
        wrapped_vars = set()
        insertions = []  # (start, end, markup) for each accepted match

        # Normal patterns first (longest symbols first), then context patterns.
        for pattern, symbol in self.patterns + self.context_patterns:
            tooltip_text = self.symbols.get_tooltip(symbol)
            if not tooltip_text:
                continue

            # Escape for HTML attribute (newlines become spaces)
            tooltip_escaped = html.escape(tooltip_text, quote=True).replace('\n', ' ')
            opening_tag = (
                f'<span class="var-tooltip" '
                f'data-symbol="{html.escape(symbol, quote=True)}" '
                f'title="{tooltip_escaped}">'
            )

            for match in re.finditer(pattern, html_content):
                # Wrap only the symbol group, not lookaround context or the
                # optional whitespace some context patterns consume.
                start, end = match.span(match.lastindex or 0)
                if start == end or any(taken[start:end]):
                    continue
                taken[start:end] = b'\x01' * (end - start)
                # The markup is inserted literally (no regex replacement
                # template), so backslashes in tooltips are preserved.
                insertions.append(
                    (start, end, opening_tag + html_content[start:end] + '</span>')
                )
                wrapped_vars.add(symbol)

        if wrapped_vars:
            print(f"[VariableWrapper] Wrapped {len(wrapped_vars)} unique variables: {', '.join(sorted(wrapped_vars)[:10])}...")

        if not insertions:
            return html_content

        # Accepted matches never overlap, so stitching them in by start
        # offset rebuilds the document.
        insertions.sort(key=lambda item: item[0])
        pieces = []
        cursor = 0
        for start, end, markup in insertions:
            pieces.append(html_content[cursor:start])
            pieces.append(markup)
            cursor = end
        pieces.append(html_content[cursor:])
        return ''.join(pieces)

    def wrap_in_context(self, html_content: str) -> str:
        """
        Wrap variables while leaving <code> and <pre> elements untouched

        Code elements (with or without attributes) are swapped for
        placeholders, the remaining content goes through
        ``wrap_variables``, and the code elements are put back unchanged.

        Args:
            html_content: HTML content to process

        Returns:
            HTML content with variables wrapped (context-aware)
        """
        # TODO: Implement HTML parsing to be more selective
        # (e.g., also skip <h1>-<h6> tags)
        if not html_content:
            return html_content

        code_blocks: List[str] = []

        def preserve_code(match):
            """Preserve code blocks and replace with placeholder"""
            code_blocks.append(match.group(0))
            return f"___CODE_BLOCK_{len(code_blocks) - 1}___"

        def restore_code(match):
            """Swap a placeholder back for the code block it stands for"""
            index = int(match.group(1))
            if index < len(code_blocks):
                return code_blocks[index]
            # Placeholder-like text that was part of the input: keep as is.
            return match.group(0)

        # Temporarily remove code blocks
        stashed = self._CODE_BLOCK_RE.sub(preserve_code, html_content)

        # Wrap variables
        wrapped = self.wrap_variables(stashed)

        # Restore code blocks in one pass so restored code is never rescanned
        return self._PLACEHOLDER_RE.sub(restore_code, wrapped)
|