chore: remove .venv
from Git via .gitignore
This commit is contained in:
parent
b2e290793f
commit
f9173759f0
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
.venv/
|
|
@ -1,247 +0,0 @@
|
|||
<#
|
||||
.Synopsis
|
||||
Activate a Python virtual environment for the current PowerShell session.
|
||||
|
||||
.Description
|
||||
Pushes the python executable for a virtual environment to the front of the
|
||||
$Env:PATH environment variable and sets the prompt to signify that you are
|
||||
in a Python virtual environment. Makes use of the command line switches as
|
||||
well as the `pyvenv.cfg` file values present in the virtual environment.
|
||||
|
||||
.Parameter VenvDir
|
||||
Path to the directory that contains the virtual environment to activate. The
|
||||
default value for this is the parent of the directory that the Activate.ps1
|
||||
script is located within.
|
||||
|
||||
.Parameter Prompt
|
||||
The prompt prefix to display when this virtual environment is activated. By
|
||||
default, this prompt is the name of the virtual environment folder (VenvDir)
|
||||
surrounded by parentheses and followed by a single space (i.e. '(.venv) ').
|
||||
|
||||
.Example
|
||||
Activate.ps1
|
||||
Activates the Python virtual environment that contains the Activate.ps1 script.
|
||||
|
||||
.Example
|
||||
Activate.ps1 -Verbose
|
||||
Activates the Python virtual environment that contains the Activate.ps1 script,
|
||||
and shows extra information about the activation as it executes.
|
||||
|
||||
.Example
|
||||
Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv
|
||||
Activates the Python virtual environment located in the specified location.
|
||||
|
||||
.Example
|
||||
Activate.ps1 -Prompt "MyPython"
|
||||
Activates the Python virtual environment that contains the Activate.ps1 script,
|
||||
and prefixes the current prompt with the specified string (surrounded in
|
||||
parentheses) while the virtual environment is active.
|
||||
|
||||
.Notes
|
||||
On Windows, it may be required to enable this Activate.ps1 script by setting the
|
||||
execution policy for the user. You can do this by issuing the following PowerShell
|
||||
command:
|
||||
|
||||
PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
|
||||
|
||||
For more information on Execution Policies:
|
||||
https://go.microsoft.com/fwlink/?LinkID=135170
|
||||
|
||||
#>
|
||||
Param(
    # Optional: directory of the virtual environment to activate.
    [Parameter(Mandatory = $false)]
    [String]
    $VenvDir,
    # Optional: prompt prefix to display while the environment is active.
    [Parameter(Mandatory = $false)]
    [String]
    $Prompt
)
|
||||
|
||||
<# Function declarations --------------------------------------------------- #>
|
||||
|
||||
<#
|
||||
.Synopsis
|
||||
Remove all shell session elements added by the Activate script, including the
|
||||
entry for the virtual environment's Python executable at the beginning of
|
||||
the PATH variable.
|
||||
|
||||
.Parameter NonDestructive
|
||||
If present, do not remove this function from the global namespace for the
|
||||
session.
|
||||
|
||||
#>
|
||||
function global:deactivate ([switch]$NonDestructive) {
    # Revert to original values
    # Each section below only acts when the corresponding saved item exists,
    # so calling this function when nothing is active is harmless.

    # The prior prompt:
    if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) {
        Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt
        Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT
    }

    # The prior PYTHONHOME:
    if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) {
        Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME
        Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME
    }

    # The prior PATH:
    if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) {
        Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH
        Remove-Item -Path Env:_OLD_VIRTUAL_PATH
    }

    # Just remove the VIRTUAL_ENV altogether:
    if (Test-Path -Path Env:VIRTUAL_ENV) {
        Remove-Item -Path env:VIRTUAL_ENV
    }

    # Just remove VIRTUAL_ENV_PROMPT altogether.
    if (Test-Path -Path Env:VIRTUAL_ENV_PROMPT) {
        Remove-Item -Path env:VIRTUAL_ENV_PROMPT
    }

    # Just remove the _PYTHON_VENV_PROMPT_PREFIX altogether:
    # (-Force is used because the variable is created with -Option ReadOnly.)
    if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) {
        Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force
    }

    # Leave deactivate function in the global namespace if requested:
    if (-not $NonDestructive) {
        Remove-Item -Path function:deactivate
    }
}
|
||||
|
||||
<#
|
||||
.Description
|
||||
Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the
|
||||
given folder, and returns them in a map.
|
||||
|
||||
For each line in the pyvenv.cfg file, if that line can be parsed into exactly
|
||||
two strings separated by `=` (with any amount of whitespace surrounding the =)
|
||||
then it is considered a `key = value` line. The left hand string is the key,
|
||||
the right hand is the value.
|
||||
|
||||
If the value starts with a `'` or a `"` then the first and last character is
|
||||
stripped from the value before being captured.
|
||||
|
||||
.Parameter ConfigDir
|
||||
Path to the directory that contains the `pyvenv.cfg` file.
|
||||
#>
|
||||
function Get-PyVenvConfig(
    [String]
    $ConfigDir
) {
    Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg"

    # Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue).
    $pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue

    # An empty map will be returned if no config file is found.
    $pyvenvConfig = @{ }

    if ($pyvenvConfigPath) {

        Write-Verbose "File exists, parse `key = value` lines"
        $pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath

        $pyvenvConfigContent | ForEach-Object {
            # Split on the first '=' only, so values may themselves contain '='.
            $keyval = $PSItem -split "\s*=\s*", 2
            if ($keyval[0] -and $keyval[1]) {
                $val = $keyval[1]

                # Remove extraneous quotations around a string value.
                # The length guard keeps a value that is a lone quote character
                # from reaching Substring(1, -1), which would throw.
                if ($val.Length -ge 2 -and "'""".Contains($val.Substring(0, 1))) {
                    $val = $val.Substring(1, $val.Length - 2)
                }

                $pyvenvConfig[$keyval[0]] = $val
                Write-Verbose "Adding Key: '$($keyval[0])'='$val'"
            }
        }
    }
    return $pyvenvConfig
}
|
||||
|
||||
|
||||
<# Begin Activate script --------------------------------------------------- #>
|
||||
|
||||
# Determine the containing directory of this script
$VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition
$VenvExecDir = Get-Item -Path $VenvExecPath

# Diagnostic output only; the quoted values are now closed with a matching
# apostrophe (the last two messages previously left the quote open).
Write-Verbose "Activation script is located in path: '$VenvExecPath'"
Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)'"
Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)'"
|
||||
|
||||
# Set values required in priority: CmdLine, ConfigFile, Default
# First, get the location of the virtual environment, it might not be
# VenvExecDir if specified on the command line.
if ($VenvDir) {
    Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values"
}
else {
    Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir."
    # Use the parent of the script's directory, without trailing separators.
    $VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/")
    Write-Verbose "VenvDir=$VenvDir"
}

# Next, read the `pyvenv.cfg` file to determine any required value such
# as `prompt`.
$pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir

# Next, set the prompt from the command line, or the config file, or
# just use the name of the virtual environment folder.
if ($Prompt) {
    Write-Verbose "Prompt specified as argument, using '$Prompt'"
}
else {
    Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value"
    if ($pyvenvCfg -and $pyvenvCfg['prompt']) {
        Write-Verbose " Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'"
        $Prompt = $pyvenvCfg['prompt'];
    }
    else {
        Write-Verbose " Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)"
        Write-Verbose " Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'"
        # Fall back to the last path component of the environment directory.
        $Prompt = Split-Path -Path $venvDir -Leaf
    }
}

Write-Verbose "Prompt = '$Prompt'"
Write-Verbose "VenvDir='$VenvDir'"
|
||||
|
||||
# Deactivate any currently active virtual environment, but leave the
# deactivate function in place.
deactivate -nondestructive

# Now set the environment variable VIRTUAL_ENV, used by many tools to determine
# that there is an activated venv.
$env:VIRTUAL_ENV = $VenvDir

if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) {

    Write-Verbose "Setting prompt to '$Prompt'"

    # Set the prompt to include the env name
    # Make sure _OLD_VIRTUAL_PROMPT is global
    function global:_OLD_VIRTUAL_PROMPT { "" }
    # Overwrite the placeholder with the current prompt so deactivate can restore it.
    Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT
    New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt

    # The replacement prompt prints the prefix, then delegates to the saved prompt.
    function global:prompt {
        Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) "
        _OLD_VIRTUAL_PROMPT
    }
    $env:VIRTUAL_ENV_PROMPT = $Prompt
}

# Clear PYTHONHOME
# (the previous value is saved first so deactivate can put it back)
if (Test-Path -Path Env:PYTHONHOME) {
    Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME
    Remove-Item -Path Env:PYTHONHOME
}

# Add the venv to the PATH
# (the previous PATH is saved first so deactivate can put it back)
Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH
$Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH"
|
|
@ -1,70 +0,0 @@
|
|||
# This file must be used with "source bin/activate" *from bash*
|
||||
# You cannot run it directly
|
||||
|
||||
# Undo the changes made by this script: restore PATH, PYTHONHOME and PS1 from
# their saved copies and unset the VIRTUAL_ENV variables. When called with the
# single argument "nondestructive" the function itself is kept defined.
deactivate () {
    # reset old environment variables
    if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then
        PATH="${_OLD_VIRTUAL_PATH:-}"
        export PATH
        unset _OLD_VIRTUAL_PATH
    fi
    if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then
        PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}"
        export PYTHONHOME
        unset _OLD_VIRTUAL_PYTHONHOME
    fi

    # Call hash to forget past commands. Without forgetting
    # past commands the $PATH changes we made may not be respected
    hash -r 2> /dev/null

    if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then
        PS1="${_OLD_VIRTUAL_PS1:-}"
        export PS1
        unset _OLD_VIRTUAL_PS1
    fi

    unset VIRTUAL_ENV
    unset VIRTUAL_ENV_PROMPT
    if [ ! "${1:-}" = "nondestructive" ] ; then
    # Self destruct!
        unset -f deactivate
    fi
}
|
||||
|
||||
# unset irrelevant variables
deactivate nondestructive

# on Windows, a path can contain colons and backslashes and has to be converted:
if [ "${OSTYPE:-}" = "cygwin" ] || [ "${OSTYPE:-}" = "msys" ] ; then
    # transform D:\path\to\venv to /d/path/to/venv on MSYS
    # and to /cygdrive/d/path/to/venv on Cygwin
    export VIRTUAL_ENV=$(cygpath "/home/devraza/Projects/fireguide/.venv")
else
    # use the path as-is
    export VIRTUAL_ENV="/home/devraza/Projects/fireguide/.venv"
fi

# Save the current PATH for deactivate, then put the venv's bin directory first.
_OLD_VIRTUAL_PATH="$PATH"
PATH="$VIRTUAL_ENV/bin:$PATH"
export PATH

# unset PYTHONHOME if set
# this will fail if PYTHONHOME is set to the empty string (which is bad anyway)
# could use `if (set -u; : $PYTHONHOME) ;` in bash
if [ -n "${PYTHONHOME:-}" ] ; then
    _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}"
    unset PYTHONHOME
fi

# Prefix the prompt unless VIRTUAL_ENV_DISABLE_PROMPT is set to a non-empty value.
if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then
    _OLD_VIRTUAL_PS1="${PS1:-}"
    PS1="(.venv) ${PS1:-}"
    export PS1
    VIRTUAL_ENV_PROMPT="(.venv) "
    export VIRTUAL_ENV_PROMPT
fi

# Call hash to forget past commands. Without forgetting
# past commands the $PATH changes we made may not be respected
hash -r 2> /dev/null
|
|
@ -1,27 +0,0 @@
|
|||
# This file must be used with "source bin/activate.csh" *from csh*.
|
||||
# You cannot run it directly.
|
||||
|
||||
# Created by Davide Di Blasi <davidedb@gmail.com>.
|
||||
# Ported to Python 3.3 venv by Andrew Svetlov <andrew.svetlov@gmail.com>
|
||||
|
||||
# "deactivate" restores PATH and prompt from their saved copies when those
# exist, unsets the VIRTUAL_ENV variables, and removes its own alias unless it
# is invoked with the argument "nondestructive".
alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; unsetenv VIRTUAL_ENV_PROMPT; test "\!:*" != "nondestructive" && unalias deactivate'

# Unset irrelevant variables.
deactivate nondestructive

setenv VIRTUAL_ENV "/home/devraza/Projects/fireguide/.venv"

# Save the current PATH for deactivate, then put the venv's bin directory first.
set _OLD_VIRTUAL_PATH="$PATH"
setenv PATH "$VIRTUAL_ENV/bin:$PATH"


# Save the current prompt for deactivate.
set _OLD_VIRTUAL_PROMPT="$prompt"

# Prefix the prompt unless VIRTUAL_ENV_DISABLE_PROMPT is defined.
if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then
    set prompt = "(.venv) $prompt"
    setenv VIRTUAL_ENV_PROMPT "(.venv) "
endif

alias pydoc python -m pydoc

rehash
|
|
@ -1,69 +0,0 @@
|
|||
# This file must be used with "source <venv>/bin/activate.fish" *from fish*
|
||||
# (https://fishshell.com/). You cannot run it directly.
|
||||
|
||||
# Restores PATH, PYTHONHOME and the fish_prompt function from their saved
# copies and erases the VIRTUAL_ENV variables. The function erases itself
# unless its first argument is "nondestructive".
function deactivate -d "Exit virtual environment and return to normal shell environment"
    # reset old environment variables
    if test -n "$_OLD_VIRTUAL_PATH"
        set -gx PATH $_OLD_VIRTUAL_PATH
        set -e _OLD_VIRTUAL_PATH
    end
    if test -n "$_OLD_VIRTUAL_PYTHONHOME"
        set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME
        set -e _OLD_VIRTUAL_PYTHONHOME
    end

    if test -n "$_OLD_FISH_PROMPT_OVERRIDE"
        set -e _OLD_FISH_PROMPT_OVERRIDE
        # prevents error when using nested fish instances (Issue #93858)
        if functions -q _old_fish_prompt
            functions -e fish_prompt
            functions -c _old_fish_prompt fish_prompt
            functions -e _old_fish_prompt
        end
    end

    set -e VIRTUAL_ENV
    set -e VIRTUAL_ENV_PROMPT
    if test "$argv[1]" != "nondestructive"
        # Self-destruct!
        functions -e deactivate
    end
end
|
||||
|
||||
# Unset irrelevant variables.
deactivate nondestructive

set -gx VIRTUAL_ENV "/home/devraza/Projects/fireguide/.venv"

# Save the current PATH for deactivate, then put the venv's bin directory first.
set -gx _OLD_VIRTUAL_PATH $PATH
set -gx PATH "$VIRTUAL_ENV/bin" $PATH

# Unset PYTHONHOME if set.
if set -q PYTHONHOME
    set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME
    set -e PYTHONHOME
end

if test -z "$VIRTUAL_ENV_DISABLE_PROMPT"
    # fish uses a function instead of an env var to generate the prompt.

    # Save the current fish_prompt function as the function _old_fish_prompt.
    functions -c fish_prompt _old_fish_prompt

    # With the original prompt function renamed, we can override with our own.
    function fish_prompt
        # Save the return status of the last command.
        set -l old_status $status

        # Output the venv prompt; color taken from the blue of the Python logo.
        printf "%s%s%s" (set_color 4B8BBE) "(.venv) " (set_color normal)

        # Restore the return status of the previous command.
        echo "exit $old_status" | .
        # Output the original/"old" prompt.
        _old_fish_prompt
    end

    # Marker checked by deactivate to know the prompt function was overridden.
    set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV"
    set -gx VIRTUAL_ENV_PROMPT "(.venv) "
end
|
|
@ -1,8 +0,0 @@
|
|||
#!/home/devraza/Projects/fireguide/.venv/bin/python3
# -*- coding: utf-8 -*-
# Console-script launcher that hands control to markdown_it.cli.parse.main.
import re
import sys
from markdown_it.cli.parse import main

if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from the program name before
    # the entry point reads sys.argv.
    program_name = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.argv[0] = program_name
    sys.exit(main())
|
|
@ -1,8 +0,0 @@
|
|||
#!/home/devraza/Projects/fireguide/.venv/bin/python3
# -*- coding: utf-8 -*-
# Console-script launcher that hands control to pip._internal.cli.main.main.
import re
import sys
from pip._internal.cli.main import main

if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from the program name before
    # the entry point reads sys.argv.
    program_name = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.argv[0] = program_name
    sys.exit(main())
|
|
@ -1,8 +0,0 @@
|
|||
#!/home/devraza/Projects/fireguide/.venv/bin/python3
# -*- coding: utf-8 -*-
# Console-script launcher that hands control to pip._internal.cli.main.main.
import re
import sys
from pip._internal.cli.main import main

if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from the program name before
    # the entry point reads sys.argv.
    program_name = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.argv[0] = program_name
    sys.exit(main())
|
|
@ -1,8 +0,0 @@
|
|||
#!/home/devraza/Projects/fireguide/.venv/bin/python3
# -*- coding: utf-8 -*-
# Console-script launcher that hands control to pip._internal.cli.main.main.
import re
import sys
from pip._internal.cli.main import main

if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from the program name before
    # the entry point reads sys.argv.
    program_name = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.argv[0] = program_name
    sys.exit(main())
|
|
@ -1,8 +0,0 @@
|
|||
#!/home/devraza/Projects/fireguide/.venv/bin/python3
# -*- coding: utf-8 -*-
# Console-script launcher that hands control to pygments.cmdline.main.
import re
import sys
from pygments.cmdline import main

if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from the program name before
    # the entry point reads sys.argv.
    program_name = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.argv[0] = program_name
    sys.exit(main())
|
|
@ -1 +0,0 @@
|
|||
python3
|
|
@ -1 +0,0 @@
|
|||
/etc/profiles/per-user/devraza/bin/python3
|
|
@ -1 +0,0 @@
|
|||
python3
|
|
@ -1,5 +0,0 @@
|
|||
"""A Python port of Markdown-It"""
|
||||
__all__ = ("MarkdownIt",)
|
||||
__version__ = "3.0.0"
|
||||
|
||||
from .main import MarkdownIt
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -1,11 +0,0 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Mapping
|
||||
import sys
|
||||
from typing import Any
|
||||
|
||||
# Keyword arguments for dataclass declarations: ``slots=True`` is requested
# only when the running interpreter is Python 3.10 or newer.
DATACLASS_KWARGS: Mapping[str, Any] = (
    {"slots": True} if sys.version_info >= (3, 10) else {}
)
|
|
@ -1,67 +0,0 @@
|
|||
# Copyright 2014 Mathias Bynens <https://mathiasbynens.be/>
|
||||
# Copyright 2021 Taneli Hukkinen
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining
|
||||
# a copy of this software and associated documentation files (the
|
||||
# "Software"), to deal in the Software without restriction, including
|
||||
# without limitation the rights to use, copy, modify, merge, publish,
|
||||
# distribute, sublicense, and/or sell copies of the Software, and to
|
||||
# permit persons to whom the Software is furnished to do so, subject to
|
||||
# the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be
|
||||
# included in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
import codecs
|
||||
import re
|
||||
from typing import Callable
|
||||
|
||||
# Matches one label-separator character: "." (U+002E), U+3002, U+FF0E or U+FF61.
REGEX_SEPARATORS = re.compile(r"[\x2E\u3002\uFF0E\uFF61]")
# Matches any single character outside the range NUL..0x7E.
REGEX_NON_ASCII = re.compile(r"[^\0-\x7E]")
|
||||
|
||||
|
||||
def encode(uni: str) -> str:
    """Return the Punycode encoding of *uni* as text."""
    punycode_bytes = uni.encode("punycode")
    return punycode_bytes.decode()
|
||||
|
||||
|
||||
def decode(ascii: str) -> str:
    """Return the text that the Punycode string *ascii* stands for."""
    decoded = codecs.decode(ascii, encoding="punycode")  # type: ignore
    return decoded
|
||||
|
||||
|
||||
def map_domain(string: str, fn: Callable[[str], str]) -> str:
    """Apply *fn* to every label of a domain name and rejoin them with ".".

    When *string* contains "@", the text before the first "@" is kept
    unchanged and only the segment between the first and second "@" (or the
    end of the string) is treated as the domain.
    """
    local_part, *after_at = string.split("@")
    prefix = ""
    if after_at:
        # In email addresses, only the domain name should be punycoded. Leave
        # the local part (i.e. everything up to `@`) intact.
        prefix = local_part + "@"
        string = after_at[0]
    labels = REGEX_SEPARATORS.split(string)
    return prefix + ".".join(map(fn, labels))
|
||||
|
||||
|
||||
def to_unicode(obj: str) -> str:
    """Decode every "xn--" label of a domain or email address.

    Labels that do not start with "xn--" are returned unchanged.
    """

    def decode_label(label: str) -> str:
        if not label.startswith("xn--"):
            return label
        # Strip the "xn--" prefix and lower-case the rest before decoding.
        return decode(label[4:].lower())

    return map_domain(obj, decode_label)
|
||||
|
||||
|
||||
def to_ascii(obj: str) -> str:
    """Punycode-encode every label of a domain or email address that needs it.

    A label is encoded (and prefixed with "xn--") only when it contains a
    character matched by ``REGEX_NON_ASCII``; other labels pass through.
    """

    def encode_label(label: str) -> str:
        if REGEX_NON_ASCII.search(label) is None:
            return label
        return "xn--" + encode(label)

    return map_domain(obj, encode_label)
|
Binary file not shown.
Binary file not shown.
|
@ -1,109 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
"""
|
||||
CLI interface to markdown-it-py
|
||||
|
||||
Parse one or more markdown files, convert each to HTML, and print to stdout.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from collections.abc import Iterable, Sequence
|
||||
import sys
|
||||
|
||||
from markdown_it import __version__
|
||||
from markdown_it.main import MarkdownIt
|
||||
|
||||
version_str = "markdown-it-py [version {}]".format(__version__)
|
||||
|
||||
|
||||
def main(args: Sequence[str] | None = None) -> int:
    """Run the CLI: convert the given files, or start the interactive loop.

    Always returns 0.
    """
    filenames = parse_args(args).filenames
    if not filenames:
        interactive()
    else:
        convert(filenames)
    return 0
|
||||
|
||||
|
||||
def convert(filenames: Iterable[str]) -> None:
    """Convert each named file in turn, in the order given."""
    for path in filenames:
        convert_file(path)
|
||||
|
||||
|
||||
def convert_file(filename: str) -> None:
    """Render one Markdown file to HTML on stdout.

    The file is read as UTF-8 with undecodable bytes ignored. If an
    ``OSError`` occurs, a message is written to stderr and the process exits
    with status 1.
    """
    try:
        with open(filename, encoding="utf8", errors="ignore") as source:
            markdown_text = source.read()
            html = MarkdownIt().render(markdown_text)
            print(html, end="")
    except OSError:
        sys.stderr.write(f'Cannot open file "{filename}".\n')
        sys.exit(1)
|
||||
|
||||
|
||||
def interactive() -> None:
    """
    Parse user input, dump to stdout, rinse and repeat.
    Python REPL style.

    Lines are collected until end-of-file (Ctrl-D), then rendered as one
    document. Ctrl-C leaves the loop. When stdin is not a terminal, the loop
    also ends after the first end-of-file, because every later ``input()``
    call would raise ``EOFError`` again and the loop would never finish.
    """
    print_heading()
    contents: list[str] = []
    more = False
    while True:
        try:
            prompt, more = ("... ", True) if more else (">>> ", True)
            contents.append(input(prompt) + "\n")
        except EOFError:
            # Each collected line already ends with "\n"; joining with an
            # extra "\n" would insert a blank line between consecutive input
            # lines and split them into separate paragraphs.
            print("\n" + MarkdownIt().render("".join(contents)), end="")
            more = False
            contents = []
            if not sys.stdin.isatty():
                break
        except KeyboardInterrupt:
            print("\nExiting.")
            break
|
||||
|
||||
|
||||
def parse_args(args: Sequence[str] | None) -> argparse.Namespace:
    """Parse input CLI arguments.

    Defines a ``-v``/``--version`` flag and zero or more positional
    ``filenames``, and returns the namespace produced by ``argparse``.
    """
    parser = argparse.ArgumentParser(
        description="Parse one or more markdown files, "
        "convert each to HTML, and print to stdout",
        # NOTE: Remember to update README.md w/ the output of `markdown-it -h`
        # NOTE(review): leading whitespace inside the epilog literal is not
        # visible in this rendering of the file - confirm against the original.
        epilog=(
            f"""
Interactive:

$ markdown-it
markdown-it-py [version {__version__}] (interactive)
Type Ctrl-D to complete input, or Ctrl-C to exit.
>>> # Example
... > markdown *input*
...
<h1>Example</h1>
<blockquote>
<p>markdown <em>input</em></p>
</blockquote>

Batch:

$ markdown-it README.md README.footer.md > index.html
"""
        ),
        # Keeps the epilog's line breaks as written instead of re-wrapping it.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("-v", "--version", action="version", version=version_str)
    parser.add_argument(
        "filenames", nargs="*", help="specify an optional list of files to convert"
    )
    return parser.parse_args(args)
|
||||
|
||||
|
||||
def print_heading() -> None:
    """Print the two-line banner shown when interactive mode starts."""
    banner = f"{version_str} (interactive)"
    print(banner)
    print("Type Ctrl-D to complete input, or Ctrl-C to exit.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the CLI with the command-line arguments (program name excluded) and
    # use its return value as the process exit status.
    cli_arguments = sys.argv[1:]
    exit_code = main(cli_arguments)
    sys.exit(exit_code)
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -1,4 +0,0 @@
|
|||
"""HTML5 entities map: { name -> characters }."""
|
||||
import html.entities
|
||||
|
||||
# Built from ``html.entities.html5`` with any trailing ";" removed from each name.
entities = dict(
    (name.rstrip(";"), chars) for name, chars in html.entities.html5.items()
)
|
|
@ -1,68 +0,0 @@
|
|||
"""List of valid html blocks names, according to commonmark spec
|
||||
http://jgm.github.io/CommonMark/spec.html#html-blocks
|
||||
"""
|
||||
|
||||
# Tag names recognised as HTML block elements, kept in alphabetical order.
block_names = """
    address article aside
    base basefont blockquote body
    caption center col colgroup
    dd details dialog dir div dl dt
    fieldset figcaption figure footer form frame frameset
    h1 h2 h3 h4 h5 h6 head header hr html
    iframe
    legend li link
    main menu menuitem
    nav noframes
    ol optgroup option
    p param
    section source summary
    table tbody td tfoot th thead title tr track
    ul
""".split()
|
|
@ -1,40 +0,0 @@
|
|||
"""Regexps to match html elements
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
# Building blocks for attribute syntax.
attr_name = r"[a-zA-Z_:][a-zA-Z0-9:._-]*"

unquoted = r"""[^"'=<>`\x00-\x20]+"""
single_quoted = r"'[^']*'"
double_quoted = r'"[^"]*"'

attr_value = f"(?:{unquoted}|{single_quoted}|{double_quoted})"

attribute = rf"(?:\s+{attr_name}(?:\s*=\s*{attr_value})?)"

# One pattern per kind of inline HTML construct.
open_tag = rf"<[A-Za-z][A-Za-z0-9\-]*{attribute}*\s*\/?>"

close_tag = r"<\/[A-Za-z][A-Za-z0-9\-]*\s*>"
comment = r"<!---->|<!--(?:-?[^>-])(?:-?[^-])*-->"
processing = r"<[?][\s\S]*?[?]>"
declaration = r"<![A-Z]+\s+[^>]*>"
cdata = r"<!\[CDATA\[[\s\S]*?\]\]>"

# Any of the constructs above, anchored at the start of the input.
HTML_TAG_RE = re.compile(
    "^(?:"
    + "|".join((open_tag, close_tag, comment, processing, declaration, cdata))
    + ")"
)
# Opening or closing tags only, anchored at the start of the input.
HTML_OPEN_CLOSE_TAG_STR = f"^(?:{open_tag}|{close_tag})"
HTML_OPEN_CLOSE_TAG_RE = re.compile(HTML_OPEN_CLOSE_TAG_STR)
|
|
@ -1,81 +0,0 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Callable
|
||||
from contextlib import suppress
|
||||
import re
|
||||
from urllib.parse import quote, unquote, urlparse, urlunparse # noqa: F401
|
||||
|
||||
import mdurl
|
||||
|
||||
from .. import _punycode
|
||||
|
||||
RECODE_HOSTNAME_FOR = ("http:", "https:", "mailto:")
|
||||
|
||||
|
||||
def normalizeLink(url: str) -> str:
    """Normalize destination URLs in links

    ::

        [label]: destination 'title'
                 ^^^^^^^^^^^

    The hostname is converted with ``_punycode.to_ascii`` when the URL has no
    protocol or one listed in ``RECODE_HOSTNAME_FOR``; any exception raised by
    that conversion is ignored and the hostname is left as parsed.
    """
    parts = mdurl.parse(url, slashes_denote_host=True)

    # Encode hostnames in urls like:
    # `http://host/`, `https://host/`, `mailto:user@host`, `//host/`
    #
    # We don't encode unknown schemas, because it's likely that we encode
    # something we shouldn't (e.g. `skype:name` treated as `skype:host`)
    #
    recodable_protocol = not parts.protocol or parts.protocol in RECODE_HOSTNAME_FOR
    if parts.hostname and recodable_protocol:
        with suppress(Exception):
            ascii_hostname = _punycode.to_ascii(parts.hostname)
            parts = parts._replace(hostname=ascii_hostname)

    formatted = mdurl.format(parts)
    return mdurl.encode(formatted)
|
||||
|
||||
|
||||
def normalizeLinkText(url: str) -> str:
    """Normalize autolink content

    ::

        <destination>
         ~~~~~~~~~~~

    The hostname is converted with ``_punycode.to_unicode`` when the URL has
    no protocol or one listed in ``RECODE_HOSTNAME_FOR``; any exception raised
    by that conversion is ignored and the hostname is left as parsed.
    """
    parts = mdurl.parse(url, slashes_denote_host=True)

    # Encode hostnames in urls like:
    # `http://host/`, `https://host/`, `mailto:user@host`, `//host/`
    #
    # We don't encode unknown schemas, because it's likely that we encode
    # something we shouldn't (e.g. `skype:name` treated as `skype:host`)
    #
    recodable_protocol = not parts.protocol or parts.protocol in RECODE_HOSTNAME_FOR
    if parts.hostname and recodable_protocol:
        with suppress(Exception):
            unicode_hostname = _punycode.to_unicode(parts.hostname)
            parts = parts._replace(hostname=unicode_hostname)

    # add '%' to exclude list because of https://github.com/markdown-it/markdown-it/issues/720
    keep_encoded = mdurl.DECODE_DEFAULT_CHARS + "%"
    return mdurl.decode(mdurl.format(parts), keep_encoded)
|
||||
|
||||
|
||||
# Protocols that are rejected by default ...
BAD_PROTO_RE = re.compile(r"^(vbscript|javascript|file|data):")
# ... except for these image data URLs.
GOOD_DATA_RE = re.compile(r"^data:image\/(gif|png|jpeg|webp);")


def validateLink(url: str, validator: Callable[[str], bool] | None = None) -> bool:
    """Decide whether a URL may appear in the rendered output.

    When *validator* is supplied, its result for the unmodified *url* is
    returned as-is. Otherwise the URL is stripped and lower-cased, and it is
    rejected only if it matches ``BAD_PROTO_RE`` without also matching
    ``GOOD_DATA_RE``.

    This validator can prohibit more than really needed to prevent XSS.
    It's a tradeoff to keep code simple and to be secure by default.

    Note: url should be normalized at this point, and existing entities decoded.
    """
    if validator is not None:
        return validator(url)

    candidate = url.strip().lower()
    if BAD_PROTO_RE.search(candidate) is None:
        return True
    return GOOD_DATA_RE.search(candidate) is not None
|
|
@ -1,318 +0,0 @@
|
|||
"""Utilities for parsing source text
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Match, TypeVar
|
||||
|
||||
from .entities import entities
|
||||
|
||||
|
||||
def charCodeAt(src: str, pos: int) -> int | None:
    """
    Return the Unicode code point of the character at index ``pos`` of ``src``.

    ``None`` is returned when indexing ``src`` with ``pos`` raises
    ``IndexError``. Negative indices follow normal Python indexing.
    """
    try:
        char = src[pos]
    except IndexError:
        return None
    return ord(char)
|
||||
|
||||
|
||||
def charStrAt(src: str, pos: int) -> str | None:
    """
    Return the character at index ``pos`` of ``src``.

    ``None`` is returned when indexing ``src`` with ``pos`` raises
    ``IndexError``. Negative indices follow normal Python indexing.
    """
    try:
        char = src[pos]
    except IndexError:
        char = None
    return char
|
||||
|
||||
|
||||
_ItemTV = TypeVar("_ItemTV")


def arrayReplaceAt(
    src: list[_ItemTV], pos: int, newElements: list[_ItemTV]
) -> list[_ItemTV]:
    """
    Return a new list in which the item at ``pos`` is replaced by ``newElements``.

    The input lists are not modified. Useful for some operations with tokens.
    """
    before = src[:pos]
    after = src[pos + 1 :]
    return [*before, *newElements, *after]
|
||||
|
||||
|
||||
def isValidEntityCode(c: int) -> bool:
    """Tell whether code point ``c`` may be produced by a numeric entity.

    Rejected are surrogates, the ``U+FDD0..U+FDEF`` range, any code whose
    low 16 bits are ``FFFE``/``FFFF``, control codes (except tab, LF, FF
    and CR) and values above ``U+10FFFF``.
    """
    # broken sequence (surrogate range)
    if 0xD800 <= c <= 0xDFFF:
        return False
    # never used
    if 0xFDD0 <= c <= 0xFDEF:
        return False
    if (c & 0xFFFF) in (0xFFFF, 0xFFFE):
        return False
    # control codes
    if 0x00 <= c <= 0x08 or c == 0x0B or 0x0E <= c <= 0x1F or 0x7F <= c <= 0x9F:
        return False
    # anything left is valid unless it is out of range
    return not c > 0x10FFFF
|
||||
|
||||
|
||||
def fromCodePoint(c: int) -> str:
    """Return the one-character string for code point ``c``.

    The original Javascript needed two string characters for code points
    larger than ``0xFFFF``; a Python 3 string holds any Unicode code point
    as a single character, so ``chr`` is all that is required.
    """
    character = chr(c)
    return character
|
||||
|
||||
|
||||
# UNESCAPE_MD_RE = re.compile(r'\\([!"#$%&\'()*+,\-.\/:;<=>?@[\\\]^_`{|}~])')
# ENTITY_RE_g = re.compile(r'&([a-z#][a-z0-9]{1,31})', re.IGNORECASE)

# Two alternatives in one pattern:
#   group 1 - an ASCII punctuation character that follows a backslash
#   group 2 - the name part of an entity reference ``&name;``
UNESCAPE_ALL_RE = re.compile(
    r'\\([!"#$%&\'()*+,\-.\/:;<=>?@[\\\]^_`{|}~])' + "|" + r"&([a-z#][a-z0-9]{1,31});",
    re.IGNORECASE,
)
# Entity name of the form "#<decimal digits>"; group 1 is the digit string.
DIGITAL_ENTITY_BASE10_RE = re.compile(r"#([0-9]{1,8})")
# Entity name of the form "#x<hex digits>"; group 1 is the hex digit string.
DIGITAL_ENTITY_BASE16_RE = re.compile(r"#x([a-f0-9]{1,8})", re.IGNORECASE)
|
||||
|
||||
|
||||
def replaceEntityPattern(match: str, name: str) -> str:
    """Resolve an HTML entity reference to the text it stands for.

    See https://spec.commonmark.org/0.30/#entity-references

    :param match: the complete matched source text; returned unchanged when
        the entity cannot be resolved
    :param name: the entity name, i.e. the text between ``&`` and ``;``
    :returns: the replacement text for a known named entity or a valid
        numeric entity, otherwise ``match``
    """
    if name in entities:
        return entities[name]

    # numeric entity: decimal form first, then hexadecimal
    decimal = DIGITAL_ENTITY_BASE10_RE.fullmatch(name)
    if decimal is not None:
        codepoint = int(decimal.group(1), 10)
    else:
        hexadecimal = DIGITAL_ENTITY_BASE16_RE.fullmatch(name)
        if hexadecimal is None:
            return match
        codepoint = int(hexadecimal.group(1), 16)

    return fromCodePoint(codepoint) if isValidEntityCode(codepoint) else match
|
||||
|
||||
|
||||
def unescapeAll(string: str) -> str:
    """Remove backslash escapes and resolve entity references in ``string``.

    Every match of ``UNESCAPE_ALL_RE`` is rewritten: an escaped punctuation
    character loses its backslash, an entity reference is passed to
    ``replaceEntityPattern``.
    """
    # fast path: nothing that could be an escape or an entity
    if "\\" not in string and "&" not in string:
        return string

    def _expand(found: Match[str]) -> str:
        backslashed = found.group(1)
        if backslashed:
            return backslashed
        return replaceEntityPattern(found.group(), found.group(2))

    return UNESCAPE_ALL_RE.sub(_expand, string)
|
||||
|
||||
|
||||
# Characters that may be backslash-escaped, written for use inside a regex
# character class.
ESCAPABLE = r"""\\!"#$%&'()*+,./:;<=>?@\[\]^`{}|_~-"""
# A backslash followed by one escapable character (captured in group 1).
ESCAPE_CHAR = re.compile(r"\\([" + ESCAPABLE + r"])")


def stripEscape(string: str) -> str:
    """Drop the backslash in front of every escapable character."""
    return ESCAPE_CHAR.sub(lambda hit: hit.group(1), string)
|
||||
|
||||
|
||||
def escapeHtml(raw: str) -> str:
    """Replace special characters "&", "<", ">" and '"' with HTML-safe sequences.

    Works like ``html.escape``, except that single quotes are left as they are.

    :param raw: text to escape
    :returns: ``raw`` with the four special characters replaced by their
        named character references
    """
    # "&" has to go first: otherwise the ampersands introduced by the other
    # replacements would themselves be escaped a second time.
    raw = raw.replace("&", "&amp;")
    raw = raw.replace("<", "&lt;")
    raw = raw.replace(">", "&gt;")
    raw = raw.replace('"', "&quot;")
    return raw
|
||||
|
||||
|
||||
# //////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
# Characters that have a special meaning inside a regular expression.
REGEXP_ESCAPE_RE = re.compile(r"[.?*+^$[\]\\(){}|-]")


def escapeRE(string: str) -> str:
    """Backslash-escape every regular-expression metacharacter in ``string``.

    :param string: text that should be matched literally
    :returns: ``string`` with a backslash inserted in front of each
        character matched by ``REGEXP_ESCAPE_RE``
    """
    # ``\g<0>`` is Python's reference to the whole match. The JavaScript
    # spelling ``$&`` has no meaning in ``re.sub`` and would be inserted
    # literally.
    return REGEXP_ESCAPE_RE.sub(r"\\\g<0>", string)
|
||||
|
||||
|
||||
# //////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
def isSpace(code: int | None) -> bool:
    """Tell whether ``code`` is the character code of a tab or a space."""
    return code == 0x09 or code == 0x20
|
||||
|
||||
|
||||
def isStrSpace(ch: str | None) -> bool:
    """Tell whether ``ch`` is a tab or a space character."""
    return ch == "\t" or ch == " "
|
||||
|
||||
|
||||
# Whitespace code points checked individually by ``isWhiteSpace``
# (U+2000..U+200A is handled there as a range).
MD_WHITESPACE = {
    # ASCII: \t \n \v \f \r and space
    0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x20,
    # non-ASCII
    0xA0, 0x1680, 0x202F, 0x205F, 0x3000,
}


def isWhiteSpace(code: int) -> bool:
    r"""Zs (unicode class) || [\t\f\v\r\n]"""
    return 0x2000 <= code <= 0x200A or code in MD_WHITESPACE
|
||||
|
||||
|
||||
# //////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
UNICODE_PUNCT_RE = re.compile(
|
||||
r"[!-#%-\*,-\/:;\?@\[-\]_\{\}\xA1\xA7\xAB\xB6\xB7\xBB\xBF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u09FD\u0A76\u0AF0\u0C84\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051\u2053-\u205E\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E4E\u3001-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]|\uD800[\uDD00-\uDD02\uDF9F\uDFD0]|\uD801\uDD6F|\uD802[\uDC57\uDD1F\uDD3F\uDE50-\uDE58\uDE7F\uDEF0-\uDEF6\uDF39-\uDF3F\uDF99-\uDF9C]|\uD803[\uDF55-\uDF59]|\uD804[\uDC47-\uDC4D\uDCBB\uDCBC\uDCBE-\uDCC1\uDD40-\uDD43\uDD74\uDD75\uDDC5-\uDDC8\uDDCD\uDDDB\uDDDD-\uDDDF\uDE38-\uDE3D\uDEA9]|\uD805[\uDC4B-\uDC4F\uDC5B\uDC5D\uDCC6\uDDC1-\uDDD7\uDE41-\uDE43\uDE60-\uDE6C\uDF3C-\uDF3E]|\uD806[\uDC3B\uDE3F-\uDE46\uDE9A-\uDE9C\uDE9E-\uDEA2]|\uD807[\uDC41-\uDC45\uDC70\uDC71\uDEF7\uDEF8]|\uD809[\uDC70-\uDC74]|\uD81A[\uDE6E\uDE6F\uDEF5\uDF37-\uDF3B\uDF44]|\uD81B[\uDE97-\uDE9A]|\uD82F\uDC9F|\uD836[\uDE87-\uDE8B]|\uD83A[\uDD5E\uDD5F]" # noqa: E501
|
||||
)
|
||||
|
||||
|
||||
# Currently without astral characters support.
|
||||
def isPunctChar(ch: str) -> bool:
    """Tell whether ``ch`` contains a match for ``UNICODE_PUNCT_RE``."""
    return bool(UNICODE_PUNCT_RE.search(ch))
|
||||
|
||||
|
||||
# Character codes of the 32 Markdown ASCII punctuation characters.
MD_ASCII_PUNCT = {ord(char) for char in "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"}


def isMdAsciiPunct(ch: int) -> bool:
    """Tell whether character code ``ch`` is Markdown ASCII punctuation.

    The punctuation characters are::

        ! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \\ ] ^ _ ` { | } ~

    See http://spec.commonmark.org/0.15/#ascii-punctuation-character

    Don't confuse with unicode punctuation !!! It lacks some chars in ascii range.
    """
    return ch in MD_ASCII_PUNCT
|
||||
|
||||
|
||||
def normalizeReference(string: str) -> str:
    """Helper to unify [reference labels].

    Surrounding whitespace is removed, every inner run of whitespace becomes
    a single space, and the text is lowercased and then uppercased.
    """
    # Trim and collapse whitespace
    trimmed = string.strip()
    collapsed = re.sub(r"\s+", " ", trimmed)

    # Lowercasing followed by uppercasing removes the differences between
    # letter variants that a single case conversion leaves in place.
    #
    # Example with the Greek letter theta, which has the uppercase forms
    # U+0398 and U+03F4 and the lowercase forms U+03B8 and U+03D1:
    #
    #   - lowercasing alone leaves U+03D1 as it is (already lowercase)
    #   - uppercasing alone leaves U+03F4 as it is (already uppercase)
    #   - lowercasing and then uppercasing maps all four to U+0398
    #
    # The JavaScript original keeps the result uppercased because it is later
    # used as an object key, and that avoids clashes with
    # Object.prototype members such as `__proto__`.
    lowered = collapsed.lower()
    return lowered.upper()
|
||||
|
||||
|
||||
# "<a" at the very start of the string, followed by ">" or whitespace.
LINK_OPEN_RE = re.compile(r"^<a[>\s]", flags=re.IGNORECASE)
# "</a", optional whitespace and ">" at the very start of the string.
LINK_CLOSE_RE = re.compile(r"^</a\s*>", flags=re.IGNORECASE)


def isLinkOpen(string: str) -> bool:
    """Tell whether ``string`` begins with an opening ``<a>`` tag."""
    return LINK_OPEN_RE.search(string) is not None


def isLinkClose(string: str) -> bool:
    """Tell whether ``string`` begins with a closing ``</a>`` tag."""
    return LINK_CLOSE_RE.search(string) is not None
|
|
@ -1,6 +0,0 @@
|
|||
"""Functions for parsing Links
|
||||
"""
|
||||
__all__ = ("parseLinkLabel", "parseLinkDestination", "parseLinkTitle")
|
||||
from .parse_link_destination import parseLinkDestination
|
||||
from .parse_link_label import parseLinkLabel
|
||||
from .parse_link_title import parseLinkTitle
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -1,86 +0,0 @@
|
|||
"""
|
||||
Parse link destination
|
||||
"""
|
||||
|
||||
from ..common.utils import charCodeAt, unescapeAll
|
||||
|
||||
|
||||
class _Result:
    """Outcome of one attempt to parse a link destination."""

    __slots__ = ("ok", "pos", "lines", "str")

    def __init__(self) -> None:
        # starts out as "nothing parsed"; the parser fills it in on success
        self.ok, self.pos, self.lines, self.str = False, 0, 0, ""
|
||||
|
||||
|
||||
def parseLinkDestination(string: str, pos: int, maximum: int) -> _Result:
    """Parse a link destination that starts at ``pos``.

    Two forms are recognised: a destination wrapped in ``<...>`` and a bare
    destination, which may contain balanced parentheses.

    :param string: source text
    :param pos: index at which the destination is expected to start
    :param maximum: index at which scanning stops
    :returns: a ``_Result``; on success ``ok`` is True, ``str`` is the
        unescaped destination and ``pos`` is the index just behind it
    """
    outcome = _Result()
    begin = pos

    if charCodeAt(string, pos) == 0x3C:  # "<": pointy-bracket form
        cursor = pos + 1
        while cursor < maximum:
            ch = charCodeAt(string, cursor)
            # a line break or a second "<" is not allowed inside <...>
            if ch in (0x0A, 0x3C):
                return outcome
            if ch == 0x3E:  # ">"
                outcome.pos = cursor + 1
                outcome.str = unescapeAll(string[begin + 1 : cursor])
                outcome.ok = True
                return outcome
            # a backslash also consumes the character behind it
            cursor += 2 if (ch == 0x5C and cursor + 1 < maximum) else 1

        # no closing '>'
        return outcome

    # bare form
    depth = 0
    cursor = pos
    while cursor < maximum:
        ch = charCodeAt(string, cursor)

        # stop at end of text, at a space, or at an ascii control character
        if ch is None or ch <= 0x20 or ch == 0x7F:
            break

        if ch == 0x5C and cursor + 1 < maximum:  # backslash
            if charCodeAt(string, cursor + 1) == 0x20:
                break
            cursor += 2
            continue

        if ch == 0x28:  # "("
            depth += 1
            if depth > 32:
                return outcome
        elif ch == 0x29:  # ")"
            if depth == 0:
                break
            depth -= 1

        cursor += 1

    # nothing consumed, or parentheses left open
    if cursor == begin or depth != 0:
        return outcome

    outcome.str = unescapeAll(string[begin:cursor])
    outcome.lines = 0
    outcome.pos = cursor
    outcome.ok = True
    return outcome
|
|
@ -1,43 +0,0 @@
|
|||
"""
|
||||
Parse link label
|
||||
|
||||
this function assumes that first character ("[") already matches
|
||||
returns the end of the label
|
||||
|
||||
"""
|
||||
from markdown_it.rules_inline import StateInline
|
||||
|
||||
|
||||
def parseLinkLabel(state: StateInline, start: int, disableNested: bool = False) -> int:
    """Find the end of the link label that opens at ``start``.

    The character at ``start`` is assumed to be the opening "[".

    :param state: inline parser state; ``state.pos`` is moved while scanning
        and put back to its initial value before returning
    :param start: index of the opening "["
    :param disableNested: when true, give up as soon as a "[" turns out to
        be part of a longer token
    :returns: index of the matching "]", or -1 if there is none
    """
    saved_pos = state.pos
    state.pos = start + 1
    depth = 1
    end = -1

    while state.pos < state.posMax:
        char = state.src[state.pos]
        if char == "]":
            depth -= 1
            if depth == 0:
                end = state.pos
                break

        before = state.pos
        state.md.inline.skipToken(state)
        if char != "[":
            continue
        if state.pos - 1 == before:
            # increase level if we find text `[`,
            # which is not a part of any token
            depth += 1
        elif disableNested:
            # `end` is still -1 here
            break

    # restore old state
    state.pos = saved_pos

    return end
|
|
@ -1,60 +0,0 @@
|
|||
"""Parse link title
|
||||
"""
|
||||
from ..common.utils import charCodeAt, unescapeAll
|
||||
|
||||
|
||||
class _Result:
    """Outcome of one attempt to parse a link title."""

    __slots__ = ("ok", "pos", "lines", "str")

    def __init__(self) -> None:
        # starts out as "nothing parsed"; the parser fills it in on success
        self.ok, self.pos, self.lines, self.str = False, 0, 0, ""

    def __str__(self) -> str:
        text = self.str
        return text
|
||||
|
||||
|
||||
def parseLinkTitle(string: str, pos: int, maximum: int) -> _Result:
    """Parse a link title that starts at ``pos``.

    A title is enclosed in double quotes, single quotes or parentheses.

    :param string: source text
    :param pos: index at which the opening delimiter is expected
    :param maximum: index at which scanning stops
    :returns: a ``_Result``; on success ``ok`` is True, ``str`` is the
        unescaped title, ``pos`` is the index just behind the closing
        delimiter and ``lines`` counts the line breaks inside the title
    """
    outcome = _Result()
    if pos >= maximum:
        return outcome

    # opening delimiter -> delimiter that ends the title: " ' and ( ... )
    closers = {0x22: 0x22, 0x27: 0x27, 0x28: 0x29}
    closing = closers.get(charCodeAt(string, pos))
    if closing is None:
        return outcome

    newlines = 0
    cursor = pos + 1
    while cursor < maximum:
        ch = charCodeAt(string, cursor)
        if ch == closing:
            text = unescapeAll(string[pos + 1 : cursor])
            outcome.pos = cursor + 1
            outcome.lines = newlines
            outcome.str = text
            outcome.ok = True
            return outcome
        if ch == 0x28 and closing == 0x29:
            # "(" is not allowed inside a parenthesised title
            return outcome
        if ch == 0x0A:
            newlines += 1
        elif ch == 0x5C and cursor + 1 < maximum:
            # backslash: step over the escaped character as well
            cursor += 1
            if charCodeAt(string, cursor) == 0x0A:
                newlines += 1
        cursor += 1

    return outcome
|
|
@ -1,355 +0,0 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Callable, Generator, Iterable, Mapping, MutableMapping
|
||||
from contextlib import contextmanager
|
||||
from typing import Any, Literal, overload
|
||||
|
||||
from . import helpers, presets
|
||||
from .common import normalize_url, utils
|
||||
from .parser_block import ParserBlock
|
||||
from .parser_core import ParserCore
|
||||
from .parser_inline import ParserInline
|
||||
from .renderer import RendererHTML, RendererProtocol
|
||||
from .rules_core.state_core import StateCore
|
||||
from .token import Token
|
||||
from .utils import EnvType, OptionsDict, OptionsType, PresetType
|
||||
|
||||
try:
|
||||
import linkify_it
|
||||
except ModuleNotFoundError:
|
||||
linkify_it = None
|
||||
|
||||
|
||||
# Configurations selectable by name: ``MarkdownIt.configure`` looks a string
# argument up in this mapping and raises ``KeyError`` for unknown names.
_PRESETS: dict[str, PresetType] = {
    "default": presets.default.make(),
    "js-default": presets.js_default.make(),
    "zero": presets.zero.make(),
    "commonmark": presets.commonmark.make(),
    "gfm-like": presets.gfm_like.make(),
}
|
||||
|
||||
|
||||
class MarkdownIt:
    def __init__(
        self,
        config: str | PresetType = "commonmark",
        options_update: Mapping[str, Any] | None = None,
        *,
        renderer_cls: Callable[[MarkdownIt], RendererProtocol] = RendererHTML,
    ):
        """Main parser class

        :param config: name of configuration to load or a pre-defined dictionary
        :param options_update: dictionary that will be merged into ``config["options"]``
        :param renderer_cls: the class to load as the renderer:
            ``self.renderer = renderer_cls(self)``
        :raises TypeError: if ``options_update`` is given but is not a mapping
        """
        # add modules
        self.utils = utils
        self.helpers = helpers

        # initialise classes
        self.inline = ParserInline()
        self.block = ParserBlock()
        self.core = ParserCore()
        self.renderer = renderer_cls(self)
        self.linkify = linkify_it.LinkifyIt() if linkify_it else None

        # set the configuration
        if options_update and not isinstance(options_update, Mapping):
            # catch signature change where renderer_cls was not used as a key-word
            raise TypeError(
                f"options_update should be a mapping: {options_update}"
                "\n(Perhaps you intended this to be the renderer_cls?)"
            )
        self.configure(config, options_update=options_update)

    def __repr__(self) -> str:
        return f"{self.__class__.__module__}.{self.__class__.__name__}()"

    @overload
    def __getitem__(self, name: Literal["inline"]) -> ParserInline:
        ...

    @overload
    def __getitem__(self, name: Literal["block"]) -> ParserBlock:
        ...

    @overload
    def __getitem__(self, name: Literal["core"]) -> ParserCore:
        ...

    @overload
    def __getitem__(self, name: Literal["renderer"]) -> RendererProtocol:
        ...

    @overload
    def __getitem__(self, name: str) -> Any:
        ...

    def __getitem__(self, name: str) -> Any:
        """Return the component called ``name``.

        :param name: one of "inline", "block", "core" or "renderer"
        :raises KeyError: for any other name
        """
        return {
            "inline": self.inline,
            "block": self.block,
            "core": self.core,
            "renderer": self.renderer,
        }[name]

    def set(self, options: OptionsType) -> None:
        """Set parser options (in the same format as in constructor).
        Probably, you will never need it, but you can change options after constructor call.

        __Note:__ To achieve the best possible performance, don't modify a
        `markdown-it` instance options on the fly. If you need multiple configurations
        it's best to create multiple instances and initialize each with separate config.
        """
        self.options = OptionsDict(options)

    def configure(
        self, presets: str | PresetType, options_update: Mapping[str, Any] | None = None
    ) -> MarkdownIt:
        """Batch load of all options and component settings.
        This is an internal method, and you probably will not need it.
        But if you will - see available presets and data structure
        [here](https://github.com/markdown-it/markdown-it/tree/master/lib/presets)

        We strongly recommend to use presets instead of direct config loads.
        That will give better compatibility with next versions.

        :param presets: name of a built-in preset, or a preset dictionary
        :param options_update: values merged over the preset's ``"options"``
        :returns: ``self`` (chainable)
        :raises KeyError: if ``presets`` is a string that names no preset
        :raises ValueError: if the resulting configuration is empty
        """
        if isinstance(presets, str):
            if presets not in _PRESETS:
                raise KeyError(f"Wrong `markdown-it` preset '{presets}', check name")
            config = _PRESETS[presets]
        else:
            config = presets

        if not config:
            raise ValueError("Wrong `markdown-it` config, can't be empty")

        options = config.get("options", {}) or {}
        if options_update:
            options = {**options, **options_update}  # type: ignore

        self.set(options)  # type: ignore

        if "components" in config:
            for name, component in config["components"].items():
                rules = component.get("rules", None)
                if rules:
                    self[name].ruler.enableOnly(rules)
                rules2 = component.get("rules2", None)
                if rules2:
                    self[name].ruler2.enableOnly(rules2)

        return self

    def get_all_rules(self) -> dict[str, list[str]]:
        """Return the names of all rules, keyed by chain.

        The keys are "core", "block", "inline" and "inline2" (the second
        ruler of the inline parser).
        """
        rules = {
            chain: self[chain].ruler.get_all_rules()
            for chain in ["core", "block", "inline"]
        }
        rules["inline2"] = self.inline.ruler2.get_all_rules()
        return rules

    def get_active_rules(self) -> dict[str, list[str]]:
        """Return the names of all active rules, keyed by chain.

        The keys are "core", "block", "inline" and "inline2" (the second
        ruler of the inline parser).
        """
        rules = {
            chain: self[chain].ruler.get_active_rules()
            for chain in ["core", "block", "inline"]
        }
        rules["inline2"] = self.inline.ruler2.get_active_rules()
        return rules

    def enable(
        self, names: str | Iterable[str], ignoreInvalid: bool = False
    ) -> MarkdownIt:
        """Enable list or rules. (chainable)

        :param names: rule name or list of rule names to enable.
        :param ignoreInvalid: set `true` to ignore errors when rule not found.
        :raises ValueError: if a rule is not found and ``ignoreInvalid`` is not set

        It will automatically find appropriate components,
        containing rules with given names. If rule not found, and `ignoreInvalid`
        not set - throws exception.

        Example::

            md = MarkdownIt().enable(['sub', 'sup']).disable('smartquotes')

        """
        # ``names`` is walked once per ruler and once more for the check
        # below, so a one-shot iterable has to be materialised first.
        names = [names] if isinstance(names, str) else list(names)

        result = []
        for chain in ["core", "block", "inline"]:
            result.extend(self[chain].ruler.enable(names, True))
        result.extend(self.inline.ruler2.enable(names, True))

        missed = [name for name in names if name not in result]
        if missed and not ignoreInvalid:
            raise ValueError(f"MarkdownIt. Failed to enable unknown rule(s): {missed}")

        return self

    def disable(
        self, names: str | Iterable[str], ignoreInvalid: bool = False
    ) -> MarkdownIt:
        """The same as [[MarkdownIt.enable]], but turn specified rules off. (chainable)

        :param names: rule name or list of rule names to disable.
        :param ignoreInvalid: set `true` to ignore errors when rule not found.
        :raises ValueError: if a rule is not found and ``ignoreInvalid`` is not set

        """
        # ``names`` is walked once per ruler and once more for the check
        # below, so a one-shot iterable has to be materialised first.
        names = [names] if isinstance(names, str) else list(names)

        result = []
        for chain in ["core", "block", "inline"]:
            result.extend(self[chain].ruler.disable(names, True))
        result.extend(self.inline.ruler2.disable(names, True))

        missed = [name for name in names if name not in result]
        if missed and not ignoreInvalid:
            raise ValueError(f"MarkdownIt. Failed to disable unknown rule(s): {missed}")
        return self

    @contextmanager
    def reset_rules(self) -> Generator[None, None, None]:
        """A context manager, that will reset the current enabled rules on exit.

        The rules are restored even when the body of the ``with`` statement
        raises.
        """
        chain_rules = self.get_active_rules()
        try:
            yield
        finally:
            for chain, rules in chain_rules.items():
                if chain != "inline2":
                    self[chain].ruler.enableOnly(rules)
            self.inline.ruler2.enableOnly(chain_rules["inline2"])

    def add_render_rule(
        self, name: str, function: Callable[..., Any], fmt: str = "html"
    ) -> None:
        """Add a rule for rendering a particular Token type.

        Only applied when ``renderer.__output__ == fmt``

        :param name: key under which the rule is stored in ``renderer.rules``
        :param function: the rule; it is bound to the renderer as a method
        :param fmt: output format the rule applies to
        """
        if self.renderer.__output__ == fmt:
            self.renderer.rules[name] = function.__get__(self.renderer)  # type: ignore

    def use(
        self, plugin: Callable[..., None], *params: Any, **options: Any
    ) -> MarkdownIt:
        """Load specified plugin with given params into current parser instance. (chainable)

        It's just a sugar to call `plugin(md, params)` with currying.

        Example::

            def func(tokens, idx):
                tokens[idx].content = tokens[idx].content.replace('foo', 'bar')
            md = MarkdownIt().use(plugin, 'foo_replace', 'text', func)

        """
        plugin(self, *params, **options)
        return self

    def parse(self, src: str, env: EnvType | None = None) -> list[Token]:
        """Parse the source string to a token stream

        :param src: source string
        :param env: environment sandbox
        :raises TypeError: if ``env`` is not a MutableMapping or ``src`` is not a string

        Parse input string and return list of block tokens (special token type
        "inline" will contain list of inline tokens).

        `env` is used to pass data between "distributed" rules and return additional
        metadata like reference info, needed for the renderer. It also can be used to
        inject data in specific cases. Usually, you will be ok to pass `{}`,
        and then pass updated object to renderer.
        """
        env = {} if env is None else env
        if not isinstance(env, MutableMapping):
            raise TypeError(f"Input data should be a MutableMapping, not {type(env)}")
        if not isinstance(src, str):
            raise TypeError(f"Input data should be a string, not {type(src)}")
        state = StateCore(src, self, env)
        self.core.process(state)
        return state.tokens

    def render(self, src: str, env: EnvType | None = None) -> Any:
        """Render markdown string into html. It does all magic for you :).

        :param src: source string
        :param env: environment sandbox
        :returns: The output of the loaded renderer

        `env` can be used to inject additional metadata (`{}` by default).
        But you will not need it with high probability. See also comment
        in [[MarkdownIt.parse]].
        """
        env = {} if env is None else env
        return self.renderer.render(self.parse(src, env), self.options, env)

    def parseInline(self, src: str, env: EnvType | None = None) -> list[Token]:
        """The same as [[MarkdownIt.parse]] but skip all block rules.

        :param src: source string
        :param env: environment sandbox
        :raises TypeError: if ``env`` is not a MutableMapping or ``src`` is not a string

        It returns the
        block tokens list with the single `inline` element, containing parsed inline
        tokens in `children` property. Also updates `env` object.
        """
        env = {} if env is None else env
        if not isinstance(env, MutableMapping):
            raise TypeError(f"Input data should be an MutableMapping, not {type(env)}")
        if not isinstance(src, str):
            raise TypeError(f"Input data should be a string, not {type(src)}")
        state = StateCore(src, self, env)
        state.inlineMode = True
        self.core.process(state)
        return state.tokens

    def renderInline(self, src: str, env: EnvType | None = None) -> Any:
        """Similar to [[MarkdownIt.render]] but for single paragraph content.

        :param src: source string
        :param env: environment sandbox

        Similar to [[MarkdownIt.render]] but for single paragraph content. Result
        will NOT be wrapped into `<p>` tags.
        """
        env = {} if env is None else env
        return self.renderer.render(self.parseInline(src, env), self.options, env)

    # link methods

    def validateLink(self, url: str) -> bool:
        """Validate if the URL link is allowed in output.

        This validator can prohibit more than really needed to prevent XSS.
        It's a tradeoff to keep code simple and to be secure by default.

        Note: the url should be normalized at this point, and existing entities decoded.
        """
        return normalize_url.validateLink(url)

    def normalizeLink(self, url: str) -> str:
        """Normalize destination URLs in links

        ::

            [label]:   destination   'title'
                       ^^^^^^^^^^^
        """
        return normalize_url.normalizeLink(url)

    def normalizeLinkText(self, link: str) -> str:
        """Normalize autolink content

        ::

            <destination>
             ~~~~~~~~~~~
        """
        return normalize_url.normalizeLinkText(link)
|
|
@ -1,111 +0,0 @@
|
|||
"""Block-level tokenizer."""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import TYPE_CHECKING, Callable
|
||||
|
||||
from . import rules_block
|
||||
from .ruler import Ruler
|
||||
from .rules_block.state_block import StateBlock
|
||||
from .token import Token
|
||||
from .utils import EnvType
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from markdown_it import MarkdownIt
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
RuleFuncBlockType = Callable[[StateBlock, int, int, bool], bool]
|
||||
"""(state: StateBlock, startLine: int, endLine: int, silent: bool) -> matched: bool)
|
||||
|
||||
`silent` disables token generation, useful for lookahead.
|
||||
"""
|
||||
|
||||
# Block rule table. ``ParserBlock.__init__`` pushes the entries to its ruler
# one by one, passing the third element as ``{"alt": alt}``.
_rules: list[tuple[str, RuleFuncBlockType, list[str]]] = [
    # First 2 params - rule name & source. Secondary array - list of rules,
    # which can be terminated by this one.
    ("table", rules_block.table, ["paragraph", "reference"]),
    ("code", rules_block.code, []),
    ("fence", rules_block.fence, ["paragraph", "reference", "blockquote", "list"]),
    (
        "blockquote",
        rules_block.blockquote,
        ["paragraph", "reference", "blockquote", "list"],
    ),
    ("hr", rules_block.hr, ["paragraph", "reference", "blockquote", "list"]),
    ("list", rules_block.list_block, ["paragraph", "reference", "blockquote"]),
    ("reference", rules_block.reference, []),
    ("html_block", rules_block.html_block, ["paragraph", "reference", "blockquote"]),
    ("heading", rules_block.heading, ["paragraph", "reference", "blockquote"]),
    ("lheading", rules_block.lheading, []),
    ("paragraph", rules_block.paragraph, []),
]
|
||||
|
||||
|
||||
class ParserBlock:
    """Block-level tokenizer.

    ``ruler`` is the [[Ruler]] instance that keeps the configuration of the
    block rules.
    """

    def __init__(self) -> None:
        self.ruler = Ruler[RuleFuncBlockType]()
        for rule_name, rule_func, terminates in _rules:
            self.ruler.push(rule_name, rule_func, {"alt": terminates})

    def tokenize(self, state: StateBlock, startLine: int, endLine: int) -> None:
        """Generate tokens for input range.

        :param state: block parser state, updated in place
        :param startLine: first line of the range
        :param endLine: line at which processing stops
        """
        active_rules = self.ruler.getRules("")
        nesting_limit = state.md.options.maxNesting
        saw_empty_line = False
        current = startLine

        while current < endLine:
            current = state.skipEmptyLines(current)
            state.line = current
            if current >= endLine:
                break
            if state.sCount[current] < state.blkIndent:
                # Termination condition for nested calls.
                # Nested calls currently used for blockquotes & lists
                break
            if state.level >= nesting_limit:
                # If nesting level exceeded - skip tail to the end.
                # That's not ordinary situation and we should not care about content.
                state.line = endLine
                break

            # Try the rules in order and stop at the first one that matches.
            # On success, a rule should:
            # - update `state.line`
            # - update `state.tokens`
            # - return True
            any(rule(state, current, endLine, False) for rule in active_rules)

            # set state.tight if we had an empty line before current tag
            # i.e. latest empty line should not count
            state.tight = not saw_empty_line

            current = state.line

            # paragraph might "eat" one newline after it in nested lists
            if (current - 1) < endLine and state.isEmpty(current - 1):
                saw_empty_line = True

            if current < endLine and state.isEmpty(current):
                saw_empty_line = True
                current += 1
                state.line = current

    def parse(
        self, src: str, md: MarkdownIt, env: EnvType, outTokens: list[Token]
    ) -> list[Token] | None:
        """Process input string and push block tokens into `outTokens`.

        :returns: the tokens of the parser state, or ``None`` when ``src``
            is empty
        """
        if src:
            state = StateBlock(src, md, env, outTokens)
            self.tokenize(state, state.line, state.lineMax)
            return state.tokens
        return None
|
|
@ -1,45 +0,0 @@
|
|||
"""
|
||||
* class Core
|
||||
*
|
||||
* Top-level rules executor. Glues block/inline parsers and does intermediate
|
||||
* transformations.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Callable
|
||||
|
||||
from .ruler import Ruler
|
||||
from .rules_core import (
|
||||
block,
|
||||
inline,
|
||||
linkify,
|
||||
normalize,
|
||||
replace,
|
||||
smartquotes,
|
||||
text_join,
|
||||
)
|
||||
from .rules_core.state_core import StateCore
|
||||
|
||||
# A core rule takes the StateCore and returns nothing.
RuleFuncCoreType = Callable[[StateCore], None]

# Core rules as (name, function) pairs. The mapping literal keeps the order in
# which the entries are written, and `.items()` yields them as tuples.
_rules: list[tuple[str, RuleFuncCoreType]] = list(
    {
        "normalize": normalize,
        "block": block,
        "inline": inline,
        "linkify": linkify,
        "replacements": replace,
        "smartquotes": smartquotes,
        "text_join": text_join,
    }.items()
)
|
||||
|
||||
|
||||
class ParserCore:
    """Runner for the core rule chain stored in ``self.ruler``."""

    def __init__(self) -> None:
        self.ruler = Ruler[RuleFuncCoreType]()
        # Each table entry is a (name, rule) pair, matching push(name, rule).
        for entry in _rules:
            self.ruler.push(*entry)

    def process(self, state: StateCore) -> None:
        """Call every rule of the default chain, in order, with ``state``."""
        chain = self.ruler.getRules("")
        for step in chain:
            step(state)
|
|
@ -1,147 +0,0 @@
|
|||
"""Tokenizes paragraph content.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Callable
|
||||
|
||||
from . import rules_inline
|
||||
from .ruler import Ruler
|
||||
from .rules_inline.state_inline import StateInline
|
||||
from .token import Token
|
||||
from .utils import EnvType
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from markdown_it import MarkdownIt
|
||||
|
||||
|
||||
# Inline parser rules
RuleFuncInlineType = Callable[[StateInline, bool], bool]
"""(state: StateInline, silent: bool) -> matched: bool

`silent` disables token generation, useful for lookahead.
"""

# Tokenizing rules as (name, function) pairs, listed in the order in which
# they are pushed to the main ruler.
_rules: list[tuple[str, RuleFuncInlineType]] = list(
    {
        "text": rules_inline.text,
        "linkify": rules_inline.linkify,
        "newline": rules_inline.newline,
        "escape": rules_inline.escape,
        "backticks": rules_inline.backtick,
        "strikethrough": rules_inline.strikethrough.tokenize,
        "emphasis": rules_inline.emphasis.tokenize,
        "link": rules_inline.link,
        "image": rules_inline.image,
        "autolink": rules_inline.autolink,
        "html_inline": rules_inline.html_inline,
        "entity": rules_inline.entity,
    }.items()
)

# Note: the `rule2` ruleset was created specifically for emphasis/strikethrough
# post-processing and may be changed in the future.
#
# Don't use it for anything except pairs (plugins working with `balance_pairs`).
RuleFuncInline2Type = Callable[[StateInline], None]

_rules2: list[tuple[str, RuleFuncInline2Type]] = list(
    {
        "balance_pairs": rules_inline.link_pairs,
        "strikethrough": rules_inline.strikethrough.postProcess,
        "emphasis": rules_inline.emphasis.postProcess,
        # The pair rules separate '**' into its own text tokens, which may be
        # left unused; the rule below merges unused segments back with the
        # rest of the text.
        "fragments_join": rules_inline.fragments_join,
    }.items()
)
|
||||
|
||||
|
||||
class ParserInline:
    """Inline-level parser.

    ``ruler`` holds the tokenizing rules from the module-level ``_rules``
    table; ``ruler2`` holds the post-processing rules from ``_rules2``.
    """

    def __init__(self) -> None:
        self.ruler = Ruler[RuleFuncInlineType]()
        for name, rule in _rules:
            self.ruler.push(name, rule)
        # Second ruler used for post-processing (e.g. in emphasis-like rules)
        self.ruler2 = Ruler[RuleFuncInline2Type]()
        for name, rule2 in _rules2:
            self.ruler2.push(name, rule2)

    def skipToken(self, state: StateInline) -> None:
        """Skip a single token by running all rules in validation mode.

        The outcome is reported only through ``state.pos``: it is moved past
        the token a rule matched, or advanced by one when no rule matched.
        The resulting position is stored in ``state.cache`` keyed by the
        starting position, and reused on later calls from the same position.
        """
        ok = False
        pos = state.pos
        rules = self.ruler.getRules("")
        maxNesting = state.md.options["maxNesting"]
        cache = state.cache

        if pos in cache:
            state.pos = cache[pos]
            return

        if state.level < maxNesting:
            for rule in rules:
                # Increment state.level and decrement it later to limit recursion.
                # It's harmless to do here, because no tokens are created.
                # But ideally, we'd need a separate private state variable for this purpose.
                state.level += 1
                ok = rule(state, True)
                state.level -= 1
                if ok:
                    break
        else:
            # Too much nesting, just skip until the end of the paragraph.
            #
            # NOTE: this will cause links to behave incorrectly in the following case,
            #       when an amount of `[` is exactly equal to `maxNesting + 1`:
            #
            #       [[[[[[[[[[[[[[[[[[[[[foo]()
            #
            # TODO: remove this workaround when CM standard will allow nested links
            #       (we can replace it by preventing links from being parsed in
            #       validation mode)
            #
            state.pos = state.posMax

        if not ok:
            state.pos += 1
        cache[pos] = state.pos

    def tokenize(self, state: StateInline) -> None:
        """Generate tokens for the input range ``state.pos`` .. ``state.posMax``.

        Characters that no rule consumes are accumulated in ``state.pending``,
        which is flushed with ``state.pushPending()`` at the end if non-empty.
        """
        rules = self.ruler.getRules("")
        end = state.posMax
        maxNesting = state.md.options["maxNesting"]

        while state.pos < end:
            # Reset on every pass. If the rules are skipped because the nesting
            # limit has been reached, a result left over from an earlier pass
            # must not be mistaken for a fresh match: that would `continue`
            # without advancing `state.pos` and never terminate.
            ok = False

            # Try all possible rules.
            # On success, rule should:
            #
            # - update `state.pos`
            # - update `state.tokens`
            # - return true
            if state.level < maxNesting:
                for rule in rules:
                    ok = rule(state, False)
                    if ok:
                        break

            if ok:
                if state.pos >= end:
                    break
                continue

            # No rule matched here: keep the character as pending text.
            state.pending += state.src[state.pos]
            state.pos += 1

        if state.pending:
            state.pushPending()

    def parse(
        self, src: str, md: MarkdownIt, env: EnvType, tokens: list[Token]
    ) -> list[Token]:
        """Process the input string and push inline tokens into ``tokens``.

        Runs :meth:`tokenize` on a new ``StateInline`` and then every rule of
        ``ruler2``'s default chain on that state.

        :return: ``state.tokens`` of the state that was built
        """
        state = StateInline(src, md, env, tokens)
        self.tokenize(state)
        rules2 = self.ruler2.getRules("")
        for rule in rules2:
            rule(state)
        return state.tokens
|
|
@ -1,48 +0,0 @@
|
|||
- package: markdown-it/markdown-it
|
||||
version: 13.0.1
|
||||
commit: e843acc9edad115cbf8cf85e676443f01658be08
|
||||
date: May 3, 2022
|
||||
notes:
|
||||
- Rename variables that use python built-in names, e.g.
|
||||
- `max` -> `maximum`
|
||||
- `len` -> `length`
|
||||
- `str` -> `string`
|
||||
- |
|
||||
Convert JS `for` loops to `while` loops
|
||||
this is generally the main difference between the codes,
|
||||
because in python you can't do e.g. `for {i=1;i<x;i++} {}`
|
||||
- |
|
||||
`env` is a common Python dictionary, and so does not have attribute access to keys,
|
||||
as with JavaScript dictionaries.
|
||||
`options` have attribute access only to core markdownit configuration options
|
||||
- |
|
||||
`Token.attrs` is a dictionary, instead of a list of lists.
|
||||
Upstream the list format is only used to guarantee order: https://github.com/markdown-it/markdown-it/issues/142,
|
||||
but in Python 3.7+ order of dictionaries is guaranteed.
|
||||
One should anyhow use the `attrGet`, `attrSet`, `attrPush` and `attrJoin` methods
|
||||
to manipulate `Token.attrs`, which have an identical signature to those upstream.
|
||||
- Use python version of `charCodeAt`
|
||||
- |
|
||||
Use `str` units instead of `int`s to represent Unicode codepoints.
|
||||
This provides a significant performance boost
|
||||
- |
|
||||
In markdown_it/rules_block/reference.py,
|
||||
record line range in state.env["references"] and add state.env["duplicate_refs"]
|
||||
This is to allow renderers to report on issues regarding references
|
||||
- |
|
||||
The `MarkdownIt.__init__` signature is slightly different for updating options,
|
||||
since you must always specify the config first, e.g.
|
||||
use `MarkdownIt("commonmark", {"html": False})` instead of `MarkdownIt({"html": False})`
|
||||
- The default configuration preset for `MarkdownIt` is "commonmark" not "default"
|
||||
- Allow custom renderer to be passed to `MarkdownIt`
|
||||
- |
|
||||
change render method signatures
|
||||
`func(tokens, idx, options, env, slf)` to
|
||||
`func(self, tokens, idx, options, env)`
|
||||
- |
|
||||
Extensions add render methods by format
|
||||
`MarkdownIt.add_render_rule(name, function, fmt="html")`,
|
||||
rather than `MarkdownIt.renderer.rules[name] = function`
|
||||
and renderers should declare a class property `__output__ = "html"`.
|
||||
This allows for extensibility to more than just HTML renderers
|
||||
- inline tokens in tables are assigned a map (this is helpful for propagation to children)
|
|
@ -1,28 +0,0 @@
|
|||
__all__ = ("commonmark", "default", "zero", "js_default", "gfm_like")
|
||||
|
||||
from . import commonmark, default, zero
|
||||
from ..utils import PresetType
|
||||
|
||||
js_default = default
|
||||
|
||||
|
||||
class gfm_like:  # noqa: N801
    """GitHub Flavoured Markdown (GFM) like preset.

    Starts from the CommonMark preset and adds the linkify, table and
    strikethrough components.

    Note that it lacks task-list items and raw HTML filtering, both of which
    would be needed to meet the full GFM specification
    (see https://github.github.com/gfm/#autolinks-extension-).
    """

    @staticmethod
    def make() -> PresetType:
        """Return the CommonMark preset extended with the GFM-like pieces."""
        preset = commonmark.make()

        components = preset["components"]
        components["core"]["rules"].append("linkify")
        components["block"]["rules"].append("table")

        inline = components["inline"]
        inline["rules"].extend(["strikethrough", "linkify"])
        inline["rules2"].append("strikethrough")

        preset["options"].update(linkify=True, html=True)
        return preset
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -1,74 +0,0 @@
|
|||
"""Commonmark default options.
|
||||
|
||||
This differs to presets.default,
|
||||
primarily in that it allows HTML and does not enable components:
|
||||
|
||||
- block: table
|
||||
- inline: strikethrough
|
||||
"""
|
||||
from ..utils import PresetType
|
||||
|
||||
|
||||
def make() -> PresetType:
    """Return a newly built "commonmark" preset (options plus enabled rules)."""
    options = {
        # Internal protection, recursion limit
        "maxNesting": 20,
        # Enable HTML tags in source;
        # this is just a shorthand for .enable(["html_inline", "html_block"])
        "html": True,
        # Used by the linkify rule: autoconvert URL-like texts to links
        "linkify": False,
        # Used by the replacements and smartquotes rules: enable some
        # language-neutral replacements + quotes beautification
        "typographer": False,
        # Used by the smartquotes rule: double + single quotes replacement
        # pairs, when typographer is enabled and smartquotes is on.
        # Could be either a String or an Array.
        #
        # For example, you can use '«»„“' for Russian, '„“‚‘' for German,
        # and ['«\xA0', '\xA0»', '‹\xA0', '\xA0›'] for French (including nbsp).
        "quotes": "\u201c\u201d\u2018\u2019",  # /* “”‘’ */
        # Renderer specific; these options are used directly in the HTML renderer
        "xhtmlOut": True,  # Use '/' to close single tags (<br />)
        "breaks": False,  # Convert '\n' in paragraphs into <br>
        "langPrefix": "language-",  # CSS language prefix for fenced blocks
        # Highlighter function. Should return escaped HTML,
        # or '' if the source string is not changed and should be escaped externally.
        # If result starts with <pre... internal wrapper is skipped.
        #
        # function (/*str, lang, attrs*/) { return ''; }
        #
        "highlight": None,
    }

    block_rules = [
        "blockquote",
        "code",
        "fence",
        "heading",
        "hr",
        "html_block",
        "lheading",
        "list",
        "reference",
        "paragraph",
    ]
    inline_rules = [
        "autolink",
        "backticks",
        "emphasis",
        "entity",
        "escape",
        "html_inline",
        "image",
        "link",
        "newline",
        "text",
    ]
    components = {
        "core": {"rules": ["normalize", "block", "inline", "text_join"]},
        "block": {"rules": block_rules},
        "inline": {
            "rules": inline_rules,
            "rules2": ["balance_pairs", "emphasis", "fragments_join"],
        },
    }

    return {"options": options, "components": components}
|
|
@ -1,35 +0,0 @@
|
|||
"""markdown-it default options."""
|
||||
from ..utils import PresetType
|
||||
|
||||
|
||||
def make() -> PresetType:
    """Return a newly built "default" preset: options only, empty component configs."""
    options = {
        # Internal protection, recursion limit
        "maxNesting": 100,
        # Enable HTML tags in source;
        # this is just a shorthand for .disable(["html_inline", "html_block"])
        "html": False,
        # Used by the linkify rule: autoconvert URL-like texts to links
        "linkify": False,
        # Used by the replacements and smartquotes rules: enable some
        # language-neutral replacements + quotes beautification
        "typographer": False,
        # Used by the smartquotes rule: double + single quotes replacement
        # pairs, when typographer is enabled and smartquotes is on.
        # Could be either a String or an Array.
        # For example, you can use '«»„“' for Russian, '„“‚‘' for German,
        # and ['«\xA0', '\xA0»', '‹\xA0', '\xA0›'] for French (including nbsp).
        "quotes": "\u201c\u201d\u2018\u2019",  # /* “”‘’ */
        # Renderer specific; these options are used directly in the HTML renderer
        "xhtmlOut": False,  # Use '/' to close single tags (<br />)
        "breaks": False,  # Convert '\n' in paragraphs into <br>
        "langPrefix": "language-",  # CSS language prefix for fenced blocks
        # Highlighter function. Should return escaped HTML,
        # or '' if the source string is not changed and should be escaped externally.
        # If result starts with <pre... internal wrapper is skipped.
        #
        # function (/*str, lang, attrs*/) { return ''; }
        #
        "highlight": None,
    }
    components = {"core": {}, "block": {}, "inline": {}}
    return {"options": options, "components": components}
|
|
@ -1,43 +0,0 @@
|
|||
"""
|
||||
"Zero" preset, with nothing enabled. Useful for manual configuring of simple
|
||||
modes. For example, to parse bold/italic only.
|
||||
"""
|
||||
from ..utils import PresetType
|
||||
|
||||
|
||||
def make() -> PresetType:
    """Return a newly built "zero" preset: only the minimal rules are listed."""
    options = {
        # Internal protection, recursion limit
        "maxNesting": 20,
        # Enable HTML tags in source;
        # this is just a shorthand for .disable(["html_inline", "html_block"])
        "html": False,
        # Used by the linkify rule: autoconvert URL-like texts to links
        "linkify": False,
        # Used by the replacements and smartquotes rules: enable some
        # language-neutral replacements + quotes beautification
        "typographer": False,
        # Used by the smartquotes rule: double + single quotes replacement
        # pairs, when typographer is enabled and smartquotes is on.
        # Could be either a String or an Array.
        # For example, you can use '«»„“' for Russian, '„“‚‘' for German,
        # and ['«\xA0', '\xA0»', '‹\xA0', '\xA0›'] for French (including nbsp).
        "quotes": "\u201c\u201d\u2018\u2019",  # /* “”‘’ */
        # Renderer specific; these options are used directly in the HTML renderer
        "xhtmlOut": False,  # Use '/' to close single tags (<br />)
        "breaks": False,  # Convert '\n' in paragraphs into <br>
        "langPrefix": "language-",  # CSS language prefix for fenced blocks
        # Highlighter function. Should return escaped HTML,
        # or '' if the source string is not changed and should be escaped externally.
        # If result starts with <pre... internal wrapper is skipped.
        # function (/*str, lang, attrs*/) { return ''; }
        "highlight": None,
    }
    components = {
        "core": {"rules": ["normalize", "block", "inline", "text_join"]},
        "block": {"rules": ["paragraph"]},
        "inline": {
            "rules": ["text"],
            "rules2": ["balance_pairs", "fragments_join"],
        },
    }
    return {"options": options, "components": components}
|
|
@ -1 +0,0 @@
|
|||
# Marker file for PEP 561
|
|
@ -1,336 +0,0 @@
|
|||
"""
|
||||
class Renderer
|
||||
|
||||
Generates HTML from parsed token stream. Each instance has independent
|
||||
copy of rules. Those can be rewritten with ease. Also, you can add new
|
||||
rules if you create a plugin that adds new token types.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Sequence
|
||||
import inspect
|
||||
from typing import Any, ClassVar, Protocol
|
||||
|
||||
from .common.utils import escapeHtml, unescapeAll
|
||||
from .token import Token
|
||||
from .utils import EnvType, OptionsDict
|
||||
|
||||
|
||||
class RendererProtocol(Protocol):
    """Structural interface of a renderer: an output tag plus ``render``."""

    # Declared as a class-level string; RendererHTML sets it to "html".
    __output__: ClassVar[str]

    def render(
        self, tokens: Sequence[Token], options: OptionsDict, env: EnvType
    ) -> Any:
        """Render ``tokens`` using ``options`` and ``env``."""
        ...
|
||||
|
||||
|
||||
class RendererHTML(RendererProtocol):
    """Contains render rules for tokens. Can be updated and extended.

    On construction, every bound method whose name starts with neither
    ``render`` nor ``_`` is registered in ``self.rules`` under its own name.
    A token whose ``type`` equals a registered name is rendered by that rule;
    any other token falls back to :meth:`renderToken`.

    Each rule is a method with a fixed signature::

        class Renderer:
            def token_type_name(self, tokens, idx, options, env):
                # ...
                return renderedHTML

    Rules can be replaced by subclassing::

        class CustomRenderer(RendererHTML):
            def strong_open(self, tokens, idx, options, env):
                return '<b>'
            def strong_close(self, tokens, idx, options, env):
                return '</b>'

        md = MarkdownIt(renderer_cls=CustomRenderer)

        result = md.render(...)

    See https://github.com/markdown-it/markdown-it/blob/master/lib/renderer.js
    for more details and examples.
    """

    __output__ = "html"

    def __init__(self, parser: Any = None):
        """Collect the render rules defined on this instance.

        :param parser: accepted, but not used by this constructor
        """
        self.rules = {
            name: method
            for name, method in inspect.getmembers(self, predicate=inspect.ismethod)
            if not name.startswith(("render", "_"))
        }

    def render(
        self, tokens: Sequence[Token], options: OptionsDict, env: EnvType
    ) -> str:
        """Takes token stream and generates HTML.

        :param tokens: list of block tokens to render
        :param options: params of parser instance
        :param env: additional data from parsed input
        :return: the concatenated output of all tokens
        """
        # Collect the pieces and join once instead of growing a string.
        chunks: list[str] = []

        for i, token in enumerate(tokens):
            if token.type == "inline":
                # An inline token without children contributes nothing.
                if token.children:
                    chunks.append(self.renderInline(token.children, options, env))
            elif token.type in self.rules:
                chunks.append(self.rules[token.type](tokens, i, options, env))
            else:
                chunks.append(self.renderToken(tokens, i, options, env))

        return "".join(chunks)

    def renderInline(
        self, tokens: Sequence[Token], options: OptionsDict, env: EnvType
    ) -> str:
        """The same as ``render``, but for the children of a single `inline` token.

        :param tokens: list of inline tokens to render
        :param options: params of parser instance
        :param env: additional data from parsed input (references, for example)
        :return: the concatenated output of all tokens
        """
        chunks: list[str] = []

        for i, token in enumerate(tokens):
            if token.type in self.rules:
                chunks.append(self.rules[token.type](tokens, i, options, env))
            else:
                chunks.append(self.renderToken(tokens, i, options, env))

        return "".join(chunks)

    def renderToken(
        self,
        tokens: Sequence[Token],
        idx: int,
        options: OptionsDict,
        env: EnvType,
    ) -> str:
        """Default token renderer.

        Can be overridden by custom function

        :param tokens: token list that contains the token to render
        :param idx: token index to render
        :param options: params of parser instance
        :param env: not used by this method
        :return: the opening, closing or self-closing tag for the token,
            or an empty string for a hidden token
        """
        token = tokens[idx]

        # Tight list paragraphs
        if token.hidden:
            return ""

        result = ""

        # Insert a newline between hidden paragraph and subsequent opening
        # block-level tag.
        #
        # For example, here we should insert a newline before blockquote:
        #  - a
        #    >
        #
        if token.block and token.nesting != -1 and idx and tokens[idx - 1].hidden:
            result += "\n"

        # Add token name, e.g. `<img`
        result += ("</" if token.nesting == -1 else "<") + token.tag

        # Encode attributes, e.g. `<img src="foo"`
        result += self.renderAttrs(token)

        # Add a slash for self-closing tags, e.g. `<img src="foo" /`
        if token.nesting == 0 and options["xhtmlOut"]:
            result += " /"

        # Check if we need to add a newline after this tag
        needLf = False
        if token.block:
            needLf = True

            if token.nesting == 1 and (idx + 1 < len(tokens)):
                nextToken = tokens[idx + 1]

                if nextToken.type == "inline" or nextToken.hidden:  # noqa: SIM114
                    # Block-level tag containing an inline tag.
                    needLf = False

                elif nextToken.nesting == -1 and nextToken.tag == token.tag:
                    # Opening tag + closing tag of the same type. E.g. `<li></li>`.
                    needLf = False

        result += ">\n" if needLf else ">"

        return result

    @staticmethod
    def renderAttrs(token: Token) -> str:
        """Render token attributes to string.

        Each attribute becomes `` key="value"`` (with a leading space); both
        parts are passed through ``escapeHtml``.
        """
        return "".join(
            " " + escapeHtml(key) + '="' + escapeHtml(str(value)) + '"'
            for key, value in token.attrItems()
        )

    def renderInlineAsText(
        self,
        tokens: Sequence[Token] | None,
        options: OptionsDict,
        env: EnvType,
    ) -> str:
        """Special kludge for image `alt` attributes to conform CommonMark spec.

        Don't try to use it! Spec requires to show `alt` content with stripped markup,
        instead of simple escaping.

        :param tokens: list of inline tokens to render; ``None`` is treated as empty
        :param options: params of parser instance
        :param env: additional data from parsed input
        :return: text content of the tokens, with a newline per softbreak
        """
        chunks: list[str] = []

        for token in tokens or []:
            if token.type == "text":
                chunks.append(token.content)
            elif token.type == "image":
                if token.children:
                    chunks.append(
                        self.renderInlineAsText(token.children, options, env)
                    )
            elif token.type == "softbreak":
                chunks.append("\n")
            # Every other token type is markup and is dropped.

        return "".join(chunks)

    ###################################################

    def code_inline(
        self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType
    ) -> str:
        """Render an inline code token as ``<code>`` with escaped content."""
        token = tokens[idx]
        return (
            "<code"
            + self.renderAttrs(token)
            + ">"
            + escapeHtml(token.content)
            + "</code>"
        )

    def code_block(
        self,
        tokens: Sequence[Token],
        idx: int,
        options: OptionsDict,
        env: EnvType,
    ) -> str:
        """Render a code block token as ``<pre><code>`` with escaped content."""
        token = tokens[idx]

        return (
            "<pre"
            + self.renderAttrs(token)
            + "><code>"
            + escapeHtml(token.content)
            + "</code></pre>\n"
        )

    def fence(
        self,
        tokens: Sequence[Token],
        idx: int,
        options: OptionsDict,
        env: EnvType,
    ) -> str:
        """Render a fenced code block.

        The first whitespace-separated word of the info string is the language
        name, the remainder the language attributes. ``options.highlight``,
        when set, is called with the content, name and attributes; an empty
        result falls back to the escaped content. A highlighted result that
        starts with ``<pre`` is returned as-is (plus a newline).
        """
        token = tokens[idx]
        info = unescapeAll(token.info).strip() if token.info else ""
        langName = ""
        langAttrs = ""

        if info:
            arr = info.split(maxsplit=1)
            langName = arr[0]
            if len(arr) == 2:
                langAttrs = arr[1]

        if options.highlight:
            highlighted = options.highlight(
                token.content, langName, langAttrs
            ) or escapeHtml(token.content)
        else:
            highlighted = escapeHtml(token.content)

        if highlighted.startswith("<pre"):
            return highlighted + "\n"

        attrSource = token
        # If language exists, inject class gently, without modifying original token.
        # May be, one day we will add .deepClone() for token and simplify this part, but
        # now we prefer to keep things local.
        if info:
            # Fake token just to render attributes
            attrSource = Token(type="", tag="", nesting=0, attrs=token.attrs.copy())
            attrSource.attrJoin("class", options.langPrefix + langName)

        return (
            "<pre><code"
            + self.renderAttrs(attrSource)
            + ">"
            + highlighted
            + "</code></pre>\n"
        )

    def image(
        self,
        tokens: Sequence[Token],
        idx: int,
        options: OptionsDict,
        env: EnvType,
    ) -> str:
        """Render an image token; sets the token's ``alt`` attribute first."""
        token = tokens[idx]

        # "alt" attr MUST be set, even if empty. Because it's mandatory and
        # should be placed on proper position for tests.
        if token.children:
            token.attrSet("alt", self.renderInlineAsText(token.children, options, env))
        else:
            token.attrSet("alt", "")

        return self.renderToken(tokens, idx, options, env)

    def hardbreak(
        self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType
    ) -> str:
        """Render a hard line break, self-closed when ``options.xhtmlOut`` is set."""
        return "<br />\n" if options.xhtmlOut else "<br>\n"

    def softbreak(
        self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType
    ) -> str:
        """Render a soft break: a ``<br>`` if ``options.breaks``, else a newline."""
        if not options.breaks:
            return "\n"
        return "<br />\n" if options.xhtmlOut else "<br>\n"

    def text(
        self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType
    ) -> str:
        """Render a text token as its escaped content."""
        return escapeHtml(tokens[idx].content)

    def html_block(
        self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType
    ) -> str:
        """Return the content of an HTML block token unchanged (not escaped)."""
        return tokens[idx].content

    def html_inline(
        self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType
    ) -> str:
        """Return the content of an inline HTML token unchanged (not escaped)."""
        return tokens[idx].content
|
|
@ -1,276 +0,0 @@
|
|||
"""
|
||||
class Ruler
|
||||
|
||||
Helper class, used by [[MarkdownIt#core]], [[MarkdownIt#block]] and
|
||||
[[MarkdownIt#inline]] to manage sequences of functions (rules):
|
||||
|
||||
- keep rules in defined order
|
||||
- assign the name to each rule
|
||||
- enable/disable rules
|
||||
- add/replace rules
|
||||
- allow assign rules to additional named chains (in the same)
|
||||
- caching lists of active rules
|
||||
|
||||
You will not need to use this class directly until you write plugins. For simple
|
||||
rules control use [[MarkdownIt.disable]], [[MarkdownIt.enable]] and
|
||||
[[MarkdownIt.use]].
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Iterable
|
||||
from dataclasses import dataclass, field
|
||||
from typing import TYPE_CHECKING, Generic, TypedDict, TypeVar
|
||||
import warnings
|
||||
|
||||
from markdown_it._compat import DATACLASS_KWARGS
|
||||
|
||||
from .utils import EnvType
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from markdown_it import MarkdownIt
|
||||
|
||||
|
||||
class StateBase:
    """Base parsing state: the source text, the environment and the parser."""

    def __init__(self, src: str, md: MarkdownIt, env: EnvType):
        # Goes through the `src` setter, which also clears the char-code cache.
        self.src = src
        self.env = env
        self.md = md

    @property
    def src(self) -> str:
        """The source text."""
        return self._src

    @src.setter
    def src(self, value: str) -> None:
        self._src = value
        # Cached code points of `_src`; dropped whenever the text is replaced.
        self._srcCharCode: tuple[int, ...] | None = None

    @property
    def srcCharCode(self) -> tuple[int, ...]:
        """Deprecated: code points of ``src``, computed on first access and cached.

        Emits a ``DeprecationWarning`` on every access.
        """
        warnings.warn(
            "StateBase.srcCharCode is deprecated. Use StateBase.src instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        codes = self._srcCharCode
        if codes is None:
            codes = tuple(map(ord, self._src))
            self._srcCharCode = codes
        return codes
|
||||
|
||||
|
||||
class RuleOptionsType(TypedDict, total=False):
    """Options accepted when adding or replacing a rule; every key is optional."""

    # Names of the alternative chains the rule belongs to.
    alt: list[str]
|
||||
|
||||
|
||||
RuleFuncTv = TypeVar("RuleFuncTv")
|
||||
"""A rule function, whose signature is dependent on the state type."""
|
||||
|
||||
|
||||
@dataclass(**DATACLASS_KWARGS)
class Rule(Generic[RuleFuncTv]):
    """One entry of a ruler: a named rule function with its state."""

    # Name used to look the rule up.
    name: str
    # Disabled rules are left out when the rule chains are compiled.
    enabled: bool
    # The rule function itself; kept out of the generated repr.
    fn: RuleFuncTv = field(repr=False)
    # Names of the alternative chains this rule is part of.
    alt: list[str]
|
||||
|
||||
|
||||
class Ruler(Generic[RuleFuncTv]):
|
||||
def __init__(self) -> None:
    """Start with no rules and no compiled cache."""
    # Rules in the order they were added.
    self.__rules__: list[Rule[RuleFuncTv]] = []
    # Compiled rule chains: chain name ('' for the default chain) mapped to
    # the list of rule functions. Every method that changes the rules
    # resets this to None.
    self.__cache__: dict[str, list[RuleFuncTv]] | None = None
|
||||
|
||||
def __find__(self, name: str) -> int:
    """Return the index of the first rule called ``name``, or -1 if none matches."""
    return next(
        (
            position
            for position, candidate in enumerate(self.__rules__)
            if candidate.name == name
        ),
        -1,
    )
|
||||
|
||||
def __compile__(self) -> None:
    """Build the rules lookup cache.

    The cache maps each chain name to the functions of the enabled rules in
    that chain, in rule order. The default chain ``""`` contains every
    enabled rule; a named chain contains the enabled rules that list the
    name in ``alt``.
    """
    active = [rule for rule in self.__rules__ if rule.enabled]

    # Collect the unique chain names; the default chain always exists.
    chain_names = {""}
    for rule in active:
        chain_names.update(rule.alt)

    self.__cache__ = {
        chain: [rule.fn for rule in active if not chain or chain in rule.alt]
        for chain in chain_names
    }
|
||||
|
||||
def at(
    self, ruleName: str, fn: RuleFuncTv, options: RuleOptionsType | None = None
) -> None:
    """Replace the function and options of the rule called ``ruleName``.

    :param ruleName: name of the rule to replace.
    :param fn: new rule function.
    :param options: new rule options (not mandatory); without an ``alt``
        entry the rule's ``alt`` list becomes empty.
    :raises: KeyError if name not found
    """
    position = self.__find__(ruleName)
    if position == -1:
        raise KeyError(f"Parser rule not found: {ruleName}")

    target = self.__rules__[position]
    target.fn = fn
    target.alt = (options or {}).get("alt", [])
    self.__cache__ = None
|
||||
|
||||
def before(
    self,
    beforeName: str,
    ruleName: str,
    fn: RuleFuncTv,
    options: RuleOptionsType | None = None,
) -> None:
    """Add a new, enabled rule to the chain before the one with the given name.

    :param beforeName: existing rule; the new rule is inserted in front of it.
    :param ruleName: name of the new rule.
    :param fn: new rule function.
    :param options: new rule options (not mandatory).
    :raises: KeyError if ``beforeName`` is not found
    """
    anchor = self.__find__(beforeName)
    if anchor == -1:
        raise KeyError(f"Parser rule not found: {beforeName}")

    alt_chains = (options or {}).get("alt", [])
    self.__rules__.insert(anchor, Rule[RuleFuncTv](ruleName, True, fn, alt_chains))
    self.__cache__ = None
|
||||
|
||||
def after(
    self,
    afterName: str,
    ruleName: str,
    fn: RuleFuncTv,
    options: RuleOptionsType | None = None,
) -> None:
    """Add a new, enabled rule to the chain after the one with the given name.

    :param afterName: existing rule; the new rule is inserted right behind it.
    :param ruleName: name of the new rule.
    :param fn: new rule function.
    :param options: new rule options (not mandatory).
    :raises: KeyError if ``afterName`` is not found
    """
    anchor = self.__find__(afterName)
    if anchor == -1:
        raise KeyError(f"Parser rule not found: {afterName}")

    alt_chains = (options or {}).get("alt", [])
    self.__rules__.insert(
        anchor + 1, Rule[RuleFuncTv](ruleName, True, fn, alt_chains)
    )
    self.__cache__ = None
|
||||
|
||||
def push(
    self, ruleName: str, fn: RuleFuncTv, options: RuleOptionsType | None = None
) -> None:
    """Append a new rule to the end of the chain.

    :param ruleName: name of the rule being added.
    :param fn: new rule function.
    :param options: new rule options (not mandatory); only ``"alt"`` is read.
    """
    alt_chains = (options or {}).get("alt", [])
    new_rule = Rule[RuleFuncTv](ruleName, True, fn, alt_chains)
    self.__rules__.append(new_rule)

    # Drop the compiled chains so the next lookup rebuilds them.
    self.__cache__ = None
|
||||
|
||||
def enable(
    self, names: str | Iterable[str], ignoreInvalid: bool = False
) -> list[str]:
    """Enable rules with given names.

    :param names: name or list of rule names to enable.
    :param ignoreInvalid: ignore errors when rule not found
    :raises: KeyError if name not found and not ignoreInvalid
    :return: list of found rule names
    """
    if isinstance(names, str):
        names = [names]

    result: list[str] = []
    try:
        for name in names:
            idx = self.__find__(name)
            if idx < 0:
                if ignoreInvalid:
                    continue
                raise KeyError(f"Rules manager: invalid rule name {name}")
            self.__rules__[idx].enabled = True
            result.append(name)
    finally:
        # Invalidate even when an unknown name aborts the loop: rules that
        # were already switched on must not stay hidden behind a stale cache.
        self.__cache__ = None
    return result
|
||||
|
||||
def enableOnly(
    self, names: str | Iterable[str], ignoreInvalid: bool = False
) -> list[str]:
    """Enable rules with given names, and disable everything else.

    :param names: name or list of rule names to enable.
    :param ignoreInvalid: ignore errors when rule not found
    :raises: KeyError if name not found and not ignoreInvalid
    :return: list of found rule names
    """
    if isinstance(names, str):
        names = [names]

    for rule in self.__rules__:
        rule.enabled = False
    # Every rule has just been switched off, so the compiled chains are stale
    # from this point on -- invalidate now in case ``enable`` raises below.
    self.__cache__ = None

    return self.enable(names, ignoreInvalid)
|
||||
|
||||
def disable(
    self, names: str | Iterable[str], ignoreInvalid: bool = False
) -> list[str]:
    """Disable rules with given names.

    :param names: name or list of rule names to disable.
    :param ignoreInvalid: ignore errors when rule not found
    :raises: KeyError if name not found and not ignoreInvalid
    :return: list of found rule names
    """
    if isinstance(names, str):
        names = [names]

    result: list[str] = []
    try:
        for name in names:
            idx = self.__find__(name)
            if idx < 0:
                if ignoreInvalid:
                    continue
                raise KeyError(f"Rules manager: invalid rule name {name}")
            self.__rules__[idx].enabled = False
            result.append(name)
    finally:
        # Invalidate even when an unknown name aborts the loop: rules that
        # were already switched off must not stay active via a stale cache.
        self.__cache__ = None
    return result
|
||||
|
||||
def getRules(self, chainName: str = "") -> list[RuleFuncTv]:
    """Return the list of active rule functions for the given chain name.

    The compiled cache is built on demand (when it is ``None``) and the
    chain is then looked up in it.

    Default chain name is `''` (empty string). It can't be skipped.
    That's done intentionally, to keep signature monomorphic for high speed.
    """
    compiled = self.__cache__
    if compiled is None:
        self.__compile__()
        compiled = self.__cache__
        assert compiled is not None

    chain = compiled.get(chainName)
    # An unknown chain or a chain emptied by disabled rules still yields a list.
    return chain if chain else []
|
||||
|
||||
def get_all_rules(self) -> list[str]:
    """Return the names of all registered rules, in chain order."""
    names: list[str] = []
    for rule in self.__rules__:
        names.append(rule.name)
    return names
|
||||
|
||||
def get_active_rules(self) -> list[str]:
    """Return the names of the rules that are currently enabled, in chain order."""
    active: list[str] = []
    for rule in self.__rules__:
        if rule.enabled:
            active.append(rule.name)
    return active
|
|
@ -1,27 +0,0 @@
|
|||
__all__ = (
|
||||
"StateBlock",
|
||||
"paragraph",
|
||||
"heading",
|
||||
"lheading",
|
||||
"code",
|
||||
"fence",
|
||||
"hr",
|
||||
"list_block",
|
||||
"reference",
|
||||
"blockquote",
|
||||
"html_block",
|
||||
"table",
|
||||
)
|
||||
|
||||
from .blockquote import blockquote
|
||||
from .code import code
|
||||
from .fence import fence
|
||||
from .heading import heading
|
||||
from .hr import hr
|
||||
from .html_block import html_block
|
||||
from .lheading import lheading
|
||||
from .list import list_block
|
||||
from .paragraph import paragraph
|
||||
from .reference import reference
|
||||
from .state_block import StateBlock
|
||||
from .table import table
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -1,299 +0,0 @@
|
|||
# Block quotes
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
from ..common.utils import isStrSpace
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule for ``>`` block quotes.

    Returns ``False`` when ``startLine`` is an indented code block or does not
    begin with ``>``.  In ``silent`` (validation) mode it returns ``True`` as
    soon as the marker is recognised.  Otherwise it rewrites the per-line
    offsets (``bMarks``, ``tShift``, ``sCount``, ``bsCount``) so that the quoted
    lines look as if the ``>`` prefix were not there, tokenizes those lines
    between ``blockquote_open`` / ``blockquote_close`` tokens, restores every
    offset it changed and returns ``True``.

    :param state: block parser state; mutated while parsing and restored
        before returning ``True``.
    :param startLine: index of the first line to examine.
    :param endLine: index one past the last line that may be consumed.
    :param silent: when true, only report whether a block quote starts here.
    """
    LOGGER.debug(
        "entering blockquote: %s, %s, %s, %s", state, startLine, endLine, silent
    )

    oldLineMax = state.lineMax
    pos = state.bMarks[startLine] + state.tShift[startLine]
    max = state.eMarks[startLine]

    if state.is_code_block(startLine):
        return False

    # check the block quote marker
    try:
        if state.src[pos] != ">":
            return False
    except IndexError:
        return False
    pos += 1

    # we know that it's going to be a valid blockquote,
    # so no point trying to find the end of it in silent mode
    if silent:
        return True

    # set offset past spaces and ">"
    initial = offset = state.sCount[startLine] + 1

    try:
        second_char: str | None = state.src[pos]
    except IndexError:
        second_char = None

    # skip one optional space after '>'
    if second_char == " ":
        # ' > test '
        # ^ -- position start of line here:
        pos += 1
        initial += 1
        offset += 1
        adjustTab = False
        spaceAfterMarker = True
    elif second_char == "\t":
        spaceAfterMarker = True

        if (state.bsCount[startLine] + offset) % 4 == 3:
            # ' >\t test '
            # ^ -- position start of line here (tab has width==1)
            pos += 1
            initial += 1
            offset += 1
            adjustTab = False
        else:
            # ' >\t test '
            # ^ -- position start of line here + shift bsCount slightly
            # to make extra space appear
            adjustTab = True

    else:
        # NOTE: adjustTab is left unassigned on this path; the loop below only
        # reads it for a tab character, and second_char is neither a space
        # nor a tab here.
        spaceAfterMarker = False

    # Remember the original line start so it can be restored at the end.
    oldBMarks = [state.bMarks[startLine]]
    state.bMarks[startLine] = pos

    # Measure the whitespace that follows the marker (tabs expand to the
    # next multiple of 4 columns).
    while pos < max:
        ch = state.src[pos]

        if isStrSpace(ch):
            if ch == "\t":
                offset += (
                    4
                    - (offset + state.bsCount[startLine] + (1 if adjustTab else 0)) % 4
                )
            else:
                offset += 1

        else:
            break

        pos += 1

    oldBSCount = [state.bsCount[startLine]]
    state.bsCount[startLine] = (
        state.sCount[startLine] + 1 + (1 if spaceAfterMarker else 0)
    )

    # True when nothing but whitespace followed the marker on this line.
    lastLineEmpty = pos >= max

    oldSCount = [state.sCount[startLine]]
    state.sCount[startLine] = offset - initial

    oldTShift = [state.tShift[startLine]]
    state.tShift[startLine] = pos - state.bMarks[startLine]

    terminatorRules = state.md.block.ruler.getRules("blockquote")

    oldParentType = state.parentType
    state.parentType = "blockquote"

    # Search the end of the block
    #
    # Block ends with either:
    # 1. an empty line outside:
    # ```
    # > test
    #
    # ```
    # 2. an empty line inside:
    # ```
    # >
    # test
    # ```
    # 3. another tag:
    # ```
    # > test
    # - - -
    # ```

    # for (nextLine = startLine + 1; nextLine < endLine; nextLine++) {
    nextLine = startLine + 1
    while nextLine < endLine:
        # check if it's outdented, i.e. it's inside list item and indented
        # less than said list item:
        #
        # ```
        # 1. anything
        # > current blockquote
        # 2. checking this line
        # ```
        isOutdented = state.sCount[nextLine] < state.blkIndent

        pos = state.bMarks[nextLine] + state.tShift[nextLine]
        max = state.eMarks[nextLine]

        if pos >= max:
            # Case 1: line is not inside the blockquote, and this line is empty.
            break

        evaluatesTrue = state.src[pos] == ">" and not isOutdented
        pos += 1
        if evaluatesTrue:
            # This line is inside the blockquote.

            # set offset past spaces and ">"
            initial = offset = state.sCount[nextLine] + 1

            try:
                next_char: str | None = state.src[pos]
            except IndexError:
                next_char = None

            # skip one optional space after '>'
            if next_char == " ":
                # ' > test '
                # ^ -- position start of line here:
                pos += 1
                initial += 1
                offset += 1
                adjustTab = False
                spaceAfterMarker = True
            elif next_char == "\t":
                spaceAfterMarker = True

                if (state.bsCount[nextLine] + offset) % 4 == 3:
                    # ' >\t test '
                    # ^ -- position start of line here (tab has width==1)
                    pos += 1
                    initial += 1
                    offset += 1
                    adjustTab = False
                else:
                    # ' >\t test '
                    # ^ -- position start of line here + shift bsCount slightly
                    # to make extra space appear
                    adjustTab = True

            else:
                spaceAfterMarker = False

            oldBMarks.append(state.bMarks[nextLine])
            state.bMarks[nextLine] = pos

            # Same whitespace measurement as for the first line.
            while pos < max:
                ch = state.src[pos]

                if isStrSpace(ch):
                    if ch == "\t":
                        offset += (
                            4
                            - (
                                offset
                                + state.bsCount[nextLine]
                                + (1 if adjustTab else 0)
                            )
                            % 4
                        )
                    else:
                        offset += 1
                else:
                    break

                pos += 1

            lastLineEmpty = pos >= max

            oldBSCount.append(state.bsCount[nextLine])
            state.bsCount[nextLine] = (
                state.sCount[nextLine] + 1 + (1 if spaceAfterMarker else 0)
            )

            oldSCount.append(state.sCount[nextLine])
            state.sCount[nextLine] = offset - initial

            oldTShift.append(state.tShift[nextLine])
            state.tShift[nextLine] = pos - state.bMarks[nextLine]

            nextLine += 1
            continue

        # Case 2: line is not inside the blockquote, and the last line was empty.
        if lastLineEmpty:
            break

        # Case 3: another tag found.
        terminate = False

        for terminatorRule in terminatorRules:
            if terminatorRule(state, nextLine, endLine, True):
                terminate = True
                break

        if terminate:
            # Quirk to enforce "hard termination mode" for paragraphs;
            # normally if you call `tokenize(state, startLine, nextLine)`,
            # paragraphs will look below nextLine for paragraph continuation,
            # but if blockquote is terminated by another tag, they shouldn't
            state.lineMax = nextLine

            if state.blkIndent != 0:
                # state.blkIndent was non-zero, we now set it to zero,
                # so we need to re-calculate all offsets to appear as
                # if indent wasn't changed
                oldBMarks.append(state.bMarks[nextLine])
                oldBSCount.append(state.bsCount[nextLine])
                oldTShift.append(state.tShift[nextLine])
                oldSCount.append(state.sCount[nextLine])
                state.sCount[nextLine] -= state.blkIndent

            break

        # None of the cases applied: the line is kept inside the quote
        # without a marker of its own.
        oldBMarks.append(state.bMarks[nextLine])
        oldBSCount.append(state.bsCount[nextLine])
        oldTShift.append(state.tShift[nextLine])
        oldSCount.append(state.sCount[nextLine])

        # A negative indentation means that this is a paragraph continuation
        #
        state.sCount[nextLine] = -1

        nextLine += 1

    oldIndent = state.blkIndent
    state.blkIndent = 0

    token = state.push("blockquote_open", "blockquote", 1)
    token.markup = ">"
    # ``lines`` aliases token.map so the end line can be filled in after
    # the nested tokenization below.
    token.map = lines = [startLine, 0]

    state.md.block.tokenize(state, startLine, nextLine)

    token = state.push("blockquote_close", "blockquote", -1)
    token.markup = ">"

    state.lineMax = oldLineMax
    state.parentType = oldParentType
    lines[1] = state.line

    # Restore original tShift; this might not be necessary since the parser
    # has already been here, but just to make sure we can do that.
    for i, item in enumerate(oldTShift):
        state.bMarks[i + startLine] = oldBMarks[i]
        state.tShift[i + startLine] = item
        state.sCount[i + startLine] = oldSCount[i]
        state.bsCount[i + startLine] = oldBSCount[i]

    state.blkIndent = oldIndent

    return True
|
|
@ -1,35 +0,0 @@
|
|||
"""Code block (4 spaces padded)."""
|
||||
import logging
|
||||
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def code(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule for indented code blocks.

    Returns ``False`` unless ``startLine`` is reported as a code block by
    ``state.is_code_block``.  Otherwise it consumes the following lines that
    are either empty or code-block lines, emits one ``code_block`` token
    covering everything up to the last code-block line, and returns ``True``.
    ``silent`` is only logged; it does not change the behaviour.
    """
    LOGGER.debug("entering code: %s, %s, %s, %s", state, startLine, endLine, silent)

    if not state.is_code_block(startLine):
        return False

    # ``block_end`` trails ``cursor``: it only advances past code-block lines,
    # so empty lines at the tail are not part of the block.
    block_end = cursor = startLine + 1
    while cursor < endLine:
        if state.isEmpty(cursor):
            cursor += 1
        elif state.is_code_block(cursor):
            cursor += 1
            block_end = cursor
        else:
            break

    state.line = block_end

    token = state.push("code_block", "code", 0)
    body = state.getLines(startLine, block_end, 4 + state.blkIndent, False)
    token.content = body + "\n"
    token.map = [startLine, state.line]

    return True
|
|
@ -1,101 +0,0 @@
|
|||
# fences (``` lang, ~~~ lang)
|
||||
import logging
|
||||
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def fence(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule for fenced code blocks (``` lang, ~~~ lang).

    Returns ``False`` when ``startLine`` does not open a fence: it is an
    indented code block, has fewer than three marker characters, or is a
    backtick fence whose trailing text contains a backtick.  In ``silent``
    mode it returns ``True`` once the opening fence is validated.  Otherwise
    it searches for the closing fence, emits a ``fence`` token and returns
    ``True``.
    """
    LOGGER.debug("entering fence: %s, %s, %s, %s", state, startLine, endLine, silent)

    open_start = state.bMarks[startLine] + state.tShift[startLine]
    open_line_end = state.eMarks[startLine]

    if state.is_code_block(startLine):
        return False

    # Not enough room on the line for three marker characters.
    if open_start + 3 > open_line_end:
        return False

    marker = state.src[open_start]
    if marker != "~" and marker != "`":
        return False

    # Measure the run of marker characters that opens the fence.
    open_end = state.skipCharsStr(open_start, marker)
    fence_len = open_end - open_start
    if fence_len < 3:
        return False

    markup = state.src[open_start:open_end]
    params = state.src[open_end:open_line_end]

    # The text after a backtick fence must not contain a backtick.
    if marker == "`" and marker in params:
        return False

    # Since start is found, we can report success here in validation mode
    if silent:
        return True

    # Look for the closing fence.
    closed = False
    nextLine = startLine
    while True:
        nextLine += 1
        if nextLine >= endLine:
            # unclosed block should be autoclosed by end of document.
            # also block seems to be autoclosed by end of parent
            break

        line_start = state.bMarks[nextLine] + state.tShift[nextLine]
        line_end = state.eMarks[nextLine]

        if line_start < line_end and state.sCount[nextLine] < state.blkIndent:
            # non-empty line with negative indent should stop the list:
            # - ```
            # test
            break

        try:
            begins_with_marker = state.src[line_start] == marker
        except IndexError:
            break

        if not begins_with_marker or state.is_code_block(nextLine):
            continue

        run_end = state.skipCharsStr(line_start, marker)

        # closing code fence must be at least as long as the opening one
        if run_end - line_start < fence_len:
            continue

        # make sure tail has spaces only
        if state.skipSpaces(run_end) < line_end:
            continue

        closed = True
        break

    # If a fence has heading spaces, they should be removed from its inner block
    indent = state.sCount[startLine]

    state.line = nextLine + 1 if closed else nextLine

    token = state.push("fence", "code", 0)
    token.info = params
    token.content = state.getLines(startLine + 1, nextLine, indent, True)
    token.markup = markup
    token.map = [startLine, state.line]

    return True
|
|
@ -1,68 +0,0 @@
|
|||
""" Atex heading (#, ##, ...) """
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
from ..common.utils import isStrSpace
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def heading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule for ATX headings (``#``, ``##``, ... up to six ``#``).

    Returns ``False`` when ``startLine`` is an indented code block, is empty,
    does not start with ``#``, has more than six ``#`` characters, or the
    run of ``#`` is followed by a non-space character.  In ``silent`` mode it
    returns ``True`` once the opening sequence is validated.  Otherwise it
    emits ``heading_open`` / ``inline`` / ``heading_close`` tokens, advances
    ``state.line`` by one line and returns ``True``.
    """
    LOGGER.debug("entering heading: %s, %s, %s, %s", state, startLine, endLine, silent)

    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]

    if state.is_code_block(startLine):
        return False

    # Check the bounds *before* indexing: an empty line at the very end of
    # the source has pos == len(src), and indexing first raised IndexError.
    if pos >= maximum or state.src[pos] != "#":
        return False

    # count heading level
    level = 1
    pos += 1
    try:
        ch: str | None = state.src[pos]
    except IndexError:
        ch = None
    while ch == "#" and pos < maximum and level <= 6:
        level += 1
        pos += 1
        try:
            ch = state.src[pos]
        except IndexError:
            ch = None

    if level > 6 or (pos < maximum and not isStrSpace(ch)):
        return False

    if silent:
        return True

    # Let's cut tails like ' ### ' from the end of string

    maximum = state.skipSpacesBack(maximum, pos)
    tmp = state.skipCharsStrBack(maximum, "#", pos)
    # A trailing run of '#' only counts as a closing sequence when it is
    # preceded by a space.
    if tmp > pos and isStrSpace(state.src[tmp - 1]):
        maximum = tmp

    state.line = startLine + 1

    token = state.push("heading_open", "h" + str(level), 1)
    token.markup = "########"[:level]
    token.map = [startLine, state.line]

    token = state.push("inline", "", 0)
    token.content = state.src[pos:maximum].strip()
    token.map = [startLine, state.line]
    token.children = []

    token = state.push("heading_close", "h" + str(level), -1)
    token.markup = "########"[:level]

    return True
|
|
@ -1,55 +0,0 @@
|
|||
"""Horizontal rule
|
||||
|
||||
At least 3 of these characters on a line * - _
|
||||
"""
|
||||
import logging
|
||||
|
||||
from ..common.utils import isStrSpace
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def hr(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule for horizontal rules (thematic breaks).

    A rule line consists of at least three identical ``*``, ``-`` or ``_``
    markers, optionally mixed with spaces.  Returns ``False`` when
    ``startLine`` is an indented code block or does not match.  In ``silent``
    mode it returns ``True`` once the line is validated.  Otherwise it emits
    an ``hr`` token, advances ``state.line`` by one line and returns ``True``.
    """
    LOGGER.debug("entering hr: %s, %s, %s, %s", state, startLine, endLine, silent)

    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]

    if state.is_code_block(startLine):
        return False

    try:
        marker = state.src[pos]
    except IndexError:
        return False
    pos += 1

    # Check hr marker
    if marker not in ("*", "-", "_"):
        return False

    # markers can be mixed with spaces, but there should be at least 3 of them

    cnt = 1
    while pos < maximum:
        ch = state.src[pos]
        pos += 1
        if ch != marker and not isStrSpace(ch):
            return False
        if ch == marker:
            cnt += 1

    if cnt < 3:
        return False

    if silent:
        return True

    state.line = startLine + 1

    token = state.push("hr", "hr", 0)
    token.map = [startLine, state.line]
    # One marker per counted character.  The previous ``cnt + 1`` was a
    # literal port of JavaScript's ``Array(cnt + 1).join(ch)``, which yields
    # ``cnt`` copies, and so produced one marker too many.
    token.markup = marker * cnt

    return True
|
|
@ -1,90 +0,0 @@
|
|||
# HTML block
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
from ..common.html_blocks import block_names
|
||||
from ..common.html_re import HTML_OPEN_CLOSE_TAG_STR
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
# An array of opening and corresponding closing sequences for html tags,
# last argument defines whether it can terminate a paragraph or not
#
# Each entry is a tuple ``(opening pattern, closing pattern, flag)``.
# ``html_block`` picks the first entry whose opening pattern matches the
# line, and in silent mode returns the flag as its result.
HTML_SEQUENCES: list[tuple[re.Pattern[str], re.Pattern[str], bool]] = [
    # <script>, <pre>, <style> or <textarea> opening tag (case-insensitive);
    # ends on a line containing a closing tag for any of those four names.
    (
        re.compile(r"^<(script|pre|style|textarea)(?=(\s|>|$))", re.IGNORECASE),
        re.compile(r"<\/(script|pre|style|textarea)>", re.IGNORECASE),
        True,
    ),
    # "<!--" ... "-->"
    (re.compile(r"^<!--"), re.compile(r"-->"), True),
    # "<?" ... "?>"
    (re.compile(r"^<\?"), re.compile(r"\?>"), True),
    # "<!" followed by an uppercase ASCII letter ... ">"
    (re.compile(r"^<![A-Z]"), re.compile(r">"), True),
    # "<![CDATA[" ... "]]>"
    (re.compile(r"^<!\[CDATA\["), re.compile(r"\]\]>"), True),
    # Opening or closing tag whose name is listed in ``block_names``
    # (case-insensitive); the closing pattern ``^$`` matches an empty line.
    (
        re.compile("^</?(" + "|".join(block_names) + ")(?=(\\s|/?>|$))", re.IGNORECASE),
        re.compile(r"^$"),
        True,
    ),
    # A line matching HTML_OPEN_CLOSE_TAG_STR followed only by whitespace;
    # closed by an empty line.  This is the only entry whose flag is False.
    (re.compile(HTML_OPEN_CLOSE_TAG_STR + "\\s*$"), re.compile(r"^$"), False),
]
|
||||
|
||||
|
||||
def html_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule for raw HTML blocks.

    Returns ``False`` when ``startLine`` is an indented code block, the
    ``html`` option is not enabled, the line is empty or does not start with
    ``<``, or no opening pattern in ``HTML_SEQUENCES`` matches.  In ``silent``
    mode it returns the matched sequence's "can terminate a paragraph" flag.
    Otherwise it consumes lines up to the matching closing sequence, emits an
    ``html_block`` token and returns ``True``.
    """
    LOGGER.debug(
        "entering html_block: %s, %s, %s, %s", state, startLine, endLine, silent
    )
    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]

    if state.is_code_block(startLine):
        return False

    if not state.md.options.get("html", None):
        return False

    # Check the bounds before indexing: an empty line at the very end of the
    # source has pos == len(src), and an unguarded index raised IndexError.
    if pos >= maximum or state.src[pos] != "<":
        return False

    lineText = state.src[pos:maximum]

    # First sequence whose opening pattern matches this line, if any.
    html_seq = None
    for HTML_SEQUENCE in HTML_SEQUENCES:
        if HTML_SEQUENCE[0].search(lineText):
            html_seq = HTML_SEQUENCE
            break

    if not html_seq:
        return False

    if silent:
        # true if this sequence can be a terminator, false otherwise
        return html_seq[2]

    nextLine = startLine + 1

    # If we are here - we detected HTML block.
    # Let's roll down till block end.
    if not html_seq[1].search(lineText):
        while nextLine < endLine:
            if state.sCount[nextLine] < state.blkIndent:
                break

            pos = state.bMarks[nextLine] + state.tShift[nextLine]
            maximum = state.eMarks[nextLine]
            lineText = state.src[pos:maximum]

            if html_seq[1].search(lineText):
                # A non-empty closing line belongs to the block; an empty
                # line (matched by the ``^$`` closers) does not.
                if len(lineText) != 0:
                    nextLine += 1
                break
            nextLine += 1

    state.line = nextLine

    token = state.push("html_block", "", 0)
    token.map = [startLine, nextLine]
    token.content = state.getLines(startLine, nextLine, state.blkIndent, True)

    return True
|
|
@ -1,86 +0,0 @@
|
|||
# lheading (---, ==)
|
||||
import logging
|
||||
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule for setext headings (text underlined with ``===`` or ``---``).

    Scans forward from ``startLine`` for an underline made only of ``=``
    (level 1) or ``-`` (level 2) optionally followed by spaces.  Returns
    ``False`` when ``startLine`` is an indented code block or no underline is
    found before an empty line, a terminating rule or ``endLine``.  Otherwise
    it emits ``heading_open`` / ``inline`` / ``heading_close`` tokens, sets
    ``state.line`` past the underline and returns ``True``.  ``silent`` is
    only logged; it does not change the behaviour.
    """
    LOGGER.debug("entering lheading: %s, %s, %s, %s", state, startLine, endLine, silent)

    level = None
    nextLine = startLine + 1
    ruler = state.md.block.ruler
    terminatorRules = ruler.getRules("paragraph")

    if state.is_code_block(startLine):
        return False

    oldParentType = state.parentType
    state.parentType = "paragraph"  # use paragraph to match terminatorRules

    # jump line-by-line until empty one or EOF
    while nextLine < endLine and not state.isEmpty(nextLine):
        # this would be a code block normally, but after paragraph
        # it's considered a lazy continuation regardless of what's there
        if state.sCount[nextLine] - state.blkIndent > 3:
            nextLine += 1
            continue

        # Check for underline in setext header
        if state.sCount[nextLine] >= state.blkIndent:
            pos = state.bMarks[nextLine] + state.tShift[nextLine]
            maximum = state.eMarks[nextLine]

            if pos < maximum:
                marker = state.src[pos]

                if marker in ("-", "="):
                    pos = state.skipCharsStr(pos, marker)
                    pos = state.skipSpaces(pos)

                    # The underline must fill the rest of the line.
                    if pos >= maximum:
                        level = 1 if marker == "=" else 2
                        break

        # quirk for blockquotes, this line should already be checked by that rule
        if state.sCount[nextLine] < 0:
            nextLine += 1
            continue

        # Some tags can terminate paragraph without empty line.
        terminate = False
        for terminatorRule in terminatorRules:
            if terminatorRule(state, nextLine, endLine, True):
                terminate = True
                break
        if terminate:
            break

        nextLine += 1

    if not level:
        # Didn't find valid underline.  Put the parent type back first so a
        # failed attempt does not leave "paragraph" behind in the state.
        state.parentType = oldParentType
        return False

    content = state.getLines(startLine, nextLine, state.blkIndent, False).strip()

    state.line = nextLine + 1

    token = state.push("heading_open", "h" + str(level), 1)
    token.markup = marker
    token.map = [startLine, state.line]

    token = state.push("inline", "", 0)
    token.content = content
    # The inline content excludes the underline line.
    token.map = [startLine, state.line - 1]
    token.children = []

    token = state.push("heading_close", "h" + str(level), -1)
    token.markup = marker

    state.parentType = oldParentType

    return True
|
|
@ -1,345 +0,0 @@
|
|||
# Lists
|
||||
import logging
|
||||
|
||||
from ..common.utils import isStrSpace
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Search `[-+*][\n ]`, returns next pos after marker on success
|
||||
# or -1 on fail.
|
||||
def skipBulletListMarker(state: StateBlock, startLine: int) -> int:
    """Return the position just after a bullet list marker, or -1.

    The line must start with ``*``, ``-`` or ``+``; if any character follows
    the marker on the line, it must be a space character as judged by
    ``isStrSpace``.
    """
    marker_pos = state.bMarks[startLine] + state.tShift[startLine]
    line_end = state.eMarks[startLine]

    try:
        marker = state.src[marker_pos]
    except IndexError:
        return -1

    if marker not in {"*", "-", "+"}:
        return -1

    after_marker = marker_pos + 1
    if after_marker < line_end and not isStrSpace(state.src[after_marker]):
        # " -test " - is not a list item
        return -1

    return after_marker
|
||||
|
||||
|
||||
# Search `\d+[.)][\n ]`, returns next pos after marker on success
|
||||
# or -1 on fail.
|
||||
def skipOrderedListMarker(state: StateBlock, startLine: int) -> int:
    """Return the position just after an ordered list marker, or -1.

    The marker is a run of ASCII digits followed by ``.`` or ``)``; if any
    character follows the marker on the line, it must be a space character
    as judged by ``isStrSpace``.
    """
    start = state.bMarks[startLine] + state.tShift[startLine]
    line_end = state.eMarks[startLine]

    # List marker should have at least 2 chars (digit + dot)
    if start + 1 >= line_end:
        return -1

    # The first character has to be an ASCII digit.
    if not ("0" <= state.src[start] <= "9"):
        return -1

    cursor = start + 1
    while True:
        # EOL -> fail
        if cursor >= line_end:
            return -1

        current = state.src[cursor]
        cursor += 1

        if "0" <= current <= "9":
            # List marker should have no more than 9 digits
            # (prevents integer overflow in browsers)
            if cursor - start >= 10:
                return -1
        elif current == ")" or current == ".":
            # found valid marker
            break
        else:
            return -1

    if cursor < line_end and not isStrSpace(state.src[cursor]):
        # " 1.test " - is not a list item
        return -1

    return cursor
|
||||
|
||||
|
||||
def markTightParagraphs(state: StateBlock, idx: int) -> None:
    """Hide paragraph open/close tokens two levels below ``state.level``.

    Walks ``state.tokens`` from ``idx + 2`` up to (but excluding) the last
    two tokens.  Every ``paragraph_open`` token at ``state.level + 2`` and
    the token two positions after it get ``hidden = True``.
    """
    tokens = state.tokens
    target_level = state.level + 2
    stop = len(tokens) - 2

    cursor = idx + 2
    while cursor < stop:
        candidate = tokens[cursor]
        if candidate.type == "paragraph_open" and candidate.level == target_level:
            candidate.hidden = True
            tokens[cursor + 2].hidden = True
            # Skip past the tokens up to and including the one just hidden.
            cursor += 3
        else:
            cursor += 1
|
||||
|
||||
|
||||
def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule that parses a bullet or ordered list starting at ``startLine``.

    :param state: block parser state; tokens are pushed onto it and its
        indentation fields are temporarily rewritten while items are parsed
    :param startLine: index of the line where the list may start
    :param endLine: index of the line at which parsing must stop
    :param silent: validation mode - only report whether a list starts here,
        without pushing any token
    :return: False when the line does not start a list (or the list may not
        interrupt the current paragraph); True otherwise. When not silent,
        ``state.line`` is set to the first line after the list.
    """
    LOGGER.debug("entering list: %s, %s, %s, %s", state, startLine, endLine, silent)

    isTerminatingParagraph = False
    tight = True

    if state.is_code_block(startLine):
        return False

    # Special case:
    #  - item 1
    #   - item 2
    #    - item 3
    #     - item 4
    #      - this one is a paragraph continuation
    if (
        state.listIndent >= 0
        and state.sCount[startLine] - state.listIndent >= 4
        and state.sCount[startLine] < state.blkIndent
    ):
        return False

    # limit conditions when list can interrupt
    # a paragraph (validation mode only)
    # Next list item should still terminate previous list item
    #
    # This code can fail if plugins use blkIndent as well as lists,
    # but I hope the spec gets fixed long before that happens.
    #
    if (
        silent
        and state.parentType == "paragraph"
        and state.sCount[startLine] >= state.blkIndent
    ):
        isTerminatingParagraph = True

    # Detect list type and position after marker
    # (an ordered marker is tried first, then a bullet marker)
    posAfterMarker = skipOrderedListMarker(state, startLine)
    if posAfterMarker >= 0:
        isOrdered = True
        start = state.bMarks[startLine] + state.tShift[startLine]
        # the digits between the line start and the delimiter character
        markerValue = int(state.src[start : posAfterMarker - 1])

        # If we're starting a new ordered list right after
        # a paragraph, it should start with 1.
        if isTerminatingParagraph and markerValue != 1:
            return False
    else:
        posAfterMarker = skipBulletListMarker(state, startLine)
        if posAfterMarker >= 0:
            isOrdered = False
        else:
            return False

    # If we're starting a new unordered list right after
    # a paragraph, first line should not be empty.
    if (
        isTerminatingParagraph
        and state.skipSpaces(posAfterMarker) >= state.eMarks[startLine]
    ):
        return False

    # We should terminate list on style change. Remember first one to compare.
    markerChar = state.src[posAfterMarker - 1]

    # For validation mode we can terminate immediately
    if silent:
        return True

    # Start list
    # (remember where the opening token lands, for markTightParagraphs below)
    listTokIdx = len(state.tokens)

    if isOrdered:
        token = state.push("ordered_list_open", "ol", 1)
        if markerValue != 1:
            token.attrs = {"start": markerValue}

    else:
        token = state.push("bullet_list_open", "ul", 1)

    # the end line (second element) is filled in once the list is finished
    token.map = listLines = [startLine, 0]
    token.markup = markerChar

    #
    # Iterate list items
    #

    nextLine = startLine
    prevEmptyEnd = False
    terminatorRules = state.md.block.ruler.getRules("list")

    oldParentType = state.parentType
    state.parentType = "list"

    while nextLine < endLine:
        pos = posAfterMarker
        maximum = state.eMarks[nextLine]

        # indent width (tabs expanded) of the position right after the marker
        initial = offset = (
            state.sCount[nextLine]
            + posAfterMarker
            - (state.bMarks[startLine] + state.tShift[startLine])
        )

        # measure the whitespace between the marker and the item content
        while pos < maximum:
            ch = state.src[pos]

            if ch == "\t":
                offset += 4 - (offset + state.bsCount[nextLine]) % 4
            elif ch == " ":
                offset += 1
            else:
                break

            pos += 1

        contentStart = pos

        # trimming space in "-    \n  3" case, indent is 1 here
        indentAfterMarker = 1 if contentStart >= maximum else offset - initial

        # If we have more than 4 spaces, the indent is 1
        # (the rest is just indented code block)
        if indentAfterMarker > 4:
            indentAfterMarker = 1

        # "  -  test"
        #  ^^^^^ - calculating total length of this thing
        indent = initial + indentAfterMarker

        # Run subparser & write tokens
        token = state.push("list_item_open", "li", 1)
        token.markup = markerChar
        # the end line (second element) is filled in after the item is parsed
        token.map = itemLines = [startLine, 0]
        if isOrdered:
            token.info = state.src[start : posAfterMarker - 1]

        # change current state, then restore it after parser subcall
        oldTight = state.tight
        oldTShift = state.tShift[startLine]
        oldSCount = state.sCount[startLine]

        #  - example list
        # ^ listIndent position will be here
        #   ^ blkIndent position will be here
        #
        oldListIndent = state.listIndent
        state.listIndent = state.blkIndent
        state.blkIndent = indent

        state.tight = True
        # make the item's first line appear to start at its content
        state.tShift[startLine] = contentStart - state.bMarks[startLine]
        state.sCount[startLine] = offset

        if contentStart >= maximum and state.isEmpty(startLine + 1):
            # workaround for this case
            # (list item is empty, list terminates before "foo"):
            # ~~~~~~~~
            #   -
            #
            #     foo
            # ~~~~~~~~
            state.line = min(state.line + 2, endLine)
        else:
            # NOTE in list.js this was:
            # state.md.block.tokenize(state, startLine, endLine, True)
            # but tokenize does not take the final parameter
            state.md.block.tokenize(state, startLine, endLine)

        # If any list item is loose (the sub-parse cleared state.tight, or the
        # previous item ended with an empty line), the whole list is loose
        if (not state.tight) or prevEmptyEnd:
            tight = False

        # Item become loose if finish with empty line,
        # but we should filter last element, because it means list finish
        prevEmptyEnd = (state.line - startLine) > 1 and state.isEmpty(state.line - 1)

        # restore the state saved before the sub-parse
        state.blkIndent = state.listIndent
        state.listIndent = oldListIndent
        state.tShift[startLine] = oldTShift
        state.sCount[startLine] = oldSCount
        state.tight = oldTight

        token = state.push("list_item_close", "li", -1)
        token.markup = markerChar

        # the next item (if any) starts where the sub-parse stopped
        nextLine = startLine = state.line
        itemLines[1] = nextLine

        if nextLine >= endLine:
            break

        contentStart = state.bMarks[startLine]

        #
        # Try to check if list is terminated or continued.
        #
        if state.sCount[nextLine] < state.blkIndent:
            break

        if state.is_code_block(startLine):
            break

        # fail if terminating block found
        terminate = False
        for terminatorRule in terminatorRules:
            if terminatorRule(state, nextLine, endLine, True):
                terminate = True
                break

        if terminate:
            break

        # fail if list has another type
        if isOrdered:
            posAfterMarker = skipOrderedListMarker(state, nextLine)
            if posAfterMarker < 0:
                break
            start = state.bMarks[nextLine] + state.tShift[nextLine]
        else:
            posAfterMarker = skipBulletListMarker(state, nextLine)
            if posAfterMarker < 0:
                break

        # a different marker character starts a new list
        if markerChar != state.src[posAfterMarker - 1]:
            break

    # Finalize list
    if isOrdered:
        token = state.push("ordered_list_close", "ol", -1)
    else:
        token = state.push("bullet_list_close", "ul", -1)

    token.markup = markerChar

    listLines[1] = nextLine
    state.line = nextLine

    state.parentType = oldParentType

    # mark paragraphs tight if needed
    if tight:
        markTightParagraphs(state, listTokIdx)

    return True
|
|
@ -1,65 +0,0 @@
|
|||
"""Paragraph."""
|
||||
import logging
|
||||
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def paragraph(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Collect consecutive lines into a paragraph and emit its tokens.

    Lines after ``startLine`` are consumed until an empty line, the end of
    the document, or a line accepted by one of the "paragraph" terminator
    rules. Pushes ``paragraph_open``, ``inline`` and ``paragraph_close``
    tokens and sets ``state.line`` to the first line that is not part of the
    paragraph.

    :param state: block parser state
    :param startLine: index of the paragraph's first line
    :param endLine: only logged; the scan always runs up to ``state.lineMax``
    :param silent: only logged
    :return: always True
    """
    LOGGER.debug(
        "entering paragraph: %s, %s, %s, %s", state, startLine, endLine, silent
    )

    terminators = state.md.block.ruler.getRules("paragraph")
    endLine = state.lineMax

    savedParentType = state.parentType
    state.parentType = "paragraph"

    # Advance line by line until an empty one, EOF or a terminating block.
    lastLine = startLine + 1
    while lastLine < endLine and not state.isEmpty(lastLine):
        lineIndent = state.sCount[lastLine]
        # Two kinds of line are taken without consulting the terminators:
        # - indented by more than 3 past blkIndent: normally a code block,
        #   but after a paragraph it is a lazy continuation;
        # - negative indent: quirk for blockquotes, such a line should
        #   already have been checked by that rule.
        isLazy = lineIndent - state.blkIndent > 3 or lineIndent < 0
        if not isLazy and any(
            rule(state, lastLine, endLine, True) for rule in terminators
        ):
            # Some tags can terminate a paragraph without an empty line.
            break
        lastLine += 1

    content = state.getLines(startLine, lastLine, state.blkIndent, False).strip()
    state.line = lastLine

    opening = state.push("paragraph_open", "p", 1)
    opening.map = [startLine, state.line]

    inline = state.push("inline", "", 0)
    inline.content = content
    inline.map = [startLine, state.line]
    inline.children = []

    state.push("paragraph_close", "p", -1)

    state.parentType = savedParentType
    return True
|
|
@ -1,215 +0,0 @@
|
|||
import logging
|
||||
|
||||
from ..common.utils import charCodeAt, isSpace, normalizeReference
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) -> bool:
    """Parse a link reference definition (``[label]: destination 'title'``).

    On success the definition is stored in ``state.env["references"]`` under
    its normalized label (the first definition of a label wins; later ones
    are appended to ``state.env["duplicate_refs"]``), ``state.line`` is moved
    past the definition and, when the ``inline_definitions`` option is set, a
    ``definition`` token is pushed.

    :param state: block parser state
    :param startLine: index of the line where the definition may start
    :param _endLine: only logged; the scan always runs up to ``state.lineMax``
    :param silent: validation mode - return True for a valid definition
        without touching ``state.env``, ``state.line`` or the token stream
    :return: True when a valid definition starts at ``startLine``, else False
    """
    LOGGER.debug(
        "entering reference: %s, %s, %s, %s", state, startLine, _endLine, silent
    )

    lines = 0
    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]
    nextLine = startLine + 1

    if state.is_code_block(startLine):
        return False

    if state.src[pos] != "[":
        return False

    # Simple check to quickly interrupt scan on [link](url) at the start of line.
    # Can be useful on practice: https://github.com/markdown-it/markdown-it/issues/54
    while pos < maximum:
        # an unescaped "]" must be directly followed by ":"
        if state.src[pos] == "]" and state.src[pos - 1] != "\\":
            if pos + 1 == maximum:
                return False
            if state.src[pos + 1] != ":":
                return False
            break
        pos += 1

    endLine = state.lineMax

    # jump line-by-line until empty one or EOF
    terminatorRules = state.md.block.ruler.getRules("reference")

    # parentType is only meaningful to the terminator rules called in this
    # scan, so it is restored as soon as the scan ends. Restoring it only on
    # the final success path would leak "reference" into the state on every
    # early return below.
    oldParentType = state.parentType
    state.parentType = "reference"
    try:
        while nextLine < endLine and not state.isEmpty(nextLine):
            # this would be a code block normally, but after paragraph
            # it's considered a lazy continuation regardless of what's there
            if state.sCount[nextLine] - state.blkIndent > 3:
                nextLine += 1
                continue

            # quirk for blockquotes, this line should already be checked by that rule
            if state.sCount[nextLine] < 0:
                nextLine += 1
                continue

            # Some tags can terminate paragraph without empty line.
            terminate = False
            for terminatorRule in terminatorRules:
                if terminatorRule(state, nextLine, endLine, True):
                    terminate = True
                    break

            if terminate:
                break

            nextLine += 1
    finally:
        state.parentType = oldParentType

    string = state.getLines(startLine, nextLine, state.blkIndent, False).strip()
    maximum = len(string)

    # find the end of the label, counting the line breaks inside it
    labelEnd = None
    pos = 1
    while pos < maximum:
        ch = charCodeAt(string, pos)
        if ch == 0x5B:  # /* [ */
            return False
        elif ch == 0x5D:  # /* ] */
            labelEnd = pos
            break
        elif ch == 0x0A:  # /* \n */
            lines += 1
        elif ch == 0x5C:  # /* \ */
            # skip the escaped character (still counting an escaped newline)
            pos += 1
            if pos < maximum and charCodeAt(string, pos) == 0x0A:
                lines += 1
        pos += 1

    if labelEnd is None or charCodeAt(string, labelEnd + 1) != 0x3A:  # /* : */
        return False

    # [label]:   destination   'title'
    #         ^^^ skip optional whitespace here
    pos = labelEnd + 2
    while pos < maximum:
        ch = charCodeAt(string, pos)
        if ch == 0x0A:
            lines += 1
        elif isSpace(ch):
            pass
        else:
            break
        pos += 1

    # [label]:   destination   'title'
    #            ^^^^^^^^^^^ parse this
    res = state.md.helpers.parseLinkDestination(string, pos, maximum)
    if not res.ok:
        return False

    href = state.md.normalizeLink(res.str)
    if not state.md.validateLink(href):
        return False

    pos = res.pos
    lines += res.lines

    # save cursor state, we could require to rollback later
    destEndPos = pos
    destEndLineNo = lines

    # [label]:   destination   'title'
    #                       ^^^ skipping those spaces
    start = pos
    while pos < maximum:
        ch = charCodeAt(string, pos)
        if ch == 0x0A:
            lines += 1
        elif isSpace(ch):
            pass
        else:
            break
        pos += 1

    # [label]:   destination   'title'
    #                          ^^^^^^^ parse this
    res = state.md.helpers.parseLinkTitle(string, pos, maximum)
    if pos < maximum and start != pos and res.ok:
        title = res.str
        pos = res.pos
        lines += res.lines
    else:
        # no title: roll back to the end of the destination
        title = ""
        pos = destEndPos
        lines = destEndLineNo

    # skip trailing spaces until the rest of the line
    while pos < maximum:
        ch = charCodeAt(string, pos)
        if not isSpace(ch):
            break
        pos += 1

    if pos < maximum and charCodeAt(string, pos) != 0x0A and title:
        # garbage at the end of the line after title,
        # but it could still be a valid reference if we roll back
        title = ""
        pos = destEndPos
        lines = destEndLineNo
        while pos < maximum:
            ch = charCodeAt(string, pos)
            if not isSpace(ch):
                break
            pos += 1

    if pos < maximum and charCodeAt(string, pos) != 0x0A:
        # garbage at the end of the line
        return False

    label = normalizeReference(string[1:labelEnd])
    if not label:
        # CommonMark 0.20 disallows empty labels
        return False

    # Reference can not terminate anything. This check is for safety only.
    if silent:
        return True

    if "references" not in state.env:
        state.env["references"] = {}

    state.line = startLine + lines + 1

    # note, this is not part of markdown-it JS, but is useful for renderers
    if state.md.options.get("inline_definitions", False):
        token = state.push("definition", "", 0)
        token.meta = {
            "id": label,
            "title": title,
            "url": href,
            "label": string[1:labelEnd],
        }
        token.map = [startLine, state.line]

    if label not in state.env["references"]:
        state.env["references"][label] = {
            "title": title,
            "href": href,
            "map": [startLine, state.line],
        }
    else:
        # the first definition wins; later ones are only recorded
        state.env.setdefault("duplicate_refs", []).append(
            {
                "title": title,
                "href": href,
                "label": label,
                "map": [startLine, state.line],
            }
        )

    return True
|
|
@ -1,261 +0,0 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Literal
|
||||
|
||||
from ..common.utils import isStrSpace
|
||||
from ..ruler import StateBase
|
||||
from ..token import Token
|
||||
from ..utils import EnvType
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from markdown_it.main import MarkdownIt
|
||||
|
||||
|
||||
class StateBlock(StateBase):
    """State shared by the block rules: source text, per-line caches and tokens."""

    def __init__(
        self, src: str, md: MarkdownIt, env: EnvType, tokens: list[Token]
    ) -> None:
        """Store the parser inputs and pre-compute the per-line caches for ``src``."""
        self.src = src

        # link to parser instance
        self.md = md

        self.env = env

        #
        # Internal state variables
        #

        self.tokens = tokens

        self.bMarks: list[int] = []  # line begin offsets for fast jumps
        self.eMarks: list[int] = []  # line end offsets for fast jumps
        # offsets of the first non-space characters (tabs not expanded)
        self.tShift: list[int] = []
        self.sCount: list[int] = []  # indents for each line (tabs expanded)

        # An amount of virtual spaces (tabs expanded) between beginning
        # of each line (bMarks) and real beginning of that line.
        #
        # It exists only as a hack because blockquotes override bMarks
        # losing information in the process.
        #
        # It's used only when expanding tabs, you can think about it as
        # an initial tab length, e.g. bsCount=21 applied to string `\t123`
        # means first tab should be expanded to 4-21%4 === 3 spaces.
        #
        self.bsCount: list[int] = []

        # block parser variables

        # required block content indent (for example, if we are
        # inside a list, it would be positioned after list marker)
        self.blkIndent = 0
        self.line = 0  # line index in src
        self.lineMax = 0  # lines count
        self.tight = False  # loose/tight mode for lists
        self.ddIndent = -1  # indent of the current dd block (-1 if there isn't any)
        self.listIndent = -1  # indent of the current list block (-1 if there isn't any)

        # can be 'blockquote', 'list', 'root', 'paragraph' or 'reference'
        # used in lists to determine if they interrupt a paragraph
        self.parentType = "root"

        self.level = 0

        # renderer
        self.result = ""

        # Create caches: one entry per line in each of the marker lists.
        length = len(self.src)
        lastIndex = length - 1
        inIndent = True  # still inside the leading whitespace of the line
        lineStart = leading = width = 0

        for index, char in enumerate(self.src):
            if inIndent:
                if isStrSpace(char):
                    leading += 1
                    # a tab advances to the next multiple of four columns
                    width += (4 - width % 4) if char == "\t" else 1
                    continue
                inIndent = False

            isNewline = char == "\n"
            if not isNewline and index != lastIndex:
                continue

            # A line ends at its "\n", or one past the final character when
            # the source does not end with a newline.
            lineEnd = index if isNewline else index + 1
            self.bMarks.append(lineStart)
            self.eMarks.append(lineEnd)
            self.tShift.append(leading)
            self.sCount.append(width)
            self.bsCount.append(0)

            inIndent = True
            leading = width = 0
            lineStart = lineEnd + 1

        # Push fake entry to simplify cache bounds checks
        self.bMarks.append(length)
        self.eMarks.append(length)
        self.tShift.append(0)
        self.sCount.append(0)
        self.bsCount.append(0)

        self.lineMax = len(self.bMarks) - 1  # don't count last fake line

        # pre-check if code blocks are enabled, to speed up is_code_block method
        self._code_enabled = "code" in self.md["block"].ruler.get_active_rules()

    def __repr__(self) -> str:
        """Return a short summary: class name, current line, level and token count."""
        name = self.__class__.__name__
        return f"{name}(line={self.line},level={self.level},tokens={len(self.tokens)})"

    def push(self, ttype: str, tag: str, nesting: Literal[-1, 0, 1]) -> Token:
        """Create a block token, append it to ``self.tokens`` and return it.

        A closing token (``nesting < 0``) lowers ``self.level`` before the
        token's level is recorded; an opening token (``nesting > 0``) raises
        it afterwards.
        """
        token = Token(ttype, tag, nesting)
        token.block = True

        if nesting < 0:
            # closing tag
            self.level -= 1
        token.level = self.level
        if nesting > 0:
            # opening tag
            self.level += 1

        self.tokens.append(token)
        return token

    def isEmpty(self, line: int) -> bool:
        """Return True when ``line`` has nothing after its leading whitespace."""
        contentStart = self.bMarks[line] + self.tShift[line]
        return contentStart >= self.eMarks[line]

    def skipEmptyLines(self, from_pos: int) -> int:
        """Return the index of the first line at or after ``from_pos`` that has content.

        Returns ``self.lineMax`` (or ``from_pos`` itself if it is already
        larger) when every remaining line is empty.
        """
        while from_pos < self.lineMax:
            try:
                contentStart = self.bMarks[from_pos] + self.tShift[from_pos]
                hasContent = contentStart < self.eMarks[from_pos]
            except IndexError:
                # a line missing from the caches is stepped over
                hasContent = False
            if hasContent:
                break
            from_pos += 1
        return from_pos

    def skipSpaces(self, pos: int) -> int:
        """Return the first offset at or after ``pos`` that is not a space/tab.

        Stops at the end of ``self.src``.
        """
        while True:
            try:
                char = self.src[pos]
            except IndexError:
                return pos
            if not isStrSpace(char):
                return pos
            pos += 1

    def skipSpacesBack(self, pos: int, minimum: int) -> int:
        """Walk backwards from ``pos`` over spaces/tabs, not going below ``minimum``.

        Returns the offset just after the last non-space character found, or
        the position where the walk stopped.
        """
        while pos > minimum:
            pos -= 1
            if not isStrSpace(self.src[pos]):
                return pos + 1
        return pos

    def skipChars(self, pos: int, code: int) -> int:
        """Return the first offset at or after ``pos`` whose char code differs from ``code``."""
        while True:
            try:
                current = self.srcCharCode[pos]
            except IndexError:
                return pos
            if current != code:
                return pos
            pos += 1

    def skipCharsStr(self, pos: int, ch: str) -> int:
        """Return the first offset at or after ``pos`` whose character differs from ``ch``."""
        while True:
            try:
                current = self.src[pos]
            except IndexError:
                return pos
            if current != ch:
                return pos
            pos += 1

    def skipCharsBack(self, pos: int, code: int, minimum: int) -> int:
        """Walk backwards from ``pos - 1`` over char code ``code``, not going below ``minimum``."""
        while pos > minimum:
            pos -= 1
            if code != self.srcCharCode[pos]:
                return pos + 1
        return pos

    def skipCharsStrBack(self, pos: int, ch: str, minimum: int) -> int:
        """Walk backwards from ``pos - 1`` over character ``ch``, not going below ``minimum``."""
        while pos > minimum:
            pos -= 1
            if ch != self.src[pos]:
                return pos + 1
        return pos

    def getLines(self, begin: int, end: int, indent: int, keepLastLF: bool) -> str:
        """Cut the lines ``begin`` .. ``end - 1`` out of the source.

        Up to ``indent`` columns of leading indentation are removed from each
        line. The line break after the last line is kept only when
        ``keepLastLF`` is true. Returns "" when ``begin >= end``.
        """
        pieces: list[str] = []

        for line in range(begin, end):
            lineStart = first = self.bMarks[line]
            last = self.eMarks[line]
            if line + 1 < end or keepLastLF:
                # include the line break that follows the line
                last += 1

            lineIndent = 0
            while first < last and lineIndent < indent:
                char = self.src[first]
                if isStrSpace(char):
                    if char == "\t":
                        lineIndent += 4 - (lineIndent + self.bsCount[line]) % 4
                    else:
                        lineIndent += 1
                elif first - lineStart < self.tShift[line]:
                    # a non-space character that tShift still counts as indent
                    lineIndent += 1
                else:
                    break
                first += 1

            # partially expanding tabs in code blocks, e.g '\t\tfoobar'
            # with indent=2 becomes '  \tfoobar'; the padding is empty
            # unless a tab overshot the requested indent
            pieces.append(" " * (lineIndent - indent) + self.src[first:last])

        return "".join(pieces)

    def is_code_block(self, line: int) -> bool:
        """Tell whether ``line`` is indented enough to be an indented code block.

        That is: the code block rule is enabled and the line is indented at
        least 4 columns past ``self.blkIndent``.
        """
        return self._code_enabled and (self.sCount[line] - self.blkIndent) >= 4
|
|
@ -1,236 +0,0 @@
|
|||
# GFM table, https://github.github.com/gfm/#tables-extension-
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
from ..common.utils import charStrAt, isStrSpace
|
||||
from .state_block import StateBlock
|
||||
|
||||
headerLineRe = re.compile(r"^:?-+:?$")
|
||||
enclosingPipesRe = re.compile(r"^\||\|$")
|
||||
|
||||
|
||||
def getLine(state: StateBlock, line: int) -> str:
    """Return the text of ``line`` from its first non-indent character to its end mark."""
    begin = state.bMarks[line] + state.tShift[line]
    return state.src[begin : state.eMarks[line]]
|
||||
|
||||
|
||||
def escapedSplit(string: str) -> list[str]:
    r"""Split a table row into cells on every ``|`` that is not escaped.

    A pipe directly preceded by a backslash (``\|``) does not separate cells;
    the backslash is dropped and the pipe is kept in the cell text. Leading
    and trailing pipes produce empty first/last cells, which callers remove
    themselves.

    :param string: text of one table row
    :return: the cell texts, in order (always at least one element)
    """
    result: list[str] = []
    isEscaped = False  # True when the previous character was a backslash
    lastPos = 0  # start of the not-yet-copied part of the current cell
    current = ""  # text already gathered for the current cell

    for pos, ch in enumerate(string):
        if ch == "|":
            if not isEscaped:
                # pipe separating cells, '|'
                result.append(current + string[lastPos:pos])
                current = ""
                lastPos = pos + 1
            else:
                # escaped pipe, '\|': copy up to (not including) the backslash
                current += string[lastPos : pos - 1]
                lastPos = pos

        isEscaped = ch == "\\"

    result.append(current + string[lastPos:])

    return result
|
||||
|
||||
|
||||
def table(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule for GFM tables (header row, delimiter row, optional body rows).

    :param state: block parser state
    :param startLine: index of the candidate header row
    :param endLine: index of the line at which parsing must stop
    :param silent: validation mode - only report whether a table starts here
    :return: False when the lines do not form a table; True otherwise. When
        not silent, table tokens are pushed and ``state.line`` is moved to the
        first line after the table.
    """
    # should have at least two lines
    if startLine + 2 > endLine:
        return False

    delimiterLine = startLine + 1

    if state.sCount[delimiterLine] < state.blkIndent:
        return False

    if state.is_code_block(delimiterLine):
        return False

    # first character of the second line should be '|', '-', ':',
    # and no other characters are allowed but spaces;
    # basically, this is the equivalent of /^[-:|][-:|\s]*$/ regexp
    markers = ("|", "-", ":")
    lineEnd = state.eMarks[delimiterLine]

    cursor = state.bMarks[delimiterLine] + state.tShift[delimiterLine]
    if cursor >= lineEnd:
        return False
    firstChar = state.src[cursor]
    if firstChar not in markers:
        return False

    cursor += 1
    if cursor >= lineEnd:
        return False
    secondChar = state.src[cursor]
    if secondChar not in markers and not isStrSpace(secondChar):
        return False

    # if first character is '-', then second character must not be a space
    # (due to parsing ambiguity with list)
    if firstChar == "-" and isStrSpace(secondChar):
        return False

    for index in range(cursor + 1, lineEnd):
        char = state.src[index]
        if char not in markers and not isStrSpace(char):
            return False

    # Derive one alignment per column from the delimiter row.
    delimiterCells = getLine(state, delimiterLine).split("|")
    lastCell = len(delimiterCells) - 1
    aligns = []
    for cellIndex, rawCell in enumerate(delimiterCells):
        cell = rawCell.strip()
        if not cell:
            # allow empty columns before and after table, but not in between columns;
            # e.g. allow ` |---| `, disallow ` ---||--- `
            if cellIndex in (0, lastCell):
                continue
            return False

        if not headerLineRe.search(cell):
            return False

        colonLeft = cell.startswith(":")
        if cell.endswith(":"):
            aligns.append("center" if colonLeft else "right")
        elif colonLeft:
            aligns.append("left")
        else:
            aligns.append("")

    headerText = getLine(state, startLine).strip()
    if "|" not in headerText:
        return False
    if state.is_code_block(startLine):
        return False
    headerCells = escapedSplit(headerText)
    # drop the empty cells produced by enclosing pipes
    if headerCells and headerCells[0] == "":
        headerCells.pop(0)
    if headerCells and headerCells[-1] == "":
        headerCells.pop()

    # header row will define an amount of columns in the entire table,
    # and align row should be exactly the same (the rest of the rows can differ)
    columnCount = len(headerCells)
    if columnCount == 0 or columnCount != len(aligns):
        return False

    if silent:
        return True

    oldParentType = state.parentType
    state.parentType = "table"

    # use 'blockquote' lists for termination because it's
    # the most similar to tables
    terminatorRules = state.md.block.ruler.getRules("blockquote")

    # the end line (second element) is filled in once the table is finished
    tableLines = [startLine, 0]
    token = state.push("table_open", "table", 1)
    token.map = tableLines

    token = state.push("thead_open", "thead", 1)
    token.map = [startLine, startLine + 1]

    token = state.push("tr_open", "tr", 1)
    token.map = [startLine, startLine + 1]

    for headerCell, align in zip(headerCells, aligns):
        token = state.push("th_open", "th", 1)
        if align:
            token.attrs = {"style": "text-align:" + align}

        token = state.push("inline", "", 0)
        # note in markdown-it this map was removed in v12.0.0 however, we keep it,
        # since it is helpful to propagate to children tokens
        token.map = [startLine, startLine + 1]
        token.content = headerCell.strip()
        token.children = []

        state.push("th_close", "th", -1)

    state.push("tr_close", "tr", -1)
    state.push("thead_close", "thead", -1)

    tbodyLines = None
    firstBodyLine = startLine + 2
    nextLine = firstBodyLine
    while nextLine < endLine:
        if state.sCount[nextLine] < state.blkIndent:
            break

        if any(rule(state, nextLine, endLine, True) for rule in terminatorRules):
            break

        rowText = getLine(state, nextLine).strip()
        if not rowText:
            break
        if state.is_code_block(nextLine):
            break

        rowCells = escapedSplit(rowText)
        if rowCells and rowCells[0] == "":
            rowCells.pop(0)
        if rowCells and rowCells[-1] == "":
            rowCells.pop()

        if nextLine == firstBodyLine:
            # the body is opened lazily, by the first body row
            tbodyLines = [firstBodyLine, 0]
            token = state.push("tbody_open", "tbody", 1)
            token.map = tbodyLines

        token = state.push("tr_open", "tr", 1)
        token.map = [nextLine, nextLine + 1]

        # Every body row gets exactly as many cells as the header: missing
        # cells are emitted empty, surplus cells are ignored.
        for column, align in enumerate(aligns):
            token = state.push("td_open", "td", 1)
            if align:
                token.attrs = {"style": "text-align:" + align}

            token = state.push("inline", "", 0)
            # note in markdown-it this map was removed in v12.0.0 however, we keep it,
            # since it is helpful to propagate to children tokens
            token.map = [nextLine, nextLine + 1]
            token.content = rowCells[column].strip() if column < len(rowCells) else ""
            token.children = []

            state.push("td_close", "td", -1)

        state.push("tr_close", "tr", -1)

        nextLine += 1

    if tbodyLines:
        state.push("tbody_close", "tbody", -1)
        tbodyLines[1] = nextLine

    state.push("table_close", "table", -1)

    tableLines[1] = nextLine
    state.parentType = oldParentType
    state.line = nextLine
    return True
|
|
@ -1,19 +0,0 @@
|
|||
# Public names of this package; each one is imported from a submodule below.
__all__ = (
    "StateCore",
    "normalize",
    "block",
    "inline",
    "replace",
    "smartquotes",
    "linkify",
    "text_join",
)
|
||||
|
||||
from .block import block
|
||||
from .inline import inline
|
||||
from .linkify import linkify
|
||||
from .normalize import normalize
|
||||
from .replacements import replace
|
||||
from .smartquotes import smartquotes
|
||||
from .state_core import StateCore
|
||||
from .text_join import text_join
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue