1 |
yakumo_izuru |
1.1 |
// Blackfriday Markdown Processor |
2 |
|
|
// Available at http://github.com/russross/blackfriday |
3 |
|
|
// |
4 |
|
|
// Copyright © 2011 Russ Ross <russ@russross.com>. |
5 |
|
|
// Distributed under the Simplified BSD License. |
6 |
|
|
// See README.md for details. |
7 |
|
|
|
8 |
|
|
package blackfriday |
9 |
|
|
|
10 |
|
|
import ( |
11 |
|
|
"bytes" |
12 |
|
|
"fmt" |
13 |
|
|
"io" |
14 |
|
|
"strings" |
15 |
|
|
"unicode/utf8" |
16 |
|
|
) |
17 |
|
|
|
18 |
|
|
// |
19 |
|
|
// Markdown parsing and processing |
20 |
|
|
// |
21 |
|
|
|
22 |
|
|
// Version is the version string of the package. It appears in the rendered
// document when the CompletePage flag is on.
const Version = "2.0"
25 |
|
|
|
26 |
|
|
// Extensions is a bitwise or'ed collection of enabled Blackfriday's |
27 |
|
|
// extensions. |
28 |
|
|
type Extensions int |
29 |
|
|
|
30 |
|
|
// These are the supported markdown parsing extensions. |
31 |
|
|
// OR these values together to select multiple extensions. |
32 |
|
|
const ( |
33 |
|
|
NoExtensions Extensions = 0 |
34 |
|
|
NoIntraEmphasis Extensions = 1 << iota // Ignore emphasis markers inside words |
35 |
|
|
Tables // Render tables |
36 |
|
|
FencedCode // Render fenced code blocks |
37 |
|
|
Autolink // Detect embedded URLs that are not explicitly marked |
38 |
|
|
Strikethrough // Strikethrough text using ~~test~~ |
39 |
|
|
LaxHTMLBlocks // Loosen up HTML block parsing rules |
40 |
|
|
SpaceHeadings // Be strict about prefix heading rules |
41 |
|
|
HardLineBreak // Translate newlines into line breaks |
42 |
|
|
TabSizeEight // Expand tabs to eight spaces instead of four |
43 |
|
|
Footnotes // Pandoc-style footnotes |
44 |
|
|
NoEmptyLineBeforeBlock // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block |
45 |
|
|
HeadingIDs // specify heading IDs with {#id} |
46 |
|
|
Titleblock // Titleblock ala pandoc |
47 |
|
|
AutoHeadingIDs // Create the heading ID from the text |
48 |
|
|
BackslashLineBreak // Translate trailing backslashes into line breaks |
49 |
|
|
DefinitionLists // Render definition lists |
50 |
|
|
|
51 |
|
|
CommonHTMLFlags HTMLFlags = UseXHTML | Smartypants | |
52 |
|
|
SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes |
53 |
|
|
|
54 |
|
|
CommonExtensions Extensions = NoIntraEmphasis | Tables | FencedCode | |
55 |
|
|
Autolink | Strikethrough | SpaceHeadings | HeadingIDs | |
56 |
|
|
BackslashLineBreak | DefinitionLists |
57 |
|
|
) |
58 |
|
|
|
59 |
|
|
// ListType contains bitwise OR'ed flags for list and list item objects.
type ListType int

// These are the possible flag values for the ListItem renderer.
// Multiple flag values may be ORed together.
// These are mostly of interest if you are writing a new output format.
const (
	ListTypeOrdered ListType = 1 << iota
	ListTypeDefinition
	ListTypeTerm

	ListItemContainsBlock
	ListItemBeginningOfList // TODO: figure out if this is of any use now
	ListItemEndOfList
)
74 |
|
|
|
75 |
|
|
// CellAlignFlags holds a type of alignment in a table cell.
type CellAlignFlags int

// These are the possible flag values for the table cell renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
const (
	TableAlignmentLeft CellAlignFlags = 1 << iota
	TableAlignmentRight
	// TableAlignmentCenter is defined as both the left and right bits set.
	TableAlignmentCenter = (TableAlignmentLeft | TableAlignmentRight)
)
86 |
|
|
|
87 |
|
|
// The size of a tab stop, in columns.
const (
	TabSizeDefault = 4
	TabSizeDouble  = 8
)
92 |
|
|
|
93 |
|
|
// blockTags is a set of tags that are recognized as HTML block tags.
// Any of these can be included in markdown text without special escaping.
// The map is used purely as a set: only key presence matters.
var blockTags = map[string]struct{}{
	"blockquote": {},
	"del":        {},
	"div":        {},
	"dl":         {},
	"fieldset":   {},
	"form":       {},
	"h1":         {},
	"h2":         {},
	"h3":         {},
	"h4":         {},
	"h5":         {},
	"h6":         {},
	"iframe":     {},
	"ins":        {},
	"math":       {},
	"noscript":   {},
	"ol":         {},
	"pre":        {},
	"p":          {},
	"script":     {},
	"style":      {},
	"table":      {},
	"ul":         {},

	// HTML5
	"address":    {},
	"article":    {},
	"aside":      {},
	"canvas":     {},
	"figcaption": {},
	"figure":     {},
	"footer":     {},
	"header":     {},
	"hgroup":     {},
	"main":       {},
	"nav":        {},
	"output":     {},
	"progress":   {},
	"section":    {},
	"video":      {},
}
137 |
|
|
|
138 |
|
|
// Renderer is the rendering interface. This is mostly of interest if you are |
139 |
|
|
// implementing a new rendering format. |
140 |
|
|
// |
141 |
|
|
// Only an HTML implementation is provided in this repository, see the README |
142 |
|
|
// for external implementations. |
143 |
|
|
type Renderer interface { |
144 |
|
|
// RenderNode is the main rendering method. It will be called once for |
145 |
|
|
// every leaf node and twice for every non-leaf node (first with |
146 |
|
|
// entering=true, then with entering=false). The method should write its |
147 |
|
|
// rendition of the node to the supplied writer w. |
148 |
|
|
RenderNode(w io.Writer, node *Node, entering bool) WalkStatus |
149 |
|
|
|
150 |
|
|
// RenderHeader is a method that allows the renderer to produce some |
151 |
|
|
// content preceding the main body of the output document. The header is |
152 |
|
|
// understood in the broad sense here. For example, the default HTML |
153 |
|
|
// renderer will write not only the HTML document preamble, but also the |
154 |
|
|
// table of contents if it was requested. |
155 |
|
|
// |
156 |
|
|
// The method will be passed an entire document tree, in case a particular |
157 |
|
|
// implementation needs to inspect it to produce output. |
158 |
|
|
// |
159 |
|
|
// The output should be written to the supplied writer w. If your |
160 |
|
|
// implementation has no header to write, supply an empty implementation. |
161 |
|
|
RenderHeader(w io.Writer, ast *Node) |
162 |
|
|
|
163 |
|
|
// RenderFooter is a symmetric counterpart of RenderHeader. |
164 |
|
|
RenderFooter(w io.Writer, ast *Node) |
165 |
|
|
} |
166 |
|
|
|
167 |
|
|
// Callback functions for inline parsing. One such function is defined |
168 |
|
|
// for each character that triggers a response when parsing inline data. |
169 |
|
|
type inlineParser func(p *Markdown, data []byte, offset int) (int, *Node) |
170 |
|
|
|
171 |
|
|
// Markdown is a type that holds extensions and the runtime state used by |
172 |
|
|
// Parse, and the renderer. You can not use it directly, construct it with New. |
173 |
|
|
type Markdown struct { |
174 |
|
|
renderer Renderer |
175 |
|
|
referenceOverride ReferenceOverrideFunc |
176 |
|
|
refs map[string]*reference |
177 |
|
|
inlineCallback [256]inlineParser |
178 |
|
|
extensions Extensions |
179 |
|
|
nesting int |
180 |
|
|
maxNesting int |
181 |
|
|
insideLink bool |
182 |
|
|
|
183 |
|
|
// Footnotes need to be ordered as well as available to quickly check for |
184 |
|
|
// presence. If a ref is also a footnote, it's stored both in refs and here |
185 |
|
|
// in notes. Slice is nil if footnotes not enabled. |
186 |
|
|
notes []*reference |
187 |
|
|
|
188 |
|
|
doc *Node |
189 |
|
|
tip *Node // = doc |
190 |
|
|
oldTip *Node |
191 |
|
|
lastMatchedContainer *Node // = doc |
192 |
|
|
allClosed bool |
193 |
|
|
} |
194 |
|
|
|
195 |
|
|
func (p *Markdown) getRef(refid string) (ref *reference, found bool) { |
196 |
|
|
if p.referenceOverride != nil { |
197 |
|
|
r, overridden := p.referenceOverride(refid) |
198 |
|
|
if overridden { |
199 |
|
|
if r == nil { |
200 |
|
|
return nil, false |
201 |
|
|
} |
202 |
|
|
return &reference{ |
203 |
|
|
link: []byte(r.Link), |
204 |
|
|
title: []byte(r.Title), |
205 |
|
|
noteID: 0, |
206 |
|
|
hasBlock: false, |
207 |
|
|
text: []byte(r.Text)}, true |
208 |
|
|
} |
209 |
|
|
} |
210 |
|
|
// refs are case insensitive |
211 |
|
|
ref, found = p.refs[strings.ToLower(refid)] |
212 |
|
|
return ref, found |
213 |
|
|
} |
214 |
|
|
|
215 |
|
|
func (p *Markdown) finalize(block *Node) { |
216 |
|
|
above := block.Parent |
217 |
|
|
block.open = false |
218 |
|
|
p.tip = above |
219 |
|
|
} |
220 |
|
|
|
221 |
|
|
func (p *Markdown) addChild(node NodeType, offset uint32) *Node { |
222 |
|
|
return p.addExistingChild(NewNode(node), offset) |
223 |
|
|
} |
224 |
|
|
|
225 |
|
|
func (p *Markdown) addExistingChild(node *Node, offset uint32) *Node { |
226 |
|
|
for !p.tip.canContain(node.Type) { |
227 |
|
|
p.finalize(p.tip) |
228 |
|
|
} |
229 |
|
|
p.tip.AppendChild(node) |
230 |
|
|
p.tip = node |
231 |
|
|
return node |
232 |
|
|
} |
233 |
|
|
|
234 |
|
|
func (p *Markdown) closeUnmatchedBlocks() { |
235 |
|
|
if !p.allClosed { |
236 |
|
|
for p.oldTip != p.lastMatchedContainer { |
237 |
|
|
parent := p.oldTip.Parent |
238 |
|
|
p.finalize(p.oldTip) |
239 |
|
|
p.oldTip = parent |
240 |
|
|
} |
241 |
|
|
p.allClosed = true |
242 |
|
|
} |
243 |
|
|
} |
244 |
|
|
|
245 |
|
|
// |
246 |
|
|
// |
247 |
|
|
// Public interface |
248 |
|
|
// |
249 |
|
|
// |
250 |
|
|
|
251 |
|
|
// Reference represents the details of a link.
// See the documentation in Options for more details on use-case.
type Reference struct {
	// Link is usually the URL the reference points to.
	Link string
	// Title is the alternate text describing the link in more detail.
	Title string
	// Text is the optional text to override the ref with if the syntax used was
	// [refid][]
	Text string
}
262 |
|
|
|
263 |
|
|
// ReferenceOverrideFunc is expected to be called with a reference string and |
264 |
|
|
// return either a valid Reference type that the reference string maps to or |
265 |
|
|
// nil. If overridden is false, the default reference logic will be executed. |
266 |
|
|
// See the documentation in Options for more details on use-case. |
267 |
|
|
type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool) |
268 |
|
|
|
269 |
|
|
// New constructs a Markdown processor. You can use the same With* functions as |
270 |
|
|
// for Run() to customize parser's behavior and the renderer. |
271 |
|
|
func New(opts ...Option) *Markdown { |
272 |
|
|
var p Markdown |
273 |
|
|
for _, opt := range opts { |
274 |
|
|
opt(&p) |
275 |
|
|
} |
276 |
|
|
p.refs = make(map[string]*reference) |
277 |
|
|
p.maxNesting = 16 |
278 |
|
|
p.insideLink = false |
279 |
|
|
docNode := NewNode(Document) |
280 |
|
|
p.doc = docNode |
281 |
|
|
p.tip = docNode |
282 |
|
|
p.oldTip = docNode |
283 |
|
|
p.lastMatchedContainer = docNode |
284 |
|
|
p.allClosed = true |
285 |
|
|
// register inline parsers |
286 |
|
|
p.inlineCallback[' '] = maybeLineBreak |
287 |
|
|
p.inlineCallback['*'] = emphasis |
288 |
|
|
p.inlineCallback['_'] = emphasis |
289 |
|
|
if p.extensions&Strikethrough != 0 { |
290 |
|
|
p.inlineCallback['~'] = emphasis |
291 |
|
|
} |
292 |
|
|
p.inlineCallback['`'] = codeSpan |
293 |
|
|
p.inlineCallback['\n'] = lineBreak |
294 |
|
|
p.inlineCallback['['] = link |
295 |
|
|
p.inlineCallback['<'] = leftAngle |
296 |
|
|
p.inlineCallback['\\'] = escape |
297 |
|
|
p.inlineCallback['&'] = entity |
298 |
|
|
p.inlineCallback['!'] = maybeImage |
299 |
|
|
p.inlineCallback['^'] = maybeInlineFootnote |
300 |
|
|
if p.extensions&Autolink != 0 { |
301 |
|
|
p.inlineCallback['h'] = maybeAutoLink |
302 |
|
|
p.inlineCallback['m'] = maybeAutoLink |
303 |
|
|
p.inlineCallback['f'] = maybeAutoLink |
304 |
|
|
p.inlineCallback['H'] = maybeAutoLink |
305 |
|
|
p.inlineCallback['M'] = maybeAutoLink |
306 |
|
|
p.inlineCallback['F'] = maybeAutoLink |
307 |
|
|
} |
308 |
|
|
if p.extensions&Footnotes != 0 { |
309 |
|
|
p.notes = make([]*reference, 0) |
310 |
|
|
} |
311 |
|
|
return &p |
312 |
|
|
} |
313 |
|
|
|
314 |
|
|
// Option customizes the Markdown processor's default behavior. |
315 |
|
|
type Option func(*Markdown) |
316 |
|
|
|
317 |
|
|
// WithRenderer allows you to override the default renderer. |
318 |
|
|
func WithRenderer(r Renderer) Option { |
319 |
|
|
return func(p *Markdown) { |
320 |
|
|
p.renderer = r |
321 |
|
|
} |
322 |
|
|
} |
323 |
|
|
|
324 |
|
|
// WithExtensions allows you to pick some of the many extensions provided by |
325 |
|
|
// Blackfriday. You can bitwise OR them. |
326 |
|
|
func WithExtensions(e Extensions) Option { |
327 |
|
|
return func(p *Markdown) { |
328 |
|
|
p.extensions = e |
329 |
|
|
} |
330 |
|
|
} |
331 |
|
|
|
332 |
|
|
// WithNoExtensions turns off all extensions and custom behavior. |
333 |
|
|
func WithNoExtensions() Option { |
334 |
|
|
return func(p *Markdown) { |
335 |
|
|
p.extensions = NoExtensions |
336 |
|
|
p.renderer = NewHTMLRenderer(HTMLRendererParameters{ |
337 |
|
|
Flags: HTMLFlagsNone, |
338 |
|
|
}) |
339 |
|
|
} |
340 |
|
|
} |
341 |
|
|
|
342 |
|
|
// WithRefOverride sets an optional function callback that is called every |
343 |
|
|
// time a reference is resolved. |
344 |
|
|
// |
345 |
|
|
// In Markdown, the link reference syntax can be made to resolve a link to |
346 |
|
|
// a reference instead of an inline URL, in one of the following ways: |
347 |
|
|
// |
348 |
|
|
// * [link text][refid] |
349 |
|
|
// * [refid][] |
350 |
|
|
// |
351 |
|
|
// Usually, the refid is defined at the bottom of the Markdown document. If |
352 |
|
|
// this override function is provided, the refid is passed to the override |
353 |
|
|
// function first, before consulting the defined refids at the bottom. If |
354 |
|
|
// the override function indicates an override did not occur, the refids at |
355 |
|
|
// the bottom will be used to fill in the link details. |
356 |
|
|
func WithRefOverride(o ReferenceOverrideFunc) Option { |
357 |
|
|
return func(p *Markdown) { |
358 |
|
|
p.referenceOverride = o |
359 |
|
|
} |
360 |
|
|
} |
361 |
|
|
|
362 |
|
|
// Run is the main entry point to Blackfriday. It parses and renders a |
363 |
|
|
// block of markdown-encoded text. |
364 |
|
|
// |
365 |
|
|
// The simplest invocation of Run takes one argument, input: |
366 |
|
|
// output := Run(input) |
367 |
|
|
// This will parse the input with CommonExtensions enabled and render it with |
368 |
|
|
// the default HTMLRenderer (with CommonHTMLFlags). |
369 |
|
|
// |
370 |
|
|
// Variadic arguments opts can customize the default behavior. Since Markdown |
371 |
|
|
// type does not contain exported fields, you can not use it directly. Instead, |
372 |
|
|
// use the With* functions. For example, this will call the most basic |
373 |
|
|
// functionality, with no extensions: |
374 |
|
|
// output := Run(input, WithNoExtensions()) |
375 |
|
|
// |
376 |
|
|
// You can use any number of With* arguments, even contradicting ones. They |
377 |
|
|
// will be applied in order of appearance and the latter will override the |
378 |
|
|
// former: |
379 |
|
|
// output := Run(input, WithNoExtensions(), WithExtensions(exts), |
380 |
|
|
// WithRenderer(yourRenderer)) |
381 |
|
|
func Run(input []byte, opts ...Option) []byte { |
382 |
|
|
r := NewHTMLRenderer(HTMLRendererParameters{ |
383 |
|
|
Flags: CommonHTMLFlags, |
384 |
|
|
}) |
385 |
|
|
optList := []Option{WithRenderer(r), WithExtensions(CommonExtensions)} |
386 |
|
|
optList = append(optList, opts...) |
387 |
|
|
parser := New(optList...) |
388 |
|
|
ast := parser.Parse(input) |
389 |
|
|
var buf bytes.Buffer |
390 |
|
|
parser.renderer.RenderHeader(&buf, ast) |
391 |
|
|
ast.Walk(func(node *Node, entering bool) WalkStatus { |
392 |
|
|
return parser.renderer.RenderNode(&buf, node, entering) |
393 |
|
|
}) |
394 |
|
|
parser.renderer.RenderFooter(&buf, ast) |
395 |
|
|
return buf.Bytes() |
396 |
|
|
} |
397 |
|
|
|
398 |
|
|
// Parse is an entry point to the parsing part of Blackfriday. It takes an |
399 |
|
|
// input markdown document and produces a syntax tree for its contents. This |
400 |
|
|
// tree can then be rendered with a default or custom renderer, or |
401 |
|
|
// analyzed/transformed by the caller to whatever non-standard needs they have. |
402 |
|
|
// The return value is the root node of the syntax tree. |
403 |
|
|
func (p *Markdown) Parse(input []byte) *Node { |
404 |
|
|
p.block(input) |
405 |
|
|
// Walk the tree and finish up some of unfinished blocks |
406 |
|
|
for p.tip != nil { |
407 |
|
|
p.finalize(p.tip) |
408 |
|
|
} |
409 |
|
|
// Walk the tree again and process inline markdown in each block |
410 |
|
|
p.doc.Walk(func(node *Node, entering bool) WalkStatus { |
411 |
|
|
if node.Type == Paragraph || node.Type == Heading || node.Type == TableCell { |
412 |
|
|
p.inline(node, node.content) |
413 |
|
|
node.content = nil |
414 |
|
|
} |
415 |
|
|
return GoToNext |
416 |
|
|
}) |
417 |
|
|
p.parseRefsToAST() |
418 |
|
|
return p.doc |
419 |
|
|
} |
420 |
|
|
|
421 |
|
|
func (p *Markdown) parseRefsToAST() { |
422 |
|
|
if p.extensions&Footnotes == 0 || len(p.notes) == 0 { |
423 |
|
|
return |
424 |
|
|
} |
425 |
|
|
p.tip = p.doc |
426 |
|
|
block := p.addBlock(List, nil) |
427 |
|
|
block.IsFootnotesList = true |
428 |
|
|
block.ListFlags = ListTypeOrdered |
429 |
|
|
flags := ListItemBeginningOfList |
430 |
|
|
// Note: this loop is intentionally explicit, not range-form. This is |
431 |
|
|
// because the body of the loop will append nested footnotes to p.notes and |
432 |
|
|
// we need to process those late additions. Range form would only walk over |
433 |
|
|
// the fixed initial set. |
434 |
|
|
for i := 0; i < len(p.notes); i++ { |
435 |
|
|
ref := p.notes[i] |
436 |
|
|
p.addExistingChild(ref.footnote, 0) |
437 |
|
|
block := ref.footnote |
438 |
|
|
block.ListFlags = flags | ListTypeOrdered |
439 |
|
|
block.RefLink = ref.link |
440 |
|
|
if ref.hasBlock { |
441 |
|
|
flags |= ListItemContainsBlock |
442 |
|
|
p.block(ref.title) |
443 |
|
|
} else { |
444 |
|
|
p.inline(block, ref.title) |
445 |
|
|
} |
446 |
|
|
flags &^= ListItemBeginningOfList | ListItemContainsBlock |
447 |
|
|
} |
448 |
|
|
above := block.Parent |
449 |
|
|
finalizeList(block) |
450 |
|
|
p.tip = above |
451 |
|
|
block.Walk(func(node *Node, entering bool) WalkStatus { |
452 |
|
|
if node.Type == Paragraph || node.Type == Heading { |
453 |
|
|
p.inline(node, node.content) |
454 |
|
|
node.content = nil |
455 |
|
|
} |
456 |
|
|
return GoToNext |
457 |
|
|
}) |
458 |
|
|
} |
459 |
|
|
|
460 |
|
|
// |
461 |
|
|
// Link references |
462 |
|
|
// |
463 |
|
|
// This section implements support for references that (usually) appear |
464 |
|
|
// as footnotes in a document, and can be referenced anywhere in the document. |
465 |
|
|
// The basic format is: |
466 |
|
|
// |
467 |
|
|
// [1]: http://www.google.com/ "Google" |
468 |
|
|
// [2]: http://www.github.com/ "Github" |
469 |
|
|
// |
470 |
|
|
// Anywhere in the document, the reference can be linked by referring to its |
471 |
|
|
// label, i.e., 1 and 2 in this example, as in: |
472 |
|
|
// |
473 |
|
|
// This library is hosted on [Github][2], a git hosting site. |
474 |
|
|
// |
475 |
|
|
// Actual footnotes as specified in Pandoc and supported by some other Markdown |
476 |
|
|
// libraries such as php-markdown are also taken care of. They look like this: |
477 |
|
|
// |
478 |
|
|
// This sentence needs a bit of further explanation.[^note] |
479 |
|
|
// |
480 |
|
|
// [^note]: This is the explanation. |
481 |
|
|
// |
482 |
|
|
// Footnotes should be placed at the end of the document in an ordered list. |
483 |
|
|
// Finally, there are inline footnotes such as: |
484 |
|
|
// |
485 |
|
|
// Inline footnotes^[Also supported.] provide a quick inline explanation, |
486 |
|
|
// but are rendered at the bottom of the document. |
487 |
|
|
// |
488 |
|
|
|
489 |
|
|
// reference holds all information necessary for a reference-style links or |
490 |
|
|
// footnotes. |
491 |
|
|
// |
492 |
|
|
// Consider this markdown with reference-style links: |
493 |
|
|
// |
494 |
|
|
// [link][ref] |
495 |
|
|
// |
496 |
|
|
// [ref]: /url/ "tooltip title" |
497 |
|
|
// |
498 |
|
|
// It will be ultimately converted to this HTML: |
499 |
|
|
// |
500 |
|
|
// <p><a href=\"/url/\" title=\"title\">link</a></p> |
501 |
|
|
// |
502 |
|
|
// And a reference structure will be populated as follows: |
503 |
|
|
// |
504 |
|
|
// p.refs["ref"] = &reference{ |
505 |
|
|
// link: "/url/", |
506 |
|
|
// title: "tooltip title", |
507 |
|
|
// } |
508 |
|
|
// |
509 |
|
|
// Alternatively, reference can contain information about a footnote. Consider |
510 |
|
|
// this markdown: |
511 |
|
|
// |
512 |
|
|
// Text needing a footnote.[^a] |
513 |
|
|
// |
514 |
|
|
// [^a]: This is the note |
515 |
|
|
// |
516 |
|
|
// A reference structure will be populated as follows: |
517 |
|
|
// |
518 |
|
|
// p.refs["a"] = &reference{ |
519 |
|
|
// link: "a", |
520 |
|
|
// title: "This is the note", |
521 |
|
|
// noteID: <some positive int>, |
522 |
|
|
// } |
523 |
|
|
// |
524 |
|
|
// TODO: As you can see, it begs for splitting into two dedicated structures |
525 |
|
|
// for refs and for footnotes. |
526 |
|
|
type reference struct { |
527 |
|
|
link []byte |
528 |
|
|
title []byte |
529 |
|
|
noteID int // 0 if not a footnote ref |
530 |
|
|
hasBlock bool |
531 |
|
|
footnote *Node // a link to the Item node within a list of footnotes |
532 |
|
|
|
533 |
|
|
text []byte // only gets populated by refOverride feature with Reference.Text |
534 |
|
|
} |
535 |
|
|
|
536 |
|
|
func (r *reference) String() string { |
537 |
|
|
return fmt.Sprintf("{link: %q, title: %q, text: %q, noteID: %d, hasBlock: %v}", |
538 |
|
|
r.link, r.title, r.text, r.noteID, r.hasBlock) |
539 |
|
|
} |
540 |
|
|
|
541 |
|
|
// Check whether or not data starts with a reference link. |
542 |
|
|
// If so, it is parsed and stored in the list of references |
543 |
|
|
// (in the render struct). |
544 |
|
|
// Returns the number of bytes to skip to move past it, |
545 |
|
|
// or zero if the first line is not a reference. |
546 |
|
|
func isReference(p *Markdown, data []byte, tabSize int) int { |
547 |
|
|
// up to 3 optional leading spaces |
548 |
|
|
if len(data) < 4 { |
549 |
|
|
return 0 |
550 |
|
|
} |
551 |
|
|
i := 0 |
552 |
|
|
for i < 3 && data[i] == ' ' { |
553 |
|
|
i++ |
554 |
|
|
} |
555 |
|
|
|
556 |
|
|
noteID := 0 |
557 |
|
|
|
558 |
|
|
// id part: anything but a newline between brackets |
559 |
|
|
if data[i] != '[' { |
560 |
|
|
return 0 |
561 |
|
|
} |
562 |
|
|
i++ |
563 |
|
|
if p.extensions&Footnotes != 0 { |
564 |
|
|
if i < len(data) && data[i] == '^' { |
565 |
|
|
// we can set it to anything here because the proper noteIds will |
566 |
|
|
// be assigned later during the second pass. It just has to be != 0 |
567 |
|
|
noteID = 1 |
568 |
|
|
i++ |
569 |
|
|
} |
570 |
|
|
} |
571 |
|
|
idOffset := i |
572 |
|
|
for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' { |
573 |
|
|
i++ |
574 |
|
|
} |
575 |
|
|
if i >= len(data) || data[i] != ']' { |
576 |
|
|
return 0 |
577 |
|
|
} |
578 |
|
|
idEnd := i |
579 |
|
|
// footnotes can have empty ID, like this: [^], but a reference can not be |
580 |
|
|
// empty like this: []. Break early if it's not a footnote and there's no ID |
581 |
|
|
if noteID == 0 && idOffset == idEnd { |
582 |
|
|
return 0 |
583 |
|
|
} |
584 |
|
|
// spacer: colon (space | tab)* newline? (space | tab)* |
585 |
|
|
i++ |
586 |
|
|
if i >= len(data) || data[i] != ':' { |
587 |
|
|
return 0 |
588 |
|
|
} |
589 |
|
|
i++ |
590 |
|
|
for i < len(data) && (data[i] == ' ' || data[i] == '\t') { |
591 |
|
|
i++ |
592 |
|
|
} |
593 |
|
|
if i < len(data) && (data[i] == '\n' || data[i] == '\r') { |
594 |
|
|
i++ |
595 |
|
|
if i < len(data) && data[i] == '\n' && data[i-1] == '\r' { |
596 |
|
|
i++ |
597 |
|
|
} |
598 |
|
|
} |
599 |
|
|
for i < len(data) && (data[i] == ' ' || data[i] == '\t') { |
600 |
|
|
i++ |
601 |
|
|
} |
602 |
|
|
if i >= len(data) { |
603 |
|
|
return 0 |
604 |
|
|
} |
605 |
|
|
|
606 |
|
|
var ( |
607 |
|
|
linkOffset, linkEnd int |
608 |
|
|
titleOffset, titleEnd int |
609 |
|
|
lineEnd int |
610 |
|
|
raw []byte |
611 |
|
|
hasBlock bool |
612 |
|
|
) |
613 |
|
|
|
614 |
|
|
if p.extensions&Footnotes != 0 && noteID != 0 { |
615 |
|
|
linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize) |
616 |
|
|
lineEnd = linkEnd |
617 |
|
|
} else { |
618 |
|
|
linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i) |
619 |
|
|
} |
620 |
|
|
if lineEnd == 0 { |
621 |
|
|
return 0 |
622 |
|
|
} |
623 |
|
|
|
624 |
|
|
// a valid ref has been found |
625 |
|
|
|
626 |
|
|
ref := &reference{ |
627 |
|
|
noteID: noteID, |
628 |
|
|
hasBlock: hasBlock, |
629 |
|
|
} |
630 |
|
|
|
631 |
|
|
if noteID > 0 { |
632 |
|
|
// reusing the link field for the id since footnotes don't have links |
633 |
|
|
ref.link = data[idOffset:idEnd] |
634 |
|
|
// if footnote, it's not really a title, it's the contained text |
635 |
|
|
ref.title = raw |
636 |
|
|
} else { |
637 |
|
|
ref.link = data[linkOffset:linkEnd] |
638 |
|
|
ref.title = data[titleOffset:titleEnd] |
639 |
|
|
} |
640 |
|
|
|
641 |
|
|
// id matches are case-insensitive |
642 |
|
|
id := string(bytes.ToLower(data[idOffset:idEnd])) |
643 |
|
|
|
644 |
|
|
p.refs[id] = ref |
645 |
|
|
|
646 |
|
|
return lineEnd |
647 |
|
|
} |
648 |
|
|
|
649 |
|
|
func scanLinkRef(p *Markdown, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) { |
650 |
|
|
// link: whitespace-free sequence, optionally between angle brackets |
651 |
|
|
if data[i] == '<' { |
652 |
|
|
i++ |
653 |
|
|
} |
654 |
|
|
linkOffset = i |
655 |
|
|
for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' { |
656 |
|
|
i++ |
657 |
|
|
} |
658 |
|
|
linkEnd = i |
659 |
|
|
if data[linkOffset] == '<' && data[linkEnd-1] == '>' { |
660 |
|
|
linkOffset++ |
661 |
|
|
linkEnd-- |
662 |
|
|
} |
663 |
|
|
|
664 |
|
|
// optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) |
665 |
|
|
for i < len(data) && (data[i] == ' ' || data[i] == '\t') { |
666 |
|
|
i++ |
667 |
|
|
} |
668 |
|
|
if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' { |
669 |
|
|
return |
670 |
|
|
} |
671 |
|
|
|
672 |
|
|
// compute end-of-line |
673 |
|
|
if i >= len(data) || data[i] == '\r' || data[i] == '\n' { |
674 |
|
|
lineEnd = i |
675 |
|
|
} |
676 |
|
|
if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' { |
677 |
|
|
lineEnd++ |
678 |
|
|
} |
679 |
|
|
|
680 |
|
|
// optional (space|tab)* spacer after a newline |
681 |
|
|
if lineEnd > 0 { |
682 |
|
|
i = lineEnd + 1 |
683 |
|
|
for i < len(data) && (data[i] == ' ' || data[i] == '\t') { |
684 |
|
|
i++ |
685 |
|
|
} |
686 |
|
|
} |
687 |
|
|
|
688 |
|
|
// optional title: any non-newline sequence enclosed in '"() alone on its line |
689 |
|
|
if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') { |
690 |
|
|
i++ |
691 |
|
|
titleOffset = i |
692 |
|
|
|
693 |
|
|
// look for EOL |
694 |
|
|
for i < len(data) && data[i] != '\n' && data[i] != '\r' { |
695 |
|
|
i++ |
696 |
|
|
} |
697 |
|
|
if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' { |
698 |
|
|
titleEnd = i + 1 |
699 |
|
|
} else { |
700 |
|
|
titleEnd = i |
701 |
|
|
} |
702 |
|
|
|
703 |
|
|
// step back |
704 |
|
|
i-- |
705 |
|
|
for i > titleOffset && (data[i] == ' ' || data[i] == '\t') { |
706 |
|
|
i-- |
707 |
|
|
} |
708 |
|
|
if i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') { |
709 |
|
|
lineEnd = titleEnd |
710 |
|
|
titleEnd = i |
711 |
|
|
} |
712 |
|
|
} |
713 |
|
|
|
714 |
|
|
return |
715 |
|
|
} |
716 |
|
|
|
717 |
|
|
// The first bit of this logic is the same as Parser.listItem, but the rest |
718 |
|
|
// is much simpler. This function simply finds the entire block and shifts it |
719 |
|
|
// over by one tab if it is indeed a block (just returns the line if it's not). |
720 |
|
|
// blockEnd is the end of the section in the input buffer, and contents is the |
721 |
|
|
// extracted text that was shifted over one tab. It will need to be rendered at |
722 |
|
|
// the end of the document. |
723 |
|
|
func scanFootnote(p *Markdown, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) { |
724 |
|
|
if i == 0 || len(data) == 0 { |
725 |
|
|
return |
726 |
|
|
} |
727 |
|
|
|
728 |
|
|
// skip leading whitespace on first line |
729 |
|
|
for i < len(data) && data[i] == ' ' { |
730 |
|
|
i++ |
731 |
|
|
} |
732 |
|
|
|
733 |
|
|
blockStart = i |
734 |
|
|
|
735 |
|
|
// find the end of the line |
736 |
|
|
blockEnd = i |
737 |
|
|
for i < len(data) && data[i-1] != '\n' { |
738 |
|
|
i++ |
739 |
|
|
} |
740 |
|
|
|
741 |
|
|
// get working buffer |
742 |
|
|
var raw bytes.Buffer |
743 |
|
|
|
744 |
|
|
// put the first line into the working buffer |
745 |
|
|
raw.Write(data[blockEnd:i]) |
746 |
|
|
blockEnd = i |
747 |
|
|
|
748 |
|
|
// process the following lines |
749 |
|
|
containsBlankLine := false |
750 |
|
|
|
751 |
|
|
gatherLines: |
752 |
|
|
for blockEnd < len(data) { |
753 |
|
|
i++ |
754 |
|
|
|
755 |
|
|
// find the end of this line |
756 |
|
|
for i < len(data) && data[i-1] != '\n' { |
757 |
|
|
i++ |
758 |
|
|
} |
759 |
|
|
|
760 |
|
|
// if it is an empty line, guess that it is part of this item |
761 |
|
|
// and move on to the next line |
762 |
|
|
if p.isEmpty(data[blockEnd:i]) > 0 { |
763 |
|
|
containsBlankLine = true |
764 |
|
|
blockEnd = i |
765 |
|
|
continue |
766 |
|
|
} |
767 |
|
|
|
768 |
|
|
n := 0 |
769 |
|
|
if n = isIndented(data[blockEnd:i], indentSize); n == 0 { |
770 |
|
|
// this is the end of the block. |
771 |
|
|
// we don't want to include this last line in the index. |
772 |
|
|
break gatherLines |
773 |
|
|
} |
774 |
|
|
|
775 |
|
|
// if there were blank lines before this one, insert a new one now |
776 |
|
|
if containsBlankLine { |
777 |
|
|
raw.WriteByte('\n') |
778 |
|
|
containsBlankLine = false |
779 |
|
|
} |
780 |
|
|
|
781 |
|
|
// get rid of that first tab, write to buffer |
782 |
|
|
raw.Write(data[blockEnd+n : i]) |
783 |
|
|
hasBlock = true |
784 |
|
|
|
785 |
|
|
blockEnd = i |
786 |
|
|
} |
787 |
|
|
|
788 |
|
|
if data[blockEnd-1] != '\n' { |
789 |
|
|
raw.WriteByte('\n') |
790 |
|
|
} |
791 |
|
|
|
792 |
|
|
contents = raw.Bytes() |
793 |
|
|
|
794 |
|
|
return |
795 |
|
|
} |
796 |
|
|
|
797 |
|
|
// |
798 |
|
|
// |
799 |
|
|
// Miscellaneous helper functions |
800 |
|
|
// |
801 |
|
|
// |
802 |
|
|
|
803 |
|
|
// ispunct reports whether c is an ASCII punctuation symbol.
// The character set is taken from a private function in regexp in the stdlib.
func ispunct(c byte) bool {
	return strings.IndexByte("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", c) >= 0
}
813 |
|
|
|
814 |
|
|
// Test if a character is a whitespace character. |
815 |
|
|
func isspace(c byte) bool { |
816 |
|
|
return ishorizontalspace(c) || isverticalspace(c) |
817 |
|
|
} |
818 |
|
|
|
819 |
|
|
// ishorizontalspace reports whether c is a horizontal whitespace character,
// i.e. a space or a tab.
func ishorizontalspace(c byte) bool {
	return c == ' ' || c == '\t'
}
823 |
|
|
|
824 |
|
|
// isverticalspace reports whether c is a vertical whitespace character:
// newline, carriage return, form feed or vertical tab.
func isverticalspace(c byte) bool {
	return c == '\n' || c == '\r' || c == '\f' || c == '\v'
}
828 |
|
|
|
829 |
|
|
// isletter reports whether c is an ASCII letter (a-z or A-Z).
func isletter(c byte) bool {
	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
}
833 |
|
|
|
834 |
|
|
// Test if a character is a letter or a digit. |
835 |
|
|
// TODO: check when this is looking for ASCII alnum and when it should use unicode |
836 |
|
|
func isalnum(c byte) bool { |
837 |
|
|
return (c >= '0' && c <= '9') || isletter(c) |
838 |
|
|
} |
839 |
|
|
|
840 |
|
|
// expandTabs writes line to out with every tab character replaced by spaces,
// aligning to the next tabSize column. Columns are counted in runes, not
// bytes. Nothing is appended beyond the expanded line itself: a trailing
// newline appears in the output only if line contains one.
func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
	// first, check for common cases: no tabs, or only tabs at beginning of line
	prefix := 0
	for prefix < len(line) && line[prefix] == '\t' {
		prefix++
	}

	// no need to decode runes if all tabs are at the beginning of the line
	if bytes.IndexByte(line[prefix:], '\t') < 0 {
		for i := 0; i < prefix*tabSize; i++ {
			out.WriteByte(' ')
		}
		out.Write(line[prefix:])
		return
	}

	// the slow case: we need to count runes to figure out how
	// many spaces to insert for each tab
	column := 0
	i := 0
	for i < len(line) {
		// copy the run of non-tab text, counting one column per rune
		start := i
		for i < len(line) && line[i] != '\t' {
			_, size := utf8.DecodeRune(line[i:])
			i += size
			column++
		}
		if i > start {
			out.Write(line[start:i])
		}

		if i >= len(line) {
			break
		}

		// pad with at least one space, up to the next tab stop
		for {
			out.WriteByte(' ')
			column++
			if column%tabSize == 0 {
				break
			}
		}

		// skip over the tab itself
		i++
	}
}
897 |
|
|
|
898 |
|
|
// isIndented finds if a line counts as indented or not: it is indented when
// it starts with a tab or with indentSize spaces.
// It returns the number of characters the indent is (0 = not indented), i.e.
// 1 for a leading tab and indentSize for leading spaces.
func isIndented(data []byte, indentSize int) int {
	if len(data) == 0 {
		return 0
	}
	if data[0] == '\t' {
		return 1
	}
	// too short to hold a full space indent
	if len(data) < indentSize {
		return 0
	}
	for _, c := range data[:max(indentSize, 0)] {
		if c != ' ' {
			return 0
		}
	}
	return indentSize
}
917 |
|
|
|
918 |
|
|
// Create a url-safe slug for fragments |
919 |
|
|
func slugify(in []byte) []byte { |
920 |
|
|
if len(in) == 0 { |
921 |
|
|
return in |
922 |
|
|
} |
923 |
|
|
out := make([]byte, 0, len(in)) |
924 |
|
|
sym := false |
925 |
|
|
|
926 |
|
|
for _, ch := range in { |
927 |
|
|
if isalnum(ch) { |
928 |
|
|
sym = false |
929 |
|
|
out = append(out, ch) |
930 |
|
|
} else if sym { |
931 |
|
|
continue |
932 |
|
|
} else { |
933 |
|
|
out = append(out, '-') |
934 |
|
|
sym = true |
935 |
|
|
} |
936 |
|
|
} |
937 |
|
|
var a, b int |
938 |
|
|
var ch byte |
939 |
|
|
for a, ch = range out { |
940 |
|
|
if ch != '-' { |
941 |
|
|
break |
942 |
|
|
} |
943 |
|
|
} |
944 |
|
|
for b = len(out) - 1; b > 0; b-- { |
945 |
|
|
if out[b] != '-' { |
946 |
|
|
break |
947 |
|
|
} |
948 |
|
|
} |
949 |
|
|
return out[a : b+1] |
950 |
|
|
} |