1 |
// Blackfriday Markdown Processor |
2 |
// Available at http://github.com/russross/blackfriday |
3 |
// |
4 |
// Copyright © 2011 Russ Ross <russ@russross.com>. |
5 |
// Distributed under the Simplified BSD License. |
6 |
// See README.md for details. |
7 |
|
8 |
package blackfriday |
9 |
|
10 |
import ( |
11 |
"bytes" |
12 |
"fmt" |
13 |
"io" |
14 |
"strings" |
15 |
"unicode/utf8" |
16 |
) |
17 |
|
18 |
// |
19 |
// Markdown parsing and processing |
20 |
// |
21 |
|
22 |
// Version is the version string of the package. It appears in the rendered
// document when the CompletePage flag is on.
const Version = "2.0"
25 |
|
26 |
// Extensions is a bitwise or'ed collection of enabled Blackfriday's |
27 |
// extensions. |
28 |
type Extensions int |
29 |
|
30 |
// These are the supported markdown parsing extensions. |
31 |
// OR these values together to select multiple extensions. |
32 |
const ( |
33 |
NoExtensions Extensions = 0 |
34 |
NoIntraEmphasis Extensions = 1 << iota // Ignore emphasis markers inside words |
35 |
Tables // Render tables |
36 |
FencedCode // Render fenced code blocks |
37 |
Autolink // Detect embedded URLs that are not explicitly marked |
38 |
Strikethrough // Strikethrough text using ~~test~~ |
39 |
LaxHTMLBlocks // Loosen up HTML block parsing rules |
40 |
SpaceHeadings // Be strict about prefix heading rules |
41 |
HardLineBreak // Translate newlines into line breaks |
42 |
TabSizeEight // Expand tabs to eight spaces instead of four |
43 |
Footnotes // Pandoc-style footnotes |
44 |
NoEmptyLineBeforeBlock // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block |
45 |
HeadingIDs // specify heading IDs with {#id} |
46 |
Titleblock // Titleblock ala pandoc |
47 |
AutoHeadingIDs // Create the heading ID from the text |
48 |
BackslashLineBreak // Translate trailing backslashes into line breaks |
49 |
DefinitionLists // Render definition lists |
50 |
|
51 |
CommonHTMLFlags HTMLFlags = UseXHTML | Smartypants | |
52 |
SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes |
53 |
|
54 |
CommonExtensions Extensions = NoIntraEmphasis | Tables | FencedCode | |
55 |
Autolink | Strikethrough | SpaceHeadings | HeadingIDs | |
56 |
BackslashLineBreak | DefinitionLists |
57 |
) |
58 |
|
59 |
// ListType is a bitmask of flags describing list and list item nodes.
type ListType int

// Flag values for the ListItem renderer. Several of them may be ORed
// together. They mostly matter when you are writing a new output format.
const (
	// Kinds of list.
	ListTypeOrdered ListType = 1 << iota
	ListTypeDefinition
	ListTypeTerm

	// Properties of an individual list item.
	ListItemContainsBlock
	ListItemBeginningOfList // TODO: figure out if this is of any use now
	ListItemEndOfList
)
74 |
|
75 |
// CellAlignFlags describes how the content of a table cell is aligned.
type CellAlignFlags int

// Alignment values for the table cell renderer. A cell carries exactly one of
// them; they are not ORed together by callers, even though the center value
// is defined as the union of left and right. They mostly matter when you are
// writing a new output format.
const (
	TableAlignmentLeft CellAlignFlags = 1 << iota
	TableAlignmentRight
	TableAlignmentCenter = TableAlignmentLeft | TableAlignmentRight
)
86 |
|
87 |
// Tab stop widths, measured in columns.
const (
	TabSizeDefault = 4 // regular tab stop
	TabSizeDouble  = 8 // wide tab stop
)
92 |
|
93 |
// blockTags is the set of tag names that are recognized as HTML block tags.
// Any of them can be included in markdown text without special escaping.
// The map is used purely as a set, so the values are empty structs.
var blockTags = map[string]struct{}{
	// Classic HTML block-level elements.
	"blockquote": {},
	"del":        {},
	"div":        {},
	"dl":         {},
	"fieldset":   {},
	"form":       {},
	"h1":         {},
	"h2":         {},
	"h3":         {},
	"h4":         {},
	"h5":         {},
	"h6":         {},
	"iframe":     {},
	"ins":        {},
	"math":       {},
	"noscript":   {},
	"ol":         {},
	"p":          {},
	"pre":        {},
	"script":     {},
	"style":      {},
	"table":      {},
	"ul":         {},

	// Elements introduced by HTML5.
	"address":    {},
	"article":    {},
	"aside":      {},
	"canvas":     {},
	"figcaption": {},
	"figure":     {},
	"footer":     {},
	"header":     {},
	"hgroup":     {},
	"main":       {},
	"nav":        {},
	"output":     {},
	"progress":   {},
	"section":    {},
	"video":      {},
}
137 |
|
138 |
// Renderer is the rendering interface. This is mostly of interest if you are |
139 |
// implementing a new rendering format. |
140 |
// |
141 |
// Only an HTML implementation is provided in this repository, see the README |
142 |
// for external implementations. |
143 |
type Renderer interface { |
144 |
// RenderNode is the main rendering method. It will be called once for |
145 |
// every leaf node and twice for every non-leaf node (first with |
146 |
// entering=true, then with entering=false). The method should write its |
147 |
// rendition of the node to the supplied writer w. |
148 |
RenderNode(w io.Writer, node *Node, entering bool) WalkStatus |
149 |
|
150 |
// RenderHeader is a method that allows the renderer to produce some |
151 |
// content preceding the main body of the output document. The header is |
152 |
// understood in the broad sense here. For example, the default HTML |
153 |
// renderer will write not only the HTML document preamble, but also the |
154 |
// table of contents if it was requested. |
155 |
// |
156 |
// The method will be passed an entire document tree, in case a particular |
157 |
// implementation needs to inspect it to produce output. |
158 |
// |
159 |
// The output should be written to the supplied writer w. If your |
160 |
// implementation has no header to write, supply an empty implementation. |
161 |
RenderHeader(w io.Writer, ast *Node) |
162 |
|
163 |
// RenderFooter is a symmetric counterpart of RenderHeader. |
164 |
RenderFooter(w io.Writer, ast *Node) |
165 |
} |
166 |
|
167 |
// Callback functions for inline parsing. One such function is defined |
168 |
// for each character that triggers a response when parsing inline data. |
169 |
type inlineParser func(p *Markdown, data []byte, offset int) (int, *Node) |
170 |
|
171 |
// Markdown is a type that holds extensions and the runtime state used by |
172 |
// Parse, and the renderer. You can not use it directly, construct it with New. |
173 |
type Markdown struct { |
174 |
renderer Renderer |
175 |
referenceOverride ReferenceOverrideFunc |
176 |
refs map[string]*reference |
177 |
inlineCallback [256]inlineParser |
178 |
extensions Extensions |
179 |
nesting int |
180 |
maxNesting int |
181 |
insideLink bool |
182 |
|
183 |
// Footnotes need to be ordered as well as available to quickly check for |
184 |
// presence. If a ref is also a footnote, it's stored both in refs and here |
185 |
// in notes. Slice is nil if footnotes not enabled. |
186 |
notes []*reference |
187 |
|
188 |
doc *Node |
189 |
tip *Node // = doc |
190 |
oldTip *Node |
191 |
lastMatchedContainer *Node // = doc |
192 |
allClosed bool |
193 |
} |
194 |
|
195 |
func (p *Markdown) getRef(refid string) (ref *reference, found bool) { |
196 |
if p.referenceOverride != nil { |
197 |
r, overridden := p.referenceOverride(refid) |
198 |
if overridden { |
199 |
if r == nil { |
200 |
return nil, false |
201 |
} |
202 |
return &reference{ |
203 |
link: []byte(r.Link), |
204 |
title: []byte(r.Title), |
205 |
noteID: 0, |
206 |
hasBlock: false, |
207 |
text: []byte(r.Text)}, true |
208 |
} |
209 |
} |
210 |
// refs are case insensitive |
211 |
ref, found = p.refs[strings.ToLower(refid)] |
212 |
return ref, found |
213 |
} |
214 |
|
215 |
func (p *Markdown) finalize(block *Node) { |
216 |
above := block.Parent |
217 |
block.open = false |
218 |
p.tip = above |
219 |
} |
220 |
|
221 |
func (p *Markdown) addChild(node NodeType, offset uint32) *Node { |
222 |
return p.addExistingChild(NewNode(node), offset) |
223 |
} |
224 |
|
225 |
func (p *Markdown) addExistingChild(node *Node, offset uint32) *Node { |
226 |
for !p.tip.canContain(node.Type) { |
227 |
p.finalize(p.tip) |
228 |
} |
229 |
p.tip.AppendChild(node) |
230 |
p.tip = node |
231 |
return node |
232 |
} |
233 |
|
234 |
func (p *Markdown) closeUnmatchedBlocks() { |
235 |
if !p.allClosed { |
236 |
for p.oldTip != p.lastMatchedContainer { |
237 |
parent := p.oldTip.Parent |
238 |
p.finalize(p.oldTip) |
239 |
p.oldTip = parent |
240 |
} |
241 |
p.allClosed = true |
242 |
} |
243 |
} |
244 |
|
245 |
// |
246 |
// |
247 |
// Public interface |
248 |
// |
249 |
// |
250 |
|
251 |
// Reference carries the details of a link. It is the value a
// ReferenceOverrideFunc returns to describe what a reference id resolves to.
type Reference struct {
	// Link is usually the URL the reference points to.
	Link string

	// Title is the alternate text describing the link in more detail.
	Title string

	// Text is the optional text to override the ref with if the syntax
	// used was [refid][].
	Text string
}
262 |
|
263 |
// ReferenceOverrideFunc is expected to be called with a reference string and |
264 |
// return either a valid Reference type that the reference string maps to or |
265 |
// nil. If overridden is false, the default reference logic will be executed. |
266 |
// See the documentation in Options for more details on use-case. |
267 |
type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool) |
268 |
|
269 |
// New constructs a Markdown processor. You can use the same With* functions as |
270 |
// for Run() to customize parser's behavior and the renderer. |
271 |
func New(opts ...Option) *Markdown { |
272 |
var p Markdown |
273 |
for _, opt := range opts { |
274 |
opt(&p) |
275 |
} |
276 |
p.refs = make(map[string]*reference) |
277 |
p.maxNesting = 16 |
278 |
p.insideLink = false |
279 |
docNode := NewNode(Document) |
280 |
p.doc = docNode |
281 |
p.tip = docNode |
282 |
p.oldTip = docNode |
283 |
p.lastMatchedContainer = docNode |
284 |
p.allClosed = true |
285 |
// register inline parsers |
286 |
p.inlineCallback[' '] = maybeLineBreak |
287 |
p.inlineCallback['*'] = emphasis |
288 |
p.inlineCallback['_'] = emphasis |
289 |
if p.extensions&Strikethrough != 0 { |
290 |
p.inlineCallback['~'] = emphasis |
291 |
} |
292 |
p.inlineCallback['`'] = codeSpan |
293 |
p.inlineCallback['\n'] = lineBreak |
294 |
p.inlineCallback['['] = link |
295 |
p.inlineCallback['<'] = leftAngle |
296 |
p.inlineCallback['\\'] = escape |
297 |
p.inlineCallback['&'] = entity |
298 |
p.inlineCallback['!'] = maybeImage |
299 |
p.inlineCallback['^'] = maybeInlineFootnote |
300 |
if p.extensions&Autolink != 0 { |
301 |
p.inlineCallback['h'] = maybeAutoLink |
302 |
p.inlineCallback['m'] = maybeAutoLink |
303 |
p.inlineCallback['f'] = maybeAutoLink |
304 |
p.inlineCallback['H'] = maybeAutoLink |
305 |
p.inlineCallback['M'] = maybeAutoLink |
306 |
p.inlineCallback['F'] = maybeAutoLink |
307 |
} |
308 |
if p.extensions&Footnotes != 0 { |
309 |
p.notes = make([]*reference, 0) |
310 |
} |
311 |
return &p |
312 |
} |
313 |
|
314 |
// Option customizes the Markdown processor's default behavior. |
315 |
type Option func(*Markdown) |
316 |
|
317 |
// WithRenderer allows you to override the default renderer. |
318 |
func WithRenderer(r Renderer) Option { |
319 |
return func(p *Markdown) { |
320 |
p.renderer = r |
321 |
} |
322 |
} |
323 |
|
324 |
// WithExtensions allows you to pick some of the many extensions provided by |
325 |
// Blackfriday. You can bitwise OR them. |
326 |
func WithExtensions(e Extensions) Option { |
327 |
return func(p *Markdown) { |
328 |
p.extensions = e |
329 |
} |
330 |
} |
331 |
|
332 |
// WithNoExtensions turns off all extensions and custom behavior. |
333 |
func WithNoExtensions() Option { |
334 |
return func(p *Markdown) { |
335 |
p.extensions = NoExtensions |
336 |
p.renderer = NewHTMLRenderer(HTMLRendererParameters{ |
337 |
Flags: HTMLFlagsNone, |
338 |
}) |
339 |
} |
340 |
} |
341 |
|
342 |
// WithRefOverride sets an optional function callback that is called every |
343 |
// time a reference is resolved. |
344 |
// |
345 |
// In Markdown, the link reference syntax can be made to resolve a link to |
346 |
// a reference instead of an inline URL, in one of the following ways: |
347 |
// |
348 |
// * [link text][refid] |
349 |
// * [refid][] |
350 |
// |
351 |
// Usually, the refid is defined at the bottom of the Markdown document. If |
352 |
// this override function is provided, the refid is passed to the override |
353 |
// function first, before consulting the defined refids at the bottom. If |
354 |
// the override function indicates an override did not occur, the refids at |
355 |
// the bottom will be used to fill in the link details. |
356 |
func WithRefOverride(o ReferenceOverrideFunc) Option { |
357 |
return func(p *Markdown) { |
358 |
p.referenceOverride = o |
359 |
} |
360 |
} |
361 |
|
362 |
// Run is the main entry point to Blackfriday. It parses and renders a |
363 |
// block of markdown-encoded text. |
364 |
// |
365 |
// The simplest invocation of Run takes one argument, input: |
366 |
// output := Run(input) |
367 |
// This will parse the input with CommonExtensions enabled and render it with |
368 |
// the default HTMLRenderer (with CommonHTMLFlags). |
369 |
// |
370 |
// Variadic arguments opts can customize the default behavior. Since Markdown |
371 |
// type does not contain exported fields, you can not use it directly. Instead, |
372 |
// use the With* functions. For example, this will call the most basic |
373 |
// functionality, with no extensions: |
374 |
// output := Run(input, WithNoExtensions()) |
375 |
// |
376 |
// You can use any number of With* arguments, even contradicting ones. They |
377 |
// will be applied in order of appearance and the latter will override the |
378 |
// former: |
379 |
// output := Run(input, WithNoExtensions(), WithExtensions(exts), |
380 |
// WithRenderer(yourRenderer)) |
381 |
func Run(input []byte, opts ...Option) []byte { |
382 |
r := NewHTMLRenderer(HTMLRendererParameters{ |
383 |
Flags: CommonHTMLFlags, |
384 |
}) |
385 |
optList := []Option{WithRenderer(r), WithExtensions(CommonExtensions)} |
386 |
optList = append(optList, opts...) |
387 |
parser := New(optList...) |
388 |
ast := parser.Parse(input) |
389 |
var buf bytes.Buffer |
390 |
parser.renderer.RenderHeader(&buf, ast) |
391 |
ast.Walk(func(node *Node, entering bool) WalkStatus { |
392 |
return parser.renderer.RenderNode(&buf, node, entering) |
393 |
}) |
394 |
parser.renderer.RenderFooter(&buf, ast) |
395 |
return buf.Bytes() |
396 |
} |
397 |
|
398 |
// Parse is an entry point to the parsing part of Blackfriday. It takes an |
399 |
// input markdown document and produces a syntax tree for its contents. This |
400 |
// tree can then be rendered with a default or custom renderer, or |
401 |
// analyzed/transformed by the caller to whatever non-standard needs they have. |
402 |
// The return value is the root node of the syntax tree. |
403 |
func (p *Markdown) Parse(input []byte) *Node { |
404 |
p.block(input) |
405 |
// Walk the tree and finish up some of unfinished blocks |
406 |
for p.tip != nil { |
407 |
p.finalize(p.tip) |
408 |
} |
409 |
// Walk the tree again and process inline markdown in each block |
410 |
p.doc.Walk(func(node *Node, entering bool) WalkStatus { |
411 |
if node.Type == Paragraph || node.Type == Heading || node.Type == TableCell { |
412 |
p.inline(node, node.content) |
413 |
node.content = nil |
414 |
} |
415 |
return GoToNext |
416 |
}) |
417 |
p.parseRefsToAST() |
418 |
return p.doc |
419 |
} |
420 |
|
421 |
func (p *Markdown) parseRefsToAST() { |
422 |
if p.extensions&Footnotes == 0 || len(p.notes) == 0 { |
423 |
return |
424 |
} |
425 |
p.tip = p.doc |
426 |
block := p.addBlock(List, nil) |
427 |
block.IsFootnotesList = true |
428 |
block.ListFlags = ListTypeOrdered |
429 |
flags := ListItemBeginningOfList |
430 |
// Note: this loop is intentionally explicit, not range-form. This is |
431 |
// because the body of the loop will append nested footnotes to p.notes and |
432 |
// we need to process those late additions. Range form would only walk over |
433 |
// the fixed initial set. |
434 |
for i := 0; i < len(p.notes); i++ { |
435 |
ref := p.notes[i] |
436 |
p.addExistingChild(ref.footnote, 0) |
437 |
block := ref.footnote |
438 |
block.ListFlags = flags | ListTypeOrdered |
439 |
block.RefLink = ref.link |
440 |
if ref.hasBlock { |
441 |
flags |= ListItemContainsBlock |
442 |
p.block(ref.title) |
443 |
} else { |
444 |
p.inline(block, ref.title) |
445 |
} |
446 |
flags &^= ListItemBeginningOfList | ListItemContainsBlock |
447 |
} |
448 |
above := block.Parent |
449 |
finalizeList(block) |
450 |
p.tip = above |
451 |
block.Walk(func(node *Node, entering bool) WalkStatus { |
452 |
if node.Type == Paragraph || node.Type == Heading { |
453 |
p.inline(node, node.content) |
454 |
node.content = nil |
455 |
} |
456 |
return GoToNext |
457 |
}) |
458 |
} |
459 |
|
460 |
//
// Link references
//
// This section implements support for references that (usually) appear
// as footnotes in a document, and can be referenced anywhere in the document.
// The basic format is:
//
//    [1]: http://www.google.com/ "Google"
//    [2]: http://www.github.com/ "Github"
//
// Anywhere in the document, the reference can be linked by referring to its
// label, i.e., 1 and 2 in this example, as in:
//
//    This library is hosted on [Github][2], a git hosting site.
//
// Actual footnotes as specified in Pandoc and supported by some other Markdown
// libraries such as php-markdown are also taken care of. They look like this:
//
//    This sentence needs a bit of further explanation.[^note]
//
//    [^note]: This is the explanation.
//
// Footnotes should be placed at the end of the document in an ordered list.
// Finally, there are inline footnotes such as:
//
//    Inline footnotes^[Also supported.] provide a quick inline explanation,
//    but are rendered at the bottom of the document.
//
488 |
|
489 |
// reference holds all information necessary for a reference-style links or |
490 |
// footnotes. |
491 |
// |
492 |
// Consider this markdown with reference-style links: |
493 |
// |
494 |
// [link][ref] |
495 |
// |
496 |
// [ref]: /url/ "tooltip title" |
497 |
// |
498 |
// It will be ultimately converted to this HTML: |
499 |
// |
500 |
// <p><a href=\"/url/\" title=\"title\">link</a></p> |
501 |
// |
502 |
// And a reference structure will be populated as follows: |
503 |
// |
504 |
// p.refs["ref"] = &reference{ |
505 |
// link: "/url/", |
506 |
// title: "tooltip title", |
507 |
// } |
508 |
// |
509 |
// Alternatively, reference can contain information about a footnote. Consider |
510 |
// this markdown: |
511 |
// |
512 |
// Text needing a footnote.[^a] |
513 |
// |
514 |
// [^a]: This is the note |
515 |
// |
516 |
// A reference structure will be populated as follows: |
517 |
// |
518 |
// p.refs["a"] = &reference{ |
519 |
// link: "a", |
520 |
// title: "This is the note", |
521 |
// noteID: <some positive int>, |
522 |
// } |
523 |
// |
524 |
// TODO: As you can see, it begs for splitting into two dedicated structures |
525 |
// for refs and for footnotes. |
526 |
type reference struct { |
527 |
link []byte |
528 |
title []byte |
529 |
noteID int // 0 if not a footnote ref |
530 |
hasBlock bool |
531 |
footnote *Node // a link to the Item node within a list of footnotes |
532 |
|
533 |
text []byte // only gets populated by refOverride feature with Reference.Text |
534 |
} |
535 |
|
536 |
func (r *reference) String() string { |
537 |
return fmt.Sprintf("{link: %q, title: %q, text: %q, noteID: %d, hasBlock: %v}", |
538 |
r.link, r.title, r.text, r.noteID, r.hasBlock) |
539 |
} |
540 |
|
541 |
// Check whether or not data starts with a reference link. |
542 |
// If so, it is parsed and stored in the list of references |
543 |
// (in the render struct). |
544 |
// Returns the number of bytes to skip to move past it, |
545 |
// or zero if the first line is not a reference. |
546 |
func isReference(p *Markdown, data []byte, tabSize int) int { |
547 |
// up to 3 optional leading spaces |
548 |
if len(data) < 4 { |
549 |
return 0 |
550 |
} |
551 |
i := 0 |
552 |
for i < 3 && data[i] == ' ' { |
553 |
i++ |
554 |
} |
555 |
|
556 |
noteID := 0 |
557 |
|
558 |
// id part: anything but a newline between brackets |
559 |
if data[i] != '[' { |
560 |
return 0 |
561 |
} |
562 |
i++ |
563 |
if p.extensions&Footnotes != 0 { |
564 |
if i < len(data) && data[i] == '^' { |
565 |
// we can set it to anything here because the proper noteIds will |
566 |
// be assigned later during the second pass. It just has to be != 0 |
567 |
noteID = 1 |
568 |
i++ |
569 |
} |
570 |
} |
571 |
idOffset := i |
572 |
for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' { |
573 |
i++ |
574 |
} |
575 |
if i >= len(data) || data[i] != ']' { |
576 |
return 0 |
577 |
} |
578 |
idEnd := i |
579 |
// footnotes can have empty ID, like this: [^], but a reference can not be |
580 |
// empty like this: []. Break early if it's not a footnote and there's no ID |
581 |
if noteID == 0 && idOffset == idEnd { |
582 |
return 0 |
583 |
} |
584 |
// spacer: colon (space | tab)* newline? (space | tab)* |
585 |
i++ |
586 |
if i >= len(data) || data[i] != ':' { |
587 |
return 0 |
588 |
} |
589 |
i++ |
590 |
for i < len(data) && (data[i] == ' ' || data[i] == '\t') { |
591 |
i++ |
592 |
} |
593 |
if i < len(data) && (data[i] == '\n' || data[i] == '\r') { |
594 |
i++ |
595 |
if i < len(data) && data[i] == '\n' && data[i-1] == '\r' { |
596 |
i++ |
597 |
} |
598 |
} |
599 |
for i < len(data) && (data[i] == ' ' || data[i] == '\t') { |
600 |
i++ |
601 |
} |
602 |
if i >= len(data) { |
603 |
return 0 |
604 |
} |
605 |
|
606 |
var ( |
607 |
linkOffset, linkEnd int |
608 |
titleOffset, titleEnd int |
609 |
lineEnd int |
610 |
raw []byte |
611 |
hasBlock bool |
612 |
) |
613 |
|
614 |
if p.extensions&Footnotes != 0 && noteID != 0 { |
615 |
linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize) |
616 |
lineEnd = linkEnd |
617 |
} else { |
618 |
linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i) |
619 |
} |
620 |
if lineEnd == 0 { |
621 |
return 0 |
622 |
} |
623 |
|
624 |
// a valid ref has been found |
625 |
|
626 |
ref := &reference{ |
627 |
noteID: noteID, |
628 |
hasBlock: hasBlock, |
629 |
} |
630 |
|
631 |
if noteID > 0 { |
632 |
// reusing the link field for the id since footnotes don't have links |
633 |
ref.link = data[idOffset:idEnd] |
634 |
// if footnote, it's not really a title, it's the contained text |
635 |
ref.title = raw |
636 |
} else { |
637 |
ref.link = data[linkOffset:linkEnd] |
638 |
ref.title = data[titleOffset:titleEnd] |
639 |
} |
640 |
|
641 |
// id matches are case-insensitive |
642 |
id := string(bytes.ToLower(data[idOffset:idEnd])) |
643 |
|
644 |
p.refs[id] = ref |
645 |
|
646 |
return lineEnd |
647 |
} |
648 |
|
649 |
func scanLinkRef(p *Markdown, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) { |
650 |
// link: whitespace-free sequence, optionally between angle brackets |
651 |
if data[i] == '<' { |
652 |
i++ |
653 |
} |
654 |
linkOffset = i |
655 |
for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' { |
656 |
i++ |
657 |
} |
658 |
linkEnd = i |
659 |
if data[linkOffset] == '<' && data[linkEnd-1] == '>' { |
660 |
linkOffset++ |
661 |
linkEnd-- |
662 |
} |
663 |
|
664 |
// optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) |
665 |
for i < len(data) && (data[i] == ' ' || data[i] == '\t') { |
666 |
i++ |
667 |
} |
668 |
if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' { |
669 |
return |
670 |
} |
671 |
|
672 |
// compute end-of-line |
673 |
if i >= len(data) || data[i] == '\r' || data[i] == '\n' { |
674 |
lineEnd = i |
675 |
} |
676 |
if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' { |
677 |
lineEnd++ |
678 |
} |
679 |
|
680 |
// optional (space|tab)* spacer after a newline |
681 |
if lineEnd > 0 { |
682 |
i = lineEnd + 1 |
683 |
for i < len(data) && (data[i] == ' ' || data[i] == '\t') { |
684 |
i++ |
685 |
} |
686 |
} |
687 |
|
688 |
// optional title: any non-newline sequence enclosed in '"() alone on its line |
689 |
if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') { |
690 |
i++ |
691 |
titleOffset = i |
692 |
|
693 |
// look for EOL |
694 |
for i < len(data) && data[i] != '\n' && data[i] != '\r' { |
695 |
i++ |
696 |
} |
697 |
if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' { |
698 |
titleEnd = i + 1 |
699 |
} else { |
700 |
titleEnd = i |
701 |
} |
702 |
|
703 |
// step back |
704 |
i-- |
705 |
for i > titleOffset && (data[i] == ' ' || data[i] == '\t') { |
706 |
i-- |
707 |
} |
708 |
if i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') { |
709 |
lineEnd = titleEnd |
710 |
titleEnd = i |
711 |
} |
712 |
} |
713 |
|
714 |
return |
715 |
} |
716 |
|
717 |
// The first bit of this logic is the same as Parser.listItem, but the rest |
718 |
// is much simpler. This function simply finds the entire block and shifts it |
719 |
// over by one tab if it is indeed a block (just returns the line if it's not). |
720 |
// blockEnd is the end of the section in the input buffer, and contents is the |
721 |
// extracted text that was shifted over one tab. It will need to be rendered at |
722 |
// the end of the document. |
723 |
func scanFootnote(p *Markdown, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) { |
724 |
if i == 0 || len(data) == 0 { |
725 |
return |
726 |
} |
727 |
|
728 |
// skip leading whitespace on first line |
729 |
for i < len(data) && data[i] == ' ' { |
730 |
i++ |
731 |
} |
732 |
|
733 |
blockStart = i |
734 |
|
735 |
// find the end of the line |
736 |
blockEnd = i |
737 |
for i < len(data) && data[i-1] != '\n' { |
738 |
i++ |
739 |
} |
740 |
|
741 |
// get working buffer |
742 |
var raw bytes.Buffer |
743 |
|
744 |
// put the first line into the working buffer |
745 |
raw.Write(data[blockEnd:i]) |
746 |
blockEnd = i |
747 |
|
748 |
// process the following lines |
749 |
containsBlankLine := false |
750 |
|
751 |
gatherLines: |
752 |
for blockEnd < len(data) { |
753 |
i++ |
754 |
|
755 |
// find the end of this line |
756 |
for i < len(data) && data[i-1] != '\n' { |
757 |
i++ |
758 |
} |
759 |
|
760 |
// if it is an empty line, guess that it is part of this item |
761 |
// and move on to the next line |
762 |
if p.isEmpty(data[blockEnd:i]) > 0 { |
763 |
containsBlankLine = true |
764 |
blockEnd = i |
765 |
continue |
766 |
} |
767 |
|
768 |
n := 0 |
769 |
if n = isIndented(data[blockEnd:i], indentSize); n == 0 { |
770 |
// this is the end of the block. |
771 |
// we don't want to include this last line in the index. |
772 |
break gatherLines |
773 |
} |
774 |
|
775 |
// if there were blank lines before this one, insert a new one now |
776 |
if containsBlankLine { |
777 |
raw.WriteByte('\n') |
778 |
containsBlankLine = false |
779 |
} |
780 |
|
781 |
// get rid of that first tab, write to buffer |
782 |
raw.Write(data[blockEnd+n : i]) |
783 |
hasBlock = true |
784 |
|
785 |
blockEnd = i |
786 |
} |
787 |
|
788 |
if data[blockEnd-1] != '\n' { |
789 |
raw.WriteByte('\n') |
790 |
} |
791 |
|
792 |
contents = raw.Bytes() |
793 |
|
794 |
return |
795 |
} |
796 |
|
797 |
// |
798 |
// |
799 |
// Miscellaneous helper functions |
800 |
// |
801 |
// |
802 |
|
803 |
// ispunct reports whether c is an ASCII punctuation symbol.
// The set of symbols is taken from a private function in regexp in the
// stdlib.
func ispunct(c byte) bool {
	const punctuation = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
	return strings.IndexByte(punctuation, c) >= 0
}
813 |
|
814 |
// isspace reports whether c is a whitespace character, horizontal (space,
// tab) or vertical (newline, carriage return, form feed, vertical tab).
func isspace(c byte) bool {
	switch c {
	case ' ', '\t', '\n', '\r', '\f', '\v':
		return true
	}
	return false
}
818 |
|
819 |
// ishorizontalspace reports whether c is a horizontal whitespace character,
// that is a space or a tab.
func ishorizontalspace(c byte) bool {
	switch c {
	case ' ', '\t':
		return true
	}
	return false
}
823 |
|
824 |
// isverticalspace reports whether c is a vertical whitespace character:
// newline, carriage return, form feed or vertical tab.
func isverticalspace(c byte) bool {
	switch c {
	case '\n', '\r', '\f', '\v':
		return true
	}
	return false
}
828 |
|
829 |
// isletter reports whether c is an ASCII letter, lower or upper case.
func isletter(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	default:
		return false
	}
}
833 |
|
834 |
// isalnum reports whether c is an ASCII letter or an ASCII digit.
// TODO: check when this is looking for ASCII alnum and when it should use unicode
func isalnum(c byte) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
		return true
	}
	return false
}
839 |
|
840 |
// expandTabs writes line to out with every tab character replaced by
// spaces, aligning to the next multiple of tabSize columns. Columns are
// counted in runes, not bytes. Nothing is appended to the output: it ends
// with a newline only if line does.
func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
	// Count the tabs at the very beginning of the line.
	leading := 0
	for leading < len(line) && line[leading] == '\t' {
		leading++
	}

	// Fast path: the line has no tabs at all, or only leading ones. Each
	// leading tab is exactly tabSize wide, so no runes need to be decoded.
	if bytes.IndexByte(line[leading:], '\t') < 0 {
		for n := leading * tabSize; n > 0; n-- {
			out.WriteByte(' ')
		}
		out.Write(line[leading:])
		return
	}

	// Slow path: track the current column, counted in runes, to find out how
	// many spaces each tab stands for.
	column := 0
	for pos := 0; pos < len(line); {
		if line[pos] == '\t' {
			// Pad with at least one space, up to the next tab stop.
			for {
				out.WriteByte(' ')
				column++
				if column%tabSize == 0 {
					break
				}
			}
			pos++
			continue
		}

		// Copy the run of text up to the next tab or the end of the line.
		start := pos
		for pos < len(line) && line[pos] != '\t' {
			_, size := utf8.DecodeRune(line[pos:])
			pos += size
			column++
		}
		out.Write(line[start:pos])
	}
}
897 |
|
898 |
// isIndented finds out whether a line counts as indented.
// It returns the number of bytes the indent occupies: 1 for a leading tab,
// indentSize for a run of indentSize leading spaces, and 0 if the line is
// not indented.
func isIndented(data []byte, indentSize int) int {
	switch {
	case len(data) == 0:
		return 0
	case data[0] == '\t':
		return 1
	case len(data) < indentSize:
		return 0
	}

	// Count leading spaces, looking at no more than indentSize bytes.
	spaces := 0
	for spaces < indentSize && data[spaces] == ' ' {
		spaces++
	}
	if spaces < indentSize {
		return 0
	}
	return indentSize
}
917 |
|
918 |
// Create a url-safe slug for fragments |
919 |
func slugify(in []byte) []byte { |
920 |
if len(in) == 0 { |
921 |
return in |
922 |
} |
923 |
out := make([]byte, 0, len(in)) |
924 |
sym := false |
925 |
|
926 |
for _, ch := range in { |
927 |
if isalnum(ch) { |
928 |
sym = false |
929 |
out = append(out, ch) |
930 |
} else if sym { |
931 |
continue |
932 |
} else { |
933 |
out = append(out, '-') |
934 |
sym = true |
935 |
} |
936 |
} |
937 |
var a, b int |
938 |
var ch byte |
939 |
for a, ch = range out { |
940 |
if ch != '-' { |
941 |
break |
942 |
} |
943 |
} |
944 |
for b = len(out) - 1; b > 0; b-- { |
945 |
if out[b] != '-' { |
946 |
break |
947 |
} |
948 |
} |
949 |
return out[a : b+1] |
950 |
} |