/[aya]/vendor/github.com/russross/blackfriday/v2/block.go
ViewVC logotype

Annotation of /vendor/github.com/russross/blackfriday/v2/block.go

Parent Directory | Revision Log | View Revision Graph


Revision 1.1 - (hide annotations)
Mon Sep 30 00:42:06 2024 UTC (6 weeks, 4 days ago) by yakumo_izuru
Branch: MAIN
CVS Tags: HEAD
Mirrored from https://git.chaotic.ninja/git/yakumo_izuru/aya

1 yakumo_izuru 1.1 //
2     // Blackfriday Markdown Processor
3     // Available at http://github.com/russross/blackfriday
4     //
5     // Copyright © 2011 Russ Ross <russ@russross.com>.
6     // Distributed under the Simplified BSD License.
7     // See README.md for details.
8     //
9    
10     //
11     // Functions to parse block-level elements.
12     //
13    
14     package blackfriday
15    
16     import (
17     "bytes"
18     "html"
19     "regexp"
20     "strings"
21     "unicode"
22     )
23    
const (
	// charEntity matches an HTML character reference: hexadecimal (&#x..;),
	// decimal (&#..;) or named (&name;).
	charEntity = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"
	// escapable is a character class holding the ASCII punctuation that may
	// follow a backslash in an escape sequence.
	escapable = "[!\"#$%&'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]"
)

var (
	// reBackslashOrAmp is a cheap pre-filter: it matches any input containing
	// a backslash or an ampersand. The Go literal "[\\\\&]" is the regexp
	// [\\&]; the shorter "[\\&]" would compile to [\&], which matches an
	// ampersand only and silently skips backslash escapes.
	reBackslashOrAmp = regexp.MustCompile("[\\\\&]")
	// reEntityOrEscapedChar matches either a backslash followed by an
	// escapable character or a character entity (case-insensitively).
	reEntityOrEscapedChar = regexp.MustCompile("(?i)\\\\" + escapable + "|" + charEntity)
)
33    
34     // Parse block-level data.
35     // Note: this function and many that it calls assume that
36     // the input buffer ends with a newline.
37     func (p *Markdown) block(data []byte) {
38     // this is called recursively: enforce a maximum depth
39     if p.nesting >= p.maxNesting {
40     return
41     }
42     p.nesting++
43    
44     // parse out one block-level construct at a time
45     for len(data) > 0 {
46     // prefixed heading:
47     //
48     // # Heading 1
49     // ## Heading 2
50     // ...
51     // ###### Heading 6
52     if p.isPrefixHeading(data) {
53     data = data[p.prefixHeading(data):]
54     continue
55     }
56    
57     // block of preformatted HTML:
58     //
59     // <div>
60     // ...
61     // </div>
62     if data[0] == '<' {
63     if i := p.html(data, true); i > 0 {
64     data = data[i:]
65     continue
66     }
67     }
68    
69     // title block
70     //
71     // % stuff
72     // % more stuff
73     // % even more stuff
74     if p.extensions&Titleblock != 0 {
75     if data[0] == '%' {
76     if i := p.titleBlock(data, true); i > 0 {
77     data = data[i:]
78     continue
79     }
80     }
81     }
82    
83     // blank lines. note: returns the # of bytes to skip
84     if i := p.isEmpty(data); i > 0 {
85     data = data[i:]
86     continue
87     }
88    
89     // indented code block:
90     //
91     // func max(a, b int) int {
92     // if a > b {
93     // return a
94     // }
95     // return b
96     // }
97     if p.codePrefix(data) > 0 {
98     data = data[p.code(data):]
99     continue
100     }
101    
102     // fenced code block:
103     //
104     // ``` go
105     // func fact(n int) int {
106     // if n <= 1 {
107     // return n
108     // }
109     // return n * fact(n-1)
110     // }
111     // ```
112     if p.extensions&FencedCode != 0 {
113     if i := p.fencedCodeBlock(data, true); i > 0 {
114     data = data[i:]
115     continue
116     }
117     }
118    
119     // horizontal rule:
120     //
121     // ------
122     // or
123     // ******
124     // or
125     // ______
126     if p.isHRule(data) {
127     p.addBlock(HorizontalRule, nil)
128     var i int
129     for i = 0; i < len(data) && data[i] != '\n'; i++ {
130     }
131     data = data[i:]
132     continue
133     }
134    
135     // block quote:
136     //
137     // > A big quote I found somewhere
138     // > on the web
139     if p.quotePrefix(data) > 0 {
140     data = data[p.quote(data):]
141     continue
142     }
143    
144     // table:
145     //
146     // Name | Age | Phone
147     // ------|-----|---------
148     // Bob | 31 | 555-1234
149     // Alice | 27 | 555-4321
150     if p.extensions&Tables != 0 {
151     if i := p.table(data); i > 0 {
152     data = data[i:]
153     continue
154     }
155     }
156    
157     // an itemized/unordered list:
158     //
159     // * Item 1
160     // * Item 2
161     //
162     // also works with + or -
163     if p.uliPrefix(data) > 0 {
164     data = data[p.list(data, 0):]
165     continue
166     }
167    
168     // a numbered/ordered list:
169     //
170     // 1. Item 1
171     // 2. Item 2
172     if p.oliPrefix(data) > 0 {
173     data = data[p.list(data, ListTypeOrdered):]
174     continue
175     }
176    
177     // definition lists:
178     //
179     // Term 1
180     // : Definition a
181     // : Definition b
182     //
183     // Term 2
184     // : Definition c
185     if p.extensions&DefinitionLists != 0 {
186     if p.dliPrefix(data) > 0 {
187     data = data[p.list(data, ListTypeDefinition):]
188     continue
189     }
190     }
191    
192     // anything else must look like a normal paragraph
193     // note: this finds underlined headings, too
194     data = data[p.paragraph(data):]
195     }
196    
197     p.nesting--
198     }
199    
200     func (p *Markdown) addBlock(typ NodeType, content []byte) *Node {
201     p.closeUnmatchedBlocks()
202     container := p.addChild(typ, 0)
203     container.content = content
204     return container
205     }
206    
207     func (p *Markdown) isPrefixHeading(data []byte) bool {
208     if data[0] != '#' {
209     return false
210     }
211    
212     if p.extensions&SpaceHeadings != 0 {
213     level := 0
214     for level < 6 && level < len(data) && data[level] == '#' {
215     level++
216     }
217     if level == len(data) || data[level] != ' ' {
218     return false
219     }
220     }
221     return true
222     }
223    
224     func (p *Markdown) prefixHeading(data []byte) int {
225     level := 0
226     for level < 6 && level < len(data) && data[level] == '#' {
227     level++
228     }
229     i := skipChar(data, level, ' ')
230     end := skipUntilChar(data, i, '\n')
231     skip := end
232     id := ""
233     if p.extensions&HeadingIDs != 0 {
234     j, k := 0, 0
235     // find start/end of heading id
236     for j = i; j < end-1 && (data[j] != '{' || data[j+1] != '#'); j++ {
237     }
238     for k = j + 1; k < end && data[k] != '}'; k++ {
239     }
240     // extract heading id iff found
241     if j < end && k < end {
242     id = string(data[j+2 : k])
243     end = j
244     skip = k + 1
245     for end > 0 && data[end-1] == ' ' {
246     end--
247     }
248     }
249     }
250     for end > 0 && data[end-1] == '#' {
251     if isBackslashEscaped(data, end-1) {
252     break
253     }
254     end--
255     }
256     for end > 0 && data[end-1] == ' ' {
257     end--
258     }
259     if end > i {
260     if id == "" && p.extensions&AutoHeadingIDs != 0 {
261     id = SanitizedAnchorName(string(data[i:end]))
262     }
263     block := p.addBlock(Heading, data[i:end])
264     block.HeadingID = id
265     block.Level = level
266     }
267     return skip
268     }
269    
270     func (p *Markdown) isUnderlinedHeading(data []byte) int {
271     // test of level 1 heading
272     if data[0] == '=' {
273     i := skipChar(data, 1, '=')
274     i = skipChar(data, i, ' ')
275     if i < len(data) && data[i] == '\n' {
276     return 1
277     }
278     return 0
279     }
280    
281     // test of level 2 heading
282     if data[0] == '-' {
283     i := skipChar(data, 1, '-')
284     i = skipChar(data, i, ' ')
285     if i < len(data) && data[i] == '\n' {
286     return 2
287     }
288     return 0
289     }
290    
291     return 0
292     }
293    
294     func (p *Markdown) titleBlock(data []byte, doRender bool) int {
295     if data[0] != '%' {
296     return 0
297     }
298     splitData := bytes.Split(data, []byte("\n"))
299     var i int
300     for idx, b := range splitData {
301     if !bytes.HasPrefix(b, []byte("%")) {
302     i = idx // - 1
303     break
304     }
305     }
306    
307     data = bytes.Join(splitData[0:i], []byte("\n"))
308     consumed := len(data)
309     data = bytes.TrimPrefix(data, []byte("% "))
310     data = bytes.Replace(data, []byte("\n% "), []byte("\n"), -1)
311     block := p.addBlock(Heading, data)
312     block.Level = 1
313     block.IsTitleblock = true
314    
315     return consumed
316     }
317    
318     func (p *Markdown) html(data []byte, doRender bool) int {
319     var i, j int
320    
321     // identify the opening tag
322     if data[0] != '<' {
323     return 0
324     }
325     curtag, tagfound := p.htmlFindTag(data[1:])
326    
327     // handle special cases
328     if !tagfound {
329     // check for an HTML comment
330     if size := p.htmlComment(data, doRender); size > 0 {
331     return size
332     }
333    
334     // check for an <hr> tag
335     if size := p.htmlHr(data, doRender); size > 0 {
336     return size
337     }
338    
339     // no special case recognized
340     return 0
341     }
342    
343     // look for an unindented matching closing tag
344     // followed by a blank line
345     found := false
346     /*
347     closetag := []byte("\n</" + curtag + ">")
348     j = len(curtag) + 1
349     for !found {
350     // scan for a closing tag at the beginning of a line
351     if skip := bytes.Index(data[j:], closetag); skip >= 0 {
352     j += skip + len(closetag)
353     } else {
354     break
355     }
356    
357     // see if it is the only thing on the line
358     if skip := p.isEmpty(data[j:]); skip > 0 {
359     // see if it is followed by a blank line/eof
360     j += skip
361     if j >= len(data) {
362     found = true
363     i = j
364     } else {
365     if skip := p.isEmpty(data[j:]); skip > 0 {
366     j += skip
367     found = true
368     i = j
369     }
370     }
371     }
372     }
373     */
374    
375     // if not found, try a second pass looking for indented match
376     // but not if tag is "ins" or "del" (following original Markdown.pl)
377     if !found && curtag != "ins" && curtag != "del" {
378     i = 1
379     for i < len(data) {
380     i++
381     for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
382     i++
383     }
384    
385     if i+2+len(curtag) >= len(data) {
386     break
387     }
388    
389     j = p.htmlFindEnd(curtag, data[i-1:])
390    
391     if j > 0 {
392     i += j - 1
393     found = true
394     break
395     }
396     }
397     }
398    
399     if !found {
400     return 0
401     }
402    
403     // the end of the block has been found
404     if doRender {
405     // trim newlines
406     end := i
407     for end > 0 && data[end-1] == '\n' {
408     end--
409     }
410     finalizeHTMLBlock(p.addBlock(HTMLBlock, data[:end]))
411     }
412    
413     return i
414     }
415    
416     func finalizeHTMLBlock(block *Node) {
417     block.Literal = block.content
418     block.content = nil
419     }
420    
421     // HTML comment, lax form
422     func (p *Markdown) htmlComment(data []byte, doRender bool) int {
423     i := p.inlineHTMLComment(data)
424     // needs to end with a blank line
425     if j := p.isEmpty(data[i:]); j > 0 {
426     size := i + j
427     if doRender {
428     // trim trailing newlines
429     end := size
430     for end > 0 && data[end-1] == '\n' {
431     end--
432     }
433     block := p.addBlock(HTMLBlock, data[:end])
434     finalizeHTMLBlock(block)
435     }
436     return size
437     }
438     return 0
439     }
440    
441     // HR, which is the only self-closing block tag considered
442     func (p *Markdown) htmlHr(data []byte, doRender bool) int {
443     if len(data) < 4 {
444     return 0
445     }
446     if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') {
447     return 0
448     }
449     if data[3] != ' ' && data[3] != '/' && data[3] != '>' {
450     // not an <hr> tag after all; at least not a valid one
451     return 0
452     }
453     i := 3
454     for i < len(data) && data[i] != '>' && data[i] != '\n' {
455     i++
456     }
457     if i < len(data) && data[i] == '>' {
458     i++
459     if j := p.isEmpty(data[i:]); j > 0 {
460     size := i + j
461     if doRender {
462     // trim newlines
463     end := size
464     for end > 0 && data[end-1] == '\n' {
465     end--
466     }
467     finalizeHTMLBlock(p.addBlock(HTMLBlock, data[:end]))
468     }
469     return size
470     }
471     }
472     return 0
473     }
474    
475     func (p *Markdown) htmlFindTag(data []byte) (string, bool) {
476     i := 0
477     for i < len(data) && isalnum(data[i]) {
478     i++
479     }
480     key := string(data[:i])
481     if _, ok := blockTags[key]; ok {
482     return key, true
483     }
484     return "", false
485     }
486    
487     func (p *Markdown) htmlFindEnd(tag string, data []byte) int {
488     // assume data[0] == '<' && data[1] == '/' already tested
489     if tag == "hr" {
490     return 2
491     }
492     // check if tag is a match
493     closetag := []byte("</" + tag + ">")
494     if !bytes.HasPrefix(data, closetag) {
495     return 0
496     }
497     i := len(closetag)
498    
499     // check that the rest of the line is blank
500     skip := 0
501     if skip = p.isEmpty(data[i:]); skip == 0 {
502     return 0
503     }
504     i += skip
505     skip = 0
506    
507     if i >= len(data) {
508     return i
509     }
510    
511     if p.extensions&LaxHTMLBlocks != 0 {
512     return i
513     }
514     if skip = p.isEmpty(data[i:]); skip == 0 {
515     // following line must be blank
516     return 0
517     }
518    
519     return i + skip
520     }
521    
522     func (*Markdown) isEmpty(data []byte) int {
523     // it is okay to call isEmpty on an empty buffer
524     if len(data) == 0 {
525     return 0
526     }
527    
528     var i int
529     for i = 0; i < len(data) && data[i] != '\n'; i++ {
530     if data[i] != ' ' && data[i] != '\t' {
531     return 0
532     }
533     }
534     if i < len(data) && data[i] == '\n' {
535     i++
536     }
537     return i
538     }
539    
540     func (*Markdown) isHRule(data []byte) bool {
541     i := 0
542    
543     // skip up to three spaces
544     for i < 3 && data[i] == ' ' {
545     i++
546     }
547    
548     // look at the hrule char
549     if data[i] != '*' && data[i] != '-' && data[i] != '_' {
550     return false
551     }
552     c := data[i]
553    
554     // the whole line must be the char or whitespace
555     n := 0
556     for i < len(data) && data[i] != '\n' {
557     switch {
558     case data[i] == c:
559     n++
560     case data[i] != ' ':
561     return false
562     }
563     i++
564     }
565    
566     return n >= 3
567     }
568    
569     // isFenceLine checks if there's a fence line (e.g., ``` or ``` go) at the beginning of data,
570     // and returns the end index if so, or 0 otherwise. It also returns the marker found.
571     // If info is not nil, it gets set to the syntax specified in the fence line.
572     func isFenceLine(data []byte, info *string, oldmarker string) (end int, marker string) {
573     i, size := 0, 0
574    
575     // skip up to three spaces
576     for i < len(data) && i < 3 && data[i] == ' ' {
577     i++
578     }
579    
580     // check for the marker characters: ~ or `
581     if i >= len(data) {
582     return 0, ""
583     }
584     if data[i] != '~' && data[i] != '`' {
585     return 0, ""
586     }
587    
588     c := data[i]
589    
590     // the whole line must be the same char or whitespace
591     for i < len(data) && data[i] == c {
592     size++
593     i++
594     }
595    
596     // the marker char must occur at least 3 times
597     if size < 3 {
598     return 0, ""
599     }
600     marker = string(data[i-size : i])
601    
602     // if this is the end marker, it must match the beginning marker
603     if oldmarker != "" && marker != oldmarker {
604     return 0, ""
605     }
606    
607     // TODO(shurcooL): It's probably a good idea to simplify the 2 code paths here
608     // into one, always get the info string, and discard it if the caller doesn't care.
609     if info != nil {
610     infoLength := 0
611     i = skipChar(data, i, ' ')
612    
613     if i >= len(data) {
614     if i == len(data) {
615     return i, marker
616     }
617     return 0, ""
618     }
619    
620     infoStart := i
621    
622     if data[i] == '{' {
623     i++
624     infoStart++
625    
626     for i < len(data) && data[i] != '}' && data[i] != '\n' {
627     infoLength++
628     i++
629     }
630    
631     if i >= len(data) || data[i] != '}' {
632     return 0, ""
633     }
634    
635     // strip all whitespace at the beginning and the end
636     // of the {} block
637     for infoLength > 0 && isspace(data[infoStart]) {
638     infoStart++
639     infoLength--
640     }
641    
642     for infoLength > 0 && isspace(data[infoStart+infoLength-1]) {
643     infoLength--
644     }
645     i++
646     i = skipChar(data, i, ' ')
647     } else {
648     for i < len(data) && !isverticalspace(data[i]) {
649     infoLength++
650     i++
651     }
652     }
653    
654     *info = strings.TrimSpace(string(data[infoStart : infoStart+infoLength]))
655     }
656    
657     if i == len(data) {
658     return i, marker
659     }
660     if i > len(data) || data[i] != '\n' {
661     return 0, ""
662     }
663     return i + 1, marker // Take newline into account.
664     }
665    
666     // fencedCodeBlock returns the end index if data contains a fenced code block at the beginning,
667     // or 0 otherwise. It writes to out if doRender is true, otherwise it has no side effects.
668     // If doRender is true, a final newline is mandatory to recognize the fenced code block.
669     func (p *Markdown) fencedCodeBlock(data []byte, doRender bool) int {
670     var info string
671     beg, marker := isFenceLine(data, &info, "")
672     if beg == 0 || beg >= len(data) {
673     return 0
674     }
675     fenceLength := beg - 1
676    
677     var work bytes.Buffer
678     work.Write([]byte(info))
679     work.WriteByte('\n')
680    
681     for {
682     // safe to assume beg < len(data)
683    
684     // check for the end of the code block
685     fenceEnd, _ := isFenceLine(data[beg:], nil, marker)
686     if fenceEnd != 0 {
687     beg += fenceEnd
688     break
689     }
690    
691     // copy the current line
692     end := skipUntilChar(data, beg, '\n') + 1
693    
694     // did we reach the end of the buffer without a closing marker?
695     if end >= len(data) {
696     return 0
697     }
698    
699     // verbatim copy to the working buffer
700     if doRender {
701     work.Write(data[beg:end])
702     }
703     beg = end
704     }
705    
706     if doRender {
707     block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer
708     block.IsFenced = true
709     block.FenceLength = fenceLength
710     finalizeCodeBlock(block)
711     }
712    
713     return beg
714     }
715    
// unescapeChar resolves a single escape sequence: for a backslash escape it
// returns the byte following the backslash, and for anything else it
// returns the input with HTML entities unescaped.
// str must hold at least one byte, and at least two if it starts with a
// backslash.
func unescapeChar(str []byte) []byte {
	if str[0] != '\\' {
		return []byte(html.UnescapeString(string(str)))
	}
	return []byte{str[1]}
}
722    
723     func unescapeString(str []byte) []byte {
724     if reBackslashOrAmp.Match(str) {
725     return reEntityOrEscapedChar.ReplaceAllFunc(str, unescapeChar)
726     }
727     return str
728     }
729    
730     func finalizeCodeBlock(block *Node) {
731     if block.IsFenced {
732     newlinePos := bytes.IndexByte(block.content, '\n')
733     firstLine := block.content[:newlinePos]
734     rest := block.content[newlinePos+1:]
735     block.Info = unescapeString(bytes.Trim(firstLine, "\n"))
736     block.Literal = rest
737     } else {
738     block.Literal = block.content
739     }
740     block.content = nil
741     }
742    
743     func (p *Markdown) table(data []byte) int {
744     table := p.addBlock(Table, nil)
745     i, columns := p.tableHeader(data)
746     if i == 0 {
747     p.tip = table.Parent
748     table.Unlink()
749     return 0
750     }
751    
752     p.addBlock(TableBody, nil)
753    
754     for i < len(data) {
755     pipes, rowStart := 0, i
756     for ; i < len(data) && data[i] != '\n'; i++ {
757     if data[i] == '|' {
758     pipes++
759     }
760     }
761    
762     if pipes == 0 {
763     i = rowStart
764     break
765     }
766    
767     // include the newline in data sent to tableRow
768     if i < len(data) && data[i] == '\n' {
769     i++
770     }
771     p.tableRow(data[rowStart:i], columns, false)
772     }
773    
774     return i
775     }
776    
// isBackslashEscaped reports whether the byte at position i in data is
// preceded by an odd number of consecutive backslashes.
func isBackslashEscaped(data []byte, i int) bool {
	count := 0
	for j := i - 1; j >= 0 && data[j] == '\\'; j-- {
		count++
	}
	return count%2 == 1
}
785    
786     func (p *Markdown) tableHeader(data []byte) (size int, columns []CellAlignFlags) {
787     i := 0
788     colCount := 1
789     for i = 0; i < len(data) && data[i] != '\n'; i++ {
790     if data[i] == '|' && !isBackslashEscaped(data, i) {
791     colCount++
792     }
793     }
794    
795     // doesn't look like a table header
796     if colCount == 1 {
797     return
798     }
799    
800     // include the newline in the data sent to tableRow
801     j := i
802     if j < len(data) && data[j] == '\n' {
803     j++
804     }
805     header := data[:j]
806    
807     // column count ignores pipes at beginning or end of line
808     if data[0] == '|' {
809     colCount--
810     }
811     if i > 2 && data[i-1] == '|' && !isBackslashEscaped(data, i-1) {
812     colCount--
813     }
814    
815     columns = make([]CellAlignFlags, colCount)
816    
817     // move on to the header underline
818     i++
819     if i >= len(data) {
820     return
821     }
822    
823     if data[i] == '|' && !isBackslashEscaped(data, i) {
824     i++
825     }
826     i = skipChar(data, i, ' ')
827    
828     // each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3
829     // and trailing | optional on last column
830     col := 0
831     for i < len(data) && data[i] != '\n' {
832     dashes := 0
833    
834     if data[i] == ':' {
835     i++
836     columns[col] |= TableAlignmentLeft
837     dashes++
838     }
839     for i < len(data) && data[i] == '-' {
840     i++
841     dashes++
842     }
843     if i < len(data) && data[i] == ':' {
844     i++
845     columns[col] |= TableAlignmentRight
846     dashes++
847     }
848     for i < len(data) && data[i] == ' ' {
849     i++
850     }
851     if i == len(data) {
852     return
853     }
854     // end of column test is messy
855     switch {
856     case dashes < 3:
857     // not a valid column
858     return
859    
860     case data[i] == '|' && !isBackslashEscaped(data, i):
861     // marker found, now skip past trailing whitespace
862     col++
863     i++
864     for i < len(data) && data[i] == ' ' {
865     i++
866     }
867    
868     // trailing junk found after last column
869     if col >= colCount && i < len(data) && data[i] != '\n' {
870     return
871     }
872    
873     case (data[i] != '|' || isBackslashEscaped(data, i)) && col+1 < colCount:
874     // something else found where marker was required
875     return
876    
877     case data[i] == '\n':
878     // marker is optional for the last column
879     col++
880    
881     default:
882     // trailing junk found after last column
883     return
884     }
885     }
886     if col != colCount {
887     return
888     }
889    
890     p.addBlock(TableHead, nil)
891     p.tableRow(header, columns, true)
892     size = i
893     if size < len(data) && data[size] == '\n' {
894     size++
895     }
896     return
897     }
898    
899     func (p *Markdown) tableRow(data []byte, columns []CellAlignFlags, header bool) {
900     p.addBlock(TableRow, nil)
901     i, col := 0, 0
902    
903     if data[i] == '|' && !isBackslashEscaped(data, i) {
904     i++
905     }
906    
907     for col = 0; col < len(columns) && i < len(data); col++ {
908     for i < len(data) && data[i] == ' ' {
909     i++
910     }
911    
912     cellStart := i
913    
914     for i < len(data) && (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' {
915     i++
916     }
917    
918     cellEnd := i
919    
920     // skip the end-of-cell marker, possibly taking us past end of buffer
921     i++
922    
923     for cellEnd > cellStart && cellEnd-1 < len(data) && data[cellEnd-1] == ' ' {
924     cellEnd--
925     }
926    
927     cell := p.addBlock(TableCell, data[cellStart:cellEnd])
928     cell.IsHeader = header
929     cell.Align = columns[col]
930     }
931    
932     // pad it out with empty columns to get the right number
933     for ; col < len(columns); col++ {
934     cell := p.addBlock(TableCell, nil)
935     cell.IsHeader = header
936     cell.Align = columns[col]
937     }
938    
939     // silently ignore rows with too many cells
940     }
941    
942     // returns blockquote prefix length
943     func (p *Markdown) quotePrefix(data []byte) int {
944     i := 0
945     for i < 3 && i < len(data) && data[i] == ' ' {
946     i++
947     }
948     if i < len(data) && data[i] == '>' {
949     if i+1 < len(data) && data[i+1] == ' ' {
950     return i + 2
951     }
952     return i + 1
953     }
954     return 0
955     }
956    
957     // blockquote ends with at least one blank line
958     // followed by something without a blockquote prefix
959     func (p *Markdown) terminateBlockquote(data []byte, beg, end int) bool {
960     if p.isEmpty(data[beg:]) <= 0 {
961     return false
962     }
963     if end >= len(data) {
964     return true
965     }
966     return p.quotePrefix(data[end:]) == 0 && p.isEmpty(data[end:]) == 0
967     }
968    
969     // parse a blockquote fragment
970     func (p *Markdown) quote(data []byte) int {
971     block := p.addBlock(BlockQuote, nil)
972     var raw bytes.Buffer
973     beg, end := 0, 0
974     for beg < len(data) {
975     end = beg
976     // Step over whole lines, collecting them. While doing that, check for
977     // fenced code and if one's found, incorporate it altogether,
978     // irregardless of any contents inside it
979     for end < len(data) && data[end] != '\n' {
980     if p.extensions&FencedCode != 0 {
981     if i := p.fencedCodeBlock(data[end:], false); i > 0 {
982     // -1 to compensate for the extra end++ after the loop:
983     end += i - 1
984     break
985     }
986     }
987     end++
988     }
989     if end < len(data) && data[end] == '\n' {
990     end++
991     }
992     if pre := p.quotePrefix(data[beg:]); pre > 0 {
993     // skip the prefix
994     beg += pre
995     } else if p.terminateBlockquote(data, beg, end) {
996     break
997     }
998     // this line is part of the blockquote
999     raw.Write(data[beg:end])
1000     beg = end
1001     }
1002     p.block(raw.Bytes())
1003     p.finalize(block)
1004     return end
1005     }
1006    
1007     // returns prefix length for block code
1008     func (p *Markdown) codePrefix(data []byte) int {
1009     if len(data) >= 1 && data[0] == '\t' {
1010     return 1
1011     }
1012     if len(data) >= 4 && data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
1013     return 4
1014     }
1015     return 0
1016     }
1017    
1018     func (p *Markdown) code(data []byte) int {
1019     var work bytes.Buffer
1020    
1021     i := 0
1022     for i < len(data) {
1023     beg := i
1024     for i < len(data) && data[i] != '\n' {
1025     i++
1026     }
1027     if i < len(data) && data[i] == '\n' {
1028     i++
1029     }
1030    
1031     blankline := p.isEmpty(data[beg:i]) > 0
1032     if pre := p.codePrefix(data[beg:i]); pre > 0 {
1033     beg += pre
1034     } else if !blankline {
1035     // non-empty, non-prefixed line breaks the pre
1036     i = beg
1037     break
1038     }
1039    
1040     // verbatim copy to the working buffer
1041     if blankline {
1042     work.WriteByte('\n')
1043     } else {
1044     work.Write(data[beg:i])
1045     }
1046     }
1047    
1048     // trim all the \n off the end of work
1049     workbytes := work.Bytes()
1050     eol := len(workbytes)
1051     for eol > 0 && workbytes[eol-1] == '\n' {
1052     eol--
1053     }
1054     if eol != len(workbytes) {
1055     work.Truncate(eol)
1056     }
1057    
1058     work.WriteByte('\n')
1059    
1060     block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer
1061     block.IsFenced = false
1062     finalizeCodeBlock(block)
1063    
1064     return i
1065     }
1066    
1067     // returns unordered list item prefix
1068     func (p *Markdown) uliPrefix(data []byte) int {
1069     i := 0
1070     // start with up to 3 spaces
1071     for i < len(data) && i < 3 && data[i] == ' ' {
1072     i++
1073     }
1074     if i >= len(data)-1 {
1075     return 0
1076     }
1077     // need one of {'*', '+', '-'} followed by a space or a tab
1078     if (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
1079     (data[i+1] != ' ' && data[i+1] != '\t') {
1080     return 0
1081     }
1082     return i + 2
1083     }
1084    
1085     // returns ordered list item prefix
1086     func (p *Markdown) oliPrefix(data []byte) int {
1087     i := 0
1088    
1089     // start with up to 3 spaces
1090     for i < 3 && i < len(data) && data[i] == ' ' {
1091     i++
1092     }
1093    
1094     // count the digits
1095     start := i
1096     for i < len(data) && data[i] >= '0' && data[i] <= '9' {
1097     i++
1098     }
1099     if start == i || i >= len(data)-1 {
1100     return 0
1101     }
1102    
1103     // we need >= 1 digits followed by a dot and a space or a tab
1104     if data[i] != '.' || !(data[i+1] == ' ' || data[i+1] == '\t') {
1105     return 0
1106     }
1107     return i + 2
1108     }
1109    
1110     // returns definition list item prefix
1111     func (p *Markdown) dliPrefix(data []byte) int {
1112     if len(data) < 2 {
1113     return 0
1114     }
1115     i := 0
1116     // need a ':' followed by a space or a tab
1117     if data[i] != ':' || !(data[i+1] == ' ' || data[i+1] == '\t') {
1118     return 0
1119     }
1120     for i < len(data) && data[i] == ' ' {
1121     i++
1122     }
1123     return i + 2
1124     }
1125    
1126     // parse ordered or unordered list block
1127     func (p *Markdown) list(data []byte, flags ListType) int {
1128     i := 0
1129     flags |= ListItemBeginningOfList
1130     block := p.addBlock(List, nil)
1131     block.ListFlags = flags
1132     block.Tight = true
1133    
1134     for i < len(data) {
1135     skip := p.listItem(data[i:], &flags)
1136     if flags&ListItemContainsBlock != 0 {
1137     block.ListData.Tight = false
1138     }
1139     i += skip
1140     if skip == 0 || flags&ListItemEndOfList != 0 {
1141     break
1142     }
1143     flags &= ^ListItemBeginningOfList
1144     }
1145    
1146     above := block.Parent
1147     finalizeList(block)
1148     p.tip = above
1149     return i
1150     }
1151    
1152     // Returns true if the list item is not the same type as its parent list
1153     func (p *Markdown) listTypeChanged(data []byte, flags *ListType) bool {
1154     if p.dliPrefix(data) > 0 && *flags&ListTypeDefinition == 0 {
1155     return true
1156     } else if p.oliPrefix(data) > 0 && *flags&ListTypeOrdered == 0 {
1157     return true
1158     } else if p.uliPrefix(data) > 0 && (*flags&ListTypeOrdered != 0 || *flags&ListTypeDefinition != 0) {
1159     return true
1160     }
1161     return false
1162     }
1163    
1164     // Returns true if block ends with a blank line, descending if needed
1165     // into lists and sublists.
1166     func endsWithBlankLine(block *Node) bool {
1167     // TODO: figure this out. Always false now.
1168     for block != nil {
1169     //if block.lastLineBlank {
1170     //return true
1171     //}
1172     t := block.Type
1173     if t == List || t == Item {
1174     block = block.LastChild
1175     } else {
1176     break
1177     }
1178     }
1179     return false
1180     }
1181    
1182     func finalizeList(block *Node) {
1183     block.open = false
1184     item := block.FirstChild
1185     for item != nil {
1186     // check for non-final list item ending with blank line:
1187     if endsWithBlankLine(item) && item.Next != nil {
1188     block.ListData.Tight = false
1189     break
1190     }
1191     // recurse into children of list item, to see if there are spaces
1192     // between any of them:
1193     subItem := item.FirstChild
1194     for subItem != nil {
1195     if endsWithBlankLine(subItem) && (item.Next != nil || subItem.Next != nil) {
1196     block.ListData.Tight = false
1197     break
1198     }
1199     subItem = subItem.Next
1200     }
1201     item = item.Next
1202     }
1203     }
1204    
1205     // Parse a single list item.
1206     // Assumes initial prefix is already removed if this is a sublist.
         //
         // Starting at data[0], the lines belonging to the item are gathered into a
         // working buffer, an Item node is added for them, and the offset in data of
         // the first line that was not consumed is returned. It returns 0 without
         // adding a node when data does not begin with a list marker and the list is
         // not a definition list.
         //
         // flags is read and updated in place: ListTypeTerm is set for a definition
         // term and cleared for a definition, ListItemContainsBlock is set when the
         // item has to be parsed as block content, and ListItemEndOfList is set when
         // the line that stopped the gathering also ends the enclosing list.
1207     func (p *Markdown) listItem(data []byte, flags *ListType) int {
1208     // keep track of the indentation of the first line
1209     itemIndent := 0
1210     if data[0] == '\t' {
1211     itemIndent += 4
1212     } else {
1213     for itemIndent < 3 && data[itemIndent] == ' ' {
1214     itemIndent++
1215     }
1216     }
1217    
         // try each marker kind in turn: unordered, then ordered, then definition;
         // i ends up as the length of the marker prefix, or 0 if none matched
1218     var bulletChar byte = '*'
1219     i := p.uliPrefix(data)
1220     if i == 0 {
1221     i = p.oliPrefix(data)
1222     } else {
         // for an unordered item, the bullet is the byte two positions before
         // the end of the prefix
1223     bulletChar = data[i-2]
1224     }
1225     if i == 0 {
1226     i = p.dliPrefix(data)
1227     // reset definition term flag
1228     if i > 0 {
1229     *flags &= ^ListTypeTerm
1230     }
1231     }
1232     if i == 0 {
1233     // if in definition list, set term flag and continue
1234     if *flags&ListTypeDefinition != 0 {
1235     *flags |= ListTypeTerm
1236     } else {
1237     return 0
1238     }
1239     }
1240    
1241     // skip leading whitespace on first line
1242     for i < len(data) && data[i] == ' ' {
1243     i++
1244     }
1245    
1246     // find the end of the line
         // (i can still be 0 here for a definition term; the i > 0 guard keeps
         // data[i-1] in range, and in that case the first line is instead picked
         // up by the gathering loop below)
1247     line := i
1248     for i > 0 && i < len(data) && data[i-1] != '\n' {
1249     i++
1250     }
1251    
1252     // get working buffer
1253     var raw bytes.Buffer
1254    
1255     // put the first line into the working buffer
1256     raw.Write(data[line:i])
1257     line = i
1258    
1259     // process the following lines
         // containsBlankLine: a blank line was seen since the last line written to raw
         // sublist: length of raw when the first nested list item was seen (0 = none yet)
         // codeBlockMarker: fence marker of the code block currently open, "" when outside one
1260     containsBlankLine := false
1261     sublist := 0
1262     codeBlockMarker := ""
1263    
1264     gatherlines:
1265     for line < len(data) {
1266     i++
1267    
1268     // find the end of this line
1269     for i < len(data) && data[i-1] != '\n' {
1270     i++
1271     }
1272    
1273     // if it is an empty line, guess that it is part of this item
1274     // and move on to the next line
1275     if p.isEmpty(data[line:i]) > 0 {
1276     containsBlankLine = true
1277     line = i
1278     continue
1279     }
1280    
1281     // calculate the indentation
         // (indent is measured in columns, indentIndex in bytes to skip: a leading
         // tab counts as 4 columns but is a single byte; otherwise up to 4 leading
         // spaces are counted)
1282     indent := 0
1283     indentIndex := 0
1284     if data[line] == '\t' {
1285     indentIndex++
1286     indent += 4
1287     } else {
1288     for indent < 4 && line+indent < i && data[line+indent] == ' ' {
1289     indent++
1290     indentIndex++
1291     }
1292     }
1293    
         // chunk is the current line with that leading indentation removed
1294     chunk := data[line+indentIndex : i]
1295    
1296     if p.extensions&FencedCode != 0 {
1297     // determine if in or out of codeblock
1298     // if in codeblock, ignore normal list processing
1299     _, marker := isFenceLine(chunk, nil, codeBlockMarker)
1300     if marker != "" {
1301     if codeBlockMarker == "" {
1302     // start of codeblock
1303     codeBlockMarker = marker
1304     } else {
1305     // end of codeblock.
1306     codeBlockMarker = ""
1307     }
1308     }
1309     // we are in a codeblock, write line, and continue
         // (marker != "" also covers the closing fence line itself)
1310     if codeBlockMarker != "" || marker != "" {
1311     raw.Write(data[line+indentIndex : i])
1312     line = i
1313     continue gatherlines
1314     }
1315     }
1316    
1317     // evaluate how this line fits in
1318     switch {
1319     // is this a nested list item?
1320     case (p.uliPrefix(chunk) > 0 && !p.isHRule(chunk)) ||
1321     p.oliPrefix(chunk) > 0 ||
1322     p.dliPrefix(chunk) > 0:
1323    
1324     // to be a nested list, it must be indented more
1325     // if not, it is either a different kind of list
1326     // or the next item in the same list
1327     if indent <= itemIndent {
1328     if p.listTypeChanged(chunk, flags) {
1329     *flags |= ListItemEndOfList
1330     } else if containsBlankLine {
1331     *flags |= ListItemContainsBlock
1332     }
1333    
1334     break gatherlines
1335     }
1336    
1337     if containsBlankLine {
1338     *flags |= ListItemContainsBlock
1339     }
1340    
1341     // is this the first item in the nested list?
1342     if sublist == 0 {
1343     sublist = raw.Len()
1344     }
1345    
1346     // is this a nested prefix heading?
1347     case p.isPrefixHeading(chunk):
1348     // if the heading is not indented, it is not nested in the list
1349     // and thus ends the list
1350     if containsBlankLine && indent < 4 {
1351     *flags |= ListItemEndOfList
1352     break gatherlines
1353     }
1354     *flags |= ListItemContainsBlock
1355    
1356     // anything following an empty line is only part
1357     // of this item if it is indented 4 spaces
1358     // (regardless of the indentation of the beginning of the item)
1359     case containsBlankLine && indent < 4:
1360     if *flags&ListTypeDefinition != 0 && i < len(data)-1 {
1361     // is the next item still a part of this list?
         // next is moved to the end of the line starting at i and then past
         // the newline characters that follow it
1362     next := i
1363     for next < len(data) && data[next] != '\n' {
1364     next++
1365     }
1366     for next < len(data)-1 && data[next] == '\n' {
1367     next++
1368     }
         // the list ends unless either of those two positions holds a ':'
1369     if i < len(data)-1 && data[i] != ':' && data[next] != ':' {
1370     *flags |= ListItemEndOfList
1371     }
1372     } else {
1373     *flags |= ListItemEndOfList
1374     }
1375     break gatherlines
1376    
1377     // a blank line means this should be parsed as a block
1378     case containsBlankLine:
1379     raw.WriteByte('\n')
1380     *flags |= ListItemContainsBlock
1381     }
1382    
1383     // if this line was preceded by one or more blanks,
1384     // re-introduce the blank into the buffer
1385     if containsBlankLine {
1386     containsBlankLine = false
1387     raw.WriteByte('\n')
1388     }
1389    
1390     // add the line into the working buffer without prefix
1391     raw.Write(data[line+indentIndex : i])
1392    
1393     line = i
1394     }
1395    
1396     rawBytes := raw.Bytes()
1397    
1398     block := p.addBlock(Item, nil)
1399     block.ListFlags = *flags
1400     block.Tight = false
1401     block.BulletChar = bulletChar
1402     block.Delimiter = '.' // Only '.' is possible in Markdown, but ')' will also be possible in CommonMark
1403    
1404     // render the contents of the list item
         // Block content is parsed recursively with p.block, split at the first
         // nested list item when there is one. A definition term, or an item without
         // block content, gets a Paragraph child holding the raw bytes instead, with
         // any nested list part still going through p.block.
1405     if *flags&ListItemContainsBlock != 0 && *flags&ListTypeTerm == 0 {
1406     // intermediate render of block item, except for definition term
1407     if sublist > 0 {
1408     p.block(rawBytes[:sublist])
1409     p.block(rawBytes[sublist:])
1410     } else {
1411     p.block(rawBytes)
1412     }
1413     } else {
1414     // intermediate render of inline item
1415     if sublist > 0 {
1416     child := p.addChild(Paragraph, 0)
1417     child.content = rawBytes[:sublist]
1418     p.block(rawBytes[sublist:])
1419     } else {
1420     child := p.addChild(Paragraph, 0)
1421     child.content = rawBytes
1422     }
1423     }
         // line is the offset of the first line that was not consumed by this item
1424     return line
1425     }
1426    
1427     // render a single paragraph that has already been parsed out
1428     func (p *Markdown) renderParagraph(data []byte) {
1429     if len(data) == 0 {
1430     return
1431     }
1432    
1433     // trim leading spaces
1434     beg := 0
1435     for data[beg] == ' ' {
1436     beg++
1437     }
1438    
1439     end := len(data)
1440     // trim trailing newline
1441     if data[len(data)-1] == '\n' {
1442     end--
1443     }
1444    
1445     // trim trailing spaces
1446     for end > beg && data[end-1] == ' ' {
1447     end--
1448     }
1449    
1450     p.addBlock(Paragraph, data[beg:end])
1451     }
1452    
1453     func (p *Markdown) paragraph(data []byte) int {
1454     // prev: index of 1st char of previous line
1455     // line: index of 1st char of current line
1456     // i: index of cursor/end of current line
1457     var prev, line, i int
1458     tabSize := TabSizeDefault
1459     if p.extensions&TabSizeEight != 0 {
1460     tabSize = TabSizeDouble
1461     }
1462     // keep going until we find something to mark the end of the paragraph
1463     for i < len(data) {
1464     // mark the beginning of the current line
1465     prev = line
1466     current := data[i:]
1467     line = i
1468    
1469     // did we find a reference or a footnote? If so, end a paragraph
1470     // preceding it and report that we have consumed up to the end of that
1471     // reference:
1472     if refEnd := isReference(p, current, tabSize); refEnd > 0 {
1473     p.renderParagraph(data[:i])
1474     return i + refEnd
1475     }
1476    
1477     // did we find a blank line marking the end of the paragraph?
1478     if n := p.isEmpty(current); n > 0 {
1479     // did this blank line followed by a definition list item?
1480     if p.extensions&DefinitionLists != 0 {
1481     if i < len(data)-1 && data[i+1] == ':' {
1482     return p.list(data[prev:], ListTypeDefinition)
1483     }
1484     }
1485    
1486     p.renderParagraph(data[:i])
1487     return i + n
1488     }
1489    
1490     // an underline under some text marks a heading, so our paragraph ended on prev line
1491     if i > 0 {
1492     if level := p.isUnderlinedHeading(current); level > 0 {
1493     // render the paragraph
1494     p.renderParagraph(data[:prev])
1495    
1496     // ignore leading and trailing whitespace
1497     eol := i - 1
1498     for prev < eol && data[prev] == ' ' {
1499     prev++
1500     }
1501     for eol > prev && data[eol-1] == ' ' {
1502     eol--
1503     }
1504    
1505     id := ""
1506     if p.extensions&AutoHeadingIDs != 0 {
1507     id = SanitizedAnchorName(string(data[prev:eol]))
1508     }
1509    
1510     block := p.addBlock(Heading, data[prev:eol])
1511     block.Level = level
1512     block.HeadingID = id
1513    
1514     // find the end of the underline
1515     for i < len(data) && data[i] != '\n' {
1516     i++
1517     }
1518     return i
1519     }
1520     }
1521    
1522     // if the next line starts a block of HTML, then the paragraph ends here
1523     if p.extensions&LaxHTMLBlocks != 0 {
1524     if data[i] == '<' && p.html(current, false) > 0 {
1525     // rewind to before the HTML block
1526     p.renderParagraph(data[:i])
1527     return i
1528     }
1529     }
1530    
1531     // if there's a prefixed heading or a horizontal rule after this, paragraph is over
1532     if p.isPrefixHeading(current) || p.isHRule(current) {
1533     p.renderParagraph(data[:i])
1534     return i
1535     }
1536    
1537     // if there's a fenced code block, paragraph is over
1538     if p.extensions&FencedCode != 0 {
1539     if p.fencedCodeBlock(current, false) > 0 {
1540     p.renderParagraph(data[:i])
1541     return i
1542     }
1543     }
1544    
1545     // if there's a definition list item, prev line is a definition term
1546     if p.extensions&DefinitionLists != 0 {
1547     if p.dliPrefix(current) != 0 {
1548     ret := p.list(data[prev:], ListTypeDefinition)
1549     return ret
1550     }
1551     }
1552    
1553     // if there's a list after this, paragraph is over
1554     if p.extensions&NoEmptyLineBeforeBlock != 0 {
1555     if p.uliPrefix(current) != 0 ||
1556     p.oliPrefix(current) != 0 ||
1557     p.quotePrefix(current) != 0 ||
1558     p.codePrefix(current) != 0 {
1559     p.renderParagraph(data[:i])
1560     return i
1561     }
1562     }
1563    
1564     // otherwise, scan to the beginning of the next line
1565     nl := bytes.IndexByte(data[i:], '\n')
1566     if nl >= 0 {
1567     i += nl + 1
1568     } else {
1569     i += len(data[i:])
1570     }
1571     }
1572    
1573     p.renderParagraph(data[:i])
1574     return i
1575     }
1576    
// skipChar returns the index of the first byte at or after start in data
// that is not equal to char, or the first index past the end of data if
// there is none.
func skipChar(data []byte, start int, char byte) int {
	pos := start
	for ; pos < len(data); pos++ {
		if data[pos] != char {
			break
		}
	}
	return pos
}
1584    
// skipUntilChar returns the index of the first byte at or after start in
// text that equals char, or the first index past the end of text if there
// is none.
func skipUntilChar(text []byte, start int, char byte) int {
	pos := start
	for ; pos < len(text); pos++ {
		if text[pos] == char {
			break
		}
	}
	return pos
}
1592    
// SanitizedAnchorName returns a sanitized anchor name for the given text.
//
// It implements the algorithm specified in the package comment: letters and
// numbers are kept in lower case, and every run of other characters between
// two kept characters is replaced by a single '-'.
func SanitizedAnchorName(text string) string {
	var name strings.Builder
	pendingDash := false
	for _, r := range text {
		if !unicode.IsLetter(r) && !unicode.IsNumber(r) {
			// remember the separator; it is only written if more kept
			// characters follow
			pendingDash = true
			continue
		}
		if pendingDash && name.Len() > 0 {
			name.WriteByte('-')
		}
		pendingDash = false
		name.WriteRune(unicode.ToLower(r))
	}
	return name.String()
}

nishi@chaotic.ninja
ViewVC Help
Powered by ViewVC 1.3.0-dev