ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/aya/vendor/github.com/russross/blackfriday/v2/inline.go
Revision: 1.1
Committed: Mon Sep 30 00:42:06 2024 UTC (6 weeks, 4 days ago) by yakumo_izuru
Branch: MAIN
CVS Tags: HEAD
Log Message:
Mirrored from https://git.chaotic.ninja/git/yakumo_izuru/aya

File Contents

# User Rev Content
1 yakumo_izuru 1.1 //
2     // Blackfriday Markdown Processor
3     // Available at http://github.com/russross/blackfriday
4     //
5     // Copyright © 2011 Russ Ross <russ@russross.com>.
6     // Distributed under the Simplified BSD License.
7     // See README.md for details.
8     //
9    
10     //
11     // Functions to parse inline elements.
12     //
13    
14     package blackfriday
15    
16     import (
17     "bytes"
18     "regexp"
19     "strconv"
20     )
21    
var (
	// urlRe is the regular-expression source for a URL: an http, https or ftp
	// URL, or a path that starts with a slash. It is spliced into anchorRe.
	urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
	// anchorRe matches, at the start of its input, a complete anchor element
	// whose href and link text both match urlRe, with an optional title
	// attribute.
	anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`)

	// https://www.w3.org/TR/html5/syntax.html#character-references
	// highest unicode code point in 17 planes (2^20): 1,114,112d =
	// 7 dec digits or 6 hex digits
	// named entity references can be 2-31 characters with stuff like &lt;
	// at one end and &CounterClockwiseContourIntegral; at the other. There
	// are also sometimes numbers at the end, although this isn't inherent
	// in the specification; there are never numbers anywhere else in
	// current character references, though; see &frac34; and &blk12;, etc.
	// https://www.w3.org/TR/html5/syntax.html#named-character-references
	//
	// entity     := "&" (named group | number ref) ";"
	// named group := [a-zA-Z]{2,31}[0-9]{0,2}
	// number ref := "#" (dec ref | hex ref)
	// dec ref    := [0-9]{1,7}
	// hex ref    := ("x" | "X") [0-9a-fA-F]{1,6}
	//
	// htmlEntityRe is unanchored, so it finds entities anywhere in its input.
	htmlEntityRe = regexp.MustCompile(`&([a-zA-Z]{2,31}[0-9]{0,2}|#([0-9]{1,7}|[xX][0-9a-fA-F]{1,6}));`)
)
43    
44     // Functions to parse text within a block
45     // Each function returns the number of chars taken care of
46     // data is the complete block being rendered
47     // offset is the number of valid chars before the current cursor
48    
49     func (p *Markdown) inline(currBlock *Node, data []byte) {
50     // handlers might call us recursively: enforce a maximum depth
51     if p.nesting >= p.maxNesting || len(data) == 0 {
52     return
53     }
54     p.nesting++
55     beg, end := 0, 0
56     for end < len(data) {
57     handler := p.inlineCallback[data[end]]
58     if handler != nil {
59     if consumed, node := handler(p, data, end); consumed == 0 {
60     // No action from the callback.
61     end++
62     } else {
63     // Copy inactive chars into the output.
64     currBlock.AppendChild(text(data[beg:end]))
65     if node != nil {
66     currBlock.AppendChild(node)
67     }
68     // Skip past whatever the callback used.
69     beg = end + consumed
70     end = beg
71     }
72     } else {
73     end++
74     }
75     }
76     if beg < len(data) {
77     if data[end-1] == '\n' {
78     end--
79     }
80     currBlock.AppendChild(text(data[beg:end]))
81     }
82     p.nesting--
83     }
84    
85     // single and double emphasis parsing
86     func emphasis(p *Markdown, data []byte, offset int) (int, *Node) {
87     data = data[offset:]
88     c := data[0]
89    
90     if len(data) > 2 && data[1] != c {
91     // whitespace cannot follow an opening emphasis;
92     // strikethrough only takes two characters '~~'
93     if c == '~' || isspace(data[1]) {
94     return 0, nil
95     }
96     ret, node := helperEmphasis(p, data[1:], c)
97     if ret == 0 {
98     return 0, nil
99     }
100    
101     return ret + 1, node
102     }
103    
104     if len(data) > 3 && data[1] == c && data[2] != c {
105     if isspace(data[2]) {
106     return 0, nil
107     }
108     ret, node := helperDoubleEmphasis(p, data[2:], c)
109     if ret == 0 {
110     return 0, nil
111     }
112    
113     return ret + 2, node
114     }
115    
116     if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c {
117     if c == '~' || isspace(data[3]) {
118     return 0, nil
119     }
120     ret, node := helperTripleEmphasis(p, data, 3, c)
121     if ret == 0 {
122     return 0, nil
123     }
124    
125     return ret + 3, node
126     }
127    
128     return 0, nil
129     }
130    
131     func codeSpan(p *Markdown, data []byte, offset int) (int, *Node) {
132     data = data[offset:]
133    
134     nb := 0
135    
136     // count the number of backticks in the delimiter
137     for nb < len(data) && data[nb] == '`' {
138     nb++
139     }
140    
141     // find the next delimiter
142     i, end := 0, 0
143     for end = nb; end < len(data) && i < nb; end++ {
144     if data[end] == '`' {
145     i++
146     } else {
147     i = 0
148     }
149     }
150    
151     // no matching delimiter?
152     if i < nb && end >= len(data) {
153     return 0, nil
154     }
155    
156     // trim outside whitespace
157     fBegin := nb
158     for fBegin < end && data[fBegin] == ' ' {
159     fBegin++
160     }
161    
162     fEnd := end - nb
163     for fEnd > fBegin && data[fEnd-1] == ' ' {
164     fEnd--
165     }
166    
167     // render the code span
168     if fBegin != fEnd {
169     code := NewNode(Code)
170     code.Literal = data[fBegin:fEnd]
171     return end, code
172     }
173    
174     return end, nil
175     }
176    
177     // newline preceded by two spaces becomes <br>
178     func maybeLineBreak(p *Markdown, data []byte, offset int) (int, *Node) {
179     origOffset := offset
180     for offset < len(data) && data[offset] == ' ' {
181     offset++
182     }
183    
184     if offset < len(data) && data[offset] == '\n' {
185     if offset-origOffset >= 2 {
186     return offset - origOffset + 1, NewNode(Hardbreak)
187     }
188     return offset - origOffset, nil
189     }
190     return 0, nil
191     }
192    
193     // newline without two spaces works when HardLineBreak is enabled
194     func lineBreak(p *Markdown, data []byte, offset int) (int, *Node) {
195     if p.extensions&HardLineBreak != 0 {
196     return 1, NewNode(Hardbreak)
197     }
198     return 0, nil
199     }
200    
// linkType identifies which bracketed construct link is parsing.
type linkType int

const (
	linkNormal           linkType = iota // [text]: regular link
	linkImg                              // ![alt]: image
	linkDeferredFootnote                 // [^refId]: footnote looked up by id
	linkInlineFootnote                   // ^[text]: footnote whose text is given inline
)
209    
210     func isReferenceStyleLink(data []byte, pos int, t linkType) bool {
211     if t == linkDeferredFootnote {
212     return false
213     }
214     return pos < len(data)-1 && data[pos] == '[' && data[pos+1] != '^'
215     }
216    
217     func maybeImage(p *Markdown, data []byte, offset int) (int, *Node) {
218     if offset < len(data)-1 && data[offset+1] == '[' {
219     return link(p, data, offset)
220     }
221     return 0, nil
222     }
223    
224     func maybeInlineFootnote(p *Markdown, data []byte, offset int) (int, *Node) {
225     if offset < len(data)-1 && data[offset+1] == '[' {
226     return link(p, data, offset)
227     }
228     return 0, nil
229     }
230    
// '[': parse a link or an image or a footnote
//
// link parses the construct that starts at data[offset]. data[offset] may be
// '!' (image) or '^' (inline footnote), in which case the bracket follows it.
// Three forms are recognised after the closing ']':
//   - "(dest "title")": inline link,
//   - "[id]":           reference link, resolved through p.getRef,
//   - nothing:          shortcut reference or footnote.
//
// It returns the number of bytes consumed, counted from the original offset,
// and the node built, or (0, nil) when the text is not a usable link.
func link(p *Markdown, data []byte, offset int) (int, *Node) {
	// no links allowed inside regular links, footnote, and deferred footnotes
	if p.insideLink && (offset > 0 && data[offset-1] == '[' || len(data)-1 > offset && data[offset+1] == '^') {
		return 0, nil
	}

	var t linkType
	switch {
	// special case: ![^text] == deferred footnote (that follows something with
	// an exclamation point)
	case p.extensions&Footnotes != 0 && len(data)-1 > offset && data[offset+1] == '^':
		t = linkDeferredFootnote
	// ![alt] == image
	case offset >= 0 && data[offset] == '!':
		t = linkImg
		offset++
	// ^[text] == inline footnote
	// [^refId] == deferred footnote
	case p.extensions&Footnotes != 0:
		if offset >= 0 && data[offset] == '^' {
			t = linkInlineFootnote
			offset++
		} else if len(data)-1 > offset && data[offset+1] == '^' {
			t = linkDeferredFootnote
		}
	// [text] == regular link
	default:
		t = linkNormal
	}

	// From here on data[0] is the opening '['.
	data = data[offset:]

	var (
		i                       = 1
		noteID                  int
		title, link, altContent []byte
		textHasNl               = false
	)

	// skip the '^' that follows '[' in a deferred footnote
	if t == linkDeferredFootnote {
		i++
	}

	// look for the matching closing bracket
	for level := 1; level > 0 && i < len(data); i++ {
		switch {
		case data[i] == '\n':
			textHasNl = true

		case isBackslashEscaped(data, i):
			continue

		case data[i] == '[':
			level++

		case data[i] == ']':
			level--
			if level <= 0 {
				i-- // compensate for extra i++ in for loop
			}
		}
	}

	// no closing bracket found
	if i >= len(data) {
		return 0, nil
	}

	// txtE is the index of the closing ']'; the link text is data[1:txtE].
	txtE := i
	i++
	var footnoteNode *Node

	// skip any amount of whitespace or newline
	// (this is much more lax than original markdown syntax)
	for i < len(data) && isspace(data[i]) {
		i++
	}

	// inline style link
	switch {
	case i < len(data) && data[i] == '(':
		// skip initial whitespace
		i++

		for i < len(data) && isspace(data[i]) {
			i++
		}

		linkB := i

		// look for link end: ' " )
	findlinkend:
		for i < len(data) {
			switch {
			case data[i] == '\\':
				// skip the backslash and the byte it escapes
				i += 2

			case data[i] == ')' || data[i] == '\'' || data[i] == '"':
				break findlinkend

			default:
				i++
			}
		}

		if i >= len(data) {
			return 0, nil
		}
		linkE := i

		// look for title end if present
		titleB, titleE := 0, 0
		if data[i] == '\'' || data[i] == '"' {
			i++
			titleB = i

		findtitleend:
			for i < len(data) {
				switch {
				case data[i] == '\\':
					i += 2

				case data[i] == ')':
					break findtitleend

				default:
					i++
				}
			}

			if i >= len(data) {
				return 0, nil
			}

			// skip whitespace after title
			titleE = i - 1
			for titleE > titleB && isspace(data[titleE]) {
				titleE--
			}

			// check for closing quote presence
			if data[titleE] != '\'' && data[titleE] != '"' {
				// no closing quote: everything up to ')' belongs to the link
				titleB, titleE = 0, 0
				linkE = i
			}
		}

		// remove whitespace at the end of the link
		for linkE > linkB && isspace(data[linkE-1]) {
			linkE--
		}

		// remove optional angle brackets around the link
		if data[linkB] == '<' {
			linkB++
		}
		if data[linkE-1] == '>' {
			linkE--
		}

		// build escaped link and title
		if linkE > linkB {
			link = data[linkB:linkE]
		}

		if titleE > titleB {
			title = data[titleB:titleE]
		}

		// step over the closing ')'
		i++

	// reference style link
	case isReferenceStyleLink(data, i, t):
		var id []byte
		altContentConsidered := false

		// look for the id
		i++
		linkB := i
		for i < len(data) && data[i] != ']' {
			i++
		}
		if i >= len(data) {
			return 0, nil
		}
		linkE := i

		// find the reference
		if linkB == linkE {
			// empty "[]": the link text itself is the id
			if textHasNl {
				var b bytes.Buffer

				// replace each newline with a single space, unless a space
				// already precedes it
				for j := 1; j < txtE; j++ {
					switch {
					case data[j] != '\n':
						b.WriteByte(data[j])
					case data[j-1] != ' ':
						b.WriteByte(' ')
					}
				}

				id = b.Bytes()
			} else {
				id = data[1:txtE]
				altContentConsidered = true
			}
		} else {
			id = data[linkB:linkE]
		}

		// find the reference with matching id
		lr, ok := p.getRef(string(id))
		if !ok {
			return 0, nil
		}

		// keep link and title from reference
		link = lr.link
		title = lr.title
		if altContentConsidered {
			altContent = lr.text
		}
		// step over the closing ']' of the id
		i++

	// shortcut reference style link or reference or inline footnote
	default:
		var id []byte

		// craft the id
		if textHasNl {
			var b bytes.Buffer

			// replace each newline with a single space, unless a space
			// already precedes it
			for j := 1; j < txtE; j++ {
				switch {
				case data[j] != '\n':
					b.WriteByte(data[j])
				case data[j-1] != ' ':
					b.WriteByte(' ')
				}
			}

			id = b.Bytes()
		} else {
			if t == linkDeferredFootnote {
				id = data[2:txtE] // get rid of the ^
			} else {
				id = data[1:txtE]
			}
		}

		footnoteNode = NewNode(Item)
		if t == linkInlineFootnote {
			// create a new reference
			noteID = len(p.notes) + 1

			var fragment []byte
			if len(id) > 0 {
				// fragment receives at most the first 16 bytes of the slug
				if len(id) < 16 {
					fragment = make([]byte, len(id))
				} else {
					fragment = make([]byte, 16)
				}
				copy(fragment, slugify(id))
			} else {
				fragment = append([]byte("footnote-"), []byte(strconv.Itoa(noteID))...)
			}

			ref := &reference{
				noteID:   noteID,
				hasBlock: false,
				link:     fragment,
				title:    id,
				footnote: footnoteNode,
			}

			p.notes = append(p.notes, ref)

			link = ref.link
			title = ref.title
		} else {
			// find the reference with matching id
			lr, ok := p.getRef(string(id))
			if !ok {
				return 0, nil
			}

			if t == linkDeferredFootnote {
				lr.noteID = len(p.notes) + 1
				lr.footnote = footnoteNode
				p.notes = append(p.notes, lr)
			}

			// keep link and title from reference
			link = lr.link
			// if inline footnote, title == footnote contents
			title = lr.title
			noteID = lr.noteID
		}

		// rewind the whitespace
		i = txtE + 1
	}

	var uLink []byte
	if t == linkNormal || t == linkImg {
		if len(link) > 0 {
			var uLinkBuf bytes.Buffer
			unescapeText(&uLinkBuf, link)
			uLink = uLinkBuf.Bytes()
		}

		// links need something to click on and somewhere to go
		if len(uLink) == 0 || (t == linkNormal && txtE <= 1) {
			return 0, nil
		}
	}

	// call the relevant rendering function
	var linkNode *Node
	switch t {
	case linkNormal:
		linkNode = NewNode(Link)
		linkNode.Destination = normalizeURI(uLink)
		linkNode.Title = title
		if len(altContent) > 0 {
			linkNode.AppendChild(text(altContent))
		} else {
			// links cannot contain other links, so turn off link parsing
			// temporarily and recurse
			insideLink := p.insideLink
			p.insideLink = true
			p.inline(linkNode, data[1:txtE])
			p.insideLink = insideLink
		}

	case linkImg:
		linkNode = NewNode(Image)
		linkNode.Destination = uLink
		linkNode.Title = title
		linkNode.AppendChild(text(data[1:txtE]))
		// account for the '!' skipped before data was re-sliced
		i++

	case linkInlineFootnote, linkDeferredFootnote:
		linkNode = NewNode(Link)
		linkNode.Destination = link
		linkNode.Title = title
		linkNode.NoteID = noteID
		linkNode.Footnote = footnoteNode
		if t == linkInlineFootnote {
			// account for the '^' skipped before data was re-sliced
			i++
		}

	default:
		return 0, nil
	}

	return i, linkNode
}
589    
590     func (p *Markdown) inlineHTMLComment(data []byte) int {
591     if len(data) < 5 {
592     return 0
593     }
594     if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' {
595     return 0
596     }
597     i := 5
598     // scan for an end-of-comment marker, across lines if necessary
599     for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
600     i++
601     }
602     // no end-of-comment marker
603     if i >= len(data) {
604     return 0
605     }
606     return i + 1
607     }
608    
// stripMailto returns link without a leading "mailto://" or "mailto:" scheme.
// Any other input is returned unchanged. The result shares link's backing
// array.
func stripMailto(link []byte) []byte {
	switch {
	case bytes.HasPrefix(link, []byte("mailto://")):
		return link[len("mailto://"):]
	case bytes.HasPrefix(link, []byte("mailto:")):
		return link[len("mailto:"):]
	}
	return link
}
618    
// autolinkType specifies a kind of autolink that gets detected.
type autolinkType int

// These are the possible flag values for the autolink renderer.
const (
	notAutolink    autolinkType = iota // not an autolink (plain tag or nothing)
	normalAutolink                     // "<scheme:...>" autolink
	emailAutolink                      // "<user@host>" autolink
)
628    
629     // '<' when tags or autolinks are allowed
630     func leftAngle(p *Markdown, data []byte, offset int) (int, *Node) {
631     data = data[offset:]
632     altype, end := tagLength(data)
633     if size := p.inlineHTMLComment(data); size > 0 {
634     end = size
635     }
636     if end > 2 {
637     if altype != notAutolink {
638     var uLink bytes.Buffer
639     unescapeText(&uLink, data[1:end+1-2])
640     if uLink.Len() > 0 {
641     link := uLink.Bytes()
642     node := NewNode(Link)
643     node.Destination = link
644     if altype == emailAutolink {
645     node.Destination = append([]byte("mailto:"), link...)
646     }
647     node.AppendChild(text(stripMailto(link)))
648     return end, node
649     }
650     } else {
651     htmlTag := NewNode(HTMLSpan)
652     htmlTag.Literal = data[:end]
653     return end, htmlTag
654     }
655     }
656    
657     return end, nil
658     }
659    
660     // '\\' backslash escape
661     var escapeChars = []byte("\\`*_{}[]()#+-.!:|&<>~")
662    
663     func escape(p *Markdown, data []byte, offset int) (int, *Node) {
664     data = data[offset:]
665    
666     if len(data) > 1 {
667     if p.extensions&BackslashLineBreak != 0 && data[1] == '\n' {
668     return 2, NewNode(Hardbreak)
669     }
670     if bytes.IndexByte(escapeChars, data[1]) < 0 {
671     return 0, nil
672     }
673    
674     return 2, text(data[1:2])
675     }
676    
677     return 2, nil
678     }
679    
// unescapeText appends src to ob with backslash escapes removed: every
// backslash is dropped and the byte after it is copied verbatim. A backslash
// that is the last byte of src is dropped as well.
func unescapeText(ob *bytes.Buffer, src []byte) {
	for len(src) > 0 {
		slash := bytes.IndexByte(src, '\\')
		if slash < 0 {
			// No more escapes: copy the remainder and finish.
			ob.Write(src)
			return
		}

		if slash > 0 {
			ob.Write(src[:slash])
		}

		// A trailing backslash has nothing to escape.
		if slash+1 >= len(src) {
			return
		}

		ob.WriteByte(src[slash+1])
		src = src[slash+2:]
	}
}
700    
701     // '&' escaped when it doesn't belong to an entity
702     // valid entities are assumed to be anything matching &#?[A-Za-z0-9]+;
703     func entity(p *Markdown, data []byte, offset int) (int, *Node) {
704     data = data[offset:]
705    
706     end := 1
707    
708     if end < len(data) && data[end] == '#' {
709     end++
710     }
711    
712     for end < len(data) && isalnum(data[end]) {
713     end++
714     }
715    
716     if end < len(data) && data[end] == ';' {
717     end++ // real entity
718     } else {
719     return 0, nil // lone '&'
720     }
721    
722     ent := data[:end]
723     // undo &amp; escaping or it will be converted to &amp;amp; by another
724     // escaper in the renderer
725     if bytes.Equal(ent, []byte("&amp;")) {
726     ent = []byte{'&'}
727     }
728    
729     return end, text(ent)
730     }
731    
732     func linkEndsWithEntity(data []byte, linkEnd int) bool {
733     entityRanges := htmlEntityRe.FindAllIndex(data[:linkEnd], -1)
734     return entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd
735     }
736    
// hasPrefixCaseInsensitive is a custom implementation of
// strings.HasPrefix(strings.ToLower(s), prefix)
// we rolled our own because ToLower pulls in a huge machinery of lowercasing
// anything from Unicode and that's very slow. Since this func will only be
// used on ASCII protocol prefixes, we can take shortcuts.
//
// A byte of s matches the corresponding byte of prefix when the two are equal,
// or when the byte of s is an ASCII upper-case letter whose lower-case form
// equals the prefix byte. Only 'A'-'Z' are folded: shifting every byte by
// 'a'-'A' would let control bytes match punctuation (0x1a would match ':')
// and could wrap around for bytes >= 0xE0.
func hasPrefixCaseInsensitive(s, prefix []byte) bool {
	if len(s) < len(prefix) {
		return false
	}
	const delta = 'a' - 'A'
	for i, want := range prefix {
		got := s[i]
		if got == want {
			continue
		}
		if got < 'A' || got > 'Z' || got+delta != want {
			return false
		}
	}
	return true
}
754    
// protocolPrefixes lists the lower-case scheme prefixes that maybeAutoLink
// looks for, case-insensitively, at the cursor position.
var protocolPrefixes = [][]byte{
	[]byte("http://"),
	[]byte("https://"),
	[]byte("ftp://"),
	[]byte("file://"),
	[]byte("mailto:"),
}

// shortestPrefix is used by maybeAutoLink to reject inputs that are too short
// to hold any of the prefixes.
const shortestPrefix = 6 // len("ftp://"), the shortest of the above
764    
765     func maybeAutoLink(p *Markdown, data []byte, offset int) (int, *Node) {
766     // quick check to rule out most false hits
767     if p.insideLink || len(data) < offset+shortestPrefix {
768     return 0, nil
769     }
770     for _, prefix := range protocolPrefixes {
771     endOfHead := offset + 8 // 8 is the len() of the longest prefix
772     if endOfHead > len(data) {
773     endOfHead = len(data)
774     }
775     if hasPrefixCaseInsensitive(data[offset:endOfHead], prefix) {
776     return autoLink(p, data, offset)
777     }
778     }
779     return 0, nil
780     }
781    
// autoLink tries to turn the bare URL at data[offset] into a Link node.
//
// If the position lies inside an anchor element matched by anchorRe, the rest
// of that element is returned as an HTMLSpan instead. Otherwise the URL must
// pass isSafeLink; it extends up to the first byte for which isEndOfLink is
// true, after which trailing punctuation and an unbalanced closing delimiter
// are trimmed. It returns the number of bytes consumed and the node, or
// (0, nil) when no link is recognised.
func autoLink(p *Markdown, data []byte, offset int) (int, *Node) {
	// Now a more expensive check to see if we're not inside an anchor element
	anchorStart := offset
	offsetFromAnchor := 0
	for anchorStart > 0 && data[anchorStart] != '<' {
		anchorStart--
		offsetFromAnchor++
	}

	anchorStr := anchorRe.Find(data[anchorStart:])
	if anchorStr != nil {
		// Emit only the part of the anchor that lies at or after offset.
		anchorClose := NewNode(HTMLSpan)
		anchorClose.Literal = anchorStr[offsetFromAnchor:]
		return len(anchorStr) - offsetFromAnchor, anchorClose
	}

	// scan backward for a word boundary
	rewind := 0
	for offset-rewind > 0 && rewind <= 7 && isletter(data[offset-rewind-1]) {
		rewind++
	}
	if rewind > 6 { // longest supported protocol is "mailto" which has 6 letters
		return 0, nil
	}

	// Keep the full input for the delimiter-balancing scan further down.
	origData := data
	data = data[offset-rewind:]

	if !isSafeLink(data) {
		return 0, nil
	}

	linkEnd := 0
	for linkEnd < len(data) && !isEndOfLink(data[linkEnd]) {
		linkEnd++
	}

	// Skip punctuation at the end of the link
	if (data[linkEnd-1] == '.' || data[linkEnd-1] == ',') && data[linkEnd-2] != '\\' {
		linkEnd--
	}

	// But don't skip semicolon if it's a part of escaped entity:
	if data[linkEnd-1] == ';' && data[linkEnd-2] != '\\' && !linkEndsWithEntity(data, linkEnd) {
		linkEnd--
	}

	// See if the link finishes with a punctuation sign that can be closed.
	var copen byte
	switch data[linkEnd-1] {
	case '"':
		copen = '"'
	case '\'':
		copen = '\''
	case ')':
		copen = '('
	case ']':
		copen = '['
	case '}':
		copen = '{'
	default:
		copen = 0
	}

	if copen != 0 {
		// Index in origData of the byte just before the closing delimiter.
		bufEnd := offset - rewind + linkEnd - 2

		openDelim := 1

		/* Try to close the final punctuation sign in this same line;
		 * if we managed to close it outside of the URL, that means that it's
		 * not part of the URL. If it closes inside the URL, that means it
		 * is part of the URL.
		 *
		 * Examples:
		 *
		 *      foo http://www.pokemon.com/Pikachu_(Electric) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *      foo (http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *      foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric))
		 *
		 *      (foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => foo http://www.pokemon.com/Pikachu_(Electric)
		 */

		// Walk backwards to the start of the line, counting closers up and
		// openers down, until the count reaches zero.
		for bufEnd >= 0 && origData[bufEnd] != '\n' && openDelim != 0 {
			if origData[bufEnd] == data[linkEnd-1] {
				openDelim++
			}

			if origData[bufEnd] == copen {
				openDelim--
			}

			bufEnd--
		}

		if openDelim == 0 {
			linkEnd--
		}
	}

	var uLink bytes.Buffer
	unescapeText(&uLink, data[:linkEnd])

	if uLink.Len() > 0 {
		node := NewNode(Link)
		node.Destination = uLink.Bytes()
		node.AppendChild(text(uLink.Bytes()))
		return linkEnd, node
	}

	return linkEnd, nil
}
900    
901     func isEndOfLink(char byte) bool {
902     return isspace(char) || char == '<'
903     }
904    
// validUris lists the lower-case URI scheme prefixes that isSafeLink accepts.
// NOTE(review): this has "mailto://" while protocolPrefixes has "mailto:" —
// confirm the difference is intended.
var validUris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://")}

// validPaths lists the relative and absolute path prefixes that isSafeLink
// accepts.
var validPaths = [][]byte{[]byte("/"), []byte("./"), []byte("../")}
907    
908     func isSafeLink(link []byte) bool {
909     for _, path := range validPaths {
910     if len(link) >= len(path) && bytes.Equal(link[:len(path)], path) {
911     if len(link) == len(path) {
912     return true
913     } else if isalnum(link[len(path)]) {
914     return true
915     }
916     }
917     }
918    
919     for _, prefix := range validUris {
920     // TODO: handle unicode here
921     // case-insensitive prefix test
922     if len(link) > len(prefix) && bytes.Equal(bytes.ToLower(link[:len(prefix)]), prefix) && isalnum(link[len(prefix)]) {
923     return true
924     }
925     }
926    
927     return false
928     }
929    
930     // return the length of the given tag, or 0 is it's not valid
931     func tagLength(data []byte) (autolink autolinkType, end int) {
932     var i, j int
933    
934     // a valid tag can't be shorter than 3 chars
935     if len(data) < 3 {
936     return notAutolink, 0
937     }
938    
939     // begins with a '<' optionally followed by '/', followed by letter or number
940     if data[0] != '<' {
941     return notAutolink, 0
942     }
943     if data[1] == '/' {
944     i = 2
945     } else {
946     i = 1
947     }
948    
949     if !isalnum(data[i]) {
950     return notAutolink, 0
951     }
952    
953     // scheme test
954     autolink = notAutolink
955    
956     // try to find the beginning of an URI
957     for i < len(data) && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') {
958     i++
959     }
960    
961     if i > 1 && i < len(data) && data[i] == '@' {
962     if j = isMailtoAutoLink(data[i:]); j != 0 {
963     return emailAutolink, i + j
964     }
965     }
966    
967     if i > 2 && i < len(data) && data[i] == ':' {
968     autolink = normalAutolink
969     i++
970     }
971    
972     // complete autolink test: no whitespace or ' or "
973     switch {
974     case i >= len(data):
975     autolink = notAutolink
976     case autolink != notAutolink:
977     j = i
978    
979     for i < len(data) {
980     if data[i] == '\\' {
981     i += 2
982     } else if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isspace(data[i]) {
983     break
984     } else {
985     i++
986     }
987    
988     }
989    
990     if i >= len(data) {
991     return autolink, 0
992     }
993     if i > j && data[i] == '>' {
994     return autolink, i + 1
995     }
996    
997     // one of the forbidden chars has been found
998     autolink = notAutolink
999     }
1000     i += bytes.IndexByte(data[i:], '>')
1001     if i < 0 {
1002     return autolink, 0
1003     }
1004     return autolink, i + 1
1005     }
1006    
1007     // look for the address part of a mail autolink and '>'
1008     // this is less strict than the original markdown e-mail address matching
1009     func isMailtoAutoLink(data []byte) int {
1010     nb := 0
1011    
1012     // address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'
1013     for i := 0; i < len(data); i++ {
1014     if isalnum(data[i]) {
1015     continue
1016     }
1017    
1018     switch data[i] {
1019     case '@':
1020     nb++
1021    
1022     case '-', '.', '_':
1023     break
1024    
1025     case '>':
1026     if nb == 1 {
1027     return i + 1
1028     }
1029     return 0
1030     default:
1031     return 0
1032     }
1033     }
1034    
1035     return 0
1036     }
1037    
// look for the next emph char, skipping other constructs
//
// helperFindEmphChar returns the index in data of the next occurrence of c
// that can act as an emphasis delimiter. Escaped bytes are ignored and code
// spans and links are stepped over. While stepping over such a construct the
// first c seen inside it is remembered and returned if the construct turns
// out to be unterminated. The result is 0 when nothing is found; a match at
// index 0 is reported as 0 as well.
func helperFindEmphChar(data []byte, c byte) int {
	size := len(data)
	pos := 0

	for pos < size {
		// Advance to the next byte that needs attention.
		for pos < size {
			b := data[pos]
			if b == c || b == '`' || b == '[' {
				break
			}
			pos++
		}
		if pos >= size {
			return 0
		}

		// do not count escaped chars
		if pos != 0 && data[pos-1] == '\\' {
			pos++
			continue
		}

		switch data[pos] {
		case c:
			return pos

		case '`':
			// skip a code span
			fallback := 0
			pos++
			for pos < size && data[pos] != '`' {
				if fallback == 0 && data[pos] == c {
					fallback = pos
				}
				pos++
			}
			if pos >= size {
				return fallback
			}
			pos++

		case '[':
			// skip a link
			fallback := 0
			pos++
			for pos < size && data[pos] != ']' {
				if fallback == 0 && data[pos] == c {
					fallback = pos
				}
				pos++
			}
			pos++
			for pos < size && (data[pos] == ' ' || data[pos] == '\n') {
				pos++
			}
			if pos >= size {
				return fallback
			}
			if data[pos] != '[' && data[pos] != '(' { // not a link
				if fallback > 0 {
					return fallback
				}
				continue
			}

			// The scan below stops at the next occurrence of the opening
			// byte itself ('[' or '('), not at its closing counterpart.
			opener := data[pos]
			pos++
			for pos < size && data[pos] != opener {
				if fallback == 0 && data[pos] == c {
					return pos
				}
				pos++
			}
			if pos >= size {
				return fallback
			}
			pos++
		}
	}
	return 0
}
1111    
1112     func helperEmphasis(p *Markdown, data []byte, c byte) (int, *Node) {
1113     i := 0
1114    
1115     // skip one symbol if coming from emph3
1116     if len(data) > 1 && data[0] == c && data[1] == c {
1117     i = 1
1118     }
1119    
1120     for i < len(data) {
1121     length := helperFindEmphChar(data[i:], c)
1122     if length == 0 {
1123     return 0, nil
1124     }
1125     i += length
1126     if i >= len(data) {
1127     return 0, nil
1128     }
1129    
1130     if i+1 < len(data) && data[i+1] == c {
1131     i++
1132     continue
1133     }
1134    
1135     if data[i] == c && !isspace(data[i-1]) {
1136    
1137     if p.extensions&NoIntraEmphasis != 0 {
1138     if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) {
1139     continue
1140     }
1141     }
1142    
1143     emph := NewNode(Emph)
1144     p.inline(emph, data[:i])
1145     return i + 1, emph
1146     }
1147     }
1148    
1149     return 0, nil
1150     }
1151    
1152     func helperDoubleEmphasis(p *Markdown, data []byte, c byte) (int, *Node) {
1153     i := 0
1154    
1155     for i < len(data) {
1156     length := helperFindEmphChar(data[i:], c)
1157     if length == 0 {
1158     return 0, nil
1159     }
1160     i += length
1161    
1162     if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) {
1163     nodeType := Strong
1164     if c == '~' {
1165     nodeType = Del
1166     }
1167     node := NewNode(nodeType)
1168     p.inline(node, data[:i])
1169     return i + 2, node
1170     }
1171     i++
1172     }
1173     return 0, nil
1174     }
1175    
1176     func helperTripleEmphasis(p *Markdown, data []byte, offset int, c byte) (int, *Node) {
1177     i := 0
1178     origData := data
1179     data = data[offset:]
1180    
1181     for i < len(data) {
1182     length := helperFindEmphChar(data[i:], c)
1183     if length == 0 {
1184     return 0, nil
1185     }
1186     i += length
1187    
1188     // skip whitespace preceded symbols
1189     if data[i] != c || isspace(data[i-1]) {
1190     continue
1191     }
1192    
1193     switch {
1194     case i+2 < len(data) && data[i+1] == c && data[i+2] == c:
1195     // triple symbol found
1196     strong := NewNode(Strong)
1197     em := NewNode(Emph)
1198     strong.AppendChild(em)
1199     p.inline(em, data[:i])
1200     return i + 3, strong
1201     case (i+1 < len(data) && data[i+1] == c):
1202     // double symbol found, hand over to emph1
1203     length, node := helperEmphasis(p, origData[offset-2:], c)
1204     if length == 0 {
1205     return 0, nil
1206     }
1207     return length - 2, node
1208     default:
1209     // single symbol found, hand over to emph2
1210     length, node := helperDoubleEmphasis(p, origData[offset-1:], c)
1211     if length == 0 {
1212     return 0, nil
1213     }
1214     return length - 1, node
1215     }
1216     }
1217     return 0, nil
1218     }
1219    
1220     func text(s []byte) *Node {
1221     node := NewNode(Text)
1222     node.Literal = s
1223     return node
1224     }
1225    
// normalizeURI is a placeholder for URI normalization; for now it returns s
// unchanged.
func normalizeURI(s []byte) []byte {
	// TODO: implement
	return s
}