Suffix Array

Share:
  1func newSuffixArray(s string) []int {
  2	s2 := make([]int, len(s))
  3	for i, v := range s {
  4		s2[i] = int(v)
  5	}
  6
  7	return sais(s2, 255)
  8}
  9
 10func newSuffixArrayInts(s []int) []int {
 11	n := len(s)
 12	idx := make([]int, n)
 13	for i := 0; i < n; i++ {
 14		idx[i] = i
 15	}
 16	sort.Slice(idx, func(i, j int) bool {
 17		return s[idx[i]] < s[idx[j]]
 18	})
 19
 20	s2 := make([]int, n)
 21	u := 0
 22	for i := 0; i < n; i++ {
 23		if i != 0 && s[idx[i-1]] != s[idx[i]] {
 24			u++
 25		}
 26		s2[idx[i]] = u
 27	}
 28
 29	return sais(s2, u)
 30}
 31
 32func sais(s []int, u int) []int {
 33	n := len(s)
 34	if n == 0 {
 35		return []int{}
 36	}
 37	if n == 1 {
 38		return []int{0}
 39	}
 40	if n == 2 {
 41		if s[0] < s[1] {
 42			return []int{0, 1}
 43		}
 44		return []int{1, 0}
 45	}
 46
 47	sa := make([]int, n)
 48	ls := make([]bool, n)
 49	for i := n - 2; i >= 0; i-- {
 50		if s[i] == s[i+1] {
 51			ls[i] = ls[i+1]
 52		} else {
 53			ls[i] = s[i] < s[i+1]
 54		}
 55	}
 56
 57	sumL, sumS := make([]int, u+1), make([]int, u+1)
 58	for i := 0; i < n; i++ {
 59		if !ls[i] {
 60			sumS[s[i]]++
 61		} else {
 62			sumL[s[i]+1]++
 63		}
 64	}
 65
 66	for i := 0; i <= u; i++ {
 67		sumS[i] += sumL[i]
 68		if i < u {
 69			sumL[i+1] += sumS[i]
 70		}
 71	}
 72
 73	induce := func(lms []int) {
 74		for i := 0; i < n; i++ {
 75			sa[i] = -1
 76		}
 77		buf := make([]int, u+1)
 78		copy(buf, sumS)
 79		for i := 0; i < len(lms); i++ {
 80			d := lms[i]
 81			if d == n {
 82				continue
 83			}
 84			sa[buf[s[d]]] = d
 85			buf[s[d]]++
 86		}
 87
 88		copy(buf, sumL)
 89		sa[buf[s[n-1]]] = n - 1
 90		buf[s[n-1]]++
 91		for i := 0; i < n; i++ {
 92			v := sa[i]
 93			if v >= 1 && !ls[v-1] {
 94				sa[buf[s[v-1]]] = v - 1
 95				buf[s[v-1]]++
 96			}
 97		}
 98		copy(buf, sumL)
 99		for i := n - 1; i >= 0; i-- {
100			v := sa[i]
101			if v >= 1 && ls[v-1] {
102				buf[s[v-1]+1]--
103				sa[buf[s[v-1]+1]] = v - 1
104			}
105		}
106	}
107
108	lmsMap := make([]int, n+1)
109	for i := 0; i <= n; i++ {
110		lmsMap[i] = -1
111	}
112	m := 0
113	for i := 1; i < n; i++ {
114		if !ls[i-1] && ls[i] {
115			lmsMap[i] = m
116			m++
117		}
118	}
119	lms := make([]int, 0, m)
120	for i := 1; i < n; i++ {
121		if !ls[i-1] && ls[i] {
122			lms = append(lms, i)
123		}
124	}
125
126	induce(lms)
127
128	if m != 0 {
129		sortedLms := make([]int, 0, m)
130		for i := 0; i < n; i++ {
131			if lmsMap[sa[i]] != -1 {
132				sortedLms = append(sortedLms, sa[i])
133			}
134		}
135		recS := make([]int, m)
136		recU := 0
137		recS[lmsMap[sortedLms[0]]] = 0
138		for i := 1; i < m; i++ {
139			l, r := sortedLms[i-1], sortedLms[i]
140			endL, endR := n, n
141			if lmsMap[l]+1 < m {
142				endL = lms[lmsMap[l]+1]
143			}
144			if lmsMap[r]+1 < m {
145				endR = lms[lmsMap[r]+1]
146			}
147			same := true
148			if endL-l != endR-r {
149				same = false
150			} else {
151				for l < endL {
152					if s[l] != s[r] {
153						break
154					}
155					r++
156					l++
157				}
158				if l == n || s[l] != s[r] {
159					same = false
160				}
161			}
162			if !same {
163				recU++
164			}
165			recS[lmsMap[sortedLms[i]]] = recU
166		}
167
168		recSa := sais(recS, recU)
169
170		for i := 0; i < m; i++ {
171			sortedLms[i] = lms[recSa[i]]
172		}
173		induce(sortedLms)
174	}
175
176	return sa
177}
178