quote.go 5.56 KB
Newer Older
1 2 3 4 5 6 7
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package strconv

import (
	"bytes";
	"os";
	"unicode";
	"utf8";
)

13
// lowerhex holds the lowercase hexadecimal digits in order; indexing it
// with a value in the range 0-15 yields that value's hex digit.
const lowerhex = "0123456789abcdef"
14

Russ Cox's avatar
Russ Cox committed
15 16 17 18
// Quote returns a double-quoted Go string literal
// representing s.  The returned string s uses Go escape
// sequences (\t, \n, \xFF, \u0100) for control characters
// and non-ASCII characters.
Russ Cox's avatar
Russ Cox committed
19
func Quote(s string) string {
Russ Cox's avatar
Russ Cox committed
20
	// TODO(rsc): String accumulation could be more efficient.
21
	t := `"`;
22 23 24
	for ; len(s) > 0; s = s[1:len(s)] {
		switch c := s[0]; {
		case c == '"':
25
			t += `\"`;
26
		case c == '\\':
27
			t += `\\`;
28 29 30
		case ' ' <= c && c <= '~':
			t += string(c);
		case c == '\a':
31
			t += `\a`;
32
		case c == '\b':
33
			t += `\b`;
34
		case c == '\f':
35
			t += `\f`;
36
		case c == '\n':
37
			t += `\n`;
38
		case c == '\r':
39
			t += `\r`;
40
		case c == '\t':
41
			t += `\t`;
42
		case c == '\v':
43 44
			t += `\v`;

45 46
		case c < utf8.RuneSelf:
			t += `\x` + string(lowerhex[c>>4]) + string(lowerhex[c&0xF]);
Russ Cox's avatar
Russ Cox committed
47

48 49
		case utf8.FullRuneInString(s):
			r, size := utf8.DecodeRuneInString(s);
50 51 52
			if r == utf8.RuneError && size == 1 {
				goto EscX;
			}
53
			s = s[size-1:len(s)];	// next iteration will slice off 1 more
54 55 56
			if r < 0x10000 {
				t += `\u`;
				for j:=uint(0); j<4; j++ {
57
					t += string(lowerhex[(r>>(12-4*j))&0xF]);
58 59 60 61
				}
			} else {
				t += `\U`;
				for j:=uint(0); j<8; j++ {
62
					t += string(lowerhex[(r>>(28-4*j))&0xF]);
63 64 65 66 67 68
				}
			}

		default:
		EscX:
			t += `\x`;
69 70
			t += string(lowerhex[c>>4]);
			t += string(lowerhex[c&0xF]);
71 72 73 74 75 76
		}
	}
	t += `"`;
	return t;
}

Russ Cox's avatar
Russ Cox committed
77 78
// CanBackquote reports whether s can be written as a backquoted (raw)
// Go string literal.  It returns false if s contains a backquote or any
// byte below space other than tab, and true otherwise.
func CanBackquote(s string) bool {
	for i := 0; i < len(s); i++ {
		switch c := s[i]; {
		case c == '`':
			// would terminate the literal early
			return false;
		case c == '\t':
			// tab is the only control character permitted
		case c < ' ':
			return false;
		}
	}
	return true;
}

Russ Cox's avatar
Russ Cox committed
88 89 90 91 92 93 94 95 96 97 98 99 100
// unhex converts the hexadecimal digit b (0-9, a-f or A-F) to its
// numeric value.  ok is false, and v is 0, if b is not a hex digit.
func unhex(b byte) (v int, ok bool) {
	if b >= '0' && b <= '9' {
		return int(b - '0'), true;
	}
	if b >= 'a' && b <= 'f' {
		return int(b-'a') + 10, true;
	}
	if b >= 'A' && b <= 'F' {
		return int(b-'A') + 10, true;
	}
	return 0, false;
}

Russ Cox's avatar
Russ Cox committed
101 102 103 104 105 106 107 108 109 110 111 112 113 114
// UnquoteChar decodes the first character or byte in the escaped string
// or character literal represented by the string s.
// It returns four values: 
// 1) value, the decoded Unicode code point or byte value;
// 2) multibyte, a boolean indicating whether the decoded character
//    requires a multibyte UTF-8 representation;
// 3) tail, the remainder of the string after the character; and
// 4) an error that will be nil if the character is syntactically valid.
// The second argument, quote, specifies the type of literal being parsed
// and therefore which escaped quote character is permitted.
// If set to a single quote, it permits the sequence \' and disallows unescaped '.
// If set to a double quote, it permits \" and disallows unescaped ".
// If set to zero, it does not permit either escape and allows both quote characters to appear unescaped.
func UnquoteChar(s string, quote byte) (value int, multibyte bool, tail string, err os.Error) {
Russ Cox's avatar
Russ Cox committed
115
	// easy cases
116
	switch c := s[0]; {
Russ Cox's avatar
Russ Cox committed
117 118 119
	case c == quote && (quote == '\'' || quote == '"'):
		err = os.EINVAL;
		return;
Russ Cox's avatar
Russ Cox committed
120
	case c >= utf8.RuneSelf:
121
		r, size := utf8.DecodeRuneInString(s);
Russ Cox's avatar
Russ Cox committed
122
		return r, true, s[size:len(s)], nil;
Russ Cox's avatar
Russ Cox committed
123
	case c != '\\':
Russ Cox's avatar
Russ Cox committed
124
		return int(s[0]), false, s[1:len(s)], nil;
Russ Cox's avatar
Russ Cox committed
125 126 127
	}

	// hard case: c is backslash
128
	if len(s) <= 1 {
Russ Cox's avatar
Russ Cox committed
129
		err = os.EINVAL;
Russ Cox's avatar
Russ Cox committed
130 131
		return;
	}
132 133
	c := s[1];
	s = s[2:len(s)];
Russ Cox's avatar
Russ Cox committed
134 135 136

	switch c {
	case 'a':
Russ Cox's avatar
Russ Cox committed
137
		value = '\a';
Russ Cox's avatar
Russ Cox committed
138
	case 'b':
Russ Cox's avatar
Russ Cox committed
139
		value = '\b';
Russ Cox's avatar
Russ Cox committed
140
	case 'f':
Russ Cox's avatar
Russ Cox committed
141
		value = '\f';
Russ Cox's avatar
Russ Cox committed
142
	case 'n':
Russ Cox's avatar
Russ Cox committed
143
		value = '\n';
Russ Cox's avatar
Russ Cox committed
144
	case 'r':
Russ Cox's avatar
Russ Cox committed
145
		value = '\r';
Russ Cox's avatar
Russ Cox committed
146
	case 't':
Russ Cox's avatar
Russ Cox committed
147
		value = '\t';
Russ Cox's avatar
Russ Cox committed
148
	case 'v':
Russ Cox's avatar
Russ Cox committed
149
		value = '\v';
Russ Cox's avatar
Russ Cox committed
150 151 152 153 154 155 156 157 158 159 160
	case 'x', 'u', 'U':
		n := 0;
		switch c {
		case 'x':
			n = 2;
		case 'u':
			n = 4;
		case 'U':
			n = 8;
		}
		v := 0;
161
		if len(s) < n {
Russ Cox's avatar
Russ Cox committed
162
			err = os.EINVAL;
163 164
			return;
		}
Russ Cox's avatar
Russ Cox committed
165
		for j := 0; j < n; j++ {
166
			x, ok := unhex(s[j]);
Russ Cox's avatar
Russ Cox committed
167
			if !ok {
Russ Cox's avatar
Russ Cox committed
168
				err = os.EINVAL;
Russ Cox's avatar
Russ Cox committed
169 170 171 172
				return;
			}
			v = v<<4 | x;
		}
173
		s = s[n:len(s)];
Russ Cox's avatar
Russ Cox committed
174
		if c == 'x' {
175
			// single-byte string, possibly not UTF-8
Russ Cox's avatar
Russ Cox committed
176 177
			value = v;
			break;
Russ Cox's avatar
Russ Cox committed
178
		}
179
		if v > unicode.MaxRune {
Russ Cox's avatar
Russ Cox committed
180
			err = os.EINVAL;
Russ Cox's avatar
Russ Cox committed
181 182
			return;
		}
Russ Cox's avatar
Russ Cox committed
183 184
		value = v;
		multibyte = true;
Russ Cox's avatar
Russ Cox committed
185
	case '0', '1', '2', '3', '4', '5', '6', '7':
186 187
		v := int(c) - '0';
		if len(s) < 2 {
Russ Cox's avatar
Russ Cox committed
188
			err = os.EINVAL;
189 190 191 192
			return;
		}
		for j := 0; j < 2; j++ {	// one digit already; two more
			x := int(s[j]) - '0';
Russ Cox's avatar
Russ Cox committed
193 194 195 196 197
			if x < 0 || x > 7 {
				return;
			}
			v = (v<<3) | x;
		}
198
		s = s[2:len(s)];
Russ Cox's avatar
Russ Cox committed
199
		if v > 255 {
Russ Cox's avatar
Russ Cox committed
200
			err = os.EINVAL;
Russ Cox's avatar
Russ Cox committed
201 202
			return;
		}
Russ Cox's avatar
Russ Cox committed
203 204 205 206 207 208 209 210 211 212 213 214
		value = v;
	case '\\':
		value = '\\';
	case '\'', '"':
		if c != quote {
			err = os.EINVAL;
			return;
		}
		value = int(c);
	default:
		err = os.EINVAL;
		return;
Russ Cox's avatar
Russ Cox committed
215
	}
Russ Cox's avatar
Russ Cox committed
216
	tail = s;
Russ Cox's avatar
Russ Cox committed
217 218 219 220 221 222 223 224
	return;
}

// Unquote interprets s as a single-quoted, double-quoted,
// or backquoted Go string literal, returning the string value
// that s quotes.  (If s is single-quoted, it would be a Go
// character literal; Unquote returns the corresponding
// one-character string.)
225
func Unquote(s string) (t string, err os.Error) {
Russ Cox's avatar
Russ Cox committed
226 227
	err = os.EINVAL;  // assume error for easy return
	n := len(s);
228
	if n < 2 {
Russ Cox's avatar
Russ Cox committed
229 230
		return;
	}
231 232 233 234 235
	quote := s[0];
	if quote != s[n-1] {
		return;
	}
	s = s[1:n-1];
Russ Cox's avatar
Russ Cox committed
236

237 238 239 240 241 242 243 244
	if quote == '`' {
		return s, nil;
	}
	if quote != '"' && quote != '\'' {
		return;
	}

	// TODO(rsc): String accumulation could be more efficient.
Russ Cox's avatar
Russ Cox committed
245
	var tt string;
246
	for len(s) > 0 {
Russ Cox's avatar
Russ Cox committed
247 248
		c, multibyte, ss, err1 := UnquoteChar(s, quote);
		if err1 != nil {
249 250 251
			err = err1;
			return;
		}
Russ Cox's avatar
Russ Cox committed
252 253 254 255 256 257
		s = ss;
		if multibyte || c < utf8.RuneSelf {
			tt += string(c);
		} else {
			tt += string([]byte{byte(c)});
		}
258 259 260
		if quote == '\'' && len(s) != 0 {
			// single-quoted must be single character
			return;
Russ Cox's avatar
Russ Cox committed
261 262
		}
	}
263
	return tt, nil
Russ Cox's avatar
Russ Cox committed
264
}