Commit ff6acb1a authored by Serhiy Storchaka

Issue #23181: More "codepoint" -> "code point".

parent 117d7b29
......@@ -153,7 +153,7 @@ A single exception is defined as well:
.. method:: SGMLParser.convert_codepoint(codepoint)
Convert a codepoint to a :class:`str` value. Encodings can be handled here if
Convert a code point to a :class:`str` value. Encodings can be handled here if
appropriate, though the rest of :mod:`sgmllib` is oblivious on this matter.
.. versionadded:: 2.5
......
"""HTML character entity references."""
# maps the HTML entity name to the Unicode codepoint
# maps the HTML entity name to the Unicode code point
name2codepoint = {
'AElig': 0x00c6, # latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1
'Aacute': 0x00c1, # latin capital letter A with acute, U+00C1 ISOlat1
......@@ -256,7 +256,7 @@ name2codepoint = {
'zwnj': 0x200c, # zero width non-joiner, U+200C NEW RFC 2070
}
# maps the Unicode codepoint to the HTML entity name
# maps the Unicode code point to the HTML entity name
codepoint2name = {}
# maps the HTML entity name to the character
......
......@@ -72,7 +72,7 @@ class Test_IncrementalEncoder(unittest.TestCase):
self.assertEqual(encoder.reset(), None)
def test_stateful(self):
# jisx0213 encoder is stateful for a few codepoints. eg)
# jisx0213 encoder is stateful for a few code points. eg)
# U+00E6 => A9DC
# U+00E6 U+0300 => ABC4
# U+0300 => ABDC
......
......@@ -20,7 +20,7 @@ class TestBase:
roundtriptest = 1 # set if roundtrip is possible with unicode
has_iso10646 = 0 # set if this encoding contains whole iso10646 map
xmlcharnametest = None # string to test xmlcharrefreplace
unmappedunicode = u'\udeee' # a unicode codepoint that is not mapped.
unmappedunicode = u'\udeee' # a unicode code point that is not mapped.
def setUp(self):
if self.codec is None:
......
# To fully test this module, we would need a copy of the stringprep tables.
# Since we don't have them, this test checks only a few codepoints.
# Since we don't have them, this test checks only a few code points.
import unittest
from test import test_support
......
......@@ -874,9 +874,9 @@ class UnicodeTest(
def test_utf8_decode_invalid_sequences(self):
# continuation bytes in a sequence of 2, 3, or 4 bytes
continuation_bytes = map(chr, range(0x80, 0xC0))
# start bytes of a 2-byte sequence equivalent to codepoints < 0x7F
# start bytes of a 2-byte sequence equivalent to code points < 0x7F
invalid_2B_seq_start_bytes = map(chr, range(0xC0, 0xC2))
# start bytes of a 4-byte sequence equivalent to codepoints > 0x10FFFF
# start bytes of a 4-byte sequence equivalent to code points > 0x10FFFF
invalid_4B_seq_start_bytes = map(chr, range(0xF5, 0xF8))
invalid_start_bytes = (
continuation_bytes + invalid_2B_seq_start_bytes +
......
......@@ -15,7 +15,7 @@
#undef hz
#endif
/* GBK and GB2312 map differently in few codepoints that are listed below:
/* GBK and GB2312 map differently in few code points that are listed below:
*
* gb2312 gbk
* A1A4 U+30FB KATAKANA MIDDLE DOT U+00B7 MIDDLE DOT
......
......@@ -164,7 +164,7 @@ DECODER(big5hkscs)
default: return 2;
}
NEXT(2, 2) /* all decoded codepoints are pairs, above. */
NEXT(2, 2) /* all decoded code points are pairs, above. */
}
return 0;
......
......@@ -64,7 +64,7 @@ ENCODER(euc_kr)
OUT1(EUCKR_JAMO_FIRSTBYTE)
OUT2(EUCKR_JAMO_FILLER)
/* All codepoints in CP949 extension are in unicode
/* All code points in CP949 extension are in unicode
* Hangul Syllable area. */
assert(0xac00 <= c && c <= 0xd7a3);
c -= 0xac00;
......
......@@ -12,10 +12,10 @@
#include "multibytecodec.h"
/* a unicode "undefined" codepoint */
/* a unicode "undefined" code point */
#define UNIINV 0xFFFE
/* internal-use DBCS codepoints which aren't used by any charsets */
/* internal-use DBCS code points which aren't used by any charsets */
#define NOCHAR 0xFFFF
#define MULTIC 0xFFFE
#define DBCINV 0xFFFD
......
......@@ -2039,7 +2039,7 @@ PyObject *PyUnicode_DecodeUTF8Stateful(const char *s,
see http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf
(table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt
Uncomment the 2 lines below to make them invalid,
codepoints: d800-dfff; UTF-8: \xed\xa0\x80-\xed\xbf\xbf. */
code points: d800-dfff; UTF-8: \xed\xa0\x80-\xed\xbf\xbf. */
if ((s[1] & 0xc0) != 0x80 ||
(s[2] & 0xc0) != 0x80 ||
((unsigned char)s[0] == 0xE0 &&
......@@ -2337,7 +2337,7 @@ PyUnicode_DecodeUTF32Stateful(const char *s,
}
/* On narrow builds we split characters outside the BMP into two
codepoints => count how much extra space we need. */
code points => count how much extra space we need. */
#ifndef Py_UNICODE_WIDE
for (qq = q; e - qq >= 4; qq += 4)
if (qq[iorder[2]] != 0 || qq[iorder[3]] != 0)
......@@ -2372,7 +2372,7 @@ PyUnicode_DecodeUTF32Stateful(const char *s,
if (ch >= 0x110000)
{
errmsg = "codepoint not in range(0x110000)";
errmsg = "code point not in range(0x110000)";
startinpos = ((const char *)q)-starts;
endinpos = startinpos+4;
goto utf32Error;
......@@ -2449,7 +2449,7 @@ PyUnicode_EncodeUTF32(const Py_UNICODE *s,
p += 4; \
} while(0)
/* In narrow builds we can output surrogate pairs as one codepoint,
/* In narrow builds we can output surrogate pairs as one code point,
so we need less space. */
#ifndef Py_UNICODE_WIDE
for (i = pairs = 0; i < size-1; i++)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment