Commit 4b75bd36 authored by Arvind Sankar's avatar Arvind Sankar Committed by Ard Biesheuvel

efi/libstub: Add UTF-8 decoding to efi_puts

In order to be able to use the UTF-16 support added to vsprintf in the
previous commit, enhance efi_puts to decode UTF-8 into UTF-16. Invalid
UTF-8 encodings are passed through unchanged.
Signed-off-by: default avatarArvind Sankar <nivedita@alum.mit.edu>
Link: https://lore.kernel.org/r/20200518190716.751506-22-nivedita@alum.mit.eduSigned-off-by: default avatarArd Biesheuvel <ardb@kernel.org>
parent d850a2ff
......@@ -36,17 +36,74 @@ void efi_char16_puts(efi_char16_t *str)
output_string, str);
}
static
u32 utf8_to_utf32(const u8 **s8)
{
u32 c32;
u8 c0, cx;
size_t clen, i;
c0 = cx = *(*s8)++;
/*
* The position of the most-significant 0 bit gives us the length of
* a multi-octet encoding.
*/
for (clen = 0; cx & 0x80; ++clen)
cx <<= 1;
/*
* If the 0 bit is in position 8, this is a valid single-octet
* encoding. If the 0 bit is in position 7 or positions 1-3, the
* encoding is invalid.
* In either case, we just return the first octet.
*/
if (clen < 2 || clen > 4)
return c0;
/* Get the bits from the first octet. */
c32 = cx >> clen--;
for (i = 0; i < clen; ++i) {
/* Trailing octets must have 10 in most significant bits. */
cx = (*s8)[i] ^ 0x80;
if (cx & 0xc0)
return c0;
c32 = (c32 << 6) | cx;
}
/*
* Check for validity:
* - The character must be in the Unicode range.
* - It must not be a surrogate.
* - It must be encoded using the correct number of octets.
*/
if (c32 > 0x10ffff ||
(c32 & 0xf800) == 0xd800 ||
clen != (c32 >= 0x80) + (c32 >= 0x800) + (c32 >= 0x10000))
return c0;
*s8 += clen;
return c32;
}
void efi_puts(const char *str)
{
efi_char16_t buf[128];
size_t pos = 0, lim = ARRAY_SIZE(buf);
const u8 *s8 = (const u8 *)str;
u32 c32;
while (*str) {
if (*str == '\n')
while (*s8) {
if (*s8 == '\n')
buf[pos++] = L'\r';
/* Cast to unsigned char to avoid sign-extension */
buf[pos++] = (unsigned char)(*str++);
if (*str == '\0' || pos >= lim - 2) {
c32 = utf8_to_utf32(&s8);
if (c32 < 0x10000) {
/* Characters in plane 0 use a single word. */
buf[pos++] = c32;
} else {
/*
* Characters in other planes encode into a surrogate
* pair.
*/
buf[pos++] = (0xd800 - (0x10000 >> 10)) + (c32 >> 10);
buf[pos++] = 0xdc00 + (c32 & 0x3ff);
}
if (*s8 == '\0' || pos >= lim - 2) {
buf[pos] = L'\0';
efi_char16_puts(buf);
pos = 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment