Commit a58e93a3 authored by Marius Wachtler

Support UTF-8 BOMs inside source files

parent 153fd1eb
@@ -1064,6 +1064,12 @@ std::string PystonSourceReader::get_line() {
break; break;
line.push_back(c); line.push_back(c);
} while (c != '\n' && c != '\x0c'); } while (c != '\n' && c != '\x0c');
// check for UTF8 BOM
if (line_number == 0 && line[0] == '\xEF' && line[1] == '\xBB' && line[2] == '\xBF') {
set_encoding("utf-8");
line.erase(0, 3);
}
++line_number; ++line_number;
return line; return line;
} }
......
# fail-if: '-x' not in EXTRA_JIT_ARGS
# I really don't understand all the intricacies of unicode parsing, but apparently in addition to
# Python-specific coding lines, you can put a unicode byte order mark to signify that the text
# is encoded.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment