diff --git a/tools/jsonproc/jsonproc.cpp b/tools/jsonproc/jsonproc.cpp
index 23056a5ff3..9ef23cd43e 100755
--- a/tools/jsonproc/jsonproc.cpp
+++ b/tools/jsonproc/jsonproc.cpp
@@ -105,10 +105,18 @@ int main(int argc, char *argv[])
     });
 
     env.add_callback("cleanString", 1, [](Arguments& args) {
-        string badChars = ".'{} \n\t-\u00e9";
         string str = args.at(0)->get();
         for (unsigned int i = 0; i < str.length(); i++) {
-            if (badChars.find(str[i]) != std::string::npos) {
+            // This code is not Unicode aware, so UTF-8 is not easily parsable without introducing
+            // another library. Just filter out any non-alphanumeric characters for now: every
+            // byte that is not alphanumeric, plus a digit in the first position, becomes '_'.
+            // The <cctype> classifiers have undefined behavior for negative arguments other than
+            // EOF, and UTF-8 bytes >= 0x80 are negative where plain char is signed, so each byte
+            // is converted to unsigned char before it is classified.
+            // TODO: proper Unicode string normalization
+            const unsigned char ch = static_cast<unsigned char>(str[i]);
+            if ((i == 0 && isdigit(ch))
+                || !isalnum(ch)) {
                 str[i] = '_';
             }
         }