SlimXml的一个使用注意点

举报
Amrf 发表于 2019/08/11 12:56:29 2019/08/11
【摘要】 setString/getString特殊字符不会自动转义 -----后来发现有个SLIM_TRANSFER_CHARACTER宏,开启即可 assignString函数运行报错,修正为

setString/getString特殊字符不会自动转义

-----后来发现有个SLIM_TRANSFER_CHARACTER宏,开启即可

assignString函数运行报错,修正为

setString(temp);
//memcpy(str, temp.c_str(), sizeof(Char) * actualLength);
//str[actualLength] = 0;
buffer = const_cast<slim::Char*>(found + 1);
if (length >= 6)
{
	if (Strncmp(buffer, T("quot;"), 5) == 0)//此外转义符号里面没有分号,类似我也处理一下

下面是rapidxml中的转义和解转义方法

  • 转义

// Copy characters from given range to given output iterator and expand
        // characters into references (&lt; &gt; &apos; &quot; &amp;)
        template<class OutIt, class Ch>
        inline OutIt copy_and_expand_chars(const Ch *begin, const Ch *end, Ch noexpand, OutIt out)
        {
            while (begin != end)
            {
                if (*begin == noexpand)
                {
                    *out++ = *begin;    // No expansion, copy character
                }
                else
                {
                    switch (*begin)
                    {
                    case Ch('<'):
                        *out++ = Ch('&'); *out++ = Ch('l'); *out++ = Ch('t'); *out++ = Ch(';');
                        break;
                    case Ch('>'): 
                        *out++ = Ch('&'); *out++ = Ch('g'); *out++ = Ch('t'); *out++ = Ch(';');
                        break;
                    case Ch('\''): 
                        *out++ = Ch('&'); *out++ = Ch('a'); *out++ = Ch('p'); *out++ = Ch('o'); *out++ = Ch('s'); *out++ = Ch(';');
                        break;
                    case Ch('"'): 
                        *out++ = Ch('&'); *out++ = Ch('q'); *out++ = Ch('u'); *out++ = Ch('o'); *out++ = Ch('t'); *out++ = Ch(';');
                        break;
                    case Ch('&'): 
                        *out++ = Ch('&'); *out++ = Ch('a'); *out++ = Ch('m'); *out++ = Ch('p'); *out++ = Ch(';'); 
                        break;
                    default:
                        *out++ = *begin;    // No expansion, copy character
                    }
                }
                ++begin;    // Step to next character
            }
            return out;
        }
  • 解转义

// Skip characters until predicate evaluates to true while doing the following:
        // - replacing XML character entity references with proper characters (&apos; &amp; &quot; &lt; &gt; &#...;)
        // - condensing whitespace sequences to single space character
        template<class StopPred, class StopPredPure, int Flags>
        static Ch *skip_and_expand_character_refs(Ch *&text)
        {
            // If entity translation, whitespace condense and whitespace trimming is disabled, use plain skip
            if (Flags & parse_no_entity_translation && 
                !(Flags & parse_normalize_whitespace) &&
                !(Flags & parse_trim_whitespace))
            {
                skip<StopPred, Flags>(text);
                return text;
            }
            
            // Use simple skip until first modification is detected
            skip<StopPredPure, Flags>(text);

            // Use translation skip
            Ch *src = text;
            Ch *dest = src;
            while (StopPred::test(*src))
            {
                // If entity translation is enabled    
                if (!(Flags & parse_no_entity_translation))
                {
                    // Test if replacement is needed
                    if (src[0] == Ch('&'))
                    {
                        switch (src[1])
                        {

                        // &amp; &apos;
                        case Ch('a'): 
                            if (src[2] == Ch('m') && src[3] == Ch('p') && src[4] == Ch(';'))
                            {
                                *dest = Ch('&');
                                ++dest;
                                src += 5;
                                continue;
                            }
                            if (src[2] == Ch('p') && src[3] == Ch('o') && src[4] == Ch('s') && src[5] == Ch(';'))
                            {
                                *dest = Ch('\'');
                                ++dest;
                                src += 6;
                                continue;
                            }
                            break;

                        // &quot;
                        case Ch('q'): 
                            if (src[2] == Ch('u') && src[3] == Ch('o') && src[4] == Ch('t') && src[5] == Ch(';'))
                            {
                                *dest = Ch('"');
                                ++dest;
                                src += 6;
                                continue;
                            }
                            break;

                        // &gt;
                        case Ch('g'): 
                            if (src[2] == Ch('t') && src[3] == Ch(';'))
                            {
                                *dest = Ch('>');
                                ++dest;
                                src += 4;
                                continue;
                            }
                            break;

                        // &lt;
                        case Ch('l'): 
                            if (src[2] == Ch('t') && src[3] == Ch(';'))
                            {
                                *dest = Ch('<');
                                ++dest;
                                src += 4;
                                continue;
                            }
                            break;

                        // &#...; - assumes ASCII
                        case Ch('#'): 
                            if (src[2] == Ch('x'))
                            {
                                unsigned long code = 0;
                                src += 3;   // Skip &#x
                                while (1)
                                {
                                    unsigned char digit = internal::lookup_tables<0>::lookup_digits[static_cast<unsigned char>(*src)];
                                    if (digit == 0xFF)
                                        break;
                                    code = code * 16 + digit;
                                    ++src;
                                }
                                insert_coded_character<Flags>(dest, code);    // Put character in output
                            }
                            else
                            {
                                unsigned long code = 0;
                                src += 2;   // Skip &#
                                while (1)
                                {
                                    unsigned char digit = internal::lookup_tables<0>::lookup_digits[static_cast<unsigned char>(*src)];
                                    if (digit == 0xFF)
                                        break;
                                    code = code * 10 + digit;
                                    ++src;
                                }
                                insert_coded_character<Flags>(dest, code);    // Put character in output
                            }
                            if (*src == Ch(';'))
                                ++src;
                            else
                                RAPIDXML_PARSE_ERROR("expected ;", src);
                            continue;

                        // Something else
                        default:
                            // Ignore, just copy '&' verbatim
                            break;

                        }
                    }
                }
                
                // If whitespace condensing is enabled
                if (Flags & parse_normalize_whitespace)
                {
                    // Test if condensing is needed                 
                    if (whitespace_pred::test(*src))
                    {
                        *dest = Ch(' '); ++dest;    // Put single space in dest
                        ++src;                      // Skip first whitespace char
                        // Skip remaining whitespace chars
                        while (whitespace_pred::test(*src))
                            ++src;
                        continue;
                    }
                }

                // No replacement, only copy character
                *dest++ = *src++;

            }

            // Return new end
            text = src;
            return dest;

        }

为slimxml添加相关转义的功能

参考:https://stackoverflow.com/questions/9904051/is-there-c-function-that-replace-xml-special-character-with-their-escape-seque

void escape(std::wstring& xml)
std::map<wchar_t, std::wstring> transformations;
    transformations[L'&']  = std::wstring(L"&amp;");
    transformations[L'\''] = std::wstring(L"&apos;");
    transformations[L'"']  = std::wstring(L"&quot;");
    transformations[L'>']  = std::wstring(L"&gt;");
    transformations[L'<']  = std::wstring(L"&lt;");

    // Build list of characters to be searched for.
    //
    std::string reserved_chars;
    for (auto ti = transformations.begin(); ti != transformations.end(); ti++)
    {
        reserved_chars += ti->first;
    }

    size_t pos = 0;
    while (std::wstring::npos != (pos = xml.find_first_of(reserved_chars, pos)))
    {
        xml.replace(pos, 1, transformations[xml[pos]]);
        pos++;
    }
}
//=================================
void replace_all(std::string& str, const std::string& old, const std::string& repl) {
    size_t pos = 0;
    while ((pos = str.find(old, pos)) != std::string::npos) {
        str.replace(pos, old.length(), repl);
        pos += repl.length();
    }
}

std::string escape_xml(std::string str) {
    replace_all(str, std::string("&"), std::string("&amp;"));
    replace_all(str, std::string("'"), std::string("&apos;"));
    replace_all(str, std::string("\""), std::string("&quot;"));
    replace_all(str, std::string(">"), std::string("&gt;"));
    replace_all(str, std::string("<"), std::string("&lt;"));

    return str;
}
//========================
std::string unescape_xml(std::string str) {
    replace_all(str, std::string("&amp;"), std::string("&"));
    replace_all(str, std::string("&apos;"), std::string("'"));
    replace_all(str, std::string("&quot;"), std::string("\""));
    replace_all(str, std::string("&gt;"), std::string(">"));
    replace_all(str, std::string("&lt;"), std::string("<"));

    return str;
}

需要好好考虑的是怎么修改才能避免应该解析和格式化的效率;

【版权声明】本文为华为云社区用户转载文章,如果您发现本社区中有涉嫌抄袭的内容,欢迎发送邮件进行举报,并提供相关证据,一经查实,本社区将立刻删除涉嫌侵权内容,举报邮箱: cloudbbs@huaweicloud.com
  • 点赞
  • 收藏
  • 关注作者

评论(0

0/1000
抱歉,系统识别当前为高风险访问,暂不支持该操作

全部回复

上滑加载中

设置昵称

在此一键设置昵称,即可参与社区互动!

*长度不超过10个汉字或20个英文字符,设置后3个月内不可修改。

*长度不超过10个汉字或20个英文字符,设置后3个月内不可修改。