/* * Demo of handling malicious XML with Expat (tested with Expat 2.0.1) * v2.0 2008-09-13 * * Copyright (c) 2008 Sebastian Pipping * * == The MIT License == * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * * Sebastian Pipping */ #include #include #include #include // Config int const MAX_BYTES_PER_ENTITY_VALUE = 100000; int const MAX_LOOKUPS_PER_ENTITY_VALUE = 30; int const MAX_OUTPUT_INPUT_RATIO = 20; struct XmlCharStringCompare { bool operator()(XML_Char const * s1, XML_Char const * s2) const { return strcmp(s1, s2) < 0; } }; struct EntityInfo { int valueLen; int maxLookups; EntityInfo(int valueLen, int maxLookups) : valueLen(valueLen), maxLookups(maxLookups) { } }; typedef std::map MapType; typedef std::pair PairType; // Global vars XML_Index totalCharDataBytesServed = 0; MapType entityNameToValueLen; XML_Char * dupString(XML_Char const * source) { size_t const len = ::strlen(source); XML_Char * const dup = new XML_Char[len + 1]; ::strcpy(dup, source); return dup; } void initMap() { // Register default entities EntityInfo info(1, 0); entityNameToValueLen.insert(PairType(dupString("amp"), info)); entityNameToValueLen.insert(PairType(dupString("lt"), info)); entityNameToValueLen.insert(PairType(dupString("gt"), info)); entityNameToValueLen.insert(PairType(dupString("apos"), info)); entityNameToValueLen.insert(PairType(dupString("quot"), info)); } XML_Parser getParser(void * userData) { return reinterpret_cast(userData); } void panic(void * userData, XML_Char const * diagonis) { ::puts("\n PANIC:"); ::printf(" %s\n", diagonis); ::puts(" -> Content considered malicious XML"); ::puts(" -> Aborting"); ::XML_StopParser(getParser(userData), XML_FALSE); } void handleCharacterData(void *userData, const XML_Char *s, int len) { ::puts("BEGIN handleCharacterData"); totalCharDataBytesServed += (len * sizeof(XML_Char)); XML_Index const byteIndex = XML_GetCurrentByteIndex(getParser(userData)); XML_Char * toPrint = new XML_Char[len + 1]; ::strncpy(toPrint, s, len); toPrint[len] = '\0'; ::printf(" \"%s\"\n", toPrint); ::printf(" Byte index: %u\n", byteIndex); ::printf(" Char data bytes served: %u\n", totalCharDataBytesServed); delete [] toPrint; #if 1 // Prevent if (totalCharDataBytesServed > MAX_OUTPUT_INPUT_RATIO * byteIndex) { panic(userData, "Output/input ratio too large"); } #endif ::puts("END\n"); } XML_Char * makeString(XML_Char const * first, XML_Char const * afterLast) { size_t const len = afterLast - first; XML_Char * dup = new XML_Char[len + 1]; ::strncpy(dup, first, len); dup[len] = '\0'; return dup; } XML_Char * nextEntityRefMalloc(XML_Char const * start, XML_Char const * & atAmpersand, XML_Char const * & afterSemiColon) { XML_Char const * walker = start; while (true) { switch (walker[0]) { case '\0': // No complete entity found atAmpersand = start; afterSemiColon = walker; return NULL; case '&': // Entity start found atAmpersand = walker; break; case ';': // Entity stop found if (atAmpersand != NULL) { afterSemiColon = walker + 1; return makeString(atAmpersand + 1, walker); } break; } walker++; } } EntityInfo getEntityInfo(XML_Char const * entityName) { static EntityInfo errorInfo(0, 0); MapType::iterator found = entityNameToValueLen.find(entityName); if (found != entityNameToValueLen.end()) { return found->second; } else { return errorInfo; } } void setEntityInfo(XML_Char const * name, EntityInfo const & info) { entityNameToValueLen.insert(PairType(name, info)); } void handleEntityDeclaration(void *userData, const XML_Char *entityName, int is_parameter_entity, const XML_Char *value, int value_length, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId, const XML_Char *notationName) { ::puts("BEGIN handleEntityDeclaration"); ::printf(" %s := \"%s\"\n", entityName, value); XML_Char const * walker = value; int valueLen = 0; int maxLookups = 0; while (walker[0] != '\0') { XML_Char const * atAmpersand = NULL; XML_Char const * afterSemiColon = NULL; XML_Char * entityRefname = nextEntityRefMalloc(walker, atAmpersand, afterSemiColon); valueLen += (atAmpersand - walker); if (entityRefname != NULL) { EntityInfo const info = getEntityInfo(entityRefname); valueLen += info.valueLen; maxLookups += info.maxLookups + 1; } else { valueLen += (afterSemiColon - walker); break; } walker = afterSemiColon; } int const bytesNeeded = valueLen * sizeof(XML_Char); ::printf(" Length in bytes: %d\n", bytesNeeded); ::printf(" Maximum lookups: %d\n", maxLookups); EntityInfo const info(valueLen, maxLookups); setEntityInfo(entityName, info); #if 1 // Prevent if (bytesNeeded > MAX_BYTES_PER_ENTITY_VALUE) { panic(userData, "Entity takes too much space"); } else if (maxLookups > MAX_LOOKUPS_PER_ENTITY_VALUE) { panic(userData, "Entity requires too many lookups"); } #endif ::puts("END\n"); } int main() { initMap(); char const * const document = #if 0 "\n" "\t\n" "\t\n" "]>\n" "&a4;\n" #else // From http://www.cogsci.ed.ac.uk/~richard/billion-laughs.xml "\n" "\n" # if 0 "\n" # else # if 0 "\n" # else "\n" # endif # endif "\n" "\n" # if 0 "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "]>\n" "&laugh30;\n" # else "]>\n" "\n" "&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;\n" "&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;\n" "&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;\n" "&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;\n" "&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;\n" "&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;\n" "&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;\n" "&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;\n" "&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;\n" "&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;\n" "&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;\n" "&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;\n" "&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;&laugh2;\n" "" # endif #endif ; XML_Parser const parser = ::XML_ParserCreate(NULL); ::XML_SetCharacterDataHandler(parser, handleCharacterData); ::XML_SetEntityDeclHandler(parser, handleEntityDeclaration); ::XML_UseParserAsHandlerArg(parser); XML_Status const res = ::XML_Parse(parser, document, strlen(document), 1); bool const good = (res == XML_STATUS_OK); if (good) { ::puts("All good."); } else { ::printf("Error (Line %d, column %d): %s\n", ::XML_GetCurrentLineNumber(parser), ::XML_GetCurrentColumnNumber(parser), ::XML_ErrorString(::XML_GetErrorCode(parser))); } ::XML_ParserFree(parser); return good ? 0 : 1; }