Tag.action = filter: remove tags, but keep content, validate: keep content as long as it passes rules, remove: remove tag and contents
Attribute.name = id, class, href, align, width, etc.
Attribute.onInvalid = what to do when the attribute is invalid, e.g., remove the tag (removeTag), remove the attribute (removeAttribute), filter the tag (filterTag)
Attribute.description = What rules in English you want to tell the users they can have for this attribute. Include helpful things so they'll be able to tune their HTML
-->
<!--
Some attributes are common to all (or most) HTML tags. There aren't many that qualify for this. You have to make sure there's no
collisions between any of these attribute names with attribute names of other tags that are for different purposes.
-->
<common-attributes>
<!-- Attributes shared by (nearly) every HTML tag. -->
<attribute name="id"
    description="The 'id' of any HTML attribute should not contain anything besides letters and numbers">
    <regexp-list>
        <regexp name="htmlId"/>
    </regexp-list>
</attribute>
<attribute name="class"
    description="The 'class' of any HTML attribute is usually a single word, but it can also be a list of class names separated by spaces">
    <regexp-list>
        <regexp name="htmlClass"/>
    </regexp-list>
</attribute>
<attribute name="lang"
    description="The 'lang' attribute tells the browser what language the element's attribute values and content are written in">
    <regexp-list>
        <!-- Inline pattern: between 2 and 20 ASCII letters. -->
        <regexp value="[a-zA-Z]{2,20}"/>
    </regexp-list>
</attribute>
<attribute name="title"
    description="The 'title' attribute provides text that shows up in a 'tooltip' when a user hovers their mouse over the element">
    <regexp-list>
        <regexp name="htmlTitle"/>
    </regexp-list>
</attribute>
<attribute name="alt"
    description="The 'alt' attribute provides alternative text to users when its visual representation is not available">
    <regexp-list>
        <regexp name="paragraph"/>
    </regexp-list>
</attribute>
<!--
    'style' carries no rules of its own on purpose: its value is checked by the
    inline stylesheet scanner. It is an unfortunate special case, but there is
    no alternative.
-->
<attribute name="style"
    description="The 'style' attribute provides the ability for users to change many attributes of the tag's contents using a strict syntax"/>
<!-- 'media' is accepted through either the inline pattern or one of the listed keywords. -->
<attribute name="media">
    <regexp-list>
        <regexp value="[a-zA-Z0-9,\-\s]+"/>
    </regexp-list>
    <literal-list>
        <literal value="screen"/>
        <literal value="tty"/>
        <literal value="tv"/>
        <literal value="projection"/>
        <literal value="handheld"/>
        <literal value="print"/>
        <literal value="braille"/>
        <literal value="aural"/>
        <literal value="all"/>
    </literal-list>
</attribute>
<!-- Attributes used by anchors and related tags. -->
<!-- onInvalid="filterTag" has been removed as per suggestion at OWASP SJ 2007 - just "name" is valid -->
<attribute name="href">
    <regexp-list>
        <regexp name="onsiteURL"/>
        <regexp name="offsiteURL"/>
    </regexp-list>
</attribute>
<attribute name="name">
    <regexp-list>
        <!--
            The '$' is allowed for the sake of .NET control names. It is
            doubtful that users will submit server-generated .NET control
            names, but the test cases depend on it.
        -->
        <regexp value="[a-zA-Z0-9\-_\$]+"/>
    </regexp-list>
</attribute>
<attribute name="shape"
    description="The 'shape' attribute defines the shape of the selectable area">
    <literal-list>
        <literal value="default"/>
        <literal value="rect"/>
        <literal value="circle"/>
        <literal value="poly"/>
    </literal-list>
</attribute>
<!-- Table attributes: numeric sizing, backgrounds/colours, and header/cell metadata. -->
<attribute name="border">
    <regexp-list>
        <regexp name="number"/>
    </regexp-list>
</attribute>
<attribute name="cellpadding">
    <regexp-list>
        <regexp name="number"/>
    </regexp-list>
</attribute>
<attribute name="cellspacing">
    <regexp-list>
        <regexp name="number"/>
    </regexp-list>
</attribute>
<attribute name="colspan">
    <regexp-list>
        <regexp name="number"/>
    </regexp-list>
</attribute>
<attribute name="rowspan">
    <regexp-list>
        <regexp name="number"/>
    </regexp-list>
</attribute>
<!-- Only the on-site URL pattern is listed for 'background'; off-site URLs are not. -->
<attribute name="background">
    <regexp-list>
        <regexp name="onsiteURL"/>
    </regexp-list>
</attribute>
<attribute name="bgcolor">
    <regexp-list>
        <regexp name="colorName"/>
        <regexp name="colorCode"/>
    </regexp-list>
</attribute>
<attribute name="abbr">
    <regexp-list>
        <regexp name="paragraph"/>
    </regexp-list>
</attribute>
<attribute name="headers"
    description="The 'headers' attribute is a space-separated list of cell IDs">
    <regexp-list>
        <!-- NOTE(review): the '*' inside the character class is a literal asterisk, not a quantifier; confirm it is intended. -->
        <regexp value="[a-zA-Z0-9\s*]*"/>
    </regexp-list>
</attribute>
<attribute name="charoff">
    <regexp-list>
        <!--
            Refer to the shared "numberOrPercent" pattern by name, as 'width'
            and 'height' do. Written as value="numberOrPercent" it would be an
            inline pattern matching only that literal text.
        -->
        <regexp name="numberOrPercent"/>
    </regexp-list>
</attribute>
<attribute name="char">
    <regexp-list>
        <!-- At most one character. -->
        <regexp value=".{0,1}"/>
    </regexp-list>
</attribute>
<attribute name="axis"
    description="The 'axis' attribute is a comma-separated list of related header cells">
    <regexp-list>
        <regexp value="[a-zA-Z0-9\s*,]*"/>
    </regexp-list>
</attribute>
<attribute name="nowrap"
    description="The 'nowrap' attribute tells the browser not to wrap text that goes over one line">
    <regexp-list>
        <regexp name="anything"/>
        <!-- Stricter alternative, currently disabled: -->
        <!-- <regexp value="(nowrap){0,1}"/> -->
    </regexp-list>
</attribute>
<!-- Positioning attributes: sizes are a number or a percentage, alignments are fixed keywords. -->
<attribute name="width">
    <regexp-list>
        <regexp name="numberOrPercent"/>
    </regexp-list>
</attribute>
<attribute name="height">
    <regexp-list>
        <regexp name="numberOrPercent"/>
    </regexp-list>
</attribute>
<attribute name="align"
    description="The 'align' attribute of an HTML element is a direction word, like 'left', 'right' or 'center'">
    <literal-list>
        <literal value="center"/>
        <literal value="middle"/>
        <literal value="left"/>
        <literal value="right"/>
        <literal value="justify"/>
        <literal value="char"/>
    </literal-list>
</attribute>
<attribute name="valign"
    description="The 'valign' attribute of an HTML attribute is a direction word, like 'baseline','bottom','middle' or 'top'">
    <literal-list>
        <literal value="baseline"/>
        <literal value="bottom"/>
        <literal value="middle"/>
        <literal value="top"/>
    </literal-list>
</attribute>
<!--
    Intrinsic JavaScript event attributes. Each one accepts exactly two
    literal values and nothing else.
-->
<attribute name="onFocus"
    description="The 'onFocus' event is executed when the control associated with the tag gains focus">
    <literal-list>
        <literal value="javascript:void(0)"/>
        <literal value="javascript:history.go(-1)"/>
    </literal-list>
</attribute>
<attribute name="onBlur"
    description="The 'onBlur' event is executed when the control associated with the tag loses focus">
    <literal-list>
        <literal value="javascript:void(0)"/>
        <literal value="javascript:history.go(-1)"/>
    </literal-list>
</attribute>
<attribute name="onClick"
    description="The 'onClick' event is executed when the control associated with the tag is clicked">
    <literal-list>
        <literal value="javascript:void(0)"/>
        <literal value="javascript:history.go(-1)"/>
    </literal-list>
</attribute>
<attribute name="onDblClick"
    description="The 'onDblClick' event is executed when the control associated with the tag is clicked twice immediately">
    <literal-list>
        <literal value="javascript:void(0)"/>
        <literal value="javascript:history.go(-1)"/>
    </literal-list>
</attribute>
<attribute name="onMouseDown"
    description="The 'onMouseDown' event is executed when the control associated with the tag is clicked but not yet released">
    <literal-list>
        <literal value="javascript:void(0)"/>
        <literal value="javascript:history.go(-1)"/>
    </literal-list>
</attribute>
<attribute name="onMouseUp"
    description="The 'onMouseUp' event is executed when the control associated with the tag is clicked after the button is released">
    <literal-list>
        <literal value="javascript:void(0)"/>
        <literal value="javascript:history.go(-1)"/>
    </literal-list>
</attribute>
<attribute name="onMouseOver"
    description="The 'onMouseOver' event is executed when the user's mouse hovers over the control associated with the tag">
    <literal-list>
        <literal value="javascript:void(0)"/>
        <literal value="javascript:history.go(-1)"/>
    </literal-list>
</attribute>
<!-- Header-cell scope keyword (a table attribute that sits in this section). -->
<attribute name="scope"
    description="The 'scope' attribute defines what's covered by the header cells">
    <literal-list>
        <literal value="row"/>
        <literal value="col"/>
        <literal value="rowgroup"/>
        <literal value="colgroup"/>
    </literal-list>
</attribute>
<!-- If you want users to be able to mess with tabindex, uncomment this -->
<!--
<attribute name="tabindex" description="...">
<regexp-list>
<regexp name="number"/>
</regexp-list>
</attribute>
-->
<!-- Attributes shared by input and form controls. -->
<!-- 'disabled', 'readonly' and 'accesskey' all use the shared "anything" pattern. -->
<attribute name="disabled">
    <regexp-list>
        <regexp name="anything"/>
    </regexp-list>
</attribute>
<attribute name="readonly">
    <regexp-list>
        <regexp name="anything"/>
    </regexp-list>
</attribute>
<attribute name="accesskey">
    <regexp-list>
        <regexp name="anything"/>
    </regexp-list>
</attribute>
<attribute name="size">
    <regexp-list>
        <regexp name="number"/>
    </regexp-list>
</attribute>
<attribute name="autocomplete">
    <literal-list>
        <literal value="on"/>
        <literal value="off"/>
    </literal-list>
</attribute>
<attribute name="rows">
    <regexp-list>
        <regexp name="number"/>
    </regexp-list>
</attribute>
<attribute name="cols">
    <regexp-list>
        <regexp name="number"/>
    </regexp-list>
</attribute>
</common-attributes>
<!--
This requires normal updates as browsers continue to diverge from the W3C and each other. As long as the browser wars continue
this is going to continue. I'm not sure war is the right word for what's going on. Doesn't somebody have to win a war after
a while? Even wars of attrition, surely?
-->
<!--
    Attributes referenced here by name only; each has a definition of the same
    name under common-attributes.
    NOTE(review): presumably these are accepted on every tag; confirm against
    the policy engine.
-->
<global-tag-attributes>
    <!-- Not valid in base, head, html, meta, param, script, style, and title elements. -->
    <attribute name="id"/>
    <attribute name="style"/>
    <attribute name="title"/>
    <attribute name="class"/>

    <!-- Not valid in base, br, frame, frameset, hr, iframe, param, and script elements. -->
    <attribute name="lang"/>
</global-tag-attributes>
<!--
    Tag names listed one per 'tag' element.
    NOTE(review): presumably these tags are encoded (rendered as text) rather
    than filtered or removed; confirm against the policy engine.
-->
<tags-to-encode>
    <tag>g</tag>
    <tag>grin</tag>
</tags-to-encode>
<tag-rules>
<!-- JavaScript-related tags: 'script' is dropped together with its contents. -->
<tag name="script" action="remove"/>
<!-- Although no JavaScript can fire inside a noscript tag, CSS is still a viable attack vector. -->
<tag name="noscript" action="validate"/>

<!-- Frames and related tags are all removed. -->
<tag name="iframe" action="remove"/>
<tag name="frameset" action="remove"/>
<tag name="frame" action="remove"/>
<!-- Form-related tags -->
<tag name="label" action="validate">
    <!-- 'for' is checked with the same named pattern as the common 'id' attribute. -->
    <attribute name="for">
        <regexp-list>
            <regexp name="htmlId"/>
        </regexp-list>
    </attribute>
</tag>
<!-- Formatting tags. All are validated; only 'p' lists an attribute ('align') of its own. -->
<tag name="h1" action="validate"/>
<tag name="h2" action="validate"/>
<tag name="h3" action="validate"/>
<tag name="h4" action="validate"/>
<tag name="h5" action="validate"/>
<tag name="h6" action="validate"/>

<tag name="p" action="validate">
    <attribute name="align"/>
</tag>

<tag name="i" action="validate"/>
<tag name="b" action="validate"/>
<tag name="u" action="validate"/>
<tag name="strong" action="validate"/>
<tag name="em" action="validate"/>
<tag name="small" action="validate"/>
<tag name="big" action="validate"/>
<tag name="pre" action="validate"/>
<tag name="code" action="validate"/>
<tag name="cite" action="validate"/>
<tag name="samp" action="validate"/>
<tag name="sub" action="validate"/>
<tag name="sup" action="validate"/>
<tag name="strike" action="validate"/>
<tag name="center" action="validate"/>
<tag name="blockquote" action="validate"/>

<tag name="hr" action="validate"/>
<tag name="br" action="validate"/>
<!-- 'font' tag: colour, face and size are each defined inline for this tag. -->
<tag name="font" action="validate">
    <attribute name="color">
        <regexp-list>
            <regexp name="colorName"/>
            <regexp name="colorCode"/>
        </regexp-list>
    </attribute>
    <attribute name="face">
        <regexp-list>
            <regexp value="[\w;, \-]+"/>
        </regexp-list>
    </attribute>
    <attribute name="size">
        <regexp-list>
            <!-- Optional sign followed by one or more digits. -->
            <regexp value="(\+|-){0,1}(\d)+"/>
        </regexp-list>
    </attribute>
</tag>
<!-- Anchor and anchor-related tags -->
<tag name="a" action="validate">
    <!-- onInvalid="filterTag" has been removed as per suggestion at OWASP SJ 2007 - just "name" is valid -->
    <attribute name="href"/>
    <attribute name="onFocus"/>
    <attribute name="onBlur"/>
    <attribute name="nohref">
        <regexp-list>
            <regexp name="anything"/>
        </regexp-list>
    </attribute>
    <!-- The only 'rel' value accepted is "nofollow". -->
    <attribute name="rel">
        <literal-list>
            <literal value="nofollow"/>
        </literal-list>
    </attribute>
    <attribute name="name"/>
</tag>
<tag name="map" action="validate"/>
<!-- base tag removed per demo - this could be enabled with literal-list values you allow -->
<!--
<tag name="base" action="validate">
<attribute name="href"/>
</tag>
-->
<!-- Stylesheet tags -->
<tag name="style" action="validate">
    <!-- The only 'type' value accepted is "text/css". -->
    <attribute name="type">
        <literal-list>
            <literal value="text/css"/>
        </literal-list>
    </attribute>
    <attribute name="media"/>
</tag>
<!-- Generic containers: 'span' lists no attributes of its own, 'div' lists 'align'. -->
<tag name="span" action="validate"/>
<tag name="div" action="validate">
    <attribute name="align"/>
</tag>
<!-- <attribute name="id"/> what could an attacker do if they could overwrite an existing div definition? prolly something bad -->
<!-- <attribute name="class"/> what could an attacker do if they could specify any class in the namespace? prolly something bad -->
<!-- Image and image-related tags -->
<tag name="img" action="validate">
    <!-- An invalid 'src' takes the whole tag with it (onInvalid="removeTag"). -->
    <attribute name="src" onInvalid="removeTag">
        <regexp-list>
            <regexp name="onsiteURL"/>
            <regexp name="offsiteURL"/>
        </regexp-list>
    </attribute>
    <attribute name="name"/>
    <attribute name="alt"/>
    <attribute name="height"/>
    <attribute name="width"/>
    <attribute name="border"/>
    <attribute name="align"/>
    <attribute name="hspace">
        <regexp-list>
            <regexp name="number"/>
        </regexp-list>
    </attribute>
    <attribute name="vspace">
        <regexp-list>
            <regexp name="number"/>
        </regexp-list>
    </attribute>
</tag>
<!-- no way to do this safely without hooking up the same code to @import to embed the remote stylesheet (a malicious user could change the offsite resource to be malicious after validation) -->
<!-- 'azimuth': one of the listed keywords, or a value matching the named "angle" pattern. -->
<property name="azimuth"
    description="This property is most likely to be implemented by mixing the same signal into different channels at differing volumes.">
    <literal-list>
        <literal value="left-side"/>
        <literal value="far-left"/>
        <literal value="left"/>
        <literal value="center-left"/>
        <literal value="center"/>
        <literal value="center-right"/>
        <literal value="right"/>
        <literal value="far-right"/>
        <literal value="right-side"/>
        <literal value="behind"/>
        <literal value="leftwards"/>
        <literal value="rightwards"/>
        <literal value="inherit"/>
    </literal-list>
    <regexp-list>
        <regexp name="angle"/>
    </regexp-list>
</property>
<!-- 'background' shorthand: 'inherit', or values of the individual background properties listed below. -->
<property name="background"
    description="The 'background' property is a shorthand property for setting the individual background properties (i.e., 'background-color', 'background-image', 'background-repeat', 'background-attachment' and 'background-position') at the same place in the style sheet.">
    <literal-list>
        <literal value="inherit"/>
    </literal-list>
    <shorthand-list>
        <shorthand name="background-color"/>
        <shorthand name="background-image"/>
        <shorthand name="background-repeat"/>
        <shorthand name="background-attachment"/>
        <shorthand name="background-position"/>
    </shorthand-list>
</property>
<!-- 'background-attachment': keyword-only property. -->
<property name="background-attachment"
    description="If a background image is specified, this property specifies whether it is fixed with regard to the viewport ('fixed') or scrolls along with the document ('scroll').">
    <literal-list>
        <literal value="scroll"/>
        <literal value="fixed"/>
        <literal value="inherit"/>
    </literal-list>
</property>
<!--
    'background-color': 'transparent', 'inherit', or a value matching one of
    the named colour patterns. The angle brackets around "color" in the
    description are escaped because a raw '<' is not allowed inside an XML
    attribute value.
-->
<property name="background-color"
    description="This property sets the background color of an element, either a &lt;color&gt; value or the keyword 'transparent', to make the underlying colors shine through.">
    <literal-list>
        <literal value="transparent"/>
        <literal value="inherit"/>
    </literal-list>
    <regexp-list>
        <regexp name="colorName"/>
        <regexp name="colorCode"/>
        <regexp name="rgbCode"/>
        <regexp name="systemColor"/>
    </regexp-list>
</property>
<!-- 'background-image': 'none', 'inherit', or a value matching one of the named CSS URI patterns. -->
<property name="background-image"
    description="This property sets the background image of an element.">
    <literal-list>
        <literal value="none"/>
        <literal value="inherit"/>
    </literal-list>
    <regexp-list>
        <regexp name="cssOffsiteUri"/>
        <regexp name="cssOnsiteUri"/>
    </regexp-list>
</property>
<!--
    'background-position': a position keyword, or a value matching the named
    "percentage" or "length" pattern. 'center' serves both axes, so it is
    listed once.
-->
<property name="background-position"
    description="If a background image has been specified, this property specifies its initial position.">
    <literal-list>
        <literal value="top"/>
        <literal value="center"/>
        <literal value="bottom"/>
        <literal value="left"/>
        <literal value="right"/>
        <literal value="inherit"/>
    </literal-list>
    <regexp-list>
        <regexp name="percentage"/>
        <regexp name="length"/>
    </regexp-list>
</property>
<propertyname="background-repeat"description="If a background image is specified, this property specifies whether the image is repeated (tiled), and how.">
<!-- 'border-spacing': 'inherit' or a value matching the named "length" pattern; declared default is "0". -->
<property name="border-spacing"
    default="0"
    description="The lengths specify the distance that separates adjacent cell borders. If one length is specified, it gives both the horizontal and vertical spacing. If two are specified, the first gives the horizontal spacing and the second the vertical spacing. Lengths may not be negative.">
    <category-list>
        <category value="visual"/>
    </category-list>
    <literal-list>
        <literal value="inherit"/>
    </literal-list>
    <regexp-list>
        <regexp name="length"/>
    </regexp-list>
</property>
<!-- 'clip': 'auto', 'inherit', or a value matching the named "length" pattern; declared default is "auto". -->
<property name="clip"
    default="auto"
    description="The 'clip' property applies to elements that have a 'overflow' property with a value other than 'visible'.">
    <category-list>
        <category value="visual"/>
    </category-list>
    <literal-list>
        <literal value="auto"/>
        <literal value="inherit"/>
    </literal-list>
    <regexp-list>
        <regexp name="length"/>
    </regexp-list>
</property>
<!-- 'counter-increment': 'none', 'inherit', or values matching the named "cssIdentifier" / "integer" patterns. -->
<property name="counter-increment"
    default="none"
    description="The 'counter-increment' property accepts one or more names of counters (identifiers), each one optionally followed by an integer.">
    <category-list>
        <category value="all"/>
    </category-list>
    <literal-list>
        <literal value="none"/>
        <literal value="inherit"/>
    </literal-list>
    <regexp-list>
        <regexp name="cssIdentifier"/>
        <regexp name="integer"/>
    </regexp-list>
</property>
<!--
    'cursor': one of the listed keywords, or a value matching one of the named
    CSS URI patterns. 'w-resize' and 'text' are separate literals; as the
    single value "w-resize| text" neither keyword could ever match.
-->
<property name="cursor"
    default="auto"
    description="This property specifies the type of cursor to be displayed for the pointing device.">
    <category-list>
        <category value="visual"/>
        <category value="interactive"/>
    </category-list>
    <literal-list>
        <literal value="auto"/>
        <literal value="inherit"/>
        <literal value="crosshair"/>
        <literal value="default"/>
        <literal value="pointer"/>
        <literal value="move"/>
        <literal value="e-resize"/>
        <literal value="ne-resize"/>
        <literal value="nw-resize"/>
        <literal value="n-resize"/>
        <literal value="se-resize"/>
        <literal value="sw-resize"/>
        <literal value="s-resize"/>
        <literal value="w-resize"/>
        <literal value="text"/>
        <literal value="wait"/>
        <literal value="help"/>
    </literal-list>
    <regexp-list>
        <regexp name="cssOffsiteUri"/>
        <regexp name="cssOnsiteUri"/>
    </regexp-list>
</property>
<!-- 'text-shadow': 'none', 'inherit', or values matching the named colour and "length" patterns. -->
<property name="text-shadow"
    default="none"
    description="This property accepts a comma-separated list of shadow effects to be applied to the text of the element.">
    <category-list>
        <category value="visual"/>
    </category-list>
    <literal-list>
        <literal value="none"/>
        <literal value="inherit"/>
    </literal-list>
    <regexp-list>
        <regexp name="colorName"/>
        <regexp name="colorCode"/>
        <regexp name="rgbCode"/>
        <regexp name="systemColor"/>
        <regexp name="length"/>
    </regexp-list>
</property>
<!-- 'font' shorthand: a listed keyword (or the "/" separator), or values of the individual font properties listed below. -->
<property name="font"
    description="The 'font' property is, except as described below, a shorthand property for setting 'font-style', 'font-variant', 'font-weight', 'font-size', 'line-height', and 'font-family', at the same place in the style sheet.">
    <category-list>
        <category value="visual"/>
    </category-list>
    <literal-list>
        <literal value="/"/>
        <literal value="caption"/>
        <literal value="icon"/>
        <literal value="menu"/>
        <literal value="message-box"/>
        <literal value="small-caption"/>
        <literal value="status-bar"/>
        <literal value="inherit"/>
    </literal-list>
    <shorthand-list>
        <shorthand name="font-style"/>
        <shorthand name="font-variant"/>
        <shorthand name="font-weight"/>
        <shorthand name="font-size"/>
        <shorthand name="line-height"/>
        <shorthand name="font-family"/>
    </shorthand-list>
</property>
<!--
    'font-family': a listed family name, or a value matching the inline
    pattern (word characters, commas, hyphens, single and double quotes,
    spaces).
-->
<property name="font-family"
    description="This property specifies a prioritized list of font family names and/or generic family names.">
    <category-list>
        <category value="visual"/>
    </category-list>
    <!--
        Generic families plus a few common named fonts. The pattern below
        additionally accepts other family names, so this list is not
        exhaustive.
    -->
    <literal-list>
        <literal value="serif"/>
        <literal value="arial"/>
        <literal value="lucida console"/>
        <literal value="sans-serif"/>
        <literal value="cursive"/>
        <literal value="verdana"/>
        <literal value="fantasy"/>
        <literal value="monospace"/>
    </literal-list>
    <regexp-list>
        <!-- Quotes are written as entities: a raw '"' would end the attribute value early. -->
        <regexp value="[\w,\-&apos;&quot; ]+"/>
    </regexp-list>
</property>
<!-- 'page': 'auto', or a value matching the named "cssIdentifier" pattern. -->
<property name="page"
    description="The 'page' property can be used to specify a particular type of page where an element should be displayed.">
    <category-list>
        <category value="visual"/>
        <category value="paged"/>
    </category-list>
    <literal-list>
        <literal value="auto"/>
    </literal-list>
    <regexp-list>
        <regexp name="cssIdentifier"/>
    </regexp-list>
</property>
<!-- 'play-during': a listed keyword, or a value matching one of the named CSS URI patterns; declared default is "auto". -->
<property name="play-during"
    default="auto"
    description="Similar to the 'cue-before' and 'cue-after' properties, this property specifies a sound to be played as a background while an element's content is spoken.">
    <category-list>
        <category value="aural"/>
    </category-list>
    <literal-list>
        <literal value="mix"/>
        <literal value="repeat"/>
        <literal value="none"/>
        <literal value="auto"/>
        <literal value="inherit"/>
    </literal-list>
    <regexp-list>
        <regexp name="cssOffsiteUri"/>
        <regexp name="cssOnsiteUri"/>
    </regexp-list>
</property>
<!-- 'text-align': keyword-only property. -->
<property name="text-align"
    description="This property describes how inline content of a block is aligned.">
    <category-list>
        <category value="visual"/>
    </category-list>
    <!-- String alignment (lining table cells up on a character) is left out for safety. -->
    <literal-list>
        <literal value="left"/>
        <literal value="right"/>
        <literal value="center"/>
        <literal value="justify"/>
        <literal value="inherit"/>
    </literal-list>
</property>
<propertyname="voice-family"description="The value is a comma-separated, prioritized list of voice family names (compare with 'font-family').">