//----------------------------------------------------------------------------- // sample config file for HTML tidy //----------------------------------------------------------------------------- // // Using a Configuration File // // Tidy now supports a configuration file, and this is now much the // most convenient way to configure Tidy. Assuming you have created // a config file named "config.txt" (the name doesn't matter), you // can instruct Tidy to use it via the command line option -config // config.txt, e.g. // // tidy -config config.txt file1.html file2.html // // Alternatively, you can name the default config file via the // environment variable named "HTML_TIDY". Note this should be the // absolute path since you are likely to want to run Tidy in different // directories. You can also set a config file at compile time by // defining CONFIG_FILE as the path string, see platform.h. // // You can now set config options on the command line by preceding // the name of the option immediately (no intervening space) // by "--", for example: // // tidy --break-before-br true --show-warnings false // // The following options are supported: // (bool values are either yes or no) // //----------------------------------------------------------------------------- tidy-mark: yes // bool_default=yes // If set to yes, Tidy will add a meta element to the document head // to indicate that the document has been tidied. To suppress this, // set tidy-mark to no. Tidy won't add a meta element if one is // already present. //----------------------------------------------------------------------------- markup: yes // bool_default=yes // Generate a pretty printed version of the markup. Note that Tidy // won't generate a pretty printed version if it finds unknown tags, // or missing trailing quotes on attribute values, or missing trailing // '>' on tags. 
//----------------------------------------------------------------------------- wrap: 66 // number_default=66 // Sets the right margin for line wrapping. Tidy tries to wrap lines // so that they do not exceed this length. Set wrap to zero if you // want to disable line wrapping. //----------------------------------------------------------------------------- wrap-attributes: no // bool_default=no // Wrap attribute values across lines for easier editing. // This option can be set independently of wrap-script-literals. //----------------------------------------------------------------------------- wrap-script-literals: no // bool_default=no // If set to yes, this allows lines to be wrapped within string // literals that appear in script attributes. The example shows // how Tidy wraps a really really long script string literal // inserting a backslash character before the linebreak: // // <a href="somewhere.html" onmouseover="document.status = '...some \ // really, really, really, really, really, really, really, really, \ // really, really long string..';">test</a> //----------------------------------------------------------------------------- wrap-asp: yes // bool_default=yes // If set to no, this prevents lines from being wrapped within ASP // pseudo elements, which look like: <% ... %>. //----------------------------------------------------------------------------- wrap-jste: yes // bool_default=yes // If set to no, this prevents lines from being wrapped within JSTE // pseudo elements, which look like: <# ... #>. //----------------------------------------------------------------------------- wrap-php: yes // bool_default=yes // If set to no, this prevents lines from being wrapped within PHP // pseudo elements, which look like: <?php ... ?>. //----------------------------------------------------------------------------- literal-attributes: no // bool_default=no // If set to yes, this ensures that whitespace characters within // attribute values are passed through unchanged. //----------------------------------------------------------------------------- tab-size: 4 // number_default=4 // Set number of columns between successive tab stops. 
It is used // to map tabs to spaces when reading files. Tidy never outputs // files with tabs. //----------------------------------------------------------------------------- indent: yes // (yes/no/auto)default=no // Indent block-level tags. If set to auto Tidy will decide whether // or not to indent the content of tags such as title, h1-h6, li, // td, th, or p depending on whether or not the content includes // a block-level element. You are advised to avoid setting indent // to yes as this can expose layout bugs in some browsers. //----------------------------------------------------------------------------- indent-spaces: 2 // number_default=2 // Set number of spaces to indent content when indentation is enabled. //----------------------------------------------------------------------------- indent-attributes: no // bool_default=no // If set to yes, begin each attribute on a new line. //----------------------------------------------------------------------------- hide-endtags: no // bool_default=no // Omit optional end-tags when generating the pretty printed markup. // This option is ignored if you are outputting to XML. //----------------------------------------------------------------------------- input-xml: no // bool_default=no // Use the XML parser rather than the error correcting HTML parser. //----------------------------------------------------------------------------- output-xml: no // bool_default=no // If set to yes, Tidy will generate the pretty printed output, // writing it as well-formed XML. Any entities not defined in XML // 1.0 will be written as numeric entities to allow them to be parsed // by an XML parser. The tags and attributes will be in the case used // in the input document, regardless of other options. //----------------------------------------------------------------------------- //add-xml-pi: no add-xml-decl: no // bool_default=no // If set to yes, Tidy will add the XML declaration when outputting // XML or XHTML. 
Note that if the input document includes an <?xml?> // declaration then it will appear in the output independent of the // value of this option. //----------------------------------------------------------------------------- output-xhtml: no // bool_default=no // Generate pretty printed output written as extensible HTML. // This option causes Tidy to set the doctype and default namespace // as appropriate to XHTML. If a doctype or namespace is given they // will be checked for consistency with the content of the document. // In the case of an inconsistency, the corrected values will appear // in the output. For XHTML, entities can be written as named or // numeric entities according to the value of the "numeric-entities" // property. The tags and attributes will be output in the case used // in the input document, regardless of other options. //----------------------------------------------------------------------------- doctype: strict // doctype: "-//W3C//DTD HTML 4.01//EN" // doctype: auto // (omit/auto/strict/loose/<fpi>)default=auto // This property controls the doctype declaration generated by Tidy. // If set to omit the output file won't contain a doctype declaration. // If set to auto (the default) Tidy will use an educated guess based // upon the contents of the document. If set to strict, Tidy will set // the doctype to the strict DTD. If set to loose, the doctype is set // to the loose (transitional) DTD. Alternatively, you can supply a // string for the formal public identifier (fpi) for example: // // doctype: "-//ACME//DTD HTML 3.14159//EN" // // // // If you specify the fpi for an XHTML document, Tidy will set the // system identifier to the empty string. Tidy leaves the document // type for generic XML documents unchanged. //----------------------------------------------------------------------------- char-encoding: ascii // char-encoding: latin1 // (raw/ascii/latin1/utf8/iso2022)default=ascii // Determines how Tidy interprets character streams. 
For ascii, Tidy // will accept Latin-1 character values, but will use entities for // all characters whose value > 127. For raw, Tidy will output values // above 127 without translating them into entities. For latin1, // characters above 255 will be written as entities. For utf8, Tidy // assumes that both input and output is encoded as UTF-8. You can use // iso2022 for files encoded using the ISO2022 family of encodings e.g. // ISO 2022-JP. The default is ascii. //----------------------------------------------------------------------------- numeric-entities: no // bool_default=no // Cause entities other than the basic XML 1.0 named entities to be // written in the numeric rather than the named entity form. //----------------------------------------------------------------------------- quote-marks: no // bool_default=no // If set to yes, this causes " characters to be written out as // &quot; as is preferred by some editing environments. The // apostrophe character ' is written out as &#39; since many web // browsers don't yet support &apos;. //----------------------------------------------------------------------------- quote-nbsp: yes // bool_default=yes // Cause non-breaking space characters to be written out as entities, // rather than as the Unicode character value 160 (decimal). //----------------------------------------------------------------------------- quote-ampersand: yes // bool_default=yes // Cause unadorned & characters to be written out as &amp;. //----------------------------------------------------------------------------- assume-xml-procins: no // bool_default=no // Change parsing of processing instructions to require ?> as the // terminator rather than >. This option is automatically set if // the input is in XML. //----------------------------------------------------------------------------- fix-backslash: yes // bool_default=yes // If set to yes, this causes backslash characters "\" in URLs to be // replaced by forward slashes "/". The default is yes. 
//----------------------------------------------------------------------------- break-before-br: no // bool_default=no // Output a line break before each <br> element. //----------------------------------------------------------------------------- uppercase-tags: no // bool_default=no // Cause tag names to be output in upper case. A no results in // lowercase, except for XML input where the original case is preserved. //----------------------------------------------------------------------------- uppercase-attributes: no // bool_default=no // Output attribute names upper case. A no results in lowercase, // except for XML where the original case is preserved. //----------------------------------------------------------------------------- word-2000: no // bool_default=no // If set to yes, Tidy will go to great pains to strip out all // the surplus stuff Microsoft Word 2000 inserts when you save // Word documents as "Web pages". Note that Tidy doesn't yet // know what to do with VML markup from Word, but in future I // hope to be able to map VML to SVG. // // Microsoft has developed its own optional filter for exporting // to HTML, and the 2.0 version is much improved. You can download // the filter free from the Microsoft Office Update site. //----------------------------------------------------------------------------- clean: no // bool_default=no // Strip out surplus presentational tags and attributes replacing // them by style rules and structural markup as appropriate. It // works well on the html saved from Microsoft Office'97. //----------------------------------------------------------------------------- logical-emphasis: no // bool_default=no // Replace any occurrence of <i> by <em> and any occurrence of <b> by // <strong>. //----------------------------------------------------------------------------- // drop-empty-paras: yes // bool_default=yes // Discard empty paragraphs. If set to no, empty paragraphs are // replaced by a pair of <br> elements as HTML4 precludes empty // paragraphs. //----------------------------------------------------------------------------- drop-font-tags: no // bool_default=no // If set to yes together with the clean option (see above), // Tidy will discard font and center tags rather than creating the // corresponding style rules. 
//----------------------------------------------------------------------------- enclose-text: yes // bool_default=no // Enclose any text found in the body element within a p element. // This is useful when you want to take an existing html file and // use it with a style sheet. Any text at the body level will screw // up the margins, but wrap the text within a p element and all is well! //----------------------------------------------------------------------------- enclose-block-text: yes // bool_default=no // Insert a p element to enclose any text found in any element that // allows mixed content for HTML transitional but not HTML strict. //----------------------------------------------------------------------------- fix-bad-comments: yes // bool_default=yes // Replace unexpected adjacent hyphens with "=" characters. This // option is provided for users of Cold Fusion which uses the comment // syntax: <!--- ---> //----------------------------------------------------------------------------- add-xml-space: no // bool_default=no // If set to yes, this causes Tidy to add xml:space="preserve" // to elements such as pre, style and script when generating XML. // This is needed if the whitespace in such elements is to be parsed // appropriately without having access to the DTD. //----------------------------------------------------------------------------- // alt-text: string // This allows you to set the default alt text for img elements. // This feature is dangerous as it suppresses further accessibility // warnings. YOU ARE RESPONSIBLE FOR MAKING YOUR DOCUMENTS ACCESSIBLE // TO PEOPLE WHO CAN'T SEE THE IMAGES!!! //----------------------------------------------------------------------------- write-back: no // bool_default=no // If set to yes, Tidy will write back the tidied markup to the same // file it read from. You are advised to keep copies of important // files before tidying them as on rare occasions the result may not // always be what you expect. 
//----------------------------------------------------------------------------- keep-time: yes // bool_default=yes // If set to yes, Tidy won't alter the last modified time for files // it writes back to. This allows you to tidy files without affecting // which ones will be uploaded to the Web server when using a tool // such as 'SiteCopy'. Note that this feature may not work on some // platforms. //----------------------------------------------------------------------------- // error-file: filename // Write errors and warnings to the named file rather than to stderr. //----------------------------------------------------------------------------- show-warnings: yes // bool_default=yes // If set to no, warnings are suppressed. This can be useful when a // few errors are hidden in a flurry of warnings. The default is yes. //----------------------------------------------------------------------------- quiet: no // bool_default=no // If set to yes, Tidy won't output the welcome message or the summary // of the numbers of errors and warnings. //----------------------------------------------------------------------------- gnu-emacs: no // bool_default=no // Change format for reporting errors and warnings to a format that // is more easily parsed by GNU Emacs. //----------------------------------------------------------------------------- split: no // bool_default=no // Use input file to create a sequence of slides, splitting the // markup prior to each successive <h2>. You can see an example // of the results in a recent talk I made on XHTML. The slides are // written to "slide1.html", "slide2.html" etc. //----------------------------------------------------------------------------- new-empty-tags: cfelse // new-empty-tags: tag1, tag2, tag3 // Use this to declare new empty inline tags. The option takes a // space or comma separated list of tag names. Unless you declare // new tags, Tidy will refuse to generate a tidied file if the // input includes previously unknown tags. Remember to also // declare empty tags as either inline or blocklevel, see below. //----------------------------------------------------------------------------- new-inline-tags: cfif, cfelse, math, mroot, mrow, mi, mn, mo, msqrt, mfrac, msubsup, munderover, munder, mover, mmultiscripts, msup, msub, mtext, mprescripts, mtable, mtr, mtd, mth // new-inline-tags: tag1, tag2, tag3 // Use this to declare new non-empty inline tags. The option takes // a space or comma separated list of tag names. Unless you declare // new tags, Tidy will refuse to generate a tidied file if the input // includes previously unknown tags. //----------------------------------------------------------------------------- new-blocklevel-tags: cfoutput, cfquery // new-blocklevel-tags: tag1, tag2, tag3 // Use this to declare new block-level tags. The option takes a space // or comma separated list of tag names. Unless you declare new tags, // Tidy will refuse to generate a tidied file if the input includes // previously unknown tags. Note you can't change the content model for // elements such as table, ul, ol and dl. This is explained in more // detail in the release notes. //----------------------------------------------------------------------------- // new-pre-tags: tag1, tag2, tag3 // Use this to declare new tags that are to be processed in exactly // the same way as HTML's pre element. The option takes a space or // comma separated list of tag names. 
Unless you declare new tags, // Tidy will refuse to generate a tidied file if the input includes // previously unknown tags. Note you can't as yet add new CDATA // elements (similar to script). //-----------------------------------------------------------------------------