[libkml] 03/07: Imported Upstream version 1.3.0~r864+dfsg
Bas Couwenberg
sebastic at xs4all.nl
Tue Apr 22 14:34:45 UTC 2014
This is an automated email from the git hooks/post-receive script.
sebastic-guest pushed a commit to branch master
in repository libkml.
commit 958c9b26ad13e59ccd77758d24a0027c333c1357
Author: Bas Couwenberg <sebastic at xs4all.nl>
Date: Tue Apr 22 15:30:07 2014 +0200
Imported Upstream version 1.3.0~r864+dfsg
---
third_party/uriparser-0.7.5/doc/rfc1866.htm | 4446 --------------------
third_party/uriparser-0.7.5/doc/rfc3513.htm | 1579 -------
third_party/uriparser-0.7.5/doc/rfc3986.htm | 3539 ----------------
.../uriparser-0.7.5/doc/rfc3986_grammar_only.txt | 80 -
4 files changed, 9644 deletions(-)
diff --git a/third_party/uriparser-0.7.5/doc/rfc1866.htm b/third_party/uriparser-0.7.5/doc/rfc1866.htm
deleted file mode 100644
index 108a958..0000000
--- a/third_party/uriparser-0.7.5/doc/rfc1866.htm
+++ /dev/null
@@ -1,4446 +0,0 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-<html lang="en" xml:lang="en">
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=us-ascii" />
- <meta name="robots" content="index,follow" />
- <meta name="creator" content="rfcmarkup version 1.60" />
- <link rel="icon" href="/images/rfc.png" type="image/png" />
- <link rel="shortcut icon" href="/images/rfc.png" type="image/png" />
- <title>RFC 1866 - Hypertext Markup Language - 2.0</title>
-
- <style type="text/css">
- body {
- margin: 0px 8px;
- font-size: 1em;
- }
- h1, h2, h3, h4, h5, h6, .h1, .h2, .h3, .h4, .h5, .h6 {
- font-weight: bold;
- line-height: 0pt;
- display: inline;
- white-space: pre;
- font-family: monospace;
- font-size: 1em;
- font-weight: bold;
- }
- pre {
- font-size: 1em;
- }
- .pre {
- white-space: pre;
- font-family: monospace;
- }
- .header{
- font-weight: bold;
- }
- .invisible {
- text-decoration: none;
- color: white;
- }
- @media print {
- body {
- font-size: 10.5pt;
- }
- h1, h2, h3, h4, h5, h6 {
- font-size: 10.5pt;
- }
-
- a:link, a:visited {
- color: inherit;
- text-decoration: none;
- }
- .break {
- page-break-before: always;
- }
- .noprint {
- display: none;
- }
- }
- @media screen {
- .grey, .grey a:link, .grey a:visited {
- color: #777;
- }
- .docinfo {
- background-color: #EEE;
- }
- .top {
- border-top: 2px solid #EEE;
- }
- .bgwhite { background-color: white; }
- .bgred { background-color: #F44; }
- .bggrey { background-color: #666; }
- .bgbrown { background-color: #840; }
- .bgorange { background-color: #FA0; }
- .bgyellow { background-color: #EE0; }
- .bgmagenta{ background-color: #F4F; }
- .bgblue { background-color: #66F; }
- .bgcyan { background-color: #4DD; }
- .bggreen { background-color: #4F4; }
-
- .legend { font-size: 90%; }
- .cplate { font-size: 70%; border: solid grey 1px; }
- }
- </style>
-
- <script type="text/javascript"><!--
- function addHeaderTags() {
- var spans = document.getElementsByTagName("span");
- for (var i=0; i < spans.length; i++) {
- var elem = spans[i];
- if (elem) {
- var level = elem.getAttribute("class");
- if (level == "h1" || level == "h2" || level == "h3" || level == "h4" || level == "h5" || level == "h6") {
- elem.innerHTML = "<"+level+">"+elem.innerHTML+"</"+level+">";
- }
- }
- }
- }
- var legend_html = "Colour legend:<br /> <table> <tr><td>Unknown:</td> <td><span class='cplate bgwhite'> </span></td></tr> <tr><td>Draft:</td> <td><span class='cplate bgred'> </span></td></tr> <tr><td>Informational:</td> <td><span class='cplate bgorange'> </span></td></tr> <tr><td>Experimental:</td> <td><span class='cplate bgyellow'> &nb [...]
- function showElem(id) {
- var elem = document.getElementById(id);
- elem.innerHTML = eval(id+"_html");
- elem.style.visibility='visible';
- }
- function hideElem(id) {
- var elem = document.getElementById(id);
- elem.style.visibility='hidden';
- elem.innerHTML = "";
- }
- // -->
- </script>
-</head>
-<body onload="addHeaderTags()">
- <div style="height: 8px;">
- <div onmouseover="this.style.cursor='pointer';"
- onclick="showElem('legend');"
- onmouseout="hideElem('legend')"
- style="height: 6px; position: absolute;"
- class="pre noprint docinfo bgbrown"
- title="Click for colour legend." > </div>
- <div id="legend"
- class="docinfo noprint pre legend"
- style="position:absolute; top: 4px; left: 4ex; visibility:hidden; background-color: white; padding: 4px 9px 5px 7px; border: solid #345 1px; "
- onmouseover="showElem('legend');"
- onmouseout="hideElem('legend');">
- </div>
- </div>
-<span class="pre noprint docinfo top">[<a href="../html/" title="Document search and retrieval page">RFCs/IDs</a>] [<a href="/rfc/rfc1866.txt" title="Plaintext version of this document">Plain Text</a>] [From <a href="draft-ietf-html-spec">draft-ietf-html-spec</a>] </span><br />
-<span class="pre noprint docinfo"> </span><br />
-<span class="pre noprint docinfo">Obsoleted by: <a href="./rfc2854">2854</a> HISTORIC</span><br />
-<span class="pre noprint docinfo"> </span><br />
-<pre>
-Network Working Group T. Berners-Lee
-Request for Comments: 1866 MIT/W3C
-Category: Standards Track D. Connolly
- November 1995
-
-
- <span class="h1">Hypertext Markup Language - 2.0</span>
-
-Status of this Memo
-
- This document specifies an Internet standards track protocol for the
- Internet community, and requests discussion and suggestions for
- improvements. Please refer to the current edition of the "Internet
- Official Protocol Standards" (STD 1) for the standardization state
- and status of this protocol. Distribution of this memo is unlimited.
-
-Abstract
-
- The Hypertext Markup Language (HTML) is a simple markup language used
- to create hypertext documents that are platform independent. HTML
- documents are SGML documents with generic semantics that are
- appropriate for representing information from a wide range of
- domains. HTML markup can represent hypertext news, mail,
- documentation, and hypermedia; menus of options; database query
- results; simple structured documents with in-lined graphics; and
- hypertext views of existing bodies of information.
-
- HTML has been in use by the World Wide Web (WWW) global information
- initiative since 1990. This specification roughly corresponds to the
- capabilities of HTML in common use prior to June 1994. HTML is an
- application of ISO Standard 8879:1986 Information Processing Text and
- Office Systems; Standard Generalized Markup Language (SGML).
-
- The "text/html" Internet Media Type (<a href="./rfc1590">RFC 1590</a>) and MIME Content Type
- (<a href="./rfc1521">RFC 1521</a>) is defined by this specification.
-
-Table of Contents
-
- <a href="#section-1">1</a>. Introduction ........................................... <a href="#page-2">2</a>
- <a href="#section-1.1">1.1</a> Scope .................................................. <a href="#page-3">3</a>
- <a href="#section-1.2">1.2</a> Conformance ............................................ <a href="#page-3">3</a>
- <a href="#section-2">2</a>. Terms .................................................. <a href="#page-6">6</a>
- <a href="#section-3">3</a>. HTML as an Application of SGML .........................<a href="#page-10">10</a>
- <a href="#section-3.1">3.1</a> SGML Documents .........................................<a href="#page-10">10</a>
- <a href="#section-3.2">3.2</a> HTML Lexical Syntax ................................... <a href="#page-12">12</a>
- <a href="#section-3.3">3.3</a> HTML Public Text Identifiers .......................... <a href="#page-17">17</a>
- <a href="#section-3.4">3.4</a> Example HTML Document ................................. <a href="#page-17">17</a>
- <a href="#section-4">4</a>. HTML as an Internet Media Type ........................ <a href="#page-18">18</a>
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 1]</span>
-<a name="page-2" id="page-2" href="#page-2" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- <a href="#section-4.1">4.1</a> text/html media type .................................. <a href="#page-18">18</a>
- <a href="#section-4.2">4.2</a> HTML Document Representation .......................... <a href="#page-19">19</a>
- <a href="#section-5">5</a>. Document Structure .................................... <a href="#page-20">20</a>
- <a href="#section-5.1">5.1</a> Document Element: HTML ................................ <a href="#page-21">21</a>
- <a href="#section-5.2">5.2</a> Head: HEAD ............................................ <a href="#page-21">21</a>
- <a href="#section-5.3">5.3</a> Body: BODY ............................................ <a href="#page-24">24</a>
- <a href="#section-5.4">5.4</a> Headings: H1 ... H6 ................................... <a href="#page-24">24</a>
- <a href="#section-5.5">5.5</a> Block Structuring Elements ............................ <a href="#page-25">25</a>
- <a href="#section-5.6">5.6</a> List Elements ......................................... <a href="#page-28">28</a>
- <a href="#section-5.7">5.7</a> Phrase Markup ......................................... <a href="#page-30">30</a>
- <a href="#section-5.8">5.8</a> Line Break: BR ........................................ <a href="#page-34">34</a>
- <a href="#section-5.9">5.9</a> Horizontal Rule: HR ................................... <a href="#page-34">34</a>
- <a href="#section-5.10">5.10</a> Image: IMG ............................................ <a href="#page-34">34</a>
- <a href="#section-6">6</a>. Characters, Words, and Paragraphs ..................... <a href="#page-35">35</a>
- <a href="#section-6.1">6.1</a> The HTML Document Character Set ....................... <a href="#page-36">36</a>
- <a href="#section-7">7</a>. Hyperlinks ............................................ <a href="#page-36">36</a>
- <a href="#section-7.1">7.1</a> Accessing Resources ................................... <a href="#page-37">37</a>
- <a href="#section-7.2">7.2</a> Activation of Hyperlinks .............................. <a href="#page-38">38</a>
- <a href="#section-7.3">7.3</a> Simultaneous Presentation of Image Resources .......... <a href="#page-38">38</a>
- <a href="#section-7.4">7.4</a> Fragment Identifiers .................................. <a href="#page-38">38</a>
- <a href="#section-7.5">7.5</a> Queries and Indexes ................................... <a href="#page-39">39</a>
- <a href="#section-7.6">7.6</a> Image Maps ............................................ <a href="#page-39">39</a>
- <a href="#section-8">8</a>. Forms ................................................. <a href="#page-40">40</a>
- <a href="#section-8.1">8.1</a> Form Elements ......................................... <a href="#page-40">40</a>
- <a href="#section-8.2">8.2</a> Form Submission ....................................... <a href="#page-45">45</a>
- <a href="#section-9">9</a>. HTML Public Text ...................................... <a href="#page-49">49</a>
- <a href="#section-9.1">9.1</a> HTML DTD .............................................. <a href="#page-49">49</a>
- <a href="#section-9.2">9.2</a> Strict HTML DTD ....................................... <a href="#page-61">61</a>
- <a href="#section-9.3">9.3</a> Level 1 HTML DTD ...................................... <a href="#page-62">62</a>
- <a href="#section-9.4">9.4</a> Strict Level 1 HTML DTD ............................... <a href="#page-63">63</a>
- <a href="#section-9.5">9.5</a> SGML Declaration for HTML ............................. <a href="#page-64">64</a>
- <a href="#section-9.6">9.6</a> Sample SGML Open Entity Catalog for HTML .............. <a href="#page-65">65</a>
- <a href="#section-9.7">9.7</a> Character Entity Sets ................................. <a href="#page-66">66</a>
- <a href="#section-10">10</a>. Security Considerations ............................... <a href="#page-69">69</a>
- <a href="#section-11">11</a>. References ............................................ <a href="#page-69">69</a>
- <a href="#section-12">12</a>. Acknowledgments ....................................... <a href="#page-71">71</a>
- <a href="#section-12.1">12.1</a> Authors' Addresses .................................... <a href="#page-71">71</a>
- <a href="#section-13">13</a>. The HTML Coded Character Set .......................... <a href="#page-72">72</a>
- <a href="#section-14">14</a>. Proposed Entities ..................................... <a href="#page-75">75</a>
-
-<span class="h2"><a name="section-1">1</a>. Introduction</span>
-
- The HyperText Markup Language (HTML) is a simple data format used to
- create hypertext documents that are portable from one platform to
- another. HTML documents are SGML documents with generic semantics
- that are appropriate for representing information from a wide range
- of domains.
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 2]</span>
-<a name="page-3" id="page-3" href="#page-3" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- As HTML is an application of SGML, this specification assumes a
- working knowledge of [<a href="#ref-SGML">SGML</a>].
-
-<span class="h3"><a name="section-1.1">1.1</a>. Scope</span>
-
- HTML has been in use by the World-Wide Web (WWW) global information
- initiative since 1990. Previously, informal documentation on HTML has
- been available from a number of sources on the Internet. This
- specification brings together, clarifies, and formalizes a set of
- features that roughly corresponds to the capabilities of HTML in
- common use prior to June 1994. A number of new features to HTML are
- being proposed and experimented in the Internet community.
-
- This document thus defines a HTML 2.0 (to distinguish it from the
- previous informal specifications). Future (generally upwardly
- compatible) versions of HTML with new features will be released with
- higher version numbers.
-
- HTML is an application of ISO Standard 8879:1986, "Information
- Processing Text and Office Systems; Standard Generalized Markup
- Language" (SGML). The HTML Document Type Definition (DTD) is a formal
- definition of the HTML syntax in terms of SGML.
-
- This specification also defines HTML as an Internet Media
- Type[IMEDIA] and MIME Content Type[MIME] called `text/html'. As such,
- it defines the semantics of the HTML syntax and how that syntax
- should be interpreted by user agents.
-
-<span class="h3"><a name="section-1.2">1.2</a>. Conformance</span>
-
- This specification governs the syntax of HTML documents and aspects
- of the behavior of HTML user agents.
-
-<span class="h4"><a name="section-1.2.1">1.2.1</a>. Documents</span>
-
- A document is a conforming HTML document if:
-
- * It is a conforming SGML document, and it conforms to the
- HTML DTD (see 9.1, "HTML DTD").
-
- NOTE - There are a number of syntactic idioms that
- are not supported or are supported inconsistently in
- some historical user agent implementations. These
- idioms are identified in notes like this throughout
- this specification.
-
- * It conforms to the application conventions in this
- specification. For example, the value of the HREF attribute
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 3]</span>
-<a name="page-4" id="page-4" href="#page-4" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- of the <A> element must conform to the URI syntax.
-
- * Its document character set includes [<a href="#ref-ISO-8859-1">ISO-8859-1</a>] and
- agrees with [<a href="#ref-ISO-10646">ISO-10646</a>]; that is, each code position listed
- in 13, "The HTML Coded Character Set" is included, and each
- code position in the document character set is mapped to the
- same character as [<a href="#ref-ISO-10646">ISO-10646</a>] designates for that code
- position.
-
- NOTE - The document character set is somewhat
- independent of the character encoding scheme used to
- represent a document. For example, the `ISO-2022-JP'
- character encoding scheme can be used for HTML
- documents, since its repertoire is a subset of the
- [<a href="#ref-ISO-10646">ISO-10646</a>] repertoire. The critical distinction is
- that numeric character references agree with
- [<a href="#ref-ISO-10646">ISO-10646</a>] regardless of how the document is
- encoded.
-
-<span class="h4"><a name="section-1.2.2">1.2.2</a>. Feature Test Entities</span>
-
- The HTML DTD defines a standard HTML document type and several
- variations, by way of feature test entities. Feature test entities
- are declarations in the HTML DTD that control the inclusion or
- exclusion of portions of the DTD.
-
- HTML.Recommended
- Certain features of the language are necessary for
- compatibility with widespread usage, but they may
- compromise the structural integrity of a document. This
- feature test entity selects a more prescriptive document
- type definition that eliminates those features. It is
- set to `IGNORE' by default.
-
- For example, in order to preserve the structure of a
- document, an editing user agent may translate HTML
- documents to the recommended subset, or it may require
- that the documents be in the recommended subset for
- import.
-
- HTML.Deprecated
- Certain features of the language are necessary for
- compatibility with earlier versions of the
- specification, but they tend to be used and implemented
- inconsistently, and their use is deprecated. This
- feature test entity enables a document type definition
- that allows these features. It is set to `INCLUDE' by
- default.
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 4]</span>
-<a name="page-5" id="page-5" href="#page-5" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- Documents generated by translation software or editing
- software should not contain deprecated idioms.
-
-<span class="h4"><a name="section-1.2.3">1.2.3</a>. User Agents</span>
-
- An HTML user agent conforms to this specification if:
-
- * It parses the characters of an HTML document into data
- characters and markup according to [<a href="#ref-SGML">SGML</a>].
-
- NOTE - In the interest of robustness and
- extensibility, there are a number of widely deployed
- conventions for handling non-conforming documents.
- See 4.2.1, "Undeclared Markup Error Handling" for
- details.
-
- * It supports the `ISO-8859-1' character encoding scheme and
- processes each character in the ISO Latin Alphabet No. 1 as
- specified in 6.1, "The HTML Document Character Set".
-
- NOTE - To support non-western writing systems, HTML
- user agents are encouraged to support
- `ISO-10646-UCS-2' or similar character encoding
- schemes and as much of the character repertoire of
- [<a href="#ref-ISO-10646">ISO-10646</a>] as is practical.
-
- * It behaves identically for documents whose parsed token
- sequences are identical.
-
- For example, comments and the whitespace in tags disappear
- during tokenization, and hence they do not influence the
- behavior of conforming user agents.
-
- * It allows the user to traverse (or at least attempt to
- traverse, resources permitting) all hyperlinks from <A>
- elements in an HTML document.
-
- An HTML user agent is a level 2 user agent if, additionally:
-
- * It allows the user to express all form field values
- specified in an HTML document and to (attempt to) submit the
- values as requests to information services.
-
-
-
-
-
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 5]</span>
-<a name="page-6" id="page-6" href="#page-6" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
-<span class="h2"><a name="section-2">2</a>. Terms</span>
-
- absolute URI
- a URI in absolute form; for example, as per [<a href="#ref-URL" title='"Uniform Resource Locators (URL)"'>URL</a>]
-
- anchor
- one of two ends of a hyperlink; typically, a phrase
- marked as an <A> element.
-
- base URI
- an absolute URI used in combination with a relative URI
- to determine another absolute URI.
-
- character
- An atom of information, for example a letter or a digit.
- Graphic characters have associated glyphs, whereas
- control characters have associated processing semantics.
-
- character encoding
- scheme
- A function whose domain is the set of sequences of
- octets, and whose range is the set of sequences of
- characters from a character repertoire; that is, a
- sequence of octets and a character encoding scheme
- determines a sequence of characters.
-
- character repertoire
- A finite set of characters; e.g. the range of a coded
- character set.
-
- code position
- An integer. A coded character set and a code position
- from its domain determine a character.
-
- coded character set
- A function whose domain is a subset of the integers and
- whose range is a character repertoire. That is, for some
- set of integers (usually of the form {0, 1, 2, ..., N}
- ), a coded character set and an integer in that set
- determine a character. Conversely, a character and a
- coded character set determine the character's code
- position (or, in rare cases, a few code positions).
-
- conforming HTML user
- agent
- A user agent that conforms to this specification in its
- processing of the Internet Media Type `text/html'.
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 6]</span>
-<a name="page-7" id="page-7" href="#page-7" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- data character
- Characters other than markup, which make up the content
- of elements.
-
- document character set
- a coded character set whose range includes all
- characters used in a document. Every SGML document has
- exactly one document character set. Numeric character
- references are resolved via the document character set.
-
- DTD
- document type definition. Rules that apply SGML to the
- markup of documents of a particular type, including a
- set of element and entity declarations. [<a href="#ref-SGML">SGML</a>]
-
- element
- A component of the hierarchical structure defined by a
- document type definition; it is identified in a document
- instance by descriptive markup, usually a start-tag and
- end-tag. [<a href="#ref-SGML">SGML</a>]
-
- end-tag
- Descriptive markup that identifies the end of an
- element. [<a href="#ref-SGML">SGML</a>]
-
- entity
- data with an associated notation or interpretation; for
- example, a sequence of octets associated with an
- Internet Media Type. [<a href="#ref-SGML">SGML</a>]
-
- fragment identifier
- the portion of an HREF attribute value following the `#'
- character which modifies the presentation of the
- destination of a hyperlink.
-
- form data set
- a sequence of name/value pairs; the names are given by
- an HTML document and the values are given by a user.
-
- HTML document
- An SGML document conforming to this document type
- definition.
-
- hyperlink
- a relationship between two anchors, called the head and
- the tail. The link goes from the tail to the head. The
- head and tail are also known as destination and source,
- respectively.
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 7]</span>
-<a name="page-8" id="page-8" href="#page-8" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- markup
- Syntactically delimited characters added to the data of
- a document to represent its structure. There are four
- different kinds of markup: descriptive markup (tags),
- references, markup declarations, and processing
- instructions. [<a href="#ref-SGML">SGML</a>]
-
- may
- A document or user interface is conforming whether this
- statement applies or not.
-
- media type
- an Internet Media Type, as per [<a href="#ref-IMEDIA" title='"Media Type Registration Procedure"'>IMEDIA</a>].
-
- message entity
- a head and body. The head is a collection of name/value
- fields, and the body is a sequence of octets. The head
- defines the content type and content transfer encoding
- of the body. [<a href="#ref-MIME" title='"MIME (Multipurpose Internet Mail Extensions) Part One: Mechanisms for Specifying and Describing the Format of Internet Message Bodies"'>MIME</a>]
-
- minimally conforming
- HTML user agent
- A user agent that conforms to this specification except
- for form processing. It may only process level 1 HTML
- documents.
-
- must
- Documents or user agents in conflict with this statement
- are not conforming.
-
- numeric character
- reference
- markup that refers to a character by its code position
- in the document character set.
-
- SGML document
- A sequence of characters organized physically as a set
- of entities and logically into a hierarchy of elements.
- An SGML document consists of data characters and markup;
- the markup describes the structure of the information
- and an instance of that structure. [<a href="#ref-SGML">SGML</a>]
-
- shall
- If a document or user agent conflicts with this
- statement, it does not conform to this specification.
-
-
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 8]</span>
-<a name="page-9" id="page-9" href="#page-9" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- should
- If a document or user agent conflicts with this
- statement, undesirable results may occur in practice
- even though it conforms to this specification.
-
- start-tag
- Descriptive markup that identifies the start of an
- element and specifies its generic identifier and
- attributes. [<a href="#ref-SGML">SGML</a>]
-
- syntax-reference
- character set
- A coded character set whose range includes all
- characters used for markup; e.g. name characters and
- delimiter characters.
-
- tag
- Markup that delimits an element. A tag includes a name
- which refers to an element declaration in the DTD, and
- may include attributes. [<a href="#ref-SGML">SGML</a>]
-
- text entity
- A finite sequence of characters. A text entity typically
- takes the form of a sequence of octets with some
- associated character encoding scheme, transmitted over
- the network or stored in a file. [<a href="#ref-SGML">SGML</a>]
-
- typical
- Typical processing is described for many elements. This
- is not a mandatory part of the specification but is
- given as guidance for designers and to help explain the
- uses for which the elements were intended.
-
- URI
- A Uniform Resource Identifier is a formatted string that
- serves as an identifier for a resource, typically on the
- Internet. URIs are used in HTML to identify the anchors
- of hyperlinks. URIs in common practice include Uniform
- Resource Locators (URLs)[<a href="#ref-URL" title='"Uniform Resource Locators (URL)"'>URL</a>] and Relative URLs
- [<a href="#ref-RELURL" title='"Relative Uniform Resource Locators"'>RELURL</a>].
-
- user agent
- A component of a distributed system that presents an
- interface and processes requests on behalf of a user;
- for example, a www browser or a mail user agent.
-
-
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 9]</span>
-<a name="page-10" id="page-10" href="#page-10" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- WWW
- The World-Wide Web is a hypertext-based, distributed
- information system created by researchers at CERN in
- Switzerland. <URL:http://www.w3.org/>
-
-<span class="h2"><a name="section-3">3</a>. HTML as an Application of SGML</span>
-
- HTML is an application of ISO 8879:1986 -- Standard Generalized
- Markup Language (SGML). SGML is a system for defining structured
- document types and markup languages to represent instances of those
- document types[SGML]. The public text -- DTD and SGML declaration --
- of the HTML document type definition are provided in 9, "HTML Public
- Text".
-
- The term "HTML" refers to both the document type defined here and the
- markup language for representing instances of this document type.
-
-<span class="h3"><a name="section-3.1">3.1</a>. SGML Documents</span>
-
- An HTML document is an SGML document; that is, a sequence of
- characters organized physically into a set of entities, and logically
- as a hierarchy of elements.
-
- In the SGML specification, the first production of the SGML syntax
- grammar separates an SGML document into three parts: an SGML
- declaration, a prologue, and an instance. For the purposes of this
- specification, the prologue is a DTD. This DTD describes another
- grammar: the start symbol is given in the doctype declaration, the
- terminals are data characters and tags, and the productions are
- determined by the element declarations. The instance must conform to
- the DTD, that is, it must be in the language defined by this grammar.
-
- The SGML declaration determines the lexicon of the grammar. It
- specifies the document character set, which determines a character
- repertoire that contains all characters that occur in all text
- entities in the document, and the code positions associated with
- those characters.
-
- The SGML declaration also specifies the syntax-reference character
- set of the document, and a few other parameters that bind the
- abstract syntax of SGML to a concrete syntax. This concrete syntax
- determines how the sequence of characters of the document is mapped
- to a sequence of terminals in the grammar of the prologue.
-
-
-
-
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 10]</span>
-<a name="page-11" id="page-11" href="#page-11" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- For example, consider the following document:
-
- <!DOCTYPE html PUBLIC "-//IETF//DTD HTML 2.0//EN">
- <title>Parsing Example</title>
- <p>Some text. <em>&#42;wow&#42;</em></p>
-
- An HTML user agent should use the SGML declaration that is given in
- 9.5, "SGML Declaration for HTML". According to its document character
- set, `&#42;' refers to an asterisk character, `*'.
-
- The instance above is regarded as the following sequence of
- terminals:
-
- 1. start-tag: TITLE
-
- 2. data characters: "Parsing Example"
-
- 3. end-tag: TITLE
-
- 4. start-tag: P
-
- 5. data characters "Some text."
-
- 6. start-tag: EM
-
- 7. data characters: "*wow*"
-
- 8. end-tag: EM
-
- 9. end-tag: P
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 11]</span>
-<a name="page-12" id="page-12" href="#page-12" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- The start symbol of the DTD grammar is HTML, and the productions are
- given in the public text identified by `-//IETF//DTD HTML 2.0//EN'
- (9.1, "HTML DTD"). The terminals above parse as:
-
- HTML
- |
- \-HEAD
- | |
- | \-TITLE
- | |
- | \-<TITLE>
- | |
- | \-"Parsing Example"
- | |
- | \-</TITLE>
- |
- \-BODY
- |
- \-P
- |
- \-<P>
- |
- \-"Some text. "
- |
- \-EM
- | |
- | \-<EM>
- | |
- | \-"*wow*"
- | |
- | \-</EM>
- |
- \-</P>
-
- Some of the elements are delimited explicitly by tags, while the
- boundaries of others are inferred. The <HTML> element contains a
- <HEAD> element and a <BODY> element. The <HEAD> contains <TITLE>,
- which is explicitly delimited by start- and end-tags.
-
-<span class="h3"><a name="section-3.2">3.2</a>. HTML Lexical Syntax</span>
-
- SGML specifies an abstract syntax and a reference concrete syntax.
- Aside from certain quantities and capacities (e.g. the limit on the
- length of a name), all HTML documents use the reference concrete
- syntax. In particular, all markup characters are in the repertoire of
- [<a href="#ref-ISO-646" title='"./rfc1866"'>ISO-646</a>]. Data characters are drawn from the document character set
- (see 6, "Characters, Words, and Paragraphs").
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 12]</span>
-<a name="page-13" id="page-13" href="#page-13" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- A complete discussion of SGML parsing, e.g. the mapping of a sequence
- of characters to a sequence of tags and data, is left to the SGML
- standard[SGML]. This section is only a summary.
-
-<span class="h4"><a name="section-3.2.1">3.2.1</a>. Data Characters</span>
-
- Any sequence of characters that do not constitute markup (see 9.6
- "Delimiter Recognition" of [<a href="#ref-SGML">SGML</a>]) are mapped directly to strings of
- data characters. Some markup also maps to data character strings.
- Numeric character references map to single-character strings, via the
- document character set. Each reference to one of the general entities
- defined in the HTML DTD maps to a single-character string.
-
- For example,
-
- abc&lt;def => "abc","<","def"
- abc&#60;def => "abc","<","def"
-
- The terminating semicolon on entity or numeric character references
- is only necessary when the character following the reference would
- otherwise be recognized as part of the name (see 9.4.5 "Reference
- End" in [<a href="#ref-SGML">SGML</a>]).
-
- abc &lt def => "abc ","<"," def"
- abc &#60 def => "abc ","<"," def"
-
- An ampersand is only recognized as markup when it is followed by a
- letter or a `#' and a digit:
-
- abc & lt def => "abc & lt def"
- abc &# 60 def => "abc &# 60 def"
-
- A useful technique for translating plain text to HTML is to replace
- each '<', '&', and '>' by an entity reference or numeric character
- reference as follows:
-
- ENTITY NUMERIC
- CHARACTER REFERENCE CHAR REF CHARACTER DESCRIPTION
- --------- ---------- ----------- ---------------------
- & &amp; &#38; Ampersand
- < &lt; &#60; Less than
- > &gt; &#62; Greater than
-
- NOTE - There are SGML mechanisms, CDATA and RCDATA
- declared content, that allow most `<', `>', and `&'
- characters to be entered without the use of entity
- references. Because these mechanisms tend to be used and
- implemented inconsistently, and because they conflict
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 13]</span>
-<a name="page-14" id="page-14" href="#page-14" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- with techniques for reducing HTML to 7 bit ASCII for
- transport, they are deprecated in this version of HTML.
- See 5.5.2.1, "Example and Listing: XMP, LISTING".
-
-<span class="h4"><a name="section-3.2.2">3.2.2</a>. Tags</span>
-
- Tags delimit elements such as headings, paragraphs, lists, character
- highlighting, and links. Most HTML elements are identified in a
- document as a start-tag, which gives the element name and attributes,
- followed by the content, followed by the end tag. Start-tags are
- delimited by `<' and `>'; end tags are delimited by `</' and `>'. An
- example is:
-
- <H1>This is a Heading</H1>
-
- Some elements only have a start-tag without an end-tag. For example,
- to create a line break, use the `<BR>' tag. Additionally, the end
- tags of some other elements, such as Paragraph (`</P>'), List Item
- (`</LI>'), Definition Term (`</DT>'), and Definition Description
- (`</DD>') elements, may be omitted.
-
- The content of an element is a sequence of data character strings and
- nested elements. Some elements, such as anchors, cannot be nested.
- Anchors and character highlighting may be put inside other
- constructs. See the HTML DTD, 9.1, "HTML DTD" for full details.
-
- NOTE - The SGML declaration for HTML specifies SHORTTAG YES, which
- means that there are other valid syntaxes for tags, such as NET
- tags, `<EM/.../'; empty start tags, `<>'; and empty end-tags,
- `</>'. Until support for these idioms is widely deployed, their
- use is strongly discouraged.
-
-<span class="h4"><a name="section-3.2.3">3.2.3</a>. Names</span>
-
- A name consists of a letter followed by letters, digits, periods, or
- hyphens. The length of a name is limited to 72 characters by the
- `NAMELEN' parameter in the SGML declaration for HTML, 9.5, "SGML
- Declaration for HTML". Element and attribute names are not case
- sensitive, but entity names are. For example, `<BLOCKQUOTE>',
- `<BlockQuote>', and `<blockquote>' are equivalent, whereas `&amp;' is
- different from `&AMP;'.
-
- In a start-tag, the element name must immediately follow the tag open
- delimiter `<'.
-
-
-
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 14]</span>
-<a name="page-15" id="page-15" href="#page-15" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
-<span class="h4"><a name="section-3.2.4">3.2.4</a>. Attributes</span>
-
- In a start-tag, white space and attributes are allowed between the
- element name and the closing delimiter. An attribute specification
- typically consists of an attribute name, an equal sign, and a value,
- though some attribute specifications may be just a name token. White
- space is allowed around the equal sign.
-
- The value of the attribute may be either:
-
- * A string literal, delimited by single quotes or double
- quotes and not containing any occurrences of the delimiting
- character.
-
- NOTE - Some historical implementations consider any
- occurrence of the `>' character to signal the end of
- a tag. For compatibility with such implementations,
- when `>' appears in an attribute value, it should be
- represented with a numeric character reference. For
- example, `<IMG SRC="eq1.jpg" alt="a>b">' should be
- written `<IMG SRC="eq1.jpg" alt="a&#62;b">' or `<IMG
- SRC="eq1.jpg" alt="a&gt;b">'.
-
- * A name token (a sequence of letters, digits, periods, or
- hyphens). Name tokens are not case sensitive.
-
- NOTE - Some historical implementations allow any
- character except space or `>' in a name token.
-
- In this example, <img> is the element name, src is the attribute
- name, and `http://host/dir/file.gif' is the attribute value:
-
- <img src='http://host/dir/file.gif'>
-
- A useful technique for computing an attribute value literal for a
- given string is to replace each quote and white space character by an
- entity reference or numeric character reference as follows:
-
- ENTITY NUMERIC
- CHARACTER REFERENCE CHAR REF CHARACTER DESCRIPTION
- --------- ---------- ----------- ---------------------
- HT &#9; Tab
- LF &#10; Line Feed
- CR &#13; Carriage Return
- SP &#32; Space
- " &quot; &#34; Quotation mark
- & &amp; &#38; Ampersand
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 15]</span>
-<a name="page-16" id="page-16" href="#page-16" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- For example:
-
- <IMG SRC="image.jpg" alt="First &quot;real&quot; example">
-
- The `LITLEN' parameter in the SGML declaration (9.5, "SGML
- Declaration for HTML") limits the length of an attribute value to
- 1024 characters.
-
- Attributes such as ISMAP and COMPACT may be written using a minimized
- syntax (see 7.9.1.2 "Omitted Attribute Name" in [<a href="#ref-SGML">SGML</a>]). The markup:
-
- <UL COMPACT="compact">
-
- can be written using a minimized syntax:
-
- <UL COMPACT>
-
- NOTE - Some historical implementations only understand the minimized
- syntax.
-
-<span class="h4"><a name="section-3.2.5">3.2.5</a>. Comments</span>
-
- To include comments in an HTML document, use a comment declaration. A
- comment declaration consists of `<!' followed by zero or more
- comments followed by `>'. Each comment starts with `--' and includes
- all text up to and including the next occurrence of `--'. In a
- comment declaration, white space is allowed after each comment, but
- not before the first comment. The entire comment declaration is
- ignored.
-
- NOTE - Some historical HTML implementations incorrectly consider
- any `>' character to be the termination of a comment.
-
- For example:
-
- <!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
- <HEAD>
- <TITLE>HTML Comment Example</TITLE>
- <!-- Id: html-sgml.sgm,v 1.5 1995/05/26 21:29:50 connolly Exp -->
- <!-- another -- -- comment -->
- <!>
- </HEAD>
- <BODY>
- <p> <!- not a comment, just regular old data characters ->
-
-
-
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 16]</span>
-<a name="page-17" id="page-17" href="#page-17" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
-<span class="h3"><a name="section-3.3">3.3</a>. HTML Public Text Identifiers</span>
-
- To identify information as an HTML document conforming to this
- specification, each document must start with one of the following
- document type declarations.
-
- <!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
-
- This document type declaration refers to the HTML DTD in 9.1, "HTML
- DTD".
-
- NOTE - If the body of a `text/html' message entity does not begin
- with a document type declaration, an HTML user agent should infer
- the above document type declaration.
-
- <!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0 Level 2//EN">
-
- This document type declaration also refers to the HTML DTD which
- appears in 9.1, "HTML DTD".
-
- <!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0 Level 1//EN">
-
- This document type declaration refers to the level 1 HTML DTD in 9.3,
- "Level 1 HTML DTD". Form elements must not occur in level 1
- documents.
-
- <!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0 Strict//EN">
- <!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0 Strict Level 1//EN">
-
- These two document type declarations refer to the HTML DTD in 9.2,
- "Strict HTML DTD" and 9.4, "Strict Level 1 HTML DTD". They refer to
- the more structurally rigid definition of HTML.
-
- HTML user agents may support other document types. In particular,
- they may support other formal public identifiers, or other document
- types altogether. They may support an internal declaration subset
- with supplemental entity, element, and other markup declarations.
-
-<span class="h3"><a name="section-3.4">3.4</a>. Example HTML Document</span>
-
- <!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
- <HTML>
- <!-- Here's a good place to put a comment. -->
- <HEAD>
- <TITLE>Structural Example</TITLE>
- </HEAD><BODY>
- <H1>First Header</H1>
- <P>This is a paragraph in the example HTML file. Keep in mind
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 17]</span>
-<a name="page-18" id="page-18" href="#page-18" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- that the title does not appear in the document text, but that
- the header (defined by H1) does.</P>
- <OL>
- <LI>First item in an ordered list.
- <LI>Second item in an ordered list.
- <UL COMPACT>
- <LI> Note that lists can be nested;
- <LI> Whitespace may be used to assist in reading the
- HTML source.
- </UL>
- <LI>Third item in an ordered list.
- </OL>
- <P>This is an additional paragraph. Technically, end tags are
- not required for paragraphs, although they are allowed. You can
- include character highlighting in a paragraph. <EM>This sentence
- of the paragraph is emphasized.</EM> Note that the &lt;/P&gt;
- end tag has been omitted.
- <P>
- <IMG SRC ="triangle.xbm" alt="Warning: ">
- Be sure to read these <b>bold instructions</b>.
- </BODY></HTML>
-
-<span class="h2"><a name="section-4">4</a>. HTML as an Internet Media Type</span>
-
- An HTML user agent allows users to interact with resources which have
- HTML representations. At a minimum, it must allow users to examine
- and navigate the content of HTML level 1 documents. HTML user agents
- should be able to preserve all formatting distinctions represented in
- an HTML document, and be able to simultaneously present resources
- referred to by IMG elements (they may ignore some formatting
- distinctions or IMG resources at the request of the user). Level 2
- HTML user agents should support form entry and submission.
-
-<span class="h3"><a name="section-4.1">4.1</a>. text/html media type</span>
-
- This specification defines the Internet Media Type [<a href="#ref-IMEDIA" title='"Media Type Registration Procedure"'>IMEDIA</a>] (formerly
- referred to as the Content Type [<a href="#ref-MIME" title='"MIME (Multipurpose Internet Mail Extensions) Part One: Mechanisms for Specifying and Describing the Format of Internet Message Bodies"'>MIME</a>]) called `text/html'. The
- following is to be registered with [<a href="#ref-IANA" title='"Assigned Numbers"'>IANA</a>].
-
- Media Type name
- text
-
- Media subtype name
- html
-
- Required parameters
- none
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 18]</span>
-<a name="page-19" id="page-19" href="#page-19" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- Optional parameters
- level, charset
-
- Encoding considerations
- any encoding is allowed
-
- Security considerations
- see 10, "Security Considerations"
-
- The optional parameters are defined as follows:
-
- Level
- The level parameter specifies the feature set used in
- the document. The level is an integer number, implying
- that any features of same or lower level may be present
- in the document. Level 1 is all features defined in this
- specification except those that require the <FORM>
- element. Level 2 includes form processing. Level 2 is
- the default.
-
- Charset
- The charset parameter (as defined in <a href="./rfc1521#section-7.1.1">section 7.1.1 of
- RFC 1521</a>[<a href="#ref-MIME" title='"MIME (Multipurpose Internet Mail Extensions) Part One: Mechanisms for Specifying and Describing the Format of Internet Message Bodies"'>MIME</a>]) may be given to specify the character
- encoding scheme used to represent the HTML document as a
- sequence of octets. The default value is outside the
- scope of this specification; but for example, the
- default is `US-ASCII' in the context of MIME mail, and
- `ISO-8859-1' in the context of HTTP [<a href="#ref-HTTP" title='"Hypertext Transfer Protocol - HTTP/1.0"'>HTTP</a>].
-
-<span class="h3"><a name="section-4.2">4.2</a>. HTML Document Representation</span>
-
- A message entity with a content type of `text/html' represents an
- HTML document, consisting of a single text entity. The `charset'
- parameter (whether implicit or explicit) identifies a character
- encoding scheme. The text entity consists of the characters
- determined by this character encoding scheme and the octets of the
- body of the message entity.
-
-<span class="h4"><a name="section-4.2.1">4.2.1</a>. Undeclared Markup Error Handling</span>
-
- To facilitate experimentation and interoperability between
- implementations of various versions of HTML, the installed base of
- HTML user agents supports a superset of the HTML 2.0 language by
- reducing it to HTML 2.0: markup in the form of a start-tag or end-
- tag, whose generic identifier is not declared is mapped to nothing
- during tokenization. Undeclared attributes are treated similarly. The
- entire attribute specification of an unknown attribute (i.e., the
- unknown attribute and its value, if any) should be ignored. On the
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 19]</span>
-<a name="page-20" id="page-20" href="#page-20" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- other hand, references to undeclared entities should be treated as
- data characters.
-
- For example:
-
- <div class=chapter><h1>foo</h1><p>...</div>
- => <H1>,"foo",</H1>,<P>,"..."
- xxx <P ID=z23> yyy
- => "xxx ",<P>," yyy
- Let &alpha; &amp; &beta; be finite sets.
- => "Let &alpha; & &beta; be finite sets."
-
- Support for notifying the user of such errors is encouraged.
-
- Information providers are warned that this convention is not binding:
- unspecified behavior may result, as such markup does not conform to
- this specification.
-
-<span class="h4"><a name="section-4.2.2">4.2.2</a>. Conventional Representation of Newlines</span>
-
- SGML specifies that a text entity is a sequence of records, each
- beginning with a record start character and ending with a record end
- character (code positions 10 and 13 respectively) (<a href="#section-7.6.1">section 7.6.1</a>,
- "Record Boundaries" in [<a href="#ref-SGML">SGML</a>]).
-
- [<a name="ref-MIME" id="ref-MIME">MIME</a>] specifies that a body of type `text/*' is a sequence of lines,
- each terminated by CRLF, that is, octets 13, 10.
-
- In practice, HTML documents are frequently represented and
- transmitted using an end of line convention that depends on the
- conventions of the source of the document; frequently, that
- representation consists of CR only, LF only, or a CR LF sequence.
- Hence the decoding of the octets will often result in a text entity
- with some missing record start and record end characters.
-
- Since there is no ambiguity, HTML user agents are encouraged to infer
- the missing record start and end characters.
-
- An HTML user agent should treat end of line in any of its variations
- as a word space in all contexts except preformatted text. Within
- preformatted text, an HTML user agent should treat any of the three
- common representations of end-of-line as starting a new line.
-
-<span class="h2"><a name="section-5">5</a>. Document Structure</span>
-
- An HTML document is a tree of elements, including a head and body,
- headings, paragraphs, lists, etc. Form elements are discussed in 8,
- "Forms".
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 20]</span>
-<a name="page-21" id="page-21" href="#page-21" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
-<span class="h3"><a name="section-5.1">5.1</a>. Document Element: HTML</span>
-
- The HTML document element consists of a head and a body, much like a
- memo or a mail message. The head contains the title and optional
- elements. The body is a text flow consisting of paragraphs, lists,
- and other elements.
-
-<span class="h3"><a name="section-5.2">5.2</a>. Head: HEAD</span>
-
- The head of an HTML document is an unordered collection of
- information about the document. For example:
-
- <!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
- <HEAD>
- <TITLE>Introduction to HTML</TITLE>
- </HEAD>
- ...
-
-<span class="h4"><a name="section-5.2.1">5.2.1</a>. Title: TITLE</span>
-
- Every HTML document must contain a <TITLE> element.
-
- The title should identify the contents of the document in a global
- context. A short title, such as "Introduction" may be meaningless out
- of context. A title such as "Introduction to HTML Elements" is more
- appropriate.
-
- NOTE - The length of a title is not limited; however, long titles
- may be truncated in some applications. To minimize this
- possibility, titles should be fewer than 64 characters.
-
- A user agent may display the title of a document in a history list or
- as a label for the window displaying the document. This differs from
- headings (5.4, "Headings: H1 ... H6"), which are typically displayed
- within the body text flow.
-
-<span class="h4"><a name="section-5.2.2">5.2.2</a>. Base Address: BASE</span>
-
- The optional <BASE> element provides a base address for interpreting
- relative URLs when the document is read out of context (see 7,
- "Hyperlinks"). The value of the HREF attribute must be an absolute
- URI.
-
-<span class="h4"><a name="section-5.2.3">5.2.3</a>. Keyword Index: ISINDEX</span>
-
- The <ISINDEX> element indicates that the user agent should allow the
- user to search an index by giving keywords. See 7.5, "Queries and
- Indexes" for details.
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 21]</span>
-<a name="page-22" id="page-22" href="#page-22" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
-<span class="h4"><a name="section-5.2.4">5.2.4</a>. Link: LINK</span>
-
- The <LINK> element represents a hyperlink (see 7, "Hyperlinks"). Any
- number of LINK elements may occur in the <HEAD> element of an HTML
- document. It has the same attributes as the <A> element (see 5.7.3,
- "Anchor: A").
-
- The <LINK> element is typically used to indicate authorship, related
- indexes and glossaries, older or more recent versions, document
- hierarchy, associated resources such as style sheets, etc.
-
-<span class="h4"><a name="section-5.2.5">5.2.5</a>. Associated Meta-information: META</span>
-
- The <META> element is an extensible container for use in identifying
- specialized document meta-information. Meta-information has two main
- functions:
-
- * to provide a means to discover that the data set exists
- and how it might be obtained or accessed; and
-
- * to document the content, quality, and features of a data
- set, indicating its fitness for use.
-
- Each <META> element specifies a name/value pair. If multiple META
- elements are provided with the same name, their combined contents--
- concatenated as a comma-separated list--is the value associated with
- that name.
-
- NOTE - The <META> element should not be used where a
- specific element, such as <TITLE>, would be more
- appropriate. Rather than a <META> element with a URI as
- the value of the CONTENT attribute, use a <LINK>
- element.
-
- HTTP servers may read the content of the document <HEAD> to generate
- header fields corresponding to any elements defining a value for the
- attribute HTTP-EQUIV.
-
- NOTE - The method by which the server extracts document
- meta-information is unspecified and not mandatory. The
- <META> element only provides an extensible mechanism for
- identifying and embedding document meta-information --
- how it may be used is up to the individual server
- implementation and the HTML user agent.
-
-
-
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 22]</span>
-<a name="page-23" id="page-23" href="#page-23" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- Attributes of the META element:
-
- HTTP-EQUIV
- binds the element to an HTTP header field. An HTTP
- server may use this information to process the document.
- In particular, it may include a header field in the
- responses to requests for this document: the header name
- is taken from the HTTP-EQUIV attribute value, and the
- header value is taken from the value of the CONTENT
- attribute. HTTP header names are not case sensitive.
-
- NAME
- specifies the name of the name/value pair. If not
- present, HTTP-EQUIV gives the name.
-
- CONTENT
- specifies the value of the name/value pair.
-
- Examples
-
- If the document contains:
-
- <META HTTP-EQUIV="Expires"
- CONTENT="Tue, 04 Dec 1993 21:29:02 GMT">
- <meta http-equiv="Keywords" CONTENT="Fred">
- <META HTTP-EQUIV="Reply-to"
- content="fielding@ics.uci.edu (Roy Fielding)">
- <Meta Http-equiv="Keywords" CONTENT="Barney">
-
- then the server may include the following header fields:
-
- Expires: Tue, 04 Dec 1993 21:29:02 GMT
- Keywords: Fred, Barney
- Reply-to: fielding@ics.uci.edu (Roy Fielding)
-
- as part of the HTTP response to a `GET' or `HEAD' request for
- that document.
-
- An HTTP server must not use the <META> element to form an HTTP
- response header unless the HTTP-EQUIV attribute is present.
-
- An HTTP server may disregard any <META> elements that specify
- information controlled by the HTTP server, for example `Server',
-
- `Date', and `Last-modified'.
-
-
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 23]</span>
-<a name="page-24" id="page-24" href="#page-24" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
-<span class="h4"><a name="section-5.2.6">5.2.6</a>. Next Id: NEXTID</span>
-
- The <NEXTID> element is included for historical reasons only. HTML
- documents should not contain <NEXTID> elements.
-
- The <NEXTID> element gives a hint for the name to use for a new <A>
- element when editing an HTML document. It should be distinct from all
- NAME attribute values on <A> elements. For example:
-
- <NEXTID N=Z27>
-
-<span class="h3"><a name="section-5.3">5.3</a>. Body: BODY</span>
-
- The <BODY> element contains the text flow of the document, including
- headings, paragraphs, lists, etc.
-
- For example:
-
- <BODY>
- <h1>Important Stuff</h1>
- <p>Explanation about important stuff...
- </BODY>
-
-<a href="#section-5.4">5.4</a>. Headings: H1 ... H6
-
- The six heading elements, <H1> through <H6>, denote section headings.
- Although the order and occurrence of headings is not constrained by
- the HTML DTD, documents should not skip levels (for example, from H1
- to H3), as converting such documents to other representations is
- often problematic.
-
- Example of use:
-
- <H1>This is a heading</H1>
- Here is some text
- <H2>Second level heading</H2>
- Here is some more text.
-
- Typical renderings are:
-
- H1
- Bold, very-large font, centered. One or two blank lines
- above and below.
-
- H2
- Bold, large font, flush-left. One or two blank lines
- above and below.
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 24]</span>
-<a name="page-25" id="page-25" href="#page-25" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- H3
- Italic, large font, slightly indented from the left
- margin. One or two blank lines above and below.
-
- H4
- Bold, normal font, indented more than H3. One blank line
- above and below.
-
- H5
- Italic, normal font, indented as H4. One blank line
- above.
-
- H6
- Bold, indented same as normal text, more than H5. One
- blank line above.
-
-<span class="h3"><a name="section-5.5">5.5</a>. Block Structuring Elements</span>
-
- Block structuring elements include paragraphs, lists, and block
- quotes. They must not contain heading elements, but they may contain
- phrase markup, and in some cases, they may be nested.
-
-<span class="h4"><a name="section-5.5.1">5.5.1</a>. Paragraph: P</span>
-
- The <P> element indicates a paragraph. The exact indentation, leading
- space, etc. of a paragraph is not specified and may be a function of
- other tags, style sheets, etc.
-
- Typically, paragraphs are surrounded by a vertical space of one line
- or half a line. The first line in a paragraph is indented in some
- cases.
-
- Example of use:
-
- <H1>This Heading Precedes the Paragraph</H1>
- <P>This is the text of the first paragraph.
- <P>This is the text of the second paragraph. Although you do not
- need to start paragraphs on new lines, maintaining this
- convention facilitates document maintenance.</P>
- <P>This is the text of a third paragraph.</P>
-
-<span class="h4"><a name="section-5.5.2">5.5.2</a>. Preformatted Text: PRE</span>
-
- The <PRE> element represents a character cell block of text and is
- suitable for text that has been formatted for a monospaced font.
-
- The <PRE> tag may be used with the optional WIDTH attribute. The
- WIDTH attribute specifies the maximum number of characters for a line
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 25]</span>
-<a name="page-26" id="page-26" href="#page-26" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- and allows the HTML user agent to select a suitable font and
- indentation.
-
- Within preformatted text:
-
- * Line breaks within the text are rendered as a move to the
- beginning of the next line.
-
- NOTE - References to the "beginning of a new line"
- do not imply that the renderer is forbidden from
- using a constant left indent for rendering
- preformatted text. The left indent may be
- constrained by the width required.
-
- * Anchor elements and phrase markup may be used.
-
- NOTE - Constraints on the processing of <PRE>
- content may limit or prevent the ability of the HTML
- user agent to faithfully render phrase markup.
-
- * Elements that define paragraph formatting (headings,
- address, etc.) must not be used.
-
- NOTE - Some historical documents contain <P> tags in
- <PRE> elements. User agents are encouraged to treat
- this as a line break. A <P> tag followed by a
- newline character should produce only one line
- break, not a line break plus a blank line.
-
- * The horizontal tab character (code position 9 in the HTML
- document character set) must be interpreted as the smallest
- positive nonzero number of spaces which will leave the
- number of characters so far on the line as a multiple of 8.
- Documents should not contain tab characters, as they are not
- supported consistently.
-
- Example of use:
-
- <PRE>
- Line 1.
- Line 2 is to the right of line 1. <a href="abc">abc</a>
- Line 3 aligns with line 2. <a href="def">def</a>
- </PRE>
-
-
-
-
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 26]</span>
-<a name="page-27" id="page-27" href="#page-27" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
-<span class="h5"><a name="section-5.5.2.1">5.5.2.1</a>. Example and Listing: XMP, LISTING</span>
-
- The <XMP> and <LISTING> elements are similar to the <PRE> element,
- but they have a different syntax. Their content is declared as CDATA,
- which means that no markup except the end-tag open delimiter-in-
- context is recognized (see 9.6 "Delimiter Recognition" of [<a href="#ref-SGML">SGML</a>]).
-
- NOTE - In a previous draft of the HTML specification, the syntax
- of <XMP> and <LISTING> elements allowed closing tags to be treated
- as data characters, as long as the tag name was not <XMP> or
- <LISTING>, respectively.
-
- Since CDATA declared content has a number of unfortunate interactions
- with processing techniques and tends to be used and implemented
- inconsistently, HTML documents should not contain <XMP> nor <LISTING>
- elements -- the <PRE> tag is more expressive and more consistently
- supported.
-
- The <LISTING> element should be rendered so that at least 132
- characters fit on a line. The <XMP> element should be rendered so
- that at least 80 characters fit on a line but is otherwise identical
- to the <LISTING> element.
-
- NOTE - In a previous draft, HTML included a <PLAINTEXT> element
- that is similar to the <LISTING> element, except that there is no
- closing tag: all characters after the <PLAINTEXT> start-tag are
- data.
-
-<span class="h4"><a name="section-5.5.3">5.5.3</a>. Address: ADDRESS</span>
-
- The <ADDRESS> element contains such information as address, signature
- and authorship, often at the beginning or end of the body of a
- document.
-
- Typically, the <ADDRESS> element is rendered in an italic typeface
- and may be indented.
-
- Example of use:
-
- <ADDRESS>
- Newsletter editor<BR>
- J.R. Brown<BR>
- JimquickPost News, Jimquick, CT 01234<BR>
- Tel (123) 456 7890
- </ADDRESS>
-
-
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 27]</span>
-<a name="page-28" id="page-28" href="#page-28" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
-<span class="h4"><a name="section-5.5.4">5.5.4</a>. Block Quote: BLOCKQUOTE</span>
-
- The <BLOCKQUOTE> element contains text quoted from another source.
-
- A typical rendering might be a slight extra left and right indent,
- and/or italic font. The <BLOCKQUOTE> typically provides space above
- and below the quote.
-
- Single-font rendition may reflect the quotation style of Internet
- mail by putting a vertical line of graphic characters, such as the
- greater than symbol (>), in the left margin.
-
- Example of use:
-
- I think the play ends
- <BLOCKQUOTE>
- <P>Soft you now, the fair Ophelia. Nymph, in thy orisons, be all
- my sins remembered.
- </BLOCKQUOTE>
- but I am not sure.
-
-<span class="h3"><a name="section-5.6">5.6</a>. List Elements</span>
-
- HTML includes a number of list elements. They may be used in
- combination; for example, a <OL> may be nested in an <LI> element of
- a <UL>.
-
- The COMPACT attribute suggests that a compact rendering be used.
-
-<span class="h4"><a name="section-5.6.1">5.6.1</a>. Unordered List: UL, LI</span>
-
- The <UL> represents a list of items -- typically rendered as a
- bulleted list.
-
- The content of a <UL> element is a sequence of <LI> elements. For
- example:
-
- <UL>
- <LI>First list item
- <LI>Second list item
- <p>second paragraph of second item
- <LI>Third list item
- </UL>
-
-<span class="h4"><a name="section-5.6.2">5.6.2</a>. Ordered List: OL</span>
-
- The <OL> element represents an ordered list of items, sorted by
- sequence or order of importance. It is typically rendered as a
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 28]</span>
-<a name="page-29" id="page-29" href="#page-29" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- numbered list.
-
- The content of a <OL> element is a sequence of <LI> elements. For
- example:
-
- <OL>
- <LI>Click the Web button to open URI window.
- <LI>Enter the URI number in the text field of the Open URI
- window. The Web document you specified is displayed.
- <ol>
- <li>substep 1
- <li>substep 2
- </ol>
- <LI>Click highlighted text to move from one link to another.
- </OL>
-
-<span class="h4"><a name="section-5.6.3">5.6.3</a>. Directory List: DIR</span>
-
- The <DIR> element is similar to the <UL> element. It represents a
- list of short items, typically up to 20 characters each. Items in a
- directory list may be arranged in columns, typically 24 characters
- wide.
-
- The content of a <DIR> element is a sequence of <LI> elements.
- Nested block elements are not allowed in the content of <DIR>
- elements. For example:
-
- <DIR>
- <LI>A-H<LI>I-M
- <LI>M-R<LI>S-Z
- </DIR>
-
-<span class="h4"><a name="section-5.6.4">5.6.4</a>. Menu List: MENU</span>
-
- The <MENU> element is a list of items with typically one line per
- item. The menu list style is typically more compact than the style of
- an unordered list.
-
- The content of a <MENU> element is a sequence of <LI> elements.
- Nested block elements are not allowed in the content of <MENU>
- elements. For example:
-
- <MENU>
- <LI>First item in the list.
- <LI>Second item in the list.
- <LI>Third item in the list.
- </MENU>
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 29]</span>
-<a name="page-30" id="page-30" href="#page-30" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
-<span class="h4"><a name="section-5.6.5">5.6.5</a>. Definition List: DL, DT, DD</span>
-
- A definition list is a list of terms and corresponding definitions.
- Definition lists are typically formatted with the term flush-left and
- the definition, formatted paragraph style, indented after the term.
-
- The content of a <DL> element is a sequence of <DT> elements and/or
- <DD> elements, usually in pairs. Multiple <DT> may be paired with a
- single <DD> element. Documents should not contain multiple
- consecutive <DD> elements.
-
- Example of use:
-
- <DL>
- <DT>Term<DD>This is the definition of the first term.
- <DT>Term<DD>This is the definition of the second term.
- </DL>
-
- If the DT term does not fit in the DT column (typically one third of
- the display area), it may be extended across the page with the DD
- section moved to the next line, or it may be wrapped onto successive
- lines of the left hand column.
-
- The optional COMPACT attribute suggests that a compact rendering be
- used, because the list items are small and/or the entire list is
- large.
-
- Unless the COMPACT attribute is present, an HTML user agent may leave
- white space between successive DT, DD pairs. The COMPACT attribute
- may also reduce the width of the left-hand (DT) column.
-
- <DL COMPACT>
- <DT>Term<DD>This is the first definition in compact format.
- <DT>Term<DD>This is the second definition in compact format.
- </DL>
-
-<span class="h3"><a name="section-5.7">5.7</a>. Phrase Markup</span>
-
- Phrases may be marked up according to idiomatic usage, typographic
- appearance, or for use as hyperlink anchors.
-
- User agents must render highlighted phrases distinctly from plain
- text. Additionally, <EM> content must be rendered as distinct from
- <STRONG> content, and <B> content must be rendered as distinct from <I>
- content.
-
- Phrase elements may be nested within the content of other phrase
- elements; however, HTML user agents may render nested phrase elements
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 30]</span>
-<a name="page-31" id="page-31" href="#page-31" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- indistinctly from non-nested elements:
-
- plain <B>bold <I>italic</I></B> may be rendered
- the same as plain <B>bold </B><I>italic</I>
-
-<span class="h4"><a name="section-5.7.1">5.7.1</a>. Idiomatic Elements</span>
-
- Phrases may be marked up to indicate certain idioms.
-
- NOTE - User agents may support the <DFN> element, not included in
- this specification, as it has been deployed to some extent. It is
- used to indicate the defining instance of a term, and it is
- typically rendered in italic or bold italic.
-
-<span class="h5"><a name="section-5.7.1.1">5.7.1.1</a>. Citation: CITE</span>
-
- The <CITE> element is used to indicate the title of a book or
- other citation. It is typically rendered as italics. For example:
-
- He just couldn't get enough of <cite>The Grapes of Wrath</cite>.
-
-<span class="h5"><a name="section-5.7.1.2">5.7.1.2</a>. Code: CODE</span>
-
- The <CODE> element indicates an example of code, typically
- rendered in a mono-spaced font. The <CODE> element is intended for
- short words or phrases of code; the <PRE> block structuring
- element (5.5.2, "Preformatted Text: PRE") is more appropriate
- for multiple-line listings. For example:
-
- The expression <code>x += 1</code>
- is short for <code>x = x + 1</code>.
-
-<span class="h5"><a name="section-5.7.1.3">5.7.1.3</a>. Emphasis: EM</span>
-
- The <EM> element indicates an emphasized phrase, typically
- rendered as italics. For example:
-
- A singular subject <em>always</em> takes a singular verb.
-
-<span class="h5"><a name="section-5.7.1.4">5.7.1.4</a>. Keyboard: KBD</span>
-
- The <KBD> element indicates text typed by a user, typically
- rendered in a mono-spaced font. This is commonly used in
- instruction manuals. For example:
-
- Enter <kbd>FIND IT</kbd> to search the database.
-
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 31]</span>
-<a name="page-32" id="page-32" href="#page-32" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
-<span class="h5"><a name="section-5.7.1.5">5.7.1.5</a>. Sample: SAMP</span>
-
- The <SAMP> element indicates a sequence of literal characters,
- typically rendered in a mono-spaced font. For example:
-
- The only word containing the letters <samp>mt</samp> is dreamt.
-
-<span class="h5"><a name="section-5.7.1.6">5.7.1.6</a>. Strong Emphasis: STRONG</span>
-
- The <STRONG> element indicates strong emphasis, typically rendered
- in bold. For example:
-
- <strong>STOP</strong>, or I'll say "<strong>STOP</strong>" again!
-
-<span class="h5"><a name="section-5.7.1.7">5.7.1.7</a>. Variable: VAR</span>
-
- The <VAR> element indicates a placeholder variable, typically
- rendered as italic. For example:
-
- Type <SAMP>html-check <VAR>file</VAR> | more</SAMP>
- to check <VAR>file</VAR> for markup errors.
-
-<span class="h4"><a name="section-5.7.2">5.7.2</a>. Typographic Elements</span>
-
- Typographic elements are used to specify the format of marked
- text.
-
- Typical renderings for idiomatic elements may vary between user
- agents. If a specific rendering is necessary -- for example, when
- referring to a specific text attribute as in "The italic parts are
- mandatory" -- a typographic element can be used to ensure that the
- intended typography is used where possible.
-
- NOTE - User agents may support some typographic elements not
- included in this specification, as they have been deployed to some
- extent. The <STRIKE> element indicates a horizontal line through the
- characters, and the <U> element indicates an underline.
-
-<span class="h5"><a name="section-5.7.2.1">5.7.2.1</a>. Bold: B</span>
-
- The <B> element indicates bold text. Where bold typography is
- unavailable, an alternative representation may be used.
-
-<span class="h5"><a name="section-5.7.2.2">5.7.2.2</a>. Italic: I</span>
-
- The <I> element indicates italic text. Where italic typography is
- unavailable, an alternative representation may be used.
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 32]</span>
-<a name="page-33" id="page-33" href="#page-33" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
-<span class="h5"><a name="section-5.7.2.3">5.7.2.3</a>. Teletype: TT</span>
-
- The <TT> element indicates teletype (monospaced) text. Where a
- teletype font is unavailable, an alternative representation may be
- used.
-
-<span class="h4"><a name="section-5.7.3">5.7.3</a>. Anchor: A</span>
-
- The <A> element indicates a hyperlink anchor (see 7, "Hyperlinks").
- At least one of the NAME and HREF attributes should be present.
- Attributes of the <A> element:
-
- HREF
- gives the URI of the head anchor of a hyperlink.
-
- NAME
- gives the name of the anchor, and makes it available as
- a head of a hyperlink.
-
- TITLE
- suggests a title for the destination resource --
- advisory only. The TITLE attribute may be used:
-
- * for display prior to accessing the destination
- resource, for example, as a margin note or on a
- small box while the mouse is over the anchor, or
- while the document is being loaded;
-
- * for resources that do not include a title, such as
- graphics, plain text and Gopher menus, for use as a
- window title.
-
- REL
- The REL attribute gives the relationship(s) described by
- the hyperlink. The value is a whitespace separated list
- of relationship names. The semantics of link
- relationships are not specified in this document.
-
- REV
- same as the REL attribute, but the semantics of the
- relationship are in the reverse direction. A link from A
- to B with REL="X" expresses the same relationship as a
- link from B to A with REV="X". An anchor may have both
- REL and REV attributes.
-
- URN
- specifies a preferred, more persistent identifier for
- the head anchor of the hyperlink. The syntax and
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 33]</span>
-<a name="page-34" id="page-34" href="#page-34" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- semantics of the URN attribute are not yet specified.
-
- METHODS
- specifies methods to be used in accessing the
- destination, as a whitespace-separated list of names.
- The set of applicable names is a function of the scheme
- of the URI in the HREF attribute. For similar reasons as
- for the TITLE attribute, it may be useful to include the
- information in advance in the link. For example, the
- HTML user agent may choose a different rendering as a
- function of the methods allowed; for example, something
- that is searchable may get a different icon.
-
-<span class="h3"><a name="section-5.8">5.8</a>. Line Break: BR</span>
-
- The <BR> element specifies a line break between words (see 6,
- "Characters, Words, and Paragraphs"). For example:
-
- <P> Pease porridge hot<BR>
- Pease porridge cold<BR>
- Pease porridge in the pot<BR>
- Nine days old.
-
-<span class="h3"><a name="section-5.9">5.9</a>. Horizontal Rule: HR</span>
-
- The <HR> element is a divider between sections of text; typically a
- full width horizontal rule or equivalent graphic. For example:
-
- <HR>
- <ADDRESS>February 8, 1995, CERN</ADDRESS>
- </BODY>
-
-<span class="h3"><a name="section-5.10">5.10</a>. Image: IMG</span>
-
- The <IMG> element refers to an image or icon via a hyperlink (see
- 7.3, "Simultaneous Presentation of Image Resources").
-
- HTML user agents may process the value of the ALT attribute as an
- alternative to processing the image resource indicated by the SRC
- attribute.
-
- NOTE - Some HTML user agents can process graphics linked via
- anchors, but not <IMG> graphics. If a graphic is essential, it
- should be referenced from an <A> element rather than an <IMG>
- element. If the graphic is not essential, then the <IMG> element
- is appropriate.
-
- Attributes of the <IMG> element:
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 34]</span>
-<a name="page-35" id="page-35" href="#page-35" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- ALIGN
- alignment of the image with respect to the text
- baseline.
-
- * `TOP' specifies that the top of the image aligns
- with the tallest item on the line containing the
- image.
-
- * `MIDDLE' specifies that the center of the image
- aligns with the baseline of the line containing the
- image.
-
- * `BOTTOM' specifies that the bottom of the image
- aligns with the baseline of the line containing the
- image.
-
- ALT
- text to use in place of the referenced image resource,
- for example due to processing constraints or user
- preference.
-
- ISMAP
- indicates an image map (see 7.6, "Image Maps").
-
- SRC
- specifies the URI of the image resource.
-
- NOTE - In practice, the media types of image
- resources are limited to a few raster graphic
- formats: typically `image/gif', `image/jpeg'. In
- particular, `text/html' resources are not
- intended to be used as image resources.
-
- Examples of use:
-
- <IMG SRC="triangle.xbm" ALT="Warning:"> Be sure
- to read these instructions.
-
- <a href="http://machine/htbin/imagemap/sample">
- <IMG SRC="sample.xbm" ISMAP>
- </a>
-
-<span class="h2"><a name="section-6">6</a>. Characters, Words, and Paragraphs</span>
-
- An HTML user agent should present the body of an HTML document as a
- collection of typeset paragraphs and preformatted text. Except for
- preformatted elements (<PRE>, <XMP>, <LISTING>, <TEXTAREA>), each
- block structuring element is regarded as a paragraph by taking the
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 35]</span>
-<a name="page-36" id="page-36" href="#page-36" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- data characters in its content and the content of its descendant
- elements, concatenating them, and splitting the result into words,
- separated by space, tab, or record end characters (and perhaps hyphen
- characters). The sequence of words is typeset as a paragraph by
- breaking it into lines.
-
-<span class="h3"><a name="section-6.1">6.1</a>. The HTML Document Character Set</span>
-
- The document character set specified in 9.5, "SGML Declaration for
- HTML" must be supported by HTML user agents. It includes the graphic
- characters of Latin Alphabet No. 1, or simply Latin-1. Latin-1
- comprises 191 graphic characters, including the alphabets of most
- Western European languages.
-
- NOTE - Use of the non-breaking space and soft hyphen indicator
- characters is discouraged because support for them is not widely
- deployed.
-
- NOTE - To support non-western writing systems, a larger character
- repertoire will be specified in a future version of HTML. The
- document character set will be [<a href="#ref-ISO-10646">ISO-10646</a>], or some subset that
- agrees with [<a href="#ref-ISO-10646">ISO-10646</a>]; in particular, all numeric character
- references must use code positions assigned by [<a href="#ref-ISO-10646">ISO-10646</a>].
-
- In SGML applications, the use of control characters is limited in
- order to maximize the chance of successful interchange over
- heterogeneous networks and operating systems. In the HTML document
- character set only three control characters are allowed: Horizontal
- Tab, Carriage Return, and Line Feed (code positions 9, 13, and 10).
-
- The HTML DTD references the Added Latin 1 entity set, to allow
- mnemonic representation of selected Latin 1 characters using only the
- widely supported ASCII character repertoire. For example:
-
- Kurt G&ouml;del was a famous logician and mathematician.
-
- See 9.7.2, "ISO Latin 1 Character Entity Set" for a table of the
- "Added Latin 1" entities, and 13, "The HTML Coded Character Set" for
- a table of the code positions of [ISO 8859-1] and the control
- characters in the HTML document character set.
-
-<span class="h2"><a name="section-7">7</a>. Hyperlinks</span>
-
- In addition to general purpose elements such as paragraphs and lists,
- HTML documents can express hyperlinks. An HTML user agent allows the
- user to navigate these hyperlinks.
-
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 36]</span>
-<a name="page-37" id="page-37" href="#page-37" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- A hyperlink is a relationship between two anchors, called the head
- and the tail of the hyperlink [DEXTER]. Anchors are identified by an
- anchor address: an absolute Uniform Resource Identifier (URI),
- optionally followed by a '#' and a sequence of characters called a
- fragment identifier. For example:
-
- <a href="http://www.w3.org/hypertext/WWW/TheProject.html">http://www.w3.org/hypertext/WWW/TheProject.html</a>
- <a href="http://www.w3.org/hypertext/WWW/TheProject.html#z31">http://www.w3.org/hypertext/WWW/TheProject.html#z31</a>
-
- In an anchor address, the URI refers to a resource; it may be used in
- a variety of information retrieval protocols to obtain an entity that
- represents the resource, such as an HTML document. The fragment
- identifier, if present, refers to some view on, or portion of the
- resource.
-
- Each of the following markup constructs indicates the tail anchor of
- a hyperlink or set of hyperlinks:
-
- * <A> elements with HREF present.
-
- * <LINK> elements.
-
- * <IMG> elements.
-
- * <INPUT> elements with the SRC attribute present.
-
- * <ISINDEX> elements.
-
- * <FORM> elements with `METHOD=GET'.
-
- These markup constructs refer to head anchors by a URI, either
- absolute or relative, or a fragment identifier, or both.
-
- In the case of a relative URI, the absolute URI in the address of the
- head anchor is the result of combining the relative URI with a base
- absolute URI as in [<a href="#ref-RELURL" title='"Relative Uniform Resource Locators"'>RELURL</a>]. The base document is taken from the
- document's <BASE> element, if present; else, it is determined as in
- [<a href="#ref-RELURL" title='"Relative Uniform Resource Locators"'>RELURL</a>].
-
-<span class="h3"><a name="section-7.1">7.1</a>. Accessing Resources</span>
-
- Once the address of the head anchor is determined, the user agent may
- obtain a representation of the resource.
-
- For example, if the base URI is `http://host/x/y.html' and the
- document contains:
-
- <img src="../icons/abc.gif">
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 37]</span>
-<a name="page-38" id="page-38" href="#page-38" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- then the user agent uses the URI `http://host/icons/abc.gif' to
- access the resource, as in [<a href="#ref-URL" title='"Uniform Resource Locators (URL)"'>URL</a>].
-
-<span class="h3"><a name="section-7.2">7.2</a>. Activation of Hyperlinks</span>
-
- An HTML user agent allows the user to navigate the content of the
- document and request activation of hyperlinks denoted by <A>
- elements. HTML user agents should also allow activation of <LINK>
- element hyperlinks.
-
- To activate a link, the user agent obtains a representation of the
- resource identified in the address of the head anchor. If the
- representation is another HTML document, navigation may begin again
- with this new document.
-
-<span class="h3"><a name="section-7.3">7.3</a>. Simultaneous Presentation of Image Resources</span>
-
- An HTML user agent may activate hyperlinks indicated by <IMG> and
- <INPUT> elements concurrently with processing the document; that is,
- image hyperlinks may be processed without explicit request by the
- user. Image resources should be embedded in the presentation at the
- point of the tail anchor, that is the <IMG> or <INPUT> element.
-
- <LINK> hyperlinks may also be processed without explicit user
- request; for example, style sheet resources may be processed before
- or during the processing of the document.
-
-<span class="h3"><a name="section-7.4">7.4</a>. Fragment Identifiers</span>
-
- Any characters following a `#' character in a hypertext address
- constitute a fragment identifier. In particular, an address of the
- form `#fragment' refers to an anchor in the same document.
-
- The meaning of fragment identifiers depends on the media type of the
- representation of the anchor's resource. For `text/html'
- representations, it refers to the <A> element with a NAME attribute
- whose value is the same as the fragment identifier. The matching is
- case sensitive. The document should have exactly one such element.
- The user agent should indicate the anchor element, for example by
- scrolling to and/or highlighting the phrase.
-
- For example, if the base URI is `http://host/x/y.html' and the user
- activated the link denoted by the following markup:
-
- <p> See: <a href="app1.html#bananas">appendix 1</a>
- for more detail on bananas.
-
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 38]</span>
-<a name="page-39" id="page-39" href="#page-39" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- Then the user agent accesses the resource identified by
- `http://host/x/app1.html'. Assuming the resource is represented using
- the `text/html' media type, the user agent must locate the <A>
- element whose NAME attribute is `bananas' and begin navigation there.
-
-<span class="h3"><a name="section-7.5">7.5</a>. Queries and Indexes</span>
-
- The <ISINDEX> element represents a set of hyperlinks. The user can
- choose from the set by providing keywords to the user agent. The
- user agent computes the head URI by appending `?' and the keywords to
- the base URI. The keywords are escaped according to [<a href="#ref-URL" title='"Uniform Resource Locators (URL)"'>URL</a>] and joined
- by `+'. For example, if a document contains:
-
- <BASE HREF="http://host/index">
- <ISINDEX>
-
- and the user provides the keywords `apple' and `berry', then the
- user agent must access the resource
- `http://host/index?apple+berry'.
-
- <FORM> elements with `METHOD=GET' also represent sets of
- hyperlinks. See 8.2.2, "Query Forms: METHOD=GET" for details.
-
-<span class="h3"><a name="section-7.6">7.6</a>. Image Maps</span>
-
- If the ISMAP attribute is present on an <IMG> element, the <IMG>
- element must be contained in an <A> element with an HREF present.
- This construct represents a set of hyperlinks. The user can choose
- from the set by choosing a pixel of the image. The user agent
- computes the head URI by appending `?' and the x and y coordinates of
- the pixel to the URI given in the <A> element. For example, if a
- document contains:
-
- <!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
- <head><title>ImageMap Example</title>
- <BASE HREF="http://host/index"></head>
- <body>
- <p> Choose any of these icons:<br>
- <a href="/cgi-bin/imagemap"><img ismap src="icons.gif"></a>
-
- and the user chooses the upper-leftmost pixel, the chosen
- hyperlink is the one with the URI
- `http://host/cgi-bin/imagemap?0,0'.
-
-
-
-
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 39]</span>
-<a name="page-40" id="page-40" href="#page-40" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
-<span class="h2"><a name="section-8">8</a>. Forms</span>
-
- A form is a template for a form data set and an associated
- method and action URI. A form data set is a sequence of
- name/value pair fields. The names are specified on the NAME
- attributes of form input elements, and the values are given
- initial values by various forms of markup and edited by the
- user. The resulting form data set is used to access an
- information service as a function of the action and method.
-
- Forms elements can be mixed in with document structuring
- elements. For example, a <PRE> element may contain a <FORM>
- element, or a <FORM> element may contain lists which contain
- <INPUT> elements. This gives considerable flexibility in
- designing the layout of forms.
-
- Form processing is a level 2 feature.
-
-<span class="h3"><a name="section-8.1">8.1</a>. Form Elements</span>
-
-<span class="h4"><a name="section-8.1.1">8.1.1</a>. Form: FORM</span>
-
- The <FORM> element contains a sequence of input elements, along
- with document structuring elements. The attributes are:
-
- ACTION
- specifies the action URI for the form. The action URI of
- a form defaults to the base URI of the document (see 7,
- "Hyperlinks").
-
- METHOD
- selects a method of accessing the action URI. The set of
- applicable methods is a function of the scheme of the
- action URI of the form. See 8.2.2, "Query Forms:
- METHOD=GET" and 8.2.3, "Forms with Side-Effects:
- METHOD=POST".
-
- ENCTYPE
- specifies the media type used to encode the name/value
- pairs for transport, in case the protocol does not
- itself impose a format. See 8.2.1, "The form-urlencoded
- Media Type".
-
-<span class="h4"><a name="section-8.1.2">8.1.2</a>. Input Field: INPUT</span>
-
- The <INPUT> element represents a field for user input. The TYPE
- attribute discriminates between several variations of fields.
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 40]</span>
-<a name="page-41" id="page-41" href="#page-41" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- The <INPUT> element has a number of attributes. The set of applicable
- attributes depends on the value of the TYPE attribute.
-
-<span class="h5"><a name="section-8.1.2.1">8.1.2.1</a>. Text Field: INPUT TYPE=TEXT</span>
-
- The default value of the TYPE attribute is `TEXT', indicating a
- single line text entry field. (Use the <TEXTAREA> element for multi-
- line text fields.)
-
- Required attributes are:
-
- NAME
- name for the form field corresponding to this element.
-
- The optional attributes are:
-
- MAXLENGTH
- constrains the number of characters that can be entered
- into a text input field. If the value of MAXLENGTH is
- greater than the value of the SIZE attribute, the field
- should scroll appropriately. The default number of
- characters is unlimited.
-
- SIZE
- specifies the amount of display space allocated to this
- input field according to its type. The default depends
- on the user agent.
-
- VALUE
- The initial value of the field.
-
- For example:
-
-<p>Street Address: <input name=street><br>
-Postal City code: <input name=city size=16 maxlength=16><br>
-Zip Code: <input name=zip size=10 maxlength=10 value="99999-9999"><br>
-
-<span class="h5"><a name="section-8.1.2.2">8.1.2.2</a>. Password Field: INPUT TYPE=PASSWORD</span>
-
- An <INPUT> element with `TYPE=PASSWORD' is a text field as above,
- except that the value is obscured as it is entered. (see also: 10,
- "Security Considerations").
-
- For example:
-
-<p>Name: <input name=login> Password: <input type=password name=passwd>
-
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 41]</span>
-<a name="page-42" id="page-42" href="#page-42" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
-<span class="h5"><a name="section-8.1.2.3">8.1.2.3</a>. Check Box: INPUT TYPE=CHECKBOX</span>
-
- An <INPUT> element with `TYPE=CHECKBOX' represents a boolean choice.
- A set of such elements with the same name represents an n-of-many
- choice field. Required attributes are:
-
- NAME
- symbolic name for the form field corresponding to this
- element or group of elements.
-
- VALUE
- The portion of the value of the field contributed by
- this element.
-
- Optional attributes are:
-
- CHECKED
- indicates that the initial state is on.
-
- For example:
-
- <p>What flavors do you like?
- <input type=checkbox name=flavor value=vanilla>Vanilla<br>
- <input type=checkbox name=flavor value=strawberry>Strawberry<br>
- <input type=checkbox name=flavor value=chocolate checked>Chocolate<br>
-
-<span class="h5"><a name="section-8.1.2.4">8.1.2.4</a>. Radio Button: INPUT TYPE=RADIO</span>
-
- An <INPUT> element with `TYPE=RADIO' represents a boolean choice. A
- set of such elements with the same name represents a 1-of-many choice
- field. The NAME and VALUE attributes are required as for check boxes.
- Optional attributes are:
-
- CHECKED
- indicates that the initial state is on.
- At all times, exactly one of the radio buttons in a set is checked.
- If none of the <INPUT> elements of a set of radio buttons specifies
- `CHECKED', then the user agent must check the first radio button of
- the set initially.
-
- For example:
-
- <p>Which is your favorite?
- <input type=radio name=flavor value=vanilla>Vanilla<br>
- <input type=radio name=flavor value=strawberry>Strawberry<br>
- <input type=radio name=flavor value=chocolate>Chocolate<br>
-
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 42]</span>
-<a name="page-43" id="page-43" href="#page-43" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
-<span class="h5"><a name="section-8.1.2.5">8.1.2.5</a>. Image Pixel: INPUT TYPE=IMAGE</span>
-
- An <INPUT> element with `TYPE=IMAGE' specifies an image resource to
- display, and allows input of two form fields: the x and y coordinate
- of a pixel chosen from the image. The names of the fields are the
- name of the field with `.x' and `.y' appended. `TYPE=IMAGE' implies
- `TYPE=SUBMIT' processing; that is, when a pixel is chosen, the form
- as a whole is submitted.
-
- The NAME attribute is required as for other input fields. The SRC
- attribute is required and the ALIGN is optional as for the <IMG>
- element (see 5.10, "Image: IMG").
-
- For example:
-
- <p>Choose a point on the map:
- <input type=image name=point src="map.gif">
-
-<span class="h5"><a name="section-8.1.2.6">8.1.2.6</a>. Hidden Field: INPUT TYPE=HIDDEN</span>
-
- An <INPUT> element with `TYPE=HIDDEN' represents a hidden field. The
- user does not interact with this field; instead, the VALUE attribute
- specifies the value of the field. The NAME and VALUE attributes are
- required.
-
- For example:
-
- <input type=hidden name=context value="l2k3j4l2k3j4l2k3j4lk23">
-
-<span class="h5"><a name="section-8.1.2.7">8.1.2.7</a>. Submit Button: INPUT TYPE=SUBMIT</span>
-
- An <INPUT> element with `TYPE=SUBMIT' represents an input option,
- typically a button, that instructs the user agent to submit the form.
- Optional attributes are:
-
- NAME
- indicates that this element contributes a form field
- whose value is given by the VALUE attribute. If the NAME
- attribute is not present, this element does not
- contribute a form field.
-
- VALUE
- indicates a label for the input (button).
-
- You may submit this request internally:
- <input type=submit name=recipient value=internal><br>
- or to the external world:
- <input type=submit name=recipient value=world>
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 43]</span>
-<a name="page-44" id="page-44" href="#page-44" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
-<span class="h5"><a name="section-8.1.2.8">8.1.2.8</a>. Reset Button: INPUT TYPE=RESET</span>
-
- An <INPUT> element with `TYPE=RESET' represents an input option,
- typically a button, that instructs the user agent to reset the form's
- fields to their initial states. The VALUE attribute, if present,
- indicates a label for the input (button).
-
- When you are finished, you may submit this request:
- <input type=submit><br>
- You may clear the form and start over at any time: <input type=reset>
-
-<span class="h4"><a name="section-8.1.3">8.1.3</a>. Selection: SELECT</span>
-
- The <SELECT> element constrains the form field to an enumerated list
- of values. The values are given in <OPTION> elements. Attributes
- are:
-
- MULTIPLE
- indicates that more than one option may be included in
- the value.
-
- NAME
- specifies the name of the form field.
-
- SIZE
- specifies the number of visible items. Select fields of
- size one are typically pop-down menus, whereas select
- fields with size greater than one are typically lists.
-
- For example:
-
- <SELECT NAME="flavor">
- <OPTION>Vanilla
- <OPTION>Strawberry
- <OPTION value="RumRasin">Rum and Raisin
- <OPTION selected>Peach and Orange
- </SELECT>
-
- The initial state has the first option selected, unless a SELECTED
- attribute is present on any of the <OPTION> elements.
-
-<span class="h5"><a name="section-8.1.3.1">8.1.3.1</a>. Option: OPTION</span>
-
- The Option element can only occur within a Select element. It
- represents one choice, and has the following attributes:
-
- SELECTED
- Indicates that this option is initially selected.
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 44]</span>
-<a name="page-45" id="page-45" href="#page-45" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- VALUE
- indicates the value to be returned if this option is
- chosen. The field value defaults to the content of the
- <OPTION> element.
-
- The content of the <OPTION> element is presented to the user to
- represent the option. It is used as a returned value if the VALUE
- attribute is not present.
-
-<span class="h4"><a name="section-8.1.4">8.1.4</a>. Text Area: TEXTAREA</span>
-
- The <TEXTAREA> element represents a multi-line text field.
- Attributes are:
-
- COLS
- the number of visible columns to display for the text
- area, in characters.
-
- NAME
- Specifies the name of the form field.
-
- ROWS
- The number of visible rows to display for the text area,
- in characters.
-
- For example:
-
- <TEXTAREA NAME="address" ROWS=6 COLS=64>
- HaL Computer Systems
- 1315 Dell Avenue
- Campbell, California 95008
- </TEXTAREA>
-
- The content of the <TEXTAREA> element is the field's initial value.
-
- Typically, the ROWS and COLS attributes determine the visible
- dimension of the field in characters. The field is typically rendered
- in a fixed-width font. HTML user agents should allow text to extend
- beyond these limits by scrolling as needed.
-
-<span class="h3"><a name="section-8.2">8.2</a>. Form Submission</span>
-
- An HTML user agent begins processing a form by presenting the
- document with the fields in their initial state. The user is allowed
- to modify the fields, constrained by the field type etc. When the
- user indicates that the form should be submitted (using a submit
- button or image input), the form data set is processed according to
- its method, action URI and enctype.
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 45]</span>
-<a name="page-46" id="page-46" href="#page-46" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- When there is only one single-line text input field in a form, the
- user agent should accept Enter in that field as a request to submit
- the form.
-
-<span class="h4"><a name="section-8.2.1">8.2.1</a>. The form-urlencoded Media Type</span>
-
- The default encoding for all forms is `application/x-www-form-
- urlencoded'. A form data set is represented in this media type as
- follows:
-
- 1. The form field names and values are escaped: space
- characters are replaced by `+', and then reserved characters
- are escaped as per [<a href="#ref-URL" title='"Uniform Resource Locators (URL)"'>URL</a>]; that is, non-alphanumeric
- characters are replaced by `%HH', a percent sign and two
- hexadecimal digits representing the ASCII code of the
- character. Line breaks, as in multi-line text field values,
- are represented as CR LF pairs, i.e. `%0D%0A'.
-
- 2. The fields are listed in the order they appear in the
- document with the name separated from the value by `=' and
- the pairs separated from each other by `&'. Fields with null
- values may be omitted. In particular, unselected radio
- buttons and checkboxes should not appear in the encoded
- data, but hidden fields with VALUE attributes present
- should.
-
- NOTE - The URI from a query form submission can be
- used in a normal anchor style hyperlink.
- Unfortunately, the use of the `&' character to
- separate form fields interacts with its use in SGML
- attribute values as an entity reference delimiter.
- For example, the URI `http://host/?x=1&y=2' must be
- written `<a href="http://host/?x=1&#38;y=2"' or `<a
- href="http://host/?x=1&amp;y=2">'.
-
- HTTP server implementors, and in particular, CGI
- implementors are encouraged to support the use of
- `;' in place of `&' to save users the trouble of
- escaping `&' characters this way.
-
-<span class="h4"><a name="section-8.2.2">8.2.2</a>. Query Forms: METHOD=GET</span>
-
- If the processing of a form is idempotent (i.e. it has no lasting
- observable effect on the state of the world), then the form method
- should be `GET'. Many database searches have no visible side-effects
- and make ideal applications of query forms.
-
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 46]</span>
-<a name="page-47" id="page-47" href="#page-47" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- To process a form whose action URL is an HTTP URL and whose method is
- `GET', the user agent starts with the action URI and appends a `?'
- and the form data set, in `application/x-www-form-urlencoded' format
- as above. The user agent then traverses the link to this URI just as
- if it were an anchor (see 7.2, "Activation of Hyperlinks").
-
- NOTE - The URL encoding may result in very long URIs, which cause
- some historical HTTP server implementations to exhibit defective
- behavior. As a result, some HTML forms are written using
- `METHOD=POST' even though the form submission has no side-effects.
-
-<span class="h4"><a name="section-8.2.3">8.2.3</a>. Forms with Side-Effects: METHOD=POST</span>
-
- If the service associated with the processing of a form has side
- effects (for example, modification of a database or subscription to a
- service), the method should be `POST'.
-
- To process a form whose action URL is an HTTP URL and whose method is
- `POST', the user agent conducts an HTTP POST transaction using the
- action URI, and a message body of type `application/x-www-form-
- urlencoded' format as above. The user agent should display the
- response from the HTTP POST interaction just as it would display the
- response from an HTTP GET above.
-
-<span class="h4"><a name="section-8.2.4">8.2.4</a>. Example Form Submission: Questionnaire Form</span>
-
- Consider the following document:
-
- <!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
- <title>Sample of HTML Form Submission</title>
- <H1>Sample Questionnaire</H1>
- <P>Please fill out this questionnaire:
- <FORM METHOD="POST" ACTION="http://www.w3.org/sample">
- <P>Your name: <INPUT NAME="name" size="48">
- <P>Male <INPUT NAME="gender" TYPE=RADIO VALUE="male">
- <P>Female <INPUT NAME="gender" TYPE=RADIO VALUE="female">
- <P>Number in family: <INPUT NAME="family" TYPE=text>
- <P>Cities in which you maintain a residence:
- <UL>
- <LI>Kent <INPUT NAME="city" TYPE=checkbox VALUE="kent">
- <LI>Miami <INPUT NAME="city" TYPE=checkbox VALUE="miami">
- <LI>Other <TEXTAREA NAME="other" cols=48 rows=4></textarea>
- </UL>
- Nickname: <INPUT NAME="nickname" SIZE="42">
- <P>Thank you for responding to this questionnaire.
- <P><INPUT TYPE=SUBMIT> <INPUT TYPE=RESET>
- </FORM>
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 47]</span>
-<a name="page-48" id="page-48" href="#page-48" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- The initial state of the form data set is:
-
- name
- ""
-
- gender
- "male"
-
- family
- ""
-
- other
- ""
-
- nickname
- ""
-
- Note that the radio input has an initial value, while the
- checkbox has none.
-
- The user might edit the fields and request that the form be
- submitted. At that point, suppose the values are:
-
- name
- "John Doe"
-
- gender
- "male"
-
- family
- "5"
-
- city
- "kent"
-
- city
- "miami"
-
- other
- "abc\ndef"
-
- nickname
- "J&D"
-
- The user agent then conducts an HTTP POST transaction using the URI
- `http://www.w3.org/sample'. The message body would be (ignore the
- line break):
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 48]</span>
-<a name="page-49" id="page-49" href="#page-49" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- name=John+Doe&gender=male&family=5&city=kent&city=miami&
- other=abc%0D%0Adef&nickname=J%26D
-
-<span class="h2"><a name="section-9">9</a>. HTML Public Text</span>
-
-<span class="h3"><a name="section-9.1">9.1</a>. HTML DTD</span>
-
- This is the Document Type Definition for the HyperText Markup
- Language, level 2.
-
-<!-- html.dtd
-
- Document Type Definition for the HyperText Markup Language
- (HTML DTD)
-
- $Id: html.dtd,v 1.30 1995/09/21 23:30:19 connolly Exp $
-
- Author: Daniel W. Connolly <connolly at w3.org>
- See Also: html.decl, html-1.dtd
- <a href="http://www.w3.org/hypertext/WWW/MarkUp/MarkUp.html">http://www.w3.org/hypertext/WWW/MarkUp/MarkUp.html</a>
--->
-
-<!ENTITY % HTML.Version
- "-//IETF//DTD HTML 2.0//EN"
-
- -- Typical usage:
-
- <!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
- <html>
- ...
- </html>
- --
- >
-
-
-<!--============ Feature Test Entities ========================-->
-
-<!ENTITY % HTML.Recommended "IGNORE"
- -- Certain features of the language are necessary for
- compatibility with widespread usage, but they may
- compromise the structural integrity of a document.
- This feature test entity enables a more prescriptive
- document type definition that eliminates
- those features.
- -->
-
-<![ %HTML.Recommended [
- <!ENTITY % HTML.Deprecated "IGNORE">
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 49]</span>
-<a name="page-50" id="page-50" href="#page-50" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
-]]>
-
-<!ENTITY % HTML.Deprecated "INCLUDE"
- -- Certain features of the language are necessary for
- compatibility with earlier versions of the specification,
- but they tend to be used and implemented inconsistently,
- and their use is deprecated. This feature test entity
- enables a document type definition that eliminates
- these features.
- -->
-
-<!ENTITY % HTML.Highlighting "INCLUDE"
- -- Use this feature test entity to validate that a
- document uses no highlighting tags, which may be
- ignored on minimal implementations.
- -->
-
-<!ENTITY % HTML.Forms "INCLUDE"
- -- Use this feature test entity to validate that a document
- contains no forms, which may not be supported in minimal
- implementations
- -->
-
-<!--============== Imported Names ==============================-->
-
-<!ENTITY % Content-Type "CDATA"
- -- meaning an internet media type
- (aka MIME content type, as per <a href="./rfc1521">RFC1521</a>)
- -->
-
-<!ENTITY % HTTP-Method "GET | POST"
- -- as per HTTP specification, in progress
- -->
-
-<!--========= DTD "Macros" =====================-->
-
-<!ENTITY % heading "H1|H2|H3|H4|H5|H6">
-
-<!ENTITY % list " UL | OL | DIR | MENU " >
-
-
-<!--======= Character mnemonic entities =================-->
-
-<!ENTITY % ISOlat1 PUBLIC
- "ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML">
-%ISOlat1;
-
-<!ENTITY amp CDATA "&#38;" -- ampersand -->
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 50]</span>
-<a name="page-51" id="page-51" href="#page-51" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
-<!ENTITY gt CDATA "&#62;" -- greater than -->
-<!ENTITY lt CDATA "&#60;" -- less than -->
-<!ENTITY quot CDATA "&#34;" -- double quote -->
-
-
-<!--========= SGML Document Access (SDA) Parameter Entities =====-->
-
-<!-- HTML 2.0 contains SGML Document Access (SDA) fixed attributes
-in support of easy transformation to the International Committee
-for Accessible Document Design (ICADD) DTD
- "-//EC-USA-CDA/ICADD//DTD ICADD22//EN".
-<span class="h1"><a name="appendix-ICADD">ICADD</a> applications are designed to support usable access to</span>
-structured information by print-impaired individuals through
-Braille, large print and voice synthesis. For more information on
-<span class="h1"><a name="appendix-SDA">SDA</a> & ICADD:</span>
- - ISO 12083:1993, Annex A.8, Facilities for Braille,
- large print and computer voice
- - ICADD ListServ
- <ICADD%ASUACAD.BITNET at ARIZVM1.ccit.arizona.edu>
- - Usenet news group bit.listserv.easi
- - Recording for the Blind, +1 800 221 4792
--->
-
-<!ENTITY % SDAFORM "SDAFORM CDATA #FIXED"
- -- one to one mapping -->
-<!ENTITY % SDARULE "SDARULE CDATA #FIXED"
- -- context-sensitive mapping -->
-<!ENTITY % SDAPREF "SDAPREF CDATA #FIXED"
- -- generated text prefix -->
-<!ENTITY % SDASUFF "SDASUFF CDATA #FIXED"
- -- generated text suffix -->
-<!ENTITY % SDASUSP "SDASUSP NAME #FIXED"
- -- suspend transform process -->
-
-
-<!--========== Text Markup =====================-->
-
-<![ %HTML.Highlighting [
-
-<!ENTITY % font " TT | B | I ">
-
-<!ENTITY % phrase "EM | STRONG | CODE | SAMP | KBD | VAR | CITE ">
-
-<!ENTITY % text "#PCDATA | A | IMG | BR | %phrase | %font">
-
-<!ELEMENT (%font;|%phrase) - - (%text)*>
-<!ATTLIST ( TT | CODE | SAMP | KBD | VAR )
- %SDAFORM; "Lit"
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 51]</span>
-<a name="page-52" id="page-52" href="#page-52" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- >
-<!ATTLIST ( B | STRONG )
- %SDAFORM; "B"
- >
-<!ATTLIST ( I | EM | CITE )
- %SDAFORM; "It"
- >
-
-<!-- <TT> Typewriter text -->
-<!-- <B> Bold text -->
-<!-- <I> Italic text -->
-
-<!-- <EM> Emphasized phrase -->
-<!-- <STRONG> Strong emphasis -->
-<!-- <CODE> Source code phrase -->
-<!-- <SAMP> Sample text or characters -->
-<!-- <KBD> Keyboard phrase, e.g. user input -->
-<!-- <VAR> Variable phrase or substitutable -->
-<!-- <CITE> Name or title of cited work -->
-
-<!ENTITY % pre.content "#PCDATA | A | HR | BR | %font | %phrase">
-
-]]>
-
-<!ENTITY % text "#PCDATA | A | IMG | BR">
-
-<!ELEMENT BR - O EMPTY>
-<!ATTLIST BR
- %SDAPREF; "&#RE;"
- >
-
-<!-- <BR> Line break -->
-
-
-<!--========= Link Markup ======================-->
-
-<!ENTITY % linkType "NAMES">
-
-<!ENTITY % linkExtraAttributes
- "REL %linkType #IMPLIED
- REV %linkType #IMPLIED
- URN CDATA #IMPLIED
- TITLE CDATA #IMPLIED
- METHODS NAMES #IMPLIED
- ">
-
-<![ %HTML.Recommended [
- <!ENTITY % A.content "(%text)*"
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 52]</span>
-<a name="page-53" id="page-53" href="#page-53" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- -- <H1><a name="xxx">Heading</a></H1>
- is preferred to
- <a name="xxx"><H1>Heading</H1></a>
- -->
-]]>
-
-<!ENTITY % A.content "(%heading|%text)*">
-
-<!ELEMENT A - - %A.content -(A)>
-<!ATTLIST A
- HREF CDATA #IMPLIED
- NAME CDATA #IMPLIED
- %linkExtraAttributes;
- %SDAPREF; "<Anchor: #AttList>"
- >
-<!-- <A> Anchor; source/destination of link -->
-<!-- <A NAME="..."> Name of this anchor -->
-<!-- <A HREF="..."> Address of link destination -->
-<!-- <A URN="..."> Permanent address of destination -->
-<!-- <A REL=...> Relationship to destination -->
-<!-- <A REV=...> Relationship of destination to this -->
-<!-- <A TITLE="..."> Title of destination (advisory) -->
-<!-- <A METHODS="..."> Operations on destination (advisory) -->
-
-
-<!--========== Images ==========================-->
-
-<!ELEMENT IMG - O EMPTY>
-<!ATTLIST IMG
- SRC CDATA #REQUIRED
- ALT CDATA #IMPLIED
- ALIGN (top|middle|bottom) #IMPLIED
- ISMAP (ISMAP) #IMPLIED
- %SDAPREF; "<Fig><?SDATrans Img: #AttList>#AttVal(Alt)</Fig>"
- >
-
-<!-- <IMG> Image; icon, glyph or illustration -->
-<!-- <IMG SRC="..."> Address of image object -->
-<!-- <IMG ALT="..."> Textual alternative -->
-<!-- <IMG ALIGN=...> Position relative to text -->
-<!-- <IMG ISMAP> Each pixel can be a link -->
-
-<!--========== Paragraphs=======================-->
-
-<!ELEMENT P - O (%text)*>
-<!ATTLIST P
- %SDAFORM; "Para"
- >
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 53]</span>
-<a name="page-54" id="page-54" href="#page-54" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
-<!-- <P> Paragraph -->
-
-
-<!--========== Headings, Titles, Sections ===============-->
-
-<!ELEMENT HR - O EMPTY>
-<!ATTLIST HR
- %SDAPREF; "&#RE;&#RE;"
- >
-
-<!-- <HR> Horizontal rule -->
-
-<!ELEMENT ( %heading ) - - (%text;)*>
-<!ATTLIST H1
- %SDAFORM; "H1"
- >
-<!ATTLIST H2
- %SDAFORM; "H2"
- >
-<!ATTLIST H3
- %SDAFORM; "H3"
- >
-<!ATTLIST H4
- %SDAFORM; "H4"
- >
-<!ATTLIST H5
- %SDAFORM; "H5"
- >
-<!ATTLIST H6
- %SDAFORM; "H6"
- >
-
-<!-- <H1> Heading, level 1 -->
-<!-- <H2> Heading, level 2 -->
-<!-- <H3> Heading, level 3 -->
-<!-- <H4> Heading, level 4 -->
-<!-- <H5> Heading, level 5 -->
-<!-- <H6> Heading, level 6 -->
-
-
-<!--========== Text Flows ======================-->
-
-<![ %HTML.Forms [
- <!ENTITY % block.forms "BLOCKQUOTE | FORM | ISINDEX">
-]]>
-
-<!ENTITY % block.forms "BLOCKQUOTE">
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 54]</span>
-<a name="page-55" id="page-55" href="#page-55" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
-<![ %HTML.Deprecated [
- <!ENTITY % preformatted "PRE | XMP | LISTING">
-]]>
-
-<!ENTITY % preformatted "PRE">
-
-<!ENTITY % block "P | %list | DL
- | %preformatted
- | %block.forms">
-
-<!ENTITY % flow "(%text|%block)*">
-
-<!ENTITY % pre.content "#PCDATA | A | HR | BR">
-<!ELEMENT PRE - - (%pre.content)*>
-<!ATTLIST PRE
- WIDTH NUMBER #implied
- %SDAFORM; "Lit"
- >
-
-<!-- <PRE> Preformatted text -->
-<!-- <PRE WIDTH=...> Maximum characters per line -->
-
-<![ %HTML.Deprecated [
-
-<!ENTITY % literal "CDATA"
- -- historical, non-conforming parsing mode where
- the only markup signal is the end tag
- in full
- -->
-
-<!ELEMENT (XMP|LISTING) - - %literal>
-<!ATTLIST XMP
- %SDAFORM; "Lit"
- %SDAPREF; "Example:&#RE;"
- >
-<!ATTLIST LISTING
- %SDAFORM; "Lit"
- %SDAPREF; "Listing:&#RE;"
- >
-
-<!-- <XMP> Example section -->
-<!-- <LISTING> Computer listing -->
-
-<!ELEMENT PLAINTEXT - O %literal>
-<!-- <PLAINTEXT> Plain text passage -->
-
-<!ATTLIST PLAINTEXT
- %SDAFORM; "Lit"
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 55]</span>
-<a name="page-56" id="page-56" href="#page-56" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- >
-]]>
-
-<!--========== Lists ==================-->
-
-<!ELEMENT DL - - (DT | DD)+>
-<!ATTLIST DL
- COMPACT (COMPACT) #IMPLIED
- %SDAFORM; "List"
- %SDAPREF; "Definition List:"
- >
-
-<!ELEMENT DT - O (%text)*>
-<!ATTLIST DT
- %SDAFORM; "Term"
- >
-
-<!ELEMENT DD - O %flow>
-<!ATTLIST DD
- %SDAFORM; "LItem"
- >
-
-<!-- <DL> Definition list, or glossary -->
-<!-- <DL COMPACT> Compact style list -->
-<!-- <DT> Term in definition list -->
-<!-- <DD> Definition of term -->
-
-<!ELEMENT (OL|UL) - - (LI)+>
-<!ATTLIST OL
- COMPACT (COMPACT) #IMPLIED
- %SDAFORM; "List"
- >
-<!ATTLIST UL
- COMPACT (COMPACT) #IMPLIED
- %SDAFORM; "List"
- >
-<!-- <UL> Unordered list -->
-<!-- <UL COMPACT> Compact list style -->
-<!-- <OL> Ordered, or numbered list -->
-<!-- <OL COMPACT> Compact list style -->
-
-
-<!ELEMENT (DIR|MENU) - - (LI)+ -(%block)>
-<!ATTLIST DIR
- COMPACT (COMPACT) #IMPLIED
- %SDAFORM; "List"
- %SDAPREF; "<LHead>Directory</LHead>"
- >
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 56]</span>
-<a name="page-57" id="page-57" href="#page-57" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
-<!ATTLIST MENU
- COMPACT (COMPACT) #IMPLIED
- %SDAFORM; "List"
- %SDAPREF; "<LHead>Menu</LHead>"
- >
-
-<!-- <DIR> Directory list -->
-<!-- <DIR COMPACT> Compact list style -->
-<!-- <MENU> Menu list -->
-<!-- <MENU COMPACT> Compact list style -->
-
-<!ELEMENT LI - O %flow>
-<!ATTLIST LI
- %SDAFORM; "LItem"
- >
-
-<!-- <LI> List item -->
-
-<!--========== Document Body ===================-->
-
-<![ %HTML.Recommended [
- <!ENTITY % body.content "(%heading|%block|HR|ADDRESS|IMG)*"
- -- <h1>Heading</h1>
- <p>Text ...
- is preferred to
- <h1>Heading</h1>
- Text ...
- -->
-]]>
-
-<!ENTITY % body.content "(%heading | %text | %block |
- HR | ADDRESS)*">
-
-<!ELEMENT BODY O O %body.content>
-
-<!-- <BODY> Document body -->
-
-<!ELEMENT BLOCKQUOTE - - %body.content>
-<!ATTLIST BLOCKQUOTE
- %SDAFORM; "BQ"
- >
-
-<!-- <BLOCKQUOTE> Quoted passage -->
-
-<!ELEMENT ADDRESS - - (%text|P)*>
-<!ATTLIST ADDRESS
- %SDAFORM; "Lit"
- %SDAPREF; "Address:&#RE;"
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 57]</span>
-<a name="page-58" id="page-58" href="#page-58" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- >
-
-<!-- <ADDRESS> Address, signature, or byline -->
-
-
-<!--======= Forms ====================-->
-
-<![ %HTML.Forms [
-
-<!ELEMENT FORM - - %body.content -(FORM) +(INPUT|SELECT|TEXTAREA)>
-<!ATTLIST FORM
- ACTION CDATA #IMPLIED
- METHOD (%HTTP-Method) GET
- ENCTYPE %Content-Type; "application/x-www-form-urlencoded"
- %SDAPREF; "<Para>Form:</Para>"
- %SDASUFF; "<Para>Form End.</Para>"
- >
-
-<!-- <FORM> Fill-out or data-entry form -->
-<!-- <FORM ACTION="..."> Address for completed form -->
-<!-- <FORM METHOD=...> Method of submitting form -->
-<!-- <FORM ENCTYPE="..."> Representation of form data -->
-
-<!ENTITY % InputType "(TEXT | PASSWORD | CHECKBOX |
- RADIO | SUBMIT | RESET |
- IMAGE | HIDDEN )">
-<!ELEMENT INPUT - O EMPTY>
-<!ATTLIST INPUT
- TYPE %InputType TEXT
- NAME CDATA #IMPLIED
- VALUE CDATA #IMPLIED
- SRC CDATA #IMPLIED
- CHECKED (CHECKED) #IMPLIED
- SIZE CDATA #IMPLIED
- MAXLENGTH NUMBER #IMPLIED
- ALIGN (top|middle|bottom) #IMPLIED
- %SDAPREF; "Input: "
- >
-
-<!-- <INPUT> Form input datum -->
-<!-- <INPUT TYPE=...> Type of input interaction -->
-<!-- <INPUT NAME=...> Name of form datum -->
-<!-- <INPUT VALUE="..."> Default/initial/selected value -->
-<!-- <INPUT SRC="..."> Address of image -->
-<!-- <INPUT CHECKED> Initial state is "on" -->
-<!-- <INPUT SIZE=...> Field size hint -->
-<!-- <INPUT MAXLENGTH=...> Data length maximum -->
-<!-- <INPUT ALIGN=...> Image alignment -->
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 58]</span>
-<a name="page-59" id="page-59" href="#page-59" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
-<!ELEMENT SELECT - - (OPTION+) -(INPUT|SELECT|TEXTAREA)>
-<!ATTLIST SELECT
- NAME CDATA #REQUIRED
- SIZE NUMBER #IMPLIED
- MULTIPLE (MULTIPLE) #IMPLIED
- %SDAFORM; "List"
- %SDAPREF;
- "<LHead>Select #AttVal(Multiple)</LHead>"
- >
-
-<!-- <SELECT> Selection of option(s) -->
-<!-- <SELECT NAME=...> Name of form datum -->
-<!-- <SELECT SIZE=...> Options displayed at a time -->
-<!-- <SELECT MULTIPLE> Multiple selections allowed -->
-
-<!ELEMENT OPTION - O (#PCDATA)*>
-<!ATTLIST OPTION
- SELECTED (SELECTED) #IMPLIED
- VALUE CDATA #IMPLIED
- %SDAFORM; "LItem"
- %SDAPREF;
- "Option: #AttVal(Value) #AttVal(Selected)"
- >
-
-<!-- <OPTION> A selection option -->
-<!-- <OPTION SELECTED> Initial state -->
-<!-- <OPTION VALUE="..."> Form datum value for this option-->
-
-<!ELEMENT TEXTAREA - - (#PCDATA)* -(INPUT|SELECT|TEXTAREA)>
-<!ATTLIST TEXTAREA
- NAME CDATA #REQUIRED
- ROWS NUMBER #REQUIRED
- COLS NUMBER #REQUIRED
- %SDAFORM; "Para"
- %SDAPREF; "Input Text -- #AttVal(Name): "
- >
-
-<!-- <TEXTAREA> An area for text input -->
-<!-- <TEXTAREA NAME=...> Name of form datum -->
-<!-- <TEXTAREA ROWS=...> Height of area -->
-<!-- <TEXTAREA COLS=...> Width of area -->
-
-]]>
-
-
-<!--======= Document Head ======================-->
-
-<![ %HTML.Recommended [
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 59]</span>
-<a name="page-60" id="page-60" href="#page-60" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- <!ENTITY % head.extra "">
-]]>
-<!ENTITY % head.extra "& NEXTID?">
-
-<!ENTITY % head.content "TITLE & ISINDEX? & BASE? %head.extra">
-
-<!ELEMENT HEAD O O (%head.content) +(META|LINK)>
-
-<!-- <HEAD> Document head -->
-
-<!ELEMENT TITLE - - (#PCDATA)* -(META|LINK)>
-<!ATTLIST TITLE
- %SDAFORM; "Ti" >
-
-<!-- <TITLE> Title of document -->
-
-<!ELEMENT LINK - O EMPTY>
-<!ATTLIST LINK
- HREF CDATA #REQUIRED
- %linkExtraAttributes;
- %SDAPREF; "Linked to : #AttVal (TITLE) (URN) (HREF)>" >
-
-<!-- <LINK> Link from this document -->
-<!-- <LINK HREF="..."> Address of link destination -->
-<!-- <LINK URN="..."> Lasting name of destination -->
-<!-- <LINK REL=...> Relationship to destination -->
-<!-- <LINK REV=...> Relationship of destination to this -->
-<!-- <LINK TITLE="..."> Title of destination (advisory) -->
-<!-- <LINK METHODS="..."> Operations allowed (advisory) -->
-
-<!ELEMENT ISINDEX - O EMPTY>
-<!ATTLIST ISINDEX
- %SDAPREF;
- "<Para>[Document is indexed/searchable.]</Para>">
-
-<!-- <ISINDEX> Document is a searchable index -->
-
-<!ELEMENT BASE - O EMPTY>
-<!ATTLIST BASE
- HREF CDATA #REQUIRED >
-
-<!-- <BASE> Base context document -->
-<!-- <BASE HREF="..."> Address for this document -->
-
-<!ELEMENT NEXTID - O EMPTY>
-<!ATTLIST NEXTID
- N CDATA #REQUIRED >
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 60]</span>
-<a name="page-61" id="page-61" href="#page-61" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
-<!-- <NEXTID> Next ID to use for link name -->
-<!-- <NEXTID N=...> Next ID to use for link name -->
-
-<!ELEMENT META - O EMPTY>
-<!ATTLIST META
- HTTP-EQUIV NAME #IMPLIED
- NAME NAME #IMPLIED
- CONTENT CDATA #REQUIRED >
-
-<!-- <META> Generic Meta-information -->
-<!-- <META HTTP-EQUIV=...> HTTP response header name -->
-<!-- <META NAME=...> Meta-information name -->
-<!-- <META CONTENT="..."> Associated information -->
-
-<!--======= Document Structure =================-->
-
-<![ %HTML.Deprecated [
- <!ENTITY % html.content "HEAD, BODY, PLAINTEXT?">
-]]>
-<!ENTITY % html.content "HEAD, BODY">
-
-<!ELEMENT HTML O O (%html.content)>
-<!ENTITY % version.attr "VERSION CDATA #FIXED '%HTML.Version;'">
-
-<!ATTLIST HTML
- %version.attr;
- %SDAFORM; "Book"
- >
-
-<!-- <HTML> HTML Document -->
-
-<span class="h3"><a name="section-9.2">9.2</a>. Strict HTML DTD</span>
-
- This document type declaration refers to the HTML DTD with the
- `HTML.Recommended' entity defined as `INCLUDE' rather than IGNORE;
- that is, it refers to the more structurally rigid definition of HTML.
-
-<!-- html-s.dtd
-
- Document Type Definition for the HyperText Markup Language
- with strict validation (HTML Strict DTD).
-
- $Id: html-s.dtd,v 1.3 1995/06/02 18:55:46 connolly Exp $
-
- Author: Daniel W. Connolly <connolly at w3.org>
- See Also: <a href="http://www.w3.org/hypertext/WWW/MarkUp/MarkUp.html">http://www.w3.org/hypertext/WWW/MarkUp/MarkUp.html</a>
--->
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 61]</span>
-<a name="page-62" id="page-62" href="#page-62" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
-<!ENTITY % HTML.Version
- "-//IETF//DTD HTML 2.0 Strict//EN"
-
- -- Typical usage:
-
- <!DOCTYPE HTML PUBLIC
- "-//IETF//DTD HTML Strict//EN">
- <html>
- ...
- </html>
- --
- >
-
-<!-- Feature Test Entities -->
-<!ENTITY % HTML.Recommended "INCLUDE">
-
-<!ENTITY % html PUBLIC "-//IETF//DTD HTML 2.0//EN">
-%html;
-
-<span class="h3"><a name="section-9.3">9.3</a>. Level 1 HTML DTD</span>
-
- This document type declaration refers to the HTML DTD with the
- `HTML.Forms' entity defined as `IGNORE' rather than `INCLUDE'.
- Documents which contain <FORM> elements do not conform to this DTD,
- and must use the level 2 DTD.
-
-<!-- html-1.dtd
-
- Document Type Definition for the HyperText Markup Language
- with Level 1 Extensions (HTML Level 1 DTD).
-
- $Id: html-1.dtd,v 1.2 1995/03/29 18:53:10 connolly Exp $
-
- Author: Daniel W. Connolly <connolly at w3.org>
- See Also: <a href="http://info.cern.ch/hypertext/WWW/MarkUp/MarkUp.html">http://info.cern.ch/hypertext/WWW/MarkUp/MarkUp.html</a>
-
--->
-
-<!ENTITY % HTML.Version
- "-//IETF//DTD HTML 2.0 Level 1//EN"
-
- -- Typical usage:
-
- <!DOCTYPE HTML PUBLIC
- "-//IETF//DTD HTML Level 1//EN">
- <html>
- ...
- </html>
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 62]</span>
-<a name="page-63" id="page-63" href="#page-63" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- --
- >
-
-<!-- Feature Test Entities -->
-<!ENTITY % HTML.Forms "IGNORE">
-
-<!ENTITY % html PUBLIC "-//IETF//DTD HTML 2.0//EN">
-%html;
-
-<span class="h3"><a name="section-9.4">9.4</a>. Strict Level 1 HTML DTD</span>
-
- This document type declaration refers to the level 1 HTML DTD with
- the `HTML.Recommended' entity defined as `INCLUDE' rather than
- IGNORE; that is, it refers to the more structurally rigid definition
- of HTML.
-
-<!-- html-1s.dtd
-
- Document Type Definition for the HyperText Markup Language
- Strict Level 1
-
- $Id: html-1s.dtd,v 1.3 1995/06/02 18:55:43 connolly Exp $
-
- Author: Daniel W. Connolly <connolly at w3.org>
- See Also: <a href="http://www.w3.org/hypertext/WWW/MarkUp/MarkUp.html">http://www.w3.org/hypertext/WWW/MarkUp/MarkUp.html</a>
--->
-
-<!ENTITY % HTML.Version
- "-//IETF//DTD HTML 2.0 Strict Level 1//EN"
-
- -- Typical usage:
-
- <!DOCTYPE HTML PUBLIC
- "-//IETF//DTD HTML Strict Level 1//EN">
- <html>
- ...
- </html>
- --
- >
-
-<!-- Feature Test Entities -->
-
-
-<!ENTITY % HTML.Recommended "INCLUDE">
-
-<!ENTITY % html-1 PUBLIC "-//IETF//DTD HTML 2.0 Level 1//EN">
-%html-1;
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 63]</span>
-<a name="page-64" id="page-64" href="#page-64" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
-<span class="h3"><a name="section-9.5">9.5</a>. SGML Declaration for HTML</span>
-
- This is the SGML Declaration for HyperText Markup Language.
-
-<!SGML "ISO 8879:1986"
---
- SGML Declaration for HyperText Markup Language (HTML).
-
---
-
-CHARSET
- BASESET "ISO 646:1983//CHARSET
- International Reference Version
- (IRV)//ESC 2/5 4/0"
- DESCSET 0 9 UNUSED
- 9 2 9
- 11 2 UNUSED
- 13 1 13
- 14 18 UNUSED
- 32 95 32
- 127 1 UNUSED
- BASESET "ISO Registration Number 100//CHARSET
- ECMA-94 Right Part of
- Latin Alphabet Nr. 1//ESC 2/13 4/1"
-
- DESCSET 128 32 UNUSED
- 160 96 32
-
-<span class="h1"><a name="appendix-CAPACITY">CAPACITY</a> SGMLREF</span>
- TOTALCAP 150000
- GRPCAP 150000
- ENTCAP 150000
-
-<span class="h1"><a name="appendix-SCOPE">SCOPE</a> DOCUMENT</span>
-SYNTAX
- SHUNCHAR CONTROLS 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
- 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 127
- BASESET "ISO 646:1983//CHARSET
- International Reference Version
- (IRV)//ESC 2/5 4/0"
- DESCSET 0 128 0
- FUNCTION
- RE 13
- RS 10
- SPACE 32
- TAB SEPCHAR 9
- NAMING LCNMSTRT ""
- UCNMSTRT ""
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 64]</span>
-<a name="page-65" id="page-65" href="#page-65" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- LCNMCHAR ".-"
- UCNMCHAR ".-"
- NAMECASE GENERAL YES
- ENTITY NO
- DELIM GENERAL SGMLREF
- SHORTREF SGMLREF
- NAMES SGMLREF
- QUANTITY SGMLREF
- ATTSPLEN 2100
- LITLEN 1024
- NAMELEN 72 -- somewhat arbitrary; taken from
- internet line length conventions --
- PILEN 1024
- TAGLVL 100
- TAGLEN 2100
- GRPGTCNT 150
- GRPCNT 64
-
-FEATURES
- MINIMIZE
- DATATAG NO
- OMITTAG YES
- RANK NO
- SHORTTAG YES
- LINK
- SIMPLE NO
- IMPLICIT NO
- EXPLICIT NO
- OTHER
- CONCUR NO
- SUBDOC NO
- FORMAL YES
- APPINFO "SDA" -- conforming SGML Document Access application
- --
->
-<!--
- $Id: html.decl,v 1.17 1995/06/08 14:59:32 connolly Exp $
-
- Author: Daniel W. Connolly <connolly at w3.org>
-
- See also: <a href="http://www.w3.org/hypertext/WWW/MarkUp/MarkUp.html">http://www.w3.org/hypertext/WWW/MarkUp/MarkUp.html</a>
- -->
-
-<span class="h3"><a name="section-9.6">9.6</a>. Sample SGML Open Entity Catalog for HTML</span>
-
- The SGML standard describes an "entity manager" as the portion or
- component of an SGML system that maps SGML entities into the actual
- storage model (e.g., the file system). The standard itself does not
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 65]</span>
-<a name="page-66" id="page-66" href="#page-66" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- define a particular mapping methodology or notation.
-
- To assist the interoperability among various SGML tools and systems,
- the SGML Open consortium has passed a technical resolution that
- defines a format for an application-independent entity catalog that
- maps external identifiers and/or entity names to file names.
-
- Each entry in the catalog associates a storage object identifier
- (such as a file name) with information about the external entity that
- appears in the SGML document. In addition to entries that associate
- public identifiers, a catalog entry can associate an entity name with
- a storage object identifier. For example, the following are possible
- catalog entries:
-
- -- catalog: SGML Open style entity catalog for HTML --
- -- $Id: catalog,v 1.3 1995/09/21 23:30:23 connolly Exp $ --
-
- -- Ways to refer to Level 2: most general to most specific --
-<span class="h1"><a name="appendix-PUBLIC">PUBLIC</a> "-//IETF//DTD HTML//EN" html.dtd</span>
-<span class="h1"><a name="appendix-PUBLIC">PUBLIC</a> "-//IETF//DTD HTML 2.0//EN" html.dtd</span>
-<span class="h1"><a name="appendix-PUBLIC">PUBLIC</a> "-//IETF//DTD HTML Level 2//EN" html.dtd</span>
-<span class="h1"><a name="appendix-PUBLIC">PUBLIC</a> "-//IETF//DTD HTML 2.0 Level 2//EN" html.dtd</span>
-
- -- Ways to refer to Level 1: most general to most specific --
-<span class="h1"><a name="appendix-PUBLIC">PUBLIC</a> "-//IETF//DTD HTML Level 1//EN" html-1.dtd</span>
-<span class="h1"><a name="appendix-PUBLIC">PUBLIC</a> "-//IETF//DTD HTML 2.0 Level 1//EN" html-1.dtd</span>
-
- -- Ways to refer to
- Strict Level 2: most general to most specific --
-<span class="h1"><a name="appendix-PUBLIC">PUBLIC</a> "-//IETF//DTD HTML Strict//EN" html-s.dtd</span>
-<span class="h1"><a name="appendix-PUBLIC">PUBLIC</a> "-//IETF//DTD HTML 2.0 Strict//EN" html-s.dtd</span>
-<span class="h1"><a name="appendix-PUBLIC">PUBLIC</a> "-//IETF//DTD HTML Strict Level 2//EN" html-s.dtd</span>
-<span class="h1"><a name="appendix-PUBLIC">PUBLIC</a> "-//IETF//DTD HTML 2.0 Strict Level 2//EN" html-s.dtd</span>
-
- -- Ways to refer to
- Strict Level 1: most general to most specific --
-<span class="h1"><a name="appendix-PUBLIC">PUBLIC</a> "-//IETF//DTD HTML Strict Level 1//EN" html-1s.dtd</span>
-<span class="h1"><a name="appendix-PUBLIC">PUBLIC</a> "-//IETF//DTD HTML 2.0 Strict Level 1//EN" html-1s.dtd</span>
-
- -- ISO latin 1 entity set for HTML --
-<span class="h1"><a name="appendix-PUBLIC">PUBLIC</a> "ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML" ISOlat1.sgml</span>
-
-<span class="h3"><a name="section-9.7">9.7</a>. Character Entity Sets</span>
-
- The HTML DTD defines the following entities. They represent
- particular graphic characters which have special meanings in places
- in the markup, or may not be part of the character set available to
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 66]</span>
-<a name="page-67" id="page-67" href="#page-67" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- the writer.
-
-<span class="h4"><a name="section-9.7.1">9.7.1</a>. Numeric and Special Graphic Entity Set</span>
-
- The following table lists each of the characters included from the
- Numeric and Special Graphic entity set, along with its name, syntax
- for use, and description. This list is derived from `ISO Standard
- 8879:1986//ENTITIES Numeric and Special Graphic//EN'. However, HTML
- does not include the entire entity set -- only the entities
- listed below are included.
-
- GLYPH NAME SYNTAX DESCRIPTION
- < lt &lt; Less than sign
- > gt &gt; Greater than sign
- & amp &amp; Ampersand
- " quot &quot; Double quote sign
-
-<span class="h4"><a name="section-9.7.2">9.7.2</a>. ISO Latin 1 Character Entity Set</span>
-
- The following public text lists each of the characters specified in
- the Added Latin 1 entity set, along with its name, syntax for use,
- and description. This list is derived from ISO Standard
- 8879:1986//ENTITIES Added Latin 1//EN. HTML includes the entire
- entity set.
-
-<!-- (C) International Organization for Standardization 1986
- Permission to copy in any form is granted for use with
- conforming SGML systems and applications as defined in
- ISO 8879, provided this notice is included in all copies.
--->
-<!-- Character entity set. Typical invocation:
- <!ENTITY % ISOlat1 PUBLIC
- "ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML">
- %ISOlat1;
--->
-<!-- Modified for use in HTML
- $Id: ISOlat1.sgml,v 1.2 1994/11/30 23:45:12 connolly Exp $ -->
-<!ENTITY AElig CDATA "Æ" -- capital AE diphthong (ligature) -->
-<!ENTITY Aacute CDATA "Á" -- capital A, acute accent -->
-<!ENTITY Acirc CDATA "Â" -- capital A, circumflex accent -->
-<!ENTITY Agrave CDATA "À" -- capital A, grave accent -->
-<!ENTITY Aring CDATA "Å" -- capital A, ring -->
-<!ENTITY Atilde CDATA "Ã" -- capital A, tilde -->
-<!ENTITY Auml CDATA "Ä" -- capital A, dieresis or umlaut mark -->
-<!ENTITY Ccedil CDATA "Ç" -- capital C, cedilla -->
-<!ENTITY ETH CDATA "Ð" -- capital Eth, Icelandic -->
-<!ENTITY Eacute CDATA "É" -- capital E, acute accent -->
-<!ENTITY Ecirc CDATA "Ê" -- capital E, circumflex accent -->
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 67]</span>
-<a name="page-68" id="page-68" href="#page-68" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
-<!ENTITY Egrave CDATA "È" -- capital E, grave accent -->
-<!ENTITY Euml CDATA "Ë" -- capital E, dieresis or umlaut mark -->
-<!ENTITY Iacute CDATA "Í" -- capital I, acute accent -->
-<!ENTITY Icirc CDATA "Î" -- capital I, circumflex accent -->
-<!ENTITY Igrave CDATA "Ì" -- capital I, grave accent -->
-<!ENTITY Iuml CDATA "Ï" -- capital I, dieresis or umlaut mark -->
-<!ENTITY Ntilde CDATA "Ñ" -- capital N, tilde -->
-<!ENTITY Oacute CDATA "Ó" -- capital O, acute accent -->
-<!ENTITY Ocirc CDATA "Ô" -- capital O, circumflex accent -->
-<!ENTITY Ograve CDATA "Ò" -- capital O, grave accent -->
-<!ENTITY Oslash CDATA "Ø" -- capital O, slash -->
-<!ENTITY Otilde CDATA "Õ" -- capital O, tilde -->
-<!ENTITY Ouml CDATA "Ö" -- capital O, dieresis or umlaut mark -->
-<!ENTITY THORN CDATA "Þ" -- capital THORN, Icelandic -->
-<!ENTITY Uacute CDATA "Ú" -- capital U, acute accent -->
-<!ENTITY Ucirc CDATA "Û" -- capital U, circumflex accent -->
-<!ENTITY Ugrave CDATA "Ù" -- capital U, grave accent -->
-<!ENTITY Uuml CDATA "Ü" -- capital U, dieresis or umlaut mark -->
-<!ENTITY Yacute CDATA "Ý" -- capital Y, acute accent -->
-<!ENTITY aacute CDATA "á" -- small a, acute accent -->
-<!ENTITY acirc CDATA "â" -- small a, circumflex accent -->
-<!ENTITY aelig CDATA "æ" -- small ae diphthong (ligature) -->
-<!ENTITY agrave CDATA "à" -- small a, grave accent -->
-<!ENTITY aring CDATA "å" -- small a, ring -->
-<!ENTITY atilde CDATA "ã" -- small a, tilde -->
-<!ENTITY auml CDATA "ä" -- small a, dieresis or umlaut mark -->
-<!ENTITY ccedil CDATA "ç" -- small c, cedilla -->
-<!ENTITY eacute CDATA "é" -- small e, acute accent -->
-<!ENTITY ecirc CDATA "ê" -- small e, circumflex accent -->
-<!ENTITY egrave CDATA "è" -- small e, grave accent -->
-<!ENTITY eth CDATA "ð" -- small eth, Icelandic -->
-<!ENTITY euml CDATA "ë" -- small e, dieresis or umlaut mark -->
-<!ENTITY iacute CDATA "í" -- small i, acute accent -->
-<!ENTITY icirc CDATA "î" -- small i, circumflex accent -->
-<!ENTITY igrave CDATA "ì" -- small i, grave accent -->
-<!ENTITY iuml CDATA "ï" -- small i, dieresis or umlaut mark -->
-<!ENTITY ntilde CDATA "ñ" -- small n, tilde -->
-<!ENTITY oacute CDATA "ó" -- small o, acute accent -->
-<!ENTITY ocirc CDATA "ô" -- small o, circumflex accent -->
-<!ENTITY ograve CDATA "ò" -- small o, grave accent -->
-<!ENTITY oslash CDATA "ø" -- small o, slash -->
-<!ENTITY otilde CDATA "õ" -- small o, tilde -->
-<!ENTITY ouml CDATA "ö" -- small o, dieresis or umlaut mark -->
-<!ENTITY szlig CDATA "ß" -- small sharp s, German (sz ligature) -->
-<!ENTITY thorn CDATA "þ" -- small thorn, Icelandic -->
-<!ENTITY uacute CDATA "ú" -- small u, acute accent -->
-<!ENTITY ucirc CDATA "û" -- small u, circumflex accent -->
-<!ENTITY ugrave CDATA "ù" -- small u, grave accent -->
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 68]</span>
-<a name="page-69" id="page-69" href="#page-69" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
-<!ENTITY uuml CDATA "ü" -- small u, dieresis or umlaut mark -->
-<!ENTITY yacute CDATA "ý" -- small y, acute accent -->
-<!ENTITY yuml CDATA "ÿ" -- small y, dieresis or umlaut mark -->
-
-<span class="h2"><a name="section-10">10</a>. Security Considerations</span>
-
- Anchors, embedded images, and all other elements which contain URIs
- as parameters may cause the URI to be dereferenced in response to
- user input. In this case, the security considerations of [<a href="#ref-URL" title='"Uniform Resource Locators (URL)"'>URL</a>] apply.
-
- The widely deployed methods for submitting forms requests -- HTTP and
- SMTP -- provide little assurance of confidentiality. Information
- providers who request sensitive information via forms -- especially
- by way of the `PASSWORD' type input field (see 8.1.2, "Input Field:
- INPUT") -- should be aware and make their users aware of the lack of
- confidentiality.
-
-<span class="h2"><a name="section-11">11</a>. References</span>
-
- [<a name="ref-URI" id="ref-URI">URI</a>]
- Berners-Lee, T., "Universal Resource Identifiers in WWW:
- A Unifying Syntax for the Expression of Names and
- Addresses of Objects on the Network as used in the
- World-Wide Web", <a href="./rfc1630">RFC 1630</a>, CERN, June 1994.
- <URL:ftp://ds.internic.net/rfc/rfc1630.txt>
-
- [<a name="ref-URL" id="ref-URL">URL</a>]
- Berners-Lee, T., Masinter, L., and M. McCahill, "Uniform
- Resource Locators (URL)", <a href="./rfc1738">RFC 1738</a>, CERN, Xerox PARC,
- University of Minnesota, December 1994.
- <URL:ftp://ds.internic.net/rfc/rfc1738.txt>
-
- [<a name="ref-HTTP" id="ref-HTTP">HTTP</a>]
- Berners-Lee, T., Fielding, R., and H. Frystyk Nielsen,
- "Hypertext Transfer Protocol - HTTP/1.0", Work in
- Progress, MIT, UC Irvine, CERN, March 1995.
-
- [<a name="ref-MIME" id="ref-MIME">MIME</a>]
- Borenstein, N., and N. Freed. "MIME (Multipurpose
- Internet Mail Extensions) Part One: Mechanisms for
- Specifying and Describing the Format of Internet Message
- Bodies", <a href="./rfc1521">RFC 1521</a>, Bellcore, Innosoft, September 1993.
- <URL:ftp://ds.internic.net/rfc/rfc1521.txt>
-
- [<a name="ref-RELURL" id="ref-RELURL">RELURL</a>]
- Fielding, R., "Relative Uniform Resource Locators", <a href="./rfc1808">RFC</a>
- <a href="./rfc1808">1808</a>, June 1995
- <URL:ftp://ds.internic.net/rfc/rfc1808.txt>
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 69]</span>
-<a name="page-70" id="page-70" href="#page-70" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- [<a name="ref-GOLD90" id="ref-GOLD90">GOLD90</a>]
- Goldfarb, C., "The SGML Handbook", Y. Rubinsky, Ed.,
- Oxford University Press, 1990.
-
- [<a name="ref-DEXTER" id="ref-DEXTER">DEXTER</a>]
- Frank Halasz and Mayer Schwartz, "The Dexter Hypertext
- Reference Model", Communications of the ACM, pp.
- 30-39, vol. 37 no. 2, Feb 1994.
-
- [<a name="ref-IMEDIA" id="ref-IMEDIA">IMEDIA</a>]
- Postel, J., "Media Type Registration Procedure",
- <a href="./rfc1590">RFC 1590</a>, USC/Information Sciences Institute, March 1994.
- <URL:ftp://ds.internic.net/rfc/rfc1590.txt>
-
- [<a name="ref-IANA" id="ref-IANA">IANA</a>]
- Reynolds, J., and J. Postel, "Assigned Numbers", STD 2,
- <a href="./rfc1700">RFC 1700</a>, USC/Information Sciences Institute, October
- 1994. <URL:ftp://ds.internic.net/rfc/rfc1700.txt>
-
- [<a name="ref-SQ91" id="ref-SQ91">SQ91</a>]
- SoftQuad. "The SGML Primer", 3rd ed., SoftQuad Inc.,
- 1991. <URL:http://www.sq.com/>
-
- [<a name="ref-ISO-646" id="ref-ISO-646">ISO-646</a>]
- ISO/IEC 646:1991 Information technology -- ISO 7-bit
- coded character set for information interchange
- <URL:http://www.iso.ch/cate/d4777.html>
-
- [<a name="ref-ISO-10646" id="ref-ISO-10646">ISO-10646</a>]
- ISO/IEC 10646-1:1993 Information technology -- Universal
- Multiple-Octet Coded Character Set (UCS) -- Part 1:
- Architecture and Basic Multilingual Plane
- <URL:http://www.iso.ch/cate/d18741.html>
-
- [<a name="ref-ISO-8859-1" id="ref-ISO-8859-1">ISO-8859-1</a>]
- ISO 8859. International Standard -- Information
- Processing -- 8-bit Single-Byte Coded Graphic Character
- Sets -- Part 1: Latin Alphabet No. 1, ISO 8859-1:1987.
- <URL:http://www.iso.ch/cate/d16338.html>
-
- [<a name="ref-SGML" id="ref-SGML">SGML</a>]
- ISO 8879. Information Processing -- Text and Office
- Systems - Standard Generalized Markup Language (SGML),
- 1986. <URL:http://www.iso.ch/cate/d16387.html>
-
-
-
-
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 70]</span>
-<a name="page-71" id="page-71" href="#page-71" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
-<span class="h2"><a name="section-12">12</a>. Acknowledgments</span>
-
- The HTML document type was designed by Tim Berners-Lee at CERN as
- part of the 1990 World Wide Web project. In 1992, Dan Connolly wrote
- the HTML Document Type Definition (DTD) and a brief HTML
- specification.
-
- Since 1993, a wide variety of Internet participants have contributed
- to the evolution of HTML, which has included the addition of in-line
- images introduced by the NCSA Mosaic software for WWW. Dave Raggett
- played an important role in deriving the forms material from the
- HTML+ specification.
-
- Dan Connolly and Karen Olson Muldrow rewrote the HTML Specification
- in 1994. The document was then edited by the HTML working group as a
- whole, with updates being made by Eric Schieler, Mike Knezovich, and
- Eric W. Sink at Spyglass, Inc. Finally, Roy Fielding restructured
- the entire draft into its current form.
-
- Special thanks to the many active participants in the HTML working
- group, too numerous to list individually, without whom there would be
- no standards process and no standard. That this document approaches
- its objective of carefully converging a description of current
- practice and formalization of HTML's relationship to SGML is a
- tribute to their effort.
-
-<span class="h3"><a name="section-12.1">12.1</a>. Authors' Addresses</span>
-
- Tim Berners-Lee
- Director, W3 Consortium
- MIT Laboratory for Computer Science
- 545 Technology Square
- Cambridge, MA 02139, U.S.A.
-
- Phone: +1 (617) 253 9670
- Fax: +1 (617) 258 8682
- EMail: timbl at w3.org
-
-
- Daniel W. Connolly
- Research Technical Staff, W3 Consortium
- MIT Laboratory for Computer Science
- 545 Technology Square
- Cambridge, MA 02139, U.S.A.
-
- Phone: +1 (617) 258 8682
- EMail: connolly at w3.org
- URI: <a href="http://www.w3.org/hypertext/WWW/People/Connolly/">http://www.w3.org/hypertext/WWW/People/Connolly/</a>
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 71]</span>
-<a name="page-72" id="page-72" href="#page-72" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
-<span class="h2"><a name="section-13">13</a>. The HTML Coded Character Set</span>
-
- This list details the code positions and characters of the HTML
- document character set, specified in 9.5, "SGML Declaration for
- HTML". This coded character set is based on [<a href="#ref-ISO-8859-1">ISO-8859-1</a>].
-
- REFERENCE DESCRIPTION
- -------------- -----------
- &#00; - &#08; Unused
- &#09; Horizontal tab
- &#10; Line feed
- &#11; - &#12; Unused
- &#13; Carriage Return
- &#14; - &#31; Unused
- &#32; Space
- ! Exclamation mark
- " Quotation mark
- # Number sign
- $ Dollar sign
- % Percent sign
- & Ampersand
- ' Apostrophe
- ( Left parenthesis
- ) Right parenthesis
- * Asterisk
- + Plus sign
- , Comma
- - Hyphen
- . Period (fullstop)
- / Solidus (slash)
- 0 - 9 Digits 0-9
- : Colon
- ; Semi-colon
- < Less than
- = Equals sign
- > Greater than
- ? Question mark
- @ Commercial at
- A - Z Letters A-Z
- [ Left square bracket
- \ Reverse solidus (backslash)
- ] Right square bracket
- ^ Caret
- _ Horizontal bar (underscore)
- ` Acute accent
- a - z Letters a-z
- { Left curly brace
- | Vertical bar
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 72]</span>
-<a name="page-73" id="page-73" href="#page-73" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- } Right curly brace
- ~ Tilde
-  - Ÿ Unused
-   Non-breaking Space
- ¡ Inverted exclamation
- ¢ Cent sign
- £ Pound sterling
- ¤ General currency sign
- ¥ Yen sign
- ¦ Broken vertical bar
- § Section sign
- ¨ Umlaut (dieresis)
- © Copyright
- ª Feminine ordinal
- « Left angle quote, guillemotleft
- ¬ Not sign
- ­ Soft hyphen
- ® Registered trademark
- ¯ Macron accent
- ° Degree sign
- ± Plus or minus
- ² Superscript two
- ³ Superscript three
- ´ Acute accent
- µ Micro sign
- ¶ Paragraph sign
- · Middle dot
- ¸ Cedilla
- ¹ Superscript one
- º Masculine ordinal
- » Right angle quote, guillemotright
- ¼ Fraction one-fourth
- ½ Fraction one-half
- ¾ Fraction three-fourths
- ¿ Inverted question mark
- À Capital A, grave accent
- Á Capital A, acute accent
- Â Capital A, circumflex accent
- Ã Capital A, tilde
- Ä Capital A, dieresis or umlaut mark
- Å Capital A, ring
- Æ Capital AE diphthong (ligature)
- Ç Capital C, cedilla
- È Capital E, grave accent
- É Capital E, acute accent
- Ê Capital E, circumflex accent
- Ë Capital E, dieresis or umlaut mark
- Ì Capital I, grave accent
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 73]</span>
-<a name="page-74" id="page-74" href="#page-74" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- Í Capital I, acute accent
- Î Capital I, circumflex accent
- Ï Capital I, dieresis or umlaut mark
- Ð Capital Eth, Icelandic
- Ñ Capital N, tilde
- Ò Capital O, grave accent
- Ó Capital O, acute accent
- Ô Capital O, circumflex accent
- Õ Capital O, tilde
- Ö Capital O, dieresis or umlaut mark
- × Multiply sign
- Ø Capital O, slash
- Ù Capital U, grave accent
- Ú Capital U, acute accent
- Û Capital U, circumflex accent
- Ü Capital U, dieresis or umlaut mark
- Ý Capital Y, acute accent
- Þ Capital THORN, Icelandic
- ß Small sharp s, German (sz ligature)
- à Small a, grave accent
- á Small a, acute accent
- â Small a, circumflex accent
- ã Small a, tilde
- ä Small a, dieresis or umlaut mark
- å Small a, ring
- æ Small ae diphthong (ligature)
- ç Small c, cedilla
- è Small e, grave accent
- é Small e, acute accent
- ê Small e, circumflex accent
- ë Small e, dieresis or umlaut mark
- ì Small i, grave accent
- í Small i, acute accent
- î Small i, circumflex accent
- ï Small i, dieresis or umlaut mark
- ð Small eth, Icelandic
- ñ Small n, tilde
- ò Small o, grave accent
- ó Small o, acute accent
- ô Small o, circumflex accent
- õ Small o, tilde
- ö Small o, dieresis or umlaut mark
- ÷ Division sign
- ø Small o, slash
- ù Small u, grave accent
- ú Small u, acute accent
- û Small u, circumflex accent
- ü Small u, dieresis or umlaut mark
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 74]</span>
-<a name="page-75" id="page-75" href="#page-75" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- ý Small y, acute accent
- þ Small thorn, Icelandic
- ÿ Small y, dieresis or umlaut mark
-
-<span class="h2"><a name="section-14">14</a>. Proposed Entities</span>
-
- The HTML DTD references the "Added Latin 1" entity set, which only
- supplies named entities for a subset of the non-ASCII characters in
- [<a href="#ref-ISO-8859-1">ISO-8859-1</a>], namely the accented characters. The following entities
- should be supported so that all ISO 8859-1 characters may only be
- referenced symbolically. The names for these entities are taken from
- the appendixes of [<a href="#ref-SGML">SGML</a>].
-
- <!ENTITY nbsp CDATA " " -- no-break space -->
- <!ENTITY iexcl CDATA "¡" -- inverted exclamation mark -->
- <!ENTITY cent CDATA "¢" -- cent sign -->
- <!ENTITY pound CDATA "£" -- pound sterling sign -->
- <!ENTITY curren CDATA "¤" -- general currency sign -->
- <!ENTITY yen CDATA "¥" -- yen sign -->
- <!ENTITY brvbar CDATA "¦" -- broken (vertical) bar -->
- <!ENTITY sect CDATA "§" -- section sign -->
- <!ENTITY uml CDATA "¨" -- umlaut (dieresis) -->
- <!ENTITY copy CDATA "©" -- copyright sign -->
- <!ENTITY ordf CDATA "ª" -- ordinal indicator, feminine -->
- <!ENTITY laquo CDATA "«" -- angle quotation mark, left -->
- <!ENTITY not CDATA "¬" -- not sign -->
- <!ENTITY shy CDATA "­" -- soft hyphen -->
- <!ENTITY reg CDATA "®" -- registered sign -->
- <!ENTITY macr CDATA "¯" -- macron -->
- <!ENTITY deg CDATA "°" -- degree sign -->
- <!ENTITY plusmn CDATA "±" -- plus-or-minus sign -->
- <!ENTITY sup2 CDATA "²" -- superscript two -->
- <!ENTITY sup3 CDATA "³" -- superscript three -->
- <!ENTITY acute CDATA "´" -- acute accent -->
- <!ENTITY micro CDATA "µ" -- micro sign -->
- <!ENTITY para CDATA "¶" -- pilcrow (paragraph sign) -->
- <!ENTITY middot CDATA "·" -- middle dot -->
- <!ENTITY cedil CDATA "¸" -- cedilla -->
- <!ENTITY sup1 CDATA "¹" -- superscript one -->
- <!ENTITY ordm CDATA "º" -- ordinal indicator, masculine -->
- <!ENTITY raquo CDATA "»" -- angle quotation mark, right -->
- <!ENTITY frac14 CDATA "¼" -- fraction one-quarter -->
- <!ENTITY frac12 CDATA "½" -- fraction one-half -->
- <!ENTITY frac34 CDATA "¾" -- fraction three-quarters -->
- <!ENTITY iquest CDATA "¿" -- inverted question mark -->
- <!ENTITY Agrave CDATA "À" -- capital A, grave accent -->
- <!ENTITY Aacute CDATA "Á" -- capital A, acute accent -->
- <!ENTITY Acirc CDATA "Â" -- capital A, circumflex accent -->
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 75]</span>
-<a name="page-76" id="page-76" href="#page-76" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- <!ENTITY Atilde CDATA "Ã" -- capital A, tilde -->
- <!ENTITY Auml CDATA "Ä" -- capital A, dieresis or umlaut mark -->
- <!ENTITY Aring CDATA "Å" -- capital A, ring -->
- <!ENTITY AElig CDATA "Æ" -- capital AE diphthong (ligature) -->
- <!ENTITY Ccedil CDATA "Ç" -- capital C, cedilla -->
- <!ENTITY Egrave CDATA "È" -- capital E, grave accent -->
- <!ENTITY Eacute CDATA "É" -- capital E, acute accent -->
- <!ENTITY Ecirc CDATA "Ê" -- capital E, circumflex accent -->
- <!ENTITY Euml CDATA "Ë" -- capital E, dieresis or umlaut mark -->
- <!ENTITY Igrave CDATA "Ì" -- capital I, grave accent -->
- <!ENTITY Iacute CDATA "Í" -- capital I, acute accent -->
- <!ENTITY Icirc CDATA "Î" -- capital I, circumflex accent -->
- <!ENTITY Iuml CDATA "Ï" -- capital I, dieresis or umlaut mark -->
- <!ENTITY ETH CDATA "Ð" -- capital Eth, Icelandic -->
- <!ENTITY Ntilde CDATA "Ñ" -- capital N, tilde -->
- <!ENTITY Ograve CDATA "Ò" -- capital O, grave accent -->
- <!ENTITY Oacute CDATA "Ó" -- capital O, acute accent -->
- <!ENTITY Ocirc CDATA "Ô" -- capital O, circumflex accent -->
- <!ENTITY Otilde CDATA "Õ" -- capital O, tilde -->
- <!ENTITY Ouml CDATA "Ö" -- capital O, dieresis or umlaut mark -->
- <!ENTITY times CDATA "×" -- multiply sign -->
- <!ENTITY Oslash CDATA "Ø" -- capital O, slash -->
- <!ENTITY Ugrave CDATA "Ù" -- capital U, grave accent -->
- <!ENTITY Uacute CDATA "Ú" -- capital U, acute accent -->
- <!ENTITY Ucirc CDATA "Û" -- capital U, circumflex accent -->
- <!ENTITY Uuml CDATA "Ü" -- capital U, dieresis or umlaut mark -->
- <!ENTITY Yacute CDATA "Ý" -- capital Y, acute accent -->
- <!ENTITY THORN CDATA "Þ" -- capital THORN, Icelandic -->
- <!ENTITY szlig CDATA "ß" -- small sharp s, German (sz ligature) -->
- <!ENTITY agrave CDATA "à" -- small a, grave accent -->
- <!ENTITY aacute CDATA "á" -- small a, acute accent -->
- <!ENTITY acirc CDATA "â" -- small a, circumflex accent -->
- <!ENTITY atilde CDATA "ã" -- small a, tilde -->
- <!ENTITY auml CDATA "ä" -- small a, dieresis or umlaut mark -->
- <!ENTITY aring CDATA "å" -- small a, ring -->
- <!ENTITY aelig CDATA "æ" -- small ae diphthong (ligature) -->
- <!ENTITY ccedil CDATA "ç" -- small c, cedilla -->
- <!ENTITY egrave CDATA "è" -- small e, grave accent -->
- <!ENTITY eacute CDATA "é" -- small e, acute accent -->
- <!ENTITY ecirc CDATA "ê" -- small e, circumflex accent -->
- <!ENTITY euml CDATA "ë" -- small e, dieresis or umlaut mark -->
- <!ENTITY igrave CDATA "ì" -- small i, grave accent -->
- <!ENTITY iacute CDATA "í" -- small i, acute accent -->
- <!ENTITY icirc CDATA "î" -- small i, circumflex accent -->
- <!ENTITY iuml CDATA "ï" -- small i, dieresis or umlaut mark -->
- <!ENTITY eth CDATA "ð" -- small eth, Icelandic -->
- <!ENTITY ntilde CDATA "ñ" -- small n, tilde -->
- <!ENTITY ograve CDATA "ò" -- small o, grave accent -->
-
-
-
-<span class="grey">Berners-Lee & Connolly Standards Track [Page 76]</span>
-<a name="page-77" id="page-77" href="#page-77" class="invisible"><span class="break"> </span></a>
-<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
-
-
- <!ENTITY oacute CDATA "ó" -- small o, acute accent -->
- <!ENTITY ocirc CDATA "ô" -- small o, circumflex accent -->
- <!ENTITY otilde CDATA "õ" -- small o, tilde -->
- <!ENTITY ouml CDATA "ö" -- small o, dieresis or umlaut mark -->
- <!ENTITY divide CDATA "÷" -- divide sign -->
- <!ENTITY oslash CDATA "ø" -- small o, slash -->
- <!ENTITY ugrave CDATA "ù" -- small u, grave accent -->
- <!ENTITY uacute CDATA "ú" -- small u, acute accent -->
- <!ENTITY ucirc CDATA "û" -- small u, circumflex accent -->
- <!ENTITY uuml CDATA "ü" -- small u, dieresis or umlaut mark -->
- <!ENTITY yacute CDATA "ý" -- small y, acute accent -->
- <!ENTITY thorn CDATA "þ" -- small thorn, Icelandic -->
- <!ENTITY yuml CDATA "ÿ" -- small y, dieresis or umlaut mark -->
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-Berners-Lee & Connolly Standards Track [Page 77]
-<span class="break"> </span>
-
-</pre><br />
-<span class="noprint"><small><small>Html markup produced by rfcmarkup 1.60, available from
-<a href="http://tools.ietf.org/tools/rfcmarkup/">http://tools.ietf.org/tools/rfcmarkup/</a>
-</small></small></span>
-</body></html>
diff --git a/third_party/uriparser-0.7.5/doc/rfc3513.htm b/third_party/uriparser-0.7.5/doc/rfc3513.htm
deleted file mode 100644
index 80ce7ff..0000000
--- a/third_party/uriparser-0.7.5/doc/rfc3513.htm
+++ /dev/null
@@ -1,1579 +0,0 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-<html xml:lang="en" lang="en"><head>
-
-
- <meta http-equiv="Content-Type" content="text/html; charset=us-ascii">
- <meta name="robots" content="index,follow">
- <meta name="creator" content="rfcmarkup version 1.46">
- <link rel="icon" href="http://tools.ietf.org/images/rfc.png" type="image/png">
- <link rel="shortcut icon" href="http://tools.ietf.org/images/rfc.png" type="image/png"><title>RFC 3513 Internet Protocol Version 6 (IPv6) Addressing Architecture</title>
-
-
- <style type="text/css">
- body {
- margin: 0px 8px;
- font-size: 1em;
- }
- h1, h2, h3, h4, h5, h6, .h1, .h2, .h3, .h4, .h5, .h6 {
- font-weight: bold;
- line-height: 0pt;
- display: inline;
- white-space: pre;
- font-family: monospace;
- font-size: 1em;
- font-weight: bold;
- }
- pre {
- font-size: 1em;
- }
- .pre {
- white-space: pre;
- font-family: monospace;
- }
- .header{
- font-weight: bold;
- }
- @media print {
- body {
- font-size: 10.5pt;
- }
- h1, h2, h3, h4, h5, h6 {
- font-size: 10.5pt;
- }
-
- a:link, a:visited {
- color: inherit;
- text-decoration: none;
- }
- .break {
- page-break-before: always;
- text-decoration: none;
- }
- .noprint {
- display: none;
- }
- }
- @media screen {
- .grey, .grey a:link, .grey a:visited {
- color: #777;
- }
- .break {
- text-decoration: none;
- display: none;
- }
- .docinfo {
- background-color: #EEE;
- }
- .top {
- border-top: 2px solid #EEE;
- }
- .bgwhite { background-color: white; }
- .bgred { background-color: #F44; }
- .bggrey { background-color: #666; }
- .bgbrown { background-color: #840; }
- .bgorange { background-color: #FA0; }
- .bgyellow { background-color: #EE0; }
- .bgmagenta{ background-color: #F4F; }
- .bgblue { background-color: #66F; }
- .bgcyan { background-color: #4DD; }
- .bggreen { background-color: #4F4; }
-
- .legend { font-size: 90%; }
- .cplate { font-size: 70%; border: solid grey 1px; }
- }
- </style>
-
- <script type="text/javascript"><!--
- function addHeaderTags() {
- var spans = document.getElementsByTagName("span");
- for (var i=0; i < spans.length; i++) {
- var elem = spans[i];
- if (elem) {
- var level = elem.getAttribute("class");
- if (level == "h1" || level == "h2" || level == "h3" || level == "h4" || level == "h5" || level == "h6") {
- elem.innerHTML = "<"+level+">"+elem.innerHTML+"</"+level+">";
- }
- }
- }
- }
- var legend_html = "Colour legend:<br /> <table> <tr><td>Unknown:</td> <td><span class='cplate bgwhite'> </span></td></tr> <tr><td>Draft:</td> <td><span class='cplate bgred'> </span></td></tr> <tr><td>Informational:</td> <td><span class='cplate bgorange'> </span></td></tr> <tr><td>Experimental:</td> <td><span class='cplate bgyellow'> &nb [...]
- function showElem(id) {
- var elem = document.getElementById(id);
- elem.innerHTML = eval(id+"_html");
- elem.style.visibility='visible';
- }
- function hideElem(id) {
- var elem = document.getElementById(id);
- elem.style.visibility='hidden';
- elem.innerHTML = "";
- }
- // -->
- </script></head><body onload="addHeaderTags()">
- <div style="height: 8px;">
- <span style="cursor: pointer;" onmouseover="this.style.cursor='pointer';" onclick="showElem('legend');" onmouseout="hideElem('legend')" class="pre noprint docinfo bgbrown" title="Click for colour legend."> </span>
- <div id="legend" class="docinfo noprint pre legend" style="border: 1px solid rgb(51, 68, 85); padding: 4px 9px 5px 7px; position: absolute; top: 4px; left: 4ex; visibility: hidden; background-color: white;" onmouseover="showElem('legend');" onmouseout="hideElem('legend');"></div>
- </div>
-<span class="pre noprint docinfo top">[<a href="http://tools.ietf.org/html/">RFCs/IDs</a>] [<a href="http://tools.ietf.org/rfc/rfc3513.txt">Plain Text</a>] [From <a href="http://tools.ietf.org/html/draft-ietf-ipngwg-addr-arch-v3">draft-ietf-ipngwg-addr-arch-v3</a>] </span><br>
-<span class="pre noprint docinfo"> </span><br>
-<span class="pre noprint docinfo">Obsoleted by: <a href="http://tools.ietf.org/html/rfc4291">4291</a> PROPOSED STANDARD</span><br>
-<span class="pre noprint docinfo"> </span><br>
-<pre>Network Working Group R. Hinden
-Request for Comments: 3513 Nokia
-Obsoletes: <a href="http://tools.ietf.org/html/rfc2373">2373</a> S. Deering
-Category: Standards Track Cisco Systems
- April 2003
-
-
- <span class="h1"><h1>Internet Protocol Version 6 (IPv6) Addressing Architecture</h1></span>
-
-Status of this Memo
-
- This document specifies an Internet standards track protocol for the
- Internet community, and requests discussion and suggestions for
- improvements. Please refer to the current edition of the "Internet
- Official Protocol Standards" (STD 1) for the standardization state
- and status of this protocol. Distribution of this memo is unlimited.
-
-Copyright Notice
-
- Copyright (C) The Internet Society (2003). All Rights Reserved.
-
-Abstract
-
- This specification defines the addressing architecture of the IP
- Version 6 (IPv6) protocol. The document includes the IPv6 addressing
- model, text representations of IPv6 addresses, definition of IPv6
- unicast addresses, anycast addresses, and multicast addresses, and an
- IPv6 node's required addresses.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-<span class="grey">Hinden & Deering Standards Track [Page 1]</span>
-<a name="page-2" id="page-2" href="#page-2"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
-
-
-Table of Contents
-
- <a href="#section-1">1</a>. Introduction.................................................<a href="#page-3">3</a>
- <a href="#section-2">2</a>. IPv6 Addressing..............................................<a href="#page-3">3</a>
- <a href="#section-2.1">2.1</a> Addressing Model.........................................<a href="#page-4">4</a>
- <a href="#section-2.2">2.2</a> Text Representation of Addresses.........................<a href="#page-4">4</a>
- <a href="#section-2.3">2.3</a> Text Representation of Address Prefixes..................<a href="#page-5">5</a>
- <a href="#section-2.4">2.4</a> Address Type Identification..............................<a href="#page-6">6</a>
- <a href="#section-2.5">2.5</a> Unicast Addresses........................................<a href="#page-7">7</a>
- <a href="#section-2.5.1">2.5.1</a> Interface Identifiers..............................<a href="#page-8">8</a>
- <a href="#section-2.5.2">2.5.2</a> The Unspecified Address............................<a href="#page-9">9</a>
- <a href="#section-2.5.3">2.5.3</a> The Loopback Address...............................<a href="#page-9">9</a>
- <a href="#section-2.5.4">2.5.4</a> Global Unicast Addresses..........................<a href="#page-10">10</a>
- <a href="#section-2.5.5">2.5.5</a> IPv6 Addresses with Embedded IPv4 Addresses.......<a href="#page-10">10</a>
- <a href="#section-2.5.6">2.5.6</a> Local-use IPv6 Unicast Addresses..................<a href="#page-11">11</a>
- <a href="#section-2.6">2.6</a> Anycast Addresses.......................................<a href="#page-12">12</a>
- <a href="#section-2.6.1">2.6.1</a> Required Anycast Address..........................<a href="#page-13">13</a>
- <a href="#section-2.7">2.7</a> Multicast Addresses.....................................<a href="#page-13">13</a>
- <a href="#section-2.7.1">2.7.1</a> Pre-Defined Multicast Addresses...................<a href="#page-15">15</a>
- <a href="#section-2.8">2.8</a> A Node's Required Addresses.............................<a href="#page-17">17</a>
- <a href="#section-3">3</a>. Security Considerations.....................................<a href="#page-17">17</a>
- <a href="#section-4">4</a>. IANA Considerations.........................................<a href="#page-18">18</a>
- <a href="#section-5">5</a>. References..................................................<a href="#page-19">19</a>
- <a href="#section-5.1">5.1</a> Normative References....................................<a href="#page-19">19</a>
- <a href="#section-5.2">5.2</a> Informative References..................................<a href="#page-19">19</a>
- APPENDIX A: Creating Modified EUI-64 format Interface IDs......<a href="#page-21">21</a>
- APPENDIX B: Changes from <a href="http://tools.ietf.org/html/rfc2373">RFC-2373</a>..............................<a href="#page-24">24</a>
- Authors' Addresses.............................................<a href="#page-25">25</a>
- Full Copyright Statement.......................................<a href="#page-26">26</a>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-<span class="grey">Hinden & Deering Standards Track [Page 2]</span>
-<a name="page-3" id="page-3" href="#page-3"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
-
-
-<span class="h2"><h2><a name="section-1">1</a>. Introduction</h2></span>
-
- This specification defines the addressing architecture of the IP
- Version 6 (IPv6) protocol. It includes the basic formats for the
- various types of IPv6 addresses (unicast, anycast, and multicast).
-
- The authors would like to acknowledge the contributions of Paul
- Francis, Scott Bradner, Jim Bound, Brian Carpenter, Matt Crawford,
- Deborah Estrin, Roger Fajman, Bob Fink, Peter Ford, Bob Gilligan,
- Dimitry Haskin, Tom Harsch, Christian Huitema, Tony Li, Greg
- Minshall, Thomas Narten, Erik Nordmark, Yakov Rekhter, Bill Simpson,
- Sue Thomson, Markku Savela, and Larry Masinter.
-
-<span class="h2"><h2><a name="section-2">2</a>. IPv6 Addressing</h2></span>
-
- IPv6 addresses are 128-bit identifiers for interfaces and sets of
- interfaces (where "interface" is as defined in <a href="#section-2">section 2</a> of [<a href="#ref-IPV6" title="&quot;Internet Protocol, Version 6 (IPv6) Specification&quot;">IPV6</a>]).
- There are three types of addresses:
-
- Unicast: An identifier for a single interface. A packet sent to a
- unicast address is delivered to the interface identified
- by that address.
-
- Anycast: An identifier for a set of interfaces (typically belonging
- to different nodes). A packet sent to an anycast address
- is delivered to one of the interfaces identified by that
- address (the "nearest" one, according to the routing
- protocols' measure of distance).
-
- Multicast: An identifier for a set of interfaces (typically belonging
- to different nodes). A packet sent to a multicast address
- is delivered to all interfaces identified by that address.
-
- There are no broadcast addresses in IPv6, their function being
- superseded by multicast addresses.
-
- In this document, fields in addresses are given a specific name, for
- example "subnet". When this name is used with the term "ID" for
- identifier after the name (e.g., "subnet ID"), it refers to the
- contents of the named field. When it is used with the term "prefix"
- (e.g., "subnet prefix") it refers to all of the address from the left
- up to and including this field.
-
- In IPv6, all zeros and all ones are legal values for any field,
- unless specifically excluded. Specifically, prefixes may contain, or
- end with, zero-valued fields.
-
-
-
-
-
-<span class="grey">Hinden & Deering Standards Track [Page 3]</span>
-<a name="page-4" id="page-4" href="#page-4"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
-
-
-<span class="h3"><h3><a name="section-2.1">2.1</a> Addressing Model</h3></span>
-
- IPv6 addresses of all types are assigned to interfaces, not nodes.
- An IPv6 unicast address refers to a single interface. Since each
- interface belongs to a single node, any of that node's interfaces'
- unicast addresses may be used as an identifier for the node.
-
- All interfaces are required to have at least one link-local unicast
- address (see <a href="#section-2.8">section 2.8</a> for additional required addresses). A
- single interface may also have multiple IPv6 addresses of any type
- (unicast, anycast, and multicast) or scope. Unicast addresses with
- scope greater than link-scope are not needed for interfaces that are
- not used as the origin or destination of any IPv6 packets to or from
- non-neighbors. This is sometimes convenient for point-to-point
- interfaces. There is one exception to this addressing model:
-
- A unicast address or a set of unicast addresses may be assigned to
- multiple physical interfaces if the implementation treats the
- multiple physical interfaces as one interface when presenting it
- to the internet layer. This is useful for load-sharing over
- multiple physical interfaces.
-
- Currently IPv6 continues the IPv4 model that a subnet prefix is
- associated with one link. Multiple subnet prefixes may be assigned
- to the same link.
-
-<span class="h3"><h3><a name="section-2.2">2.2</a> Text Representation of Addresses</h3></span>
-
- There are three conventional forms for representing IPv6 addresses as
- text strings:
-
- 1. The preferred form is x:x:x:x:x:x:x:x, where the 'x's are the
- hexadecimal values of the eight 16-bit pieces of the address.
-
- Examples:
-
- FEDC:BA98:7654:3210:FEDC:BA98:7654:3210
-
- 1080:0:0:0:8:800:200C:417A
-
- Note that it is not necessary to write the leading zeros in an
- individual field, but there must be at least one numeral in every
- field (except for the case described in 2.).
-
- 2. Due to some methods of allocating certain styles of IPv6
- addresses, it will be common for addresses to contain long strings
- of zero bits. In order to make writing addresses containing zero
- bits easier a special syntax is available to compress the zeros.
-
-
-
-<span class="grey">Hinden & Deering Standards Track [Page 4]</span>
-<a name="page-5" id="page-5" href="#page-5"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
-
-
- The use of "::" indicates one or more groups of 16 bits of zeros.
- The "::" can only appear once in an address. The "::" can also be
- used to compress leading or trailing zeros in an address.
-
- For example, the following addresses:
-
- 1080:0:0:0:8:800:200C:417A a unicast address
- FF01:0:0:0:0:0:0:101 a multicast address
- 0:0:0:0:0:0:0:1 the loopback address
- 0:0:0:0:0:0:0:0 the unspecified address
-
- may be represented as:
-
- 1080::8:800:200C:417A a unicast address
- FF01::101 a multicast address
- ::1 the loopback address
- :: the unspecified address
-
- 3. An alternative form that is sometimes more convenient when dealing
- with a mixed environment of IPv4 and IPv6 nodes is
- x:x:x:x:x:x:d.d.d.d, where the 'x's are the hexadecimal values of
- the six high-order 16-bit pieces of the address, and the 'd's are
- the decimal values of the four low-order 8-bit pieces of the
- address (standard IPv4 representation). Examples:
-
- 0:0:0:0:0:0:13.1.68.3
-
- 0:0:0:0:0:FFFF:129.144.52.38
-
- or in compressed form:
-
- ::13.1.68.3
-
- ::FFFF:129.144.52.38
-
-<span class="h3"><h3><a name="section-2.3">2.3</a> Text Representation of Address Prefixes</h3></span>
-
- The text representation of IPv6 address prefixes is similar to the
- way IPv4 address prefixes are written in CIDR notation [<a href="#ref-CIDR" title="&quot;Classless Inter-Domain Routing (CIDR): An Address Assignment and Aggregation Strategy&quot;">CIDR</a>]. An
- IPv6 address prefix is represented by the notation:
-
- ipv6-address/prefix-length
-
- where
-
- ipv6-address is an IPv6 address in any of the notations listed
- in <a href="#section-2.2">section 2.2</a>.
-
-
-
-
-<span class="grey">Hinden & Deering Standards Track [Page 5]</span>
-<a name="page-6" id="page-6" href="#page-6"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
-
-
- prefix-length is a decimal value specifying how many of the
- leftmost contiguous bits of the address comprise
- the prefix.
-
- For example, the following are legal representations of the 60-bit
- prefix 12AB00000000CD3 (hexadecimal):
-
- 12AB:0000:0000:CD30:0000:0000:0000:0000/60
- 12AB::CD30:0:0:0:0/60
- 12AB:0:0:CD30::/60
-
- The following are NOT legal representations of the above prefix:
-
- 12AB:0:0:CD3/60 may drop leading zeros, but not trailing zeros,
- within any 16-bit chunk of the address
-
- 12AB::CD30/60 address to left of "/" expands to
- 12AB:0000:0000:0000:0000:0000:0000:CD30
-
- 12AB::CD3/60 address to left of "/" expands to
- 12AB:0000:0000:0000:0000:0000:0000:0CD3
-
- When writing both a node address and a prefix of that node address
- (e.g., the node's subnet prefix), the two can be combined as follows:
-
- the node address 12AB:0:0:CD30:123:4567:89AB:CDEF
- and its subnet number 12AB:0:0:CD30::/60
-
- can be abbreviated as 12AB:0:0:CD30:123:4567:89AB:CDEF/60
-
-<span class="h3"><h3><a name="section-2.4">2.4</a> Address Type Identification</h3></span>
-
- The type of an IPv6 address is identified by the high-order bits of
- the address, as follows:
-
- Address type Binary prefix IPv6 notation Section
- ------------ ------------- ------------- -------
- Unspecified 00...0 (128 bits) ::/128 2.5.2
- Loopback 00...1 (128 bits) ::1/128 2.5.3
- Multicast 11111111 FF00::/8 2.7
- Link-local unicast 1111111010 FE80::/10 2.5.6
- Site-local unicast 1111111011 FEC0::/10 2.5.6
- Global unicast (everything else)
-
- Anycast addresses are taken from the unicast address spaces (of any
- scope) and are not syntactically distinguishable from unicast
- addresses.
-
-
-
-
-<span class="grey">Hinden & Deering Standards Track [Page 6]</span>
-<a name="page-7" id="page-7" href="#page-7"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
-
-
- The general format of global unicast addresses is described in
- <a href="#section-2.5.4">section 2.5.4</a>. Some special-purpose subtypes of global unicast
- addresses which contain embedded IPv4 addresses (for the purposes of
- IPv4-IPv6 interoperation) are described in <a href="#section-2.5.5">section 2.5.5</a>.
-
- Future specifications may redefine one or more sub-ranges of the
- global unicast space for other purposes, but unless and until that
- happens, implementations must treat all addresses that do not start
- with any of the above-listed prefixes as global unicast addresses.
-
-<span class="h3"><h3><a name="section-2.5">2.5</a> Unicast Addresses</h3></span>
-
- IPv6 unicast addresses are aggregable with prefixes of arbitrary
- bit-length similar to IPv4 addresses under Classless Interdomain
- Routing.
-
- There are several types of unicast addresses in IPv6, in particular
- global unicast, site-local unicast, and link-local unicast. There
- are also some special-purpose subtypes of global unicast, such as
- IPv6 addresses with embedded IPv4 addresses or encoded NSAP
- addresses. Additional address types or subtypes can be defined in
- the future.
-
- IPv6 nodes may have considerable or little knowledge of the internal
- structure of the IPv6 address, depending on the role the node plays
- (for instance, host versus router). At a minimum, a node may
- consider that unicast addresses (including its own) have no internal
- structure:
-
- | 128 bits |
- +-----------------------------------------------------------------+
- | node address |
- +-----------------------------------------------------------------+
-
- A slightly sophisticated host (but still rather simple) may
- additionally be aware of subnet prefix(es) for the link(s) it is
- attached to, where different addresses may have different values for
- n:
-
- | n bits | 128-n bits |
- +------------------------------------------------+----------------+
- | subnet prefix | interface ID |
- +------------------------------------------------+----------------+
-
- Though a very simple router may have no knowledge of the internal
- structure of IPv6 unicast addresses, routers will more generally have
- knowledge of one or more of the hierarchical boundaries for the
- operation of routing protocols. The known boundaries will differ
-
-
-
-<span class="grey">Hinden & Deering Standards Track [Page 7]</span>
-<a name="page-8" id="page-8" href="#page-8"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
-
-
- from router to router, depending on what positions the router holds
- in the routing hierarchy.
-
-<span class="h4"><h4><a name="section-2.5.1">2.5.1</a> Interface Identifiers</h4></span>
-
- Interface identifiers in IPv6 unicast addresses are used to identify
- interfaces on a link. They are required to be unique within a subnet
- prefix. It is recommended that the same interface identifier not be
- assigned to different nodes on a link. They may also be unique over
- a broader scope. In some cases an interface's identifier will be
- derived directly from that interface's link-layer address. The same
- interface identifier may be used on multiple interfaces on a single
- node, as long as they are attached to different subnets.
-
- Note that the uniqueness of interface identifiers is independent of
- the uniqueness of IPv6 addresses. For example, a global unicast
- address may be created with a non-global scope interface identifier
- and a site-local address may be created with a global scope interface
- identifier.
-
- For all unicast addresses, except those that start with binary value
- 000, Interface IDs are required to be 64 bits long and to be
- constructed in Modified EUI-64 format.
-
- Modified EUI-64 format based Interface identifiers may have global
- scope when derived from a global token (e.g., IEEE 802 48-bit MAC or
- IEEE EUI-64 identifiers [<a href="#ref-EUI64" title="&quot;./rfc3513&quot;">EUI64</a>]) or may have local scope where a
- global token is not available (e.g., serial links, tunnel end-points,
- etc.) or where global tokens are undesirable (e.g., temporary tokens
- for privacy [<a href="#ref-PRIV" title="&quot;Privacy Extensions for Stateless Address Autoconfiguration in IPv6&quot;">PRIV</a>]).
-
- Modified EUI-64 format interface identifiers are formed by inverting
- the "u" bit (universal/local bit in IEEE EUI-64 terminology) when
- forming the interface identifier from IEEE EUI-64 identifiers. In
- the resulting Modified EUI-64 format the "u" bit is set to one (1) to
- indicate global scope, and it is set to zero (0) to indicate local
- scope. The first three octets in binary of an IEEE EUI-64 identifier
- are as follows:
-
- 0 0 0 1 1 2
- |0 7 8 5 6 3|
- +----+----+----+----+----+----+
- |cccc|ccug|cccc|cccc|cccc|cccc|
- +----+----+----+----+----+----+
-
- written in Internet standard bit-order, where "u" is the
- universal/local bit, "g" is the individual/group bit, and "c" are the
- bits of the company_id. Appendix A: "Creating Modified EUI-64 format
-
-
-
-<span class="grey">Hinden & Deering Standards Track [Page 8]</span>
-<a name="page-9" id="page-9" href="#page-9"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
-
-
- Interface Identifiers" provides examples on the creation of Modified
- EUI-64 format based interface identifiers.
-
- The motivation for inverting the "u" bit when forming an interface
- identifier is to make it easy for system administrators to hand
- configure non-global identifiers when hardware tokens are not
- available. This is expected to be the case for serial links, tunnel end-
- points, etc. The alternative would have been for these to be of the
- form 0200:0:0:1, 0200:0:0:2, etc., instead of the much simpler 1, 2,
- etc.
-
- The use of the universal/local bit in the Modified EUI-64 format
- identifier is to allow development of future technology that can take
- advantage of interface identifiers with global scope.
-
- The details of forming interface identifiers are defined in the
- appropriate "IPv6 over &lt;link&gt;" specification such as "IPv6 over
- Ethernet" [<a href="#ref-ETHER" title="&quot;Transmission of IPv6 Packets over Ethernet Networks&quot;">ETHER</a>], "IPv6 over FDDI" [<a href="#ref-FDDI" title="&quot;Transmission of IPv6 Packets over FDDI Networks&quot;">FDDI</a>], etc.
-
-<span class="h4"><h4><a name="section-2.5.2">2.5.2</a> The Unspecified Address</h4></span>
-
- The address 0:0:0:0:0:0:0:0 is called the unspecified address. It
- must never be assigned to any node. It indicates the absence of an
- address. One example of its use is in the Source Address field of
- any IPv6 packets sent by an initializing host before it has learned
- its own address.
-
- The unspecified address must not be used as the destination address
- of IPv6 packets or in IPv6 Routing Headers. An IPv6 packet with a
- source address of unspecified must never be forwarded by an IPv6
- router.
-
-<span class="h4"><h4><a name="section-2.5.3">2.5.3</a> The Loopback Address</h4></span>
-
- The unicast address 0:0:0:0:0:0:0:1 is called the loopback address.
- It may be used by a node to send an IPv6 packet to itself. It may
- never be assigned to any physical interface. It is treated as
- having link-local scope, and may be thought of as the link-local
- unicast address of a virtual interface (typically called "the
- loopback interface") to an imaginary link that goes nowhere.
-
- The loopback address must not be used as the source address in IPv6
- packets that are sent outside of a single node. An IPv6 packet with
- a destination address of loopback must never be sent outside of a
- single node and must never be forwarded by an IPv6 router. A packet
- received on an interface with destination address of loopback must be
- dropped.
-
-
-
-
-<span class="grey">Hinden & Deering Standards Track [Page 9]</span>
-<a name="page-10" id="page-10" href="#page-10"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
-
-
-<span class="h4"><h4><a name="section-2.5.4">2.5.4</a> Global Unicast Addresses</h4></span>
-
- The general format for IPv6 global unicast addresses is as follows:
-
- | n bits | m bits | 128-n-m bits |
- +------------------------+-----------+----------------------------+
- | global routing prefix | subnet ID | interface ID |
- +------------------------+-----------+----------------------------+
-
- where the global routing prefix is a (typically hierarchically-
- structured) value assigned to a site (a cluster of subnets/links),
- the subnet ID is an identifier of a link within the site, and the
- interface ID is as defined in <a href="#section-2.5.1">section 2.5.1</a>.
-
- All global unicast addresses other than those that start with binary
- 000 have a 64-bit interface ID field (i.e., n + m = 64), formatted as
- described in <a href="#section-2.5.1">section 2.5.1</a>. Global unicast addresses that start with
- binary 000 have no such constraint on the size or structure of the
- interface ID field.
-
- Examples of global unicast addresses that start with binary 000 are
- the IPv6 address with embedded IPv4 addresses described in section
- 2.5.5 and the IPv6 address containing encoded NSAP addresses
- specified in [<a href="#ref-NSAP" title="&quot;OSI NSAPs and IPv6&quot;">NSAP</a>]. An example of global addresses starting with a
- binary value other than 000 (and therefore having a 64-bit interface
- ID field) can be found in [<a href="#ref-AGGR" title="&quot;An Aggregatable Global Unicast Address Format&quot;">AGGR</a>].
-
-<span class="h4"><h4><a name="section-2.5.5">2.5.5</a> IPv6 Addresses with Embedded IPv4 Addresses</h4></span>
-
- The IPv6 transition mechanisms [<a href="#ref-TRAN" title="&quot;Transition Mechanisms for IPv6 Hosts and Routers&quot;">TRAN</a>] include a technique for hosts
- and routers to dynamically tunnel IPv6 packets over IPv4 routing
- infrastructure. IPv6 nodes that use this technique are assigned
- special IPv6 unicast addresses that carry a global IPv4 address in
- the low-order 32 bits. This type of address is termed an "IPv4-
- compatible IPv6 address" and has the format:
-
- | 80 bits | 16 | 32 bits |
- +--------------------------------------+--------------------------+
- |0000..............................0000|0000| IPv4 address |
- +--------------------------------------+----+---------------------+
-
- Note: The IPv4 address used in the "IPv4-compatible IPv6 address"
- must be a globally-unique IPv4 unicast address.
-
- A second type of IPv6 address which holds an embedded IPv4 address is
- also defined. This address type is used to represent the addresses
- of IPv4 nodes as IPv6 addresses. This type of address is termed an
- "IPv4-mapped IPv6 address" and has the format:
-
-
-
-<span class="grey">Hinden & Deering Standards Track [Page 10]</span>
-<a name="page-11" id="page-11" href="#page-11"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
-
-
- | 80 bits | 16 | 32 bits |
- +--------------------------------------+--------------------------+
- |0000..............................0000|FFFF| IPv4 address |
- +--------------------------------------+----+---------------------+
-
-<span class="h4"><h4><a name="section-2.5.6">2.5.6</a> Local-Use IPv6 Unicast Addresses</h4></span>
-
- There are two types of local-use unicast addresses defined. These
- are Link-Local and Site-Local. The Link-Local is for use on a single
- link and the Site-Local is for use in a single site. Link-Local
- addresses have the following format:
-
- | 10 |
- | bits | 54 bits | 64 bits |
- +----------+-------------------------+----------------------------+
- |1111111010| 0 | interface ID |
- +----------+-------------------------+----------------------------+
-
- Link-Local addresses are designed to be used for addressing on a
- single link for purposes such as automatic address configuration,
- neighbor discovery, or when no routers are present.
-
- Routers must not forward any packets with link-local source or
- destination addresses to other links.
-
- Site-Local addresses have the following format:
-
- | 10 |
- | bits | 54 bits | 64 bits |
- +----------+-------------------------+----------------------------+
- |1111111011| subnet ID | interface ID |
- +----------+-------------------------+----------------------------+
-
- Site-local addresses are designed to be used for addressing inside of
- a site without the need for a global prefix. Although a subnet ID
- may be up to 54-bits long, it is expected that globally-connected
- sites will use the same subnet IDs for site-local and global
- prefixes.
-
- Routers must not forward any packets with site-local source or
- destination addresses outside of the site.
-
-
-
-
-
-
-
-
-
-
-<span class="grey">Hinden & Deering Standards Track [Page 11]</span>
-<a name="page-12" id="page-12" href="#page-12"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
-
-
-<span class="h3"><h3><a name="section-2.6">2.6</a> Anycast Addresses</h3></span>
-
- An IPv6 anycast address is an address that is assigned to more than
- one interface (typically belonging to different nodes), with the
- property that a packet sent to an anycast address is routed to the
- "nearest" interface having that address, according to the routing
- protocols' measure of distance.
-
- Anycast addresses are allocated from the unicast address space, using
- any of the defined unicast address formats. Thus, anycast addresses
- are syntactically indistinguishable from unicast addresses. When a
- unicast address is assigned to more than one interface, thus turning
- it into an anycast address, the nodes to which the address is
- assigned must be explicitly configured to know that it is an anycast
- address.
-
- For any assigned anycast address, there is a longest prefix P of that
- address that identifies the topological region in which all
- interfaces belonging to that anycast address reside. Within the
- region identified by P, the anycast address must be maintained as a
- separate entry in the routing system (commonly referred to as a "host
- route"); outside the region identified by P, the anycast address may
- be aggregated into the routing entry for prefix P.
-
- Note that in the worst case, the prefix P of an anycast set may be
- the null prefix, i.e., the members of the set may have no topological
- locality. In that case, the anycast address must be maintained as a
- separate routing entry throughout the entire internet, which presents
- a severe scaling limit on how many such "global" anycast sets may be
- supported. Therefore, it is expected that support for global anycast
- sets may be unavailable or very restricted.
-
- One expected use of anycast addresses is to identify the set of
- routers belonging to an organization providing internet service.
- Such addresses could be used as intermediate addresses in an IPv6
- Routing header, to cause a packet to be delivered via a particular
- service provider or sequence of service providers.
-
- Some other possible uses are to identify the set of routers attached
- to a particular subnet, or the set of routers providing entry into a
- particular routing domain.
-
- There is little experience with widespread, arbitrary use of internet
- anycast addresses, and some known complications and hazards when
- using them in their full generality [<a href="#ref-ANYCST" title="&quot;Host Anycasting Service&quot;">ANYCST</a>]. Until more experience
- has been gained and solutions are specified, the following
- restrictions are imposed on IPv6 anycast addresses:
-
-
-
-
-<span class="grey">Hinden & Deering Standards Track [Page 12]</span>
-<a name="page-13" id="page-13" href="#page-13"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
-
-
- o An anycast address must not be used as the source address of an
- IPv6 packet.
-
- o An anycast address must not be assigned to an IPv6 host, that is,
- it may be assigned to an IPv6 router only.
-
-<span class="h4"><h4><a name="section-2.6.1">2.6.1</a> Required Anycast Address</h4></span>
-
- The Subnet-Router anycast address is predefined. Its format is as
- follows:
-
- | n bits | 128-n bits |
- +------------------------------------------------+----------------+
- | subnet prefix | 00000000000000 |
- +------------------------------------------------+----------------+
-
- The "subnet prefix" in an anycast address is the prefix which
- identifies a specific link. This anycast address is syntactically
- the same as a unicast address for an interface on the link with the
- interface identifier set to zero.
-
- Packets sent to the Subnet-Router anycast address will be delivered
- to one router on the subnet. All routers are required to support the
- Subnet-Router anycast addresses for the subnets to which they have
- interfaces.
-
- The subnet-router anycast address is intended to be used for
- applications where a node needs to communicate with any one of the
- set of routers.
-
-<span class="h3"><h3><a name="section-2.7">2.7</a> Multicast Addresses</h3></span>
-
- An IPv6 multicast address is an identifier for a group of interfaces
- (typically on different nodes). An interface may belong to any
- number of multicast groups. Multicast addresses have the following
- format:
-
- | 8 | 4 | 4 | 112 bits |
- +--------+----+----+---------------------------------------------+
- |11111111|flgs|scop| group ID |
- +--------+----+----+---------------------------------------------+
-
- binary 11111111 at the start of the address identifies the
- address as being a multicast address.
-
- +-+-+-+-+
- flgs is a set of 4 flags: |0|0|0|T|
- +-+-+-+-+
-
-
-
-<span class="grey">Hinden & Deering Standards Track [Page 13]</span>
-<a name="page-14" id="page-14" href="#page-14"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
-
-
- The high-order 3 flags are reserved, and must be initialized
- to 0.
-
- T = 0 indicates a permanently-assigned ("well-known")
- multicast address, assigned by the Internet Assigned Number
- Authority (IANA).
-
- T = 1 indicates a non-permanently-assigned ("transient")
- multicast address.
-
- scop is a 4-bit multicast scope value used to limit the scope
- of the multicast group. The values are:
-
- 0 reserved
- 1 interface-local scope
- 2 link-local scope
- 3 reserved
- 4 admin-local scope
- 5 site-local scope
- 6 (unassigned)
- 7 (unassigned)
- 8 organization-local scope
- 9 (unassigned)
- A (unassigned)
- B (unassigned)
- C (unassigned)
- D (unassigned)
- E global scope
- F reserved
-
- interface-local scope spans only a single interface on a
- node, and is useful only for loopback transmission of
- multicast.
-
- link-local and site-local multicast scopes span the same
- topological regions as the corresponding unicast scopes.
-
- admin-local scope is the smallest scope that must be
- administratively configured, i.e., not automatically derived
- from physical connectivity or other, non-multicast-related
- configuration.
-
- organization-local scope is intended to span multiple sites
- belonging to a single organization.
-
- scopes labeled "(unassigned)" are available for
- administrators to define additional multicast regions.
-
-
-
-
-<span class="grey">Hinden & Deering Standards Track [Page 14]</span>
-<a name="page-15" id="page-15" href="#page-15"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
-
-
- group ID identifies the multicast group, either permanent or
- transient, within the given scope.
-
- The "meaning" of a permanently-assigned multicast address is
- independent of the scope value. For example, if the "NTP servers
- group" is assigned a permanent multicast address with a group ID of
- 101 (hex), then:
-
- FF01:0:0:0:0:0:0:101 means all NTP servers on the same interface
- (i.e., the same node) as the sender.
-
- FF02:0:0:0:0:0:0:101 means all NTP servers on the same link as the
- sender.
-
- FF05:0:0:0:0:0:0:101 means all NTP servers in the same site as the
- sender.
-
- FF0E:0:0:0:0:0:0:101 means all NTP servers in the internet.
-
- Non-permanently-assigned multicast addresses are meaningful only
- within a given scope. For example, a group identified by the non-
- permanent, site-local multicast address FF15:0:0:0:0:0:0:101 at one
- site bears no relationship to a group using the same address at a
- different site, nor to a non-permanent group using the same group ID
- with different scope, nor to a permanent group with the same group
- ID.
-
- Multicast addresses must not be used as source addresses in IPv6
- packets or appear in any Routing header.
-
- Routers must not forward any multicast packets beyond the scope
- indicated by the scop field in the destination multicast address.
-
- Nodes must not originate a packet to a multicast address whose scop
- field contains the reserved value 0; if such a packet is received, it
- must be silently dropped. Nodes should not originate a packet to a
- multicast address whose scop field contains the reserved value F; if
- such a packet is sent or received, it must be treated the same as
- packets destined to a global (scop E) multicast address.
-
-<span class="h4"><h4><a name="section-2.7.1">2.7.1</a> Pre-Defined Multicast Addresses</h4></span>
-
- The following well-known multicast addresses are pre-defined. The
- group ID's defined in this section are defined for explicit scope
- values.
-
- Use of these group IDs for any other scope values, with the T flag
- equal to 0, is not allowed.
-
-
-
-<span class="grey">Hinden & Deering Standards Track [Page 15]</span>
-<a name="page-16" id="page-16" href="#page-16"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
-
-
- Reserved Multicast Addresses: FF00:0:0:0:0:0:0:0
- FF01:0:0:0:0:0:0:0
- FF02:0:0:0:0:0:0:0
- FF03:0:0:0:0:0:0:0
- FF04:0:0:0:0:0:0:0
- FF05:0:0:0:0:0:0:0
- FF06:0:0:0:0:0:0:0
- FF07:0:0:0:0:0:0:0
- FF08:0:0:0:0:0:0:0
- FF09:0:0:0:0:0:0:0
- FF0A:0:0:0:0:0:0:0
- FF0B:0:0:0:0:0:0:0
- FF0C:0:0:0:0:0:0:0
- FF0D:0:0:0:0:0:0:0
- FF0E:0:0:0:0:0:0:0
- FF0F:0:0:0:0:0:0:0
-
- The above multicast addresses are reserved and shall never be
- assigned to any multicast group.
-
- All Nodes Addresses: FF01:0:0:0:0:0:0:1
- FF02:0:0:0:0:0:0:1
-
- The above multicast addresses identify the group of all IPv6 nodes,
- within scope 1 (interface-local) or 2 (link-local).
-
- All Routers Addresses: FF01:0:0:0:0:0:0:2
- FF02:0:0:0:0:0:0:2
- FF05:0:0:0:0:0:0:2
-
- The above multicast addresses identify the group of all IPv6 routers,
- within scope 1 (interface-local), 2 (link-local), or 5 (site-local).
-
- Solicited-Node Address: FF02:0:0:0:0:1:FFXX:XXXX
-
- Solicited-node multicast addresses are computed as a function of a
- node's unicast and anycast addresses. A solicited-node multicast
- address is formed by taking the low-order 24 bits of an address
- (unicast or anycast) and appending those bits to the prefix
- FF02:0:0:0:0:1:FF00::/104 resulting in a multicast address in the
- range
-
- FF02:0:0:0:0:1:FF00:0000
-
- to
-
- FF02:0:0:0:0:1:FFFF:FFFF
-
-
-
-
-<span class="grey">Hinden & Deering Standards Track [Page 16]</span>
-<a name="page-17" id="page-17" href="#page-17"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
-
-
- For example, the solicited node multicast address corresponding to
- the IPv6 address 4037::01:800:200E:8C6C is FF02::1:FF0E:8C6C. IPv6
- addresses that differ only in the high-order bits, e.g., due to
- multiple high-order prefixes associated with different aggregations,
- will map to the same solicited-node address, thereby reducing the
- number of multicast addresses a node must join.
-
- A node is required to compute and join (on the appropriate interface)
- the associated Solicited-Node multicast addresses for every unicast
- and anycast address it is assigned.
-
-<span class="h3"><h3><a name="section-2.8">2.8</a> A Node's Required Addresses</h3></span>
-
- A host is required to recognize the following addresses as
- identifying itself:
-
- o Its required Link-Local Address for each interface.
- o Any additional Unicast and Anycast Addresses that have been
- configured for the node's interfaces (manually or
- automatically).
- o The loopback address.
- o The All-Nodes Multicast Addresses defined in <a href="#section-2.7.1">section 2.7.1</a>.
- o The Solicited-Node Multicast Address for each of its unicast
- and anycast addresses.
- o Multicast Addresses of all other groups to which the node
- belongs.
-
- A router is required to recognize all addresses that a host is
- required to recognize, plus the following addresses as identifying
- itself:
-
- o The Subnet-Router Anycast Addresses for all interfaces for
- which it is configured to act as a router.
- o All other Anycast Addresses with which the router has been
- configured.
- o The All-Routers Multicast Addresses defined in <a href="#section-2.7.1">section 2.7.1</a>.
-
-<span class="h2"><h2><a name="section-3">3</a>. Security Considerations</h2></span>
-
- IPv6 addressing documents do not have any direct impact on Internet
- infrastructure security. Authentication of IPv6 packets is defined
- in [<a href="#ref-AUTH" title="&quot;IP Authentication Header&quot;">AUTH</a>].
-
-
-
-
-
-
-
-
-
-<span class="grey">Hinden & Deering Standards Track [Page 17]</span>
-<a name="page-18" id="page-18" href="#page-18"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
-
-
-<span class="h2"><h2><a name="section-4">4</a>. IANA Considerations</h2></span>
-
- The table and notes at <a href="http://www.isi.edu/in-notes/iana/assignments/ipv6-address-space.txt">http://www.isi.edu/in-</a>
- <a href="http://www.isi.edu/in-notes/iana/assignments/ipv6-address-space.txt">notes/iana/assignments/ipv6-address-space.txt</a> should be replaced with
- the following:
-
- INTERNET PROTOCOL VERSION 6 ADDRESS SPACE
-
- The initial assignment of IPv6 address space is as follows:
-
- Allocation Prefix Fraction of
- (binary) Address Space
- ----------------------------------- -------- -------------
- Unassigned (see Note 1 below) 0000 0000 1/256
- Unassigned 0000 0001 1/256
- Reserved for NSAP Allocation 0000 001 1/128 [<a href="http://tools.ietf.org/html/rfc1888">RFC1888</a>]
- Unassigned 0000 01 1/64
- Unassigned 0000 1 1/32
- Unassigned 0001 1/16
- Global Unicast 001 1/8 [<a href="http://tools.ietf.org/html/rfc2374">RFC2374</a>]
- Unassigned 010 1/8
- Unassigned 011 1/8
- Unassigned 100 1/8
- Unassigned 101 1/8
- Unassigned 110 1/8
- Unassigned 1110 1/16
- Unassigned 1111 0 1/32
- Unassigned 1111 10 1/64
- Unassigned 1111 110 1/128
- Unassigned 1111 1110 0 1/512
- Link-Local Unicast Addresses 1111 1110 10 1/1024
- Site-Local Unicast Addresses 1111 1110 11 1/1024
- Multicast Addresses 1111 1111 1/256
-
- Notes:
-
- 1. The "unspecified address", the "loopback address", and the IPv6
- Addresses with Embedded IPv4 Addresses are assigned out of the
- 0000 0000 binary prefix space.
-
- 2. For now, IANA should limit its allocation of IPv6 unicast address
- space to the range of addresses that start with binary value 001.
- The rest of the global unicast address space (approximately 85% of
- the IPv6 address space) is reserved for future definition and use,
- and is not to be assigned by IANA at this time.
-
-
-
-
-
-
-<span class="grey">Hinden & Deering Standards Track [Page 18]</span>
-<a name="page-19" id="page-19" href="#page-19"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
-
-
-<span class="h2"><h2><a name="section-5">5</a>. References</h2></span>
-
-<span class="h3"><h3><a name="section-5.1">5.1</a> Normative References</h3></span>
-
- [<a name="ref-IPV6" id="ref-IPV6">IPV6</a>] Deering, S. and R. Hinden, "Internet Protocol, Version 6
- (IPv6) Specification", <a href="http://tools.ietf.org/html/rfc2460">RFC 2460</a>, December 1998.
-
- [<a name="ref-RFC2026" id="ref-RFC2026">RFC2026</a>] Bradner, S., "The Internet Standards Process -- Revision
- 3", <a href="http://tools.ietf.org/html/bcp9">BCP 9</a> , <a href="http://tools.ietf.org/html/rfc2026">RFC 2026</a>, October 1996.
-
-<span class="h3"><h3><a name="section-5.2">5.2</a> Informative References</h3></span>
-
- [<a name="ref-ANYCST" id="ref-ANYCST">ANYCST</a>] Partridge, C., Mendez, T. and W. Milliken, "Host Anycasting
- Service", <a href="http://tools.ietf.org/html/rfc1546">RFC 1546</a>, November 1993.
-
- [<a name="ref-AUTH" id="ref-AUTH">AUTH</a>] Kent, S. and R. Atkinson, "IP Authentication Header", <a href="http://tools.ietf.org/html/rfc2402">RFC</a>
- <a href="http://tools.ietf.org/html/rfc2402">2402</a>, November 1998.
-
- [<a name="ref-AGGR" id="ref-AGGR">AGGR</a>] Hinden, R., O'Dell, M. and S. Deering, "An Aggregatable
- Global Unicast Address Format", <a href="http://tools.ietf.org/html/rfc2374">RFC 2374</a>, July 1998.
-
- [<a name="ref-CIDR" id="ref-CIDR">CIDR</a>] Fuller, V., Li, T., Yu, J. and K. Varadhan, "Classless
- Inter-Domain Routing (CIDR): An Address Assignment and
- Aggregation Strategy", <a href="http://tools.ietf.org/html/rfc1519">RFC 1519</a>, September 1993.
-
- [<a name="ref-ETHER" id="ref-ETHER">ETHER</a>] Crawford, M., "Transmission of IPv6 Packets over Ethernet
- Networks", <a href="http://tools.ietf.org/html/rfc2464">RFC 2464</a>, December 1998.
-
- [<a name="ref-EUI64" id="ref-EUI64">EUI64</a>] IEEE, "Guidelines for 64-bit Global Identifier (EUI-64)
- Registration Authority",
- <a href="http://standards.ieee.org/regauth/oui/tutorials/EUI64.html">http://standards.ieee.org/regauth/oui/tutorials/EUI64.html</a>,
- March 1997.
-
- [<a name="ref-FDDI" id="ref-FDDI">FDDI</a>] Crawford, M., "Transmission of IPv6 Packets over FDDI
- Networks", <a href="http://tools.ietf.org/html/rfc2467">RFC 2467</a>, December 1998.
-
- [<a name="ref-MASGN" id="ref-MASGN">MASGN</a>] Hinden, R. and S. Deering, "IPv6 Multicast Address
- Assignments", <a href="http://tools.ietf.org/html/rfc2375">RFC 2375</a>, July 1998.
-
- [<a name="ref-NSAP" id="ref-NSAP">NSAP</a>] Bound, J., Carpenter, B., Harrington, D., Houldsworth, J.
- and A. Lloyd, "OSI NSAPs and IPv6", <a href="http://tools.ietf.org/html/rfc1888">RFC 1888</a>, August 1996.
-
- [<a name="ref-PRIV" id="ref-PRIV">PRIV</a>] Narten, T. and R. Draves, "Privacy Extensions for Stateless
- Address Autoconfiguration in IPv6", <a href="http://tools.ietf.org/html/rfc3041">RFC 3041</a>, January 2001.
-
- [<a name="ref-TOKEN" id="ref-TOKEN">TOKEN</a>] Crawford, M., Narten, T. and S. Thomas, "Transmission of
- IPv6 Packets over Token Ring Networks", <a href="http://tools.ietf.org/html/rfc2470">RFC 2470</a>, December
- 1998.
-
-
-
-<span class="grey">Hinden & Deering Standards Track [Page 19]</span>
-<a name="page-20" id="page-20" href="#page-20"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
-
-
- [<a name="ref-TRAN" id="ref-TRAN">TRAN</a>] Gilligan, R. and E. Nordmark, "Transition Mechanisms for
- IPv6 Hosts and Routers", <a href="http://tools.ietf.org/html/rfc2893">RFC 2893</a>, August 2000.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-<span class="grey">Hinden & Deering Standards Track [Page 20]</span>
-<a name="page-21" id="page-21" href="#page-21"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
-
-
-APPENDIX A: Creating Modified EUI-64 format Interface Identifiers
-
- Depending on the characteristics of a specific link or node there are
- a number of approaches for creating Modified EUI-64 format interface
- identifiers. This appendix describes some of these approaches.
-
-Links or Nodes with IEEE EUI-64 Identifiers
-
- The only change needed to transform an IEEE EUI-64 identifier to an
- interface identifier is to invert the "u" (universal/local) bit. For
- example, a globally unique IEEE EUI-64 identifier of the form:
-
- |0 1|1 3|3 4|4 6|
- |0 5|6 1|2 7|8 3|
- +----------------+----------------+----------------+----------------+
- |cccccc0gcccccccc|ccccccccmmmmmmmm|mmmmmmmmmmmmmmmm|mmmmmmmmmmmmmmmm|
- +----------------+----------------+----------------+----------------+
-
- where "c" are the bits of the assigned company_id, "0" is the value
- of the universal/local bit to indicate global scope, "g" is
- individual/group bit, and "m" are the bits of the manufacturer-
- selected extension identifier. The IPv6 interface identifier would
- be of the form:
-
- |0 1|1 3|3 4|4 6|
- |0 5|6 1|2 7|8 3|
- +----------------+----------------+----------------+----------------+
- |cccccc1gcccccccc|ccccccccmmmmmmmm|mmmmmmmmmmmmmmmm|mmmmmmmmmmmmmmmm|
- +----------------+----------------+----------------+----------------+
-
- The only change is inverting the value of the universal/local bit.
-
-Links or Nodes with IEEE 802 48 bit MAC's
-
- [<a href="#ref-EUI64">EUI64</a>] defines a method to create an IEEE EUI-64 identifier from an
- IEEE 48bit MAC identifier. This is to insert two octets, with
- hexadecimal values of 0xFF and 0xFE, in the middle of the 48 bit MAC
- (between the company_id and vendor supplied id). For example, the 48
- bit IEEE MAC with global scope:
-
-
-
-
-
-
-
-
-
-
-
-
-<span class="grey">Hinden & Deering Standards Track [Page 21]</span>
-<a name="page-22" id="page-22" href="#page-22"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
-
-
- |0 1|1 3|3 4|
- |0 5|6 1|2 7|
- +----------------+----------------+----------------+
- |cccccc0gcccccccc|ccccccccmmmmmmmm|mmmmmmmmmmmmmmmm|
- +----------------+----------------+----------------+
-
- where "c" are the bits of the assigned company_id, "0" is the value
- of the universal/local bit to indicate global scope, "g" is
- individual/group bit, and "m" are the bits of the manufacturer-
- selected extension identifier. The interface identifier would be of
- the form:
-
- |0 1|1 3|3 4|4 6|
- |0 5|6 1|2 7|8 3|
- +----------------+----------------+----------------+----------------+
- |cccccc1gcccccccc|cccccccc11111111|11111110mmmmmmmm|mmmmmmmmmmmmmmmm|
- +----------------+----------------+----------------+----------------+
-
- When IEEE 802 48bit MAC addresses are available (on an interface or a
- node), an implementation may use them to create interface identifiers
- due to their availability and uniqueness properties.
-
-Links with Other Kinds of Identifiers
-
- There are a number of types of links that have link-layer interface
- identifiers other than IEEE EUI-64 or IEEE 802 48-bit MACs. Examples
- include LocalTalk and Arcnet. The method to create a Modified EUI-
- 64 format identifier is to take the link identifier (e.g., the
- LocalTalk 8 bit node identifier) and zero fill it to the left. For
- example, a LocalTalk 8 bit node identifier of hexadecimal value 0x4F
- results in the following interface identifier:
-
- |0 1|1 3|3 4|4 6|
- |0 5|6 1|2 7|8 3|
- +----------------+----------------+----------------+----------------+
- |0000000000000000|0000000000000000|0000000000000000|0000000001001111|
- +----------------+----------------+----------------+----------------+
-
- Note that this results in the universal/local bit set to "0" to
- indicate local scope.
-
-Links without Identifiers
-
- There are a number of links that do not have any type of built-in
- identifier. The most common of these are serial links and configured
- tunnels. Interface identifiers must be chosen that are unique within
- a subnet-prefix.
-
-
-
-
-<span class="grey">Hinden & Deering Standards Track [Page 22]</span>
-<a name="page-23" id="page-23" href="#page-23"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
-
-
- When no built-in identifier is available on a link the preferred
- approach is to use a global interface identifier from another
- interface or one which is assigned to the node itself. When using
- this approach no other interface connecting the same node to the same
- subnet-prefix may use the same identifier.
-
- If there is no global interface identifier available for use on the
- link the implementation needs to create a local-scope interface
- identifier. The only requirement is that it be unique within a
- subnet prefix. There are many possible approaches to select a
- subnet-prefix-unique interface identifier. These include:
-
- Manual Configuration
- Node Serial Number
- Other node-specific token
-
- The subnet-prefix-unique interface identifier should be generated in
- a manner that it does not change after a reboot of a node or if
- interfaces are added or deleted from the node.
-
- The selection of the appropriate algorithm is link and implementation
- dependent. The details on forming interface identifiers are defined
- in the appropriate "IPv6 over &lt;link&gt;" specification. It is strongly
- recommended that a collision detection algorithm be implemented as
- part of any automatic algorithm.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-<span class="grey">Hinden & Deering Standards Track [Page 23]</span>
-<a name="page-24" id="page-24" href="#page-24"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
-
-
-APPENDIX B: Changes from <a href="http://tools.ietf.org/html/rfc2373">RFC-2373</a>
-
- The following changes were made from <a href="http://tools.ietf.org/html/rfc2373">RFC-2373</a> "IP Version 6
- Addressing Architecture":
-
- - Clarified text in <a href="#section-2.2">section 2.2</a> to allow "::" to represent one or
- more groups of 16 bits of zeros.
- - Changed uniqueness requirement of Interface Identifiers from
- unique on a link to unique within a subnet prefix. Also added a
- recommendation that the same interface identifier not be assigned
- to different machines on a link.
- - Change site-local format to make the subnet ID field 54-bit long
- and remove the 38-bit zero's field.
- - Added description of multicast scop values and rules to handle the
- reserved scop value 0.
- - Revised sections 2.4 and 2.5.6 to simplify and clarify how
- different address types are identified. This was done to ensure
- that implementations do not build in any knowledge about global
- unicast format prefixes. Changes include:
- o Removed Format Prefix (FP) terminology
- o Revised list of address types to only include exceptions to
- global unicast and a single entry that identifies everything
- else as Global Unicast.
- o Removed list of defined prefix exceptions from <a href="#section-2.5.6">section 2.5.6</a>
- as it is now the main part of <a href="#section-2.4">section 2.4</a>.
- - Clarified text relating to EUI-64 identifiers to distinguish
- between IPv6's "Modified EUI-64 format" identifiers and IEEE EUI-
- 64 identifiers.
- - Combined the sections on the Global Unicast Addresses and NSAP
- Addresses into a single section on Global Unicast Addresses,
- generalized the Global Unicast format, and cited [<a href="#ref-AGGR" title="&quot;An Aggregatable Global Unicast Address Format&quot;">AGGR</a>] and [<a href="#ref-NSAP" title="&quot;OSI NSAPs and IPv6&quot;">NSAP</a>]
- as examples.
- - Reordered sections 2.5.4 and 2.5.5.
- - Removed <a href="#section-2.7.2">section 2.7.2</a> Assignment of New IPv6 Multicast Addresses
- because this is being redefined elsewhere.
- - Added an IANA considerations section that updates the IANA IPv6
- address allocations and documents the NSAP and AGGR allocations.
- - Added clarification that the "IPv4-compatible IPv6 address" must
- use global IPv4 unicast addresses.
- - Divided references into normative and non-normative sections.
- - Added reference to [<a href="#ref-PRIV" title="&quot;Privacy Extensions for Stateless Address Autoconfiguration in IPv6&quot;">PRIV</a>] in <a href="#section-2.5.1">section 2.5.1</a>
- - Added clarification that routers must not forward multicast
- packets outside of the scope indicated in the multicast address.
- - Added clarification that routers must not forward packets with
- source address of the unspecified address.
- - Added clarification that routers must drop packets received on an
- interface with destination address of loopback.
- - Clarified the definition of IPv4-mapped addresses.
-
-
-
-<span class="grey">Hinden & Deering Standards Track [Page 24]</span>
-<a name="page-25" id="page-25" href="#page-25"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
-
-
- - Removed the ABNF Description of Text Representations Appendix.
- - Removed the address block reserved for IPX addresses.
- - Multicast scope changes:
- o Changed name of scope value 1 from "node-local" to
- "interface-local"
- o Defined scope value 4 as "admin-local"
- - Corrected reference to <a href="http://tools.ietf.org/html/rfc1933">RFC1933</a> and updated references.
- - Many small changes to clarify and make the text more consistent.
-
-Authors' Addresses
-
- Robert M. Hinden
- Nokia
- 313 Fairchild Drive
- Mountain View, CA 94043
- USA
-
- Phone: +1 650 625-2004
- EMail: hinden at iprg.nokia.com
-
-
- Stephen E. Deering
- Cisco Systems, Inc.
- 170 West Tasman Drive
- San Jose, CA 95134-1706
- USA
-
- Phone: +1 408 527-8213
- EMail: deering at cisco.com
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-<span class="grey">Hinden & Deering Standards Track [Page 25]</span>
-<a name="page-26" id="page-26" href="#page-26"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
-
-
-Full Copyright Statement
-
- Copyright (C) The Internet Society (2003). All Rights Reserved.
-
- This document and translations of it may be copied and furnished to
- others, and derivative works that comment on or otherwise explain it
- or assist in its implementation may be prepared, copied, published
- and distributed, in whole or in part, without restriction of any
- kind, provided that the above copyright notice and this paragraph are
- included on all such copies and derivative works. However, this
- document itself may not be modified in any way, such as by removing
- the copyright notice or references to the Internet Society or other
- Internet organizations, except as needed for the purpose of
- developing Internet standards in which case the procedures for
- copyrights defined in the Internet Standards process must be
- followed, or as required to translate it into languages other than
- English.
-
- The limited permissions granted above are perpetual and will not be
- revoked by the Internet Society or its successors or assigns.
-
- This document and the information contained herein is provided on an
- "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING
- TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING
- BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION
- HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF
- MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
-
-Acknowledgement
-
- Funding for the RFC Editor function is currently provided by the
- Internet Society.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-Hinden & Deering Standards Track [Page 26]
-<span class="break"> </span>
-
-</pre><br>
-<span class="noprint"><small><small>Html markup produced by rfcmarkup 1.46, available from
-<a href="http://tools.ietf.org/tools/rfcmarkup/">http://tools.ietf.org/tools/rfcmarkup/</a>
-</small></small></span>
-
-</body></html>
\ No newline at end of file
diff --git a/third_party/uriparser-0.7.5/doc/rfc3986.htm b/third_party/uriparser-0.7.5/doc/rfc3986.htm
deleted file mode 100644
index b38bbea..0000000
--- a/third_party/uriparser-0.7.5/doc/rfc3986.htm
+++ /dev/null
@@ -1,3539 +0,0 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-<html xml:lang="en" lang="en"><head>
-
-
- <meta http-equiv="Content-Type" content="text/html; charset=us-ascii">
- <meta name="robots" content="index,follow">
- <meta name="creator" content="rfcmarkup version 1.46">
- <link rel="icon" href="http://tools.ietf.org/images/rfc.png" type="image/png">
- <link rel="shortcut icon" href="http://tools.ietf.org/images/rfc.png" type="image/png"><title>RFC 3986 Uniform Resource Identifier (URI): Generic Syntax</title>
-
-
- <style type="text/css">
- body {
- margin: 0px 8px;
- font-size: 1em;
- }
- h1, h2, h3, h4, h5, h6, .h1, .h2, .h3, .h4, .h5, .h6 {
- font-weight: bold;
- line-height: 0pt;
- display: inline;
- white-space: pre;
- font-family: monospace;
- font-size: 1em;
- font-weight: bold;
- }
- pre {
- font-size: 1em;
- }
- .pre {
- white-space: pre;
- font-family: monospace;
- }
- .header{
- font-weight: bold;
- }
- @media print {
- body {
- font-size: 10.5pt;
- }
- h1, h2, h3, h4, h5, h6 {
- font-size: 10.5pt;
- }
-
- a:link, a:visited {
- color: inherit;
- text-decoration: none;
- }
- .break {
- page-break-before: always;
- text-decoration: none;
- }
- .noprint {
- display: none;
- }
- }
- @media screen {
- .grey, .grey a:link, .grey a:visited {
- color: #777;
- }
- .break {
- text-decoration: none;
- display: none;
- }
- .docinfo {
- background-color: #EEE;
- }
- .top {
- border-top: 2px solid #EEE;
- }
- .bgwhite { background-color: white; }
- .bgred { background-color: #F44; }
- .bggrey { background-color: #666; }
- .bgbrown { background-color: #840; }
- .bgorange { background-color: #FA0; }
- .bgyellow { background-color: #EE0; }
- .bgmagenta{ background-color: #F4F; }
- .bgblue { background-color: #66F; }
- .bgcyan { background-color: #4DD; }
- .bggreen { background-color: #4F4; }
-
- .legend { font-size: 90%; }
- .cplate { font-size: 70%; border: solid grey 1px; }
- }
- </style>
-
- <script type="text/javascript"><!--
- function addHeaderTags() {
- var spans = document.getElementsByTagName("span");
- for (var i=0; i < spans.length; i++) {
- var elem = spans[i];
- if (elem) {
- var level = elem.getAttribute("class");
- if (level == "h1" || level == "h2" || level == "h3" || level == "h4" || level == "h5" || level == "h6") {
- elem.innerHTML = "<"+level+">"+elem.innerHTML+"</"+level+">";
- }
- }
- }
- }
- var legend_html = "Colour legend:<br /> <table> <tr><td>Unknown:</td> <td><span class='cplate bgwhite'> </span></td></tr> <tr><td>Draft:</td> <td><span class='cplate bgred'> </span></td></tr> <tr><td>Informational:</td> <td><span class='cplate bgorange'> </span></td></tr> <tr><td>Experimental:</td> <td><span class='cplate bgyellow'> &nb [...]
- function showElem(id) {
- var elem = document.getElementById(id);
- elem.innerHTML = eval(id+"_html");
- elem.style.visibility='visible';
- }
- function hideElem(id) {
- var elem = document.getElementById(id);
- elem.style.visibility='hidden';
- elem.innerHTML = "";
- }
- // -->
- </script></head><body onload="addHeaderTags()">
- <div style="height: 8px;">
- <span style="cursor: pointer;" onmouseover="this.style.cursor='pointer';" onclick="showElem('legend');" onmouseout="hideElem('legend')" class="pre noprint docinfo bggreen" title="Click for colour legend."> </span>
- <div id="legend" class="docinfo noprint pre legend" style="border: 1px solid rgb(51, 68, 85); padding: 4px 9px 5px 7px; position: absolute; top: 4px; left: 4ex; visibility: hidden; background-color: white;" onmouseover="showElem('legend');" onmouseout="hideElem('legend');"></div>
- </div>
-<span class="pre noprint docinfo top">[<a href="http://tools.ietf.org/html/">RFCs/IDs</a>] [<a href="http://tools.ietf.org/rfc/rfc3986.txt">Plain Text</a>] [From <a href="http://tools.ietf.org/html/draft-fielding-uri-rfc2396bis">draft-fielding-uri-rfc2396bis</a>] </span><br>
-<span class="pre noprint docinfo"> </span><br>
-<span class="pre noprint docinfo"> STANDARD</span><br>
-<span class="pre noprint docinfo"> </span><br>
-<pre>Network Working Group T. Berners-Lee
-Request for Comments: 3986 W3C/MIT
-STD: 66 R. Fielding
-Updates: <a href="http://tools.ietf.org/html/rfc1738">1738</a> Day Software
-Obsoletes: <a href="http://tools.ietf.org/html/rfc2732">2732</a>, <a href="http://tools.ietf.org/html/rfc2396">2396</a>, <a href="http://tools.ietf.org/html/rfc1808">1808</a> L. Masinter
-Category: Standards Track Adobe Systems
- January 2005
-
-
- <span class="h1"><h1>Uniform Resource Identifier (URI): Generic Syntax</h1></span>
-
-Status of This Memo
-
- This document specifies an Internet standards track protocol for the
- Internet community, and requests discussion and suggestions for
- improvements. Please refer to the current edition of the "Internet
- Official Protocol Standards" (STD 1) for the standardization state
- and status of this protocol. Distribution of this memo is unlimited.
-
-Copyright Notice
-
- Copyright (C) The Internet Society (2005).
-
-Abstract
-
- A Uniform Resource Identifier (URI) is a compact sequence of
- characters that identifies an abstract or physical resource. This
- specification defines the generic URI syntax and a process for
- resolving URI references that might be in relative form, along with
- guidelines and security considerations for the use of URIs on the
- Internet. The URI syntax defines a grammar that is a superset of all
- valid URIs, allowing an implementation to parse the common components
- of a URI reference without knowing the scheme-specific requirements
- of every possible identifier. This specification does not define a
- generative grammar for URIs; that task is performed by the individual
- specifications of each URI scheme.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 1]</span>
-<a name="page-2" id="page-2" href="#page-2"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
-Table of Contents
-
- <a href="#section-1">1</a>. Introduction . . . . . . . . . . . . . . . . . . . . . . . . . <a href="#page-4">4</a>
- <a href="#section-1.1">1.1</a>. Overview of URIs . . . . . . . . . . . . . . . . . . . . <a href="#page-4">4</a>
- <a href="#section-1.1.1">1.1.1</a>. Generic Syntax . . . . . . . . . . . . . . . . . <a href="#page-6">6</a>
- <a href="#section-1.1.2">1.1.2</a>. Examples . . . . . . . . . . . . . . . . . . . . <a href="#page-7">7</a>
- <a href="#section-1.1.3">1.1.3</a>. URI, URL, and URN . . . . . . . . . . . . . . . <a href="#page-7">7</a>
- <a href="#section-1.2">1.2</a>. Design Considerations . . . . . . . . . . . . . . . . . <a href="#page-8">8</a>
- <a href="#section-1.2.1">1.2.1</a>. Transcription . . . . . . . . . . . . . . . . . <a href="#page-8">8</a>
- <a href="#section-1.2.2">1.2.2</a>. Separating Identification from Interaction . . . <a href="#page-9">9</a>
- <a href="#section-1.2.3">1.2.3</a>. Hierarchical Identifiers . . . . . . . . . . . . <a href="#page-10">10</a>
- <a href="#section-1.3">1.3</a>. Syntax Notation . . . . . . . . . . . . . . . . . . . . <a href="#page-11">11</a>
- <a href="#section-2">2</a>. Characters . . . . . . . . . . . . . . . . . . . . . . . . . . <a href="#page-11">11</a>
- <a href="#section-2.1">2.1</a>. Percent-Encoding . . . . . . . . . . . . . . . . . . . . <a href="#page-12">12</a>
- <a href="#section-2.2">2.2</a>. Reserved Characters . . . . . . . . . . . . . . . . . . <a href="#page-12">12</a>
- <a href="#section-2.3">2.3</a>. Unreserved Characters . . . . . . . . . . . . . . . . . <a href="#page-13">13</a>
- <a href="#section-2.4">2.4</a>. When to Encode or Decode . . . . . . . . . . . . . . . . <a href="#page-14">14</a>
- <a href="#section-2.5">2.5</a>. Identifying Data . . . . . . . . . . . . . . . . . . . . <a href="#page-14">14</a>
- <a href="#section-3">3</a>. Syntax Components . . . . . . . . . . . . . . . . . . . . . . <a href="#page-16">16</a>
- <a href="#section-3.1">3.1</a>. Scheme . . . . . . . . . . . . . . . . . . . . . . . . . <a href="#page-17">17</a>
- <a href="#section-3.2">3.2</a>. Authority . . . . . . . . . . . . . . . . . . . . . . . <a href="#page-17">17</a>
- <a href="#section-3.2.1">3.2.1</a>. User Information . . . . . . . . . . . . . . . . <a href="#page-18">18</a>
- <a href="#section-3.2.2">3.2.2</a>. Host . . . . . . . . . . . . . . . . . . . . . . <a href="#page-18">18</a>
- <a href="#section-3.2.3">3.2.3</a>. Port . . . . . . . . . . . . . . . . . . . . . . <a href="#page-22">22</a>
- <a href="#section-3.3">3.3</a>. Path . . . . . . . . . . . . . . . . . . . . . . . . . . <a href="#page-22">22</a>
- <a href="#section-3.4">3.4</a>. Query . . . . . . . . . . . . . . . . . . . . . . . . . <a href="#page-23">23</a>
- <a href="#section-3.5">3.5</a>. Fragment . . . . . . . . . . . . . . . . . . . . . . . . <a href="#page-24">24</a>
- <a href="#section-4">4</a>. Usage . . . . . . . . . . . . . . . . . . . . . . . . . . . . <a href="#page-25">25</a>
- <a href="#section-4.1">4.1</a>. URI Reference . . . . . . . . . . . . . . . . . . . . . <a href="#page-25">25</a>
- <a href="#section-4.2">4.2</a>. Relative Reference . . . . . . . . . . . . . . . . . . . <a href="#page-26">26</a>
- <a href="#section-4.3">4.3</a>. Absolute URI . . . . . . . . . . . . . . . . . . . . . . <a href="#page-27">27</a>
- <a href="#section-4.4">4.4</a>. Same-Document Reference . . . . . . . . . . . . . . . . <a href="#page-27">27</a>
- <a href="#section-4.5">4.5</a>. Suffix Reference . . . . . . . . . . . . . . . . . . . . <a href="#page-27">27</a>
- <a href="#section-5">5</a>. Reference Resolution . . . . . . . . . . . . . . . . . . . . . <a href="#page-28">28</a>
- <a href="#section-5.1">5.1</a>. Establishing a Base URI . . . . . . . . . . . . . . . . <a href="#page-28">28</a>
- <a href="#section-5.1.1">5.1.1</a>. Base URI Embedded in Content . . . . . . . . . . <a href="#page-29">29</a>
- <a href="#section-5.1.2">5.1.2</a>. Base URI from the Encapsulating Entity . . . . . <a href="#page-29">29</a>
- <a href="#section-5.1.3">5.1.3</a>. Base URI from the Retrieval URI . . . . . . . . <a href="#page-30">30</a>
- <a href="#section-5.1.4">5.1.4</a>. Default Base URI . . . . . . . . . . . . . . . . <a href="#page-30">30</a>
- <a href="#section-5.2">5.2</a>. Relative Resolution . . . . . . . . . . . . . . . . . . <a href="#page-30">30</a>
- <a href="#section-5.2.1">5.2.1</a>. Pre-parse the Base URI . . . . . . . . . . . . . <a href="#page-31">31</a>
- <a href="#section-5.2.2">5.2.2</a>. Transform References . . . . . . . . . . . . . . <a href="#page-31">31</a>
- <a href="#section-5.2.3">5.2.3</a>. Merge Paths . . . . . . . . . . . . . . . . . . <a href="#page-32">32</a>
- <a href="#section-5.2.4">5.2.4</a>. Remove Dot Segments . . . . . . . . . . . . . . <a href="#page-33">33</a>
- <a href="#section-5.3">5.3</a>. Component Recomposition . . . . . . . . . . . . . . . . <a href="#page-35">35</a>
- <a href="#section-5.4">5.4</a>. Reference Resolution Examples . . . . . . . . . . . . . <a href="#page-35">35</a>
- <a href="#section-5.4.1">5.4.1</a>. Normal Examples . . . . . . . . . . . . . . . . <a href="#page-36">36</a>
- <a href="#section-5.4.2">5.4.2</a>. Abnormal Examples . . . . . . . . . . . . . . . <a href="#page-36">36</a>
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 2]</span>
-<a name="page-3" id="page-3" href="#page-3"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- <a href="#section-6">6</a>. Normalization and Comparison . . . . . . . . . . . . . . . . . <a href="#page-38">38</a>
- <a href="#section-6.1">6.1</a>. Equivalence . . . . . . . . . . . . . . . . . . . . . . <a href="#page-38">38</a>
- <a href="#section-6.2">6.2</a>. Comparison Ladder . . . . . . . . . . . . . . . . . . . <a href="#page-39">39</a>
- <a href="#section-6.2.1">6.2.1</a>. Simple String Comparison . . . . . . . . . . . . <a href="#page-39">39</a>
- <a href="#section-6.2.2">6.2.2</a>. Syntax-Based Normalization . . . . . . . . . . . <a href="#page-40">40</a>
- <a href="#section-6.2.3">6.2.3</a>. Scheme-Based Normalization . . . . . . . . . . . <a href="#page-41">41</a>
- <a href="#section-6.2.4">6.2.4</a>. Protocol-Based Normalization . . . . . . . . . . <a href="#page-42">42</a>
- <a href="#section-7">7</a>. Security Considerations . . . . . . . . . . . . . . . . . . . <a href="#page-43">43</a>
- <a href="#section-7.1">7.1</a>. Reliability and Consistency . . . . . . . . . . . . . . <a href="#page-43">43</a>
- <a href="#section-7.2">7.2</a>. Malicious Construction . . . . . . . . . . . . . . . . . <a href="#page-43">43</a>
- <a href="#section-7.3">7.3</a>. Back-End Transcoding . . . . . . . . . . . . . . . . . . <a href="#page-44">44</a>
- <a href="#section-7.4">7.4</a>. Rare IP Address Formats . . . . . . . . . . . . . . . . <a href="#page-45">45</a>
- <a href="#section-7.5">7.5</a>. Sensitive Information . . . . . . . . . . . . . . . . . <a href="#page-45">45</a>
- <a href="#section-7.6">7.6</a>. Semantic Attacks . . . . . . . . . . . . . . . . . . . . <a href="#page-45">45</a>
- <a href="#section-8">8</a>. IANA Considerations . . . . . . . . . . . . . . . . . . . . . <a href="#page-46">46</a>
- <a href="#section-9">9</a>. Acknowledgements . . . . . . . . . . . . . . . . . . . . . . . <a href="#page-46">46</a>
- <a href="#section-10">10</a>. References . . . . . . . . . . . . . . . . . . . . . . . . . . <a href="#page-46">46</a>
- <a href="#section-10.1">10.1</a>. Normative References . . . . . . . . . . . . . . . . . . <a href="#page-46">46</a>
- <a href="#section-10.2">10.2</a>. Informative References . . . . . . . . . . . . . . . . . <a href="#page-47">47</a>
- A. Collected ABNF for URI . . . . . . . . . . . . . . . . . . . . <a href="#page-49">49</a>
- B. Parsing a URI Reference with a Regular Expression . . . . . . <a href="#page-50">50</a>
- C. Delimiting a URI in Context . . . . . . . . . . . . . . . . . <a href="#page-51">51</a>
- D. Changes from <a href="http://tools.ietf.org/html/rfc2396">RFC 2396</a> . . . . . . . . . . . . . . . . . . . . <a href="#page-53">53</a>
- D.1. Additions . . . . . . . . . . . . . . . . . . . . . . . <a href="#page-53">53</a>
- D.2. Modifications . . . . . . . . . . . . . . . . . . . . . <a href="#page-53">53</a>
- Index . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . <a href="#page-56">56</a>
- Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . . <a href="#page-60">60</a>
- Full Copyright Statement . . . . . . . . . . . . . . . . . . . . . <a href="#page-61">61</a>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 3]</span>
-<a name="page-4" id="page-4" href="#page-4"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
-<span class="h2"><h2><a name="section-1">1</a>. Introduction</h2></span>
-
- A Uniform Resource Identifier (URI) provides a simple and extensible
- means for identifying a resource. This specification of URI syntax
- and semantics is derived from concepts introduced by the World Wide
- Web global information initiative, whose use of these identifiers
- dates from 1990 and is described in "Universal Resource Identifiers
- in WWW" [<a href="http://tools.ietf.org/html/rfc1630" title="&quot;Universal Resource Identifiers in WWW: A Unifying Syntax for the Expression of Names and Addresses of Objects on the Network as used in the World-Wide Web&quot;">RFC1630</a>]. The syntax is designed to meet the
- recommendations laid out in "Functional Recommendations for Internet
- Resource Locators" [<a href="http://tools.ietf.org/html/rfc1736" title="&quot;Functional Recommendations for Internet Resource Locators&quot;">RFC1736</a>] and "Functional Requirements for Uniform
- Resource Names" [<a href="http://tools.ietf.org/html/rfc1737" title="&quot;Functional Requirements for Uniform Resource Names&quot;">RFC1737</a>].
-
- This document obsoletes [<a href="http://tools.ietf.org/html/rfc2396" title="&quot;Uniform Resource Identifiers (URI): Generic Syntax&quot;">RFC2396</a>], which merged "Uniform Resource
- Locators" [<a href="http://tools.ietf.org/html/rfc1738" title="&quot;Uniform Resource Locators (URL)&quot;">RFC1738</a>] and "Relative Uniform Resource Locators"
- [<a href="http://tools.ietf.org/html/rfc1808" title="&quot;Relative Uniform Resource Locators&quot;">RFC1808</a>] in order to define a single, generic syntax for all URIs.
- It obsoletes [<a href="http://tools.ietf.org/html/rfc2732" title="&quot;Format for Literal IPv6 Addresses in URL's&quot;">RFC2732</a>], which introduced syntax for an IPv6 address.
- It excludes portions of <a href="http://tools.ietf.org/html/rfc1738">RFC 1738</a> that defined the specific syntax of
- individual URI schemes; those portions will be updated as separate
- documents. The process for registration of new URI schemes is
- defined separately by [<a href="#ref-BCP35" title="&quot;Registration Procedures for URL Scheme Names&quot;">BCP35</a>]. Advice for designers of new URI
- schemes can be found in [<a href="http://tools.ietf.org/html/rfc2718" title="&quot;Guidelines for new URL Schemes&quot;">RFC2718</a>]. All significant changes from <a href="http://tools.ietf.org/html/rfc2396">RFC</a>
- <a href="http://tools.ietf.org/html/rfc2396">2396</a> are noted in Appendix D.
-
- This specification uses the terms "character" and "coded character
- set" in accordance with the definitions provided in [<a href="#ref-BCP19" title="&quot;IANA Charset Registration Procedures&quot;">BCP19</a>], and
- "character encoding" in place of what [<a href="#ref-BCP19" title="&quot;IANA Charset Registration Procedures&quot;">BCP19</a>] refers to as a
- "charset".
-
-<span class="h3"><h3><a name="section-1.1">1.1</a>. Overview of URIs</h3></span>
-
- URIs are characterized as follows:
-
- Uniform
-
- Uniformity provides several benefits. It allows different types
- of resource identifiers to be used in the same context, even when
- the mechanisms used to access those resources may differ. It
- allows uniform semantic interpretation of common syntactic
- conventions across different types of resource identifiers. It
- allows introduction of new types of resource identifiers without
- interfering with the way that existing identifiers are used. It
- allows the identifiers to be reused in many different contexts,
- thus permitting new applications or protocols to leverage a pre-
- existing, large, and widely used set of resource identifiers.
-
-
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 4]</span>
-<a name="page-5" id="page-5" href="#page-5"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- Resource
-
- This specification does not limit the scope of what might be a
- resource; rather, the term "resource" is used in a general sense
- for whatever might be identified by a URI. Familiar examples
- include an electronic document, an image, a source of information
- with a consistent purpose (e.g., "today's weather report for Los
- Angeles"), a service (e.g., an HTTP-to-SMS gateway), and a
- collection of other resources. A resource is not necessarily
- accessible via the Internet; e.g., human beings, corporations, and
- bound books in a library can also be resources. Likewise,
- abstract concepts can be resources, such as the operators and
- operands of a mathematical equation, the types of a relationship
- (e.g., "parent" or "employee"), or numeric values (e.g., zero,
- one, and infinity).
-
- Identifier
-
- An identifier embodies the information required to distinguish
- what is being identified from all other things within its scope of
- identification. Our use of the terms "identify" and "identifying"
- refer to this purpose of distinguishing one resource from all
- other resources, regardless of how that purpose is accomplished
- (e.g., by name, address, or context). These terms should not be
- mistaken as an assumption that an identifier defines or embodies
- the identity of what is referenced, though that may be the case
- for some identifiers. Nor should it be assumed that a system
- using URIs will access the resource identified: in many cases,
- URIs are used to denote resources without any intention that they
- be accessed. Likewise, the "one" resource identified might not be
- singular in nature (e.g., a resource might be a named set or a
- mapping that varies over time).
-
- A URI is an identifier consisting of a sequence of characters
- matching the syntax rule named &lt;URI&gt; in <a href="#section-3">Section 3</a>. It enables
- uniform identification of resources via a separately defined
- extensible set of naming schemes (<a href="#section-3.1">Section 3.1</a>). How that
- identification is accomplished, assigned, or enabled is delegated to
- each scheme specification.
-
- This specification does not place any limits on the nature of a
- resource, the reasons why an application might seek to refer to a
- resource, or the kinds of systems that might use URIs for the sake of
- identifying resources. This specification does not require that a
- URI persists in identifying the same resource over time, though that
- is a common goal of all URI schemes. Nevertheless, nothing in this
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 5]</span>
-<a name="page-6" id="page-6" href="#page-6"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- specification prevents an application from limiting itself to
- particular types of resources, or to a subset of URIs that maintains
- characteristics desired by that application.
-
- URIs have a global scope and are interpreted consistently regardless
- of context, though the result of that interpretation may be in
- relation to the end-user's context. For example, "<a href="http://localhost/">http://localhost/</a>"
- has the same interpretation for every user of that reference, even
- though the network interface corresponding to "localhost" may be
- different for each end-user: interpretation is independent of access.
- However, an action made on the basis of that reference will take
- place in relation to the end-user's context, which implies that an
- action intended to refer to a globally unique thing must use a URI
- that distinguishes that resource from all other things. URIs that
- identify in relation to the end-user's local context should only be
- used when the context itself is a defining aspect of the resource,
- such as when an on-line help manual refers to a file on the end-
- user's file system (e.g., "file:///etc/hosts").
-
-<span class="h4"><h4><a name="section-1.1.1">1.1.1</a>. Generic Syntax</h4></span>
-
- Each URI begins with a scheme name, as defined in <a href="#section-3.1">Section 3.1</a>, that
- refers to a specification for assigning identifiers within that
- scheme. As such, the URI syntax is a federated and extensible naming
- system wherein each scheme's specification may further restrict the
- syntax and semantics of identifiers using that scheme.
-
- This specification defines those elements of the URI syntax that are
- required of all URI schemes or are common to many URI schemes. It
- thus defines the syntax and semantics needed to implement a scheme-
- independent parsing mechanism for URI references, by which the
- scheme-dependent handling of a URI can be postponed until the
- scheme-dependent semantics are needed. Likewise, protocols and data
- formats that make use of URI references can refer to this
- specification as a definition for the range of syntax allowed for all
- URIs, including those schemes that have yet to be defined. This
- decouples the evolution of identification schemes from the evolution
- of protocols, data formats, and implementations that make use of
- URIs.
-
- A parser of the generic URI syntax can parse any URI reference into
- its major components. Once the scheme is determined, further
- scheme-specific parsing can be performed on the components. In other
- words, the URI generic syntax is a superset of the syntax of all URI
- schemes.
-
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 6]</span>
-<a name="page-7" id="page-7" href="#page-7"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
-<span class="h4"><h4><a name="section-1.1.2">1.1.2</a>. Examples</h4></span>
-
- The following example URIs illustrate several URI schemes and
- variations in their common syntax components:
-
- <a href="ftp://ftp.is.co.za/rfc/rfc1808.txt">ftp://ftp.is.co.za/rfc/rfc1808.txt</a>
-
- <a href="http://www.ietf.org/rfc/rfc2396.txt">http://www.ietf.org/rfc/rfc2396.txt</a>
-
- ldap://[2001:db8::7]/c=GB?objectClass?one
-
- mailto:John.Doe@example.com
-
- news:comp.infosystems.www.servers.unix
-
- tel:+1-816-555-1212
-
- telnet://192.0.2.16:80/
-
- urn:oasis:names:specification:docbook:dtd:xml:4.1.2
-
-
-<span class="h4"><h4><a name="section-1.1.3">1.1.3</a>. URI, URL, and URN</h4></span>
-
- A URI can be further classified as a locator, a name, or both. The
- term "Uniform Resource Locator" (URL) refers to the subset of URIs
- that, in addition to identifying a resource, provide a means of
- locating the resource by describing its primary access mechanism
- (e.g., its network "location"). The term "Uniform Resource Name"
- (URN) has been used historically to refer to both URIs under the
- "urn" scheme [<a href="http://tools.ietf.org/html/rfc2141" title="&quot;URN Syntax&quot;">RFC2141</a>], which are required to remain globally unique
- and persistent even when the resource ceases to exist or becomes
- unavailable, and to any other URI with the properties of a name.
-
- An individual scheme does not have to be classified as being just one
- of "name" or "locator". Instances of URIs from any given scheme may
- have the characteristics of names or locators or both, often
- depending on the persistence and care in the assignment of
- identifiers by the naming authority, rather than on any quality of
- the scheme. Future specifications and related documentation should
- use the general term "URI" rather than the more restrictive terms
- "URL" and "URN" [<a href="http://tools.ietf.org/html/rfc3305" title="&quot;Report from the Joint W3C/IETF URI Planning Interest Group: Uniform Resource Identifiers (URIs), URLs, and Uniform Resource Names (URNs): Clarifications and Recommendations&quot;">RFC3305</a>].
-
-
-
-
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 7]</span>
-<a name="page-8" id="page-8" href="#page-8"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
-<span class="h3"><h3><a name="section-1.2">1.2</a>. Design Considerations</h3></span>
-
-<span class="h4"><h4><a name="section-1.2.1">1.2.1</a>. Transcription</h4></span>
-
- The URI syntax has been designed with global transcription as one of
- its main considerations. A URI is a sequence of characters from a
- very limited set: the letters of the basic Latin alphabet, digits,
- and a few special characters. A URI may be represented in a variety
- of ways; e.g., ink on paper, pixels on a screen, or a sequence of
- character encoding octets. The interpretation of a URI depends only
- on the characters used and not on how those characters are
- represented in a network protocol.
-
- The goal of transcription can be described by a simple scenario.
- Imagine two colleagues, Sam and Kim, sitting in a pub at an
- international conference and exchanging research ideas. Sam asks Kim
- for a location to get more information, so Kim writes the URI for the
- research site on a napkin. Upon returning home, Sam takes out the
- napkin and types the URI into a computer, which then retrieves the
- information to which Kim referred.
-
- There are several design considerations revealed by the scenario:
-
- o A URI is a sequence of characters that is not always represented
- as a sequence of octets.
-
- o A URI might be transcribed from a non-network source and thus
- should consist of characters that are most likely able to be
- entered into a computer, within the constraints imposed by
- keyboards (and related input devices) across languages and
- locales.
-
- o A URI often has to be remembered by people, and it is easier for
- people to remember a URI when it consists of meaningful or
- familiar components.
-
- These design considerations are not always in alignment. For
- example, it is often the case that the most meaningful name for a URI
- component would require characters that cannot be typed into some
- systems. The ability to transcribe a resource identifier from one
- medium to another has been considered more important than having a
- URI consist of the most meaningful of components.
-
- In local or regional contexts and with improving technology, users
- might benefit from being able to use a wider range of characters;
- such use is not defined by this specification. Percent-encoded
- octets (<a href="#section-2.1">Section 2.1</a>) may be used within a URI to represent characters
- outside the range of the US-ASCII coded character set if this
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 8]</span>
-<a name="page-9" id="page-9" href="#page-9"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- representation is allowed by the scheme or by the protocol element in
- which the URI is referenced. Such a definition should specify the
- character encoding used to map those characters to octets prior to
- being percent-encoded for the URI.
-
-<span class="h4"><h4><a name="section-1.2.2">1.2.2</a>. Separating Identification from Interaction</h4></span>
-
- A common misunderstanding of URIs is that they are only used to refer
- to accessible resources. The URI itself only provides
- identification; access to the resource is neither guaranteed nor
- implied by the presence of a URI. Instead, any operation associated
- with a URI reference is defined by the protocol element, data format
- attribute, or natural language text in which it appears.
-
- Given a URI, a system may attempt to perform a variety of operations
- on the resource, as might be characterized by words such as "access",
- "update", "replace", or "find attributes". Such operations are
- defined by the protocols that make use of URIs, not by this
- specification. However, we do use a few general terms for describing
- common operations on URIs. URI "resolution" is the process of
- determining an access mechanism and the appropriate parameters
- necessary to dereference a URI; this resolution may require several
- iterations. To use that access mechanism to perform an action on the
- URI's resource is to "dereference" the URI.
-
- When URIs are used within information retrieval systems to identify
- sources of information, the most common form of URI dereference is
- "retrieval": making use of a URI in order to retrieve a
- representation of its associated resource. A "representation" is a
- sequence of octets, along with representation metadata describing
- those octets, that constitutes a record of the state of the resource
- at the time when the representation is generated. Retrieval is
- achieved by a process that might include using the URI as a cache key
- to check for a locally cached representation, resolution of the URI
- to determine an appropriate access mechanism (if any), and
- dereference of the URI for the sake of applying a retrieval
- operation. Depending on the protocols used to perform the retrieval,
- additional information might be supplied about the resource (resource
- metadata) and its relation to other resources.
-
- URI references in information retrieval systems are designed to be
- late-binding: the result of an access is generally determined when it
- is accessed and may vary over time or due to other aspects of the
- interaction. These references are created in order to be used in the
- future: what is being identified is not some specific result that was
- obtained in the past, but rather some characteristic that is expected
- to be true for future results. In such cases, the resource referred
- to by the URI is actually a sameness of characteristics as observed
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 9]</span>
-<a name="page-10" id="page-10" href="#page-10"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- over time, perhaps elucidated by additional comments or assertions
- made by the resource provider.
-
- Although many URI schemes are named after protocols, this does not
- imply that use of these URIs will result in access to the resource
- via the named protocol. URIs are often used simply for the sake of
- identification. Even when a URI is used to retrieve a representation
- of a resource, that access might be through gateways, proxies,
- caches, and name resolution services that are independent of the
- protocol associated with the scheme name. The resolution of some
- URIs may require the use of more than one protocol (e.g., both DNS
- and HTTP are typically used to access an "http" URI's origin server
- when a representation isn't found in a local cache).
-
-<span class="h4"><h4><a name="section-1.2.3">1.2.3</a>. Hierarchical Identifiers</h4></span>
-
- The URI syntax is organized hierarchically, with components listed in
- order of decreasing significance from left to right. For some URI
- schemes, the visible hierarchy is limited to the scheme itself:
- everything after the scheme component delimiter (":") is considered
- opaque to URI processing. Other URI schemes make the hierarchy
- explicit and visible to generic parsing algorithms.
-
- The generic syntax uses the slash ("/"), question mark ("?"), and
- number sign ("#") characters to delimit components that are
- significant to the generic parser's hierarchical interpretation of an
- identifier. In addition to aiding the readability of such
- identifiers through the consistent use of familiar syntax, this
- uniform representation of hierarchy across naming schemes allows
- scheme-independent references to be made relative to that hierarchy.
-
- It is often the case that a group or "tree" of documents has been
- constructed to serve a common purpose, wherein the vast majority of
- URI references in these documents point to resources within the tree
- rather than outside it. Similarly, documents located at a particular
- site are much more likely to refer to other resources at that site
- than to resources at remote sites. Relative referencing of URIs
- allows document trees to be partially independent of their location
- and access scheme. For instance, it is possible for a single set of
- hypertext documents to be simultaneously accessible and traversable
- via each of the "file", "http", and "ftp" schemes if the documents
- refer to each other with relative references. Furthermore, such
- document trees can be moved, as a whole, without changing any of the
- relative references.
-
- A relative reference (<a href="#section-4.2">Section 4.2</a>) refers to a resource by describing
- the difference within a hierarchical name space between the reference
- context and the target URI. The reference resolution algorithm,
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 10]</span>
-<a name="page-11" id="page-11" href="#page-11"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- presented in <a href="#section-5">Section 5</a>, defines how such a reference is transformed
- to the target URI. As relative references can only be used within
- the context of a hierarchical URI, designers of new URI schemes
- should use a syntax consistent with the generic syntax's hierarchical
- components unless there are compelling reasons to forbid relative
- referencing within that scheme.
-
- NOTE: Previous specifications used the terms "partial URI" and
- "relative URI" to denote a relative reference to a URI. As some
- readers misunderstood those terms to mean that relative URIs are a
- subset of URIs rather than a method of referencing URIs, this
- specification simply refers to them as relative references.
-
- All URI references are parsed by generic syntax parsers when used.
- However, because hierarchical processing has no effect on an absolute
- URI used in a reference unless it contains one or more dot-segments
- (complete path segments of "." or "..", as described in <a href="#section-3.3">Section 3.3</a>),
- URI scheme specifications can define opaque identifiers by
- disallowing use of slash characters, question mark characters, and
- the URIs "scheme:." and "scheme:..".
-
-<span class="h3"><h3><a name="section-1.3">1.3</a>. Syntax Notation</h3></span>
-
- This specification uses the Augmented Backus-Naur Form (ABNF)
- notation of [<a href="http://tools.ietf.org/html/rfc2234" title="&quot;Augmented BNF for Syntax Specifications: ABNF&quot;">RFC2234</a>], including the following core ABNF syntax rules
- defined by that specification: ALPHA (letters), CR (carriage return),
- DIGIT (decimal digits), DQUOTE (double quote), HEXDIG (hexadecimal
- digits), LF (line feed), and SP (space). The complete URI syntax is
- collected in Appendix A.
-
-<span class="h2"><h2><a name="section-2">2</a>. Characters</h2></span>
-
- The URI syntax provides a method of encoding data, presumably for the
- sake of identifying a resource, as a sequence of characters. The URI
- characters are, in turn, frequently encoded as octets for transport
- or presentation. This specification does not mandate any particular
- character encoding for mapping between URI characters and the octets
- used to store or transmit those characters. When a URI appears in a
- protocol element, the character encoding is defined by that protocol;
- without such a definition, a URI is assumed to be in the same
- character encoding as the surrounding text.
-
- The ABNF notation defines its terminal values to be non-negative
- integers (codepoints) based on the US-ASCII coded character set
- [<a href="#ref-ASCII" title="&quot;Coded Character Set -- 7-bit American Standard Code for Information Interchange&quot;">ASCII</a>]. Because a URI is a sequence of characters, we must invert
- that relation in order to understand the URI syntax. Therefore, the
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 11]</span>
-<a name="page-12" id="page-12" href="#page-12"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- integer values used by the ABNF must be mapped back to their
- corresponding characters via US-ASCII in order to complete the syntax
- rules.
-
- A URI is composed from a limited set of characters consisting of
- digits, letters, and a few graphic symbols. A reserved subset of
- those characters may be used to delimit syntax components within a
- URI while the remaining characters, including both the unreserved set
- and those reserved characters not acting as delimiters, define each
- component's identifying data.
-
-<span class="h3"><h3><a name="section-2.1">2.1</a>. Percent-Encoding</h3></span>
-
- A percent-encoding mechanism is used to represent a data octet in a
- component when that octet's corresponding character is outside the
- allowed set or is being used as a delimiter of, or within, the
- component. A percent-encoded octet is encoded as a character
- triplet, consisting of the percent character "%" followed by the two
- hexadecimal digits representing that octet's numeric value. For
- example, "%20" is the percent-encoding for the binary octet
- "00100000" (ABNF: %x20), which in US-ASCII corresponds to the space
- character (SP). <a href="#section-2.4">Section 2.4</a> describes when percent-encoding and
- decoding is applied.
-
- pct-encoded = "%" HEXDIG HEXDIG
-
- The uppercase hexadecimal digits 'A' through 'F' are equivalent to
- the lowercase digits 'a' through 'f', respectively. If two URIs
- differ only in the case of hexadecimal digits used in percent-encoded
- octets, they are equivalent. For consistency, URI producers and
- normalizers should use uppercase hexadecimal digits for all percent-
- encodings.
-
-<span class="h3"><h3><a name="section-2.2">2.2</a>. Reserved Characters</h3></span>
-
- URIs include components and subcomponents that are delimited by
- characters in the "reserved" set. These characters are called
- "reserved" because they may (or may not) be defined as delimiters by
- the generic syntax, by each scheme-specific syntax, or by the
- implementation-specific syntax of a URI's dereferencing algorithm.
- If data for a URI component would conflict with a reserved
- character's purpose as a delimiter, then the conflicting data must be
- percent-encoded before the URI is formed.
-
-
-
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 12]</span>
-<a name="page-13" id="page-13" href="#page-13"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- reserved = gen-delims / sub-delims
-
- gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
-
- sub-delims = "!" / "$" / "&amp;" / "'" / "(" / ")"
- / "*" / "+" / "," / ";" / "="
-
- The purpose of reserved characters is to provide a set of delimiting
- characters that are distinguishable from other data within a URI.
- URIs that differ in the replacement of a reserved character with its
- corresponding percent-encoded octet are not equivalent. Percent-
- encoding a reserved character, or decoding a percent-encoded octet
- that corresponds to a reserved character, will change how the URI is
- interpreted by most applications. Thus, characters in the reserved
- set are protected from normalization and are therefore safe to be
- used by scheme-specific and producer-specific algorithms for
- delimiting data subcomponents within a URI.
-
- A subset of the reserved characters (gen-delims) is used as
- delimiters of the generic URI components described in <a href="#section-3">Section 3</a>. A
- component's ABNF syntax rule will not use the reserved or gen-delims
- rule names directly; instead, each syntax rule lists the characters
- allowed within that component (i.e., not delimiting it), and any of
- those characters that are also in the reserved set are "reserved" for
- use as subcomponent delimiters within the component. Only the most
- common subcomponents are defined by this specification; other
- subcomponents may be defined by a URI scheme's specification, or by
- the implementation-specific syntax of a URI's dereferencing
- algorithm, provided that such subcomponents are delimited by
- characters in the reserved set allowed within that component.
-
- URI producing applications should percent-encode data octets that
- correspond to characters in the reserved set unless these characters
- are specifically allowed by the URI scheme to represent data in that
- component. If a reserved character is found in a URI component and
- no delimiting role is known for that character, then it must be
- interpreted as representing the data octet corresponding to that
- character's encoding in US-ASCII.
-
-<span class="h3"><h3><a name="section-2.3">2.3</a>. Unreserved Characters</h3></span>
-
- Characters that are allowed in a URI but do not have a reserved
- purpose are called unreserved. These include uppercase and lowercase
- letters, decimal digits, hyphen, period, underscore, and tilde.
-
- unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 13]</span>
-<a name="page-14" id="page-14" href="#page-14"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- URIs that differ in the replacement of an unreserved character with
- its corresponding percent-encoded US-ASCII octet are equivalent: they
- identify the same resource. However, URI comparison implementations
- do not always perform normalization prior to comparison (see Section
- 6). For consistency, percent-encoded octets in the ranges of ALPHA
- (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), period (%2E),
- underscore (%5F), or tilde (%7E) should not be created by URI
- producers and, when found in a URI, should be decoded to their
- corresponding unreserved characters by URI normalizers.
-
-<span class="h3"><h3><a name="section-2.4">2.4</a>. When to Encode or Decode</h3></span>
-
- Under normal circumstances, the only time when octets within a URI
- are percent-encoded is during the process of producing the URI from
- its component parts. This is when an implementation determines which
- of the reserved characters are to be used as subcomponent delimiters
- and which can be safely used as data. Once produced, a URI is always
- in its percent-encoded form.
-
- When a URI is dereferenced, the components and subcomponents
- significant to the scheme-specific dereferencing process (if any)
- must be parsed and separated before the percent-encoded octets within
- those components can be safely decoded, as otherwise the data may be
- mistaken for component delimiters. The only exception is for
- percent-encoded octets corresponding to characters in the unreserved
- set, which can be decoded at any time. For example, the octet
- corresponding to the tilde ("~") character is often encoded as "%7E"
- by older URI processing implementations; the "%7E" can be replaced by
- "~" without changing its interpretation.
-
- Because the percent ("%") character serves as the indicator for
- percent-encoded octets, it must be percent-encoded as "%25" for that
- octet to be used as data within a URI. Implementations must not
- percent-encode or decode the same string more than once, as decoding
- an already decoded string might lead to misinterpreting a percent
- data octet as the beginning of a percent-encoding, or vice versa in
- the case of percent-encoding an already percent-encoded string.
-
-<span class="h3"><h3><a name="section-2.5">2.5</a>. Identifying Data</h3></span>
-
- URI characters provide identifying data for each of the URI
- components, serving as an external interface for identification
- between systems. Although the presence and nature of the URI
- production interface is hidden from clients that use its URIs (and is
- thus beyond the scope of the interoperability requirements defined by
- this specification), it is a frequent source of confusion and errors
- in the interpretation of URI character issues. Implementers have to
- be aware that there are multiple character encodings involved in the
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 14]</span>
-<a name="page-15" id="page-15" href="#page-15"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- production and transmission of URIs: local name and data encoding,
- public interface encoding, URI character encoding, data format
- encoding, and protocol encoding.
-
- Local names, such as file system names, are stored with a local
- character encoding. URI producing applications (e.g., origin
- servers) will typically use the local encoding as the basis for
- producing meaningful names. The URI producer will transform the
- local encoding to one that is suitable for a public interface and
- then transform the public interface encoding into the restricted set
- of URI characters (reserved, unreserved, and percent-encodings).
- Those characters are, in turn, encoded as octets to be used as a
- reference within a data format (e.g., a document charset), and such
- data formats are often subsequently encoded for transmission over
- Internet protocols.
-
- For most systems, an unreserved character appearing within a URI
- component is interpreted as representing the data octet corresponding
- to that character's encoding in US-ASCII. Consumers of URIs assume
- that the letter "X" corresponds to the octet "01011000", and even
- when that assumption is incorrect, there is no harm in making it. A
- system that internally provides identifiers in the form of a
- different character encoding, such as EBCDIC, will generally perform
- character translation of textual identifiers to UTF-8 [<a href="#ref-STD63" title="&quot;UTF-8, a transformation format of ISO 10646&quot;">STD63</a>] (or
- some other superset of the US-ASCII character encoding) at an
- internal interface, thereby providing more meaningful identifiers
- than those resulting from simply percent-encoding the original
- octets.
-
- For example, consider an information service that provides data,
- stored locally using an EBCDIC-based file system, to clients on the
- Internet through an HTTP server. When an author creates a file with
- the name "Laguna Beach" on that file system, the "http" URI
- corresponding to that resource is expected to contain the meaningful
- string "Laguna%20Beach". If, however, that server produces URIs by
- using an overly simplistic raw octet mapping, then the result would
- be a URI containing "%D3%81%87%A4%95%81@%C2%85%81%83%88". An
- internal transcoding interface fixes this problem by transcoding the
- local name to a superset of US-ASCII prior to producing the URI.
- Naturally, proper interpretation of an incoming URI on such an
- interface requires that percent-encoded octets be decoded (e.g.,
- "%20" to SP) before the reverse transcoding is applied to obtain the
- local name.
-
- In some cases, the internal interface between a URI component and the
- identifying data that it has been crafted to represent is much less
- direct than a character encoding translation. For example, portions
- of a URI might reflect a query on non-ASCII data, or numeric
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 15]</span>
-<a name="page-16" id="page-16" href="#page-16"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- coordinates on a map. Likewise, a URI scheme may define components
- with additional encoding requirements that are applied prior to
- forming the component and producing the URI.
-
- When a new URI scheme defines a component that represents textual
- data consisting of characters from the Universal Character Set [<a href="#ref-UCS" title="&quot;Information Technology - Universal Multiple-Octet Coded Character Set (UCS)&quot;">UCS</a>],
- the data should first be encoded as octets according to the UTF-8
- character encoding [<a href="#ref-STD63" title="&quot;UTF-8, a transformation format of ISO 10646&quot;">STD63</a>]; then only those octets that do not
- correspond to characters in the unreserved set should be percent-
- encoded. For example, the character A would be represented as "A",
- the character LATIN CAPITAL LETTER A WITH GRAVE would be represented
- as "%C3%80", and the character KATAKANA LETTER A would be represented
- as "%E3%82%A2".
-
-<span class="h2"><h2><a name="section-3">3</a>. Syntax Components</h2></span>
-
- The generic URI syntax consists of a hierarchical sequence of
- components referred to as the scheme, authority, path, query, and
- fragment.
-
- URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
-
- hier-part = "//" authority path-abempty
- / path-absolute
- / path-rootless
- / path-empty
-
- The scheme and path components are required, though the path may be
- empty (no characters). When authority is present, the path must
- either be empty or begin with a slash ("/") character. When
- authority is not present, the path cannot begin with two slash
- characters ("//"). These restrictions result in five different ABNF
- rules for a path (<a href="#section-3.3">Section 3.3</a>), only one of which will match any
- given URI reference.
-
- The following are two example URIs and their component parts:
-
-         foo://example.com:8042/over/there?name=ferret#nose
-         \_/   \______________/\_________/ \_________/ \__/
-          |           |            |            |        |
-       scheme     authority       path        query   fragment
-          |   _____________________|__
-         / \ /                        \
-         urn:example:animal:ferret:nose
-
-
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 16]</span>
-<a name="page-17" id="page-17" href="#page-17"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
-<span class="h3"><h3><a name="section-3.1">3.1</a>. Scheme</h3></span>
-
- Each URI begins with a scheme name that refers to a specification for
- assigning identifiers within that scheme. As such, the URI syntax is
- a federated and extensible naming system wherein each scheme's
- specification may further restrict the syntax and semantics of
- identifiers using that scheme.
-
- Scheme names consist of a sequence of characters beginning with a
- letter and followed by any combination of letters, digits, plus
- ("+"), period ("."), or hyphen ("-"). Although schemes are case-
- insensitive, the canonical form is lowercase and documents that
- specify schemes must do so with lowercase letters. An implementation
- should accept uppercase letters as equivalent to lowercase in scheme
- names (e.g., allow "HTTP" as well as "http") for the sake of
- robustness but should only produce lowercase scheme names for
- consistency.
-
- scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
-
- Individual schemes are not specified by this document. The process
- for registration of new URI schemes is defined separately by [<a href="#ref-BCP35" title="&quot;Registration Procedures for URL Scheme Names&quot;">BCP35</a>].
- The scheme registry maintains the mapping between scheme names and
- their specifications. Advice for designers of new URI schemes can be
- found in [<a href="http://tools.ietf.org/html/rfc2718" title="&quot;Guidelines for new URL Schemes&quot;">RFC2718</a>]. URI scheme specifications must define their own
- syntax so that all strings matching their scheme-specific syntax will
- also match the &lt;absolute-URI&gt; grammar, as described in <a href="#section-4.3">Section 4.3</a>.
-
- When presented with a URI that violates one or more scheme-specific
- restrictions, the scheme-specific resolution process should flag the
- reference as an error rather than ignore the unused parts; doing so
- reduces the number of equivalent URIs and helps detect abuses of the
- generic syntax, which might indicate that the URI has been
- constructed to mislead the user (<a href="#section-7.6">Section 7.6</a>).
-
-<span class="h3"><h3><a name="section-3.2">3.2</a>. Authority</h3></span>
-
- Many URI schemes include a hierarchical element for a naming
- authority so that governance of the name space defined by the
- remainder of the URI is delegated to that authority (which may, in
- turn, delegate it further). The generic syntax provides a common
- means for distinguishing an authority based on a registered name or
- server address, along with optional port and user information.
-
- The authority component is preceded by a double slash ("//") and is
- terminated by the next slash ("/"), question mark ("?"), or number
- sign ("#") character, or by the end of the URI.
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 17]</span>
-<a name="page-18" id="page-18" href="#page-18"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- authority = [ userinfo "@" ] host [ ":" port ]
-
- URI producers and normalizers should omit the ":" delimiter that
- separates host from port if the port component is empty. Some
- schemes do not allow the userinfo and/or port subcomponents.
-
- If a URI contains an authority component, then the path component
- must either be empty or begin with a slash ("/") character. Non-
- validating parsers (those that merely separate a URI reference into
- its major components) will often ignore the subcomponent structure of
- authority, treating it as an opaque string from the double-slash to
- the first terminating delimiter, until such time as the URI is
- dereferenced.
-
-<span class="h4"><h4><a name="section-3.2.1">3.2.1</a>. User Information</h4></span>
-
- The userinfo subcomponent may consist of a user name and, optionally,
- scheme-specific information about how to gain authorization to access
- the resource. The user information, if present, is followed by a
- commercial at-sign ("@") that delimits it from the host.
-
- userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
-
- Use of the format "user:password" in the userinfo field is
- deprecated. Applications should not render as clear text any data
- after the first colon (":") character found within a userinfo
- subcomponent unless the data after the colon is the empty string
- (indicating no password). Applications may choose to ignore or
- reject such data when it is received as part of a reference and
- should reject the storage of such data in unencrypted form. The
- passing of authentication information in clear text has proven to be
- a security risk in almost every case where it has been used.
-
- Applications that render a URI for the sake of user feedback, such as
- in graphical hypertext browsing, should render userinfo in a way that
- is distinguished from the rest of a URI, when feasible. Such
- rendering will assist the user in cases where the userinfo has been
- misleadingly crafted to look like a trusted domain name
- (<a href="#section-7.6">Section 7.6</a>).
-
-<span class="h4"><h4><a name="section-3.2.2">3.2.2</a>. Host</h4></span>
-
- The host subcomponent of authority is identified by an IP literal
- encapsulated within square brackets, an IPv4 address in dotted-
- decimal form, or a registered name. The host subcomponent is case-
- insensitive. The presence of a host subcomponent within a URI does
- not imply that the scheme requires access to the given host on the
- Internet. In many cases, the host syntax is used only for the sake
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 18]</span>
-<a name="page-19" id="page-19" href="#page-19"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- of reusing the existing registration process created and deployed for
- DNS, thus obtaining a globally unique name without the cost of
- deploying another registry. However, such use comes with its own
- costs: domain name ownership may change over time for reasons not
- anticipated by the URI producer. In other cases, the data within the
- host component identifies a registered name that has nothing to do
- with an Internet host. We use the name "host" for the ABNF rule
- because that is its most common purpose, not its only purpose.
-
- host = IP-literal / IPv4address / reg-name
-
- The syntax rule for host is ambiguous because it does not completely
- distinguish between an IPv4address and a reg-name. In order to
- disambiguate the syntax, we apply the "first-match-wins" algorithm:
- If host matches the rule for IPv4address, then it should be
- considered an IPv4 address literal and not a reg-name. Although host
- is case-insensitive, producers and normalizers should use lowercase
- for registered names and hexadecimal addresses for the sake of
- uniformity, while only using uppercase letters for percent-encodings.
-
- A host identified by an Internet Protocol literal address, version 6
- [<a href="http://tools.ietf.org/html/rfc3513" title="&quot;Internet Protocol Version 6 (IPv6) Addressing Architecture&quot;">RFC3513</a>] or later, is distinguished by enclosing the IP literal
- within square brackets ("[" and "]"). This is the only place where
- square bracket characters are allowed in the URI syntax. In
- anticipation of future, as-yet-undefined IP literal address formats,
- an implementation may use an optional version flag to indicate such a
- format explicitly rather than rely on heuristic determination.
-
- IP-literal = "[" ( IPv6address / IPvFuture ) "]"
-
- IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
-
- The version flag does not indicate the IP version; rather, it
- indicates future versions of the literal format. As such,
- implementations must not provide the version flag for the existing
- IPv4 and IPv6 literal address forms described below. If a URI
- containing an IP-literal that starts with "v" (case-insensitive),
- indicating that the version flag is present, is dereferenced by an
- application that does not know the meaning of that version flag, then
- the application should return an appropriate error for "address
- mechanism not supported".
-
- A host identified by an IPv6 literal address is represented inside
- the square brackets without a preceding version flag. The ABNF
- provided here is a translation of the text definition of an IPv6
- literal address provided in [<a href="http://tools.ietf.org/html/rfc3513" title="&quot;Internet Protocol Version 6 (IPv6) Addressing Architecture&quot;">RFC3513</a>]. This syntax does not support
- IPv6 scoped addressing zone identifiers.
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 19]</span>
-<a name="page-20" id="page-20" href="#page-20"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- A 128-bit IPv6 address is divided into eight 16-bit pieces. Each
- piece is represented numerically in case-insensitive hexadecimal,
- using one to four hexadecimal digits (leading zeroes are permitted).
- The eight encoded pieces are given most-significant first, separated
- by colon characters. Optionally, the least-significant two pieces
- may instead be represented in IPv4 address textual format. A
- sequence of one or more consecutive zero-valued 16-bit pieces within
- the address may be elided, omitting all their digits and leaving
- exactly two consecutive colons in their place to mark the elision.
-
-      IPv6address =                            6( h16 ":" ) ls32
-                  /                       "::" 5( h16 ":" ) ls32
-                  / [               h16 ] "::" 4( h16 ":" ) ls32
-                  / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
-                  / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
-                  / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
-                  / [ *4( h16 ":" ) h16 ] "::"              ls32
-                  / [ *5( h16 ":" ) h16 ] "::"              h16
-                  / [ *6( h16 ":" ) h16 ] "::"
-
-      ls32        = ( h16 ":" h16 ) / IPv4address
-                  ; least-significant 32 bits of address
-
-      h16         = 1*4HEXDIG
-                  ; 16 bits of address represented in hexadecimal
-
- A host identified by an IPv4 literal address is represented in
- dotted-decimal notation (a sequence of four decimal numbers in the
- range 0 to 255, separated by "."), as described in [<a href="http://tools.ietf.org/html/rfc1123" title="&quot;Requirements for Internet Hosts - Application and Support&quot;">RFC1123</a>] by
- reference to [<a href="http://tools.ietf.org/html/rfc0952" title="&quot;DoD Internet host table specification&quot;">RFC0952</a>]. Note that other forms of dotted notation may
- be interpreted on some platforms, as described in <a href="#section-7.4">Section 7.4</a>, but
- only the dotted-decimal form of four octets is allowed by this
- grammar.
-
- IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
-
- dec-octet = DIGIT ; 0-9
- / %x31-39 DIGIT ; 10-99
- / "1" 2DIGIT ; 100-199
- / "2" %x30-34 DIGIT ; 200-249
- / "25" %x30-35 ; 250-255
-
- A host identified by a registered name is a sequence of characters
- usually intended for lookup within a locally defined host or service
- name registry, though the URI's scheme-specific semantics may require
- that a specific registry (or fixed name table) be used instead. The
- most common name registry mechanism is the Domain Name System (DNS).
- A registered name intended for lookup in the DNS uses the syntax
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 20]</span>
-<a name="page-21" id="page-21" href="#page-21"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- defined in <a href="http://tools.ietf.org/html/rfc1034#section-3.5">Section 3.5</a> of [<a href="http://tools.ietf.org/html/rfc1034" title="&quot;Domain names - concepts and facilities&quot;">RFC1034</a>] and <a href="http://tools.ietf.org/html/rfc1123#section-2.1">Section 2.1</a> of [<a href="http://tools.ietf.org/html/rfc1123" title="&quot;Requirements for Internet Hosts - Application and Support&quot;">RFC1123</a>].
- Such a name consists of a sequence of domain labels separated by ".",
- each domain label starting and ending with an alphanumeric character
- and possibly also containing "-" characters. The rightmost domain
- label of a fully qualified domain name in DNS may be followed by a
- single "." and should be if it is necessary to distinguish between
- the complete domain name and some local domain.
-
- reg-name = *( unreserved / pct-encoded / sub-delims )
-
- If the URI scheme defines a default for host, then that default
- applies when the host subcomponent is undefined or when the
- registered name is empty (zero length). For example, the "file" URI
- scheme is defined so that no authority, an empty host, and
- "localhost" all mean the end-user's machine, whereas the "http"
- scheme considers a missing authority or empty host invalid.
-
- This specification does not mandate a particular registered name
- lookup technology and therefore does not restrict the syntax of reg-
- name beyond what is necessary for interoperability. Instead, it
- delegates the issue of registered name syntax conformance to the
- operating system of each application performing URI resolution, and
- that operating system decides what it will allow for the purpose of
- host identification. A URI resolution implementation might use DNS,
- host tables, yellow pages, NetInfo, WINS, or any other system for
- lookup of registered names. However, a globally scoped naming
- system, such as DNS fully qualified domain names, is necessary for
- URIs intended to have global scope. URI producers should use names
- that conform to the DNS syntax, even when use of DNS is not
- immediately apparent, and should limit these names to no more than
- 255 characters in length.
-
- The reg-name syntax allows percent-encoded octets in order to
- represent non-ASCII registered names in a uniform way that is
- independent of the underlying name resolution technology. Non-ASCII
- characters must first be encoded according to UTF-8 [<a href="#ref-STD63" title="&quot;UTF-8, a transformation format of ISO 10646&quot;">STD63</a>], and then
- each octet of the corresponding UTF-8 sequence must be percent-
- encoded to be represented as URI characters. URI producing
- applications must not use percent-encoding in host unless it is used
- to represent a UTF-8 character sequence. When a non-ASCII registered
- name represents an internationalized domain name intended for
- resolution via the DNS, the name must be transformed to the IDNA
- encoding [<a href="http://tools.ietf.org/html/rfc3490" title="&quot;Internationalizing Domain Names in Applications (IDNA)&quot;">RFC3490</a>] prior to name lookup. URI producers should
- provide these registered names in the IDNA encoding, rather than a
- percent-encoding, if they wish to maximize interoperability with
- legacy URI resolvers.
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 21]</span>
-<a name="page-22" id="page-22" href="#page-22"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
-<span class="h4"><h4><a name="section-3.2.3">3.2.3</a>. Port</h4></span>
-
- The port subcomponent of authority is designated by an optional port
- number in decimal following the host and delimited from it by a
- single colon (":") character.
-
- port = *DIGIT
-
- A scheme may define a default port. For example, the "http" scheme
- defines a default port of "80", corresponding to its reserved TCP
- port number. The type of port designated by the port number (e.g.,
- TCP, UDP, SCTP) is defined by the URI scheme. URI producers and
- normalizers should omit the port component and its ":" delimiter if
- port is empty or if its value would be the same as that of the
- scheme's default.
-
-<span class="h3"><h3><a name="section-3.3">3.3</a>. Path</h3></span>
-
- The path component contains data, usually organized in hierarchical
- form, that, along with data in the non-hierarchical query component
- (<a href="#section-3.4">Section 3.4</a>), serves to identify a resource within the scope of the
- URI's scheme and naming authority (if any). The path is terminated
- by the first question mark ("?") or number sign ("#") character, or
- by the end of the URI.
-
- If a URI contains an authority component, then the path component
- must either be empty or begin with a slash ("/") character. If a URI
- does not contain an authority component, then the path cannot begin
- with two slash characters ("//"). In addition, a URI reference
- (<a href="#section-4.1">Section 4.1</a>) may be a relative-path reference, in which case the
- first path segment cannot contain a colon (":") character. The ABNF
- requires five separate rules to disambiguate these cases, only one of
- which will match the path substring within a given URI reference. We
- use the generic term "path component" to describe the URI substring
- matched by the parser to one of these rules.
-
- path = path-abempty ; begins with "/" or is empty
- / path-absolute ; begins with "/" but not "//"
- / path-noscheme ; begins with a non-colon segment
- / path-rootless ; begins with a segment
- / path-empty ; zero characters
-
- path-abempty = *( "/" segment )
- path-absolute = "/" [ segment-nz *( "/" segment ) ]
- path-noscheme = segment-nz-nc *( "/" segment )
- path-rootless = segment-nz *( "/" segment )
- path-empty = 0&lt;pchar&gt;
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 22]</span>
-<a name="page-23" id="page-23" href="#page-23"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- segment = *pchar
- segment-nz = 1*pchar
- segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
- ; non-zero-length segment without any colon ":"
-
- pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
-
- A path consists of a sequence of path segments separated by a slash
- ("/") character. A path is always defined for a URI, though the
- defined path may be empty (zero length). Use of the slash character
- to indicate hierarchy is only required when a URI will be used as the
- context for relative references. For example, the URI
- &lt;mailto:fred@example.com&gt; has a path of "fred@example.com", whereas
- the URI &lt;foo://info.example.com?fred&gt; has an empty path.
-
- The path segments "." and "..", also known as dot-segments, are
- defined for relative reference within the path name hierarchy. They
- are intended for use at the beginning of a relative-path reference
- (<a href="#section-4.2">Section 4.2</a>) to indicate relative position within the hierarchical
- tree of names. This is similar to their role within some operating
- systems' file directory structures to indicate the current directory
- and parent directory, respectively. However, unlike in a file
- system, these dot-segments are only interpreted within the URI path
- hierarchy and are removed as part of the resolution process (Section
- 5.2).
-
- Aside from dot-segments in hierarchical paths, a path segment is
- considered opaque by the generic syntax. URI producing applications
- often use the reserved characters allowed in a segment to delimit
- scheme-specific or dereference-handler-specific subcomponents. For
- example, the semicolon (";") and equals ("=") reserved characters are
- often used to delimit parameters and parameter values applicable to
- that segment. The comma (",") reserved character is often used for
- similar purposes. For example, one URI producer might use a segment
- such as "name;v=1.1" to indicate a reference to version 1.1 of
- "name", whereas another might use a segment such as "name,1.1" to
- indicate the same. Parameter types may be defined by scheme-specific
- semantics, but in most cases the syntax of a parameter is specific to
- the implementation of the URI's dereferencing algorithm.
-
-<span class="h3"><h3><a name="section-3.4">3.4</a>. Query</h3></span>
-
- The query component contains non-hierarchical data that, along with
- data in the path component (<a href="#section-3.3">Section 3.3</a>), serves to identify a
- resource within the scope of the URI's scheme and naming authority
- (if any). The query component is indicated by the first question
- mark ("?") character and terminated by a number sign ("#") character
- or by the end of the URI.
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 23]</span>
-<a name="page-24" id="page-24" href="#page-24"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- query = *( pchar / "/" / "?" )
-
- The characters slash ("/") and question mark ("?") may represent data
- within the query component. Beware that some older, erroneous
- implementations may not handle such data correctly when it is used as
- the base URI for relative references (<a href="#section-5.1">Section 5.1</a>), apparently
- because they fail to distinguish query data from path data when
- looking for hierarchical separators. However, as query components
- are often used to carry identifying information in the form of
- "key=value" pairs and one frequently used value is a reference to
- another URI, it is sometimes better for usability to avoid percent-
- encoding those characters.
-
-<span class="h3"><h3><a name="section-3.5">3.5</a>. Fragment</h3></span>
-
- The fragment identifier component of a URI allows indirect
- identification of a secondary resource by reference to a primary
- resource and additional identifying information. The identified
- secondary resource may be some portion or subset of the primary
- resource, some view on representations of the primary resource, or
- some other resource defined or described by those representations. A
- fragment identifier component is indicated by the presence of a
- number sign ("#") character and terminated by the end of the URI.
-
- fragment = *( pchar / "/" / "?" )
-
- The semantics of a fragment identifier are defined by the set of
- representations that might result from a retrieval action on the
- primary resource. The fragment's format and resolution is therefore
- dependent on the media type [<a href="http://tools.ietf.org/html/rfc2046" title="&quot;Multipurpose Internet Mail Extensions (MIME) Part Two: Media Types&quot;">RFC2046</a>] of a potentially retrieved
- representation, even though such a retrieval is only performed if the
- URI is dereferenced. If no such representation exists, then the
- semantics of the fragment are considered unknown and are effectively
- unconstrained. Fragment identifier semantics are independent of the
- URI scheme and thus cannot be redefined by scheme specifications.
-
- Individual media types may define their own restrictions on or
- structures within the fragment identifier syntax for specifying
- different types of subsets, views, or external references that are
- identifiable as secondary resources by that media type. If the
- primary resource has multiple representations, as is often the case
- for resources whose representation is selected based on attributes of
- the retrieval request (a.k.a., content negotiation), then whatever is
- identified by the fragment should be consistent across all of those
- representations. Each representation should either define the
- fragment so that it corresponds to the same secondary resource,
- regardless of how it is represented, or should leave the fragment
- undefined (i.e., not found).
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 24]</span>
-<a name="page-25" id="page-25" href="#page-25"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- As with any URI, use of a fragment identifier component does not
- imply that a retrieval action will take place. A URI with a fragment
- identifier may be used to refer to the secondary resource without any
- implication that the primary resource is accessible or will ever be
- accessed.
-
- Fragment identifiers have a special role in information retrieval
- systems as the primary form of client-side indirect referencing,
- allowing an author to specifically identify aspects of an existing
- resource that are only indirectly provided by the resource owner. As
- such, the fragment identifier is not used in the scheme-specific
- processing of a URI; instead, the fragment identifier is separated
- from the rest of the URI prior to a dereference, and thus the
- identifying information within the fragment itself is dereferenced
- solely by the user agent, regardless of the URI scheme. Although
- this separate handling is often perceived to be a loss of
- information, particularly for accurate redirection of references as
- resources move over time, it also serves to prevent information
- providers from denying reference authors the right to refer to
- information within a resource selectively. Indirect referencing also
- provides additional flexibility and extensibility to systems that use
- URIs, as new media types are easier to define and deploy than new
- schemes of identification.
-
- The characters slash ("/") and question mark ("?") are allowed to
- represent data within the fragment identifier. Beware that some
- older, erroneous implementations may not handle this data correctly
- when it is used as the base URI for relative references (Section
- 5.1).
-
-<span class="h2"><h2><a name="section-4">4</a>. Usage</h2></span>
-
- When applications make reference to a URI, they do not always use the
- full form of reference defined by the "URI" syntax rule. To save
- space and take advantage of hierarchical locality, many Internet
- protocol elements and media type formats allow an abbreviation of a
- URI, whereas others restrict the syntax to a particular form of URI.
- We define the most common forms of reference syntax in this
- specification because they impact and depend upon the design of the
- generic syntax, requiring a uniform parsing algorithm in order to be
- interpreted consistently.
-
-<span class="h3"><h3><a name="section-4.1">4.1</a>. URI Reference</h3></span>
-
- URI-reference is used to denote the most common usage of a resource
- identifier.
-
- URI-reference = URI / relative-ref
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 25]</span>
-<a name="page-26" id="page-26" href="#page-26"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- A URI-reference is either a URI or a relative reference. If the
- URI-reference's prefix does not match the syntax of a scheme followed
- by its colon separator, then the URI-reference is a relative
- reference.
-
- A URI-reference is typically parsed first into the five URI
- components, in order to determine what components are present and
- whether the reference is relative. Then, each component is parsed
- for its subparts and their validation. The ABNF of URI-reference,
- along with the "first-match-wins" disambiguation rule, is sufficient
- to define a validating parser for the generic syntax. Readers
- familiar with regular expressions should see Appendix B for an
- example of a non-validating URI-reference parser that will take any
- given string and extract the URI components.
-
-<span class="h3"><h3><a name="section-4.2">4.2</a>. Relative Reference</h3></span>
-
- A relative reference takes advantage of the hierarchical syntax
- (<a href="#section-1.2.3">Section 1.2.3</a>) to express a URI reference relative to the name space
- of another hierarchical URI.
-
- relative-ref = relative-part [ "?" query ] [ "#" fragment ]
-
- relative-part = "//" authority path-abempty
- / path-absolute
- / path-noscheme
- / path-empty
-
- The URI referred to by a relative reference, also known as the target
- URI, is obtained by applying the reference resolution algorithm of
- <a href="#section-5">Section 5</a>.
-
- A relative reference that begins with two slash characters is termed
- a network-path reference; such references are rarely used. A
- relative reference that begins with a single slash character is
- termed an absolute-path reference. A relative reference that does
- not begin with a slash character is termed a relative-path reference.
-
- A path segment that contains a colon character (e.g., "this:that")
- cannot be used as the first segment of a relative-path reference, as
- it would be mistaken for a scheme name. Such a segment must be
- preceded by a dot-segment (e.g., "./this:that") to make a relative-
- path reference.
-
-
-
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 26]</span>
-<a name="page-27" id="page-27" href="#page-27"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
-<span class="h3"><h3><a name="section-4.3">4.3</a>. Absolute URI</h3></span>
-
- Some protocol elements allow only the absolute form of a URI without
- a fragment identifier. For example, defining a base URI for later
- use by relative references calls for an absolute-URI syntax rule that
- does not allow a fragment.
-
- absolute-URI = scheme ":" hier-part [ "?" query ]
-
- URI scheme specifications must define their own syntax so that all
- strings matching their scheme-specific syntax will also match the
- &lt;absolute-URI&gt; grammar. Scheme specifications will not define
- fragment identifier syntax or usage, regardless of its applicability
- to resources identifiable via that scheme, as fragment identification
- is orthogonal to scheme definition. However, scheme specifications
- are encouraged to include a wide range of examples, including
- examples that show use of the scheme's URIs with fragment identifiers
- when such usage is appropriate.
-
-<span class="h3"><h3><a name="section-4.4">4.4</a>. Same-Document Reference</h3></span>
-
- When a URI reference refers to a URI that is, aside from its fragment
- component (if any), identical to the base URI (<a href="#section-5.1">Section 5.1</a>), that
- reference is called a "same-document" reference. The most frequent
- examples of same-document references are relative references that are
- empty or include only the number sign ("#") separator followed by a
- fragment identifier.
-
- When a same-document reference is dereferenced for a retrieval
- action, the target of that reference is defined to be within the same
- entity (representation, document, or message) as the reference;
- therefore, a dereference should not result in a new retrieval action.
-
- Normalization of the base and target URIs prior to their comparison,
- as described in Sections 6.2.2 and 6.2.3, is allowed but rarely
- performed in practice. Normalization may increase the set of same-
- document references, which may be of benefit to some caching
- applications. As such, reference authors should not assume that a
- slightly different, though equivalent, reference URI will (or will
- not) be interpreted as a same-document reference by any given
- application.
-
-<span class="h3"><h3><a name="section-4.5">4.5</a>. Suffix Reference</h3></span>
-
- The URI syntax is designed for unambiguous reference to resources and
- extensibility via the URI scheme. However, as URI identification and
- usage have become commonplace, traditional media (television, radio,
- newspapers, billboards, etc.) have increasingly used a suffix of the
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 27]</span>
-<a name="page-28" id="page-28" href="#page-28"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- URI as a reference, consisting of only the authority and path
- portions of the URI, such as
-
- www.w3.org/Addressing/
-
- or simply a DNS registered name on its own. Such references are
- primarily intended for human interpretation rather than for machines,
- with the assumption that context-based heuristics are sufficient to
- complete the URI (e.g., most registered names beginning with "www"
- are likely to have a URI prefix of "http://"). Although there is no
- standard set of heuristics for disambiguating a URI suffix, many
- client implementations allow them to be entered by the user and
- heuristically resolved.
-
- Although this practice of using suffix references is common, it
- should be avoided whenever possible and should never be used in
- situations where long-term references are expected. The heuristics
- noted above will change over time, particularly when a new URI scheme
- becomes popular, and are often incorrect when used out of context.
- Furthermore, they can lead to security issues along the lines of
- those described in [<a href="http://tools.ietf.org/html/rfc1535" title="&quot;A Security Problem and Proposed Correction With Widely Deployed DNS Software&quot;">RFC1535</a>].
-
- As a URI suffix has the same syntax as a relative-path reference, a
- suffix reference cannot be used in contexts where a relative
- reference is expected. As a result, suffix references are limited to
- places where there is no defined base URI, such as dialog boxes and
- off-line advertisements.
-
-<span class="h2"><h2><a name="section-5">5</a>. Reference Resolution</h2></span>
-
- This section defines the process of resolving a URI reference within
- a context that allows relative references so that the result is a
- string matching the &lt;URI&gt; syntax rule of <a href="#section-3">Section 3</a>.
-
-<span class="h3"><h3><a name="section-5.1">5.1</a>. Establishing a Base URI</h3></span>
-
- The term "relative" implies that a "base URI" exists against which
- the relative reference is applied. Aside from fragment-only
- references (<a href="#section-4.4">Section 4.4</a>), relative references are only usable when a
- base URI is known. A base URI must be established by the parser
- prior to parsing URI references that might be relative. A base URI
- must conform to the &lt;absolute-URI&gt; syntax rule (<a href="#section-4.3">Section 4.3</a>). If the
- base URI is obtained from a URI reference, then that reference must
- be converted to absolute form and stripped of any fragment component
- prior to its use as a base URI.
-
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 28]</span>
-<a name="page-29" id="page-29" href="#page-29"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- The base URI of a reference can be established in one of four ways,
- discussed below in order of precedence. The order of precedence can
- be thought of in terms of layers, where the innermost defined base
- URI has the highest precedence. This can be visualized graphically
- as follows:
-
- .----------------------------------------------------------.
- | .----------------------------------------------------. |
- | | .----------------------------------------------. | |
- | | | .----------------------------------------. | | |
- | | | | .----------------------------------. | | | |
- | | | | | &lt;relative-reference&gt; | | | | |
- | | | | `----------------------------------' | | | |
- | | | | (5.1.1) Base URI embedded in content | | | |
- | | | `----------------------------------------' | | |
- | | | (5.1.2) Base URI of the encapsulating entity | | |
- | | | (message, representation, or none) | | |
- | | `----------------------------------------------' | |
- | | (5.1.3) URI used to retrieve the entity | |
- | `----------------------------------------------------' |
- | (5.1.4) Default Base URI (application-dependent) |
- `----------------------------------------------------------'
-
-<span class="h4"><h4><a name="section-5.1.1">5.1.1</a>. Base URI Embedded in Content</h4></span>
-
- Within certain media types, a base URI for relative references can be
- embedded within the content itself so that it can be readily obtained
- by a parser. This can be useful for descriptive documents, such as
- tables of contents, which may be transmitted to others through
- protocols other than their usual retrieval context (e.g., email or
- USENET news).
-
- It is beyond the scope of this specification to specify how, for each
- media type, a base URI can be embedded. The appropriate syntax, when
- available, is described by the data format specification associated
- with each media type.
-
-<span class="h4"><h4><a name="section-5.1.2">5.1.2</a>. Base URI from the Encapsulating Entity</h4></span>
-
- If no base URI is embedded, the base URI is defined by the
- representation's retrieval context. For a document that is enclosed
- within another entity, such as a message or archive, the retrieval
- context is that entity. Thus, the default base URI of a
- representation is the base URI of the entity in which the
- representation is encapsulated.
-
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 29]</span>
-<a name="page-30" id="page-30" href="#page-30"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- A mechanism for embedding a base URI within MIME container types
- (e.g., the message and multipart types) is defined by MHTML
- [<a href="http://tools.ietf.org/html/rfc2557" title="&quot;MIME Encapsulation of Aggregate Documents, such as HTML (MHTML)&quot;">RFC2557</a>]. Protocols that do not use the MIME message header syntax,
- but that do allow some form of tagged metadata to be included within
- messages, may define their own syntax for defining a base URI as part
- of a message.
-
-<span class="h4"><h4><a name="section-5.1.3">5.1.3</a>. Base URI from the Retrieval URI</h4></span>
-
- If no base URI is embedded and the representation is not encapsulated
- within some other entity, then, if a URI was used to retrieve the
- representation, that URI shall be considered the base URI. Note that
- if the retrieval was the result of a redirected request, the last URI
- used (i.e., the URI that resulted in the actual retrieval of the
- representation) is the base URI.
-
-<span class="h4"><h4><a name="section-5.1.4">5.1.4</a>. Default Base URI</h4></span>
-
- If none of the conditions described above apply, then the base URI is
- defined by the context of the application. As this definition is
- necessarily application-dependent, failing to define a base URI by
- using one of the other methods may result in the same content being
- interpreted differently by different types of applications.
-
- A sender of a representation containing relative references is
- responsible for ensuring that a base URI for those references can be
- established. Aside from fragment-only references, relative
- references can only be used reliably in situations where the base URI
- is well defined.
-
-<span class="h3"><h3><a name="section-5.2">5.2</a>. Relative Resolution</h3></span>
-
- This section describes an algorithm for converting a URI reference
- that might be relative to a given base URI into the parsed components
- of the reference's target. The components can then be recomposed, as
- described in <a href="#section-5.3">Section 5.3</a>, to form the target URI. This algorithm
- provides definitive results that can be used to test the output of
- other implementations. Applications may implement relative reference
- resolution by using some other algorithm, provided that the results
- match what would be given by this one.
-
-
-
-
-
-
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 30]</span>
-<a name="page-31" id="page-31" href="#page-31"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
-<span class="h4"><h4><a name="section-5.2.1">5.2.1</a>. Pre-parse the Base URI</h4></span>
-
- The base URI (Base) is established according to the procedure of
- <a href="#section-5.1">Section 5.1</a> and parsed into the five main components described in
- <a href="#section-3">Section 3</a>. Note that only the scheme component is required to be
- present in a base URI; the other components may be empty or
- undefined. A component is undefined if its associated delimiter does
- not appear in the URI reference; the path component is never
- undefined, though it may be empty.
-
- Normalization of the base URI, as described in Sections 6.2.2 and
- 6.2.3, is optional. A URI reference must be transformed to its
- target URI before it can be normalized.
-
-<span class="h4"><h4><a name="section-5.2.2">5.2.2</a>. Transform References</h4></span>
-
- For each URI reference (R), the following pseudocode describes an
- algorithm for transforming R into its target URI (T):
-
- -- The URI reference is parsed into the five URI components
- --
- (R.scheme, R.authority, R.path, R.query, R.fragment) = parse(R);
-
- -- A non-strict parser may ignore a scheme in the reference
- -- if it is identical to the base URI's scheme.
- --
- if ((not strict) and (R.scheme == Base.scheme)) then
- undefine(R.scheme);
- endif;
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 31]</span>
-<a name="page-32" id="page-32" href="#page-32"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- if defined(R.scheme) then
- T.scheme = R.scheme;
- T.authority = R.authority;
- T.path = remove_dot_segments(R.path);
- T.query = R.query;
- else
- if defined(R.authority) then
- T.authority = R.authority;
- T.path = remove_dot_segments(R.path);
- T.query = R.query;
- else
- if (R.path == "") then
- T.path = Base.path;
- if defined(R.query) then
- T.query = R.query;
- else
- T.query = Base.query;
- endif;
- else
- if (R.path starts-with "/") then
- T.path = remove_dot_segments(R.path);
- else
- T.path = merge(Base.path, R.path);
- T.path = remove_dot_segments(T.path);
- endif;
- T.query = R.query;
- endif;
- T.authority = Base.authority;
- endif;
- T.scheme = Base.scheme;
- endif;
-
- T.fragment = R.fragment;
-
-<span class="h4"><h4><a name="section-5.2.3">5.2.3</a>. Merge Paths</h4></span>
-
- The pseudocode above refers to a "merge" routine for merging a
- relative-path reference with the path of the base URI. This is
- accomplished as follows:
-
- o If the base URI has a defined authority component and an empty
- path, then return a string consisting of "/" concatenated with the
- reference's path; otherwise,
-
-
-
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 32]</span>
-<a name="page-33" id="page-33" href="#page-33"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- o return a string consisting of the reference's path component
- appended to all but the last segment of the base URI's path (i.e.,
- excluding any characters after the right-most "/" in the base URI
- path, or excluding the entire base URI path if it does not contain
- any "/" characters).
-
-<span class="h4"><h4><a name="section-5.2.4">5.2.4</a>. Remove Dot Segments</h4></span>
-
- The pseudocode also refers to a "remove_dot_segments" routine for
- interpreting and removing the special "." and ".." complete path
- segments from a referenced path. This is done after the path is
- extracted from a reference, whether or not the path was relative, in
- order to remove any invalid or extraneous dot-segments prior to
- forming the target URI. Although there are many ways to accomplish
- this removal process, we describe a simple method using two string
- buffers.
-
- 1. The input buffer is initialized with the now-appended path
- components and the output buffer is initialized to the empty
- string.
-
- 2. While the input buffer is not empty, loop as follows:
-
- A. If the input buffer begins with a prefix of "../" or "./",
- then remove that prefix from the input buffer; otherwise,
-
- B. if the input buffer begins with a prefix of "/./" or "/.",
- where "." is a complete path segment, then replace that
- prefix with "/" in the input buffer; otherwise,
-
- C. if the input buffer begins with a prefix of "/../" or "/..",
- where ".." is a complete path segment, then replace that
- prefix with "/" in the input buffer and remove the last
- segment and its preceding "/" (if any) from the output
- buffer; otherwise,
-
- D. if the input buffer consists only of "." or "..", then remove
- that from the input buffer; otherwise,
-
- E. move the first path segment in the input buffer to the end of
- the output buffer, including the initial "/" character (if
- any) and any subsequent characters up to, but not including,
- the next "/" character or the end of the input buffer.
-
- 3. Finally, the output buffer is returned as the result of
- remove_dot_segments.
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 33]</span>
-<a name="page-34" id="page-34" href="#page-34"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- Note that dot-segments are intended for use in URI references to
- express an identifier relative to the hierarchy of names in the base
- URI. The remove_dot_segments algorithm respects that hierarchy by
- removing extra dot-segments rather than treat them as an error or
- leaving them to be misinterpreted by dereference implementations.
-
- The following illustrates how the above steps are applied for two
- examples of merged paths, showing the state of the two buffers after
- each step.
-
- STEP OUTPUT BUFFER INPUT BUFFER
-
- 1 : /a/b/c/./../../g
- 2E: /a /b/c/./../../g
- 2E: /a/b /c/./../../g
- 2E: /a/b/c /./../../g
- 2B: /a/b/c /../../g
- 2C: /a/b /../g
- 2C: /a /g
- 2E: /a/g
-
- STEP OUTPUT BUFFER INPUT BUFFER
-
- 1 : mid/content=5/../6
- 2E: mid /content=5/../6
- 2E: mid/content=5 /../6
- 2C: mid /6
- 2E: mid/6
-
- Some applications may find it more efficient to implement the
- remove_dot_segments algorithm by using two segment stacks rather than
- strings.
-
- Note: Beware that some older, erroneous implementations will fail
- to separate a reference's query component from its path component
- prior to merging the base and reference paths, resulting in an
- interoperability failure if the query component contains the
- strings "/../" or "/./".
-
-
-
-
-
-
-
-
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 34]</span>
-<a name="page-35" id="page-35" href="#page-35"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
-<span class="h3"><h3><a name="section-5.3">5.3</a>. Component Recomposition</h3></span>
-
- Parsed URI components can be recomposed to obtain the corresponding
- URI reference string. Using pseudocode, this would be:
-
- result = ""
-
- if defined(scheme) then
- append scheme to result;
- append ":" to result;
- endif;
-
- if defined(authority) then
- append "//" to result;
- append authority to result;
- endif;
-
- append path to result;
-
- if defined(query) then
- append "?" to result;
- append query to result;
- endif;
-
- if defined(fragment) then
- append "#" to result;
- append fragment to result;
- endif;
-
- return result;
-
- Note that we are careful to preserve the distinction between a
- component that is undefined, meaning that its separator was not
- present in the reference, and a component that is empty, meaning that
- the separator was present and was immediately followed by the next
- component separator or the end of the reference.
-
-<span class="h3"><h3><a name="section-5.4">5.4</a>. Reference Resolution Examples</h3></span>
-
- Within a representation with a well defined base URI of
-
- http://a/b/c/d;p?q
-
- a relative reference is transformed to its target URI as follows.
-
-
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 35]</span>
-<a name="page-36" id="page-36" href="#page-36"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
-<span class="h4"><h4><a name="section-5.4.1">5.4.1</a>. Normal Examples</h4></span>
-
- "g:h" = "g:h"
- "g" = "<a href="http://a/b/c/g">http://a/b/c/g</a>"
- "./g" = "<a href="http://a/b/c/g">http://a/b/c/g</a>"
- "g/" = "<a href="http://a/b/c/g/">http://a/b/c/g/</a>"
- "/g" = "<a href="http://a/g">http://a/g</a>"
- "//g" = "http://g"
- "?y" = "http://a/b/c/d;p?y"
- "g?y" = "<a href="http://a/b/c/g?y">http://a/b/c/g?y</a>"
- "#s" = "http://a/b/c/d;p?q#s"
- "g#s" = "<a href="http://a/b/c/g#s">http://a/b/c/g#s</a>"
- "g?y#s" = "<a href="http://a/b/c/g?y#s">http://a/b/c/g?y#s</a>"
- ";x" = "http://a/b/c/;x"
- "g;x" = "http://a/b/c/g;x"
- "g;x?y#s" = "http://a/b/c/g;x?y#s"
- "" = "http://a/b/c/d;p?q"
- "." = "<a href="http://a/b/c/">http://a/b/c/</a>"
- "./" = "<a href="http://a/b/c/">http://a/b/c/</a>"
- ".." = "<a href="http://a/b/">http://a/b/</a>"
- "../" = "<a href="http://a/b/">http://a/b/</a>"
- "../g" = "<a href="http://a/b/g">http://a/b/g</a>"
- "../.." = "<a href="http://a/">http://a/</a>"
- "../../" = "<a href="http://a/">http://a/</a>"
- "../../g" = "<a href="http://a/g">http://a/g</a>"
-
-<span class="h4"><h4><a name="section-5.4.2">5.4.2</a>. Abnormal Examples</h4></span>
-
- Although the following abnormal examples are unlikely to occur in
- normal practice, all URI parsers should be capable of resolving them
- consistently. Each example uses the same base as that above.
-
- Parsers must be careful in handling cases where there are more ".."
- segments in a relative-path reference than there are hierarchical
- levels in the base URI's path. Note that the ".." syntax cannot be
- used to change the authority component of a URI.
-
- "../../../g" = "<a href="http://a/g">http://a/g</a>"
- "../../../../g" = "<a href="http://a/g">http://a/g</a>"
-
-
-
-
-
-
-
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 36]</span>
-<a name="page-37" id="page-37" href="#page-37"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- Similarly, parsers must remove the dot-segments "." and ".." when
- they are complete components of a path, but not when they are only
- part of a segment.
-
- "/./g" = "<a href="http://a/g">http://a/g</a>"
- "/../g" = "<a href="http://a/g">http://a/g</a>"
- "g." = "<a href="http://a/b/c/g">http://a/b/c/g</a>."
- ".g" = "<a href="http://a/b/c/.g">http://a/b/c/.g</a>"
- "g.." = "<a href="http://a/b/c/g">http://a/b/c/g</a>.."
- "..g" = "<a href="http://a/b/c/..g">http://a/b/c/..g</a>"
-
- Less likely are cases where the relative reference uses unnecessary
- or nonsensical forms of the "." and ".." complete path segments.
-
- "./../g" = "<a href="http://a/b/g">http://a/b/g</a>"
- "./g/." = "<a href="http://a/b/c/g/">http://a/b/c/g/</a>"
- "g/./h" = "<a href="http://a/b/c/g/h">http://a/b/c/g/h</a>"
- "g/../h" = "<a href="http://a/b/c/h">http://a/b/c/h</a>"
- "g;x=1/./y" = "http://a/b/c/g;x=1/y"
- "g;x=1/../y" = "<a href="http://a/b/c/y">http://a/b/c/y</a>"
-
- Some applications fail to separate the reference's query and/or
- fragment components from the path component before merging it with
- the base path and removing dot-segments. This error is rarely
- noticed, as typical usage of a fragment never includes the hierarchy
- ("/") character and the query component is not normally used within
- relative references.
-
- "g?y/./x" = "<a href="http://a/b/c/g?y/./x">http://a/b/c/g?y/./x</a>"
- "g?y/../x" = "<a href="http://a/b/c/g?y/../x">http://a/b/c/g?y/../x</a>"
- "g#s/./x" = "<a href="http://a/b/c/g#s/./x">http://a/b/c/g#s/./x</a>"
- "g#s/../x" = "<a href="http://a/b/c/g#s/../x">http://a/b/c/g#s/../x</a>"
-
- Some parsers allow the scheme name to be present in a relative
- reference if it is the same as the base URI scheme. This is
- considered to be a loophole in prior specifications of partial URI
- [<a href="http://tools.ietf.org/html/rfc1630" title="&quot;Universal Resource Identifiers in WWW: A Unifying Syntax for the Expression of Names and Addresses of Objects on the Network as used in the World-Wide Web&quot;">RFC1630</a>]. Its use should be avoided but is allowed for backward
- compatibility.
-
- "http:g" = "http:g" ; for strict parsers
- / "<a href="http://a/b/c/g">http://a/b/c/g</a>" ; for backward compatibility
-
-
-
-
-
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 37]</span>
-<a name="page-38" id="page-38" href="#page-38"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
-<span class="h2"><h2><a name="section-6">6</a>. Normalization and Comparison</h2></span>
-
- One of the most common operations on URIs is simple comparison:
- determining whether two URIs are equivalent without using the URIs to
- access their respective resource(s). A comparison is performed every
- time a response cache is accessed, a browser checks its history to
- color a link, or an XML parser processes tags within a namespace.
- Extensive normalization prior to comparison of URIs is often used by
- spiders and indexing engines to prune a search space or to reduce
- duplication of request actions and response storage.
-
- URI comparison is performed for some particular purpose. Protocols
- or implementations that compare URIs for different purposes will
- often be subject to differing design trade-offs in regards to how
- much effort should be spent in reducing aliased identifiers. This
- section describes various methods that may be used to compare URIs,
- the trade-offs between them, and the types of applications that might
- use them.
-
-<span class="h3"><h3><a name="section-6.1">6.1</a>. Equivalence</h3></span>
-
- Because URIs exist to identify resources, presumably they should be
- considered equivalent when they identify the same resource. However,
- this definition of equivalence is not of much practical use, as there
- is no way for an implementation to compare two resources unless it
- has full knowledge or control of them. For this reason,
- determination of equivalence or difference of URIs is based on string
- comparison, perhaps augmented by reference to additional rules
- provided by URI scheme definitions. We use the terms "different" and
- "equivalent" to describe the possible outcomes of such comparisons,
- but there are many application-dependent versions of equivalence.
-
- Even though it is possible to determine that two URIs are equivalent,
- URI comparison is not sufficient to determine whether two URIs
- identify different resources. For example, an owner of two different
- domain names could decide to serve the same resource from both,
- resulting in two different URIs. Therefore, comparison methods are
- designed to minimize false negatives while strictly avoiding false
- positives.
-
- In testing for equivalence, applications should not directly compare
- relative references; the references should be converted to their
- respective target URIs before comparison. When URIs are compared to
- select (or avoid) a network action, such as retrieval of a
- representation, fragment components (if any) should be excluded from
- the comparison.
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 38]</span>
-<a name="page-39" id="page-39" href="#page-39"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
-<span class="h3"><h3><a name="section-6.2">6.2</a>. Comparison Ladder</h3></span>
-
- A variety of methods are used in practice to test URI equivalence.
- These methods fall into a range, distinguished by the amount of
- processing required and the degree to which the probability of false
- negatives is reduced. As noted above, false negatives cannot be
- eliminated. In practice, their probability can be reduced, but this
- reduction requires more processing and is not cost-effective for all
- applications.
-
- If this range of comparison practices is considered as a ladder, the
- following discussion will climb the ladder, starting with practices
- that are cheap but have a relatively higher chance of producing false
- negatives, and proceeding to those that have higher computational
- cost and lower risk of false negatives.
-
-<span class="h4"><h4><a name="section-6.2.1">6.2.1</a>. Simple String Comparison</h4></span>
-
- If two URIs, when considered as character strings, are identical,
- then it is safe to conclude that they are equivalent. This type of
- equivalence test has very low computational cost and is in wide use
- in a variety of applications, particularly in the domain of parsing.
-
- Testing strings for equivalence requires some basic precautions.
- This procedure is often referred to as "bit-for-bit" or
- "byte-for-byte" comparison, which is potentially misleading. Testing
- strings for equality is normally based on pair comparison of the
- characters that make up the strings, starting from the first and
- proceeding until both strings are exhausted and all characters are
- found to be equal, until a pair of characters compares unequal, or
- until one of the strings is exhausted before the other.
-
- This character comparison requires that each pair of characters be
- put in comparable form. For example, should one URI be stored in a
- byte array in EBCDIC encoding and the second in a Java String object
- (UTF-16), bit-for-bit comparisons applied naively will produce
- errors. It is better to speak of equality on a character-for-
- character basis rather than on a byte-for-byte or bit-for-bit basis.
- In practical terms, character-by-character comparisons should be done
- codepoint-by-codepoint after conversion to a common character
- encoding.
-
- False negatives are caused by the production and use of URI aliases.
- Unnecessary aliases can be reduced, regardless of the comparison
- method, by consistently providing URI references in an already-
- normalized form (i.e., a form identical to what would be produced
- after normalization is applied, as described below).
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 39]</span>
-<a name="page-40" id="page-40" href="#page-40"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- Protocols and data formats often limit some URI comparisons to simple
- string comparison, based on the theory that people and
- implementations will, in their own best interest, be consistent in
- providing URI references, or at least consistent enough to negate any
- efficiency that might be obtained from further normalization.
-
-<span class="h4"><h4><a name="section-6.2.2">6.2.2</a>. Syntax-Based Normalization</h4></span>
-
- Implementations may use logic based on the definitions provided by
- this specification to reduce the probability of false negatives.
- This processing is moderately higher in cost than character-for-
- character string comparison. For example, an application using this
- approach could reasonably consider the following two URIs equivalent:
-
- example://a/b/c/%7Bfoo%7D
- eXAMPLE://a/./b/../b/%63/%7bfoo%7d
-
- Web user agents, such as browsers, typically apply this type of URI
- normalization when determining whether a cached response is
- available. Syntax-based normalization includes such techniques as
- case normalization, percent-encoding normalization, and removal of
- dot-segments.
-
-<span class="h5"><h5><a name="section-6.2.2.1">6.2.2.1</a>. Case Normalization</h5></span>
-
- For all URIs, the hexadecimal digits within a percent-encoding
- triplet (e.g., "%3a" versus "%3A") are case-insensitive and therefore
- should be normalized to use uppercase letters for the digits A-F.
-
- When a URI uses components of the generic syntax, the component
- syntax equivalence rules always apply; namely, that the scheme and
- host are case-insensitive and therefore should be normalized to
- lowercase. For example, the URI <HTTP://www.EXAMPLE.com/> is
- equivalent to <http://www.example.com/>. The other generic syntax
- components are assumed to be case-sensitive unless specifically
- defined otherwise by the scheme (see <a href="#section-6.2.3">Section 6.2.3</a>).
-
-<span class="h5"><h5><a name="section-6.2.2.2">6.2.2.2</a>. Percent-Encoding Normalization</h5></span>
-
- The percent-encoding mechanism (<a href="#section-2.1">Section 2.1</a>) is a frequent source of
- variance among otherwise identical URIs. In addition to the case
- normalization issue noted above, some URI producers percent-encode
- octets that do not require percent-encoding, resulting in URIs that
- are equivalent to their non-encoded counterparts. These URIs should
- be normalized by decoding any percent-encoded octet that corresponds
- to an unreserved character, as described in <a href="#section-2.3">Section 2.3</a>.
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 40]</span>
-<a name="page-41" id="page-41" href="#page-41"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
-<span class="h5"><h5><a name="section-6.2.2.3">6.2.2.3</a>. Path Segment Normalization</h5></span>
-
- The complete path segments "." and ".." are intended only for use
- within relative references (<a href="#section-4.1">Section 4.1</a>) and are removed as part of
- the reference resolution process (<a href="#section-5.2">Section 5.2</a>). However, some
- deployed implementations incorrectly assume that reference resolution
- is not necessary when the reference is already a URI and thus fail to
- remove dot-segments when they occur in non-relative paths. URI
- normalizers should remove dot-segments by applying the
- remove_dot_segments algorithm to the path, as described in
- <a href="#section-5.2.4">Section 5.2.4</a>.
-
-<span class="h4"><h4><a name="section-6.2.3">6.2.3</a>. Scheme-Based Normalization</h4></span>
-
- The syntax and semantics of URIs vary from scheme to scheme, as
- described by the defining specification for each scheme.
- Implementations may use scheme-specific rules, at further processing
- cost, to reduce the probability of false negatives. For example,
- because the "http" scheme makes use of an authority component, has a
- default port of "80", and defines an empty path to be equivalent to
- "/", the following four URIs are equivalent:
-
- http://example.com
- http://example.com/
- <a href="http://example.com/">http://example.com:/</a>
- <a href="http://example.com/">http://example.com:80/</a>
-
- In general, a URI that uses the generic syntax for authority with an
- empty path should be normalized to a path of "/". Likewise, an
- explicit ":port", for which the port is empty or the default for the
- scheme, is equivalent to one where the port and its ":" delimiter are
- elided and thus should be removed by scheme-based normalization. For
- example, the second URI above is the normal form for the "http"
- scheme.
-
- Another case where normalization varies by scheme is in the handling
- of an empty authority component or empty host subcomponent. For many
- scheme specifications, an empty authority or host is considered an
- error; for others, it is considered equivalent to "localhost" or the
- end-user's host. When a scheme defines a default for authority and a
- URI reference to that default is desired, the reference should be
- normalized to an empty authority for the sake of uniformity, brevity,
- and internationalization. If, however, either the userinfo or port
- subcomponents are non-empty, then the host should be given explicitly
- even if it matches the default.
-
- Normalization should not remove delimiters when their associated
- component is empty unless licensed to do so by the scheme
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 41]</span>
-<a name="page-42" id="page-42" href="#page-42"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- specification. For example, the URI "http://example.com/?" cannot be
- assumed to be equivalent to any of the examples above. Likewise, the
- presence or absence of delimiters within a userinfo subcomponent is
- usually significant to its interpretation. The fragment component is
- not subject to any scheme-based normalization; thus, two URIs that
- differ only by the suffix "#" are considered different regardless of
- the scheme.
-
- Some schemes define additional subcomponents that consist of case-
- insensitive data, giving an implicit license to normalizers to
- convert this data to a common case (e.g., all lowercase). For
- example, URI schemes that define a subcomponent of path to contain an
- Internet hostname, such as the "mailto" URI scheme, cause that
- subcomponent to be case-insensitive and thus subject to case
- normalization (e.g., "mailto:Joe@Example.COM" is equivalent to
- "mailto:Joe@example.com", even though the generic syntax considers
- the path component to be case-sensitive).
-
- Other scheme-specific normalizations are possible.
-
-<span class="h4"><h4><a name="section-6.2.4">6.2.4</a>. Protocol-Based Normalization</h4></span>
-
- Substantial effort to reduce the incidence of false negatives is
- often cost-effective for web spiders. Therefore, they implement even
- more aggressive techniques in URI comparison. For example, if they
- observe that a URI such as
-
- http://example.com/data
-
- redirects to a URI differing only in the trailing slash
-
- http://example.com/data/
-
- they will likely regard the two as equivalent in the future. This
- kind of technique is only appropriate when equivalence is clearly
- indicated by both the result of accessing the resources and the
- common conventions of their scheme's dereference algorithm (in this
- case, use of redirection by HTTP origin servers to avoid problems
- with relative references).
-
-
-
-
-
-
-
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 42]</span>
-<a name="page-43" id="page-43" href="#page-43"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
-<span class="h2"><h2><a name="section-7">7</a>. Security Considerations</h2></span>
-
- A URI does not in itself pose a security threat. However, as URIs
- are often used to provide a compact set of instructions for access to
- network resources, care must be taken to properly interpret the data
- within a URI, to prevent that data from causing unintended access,
- and to avoid including data that should not be revealed in plain
- text.
-
-<span class="h3"><h3><a name="section-7.1">7.1</a>. Reliability and Consistency</h3></span>
-
- There is no guarantee that once a URI has been used to retrieve
- information, the same information will be retrievable by that URI in
- the future. Nor is there any guarantee that the information
- retrievable via that URI in the future will be observably similar to
- that retrieved in the past. The URI syntax does not constrain how a
- given scheme or authority apportions its namespace or maintains it
- over time. Such guarantees can only be obtained from the person(s)
- controlling that namespace and the resource in question. A specific
- URI scheme may define additional semantics, such as name persistence,
- if those semantics are required of all naming authorities for that
- scheme.
-
-<span class="h3"><h3><a name="section-7.2">7.2</a>. Malicious Construction</h3></span>
-
- It is sometimes possible to construct a URI so that an attempt to
- perform a seemingly harmless, idempotent operation, such as the
- retrieval of a representation, will in fact cause a possibly damaging
- remote operation. The unsafe URI is typically constructed by
- specifying a port number other than that reserved for the network
- protocol in question. The client unwittingly contacts a site running
- a different protocol service, and data within the URI contains
- instructions that, when interpreted according to this other protocol,
- cause an unexpected operation. A frequent example of such abuse has
- been the use of a protocol-based scheme with a port component of
- "25", thereby fooling user agent software into sending an unintended
- or impersonating message via an SMTP server.
-
- Applications should prevent dereference of a URI that specifies a TCP
- port number within the "well-known port" range (0 - 1023) unless the
- protocol being used to dereference that URI is compatible with the
- protocol expected on that well-known port. Although IANA maintains a
- registry of well-known ports, applications should make such
- restrictions user-configurable to avoid preventing the deployment of
- new services.
-
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 43]</span>
-<a name="page-44" id="page-44" href="#page-44"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- When a URI contains percent-encoded octets that match the delimiters
- for a given resolution or dereference protocol (for example, CR and
- LF characters for the TELNET protocol), these percent-encodings must
- not be decoded before transmission across that protocol. Transfer of
- the percent-encoding, which might violate the protocol, is less
- harmful than allowing decoded octets to be interpreted as additional
- operations or parameters, perhaps triggering an unexpected and
- possibly harmful remote operation.
-
-<span class="h3"><h3><a name="section-7.3">7.3</a>. Back-End Transcoding</h3></span>
-
- When a URI is dereferenced, the data within it is often parsed by
- both the user agent and one or more servers. In HTTP, for example, a
- typical user agent will parse a URI into its five major components,
- access the authority's server, and send it the data within the
- authority, path, and query components. A typical server will take
- that information, parse the path into segments and the query into
- key/value pairs, and then invoke implementation-specific handlers to
- respond to the request. As a result, a common security concern for
- server implementations that handle a URI, either as a whole or split
- into separate components, is proper interpretation of the octet data
- represented by the characters and percent-encodings within that URI.
-
- Percent-encoded octets must be decoded at some point during the
- dereference process. Applications must split the URI into its
- components and subcomponents prior to decoding the octets, as
- otherwise the decoded octets might be mistaken for delimiters.
- Security checks of the data within a URI should be applied after
- decoding the octets. Note, however, that the "%00" percent-encoding
- (NUL) may require special handling and should be rejected if the
- application is not expecting to receive raw data within a component.
-
- Special care should be taken when the URI path interpretation process
- involves the use of a back-end file system or related system
- functions. File systems typically assign an operational meaning to
- special characters, such as the "/", "\", ":", "[", and "]"
- characters, and to special device names like ".", "..", "...", "aux",
- "lpt", etc. In some cases, merely testing for the existence of such
- a name will cause the operating system to pause or invoke unrelated
- system calls, leading to significant security concerns regarding
- denial of service and unintended data transfer. It would be
- impossible for this specification to list all such significant
- characters and device names. Implementers should research the
- reserved names and characters for the types of storage device that
- may be attached to their applications and restrict the use of data
- obtained from URI components accordingly.
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 44]</span>
-<a name="page-45" id="page-45" href="#page-45"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
-<span class="h3"><h3><a name="section-7.4">7.4</a>. Rare IP Address Formats</h3></span>
-
- Although the URI syntax for IPv4address only allows the common
- dotted-decimal form of IPv4 address literal, many implementations
- that process URIs make use of platform-dependent system routines,
- such as gethostbyname() and inet_aton(), to translate the string
- literal to an actual IP address. Unfortunately, such system routines
- often allow and process a much larger set of formats than those
- described in <a href="#section-3.2.2">Section 3.2.2</a>.
-
- For example, many implementations allow dotted forms of three
- numbers, wherein the last part is interpreted as a 16-bit quantity
- and placed in the right-most two bytes of the network address (e.g.,
- a Class B network). Likewise, a dotted form of two numbers means
- that the last part is interpreted as a 24-bit quantity and placed in
- the right-most three bytes of the network address (Class A), and a
- single number (without dots) is interpreted as a 32-bit quantity and
- stored directly in the network address. Adding further to the
- confusion, some implementations allow each dotted part to be
- interpreted as decimal, octal, or hexadecimal, as specified in the C
- language (i.e., a leading 0x or 0X implies hexadecimal; a leading 0
- implies octal; otherwise, the number is interpreted as decimal).
-
- These additional IP address formats are not allowed in the URI syntax
- due to differences between platform implementations. However, they
- can become a security concern if an application attempts to filter
- access to resources based on the IP address in string literal format.
- If this filtering is performed, literals should be converted to
- numeric form and filtered based on the numeric value, and not on a
- prefix or suffix of the string form.
-
-<span class="h3"><h3><a name="section-7.5">7.5</a>. Sensitive Information</h3></span>
-
- URI producers should not provide a URI that contains a username or
- password that is intended to be secret. URIs are frequently
- displayed by browsers, stored in clear text bookmarks, and logged by
- user agent history and intermediary applications (proxies). A
- password appearing within the userinfo component is deprecated and
- should be considered an error (or simply ignored) except in those
- rare cases where the 'password' parameter is intended to be public.
-
-<span class="h3"><h3><a name="section-7.6">7.6</a>. Semantic Attacks</h3></span>
-
- Because the userinfo subcomponent is rarely used and appears before
- the host in the authority component, it can be used to construct a
- URI intended to mislead a human user by appearing to identify one
- (trusted) naming authority while actually identifying a different
- authority hidden behind the noise. For example
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 45]</span>
-<a name="page-46" id="page-46" href="#page-46"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- ftp://cnn.example.com&story=breaking_news@10.0.0.1/top_story.htm
-
- might lead a human user to assume that the host is 'cnn.example.com',
- whereas it is actually '10.0.0.1'. Note that a misleading userinfo
- subcomponent could be much longer than the example above.
-
- A misleading URI, such as that above, is an attack on the user's
- preconceived notions about the meaning of a URI rather than an attack
- on the software itself. User agents may be able to reduce the impact
- of such attacks by distinguishing the various components of the URI
- when they are rendered, such as by using a different color or tone to
- render userinfo if any is present, though there is no panacea. More
- information on URI-based semantic attacks can be found in [<a href="#ref-Siedzik" title="&quot;Semantic Attacks: What's in a URL?&quot;">Siedzik</a>].
-
-<span class="h2"><h2><a name="section-8">8</a>. IANA Considerations</h2></span>
-
- URI scheme names, as defined by &lt;scheme&gt; in <a href="#section-3.1">Section 3.1</a>, form a
- registered namespace that is managed by IANA according to the
- procedures defined in [<a href="#ref-BCP35" title="&quot;Registration Procedures for URL Scheme Names&quot;">BCP35</a>]. No IANA actions are required by this
- document.
-
-<span class="h2"><h2><a name="section-9">9</a>. Acknowledgements</h2></span>
-
- This specification is derived from <a href="http://tools.ietf.org/html/rfc2396">RFC 2396</a> [<a href="http://tools.ietf.org/html/rfc2396" title="&quot;Uniform Resource Identifiers (URI): Generic Syntax&quot;">RFC2396</a>], <a href="http://tools.ietf.org/html/rfc1808">RFC 1808</a>
- [<a href="http://tools.ietf.org/html/rfc1808" title="&quot;Relative Uniform Resource Locators&quot;">RFC1808</a>], and <a href="http://tools.ietf.org/html/rfc1738">RFC 1738</a> [<a href="http://tools.ietf.org/html/rfc1738" title="&quot;Uniform Resource Locators (URL)&quot;">RFC1738</a>]; the acknowledgements in those
- documents still apply. It also incorporates the update (with
- corrections) for IPv6 literals in the host syntax, as defined by
- Robert M. Hinden, Brian E. Carpenter, and Larry Masinter in
- [<a href="http://tools.ietf.org/html/rfc2732" title="&quot;Format for Literal IPv6 Addresses in URL's&quot;">RFC2732</a>]. In addition, contributions by Gisle Aas, Reese Anschultz,
- Daniel Barclay, Tim Bray, Mike Brown, Rob Cameron, Jeremy Carroll,
- Dan Connolly, Adam M. Costello, John Cowan, Jason Diamond, Martin
- Duerst, Stefan Eissing, Clive D.W. Feather, Al Gilman, Tony Hammond,
- Elliotte Harold, Pat Hayes, Henry Holtzman, Ian B. Jacobs, Michael
- Kay, John C. Klensin, Graham Klyne, Dan Kohn, Bruce Lilly, Andrew
- Main, Dave McAlpin, Ira McDonald, Michael Mealling, Ray Merkert,
- Stephen Pollei, Julian Reschke, Tomas Rokicki, Miles Sabin, Kai
- Schaetzl, Mark Thomson, Ronald Tschalaer, Norm Walsh, Marc Warne,
- Stuart Williams, and Henry Zongaro are gratefully acknowledged.
-
-<span class="h2"><h2><a name="section-10">10</a>. References</h2></span>
-
-<span class="h3"><h3><a name="section-10.1">10.1</a>. Normative References</h3></span>
-
- [<a name="ref-ASCII" id="ref-ASCII">ASCII</a>] American National Standards Institute, "Coded Character
- Set -- 7-bit American Standard Code for Information
- Interchange", ANSI X3.4, 1986.
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 46]</span>
-<a name="page-47" id="page-47" href="#page-47"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- [<a name="ref-RFC2234" id="ref-RFC2234">RFC2234</a>] Crocker, D. and P. Overell, "Augmented BNF for Syntax
- Specifications: ABNF", <a href="http://tools.ietf.org/html/rfc2234">RFC 2234</a>, November 1997.
-
- [<a name="ref-STD63" id="ref-STD63">STD63</a>] Yergeau, F., "UTF-8, a transformation format of
- ISO 10646", STD 63, <a href="http://tools.ietf.org/html/rfc3629">RFC 3629</a>, November 2003.
-
- [<a name="ref-UCS" id="ref-UCS">UCS</a>] International Organization for Standardization,
- "Information Technology - Universal Multiple-Octet Coded
- Character Set (UCS)", ISO/IEC 10646:2003, December 2003.
-
-<span class="h3"><h3><a name="section-10.2">10.2</a>. Informative References</h3></span>
-
- [<a name="ref-BCP19" id="ref-BCP19">BCP19</a>] Freed, N. and J. Postel, "IANA Charset Registration
- Procedures", <a href="http://tools.ietf.org/html/bcp19">BCP 19</a>, <a href="http://tools.ietf.org/html/rfc2978">RFC 2978</a>, October 2000.
-
- [<a name="ref-BCP35" id="ref-BCP35">BCP35</a>] Petke, R. and I. King, "Registration Procedures for URL
- Scheme Names", <a href="http://tools.ietf.org/html/bcp35">BCP 35</a>, <a href="http://tools.ietf.org/html/rfc2717">RFC 2717</a>, November 1999.
-
- [<a name="ref-RFC0952" id="ref-RFC0952">RFC0952</a>] Harrenstien, K., Stahl, M., and E. Feinler, "DoD Internet
- host table specification", <a href="http://tools.ietf.org/html/rfc952">RFC 952</a>, October 1985.
-
- [<a name="ref-RFC1034" id="ref-RFC1034">RFC1034</a>] Mockapetris, P., "Domain names - concepts and facilities",
- STD 13, <a href="http://tools.ietf.org/html/rfc1034">RFC 1034</a>, November 1987.
-
- [<a name="ref-RFC1123" id="ref-RFC1123">RFC1123</a>] Braden, R., "Requirements for Internet Hosts - Application
- and Support", STD 3, <a href="http://tools.ietf.org/html/rfc1123">RFC 1123</a>, October 1989.
-
- [<a name="ref-RFC1535" id="ref-RFC1535">RFC1535</a>] Gavron, E., "A Security Problem and Proposed Correction
- With Widely Deployed DNS Software", <a href="http://tools.ietf.org/html/rfc1535">RFC 1535</a>,
- October 1993.
-
- [<a name="ref-RFC1630" id="ref-RFC1630">RFC1630</a>] Berners-Lee, T., "Universal Resource Identifiers in WWW: A
- Unifying Syntax for the Expression of Names and Addresses
- of Objects on the Network as used in the World-Wide Web",
- <a href="http://tools.ietf.org/html/rfc1630">RFC 1630</a>, June 1994.
-
- [<a name="ref-RFC1736" id="ref-RFC1736">RFC1736</a>] Kunze, J., "Functional Recommendations for Internet
- Resource Locators", <a href="http://tools.ietf.org/html/rfc1736">RFC 1736</a>, February 1995.
-
- [<a name="ref-RFC1737" id="ref-RFC1737">RFC1737</a>] Sollins, K. and L. Masinter, "Functional Requirements for
- Uniform Resource Names", <a href="http://tools.ietf.org/html/rfc1737">RFC 1737</a>, December 1994.
-
- [<a name="ref-RFC1738" id="ref-RFC1738">RFC1738</a>] Berners-Lee, T., Masinter, L., and M. McCahill, "Uniform
- Resource Locators (URL)", <a href="http://tools.ietf.org/html/rfc1738">RFC 1738</a>, December 1994.
-
- [<a name="ref-RFC1808" id="ref-RFC1808">RFC1808</a>] Fielding, R., "Relative Uniform Resource Locators",
- <a href="http://tools.ietf.org/html/rfc1808">RFC 1808</a>, June 1995.
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 47]</span>
-<a name="page-48" id="page-48" href="#page-48"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- [<a name="ref-RFC2046" id="ref-RFC2046">RFC2046</a>] Freed, N. and N. Borenstein, "Multipurpose Internet Mail
- Extensions (MIME) Part Two: Media Types", <a href="http://tools.ietf.org/html/rfc2046">RFC 2046</a>,
- November 1996.
-
- [<a name="ref-RFC2141" id="ref-RFC2141">RFC2141</a>] Moats, R., "URN Syntax", <a href="http://tools.ietf.org/html/rfc2141">RFC 2141</a>, May 1997.
-
- [<a name="ref-RFC2396" id="ref-RFC2396">RFC2396</a>] Berners-Lee, T., Fielding, R., and L. Masinter, "Uniform
- Resource Identifiers (URI): Generic Syntax", <a href="http://tools.ietf.org/html/rfc2396">RFC 2396</a>,
- August 1998.
-
- [<a name="ref-RFC2518" id="ref-RFC2518">RFC2518</a>] Goland, Y., Whitehead, E., Faizi, A., Carter, S., and D.
- Jensen, "HTTP Extensions for Distributed Authoring --
- WEBDAV", <a href="http://tools.ietf.org/html/rfc2518">RFC 2518</a>, February 1999.
-
- [<a name="ref-RFC2557" id="ref-RFC2557">RFC2557</a>] Palme, J., Hopmann, A., and N. Shelness, "MIME
- Encapsulation of Aggregate Documents, such as HTML
- (MHTML)", <a href="http://tools.ietf.org/html/rfc2557">RFC 2557</a>, March 1999.
-
- [<a name="ref-RFC2718" id="ref-RFC2718">RFC2718</a>] Masinter, L., Alvestrand, H., Zigmond, D., and R. Petke,
- "Guidelines for new URL Schemes", <a href="http://tools.ietf.org/html/rfc2718">RFC 2718</a>, November 1999.
-
- [<a name="ref-RFC2732" id="ref-RFC2732">RFC2732</a>] Hinden, R., Carpenter, B., and L. Masinter, "Format for
- Literal IPv6 Addresses in URL's", <a href="http://tools.ietf.org/html/rfc2732">RFC 2732</a>, December 1999.
-
- [<a name="ref-RFC3305" id="ref-RFC3305">RFC3305</a>] Mealling, M. and R. Denenberg, "Report from the Joint
- W3C/IETF URI Planning Interest Group: Uniform Resource
- Identifiers (URIs), URLs, and Uniform Resource Names
- (URNs): Clarifications and Recommendations", <a href="http://tools.ietf.org/html/rfc3305">RFC 3305</a>,
- August 2002.
-
- [<a name="ref-RFC3490" id="ref-RFC3490">RFC3490</a>] Faltstrom, P., Hoffman, P., and A. Costello,
- "Internationalizing Domain Names in Applications (IDNA)",
- <a href="http://tools.ietf.org/html/rfc3490">RFC 3490</a>, March 2003.
-
- [<a name="ref-RFC3513" id="ref-RFC3513">RFC3513</a>] Hinden, R. and S. Deering, "Internet Protocol Version 6
- (IPv6) Addressing Architecture", <a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a>, April 2003.
-
- [<a name="ref-Siedzik" id="ref-Siedzik">Siedzik</a>] Siedzik, R., "Semantic Attacks: What's in a URL?",
- April 2001, <<a href="http://www.giac.org/practical/gsec/Richard_Siedzik_GSEC.pdf">http://www.giac.org/practical/gsec/</a>
- <a href="http://www.giac.org/practical/gsec/Richard_Siedzik_GSEC.pdf">Richard_Siedzik_GSEC.pdf</a>>.
-
-
-
-
-
-
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 48]</span>
-<a name="page-49" id="page-49" href="#page-49"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
-Appendix A. Collected ABNF for URI
-
- URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
-
- hier-part = "//" authority path-abempty
- / path-absolute
- / path-rootless
- / path-empty
-
- URI-reference = URI / relative-ref
-
- absolute-URI = scheme ":" hier-part [ "?" query ]
-
- relative-ref = relative-part [ "?" query ] [ "#" fragment ]
-
- relative-part = "//" authority path-abempty
- / path-absolute
- / path-noscheme
- / path-empty
-
- scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
-
- authority = [ userinfo "@" ] host [ ":" port ]
- userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
- host = IP-literal / IPv4address / reg-name
- port = *DIGIT
-
- IP-literal = "[" ( IPv6address / IPvFuture ) "]"
-
- IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
-
- IPv6address = 6( h16 ":" ) ls32
- / "::" 5( h16 ":" ) ls32
- / [ h16 ] "::" 4( h16 ":" ) ls32
- / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
- / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
- / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
- / [ *4( h16 ":" ) h16 ] "::" ls32
- / [ *5( h16 ":" ) h16 ] "::" h16
- / [ *6( h16 ":" ) h16 ] "::"
-
- h16 = 1*4HEXDIG
- ls32 = ( h16 ":" h16 ) / IPv4address
- IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
-
-
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 49]</span>
-<a name="page-50" id="page-50" href="#page-50"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- dec-octet = DIGIT ; 0-9
- / %x31-39 DIGIT ; 10-99
- / "1" 2DIGIT ; 100-199
- / "2" %x30-34 DIGIT ; 200-249
- / "25" %x30-35 ; 250-255
-
- reg-name = *( unreserved / pct-encoded / sub-delims )
-
- path = path-abempty ; begins with "/" or is empty
- / path-absolute ; begins with "/" but not "//"
- / path-noscheme ; begins with a non-colon segment
- / path-rootless ; begins with a segment
- / path-empty ; zero characters
-
- path-abempty = *( "/" segment )
- path-absolute = "/" [ segment-nz *( "/" segment ) ]
- path-noscheme = segment-nz-nc *( "/" segment )
- path-rootless = segment-nz *( "/" segment )
- path-empty = 0<pchar>
-
- segment = *pchar
- segment-nz = 1*pchar
- segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
- ; non-zero-length segment without any colon ":"
-
- pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
-
- query = *( pchar / "/" / "?" )
-
- fragment = *( pchar / "/" / "?" )
-
- pct-encoded = "%" HEXDIG HEXDIG
-
- unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
- reserved = gen-delims / sub-delims
- gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
- sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
- / "*" / "+" / "," / ";" / "="
-
-Appendix B. Parsing a URI Reference with a Regular Expression
-
- As the "first-match-wins" algorithm is identical to the "greedy"
- disambiguation method used by POSIX regular expressions, it is
- natural and commonplace to use a regular expression for parsing the
- potential five components of a URI reference.
-
- The following line is the regular expression for breaking-down a
- well-formed URI reference into its components.
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 50]</span>
-<a name="page-51" id="page-51" href="#page-51"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
-  12            3  4          5       6  7        8 9
-
- The numbers in the second line above are only to assist readability;
- they indicate the reference points for each subexpression (i.e., each
- paired parenthesis). We refer to the value matched for subexpression
- &lt;n&gt; as $&lt;n&gt;. For example, matching the above expression to
-
- <a href="http://www.ics.uci.edu/pub/ietf/uri/#Related">http://www.ics.uci.edu/pub/ietf/uri/#Related</a>
-
- results in the following subexpression matches:
-
- $1 = http:
- $2 = http
- $3 = //www.ics.uci.edu
- $4 = www.ics.uci.edu
- $5 = /pub/ietf/uri/
- $6 = &lt;undefined&gt;
- $7 = &lt;undefined&gt;
- $8 = #Related
- $9 = Related
-
- where &lt;undefined&gt; indicates that the component is not present, as is
- the case for the query component in the above example. Therefore, we
- can determine the value of the five components as
-
- scheme = $2
- authority = $4
- path = $5
- query = $7
- fragment = $9
-
- Going in the opposite direction, we can recreate a URI reference from
- its components by using the algorithm of <a href="#section-5.3">Section 5.3</a>.
-
-Appendix C. Delimiting a URI in Context
-
- URIs are often transmitted through formats that do not provide a
- clear context for their interpretation. For example, there are many
- occasions when a URI is included in plain text; examples include text
- sent in email, USENET news, and on printed paper. In such cases, it
- is important to be able to delimit the URI from the rest of the text,
- and in particular from punctuation marks that might be mistaken for
- part of the URI.
-
- In practice, URIs are delimited in a variety of ways, but usually
- within double-quotes "http://example.com/", angle brackets
- <http://example.com/>, or just by using whitespace:
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 51]</span>
-<a name="page-52" id="page-52" href="#page-52"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- http://example.com/
-
- These wrappers do not form part of the URI.
-
- In some cases, extra whitespace (spaces, line-breaks, tabs, etc.) may
- have to be added to break a long URI across lines. The whitespace
- should be ignored when the URI is extracted.
-
- No whitespace should be introduced after a hyphen ("-") character.
- Because some typesetters and printers may (erroneously) introduce a
- hyphen at the end of line when breaking it, the interpreter of a URI
- containing a line break immediately after a hyphen should ignore all
- whitespace around the line break and should be aware that the hyphen
- may or may not actually be part of the URI.
-
- Using <> angle brackets around each URI is especially recommended as
- a delimiting style for a reference that contains embedded whitespace.
-
- The prefix "URL:" (with or without a trailing space) was formerly
- recommended as a way to help distinguish a URI from other bracketed
- designators, though it is not commonly used in practice and is no
- longer recommended.
-
- For robustness, software that accepts user-typed URI should attempt
- to recognize and strip both delimiters and embedded whitespace.
-
- For example, the text
-
- Yes, Jim, I found it under "<a href="http://www.w3.org/Addressing/">http://www.w3.org/Addressing/</a>",
- but you can probably pick it up from <ftp://foo.example.
- com/rfc/>.  Note the warning in <http://www.ics.uci.edu/pub/
- <a href="http://www.ics.uci.edu/pub/ietf/uri/historical.html#WARNING">ietf/uri/historical.html#WARNING</a>>.
-
- contains the URI references
-
- <a href="http://www.w3.org/Addressing/">http://www.w3.org/Addressing/</a>
- ftp://foo.example.com/rfc/
- <a href="http://www.ics.uci.edu/pub/ietf/uri/historical.html#WARNING">http://www.ics.uci.edu/pub/ietf/uri/historical.html#WARNING</a>
-
-
-
-
-
-
-
-
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 52]</span>
-<a name="page-53" id="page-53" href="#page-53"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
-Appendix D. Changes from <a href="http://tools.ietf.org/html/rfc2396">RFC 2396</a>
-
-D.1. Additions
-
- An ABNF rule for URI has been introduced to correspond to one common
- usage of the term: an absolute URI with optional fragment.
-
- IPv6 (and later) literals have been added to the list of possible
- identifiers for the host portion of an authority component, as
- described by [<a href="http://tools.ietf.org/html/rfc2732" title="&quot;Format for Literal IPv6 Addresses in URL's&quot;">RFC2732</a>], with the addition of "[" and "]" to the
- reserved set and a version flag to anticipate future versions of IP
- literals. Square brackets are now specified as reserved within the
- authority component and are not allowed outside their use as
- delimiters for an IP literal within host. In order to make this
- change without changing the technical definition of the path, query,
- and fragment components, those rules were redefined to directly
- specify the characters allowed.
-
- As [<a href="http://tools.ietf.org/html/rfc2732" title="&quot;Format for Literal IPv6 Addresses in URL's&quot;">RFC2732</a>] defers to [<a href="http://tools.ietf.org/html/rfc3513" title="&quot;Internet Protocol Version 6 (IPv6) Addressing Architecture&quot;">RFC3513</a>] for definition of an IPv6 literal
- address, which, unfortunately, lacks an ABNF description of
- IPv6address, we created a new ABNF rule for IPv6address that matches
- the text representations defined by <a href="http://tools.ietf.org/html/rfc3513#section-2.2">Section 2.2</a> of [<a href="http://tools.ietf.org/html/rfc3513" title="&quot;Internet Protocol Version 6 (IPv6) Addressing Architecture&quot;">RFC3513</a>].
- Likewise, the definition of IPv4address has been improved in order to
- limit each decimal octet to the range 0-255.
-
- <a href="#section-6">Section 6</a>, on URI normalization and comparison, has been completely
- rewritten and extended by using input from Tim Bray and discussion
- within the W3C Technical Architecture Group.
-
-D.2. Modifications
-
- The ad-hoc BNF syntax of <a href="http://tools.ietf.org/html/rfc2396">RFC 2396</a> has been replaced with the ABNF of
- [<a href="http://tools.ietf.org/html/rfc2234" title="&quot;Augmented BNF for Syntax Specifications: ABNF&quot;">RFC2234</a>]. This change required all rule names that formerly
- included underscore characters to be renamed with a dash instead. In
- addition, a number of syntax rules have been eliminated or simplified
- to make the overall grammar more comprehensible. Specifications that
- refer to the obsolete grammar rules may be understood by replacing
- those rules according to the following table:
-
-
-
-
-
-
-
-
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 53]</span>
-<a name="page-54" id="page-54" href="#page-54"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- +----------------+--------------------------------------------------+
- | obsolete rule | translation |
- +----------------+--------------------------------------------------+
- | absoluteURI | absolute-URI |
- | relativeURI | relative-part [ "?" query ] |
- | hier_part | ( "//" authority path-abempty / |
- | | path-absolute ) [ "?" query ] |
- | | |
- | opaque_part | path-rootless [ "?" query ] |
- | net_path | "//" authority path-abempty |
- | abs_path | path-absolute |
- | rel_path | path-rootless |
- | rel_segment | segment-nz-nc |
- | reg_name | reg-name |
- | server | authority |
- | hostport | host [ ":" port ] |
- | hostname | reg-name |
- | path_segments | path-abempty |
- | param | *<pchar excluding ";"> |
- | | |
- | uric | unreserved / pct-encoded / ";" / "?" / ":" |
- | | / "@" / "&" / "=" / "+" / "$" / "," / "/" |
- | | |
- | uric_no_slash | unreserved / pct-encoded / ";" / "?" / ":" |
- | | / "@" / "&" / "=" / "+" / "$" / "," |
- | | |
- | mark | "-" / "_" / "." / "!" / "~" / "*" / "'" |
- | | / "(" / ")" |
- | | |
- | escaped | pct-encoded |
- | hex | HEXDIG |
- | alphanum | ALPHA / DIGIT |
- +----------------+--------------------------------------------------+
-
- Use of the above obsolete rules for the definition of scheme-specific
- syntax is deprecated.
-
- <a href="#section-2">Section 2</a>, on characters, has been rewritten to explain what
- characters are reserved, when they are reserved, and why they are
- reserved, even when they are not used as delimiters by the generic
- syntax. The mark characters that are typically unsafe to decode,
- including the exclamation mark ("!"), asterisk ("*"), single-quote
- ("'"), and open and close parentheses ("(" and ")"), have been moved
- to the reserved set in order to clarify the distinction between
- reserved and unreserved and, hopefully, to answer the most common
- question of scheme designers. Likewise, the section on
- percent-encoded characters has been rewritten, and URI normalizers
- are now given license to decode any percent-encoded octets
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 54]</span>
-<a name="page-55" id="page-55" href="#page-55"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- corresponding to unreserved characters. In general, the terms
- "escaped" and "unescaped" have been replaced with "percent-encoded"
- and "decoded", respectively, to reduce confusion with other forms of
- escape mechanisms.
-
- The ABNF for URI and URI-reference has been redesigned to make them
- more friendly to LALR parsers and to reduce complexity. As a result,
- the layout form of syntax description has been removed, along with
- the uric, uric_no_slash, opaque_part, net_path, abs_path, rel_path,
- path_segments, rel_segment, and mark rules. All references to
- "opaque" URIs have been replaced with a better description of how the
- path component may be opaque to hierarchy. The relativeURI rule has
- been replaced with relative-ref to avoid unnecessary confusion over
- whether they are a subset of URI. The ambiguity regarding the
- parsing of URI-reference as a URI or a relative-ref with a colon in
- the first segment has been eliminated through the use of five
- separate path matching rules.
-
- The fragment identifier has been moved back into the section on
- generic syntax components and within the URI and relative-ref rules,
- though it remains excluded from absolute-URI. The number sign ("#")
- character has been moved back to the reserved set as a result of
- reintegrating the fragment syntax.
-
- The ABNF has been corrected to allow the path component to be empty.
- This also allows an absolute-URI to consist of nothing after the
- "scheme:", as is present in practice with the "dav:" namespace
- [<a href="http://tools.ietf.org/html/rfc2518" title="&quot;HTTP Extensions for Distributed Authoring -- WEBDAV&quot;">RFC2518</a>] and with the "about:" scheme used internally by many WWW
- browser implementations. The ambiguity regarding the boundary
- between authority and path has been eliminated through the use of
- five separate path matching rules.
-
- Registry-based naming authorities that use the generic syntax are now
- defined within the host rule. This change allows current
- implementations, where whatever name provided is simply fed to the
- local name resolution mechanism, to be consistent with the
- specification. It also removes the need to re-specify DNS name
- formats here. Furthermore, it allows the host component to contain
- percent-encoded octets, which is necessary to enable
- internationalized domain names to be provided in URIs, processed in
- their native character encodings at the application layers above URI
- processing, and passed to an IDNA library as a registered name in the
- UTF-8 character encoding. The server, hostport, hostname,
- domainlabel, toplabel, and alphanum rules have been removed.
-
- The resolving relative references algorithm of [<a href="http://tools.ietf.org/html/rfc2396" title="&quot;Uniform Resource Identifiers (URI): Generic Syntax&quot;">RFC2396</a>] has been
- rewritten with pseudocode for this revision to improve clarity and
- fix the following issues:
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 55]</span>
-<a name="page-56" id="page-56" href="#page-56"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- o [<a href="http://tools.ietf.org/html/rfc2396" title="&quot;Uniform Resource Identifiers (URI): Generic Syntax&quot;">RFC2396</a>] <a href="http://tools.ietf.org/html/rfc2396#section-5.2">section 5.2</a>, step 6a, failed to account for a base URI
- with no path.
-
- o Restored the behavior of [<a href="http://tools.ietf.org/html/rfc1808" title="&quot;Relative Uniform Resource Locators&quot;">RFC1808</a>] where, if the reference
- contains an empty path and a defined query component, the target
- URI inherits the base URI's path component.
-
- o The determination of whether a URI reference is a same-document
- reference has been decoupled from the URI parser, simplifying the
- URI processing interface within applications in a way consistent
- with the internal architecture of deployed URI processing
- implementations. The determination is now based on comparison to
- the base URI after transforming a reference to absolute form,
- rather than on the format of the reference itself. This change
- may result in more references being considered "same-document"
- under this specification than there would be under the rules given
- in <a href="http://tools.ietf.org/html/rfc2396">RFC 2396</a>, especially when normalization is used to reduce
- aliases. However, it does not change the status of existing
- same-document references.
-
- o Separated the path merge routine into two routines: merge, for
- describing combination of the base URI path with a relative-path
- reference, and remove_dot_segments, for describing how to remove
- the special "." and ".." segments from a composed path. The
- remove_dot_segments algorithm is now applied to all URI reference
- paths in order to match common implementations and to improve the
- normalization of URIs in practice. This change only impacts the
- parsing of abnormal references and same-scheme references wherein
- the base URI has a non-hierarchical path.
-
-Index
-
- A
- ABNF 11
- absolute 27
- absolute-path 26
- absolute-URI 27
- access 9
- authority 17, 18
-
- B
- base URI 28
-
- C
- character encoding 4
- character 4
- characters 8, 11
- coded character set 4
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 56]</span>
-<a name="page-57" id="page-57" href="#page-57"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- D
- dec-octet 20
- dereference 9
- dot-segments 23
-
- F
- fragment 16, 24
-
- G
- gen-delims 13
- generic syntax 6
-
- H
- h16 20
- hier-part 16
- hierarchical 10
- host 18
-
- I
- identifier 5
- IP-literal 19
- IPv4 20
- IPv4address 19, 20
- IPv6 19
- IPv6address 19, 20
- IPvFuture 19
-
- L
- locator 7
- ls32 20
-
- M
- merge 32
-
- N
- name 7
- network-path 26
-
- P
- path 16, 22, 26
- path-abempty 22
- path-absolute 22
- path-empty 22
- path-noscheme 22
- path-rootless 22
- path-abempty 16, 22, 26
- path-absolute 16, 22, 26
- path-empty 16, 22, 26
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 57]</span>
-<a name="page-58" id="page-58" href="#page-58"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- path-rootless 16, 22
- pchar 23
- pct-encoded 12
- percent-encoding 12
- port 22
-
- Q
- query 16, 23
-
- R
- reg-name 21
- registered name 20
- relative 10, 28
- relative-path 26
- relative-ref 26
- remove_dot_segments 33
- representation 9
- reserved 12
- resolution 9, 28
- resource 5
- retrieval 9
-
- S
- same-document 27
- sameness 9
- scheme 16, 17
- segment 22, 23
- segment-nz 23
- segment-nz-nc 23
- sub-delims 13
- suffix 27
-
- T
- transcription 8
-
- U
- uniform 4
- unreserved 13
- URI grammar
- absolute-URI 27
- ALPHA 11
- authority 18
- CR 11
- dec-octet 20
- DIGIT 11
- DQUOTE 11
- fragment 24
- gen-delims 13
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 58]</span>
-<a name="page-59" id="page-59" href="#page-59"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
- h16 20
- HEXDIG 11
- hier-part 16
- host 19
- IP-literal 19
- IPv4address 20
- IPv6address 20
- IPvFuture 19
- LF 11
- ls32 20
- OCTET 11
- path 22
- path-abempty 22
- path-absolute 22
- path-empty 22
- path-noscheme 22
- path-rootless 22
- pchar 23
- pct-encoded 12
- port 22
- query 24
- reg-name 21
- relative-ref 26
- reserved 13
- scheme 17
- segment 23
- segment-nz 23
- segment-nz-nc 23
- SP 11
- sub-delims 13
- unreserved 13
- URI 16
- URI-reference 25
- userinfo 18
- URI 16
- URI-reference 25
- URL 7
- URN 7
- userinfo 18
-
-
-
-
-
-
-
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 59]</span>
-<a name="page-60" id="page-60" href="#page-60"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
-Authors' Addresses
-
- Tim Berners-Lee
- World Wide Web Consortium
- Massachusetts Institute of Technology
- 77 Massachusetts Avenue
- Cambridge, MA 02139
- USA
-
- Phone: +1-617-253-5702
- Fax: +1-617-258-5999
- EMail: timbl at w3.org
- URI: <a href="http://www.w3.org/People/Berners-Lee/">http://www.w3.org/People/Berners-Lee/</a>
-
-
- Roy T. Fielding
- Day Software
- 5251 California Ave., Suite 110
- Irvine, CA 92617
- USA
-
- Phone: +1-949-679-2960
- Fax: +1-949-679-2972
- EMail: fielding at gbiv.com
- URI: <a href="http://roy.gbiv.com/">http://roy.gbiv.com/</a>
-
-
- Larry Masinter
- Adobe Systems Incorporated
- 345 Park Ave
- San Jose, CA 95110
- USA
-
- Phone: +1-408-536-3024
- EMail: LMM at acm.org
- URI: <a href="http://larry.masinter.net/">http://larry.masinter.net/</a>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-<span class="grey">Berners-Lee, et al. Standards Track [Page 60]</span>
-<a name="page-61" id="page-61" href="#page-61"><span class="break"> </span></a>
-<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
-
-
-Full Copyright Statement
-
- Copyright (C) The Internet Society (2005).
-
- This document is subject to the rights, licenses and restrictions
- contained in <a href="http://tools.ietf.org/html/bcp78">BCP 78</a>, and except as set forth therein, the authors
- retain all their rights.
-
- This document and the information contained herein are provided on an
- "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS
- OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE INTERNET
- ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR IMPLIED,
- INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE
- INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED
- WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
-
-Intellectual Property
-
- The IETF takes no position regarding the validity or scope of any
- Intellectual Property Rights or other rights that might be claimed to
- pertain to the implementation or use of the technology described in
- this document or the extent to which any license under such rights
- might or might not be available; nor does it represent that it has
- made any independent effort to identify any such rights. Information
- on the IETF's procedures with respect to rights in IETF Documents can
- be found in <a href="http://tools.ietf.org/html/bcp78">BCP 78</a> and <a href="http://tools.ietf.org/html/bcp79">BCP 79</a>.
-
- Copies of IPR disclosures made to the IETF Secretariat and any
- assurances of licenses to be made available, or the result of an
- attempt made to obtain a general license or permission for the use of
- such proprietary rights by implementers or users of this
- specification can be obtained from the IETF on-line IPR repository at
- <a href="http://www.ietf.org/ipr">http://www.ietf.org/ipr</a>.
-
- The IETF invites any interested party to bring to its attention any
- copyrights, patents or patent applications, or other proprietary
- rights that may cover technology that may be required to implement
- this standard. Please address the information to the IETF at ietf-
- ipr at ietf.org.
-
-
-Acknowledgement
-
- Funding for the RFC Editor function is currently provided by the
- Internet Society.
-
-
-
-
-
-
-Berners-Lee, et al. Standards Track [Page 61]
-<span class="break"> </span>
-
-</pre><br>
-<span class="noprint"><small><small>Html markup produced by rfcmarkup 1.46, available from
-<a href="http://tools.ietf.org/tools/rfcmarkup/">http://tools.ietf.org/tools/rfcmarkup/</a>
-</small></small></span>
-
-</body></html>
\ No newline at end of file
diff --git a/third_party/uriparser-0.7.5/doc/rfc3986_grammar_only.txt b/third_party/uriparser-0.7.5/doc/rfc3986_grammar_only.txt
deleted file mode 100644
index b42ea35..0000000
--- a/third_party/uriparser-0.7.5/doc/rfc3986_grammar_only.txt
+++ /dev/null
@@ -1,80 +0,0 @@
-URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
-
-hier-part = "//" authority path-abempty
- / path-absolute
- / path-rootless
- / path-empty
-
-URI-reference = URI / relative-ref
-
-absolute-URI = scheme ":" hier-part [ "?" query ]
-
-relative-ref = relative-part [ "?" query ] [ "#" fragment ]
-
-relative-part = "//" authority path-abempty
- / path-absolute
- / path-noscheme
- / path-empty
-
-scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
-
-authority = [ userinfo "@" ] host [ ":" port ]
-userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
-host = IP-literal / IPv4address / reg-name
-port = *DIGIT
-
-IP-literal = "[" ( IPv6address / IPvFuture ) "]"
-
-IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
-
-IPv6address = 6( h16 ":" ) ls32
- / "::" 5( h16 ":" ) ls32
- / [ h16 ] "::" 4( h16 ":" ) ls32
- / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
- / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
- / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
- / [ *4( h16 ":" ) h16 ] "::" ls32
- / [ *5( h16 ":" ) h16 ] "::" h16
- / [ *6( h16 ":" ) h16 ] "::"
-
-h16 = 1*4HEXDIG
-ls32 = ( h16 ":" h16 ) / IPv4address
-IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
-dec-octet = DIGIT ; 0-9
- / %x31-39 DIGIT ; 10-99
- / "1" 2DIGIT ; 100-199
- / "2" %x30-34 DIGIT ; 200-249
- / "25" %x30-35 ; 250-255
-
-reg-name = *( unreserved / pct-encoded / sub-delims )
-
-path = path-abempty ; begins with "/" or is empty
- / path-absolute ; begins with "/" but not "//"
- / path-noscheme ; begins with a non-colon segment
- / path-rootless ; begins with a segment
- / path-empty ; zero characters
-
-path-abempty = *( "/" segment )
-path-absolute = "/" [ segment-nz *( "/" segment ) ]
-path-noscheme = segment-nz-nc *( "/" segment )
-path-rootless = segment-nz *( "/" segment )
-path-empty = 0<pchar>
-
-segment = *pchar
-segment-nz = 1*pchar
-segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
- ; non-zero-length segment without any colon ":"
-
-pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
-
-query = *( pchar / "/" / "?" )
-
-fragment = *( pchar / "/" / "?" )
-
-pct-encoded = "%" HEXDIG HEXDIG
-
-unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
-reserved = gen-delims / sub-delims
-gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
-sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
- / "*" / "+" / "," / ";" / "="
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-grass/libkml.git
More information about the Pkg-grass-devel
mailing list