[med-svn] [python-avro] 01/02: Imported Upstream version 1.7.7

Afif Elghraoui afif-guest at moszumanska.debian.org
Sat Oct 17 08:36:40 UTC 2015


This is an automated email from the git hooks/post-receive script.

afif-guest pushed a commit to branch master
in repository python-avro.

commit b7e7effced56ef5c594726aea1c382a096330153
Author: Afif Elghraoui <afif at ghraoui.name>
Date:   Fri Oct 16 23:21:36 2015 -0700

    Imported Upstream version 1.7.7
---
 .gitignore                                         |    9 +
 BUILD.txt                                          |   30 +
 CHANGES.txt                                        | 3039 ++++++++++++++++++++
 DIST_README.txt                                    |   14 +
 LICENSE.txt                                        |  308 ++
 NOTICE.txt                                         |    9 +
 README.txt                                         |    9 +
 build.sh                                           |  182 ++
 doc/build.xml                                      |   37 +
 doc/examples/example.py                            |   15 +
 doc/examples/java-example/pom.xml                  |   52 +
 .../src/main/java/example/GenericMain.java         |   53 +
 .../src/main/java/example/SpecificMain.java        |   55 +
 doc/examples/mr-example/pom.xml                    |   59 +
 .../src/main/java/example/AvroWordCount.java       |  105 +
 .../src/main/java/example/GenerateData.java        |   39 +
 .../main/java/example/MapReduceAvroWordCount.java  |  124 +
 .../src/main/java/example/MapReduceColorCount.java |  107 +
 .../src/main/java/example/MapredColorCount.java    |   93 +
 doc/examples/user.avsc                             |    9 +
 doc/forrest.properties                             |    3 +
 doc/src/cli.xconf                                  |  328 +++
 .../content/htmldocs/canonical-completeness.html   |  204 ++
 doc/src/content/xdocs/gettingstartedjava.xml       |  476 +++
 doc/src/content/xdocs/gettingstartedpython.xml     |  221 ++
 doc/src/content/xdocs/idl.xml                      |  448 +++
 doc/src/content/xdocs/index.xml                    |   96 +
 doc/src/content/xdocs/mr.xml                       |  580 ++++
 doc/src/content/xdocs/sasl.xml                     |  152 +
 doc/src/content/xdocs/site.xml                     |   91 +
 doc/src/content/xdocs/spec.xml                     | 1415 +++++++++
 doc/src/content/xdocs/tabs.xml                     |   39 +
 doc/src/resources/images/apache_feather.gif        |  Bin 0 -> 4128 bytes
 doc/src/resources/images/avro-logo.png             |  Bin 0 -> 4777 bytes
 doc/src/resources/images/favicon.ico               |  Bin 0 -> 766 bytes
 doc/src/skinconf.xml                               |  350 +++
 lang/py/build.xml                                  |  181 ++
 lang/py/lib/pyAntTasks-1.3-LICENSE.txt             |  202 ++
 lang/py/lib/pyAntTasks-1.3.jar                     |  Bin 0 -> 18788 bytes
 lang/py/lib/simplejson/LICENSE.txt                 |   19 +
 lang/py/lib/simplejson/__init__.py                 |  318 ++
 lang/py/lib/simplejson/_speedups.c                 | 2329 +++++++++++++++
 lang/py/lib/simplejson/decoder.py                  |  354 +++
 lang/py/lib/simplejson/encoder.py                  |  440 +++
 lang/py/lib/simplejson/scanner.py                  |   65 +
 lang/py/lib/simplejson/tool.py                     |   37 +
 lang/py/scripts/avro                               |  262 ++
 lang/py/setup.py                                   |   49 +
 lang/py/src/avro/__init__.py                       |   18 +
 lang/py/src/avro/datafile.py                       |  376 +++
 lang/py/src/avro/io.py                             |  890 ++++++
 lang/py/src/avro/ipc.py                            |  485 ++++
 lang/py/src/avro/protocol.py                       |  224 ++
 lang/py/src/avro/schema.py                         |  784 +++++
 lang/py/src/avro/tool.py                           |  160 ++
 lang/py/src/avro/txipc.py                          |  222 ++
 lang/py/test/av_bench.py                           |   77 +
 lang/py/test/gen_interop_data.py                   |   47 +
 lang/py/test/sample_http_client.py                 |   92 +
 lang/py/test/sample_http_server.py                 |   79 +
 lang/py/test/test_datafile.py                      |  202 ++
 lang/py/test/test_datafile_interop.py              |   39 +
 lang/py/test/test_io.py                            |  337 +++
 lang/py/test/test_ipc.py                           |   38 +
 lang/py/test/test_protocol.py                      |  439 +++
 lang/py/test/test_schema.py                        |  475 +++
 lang/py/test/test_script.py                        |  256 ++
 lang/py/test/txsample_http_client.py               |  106 +
 lang/py/test/txsample_http_server.py               |   70 +
 lang/py3/avro/__init__.py                          |   34 +
 lang/py3/avro/datafile.py                          |  532 ++++
 lang/py3/avro/io.py                                |  933 ++++++
 lang/py3/avro/ipc.py                               |  694 +++++
 lang/py3/avro/protocol.py                          |  402 +++
 lang/py3/avro/schema.py                            | 1283 +++++++++
 lang/py3/avro/tests/av_bench.py                    |  119 +
 lang/py3/avro/tests/gen_interop_data.py            |   56 +
 lang/py3/avro/tests/run_tests.py                   |   76 +
 lang/py3/avro/tests/sample_http_client.py          |   94 +
 lang/py3/avro/tests/sample_http_server.py          |   81 +
 lang/py3/avro/tests/test_datafile.py               |  278 ++
 lang/py3/avro/tests/test_datafile_interop.py       |   83 +
 lang/py3/avro/tests/test_io.py                     |  351 +++
 lang/py3/avro/tests/test_ipc.py                    |  158 +
 lang/py3/avro/tests/test_protocol.py               |  504 ++++
 lang/py3/avro/tests/test_schema.py                 |  625 ++++
 lang/py3/avro/tests/test_script.py                 |  321 +++
 lang/py3/avro/tests/txsample_http_client.py        |  108 +
 lang/py3/avro/tests/txsample_http_server.py        |   72 +
 lang/py3/avro/tool.py                              |  166 ++
 lang/py3/avro/txipc.py                             |  224 ++
 lang/py3/scripts/avro                              |  336 +++
 lang/py3/setup.py                                  |  151 +
 pom.xml                                            |  265 ++
 share/VERSION.txt                                  |    1 +
 share/editors/README.txt                           |    1 +
 share/editors/avro-idl.vim                         |   84 +
 share/rat-excludes.txt                             |   55 +
 share/schemas/org/apache/avro/data/Json.avsc       |   15 +
 .../org/apache/avro/ipc/HandshakeRequest.avsc      |   11 +
 .../org/apache/avro/ipc/HandshakeResponse.avsc     |   15 +
 .../org/apache/avro/ipc/trace/avroTrace.avdl       |   68 +
 .../org/apache/avro/ipc/trace/avroTrace.avpr       |   82 +
 .../apache/avro/mapred/tether/InputProtocol.avpr   |   64 +
 .../apache/avro/mapred/tether/OutputProtocol.avpr  |   82 +
 share/test/data/schema-tests.txt                   |  192 ++
 share/test/data/syncInMeta.avro                    |  Bin 0 -> 22609 bytes
 share/test/data/test.avro12                        |  Bin 0 -> 3120 bytes
 share/test/data/weather-snappy.avro                |  Bin 0 -> 330 bytes
 share/test/data/weather-sorted.avro                |  Bin 0 -> 335 bytes
 share/test/data/weather.avro                       |  Bin 0 -> 358 bytes
 share/test/data/weather.json                       |    5 +
 share/test/interop/bin/test_rpc_interop.sh         |   83 +
 share/test/interop/rpc/add/onePlusOne/request.avro |  Bin 0 -> 171 bytes
 .../test/interop/rpc/add/onePlusOne/response.avro  |  Bin 0 -> 75 bytes
 share/test/interop/rpc/echo/foo/request.avro       |  Bin 0 -> 458 bytes
 share/test/interop/rpc/echo/foo/response.avro      |  Bin 0 -> 390 bytes
 share/test/interop/rpc/hello/world/request.avro    |  Bin 0 -> 162 bytes
 share/test/interop/rpc/hello/world/response.avro   |  Bin 0 -> 89 bytes
 share/test/schemas/BulkData.avpr                   |   21 +
 share/test/schemas/FooBarSpecificRecord.avsc       |   22 +
 share/test/schemas/contexts.avdl                   |   41 +
 share/test/schemas/echo.avdl                       |   32 +
 share/test/schemas/interop.avsc                    |   28 +
 share/test/schemas/mail.avpr                       |   26 +
 share/test/schemas/namespace.avpr                  |   28 +
 share/test/schemas/simple.avpr                     |   80 +
 share/test/schemas/social.avdl                     |   33 +
 share/test/schemas/stringables.avdl                |   32 +
 share/test/schemas/weather.avsc                    |    8 +
 130 files changed, 27478 insertions(+)

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8c6b133
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,9 @@
+.project
+.settings
+.classpath
+target
+*.pyc
+.svn
+/build
+test-output
+/dist
diff --git a/BUILD.txt b/BUILD.txt
new file mode 100644
index 0000000..a59c80c
--- /dev/null
+++ b/BUILD.txt
@@ -0,0 +1,30 @@
+Apache Avro Build Instructions
+
+REQUIREMENTS
+
+The following packages must be installed before Avro can be built:
+
+ - Java: JDK 1.6, Maven 2 or better, protobuf-compiler
+ - PHP: php5, phpunit, php5-gmp
+ - Python: 2.5 or greater, python-setuptools for dist target
+ - C: gcc, cmake, asciidoc, source-highlight
+ - C++: cmake 2.8.4 or greater, g++, flex, bison, libboost-dev
+ - C#: mono-devel mono-gmcs nunit
+ - JavaScript: nodejs, npm
+ - Ruby: ruby 1.8.6 or greater, ruby-dev, gem, rake, echoe, yajl-ruby
+ - Perl: perl 5.8.1 or greater, gmake, Module::Install,
+   Module::Install::ReadmeFromPod, Module::Install::Repository,
+   Math::BigInt, JSON::XS, Try::Tiny, Regexp::Common, Encode,
+   IO::String, Object::Tiny, Compress::Zlib, Test::More,
+   Test::Exception, Test::Pod
+ - Apache Ant 1.7
+ - Apache Forrest 0.8 (for documentation)
+ - md5sum, sha1sum, used by top-level dist target
+
+BUILDING
+
+Once the requirements are installed, build.sh can be used as follows:
+
+ './build.sh test' runs tests for all languages
+ './build.sh dist' creates all release distribution files in dist/
+ './build.sh clean' removes all generated artifacts
diff --git a/CHANGES.txt b/CHANGES.txt
new file mode 100644
index 0000000..188ec44
--- /dev/null
+++ b/CHANGES.txt
@@ -0,0 +1,3039 @@
+Avro Change Log
+
+Avro 1.7.7 (23 July 2014)
+
+  NEW FEATURES
+
+    AVRO-1315. Java: Schema Validation utilities. (scottcarey and tomwhite)
+
+    AVRO-1439. Java: Add AvroMultipleInputs for mapred. (Harsh J via cutting)
+
+    AVRO-974. Add a Perl implementation of Avro. (Yann Kerhervé & John Karp)
+
+    AVRO-1471. Java: Permit writing generated code in different
+    character encodings. (Eugene Mustaphin via cutting)
+
+    AVRO-1402. Add optional subtypes to specification, initially
+    including a subtype of bytes and fixed for decimal values.
+    (tomwhite & Ryan Blue via cutting)
+
+    AVRO-1522. Java: Add support for compression codecs to SortedKeyValueFile.
+    (Steven Willis via cutting)
+
+    AVRO-1474. C++ resolving decoder doesn't work when reader schema
+    has more fields than writer schema. (thiru with help from Ramana
+    Suvarapu)
+
+    AVRO-1352. Schema for fixed types corrupted when writing out in
+    JSON format (Steve Roehrs via thiru)
+
+    AVRO-1533. Java: In schema resolution, permit conversion between
+    bytes and string. (cutting)
+
+  OPTIMIZATIONS
+
+    AVRO-1455. Deep copy does not need to create new instances for primitives.
+    (tomwhite)
+
+  IMPROVEMENTS
+
+    AVRO-1441. Java: Improve documentation for Maven configuration.
+    (Jesse Anderson via cutting)
+
+    AVRO-1447. Java: Remove dead code from example in documentation.
+    (Jesse Anderson via cutting)
+
+    AVRO-1449. Java: Optionally validate default values while reading schemas.
+    (cutting)
+
+    AVRO-1472. Java: Clarify parse method in getting started guide.
+    (Michael Knapp via cutting)
+
+    AVRO-1465. Java: Improve the error message when union dispatch fails.
+    (Gabriel Reid via cutting)
+
+    AVRO-1482. In specification, place "null" first in unions as best practice.
+    (cutting)
+
+    AVRO-1476. Java: Remove transient declaration from Schema.Field#position.
+    (Robert Chu via cutting)
+
+    AVRO-1512. Java: Support Thrift unions. (cutting)
+
+    AVRO-1535. Java: Make the name .X refer to X in the null namespace.
+    This permits aliases to names in the null namespace. (cutting)
+
+    AVRO-1536. Ruby: Remove monkeypatching of Enumerable.
+    (Willem van Bergen via martinkl)
+
+    AVRO-1546. Java: Change GenericData.Record#toString() to not
+    escape forward slashes. (Brandon Forehand via cutting)
+
+  BUG FIXES
+
+    AVRO-1446. C#: Correctly handle system errors in RPC.
+    (David Taylor via cutting)
+
+    AVRO-1445. Java: Fix protobuf support to correctly handle enums
+    with default values.  (cutting)
+
+    AVRO-1436. C#: Fix tests to run multiple times. (David Taylor via cutting)
+
+    AVRO-1458. Java: Setting char record field via reflection affects other
+    fields. (tomwhite)
+
+    AVRO-1454. Java: Fix GenericData#toString and AvroAsTextRecordReader
+    to generate valid Json for NaN and infinities.  (cutting)
+
+    AVRO-1473. Java: Fix references to names in the empty namespace.
+    (Gabriel Reid via cutting)
+
+    AVRO-1459. Ruby: Fix a typo in Rakefile that breaks 'gem install'.
+    (Tomas Svarovsky via cutting)
+
+    AVRO-1457. Java: Fix Encoder so that offset in non-array-backed
+    ByteBuffers is not altered when written. (Rob Turner via cutting)
+
+    AVRO-1442. Java: Fix ResolvingGrammarGenerator to work with fixed type.
+    (Jim Pivarski via cutting)
+
+    AVRO-1500. Java: Fix bug in handling of Thrift shorts in unions.
+    (Michael Pershyn via cutting)
+
+    AVRO-1513. Perl: Remove test plans from unit test files.
+    (John Karp via cutting)
+
+    AVRO-1462. Perl: Stop spurious serializer warnings about Non-ASCII
+    decimal characters.  (John Karp via cutting)
+
+    AVRO-1470. Perl: Fix encoding of boolean values. (John Karp via cutting)
+
+    AVRO-1525. Java: ReflectData cannot resolve union with fixed. (tomwhite)
+
+    AVRO-1499. Ruby: Fix encoding issue that caused corrupted data files
+    to be written under Ruby 2.0+. (Willem van Bergen and martinkl)
+
+    AVRO-1498. Java: Fix custom encodings to work in reflect without
+    Unsafe access. (Christopher Mann via cutting)
+
+    AVRO-1448. Python3: Fix setup.py installation through PyPI. (taton)
+
+    AVRO-1540. C++ doesn't build in Ubuntu. (thiru)
+
+Avro 1.7.6 (15 January 2014)
+
+  NEW FEATURES
+
+    AVRO-975. C#: Add RPC support. (Mark Lamley via cutting)
+
+    AVRO-1388. Java: Add fsync support to DataFileWriter.
+    (Hari Shreedharan via cutting)
+
+    AVRO-1373. Java: Add support for "xz" compression codec, using LZMA2.
+    (Nick White via cutting)
+
+    AVRO-1400. Java: Add AvroDefault reflect annotation to specify
+    default values. (cutting)
+
+    AVRO-1397. Java: Binary fragment tools can now read multiple
+    objects from their input.  (Rob Turner via cutting)
+
+    AVRO-1396. Java: Enable tojson command-line tool to pretty print output.
+    (Rob Turner via cutting)
+
+    AVRO-1409. Java: Add an API for testing schema compatibility.
+    (Christophe Taton via cutting)
+
+    AVRO-1379. C: avro_file_writer_append_encoded() function.
+    (Mark Teodoro via dcreager)
+
+    AVRO-1414. C++: Add support for deflate-compressed data files.
+    (Daniel Russel via cutting)
+
+    AVRO-1418. Java: Add sync support to AvroMultipleOutputs.
+    (Deepak Kumar V via cutting)
+
+    AVRO-1421. Java: Add an @AvroSchema annotation to reflect. (cutting)
+
+    AVRO-1382. Add support for Python3. (Christophe Taton via cutting)
+
+  OPTIMIZATIONS
+
+    AVRO-1348. Java: Improve UTF-8 to String conversion performance in
+    Java 6. (cutting)
+
+  IMPROVEMENTS
+
+    AVRO-1355. Java: Reject schemas with duplicate field
+    names. (Christophe Taton via cutting)
+
+    AVRO-1332. C#: Improve DatumReader performance.
+    (David McIntosh via cutting)
+
+    AVRO-1387. Java: Add DataFileWriter option to inhibit flush per block.
+    (Hari Shreedharan via cutting)
+
+    AVRO-1384. Java: Permit Maven to find imports within project.
+    (Alexandre Normand via cutting)
+
+    AVRO-1397. Java: Improve error message when missing field has no default.
+    (David Carr via cutting)
+
+    AVRO-1398. Increase default sync interval from 16k to 64k.
+    (Rob Turner via cutting)
+
+    AVRO-1234. Java: Permit AvroInputFormat to process files whose
+    names don't end in .avro.  (Dave Beech & Sandy Ryza via cutting)
+
+    AVRO-1344. Java: Expose sync interval configuration in mapreduce API.
+    (Rob Turner via cutting)
+
+    AVRO-1406. C++. GenericRecord (GenericDatum, etc.) doesn't support
+    getters and setters with field name argument.
+    (Iaroslav Zeigerman via thiru)
+
+    AVRO-1063. Ruby: Use multi_json instead of requiring yajl.
+    (Duke via cutting)
+
+    AVRO-1225. Java: Add guide for MapReduce API. (Brock Noland via cutting)
+
+    AVRO-1426. Java: Add mapreduce word count example.
+    (Jesse Anderson via cutting)
+
+    AVRO-987. Java: Make Avro OSGI-ready. (Ioannis Canellos via cutting)
+
+    AVRO-1427. Java: Improve ResolvingDecoder tests. (Rob Turner via cutting)
+
+    AVRO-1432. Java: Reduce javadoc warnings. (cutting)
+
+    AVRO-1415. C++ binary encoder and decoder don't handle
+    uninitialized enums. (Ramana Suvarapu via thiru)
+
+    AVRO-1434. C#: Fix ObjectCreator to be thread safe.
+    (David Taylor via cutting)
+
+  BUG FIXES
+
+    AVRO-1368. Fix SpecificDatumWriter to, when writing a string
+    schema, not silently convert any object to a string.
+    (Christophe Taton via cutting)
+
+    AVRO-1374. Java: Fix compilation against Hadoop 2.1.
+    (Julian Zhou via cutting)
+
+    AVRO-1366. Fix specification's description of metadata format.
+    (cutting)
+
+    AVRO-1377. Java: Fix a bug in Schema#toString() when a namespaced
+    enum or fixed is defined within an un-namespaced record.
+    (Graham Sanderson via cutting)
+
+    AVRO-1399. Java: Fix a test within TestAvroKeyOutputFormat.
+    (Rob Turner via cutting)
+
+    AVRO-1410. Explicit version specification in pom prevents dependency
+    management. (Eric Sammer via tomwhite)
+
+    AVRO-1358. C: Hide symbols that aren't in the public API. (dcreager)
+
+    AVRO-1237. C: Bounds-check union discriminant when reading a data file.
+    (Michael Cooper via dcreager)
+
+    AVRO-1369. C: Use correct byte-swapping functions on Mac OS X.
+    (thinker0 via dcreager)
+
+    AVRO-1405. C: Check for end-of-file correctly.
+    (Mika Ristimaki via dcreager)
+
+    AVRO-1424. ValidatingDecoder hangs on large schema (thiru)
+
+    AVRO-1433. Java: Fix compiler to not drop aliases when StringType
+    is String. (cutting)
+
+Avro 1.7.5 (12 August 2013)
+
+  NEW FEATURES
+
+    AVRO-1307. Java: Add 'cat' tool to append and sample data files.
+    (Vincenz Priesnitz via cutting)
+
+    AVRO-1274. Java: Add a schema builder API. (tomwhite)
+
+    AVRO-1319. Java: Add command line tools to generate random data
+    files and to convert Avro to Trevni.  (cutting)
+
+    AVRO-823: C#: Add data file support. (David McIntosh via cutting)
+
+    AVRO-896. C: Snappy compression codec. (Grisha Trubetskoy via dcreager)
+
+    AVRO-1337. Java: Add a command line tool to generate schema files
+    from a protocol. (Bertrand Dechoux via cutting)
+
+    AVRO-1341. Java: Add reflection annotations @AvroName, @AvroIgnore,
+    @AvroMeta, @AvroAlias and @AvroEncode. (Vincenz Priesnitz via cutting)
+
+    AVRO-1353. Java: Permit specification of data model (generic,
+    specific, reflect, or other) in mapreduce job configuration.
+    (Marshall Bockrath-Vandegrift via cutting)
+
+  IMPROVEMENTS
+
+    AVRO-1260. Ruby: Improve read performance. (Martin Kleppmann via cutting)
+
+    AVRO-1267. Java: Permit dashes in IDL property names. (cutting)
+
+    AVRO-1272. Ruby: Improve schema namespace handling.
+    (Martin Kleppmann via cutting)
+
+    AVRO-1268. Java: Extend support for stringables from reflect to
+    specific.  String schemas in generated classes now support the
+    "java-class" and "java-key-class" properties.  The built-in Java
+    types BigDecimal, BigInteger, URI, URL, and File can now be fields
+    in generated classes. (Alexandre Normand and cutting)
+
+    AVRO-1259. Java: Improve Trevni's encoding of sparse columns.
+    (cutting)
+
+    AVRO-1287. Add data file with deflate codec to the interoperability
+    test suite. (martinkl)
+
+    AVRO-1288. Ruby: Add support for deflate codec in data files.
+    (martinkl)
+
+    AVRO-867. Java: Enable command-line tools to read data files from
+    any Hadoop FileSystem implementation. (Vincenz Priesnitz via cutting)
+
+    AVRO-1299. Java: SpecificRecordBase implements GenericRecord.
+    (Christophe Taton via cutting)
+
+    AVRO-1282. Java: Use sun.misc.Unsafe to improve Reflect API Performance.
+    (Leo Romanoff via scottcarey)
+
+    AVRO-1313. Java: Add system property avro.disable.unsafe for disabling
+    use of sun.misc.Unsafe. (scottcarey)
+
+    AVRO-1327. Java: Make GenericEnumSymbol implement Comparable. (cutting)
+
+    AVRO-1314. Java: add @threadSafe annotation to maven plugins.
+    (scottcarey)
+
+    AVRO-1334. Java: Upgrade snappy-java dependency to 1.0.5
+    (scottcarey)
+
+    AVRO-1238. C: EOF detection in avro_file_reader_read_value.
+    (Michael Cooper via dcreager)
+
+    AVRO-1324. C: Handle namespaces in schema parsing.
+    (Ben Walsh via dcreager)
+
+    AVRO-1290. Handling NaN and positive and negative infinities in
+    C++ Json (Daniel Russel via thiru)
+
+    AVRO-1351. Extend SortedKeyValueFile to support data models
+    besides generic. (cutting)
+
+    AVRO-1261. Clarify in documentation that generated no-arg
+    constructors do not use default values from schema. (cutting)
+
+    AVRO-1297. NettyTransceiver: Provide overloaded
+    close(boolean awaitCompletion). (jbaldassari)
+
+    AVRO-1279. C: Treat missing codec in data files as null codec.
+    (Carl Steinbach via dcreager)
+
+    AVRO-1325. Java: Enhanced Schema Builder API. (scottcarey)
+
+  BUG FIXES
+
+    AVRO-1296. Python: Fix schemas retrieved from protocol types
+    to not ignore namespaces. (Jeremy Kahn via philz)
+
+    AVRO-1266. Java: Fix mapred.AvroMultipleOutputs to support multiple
+    different schemas.  (Ashish Nagavaram via martinkl)
+
+    AVRO-1295. Java: Fix printing of a non-null namespace within a
+    null namespace. (cutting)
+
+    AVRO-1300. Java: Fix generated copy constructors to copy field
+    values. (Christophe Taton via cutting)
+
+    AVRO-1309. Java: Fix nested protobuf enums. (cutting)
+
+    AVRO-1308. Java: Fix repeated protobuf messages & enums.
+    (Steve Zesch & cutting)
+
+    AVRO-1306. Java mapred: Fix incorrect documentation for combiners.
+    (Mina Naguib via martinkl)
+
+    AVRO-1320. Java: Fix Trevni to correctly read unions and specific enums.
+    (cutting)
+
+    AVRO-1326. Java: Fix bug in BZip2 codec. (cutting)
+
+    AVRO-1322. Java: Add Hadoop version classifier to trevni-avro
+    Maven artifacts.  (massie)
+
+    AVRO-1316. Java: Fix compiler to split long schema string
+    constants so javac can compile them. (Jeremy Kahn via cutting)
+
+    AVRO-1331. Java: Fix schema-parsing incompatibility from AVRO-1295.
+    (cutting)
+
+    AVRO-1181. compileJsonSchemaFromString(std::string) declared in
+    Compiler.hh but not defined (Daniel Russel via thiru)
+
+    AVRO-1346. C++: schema parser cannot parse verbose primitive types
+    (Skye Wanderman-Milne via thiru)
+
+    AVRO-1171. Avro C++ Json Decoder: Double cannot be decoded (Sam
+    Overend via thiru)
+
+    AVRO-1293. Java: Fix potential deadlock in NettyTransceiver.
+    (James Baldassari via cutting)
+
+    AVRO-1292. Java: Fix potential client blocking in NettyTransceiver.
+    (James Baldassari via cutting)
+
+    AVRO-1144. Java: Deadlock with FSInput and Hadoop NativeS3FileSystem.
+    (scottcarey)
+
+    AVRO-1356. Java: Fix AvroMultipleOutputs for map-only jobs.
+    (Alan Paulsen via cutting)
+
+Avro 1.7.4 (22 February 2013)
+
+  NEW FEATURES
+
+    AVRO-1248. Avro Tool to dump protocol of a remote RPC Service
+    (Gareth Davis via philz)
+
+    AVRO-1229. Add support for booleans to Trevni. (cutting)
+
+    AVRO-1250. Add a command-line tool to concatenate data files.
+    (Nick White via cutting)
+
+    AVRO-1243. Java: Add support for bzip2 file compression and
+    translate Hadoop job compression options. (Ted Malaska via cutting)
+
+    AVRO-1253. Java: Add support for bzip2 file compression to Trevni.
+    (Ted Malaska via cutting)
+
+    AVRO-1254. Java: Add support for new mapreduce APIs to Trevni.
+    (Ted Malaska via cutting)
+
+  IMPROVEMENTS
+
+    AVRO-1211. Add MR guide to documentation. (Skye Wanderman-Milne via
+    tomwhite)
+
+    AVRO-1221. Java: Fix TestSaslDigestMd5 to pass on IBM JVM.
+    (Rodrigo Trujillo via cutting)
+
+    AVRO-1008. Java: Improve support for forcing connection
+    handshakes. (jbaldassari & cutting)
+
+    AVRO-970. Java: Make Codec API public. (Rui Pereira via cutting)
+
+    AVRO-1223. Java: Add a static method to generated classes that
+    returns its schema, getClassSchema().  (cutting)
+
+    AVRO-1232. Java: Add a toString() method to AvroWrapper so that it
+    works with TextOutputFormat.  (Garrett Wu via cutting)
+
+    AVRO-1241. Java: Optimize Trevni string input. (Joseph Adler via cutting)
+
+    AVRO-1198. Java: Improve error message for malformed data. (cutting)
+
+    AVRO-1252. Java: Add a '-string' option to compiler command line
+    to cause java.lang.String to be used instead of Utf8. (cutting)
+
+    AVRO-1255. Python: Make 'names' parameter optional in to_json methods.
+    (Jeremy Kahn via cutting)
+
+    AVRO-1251. Java: Add TestNettyServerWithCompression, illustrating
+    how one can add compression to Avro Netty-based RPC.
+    (Ted Malaska via cutting)
+
+  BUG FIXES
+
+    AVRO-1231. Java: Fix Trevni shredder to work on non-recursive
+    schemas with multiple references. (Mark Wagner via cutting)
+
+    AVRO-1230. avro-mapred-1.7.3-hadoop1.jar does not work with
+    Hadoop 1. (tomwhite)
+
+    AVRO-1233. Java: Fix InputBuffer's constructor to use the
+    specified initial position. (Mark Wagner via cutting)
+
+    AVRO-1227. Java: Large ByteBuffers can corrupt output. (cutting)
+
+    AVRO-1228. Java: Fix Trevni to use default values for missing Avro fields.
+    (cutting)
+
+    AVRO-1240. Java: Fix SpecificDatumReader(Class) constructor to use
+    correct ClassLoader. (cutting)
+
+    AVRO-1242. Java: Fix AvroTrevniOutputFormat to correctly get file
+    metadata from JobConf. (Ted Malaska via cutting)
+
+    AVRO-1220. Java: Fix a deadlock when reading by replacing parser
+    symbol constructors with factory methods. (cutting)
+
+    AVRO-1249. Java: Fix DataFileWriter#close() to not throw an
+    exception when called twice.  (E. Sammer via cutting)
+
+    AVRO-1247. Java: Fix Requestor and Responder implementations to
+    use correct ClassLoader. (cutting)
+
+    AVRO-1215. Java: Fix AvroMultipleOutputs when specifying baseOutputPath.
+    (Ashish Nagavaram via cutting)
+
+    AVRO-1257. Ruby: Fix UTF-8 encoding in Ruby 1.9.
+    (Martin Kleppmann via cutting)
+
+    AVRO-1258. Ruby: Fix handling of RPC errors.  (Martin Kleppmann via cutting)
+
+Avro 1.7.3 (6 December 2012)
+
+  NEW FEATURES
+
+    AVRO-485.  JavaScript: Add validator. (Quinn Slack via cutting)
+
+    AVRO-1157. Java: Extend schema and protocol property support from
+    string-only to full JSON. (cutting)
+
+    AVRO-1186. Java: Extend specific compiler to emit Java annotations
+    on interfaces, classes, fields and methods for the property
+    "javaAnnotation".  The value of the property may either be a
+    string or a JSON array of strings in order to emit multiple
+    annotations. (cutting)
+
+    AVRO-1188. Java: Permit external schema imports for schemas in
+    Maven plugin. (Sharmarke Aden via tomwhite)
+
+    AVRO-1202. Java & Python: Add "Getting Started" guides.
+    (Skye Wanderman-Milne via cutting)
+
+    AVRO-1205. Java: Add stereotype annotation to generated classes.
+    All classes generated by the specific compiler now have the
+    annotation org.apache.avro.specific.AvroGenerated.
+    (Sharmarke Aden via cutting)
+
+    AVRO-988. Java: Add option to make fields in generated classes
+    private, public, or public & deprecated.  This is specified with
+    the "fieldVisibility" option in Maven and is public_deprecated by
+    default for compatibility. (Jeff Kolesky via cutting)
+
+    AVRO-1209. Java: Add option to generate immutable classes, without
+    setter methods.  This is specified with the "createSetters"
+    boolean option in Maven and is true by default for compatibility.
+    (Jeff Kolesky via cutting)
+
+  IMPROVEMENTS
+
+    AVRO-1169. Java: Reduce memory footprint of resolver.
+    (Hernan Otero via cutting)
+
+    AVRO-1183. Java: Provide a better error message when the schema
+    for a Pair cannot be inferred. (cutting)
+
+    AVRO-1207. Java: Add tests for Maven plugins. (Sharmarke Aden via cutting)
+
+    AVRO-1210. Java: Fix mistakes in AvroMultipleOutputs error messages.
+    (Dave Beech via cutting)
+
+    AVRO-1178. Java: Fix typos in parsing document.
+    (Martin Kleppmann via cutting)
+
+    AVRO-1089. C: Remove performance penalty when using resolved writer with
+    arrays.  (dcreager)
+
+  BUG FIXES
+
+    AVRO-1171. Java: Don't call configure() twice on mappers & reducers.
+    (Dave Beech via cutting)
+
+    AVRO-1170. Java: Avro's new mapreduce APIs don't work with Hadoop 2.
+    (tomwhite)
+
+    AVRO-1197. Java: Expose mapreduce tests so that 'maven install'
+    works correctly.  (Mike Percy via cutting)
+
+    AVRO-1200. DatumWriter can write malformed data if collection is
+    modified during write. (tomwhite)
+
+    AVRO-1199. Java: Fix SortedKeyValueFile to copy the key used to
+    compare against next.  Also improve GenericData#deepCopy() to be
+    generic, so that its return type matches its parameter type. (cutting)
+
+    AVRO-1201. Java: Fix GenericData#toString() to generate valid JSON for
+    enum values. (Sharmarke Aden via cutting)
+
+    AVRO-1206. Ruby: Fix UTF-8 handling in Ruby 1.9.
+    (Nicolas Fouché via cutting)
+
+    AVRO-1177. Ruby: Fix RPC to only send handshake for first request
+    over a connection.  (Georg Franz via cutting)
+
+    AVRO-1175. Java: Clear thread local reference in BinaryData#compare().
+    (cutting)
+
+    AVRO-1163. C: Fix a memory leak in avro_fixed_private(). (Maxim
+    Pugachev via brucem)
+
+    AVRO-1174. C: Fix Windows build. (Stefan Langer via brucem)
+
+Avro 1.7.2 (20 October 2012)
+
+  NEW FEATURES
+
+    AVRO-806.  Add specification of the Trevni columnar file format
+    and a Java implementation of it. (cutting)
+
+  IMPROVEMENTS
+
+    AVRO-1146. Java: Serialize several built-in Java classes as
+    strings, including BigDecimal, BigInteger, URI, URL, Date and
+    File.  (Alexandre Normand and cutting)
+
+    AVRO-1147. Java: Permit stringable map keys in reflect.
+    (Alexandre Normand)
+
+    AVRO-1151. Netty Avro server should expose the number of connections
+    currently open. (Hari Shreedharan via tomwhite)
+
+    AVRO-1149. Java: Add a constructor to generated classes with all
+    fields as parameters. (Gabriel Reid via cutting)
+
+    AVRO-1138. C: Add function for opening existing data file with non-default
+    block size.  (Maxim Pugachev via dcreager)
+
+    AVRO-1129. C: Detect when avro_schema_decref frees schema.
+    (Maxim Pugachev via dcreager)
+
+    AVRO-1162. Java: Extend AvroKeyValueOutputFormat to support
+    reflection.  (Alexandre Normand via cutting)
+
+    AVRO-1142. Clarify Snappy block compression in specification. (cutting)
+
+  BUG FIXES
+
+    AVRO-1128. Java: Fix SpecificRecordBase#equals() to work for
+    records that contain maps.  (cutting)
+
+    AVRO-1131. Generated build makefiles for MSYS/MinGW use Visual
+    Studio compiler flags (Laurent Moss via thiru)
+
+    AVRO-1103. Java: Fix SpecificData and mapreduce to use correct
+    classloader. (cutting)
+
+    AVRO-1135. Avro C++ fails to build on Mac. (thiru)
+
+    AVRO-1140. Buffer.hh includes Config.hh without "../" (Jan van der
+    Lugt via thiru)
+
+    AVRO-1141. Avro data files are created without O_TRUNC (Martin
+    Nagy via thiru)
+
+    AVRO-1143. avrogencpp generates $Undefined$ for some union types (thiru)
+
+    AVRO-1152. Java: Fix TestTraceSingletons for Java 7. (cutting)
+
+    AVRO-1111. Malformed data can cause OutOfMemoryError in Avro IPC.
+    (Mike Percy via tomwhite)
+
+    AVRO-1155. Stringable Date test in TestReflect fails if timezone doesn't
+    match locale's default. Removed Date from built-in stringables. (tomwhite)
+
+    AVRO-851. Java: Fix a bug in GenericData#toString() when escaping
+    characters. (Jeff Mesnil via cutting)
+
+    AVRO-1154. Java: Fix NettyTransceiver to not hang when the server
+    is stopped. (Karel Vervaeke & Bruno Dumon via cutting)
+
+    AVRO-1158. C: Fixed infinite loop in deflate decompression codec.
+    (Lucas Martin-King via dcreager)
+
+    AVRO-1159. C: Check union discriminants in avro_value_read.
+    (Lucas Martin-King via dcreager)
+
+    AVRO-1160. C: Better error reporting in avrocat.  (Lucas Martin-King
+    via dcreager)
+
+    AVRO-1166. Java: Fix bug in SpecificData.getSchema(Map). (George
+    Fletcher via cutting)
+
+    AVRO-1150. Java: Fix tests to create all temporary files in target
+    directories.  (Gabriel Reid via cutting)
+
+    AVRO-1164. C: Clean up valgrind warnings in test_avro_schema test case.
+    (Vivek Nadkarni via dcreager)
+
+    AVRO-1165. C: Fix memory leak in generic value implementations involving
+    LINK schemas.  (Vivek Nadkarni via dcreager)
+
+    AVRO-1161. C: Fix memory leak in avro{append,cat,mod,pipe} (dcreager)
+
+Avro 1.7.1 (16 July 2012)
+
+  NEW FEATURES
+
+    AVRO-1106. Java: Add AvroMultipleOutputs for newer mapreduce API.
+    (Ashish Nagavaram via cutting)
+
+    AVRO-1112. Java: Add support for Snappy codec to newer mapreduce API.
+    (Matt Mead via cutting)
+
+    AVRO-1108. Java: Add support for reflect API to newer mapreduce API.
+    (cutting)
+
+    AVRO-1104. C: avroappend utility. (Lucas Martin-King via dcreager)
+
+    AVRO-1117. C: avro_file_writer_create_with_codec_fp and
+    avro_file_writer_create_with_fp functions, with should_close parameter.
+    (Lucas Martin-King via dcreager)
+
+  IMPROVEMENTS
+
+    AVRO-1120. Let AvroMultipleOutput jobs use multiple schemas with
+    map-only jobs. (Ashish Nagavaram via cutting)
+
+    AVRO-1119. Java: Permit NettyServer to be used with SSL.
+    (Sebastian Ortega via cutting)
+
+    AVRO-1125. Java: Remove un-needed warning about reflect API. (cutting)
+
+  BUG FIXES
+
+    AVRO-1114. Java: Update license headers for new mapreduce code.  (cutting)
+
+    AVRO-1069. Java: Fix HttpTransceiver to close streams. (cutting)
+
+    AVRO-1115. C: Fix crash error in codec cleanup code. (Maxim Pugachev via
+    dcreager)
+
+    AVRO-1116. C++ code crashes on Data files with no data. (thiru)
+
+    AVRO-1109. CSharp specific fails on multidimensional arrays.
+    (Mark Farnan via thiru)
+
+    AVRO-1153. Java: Fix reflect to be able to write unions that
+    contain stringable schemas. (Alexandre Normand via cutting)
+
+Avro 1.7.0 (11 June 2012)
+
+  NEW FEATURES
+
+    AVRO-301. Handle non-reserved properties appropriately in the Python
+    implementation. (Macrio Silva via tebeka)
+
+    AVRO-300. Support "doc" field in schemas in Python implementation.
+    (Harsh J via tebeka)
+
+    AVRO-1006.  Add schema fingerprinting to specification and Java.
+    (Raymie Stata via cutting)
+
+    AVRO-593. Java: Add support for Hadoop's newer mapreduce API.
+    (Garrett Wu via cutting)
+
+    AVRO-1052. Java: Add AvroMultipleOutputFormat, to permit splitting
+    mapreduce output to multiple locations.  (Ashish Nagavaram via cutting)
+
+  IMPROVEMENTS
+
+    AVRO-1060. Java: Upgrade Netty to version 3.4.0.  (Karthik K via cutting)
+
+    AVRO-1068. Avro Java does not build on recent Eclipse versions
+    with m2eclipse (thiru)
+
+    AVRO-551. C: Now compiles on Win32, using Visual Studio C++ 2008.
+    (Vivek Nadkarni via dcreager)
+
+    AVRO-1075. Add some Eclipse stuff to .gitignore.  (Karthik K via cutting)
+
+    AVRO-1085. Fingerprinting for C#. (Eric Hauser via thiru)
+
+    AVRO-1079. C++ Generator, improve include guard generation. (thiru)
+
+    AVRO-1062. Java: Remove use of java.rmi.server.UID so things work
+    on Android.  (Kevin Zhao via cutting)
+
+    AVRO-1090. Java: Permit appending to files besides java.io.File,
+    e.g., files in HDFS. (cutting)
+
+    AVRO-1074. Java: Optimize Utf8#length() and #toString() to not
+    allocate a String when the length is zero.  (cutting)
+
+    AVRO-1050. PHP: Optimize memory use by string append. (A B via cutting)
+
+    AVRO-1095. C++ compiler warns about control reaching end of
+    doAdvance (in JsonIO.cc) which returns something other than
+    void. (thiru)
+
+    AVRO-1026. Add namespace support to C++. (Keh-Li Sheng via thiru)
+
+    AVRO-1097. Fix BinaryDecoder so that EOFException is thrown
+    instead of a generic IOException when reading ints and longs past
+    the end of file.  (thiru & cutting)
+
+    AVRO-1098: CSharp: Fix compilation to work under older versions of Mono.
+    (cutting)
+
+  BUG FIXES
+
+    AVRO-1045. Java: Fix a bug in GenericData#deepCopy() of ByteBuffer values.
+    (cutting)
+
+    AVRO-1055. Race condition in Java fingerprinting code (thiru)
+
+    AVRO-954. Typo in JsonCodec.cc (Nebojsa Sabovic via thiru)
+
+    AVRO-1045. C: Use less stack space in avro_file_writer_create (dcreager)
+
+    AVRO-1070. AvroSequenceFileOutputFormat is in wrong package. (thiru)
+
+    AVRO-1080. JsonIO.cc should allow \u escape sequence in
+    string. (Keh-Li Sheng via thiru)
+
+    AVRO-1066. ArrayIndexOutOfBoundsException in ParsingEncoder when
+    trying to use a json encoder to serialize a deep object
+    graph. (thiru)
+
+    AVRO-1065. NodeRecord::isValid() treats records with no fields as
+    invalid. (thiru)
+
+    AVRO-1081. Java: Fix to be able to write ByteBuffers that have no
+    backing array.  Also fix reflection to correctly read ByteBuffer
+    fields.  (cutting)
+
+    AVRO-1046. Java: Fix ReflectDatumReader to be able to read generic
+    or specific arrays. (cutting)
+
+    AVRO-1056. Java: Fix reflect to correctly handle unions containing
+    maps.  (Kevin Zhao via cutting)
+
+    AVRO-1076. Java: Fix Protocol#equals() to consider
+    properties. (Karthik K via cutting)
+
+    AVRO-1094. Java: Fix specific compiler to better support
+    non-default templates.  (Ed Kohlwey via cutting)
+
+    AVRO-1082. C: Avoid leaking open FILE objects.
+    (Pugachev Maxim via dcreager)
+
+    AVRO-1096. C: Don't set default CMAKE_OSX_ARCHITECTURES. (dcreager)
+
+    AVRO-1084. C: Fix reference counting in file_reader and file_writer.
+    (Pugachev Maxim via dcreager)
+
+    AVRO-1083. C: Fix multiple memory leaks.  (Pugachev Maxim via dcreager)
+
+    AVRO-1086. C: Fix possible crash bug in default codec initialization.
+    (Pugachev Maxim via dcreager)
+
+    AVRO-1096. C: Describe CMAKE_OSX_ARCHITECTURES in installation
+    instructions.  (dcreager)
+
+    AVRO-1088. C: Performance tests for arrays and schema resolution.
+    (Vivek Nadkarni via dcreager)
+
+    AVRO-1092. C: Error management code can be defined in a thread-safe
+    manner.  (Pugachev Maxim and Vivek Nadkarni via dcreager)
+
+    AVRO-1091. C: Helper scripts for calling CMake.
+    (Vivek Nadkarni via dcreager)
+
+    AVRO-1087. C: avro_file_writer_open() and appending Avro values
+    works correctly.  (Pugachev Maxim via dcreager)
+
+    AVRO-1102. C: Remove memory leak in avro_string().  (Maxim Pugachev via
+    dcreager)
+
+    AVRO-1099. Java: Fix JsonDecoder to permit floats and doubles to
+    be read from JSON values without decimal points, and for ints and
+    longs to be read from JSON values with decimal points. (cutting)
+
+Avro 1.6.3 (5 March 2012)
+
+    AVRO-1077. Missing 'inline' for union set function. (thiru)
+
+    AVRO-1078. ostreamOutputStream declaration missing in C++ API (thiru)
+
+    AVRO-1051. Java: Fix specific RPC so that method is found when
+    parameters are a union of a primitive and null.
+    (Hamed Asghari via cutting)
+
+    AVRO-1049. Java: Fix GenericData.Record#equals() to correctly
+    compare schemas and fix Schema#equals() to consider order. (cutting)
+
+  IMPROVEMENTS
+
+    AVRO-1030. Fix a broken link in the documentation.
+
+  BUG FIXES
+
+    AVRO-1037. Problems using Avro 1.6.2 with Hadoop (CDH3 or 1.0) (scottcarey)
+
+    AVRO-1036. Fix a regression in IDL imports created by AVRO-971.
+    (George Fletcher & cutting)
+
+    AVRO-1031. C: Test cases made too many assumptions about memcmp
+    result. (dcreager)
+
+    AVRO-1033. C: Fixed x86 assembly implementation of atomic reference
+    counting primitives. (Vivek Nadkarni via dcreager)
+
+    AVRO-1034. C: Resolved readers initialize complex array values
+    correctly. (Vivek Nadkarni via dcreager)
+
+    AVRO-1038. C: Require GCC 4.5.0 to use GCC atomic intrinsics.
+    (Vivek Nadkarni via dcreager)
+
+    AVRO-1039. C: Don't use nonexistent codecs in test cases. (dcreager)
+
+    AVRO-1041. Java: Fix Utf8 to reuse array in more cases.
+    (Dave Irving via cutting)
+
+    AVRO-1027. Java: Fix deadlock in NettyTransceiver.
+    (Simon Wilkinson via cutting)
+
+Avro 1.6.2 (13 February 2012)
+
+  NEW FEATURES
+
+    AVRO-854.  Python: Permit DataFileWriter and DataFileReader to be
+    used as context managers in "with" statements. (Harsh J via cutting)
+
+    AVRO-986. C: avromod utility for modifying structural properties of
+    an Avro data file. (dcreager)
+
+  IMPROVEMENTS
+
+    AVRO-963. Java: Permit compiler template directory to be
+    overridden by Maven plugins.  (George Fletcher via cutting)
+
+    AVRO-953. Python: Permit users to override HTTP path in RPC.
+    (Craig Landry via cutting)
+
+    AVRO-972. Java: Add support for Infinity and NaN as default values
+    for float and double.  Since JSON does not permit these as numeric
+    types, we use the strings "NaN", "Infinity" and "-Infinity" in
+    schemas.  These are also permitted in IDL.  (cutting)
+
+    AVRO-965. Java: Enhance IDL to support properties for protocols
+    and messages. (George Fletcher via cutting)
+
+    AVRO-976. Java: Extend NettyServer to permit specification of an
+    ExecutionHandler, to handle multiple requests simultaneously.
+    (Bruno Dumon via cutting)
+
+    AVRO-960. C: avro_value_set_string and avro_value_set_string_len
+    input parameters are now "const char *" instead of "char *".
+    (Lucas Martin-King via dcreager)
+
+    AVRO-961. C: avrocat/avropipe can now read from stdin.
+    (Michael Cooper via dcreager)
+
+    AVRO-957. C: Codec support in C library.  (Michael Cooper and Lucas
+    Martin-King via dcreager)
+
+    AVRO-926. Java: Fix tests to pass under JDK 7.  (cutting)
+
+    AVRO-956. Remove dependency on Flex/Bison. (thiru)
+
+    AVRO-1011. Improve POM structure. (Lars Francke via scottcarey)
+
+    AVRO-1016. Java: Add Field#getAliases() method to better permit
+    copying of schemas. (cutting)
+
+    AVRO-1005. Java: Extend HttpTransceiver to permit specification of
+    a Proxy. (Craig Landry via cutting)
+
+    AVRO-1010. Java: Improve codec javadoc. (Lars Francke via cutting)
+
+    AVRO-1018. Java: add svn:ignore to eclipse generated files for
+    protobuf, thrift, and archetype modules (scottcarey)
+
+    AVRO-1019. Java: Add unit test for Netty server concurrent
+    execution. (jbaldassari)
+
+    AVRO-995. Java: Update Dependencies for 1.6.2. (scottcarey)
+
+    AVRO-1012. Java: Improve avro-service-archetype: POM and IT
+    changes. (Lars Francke via scottcarey)
+
+    AVRO-971. Java: Permit IDL imports from classpath in Maven.
+    (Victor Chau via cutting)
+
+    AVRO-1007. Java: Enhance builder API's validity checks.
+    (jbaldassari & cutting)
+
+    AVRO-1015. Support for C++ build using Microsoft Visual Studio on Windows.
+    (thiru)
+
+    AVRO-1021. Clarify some naming issues in the specification.
+    (Raymie Stata via cutting)
+
+    AVRO-980. C: avro_schema_from_json ignores length parameter.  Add
+    avro_schema_from_json_length that doesn't.
+    (Michael Cooper and dcreager)
+
+  BUG FIXES
+
+    AVRO-962. Java: Fix Maven plugin to support string type override.
+    (George Fletcher via cutting)
+
+    AVRO-835. C#: Fix codegen for protocols to not fail.
+    (Dona Alvarez via cutting)
+
+    AVRO-966. Java: Fix ReflectDatumWriter to be able to correctly
+    write unions containing Collection and/or ByteBuffer. (cutting)
+
+    AVRO-977. Java: Fix codegen to not generate deprecated code.
+    (Hamed Asghari via cutting)
+
+    AVRO-978. Java: Fix reflect to better handle Byte type.  (cutting)
+
+    AVRO-968. C: Fixed avro_value_cmp and avro_value_cmp_fast for string
+    values. (Vivek Nadkarni via dcreager)
+
+    AVRO-982. Java: Fix NettyTransceiver to not hang when server stops.
+    (Bruno Dumon via cutting)
+
+    AVRO-984. C: Resolved writers initialize complex array values
+    correctly. (Vivek Nadkarni via dcreager)
+
+    AVRO-994. Java: TestFileSpanStorage.testTonsOfSpans() fails on my
+    slow VM. (jbaldassari)
+
+    AVRO-993. Java: Add methods back to GenericDatumReader that were
+    removed in AVRO-839. (jbaldassari)
+
+    AVRO-1000. Java: Remove incompatible implementations of equals()
+    and hashCode() from GenericData.Array.  (cutting)
+
+    AVRO-1002. Fix a broken link in the specification. (cutting)
+
+    AVRO-1003. C: Fix pkg-config file when codecs are missing.
+    (dcreager)
+
+    AVRO-1004. C: avropipe no longer displays NUL terminator for string
+    values. (dcreager)
+
+    AVRO-986. C: File headers no longer contain sync marker. (Michael
+    Cooper via dcreager)
+
+    AVRO-986. Java: DataFileReader correctly handles sync marker
+    appearing within file header. (cutting via dcreager)
+
+    AVRO-1014. C: Check for errors producing JSON output in avrocat.
+    (Lucas Martin-King via dcreager)
+
+    AVRO-996. Java: SpecificRecord builder pattern object copy fails
+    with unions in some cases. (scottcarey and jbaldassari)
+
+    AVRO-1020. Java: Fix builder API to correctly handle default
+    values for enums.  (cutting)
+
+    AVRO-1013. Java: NettyTransceiver can hang after server
+    restart. (jbaldassari)
+
+Avro 1.6.1 (8 November 2011)
+
+  INCOMPATIBLE CHANGES
+
+    AVRO-951. Java: Fix generated code to not conflict with fields
+    named 'data'.  Code generated by the 1.6.0 compiler must be
+    re-generated to work correctly with the 1.6.1 runtime. (cutting)
+
+  NEW FEATURES
+
+    AVRO-821. PHP: Add support for parsing protocols. (Andy Wick,
+    Saleem Shafi and A B via cutting)
+
+  OPTIMIZATIONS
+
+    AVRO-946. Java: Optimize union resolution when writing. (cutting)
+
+  IMPROVEMENTS
+
+  BUG FIXES
+
+    AVRO-943. Java: Fix an intermittent deadlock in
+    TestNettyServerWithCallbacks.  (James Baldassari via cutting)
+
+    AVRO-950. C: Fix source tarball to include VERSION.txt. (dcreager)
+
+Avro 1.6.0 (2 November 2011)
+
+  NEW FEATURES
+
+    AVRO-839. Java: Add accessor methods and builders to generated
+    Java classes.  Builders use default values from schemas for fields
+    that are not explicitly set.  Direct use of public fields is now
+    deprecated and may be removed in a future release.  (James
+    Baldassari via cutting)
+
+    AVRO-805: Java: Add support for reading and writing instances of
+    Protocol Buffer (protobuf) generated classes.  This permits
+    protobuf-defined data structures to be written and read from
+    Avro-format data files.  (cutting)
+
+    AVRO-881. Java: Add a 'getmeta' tool that lists a file's metadata.
+    (Tom White via cutting)
+
+    AVRO-863. C: Schema resolution using new value interface. (dcreager)
+
+    AVRO-893. C: Avro data file functions using new value interface.
+    (dcreager)
+
+    AVRO-919. C: Produce JSON encoding of Avro values using new value
+    interface. (dcreager)
+
+    AVRO-920. C: Memory readers and writers are now reusable. (dcreager)
+
+    AVRO-921. C: Default wrapped buffer implementation is zero-copy.
+    (dcreager)
+
+    AVRO-922. C: Comparison function for new value interface. (dcreager)
+
+    AVRO-929. C: Set install_name in shared library on Mac OS X.
+    (dcreager)
+
+    AVRO-468. C: Document CMake build scripts. (dcreager)
+
+    AVRO-474. C: Added source package target to CMake build scripts.
+    (dcreager)
+
+    AVRO-467. C: Change build system to CMake. (dcreager)
+
+    AVRO-890: Java: Add Maven archetype for creating Avro service
+    projects.  (Stephen Gargan via cutting)
+
+    AVRO-804. Java: Add support for reading and writing instances of
+    Thrift generated classes.  This permits Thrift-defined data
+    structures to be written and read from Avro-format data files.
+    (cutting)
+
+    AVRO-908. Add an option to build avrocpp as a static library.
+    (Nebojsa Sabovic via thiru)
+
+    AVRO-803. Java: Optionally change string types in generated code
+    to java.lang.String instead of java.lang.CharSequence.  This is
+    achieved by specifying <stringType>String</stringType> in
+    avro-maven-plugin's pom.xml configuration. (cutting)
+
+    AVRO-924. Java: Support reading & writing arbitrary JSON data
+    using an efficient Avro binary representation.  (cutting)
+
+  OPTIMIZATIONS
+
+    AVRO-853: Java: Cache Schema hash codes. (cutting)
+
+    AVRO-907. Java: Optimize access to protobuf message fields. (cutting)
+
+    AVRO-934. PHP: Remove quadratic performance bug. (abawany via cutting)
+
+  IMPROVEMENTS
+
+    AVRO-836. Python "avro" commandline utility to display and write Avro files.
+    (Miki Tebeka via philz)
+
+    AVRO-841. Java: Implement insertion in GenericData.Array.
+    (Nick Palmer via cutting)
+
+    AVRO-847. Java: Add a unit test for Java MapReduce tether. (Jeremy Lewi)
+
+    AVRO-844. Java: Provide better errors for null or missing record fields.
+    (Bill Graham via cutting)
+
+    AVRO-746. C: Atomic reference counts. (dcreager)
+
+    AVRO-837. C: New Avro value interface. (dcreager)
+    Documented in lang/c/docs/index.html.
+
+    AVRO-861. C: Remove dependency on stdbool.h. (dcreager)
+
+    AVRO-396. C: avrocat and avropipe commands (dcreager)
+
+    AVRO-857. Add mailing lists, scm and issue management to Maven
+    metadata.  Also link top-level pom.xml to lang/java.
+    (Jan Prach via cutting)
+
+    AVRO-873. Java: Permit passing classloader to SpecificDatumReader.
+    (Michael Armbrust via cutting)
+
+    AVRO-889. Java: Change lang/java/pom.xml to use project's
+    top-level pom.xml as parent, permitting use of Maven versions
+    plugin.  (cutting)
+
+    AVRO-858. Python: Add --fields option to 'avro cat' command.
+    (Miki Tebeka via cutting)
+
+    AVRO-866. Java: Add support in IDL for documentation in protocols
+    and messages.  (George Fletcher via cutting)
+
+    AVRO-888. Java: Add SeekableByteArrayInput, a utility to permit
+    use of memory-based AvroDataFiles.  (Saleem Shafi via cutting)
+
+    AVRO-887. Java: Improve reflection error message when a field is
+    not found in a class.  (cutting)
+
+    AVRO-874. Remove experimental disclaimer from IDL documentation. (cutting)
+
+    AVRO-891. Java: In SpecificDatumReader, when no reader schema is
+    specified, use schema of currently loaded class.  (cutting)
+
+    AVRO-865. C: Upgrade Jansson to 2.1. (dcreager)
+
+    AVRO-899. C#: Include binary artifacts and documentation in
+    releases.  (cutting)
+
+    AVRO-898. Java: Extend NettyServer to support SSL.
+    (Vadim Tsarik via cutting)
+
+    AVRO-905. Java: Change JsonEncoder to write objects on separate
+    lines.  (cutting)
+
+    AVRO-910. Java: Add generated protobuf test code to subversion.  (cutting)
+
+    AVRO-917. Avrogencpp does not insert avro prefix for avro headers in the
+    generated files. (thiru)
+
+    AVRO-840. C++ generate nullable types for optional fields in the schema.
+    (thiru)
+
+    AVRO-915. Large number of warnings in C++ builds. (thiru)
+
+    AVRO-913. CMake/C++ build should work for out-of-tree builds.
+    (Nebojsa Sabovic via thiru)
+
+    AVRO-925. CMake/C++ Unable to build debug version of libavrocpp.
+    (Nebojsa Sabovic via thiru)
+
+    AVRO-932. C++ build.sh should have an option to install the built software.
+    (thiru)
+
+    AVRO-931. Avro C++ "make install" does not install the code generator.
+    (thiru)
+
+    AVRO-918. Avro C++ documentation is very old. (thiru)
+
+    AVRO-938. Some more warnings when built on RHEL. (thiru)
+
+    AVRO-937. C++ CMake keeps generating code even when there is no change.
+    (thiru)
+
+    AVRO-940. C++ design for generic datum could be better. (thiru)
+
+    AVRO-935. Java: Update dependency versions for 1.6.0. (scottcarey)
+
+    AVRO-941. Java: Make generated specific classes work in some
+    cases after Maven shade plugin is used on them.  (cutting)
+
+  BUG FIXES
+
+    AVRO-824. Java: Fix usage message of BinaryFragmentToJsonTool.
+    (Jakob Homan via cutting)
+
+    AVRO-894. C: cmake build now works on Mac OS X Lion. (dcreager)
+
+    AVRO-895. JsonDecoder does not tolerate JSON records with
+    different field order. (thiru)
+
+    AVRO-906. Java: Fix so that ordering of schema properties is
+    consistent. (cutting)
+
+    AVRO-901. Java tools unit tests fail in Windows due to line
+    termination and filenaming conventions. (thiru)
+
+    AVRO-900. On slower machines Java unit test TestFileSpanStorage
+    fails. (thiru)
+
+    AVRO-912. Mapreduce tether test fails on Windows. (thiru)
+
+    AVRO-903. C++/Cmake build fails to find Boost libraries. (Nebojsa
+    Sabovic via thiru)
+
+    AVRO-904. C++/CMake build should fail if Boost libraries are not
+    present. (Nebojsa Sabovic via thiru)
+
+    AVRO-902. C++/CMake installs into /usr/local/local/.  (Nebojsa
+    Sabovic via thiru)
+
+    AVRO-914. Java: Fix SpecificResponder to better handle
+    non-Exception Throwables. (philz via cutting)
+
+    AVRO-871. Fix specification to state that enum symbol names must
+    follow the same rules as record and field names.  (cutting)
+
+    AVRO-916. 0xff in binary stream is interpreted as end-of-stream. (thiru)
+
+    AVRO-869. Lifetimes of streams and encoder/decoders not managed properly.
+    (thiru)
+
+    AVRO-928. Debug statement no longer reports garbage value from
+    stack. (Vivek Nadkarni via dcreager)
+
+    AVRO-933. On latest Ubuntu AvrogencppTests.cc does not compile. (thiru)
+
+    AVRO-927. Java: Fix Pair#equals() to better compare
+    schemas. (Brock Noland via cutting)
+
+    AVRO-936. Avro Java does not build with Maven 2. (thiru)
+
+    AVRO-930. C: Fixed memory leak in resolved writer class. (Vivek
+    Nadkarni via dcreager)
+
+    AVRO-942. Java: Fix reflect so that @Nullable fields have a
+    default value of null. (Binglin Chang via cutting)
+
+    AVRO-945. C# port does not build under Ubuntu 11.10. (thiru)
+
+    AVRO-948. Java: Fix to more correctly handle Thrift optional and
+    nullable fields.  (cutting)
+
+    AVRO-944. Java: Fix mapred so that reduce-side combines use
+    correct serializer. (cutting)
+
+Avro 1.5.4 (12 September 2011)
+
+  IMPROVEMENTS
+
+    AVRO-866. Python: Add support for snappy compression codec.
+    (Tom White via cutting)
+
+  BUG FIXES
+
+    AVRO-884.  Java: Fix a regression in RPC so that one-way messages
+    fail when the transceiver cannot connect. (Tom White via cutting)
+
+    AVRO-892. Python: Fix an "integer out of range" error with snappy
+    compression.  (Michael Cooper via cutting)
+
+Avro 1.5.3 (25 August 2011)
+
+  IMPROVEMENTS
+
+    AVRO-872. Java: Improved Schema parsing API and permit IDL imports
+    to depend on names defined in prior imports. (cutting)
+
+    AVRO-877. Java: Add support for compiling multiple, dependent
+    schemas. (Bill Graham via cutting)
+
+    AVRO-880. Java: Upgrade snappy-java to 1.0.3.2.
+    (Alejandro Abdelnur via cutting)
+
+Avro 1.5.2 (12 August 2011)
+
+  NEW FEATURES
+
+    AVRO-830. Java: Add AvroTextOutputFormat to permit Hadoop
+    streaming jobs to easily write Avro format output with "bytes" as
+    schema.  (Tom White via cutting)
+
+    AVRO-539. Java: Add asynchronous RPC support, through either
+    callbacks or futures.  (James Baldassari via cutting)
+
+  IMPROVEMENTS
+
+    AVRO-469. C: Set library's libtool-style soversion when using CMake
+    build scripts. (dcreager)
+
+    AVRO-470. C: Build asciidoc documentation when using CMake build
+    scripts. (Daniel Lundin via dcreager)
+
+    AVRO-820. Java: Permit applications to catch exceptions thrown
+    while writing data to a file and then continue writing to that
+    file.  (scottcarey & cutting)
+
+    AVRO-826. C#: Add MD5 and hashcode functions to Protocol.
+    (Dona Alvarez via cutting)
+
+    AVRO-838. Java: Permit invalid characters in record and field
+    names of schemas read from data files, for compatibility with
+    1.4. (cutting)
+
+    AVRO-810: C#: Add strong naming to assemblies. (Eric Hauser)
+
+    AVRO-833. Python: Don't require simplejson for python >= 2.6.
+    (Miki Tebeka via philz)
+
+    AVRO-845. Python: setup.py uses Python2.7+ specific code
+    (Miki Tebeka via philz)
+
+    AVRO-856. Java: Update Snappy to 1.0.3-rc4. (cutting)
+
+  BUG FIXES
+
+    AVRO-818. C: Fix data file corruption bug in C library (dcreager)
+
+    AVRO-819. C: Fix file reader EOF detection (dcreager)
+
+    AVRO-809. Java: Fix reflect for classes that have no package. (cutting)
+
+    AVRO-832. Java: Fix RPC client to correctly perform schema
+    resolution on message responses.  (cutting)
+
+    AVRO-815. Java: Netty Transceiver fails processing one-way messages.
+    Implemented writeBuffers for the NettyTransceiver to allow it to
+    process one-way messages. (sgargan)
+
+    AVRO-776. Java: Fix SocketServer to close socket. (scottcarey)
+
+    AVRO-842. Java: Fix Netty-based IPC client to provide better
+    errors when attempting to use a closed connection.
+    (James Baldassari via cutting)
+
+    AVRO-825: C++: Fix bugs in codegen with recursive schemas. (thiru)
+
+    AVRO-864. Java: Fix reflect to be able to write unions containing
+    generic and/or specific records.  (Isabel Drost & cutting)
+
+Avro 1.5.1 (3 May 2011)
+
+  NEW FEATURES
+
+    AVRO-533. Add a C# implementation.
+    (Jeremy Custenborder, Dona Alvarez and thiru)
+
+    AVRO-788. Java: Add Snappy compression for data files, including
+    MapReduce API support. (cutting)
+
+    AVRO-808. Java: Add AvroAsTextInputFormat for use with streaming.
+    (Tom White via cutting)
+
+  IMPROVEMENTS
+
+    AVRO-785. Java: Squash a Velocity warning by upgrading to Velocity 1.7.
+    (cutting)
+
+    AVRO-781. Generic data support in C++. (thiru)
+
+    AVRO-783. Specific object support in C++. (thiru)
+
+    AVRO-789. Datafile support in C++. (thiru)
+
+    AVRO-787. Ruby: Make compatible with Ruby 1.9. (Michael L. Artz via cutting)
+
+    AVRO-296. IDL: Use double-asterisk comments for schema documentation.
+    (cutting)
+
+    AVRO-709. Python: Optimize property lookup. (Justin Azoff via cutting)
+
+    AVRO-794. Makefile.am is no longer required in C++. (thiru)
+
+    AVRO-795. C++ Datafile reader makes it hard to build adaptive
+    clients. (thiru)
+
+    AVRO-802. Java: Add documentation for non-Avro input, map-only
+    jobs. (cutting)
+
+    AVRO-799. Java: Add support for --codec parameter to the
+    'fromtext' command.  Also made some performance improvements, bug
+    fixes and added tests for this command. (cutting)
+
+    AVRO-798. Add checksum to Snappy compressed blocks. (cutting)
+
+    AVRO-763. Java MapReduce API: add support for configure() and
+    close() methods to mappers and reducers. (Marshall Pierce via cutting)
+
+    AVRO-807. C#: Some improvements and bugfixes, including making AvroGen
+    extensible and changing ISpecificRecord to an interface.
+    (Dona Alvarez via cutting)
+
+    AVRO-791. Java: Add avro-tools-nodeps jar.  Also change 'mvn
+    install' to not GPG sign things by default.  (scottcarey via cutting)
+
+    AVRO-812. Java: Implement help goal for Maven plugin.
+    (Holger Hoffstätte via cutting)
+
+  BUG FIXES
+
+    AVRO-786. Java: Fix equals() to work on objects containing maps. (cutting)
+
+    AVRO-780. Java: Fix a NullPointerException with reflect data when
+    a union contains an array and null. (cutting)
+
+    AVRO-790. Java: GenericDatumReader can fail when reusing objects with unions
+    containing 'bytes' fields. (scottcarey)
+
+    AVRO-801. Java: Fix a bug in SaslSocketTransceiver where large
+    messages were truncated on write. (cutting)
+
+    AVRO-793. Java: Fix a bug in the resolver when skipping an array
+    within a record. (thiru via cutting)
+
+Avro 1.5.0 (10 March 2011)
+
+  INCOMPATIBLE CHANGES
+
+    AVRO-751. C: Each avro_datum_t instance now contains a reference to
+    the schema that the datum is an instance of.  As a result, the
+    signatures of several of the functions that operate on avro_datum_t
+    instances have changed.
+
+    AVRO-647. Java: Break avro.jar up into multiple parts: avro.jar,
+    avro-compiler.jar, avro-ipc.jar, avro-mapred.jar, avro-tools.jar,
+    and avro-maven-plugin.jar.
+
+    Summary of artifacts:
+    * avro.jar
+      Contains 'core' avro features:  schemas, data files,
+      specific, generic, and reflect APIs.
+      Dependencies: slf4j, Paranamer, Jackson.
+    * avro-ipc.jar
+      Contains Transceivers, Requestors, and Responders.
+      Dependencies: avro.jar, Jetty, Netty, and Velocity.
+    * avro-compiler.jar
+      Contains SpecificCompiler, IDL compiler and Ant tasks.
+      Dependencies: avro.jar, commons-lang, and Velocity.
+    * avro-maven-plugin.jar
+      A Maven plugin for Avro's compiler.
+      Dependencies: avro-compiler.jar
+    * avro-mapred.jar
+      API for Hadoop MapReduce with Avro data.
+      Dependencies: avro-ipc.jar, hadoop-core, and jopt-simple.
+    * avro-tools.jar
+      Avro command-line tools stand-alone jar.
+      Dependencies are contained within the jar.
+      Dependencies: all of the above.
+
+    (scottcarey)
+
+    AVRO-737. Java: Improve correlation between packages and modules.
+    Each module introduced by AVRO-647 now exclusively provides
+    different java packages.  This required moving several classes
+    around into new packages and will therefore require users to
+    change their package imports when upgrading to Avro 1.5.0.
+    Summary of changes:
+    * AvroRemoteException has moved to org.apache.avro
+    * ByteBufferInputStream and ByteBufferOutputStream have moved
+      to org.apache.avro.util
+    * InduceSchemaTool has moved to org.apache.avro.tools
+    * SpecificCompiler, SchemaTask, and ProtocolTask have moved
+      to org.apache.avro.compiler.specific
+    * The Idl compiler has moved to org.apache.avro.compiler.idl
+    * ReflectRequestor and ReflectResponder have moved to
+      org.apache.avro.ipc.reflect
+    * GenericRequestor and GenericResponder have moved to
+      org.apache.avro.ipc.generic
+    * SpecificRequestor and SpecificResponder have moved to
+      org.apache.avro.ipc.specific
+    (scottcarey)
+
+    AVRO-753. Java: Improve BinaryEncoder Performance.
+    The Encoder API has several resulting changes:
+    * Construction and configuration is handled by EncoderFactory.  All
+      Constructors are hidden, and Encoder.init(OutputStream) is removed.
+    * Some Encoders previously did not buffer output.  Users must call
+      Encoder.flush() to ensure output is written unless the EncoderFactory
+      method used to construct an instance explicitly states that the Encoder
+      does not buffer output.
+    (scottcarey)
+
+    AVRO-769. Java: Align Decoder/Encoder APIs for consistency and long term
+    stability.  Avro's Decoder and Encoder APIs are aligned and now consist of
+    only read and write operations.  EncoderFactory and DecoderFactory handle
+    all construction and common configuration.  Some specialized implementations
+    have separate configuration APIs.
+
+    (scottcarey)
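+
+    A minimal usage sketch of the factory-based construction described
+    above (illustrative only, not part of the original change notes;
+    it assumes the org.apache.avro.io classes of a 1.5-or-later release):
+
+      import java.io.ByteArrayOutputStream;
+      import org.apache.avro.io.BinaryDecoder;
+      import org.apache.avro.io.BinaryEncoder;
+      import org.apache.avro.io.DecoderFactory;
+      import org.apache.avro.io.EncoderFactory;
+
+      public class EncoderFactoryExample {
+        public static void main(String[] args) throws Exception {
+          // Encoders are obtained from EncoderFactory; buffered encoders
+          // must be flushed explicitly before their output is complete.
+          ByteArrayOutputStream out = new ByteArrayOutputStream();
+          BinaryEncoder enc = EncoderFactory.get().binaryEncoder(out, null);
+          enc.writeLong(42L);
+          enc.flush();
+
+          // Decoders are likewise obtained from DecoderFactory.
+          BinaryDecoder dec =
+              DecoderFactory.get().binaryDecoder(out.toByteArray(), null);
+          System.out.println(dec.readLong());   // prints 42
+        }
+      }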
+
+    AVRO-670. Allow DataFileWriteTool to accept schema files as input with new
+    --schema-file and --schema command-line flags. (Ron Bodkin via philz)
+
+    AVRO-671. Java: Check that type and field names conform to
+    specified requirements. (cutting)
+
+    AVRO-678. Java: Implement ReflectData#compare().  Incompatibly
+    moves some protected GenericDatumReader/Writer methods to
+    GenericData, potentially breaking subclasses. (cutting)
+
+    AVRO-696. Java: Make DataFileWriter.setMetaInternal(String,String)
+    private. (Patrick Linehan via cutting)
+
+    AVRO-741. C: Minor API change to handling of bytes data.
+    (Douglas Creager via brucem)
+
+    AVRO-656. Java: Add required Schema parameter to GenericData.Fixed
+    and GenericData.EnumSymbol constructors.  Also fix union dispatch
+    to conform to specification, using full schema name for records,
+    enums and fixed.
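+
+    For example (an illustrative sketch, not part of the original change
+    notes; it assumes the generic API of a recent release):
+
+      import org.apache.avro.Schema;
+      import org.apache.avro.generic.GenericData;
+
+      public class EnumSymbolExample {
+        public static void main(String[] args) {
+          Schema suit = new Schema.Parser().parse(
+              "{\"type\":\"enum\",\"name\":\"Suit\",\"symbols\":[\"SPADES\",\"HEARTS\"]}");
+          // The enum's Schema is now a required constructor argument.
+          GenericData.EnumSymbol symbol =
+              new GenericData.EnumSymbol(suit, "SPADES");
+          System.out.println(symbol);   // prints SPADES
+        }
+      }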
+
+  NEW FEATURES
+
+    AVRO-762. C: New and improved schema resolution API.  The new API
+    correctly handles all of the schema resolution rules listed in the
+    spec.  It performs resolution against two schemas separately from
+    reading in any data, so that we don't have to re-resolve for each
+    data record.  Please see the avro/consumer.h header file for
+    details. (dcreager)
+
+    AVRO-463. C: Error message API.  The new avro_strerror() function
+    can be used to get a textual description of the error codes returned
+    by the other C API functions.  In particular, this includes any JSON
+    parse errors that occur while trying to load a schema. (dcreager)
+
+    AVRO-684. Java: Add command-line "recodec" tool to change file
+    compression codecs.  (Patrick Linehan via cutting)
+
+    AVRO-689. Java: Permit setting timeout of HttpTransceiver. (cutting)
+
+    AVRO-687. Java: Permit RPC applications to view remote protocol. (cutting)
+
+    AVRO-159 Java: Allow maven builds to use avro: avro-maven-plugin
+    (Hiram Chirino, Patrick Hunt via Scott Carey)
+
+    AVRO-549. C: Route all memory allocations through an interface. (Douglas
+    Creager via brucem)
+
+    AVRO-729. C: JSON encoded Avro values. (Douglas Creager via brucem)
+
+    AVRO-757. Java: Permit data files to be re-opened without
+    re-reading the header. (Stu Hood via cutting)
+
+    AVRO-750. C: Install a pkg-config file
+    (dcreager)
+
+    AVRO-730. Java: Add set() and remove() methods to GenericData.Array.
+    (Chase Bradford via cutting)
+
+    AVRO-711. JSON encoder and decoder for C++.
+
+    AVRO-701 and AVRO-772. Java: Add new constructors for HttpServer
+    and an example of using SSL for the HTTP RPC. (brucem)
+
+  IMPROVEMENTS
+
+    AVRO-771. Java: Update dependency versions
+    (scottcarey)
+
+    AVRO-726. Java: Make GenericDatum{Reader,Writer} easier to extend.
+    (scottcarey)
+
+    AVRO-765. Java: Improvement to BinaryDecoder readLong performance
+    (scottcarey)
+
+    AVRO-716. Java: integrate AVRO-647 changes with top level build
+    (scottcarey)
+
+    AVRO-752. Java: Enhanced Performance Test Suite
+    (scottcarey)
+
+    AVRO-682. Java: Add method DataFileStream.getMetaKeys().
+    (Harsh J Chouraria via cutting)
+
+    AVRO-683. Java: Fix RPC proxy factories to not require casting.
+    (Stephen Gargan via cutting)
+
+    AVRO-642. Java, Python: Pretty-print schemas in some validation
+    error messages.  (Harsh J Chouraria via cutting)
+
+    AVRO-648. Java: Use Velocity templates to generate specific code.
+    (philz via cutting)
+
+    AVRO-698. Java: Add MapReduce tests and documentation for jobs
+    that mix Avro and non-Avro data. (cutting)
+
+    AVRO-692. Java: Permit Avro 1.2 format files to be read. (cutting)
+
+    AVRO-707. Java: Promote SASL-based RPC. (cutting)
+
+    AVRO-714. Fix Forrest to work with Java 6. (Carl Steinbach via cutting)
+
+    AVRO-669. Java: Make MapReduce work with reflection-based data.
+    (cutting)
+
+    AVRO-723. Java: Pass error messages for unexpected RPC exceptions
+    through to client. (Stephen Gargan via cutting)
+
+    AVRO-719. Java: Permit MapReduce programs to alter output file
+    sync interval.  (Joe Crobak via cutting)
+
+    AVRO-725. C: avro_schema_get_subschema function. (Douglas Creager via
+    brucem)
+
+    AVRO-630. C: Add size accessors for map and list data. (Douglas Creager
+    via brucem)
+
+    AVRO-727. C: Add many new accessor and mutator functions. (Douglas Creager
+    via brucem)
+
+    AVRO-729. C: avro_schema_to_json can take const schema. (Douglas Creager
+    via brucem)
+
+    AVRO-729. C: Update to Jansson 1.3. (Douglas Creager via brucem)
+
+    AVRO-731. Documentation: Improve identification of Apache
+    trademarks. (cutting)
+
+    AVRO-734. Java: Update maven build plugin versions. (Holger Hoffstätte
+    via scottcarey)
+
+    AVRO-700. Change C++ build system to CMake (thiru)
+
+    AVRO-749. Don't install Jansson build artifacts. (Douglas Creager via
+    brucem)
+
+    AVRO-744. C: Helper macros for extracting and setting record field
+    values (dcreager)
+
+    AVRO-773. Java: Add no-arg constructor to AvroWrapper.
+    (Jan Prach via cutting)
+
+    AVRO-774. Java: Clean up repositories in pom.xml. (Lars Francke via cutting)
+
+    AVRO-754. Java: Permit passing custom channel factory to NettyTransceiver.
+    (Bruno Dumon via cutting)
+
+  BUG FIXES
+
+    AVRO-764. Java: Bug in BinaryData.compare() with offset comparison.
+    (Harsh J Chouraria via scottcarey)
+
+    AVRO-743. Java: Performance Regression and memory pressure with
+    GenericDatumReader. (scottcarey)
+
+    AVRO-675. C: Bytes and fixed setters don't update datum size.
+    (Douglas Creager via massie)
+
+    AVRO-681. IDL: Fix documentation example with illegal syntax.
+    (Jingguo Yao via cutting)
+
+    AVRO-685. Java: Fix Schema#equals() and hashCode() to not require
+    exponential time for some recursive schemas.
+    (Richard Ahrens via cutting)
+
+    AVRO-693. Java: Configure Velocity to use null logger, removing a
+    dependency that was breaking the build.  (Stephen Gargan via cutting)
+
+    AVRO-702. Java: Fix a bug printing nested record namespaces. (cutting)
+
+    AVRO-706. Java: Type promotion not succeeding for long -> float. (thiru)
+
+    AVRO-704. Java: Fix SocketServer connection threads to exit rather
+    than busywait when client closes connection. (cutting)
+
+    AVRO-705. Java: Fix DirectBinaryDecoder to correctly reinitialize.
+    (thiru via cutting)
+
+    AVRO-710. Java: Add bounds checking to GenericData.Array#get(int).
+    (Bo Shi via cutting)
+
+    AVRO-713. Java: Fix GenericData.Record#toString() to produce valid
+    JSON for enum symbols. (Jay Kreps via cutting)
+
+    AVRO-643. Java: Fix intermittent failures in TestTraceCollection. (cutting)
+
+    AVRO-722. Java: Fix ordering of calls to RPC plugins.
+    (Stephen Gargan via cutting)
+
+    AVRO-708. Java: Fix Netty-based RPC to keep connection open.
+    (Stephen Gargan via cutting)
+
+    AVRO-694. Python: Fix schema parse error for maps of records.
+    (hammer via cutting)
+
+    AVRO-663. Java: avro-tools.jar does not meet maven2 layout standard.
+    (scottcarey)
+
+    AVRO-688. Java: Only require that one-way'ness of messages match
+    over stateful connections, permitting interoperability with
+    Python and Ruby, which drop the one-way message attribute. (cutting)
+
+    AVRO-759. Java: Fix NullPointerException when some but not all
+    fields are aliased. (Xiaolu Ye via cutting)
+
+    AVRO-755. Java: Fix SpecificResponder to correctly handle message
+    parameter lists that differ between client and server.  (cutting)
+
+    AVRO-775. Java: Fix a file handle leak in DataFileReader. (cutting)
+
+    AVRO-761. Java: Fix Requestor to not send client's protocol on
+    each handshake with stateless (HTTP) transport when protocol
+    differs from server's. (cutting)
+
+Avro 1.4.1 (13 October 2010)
+
+  NEW FEATURES
+
+    AVRO-674. Vim editor support for IDL files (Daniel Lundin via philz)
+
+    AVRO-641. Java: Add SASL security for socket-based RPC. (cutting)
+
+    AVRO-634. Java: Add support for reading Hadoop sequence files as
+    Avro data to MapReduce API. (cutting)
+
+  OPTIMIZATIONS
+
+    AVRO-673. Python: Remove unneeded schema validations.
+    (Erik Frey via cutting)
+
+  IMPROVEMENTS
+
+    AVRO-655. Change build so that 'dist' target no longer also runs C
+    and C++ unit tests. (cutting)
+
+    AVRO-634. IDL: Add support for aliases. (cutting)
+
+    AVRO-668. Java: Reduce object allocations while writing strings.
+    (scottcarey)
+
+    AVRO-537. Ruby: Reuse client connection for multiple requests.
+    (Gabor Torok via cutting)
+
+  BUG FIXES
+
+    AVRO-666. Remove an extraneous pdb.set_trace() that crept into schema.py
+    (hammer)
+
+    AVRO-657. Fix build so that md5 and sha1 checksum files contain
+    only the file's basename without any directories. (cutting)
+
+    AVRO-664. Ruby server takes a long time to start in interop tests. (thiru)
+
+    AVRO-667. GenericArray fails to compare with List. SpecificRecord
+    compare gets ClassCastException. (scottcarey & cutting)
+
+    AVRO-717. Java: Fix SpecificDatumWriter default constructor to
+    correctly reference SpecificData.  (Joe Crobak via cutting)
+
+Avro 1.4.0 (31 August 2010)
+
+  INCOMPATIBLE CHANGES
+
+    AVRO-372.  Rename GenAvro to be Avro IDL.  The tool name is now
+    'idl'.  The file suffix is now '.avdl'.  (cutting)
+
+    AVRO-544. Java: Change Server interface, adding start() and join()
+    methods.  Servers are no longer started in their constructor.
+    (hammer & cutting)
+
+    AVRO-605. Java: Change Utf8 to implement CharSequence and change
+    specific, generic & reflect APIs to accept any CharSequence
+    implementation for string schemas, including java.lang.String.
+    This incompatibly changes method signatures of generated protocol
+    interfaces.  It also incompatibly changes the generic
+    representation of enum symbols from java.lang.String to
+    org.apache.avro.generic.GenericEnumSymbol.
+
+    AVRO-637. Java: Change GenericArray to implement List.  Also
+    incompatibly change generated array signatures to List.  The
+    runtime will now accept any Collection implementation for array
+    types.  (cutting)
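+
+    An illustrative sketch of the combined effect of AVRO-605 and
+    AVRO-637 (not part of the original change notes; it assumes the
+    generic API of a recent release):
+
+      import java.util.Arrays;
+      import org.apache.avro.Schema;
+      import org.apache.avro.generic.GenericData;
+      import org.apache.avro.generic.GenericRecord;
+
+      public class StringAndListExample {
+        public static void main(String[] args) {
+          Schema schema = new Schema.Parser().parse(
+              "{\"type\":\"record\",\"name\":\"R\",\"fields\":[" +
+              "{\"name\":\"s\",\"type\":\"string\"}," +
+              "{\"name\":\"a\",\"type\":{\"type\":\"array\",\"items\":\"int\"}}]}");
+          GenericRecord r = new GenericData.Record(schema);
+          r.put("s", "hello");                // any CharSequence, including String
+          r.put("a", Arrays.asList(1, 2, 3)); // any Collection for array types
+          System.out.println(r);
+        }
+      }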
+
+  NEW FEATURES
+
+    AVRO-627. Add PHP implementation. (Michael Glaesemann via cutting)
+
+    AVRO-613. Create basic frontend to view trace results.
+    (Patrick Wendell via philz)
+
+    AVRO-606. Add File-Based Span Storage to TracePlugin
+    (Patrick Wendell via philz)
+
+    AVRO-595. Add Basic Trace Collection and Propagation.
+    (Patrick Wendell via philz)
+
+    AVRO-493. Add support for Hadoop Mapreduce with Avro data files. (cutting)
+
+    AVRO-285: Specify one-way messages and implement in Java. (cutting)
+
+    AVRO-512. Java: Define and implement MapReduce connector
+    protocols. (cutting)
+
+    AVRO-577. Java: Add MapReduce InputFormat for plain text files.
+    (Tom White via cutting)
+
+    AVRO-567. Add command-line tools for text file import & export.
+    (Patrick Wendell via cutting)
+
+    AVRO-578. Java: add payload data to RPC context for use by
+    plugins.  (Patrick Wendell via cutting)
+
+    AVRO-405: Java: Add Netty-based RPC transceiver and server
+    implementation. (Harry Wang via cutting)
+
+    AVRO-580. Permit intermixing of generic and specific data.
+    SpecificDatumReader and SpecificDatumWriter will now use generic
+    types when no specific class is available.  (cutting)
+
+    AVRO-600. Add support for type and field name aliases,
+    facilitating schema migration. (cutting)
+
+    AVRO-495. IDL: Add support for file includes. (cutting)
+
+    AVRO-611. IDL: Add support for one-way messages. (cutting)
+
+    AVRO-528. Python: Add support for Twisted.  (Esteve Fernandez via cutting)
+
+  IMPROVEMENTS
+
+    AVRO-636. Expose Singleton Method for TracePlugin. (Patrick Wendell via
+    philz)
+
+    AVRO-614. Improve Trace frontend UI. (Patrick Wendell via philz)
+
+    AVRO-629. Prefer the JSON module of python's stdlib over simplejson.
+    (Harsh J Chouraria via philz)
+
+    AVRO-587. Add Charts and Templating to Stats View
+    (Patrick Wendell via philz)
+
+    AVRO-584. Update Histogram for Stats Plugin
+    (Patrick Wendell via philz)
+
+    AVRO-501. missing function in C api to access array elements after
+    decoding an array. (Bruce Mitchener via massie)
+
+    AVRO-497. Minor changes to C++ autotools, makefiles, and code
+    generator. (sbanacho)
+
+    AVRO-508. Use page-backed buffers for C++ serialization input or
+    output. (sbanacho)
+
+    AVRO-520. Refactor C++ validation code. (sbanacho)
+
+    AVRO-521. Out of memory and other issues with Junit tests for
+    mapreduce (thiru)
+
+    AVRO-540. Java: Make GenericArray reversible.  (Eric Evans via cutting)
+
+    AVRO-284. Handle namespaces correctly in new Python implementation
+    (Patrick Wendell via hammer)
+
+    AVRO-331. Inline shared state during the Python build process
+    (version, handshake schemas, and interop data directory)
+    (Patrick Wendell via hammer)
+
+    AVRO-447. Describe implicit protocol "system" error in spec. (cutting)
+
+    AVRO-150. Java: fix compiler to not re-generate up-to-date code.
+    (John Yu via cutting)
+
+    AVRO-494. Add support for default values to IDL.  (cutting)
+
+    AVRO-596. Start Netty server eagerly in constructor.
+    (Patrick Linehan via cutting)
+
+    AVRO-581. Java: Update MapReduce APIs to use key/value pairs for
+    intermediate data.  (cutting)
+
+    AVRO-582. Java: Add comment to generated code indicating that
+    set() and get() are not for use by applications.  (cutting)
+
+    AVRO-601. Java: Add per-field property support. (cutting)
+
+    AVRO-583. Java: Improve error message when types not correctly
+    nested. (cutting)
+
+    AVRO-603. Java: Add a constructor for SpecificDatumReader that
+    accepts both reader's and writer's schema.  Also improve javadoc
+    for related constructors and setters.  (Stu Hood via cutting)
+
+    AVRO-557. Java: Cache ResolvingDecoder instances, speeding
+    DatumReader benchmarks by 5x to 9x.  (cutting & scottcarey)
+
+    AVRO-586. Java: Permit specification of MapReduce output file
+    metadata properties. (Ken Krugler via cutting)
+
+    AVRO-616. Java: Add comment to generated source files noting that
+    they should not be edited. (Patrick Wendell via cutting)
+
+    AVRO-615. Java: Improve error message for NullPointerException
+    when writing data.  (cutting)
+
+    AVRO-534. Java: Permit mapred jobs to specify a different input
+    schema from the input file.  (Harsh J Chouraria via cutting)
+
+    AVRO-617. Java: Detect erroneous default field values. (cutting)
+
+    AVRO-598. Java: Use generic data structures when reading with
+    reflect API and classes are not defined. (cutting)
+
+    AVRO-631. Java: Make RPC plugin payload reporting consistent.
+    (Patrick Wendell via cutting)
+
+    AVRO-632. Java: Change RPC responder to log stack traces for user
+    exceptions. (cutting)
+
+    AVRO-639. Python: Use namespace-unqualified names for references
+    to schemas in the same namespace. (cutting)
+
+    AVRO-644: PHP: Add requirements to README. (Michael Glaesemann via cutting)
+
+    AVRO-652. Java: Expose sync points in DataFileReader.
+    (Stu Hood via cutting)
+
+  BUG FIXES
+
+    AVRO-622. python avro.ipc doesn't work with python2.4 (philz)
+
+    AVRO-620. Python implementation doesn't stringify sub-schemas
+    correctly. (philz)
+
+    AVRO-618. Avro doesn't work with python 2.4 (philz)
+
+    AVRO-502. Memory leak from parsing JSON schema.
+    (Robert G. Jakabosky via massie)
+
+    AVRO-515. Fix build and compatibility problems. (sbanacho)
+
+    AVRO-518. Add link to boost filesystem library.  (John Plevyak via sbanacho)
+
+    AVRO-566. Java: fix so that JAVA_HOME is bound by build.xml for
+    test_tools.sh.  (cutting)
+
+    AVRO-571. Fix how we handle out-of-bounds indexes for union and
+    enum parsing in Python (hammer)
+
+    AVRO-589. ClassCastException:
+    org.apache.avro.io.parsing.Symbol$Alternative cannot be cast to
+    org.apache.avro.io.parsing.Symbol$UnionAdjustAction (thiru)
+
+    AVRO-573. Java: Fix various bugs with undeclared RPC exceptions. (cutting)
+
+    AVRO-604. Java: Fix missing build dependency for checkstyle target.
+    (Patrick Wendell via cutting)
+
+    AVRO-602. C++: Update documentation to match API.  (Jingguo Yao via cutting)
+
+    AVRO-609. Java: Fix JsonToBinaryFragmentTool to flush output. (cutting)
+
+    AVRO-612. Java: Preserve field documentation when writing schemas. (cutting)
+
+    AVRO-590. IDL: Fix order specifications. (cutting)
+
+    AVRO-541. Java: Fix sporadic corruption when appending a
+    compressed file to an uncompressed file. (scottcarey via cutting)
+
+    AVRO-86. Java: Fix NullPointerException when reflect API infers
+    schema for a class without a package. (cutting)
+
+    AVRO-510. C: Fix some memory leaks in datafile reader &
+    writer. (Robert G. Jakabosky via cutting)
+
+    AVRO-633. Ruby: Implement skip_union to correct issues with
+    updating protocols
+
+    AVRO-640. Python: Fix path to sources in RPC interop test.  (cutting)
+
+    AVRO-653. Python: Fix so distribution contains correct files.
+    (Eric Evans via cutting)
+
+    AVRO-650. Java: Fix GenericDatumReader to be thread-safe. (cutting)
+
+Avro 1.3.3 (7 June 2010)
+
+  IMPROVEMENTS
+
+    AVRO-525. remove unused imports (Esteve Fernandez via hammer)
+
+    AVRO-526. Fall back to pure Python StringIO if cStringIO is not available
+    (Esteve Fernandez via hammer)
+
+    AVRO-560. Python impl should include system errors in every protocol (hammer)
+
+    AVRO-486. DataFile.open for the ruby side (jmhodges)
+
+    AVRO-559. Handle read_union error where the list index of the union branch
+    to follow exceeds the size of the union schema (hammer)
+
+    AVRO-491. Doing doubles and floats better in the ruby impl. (jmhodges)
+
+    AVRO-450. HTTP IPC for ruby. (jmhodges)
+
+    AVRO-514. Removing unnecessary ruby StringIO calls. (jmhodges)
+
+    AVRO-511. Ruby implementation passes the rpc interop tests.
+
+    AVRO-543. Schema comparison is hella slow on the Ruby side. (jmhodges)
+
+    AVRO-504. ruby impl could stand better error messages on schema parsing (jmhodges)
+
+    AVRO-556. Poor performance for Reader::readBytes improved
+    (Dave Wright via sbanacho)
+
+  BUG FIXES
+
+    AVRO-461. Skipping primitives in the ruby side (jmhodges)
+
+    AVRO-496. python sample_http_client.py is broken (Jeff Hodges via hammer)
+
+    AVRO-527. Undefined variable "schm" error (Esteve Fernandez via hammer)
+
+    AVRO-548. Python client should handle CLIENT handshake match status
+    correctly. (hammer)
+
+    AVRO-555. Missing license headers in some ruby source
+    files. (jmhodges)
+
+    AVRO-554 Fixing syncing in ruby data file writing. (Grant Rodgers
+    via jmhodges)
+
+    AVRO-562 ruby side had busted client handshaking. (jmhodges)
+
+    AVRO-517. Resolving Decoder fails in some cases. (thiru)
+
+    AVRO-524. DataFileWriter.appendTo leads to intermittent IOException during write() (thiru)
+
+    AVRO-499. Java: Fix protocol reflection to reject interfaces with
+    multiple methods of the same name.  (cutting)
+
+    AVRO-489. Skipping complex objects in the ruby impl. (jmhodges)
+
+    AVRO-500. ruby side dev packaging (jmhodges)
+
+    AVRO-516. ruby: buffer length should not be little-endian in socket rpc (jmhodges)
+
+Avro 1.3.2 (31 March 2010)
+
+  IMPROVEMENTS
+
+    AVRO-449. CMake-based build system for Avro/C (Bruce Mitchener via massie)
+
+    AVRO-418. avro.h generates errors when included in C++ code
+    (Bruce Mitchener via massie)
+
+    AVRO-480. avro_flush() is in the header, but not implemented
+    (Bruce Mitchener via massie)
+
+    AVRO-481. Buildbot warning fixes (Bruce Mitchener via massie)
+
+    AVRO-451. Try to use hashlib in Python implementation and fall
+    back to md5 if we can't find it (Bruce Mitchener via hammer)
+
+    AVRO-423. HTTPTransceiver does not reuse connections
+    (Eric Evans via hammer)
+
+    AVRO-490. Add Ant task to deploy Java artifacts to Maven repo. (cutting)
+
+  BUG FIXES
+
+    AVRO-479. Fix 'sign' target in top-level build.sh to generate md5
+    checksums. (cutting)
+
+    AVRO-487. Fix Java reflect protocols to transmit error messages. (cutting)
+
+Avro 1.3.1 (16 March 2010)
+
+  NEW FEATURES
+
+    AVRO-432. Add @Nullable annotation to Java reflect API. (cutting)
+
+  IMPROVEMENTS
+
+    AVRO-426. Include a ruby gem in distributions.
+    (Ryan King via cutting)
+
+    AVRO-439. Remove unused headers from being checked in configure.in
+    (Bruce Mitchener via massie)
+
+    AVRO-438. Clarify spec.  (Amichai Rothman via cutting)
+
+    AVRO-445. avro_size_data() to pre-calculate the size of an
+    avro_datum_t in serialized form (Bruce Mitchener via massie)
+
+    AVRO-443. Endianness is determined at configure time rather
+    than compile time (Bruce Mitchener via massie)
+
+    AVRO-448. encoding_binary.c doesn't build on big endian platforms
+    (Bruce Mitchener via massie)
+
+    AVRO-442. sizeof void* and sizeof long detected at configure time
+    (Bruce Mitchener via massie)
+
+    AVRO-444. Fix warnings (Bruce Mitchener via massie)
+
+    AVRO-452. Include cleanup (Bruce Mitchener via massie)
+
+    AVRO-453. More warning cleanup (Bruce Mitchener via massie)
+
+    AVRO-440. config.h output not correctly used (Bruce Mitchener via massie)
+
+    AVRO-460. Performance improvement to write_long() (Bruce Mitchener
+    via massie)
+
+    AVRO-455. Update Java dependencies. (David Dabbs via cutting)
+
+    AVRO-446. Add a build.sh task that signs and checksums artifacts. (cutting)
+
+    AVRO-454. Change DataFileStream to implement Closeable. (cutting)
+
+  BUG FIXES
+
+    AVRO-424. Fix the specification of the deflate codec.
+    (Scott Carey via cutting)
+
+    AVRO-431. Fix Java's mvn-install Ant target to work in clean build.
+    (cutting)
+
+    AVRO-437. Fix some typos in docs. (Amichai Rothman via cutting)
+
+    AVRO-433. Fix exceptions in Java reflect RPC. (cutting)
+
+Avro 1.3.0 (24 February 2010)
+
+  INCOMPATIBLE CHANGES
+
+    AVRO-185. Java's specific API no longer depends on reflection.
+    This reverses the inheritance of most classes in the specific and
+    reflect packages. (cutting)
+
+    AVRO-201.  Move Python data file code into its own module.
+    (Jeff Hammerbacher via cutting)
+
+    AVRO-80.  Java reflect API no longer uses Avro-specific classes
+    for string and array.  Instead now Java strings and arrays or
+    Lists are used. (cutting)
+
+    AVRO-237.  Reflect API now represents any Java Collection as an
+    Avro array.  Also inherited fields are included in records, and
+    inherited methods in protocols.  Finally, Java shorts are
+    supported as integers.  (cutting)
+
+    AVRO-160. Revised data file format and Java API.  Simplified
+    format now permits streaming but no longer supports multiple
+    schemas per file.  Java API for reading is iterator-based.
+
+    AVRO-278. Changed GenericRecord API and implementation to be
+    array-based rather than Map-based. (cutting)
+
+    AVRO-163. Re-organized source tree into separate directories for
+    each language.  (cutting)
+
+    AVRO-344. Complete rewrite of C implementation (massie)
+
+    AVRO-349. Fix C++ build for post-AVRO-163. (sbanacho)
+
+    AVRO-374. Remove and ignore files that are created by autoreconf. (sbanacho)
+
+    AVRO-387. Add IndexedRecord interface, common to both specific and
+    generic records, so that toString() and hashCode() implementations
+    can be shared.  Also fix toString() and hashCode() to not throw
+    NPE for uninitialized records. (cutting)
+
+  NEW FEATURES
+
+    AVRO-151. Validating Avro schema parser for C (massie)
+
+    AVRO-158. Permit appending to a data file from Java.  (cutting)
+
+    AVRO-154. Add 'induce' sub-command to avroj command line tool.
+    (Philip Zeyliger via cutting)
+
+    AVRO-245. Add four new avroj commands:
+      - fromjson Reads JSON records and writes to an Avro data file.
+      - tojson  Dumps an Avro data file as JSON, one record per line.
+      - fragtojson Renders a binary-encoded Avro datum as JSON.
+      - jsontofrag Renders a JSON-encoded Avro datum as binary.
+    (Philip Zeyliger via cutting)
+
+    AVRO-272. Extend RPCContext to include message.
+    (Philip Zeyliger via cutting)
+
+    AVRO-258. Add GenAvro language tool.  (Todd Lipcon via cutting)
+
+    AVRO-267. Add two new avroj commands: rpcsend and rpcreceive.
+    (Philip Zeyliger via cutting)
+
+    AVRO-271. Add a Java local RPC transceiver. (Philip Zeyliger via cutting)
+
+    AVRO-273, AVRO-275, & AVRO-279. Add Java RPC statistics collection
+    and display. (Philip Zeyliger via cutting)
+
+    AVRO-152. Add support for documentation strings to schemas,
+    protocols, and messages. (Philip Zeyliger via cutting)
+
+    AVRO-274. Make Java's data file sync interval configurable.  (cutting)
+
+    AVRO-346. Add function to validate a datum against a schema. (massie)
+
+    AVRO-306. Add Ruby implementation. (Jeff Hodges via cutting)
+
+    AVRO-135. Add compression to data files. (philz)
+
+    AVRO-368. Reserve avro.* in object container files, and
+    rename existing reserved words. (philz)
+
+    AVRO-380. Avro Container File format change: add block size to block
+    descriptor.  (Scott Carey via philz)
+
+    AVRO-322. Add a working client and server to Python implementation
+    using HTTP as a transport (hammer)
+
+    AVRO-287. Make RPC interop tests work with new Python implementation
+    (hammer)
+
+    AVRO-136. Add support for building/releasing python eggs (hammer)
+
+    AVRO-414. Add Java support for concatenating and appending data
+    files. (Scott Carey via cutting)
+
+  IMPROVEMENTS
+
+    AVRO-157. Changes from code review comments for C++. (sbanacho)
+
+    AVRO-168. Correct shared library versioning for C implementation (massie)
+
+    AVRO-142. Remove some Java unused fields and imports.  Start
+    running checkstyle on Java test code.  (Philip Zeyliger via cutting)
+
+    AVRO-147. Use configure to create makefile for C++ builds. (sbanacho)
+
+    AVRO-155. Make python avro.io.DataFileReader iterable.
+    (Jeff Hammerbacher via sharad)
+
+    AVRO-148. Add ant target to build C++ project.  (sbanacho)
+
+    AVRO-166. Improve error checking in Java schema parser.
+    (Philip Zeyliger via cutting)
+
+    AVRO-167. Refactor Java SpecificCompiler to simplify testing, and
+    add some tests. (Philip Zeyliger via cutting)
+
+    AVRO-146. Add support for using Eclipse to develop Avro's Java.
+    (Philip Zeyliger via cutting)
+
+    AVRO-149. Add Java command-line executable, "avroj".
+    (Philip Zeyliger via cutting)
+
+    AVRO-175. Split the avro_io interface into two interfaces: avro_reader
+    and avro_writer (massie)
+
+    AVRO-179. Add units tests for all Avro C primitives (massie)
+
+    AVRO-177. Upgrade Java dependencies to recent versions. (cutting)
+
+    AVRO-180. Enhance code generator script and unit tests. (sbanacho)
+
+    AVRO-186. Full read-path interoperability test (massie)
+
+    AVRO-187. Move top-level source files into separate directories
+    for easier maintenance (massie)
+
+    AVRO-188. Need to update svn ignores (massie)
+
+    AVRO-190. Use fixed size C++ types for Avro fixed types. (sbanacho)
+
+    AVRO-192. Improved errors for Java schema parsing problems. (cutting)
+
+    AVRO-195. Complex type support for write streams (massie)
+
+    AVRO-197. Add mapping of name to index for records and enums. (sbanacho)
+
+    AVRO-204. Change the way symbolic references are tracked. (sbanacho)
+
+    AVRO-205. APIs for checking schema resolution. (sbanacho)
+
+    AVRO-203. Reformat license in Python sources.
+    (Jeff Hammerbacher via cutting)
+
+    AVRO-199. Make Python test schemas more readable.
+    (Jeff Hammerbacher via cutting)
+
+    AVRO-216. Formatting cleanups to schema.py.
+    (Jeff Hammerbacher via cutting)
+
+    AVRO-202. Add __all__ listing to Python module, to ease import.
+    (Jeff Hammerbacher via cutting)
+
+    AVRO-229. Change Java to implement Flushable and Closeable
+    interfaces where appropriate.  (tomwhite via cutting)
+
+    AVRO-231. Tutorial added to C++ docs. (sbanacho)
+
+    AVRO-220. Dynamic schema resolution from writer to reader. (sbanacho)
+
+    AVRO-213. Add Apache RAT to tests, to validate licenses.  (cutting)
+
+    AVRO-233. Elaborate Java tool API. (Philip Zeyliger via cutting)
+
+    AVRO-236. Add protocol support to avroj induce tool.  (cutting)
+
+    AVRO-234. C++ code cleanup. (sbanacho)
+
+    AVRO-240. In Python, if simplejson is not available, try using
+    2.6's built-in json module. (Jeff Hammerbacher via cutting)
+
+    AVRO-242. In Java, add support for extensible string-valued
+    properties to schemas.  (cutting)
+
+    AVRO-241. In Java, add a union annotation for reflection. (cutting)
+
+    AVRO-249. In reflection, implement Java short as an int whose
+    "java-class" property is set to java.lang.Short. (cutting)
+
+    AVRO-247. In reflection, add Stringable annotation to indicate
+    classes that can be represented by an Avro string.  (cutting)
+
+    AVRO-246 Java schema parser should take schema from InputStream
+    in addition to file. (thiru)
+
+    AVRO-250. Make reflect's Union annotation applicable to message
+    parameters and return types too.  (cutting)
+
+    AVRO-253. Improve documentation of schema names in specification. (cutting)
+
+    AVRO-257. Remove some dead Java code and un-needed casts.
+    (Kevin Oliver via cutting)
+
+    AVRO-263. Change avroj command line tools to return exit codes.
+    (Todd Lipcon via cutting)
+
+    AVRO-260. Upgrade to Jackson 1.4.0. (cutting)
+
+    AVRO-269. Use java compiler to validate specific compiler's output.
+    (Philip Zeyliger via cutting)
+
+    AVRO-219. Rework Python API.  (Jeff Hammerbacher via cutting)
+
+    AVRO-264. Rework Python RPC.  (Jeff Hammerbacher via cutting)
+
+    AVRO-75. Clarify that missing values with no default values cause
+    errors, and fix Java implementation.  (cutting)
+
+    AVRO-259. Add null schema check in GenericData.Record and
+    GenericData.Array constructors. (Kevin Oliver via cutting)
+
+    AVRO-294. Clarify that bytes and fixed are unsigned, and how their
+    JSON default values are interpreted.  (Jeff Hammerbacher & cutting)
+
+    AVRO-298. Fix Java's DatumReader and DatumWriter APIs to better
+    use generics.  (philz via cutting)
+
+    AVRO-288. Implement schema resolution for Python parameters.
+    (Jeff Hammerbacher via cutting)
+
+    AVRO-282. Improve avroj build to better specify dependencies.
+    (philz via cutting)
+
+    AVRO-309. Fix python build, post-AVRO-163. (cutting)
+
+    AVRO-310. Improve top-level build.sh. (cutting)
+
+    AVRO-317. Restore Java data interop tests. (cutting)
+
+    AVRO-320. Rename avroj to be avro-tools.  Also add LICENSE.txt and
+    NOTICE.txt to it, print the NOTICE.txt and version in help, and
+    include the tools jar in distributions.  (cutting)
+
+    AVRO-314. Add mvn-install ant task to publish jar to local Maven
+    repository.  (Aaron Kimball via cutting)
+
+    AVRO-243. Use automake generated Makefile.in. (sbanacho)
+
+    AVRO-198. Fix specification of protocol name, also clarify which
+    properties are required.  (cutting)
+
+    AVRO-336. Check that appropriate schemas are passed to
+    GenericData#Record and #Array.  (cutting)
+
+    AVRO-353. Publish the C API to avro-doc package when 'dist' target
+    run (massie)
+
+    AVRO-359. Add support for encoding/decoding arrays and maps (massie)
+
+    AVRO-360. Standardize on Linux coding style instead of GNU (massie)
+
+    AVRO-362. Add test to ensure Python implementation handles Union schema
+    with two fixed types of different names (hammer)
+
+    AVRO-364. Add support for encoding/decoding records (massie)
+
+    AVRO-367. Complete memory management for the C implementation (massie)
+
+    AVRO-369. Add support for encoding/decoding enum values (massie)
+
+    AVRO-370. Add support for encoding/decoding fixed data (massie)
+
+    AVRO-371. Add support for encoding/decoding unions (massie)
+
+    AVRO-377. Add getters and setters for all Avro datum types (massie)
+
+    AVRO-378. Add example code to the C implementation and update
+              documentation (massie)
+
+    AVRO-379. Changed record getter/setter API to match other datatypes (massie)
+
+    AVRO-381. Update documentation to talk about reference counting and
+              memory management (massie)
+
+    AVRO-384. Add schema projection to the C implementation (massie)
+
+    AVRO-388. Using ResolvingDecoder in GenericDatumReader (thiru)
+
+    AVRO-386. Python implementation of compression (philz)
+
+    AVRO-394. Simplify and consolidate all data structures into hash
+    tables (massie)
+
+    AVRO-393.  Add a constructor for Utf8 that accepts byte[].
+    (Jeff Hodges via cutting)
+
+    AVRO-395. Add a cscope Makefile target (Eli Collins via massie)
+
+    AVRO-397. Whitespace change and comment clarification in
+    datafile.py (hammer)
+
+    AVRO-340. Define usage of HTTP as RPC transport in spec.  (cutting)
+
+    AVRO-342. Document that Java's socket and datagram RPC transports
+    are non-standard.  (cutting)
+
+    AVRO-208. Clarify that enum symbols must be unique.  (cutting)
+
+    AVRO-321. Restore Java RPC interop tests. (cutting)
+
+    AVRO-402. Add method for writing avro_schema_t structure to an
+              avro_writer_t (massie)
+
+    AVRO-398. avro_read_file doesn't detect eof (Eli Collins via massie)
+
+    AVRO-403. Add file object container support to C implementation (massie)
+
+    AVRO-400. Adding warning for unused parameters (Eli Collins via massie)
+
+    AVRO-409. Update contact database example to use a file object
+              container for C implementation (massie)
+
+    AVRO-420. Add namespace support to C implementation (massie)
+
+    AVRO-261. Allow Schemas to be immutable (thiru)
+
+    AVRO-412. Allow schema validation to be optional (massie)
+
+    AVRO-295. JsonEncoder is not flushed after writing using ReflectDatumWriter (thiru)
+
+    AVRO-416. Produce Java source archive.  (Ryan Rawson via cutting)
+
+    AVRO-417. Produce Java documentation archive.  (Scott Carey via cutting)
+
+    AVRO-428. Improve file read performance by buffering data (massie)
+
+    AVRO-430. Remove subversion directories from Avro C tarball (massie)
+
+  OPTIMIZATIONS
+
+    AVRO-172. More efficient schema processing (massie)
+
+    AVRO-291. Set NODELAY in Java's SocketTransceiver.
+    (Eric Evans via cutting)
+
+    AVRO-315. Performance improvements to BinaryDecoder (thiru)
+
+    AVRO-316. Optimizing inner loop functions of Avro io (thiru)
+
+    AVRO-328. Performance improvements to the validating encoder/decoder for nested records (thiru)
+
+    AVRO-345. Optimization for ResolvingDecoder (thiru)
+
+    AVRO-363. TestSchema had two tests disabled; new test for named schemas
+    named after primitives. (philz)
+
+    AVRO-354. Performance improvement to BinaryDecoder.readInt() (Kevin Oliver via thiru)
+
+    AVRO-343. Minor fixes to Eclipse config after build re-org (philz)
+
+    AVRO-383. Optimizing ResolvingDecoder for default values (thiru)
+
+    AVRO-411, AVRO-413. Add Ruby data file interop tests. (Jeff Hodges
+    via cutting)
+
+    AVRO-399. Make data file interop tests work with the Python implementation (hammer)
+
+    AVRO-392. Overhaul of Java binary decoder to significantly improve
+    performance.  (Scott Carey via cutting)
+
+  BUG FIXES
+
+    AVRO-176. Safeguard against bad istreams before reading. (sbanacho)
+
+    AVRO-141.  Fix a NullPointerException in ReflectData#isRecord().
+    (Isabel Drost via cutting)
+
+    AVRO-156.  Fix broken links to Wiki in documentation.
+    (Jeff Hammerbacher via cutting)
+
+    AVRO-165.  Fix an equals implementation in TestReflect.
+    (Philip Zeyliger via cutting)
+
+    AVRO-169.  Fix a typo in the spec. (Jeff Hammerbacher via cutting)
+
+    AVRO-189. test-c target fails (massie)
+
+    AVRO-182. Fix Java's generic and specific implementations of
+    equals() and hashCode() to be consistent with compareTo().
+    (cutting)
+
+    AVRO-193. Fix 'ant test-avroj' on Ubuntu 9.10. (cutting)
+
+    AVRO-171. Fix Java's Protocol#toString() to correctly handle
+    forward-references. (cutting)
+
+    AVRO-191. Explicitly include stdint.h for C++. (cutting via sbanacho)
+
+    AVRO-194. C++ varint encoding buffer too small. (sbanacho)
+
+    AVRO-210. Memory leak with recursive schemas when constructed
+    by hand. (sbanacho)
+
+    AVRO-211. Nested schema does not get parsed in C++. (sbanacho)
+
+    AVRO-222. Fix Python interop tests broken by AVRO-201.
+    (Jeff Hammerbacher via cutting)
+
+    AVRO-223. Fix test-avroj on Mac OS X.  (Philip Zeyliger via cutting)
+
+    AVRO-224. Code cleanup: cleaner distinction between public and private
+    methods (massie)
+
+    AVRO-221. Mangle Java reserved words in generated code to avoid
+    name conflicts.  (Philip Zeyliger via cutting)
+
+    AVRO-225. In generated Java, use dollar-sign, not underscore, to
+    prevent name conflicts. (cutting)
+
+    AVRO-227. Fix a typo in the spec document.  (Todd Lipcon via cutting)
+
+    AVRO-232. Fix C++ build in cygwin. (sbanacho)
+
+    AVRO-238. Fix so that slf4j-simple is only required by tests.  (cutting)
+
+    AVRO-184. Better eclipse configuration support. (thiru)
+
+    AVRO-256. Use fully-qualified class names in generated Java code
+    to eliminate name conflicts. (cutting)
+
+    AVRO-255. Fix Java so that, when parsing schemas, unspecified
+    namespaces are defaulted to nearest enclosing namespace. (cutting)
+
+    AVRO-262. Fix two typos in the spec.  (Jeff Hammerbacher via cutting)
+
+    AVRO-276. Fix GenAvro to specify file encoding as UTF-8.
+    (Philip Zeyliger via cutting)
+
+    AVRO-280. Fix file header schema in specification.  Also fix
+    "forrestdoc" build target to work on clean checkout.
+    (Jeff Hammerbacher & cutting)
+
+    AVRO-292. Fix Python skipping of ints and longs.
+    (Jeff Hammerbacher via cutting)
+
+    AVRO-289. Fix Python schema resolution.
+    (Jeff Hammerbacher via cutting)
+
+    AVRO-281. Symlink in build.xml does not work well with Cygwin (thiru)
+
+    AVRO-299. Fix Python numeric promotion.  (Jeff Hammerbacher via cutting)
+
+    AVRO-207. Fix Python to detect duplicate enum symbols and add
+    tests for duplicates in unions.  (Jeff Hammerbacher via cutting)
+
+    AVRO-313. Default values for fields of records and array (or map) don't work with ResolvingDecoder (thiru)
+
+    AVRO-47. Use void* for byte sequences. (sbanacho)
+
+    AVRO-337. ant test-java fails in Cygwin due to CRLF v LF problem (thiru)
+
+    AVRO-347. Add the --unsafe flag to asciidoc in order to include source/header files (massie)
+
+    AVRO-352. Incorrect binary encoding for strings and bytes (massie)
+
+    AVRO-356. RAT fails with "Unknown license" error (massie)
+
+    AVRO-355. io.Perf test harness uses different random number seeds for each run (Kevin Oliver via thiru)
+
+    AVRO-375. Initializing uninitialized encoders fails (thiru)
+
+    AVRO-373. EOF detection broken in JsonDecoder (thiru)
+
+    AVRO-382. Avro hashCode throws a NullPointerException when fields are uninitialized (Michael Armbrust via philz)
+
+    AVRO-385. Initializing uninitialized BlockingBinaryEncoder fails (thiru)
+
+    AVRO-389. ResolvingDecoder does not resolve enum well (thiru)
+
+    AVRO-390. ResolvingDecoder does not handle default values for records well (thiru)
+
+    AVRO-361. Specific Compiler fails to handle union with two fixed branches (Scott Carey via philz)
+
+    AVRO-350. Fix GenericData.Record#get(String) to return null rather than
+    throw NPE when passed a field name that's not in the record.
+    (Kevin Oliver via cutting)
+
+    AVRO-401. Fix a typo in the specification.  (Tom White via cutting)
+
+    AVRO-408. lang/c/build.sh 'dist' broken (massie)
+
+    AVRO-407. Fix a bug in the Java data file reader. (Scott Carey via cutting)
+
+    AVRO-415. Fix Ruby to work with Ruby 1.8.6.
+
+    AVRO-421. Fix some dist target issues.  (cutting)
+
+    AVRO-422. Build c++ docs in correct location. (sbanacho)
+
+Avro 1.2.0 (14 October 2009)
+
+  INCOMPATIBLE CHANGES
+
+    AVRO-115. Remove RPC's session notion to facilitate the use of
+    stateless transports like UDP and HTTP.  Add a UDP transport.
+    (cutting)
+
+    AVRO-120. Improved package and namespace handling, including:
+
+     * Removed explicit package-name specification from specific and
+       reflect public APIs.  Package names are now determined either
+       by namespace declarations or by Java classes, as appropriate.
+
+     * Changed the specific compiler to generate separate java files
+       per class, rather than nested classes.  This permits generated
+       classes to be in packages declared in their schema namespaces.
+
+     * Fix namespace defaulting.  The default namespace is declared in
+       the outermost schema or protocol.  Nested schemas can now
+       declare different namespaces than the default.
+
+     * Names may now be specified with a dotted notation, e.g.,
+       "foo.bar.Baz", to indicate the name "Baz" in namespace
+       "foo.bar".  This permits one to refer to schemas in a namespace
+       other than the default.
+
+  NEW FEATURES
+
+    AVRO-121.  Permit reflect and specific datum readers to read data
+    written with a different version of the schema than is current.
+    (cutting)
+
+    AVRO-129.  Add HTTP-based RPC client and server.  (cutting)
+
+    AVRO-24.  Add a simple bulk-data benchmark.  (cutting)
+
+    AVRO-139. Refactor HTTP servlet to separate, public class. (cutting)
+
+  IMPROVEMENTS
+
+    AVRO-99.  Use Boost framework for C++ unit tests.
+    (Scott Banachowski via cutting)
+
+    AVRO-116.  Make C++ compatible with Boost 1.32.
+    (Scott Banachowski via cutting)
+
+    AVRO-119.  Add Java GenericData.Array#toString() implementation,
+    to facilitate debugging. (cutting)
+
+    AVRO-118.  JSON encoder and decoder now permit one to write
+    multiple instances without flushing or explicitly resetting the
+    codec between each instance.  (Thiruvalluvan M. G. via cutting)
+
+    AVRO-133.  Update version number in specification document and
+    documentation tab automatically from build version. (cutting)
+
+    AVRO-131.  Permit specification of JUnit test output format.
+    (Giridharan Kesavan via cutting)
+
+    AVRO-134.  Update data file format specification to include
+    reserved metadata keys "codec" and "sync".  The only codec
+    currently defined is "null".  (Thiruvalluvan M. G. via cutting)
+
+    AVRO-138.  Add a "unit-test-java" Ant target that runs tests
+    without running checkstyle or javadoc.  (Thiruvalluvan M. G. via
+    cutting)
+
+    AVRO-140.  Add javadoc to public classes with none.  (cutting)
+
+  OPTIMIZATIONS
+
+  BUG FIXES
+
+    AVRO-132.  Fix multi-threading race condition when threads share schema objects.
+    (sbanacho)
+
+    AVRO-113.  Fix endian bug with C++ integer/long varint codec.
+    (Scott Banachowski via cutting)
+
+    AVRO-117.  Fix memory leak in C++ JSON parser.
+    (Scott Banachowski via cutting)
+
+    AVRO-122.  Fix so that, when multiple Ant targets are specified on
+    the command line that depend on ivy, ivy does not fail.  (phunt
+    via cutting)
+
+    AVRO-123.  Fix Java's specific protocol compiler so that
+    parameters and return types are unboxed. (cutting)
+
+    AVRO-125.  Fix sample protocol in specification document to use
+    the correct syntax.  (cutting)
+
+    AVRO-101.  Add Java reflect API test case using nested classes.
+    (Eelco Hillenius via cutting)
+
+    AVRO-124.  Remove Ivy's jar from distributions.  (cutting)
+
+    AVRO-137. Suppress warnings in generated java code. (cutting via sharad)
+
+Avro 1.1.0 (8 September 2009)
+
+  INCOMPATIBLE CHANGES
+
+    AVRO-110. GenericData and ReflectData have been converted to use a
+    singleton pattern.  Calls to static methods on these classes must
+    be replaced with calls on the singleton instance. (cutting)
+
+    AVRO-41. GenericArray's constructor now requires a Schema, so that
+    it may implement Comparable consistently with AVRO-108. (cutting)
+
+    AVRO-41. Several GenericDatumWriter methods (instanceOf(),
+    isRecord(), etc.) have been moved to GenericData, where they can
+    better be shared with comparators.  Applications which subclassed
+    GenericDatumWriter overriding these methods must now instead
+    subclass GenericData and pass their subclass to
+    GenericDatumWriter. (cutting)
+
+    AVRO-41. SpecificRecord's schema() method has been renamed
+    getSchema(), since it now implements the new GenericContainer
+    interface shared with GenericRecord. (cutting)
+
+  NEW FEATURES
+
+    AVRO-50. Implement JSON data codec in Java. (Thiruvalluvan
+    M. G. & cutting)
+
+    AVRO-76. Add Java RPC plugin framework.  (George Porter)
+
+    AVRO-104. Permit null fields in Java reflection.
+    (Eelco Hillenius via cutting)
+
+    AVRO-92. Describe JSON data encoding in specification
+    document. (cutting)
+
+    AVRO-108.  Add Java implementation of binary comparator.
+    (cutting)
+
+    AVRO-41. Java generic and specific data instances now implement
+    Comparable.  The implementation is consistent with the binary
+    comparator added in AVRO-108. (cutting)
+
+    AVRO-109.  Add Java support for controlling sort order via schema
+    annotations.  Record fields now support an "order" attribute whose
+    possible values are "increasing" (the default), "decreasing", and
+    "ignore".  (cutting)
+
+    AVRO-111.  Document sort ordering in the specification. (cutting)
+
+  IMPROVEMENTS
+
+    AVRO-71.  C++: make deserializer more generic.  (Scott Banachowski
+    via cutting)
+
+    AVRO-60. Fix C JSON parser to correctly handle escapes and
+    multi-byte characters.  Add tests.  (Matt Massie via cutting)
+
+    AVRO-54. Re-upgrade to testng 5.9 and re-enable listeners. (cutting)
+
+    AVRO-82. Add checkstyle to java compilation.  (Thiruvalluvan
+    M. G. via cutting)
+
+    AVRO-81. Switch back from TestNG to JUnit. (Konstantin Boudnik via
+    cutting)
+
+    AVRO-84, AVRO-85.  Clarify a few things in the specification
+    document.  (Thiruvalluvan M. G. and cutting)
+
+    AVRO-89. In fields of Java generated classes, use unboxed numeric
+    types.  (cutting)
+
+    AVRO-83. In generated Java code, elide unions with null. (cutting)
+
+    AVRO-98. Fix C++ schema parser to permit JSON attributes in any
+    order and to ignore extra attributes. (Scott Banachowski via cutting)
+
+  BUG FIXES
+
+    AVRO-78. Fix Java reflect to work on non-public fields. (cutting)
+
+    AVRO-79. Specify format for default fixed values, and implement
+    correctly in Java.  (Thiruvalluvan M. G. via cutting)
+
+    AVRO-87. Fix broken links in javadoc introduced by AVRO-82.  Also
+    change test-java build target to fail on javadoc warnings.
+    (Thiruvalluvan M. G. and cutting)
+
+    AVRO-90. Fix Java's JSON codec to correctly encode unions. (cutting)
+
+    AVRO-95. Fix writing of Java reflect-based unions.  Also extend
+    DataFileWriter to permit adding branches to a union schema while
+    writing.
+
+    AVRO-88. Fix Java's BlockingBinaryEncoder to correctly override
+    writeEnum().  (Ravi Gummadi via cutting)
+
+    AVRO-61. Add Python support for reading blocked data.
+    (Ravi Gummadi via cutting)
+
+    AVRO-97.  Fix various C++ bugs.  (Scott Banachowski via cutting)
+
+    AVRO-100.  In spec, remove warning about blocking being draft. (cutting)
+
+    AVRO-107.  Fix Protocol#equals() and Protocol#hashCode() to
+    consider the protocol's types, and also fix Schema#equals() to not
+    throw ClassCastException when a fixed schema is compared to
+    non-fixed. (cutting)
+
+    AVRO-112.  Turn off C++ debug output.  (Scott Banachowski via cutting)
+
+    AVRO-114.  Fix "cdoc" Ant target to correctly run doxygen.
+    (Matt Massie via cutting)
+
+Avro 1.0.0 (9 July 2009)
+
+  INCOMPATIBLE CHANGES
+
+    AVRO-1. Record fields are now defined with JSON arrays, rather
+    than JSON objects, since fields are ordered.  (cutting & sharad)
+
+    AVRO-9. Restrict map keys to strings.  (cutting & sharad)
+
+    AVRO-2. Optimized RPC handshake protocol for Java.  (cutting)
+
+    AVRO-57. Make ValueWriter an abstract class named Encoder and make
+    ValueReader an abstract class named Decoder, and add concrete
+    implementations named BinaryEncoder and BinaryDecoder. (cutting)
+
+    AVRO-46. Optimized RPC handshake protocol for Python.  (sharad)
+
+    AVRO-66.  Add per-call RPC metadata to Java and Python. (George
+    Porter & cutting)
+
+  NEW FEATURES
+
+    AVRO-6. Permit easier implementation of alternate generic data
+    representations, especially records with integer-indexed fields.
+    (Hong Tang via cutting)
+
+    AVRO-8. Add Java support for default values. (cutting)
+
+    AVRO-33.  C support for primitive types.  (Matt Massie via cutting)
+
+    AVRO-18.  Add support for enum types.  (cutting & sharad)
+
+    AVRO-10.  Add Java support for fixed-sized types. (cutting)
+
+    AVRO-38.  Add Python support for fixed-sized types. (sharad)
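+
+    As a brief sketch (using avro.schema.parse from this package), a
+    fixed type declares only a name and a byte size:
+
+      import avro.schema
+
+      md5 = avro.schema.parse('{"type": "fixed", "name": "MD5", "size": 16}')
+      print md5.size   # 16 (the declared size, exposed as a schema property)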
+
+    AVRO-42.  Add partial C++ implementation. (Scott Banachowski via cutting)
+
+    AVRO-25.  Add blocking value writer that permits arbitrarily long
+    arrays and maps to be efficiently written as sequences of blocks.
+    (Thiruvalluvan M. G. via cutting)
+
+    AVRO-48.  Add JSON parser for C.  (Matt Massie via cutting)
+
+    AVRO-29.  Add to Java a validating encoder & decoder, and a
+    resolving decoder.  (Thiruvalluvan M. G. & Raymie Stata)
+
+    AVRO-67.  Add per-call RPC metadata to spec. (George Porter via cutting)
+
+    AVRO-28. Add Python support for default values. (sharad via cutting)
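+
+    A minimal sketch of how a reader-side default is applied during
+    schema resolution in Python (assuming avro.io's DatumWriter,
+    DatumReader, BinaryEncoder and BinaryDecoder, and a hypothetical
+    two-field "User" record):
+
+      import StringIO
+      import avro.io
+      import avro.schema
+
+      writer_schema = avro.schema.parse(
+          '{"type": "record", "name": "User", "fields": ['
+          ' {"name": "name", "type": "string"}]}')
+      reader_schema = avro.schema.parse(
+          '{"type": "record", "name": "User", "fields": ['
+          ' {"name": "name", "type": "string"},'
+          ' {"name": "age", "type": "int", "default": 0}]}')
+
+      # Encode with the writer schema, decode with the reader schema;
+      # the missing "age" field is filled in from its default.
+      buf = StringIO.StringIO()
+      avro.io.DatumWriter(writer_schema).write({"name": "Alyssa"},
+                                               avro.io.BinaryEncoder(buf))
+      buf.seek(0)
+      print avro.io.DatumReader(writer_schema, reader_schema).read(
+          avro.io.BinaryDecoder(buf))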
+
+  IMPROVEMENTS
+
+    AVRO-11.  Re-implement specific and reflect datum readers and
+    writers to leverage AVRO-6.  (cutting)
+
+    AVRO-13. Use dictionary instead of if-else in validate. (sharad)
+
+    AVRO-5. Add java versus python RPC interoperability tests.
+    (sharad)
+
+    AVRO-16.  Minor documentation improvements.  (cutting)
+
+    AVRO-15. Override __eq__() and __hash__() in Schema classes.
+    (sharad)
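+
+    For example (a sketch using avro.schema.parse), two independently
+    parsed but structurally identical schemas now compare equal:
+
+      import avro.schema
+
+      a = avro.schema.parse('{"type": "map", "values": "long"}')
+      b = avro.schema.parse('{"type": "map", "values": "long"}')
+      assert a == b   # structural equality via the overridden __eq__()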
+
+    AVRO-26. Switch tests from JUnit to TestNG.  (Konstantin Boudnik
+    via cutting)
+
+    AVRO-34. Upgrade to Jackson version 1.0.0.  (cutting)
+
+    AVRO-37. Add C api docs.  Also link to py docs.  (Matt Massie & cutting)
+
+    AVRO-32. Java specific generated record classes now implement
+    equals() and hashCode().  (cutting)
+
+    AVRO-48. Remove unused imports and annotations.
+    (Thiruvalluvan M. G. via cutting)
+
+    AVRO-53. Use Ivy to retrieve Java dependencies.  (cutting)
+
+    AVRO-56. Use Jackson to generate JSON from Java.  (cutting)
+
+    AVRO-36. Correctly encode and decode binary default values.
+    (cutting)
+
+    AVRO-59.  C++: make serializer more generic.  (Scott Banachowski
+    via cutting)
+
+    AVRO-68. Add license headers to C sources and improve C packaging.
+    (Matt Massie via cutting)
+
+    AVRO-351. Shorten induce tool description; add check to avoid overly verbose
+    descriptions. (philz)
+
+  OPTIMIZATIONS
+
+  BUG FIXES
+
+    AVRO-3. Fix ValueReader to throw an exception at EOF.
+    (Pat Hunt via cutting)
+
+    AVRO-4. Fix so that specific code generation works under Eclipse.
+    (Pat Hunt via cutting)
+
+    AVRO-14. Fix so that EOF is not thrown when one attempts to read
+    an empty buffer.  (sharad via cutting)
+
+    AVRO-31. Fix Java package imports in generated specific classes.
+    (sharad via cutting)
+
+    AVRO-21. Default Java namespace from containing definition. (cutting)
+
+    AVRO-12. Fix recursive schemas in Java so that equals() and
+    hashCode() do not cause a stack overflow.  (cutting)
+
+    AVRO-22. When parsing schemas in Java, do not permit anonymous.
+    (cutting)
+
+    AVRO-39. Fix bug in Java record schema toString().  (sharad)
+
+    AVRO-40. Fix typo in specification, where 'unsigned' was used where
+    'signed' was intended.  (cutting)
+
+    AVRO-44. Fix so that 'ant clean' works even if C has not been
+    built.  (Matt Massie via cutting)
+
+    AVRO-45. Fix C++ compilation so that the Python script need not
+    be made executable.  (Scott Banachowski via cutting)
+
+    AVRO-51. Fix testio.py to exit correctly. (Philip Zeyliger
+    via sharad)
+
+    AVRO-55. Fix two spec document typos.  (cutting)
+
+    AVRO-69. Make C's install-sh script executable.  (Matt Massie via cutting)
+
+    AVRO-70. Add license header to json_schema.y.  (Matt Massie via cutting)
+
+    AVRO-74. Add missing license headers in C++.  (cutting)
+
+    AVRO-73. Workaround in python to fix simplejson bug on Mac OS. (sharad)
+
+    AVRO-64. Fix socket and parser issue on Mac OS. (sharad)
+
+    AVRO-77. Fix C unit tests on Mac OS.  (Matt Massie via cutting)
diff --git a/DIST_README.txt b/DIST_README.txt
new file mode 100644
index 0000000..1f6c8fd
--- /dev/null
+++ b/DIST_README.txt
@@ -0,0 +1,14 @@
+Apache Avro Distribution 
+
+Avro is a data serialization system.
+
+This distribution contains the following files:
+
+  - avro-src-x.y.z.tar.gz contains the full source for Avro, including
+    all programming language implementations, documentation source, etc.
+
+  - avro-doc-x.y.z.tar.gz contains Avro's pre-built documentation.
+
+  - the c/, cpp/, java/, php/, py/, and ruby/ subdirectories contain
+    pre-built, language-specific binaries, bundles, etc. as
+    conveniences.
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..6d3f211
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,308 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+----------------------------------------------------------------------
+License for the Jansson C JSON parser used in the C implementation:
+
+Copyright (c) 2009 Petri Lehtinen <petri at digip.org>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+----------------------------------------------------------------------
+License for the Json.NET used in the C# implementation:
+
+Copyright (c) 2007 James Newton-King
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+----------------------------------------------------------------------
+License for msinttypes used in the C implementation:
+Source from:
+http://code.google.com/p/msinttypes/downloads/detail?name=msinttypes-r26.zip
+
+Copyright (c) 2006-2008 Alexander Chemeris
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+  1. Redistributions of source code must retain the above copyright notice,
+     this list of conditions and the following disclaimer.
+
+  2. Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+
+  3. The name of the author may be used to endorse or promote products
+     derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+----------------------------------------------------------------------
+License for Dirent API for Microsoft Visual Studio used in the C implementation:
+Source from:
+http://www.softagalleria.net/download/dirent/dirent-1.11.zip
+
+Copyright (C) 2006 Toni Ronkko
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+``Software''), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL TONI RONKKO BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+----------------------------------------------------------------------
diff --git a/NOTICE.txt b/NOTICE.txt
new file mode 100644
index 0000000..e601a8e
--- /dev/null
+++ b/NOTICE.txt
@@ -0,0 +1,9 @@
+Apache Avro
+Copyright 2010 The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).
+
+C JSON parsing provided by Jansson and
+written by Petri Lehtinen. The original software is
+available from http://www.digip.org/jansson/.
diff --git a/README.txt b/README.txt
new file mode 100644
index 0000000..a8f66f7
--- /dev/null
+++ b/README.txt
@@ -0,0 +1,9 @@
+Apache Avro™ is a data serialization system.
+
+To learn more about Avro, please visit our website at:
+
+  http://avro.apache.org/
+
+To contribute to Avro, please read:
+
+  https://cwiki.apache.org/AVRO/how-to-contribute.html
diff --git a/build.sh b/build.sh
new file mode 100755
index 0000000..06961c0
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,182 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e						  # exit on error
+
+cd `dirname "$0"`				  # connect to root
+
+VERSION=`cat share/VERSION.txt`
+
+function usage {
+  echo "Usage: $0 {test|dist|sign|clean}"
+  exit 1
+}
+
+if [ $# -eq 0 ]
+then
+  usage
+fi
+
+set -x						  # echo commands
+
+for target in "$@"
+do
+
+case "$target" in
+
+    test)
+	# run lang-specific tests
+        (cd lang/java; mvn test)
+	(cd lang/py; ant test)
+	(cd lang/py3; python3 setup.py test)
+	(cd lang/c; ./build.sh test)
+	(cd lang/c++; ./build.sh test)
+	(cd lang/csharp; ./build.sh test)
+	(cd lang/js; ./build.sh test)
+	(cd lang/ruby; ./build.sh test)
+	(cd lang/php; ./build.sh test)
+	(cd lang/perl; perl ./Makefile.PL && make test)
+
+	# create interop test data
+        mkdir -p build/interop/data
+	(cd lang/java/avro; mvn -P interop-data-generate generate-resources)
+	(cd lang/py; ant interop-data-generate)
+	(cd lang/c; ./build.sh interop-data-generate)
+	#(cd lang/c++; make interop-data-generate)
+	(cd lang/ruby; rake generate_interop)
+	(cd lang/php; ./build.sh interop-data-generate)
+
+	# run interop data tests
+	(cd lang/java; mvn test -P interop-data-test)
+	(cd lang/py; ant interop-data-test)
+	(cd lang/c; ./build.sh interop-data-test)
+	#(cd lang/c++; make interop-data-test)
+	(cd lang/ruby; rake interop)
+	(cd lang/php; ./build.sh test-interop)
+
+	# java needs to package the jars for the interop rpc tests
+        (cd lang/java; mvn package -DskipTests)
+	# run interop rpc test
+        /bin/bash share/test/interop/bin/test_rpc_interop.sh
+
+	;;
+
+    dist)
+        # ensure version matches
+        # FIXME: enforcer is broken:MENFORCER-42
+        # mvn enforcer:enforce -Davro.version=$VERSION
+        
+	# build source tarball
+        mkdir -p build
+
+        SRC_DIR=avro-src-$VERSION
+
+	rm -rf build/${SRC_DIR}
+	svn export --force . build/${SRC_DIR}
+
+	#runs RAT on artifacts
+        mvn -N -P rat antrun:run
+
+	mkdir -p dist
+        (cd build; tar czf ../dist/${SRC_DIR}.tar.gz ${SRC_DIR})
+
+	# build lang-specific artifacts
+        
+	(cd lang/java; mvn package -DskipTests -Dhadoop.version=2; rm -rf mapred/target/classes/;
+	  mvn -P dist package -DskipTests -Davro.version=$VERSION javadoc:aggregate) 
+        (cd lang/java/trevni/doc; mvn site)
+        (mvn -N -P copy-artifacts antrun:run) 
+
+	(cd lang/py; ant dist)
+	(cd lang/py3; python3 setup.py sdist; cp -r dist ../../dist/py3)
+
+	(cd lang/c; ./build.sh dist)
+
+	(cd lang/c++; ./build.sh dist)
+
+	(cd lang/csharp; ./build.sh dist)
+
+	(cd lang/js; ./build.sh dist)
+
+	(cd lang/ruby; ./build.sh dist)
+
+	(cd lang/php; ./build.sh dist)
+
+        mkdir -p dist/perl
+	(cd lang/perl; perl ./Makefile.PL && make dist)
+        cp lang/perl/Avro-$VERSION.tar.gz dist/perl/
+
+	# build docs
+	(cd doc; ant)
+	(cd build; tar czf ../dist/avro-doc-$VERSION.tar.gz avro-doc-$VERSION)
+
+	cp DIST_README.txt dist/README.txt
+	;;
+
+    sign)
+
+	set +x
+
+	echo -n "Enter password: "
+	stty -echo
+	read password
+	stty echo
+
+	for f in $(find dist -type f \
+	    \! -name '*.md5' \! -name '*.sha1' \
+	    \! -name '*.asc' \! -name '*.txt' );
+	do
+	    (cd `dirname $f`; md5sum `basename $f`) > $f.md5
+	    (cd `dirname $f`; sha1sum `basename $f`) > $f.sha1
+	    gpg --passphrase $password --armor --output $f.asc --detach-sig $f
+	done
+
+	set -x
+	;;
+
+    clean)
+	rm -rf build dist
+	(cd doc; ant clean)
+
+        (mvn clean)         
+
+	(cd lang/py; ant clean)
+	(cd lang/py3; python3 setup.py clean)
+
+	(cd lang/c; ./build.sh clean)
+
+	(cd lang/c++; ./build.sh clean)
+
+	(cd lang/csharp; ./build.sh clean)
+
+	(cd lang/js; ./build.sh clean)
+
+	(cd lang/ruby; ./build.sh clean)
+
+	(cd lang/php; ./build.sh clean)
+
+	(cd lang/perl; [ -f Makefile ] && make clean)
+	;;
+
+    *)
+        usage
+        ;;
+esac
+
+done
+
+exit 0
diff --git a/doc/build.xml b/doc/build.xml
new file mode 100644
index 0000000..5f7269b
--- /dev/null
+++ b/doc/build.xml
@@ -0,0 +1,37 @@
+<?xml version="1.0"?>
+
+<project name="doc" default="doc" basedir=".">
+  
+  <!-- Load user's default properties. -->
+  <property file="${user.home}/build.properties" />
+  
+  <loadresource property="version">
+    <file file="${basedir}/../share/VERSION.txt"/>
+  </loadresource>
+
+  <property name="build.dir" value="../build/avro-doc-${version}"/>
+
+  <target name="doc" depends="forrest.check" description="Generate forrest-based documentation. To use, specify -Dforrest.home=<base of Apache Forrest installation> on the command line." if="forrest.home">
+    <mkdir dir="${build.dir}"/>
+    <echo  file="../build/avro.ent" append="false">
+      &lt;!ENTITY AvroVersion "${version}"&gt;
+    </echo>
+    <exec executable="${forrest.home}/bin/forrest" failonerror="true">
+      <arg value="-Dproject.configfile=${basedir}/src/cli.xconf"/>
+      <arg value="-Dproject.content-dir=src"/>
+      <arg value="-Dproject.site=../${build.dir}/"/>
+    </exec>
+    <copy todir="${build.dir}/examples">
+      <fileset dir="examples"/>
+    </copy>
+  </target>
+
+  <target name="forrest.check" unless="forrest.home">
+    <fail message="'forrest.home' is not defined. Please pass -Dforrest.home=&lt;base of Apache Forrest installation&gt; to Ant on the command-line." />
+  </target>
+
+  <target name="clean" description="Delete build files, and their directories">
+    <delete dir="${basedir}/build"/>
+  </target>
+
+</project>
diff --git a/doc/examples/example.py b/doc/examples/example.py
new file mode 100644
index 0000000..8e8927c
--- /dev/null
+++ b/doc/examples/example.py
@@ -0,0 +1,15 @@
+import avro.schema
+from avro.datafile import DataFileReader, DataFileWriter
+from avro.io import DatumReader, DatumWriter
+
+schema = avro.schema.parse(open("user.avsc").read())
+
+writer = DataFileWriter(open("/tmp/users.avro", "w"), DatumWriter(), schema)
+writer.append({"name": "Alyssa", "favorite_number": 256, "WTF": 2})
+writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"})
+writer.close()
+
+reader = DataFileReader(open("/tmp/users.avro", "r"), DatumReader())
+for user in reader:
+    print user
+reader.close()
diff --git a/doc/examples/java-example/pom.xml b/doc/examples/java-example/pom.xml
new file mode 100644
index 0000000..a38d275
--- /dev/null
+++ b/doc/examples/java-example/pom.xml
@@ -0,0 +1,52 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>example</groupId>
+  <artifactId>java-example</artifactId>
+  <packaging>jar</packaging>
+  <version>1.0-SNAPSHOT</version>
+  <name>java-example</name>
+  <url>http://maven.apache.org</url>
+  <dependencies>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>3.8.1</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.avro</groupId>
+      <artifactId>avro</artifactId>
+      <version>1.7.5</version>
+    </dependency>
+  </dependencies>
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.avro</groupId>
+        <artifactId>avro-maven-plugin</artifactId>
+        <version>1.7.5</version>
+        <executions>
+          <execution>
+            <phase>generate-sources</phase>
+            <goals>
+              <goal>schema</goal>
+            </goals>
+            <configuration>
+              <sourceDirectory>${project.basedir}/../</sourceDirectory>
+              <outputDirectory>${project.basedir}/src/main/java/</outputDirectory>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-compiler-plugin</artifactId>
+        <configuration>
+          <source>1.6</source>
+          <target>1.6</target>
+        </configuration>
+      </plugin>
+    </plugins>
+  </build>
+</project>
diff --git a/doc/examples/java-example/src/main/java/example/GenericMain.java b/doc/examples/java-example/src/main/java/example/GenericMain.java
new file mode 100644
index 0000000..6a2995b
--- /dev/null
+++ b/doc/examples/java-example/src/main/java/example/GenericMain.java
@@ -0,0 +1,53 @@
+package example;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.avro.Schema;
+import org.apache.avro.Schema.Parser;
+import org.apache.avro.file.DataFileReader;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.io.DatumReader;
+import org.apache.avro.io.DatumWriter;
+
+public class GenericMain {
+	public static void main(String[] args) throws IOException {
+		Schema schema = new Parser().parse(new File("user.avsc"));
+		
+		GenericRecord user1 = new GenericData.Record(schema);
+		user1.put("name", "Alyssa");
+		user1.put("favorite_number", 256);
+		// Leave favorite color null
+		
+		GenericRecord user2 = new GenericData.Record(schema);
+		user2.put("name", "Ben");
+		user2.put("favorite_number", 7);
+		user2.put("favorite_color", "red");
+		
+		// Serialize user1 and user2 to disk
+		File file = new File("users.avro");
+		DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
+		DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);
+		dataFileWriter.create(schema, file);
+		dataFileWriter.append(user1);
+		dataFileWriter.append(user2);
+		dataFileWriter.close();
+
+		// Deserialize users from disk
+		DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
+		DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(file, datumReader);
+		GenericRecord user = null;
+		while (dataFileReader.hasNext()) {
+			// Reuse user object by passing it to next(). This saves us from
+			// allocating and garbage collecting many objects for files with
+			// many items.
+			user = dataFileReader.next(user);
+			System.out.println(user);
+		}
+		
+	}
+}
diff --git a/doc/examples/java-example/src/main/java/example/SpecificMain.java b/doc/examples/java-example/src/main/java/example/SpecificMain.java
new file mode 100644
index 0000000..36d63b2
--- /dev/null
+++ b/doc/examples/java-example/src/main/java/example/SpecificMain.java
@@ -0,0 +1,55 @@
+package example;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.avro.file.DataFileReader;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.io.DatumReader;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.specific.SpecificDatumReader;
+import org.apache.avro.specific.SpecificDatumWriter;
+
+import example.avro.User;
+
+public class SpecificMain {
+	public static void main(String[] args) throws IOException {
+		User user1 = new User();
+		user1.setName("Alyssa");
+		user1.setFavoriteNumber(256);
+		// Leave favorite color null
+
+		// Alternate constructor
+		User user2 = new User("Ben", 7, "red");
+		
+		// Construct via builder
+		User user3 = User.newBuilder()
+				     .setName("Charlie")
+				     .setFavoriteColor("blue")
+				     .setFavoriteNumber(null)
+				     .build();
+
+		// Serialize user1 and user2 to disk
+		File file = new File("users.avro");
+		DatumWriter<User> userDatumWriter = new SpecificDatumWriter<User>(User.class);
+		DataFileWriter<User> dataFileWriter = new DataFileWriter<User>(userDatumWriter);
+		dataFileWriter.create(user1.getSchema(), file);
+		dataFileWriter.append(user1);
+		dataFileWriter.append(user2);
+		dataFileWriter.append(user3);
+		dataFileWriter.close();
+
+		// Deserialize Users from disk
+		DatumReader<User> userDatumReader = new SpecificDatumReader<User>(User.class);
+		DataFileReader<User> dataFileReader = new DataFileReader<User>(file, userDatumReader);
+		User user = null;
+		while (dataFileReader.hasNext()) {
+			// Reuse user object by passing it to next(). This saves us from
+			// allocating and garbage collecting many objects for files with
+			// many items.
+			user = dataFileReader.next(user);
+			System.out.println(user);
+		}
+
+	}
+}
diff --git a/doc/examples/mr-example/pom.xml b/doc/examples/mr-example/pom.xml
new file mode 100644
index 0000000..0f7b15e
--- /dev/null
+++ b/doc/examples/mr-example/pom.xml
@@ -0,0 +1,59 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <groupId>example</groupId>
+  <artifactId>mr-example</artifactId>
+  <version>1.0</version>
+  <packaging>jar</packaging>
+
+  <name>mr-example</name>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-compiler-plugin</artifactId>
+        <configuration>
+          <source>1.6</source>
+          <target>1.6</target>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.avro</groupId>
+        <artifactId>avro-maven-plugin</artifactId>
+        <version>1.7.5</version>
+        <executions>
+          <execution>
+            <phase>generate-sources</phase>
+            <goals>
+              <goal>schema</goal>
+            </goals>
+            <configuration>
+              <sourceDirectory>${project.basedir}/../</sourceDirectory>
+              <outputDirectory>${project.build.directory}/generated-sources/java</outputDirectory>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.avro</groupId>
+      <artifactId>avro</artifactId>
+      <version>1.7.5</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.avro</groupId>
+      <artifactId>avro-mapred</artifactId>
+      <version>1.7.5</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-core</artifactId>
+      <version>1.1.0</version>
+    </dependency>
+  </dependencies>
+</project>
diff --git a/doc/examples/mr-example/src/main/java/example/AvroWordCount.java b/doc/examples/mr-example/src/main/java/example/AvroWordCount.java
new file mode 100644
index 0000000..7611198
--- /dev/null
+++ b/doc/examples/mr-example/src/main/java/example/AvroWordCount.java
@@ -0,0 +1,105 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package example;
+
+import java.io.IOException;
+import java.util.*;
+
+import org.apache.avro.*;
+import org.apache.avro.Schema.Type;
+import org.apache.avro.mapred.*;
+import org.apache.hadoop.conf.*;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.*;
+import org.apache.hadoop.mapred.*;
+import org.apache.hadoop.util.*;
+
+/**
+ * The classic WordCount example modified to output Avro Pair<CharSequence,
+ * Integer> records instead of text.
+ */
+public class AvroWordCount extends Configured implements Tool {
+
+  public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {
+    private final static IntWritable one = new IntWritable(1);
+    private Text word = new Text();
+
+    public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
+        throws IOException {
+      String line = value.toString();
+      StringTokenizer tokenizer = new StringTokenizer(line);
+      while (tokenizer.hasMoreTokens()) {
+        word.set(tokenizer.nextToken());
+        output.collect(word, one);
+      }
+    }
+  }
+
+  public static class Reduce extends MapReduceBase
+    implements Reducer<Text, IntWritable,
+                       AvroWrapper<Pair<CharSequence, Integer>>, NullWritable> {
+
+    public void reduce(Text key, Iterator<IntWritable> values,
+        OutputCollector<AvroWrapper<Pair<CharSequence, Integer>>, NullWritable> output,
+        Reporter reporter) throws IOException {
+      int sum = 0;
+      while (values.hasNext()) {
+        sum += values.next().get();
+      }
+      output.collect(new AvroWrapper<Pair<CharSequence, Integer>>(
+          new Pair<CharSequence, Integer>(key.toString(), sum)),
+          NullWritable.get());
+    }
+  }
+
+  public int run(String[] args) throws Exception {
+    if (args.length != 2) {
+      System.err.println("Usage: AvroWordCount <input path> <output path>");
+      return -1;
+    }
+
+    JobConf conf = new JobConf(AvroWordCount.class);
+    conf.setJobName("wordcount");
+
+    // We call setOutputSchema first so we can override the configuration
+    // parameters it sets
+    AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Type.STRING),
+        Schema.create(Type.INT)));
+
+    conf.setMapperClass(Map.class);
+    conf.setReducerClass(Reduce.class);
+
+    conf.setInputFormat(TextInputFormat.class);
+
+    conf.setMapOutputKeyClass(Text.class);
+    conf.setMapOutputValueClass(IntWritable.class);
+    conf.setOutputKeyComparatorClass(Text.Comparator.class);
+
+    FileInputFormat.setInputPaths(conf, new Path(args[0]));
+    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
+
+    JobClient.runJob(conf);
+    return 0;
+  }
+
+  public static void main(String[] args) throws Exception {
+    int res = ToolRunner.run(new Configuration(), new AvroWordCount(), args);
+    System.exit(res);
+  }
+}
diff --git a/doc/examples/mr-example/src/main/java/example/GenerateData.java b/doc/examples/mr-example/src/main/java/example/GenerateData.java
new file mode 100644
index 0000000..afef802
--- /dev/null
+++ b/doc/examples/mr-example/src/main/java/example/GenerateData.java
@@ -0,0 +1,39 @@
+package example;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.specific.SpecificDatumWriter;
+
+import example.avro.User;
+
+public class GenerateData {
+  public static final String[] COLORS = {"red", "orange", "yellow", "green", "blue", "purple", null};
+  public static final int USERS = 20;
+  public static final String PATH = "./input/users.avro";
+
+  public static void main(String[] args) throws IOException {
+    // Open data file
+    File file = new File(PATH);
+    if (file.getParentFile() != null) {
+      file.getParentFile().mkdirs();
+    }
+    DatumWriter<User> userDatumWriter = new SpecificDatumWriter<User>(User.class);
+    DataFileWriter<User> dataFileWriter = new DataFileWriter<User>(userDatumWriter);
+    dataFileWriter.create(User.SCHEMA$, file);
+
+    // Create random users
+    User user;
+    Random random = new Random();
+    for (int i = 0; i < USERS; i++) {
+      user = new User("user", null, COLORS[random.nextInt(COLORS.length)]);
+      dataFileWriter.append(user);
+      System.out.println(user);
+    }
+
+    dataFileWriter.close();
+  }
+}
diff --git a/doc/examples/mr-example/src/main/java/example/MapReduceAvroWordCount.java b/doc/examples/mr-example/src/main/java/example/MapReduceAvroWordCount.java
new file mode 100644
index 0000000..8ca6a2f
--- /dev/null
+++ b/doc/examples/mr-example/src/main/java/example/MapReduceAvroWordCount.java
@@ -0,0 +1,124 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package example;
+
+import java.io.IOException;
+import java.util.*;
+
+import org.apache.avro.Schema;
+import org.apache.avro.Schema.Type;
+import org.apache.avro.mapred.AvroWrapper;
+import org.apache.avro.mapred.Pair;
+import org.apache.avro.mapreduce.AvroJob;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * The classic WordCount example modified to output Avro Pair<CharSequence,
+ * Integer> records instead of text.
+ */
+public class MapReduceAvroWordCount extends Configured implements Tool {
+
+  public static class Map
+    extends Mapper<LongWritable, Text, Text, IntWritable> {
+
+    private final static IntWritable one = new IntWritable(1);
+    private Text word = new Text();
+
+    public void map(LongWritable key, Text value, Context context)
+      throws IOException, InterruptedException {
+      String line = value.toString();
+      StringTokenizer tokenizer = new StringTokenizer(line);
+      while (tokenizer.hasMoreTokens()) {
+        word.set(tokenizer.nextToken());
+        context.write(word, one);
+      }
+    }
+  }
+
+  public static class Reduce
+    extends Reducer<Text, IntWritable,
+            AvroWrapper<Pair<CharSequence, Integer>>, NullWritable> {
+
+    public void reduce(Text key, Iterable<IntWritable> values,
+                       Context context)
+      throws IOException, InterruptedException {
+      int sum = 0;
+      for (IntWritable value : values) {
+        sum += value.get();
+      }
+      context.write(new AvroWrapper<Pair<CharSequence, Integer>>
+                    (new Pair<CharSequence, Integer>(key.toString(), sum)),
+                    NullWritable.get());
+    }
+  }
+
+  public int run(String[] args) throws Exception {
+    if (args.length != 2) {
+      System.err.println("Usage: AvroWordCount <input path> <output path>");
+      return -1;
+    }
+
+    Job job = new Job(getConf());
+    job.setJarByClass(MapReduceAvroWordCount.class);
+    job.setJobName("wordcount");
+
+    // We call setOutputSchema first so we can override the configuration
+    // parameters it sets
+    AvroJob.setOutputKeySchema(job,
+                               Pair.getPairSchema(Schema.create(Type.STRING),
+                                                  Schema.create(Type.INT)));
+    job.setOutputValueClass(NullWritable.class);
+
+    job.setMapperClass(Map.class);
+    job.setReducerClass(Reduce.class);
+
+    job.setInputFormatClass(TextInputFormat.class);
+
+    job.setMapOutputKeyClass(Text.class);
+    job.setMapOutputValueClass(IntWritable.class);
+    job.setSortComparatorClass(Text.Comparator.class);
+
+    FileInputFormat.setInputPaths(job, new Path(args[0]));
+    FileOutputFormat.setOutputPath(job, new Path(args[1]));
+
+    job.waitForCompletion(true);
+    
+    return 0;
+  }
+
+  public static void main(String[] args) throws Exception {
+    int res =
+      ToolRunner.run(new Configuration(), new MapReduceAvroWordCount(), args);
+    System.exit(res);
+  }
+}
diff --git a/doc/examples/mr-example/src/main/java/example/MapReduceColorCount.java b/doc/examples/mr-example/src/main/java/example/MapReduceColorCount.java
new file mode 100644
index 0000000..bbfd71c
--- /dev/null
+++ b/doc/examples/mr-example/src/main/java/example/MapReduceColorCount.java
@@ -0,0 +1,107 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package example;
+
+import java.io.IOException;
+
+import org.apache.avro.Schema;
+import org.apache.avro.mapred.AvroKey;
+import org.apache.avro.mapred.AvroValue;
+import org.apache.avro.mapreduce.AvroJob;
+import org.apache.avro.mapreduce.AvroKeyInputFormat;
+import org.apache.avro.mapreduce.AvroKeyValueOutputFormat;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import example.avro.User;
+
+public class MapReduceColorCount extends Configured implements Tool {
+
+  public static class ColorCountMapper extends
+      Mapper<AvroKey<User>, NullWritable, Text, IntWritable> {
+
+    @Override
+    public void map(AvroKey<User> key, NullWritable value, Context context)
+        throws IOException, InterruptedException {
+
+      CharSequence color = key.datum().getFavoriteColor();
+      if (color == null) {
+        color = "none";
+      }
+      context.write(new Text(color.toString()), new IntWritable(1));
+    }
+  }
+
+  public static class ColorCountReducer extends
+      Reducer<Text, IntWritable, AvroKey<CharSequence>, AvroValue<Integer>> {
+
+    @Override
+    public void reduce(Text key, Iterable<IntWritable> values,
+        Context context) throws IOException, InterruptedException {
+
+      int sum = 0;
+      for (IntWritable value : values) {
+        sum += value.get();
+      }
+      context.write(new AvroKey<CharSequence>(key.toString()), new AvroValue<Integer>(sum));
+    }
+  }
+
+  public int run(String[] args) throws Exception {
+    if (args.length != 2) {
+      System.err.println("Usage: MapReduceColorCount <input path> <output path>");
+      return -1;
+    }
+
+    Job job = new Job(getConf());
+    job.setJarByClass(MapReduceColorCount.class);
+    job.setJobName("Color Count");
+
+    FileInputFormat.setInputPaths(job, new Path(args[0]));
+    FileOutputFormat.setOutputPath(job, new Path(args[1]));
+
+    job.setInputFormatClass(AvroKeyInputFormat.class);
+    job.setMapperClass(ColorCountMapper.class);
+    AvroJob.setInputKeySchema(job, User.getClassSchema());
+    job.setMapOutputKeyClass(Text.class);
+    job.setMapOutputValueClass(IntWritable.class);
+
+    job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
+    job.setReducerClass(ColorCountReducer.class);
+    AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING));
+    AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT));
+
+    return (job.waitForCompletion(true) ? 0 : 1);
+  }
+
+  public static void main(String[] args) throws Exception {
+    int res = ToolRunner.run(new MapReduceColorCount(), args);
+    System.exit(res);
+  }
+}
diff --git a/doc/examples/mr-example/src/main/java/example/MapredColorCount.java b/doc/examples/mr-example/src/main/java/example/MapredColorCount.java
new file mode 100644
index 0000000..8e6c2a7
--- /dev/null
+++ b/doc/examples/mr-example/src/main/java/example/MapredColorCount.java
@@ -0,0 +1,93 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package example;
+
+import java.io.IOException;
+
+import org.apache.avro.*;
+import org.apache.avro.Schema.Type;
+import org.apache.avro.mapred.*;
+import org.apache.hadoop.conf.*;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.*;
+import org.apache.hadoop.util.*;
+
+import example.avro.User;
+
+public class MapredColorCount extends Configured implements Tool {
+
+  public static class ColorCountMapper extends AvroMapper<User, Pair<CharSequence, Integer>> {
+    @Override
+    public void map(User user, AvroCollector<Pair<CharSequence, Integer>> collector, Reporter reporter)
+        throws IOException {
+      CharSequence color = user.getFavoriteColor();
+      // We need this check because the User.favorite_color field has type ["string", "null"]
+      if (color == null) {
+        color = "none";
+      }
+      collector.collect(new Pair<CharSequence, Integer>(color, 1));
+    }
+  }
+
+  public static class ColorCountReducer extends AvroReducer<CharSequence, Integer,
+                                                            Pair<CharSequence, Integer>> {
+    @Override
+    public void reduce(CharSequence key, Iterable<Integer> values,
+                       AvroCollector<Pair<CharSequence, Integer>> collector,
+                       Reporter reporter)
+        throws IOException {
+      int sum = 0;
+      for (Integer value : values) {
+        sum += value;
+      }
+      collector.collect(new Pair<CharSequence, Integer>(key, sum));
+    }
+  }
+
+  public int run(String[] args) throws Exception {
+    if (args.length != 2) {
+      System.err.println("Usage: MapredColorCount <input path> <output path>");
+      return -1;
+    }
+
+    JobConf conf = new JobConf(getConf(), MapredColorCount.class);
+    conf.setJobName("colorcount");
+
+    FileInputFormat.setInputPaths(conf, new Path(args[0]));
+    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
+
+    AvroJob.setMapperClass(conf, ColorCountMapper.class);
+    AvroJob.setReducerClass(conf, ColorCountReducer.class);
+
+    // Note that AvroJob.setInputSchema and AvroJob.setOutputSchema set
+    // relevant config options such as input/output format, map output
+    // classes, and output key class.
+    AvroJob.setInputSchema(conf, User.getClassSchema());
+    AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Type.STRING),
+        Schema.create(Type.INT)));
+
+    JobClient.runJob(conf);
+    return 0;
+  }
+
+  public static void main(String[] args) throws Exception {
+    int res = ToolRunner.run(new Configuration(), new MapredColorCount(), args);
+    System.exit(res);
+  }
+}
diff --git a/doc/examples/user.avsc b/doc/examples/user.avsc
new file mode 100644
index 0000000..117ea70
--- /dev/null
+++ b/doc/examples/user.avsc
@@ -0,0 +1,9 @@
+{"namespace": "example.avro",
+ "type": "record",
+ "name": "User",
+ "fields": [
+     {"name": "name", "type": "string"},
+     {"name": "favorite_number",  "type": ["int", "null"]},
+     {"name": "favorite_color", "type": ["string", "null"]}
+ ]
+}
diff --git a/doc/forrest.properties b/doc/forrest.properties
new file mode 100644
index 0000000..61ffba3
--- /dev/null
+++ b/doc/forrest.properties
@@ -0,0 +1,3 @@
+# Make Forrest work with Java6
+forrest.validate.sitemap=false
+
diff --git a/doc/src/cli.xconf b/doc/src/cli.xconf
new file mode 100644
index 0000000..7219869
--- /dev/null
+++ b/doc/src/cli.xconf
@@ -0,0 +1,328 @@
+<?xml version="1.0"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<!--+
+    |  This is the Apache Cocoon command line configuration file.
+    |  Here you give the command line interface details of where
+    |  to find various aspects of your Cocoon installation.
+    |
+    |  If you wish, you can also use this file to specify the URIs
+    |  that you wish to generate.
+    |
+    |  The current configuration information in this file is for
+    |  building the Cocoon documentation. Therefore, all links here
+    |  are relative to the build context dir, which, in the build.xml
+    |  file, is set to ${build.context}
+    |
+    |  Options:
+    |    verbose:            increase amount of information presented
+    |                        to standard output (default: false)
+    |    follow-links:       whether linked pages should also be
+    |                        generated (default: true)
+    |    precompile-only:    precompile sitemaps and XSP pages, but
+    |                        do not generate any pages (default: false)
+    |    confirm-extensions: check the mime type for the generated page
+    |                        and adjust filename and links extensions
+    |                        to match the mime type
+    |                        (e.g. text/html->.html)
+    |
+    |  Note: Whilst using an xconf file to configure the Cocoon
+    |        Command Line gives access to more features, the use of
+    |        command line parameters is more stable, as there are
+    |        currently plans to improve the xconf format to allow
+    |        greater flexibility. If you require a stable and
+    |        consistent method for accessing the CLI, it is recommended
+    |        that you use the command line parameters to configure
+    |        the CLI. See documentation at:
+    |        http://cocoon.apache.org/2.1/userdocs/offline/
+    |        http://wiki.apache.org/cocoon/CommandLine
+    |
+    +-->
+
+<cocoon verbose="true"
+        follow-links="true"
+        precompile-only="false"
+        confirm-extensions="false">
+
+   <!--+
+       |  The context directory is usually the webapp directory
+       |  containing the sitemap.xmap file.
+       |
+       |  The config file is the cocoon.xconf file.
+       |
+       |  The work directory is used by Cocoon to store temporary
+       |  files and cache files.
+       |
+       |  The destination directory is where generated pages will
+       |  be written (assuming the 'simple' mapper is used, see
+       |  below)
+       +-->
+   <context-dir>.</context-dir>
+   <config-file>WEB-INF/cocoon.xconf</config-file>
+   <work-dir>../tmp/cocoon-work</work-dir>
+   <dest-dir>../site</dest-dir>
+
+   <!--+
+       |  A checksum file can be used to store checksums for pages
+       |  as they are generated. When the site is next generated,
+       |  files will not be written if their checksum has not changed.
+       |  This means that it will be easier to detect which files
+       |  need to be uploaded to a server, using the timestamp.
+       |
+       |  The default path is relative to the core webapp directory.
+       |  An absolute path can be used.
+       +-->
+   <!--   <checksums-uri>build/work/checksums</checksums-uri>-->
+
+   <!--+
+       | Broken link reporting options:
+       |   Report into a text file, one link per line:
+       |     <broken-links type="text" report="filename"/>
+       |   Report into an XML file:
+       |     <broken-links type="xml" report="filename"/>
+       |   Ignore broken links (default):
+       |     <broken-links type="none"/>
+       |
+       |   Two attributes to this node specify whether a page should
+       |   be generated when an error has occurred. 'generate' specifies
+       |   whether a page should be generated (default: true) and
+       |   extension specifies an extension that should be appended
+       |   to the generated page's filename (default: none)
+       |
+       |   Using this, a quick scan through the destination directory
+       |   will show broken links, by their filename extension.
+       +-->
+   <broken-links type="xml"
+                 file="../brokenlinks.xml"
+                 generate="false"
+                 extension=".error"
+                 show-referrers="true"/>
+
+   <!--+
+       |  Load classes at startup. This is necessary for generating
+       |  from sites that use SQL databases and JDBC.
+       |  The <load-class> element can be repeated if multiple classes
+       |  are needed.
+       +-->
+   <!--
+   <load-class>org.firebirdsql.jdbc.Driver</load-class>
+   -->
+
+   <!--+
+       |  Configures logging.
+       |  The 'log-kit' parameter specifies the location of the log kit
+       |  configuration file (usually called logkit.xconf).
+       |
+       |  Logger specifies the logging category (for all logging prior
+       |  to other Cocoon logging categories taking over)
+       |
+       |  Available log levels are:
+       |    DEBUG:        prints all levels of log messages.
+       |    INFO:         prints all levels of log messages except DEBUG
+       |                  ones.
+       |    WARN:         prints all levels of log messages except DEBUG
+       |                  and INFO ones.
+       |    ERROR:        prints all levels of log messages except DEBUG,
+       |                  INFO and WARN ones.
+       |    FATAL_ERROR:  prints only log messages of this level
+       +-->
+   <!-- <logging log-kit="WEB-INF/logkit.xconf" logger="cli" level="ERROR" /> -->
+
+   <!--+
+       |  Specifies the filename to be appended to URIs that
+       |  refer to a directory (i.e. end with a forward slash).
+       +-->
+   <default-filename>index.html</default-filename>
+
+   <!--+
+       |  Specifies a user agent string to the sitemap when
+       |  generating the site.
+       |
+       |  A generic term for a web browser is "user agent". Any
+       |  user agent, when connecting to a web server, will provide
+       |  a string to identify itself (e.g. as Internet Explorer or
+       |  Mozilla). It is possible to have Cocoon serve different
+       |  content depending upon the user agent string provided by
+       |  the browser. If your site does this, then you may want to
+       |  use this <user-agent> entry to provide a 'fake' user agent
+       |  to Cocoon, so that it generates the correct version of your
+       |  site.
+       |
+       |  For most sites, this can be ignored.
+       +-->
+   <!--
+   <user-agent>Cocoon Command Line Environment 2.1</user-agent>
+   -->
+
+   <!--+
+       |  Specifies an accept string to the sitemap when generating
+       |  the site.
+       |  User agents can specify to an HTTP server what types of content
+       |  (by mime-type) they are able to receive. E.g. a browser may be
+       |  able to handle jpegs, but not pngs. The HTTP accept header
+       |  allows the server to take the browser's capabilities into account,
+       |  and only send back content that it can handle.
+       |
+       |  For most sites, this can be ignored.
+       +-->
+
+   <accept>*/*</accept>
+
+   <!--+
+       | Specifies which URIs should be included or excluded, according
+       | to wildcard patterns.
+       |
+       | These includes/excludes are only relevant when you are following
+       | links. A link URI must match an include pattern (if one is given)
+       | and not match an exclude pattern, if it is to be followed by
+       | Cocoon. It can be useful, for example, where there are links in
+       | your site to pages that are not generated by Cocoon, such as
+       | references to api-documentation.
+       |
+       | By default, all URIs are included. If both include and exclude
+       | patterns are specified, a URI is first checked against the
+       | include patterns, and then against the exclude patterns.
+       |
+       | Multiple patterns can be given, using multiple include or exclude
+       | nodes.
+       |
+       | The order of the elements is not significant, as only the first
+       | successful match of each category is used.
+       |
+       | Currently, only the complete source URI can be matched (including
+       | any URI prefix). Future plans include destination URI matching
+       | and regexp matching. If you have requirements for these, contact
+       | dev at cocoon.apache.org.
+       +-->
+
+   <exclude pattern="**/"/>
+   <exclude pattern="**apidocs**"/>
+   <exclude pattern="api/**"/>
+
+<!-- Avro: Exclude trevni doc. -->
+   <exclude pattern="trevni/**"/>
+
+<!--
+  This is a workaround for FOR-284 "link rewriting broken when
+  linking to xml source views which contain site: links".
+  See the explanation there and in declare-broken-site-links.xsl
+-->
+   <exclude pattern="site:**"/>
+   <exclude pattern="ext:**"/>
+   <exclude pattern="lm:**"/>
+   <exclude pattern="**/site:**"/>
+   <exclude pattern="**/ext:**"/>
+   <exclude pattern="**/lm:**"/>
+
+   <!-- Exclude tokens used in URLs to ASF mirrors (interpreted by a CGI) -->
+   <exclude pattern="[preferred]/**"/>
+   <exclude pattern="[location]"/>
+
+   <!--   <include-links extension=".html"/>-->
+
+   <!--+
+       |  <uri> nodes specify the URIs that should be generated, and
+       |  where required, what should be done with the generated pages.
+       |  They describe the way the URI of the generated file is created
+       |  from the source page's URI. There are three ways that a generated
+       |  file URI can be created: append, replace and insert.
+       |
+       |  The "type" attribute specifies one of (append|replace|insert):
+       |
+       |  append:
+       |  Append the generated page's URI to the end of the source URI:
+       |
+       |   <uri type="append" src-prefix="documents/" src="index.html"
+       |   dest="build/dest/"/>
+       |
+       |  This means that
+       |   (1) the "documents/index.html" page is generated
+       |   (2) the file will be written to "build/dest/documents/index.html"
+       |
+       |  replace:
+       |  Completely ignore the generated page's URI - just
+       |  use the destination URI:
+       |
+       |   <uri type="replace" src-prefix="documents/" src="index.html"
+       |   dest="build/dest/docs.html"/>
+       |
+       |  This means that
+       |   (1) the "documents/index.html" page is generated
+       |   (2) the result is written to "build/dest/docs.html"
+       |   (3) this works only for "single" pages - and not when links
+       |       are followed
+       |
+       |  insert:
+       |  Insert generated page's URI into the destination
+       |  URI at the point marked with a * (example uses fictional
+       |  zip protocol)
+       |
+       |   <uri type="insert" src-prefix="documents/" src="index.html"
+       |   dest="zip://*.zip/page.html"/>
+       |
+       |  This means that
+       |   (1)
+       |
+       |  In any of these scenarios, if the dest attribute is omitted,
+       |  the value provided globally using the <dest-dir> node will
+       |  be used instead.
+       +-->
+   <!--
+   <uri type="replace"
+        src-prefix="samples/"
+        src="hello-world/hello.html"
+        dest="build/dest/hello-world.html"/>
+   -->
+
+   <!--+
+       | <uri> nodes can be grouped together in a <uris> node. This
+       | enables a group of URIs to share properties. The following
+       | properties can be set for a group of URIs:
+       |   * follow-links:       should pages be crawled for links
+       |   * confirm-extensions: should file extensions be checked
+       |                         for the correct mime type
+       |   * src-prefix:         all source URIs should be
+       |                         pre-pended with this prefix before
+       |                         generation. The prefix is not
+       |                         included when calculating the
+       |                         destination URI
+       |   * dest:               the base destination URI to be
+       |                         shared by all pages in this group
+       |   * type:               the method to be used to calculate
+       |                         the destination URI. See above
+       |                         section on <uri> node for details.
+       |
+       | Each <uris> node can have a name attribute. When a name
+       | attribute has been specified, the -n switch on the command
+       | line can be used to tell Cocoon to only process the URIs
+       | within this URI group. When no -n switch is given, all
+       | <uris> nodes are processed. Thus, one xconf file can be
+       | used to manage multiple sites.
+       +-->
+   <!--
+   <uris name="mirrors" follow-links="false">
+     <uri type="append" src="mirrors.html"/>
+   </uris>
+   -->
+
+   <!--+
+       |  File containing URIs (plain text, one per line).
+       +-->
+   <!--
+   <uri-file>uris.txt</uri-file>
+   -->
+</cocoon>
diff --git a/doc/src/content/htmldocs/canonical-completeness.html b/doc/src/content/htmldocs/canonical-completeness.html
new file mode 100644
index 0000000..697575e
--- /dev/null
+++ b/doc/src/content/htmldocs/canonical-completeness.html
@@ -0,0 +1,204 @@
+<html>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<head>
+<title>Completeness of "Parsing Canonical Form"</title>
+</head>
+<body>
+
+<center><h1>Completeness of "Parsing Canonical Form"</h1></center>
+
+<h2>1.0 Introduction</h2>
+
+<p>One of the defining characteristics of Avro is that a reader is assumed to have the "same" schema used by the writer of the data the reader is reading.  This assumption leads to a data format that's compact and amenable to many forms of schema evolution.  However, there are nuances to defining exactly what it means for the reader to have "the same" schema used by the writer.  We want to allow, for example, trivial transformations, such as the insertion of whitespace.  But we can't all [...]
+
+<p>To clearly define what it means for a reader to have "the same" schema as a writer, the Avro specification defines <dfn>Parsing Canonical Form</dfn> (PCF), a set of transformations on Avro schemas that strip away irrelevancies (e.g., "doc" attributes) and normalize the JSON text (e.g., dealing with whitespace).  Two schemas are defined to be "the same" as far as a reader is concerned if and only if their PCFs are textually equal.</p>
+
+<p>We believe that PCF is <em>sound</em> and <em>complete</em>.  Soundness means that the PCF of a schema is logically equivalent to the original form, i.e., we can use the PCF in place of the original form without introducing bugs.  Completeness is "maximal soundness:" if two schemas are logically equivalent, then their PCFs will be textually identical.  The Avro specification claims that PCF is complete when it says: "[if two schemas have the same PCF, then] there is no serialized data [...]
+
+<p>We believe that the transformations that define PCF are "self-evidently" sound to people familiar with Avro.  For example, fixing the order of fields in a JSON object, or eliminating irrelevant attributes like <code>doc</code>, or using the simple <code>int</code> in place of <code>{"type":"int"}</code> clearly don't change the meaning of a schema.</p>
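+<p>To make these transformations concrete, here is a small illustrative example of our own (it does not appear in the specification).  The schema</p>
+<pre>
+  { "type" : "record",
+    "name" : "Point",
+    "doc"  : "a 2-D point",
+    "fields" : [
+      {"name": "x", "type": {"type": "int"}},
+      {"name": "y", "type": "int", "doc": "ordinate"}
+    ] }
+</pre>
+<p>should reduce to a Parsing Canonical Form along the lines of</p>
+<pre>
+  {"name":"Point","type":"record","fields":[{"name":"x","type":"int"},{"name":"y","type":"int"}]}
+</pre>
+<p>with the <code>doc</code> attributes stripped, <code>{"type":"int"}</code> reduced to <code>int</code>, attribute order fixed, and whitespace removed.</p>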
+
+<p>Completeness, on the other hand, is much less obvious.  How do we know that there aren't two logically equivalent schemas that happen to reduce to different canonical forms?  All it takes is one such pair to foil our claim of completeness.</p>
+
+<p>In general, completeness properties like this can be tricky to prove.  It turns out that, while soundness is critical to us, completeness is not.  If two schemas are operationally equivalent (i.e., a reader can't tell their output apart), but we accidentally treat them as if they are different, then typically all that happens is that we'll do more work.  For example, we might generate a decoder object to decode some incoming data when it turns out that we had already cached a decoder  [...]
+
+<p>At the same time, if we knew that our canonical forms were complete, then we might take advantage of that fact in some circumstances (e.g., to serialize schemas).  Also, the <code>Schema.equals(Object)</code> method provided in the Avro implementation makes many of the same assumptions made in the PCF definition.  Thus, a completeness proof for our canonicalization would give us confidence in the correctness of this equality algorithm.  So this issue is not entirely academic.</p>
+
+<p>We haven't worked out a full, formal proof (we hope someone from the community will step up to that task!).  However, we've been thinking about it quite a bit, and we thought we'd share our thoughts so far.</p>
+
+
+<h2>2.0 Completeness argument for Parsing Canonical Form</h2>
+
+<p>Our formalization of Avro schemas would be based on interpreting them as grammars.  In this interpretation, Avro schemas are grammars that generate tagged data streams.  Consider, for example, the following schema for a linked-list:
+<pre>
+  {"type":"record", "name":"list", "fields":[
+     {"name":"value", "type":"int"},
+     {"name":"tail",  "type":["null", "list"]}
+   ]}
+</pre>
+Interpreted as a grammar, it can generate a tagged data-stream that looks like this:
+<pre>
+  [record,"list"][field,"value"][int,10][field,"tail"][union,1]
+    [record,"list"][field,"value"][int,22][field,"tail"][union,0]
+</pre>
+(this is a two-record linked list whose first cell contains the value "10" and second cell the value "22").  Avro schemas can trivially be interpreted as grammars for such tagged data streams.  Formal proofs involving Avro schemas can be carried out as proofs about languages and grammars.</p>
+
+<p>So what does it mean for the canonical form of a schema to be "complete?"  Let <i>L(S)</i> denote the language generated by the Avro schema <code>S</code>, and <i>C(S)</i> denote the canonical form of the schema.  The canonicalization is complete if:
+<blockquote>
+For all schemas <i>S<sub>1</sub></i> and <i>S<sub>2</sub></i>,<br>
+    <i>L(S<sub>1</sub>) = L(S<sub>2</sub>) ⇒ C(S<sub>1</sub>) = C(S<sub>2</sub>)</i>
+</blockquote>
+That is, for any two schemas that generate the same language, their canonicalizations are textually equivalent.
+
+<p>To prove this, we need to define some functions:
+<blockquote>
+<i>J</i> is a variable name we often use to denote a JSON expression representing an Avro schema<br>
+<i>C(J)</i> is the Parsing Canonical Form of <i>J</i> as defined in the Avro specification<br>
+<i>P(J)</i> is the ASG for an Avro schema generated by parsing <i>J</i> (think of <i>P(J)</i> as a <code>Schema</code> Java object)<br>
+<i>S</i> is a variable name we often use to denote such ASGs<br>
+<i>L(S)</i> is the language generated by a schema ASG
+</blockquote>
+<p>With all these symbols defined, our completeness criterion is now rendered as:
+<blockquote>
+∀ <i>J<sub>1</sub></i>, <i>J<sub>2</sub></i>:
+<i>L(P(J<sub>1</sub>)) = L(P(J<sub>2</sub>)) ⇒ C(J<sub>1</sub>) = C(J<sub>2</sub>)</i>
+</blockquote>
+We'll prove this by breaking it into two parts:
+<blockquote>
+(1): ∀ <i>S<sub>1</sub></i>, <i>S<sub>2</sub></i>:
+<i>L(S<sub>1</sub>) = L(S<sub>2</sub>) ⇒ S<sub>1</sub> ≅ S<sub>2</sub></i>  <br>
+(2): ∀ <i>J<sub>1</sub></i>, <i>J<sub>2</sub></i>:
+<i>P(J<sub>1</sub>) ≅ P(J<sub>2</sub>) ⇒ C(J<sub>1</sub>) = C(J<sub>2</sub>)</i>
+</blockquote>
+In this two-step decomposition, we've introduced a new operator ≅, which compares the ASGs of two Avro schemas.  The ASG of an Avro schema can be viewed as a rooted, labeled, directed graph.  Because Avro schemas can be recursive, these graphs can be cyclic.  The ≅ operator is "true" between two ASGs when the set of minimal labeled paths (no cycles, starting from the root) on the two ASGs are the same.  (The <code>Schema.equals(Object)</code> method in the Avro implementation c [...]
+
+<p>It turns out that, implicit in the Avro Specification, there are "canonicalization" rules that are important to our proof of completeness.  In particular, the Avro Specification says that a name must be defined "before" it is used, and that a name cannot be defined more than once in a schema.  Consider the following redefinition of the linked-list schema, for example:
+<pre>
+  {"type":"record", "name":"list", "fields":[
+    {"name":"value", "type":"int"},
+    {"name":"tail",
+      "type":["null", {"type":"record", "name":"list", "fields":[
+                        {"name":"value", "type":"int"},
+                        {"name":"tail", "type":["null", "list"]}]}]}
+  ]}
+</pre>
+In this redefinition, we've "unpacked" the recursion in the linked list by one level.  In some sense, this is a perfectly fine definition of a linked list, and is operationally equivalent to the more compact version given earlier.  So it makes sense that our claim of completeness is dependent upon this kind of "unpacking" not occurring in real schemas.</p>
+
+<p>To deal with this issue in our proof, we pretend that the Avro specification does <em>not</em> require that named schemas be defined just once, and be defined "before" they are used.  Rather, we treat this requirement as an additional transformation rule in the definition of Parsing Canonical Form:
+<ul>
+  <li> [MINIMIZE] Eliminate redundant definitions of named types (records, enums, and fixeds).  That is, for each named type, have a defining instance that appears at first use, and then use just the name (rather than the full schema) everywhere else.</li>
+</ul>
+(As in the Avro spec, "first use" is defined as the first occurrence in a depth-first, left-to-right traversal of the schema abstract-syntax graph (ASG).)  Applied to the "unpacked" linked-list schema above, MINIMIZE replaces the inner definition of "list" with a simple reference to the name, recovering the compact form given earlier.
+
+<p>Getting back to the proof of (1) and (2) from above, we need to introduce more functions:
+<blockquote>
+<i>P(J)=P<sub>A</sub>(P<sub>J</sub>(J))</i> - decompose parser into: <br>
+  <i>P<sub>J</sub></i> is the JSON parser<br>
+  <i>P<sub>A</sub></i> is the Avro parser (takes JSON ASTs as input)<br>
+<i>C(J)=C<sub>J</sub>(C<sub>A</sub>(C<sub>M</sub>(J)))</i> - decompose canonicalization into:<br>
+  <i>C<sub>M</sub>(J)</i> the MINIMIZE step<br>
+  <i>C<sub>A</sub>(J)</i> Avro normalizations<br>
+  <i>C<sub>J</sub>(J)</i> JSON normalizations<br>
+<i>M(S)</i> is the "named-schema NFA minimization" of <i>S</i><br>
+</blockquote>
+"Named-schema NFA minimization" is similar to general NFA minimization, except that we only collapse nodes and edges related to named schema entities and not other nodes.  For example, we would <em>not</em> collapse the nodes associated with <code>int</code> or <code>union</code> schemas.
+
+<p> Our proof of (1) looks like this (this proof refers to lemmas (3) and (4), which are defined later):
+<blockquote>
+<table>
+<tr><td>∀<i>S<sub>1</sub>,S<sub>2</sub></i>:</td><td><i>L(S<sub>1</sub>)=L(S<sub>2</sub>)</i></td><td></td></tr>
+<tr>
+<td></td><td>⇒<i>M(S<sub>1</sub>)=M(S<sub>2</sub>)</i></td>
+<td>by (3)</td>
+</tr>
+<tr>
+<td></td><td>⇒<i>S<sub>1</sub>≅S<sub>2</sub></i></td>
+<td>by (4)</td>
+</tr>
+</table>
+</blockquote>
+Here's the proof of (2) (this proof refers to lemmas (4)-(7), which are defined later):
+<blockquote>
+<table>
+<tr><td>∀<i>J<sub>1</sub>,J<sub>2</sub></i>:</td><td><i>P(J<sub>1</sub>)≅P(J<sub>2</sub>)</i></td><td></td></tr>
+
+<tr>
+<td></td><td>⇒<i>M(P(J<sub>1</sub>))=M(P(J<sub>2</sub>))</i></td>
+<td>by (4)</td>
+</tr>
+
+<tr>
+<td></td><td>⇒<i>P(C<sub>M</sub>(J<sub>1</sub>))=P(C<sub>M</sub>(J<sub>2</sub>))</i></td>
+<td>by (5)</td>
+</tr>
+
+<tr>
+<td></td><td>⇒<i>P<sub>A</sub>(P<sub>J</sub>(C<sub>M</sub>(J<sub>1</sub>)))=P<sub>A</sub>(P<sub>J</sub>(C<sub>M</sub>(J<sub>2</sub>)))</i></td>
+<td>by definition of <i>P</i></td>
+</tr>
+
+<tr>
+<td></td><td>⇒<i>P<sub>J</sub>(C<sub>A</sub>(C<sub>M</sub>(J<sub>1</sub>)))=P<sub>J</sub>(C<sub>A</sub>(C<sub>M</sub>(J<sub>2</sub>)))</i></td>
+<td>by (6)</td>
+</tr>
+
+<tr>
+<td></td><td>⇒<i>C<sub>J</sub>(C<sub>A</sub>(C<sub>M</sub>(J<sub>1</sub>)))=C<sub>J</sub>(C<sub>A</sub>(C<sub>M</sub>(J<sub>2</sub>)))</i></td>
+<td>by (7)</td>
+</tr>
+
+<tr>
+<td></td><td>⇒<i>C(J<sub>1</sub>)=C(J<sub>2</sub>)</i></td>
+<td>by definition of <i>C</i></td>
+</tr>
+</table>
+</blockquote>
+
+Here are the lemmas needed above:
+<blockquote>
+(3): ∀ <i>S<sub>1</sub></i>, <i>S<sub>2</sub></i>:
+<i>L(S<sub>1</sub>) = L(S<sub>2</sub>) ⇒ M(S<sub>1</sub>) = M(S<sub>2</sub>)</i><br>
+
+(4): ∀ <i>S<sub>1</sub></i>, <i>S<sub>2</sub></i>:
+<i>M(S<sub>1</sub>) = M(S<sub>2</sub>) ⇔ S<sub>1</sub> ≅ S<sub>2</sub></i> <br>
+
+(5): ∀ <i>J</i>: <i>M(P(J)) = P(C<sub>M</sub>(J))</i><br>
+
+(6): ∀ <i>J<sub>1</sub></i>, <i>J<sub>2</sub></i>:
+<i>P<sub>A</sub>(P<sub>J</sub>(J<sub>1</sub>)) = P<sub>A</sub>(P<sub>J</sub>(J<sub>2</sub>)) ⇒ P<sub>J</sub>(C<sub>A</sub>(J<sub>1</sub>)) = P<sub>J</sub>(C<sub>A</sub>(J<sub>2</sub>))</i> <br>
+
+(7): ∀ <i>J<sub>1</sub></i>, <i>J<sub>2</sub></i>:
+<i>P<sub>J</sub>(J<sub>1</sub>) = P<sub>J</sub>(J<sub>2</sub>) ⇒ C<sub>J</sub>(J<sub>1</sub>) = C<sub>J</sub>(J<sub>2</sub>)</i> <br>
+</blockquote>
+
+<p>Proving the lemmas:
+<ol start="3">
+<li> This says that the language-related part of our canonicalization is complete, i.e., <i>M</i> finds the equivalence-classes of <i>L</i>.  I would imagine one could prove this by modifying a proof that the equality of LL(1) grammars is a decidable problem.  I haven't gotten very far in showing this, however.
+<li> The right-hand direction of this follows from the definition of minimization.  The left-hand direction seems correct, but I'm not sure how to prove it (I think it also follows from the definition of minimization).
+<li> This is showing that the MINIMIZE step (which is done on JSON expressions) is equivalent to doing a named-schema NFA minimization on the ASG representation.  This should follow pretty directly from a detailed definition of <i>M</i>, if we provided one.
+<li> This says that the Avro-related part of our canonicalization is complete, i.e., that <i>C<sub>A</sub></i> finds equivalence-classes of <i>P<sub>A</sub></i>.
+<li> This says that the JSON-related part of our canonicalization is complete, i.e., that <i>C<sub>J</sub></i> finds equivalence-classes of <i>P<sub>J</sub></i>.  Note that, implicitly, this lemma ranges over only JSON expressions that are legal Avro schemas with no doc strings or default values, and thus (for example) doesn't need to worry about normalization of floating-point literals.
+</ol>
+
+
+<h2>3.0 Concluding remarks</h2>
+
+Engineers <a href="http://www.aps.org/publications/apsnews/201002/physicshistory.cfm">have a history</a> of running ahead of formal mathematical proofs, when things "seem correct" to them.  In this case, it seems pretty obvious that Parsing Canonical Form is complete as well as sound, and we should go ahead and treat it as such.  At the same time, formal proofs often turn up corner cases and exceptions that are valuable to document and account for.  Thus, it'd be nice if someone could provi [...]
+
+</body>
+</html>
diff --git a/doc/src/content/xdocs/gettingstartedjava.xml b/doc/src/content/xdocs/gettingstartedjava.xml
new file mode 100644
index 0000000..ea760b3
--- /dev/null
+++ b/doc/src/content/xdocs/gettingstartedjava.xml
@@ -0,0 +1,476 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+  -->
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
+   "http://forrest.apache.org/dtd/document-v20.dtd" [
+  <!ENTITY % avro-entities PUBLIC "-//Apache//ENTITIES Avro//EN"
+	   "../../../../build/avro.ent">
+  %avro-entities;
+]>
+<document>
+  <header>
+    <title>Apache Avro™ &AvroVersion; Getting Started (Java)</title>
+  </header>
+  <body>
+    <p>
+      This is a short guide for getting started with Apache Avro™ using
+      Java.  This guide only covers using Avro for data serialization; see
+      Patrick Hunt's <a href="https://github.com/phunt/avro-rpc-quickstart">Avro
+      RPC Quick Start</a> for a good introduction to using Avro for RPC.
+    </p>
+    <section id="download_install">
+      <title>Download</title>
+      <p>
+        Avro implementations for C, C++, C#, Java, PHP, Python, and Ruby can be
+        downloaded from the <a
+        href="http://avro.apache.org/releases.html">Apache Avro™
+        Releases</a> page.  This guide uses Avro &AvroVersion;, the latest
+        version at the time of writing.  For the examples in this guide,
+        download <em>avro-&AvroVersion;.jar</em> and
+        <em>avro-tools-&AvroVersion;.jar</em>.  The Avro Java implementation
+        also depends on the <a href="http://jackson.codehaus.org/">Jackson</a>
+        JSON library.  From the Jackson <a
+        href="http://wiki.fasterxml.com/JacksonDownload">download page</a>,
+        download the core-asl and mapper-asl jars.  Add
+        <em>avro-&AvroVersion;.jar</em> and the Jackson jars to your project's
+        classpath (avro-tools will be used for code generation).
+      </p>
+      <p>
+        Alternatively, if you are using Maven, add the following dependency to
+        your POM:
+      </p>
+      <source>
+<dependency>
+  <groupId>org.apache.avro</groupId>
+  <artifactId>avro</artifactId>
+  <version>&AvroVersion;</version>
+</dependency>
+      </source>
+      <p>
+        As well as the Avro Maven plugin (for performing code generation):
+      </p>
+      <source>
+<plugin>
+  <groupId>org.apache.avro</groupId>
+  <artifactId>avro-maven-plugin</artifactId>
+  <version>&AvroVersion;</version>
+  <executions>
+    <execution>
+      <phase>generate-sources</phase>
+      <goals>
+        <goal>schema</goal>
+      </goals>
+      <configuration>
+        <sourceDirectory>${project.basedir}/src/main/avro/</sourceDirectory>
+        <outputDirectory>${project.basedir}/src/main/java/</outputDirectory>
+      </configuration>
+    </execution>
+  </executions>
+</plugin>
+<plugin>
+  <groupId>org.apache.maven.plugins</groupId>
+  <artifactId>maven-compiler-plugin</artifactId>
+  <configuration>
+    <source>1.6</source>
+    <target>1.6</target>
+  </configuration>
+</plugin>
+      </source>
+      <p>
+        You may also build the required Avro jars from source.  Building Avro is
+        beyond the scope of this guide; see the <a
+        href="https://cwiki.apache.org/AVRO/build-documentation.html">Build
+        Documentation</a> page in the wiki for more information.
+      </p>
+    </section>
+
+    <section>
+      <title>Defining a schema</title>
+      <p>
+        Avro schemas are defined using JSON.  Schemas are composed of <a
+        href="spec.html#schema_primitive">primitive types</a>
+        (<code>null</code>, <code>boolean</code>, <code>int</code>,
+        <code>long</code>, <code>float</code>, <code>double</code>,
+        <code>bytes</code>, and <code>string</code>) and <a
+        href="spec.html#schema_complex">complex types</a> (<code>record</code>,
+        <code>enum</code>, <code>array</code>, <code>map</code>,
+        <code>union</code>, and <code>fixed</code>).  You can learn more about
+        Avro schemas and types from the specification, but for now let's start
+        with a simple schema example, <em>user.avsc</em>:
+      </p>
+      <source>
+{"namespace": "example.avro",
+ "type": "record",
+ "name": "User",
+ "fields": [
+     {"name": "name", "type": "string"},
+     {"name": "favorite_number",  "type": ["int", "null"]},
+     {"name": "favorite_color", "type": ["string", "null"]}
+ ]
+}
+      </source>
+      <p>
+        This schema defines a record representing a hypothetical user.  (Note
+        that a schema file can only contain a single schema definition.)  At
+        minimum, a record definition must include its type (<code>"type":
+        "record"</code>), a name (<code>"name": "User"</code>), and fields, in
+        this case <code>name</code>, <code>favorite_number</code>, and
+        <code>favorite_color</code>.  We also define a namespace
+        (<code>"namespace": "example.avro"</code>), which together with the name
+        attribute defines the "full name" of the schema
+        (<code>example.avro.User</code> in this case).
+
+      </p>
+      <p>
+        Fields are defined via an array of objects, each of which defines a name
+        and type (other attributes are optional, see the <a
+        href="spec.html#schema_record">record specification</a> for more
+        details).  The type attribute of a field is another schema object, which
+        can be either a primitive or complex type.  For example, the
+        <code>name</code> field of our User schema is the primitive type
+        <code>string</code>, whereas the <code>favorite_number</code> and
+        <code>favorite_color</code> fields are both <code>union</code>s,
+        represented by JSON arrays.  <code>union</code>s are a complex type that
+        can be any of the types listed in the array; e.g.,
+        <code>favorite_number</code> can either be an <code>int</code> or
+        <code>null</code>, essentially making it an optional field.
+      </p>
+    </section>
+
+    <section>
+      <title>Serializing and deserializing with code generation</title>
+      <section>
+        <title>Compiling the schema</title>
+        <p>
+          Code generation allows us to automatically create classes based on our
+          previously-defined schema.  Once we have defined the relevant classes,
+          there is no need to use the schema directly in our programs.  We use the
+          avro-tools jar to generate code as follows:
+        </p>
+        <source>
+java -jar /path/to/avro-tools-&AvroVersion;.jar compile schema <schema file> <destination>
+        </source>
+        <p>
+          This will generate the appropriate source files in a package based on
+          the schema's namespace in the provided destination folder.  For
+          instance, to generate a <code>User</code> class in package
+          <code>example.avro</code> from the schema defined above, run
+        </p>
+        <source>
+java -jar /path/to/avro-tools-&AvroVersion;.jar compile schema user.avsc .
+        </source>
+        <p>
+          Note that if you are using the Avro Maven plugin, there is no need to
+          manually invoke the schema compiler; the plugin automatically
+          performs code generation on any .avsc files present in the configured
+          source directory.
+        </p>
+      </section>
+      <section>
+        <title>Creating Users</title>
+        <p>
+          Now that we've completed the code generation, let's create some
+          <code>User</code>s, serialize them to a data file on disk, and then
+          read back the file and deserialize the <code>User</code> objects.
+        </p>
+        <p>
+          First let's create some <code>User</code>s and set their fields.
+        </p>
+        <source>
+User user1 = new User();
+user1.setName("Alyssa");
+user1.setFavoriteNumber(256);
+// Leave favorite color null
+
+// Alternate constructor
+User user2 = new User("Ben", 7, "red");
+
+// Construct via builder
+User user3 = User.newBuilder()
+             .setName("Charlie")
+             .setFavoriteColor("blue")
+             .setFavoriteNumber(null)
+             .build();
+        </source>
+        <p>
+          As shown in this example, Avro objects can be created either by
+          invoking a constructor directly or by using a builder.  Unlike
+          constructors, builders will automatically set any default values
+          specified in the schema.  Additionally, builders validate the data as
+          it is set, whereas objects constructed directly will not cause an error
+          until the object is serialized.  However, using constructors directly
+          generally offers better performance, as builders create a copy of the
+          data structure before it is written.
+        </p>
+        <p>
+          Note that we do not set <code>user1</code>'s favorite color. Since
+          that field is of type <code>["string", "null"]</code>, we can either
+          set it to a <code>string</code> or leave it <code>null</code>; it is
+          essentially optional.  Similarly, we set <code>user3</code>'s favorite
+          number to null (using a builder requires setting all fields, even if
+          they are null).
+        </p>
+      </section>
+      <section>
+        <title>Serializing</title>
+      <p>
+        Now let's serialize our <code>User</code>s to disk.
+      </p>
+      <source>
+// Serialize user1, user2 and user3 to disk
+DatumWriter<User> userDatumWriter = new SpecificDatumWriter<User>(User.class);
+DataFileWriter<User> dataFileWriter = new DataFileWriter<User>(userDatumWriter);
+dataFileWriter.create(user1.getSchema(), new File("users.avro"));
+dataFileWriter.append(user1);
+dataFileWriter.append(user2);
+dataFileWriter.append(user3);
+dataFileWriter.close();
+      </source>
+      <p>
+        We create a <code>DatumWriter</code>, which converts Java objects into
+        an in-memory serialized format.  The <code>SpecificDatumWriter</code>
+        class is used with generated classes and extracts the schema from the
+        specified generated type.
+      </p>
+      <p>
+        Next we create a <code>DataFileWriter</code>, which writes the
+        serialized records, as well as the schema, to the file specified in the
+        <code>dataFileWriter.create</code> call.  We write our users to the file
+        via calls to the <code>dataFileWriter.append</code> method.  When we are
+        done writing, we close the data file.
+      </p>
+      </section>
+      <section>
+        <title>Deserializing</title>
+        <p>
+          Finally, let's deserialize the data file we just created.
+        </p>
+        <source>
+// Deserialize Users from disk
+DatumReader<User> userDatumReader = new SpecificDatumReader<User>(User.class);
+DataFileReader<User> dataFileReader = new DataFileReader<User>(new File("users.avro"), userDatumReader);
+User user = null;
+while (dataFileReader.hasNext()) {
+  // Reuse user object by passing it to next(). This saves us from
+  // allocating and garbage collecting many objects for files with
+  // many items.
+  user = dataFileReader.next(user);
+  System.out.println(user);
+}
+        </source>
+        <p>
+          This snippet will output:
+        </p>
+        <source>
+{"name": "Alyssa", "favorite_number": 256, "favorite_color": null}
+{"name": "Ben", "favorite_number": 7, "favorite_color": "red"}
+{"name": "Charlie", "favorite_number": null, "favorite_color": "blue"}
+        </source>
+        <p>
+          Deserializing is very similar to serializing.  We create a
+          <code>SpecificDatumReader</code>, analogous to the
+          <code>SpecificDatumWriter</code> we used in serialization, which
+          converts in-memory serialized items into instances of our generated
+          class, in this case <code>User</code>.  We pass the
+          <code>DatumReader</code> and the previously created <code>File</code>
+          to a <code>DataFileReader</code>, analogous to the
+          <code>DataFileWriter</code>, which reads the data file on disk.
+        </p>
+        <p>
+          Next we use the <code>DataFileReader</code> to iterate through the
+          serialized <code>User</code>s and print the deserialized object to
+          stdout.  Note how we perform the iteration: we create a single
+          <code>User</code> object which we store the current deserialized user
+          in, and pass this record object to every call of
+          <code>dataFileReader.next</code>.  This is a performance optimization
+          that allows the <code>DataFileReader</code> to reuse the same
+          <code>User</code> object rather than allocating a new
+          <code>User</code> for every iteration, which can be very expensive in
+          terms of object allocation and garbage collection if we deserialize a
+          large data file.  While this technique is the standard way to iterate
+          through a data file, it's also possible to use <code>for (User user :
+          dataFileReader)</code> if performance is not a concern.
+        </p>
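+        <p>
+          For reference, the simpler form is just the following (a minimal
+          sketch; it produces the same output, but without object reuse):
+        </p>
+        <source>
+for (User user : dataFileReader) {
+  System.out.println(user);
+}
+        </source>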
+      </section>
+      <section>
+        <title>Compiling and running the example code</title>
+        <p>
+          This example code is included as a Maven project in the
+          <em>examples/java-example</em> directory in the Avro docs.  From this
+          directory, execute the following commands to build and run the
+          example:
+        </p>
+        <source>
+$ mvn compile # includes code generation via Avro Maven plugin
+$ mvn -q exec:java -Dexec.mainClass=example.SpecificMain
+        </source>
+      </section>
+    </section>
+
+    <section>
+      <title>Serializing and deserializing without code generation</title>
+      <p>
+        Data in Avro is always stored with its corresponding schema, meaning we
+        can always read a serialized item regardless of whether we know the
+        schema ahead of time.  This allows us to perform serialization and
+        deserialization without code generation.
+      </p>
+      <p>
+        Let's go over the same example as in the previous section, but without
+        using code generation: we'll create some users, serialize them to a data
+        file on disk, and then read back the file and deserialize the user
+        objects.
+      </p>
+      <section>
+        <title>Creating users</title>
+        <p>
+          First, we use a <code>Parser</code> to read our schema definition and
+          create a <code>Schema</code> object.
+        </p>
+        <source>
+Schema schema = new Schema.Parser().parse(new File("user.avsc"));
+        </source>
+        <p>
+          Using this schema, let's create some users.
+        </p>
+        <source>
+GenericRecord user1 = new GenericData.Record(schema);
+user1.put("name", "Alyssa");
+user1.put("favorite_number", 256);
+// Leave favorite color null
+
+GenericRecord user2 = new GenericData.Record(schema);
+user2.put("name", "Ben");
+user2.put("favorite_number", 7);
+user2.put("favorite_color", "red");
+        </source>
+        <p>
+          Since we're not using code generation, we use
+          <code>GenericRecord</code>s to represent users.
+          <code>GenericRecord</code> uses the schema to verify that we only
+          specify valid fields.  If we try to set a non-existent field (e.g.,
+          <code>user1.put("favorite_animal", "cat")</code>), we'll get an
+          <code>AvroRuntimeException</code> when we run the program.
+        </p>
+        <p>
+          Note that we do not set <code>user1</code>'s favorite color.  Since
+          that field is of type <code>["string", "null"]</code>, we can either
+          set it to a <code>string</code> or leave it <code>null</code>; it is
+          essentially optional.
+        </p>
+      </section>
+      <section>
+        <title>Serializing</title>
+        <p>
+          Now that we've created our user objects, serializing and deserializing
+          them is almost identical to the example above which uses code
+          generation.  The main difference is that we use generic instead of
+          specific readers and writers.
+        </p>
+        <p>
+          First we'll serialize our users to a data file on disk.
+        </p>
+        <source>
+// Serialize user1 and user2 to disk
+File file = new File("users.avro");
+DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
+DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);
+dataFileWriter.create(schema, file);
+dataFileWriter.append(user1);
+dataFileWriter.append(user2);
+dataFileWriter.close();
+        </source>
+        <p>
+          We create a <code>DatumWriter</code>, which converts Java objects into
+          an in-memory serialized format.  Since we are not using code
+          generation, we create a <code>GenericDatumWriter</code>.  It requires
+          the schema both to determine how to write the
+          <code>GenericRecord</code>s and to verify that all non-nullable fields
+          are present.
+        </p>
+        <p>
+          As in the code generation example, we also create a
+          <code>DataFileWriter</code>, which writes the serialized records, as
+          well as the schema, to the file specified in the
+          <code>dataFileWriter.create</code> call.  We write our users to the
+          file via calls to the <code>dataFileWriter.append</code> method.  When
+          we are done writing, we close the data file.
+        </p>
+      </section>
+      <section>
+        <title>Deserializing</title>
+        <p>
+          Finally, we'll deserialize the data file we just created.
+        </p>
+        <source>
+// Deserialize users from disk
+DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
+DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(file, datumReader);
+GenericRecord user = null;
+while (dataFileReader.hasNext()) {
+  // Reuse user object by passing it to next(). This saves us from
+  // allocating and garbage collecting many objects for files with
+  // many items.
+  user = dataFileReader.next(user);
+  System.out.println(user);
+}
+        </source>
+        <p>This outputs:</p>
+        <source>
+{"name": "Alyssa", "favorite_number": 256, "favorite_color": null}
+{"name": "Ben", "favorite_number": 7, "favorite_color": "red"}
+        </source>
+        <p>
+          Deserializing is very similar to serializing.  We create a
+          <code>GenericDatumReader</code>, analogous to the
+          <code>GenericDatumWriter</code> we used in serialization, which
+          converts in-memory serialized items into <code>GenericRecords</code>.
+          We pass the <code>DatumReader</code> and the previously created
+          <code>File</code> to a <code>DataFileReader</code>, analogous to the
+          <code>DataFileWriter</code>, which reads the data file on disk.
+        </p>
+        <p>
+          Next, we use the <code>DataFileReader</code> to iterate through the
+          serialized users and print the deserialized object to stdout.  Note
+          how we perform the iteration: we create a single
+          <code>GenericRecord</code> object which we store the current
+          deserialized user in, and pass this record object to every call of
+          <code>dataFileReader.next</code>.  This is a performance optimization
+          that allows the <code>DataFileReader</code> to reuse the same record
+          object rather than allocating a new <code>GenericRecord</code> for
+          every iteration, which can be very expensive in terms of object
+          allocation and garbage collection if we deserialize a large data file.
+          While this technique is the standard way to iterate through a data
+          file, it's also possible to use <code>for (GenericRecord user :
+          dataFileReader)</code> if performance is not a concern.
+        </p>
+      </section>
+      <section>
+        <title>Compiling and running the example code</title>
+        <p>
+          This example code is included as a Maven project in the
+          <em>examples/java-example</em> directory in the Avro docs.  From this
+          directory, execute the following commands to build and run the
+          example:
+        </p>
+        <source>
+$ mvn compile
+$ mvn -q exec:java -Dexec.mainClass=example.GenericMain
+        </source>
+      </section>
+    </section>
+  </body>
+</document>
diff --git a/doc/src/content/xdocs/gettingstartedpython.xml b/doc/src/content/xdocs/gettingstartedpython.xml
new file mode 100644
index 0000000..d8d9df8
--- /dev/null
+++ b/doc/src/content/xdocs/gettingstartedpython.xml
@@ -0,0 +1,221 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+  -->
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
+   "http://forrest.apache.org/dtd/document-v20.dtd" [
+  <!ENTITY % avro-entities PUBLIC "-//Apache//ENTITIES Avro//EN"
+	   "../../../../build/avro.ent">
+  %avro-entities;
+]>
+<document>
+  <header>
+    <title>Apache Avro™ &AvroVersion; Getting Started (Python)</title>
+  </header>
+  <body>
+    <p>
+      This is a short guide for getting started with Apache Avro™ using
+      Python.  This guide only covers using Avro for data serialization; see
+      Patrick Hunt's <a href="https://github.com/phunt/avro-rpc-quickstart">Avro
+      RPC Quick Start</a> for a good introduction to using Avro for RPC.
+    </p>
+
+    <section id="download_install">
+      <title>Download</title>
+      <p>
+        Avro implementations for C, C++, C#, Java, PHP, Python, and Ruby can be
+        downloaded from the <a
+        href="http://avro.apache.org/releases.html">Apache Avro™
+        Releases</a> page.  This guide uses Avro &AvroVersion;, the latest
+        version at the time of writing.  Download and unzip
+        <em>avro-&AvroVersion;.tar.gz</em>, and install via <code>python
+        setup.py install</code> (this will probably require root privileges).  Ensure
+        that you can <code>import avro</code> from a Python prompt.
+      </p>
+      <source>
+$ tar xvf avro-&AvroVersion;.tar.gz
+$ cd avro-&AvroVersion;
+$ sudo python setup.py install
+$ python
+>>> import avro # should not raise ImportError
+      </source>
+      <p>
+        Alternatively, you may build the Avro Python library from source.  From
+        the root Avro directory, run the commands
+      </p>
+      <source>
+$ cd lang/py/
+$ ant
+$ sudo python setup.py install
+$ python
+>>> import avro # should not raise ImportError
+      </source>
+    </section>
+
+    <section>
+      <title>Defining a schema</title>
+      <p>
+        Avro schemas are defined using JSON.  Schemas are composed of <a
+        href="spec.html#schema_primitive">primitive types</a>
+        (<code>null</code>, <code>boolean</code>, <code>int</code>,
+        <code>long</code>, <code>float</code>, <code>double</code>,
+        <code>bytes</code>, and <code>string</code>) and <a
+        href="spec.html#schema_complex">complex types</a> (<code>record</code>,
+        <code>enum</code>, <code>array</code>, <code>map</code>,
+        <code>union</code>, and <code>fixed</code>).  You can learn more about
+        Avro schemas and types from the specification, but for now let's start
+        with a simple schema example, <em>user.avsc</em>:
+      </p>
+      <source>
+{"namespace": "example.avro",
+ "type": "record",
+ "name": "User",
+ "fields": [
+     {"name": "name", "type": "string"},
+     {"name": "favorite_number",  "type": ["int", "null"]},
+     {"name": "favorite_color", "type": ["string", "null"]}
+ ]
+}
+      </source>
+      <p>
+        This schema defines a record representing a hypothetical user.  (Note
+        that a schema file can only contain a single schema definition.)  At
+        minimum, a record definition must include its type (<code>"type":
+        "record"</code>), a name (<code>"name": "User"</code>), and fields, in
+        this case <code>name</code>, <code>favorite_number</code>, and
+        <code>favorite_color</code>.  We also define a namespace
+        (<code>"namespace": "example.avro"</code>), which together with the name
+        attribute defines the "full name" of the schema
+        (<code>example.avro.User</code> in this case).
+
+      </p>
+      <p>
+        Fields are defined via an array of objects, each of which defines a name
+        and type (other attributes are optional, see the <a
+        href="spec.html#schema_record">record specification</a> for more
+        details).  The type attribute of a field is another schema object, which
+        can be either a primitive or complex type.  For example, the
+        <code>name</code> field of our User schema is the primitive type
+        <code>string</code>, whereas the <code>favorite_number</code> and
+        <code>favorite_color</code> fields are both <code>union</code>s,
+        represented by JSON arrays.  <code>union</code>s are a complex type that
+        can be any of the types listed in the array; e.g.,
+        <code>favorite_number</code> can either be an <code>int</code> or
+        <code>null</code>, essentially making it an optional field.
+      </p>
+    </section>
+
+    <section>
+      <title>Serializing and deserializing without code generation</title>
+      <p>
+        Data in Avro is always stored with its corresponding schema, meaning we
+        can always read a serialized item, regardless of whether we know the
+        schema ahead of time.  This allows us to perform serialization and
+        deserialization without code generation.  Note that the Avro Python
+        library does not support code generation.
+      </p>
+      <p>
+        Try running the following code snippet, which serializes two users to a
+        data file on disk, and then reads back and deserializes the data file:
+      </p>
+      <source>
+import avro.schema
+from avro.datafile import DataFileReader, DataFileWriter
+from avro.io import DatumReader, DatumWriter
+
+schema = avro.schema.parse(open("user.avsc").read())
+
+writer = DataFileWriter(open("users.avro", "w"), DatumWriter(), schema)
+writer.append({"name": "Alyssa", "favorite_number": 256})
+writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"})
+writer.close()
+
+reader = DataFileReader(open("users.avro", "r"), DatumReader())
+for user in reader:
+    print user
+reader.close()
+      </source>
+      <p>This outputs:</p>
+      <source>
+{u'favorite_color': None, u'favorite_number': 256, u'name': u'Alyssa'}
+{u'favorite_color': u'red', u'favorite_number': 7, u'name': u'Ben'}
+      </source>
+      <p>
+        Let's take a closer look at what's going on here.
+      </p>
+      <source>
+schema = avro.schema.parse(open("user.avsc").read())
+      </source>
+      <p>
+        <code>avro.schema.parse</code> takes a string containing a JSON schema
+        definition as input and outputs a <code>avro.schema.Schema</code> object
+        (specifically a subclass of <code>Schema</code>, in this case
+        <code>RecordSchema</code>).  We're passing in the contents of our
+        user.avsc schema file here.
+      </p>
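+      <p>
+        As a quick sketch, you can inspect the parsed schema object (using its
+        <code>type</code>, <code>name</code>, and <code>fields</code>
+        attributes) to confirm what was loaded:
+      </p>
+      <source>
+schema = avro.schema.parse(open("user.avsc").read())
+print schema.type                      # 'record'
+print schema.name                      # 'User'
+print [f.name for f in schema.fields]  # ['name', 'favorite_number', 'favorite_color']
+      </source>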
+      <source>
+writer = DataFileWriter(open("users.avro", "w"), DatumWriter(), schema)
+      </source>
+      <p>
+        We create a <code>DataFileWriter</code>, which we'll use to write
+        serialized items to a data file on disk.  The
+        <code>DataFileWriter</code> constructor takes three arguments:
+      </p>
+        <ul>
+          <li>The file we'll serialize to</li>
+          <li>A <code>DatumWriter</code>, which is responsible for actually
+          serializing the items to Avro's binary format.
+          (<code>DatumWriter</code>s can also be used on their own, separately
+          from <code>DataFileWriter</code>s; see the sketch after this
+          list.)</li>
+          <li>The schema we're using.  The <code>DataFileWriter</code> needs the
+          schema both to write the schema to the data file, and to verify that
+          the items we write are valid items and write the appropriate
+          fields.</li>
+        </ul>
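+        <p>
+          As an aside, a <code>DatumWriter</code> can be used on its own,
+          without the file container.  The following minimal sketch (reusing
+          the parsed <code>schema</code> from above) writes the raw Avro
+          encoding of a single record into an in-memory buffer:
+        </p>
+        <source>
+import io
+from avro.io import BinaryEncoder, DatumWriter
+
+buf = io.BytesIO()
+encoder = BinaryEncoder(buf)
+DatumWriter(schema).write(
+    {"name": "Carol", "favorite_number": None, "favorite_color": None}, encoder)
+raw_bytes = buf.getvalue()  # binary-encoded record, no schema or file container around it
+        </source>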
+        <source>
+writer.append({"name": "Alyssa", "favorite_number": 256})
+writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"})
+        </source>
+        <p>
+          We use <code>DataFileWriter.append</code> to add items to our data
+          file.  Avro records are represented as Python <code>dict</code>s.
+          Since the field <code>favorite_color</code> has type <code>["string",
+          "null"]</code>, we are not required to specify this field, as shown in
+          the first append.  Were we to omit the required <code>name</code>
+          field, an exception would be raised.  Any extra entries in the
+          <code>dict</code> that do not correspond to a field are ignored.
+        </p>
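+        <p>
+          For instance (a small sketch reusing the <code>writer</code> from
+          above), appending a record without the required <code>name</code>
+          field raises <code>avro.io.AvroTypeException</code>:
+        </p>
+        <source>
+from avro.io import AvroTypeException
+
+try:
+    writer.append({"favorite_number": 7})  # no "name" given
+except AvroTypeException:
+    print "rejected: datum does not match the User schema"
+        </source>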
+        <source>
+reader = DataFileReader(open("users.avro", "r"), DatumReader())
+        </source>
+        <p>
+          We open the file again, this time for reading back from disk.  We use
+          a <code>DataFileReader</code> and <code>DatumReader</code>, analogous
+          to the <code>DataFileWriter</code> and <code>DatumWriter</code> above.
+        </p>
+        <source>
+for user in reader:
+    print user
+        </source>
+        <p>
+          The <code>DataFileReader</code> is an iterator that returns
+          <code>dict</code>s corresponding to the serialized items.
+        </p>
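+        <p>
+          For example, you could collect just the names with an ordinary list
+          comprehension:
+        </p>
+        <source>
+reader = DataFileReader(open("users.avro", "r"), DatumReader())
+names = [user["name"] for user in reader]
+reader.close()
+print names  # [u'Alyssa', u'Ben']
+        </source>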
+    </section>
+  </body>
+</document>
diff --git a/doc/src/content/xdocs/idl.xml b/doc/src/content/xdocs/idl.xml
new file mode 100644
index 0000000..bd74b09
--- /dev/null
+++ b/doc/src/content/xdocs/idl.xml
@@ -0,0 +1,448 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd" [
+  <!ENTITY % avro-entities PUBLIC "-//Apache//ENTITIES Avro//EN"
+	   "../../../../build/avro.ent">
+  %avro-entities;
+]>
+<document>
+  <header>
+    <title>Apache Avro™ &AvroVersion; IDL</title>
+  </header>
+  <body>
+
+    <section id="preamble">
+      <title>Introduction</title>
+
+      <p>This document defines Avro IDL, a higher-level language for authoring Avro schemata.
+      Before reading this document, you should have familiarity with the concepts of schemata and protocols,
+      as well as the various primitive and complex types available in Avro.
+      </p>
+    </section>
+
+    <section id="overview">
+      <title>Overview</title>
+      <section id="overview_purpose">
+        <title>Purpose</title>
+        <p>The aim of the Avro IDL language is to enable developers to author schemata in a way that
+        feels more similar to common programming languages like Java, C++, or Python. Additionally,
+        the Avro IDL language may feel more familiar for those users who have previously used the
+        interface description languages (IDLs) in other frameworks like Thrift, Protocol Buffers, or CORBA.
+        </p>
+      </section>
+      <section id="overview_usage">
+        <title>Usage</title>
+        <p>
+          Each Avro IDL file defines a single Avro Protocol, and thus generates as its output a JSON-format
+          Avro Protocol file with extension <code>.avpr</code>.
+        </p>
+        <p>
+          To convert a <code>.avdl</code> file into a <code>.avpr</code> file, it may be processed by the
+          <code>idl</code> tool. For example:
+        </p>
+        <source>
+$ java -jar avro-tools.jar idl src/test/idl/input/namespaces.avdl /tmp/namespaces.avpr
+$ head /tmp/namespaces.avpr
+{
+  "protocol" : "TestNamespace",
+  "namespace" : "avro.test.protocol",
+        </source>
+        <p>
+          The <code>idl</code> tool can also process input to and from <em>stdin</em> and <em>stdout</em>.
+          See <code>idl --help</code> for full usage information.
+        </p>
+        <p>A Maven plugin is also provided to compile .avdl files.  To
+        use it, add something like the following to your pom.xml:</p>
+<source><![CDATA[
+<build>
+  <plugins>
+    <plugin>
+      <groupId>org.apache.avro</groupId>
+      <artifactId>avro-maven-plugin</artifactId>
+      <executions>
+        <execution>
+          <goals>
+            <goal>idl-protocol</goal>
+          </goals>
+        </execution>
+      </executions>
+    </plugin>
+  </plugins>
+</build>
+]]></source>
+      </section>
+    </section> <!-- end overview -->
+
+    <section id="defining_protocol">
+      <title>Defining a Protocol in Avro IDL</title>
+
+      <p>An Avro IDL file consists of exactly one protocol definition. The minimal protocol is defined
+      by the following code:
+      </p>
+      <source>
+protocol MyProtocol {
+}
+      </source>
+      <p>
+        This is equivalent to (and generates) the following JSON protocol definition:
+      </p>
+  <!--  "namespace" : null, TODO: this is generated but shouldnt be - AVRO-263 -->
+      <source>
+{
+"protocol" : "MyProtocol",
+  "types" : [ ],
+  "messages" : {
+  }
+}
+      </source>
+      <p>
+        The namespace of the protocol may be changed using the <code>@namespace</code> annotation:
+      </p>
+      <source>
+@namespace("mynamespace")
+protocol MyProtocol {
+}
+      </source>
+      <p>
+        This notation is used throughout Avro IDL as a way of specifying properties for the annotated element,
+        as will be described later in this document.
+      </p>
+      <p>
+        Protocols in Avro IDL can contain the following items:
+      </p>
+        <ul>
+          <li>Imports of external protocol and schema files.</li>
+          <li>Definitions of named schemata, including <em>record</em>s, <em>error</em>s, <em>enum</em>s, and <em>fixed</em>s.</li>
+          <li>Definitions of RPC messages</li>
+        </ul>
+    </section>
+    <section id="imports">
+      <title>Imports</title>
+      <p>Files may be imported in one of three formats: </p>
+      <ul>
+        <li>An IDL file may be imported with a statement like:
+	  <source>import idl "foo.avdl";</source>
+	</li>
+        <li>A JSON protocol file may be imported with a statement like:
+	  <source>import protocol "foo.avpr";</source>
+	</li>
+        <li>A JSON schema file may be imported with a statement like:
+	  <source>import schema "foo.avsc";</source>
+	</li>
+      </ul>
+      <p>Messages and types in the imported file are added to this
+	file's protocol.</p>
+      <p>Imported file names are resolved relative to the current IDL file.</p>
+    </section>
+    <section id="format_enums">
+      <title>Defining an Enumeration</title>
+      <p>
+        Enums are defined in Avro IDL using a syntax similar to C or Java:
+      </p>
+      <source>
+enum Suit {
+  SPADES, DIAMONDS, CLUBS, HEARTS
+}
+      </source>
+      <p>
+        Note that, unlike the JSON format, anonymous enums cannot be defined.
+      </p>
+    </section>
+    <section id="format_fixed">
+      <title>Defining a Fixed Length Field</title>
+      <p>
+        Fixed fields are defined using the following syntax:
+      </p>
+      <source>
+fixed MD5(16);
+      </source>
+      <p>This example defines a fixed-length type called <code>MD5</code> which contains 16 bytes.</p>
+    </section>
+
+    <section id="format_records">
+      <title>Defining Records and Errors</title>
+      <p>
+        Records are defined in Avro IDL using a syntax similar to a <code>struct</code> definition in C:
+      </p>
+      <source>
+record Employee {
+  string name;
+  boolean active = true;
+  long salary;
+}
+      </source>
+      <p>
+        The above example defines a record named “Employee” with three fields.
+      </p>
+      <p>
+        To define an error, simply use the keyword <code>error</code> instead of <code>record</code>.
+        For example:
+      </p>
+      <source>
+error Kaboom {
+  string explanation;
+  int result_code = -1;
+}
+      </source>
+      <p>
+        Each field in a record or error consists of a type and a name,
+        optional property annotations and an optional default value.
+      </p>
+      <p>A type reference in Avro IDL must be one of:</p>
+      <ul>
+        <li>A primitive type</li>
+        <li>A named schema defined prior to this usage in the same Protocol</li>
+        <li>A complex type (array, map, or union)</li>
+      </ul>
+
+      <section id="primitive_types">
+        <title>Primitive Types</title>
+        <p>The primitive types supported by Avro IDL are the same as those supported by Avro's JSON format.
+        This list includes <code>int</code>, <code>long</code>, <code>string</code>, <code>boolean</code>,
+        <code>float</code>, <code>double</code>, <code>null</code>, and <code>bytes</code>.
+        </p>
+      </section>
+
+      <section id="schema_references">
+        <title>References to Named Schemata</title>
+        <p>If a named schema has already been defined in the same Avro IDL file, it may be referenced by name
+        as if it were a primitive type:
+        </p>
+        <source>
+record Card {
+  Suit suit; // refers to the enum Suit defined above
+  int number;
+}
+        </source>
+      </section>
+      <section id="default_values">
+        <title>Default Values</title>
+
+	<p>Default values for fields may be optionally
+	specified by using an equals sign after the field name
+	followed by a JSON expression indicating the default value.
+	This JSON is interpreted as described in
+	the <a href="spec.html#schema_record">spec</a>.</p>
+
+      </section> <!-- default values -->
+      <section id="complex_types">
+        <title>Complex Types</title>
+
+        <section id="arrays">
+          <title>Arrays</title>
+          <p>
+            Array types are written in a manner that will seem familiar to C++ or Java programmers. An array of
+            any type <code>t</code> is denoted <code>array<t></code>. For example, an array of strings is
+            denoted <code>array<string></code>, and a multidimensional array of <code>Foo</code> records
+            would be <code>array<array<Foo>></code>.
+          </p>
+        </section>
+
+        <section id="maps">
+          <title>Maps</title>
+          <p>Map types are written similarly to array types. A map that contains values of type
+          <code>t</code> is written <code>map<t></code>. As in the JSON schema format, all
+          maps contain <code>string</code>-type keys.</p>
+        </section>
+
+        <section id="unions">
+          <title>Unions</title>
+          <p>Union types are denoted as <code>union { typeA, typeB, typeC, ... }</code>. For example,
+          this record contains a string field that is optional (unioned with <code>null</code>):
+          </p>
+          <source>
+record RecordWithUnion {
+  union { null, string } optionalString;
+}
+          </source>
+          <p>
+            Note that the same restrictions apply to Avro IDL unions as apply to unions defined in the
+            JSON format; namely, a union may not contain multiple elements of the same type.
+          </p>
+        </section> <!-- unions -->
+      </section> <!-- complex types -->
+    </section> <!-- how to define records -->
+    <section id="define_messages">
+      <title>Defining RPC Messages</title>
+      <p>The syntax to define an RPC message within an Avro IDL protocol is similar to the syntax for
+      a method declaration within a C header file or a Java interface. To define an RPC message
+      <code>add</code> which takes two arguments named <code>foo</code> and <code>bar</code>,
+      returning an <code>int</code>, simply include the following definition within the protocol:
+      </p>
+      <source>
+int add(int foo, int bar = 0);
+      </source>
+      <p>Message arguments, like record fields, may specify default
+      values.</p>
+      <p>To define a message with no response, you may use the alias <code>void</code>, equivalent
+      to the Avro <code>null</code> type:
+      </p>
+      <source>
+void logMessage(string message);
+      </source>
+      <p>
+        If you have previously defined an error type within the same protocol, you may declare that
+        a message can throw this error using the syntax:
+      </p>
+      <source>
+void goKaboom() throws Kaboom;
+      </source>
+      <p>To define a one-way message, use the
+      keyword <code>oneway</code> after the parameter list, for example:
+      </p>
+      <source>
+void fireAndForget(string message) oneway;
+      </source>
+    </section> <!-- define messages -->
+    <section id="minutiae">
+      <title>Other Language Features</title>
+      <section id="minutiae_comments">
+        <title>Comments</title>
+        <p>All Java-style comments are supported within an Avro IDL file. Any text following
+        <code>//</code> on a line is ignored, as is any text between <code>/*</code> and
+        <code>*/</code>, possibly spanning multiple lines.</p>
+        <p>Comments that begin with <code>/**</code> are used as the
+        documentation string for the type or field definition that
+        follows the comment.</p>
+      </section>
+      <section id="minutiae_escaping">
+        <title>Escaping Identifiers</title>
+        <p>Occasionally, one will need to use a reserved language keyword as an identifier. In order
+        to do so, backticks (<code>`</code>) may be used to escape the identifier. For example, to define
+        a message with the literal name <em>error</em>, you may write:
+        </p>
+        <source>
+void `error`();
+        </source>
+        <p>This syntax is allowed anywhere an identifier is expected.</p>
+      </section>
+      <section id="minutiae_annotations">
+        <title>Annotations for Ordering and Namespaces</title>
+        <p>Java-style annotations may be used to add additional
+          properties to types and fields throughout Avro IDL.</p>
+
+	<p>For example, to specify the sort order of a field within
+          a record, one may use the <code>@order</code> annotation
+          before the field name as follows:</p>
+        <source>
+record MyRecord {
+  string @order("ascending") myAscendingSortField;
+  string @order("descending")  myDescendingField;
+  string @order("ignore") myIgnoredField;
+}
+        </source>
+        <p>A field's type may also be preceded by annotations, e.g.: </p>
+        <source>
+record MyRecord {
+  @java-class("java.util.ArrayList") array<string> myStrings;
+}
+        </source>
+
+        <p>This can be used to support Java classes that can be
+          serialized and deserialized via their <code>toString</code> method
+          and string constructor, e.g.:</p>
+        <source>
+record MyRecord {
+  @java-class("java.math.BigDecimal") string value;
+  @java-key-class("java.io.File") map<string> fileStates;
+  array<@java-class("java.math.BigDecimal") string> weights;
+}
+        </source>
+
+        <p>Similarly, a <code>@namespace</code> annotation may be used to modify the namespace
+        when defining a named schema. For example:
+        </p>
+        <source>
+@namespace("org.apache.avro.firstNamespace")
+protocol MyProto {
+  @namespace("org.apache.avro.someOtherNamespace")
+  record Foo {}
+
+  record Bar {}
+}
+        </source>
+        <p>
+          This example defines a protocol in the <code>firstNamespace</code> namespace. The record <code>Foo</code> will be
+          defined in <code>someOtherNamespace</code> and <code>Bar</code> will be defined in <code>firstNamespace</code>
+          as it inherits its default from its container.
+        </p>
+	<p>Type and field aliases are specified with
+	the <code>@aliases</code> annotation as follows:</p>
+        <source>
+@aliases(["org.old.OldRecord", "org.ancient.AncientRecord"])
+record MyRecord {
+  string @aliases(["oldField", "ancientField"]) myNewField;
+}
+        </source>
+        <p>Some annotations like those listed above are handled
+        specially.  All other annotations are added as properties to
+        the protocol, message, schema or field.</p>
+      </section>
+    </section>
+    <section id="example">
+      <title>Complete Example</title>
+      <p>The following is a complete example of an Avro IDL file that shows most of the above features:</p>
+      <source>
+/**
+ * An example protocol in Avro IDL
+ */
+@namespace("org.apache.avro.test")
+protocol Simple {
+
+  @aliases(["org.foo.KindOf"])
+  enum Kind {
+    FOO,
+    BAR, // the bar enum value
+    BAZ
+  }
+
+  fixed MD5(16);
+
+  record TestRecord {
+    @order("ignore")
+    string name;
+
+    @order("descending")
+    Kind kind;
+
+    MD5 hash;
+
+    union { MD5, null} @aliases(["hash"]) nullableHash;
+
+    array<long> arrayOfLongs;
+  }
+
+  error TestError {
+    string message;
+  }
+
+  string hello(string greeting);
+  TestRecord echo(TestRecord `record`);
+  int add(int arg1, int arg2);
+  bytes echoBytes(bytes data);
+  void `error`() throws TestError;
+  void ping() oneway;
+}
+      </source>
+      <p>Additional examples may be found in the Avro source tree under the <code>src/test/idl/input</code> directory.</p>
+    </section>
+
+  <p><em>Apache Avro, Avro, Apache, and the Avro and Apache logos are
+   trademarks of The Apache Software Foundation.</em></p>
+
+  </body>
+</document>
diff --git a/doc/src/content/xdocs/index.xml b/doc/src/content/xdocs/index.xml
new file mode 100644
index 0000000..0f9eed2
--- /dev/null
+++ b/doc/src/content/xdocs/index.xml
@@ -0,0 +1,96 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+  -->
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
+   "http://forrest.apache.org/dtd/document-v20.dtd" [
+  <!ENTITY % avro-entities PUBLIC "-//Apache//ENTITIES Avro//EN"
+	   "../../../../build/avro.ent">
+  %avro-entities;
+]>
+<document>
+  <header>
+    <title>Apache Avro™ &AvroVersion; Documentation</title>
+  </header>
+  <body>
+    <section id="intro">
+      <title>Introduction</title>
+      <p>Apache Avro™ is a data serialization system.</p>
+      <p>Avro provides:</p>
+	<ul>
+	  <li>Rich data structures.</li>
+	  <li>A compact, fast, binary data format.</li>
+	  <li>A container file, to store persistent data.</li>
+	  <li>Remote procedure call (RPC).</li>
+	  <li>Simple integration with dynamic languages.  Code
+	    generation is not required to read or write data files nor
+	    to use or implement RPC protocols.  Code generation is an
+	    optional optimization, only worth implementing for
+	    statically typed languages.</li>
+	</ul>
+    </section>
+    <section id="schemas">
+      <title>Schemas</title>
+      <p>Avro relies on <em>schemas</em>.  When Avro data is read, the
+	schema used when writing it is always present.  This permits
+	each datum to be written with no per-value overheads, making
+	serialization both fast and small.  This also facilitates use
+	with dynamic, scripting languages, since data, together with
+	its schema, is fully self-describing.</p>
+      <p>When Avro data is stored in a file, its schema is stored with
+	it, so that files may be processed later by any program.  If
+	the program reading the data expects a different schema this
+	can be easily resolved, since both schemas are present.</p>
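+      <p>For instance, with the Python library a file written with a full
+	schema can be read back through a smaller reader's schema that keeps
+	only the fields of interest; the sketch below illustrates the idea,
+	assuming a <em>users.avro</em> file written with a fuller
+	<code>User</code> record schema:</p>
+      <source>
+from avro.datafile import DataFileReader
+from avro.io import DatumReader
+import avro.schema
+
+# A reader's schema that keeps only the "name" field of the original record
+reader_schema = avro.schema.parse("""
+{"namespace": "example.avro", "type": "record", "name": "User",
+ "fields": [{"name": "name", "type": "string"}]}
+""")
+
+reader = DataFileReader(open("users.avro", "r"),
+                        DatumReader(readers_schema=reader_schema))
+for user in reader:
+    print user  # only the "name" field is populated
+reader.close()
+      </source>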
+      <p>When Avro is used in RPC, the client and server exchange
+	schemas in the connection handshake.  (This can be optimized
+	so that, for most calls, no schemas are actually transmitted.)
+	Since the client and server both have the other's full
+	schema, correspondence between same-named fields, missing
+	fields, extra fields, etc. can all be easily resolved.</p>
+      <p>Avro schemas are defined with
+	<a href="http://www.json.org/">JSON</a>.  This
+	facilitates implementation in languages that already have
+	JSON libraries.</p>
+    </section>
+    <section id="compare">
+      <title>Comparison with other systems</title>
+      <p>Avro provides functionality similar to systems such
+	as <a href="http://thrift.apache.org/">Thrift</a>,
+	<a href="http://code.google.com/p/protobuf/">Protocol
+	  Buffers</a>, etc.  Avro differs from these systems in the
+	  following fundamental aspects.</p>
+      <ul>
+	<li><em>Dynamic typing</em>: Avro does not require that code
+	  be generated.  Data is always accompanied by a schema that
+	  permits full processing of that data without code
+	  generation, static datatypes, etc.  This facilitates
+	  construction of generic data-processing systems and
+	  languages.</li>
+	<li><em>Untagged data</em>: Since the schema is present when
+	  data is read, considerably less type information need be
+	  encoded with data, resulting in smaller serialization size.</li>
+	<li><em>No manually-assigned field IDs</em>: When a schema
+	  changes, both the old and new schema are always present when
+	  processing data, so differences may be resolved
+	  symbolically, using field names.</li>
+      </ul>
+    </section>
+
+  <p><em>Apache Avro, Avro, Apache, and the Avro and Apache logos are
+   trademarks of The Apache Software Foundation.</em></p>
+
+  </body>
+</document>
diff --git a/doc/src/content/xdocs/mr.xml b/doc/src/content/xdocs/mr.xml
new file mode 100644
index 0000000..a93a137
--- /dev/null
+++ b/doc/src/content/xdocs/mr.xml
@@ -0,0 +1,580 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+  -->
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
+   "http://forrest.apache.org/dtd/document-v20.dtd" [
+  <!ENTITY % avro-entities PUBLIC "-//Apache//ENTITIES Avro//EN"
+	   "../../../../build/avro.ent">
+  %avro-entities;
+]>
+<document>
+  <header>
+    <title>Apache Avro™ &AvroVersion; Hadoop MapReduce guide</title>
+  </header>
+  <body>
+    <p>
+      Avro provides a convenient way to represent complex data structures within
+      a Hadoop MapReduce job.  Avro data can be used as both input to and output
+      from a MapReduce job, as well as the intermediate format.  The example in
+      this guide uses Avro data for all three, but it's possible to mix and
+      match; for instance, MapReduce can be used to aggregate a particular field
+      in an Avro record.
+    </p>
+    <p>
+      This guide assumes basic familiarity with both Hadoop MapReduce and Avro.
+      See the <a href="http://hadoop.apache.org/docs/current/">Hadoop
+      documentation</a> and the <a href="gettingstartedjava.html">Avro getting
+      started guide</a> for introductions to these projects.  This guide uses
+      the old MapReduce API (<code>org.apache.hadoop.mapred</code>) and the new 
+      MapReduce API (<code>org.apache.hadoop.mapreduce</code>).
+    </p>
+    <section>
+      <title>Setup</title>
+      <p>
+        The code from this guide is included in the Avro docs under
+        <em>examples/mr-example</em>.  The example is set up as a Maven project
+        that includes the necessary Avro and MapReduce dependencies and the Avro
+        Maven plugin for code generation, so no external jars are needed to run
+        the example.  In particular, the POM includes the following dependencies:
+      </p>
+      <source>
+<dependency>
+  <groupId>org.apache.avro</groupId>
+  <artifactId>avro</artifactId>
+  <version>&AvroVersion;</version>
+</dependency>
+<dependency>
+  <groupId>org.apache.avro</groupId>
+  <artifactId>avro-mapred</artifactId>
+  <version>&AvroVersion;</version>
+</dependency>
+<dependency>
+  <groupId>org.apache.hadoop</groupId>
+  <artifactId>hadoop-core</artifactId>
+  <version>1.1.0</version>
+</dependency>
+      </source>
+      <p>
+        And the following plugin:
+      </p>
+      <source>
+<plugin>
+  <groupId>org.apache.avro</groupId>
+  <artifactId>avro-maven-plugin</artifactId>
+  <version>&AvroVersion;</version>
+  <executions>
+    <execution>
+      <phase>generate-sources</phase>
+      <goals>
+        <goal>schema</goal>
+      </goals>
+      <configuration>
+        <sourceDirectory>${project.basedir}/../</sourceDirectory>
+        <outputDirectory>${project.basedir}/target/generated-sources/</outputDirectory>
+      </configuration>
+    </execution>
+  </executions>
+</plugin>
+      </source>
+      <p>
+        If you do not configure the <em>sourceDirectory</em> and <em>outputDirectory</em> 
+        properties, the defaults will be used. The <em>sourceDirectory</em> property 
+        defaults to <em>src/main/avro</em>. The <em>outputDirectory</em> property 
+        defaults to <em>target/generated-sources</em>. You can change the paths to 
+        match your project layout.
+      </p>
+      <p>
+        Alternatively, Avro jars can be downloaded directly from the <a
+        href="http://avro.apache.org/releases.html">Apache Avro™
+        Releases</a> page.  The relevant Avro jars for this guide are
+        <em>avro-&AvroVersion;.jar</em> and
+        <em>avro-mapred-&AvroVersion;.jar</em>, as well as
+        <em>avro-tools-&AvroVersion;.jar</em> for code generation and viewing
+        Avro data files as JSON.  In addition, you will need to install Hadoop
+        in order to use MapReduce.
+      </p>
+    </section>
+
+    <section>
+      <title>Example: ColorCount</title>
+      <p>
+        Below is a simple example of a MapReduce that uses Avro. There is an example
+        for both the old (<em>org.apache.hadoop.mapred</em>) and new
+        (<em>org.apache.hadoop.mapreduce</em>) APIs under
+        <em>examples/mr-example/src/main/java/example/</em>. <em>MapredColorCount</em>
+        is the example for the older mapred API while <em>MapReduceColorCount</em> is
+        the example for the newer mapreduce API. Both examples are below, but
+        we will detail the mapred API in our subsequent examples.
+      </p>
+
+      <p>MapredColorCount:</p>
+      <source>
+package example;
+
+import java.io.IOException;
+
+import org.apache.avro.*;
+import org.apache.avro.Schema.Type;
+import org.apache.avro.mapred.*;
+import org.apache.hadoop.conf.*;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.*;
+import org.apache.hadoop.util.*;
+
+import example.avro.User;
+
+public class MapredColorCount extends Configured implements Tool {
+
+  public static class ColorCountMapper extends AvroMapper<User, Pair<CharSequence, Integer>> {
+    @Override
+    public void map(User user, AvroCollector<Pair<CharSequence, Integer>> collector, Reporter reporter)
+        throws IOException {
+      CharSequence color = user.getFavoriteColor();
+      // We need this check because the User.favorite_color field has type ["string", "null"]
+      if (color == null) {
+        color = "none";
+      }
+      collector.collect(new Pair<CharSequence, Integer>(color, 1));
+    }
+  }
+
+  public static class ColorCountReducer extends AvroReducer<CharSequence, Integer,
+                                                            Pair<CharSequence, Integer>> {
+    @Override
+    public void reduce(CharSequence key, Iterable<Integer> values,
+                       AvroCollector<Pair<CharSequence, Integer>> collector,
+                       Reporter reporter)
+        throws IOException {
+      int sum = 0;
+      for (Integer value : values) {
+        sum += value;
+      }
+      collector.collect(new Pair<CharSequence, Integer>(key, sum));
+    }
+  }
+
+  public int run(String[] args) throws Exception {
+    if (args.length != 2) {
+      System.err.println("Usage: MapredColorCount <input path> <output path>");
+      return -1;
+    }
+
+    JobConf conf = new JobConf(getConf(), MapredColorCount.class);
+    conf.setJobName("colorcount");
+
+    FileInputFormat.setInputPaths(conf, new Path(args[0]));
+    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
+
+    AvroJob.setMapperClass(conf, ColorCountMapper.class);
+    AvroJob.setReducerClass(conf, ColorCountReducer.class);
+
+    // Note that AvroJob.setInputSchema and AvroJob.setOutputSchema set
+    // relevant config options such as input/output format, map output
+    // classes, and output key class.
+    AvroJob.setInputSchema(conf, User.getClassSchema());
+    AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Type.STRING),
+        Schema.create(Type.INT)));
+
+    JobClient.runJob(conf);
+    return 0;
+  }
+
+  public static void main(String[] args) throws Exception {
+    int res = ToolRunner.run(new Configuration(), new MapredColorCount(), args);
+    System.exit(res);
+  }
+}
+      </source>
+
+      <p>MapReduceColorCount:</p>
+      <source>
+package example;
+
+import java.io.IOException;
+
+import org.apache.avro.Schema;
+import org.apache.avro.mapred.AvroKey;
+import org.apache.avro.mapred.AvroValue;
+import org.apache.avro.mapreduce.AvroJob;
+import org.apache.avro.mapreduce.AvroKeyInputFormat;
+import org.apache.avro.mapreduce.AvroKeyValueOutputFormat;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import example.avro.User;
+
+public class MapReduceColorCount extends Configured implements Tool {
+
+  public static class ColorCountMapper extends
+      Mapper<AvroKey<User>, NullWritable, Text, IntWritable> {
+
+    @Override
+    public void map(AvroKey<User> key, NullWritable value, Context context)
+        throws IOException, InterruptedException {
+
+      CharSequence color = key.datum().getFavoriteColor();
+      if (color == null) {
+        color = "none";
+      }
+      context.write(new Text(color.toString()), new IntWritable(1));
+    }
+  }
+
+  public static class ColorCountReducer extends
+      Reducer<Text, IntWritable, AvroKey<CharSequence>, AvroValue<Integer>> {
+
+    @Override
+    public void reduce(Text key, Iterable<IntWritable> values,
+        Context context) throws IOException, InterruptedException {
+
+      int sum = 0;
+      for (IntWritable value : values) {
+        sum += value.get();
+      }
+      context.write(new AvroKey<CharSequence>(key.toString()), new AvroValue<Integer>(sum));
+    }
+  }
+
+  public int run(String[] args) throws Exception {
+    if (args.length != 2) {
+      System.err.println("Usage: MapReduceColorCount <input path> <output path>");
+      return -1;
+    }
+
+    Job job = new Job(getConf());
+    job.setJarByClass(MapReduceColorCount.class);
+    job.setJobName("Color Count");
+
+    FileInputFormat.setInputPaths(job, new Path(args[0]));
+    FileOutputFormat.setOutputPath(job, new Path(args[1]));
+
+    job.setInputFormatClass(AvroKeyInputFormat.class);
+    job.setMapperClass(ColorCountMapper.class);
+    AvroJob.setInputKeySchema(job, User.getClassSchema());
+    job.setMapOutputKeyClass(Text.class);
+    job.setMapOutputValueClass(IntWritable.class);
+
+    job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
+    job.setReducerClass(ColorCountReducer.class);
+    AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING));
+    AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT));
+
+    return (job.waitForCompletion(true) ? 0 : 1);
+  }
+
+  public static void main(String[] args) throws Exception {
+    int res = ToolRunner.run(new MapReduceColorCount(), args);
+    System.exit(res);
+  }
+}
+      </source>
+
+      <p>
+        ColorCount reads in data files containing <code>User</code> records,
+        defined in <em>examples/user.avsc</em>, and counts the number of
+        instances of each favorite color.  (This example draws inspiration from
+        the canonical WordCount MapReduce application.)  This example uses the
+        old MapReduce API.  See MapReduceAvroWordCount, found under
+        <em>doc/examples/mr-example/src/main/java/example/</em>, for an example
+        that uses the new MapReduce API.  The <code>User</code>
+        schema is defined as follows:
+      </p>
+      <source>
+{"namespace": "example.avro",
+ "type": "record",
+ "name": "User",
+ "fields": [
+     {"name": "name", "type": "string"},
+     {"name": "favorite_number",  "type": ["int", "null"]},
+     {"name": "favorite_color", "type": ["string", "null"]}
+ ]
+}
+      </source>
+      <p>
+        This schema is compiled into the <code>User</code> class used by
+        ColorCount via the Avro Maven plugin (see
+        <em>examples/mr-example/pom.xml</em> for how this is set up).
+      </p>
+      <p>
+        ColorCountMapper essentially takes a <code>User</code> as input and
+        extracts the <code>User</code>'s favorite color, emitting the key-value
+        pair <code><</code><em>favoriteColor</em><code>, 1></code>.
+        ColorCountReducer then adds up how many occurrences of a particular
+        favorite color were emitted, and outputs the result as a
+        <code>Pair</code> record.  These <code>Pair</code>s are serialized to an
+        Avro data file.
+      </p>
+      <section>
+        <title>Running ColorCount</title>
+        <p>
+          The ColorCount application is provided as a Maven project in the Avro
+          docs under <em>examples/mr-example</em>.  To build the project,
+          including the code generation of the User schema, run:
+        </p>
+        <source>
+mvn compile
+        </source>
+        <p>
+          Next, run GenerateData from examples/mr-example to create an Avro data
+          file, <em>input/users.avro</em>, containing 20 <code>User</code>s with
+          favorite colors chosen randomly from a list:
+        </p>
+        <source>
+mvn exec:java -q -Dexec.mainClass=example.GenerateData
+        </source>
+        <p>
+          Besides creating the data file, GenerateData prints the JSON
+          representations of the Users generated to stdout, for example:
+        </p>
+        <source>
+{"name": "user", "favorite_number": null, "favorite_color": "red"}
+{"name": "user", "favorite_number": null, "favorite_color": "green"}
+{"name": "user", "favorite_number": null, "favorite_color": "purple"}
+{"name": "user", "favorite_number": null, "favorite_color": null}
+...
+        </source>
+        <p>
+          Now we're ready to run ColorCount.  We specify our freshly-generated
+          <em>input</em> folder as the input path and <em>output</em> as our
+          output folder (note that MapReduce will not start a job if the output
+          folder already exists):
+        </p>
+        <source>
+mvn exec:java -q -Dexec.mainClass=example.MapredColorCount -Dexec.args="input output"
+        </source>
+        <p>
+          Once ColorCount completes, checking the contents of the new
+          <em>output</em> directory should yield the following:
+        </p>
+        <source>
+$ ls output/
+part-00000.avro  _SUCCESS
+        </source>
+        <p>
+          You can check the contents of the generated Avro file using the avro-tools jar:
+        </p>
+        <source>
+$ java -jar /path/to/avro-tools-&AvroVersion;.jar tojson output/part-00000.avro
+{"value": 3, "key": "blue"}
+{"value": 7, "key": "green"}
+{"value": 1, "key": "none"}
+{"value": 2, "key": "orange"}
+{"value": 3, "key": "purple"}
+{"value": 2, "key": "red"}
+{"value": 2, "key": "yellow"}
+        </source>
+      </section>
+    </section>
+    <p>Now let's go over the ColorCount example in detail.</p>
+    <section>
+      <title>Mapper - org.apache.hadoop.mapred API</title>
+      <p>
+        The easiest way to use Avro data files as input to a MapReduce job is to
+        subclass <code>AvroMapper</code>.  An <code>AvroMapper</code> defines a
+        map function that takes an Avro datum as input and outputs a key/value
+        pair represented as a <code>Pair</code> record.  In the ColorCount
+        example, <code>ColorCountMapper</code> is an <code>AvroMapper</code>
+        that takes a <code>User</code> as input and outputs a
+        <code>Pair<CharSequence, Integer></code>, where the
+        <code>CharSequence</code> key is the user's favorite color and the
+        <code>Integer</code> value is 1.
+      </p>
+      <source>
+public static class ColorCountMapper extends AvroMapper<User, Pair<CharSequence, Integer>> {
+  @Override
+  public void map(User user, AvroCollector<Pair<CharSequence, Integer>> collector, Reporter reporter)
+      throws IOException {
+    CharSequence color = user.getFavoriteColor();
+    // We need this check because the User.favorite_color field has type ["string", "null"]
+    if (color == null) {
+      color = "none";
+    }
+    collector.collect(new Pair<CharSequence, Integer>(color, 1));
+  }
+}
+      </source>
+      <p>
+        In order to use our <code>AvroMapper</code>, we must call
+        <code>AvroJob.setMapperClass</code> and
+        <code>AvroJob.setInputSchema</code>.
+      </p>
+      <source>
+AvroJob.setMapperClass(conf, ColorCountMapper.class);
+AvroJob.setInputSchema(conf, User.getClassSchema());
+      </source>
+      <p>
+        Note that <code>AvroMapper</code> does not implement the
+        <code>Mapper</code> interface.  Under the hood, the specified Avro data
+        files are deserialized into <code>AvroWrapper</code>s containing the
+        actual data, which are processed by a <code>Mapper</code> that calls the
+        configured <code>AvroMapper</code>'s map function.
+        <code>AvroJob.setInputSchema</code> sets up the relevant configuration
+        parameters needed to make this happen, thus you should not need to call
+        <code>JobConf.setMapperClass</code>,
+        <code>JobConf.setInputFormat</code>,
+        <code>JobConf.setMapOutputKeyClass</code>,
+        <code>JobConf.setMapOutputValueClass</code>, or
+        <code>JobConf.setOutputKeyComparatorClass</code>.
+      </p>
+    </section>
+    <section>
+      <title>Mapper - org.apache.hadoop.mapreduce API</title>
+      <p>
+        This document will not go into all the differences between the mapred and mapreduce APIs;
+        it only describes the main ones. As you can see, ColorCountMapper is now a
+        subclass of the Hadoop <code>Mapper</code> class and is passed an <code>AvroKey</code> as its key.
+
+        Additionally, the AvroJob method calls have changed slightly.
+      </p>
+      <source>
+  public static class ColorCountMapper extends
+      Mapper<AvroKey<User>, NullWritable, Text, IntWritable> {
+
+    @Override
+    public void map(AvroKey<User> key, NullWritable value, Context context)
+        throws IOException, InterruptedException {
+
+      CharSequence color = key.datum().getFavoriteColor();
+      if (color == null) {
+        color = "none";
+      }
+      context.write(new Text(color.toString()), new IntWritable(1));
+    }
+  }
+      </source>
+    </section>
+    <section>
+      <title>Reducer - org.apache.hadoop.mapred API</title>
+      <p>
+        Analogously to <code>AvroMapper</code>, an <code>AvroReducer</code>
+        defines a reducer function that takes the key/value types output by an
+        <code>AvroMapper</code> (or any mapper that outputs <code>Pair</code>s)
+        and outputs a key/value pair represented as a <code>Pair</code> record.  In
+        the ColorCount example, <code>ColorCountReducer</code> is an
+        <code>AvroReducer</code> that takes the <code>CharSequence</code> key
+        representing a favorite color and the <code>Iterable<Integer></code>
+        representing the counts for that color (they should all be 1 in this
+        example) and adds up the counts.
+      </p>
+      <source>
+public static class ColorCountReducer extends AvroReducer<CharSequence, Integer,
+                                                          Pair<CharSequence, Integer>> {
+  @Override
+  public void reduce(CharSequence key, Iterable<Integer> values,
+                     AvroCollector<Pair<CharSequence, Integer>> collector,
+                     Reporter reporter)
+      throws IOException {
+    int sum = 0;
+    for (Integer value : values) {
+      sum += value;
+    }
+    collector.collect(new Pair<CharSequence, Integer>(key, sum));
+  }
+}
+      </source>
+      <p>
+        In order to use our <code>AvroReducer</code>, we must call
+        <code>AvroJob.setReducerClass</code> and
+        <code>AvroJob.setOutputSchema</code>.
+      </p>
+      <source>
+AvroJob.setReducerClass(conf, ColorCountReducer.class);
+AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Type.STRING),
+                                                 Schema.create(Type.INT)));
+      </source>
+      <p>
+        Note that <code>AvroReducer</code> does not implement the
+        <code>Reducer</code> interface.  The intermediate <code>Pair</code>s
+        output by the mapper are split into <code>AvroKey</code>s and
+        <code>AvroValue</code>s, which are processed by a <code>Reducer</code>
+        that calls the configured <code>AvroReducer</code>'s reduce function.
+        <code>AvroJob.setOutputSchema</code> sets up the relevant configuration
+        parameters needed to make this happen, thus you should not need to call
+        <code>JobConf.setReducerClass</code>,
+        <code>JobConf.setOutputFormat</code>,
+        <code>JobConf.setOutputKeyClass</code>,
+        <code>JobConf.setMapOutputKeyClass</code>,
+        <code>JobConf.setMapOutputValueClass</code>, or
+        <code>JobConf.setOutputKeyComparatorClass</code>.
+      </p>
+    </section>
+    <section>
+      <title>Reducer - org.apache.hadoop.mapreduce API</title>
+      <p>
+        As before, we will not detail every difference between the APIs. As with the Mapper,
+        ColorCountReducer is now a subclass of <code>Reducer</code>, and it emits
+        <code>AvroKey</code>s and <code>AvroValue</code>s.
+
+        Additionally, the AvroJob method calls have changed slightly.
+      </p>
+      <source>
+  public static class ColorCountReducer extends
+      Reducer<Text, IntWritable, AvroKey<CharSequence>, AvroValue<Integer>> {
+
+    @Override
+    public void reduce(Text key, Iterable<IntWritable> values,
+        Context context) throws IOException, InterruptedException {
+
+      int sum = 0;
+      for (IntWritable value : values) {
+        sum += value.get();
+      }
+      context.write(new AvroKey<CharSequence>(key.toString()), new AvroValue<Integer>(sum));
+    }
+  }
+      </source>
+    </section>
+    <section>
+      <title>Learning more</title>
+      <p>
+        The mapred API allows users to mix Avro <code>AvroMapper</code>s and
+        <code>AvroReducer</code>s with non-Avro <code>Mapper</code>s and
+        <code>Reducer</code>s, and the mapreduce API allows users to read Avro
+        input and write non-Avro output, or vice versa.
+      </p>
+
+      <p>
+        API documentation is available for both the <a
+        href="http://avro.apache.org/docs/current/api/java/org/apache/avro/mapred/package-summary.html">
+        <code>org.apache.avro.mapred</code></a> and the <a
+        href="http://avro.apache.org/docs/current/api/java/org/apache/avro/mapreduce/package-summary.html">
+        <code>org.apache.avro.mapreduce</code></a> packages.
+        With either API, it is possible to implement your own <code>Mapper</code>s and
+        <code>Reducer</code>s directly using the public classes provided in
+        these libraries.  See the AvroWordCount application, found under
+        <em>examples/mr-example/src/main/java/example/AvroWordCount.java</em> in
+        the Avro documentation, for an example of implementing a
+        <code>Reducer</code> that outputs Avro data using the old MapReduce API.
+        See the MapReduceAvroWordCount application, found under
+        <em>examples/mr-example/src/main/java/example/MapReduceAvroWordCount.java</em> in
+        the Avro documentation, for an example of implementing a
+        <code>Reducer</code> that outputs Avro data using the new MapReduce API.
+      </p>
+    </section>
+  </body>
+</document>
diff --git a/doc/src/content/xdocs/sasl.xml b/doc/src/content/xdocs/sasl.xml
new file mode 100644
index 0000000..6d0271c
--- /dev/null
+++ b/doc/src/content/xdocs/sasl.xml
@@ -0,0 +1,152 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+  -->
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
+   "http://forrest.apache.org/dtd/document-v20.dtd" [
+  <!ENTITY % avro-entities PUBLIC "-//Apache//ENTITIES Avro//EN"
+	   "../../../../build/avro.ent">
+  %avro-entities;
+]>
+<document>
+  <header>
+    <title>Apache Avro™ &AvroVersion; SASL Profile</title>
+  </header>
+  <body>
+    <section id="intro">
+      <title>Introduction</title>
+      <p>SASL (<a href="http://www.ietf.org/rfc/rfc2222.txt">RFC 2222</a>)
+      provides a framework for authentication and security of network
+      protocols.  Each protocol that uses SASL is meant to define a
+      SASL <em>profile</em>.  This document provides a SASL profile
+      for connection-based Avro RPC.</p>
+    </section>
+
+    <section id="overview">
+      <title>Overview</title>
+      <p>SASL negotiation proceeds as a series of message interactions
+      over a connection between a client and server using a selected
+      SASL <em>mechanism</em>.  The client starts this negotiation by
+      sending its chosen mechanism name with an initial (possibly
+      empty) message.  Negotiation proceeds with the exchange of
+      messages until either side indicates success or failure.  The
+      content of the messages is mechanism-specific.  If the
+      negotiation succeeds, then the session can proceed over the
+      connection, otherwise it must be abandoned.</p>
+      <p>Some mechanisms continue to process session data after
+      negotiation (e.g., encrypting it), while some specify that
+      further session data is transmitted unmodified.</p>
+    </section>
+
+    <section id="negotiation">
+      <title>Negotiation</title>
+      <section id="commands">
+	<title>Commands</title>
+	<p>Avro SASL negotiation uses four one-byte commands.</p>
+	<ul>
+	  <li><code>0: START</code>  Used in a client's initial message.</li>
+	  <li><code>1: CONTINUE</code> Used while negotiation is ongoing.</li>
+	  <li><code>2: FAIL</code> Terminates negotiation unsuccessfully.</li>
+	  <li><code>3: COMPLETE</code> Terminates negotiation successfully.</li>
+	</ul>
+	
+	<p>The format of a START message is:</p>
+	<source>| 0 | 4-byte mechanism name length | mechanism name | 4-byte payload length | payload data |</source>
+	
+	<p>The format of a CONTINUE message is:</p>
+	<source>| 1 | 4-byte payload length | payload data |</source>
+	
+	<p>The format of a FAIL message is:</p>
+	<source>| 2 | 4-byte message length | UTF-8 message |</source>
+	
+	<p>The format of a COMPLETE message is:</p>
+	<source>| 3 | 4-byte payload length | payload data |</source>
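+	<p>As an illustration only, a START message could be assembled as
+	  follows, assuming the lengths are four-byte, big-endian integers as
+	  in Avro's message framing:</p>
+	<source>
+import struct
+
+def start_message(mechanism, payload=""):
+    # | 0 | 4-byte mechanism name length | mechanism name | 4-byte payload length | payload |
+    return ("\x00"
+            + struct.pack(">I", len(mechanism)) + mechanism
+            + struct.pack(">I", len(payload)) + payload)
+
+msg = start_message("ANONYMOUS")
+	</source>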
+      </section>
+
+      <section id="process">
+	<title>Process</title>
+	<p>Negotiation is initiated by a client sending a START command
+	  containing the client's chosen mechanism name and any
+	  mechanism-specific payload data.</p>
+	
+	<p>The server and client then interchange some number
+	  (possibly zero) of CONTINUE messages.  Each message contains
+	  payload data that is processed by the security mechanism to
+	  generate the next message.</p>
+	
+	<p>Once either the client or server sends a FAIL message,
+	  negotiation has failed.  UTF-8-encoded text is included in
+	  the failure message.  Once a FAIL message has been
+	  sent or received, or any other error occurs in the
+	  negotiation, further communication on this connection must
+	  cease.</p>
+	
+	<p>Once either the client or server sends a COMPLETE message,
+	  negotiation has completed successfully.  Session data
+	  may now be transmitted over the connection until it is
+	  closed by either side.</p>
+      </section>
+
+    </section>
+
+    <section id="session">
+      <title>Session Data</title>
+      <p>If no SASL QOP (quality of protection) is negotiated, then
+	all subsequent writes to/reads over this connection are
+	written/read unmodified.  In particular, messages use
+	Avro <a href="spec.html#Message+Framing">framing</a>, and are
+	of the form:</p>
+      <source>| 4-byte frame length | frame data | ... | 4 zero bytes |</source>
+      <p>If a SASL QOP is negotiated, then it must be used by the
+	connection for all subsequent messages. This is done by
+	wrapping each non-empty frame written using the security
+	mechanism and unwrapping each non-empty frame read.  The
+	length written in each non-empty frame is the length of the
+	wrapped data. Complete frames must be passed to the security
+	mechanism for unwrapping.  Unwrapped data is then passed to
+	the application as the content of the frame.</p>
+      <p>If at any point processing fails due to wrapping, unwrapping
+	or framing errors, then all further communication on this
+	connection must cease.</p>
+    </section>
+
+    <section id="anonymous">
+      <title>Anonymous Mechanism</title>
+      <p>The SASL anonymous mechanism
+      (<a href="http://www.ietf.org/rfc/rfc2245.txt">RFC 2245</a>) is
+      quite simple to implement.  In particular, an initial anonymous
+      request may be prefixed by the following static sequence:</p>
+      <source>| 0 | 0009 | ANONYMOUS | 0000 |</source>
+      <p>If a server uses the anonymous mechanism, it should check
+      that the mechanism name in the start message prefixing the first
+      request received is 'ANONYMOUS', then simply prefix its initial
+      response with a COMPLETE message of:</p>
+      <source>| 3 | 0000 |</source>
+      <p>If an anonymous server receives some other mechanism name,
+      then it may respond with a FAIL message as simple as:</p>
+      <source>| 2 | 0000 |</source>
+      <p>Note that the anonymous mechanism need add no additional
+      round-trip messages between client and server.  The START
+      message can be piggybacked on the initial request and the
+      COMPLETE or FAIL message can be piggybacked on the initial
+      response.</p>
+    </section>
+
+  <p><em>Apache Avro, Avro, Apache, and the Avro and Apache logos are
+   trademarks of The Apache Software Foundation.</em></p>
+
+  </body>
+</document>
diff --git a/doc/src/content/xdocs/site.xml b/doc/src/content/xdocs/site.xml
new file mode 100644
index 0000000..547969c
--- /dev/null
+++ b/doc/src/content/xdocs/site.xml
@@ -0,0 +1,91 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<!--
+Forrest site.xml
+
+This file contains an outline of the site's information content.  It is used to:
+- Generate the website menus (though these can be overridden - see docs)
+- Provide semantic, location-independent aliases for internal 'site:' URIs, eg
+<link href="site:changes"> links to changes.html (or ../changes.html if in
+  subdir).
+- Provide aliases for external URLs in the external-refs section.  Eg, <link
+  href="ext:cocoon"> links to http://cocoon.apache.org/ 
+
+See http://forrest.apache.org/docs/linking.html for more info
+-->
+<!-- The label attribute of the outer "site" element will only show
+  in the linkmap (linkmap.html).
+  Use elements project-name and group-name in skinconfig to change name of 
+  your site or project that is usually shown at the top of page.
+  No matter what you configure for the href attribute, Forrest will
+  always use index.html when you request http://yourHost/
+  See FAQ: "How can I use a start-up-page other than index.html?"
+-->
+
+<site label="Avro" href="" xmlns="http://apache.org/forrest/linkmap/1.0">
+
+  <docs label="Documentation"> 
+    <overview   label="Overview"          href="index.html" />
+    <gettingstartedjava label="Getting started (Java)" href="gettingstartedjava.html" />
+    <gettingstartedpython label="Getting started (Python)" href="gettingstartedpython.html" />
+    <spec       label="Specification"     href="spec.html" />
+    <trevni     label="Trevni"            href="ext:trevni/spec" />
+    <java-api   label="Java API"          href="ext:api/java/index" />
+    <c-api      label="C API"             href="ext:api/c/index" />
+    <cpp-api    label="C++ API"           href="ext:api/cpp/index" />
+    <csharp-api label="C# API"            href="ext:api/csharp/index" />
+    <mr         label="MapReduce guide"  href="mr.html" />
+    <idl        label="IDL language"      href="idl.html" />
+    <sasl       label="SASL profile"      href="sasl.html" />
+    <wiki       label="Wiki"              href="ext:wiki" />
+    <faq        label="FAQ"               href="ext:faq" />
+  </docs>
+  
+  <external-refs>
+    <site      href="http://hadoop.apache.org/avro/"/>
+    <lists     href="http://hadoop.apache.org/avro/mailing_lists.html"/>
+    <archive   href="http://mail-archives.apache.org/mod_mbox/hadoop-avro-commits/"/>
+    <releases  href="http://hadoop.apache.org/avro/releases.html">
+      <download href="#Download" />
+    </releases>
+    <jira      href="http://hadoop.apache.org/avro/issue_tracking.html"/>
+    <wiki      href="http://wiki.apache.org/hadoop/Avro/" />
+    <faq       href="http://wiki.apache.org/hadoop/Avro/FAQ" />
+    <json      href="http://www.json.org/" />
+    <vint      href="http://lucene.apache.org/java/3_5_0/fileformats.html#VInt"/>
+    <zigzag    href="http://code.google.com/apis/protocolbuffers/docs/encoding.html#types"/>
+    <api href="api/">
+      <c href="c/">
+	<index href="index.html" />
+      </c>
+      <cpp href="cpp/">
+	<index href="html/index.html" />
+      </cpp>
+      <csharp href="csharp/">
+	<index href="index.html" />
+      </csharp>
+      <java href="java/">
+	<index href="index.html" />
+      </java>
+    </api>
+    <trevni href="trevni/">
+      <spec href="spec.html"/>
+    </trevni>
+  </external-refs>
+ 
+</site>
diff --git a/doc/src/content/xdocs/spec.xml b/doc/src/content/xdocs/spec.xml
new file mode 100644
index 0000000..8c108c8
--- /dev/null
+++ b/doc/src/content/xdocs/spec.xml
@@ -0,0 +1,1415 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd" [
+  <!ENTITY % avro-entities PUBLIC "-//Apache//ENTITIES Avro//EN"
+	   "../../../../build/avro.ent">
+  %avro-entities;
+]>
+<document>
+  <header>
+    <title>Apache Avro™ &AvroVersion; Specification</title>
+  </header>
+  <body>
+
+    <section id="preamble">
+      <title>Introduction</title>
+
+      <p>This document defines Apache Avro.  It is intended to be the
+        authoritative specification. Implementations of Avro must
+        adhere to this document.
+      </p>
+
+    </section>
+
+    <section id="schemas">
+      <title>Schema Declaration</title>
+      <p>A Schema is represented in <a href="ext:json">JSON</a> by one of:</p>
+      <ul>
+        <li>A JSON string, naming a defined type.</li>
+        
+        <li>A JSON object, of the form:
+          
+          <source>{"type": "<em>typeName</em>" ...<em>attributes</em>...}</source>
+
+          where <em>typeName</em> is either a primitive or derived
+          type name, as defined below.  Attributes not defined in this
+          document are permitted as metadata, but must not affect
+          the format of serialized data.
+          </li>
+        <li>A JSON array, representing a union of embedded types.</li>
+      </ul>
+
+      <section id="schema_primitive">
+        <title>Primitive Types</title>
+        <p>The set of primitive type names is:</p>
+        <ul>
+          <li><code>null</code>: no value</li>
+          <li><code>boolean</code>: a binary value</li>
+          <li><code>int</code>: 32-bit signed integer</li>
+          <li><code>long</code>: 64-bit signed integer</li>
+          <li><code>float</code>: single precision (32-bit) IEEE 754 floating-point number</li>
+          <li><code>double</code>: double precision (64-bit) IEEE 754 floating-point number</li>
+          <li><code>bytes</code>: sequence of 8-bit unsigned bytes</li>
+          <li><code>string</code>: unicode character sequence</li>
+        </ul>
+        
+        <p>Primitive types have no specified attributes.</p>
+        
+        <p>Primitive type names are also defined type names.  Thus, for
+          example, the schema "string" is equivalent to:</p>
+        
+        <source>{"type": "string"}</source>
+
+      </section>
+
+      <section id="schema_complex">
+        <title>Complex Types</title>
+        
+        <p>Avro supports six kinds of complex types: records, enums,
+        arrays, maps, unions and fixed.</p>
+
+        <section id="schema_record">
+          <title>Records</title>
+          
+	  <p>Records use the type name "record" and support the following attributes:</p>
+	  <ul>
+	    <li><code>name</code>: a JSON string providing the name
+	    of the record (required).</li>
+	    <li><code>namespace</code>: a JSON string that qualifies the name (optional).</li>
+	    <li><code>doc</code>: a JSON string providing documentation to the
+	    user of this schema (optional).</li>
+	    <li><code>aliases:</code> a JSON array of strings, providing
+	      alternate names for this record (optional).</li>
+	    <li><code>fields</code>: a JSON array, listing fields (required).
+	    Each field is a JSON object with the following attributes:
+	      <ul>
+		<li><code>name</code>: a JSON string providing the name
+		  of the field (required).</li>
+		<li><code>doc</code>: a JSON string describing this field
+                  for users (optional).</li>
+		<li><code>type:</code> A JSON object defining a schema, or
+		  a JSON string naming a record definition
+		  (required).</li>
+		<li><code>default:</code> A default value for this
+		  field, used when reading instances that lack this
+		  field (optional).  Permitted values depend on the
+		  field's schema type, according to the table below.
+		  Default values for union fields correspond to the
+		  first schema in the union. Default values for bytes
+		  and fixed fields are JSON strings, where Unicode
+		  code points 0-255 are mapped to unsigned 8-bit byte
+		  values 0-255.
+		  <table class="right">
+		    <caption>field default values</caption>
+		    <tr><th>avro type</th><th>json type</th><th>example</th></tr>
+		    <tr><td>null</td><td>null</td><td>null</td></tr>
+		    <tr><td>boolean</td><td>boolean</td><td>true</td></tr>
+		    <tr><td>int,long</td><td>integer</td><td>1</td></tr>
+		    <tr><td>float,double</td><td>number</td><td>1.1</td></tr>
+		    <tr><td>bytes</td><td>string</td><td>"\u00FF"</td></tr>
+		    <tr><td>string</td><td>string</td><td>"foo"</td></tr>
+		    <tr><td>record</td><td>object</td><td>{"a": 1}</td></tr>
+		    <tr><td>enum</td><td>string</td><td>"FOO"</td></tr>
+		    <tr><td>array</td><td>array</td><td>[1]</td></tr>
+		    <tr><td>map</td><td>object</td><td>{"a": 1}</td></tr>
+		    <tr><td>fixed</td><td>string</td><td>"\u00ff"</td></tr>
+		  </table>
+		</li>
+		<li><code>order:</code> specifies how this field
+		  impacts sort ordering of this record (optional).
+		  Valid values are "ascending" (the default),
+		  "descending", or "ignore".  For more details on how
+		  this is used, see the <a href="#order">sort
+		  order</a> section below.</li>
+		<li><code>aliases:</code> a JSON array of strings, providing
+		  alternate names for this field (optional).</li>
+	      </ul>
+	    </li>
+	  </ul>
+
+	  <p>For example, a linked-list of 64-bit values may be defined with:</p>
+	  <source>
+{
+  "type": "record", 
+  "name": "LongList",
+  "aliases": ["LinkedLongs"],                      // old name for this
+  "fields" : [
+    {"name": "value", "type": "long"},             // each element has a long
+    {"name": "next", "type": ["null", "LongList"]} // optional next element
+  ]
+}
+	  </source>
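+	  <p>As a non-normative illustration, such a definition can be
+	    loaded with the Python implementation distributed with Avro.
+	    The sketch below assumes that implementation's
+	    <code>avro.schema.parse</code> function; it is not part of the
+	    specification itself.</p>
+	  <source>
+import avro.schema
+
+long_list_json = """
+{"type": "record", "name": "LongList",
+ "aliases": ["LinkedLongs"],
+ "fields": [
+   {"name": "value", "type": "long"},
+   {"name": "next",  "type": ["null", "LongList"]}
+ ]}
+"""
+
+# Parse the JSON text into a schema object and inspect it.
+schema = avro.schema.parse(long_list_json)
+print(schema.name)                       # prints the record name, LongList
+print([f.name for f in schema.fields])   # prints the field names, value and next
+	  </source>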
+	</section>
+        
+        <section>
+          <title>Enums</title>
+          
+	  <p>Enums use the type name "enum" and support the following
+	  attributes:</p>
+	  <ul>
+	    <li><code>name</code>: a JSON string providing the name
+	    of the enum (required).</li>
+	    <li><code>namespace</code>: a JSON string that qualifies the name (optional).</li>
+	    <li><code>aliases:</code> a JSON array of strings, providing
+	      alternate names for this enum (optional).</li>
+	    <li><code>doc</code>: a JSON string providing documentation to the
+	    user of this schema (optional).</li>
+	    <li><code>symbols</code>: a JSON array, listing symbols,
+	    as JSON strings (required).  All symbols in an enum must
+	    be unique; duplicates are prohibited.</li>
+	  </ul>
+	  <p>For example, playing card suits might be defined with:</p>
+	  <source>
+{ "type": "enum",
+  "name": "Suit",
+  "symbols" : ["SPADES", "HEARTS", "DIAMONDS", "CLUBS"]
+}
+	  </source>
+	</section>
+        
+        <section>
+          <title>Arrays</title>
+          <p>Arrays use the type name <code>"array"</code> and support
+          a single attribute:</p>
+	  <ul>
+            <li><code>items</code>: the schema of the array's items.</li>
+	  </ul>
+	  <p>For example, an array of strings is declared
+	  with:</p>
+	  <source>{"type": "array", "items": "string"}</source>
+	</section>
+
+        <section>
+          <title>Maps</title>
+          <p>Maps use the type name <code>"map"</code> and support
+          one attribute:</p>
+	  <ul>
+            <li><code>values</code>: the schema of the map's values.</li>
+	  </ul>
+	  <p>Map keys are assumed to be strings.</p>
+	  <p>For example, a map from string to long is declared
+	  with:</p>
+	  <source>{"type": "map", "values": "long"}</source>
+	</section>
+
+        <section>
+          <title>Unions</title>
+          <p>Unions, as mentioned above, are represented using JSON
+          arrays.  For example, <code>["null", "string"]</code>
+          declares a schema which may be either a null or string.</p>
+          <p>(Note that when a <a href="#schema_record">default
+          value</a> is specified for a record field whose type is a
+          union, the type of the default value must match the
+          <em>first</em> element of the union.  Thus, for unions
+          containing "null", the "null" is usually listed first, since
+          the default value of such unions is typically null.)</p>
+	  <p>Unions may not contain more than one schema with the same
+	  type, except for the named types record, fixed and enum.  For
+	  example, unions containing two array types or two map types
+	  are not permitted, but two types with different names are
+	  permitted.  (Names permit efficient resolution when reading
+	  and writing unions.)</p>
+	  <p>Unions may not immediately contain other unions.</p>
+        </section>
+
+        <section>
+          <title>Fixed</title>
+          <p>Fixed uses the type name <code>"fixed"</code> and supports
+          the following attributes:</p>
+	  <ul>
+	    <li><code>name</code>: a string naming this fixed (required).</li>
+	    <li><code>namespace</code>: a string that qualifies the name (optional).</li>
+	    <li><code>aliases:</code> a JSON array of strings, providing
+	      alternate names for this fixed (optional).</li>
+            <li><code>size</code>: an integer, specifying the number
+            of bytes per value (required).</li>
+	  </ul>
+	  <p>For example, a 16-byte quantity may be declared with:</p>
+	  <source>{"type": "fixed", "size": 16, "name": "md5"}</source>
+	</section>
+
+
+      </section> <!-- end complex types -->
+
+      <section>
+	<title>Names</title>
+        <p>Records, enums and fixed are named types.  Each has
+          a <em>fullname</em> that is composed of two parts:
+          a <em>name</em> and a <em>namespace</em>.  Equality of names
+          is defined on the fullname.</p>
+	<p>The name portion of a fullname, record field names, and
+	  enum symbols must:</p>
+	<ul>
+          <li>start with <code>[A-Za-z_]</code></li>
+          <li>subsequently contain only <code>[A-Za-z0-9_]</code></li>
+	</ul>
+        <p>A namespace is a dot-separated sequence of such names.
+        The empty string may also be used as a namespace to indicate the
+        null namespace.
+        Equality of names (including field names and enum symbols)
+        as well as fullnames is case-sensitive.</p>
+        <p>In record, enum and fixed definitions, the fullname is
+        determined in one of the following ways:</p>
+	<ul>
+	  <li>A name and namespace are both specified.  For example,
+	  one might use <code>"name": "X", "namespace":
+	  "org.foo"</code> to indicate the
+	  fullname <code>org.foo.X</code>.</li>
+	  <li>A fullname is specified.  If the name specified contains
+	  a dot, then it is assumed to be a fullname, and any
+	  namespace also specified is ignored.  For example,
+	  use <code>"name": "org.foo.X"</code> to indicate the
+	  fullname <code>org.foo.X</code>.</li>
+	  <li>A name only is specified, i.e., a name that contains no
+	  dots.  In this case the namespace is taken from the most
+	  tightly enclosing schema or protocol.  For example,
+	  if <code>"name": "X"</code> is specified, and this occurs
+	  within a field of the record definition
+	  of <code>org.foo.Y</code>, then the fullname
+	  is <code>org.foo.X</code>. If there is no enclosing
+	  namespace then the null namespace is used.</li>
+	</ul>
+	<p>References to previously defined names are as in the latter
+	two cases above: if they contain a dot they are a fullname, if
+	they do not contain a dot, the namespace is the namespace of
+	the enclosing definition.</p>
+	<p>Primitive type names have no namespace and their names may
+	not be defined in any namespace.</p>
+	<p> A schema or protocol may not contain multiple definitions
+	of a fullname.  Further, a name must be defined before it is
+	used ("before" in the depth-first, left-to-right traversal of
+	the JSON parse tree, where the <code>types</code> attribute of
+	a protocol is always deemed to come "before" the
+	<code>messages</code> attribute.)
+	</p>
+      </section>
+
+      <section>
+	<title>Aliases</title>
+	<p>Named types and fields may have aliases.  An implementation
+        may optionally use aliases to map a writer's schema to the
+        reader's.  This facilitates both schema evolution and
+        processing disparate datasets.</p>
+	<p>Aliases function by re-writing the writer's schema using
+        aliases from the reader's schema.  For example, if the
+        writer's schema was named "Foo" and the reader's schema is
+        named "Bar" and has an alias of "Foo", then the implementation
+        would act as though "Foo" were named "Bar" when reading.
+        Similarly, if data was written as a record with a field named
+        "x" and is read as a record with a field named "y" with alias
+        "x", then the implementation would act as though "x" were
+        named "y" when reading.</p>
+	<p>A type alias may be specified either as a fully
+        namespace-qualified name, or relative to the namespace of the name
+        it is an alias for.  For example, if a type named "a.b" has
+        aliases of "c" and "x.y", then the fully qualified names of
+        its aliases are "a.c" and "x.y".</p>
+      </section>
+
+    </section> <!-- end schemas -->
+
+    <section>
+      <title>Data Serialization</title>
+
+      <p>Avro data is always serialized with its schema.  Files that
+	store Avro data should always also include the schema for that
+	data in the same file.  Avro-based remote procedure call (RPC)
+	systems must also guarantee that remote recipients of data
+	have a copy of the schema used to write that data.</p>
+
+      <p>Because the schema used to write data is always available
+	when the data is read, Avro data itself is not tagged with
+	type information.  The schema is required to parse data.</p>
+
+      <p>In general, both serialization and deserialization proceed as
+      a depth-first, left-to-right traversal of the schema,
+      serializing primitive types as they are encountered.</p>
+
+      <section>
+	<title>Encodings</title>
+	<p>Avro specifies two serialization encodings: binary and
+	  JSON.  Most applications will use the binary encoding, as it
+	  is smaller and faster.  But, for debugging and web-based
+	  applications, the JSON encoding may sometimes be
+	  appropriate.</p>
+      </section>
+
+      <section id="binary_encoding">
+        <title>Binary Encoding</title>
+
+	<section id="binary_encode_primitive">
+          <title>Primitive Types</title>
+          <p>Primitive types are encoded in binary as follows:</p>
+          <ul>
+            <li><code>null</code> is written as zero bytes.</li>
+            <li>a <code>boolean</code> is written as a single byte whose
+              value is either <code>0</code> (false) or <code>1</code>
+              (true).</li>
+            <li><code>int</code> and <code>long</code> values are written
+              using <a href="ext:vint">variable-length</a>
+	      <a href="ext:zigzag">zig-zag</a> coding.  Some examples:
+	      <table class="right">
+		<tr><th>value</th><th>hex</th></tr>
+		<tr><td><code> 0</code></td><td><code>00</code></td></tr>
+		<tr><td><code>-1</code></td><td><code>01</code></td></tr>
+		<tr><td><code> 1</code></td><td><code>02</code></td></tr>
+		<tr><td><code>-2</code></td><td><code>03</code></td></tr>
+		<tr><td><code> 2</code></td><td><code>04</code></td></tr>
+		<tr><td colspan="2"><code>...</code></td></tr>
+		<tr><td><code>-64</code></td><td><code>7f</code></td></tr>
+		<tr><td><code> 64</code></td><td><code> 80 01</code></td></tr>
+		<tr><td colspan="2"><code>...</code></td></tr>
+	      </table>
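+	      (A non-normative Python sketch of this coding appears after
+	      this list.)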
+	    </li>
+            <li>a <code>float</code> is written as 4 bytes. The float is
+              converted into a 32-bit integer using a method equivalent
+              to <a href="http://java.sun.com/javase/6/docs/api/java/lang/Float.html#floatToIntBits%28float%29">Java's floatToIntBits</a> and then encoded
+              in little-endian format.</li>
+            <li>a <code>double</code> is written as 8 bytes. The double
+              is converted into a 64-bit integer using a method equivalent
+              to <a href="http://java.sun.com/javase/6/docs/api/java/lang/Double.html#doubleToLongBits%28double%29">Java's
+		doubleToLongBits</a> and then encoded in little-endian
+              format.</li>
+            <li><code>bytes</code> are encoded as
+              a <code>long</code> followed by that many bytes of data.
+            </li>
+            <li>a <code>string</code> is encoded as
+              a <code>long</code> followed by that many bytes of UTF-8
+              encoded character data.
+              <p>For example, the three-character string "foo" would
+              be encoded as the long value 3 (encoded as
+              hex <code>06</code>) followed by the UTF-8 encoding of
+              'f', 'o', and 'o' (the hex bytes <code>66 6f
+              6f</code>):
+              </p>
+              <source>06 66 6f 6f</source>
+            </li>
+          </ul>
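+          <p>A minimal, non-normative sketch of this coding in Python
+            follows; the function name is ours and is not part of any Avro
+            API.  It reproduces the example values in the table above for
+            64-bit longs.</p>
+          <source>
+import binascii
+
+def encode_zigzag_varint(n):
+    """Zig-zag map a signed long, then emit it base 128,
+    least-significant group first, with the high bit set on
+    every byte except the last."""
+    n = n * 2 if n >= 0 else -n * 2 - 1   # zig-zag: 0,-1,1,-2,2 -> 0,1,2,3,4
+    out = bytearray()
+    while n >= 0x80:
+        out.append(n % 0x80 + 0x80)       # low 7 bits plus continuation bit
+        n //= 0x80
+    out.append(n)
+    return bytes(out)
+
+for v in (0, -1, 1, -2, 2, -64, 64):
+    print(binascii.hexlify(encode_zigzag_varint(v)))   # 00 01 02 03 04 7f 8001
+          </source>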
+
+	</section>
+
+
+	<section id="binary_encode_complex">
+          <title>Complex Types</title>
+          <p>Complex types are encoded in binary as follows:</p>
+
+          <section id="record_encoding">
+            <title>Records</title>
+	    <p>A record is encoded by encoding the values of its
+	      fields in the order that they are declared.  In other
+	      words, a record is encoded as just the concatenation of
+	      the encodings of its fields.  Field values are encoded per
+	      their schema.</p>
+	    <p>For example, the record schema</p>
+	    <source>
+	      {
+	      "type": "record", 
+	      "name": "test",
+	      "fields" : [
+	      {"name": "a", "type": "long"},
+	      {"name": "b", "type": "string"}
+	      ]
+	      }
+	    </source>
+	    <p>An instance of this record whose <code>a</code> field has
+	      value 27 (encoded as hex <code>36</code>) and
+	      whose <code>b</code> field has value "foo" (encoded as hex
+	      bytes <code>06 66 6f 6f</code>), would be encoded simply
+	      as the concatenation of these, namely the hex byte
+	      sequence:</p>
+	    <source>36 06 66 6f 6f</source>
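+	    <p>As a non-normative illustration, the sketch below reproduces
+	      this encoding with the Python implementation distributed with
+	      Avro; it assumes that implementation's <code>avro.schema</code>
+	      and <code>avro.io</code> modules.</p>
+	    <source>
+import io
+import binascii
+import avro.schema
+import avro.io
+
+schema = avro.schema.parse(
+    '{"type": "record", "name": "test", "fields": ['
+    ' {"name": "a", "type": "long"},'
+    ' {"name": "b", "type": "string"}]}')
+
+buf = io.BytesIO()
+writer = avro.io.DatumWriter(schema)      # encodes datums per the schema
+writer.write({"a": 27, "b": "foo"}, avro.io.BinaryEncoder(buf))
+
+print(binascii.hexlify(buf.getvalue()))   # hex 3606666f6f, i.e. 36 06 66 6f 6f
+	    </source>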
+	  </section>
+          
+          <section id="enum_encoding">
+            <title>Enums</title>
+            <p>An enum is encoded by an <code>int</code>, representing
+              the zero-based position of the symbol in the schema.</p>
+	    <p>For example, consider the enum:</p>
+	    <source>
+	      {"type": "enum", "name": "Foo", "symbols": ["A", "B", "C", "D"] }
+	    </source>
+	    <p>This would be encoded by an <code>int</code> between
+	      zero and three, with zero indicating "A" and three indicating
+	      "D".</p>
+	  </section>
+
+
+          <section id="array_encoding">
+            <title>Arrays</title>
+            <p>Arrays are encoded as a series of <em>blocks</em>.
+              Each block consists of a <code>long</code> <em>count</em>
+              value, followed by that many array items.  A block with
+              count zero indicates the end of the array.  Each item is
+              encoded per the array's item schema.</p>
+
+            <p>If a block's count is negative, its absolute value is used,
+              and the count is followed immediately by a <code>long</code>
+              block <em>size</em> indicating the number of bytes in the
+              block.  This block size permits fast skipping through data,
+              e.g., when projecting a record to a subset of its fields.</p>
+
+            <p>For example, given the array schema</p>
+            <source>{"type": "array", "items": "long"}</source>
+            <p>an array containing the items 3 and 27 could be encoded
+              as the long value 2 (encoded as hex 04) followed by long
+              values 3 and 27 (encoded as hex <code>06 36</code>)
+              terminated by zero:</p>
+            <source>04 06 36 00</source>
+
+            <p>The blocked representation permits one to read and write
+              arrays larger than can be buffered in memory, since one can
+              start writing items without knowing the full length of the
+              array.</p>
+
+          </section>
+
+	  <section id="map_encoding">
+            <title>Maps</title>
+            <p>Maps are encoded as a series of <em>blocks</em>.  Each
+              block consists of a <code>long</code> <em>count</em>
+              value, followed by that many key/value pairs.  A block
+              with count zero indicates the end of the map.  Each item
+              is encoded per the map's value schema.</p>
+	    
+            <p>If a block's count is negative, its absolute value is used,
+              and the count is followed immediately by a <code>long</code>
+              block <em>size</em> indicating the number of bytes in the
+              block.  This block size permits fast skipping through data,
+              e.g., when projecting a record to a subset of its fields.</p>
+	    
+            <p>The blocked representation permits one to read and write
+              maps larger than can be buffered in memory, since one can
+              start writing items without knowing the full length of the
+              map.</p>
+	
+	  </section>
+
+          <section id="union_encoding">
+            <title>Unions</title>
+            <p>A union is encoded by first writing a <code>long</code>
+              value indicating the zero-based position within the
+              union of the schema of its value.  The value is then
+              encoded per the indicated schema within the union.</p>
+            <p>For example, the union
+              schema <code>["null","string"]</code> would encode:</p>
+            <ul>
+              <li><code>null</code> as zero (the index of "null" in the union):
+                <source>00</source></li>
+              <li>the string <code>"a"</code> as one (the index of
+                "string" in the union, encoded as hex <code>02</code>),
+                followed by the serialized string:
+                <source>02 02 61</source></li>
+            </ul>
+          </section>
+
+          <section id="fixed_encoding">
+            <title>Fixed</title>
+            <p>Fixed instances are encoded using the number of bytes
+              declared in the schema.</p>
+          </section>
+
+        </section> <!-- end complex types -->
+
+      </section>
+
+      <section id="json_encoding">
+        <title>JSON Encoding</title>
+        
+        <p>Except for unions, the JSON encoding is the same as is used
+        to encode <a href="#schema_record">field default
+        values</a>.</p>
+
+        <p>The value of a union is encoded in JSON as follows:</p>
+
+        <ul>
+          <li>if its type is <code>null</code>, then it is encoded as
+          a JSON null;</li>
+          <li>otherwise it is encoded as a JSON object with one
+          name/value pair whose name is the type's name and whose
+          value is the recursively encoded value.  For Avro's named
+          types (record, fixed or enum) the user-specified name is
+          used; for other types the type name is used.</li>
+        </ul>
+          
+        <p>For example, the union
+          schema <code>["null","string","Foo"]</code>, where Foo is a
+          record name, would encode:</p>
+        <ul>
+          <li><code>null</code> as <code>null</code>;</li>
+          <li>the string <code>"a"</code> as
+            <code>{"string": "a"}</code>; and</li>
+          <li>a Foo instance as <code>{"Foo": {...}}</code>,
+          where <code>{...}</code> indicates the JSON encoding of a
+          Foo instance.</li>
+        </ul>
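+        <p>A small, non-normative sketch of this wrapping in Python (the
+        helper name and the sample Foo contents are ours), using the
+        standard json module:</p>
+        <source>
+import json
+
+def json_encode_union(branch_name, value):
+    """Wrap a union value as described above: null stays null; any other
+    branch becomes a one-entry JSON object keyed by the branch's name."""
+    if branch_name == "null":
+        return None
+    return {branch_name: value}
+
+print(json.dumps(json_encode_union("null", None)))      # null
+print(json.dumps(json_encode_union("string", "a")))     # {"string": "a"}
+print(json.dumps(json_encode_union("Foo", {"x": 1})))   # {"Foo": {"x": 1}}
+        </source>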
+
+        <p>Note that a schema is still required to correctly process
+        JSON-encoded data.  For example, the JSON encoding does not
+        distinguish between <code>int</code>
+        and <code>long</code>, <code>float</code>
+        and <code>double</code>, records and maps, enums and strings,
+        etc.</p>
+
+      </section>
+
+    </section>
+
+    <section id="order">
+      <title>Sort Order</title>
+
+      <p>Avro defines a standard sort order for data.  This permits
+        data written by one system to be efficiently sorted by another
+        system.  This can be an important optimization, as sort order
+        comparisons are sometimes the most frequent per-object
+        operation.  Note also that Avro binary-encoded data can be
+        efficiently ordered without deserializing it to objects.</p>
+
+      <p>Data items may only be compared if they have identical
+        schemas.  Pairwise comparisons are implemented recursively
+        with a depth-first, left-to-right traversal of the schema.
+        The first mismatch encountered determines the order of the
+        items.</p>
+
+      <p>Two items with the same schema are compared according to the
+        following rules.</p>
+      <ul>
+        <li><code>null</code> data is always equal.</li>
+        <li><code>boolean</code> data is ordered with false before true.</li>
+        <li><code>int</code>, <code>long</code>, <code>float</code>
+          and <code>double</code> data is ordered by ascending numeric
+          value.</li>
+        <li><code>bytes</code> and <code>fixed</code> data are
+          compared lexicographically by unsigned 8-bit values.</li>
+        <li><code>string</code> data is compared lexicographically by
+          Unicode code point.  Note that since UTF-8 is used as the
+          binary encoding for strings, sorting of bytes and string
+          binary data is identical.</li>
+        <li><code>array</code> data is compared lexicographically by
+          element.</li>
+        <li><code>enum</code> data is ordered by the symbol's position
+          in the enum schema.  For example, an enum whose symbols are
+          <code>["z", "a"]</code> would sort <code>"z"</code> values
+          before <code>"a"</code> values.</li>
+        <li><code>union</code> data is first ordered by the branch
+          within the union, and, within that, by the type of the
+          branch.  For example, an <code>["int", "string"]</code>
+          union would order all int values before all string values,
+          with the ints and strings themselves ordered as defined
+          above.</li>
+        <li><code>record</code> data is ordered lexicographically by
+          field.  If a field specifies that its order is:
+          <ul>
+            <li><code>"ascending"</code>, then the order of its values
+              is unaltered.</li>
+            <li><code>"descending"</code>, then the order of its values
+              is reversed.</li>
+            <li><code>"ignore"</code>, then its values are ignored
+              when sorting.</li>
+          </ul>
+        </li>
+        <li><code>map</code> data may not be compared.  It is an error
+          to attempt to compare data containing maps unless those maps
+          are in an <code>"order":"ignore"</code> record field.
+        </li>
+      </ul>
+    </section>
+
+    <section>
+      <title>Object Container Files</title>
+      <p>Avro includes a simple object container file format.  A file
+      has a schema, and all objects stored in the file must be written
+      according to that schema, using binary encoding.  Objects are
+      stored in blocks that may be compressed.  Synchronization markers
+      are used between blocks to permit efficient splitting of files
+      for MapReduce processing.</p>
+
+      <p>Files may include arbitrary user-specified metadata.</p>
+
+      <p>A file consists of:</p>
+      <ul>
+        <li>A <em>file header</em>, followed by</li>
+        <li>one or more <em>file data blocks</em>.</li>
+      </ul>
+
+      <p>A file header consists of:</p>
+      <ul>
+        <li>Four bytes, ASCII 'O', 'b', 'j', followed by 1.</li>
+        <li><em>file metadata</em>, including the schema.</li>
+        <li>The 16-byte, randomly-generated sync marker for this file.</li>
+      </ul>
+
+      <p>File metadata is written as if defined by the following <a
+      href="#map_encoding">map</a> schema:</p>
+      <source>{"type": "map", "values": "bytes"}</source>
+
+      <p>All metadata properties that start with "avro." are reserved.
+      The following file metadata properties are currently used:</p>
+      <ul>
+        <li><strong>avro.schema</strong> contains the schema of objects
+        stored in the file, as JSON data (required).</li>
+        <li><strong>avro.codec</strong> contains the name of the compression codec
+        used to compress blocks, as a string.  Implementations
+        are required to support the following codecs: "null" and "deflate".  
+        If codec is absent, it is assumed to be "null".  The codecs
+        are described in more detail below.</li>
+      </ul>
+
+      <p>A file header is thus described by the following schema:</p>
+      <source>
+{"type": "record", "name": "org.apache.avro.file.Header",
+ "fields" : [
+   {"name": "magic", "type": {"type": "fixed", "name": "Magic", "size": 4}},
+   {"name": "meta", "type": {"type": "map", "values": "bytes"}},
+   {"name": "sync", "type": {"type": "fixed", "name": "Sync", "size": 16}}
+  ]
+}
+      </source>
+
+      <p>A file data block consists of:</p>
+      <ul>
+        <li>A long indicating the count of objects in this block.</li>
+        <li>A long indicating the size in bytes of the serialized objects
+        in the current block, after any codec is applied.</li>
+        <li>The serialized objects.  If a codec is specified, this is
+        compressed by that codec.</li>
+        <li>The file's 16-byte sync marker.</li>
+      </ul>
+      <p>Thus, each block's binary data can be efficiently extracted or
+      skipped without deserializing the contents.  The combination of block
+      size, object counts, and sync markers enables detection of corrupt
+      blocks and helps ensure data integrity.</p>
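+      <p>For illustration only (outside the specification), the Python
+      implementation distributed with Avro reads and writes this format via
+      its <code>avro.datafile</code> module.  The sketch below assumes that
+      module; the file name is arbitrary.</p>
+      <source>
+import avro.schema
+from avro.datafile import DataFileReader, DataFileWriter
+from avro.io import DatumReader, DatumWriter
+
+schema = avro.schema.parse(
+    '{"type": "record", "name": "Point", "fields": ['
+    ' {"name": "x", "type": "long"}, {"name": "y", "type": "long"}]}')
+
+# Write a container file holding two objects with the default "null" codec.
+writer = DataFileWriter(open("points.avro", "wb"), DatumWriter(), schema)
+writer.append({"x": 1, "y": 2})
+writer.append({"x": 3, "y": 4})
+writer.close()
+
+# The header begins with the four magic bytes described above.
+print(open("points.avro", "rb").read(4))      # 'Obj' followed by the byte 1
+
+# Read the objects back; the writer's schema travels in the file metadata.
+reader = DataFileReader(open("points.avro", "rb"), DatumReader())
+for record in reader:
+    print(record)
+reader.close()
+      </source>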
+      <section>
+      <title>Required Codecs</title>
+        <section>
+        <title>null</title>
+        <p>The "null" codec simply passes through data uncompressed.</p>
+        </section>
+
+        <section>
+        <title>deflate</title>
+        <p>The "deflate" codec writes the data block using the
+        deflate algorithm as specified in 
+        <a href="http://www.isi.edu/in-notes/rfc1951.txt">RFC 1951</a>,
+        and typically implemented using the zlib library.  Note that this
+        format (unlike the "zlib format" in RFC 1950) does not have a
+        checksum.
+        </p>
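+        <p>A minimal, non-normative sketch of this codec with Python's
+        standard zlib module, asking zlib for a raw stream so that the zlib
+        header and checksum are omitted:</p>
+        <source>
+import zlib
+
+def deflate_block(data):
+    """Compress a block as raw deflate (RFC 1951, no zlib header/checksum)."""
+    c = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15)
+    return c.compress(data) + c.flush()
+
+def inflate_block(data):
+    """Decompress a raw deflate block."""
+    return zlib.decompress(data, -15)
+
+assert inflate_block(deflate_block(b"some block data")) == b"some block data"
+        </source>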
+        </section>
+      </section>
+      <section>
+	<title>Optional Codecs</title>
+        <section>
+          <title>snappy</title>
+          <p>The "snappy" codec uses
+            Google's <a href="http://code.google.com/p/snappy/">Snappy</a>
+            compression library.  Each compressed block is followed
+            by the 4-byte, big-endian CRC32 checksum of the
+            uncompressed data in the block.</p>
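+          <p>A non-normative sketch of this layout, assuming the
+            third-party python-snappy package provides
+            <code>snappy.compress</code> and <code>snappy.decompress</code>:</p>
+          <source>
+import struct
+import zlib     # zlib.crc32 computes the CRC32 checksum
+import snappy   # assumed: the python-snappy bindings
+
+def snappy_block(data):
+    """Compress a block and append the 4-byte, big-endian CRC32
+    of the uncompressed data, as described above."""
+    crc = zlib.crc32(data) % 0x100000000        # force an unsigned 32-bit value
+    return snappy.compress(data) + struct.pack(">I", crc)
+
+def unsnappy_block(block):
+    data = snappy.decompress(block[:-4])
+    stored_crc = struct.unpack(">I", block[-4:])[0]
+    assert stored_crc == zlib.crc32(data) % 0x100000000
+    return data
+          </source>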
+        </section>
+      </section>
+    </section>
+
+    <section>
+      <title>Protocol Declaration</title>
+      <p>Avro protocols describe RPC interfaces.  Like schemas, they are
+      defined with JSON text.</p>
+
+      <p>A protocol is a JSON object with the following attributes:</p>
+      <ul>
+        <li><em>protocol</em>, a string, the name of the protocol
+        (required);</li>
+        <li><em>namespace</em>, an optional string that qualifies the name;</li>
+        <li><em>doc</em>, an optional string describing this protocol;</li>
+        <li><em>types</em>, an optional list of definitions of named types
+          (records, enums, fixed and errors).  An error definition is
+          just like a record definition except it uses "error" instead
+          of "record".  Note that forward references to named types
+          are not permitted.</li>
+        <li><em>messages</em>, an optional JSON object whose keys are
+          message names and whose values are objects whose attributes
+          are described below.  No two messages may have the same
+          name.</li>
+      </ul>
+      <p>The name and namespace qualification rules defined for schema objects
+	apply to protocols as well.</p>
+
+      <section>
+        <title>Messages</title>
+        <p>A message has attributes:</p>
+        <ul>
+          <li>a <em>doc</em>, an optional description of the message,</li>
+          <li>a <em>request</em>, a list of named,
+            typed <em>parameter</em> schemas (this has the same form
+            as the fields of a record declaration);</li>
+          <li>a <em>response</em> schema; </li> 
+          <li>an optional union of declared <em>error</em> schemas.
+	    The <em>effective</em> union has <code>"string"</code>
+	    prepended to the declared union, to permit transmission of
+	    undeclared "system" errors.  For example, if the declared
+	    error union is <code>["AccessError"]</code>, then the
+	    effective union is <code>["string", "AccessError"]</code>.
+	    When no errors are declared, the effective error union
+	    is <code>["string"]</code>.  Errors are serialized using
+	    the effective union; however, a protocol's JSON
+	    declaration contains only the declared union.
+	  </li>
+          <li>an optional <em>one-way</em> boolean parameter.</li>
+        </ul>
+        <p>A request parameter list is processed equivalently to an
+          anonymous record.  Since record field lists may vary between
+          reader and writer, request parameters may also differ
+          between the caller and responder, and such differences are
+          resolved in the same manner as record field differences.</p>
+	<p>The one-way parameter may only be true when the response type
+	  is <code>"null"</code> and no errors are listed.</p>
+      </section>
+      <section>
+        <title>Sample Protocol</title>
+        <p>For example, one may define a simple HelloWorld protocol with:</p>
+        <source>
+{
+  "namespace": "com.acme",
+  "protocol": "HelloWorld",
+  "doc": "Protocol Greetings",
+
+  "types": [
+    {"name": "Greeting", "type": "record", "fields": [
+      {"name": "message", "type": "string"}]},
+    {"name": "Curse", "type": "error", "fields": [
+      {"name": "message", "type": "string"}]}
+  ],
+
+  "messages": {
+    "hello": {
+      "doc": "Say hello.",
+      "request": [{"name": "greeting", "type": "Greeting" }],
+      "response": "Greeting",
+      "errors": ["Curse"]
+    }
+  }
+}
+        </source>
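+        <p>As a non-normative illustration, this protocol text can be parsed
+        with the Python implementation distributed with Avro; the sketch
+        assumes its <code>avro.protocol.parse</code> function, and the file
+        name used here is hypothetical.</p>
+        <source>
+import avro.protocol
+
+protocol_json = open("helloworld.avpr").read()   # hypothetical file holding the JSON above
+proto = avro.protocol.parse(protocol_json)
+
+print(proto.namespace)        # com.acme
+print(proto.name)             # HelloWorld
+print(list(proto.messages))   # the declared message names, here just 'hello'
+        </source>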
+      </section>
+    </section>
+
+    <section>
+      <title>Protocol Wire Format</title>
+
+      <section>
+        <title>Message Transport</title>
+        <p>Messages may be transmitted via
+        different <em>transport</em> mechanisms.</p>
+
+        <p>To the transport, a <em>message</em> is an opaque byte sequence.</p>
+
+        <p>A transport is a system that supports:</p>
+        <ul>
+          <li><strong>transmission of request messages</strong>
+          </li>
+          <li><strong>receipt of corresponding response messages</strong>
+            <p>Servers may send a response message back to the client
+            corresponding to a request message.  The mechanism of
+            correspondence is transport-specific.  For example, in
+            HTTP it is implicit, since HTTP directly supports requests
+            and responses.  But a transport that multiplexes many
+            client threads over a single socket would need to tag
+            messages with unique identifiers.</p>
+          </li>
+        </ul>
+
+	<p>Transports may be either <em>stateless</em>
+        or <em>stateful</em>.  In a stateless transport, messaging
+        assumes no established connection state, while stateful
+        transports establish connections that may be used for multiple
+        messages.  This distinction is discussed further in
+        the <a href="#handshake">handshake</a> section below.</p>
+
+        <section>
+          <title>HTTP as Transport</title>
+          <p>When
+            <a href="http://www.w3.org/Protocols/rfc2616/rfc2616.html">HTTP</a>
+            is used as a transport, each Avro message exchange is an
+            HTTP request/response pair.  All messages of an Avro
+            protocol should share a single URL at an HTTP server.
+            Other protocols may also use that URL.  Both normal and
+            error Avro response messages should use the 200 (OK)
+            response code.  The chunked encoding may be used for
+            requests and responses, but, regardless, the Avro request
+            and response are the entire content of an HTTP request and
+            response.  The HTTP Content-Type of requests and responses
+            should be specified as "avro/binary".  Requests should be
+            made using the POST method.</p>
+	  <p>HTTP is used by Avro as a stateless transport.</p>
+        </section>
+      </section>
+
+      <section>
+        <title>Message Framing</title>
+        <p>Avro messages are <em>framed</em> as a list of buffers.</p>
+        <p>Framing is a layer between messages and the transport.
+        It exists to optimize certain operations.</p>
+
+        <p>The format of framed message data is:</p>
+        <ul>
+          <li>a series of <em>buffers</em>, where each buffer consists of:
+            <ul>
+              <li>a four-byte, big-endian <em>buffer length</em>, followed by</li>
+              <li>that many bytes of <em>buffer data</em>.</li>
+            </ul>
+          </li>
+          <li>A message is always terminated by a zero-length buffer.</li>
+        </ul>
+
+        <p>Framing is transparent to request and response message
+        formats (described below).  Any message may be presented as a
+        single or multiple buffers.</p>
+
+        <p>Framing can permit readers to more efficiently get
+        different buffers from different sources and writers to
+        more efficiently store different buffers to different
+        destinations.  In particular, it can reduce the number of
+        times large binary objects are copied.  For example, if an RPC
+        parameter consists of a megabyte of file data, that data can
+        be copied directly to a socket from a file descriptor, and, on
+        the other end, it could be written directly to a file
+        descriptor, never entering user space.</p>
+
+        <p>A simple, recommended framing policy is for writers to
+        create a new segment whenever a single binary object is
+        written that is larger than a normal output buffer.  Small
+        objects are then appended in buffers, while larger objects are
+        written as their own buffers.  When a reader then tries to
+        read a large object the runtime can hand it an entire buffer
+        directly, without having to copy it.</p>
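+        <p>A minimal, non-normative sketch of this framing in Python (the
+        helper names are ours), using the struct module for the four-byte,
+        big-endian lengths:</p>
+        <source>
+import io
+import struct
+
+def frame_message(message, buffer_size=8192):
+    """Split an opaque message into length-prefixed buffers and
+    terminate it with a zero-length buffer."""
+    out = []
+    for i in range(0, len(message), buffer_size):
+        chunk = message[i:i + buffer_size]
+        out.append(struct.pack(">I", len(chunk)) + chunk)   # 4-byte big-endian length
+    out.append(struct.pack(">I", 0))                        # terminating empty buffer
+    return b"".join(out)
+
+def read_framed_message(stream):
+    """Read buffers from a file-like stream until the empty one."""
+    parts = []
+    while True:
+        (length,) = struct.unpack(">I", stream.read(4))
+        if length == 0:
+            return b"".join(parts)
+        parts.append(stream.read(length))
+
+framed = frame_message(b"x" * 20000)                        # three buffers plus terminator
+assert read_framed_message(io.BytesIO(framed)) == b"x" * 20000
+        </source>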
+      </section>
+
+      <section id="handshake">
+        <title>Handshake</title>
+
+	<p>The purpose of the handshake is to ensure that the client
+        and the server have each other's protocol definition, so that
+        the client can correctly deserialize responses, and the server
+        can correctly deserialize requests.  Both clients and servers
+        should maintain a cache of recently seen protocols, so that,
+        in most cases, a handshake will be completed without extra
+        round-trip network exchanges or the transmission of full
+        protocol text.</p>
+
+        <p>RPC requests and responses may not be processed until a
+        handshake has been completed.  With a stateless transport, all
+        requests and responses are prefixed by handshakes.  With a
+        stateful transport, handshakes are only attached to requests
+        and responses until a successful handshake response has been
+        returned over a connection.  After this, request and response
+        payloads are sent without handshakes for the lifetime of that
+        connection.</p>
+
+        <p>The handshake process uses the following record schemas:</p>
+
+        <source>
+{
+  "type": "record",
+  "name": "HandshakeRequest", "namespace":"org.apache.avro.ipc",
+  "fields": [
+    {"name": "clientHash",
+     "type": {"type": "fixed", "name": "MD5", "size": 16}},
+    {"name": "clientProtocol", "type": ["null", "string"]},
+    {"name": "serverHash", "type": "MD5"},
+    {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]}
+  ]
+}
+{
+  "type": "record",
+  "name": "HandshakeResponse", "namespace": "org.apache.avro.ipc",
+  "fields": [
+    {"name": "match",
+     "type": {"type": "enum", "name": "HandshakeMatch",
+              "symbols": ["BOTH", "CLIENT", "NONE"]}},
+    {"name": "serverProtocol",
+     "type": ["null", "string"]},
+    {"name": "serverHash",
+     "type": ["null", {"type": "fixed", "name": "MD5", "size": 16}]},
+    {"name": "meta",
+     "type": ["null", {"type": "map", "values": "bytes"}]}
+  ]
+}
+        </source>
+
+        <ul>
+          <li>A client first prefixes each request with
+          a <code>HandshakeRequest</code> containing just the hash of
+          its protocol and of the server's protocol
+          (<code>clientHash!=null, clientProtocol=null,
+          serverHash!=null</code>), where the hashes are 128-bit MD5
+          hashes of the JSON protocol text. If a client has never
+          connected to a given server, it sends its hash as a guess of
+          the server's hash, otherwise it sends the hash that it
+          previously obtained from this server.</li>
+
+          <li>The server responds with
+          a <code>HandshakeResponse</code> containing one of:
+            <ul>
+              <li><code>match=BOTH, serverProtocol=null,
+              serverHash=null</code> if the client sent the valid hash
+              of the server's protocol and the server knows what
+              protocol corresponds to the client's hash. In this case,
+              the request is complete and the response data
+              immediately follows the HandshakeResponse.</li>
+
+              <li><code>match=CLIENT, serverProtocol!=null,
+              serverHash!=null</code> if the server has previously
+              seen the client's protocol, but the client sent an
+              incorrect hash of the server's protocol. The request is
+              complete and the response data immediately follows the
+              HandshakeResponse. The client must use the returned
+              protocol to process the response and should also cache
+              that protocol and its hash for future interactions with
+              this server.</li>
+
+              <li><code>match=NONE</code> if the server has not
+              previously seen the client's protocol.
+              The <code>serverHash</code>
+              and <code>serverProtocol</code> may also be non-null if
+              the server's protocol hash was incorrect.
+
+              <p>In this case the client must then re-submit its request
+              with its protocol text (<code>clientHash!=null,
+              clientProtocol!=null, serverHash!=null</code>) and the
+              server should respond with a successful match
+              (<code>match=BOTH, serverProtocol=null,
+              serverHash=null</code>) as above.</p>
+              </li>
+            </ul>
+          </li>
+        </ul>
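+        <p>For illustration, the 128-bit hashes above are MD5 digests of the
+        protocol's JSON text; the sketch below uses Python's standard
+        hashlib, hashing the text as UTF-8 bytes (the file name is
+        hypothetical):</p>
+        <source>
+import hashlib
+
+protocol_text = open("helloworld.avpr").read()   # hypothetical: the protocol's JSON text
+client_hash = hashlib.md5(protocol_text.encode("utf-8")).digest()
+
+print(len(client_hash))   # 16, matching the fixed MD5 (size 16) field above
+        </source>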
+
+        <p>The <code>meta</code> field is reserved for future
+        handshake enhancements.</p>
+
+      </section>
+
+      <section>
+        <title>Call Format</title>
+        <p>A <em>call</em> consists of a request message paired with
+        its resulting response or error message.  Requests and
+        responses contain extensible metadata, and both kinds of
+        messages are framed as described above.</p>
+
+        <p>The format of a call request is:</p>
+        <ul>
+          <li><em>request metadata</em>, a map with values of
+          type <code>bytes</code></li>
+          <li>the <em>message name</em>, an Avro string,
+          followed by</li>
+          <li>the message <em>parameters</em>.  Parameters are
+          serialized according to the message's request
+          declaration.</li>
+        </ul>
+
+        <p>When the empty string is used as a message name a server
+        should ignore the parameters and return an empty response.  A
+        client may use this to ping a server or to perform a handshake
+        without sending a protocol message.</p>
+
+        <p>When a message is declared one-way and a stateful
+        connection has been established by a successful handshake
+        response, no response data is sent.  Otherwise the format of
+        the call response is:</p>
+        <ul>
+          <li><em>response metadata</em>, a map with values of
+          type <code>bytes</code></li>
+          <li>a one-byte <em>error flag</em> boolean, followed by either:
+            <ul>
+              <li>if the error flag is false, the message <em>response</em>,
+                serialized per the message's response schema.</li>
+              <li>if the error flag is true, the <em>error</em>,
+              serialized per the message's effective error union
+              schema.</li>
+            </ul>
+          </li>
+        </ul>
+      </section>
+
+    </section>
+
+    <section>
+      <title>Schema Resolution</title>
+
+      <p>A reader of Avro data, whether from an RPC or a file, can
+        always parse that data because its schema is provided.  But
+        that schema may not be exactly the schema that was expected.
+        For example, if the data was written with a different version
+        of the software than is used to read it, then records may have had
+        fields added or removed.  This section specifies how such
+        schema differences should be resolved.</p>
+
+      <p>We call the schema used to write the data
+        the <em>writer's</em> schema, and the schema that the
+        application expects the <em>reader's</em> schema.  Differences
+        between these should be resolved as follows:</p>
+
+      <ul>
+        <li><p>It is an error if the two schemas do not <em>match</em>.</p>
+          <p>To match, one of the following must hold:</p>
+          <ul>
+            <li>both schemas are arrays whose item types match</li>
+            <li>both schemas are maps whose value types match</li>
+            <li>both schemas are enums whose names match</li>
+            <li>both schemas are fixed whose sizes and names match</li>
+            <li>both schemas are records with the same name</li>
+            <li>either schema is a union</li>
+            <li>both schemas have the same primitive type</li>
+            <li>the writer's schema may be <em>promoted</em> to the
+              reader's as follows:
+              <ul>
+                <li>int is promotable to long, float, or double</li>
+                <li>long is promotable to float or double</li>
+                <li>float is promotable to double</li>
+                <li>string is promotable to bytes</li>
+                <li>bytes is promotable to string</li>
+                </ul>
+            </li>
+          </ul>
+        </li>
+
+        <li><strong>if both are records:</strong>
+          <ul>
+            <li>the ordering of fields may be different: fields are
+              matched by name.</li>
+            
+            <li>schemas for fields with the same name in both records
+              are resolved recursively.</li>
+            
+            <li>if the writer's record contains a field with a name
+              not present in the reader's record, the writer's value
+              for that field is ignored.</li>
+            
+            <li>if the reader's record schema has a field that
+              contains a default value, and the writer's schema does not
+              have a field with the same name, then the reader should
+              use the default value from its field.</li>
+
+            <li>if the reader's record schema has a field with no
+              default value, and the writer's schema does not have a field
+              with the same name, an error is signalled.</li>
+          </ul>
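+          (A non-normative example illustrating these record rules appears
+          after this list.)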
+        </li>
+
+        <li><strong>if both are enums:</strong>
+          <p>if the writer's symbol is not present in the reader's
+            enum, then an error is signalled.</p>
+        </li>
+
+        <li><strong>if both are arrays:</strong>
+          <p>This resolution algorithm is applied recursively to the reader's and
+            writer's array item schemas.</p>
+        </li>
+
+        <li><strong>if both are maps:</strong>
+          <p>This resolution algorithm is applied recursively to the reader's and
+            writer's value schemas.</p>
+        </li>
+
+        <li><strong>if both are unions:</strong>
+          <p>The first schema in the reader's union that matches the
+            selected writer's union schema is recursively resolved
+            against it.  If none match, an error is signalled.</p>
+        </li>
+
+        <li><strong>if reader's is a union, but writer's is not</strong>
+          <p>The first schema in the reader's union that matches the
+            writer's schema is recursively resolved against it.  If none
+            match, an error is signalled.</p>
+        </li>
+          
+        <li><strong>if writer's is a union, but reader's is not</strong>
+          <p>If the reader's schema matches the selected writer's schema,
+            it is recursively resolved against it.  If they do not
+            match, an error is signalled.</p>
+        </li>
+          
+      </ul>
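+      <p>As a non-normative illustration, the sketch below exercises the
+      record rules above with the Python implementation distributed with
+      Avro, assuming its <code>avro.schema</code> and <code>avro.io</code>
+      modules: the reader's extra field "b" is filled from its default.</p>
+      <source>
+import io
+import avro.schema
+import avro.io
+
+writer_schema = avro.schema.parse(
+    '{"type": "record", "name": "Rec", "fields": ['
+    ' {"name": "a", "type": "long"}]}')
+reader_schema = avro.schema.parse(
+    '{"type": "record", "name": "Rec", "fields": ['
+    ' {"name": "a", "type": "long"},'
+    ' {"name": "b", "type": "string", "default": "unset"}]}')
+
+# Encode a record with the writer's schema (no "b" field).
+buf = io.BytesIO()
+avro.io.DatumWriter(writer_schema).write({"a": 1}, avro.io.BinaryEncoder(buf))
+
+# Decode it with both schemas; the missing "b" comes from the reader's default.
+buf.seek(0)
+reader = avro.io.DatumReader(writer_schema, reader_schema)
+print(reader.read(avro.io.BinaryDecoder(buf)))
+      </source>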
+
+      <p>A schema's "doc" fields are ignored for the purposes of schema resolution.  Hence,
+        the "doc" portion of a schema may be dropped at serialization.</p>
+
+    </section>
+
+    <section>
+      <title>Parsing Canonical Form for Schemas</title>
+
+      <p>One of the defining characteristics of Avro is that a reader
+      is assumed to have the "same" schema used by the writer of the
+      data the reader is reading.  This assumption leads to a data
+      format that's compact and also amenable to many forms of schema
+      evolution.  However, the specification so far has not defined
+      what it means for the reader to have the "same" schema as the
+      writer.  Does the schema need to be textually identical?  Well,
+      clearly adding or removing some whitespace to a JSON expression
+      does not change its meaning.  At the same time, reordering the
+      fields of records clearly <em>does</em> change the meaning.  So
+      what does it mean for a reader to have "the same" schema as a
+      writer?</p>
+
+      <p><em>Parsing Canonical Form</em> is a transformation of a
+      writer's schema that lets us define what it means for two
+      schemas to be "the same" for the purpose of reading data written
+      against the schema.  It is called <em>Parsing</em> Canonical Form
+      because the transformations strip away parts of the schema, like
+      "doc" attributes, that are irrelevant to readers trying to parse
+      incoming data.  It is called <em>Canonical Form</em> because the
+      transformations normalize the JSON text (such as the order of
+      attributes) in a way that eliminates unimportant differences
+      between schemas.  If the Parsing Canonical Forms of two
+      different schemas are textually equal, then those schemas are
+      "the same" as far as any reader is concerned, i.e., there is no
+      serialized data that would allow a reader to distinguish data
+      generated by a writer using one of the original schemas from
+      data generated by a writer using the other original schema.
+      (We sketch a proof of this property in a companion
+      document.)</p>
+
+      <p>The next subsection specifies the transformations that define
+      Parsing Canonical Form.  But with a well-defined canonical form,
+      it can be convenient to go one step further, transforming these
+      canonical forms into simple integers ("fingerprints") that can
+      be used to uniquely identify schemas.  The subsection after next
+      recommends some standard practices for generating such
+      fingerprints.</p>
+
+      <section>
+        <title>Transforming into Parsing Canonical Form</title>
+
+        <p>Assuming an input schema (in JSON form) that's already
+        UTF-8 text for a <em>valid</em> Avro schema (including all
+        quotes as required by JSON), the following transformations
+        will produce its Parsing Canonical Form:</p>
+        <ul>
+          <li> [PRIMITIVES] Convert primitive schemas to their simple
+          form (e.g., <code>int</code> instead of
+          <code>{"type":"int"}</code>).</li>
+
+          <li> [FULLNAMES] Replace short names with fullnames, using
+          applicable namespaces to do so.  Then eliminate
+          <code>namespace</code> attributes, which are now redundant.</li>
+
+          <li> [STRIP] Keep only attributes that are relevant to
+          parsing data, which are: <code>type</code>,
+          <code>name</code>, <code>fields</code>,
+          <code>symbols</code>, <code>items</code>,
+          <code>values</code>, <code>size</code>.  Strip all others
+          (e.g., <code>doc</code> and <code>aliases</code>).</li>
+
+          <li> [ORDER] Order the appearance of fields of JSON objects
+          as follows: <code>name</code>, <code>type</code>,
+          <code>fields</code>, <code>symbols</code>,
+          <code>items</code>, <code>values</code>, <code>size</code>.
+          For example, if an object has <code>type</code>,
+          <code>name</code>, and <code>size</code> fields, then the
+          <code>name</code> field should appear first, followed by the
+          <code>type</code> and then the <code>size</code> fields.</li>
+
+          <li> [STRINGS] For all JSON string literals in the schema
+          text, replace any escaped characters (e.g., \uXXXX escapes)
+          with their UTF-8 equivalents.</li>
+
+          <li> [INTEGERS] Eliminate quotes around and any leading
+          zeros in front of JSON integer literals (which appear in the
+          <code>size</code> attributes of <code>fixed</code> schemas).</li>
+
+          <li> [WHITESPACE] Eliminate all whitespace in JSON outside of string literals.</li>
+        </ul>
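+
+        <p>For example (an illustration, not itself part of the
+        specification), applying these transformations to the following
+        schema:</p>
+        <source>
+{
+  "type": "record",
+  "namespace": "example.avro",
+  "name": "Pair",
+  "doc": "A pair of ints",
+  "fields": [
+    {"name": "left", "type": "int"},
+    {"name": "right", "type": {"type": "int"}}
+  ]
+}
+        </source>
+        <p>yields the following single line of Parsing Canonical Form: the
+        <code>doc</code> and <code>namespace</code> attributes are stripped,
+        the name is replaced by its fullname, <code>{"type":"int"}</code>
+        collapses to <code>"int"</code>, attributes are reordered, and
+        whitespace outside string literals is removed.</p>
+        <source>
+{"name":"example.avro.Pair","type":"record","fields":[{"name":"left","type":"int"},{"name":"right","type":"int"}]}
+        </source>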
+      </section>
+
+      <section>
+        <title>Schema Fingerprints</title>
+
+        <p>"[A] fingerprinting algorithm is a procedure that maps an
+        arbitrarily large data item (such as a computer file) to a
+        much shorter bit string, its <em>fingerprint,</em> that
+        uniquely identifies the original data for all practical
+        purposes" (quoted from [<a
+        href="http://en.wikipedia.org/wiki/Fingerprint_(computing)">Wikipedia</a>]).
+        In the Avro context, fingerprints of Parsing Canonical Form
+        can be useful in a number of applications; for example, to
+        cache encoder and decoder objects, to tag data items with a
+        short substitute for the writer's full schema, and to quickly
+        negotiate common-case schemas between readers and writers.</p>
+
+        <p>In designing fingerprinting algorithms, there is a
+        fundamental trade-off between the length of the fingerprint
+        and the probability of collisions.  To help application
+        designers find appropriate points within this trade-off space,
+        while encouraging interoperability and ease of implementation,
+        we recommend using one of the following three algorithms when
+        fingerprinting Avro schemas:</p>
+
+        <ul>
+          <li> When applications can tolerate longer fingerprints, we
+          recommend using the <a
+          href="http://en.wikipedia.org/wiki/SHA-2">SHA-256 digest
+          algorithm</a> to generate 256-bit fingerprints of Parsing
+          Canonical Forms.  Most languages today have SHA-256
+          implementations in their libraries.</li>
+
+          <li> At the opposite extreme, the smallest fingerprint we
+          recommend is a 64-bit <a
+          href="http://en.wikipedia.org/wiki/Rabin_fingerprint">Rabin
+          fingerprint</a>.  Below, we provide pseudo-code for this
+          algorithm that can be easily translated into any programming
+          language.  64-bit fingerprints should guarantee uniqueness
+          for schema caches of up to a million entries (for such a
+          cache, the chance of a collision is 3E-8).  We don't
+          recommend shorter fingerprints, as the chance of collisions
+          is too great (for example, with 32-bit fingerprints, a cache
+          with as few as 100,000 schemas has a 50% chance of having a
+          collision).</li>
+
+          <li>Between these two extremes, we recommend using the <a
+          href="http://en.wikipedia.org/wiki/MD5">MD5 message
+          digest</a> to generate 128-bit fingerprints.  These make
+          sense only where very large numbers of schemas are being
+          manipulated (tens of millions); otherwise, 64-bit
+          fingerprints should be sufficient.  As with SHA-256, MD5
+          implementations are found in most libraries today.</li>
+        </ul>
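+
+        <p>For instance, assuming a schema's Parsing Canonical Form is held
+        as UTF-8 text, a 256-bit fingerprint can be computed with a stock
+        SHA-256 implementation; the following sketch uses Python's
+        <code>hashlib</code> module:</p>
+        <source>
+import hashlib
+
+# Parsing Canonical Form of the schema, as UTF-8 text.
+pcf = '{"name":"example.avro.Pair","type":"record","fields":[{"name":"left","type":"int"},{"name":"right","type":"int"}]}'
+
+# 256-bit (32-byte) fingerprint, printed as a hex string.
+print hashlib.sha256(pcf).hexdigest()
+        </source>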
+
+        <p> These fingerprints are <em>not</em> meant to provide any
+        security guarantees, even the longer SHA-256-based ones.  Most
+        Avro applications should be surrounded by security measures
+        that prevent attackers from writing random data and otherwise
+        interfering with the consumers of schemas.  We recommend that
+        these surrounding mechanisms be used to prevent collision and
+        pre-image attacks (i.e., "forgery") on schema fingerprints,
+        rather than relying on the security properties of the
+        fingerprints themselves.</p>
+
+        <p>Rabin fingerprints are <a
+        href="http://en.wikipedia.org/wiki/Cyclic_redundancy_check">cyclic
+        redundancy checks</a> computed using irreducible polynomials.
+        In the style of the Appendix of <a
+        href="http://www.ietf.org/rfc/rfc1952.txt">RFC 1952</a>
+        (pg 10), which defines the CRC-32 algorithm, here's our
+        definition of the 64-bit Avro fingerprinting algorithm:</p>
+
+        <source>
+long fingerprint64(byte[] buf) {
+  if (FP_TABLE == null) initFPTable();
+  long fp = EMPTY;
+  for (int i = 0; i < buf.length; i++)
+    fp = (fp >>> 8) ^ FP_TABLE[(int)(fp ^ buf[i]) & 0xff];
+  return fp;
+}
+
+static long EMPTY = 0xc15d213aa4d7a795L;
+static long[] FP_TABLE = null;
+
+void initFPTable() {
+  FP_TABLE = new long[256];
+  for (int i = 0; i < 256; i++) {
+    long fp = i;
+    for (int j = 0; j < 8; j++)
+      fp = (fp >>> 1) ^ (EMPTY & -(fp & 1L));
+    FP_TABLE[i] = fp;
+  }
+}
+        </source>
+
+        <p> Readers interested in the mathematics behind this
+        algorithm may want to read <a
+        href="http://www.scribd.com/fb-6001967/d/84795-Crc">this book
+        chapter.</a> (Unlike RFC-1952 and the book chapter, we prepend
+        a single one bit to messages.  We do this because CRCs ignore
+        leading zero bits, which can be problematic.  Our code
+        prepends a one-bit by initializing fingerprints using
+        <code>EMPTY</code>, rather than initializing using zero as in
+        RFC-1952 and the book chapter.)</p>
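+
+        <p>For convenience, here is an illustrative translation of the
+        pseudo-code above into Python.  Because Python integers are
+        unbounded, the result is the fingerprint as an unsigned 64-bit
+        value rather than a signed <code>long</code>:</p>
+        <source>
+EMPTY = 0xc15d213aa4d7a795
+
+def _make_fp_table():
+    # Same table construction as initFPTable() above.
+    table = []
+    for i in range(256):
+        fp = i
+        for _ in range(8):
+            fp = (fp >> 1) ^ (EMPTY & -(fp & 1))
+        table.append(fp)
+    return table
+
+FP_TABLE = _make_fp_table()
+
+def fingerprint64(data):
+    # 'data' is a byte string holding a Parsing Canonical Form.
+    fp = EMPTY
+    for byte in bytearray(data):
+        fp = (fp >> 8) ^ FP_TABLE[(fp ^ byte) & 0xff]
+    return fp
+        </source>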
+      </section>
+    </section>
+
+    <section>
+      <title>Logical Types</title>
+
+      <p>A logical type is an Avro primitive or complex type with extra attributes to
+        represent a derived type. The attribute <code>logicalType</code> must
+        always be present for a logical type, and is a string with the name of one of
+        the logical types listed later in this section. Other attributes may be defined
+        for particular logical types.</p>
+
+      <p>A logical type is always serialized using its underlying Avro type so
+        that values are encoded in exactly the same way as the equivalent Avro
+        type that does not have a <code>logicalType</code> attribute. Language
+        implementations may choose to represent logical types with an
+        appropriate native type, although this is not required.</p>
+
+      <p>Language implementations must ignore unknown logical types when
+        reading, and should use the underlying Avro type. If a logical type is
+        invalid, for example a decimal with scale greater than its precision,
+        then implementations should ignore the logical type and use the
+        underlying Avro type.</p>
+
+      <section>
+        <title>Decimal</title>
+        <p>The <code>decimal</code> logical type represents an arbitrary-precision signed
+          decimal number of the form <em>unscaled × 10<sup>-scale</sup></em>.</p>
+
+        <p>A <code>decimal</code> logical type annotates Avro
+          <code>bytes</code> or <code>fixed</code> types. The byte array must
+          contain the two's-complement representation of the unscaled integer
+          value in big-endian byte order. The scale is fixed, and is specified
+          using an attribute.</p>
+
+        <p>The following attributes are supported:</p>
+        <ul>
+          <li><code>scale</code>, a JSON integer representing the scale
+            (optional). If not specified, the scale is 0.</li>
+          <li><code>precision</code>, a JSON integer representing the (maximum)
+            precision of decimals stored in this type (required).</li>
+        </ul>
+
+        <p>For example, the following schema represents decimal numbers with a
+          maximum precision of 4 and a scale of 2:</p>
+        <source>
+{
+  "type": "bytes",
+  "logicalType": "decimal",
+  "precision": 4,
+  "scale": 2
+}
+</source>
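+
+        <p>To illustrate the byte-array encoding described above (this is an
+          illustrative sketch, not part of the specification or of any
+          library's API): the value 12.34 stored against this schema has the
+          unscaled integer 1234, whose big-endian two's-complement
+          representation is the two bytes <code>0x04 0xD2</code>.</p>
+        <source>
+from decimal import Decimal
+
+value = Decimal("12.34")
+unscaled = int(value.scaleb(2))              # scale is 2, so unscaled == 1234
+width = (unscaled.bit_length() + 8) // 8     # bytes needed, leaving a sign bit
+# (a negative unscaled value would first be converted to two's
+#  complement by adding 2**(8*width))
+encoded = bytearray((unscaled >> (8 * (width - 1 - i))) & 0xff
+                    for i in range(width))
+# encoded == bytearray(b'\x04\xd2')
+</source>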
+
+        <p>Precision must be a positive integer. If the
+          underlying type is a <code>fixed</code>, then the precision is
+          limited by its size. An array of length <code>n</code> can store at
+          most <em>floor(log_10(2<sup>8 × n - 1</sup> - 1))</em>
+          base-10 digits of precision.</p>
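+
+        <p>For example, a <code>fixed</code> of size 8 can store at most
+          <em>floor(log_10(2<sup>63</sup> - 1)) = 18</em> base-10 digits of
+          precision.</p>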
+
+        <p>Scale must be zero or a positive integer less than or equal to the
+          precision.</p>
+
+        <p>For the purposes of schema resolution, two schemas that are
+          <code>decimal</code> logical types <em>match</em> if their scales and
+          precisions match.</p>
+
+      </section>
+    </section>
+
+  <p><em>Apache Avro, Avro, Apache, and the Avro and Apache logos are
+   trademarks of The Apache Software Foundation.</em></p>
+
+  </body>
+</document>
diff --git a/doc/src/content/xdocs/tabs.xml b/doc/src/content/xdocs/tabs.xml
new file mode 100644
index 0000000..39a026e
--- /dev/null
+++ b/doc/src/content/xdocs/tabs.xml
@@ -0,0 +1,39 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<!DOCTYPE tabs PUBLIC "-//APACHE//DTD Cocoon Documentation Tab V1.1//EN" "http://forrest.apache.org/dtd/tab-cocoon-v11.dtd" [
+  <!ENTITY % avro-entities PUBLIC "-//Apache//ENTITIES Avro//EN"
+	   "../../../../build/avro.ent">
+  %avro-entities;
+]>
+
+<tabs software="Avro"
+      title="Avro"
+      copyright="The Apache Software Foundation"
+      xmlns:xlink="http://www.w3.org/1999/xlink">
+
+  <!-- The rules are:
+    @dir will always have /index.html added.
+    @href is not modified unless it is root-relative and obviously specifies a
+    directory (ends in '/'), in which case /index.html will be added
+  -->
+
+  <tab label="Project" href="http://hadoop.apache.org/avro/" />
+  <tab label="Wiki" href="http://wiki.apache.org/hadoop/Avro/" />
+  <tab label="Avro &AvroVersion; Documentation" dir="" />  
+  
+</tabs>
diff --git a/doc/src/resources/images/apache_feather.gif b/doc/src/resources/images/apache_feather.gif
new file mode 100644
index 0000000..1a0c3e6
Binary files /dev/null and b/doc/src/resources/images/apache_feather.gif differ
diff --git a/doc/src/resources/images/avro-logo.png b/doc/src/resources/images/avro-logo.png
new file mode 100644
index 0000000..4cbe12d
Binary files /dev/null and b/doc/src/resources/images/avro-logo.png differ
diff --git a/doc/src/resources/images/favicon.ico b/doc/src/resources/images/favicon.ico
new file mode 100644
index 0000000..161bcf7
Binary files /dev/null and b/doc/src/resources/images/favicon.ico differ
diff --git a/doc/src/skinconf.xml b/doc/src/skinconf.xml
new file mode 100644
index 0000000..1b64561
--- /dev/null
+++ b/doc/src/skinconf.xml
@@ -0,0 +1,350 @@
+<?xml version="1.0"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!--
+Skin configuration file. This file contains details of your project,
+which will be used to configure the chosen Forrest skin.
+-->
+
+<!DOCTYPE skinconfig PUBLIC "-//APACHE//DTD Skin Configuration V0.6-3//EN" "http://forrest.apache.org/dtd/skinconfig-v06-3.dtd">
+<skinconfig>
+  <!-- To enable lucene search add provider="lucene" (default is google).
+    Add box-location="alt" to move the search box to an alternate location
+    (if the skin supports it) and box-location="all" to show it in all
+    available locations on the page.  Remove the <search> element to show
+    no search box. @domain will enable sitesearch for the specific domain with google.
+    In other words google will search the @domain for the query string.
+
+  -->
+  <search name="Avro" domain="hadoop.apache.org" provider="google"/>
+
+  <!-- Disable the print link? If enabled, invalid HTML 4.0.1 -->
+  <disable-print-link>true</disable-print-link>  
+  <!-- Disable the PDF link? -->
+  <disable-pdf-link>false</disable-pdf-link>
+  <!-- Disable the POD link? -->
+  <disable-pod-link>true</disable-pod-link>
+  <!-- Disable the Text link? FIXME: NOT YET IMPLEMENTED. -->
+  <disable-txt-link>true</disable-txt-link>
+  <!-- Disable the xml source link? -->
+  <!-- The xml source link makes it possible to access the xml rendition
+    of the source from the html page, and to have it generated statically.
+    This can be used to enable other sites and services to reuse the
+    xml format for their uses. Keep this disabled if you don't want other
+    sites to easily reuse your pages.-->
+  <disable-xml-link>true</disable-xml-link>
+
+  <!-- Disable navigation icons on all external links? -->
+  <disable-external-link-image>true</disable-external-link-image>
+
+  <!-- Disable w3c compliance links? 
+    Use e.g. align="center" to move the compliance links logos to 
+    an alternate location default is left.
+    (if the skin supports it) -->
+  <disable-compliance-links>true</disable-compliance-links>
+
+  <!-- Render mailto: links unrecognisable by spam harvesters? -->
+  <obfuscate-mail-links>false</obfuscate-mail-links>
+
+  <!-- Disable the javascript facility to change the font size -->
+  <disable-font-script>true</disable-font-script>
+
+  <!-- project logo -->
+  <project-name>Avro</project-name>
+  <project-description>Serialization System</project-description>
+  <project-url>http://avro.apache.org/</project-url>
+  <project-logo>images/avro-logo.png</project-logo>
+
+  <!-- group logo -->
+  <group-name>Apache</group-name>
+  <group-description>The Apache Software Foundation</group-description>
+  <group-url>http://www.apache.org/</group-url>
+  <group-logo>images/apache_feather.gif</group-logo>
+
+  <!-- optional host logo (e.g. sourceforge logo)
+       default skin: renders it at the bottom-left corner -->
+  <host-url></host-url>
+  <host-logo></host-logo>
+
+  <!-- relative url of a favicon file, normally favicon.ico -->
+  <favicon-url>images/favicon.ico</favicon-url>
+
+  <!-- The following are used to construct a copyright statement -->
+  <year>2012</year>
+  <vendor>The Apache Software Foundation.</vendor>
+  <copyright-link>http://www.apache.org/licenses/</copyright-link>
+
+  <!-- Some skins use this to form a 'breadcrumb trail' of links.
+    Use location="alt" to move the trail to an alternate location
+    (if the skin supports it).
+	  Omit the location attribute to display the trail in the default location.
+	  Use location="none" to not display the trail (if the skin supports it).
+    For some skins just set the attributes to blank.
+  -->
+  <trail>
+    <link1 name="Apache" href="http://www.apache.org/"/>
+    <link2 name="Avro" href="http://avro.apache.org/"/>
+    <link3 name="Avro" href="http://avro.apache.org/"/>
+  </trail>
+
+  <!-- Configure the TOC, i.e. the Table of Contents.
+  @max-depth
+   how many "section" levels need to be included in the
+   generated Table of Contents (TOC). 
+  @min-sections
+   Minimum required to create a TOC.
+  @location ("page","menu","page,menu", "none")
+   Where to show the TOC.
+  -->
+  <toc max-depth="2" min-sections="1" location="page"/>
+
+  <!-- Heading types can be clean|underlined|boxed  -->
+  <headings type="clean"/>
+  
+  <!-- The optional feedback element will be used to construct a
+    feedback link in the footer with the page pathname appended:
+    <a href="@href">{@to}</a>
+  <feedback to="webmaster at foo.com"
+    href="mailto:webmaster at foo.com?subject=Feedback " >
+    Send feedback about the website to:
+  </feedback>
+    -->
+  <!--
+    extra-css - here you can define custom css-elements that are 
+    a. overriding the fallback elements or 
+    b. adding the css definition from new elements that you may have 
+       used in your documentation.
+    -->
+  <extra-css>
+    <!--Example of b. 
+        To define the css definition of a new element that you may have used
+        in the class attribute of a <p> node. 
+        e.g. <p class="quote"/>
+    -->
+    p.quote {
+      margin-left: 2em;
+      padding: .5em;
+      background-color: #f0f0f0;
+      font-family: monospace;
+    }
+    table.right {
+      text-align: right;
+      display: block;
+    }
+  </extra-css>
+
+  <colors>
+  <!-- These values are used for the generated CSS files. -->
+
+  <!-- Krysalis -->
+<!--
+    <color name="header"    value="#FFFFFF"/>
+
+    <color name="tab-selected" value="#a5b6c6" link="#000000" vlink="#000000" hlink="#000000"/>
+    <color name="tab-unselected" value="#F7F7F7"  link="#000000" vlink="#000000" hlink="#000000"/>
+    <color name="subtab-selected" value="#a5b6c6"  link="#000000" vlink="#000000" hlink="#000000"/>
+    <color name="subtab-unselected" value="#a5b6c6"  link="#000000" vlink="#000000" hlink="#000000"/>
+
+    <color name="heading" value="#a5b6c6"/>
+    <color name="subheading" value="#CFDCED"/>
+        
+    <color name="navstrip" value="#CFDCED" font="#000000" link="#000000" vlink="#000000" hlink="#000000"/>
+    <color name="toolbox" value="#a5b6c6"/>
+    <color name="border" value="#a5b6c6"/>
+        
+    <color name="menu" value="#F7F7F7" link="#000000" vlink="#000000" hlink="#000000"/>    
+    <color name="dialog" value="#F7F7F7"/>
+            
+    <color name="body"    value="#ffffff" link="#0F3660" vlink="#009999" hlink="#000066"/>
+    
+    <color name="table" value="#a5b6c6"/>    
+    <color name="table-cell" value="#ffffff"/>    
+    <color name="highlight" value="#ffff00"/>
+    <color name="fixme" value="#cc6600"/>
+    <color name="note" value="#006699"/>
+    <color name="warning" value="#990000"/>
+    <color name="code" value="#a5b6c6"/>
+        
+    <color name="footer" value="#a5b6c6"/>
+-->
+  
+  <!-- Forrest -->
+<!--
+    <color name="header"    value="#294563"/>
+
+    <color name="tab-selected" value="#4a6d8c" link="#0F3660" vlink="#0F3660" hlink="#000066"/>
+    <color name="tab-unselected" value="#b5c7e7" link="#0F3660" vlink="#0F3660" hlink="#000066"/>
+    <color name="subtab-selected" value="#4a6d8c" link="#0F3660" vlink="#0F3660" hlink="#000066"/>
+    <color name="subtab-unselected" value="#4a6d8c" link="#0F3660" vlink="#0F3660" hlink="#000066"/>
+
+    <color name="heading" value="#294563"/>
+    <color name="subheading" value="#4a6d8c"/>
+        
+    <color name="navstrip" value="#cedfef" font="#0F3660" link="#0F3660" vlink="#0F3660" hlink="#000066"/>
+    <color name="toolbox" value="#4a6d8c"/>
+    <color name="border" value="#294563"/>
+    
+    <color name="menu" value="#4a6d8c" font="#cedfef" link="#ffffff" vlink="#ffffff" hlink="#ffcf00"/>    
+    <color name="dialog" value="#4a6d8c"/>
+            
+    <color name="body" value="#ffffff"  link="#0F3660" vlink="#009999" hlink="#000066"/>
+    
+    <color name="table" value="#7099C5"/>    
+    <color name="table-cell" value="#f0f0ff"/>    
+    <color name="highlight" value="#ffff00"/>
+    <color name="fixme" value="#cc6600"/>
+    <color name="note" value="#006699"/>
+    <color name="warning" value="#990000"/>
+    <color name="code" value="#CFDCED"/>
+        
+    <color name="footer" value="#cedfef"/>
+-->
+
+  <!-- Collabnet --> 
+<!--
+    <color name="header"    value="#003366"/>
+
+    <color name="tab-selected" value="#dddddd" link="#555555" vlink="#555555" hlink="#555555"/>
+    <color name="tab-unselected" value="#999999" link="#ffffff" vlink="#ffffff" hlink="#ffffff"/>
+    <color name="subtab-selected" value="#cccccc" link="#000000" vlink="#000000" hlink="#000000"/>
+    <color name="subtab-unselected" value="#cccccc" link="#555555" vlink="#555555" hlink="#555555"/>
+
+    <color name="heading" value="#003366"/>
+    <color name="subheading" value="#888888"/>
+    
+    <color name="navstrip" value="#dddddd" font="#555555"/>
+    <color name="toolbox" value="#dddddd" font="#555555"/>
+    <color name="border" value="#999999"/>
+    
+    <color name="menu" value="#ffffff"/>    
+    <color name="dialog" value="#eeeeee"/>
+            
+    <color name="body"      value="#ffffff"/>
+    
+    <color name="table" value="#ccc"/>    
+    <color name="table-cell" value="#ffffff"/>   
+    <color name="highlight" value="#ffff00"/>
+    <color name="fixme" value="#cc6600"/>
+    <color name="note" value="#006699"/>
+    <color name="warning" value="#990000"/>
+    <color name="code" value="#003366"/>
+        
+    <color name="footer" value="#ffffff"/>
+-->
+ <!-- Lenya using pelt-->
+<!--
+    <color name="header" value="#ffffff"/>
+
+    <color name="tab-selected" value="#4C6C8F" link="#ffffff" vlink="#ffffff" hlink="#ffffff"/>
+    <color name="tab-unselected" value="#E5E4D9" link="#000000" vlink="#000000" hlink="#000000"/>
+    <color name="subtab-selected" value="#000000" link="#000000" vlink="#000000" hlink="#000000"/>
+    <color name="subtab-unselected" value="#E5E4D9" link="#000000" vlink="#000000" hlink="#000000"/>
+
+    <color name="heading" value="#E5E4D9"/>
+    <color name="subheading" value="#000000"/>
+    <color name="published" value="#4C6C8F" font="#FFFFFF"/>
+    <color name="feedback" value="#4C6C8F" font="#FFFFFF" align="center"/>
+    <color name="navstrip" value="#E5E4D9" font="#000000"/>
+
+    <color name="toolbox" value="#CFDCED" font="#000000"/>
+
+    <color name="border" value="#999999"/>
+    <color name="menu" value="#4C6C8F" font="#ffffff" link="#ffffff" vlink="#ffffff" hlink="#ffffff" current="#FFCC33" />    
+    <color name="menuheading" value="#cfdced" font="#000000" />
+    <color name="searchbox" value="#E5E4D9" font="#000000"/>
+    
+    <color name="dialog" value="#CFDCED"/>
+    <color name="body" value="#ffffff" />            
+    
+    <color name="table" value="#ccc"/>    
+    <color name="table-cell" value="#ffffff"/>   
+    <color name="highlight" value="#ffff00"/>
+    <color name="fixme" value="#cc6600"/>
+    <color name="note" value="#006699"/>
+    <color name="warning" value="#990000"/>
+    <color name="code" value="#003366"/>
+        
+    <color name="footer" value="#E5E4D9"/>
+-->
+  </colors>
+ 
+  <!-- Settings specific to PDF output. -->
+  <pdf>
+    <!-- 
+       Supported page sizes are a0, a1, a2, a3, a4, a5, executive,
+       folio, legal, ledger, letter, quarto, tabloid (default letter).
+       Supported page orientations are portrait, landscape (default
+       portrait).
+       Supported text alignments are left, right, justify (default left).
+    -->
+    <page size="letter" orientation="portrait" text-align="left"/>
+
+    <!--
+       Margins can be specified for top, bottom, inner, and outer
+       edges. If double-sided="false", the inner edge is always left
+       and the outer is always right. If double-sided="true", the
+       inner edge will be left on odd pages, right on even pages,
+       the outer edge vice versa.
+       Specified below are the default settings.
+    -->
+    <margins double-sided="false">
+      <top>1in</top>
+      <bottom>1in</bottom>
+      <inner>1.25in</inner>
+      <outer>1in</outer>
+    </margins>
+
+    <!--
+      Print the URL text next to all links going outside the file
+    -->
+    <show-external-urls>false</show-external-urls>
+
+    <!--
+      Disable the copyright footer on each page of the PDF.
+      A footer is composed for each page. By default, a "credit" with role=pdf
+      will be used, as explained below. Otherwise a copyright statement
+      will be generated. This latter can be disabled.
+    -->
+    <disable-copyright-footer>false</disable-copyright-footer>
+  </pdf>
+
+  <!-- Credits are typically rendered as a set of small clickable
+    images in the page footer.
+    Use box-location="alt" to move the credit to an alternate location
+    (if the skin supports it). 
+  -->
+  <credits>
+    <credit box-location="alt">
+      <name>Built with Apache Forrest</name>
+      <url>http://forrest.apache.org/</url>
+      <image>images/built-with-forrest-button.png</image>
+      <width>88</width>
+      <height>31</height>
+    </credit>
+    <!-- A credit with @role="pdf" will be used to compose a footer
+     for each page in the PDF, using either "name" or "url" or both.
+    -->
+    <!--
+    <credit role="pdf">
+      <name>Built with Apache Forrest</name>
+      <url>http://forrest.apache.org/</url>
+    </credit>
+    -->
+  </credits>
+
+</skinconfig>
diff --git a/lang/py/build.xml b/lang/py/build.xml
new file mode 100644
index 0000000..6d371ea
--- /dev/null
+++ b/lang/py/build.xml
@@ -0,0 +1,181 @@
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<project name="Avro" default="dist">
+ 
+  <!-- Load user's default properties. -->
+  <property file="${user.home}/build.properties"/>
+
+  <!-- Shared directories -->
+  <property name="share.dir" value="${basedir}/../../share"/>
+  <property name="share.schema.dir" value="${share.dir}/schemas/"/>
+  <property name="dist.dir" value="${basedir}/../../dist/py"/>
+  <property name="top.build" value="${basedir}/../../build"/>
+  <property name="interop.data.dir" value="${top.build}/interop/data"/>
+
+  <property name="python" value="python"/>
+
+  <!-- Python implementation directories -->
+  <property name="build.dir" value="${basedir}/build"/>
+  <property name="src.dir" value="${basedir}/src"/>
+  <property name="lib.dir" value="${basedir}/lib"/>
+  <property name="test.dir" value="${basedir}/test"/>
+
+  <!-- Load shared properties -->
+  <loadfile srcFile="${share.dir}/VERSION.txt" property="avro.version" />
+  <loadfile srcFile="${share.schema.dir}/org/apache/avro/ipc/HandshakeRequest.avsc" property="handshake.request.json"/>
+  <loadfile srcFile="${share.schema.dir}/org/apache/avro/ipc/HandshakeResponse.avsc" property="handshake.response.json"/>
+
+  <path id="java.classpath">
+    <fileset dir="lib">
+      <include name="**/*.jar" />
+    </fileset>
+  </path>
+
+  <path id="test.path">
+    <pathelement location="${build.dir}/src"/>
+    <pathelement location="${build.dir}/test"/>
+    <pathelement location="${build.dir}/lib"/>
+  </path>
+
+  <target name="init" description="Create the build directory.">
+    <mkdir dir="${build.dir}"/>
+  </target>
+
+  <target name="build"
+          description="Copy project files to build/ and do string replacement."
+          depends="init">
+    <!-- Copy src/, test/, lib/ -->
+    <copy todir="${build.dir}/src">
+      <fileset dir="${src.dir}">
+        <exclude name="**/*.pyc"/>
+        <exclude name="**/*.py~"/>
+      </fileset> 
+    </copy>
+    <copy todir="${build.dir}/test">
+      <fileset dir="${test.dir}">
+        <exclude name="**/*.pyc"/>
+        <exclude name="**/*.py~"/>
+      </fileset> 
+    </copy>
+    <copy todir="${build.dir}/lib">
+      <fileset dir="${lib.dir}" />
+    </copy>
+
+    <!-- Inline the handshake schemas -->
+    <copy file="${src.dir}/avro/ipc.py"
+          toFile="${build.dir}/src/avro/ipc.py"
+          overwrite="true">
+      <filterset>
+        <filter token="HANDSHAKE_REQUEST_SCHEMA" 
+          value="${handshake.request.json}"/>
+        <filter token="HANDSHAKE_RESPONSE_SCHEMA" 
+          value="${handshake.response.json}"/>
+     </filterset>
+    </copy>
+
+    <!-- Inline the Avro version -->
+    <copy file="${basedir}/setup.py"
+          toFile="${build.dir}/setup.py"
+          overwrite="true">
+      <filterset>
+        <filter token="AVRO_VERSION" value="${avro.version}"/>
+      </filterset>
+    </copy>
+
+     <!-- Inline the Avro version -->
+     <copy file="${basedir}/scripts/avro"
+           toFile="${build.dir}/scripts/avro"
+           overwrite="true">
+       <filterset>
+         <filter token="AVRO_VERSION" value="${avro.version}"/>
+       </filterset>
+     </copy>
+     <!-- Make executable (Ant does not preserve executable bit) -->
+     <exec executable="chmod">
+         <arg value="a+x" />
+         <arg value="${build.dir}/scripts/avro" />
+     </exec>
+
+    <!-- Inline the interop data directory -->
+    <copy file="${test.dir}/test_datafile_interop.py"
+          toFile="${build.dir}/test/test_datafile_interop.py"
+          overwrite="true">
+      <filterset>
+        <filter token="INTEROP_DATA_DIR" value="${interop.data.dir}"/>
+      </filterset>
+    </copy>
+  </target>
+
+  <target name="test"
+          description="Run python unit tests"
+          depends="build">
+    <taskdef name="py-test" classname="org.pyant.tasks.PythonTestTask"
+	     classpathref="java.classpath"/>
+    <py-test python="${python}" pythonpathref="test.path" >
+      <fileset dir="${build.dir}/test">
+        <include name="test_*.py"/>
+        <exclude name="test_datafile_interop.py"/>
+      </fileset>
+    </py-test>
+  </target>
+
+  <target name="interop-data-test"
+          description="Run python interop data tests"
+          depends="build">
+    <taskdef name="py-test" classname="org.pyant.tasks.PythonTestTask"
+	     classpathref="java.classpath"/>
+    <py-test python="${python}" pythonpathref="test.path" >
+      <fileset dir="${build.dir}/test">
+        <include name="test_datafile_interop.py"/>
+      </fileset>
+    </py-test>
+  </target>
+
+  <target name="interop-data-generate"
+          description="Generate Python interop data files."
+          depends="build">
+    <mkdir dir="${interop.data.dir}"/>
+    <exec executable="${python}">
+      <env key="PYTHONPATH" value="$PYTHONPATH:${build.dir}/src"/>
+      <arg value="${build.dir}/test/gen_interop_data.py"/>
+      <arg value="${share.dir}/test/schemas/interop.avsc"/>
+      <arg value="${interop.data.dir}/py.avro"/>
+    </exec>
+  </target>
+
+  <target name="dist"
+          description="Build source distribution"
+          depends="build">
+    <mkdir dir="${dist.dir}"/>
+    <exec executable="${python}" failonerror="true" dir="${build.dir}">
+      <arg value="${build.dir}/setup.py"/>
+      <arg value="sdist"/>
+      <arg value="--dist-dir=${dist.dir}"/>
+    </exec>
+  </target>
+
+  <target name="clean"
+          description="Delete build files and their directories">
+    <delete includeemptydirs="true" failonerror="false">
+      <fileset file="MANIFEST"/>
+      <fileset dir="${build.dir}"/>
+    </delete>
+  </target>
+
+</project>
diff --git a/lang/py/lib/pyAntTasks-1.3-LICENSE.txt b/lang/py/lib/pyAntTasks-1.3-LICENSE.txt
new file mode 100644
index 0000000..d645695
--- /dev/null
+++ b/lang/py/lib/pyAntTasks-1.3-LICENSE.txt
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/lang/py/lib/pyAntTasks-1.3.jar b/lang/py/lib/pyAntTasks-1.3.jar
new file mode 100644
index 0000000..53a7877
Binary files /dev/null and b/lang/py/lib/pyAntTasks-1.3.jar differ
diff --git a/lang/py/lib/simplejson/LICENSE.txt b/lang/py/lib/simplejson/LICENSE.txt
new file mode 100644
index 0000000..ad95f29
--- /dev/null
+++ b/lang/py/lib/simplejson/LICENSE.txt
@@ -0,0 +1,19 @@
+Copyright (c) 2006 Bob Ippolito
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/lang/py/lib/simplejson/__init__.py b/lang/py/lib/simplejson/__init__.py
new file mode 100644
index 0000000..d5b4d39
--- /dev/null
+++ b/lang/py/lib/simplejson/__init__.py
@@ -0,0 +1,318 @@
+r"""JSON (JavaScript Object Notation) <http://json.org> is a subset of
+JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data
+interchange format.
+
+:mod:`simplejson` exposes an API familiar to users of the standard library
+:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained
+version of the :mod:`json` library contained in Python 2.6, but maintains
+compatibility with Python 2.4 and Python 2.5 and (currently) has
+significant performance advantages, even without using the optional C
+extension for speedups.
+
+Encoding basic Python object hierarchies::
+
+    >>> import simplejson as json
+    >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}])
+    '["foo", {"bar": ["baz", null, 1.0, 2]}]'
+    >>> print json.dumps("\"foo\bar")
+    "\"foo\bar"
+    >>> print json.dumps(u'\u1234')
+    "\u1234"
+    >>> print json.dumps('\\')
+    "\\"
+    >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)
+    {"a": 0, "b": 0, "c": 0}
+    >>> from StringIO import StringIO
+    >>> io = StringIO()
+    >>> json.dump(['streaming API'], io)
+    >>> io.getvalue()
+    '["streaming API"]'
+
+Compact encoding::
+
+    >>> import simplejson as json
+    >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':'))
+    '[1,2,3,{"4":5,"6":7}]'
+
+Pretty printing::
+
+    >>> import simplejson as json
+    >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4)
+    >>> print '\n'.join([l.rstrip() for l in  s.splitlines()])
+    {
+        "4": 5,
+        "6": 7
+    }
+
+Decoding JSON::
+
+    >>> import simplejson as json
+    >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}]
+    >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj
+    True
+    >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar'
+    True
+    >>> from StringIO import StringIO
+    >>> io = StringIO('["streaming API"]')
+    >>> json.load(io)[0] == 'streaming API'
+    True
+
+Specializing JSON object decoding::
+
+    >>> import simplejson as json
+    >>> def as_complex(dct):
+    ...     if '__complex__' in dct:
+    ...         return complex(dct['real'], dct['imag'])
+    ...     return dct
+    ...
+    >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}',
+    ...     object_hook=as_complex)
+    (1+2j)
+    >>> import decimal
+    >>> json.loads('1.1', parse_float=decimal.Decimal) == decimal.Decimal('1.1')
+    True
+
+Specializing JSON object encoding::
+
+    >>> import simplejson as json
+    >>> def encode_complex(obj):
+    ...     if isinstance(obj, complex):
+    ...         return [obj.real, obj.imag]
+    ...     raise TypeError(repr(obj) + " is not JSON serializable")
+    ...
+    >>> json.dumps(2 + 1j, default=encode_complex)
+    '[2.0, 1.0]'
+    >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j)
+    '[2.0, 1.0]'
+    >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j))
+    '[2.0, 1.0]'
+
+
+Using simplejson.tool from the shell to validate and pretty-print::
+
+    $ echo '{"json":"obj"}' | python -m simplejson.tool
+    {
+        "json": "obj"
+    }
+    $ echo '{ 1.2:3.4}' | python -m simplejson.tool
+    Expecting property name: line 1 column 2 (char 2)
+"""
+__version__ = '2.0.9'
+__all__ = [
+    'dump', 'dumps', 'load', 'loads',
+    'JSONDecoder', 'JSONEncoder',
+]
+
+__author__ = 'Bob Ippolito <bob at redivi.com>'
+
+from decoder import JSONDecoder
+from encoder import JSONEncoder
+
+_default_encoder = JSONEncoder(
+    skipkeys=False,
+    ensure_ascii=True,
+    check_circular=True,
+    allow_nan=True,
+    indent=None,
+    separators=None,
+    encoding='utf-8',
+    default=None,
+)
+
+def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
+        allow_nan=True, cls=None, indent=None, separators=None,
+        encoding='utf-8', default=None, **kw):
+    """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
+    ``.write()``-supporting file-like object).
+
+    If ``skipkeys`` is true then ``dict`` keys that are not basic types
+    (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
+    will be skipped instead of raising a ``TypeError``.
+
+    If ``ensure_ascii`` is false, then some chunks written to ``fp``
+    may be ``unicode`` instances, subject to normal Python ``str`` to
+    ``unicode`` coercion rules. Unless ``fp.write()`` explicitly
+    understands ``unicode`` (as in ``codecs.getwriter()``) this is likely
+    to cause an error.
+
+    If ``check_circular`` is false, then the circular reference check
+    for container types will be skipped and a circular reference will
+    result in an ``OverflowError`` (or worse).
+
+    If ``allow_nan`` is false, then it will be a ``ValueError`` to
+    serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``)
+    in strict compliance of the JSON specification, instead of using the
+    JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
+
+    If ``indent`` is a non-negative integer, then JSON array elements and object
+    members will be pretty-printed with that indent level. An indent level
+    of 0 will only insert newlines. ``None`` is the most compact representation.
+
+    If ``separators`` is an ``(item_separator, dict_separator)`` tuple
+    then it will be used instead of the default ``(', ', ': ')`` separators.
+    ``(',', ':')`` is the most compact JSON representation.
+
+    ``encoding`` is the character encoding for str instances, default is UTF-8.
+
+    ``default(obj)`` is a function that should return a serializable version
+    of obj or raise TypeError. The default simply raises TypeError.
+
+    To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
+    ``.default()`` method to serialize additional types), specify it with
+    the ``cls`` kwarg.
+
+    """
+    # cached encoder
+    if (not skipkeys and ensure_ascii and
+        check_circular and allow_nan and
+        cls is None and indent is None and separators is None and
+        encoding == 'utf-8' and default is None and not kw):
+        iterable = _default_encoder.iterencode(obj)
+    else:
+        if cls is None:
+            cls = JSONEncoder
+        iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii,
+            check_circular=check_circular, allow_nan=allow_nan, indent=indent,
+            separators=separators, encoding=encoding,
+            default=default, **kw).iterencode(obj)
+    # could accelerate with writelines in some versions of Python, at
+    # a debuggability cost
+    for chunk in iterable:
+        fp.write(chunk)
+
+
+def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
+        allow_nan=True, cls=None, indent=None, separators=None,
+        encoding='utf-8', default=None, **kw):
+    """Serialize ``obj`` to a JSON formatted ``str``.
+
+    If ``skipkeys`` is true then ``dict`` keys that are not basic types
+    (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
+    will be skipped instead of raising a ``TypeError``.
+
+    If ``ensure_ascii`` is false, then the return value will be a
+    ``unicode`` instance subject to normal Python ``str`` to ``unicode``
+    coercion rules instead of being escaped to an ASCII ``str``.
+
+    If ``check_circular`` is false, then the circular reference check
+    for container types will be skipped and a circular reference will
+    result in an ``OverflowError`` (or worse).
+
+    If ``allow_nan`` is false, then it will be a ``ValueError`` to
+    serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in
+    strict compliance of the JSON specification, instead of using the
+    JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
+
+    If ``indent`` is a non-negative integer, then JSON array elements and
+    object members will be pretty-printed with that indent level. An indent
+    level of 0 will only insert newlines. ``None`` is the most compact
+    representation.
+
+    If ``separators`` is an ``(item_separator, dict_separator)`` tuple
+    then it will be used instead of the default ``(', ', ': ')`` separators.
+    ``(',', ':')`` is the most compact JSON representation.
+
+    ``encoding`` is the character encoding for str instances, default is UTF-8.
+
+    ``default(obj)`` is a function that should return a serializable version
+    of obj or raise TypeError. The default simply raises TypeError.
+
+    To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
+    ``.default()`` method to serialize additional types), specify it with
+    the ``cls`` kwarg.
+
+    """
+    # cached encoder
+    if (not skipkeys and ensure_ascii and
+        check_circular and allow_nan and
+        cls is None and indent is None and separators is None and
+        encoding == 'utf-8' and default is None and not kw):
+        return _default_encoder.encode(obj)
+    if cls is None:
+        cls = JSONEncoder
+    return cls(
+        skipkeys=skipkeys, ensure_ascii=ensure_ascii,
+        check_circular=check_circular, allow_nan=allow_nan, indent=indent,
+        separators=separators, encoding=encoding, default=default,
+        **kw).encode(obj)
+
+
+_default_decoder = JSONDecoder(encoding=None, object_hook=None)
+
+
+def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
+        parse_int=None, parse_constant=None, **kw):
+    """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
+    a JSON document) to a Python object.
+
+    If the contents of ``fp`` are encoded with an ASCII based encoding other
+    than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must
+    be specified. Encodings that are not ASCII based (such as UCS-2) are
+    not allowed, and should be wrapped with
+    ``codecs.getreader(encoding)(fp)``, or simply decoded to a ``unicode``
+    object and passed to ``loads()``.
+
+    ``object_hook`` is an optional function that will be called with the
+    result of any object literal decode (a ``dict``). The return value of
+    ``object_hook`` will be used instead of the ``dict``. This feature
+    can be used to implement custom decoders (e.g. JSON-RPC class hinting).
+
+    To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
+    kwarg.
+
+    """
+    return loads(fp.read(),
+        encoding=encoding, cls=cls, object_hook=object_hook,
+        parse_float=parse_float, parse_int=parse_int,
+        parse_constant=parse_constant, **kw)
+
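+# A minimal usage sketch for load() (illustrative only, not part of the
+# upstream module): reading from a file-like object and post-processing
+# each decoded JSON object with ``object_hook``; the hook used here is a
+# hypothetical example.
+#
+#   >>> from StringIO import StringIO
+#   >>> fp = StringIO('{"x": 1, "y": 2}')
+#   >>> load(fp, object_hook=lambda d: (d['x'], d['y']))
+#   (1, 2)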
+
+def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
+        parse_int=None, parse_constant=None, **kw):
+    """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON
+    document) to a Python object.
+
+    If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding
+    other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name
+    must be specified. Encodings that are not ASCII based (such as UCS-2)
+    are not allowed and should be decoded to ``unicode`` first.
+
+    ``object_hook`` is an optional function that will be called with the
+    result of any object literal decode (a ``dict``). The return value of
+    ``object_hook`` will be used instead of the ``dict``. This feature
+    can be used to implement custom decoders (e.g. JSON-RPC class hinting).
+
+    ``parse_float``, if specified, will be called with the string
+    of every JSON float to be decoded. By default this is equivalent to
+    float(num_str). This can be used to substitute another datatype or
+    parser for JSON floats (e.g. decimal.Decimal).
+
+    ``parse_int``, if specified, will be called with the string
+    of every JSON int to be decoded. By default this is equivalent to
+    int(num_str). This can be used to substitute another datatype or
+    parser for JSON integers (e.g. float).
+
+    ``parse_constant``, if specified, will be called with one of the
+    following strings: -Infinity, Infinity, NaN, null, true, false.
+    This can be used to raise an exception if invalid JSON numbers
+    are encountered.
+
+    To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
+    kwarg.
+
+    """
+    if (cls is None and encoding is None and object_hook is None and
+            parse_int is None and parse_float is None and
+            parse_constant is None and not kw):
+        return _default_decoder.decode(s)
+    if cls is None:
+        cls = JSONDecoder
+    if object_hook is not None:
+        kw['object_hook'] = object_hook
+    if parse_float is not None:
+        kw['parse_float'] = parse_float
+    if parse_int is not None:
+        kw['parse_int'] = parse_int
+    if parse_constant is not None:
+        kw['parse_constant'] = parse_constant
+    return cls(encoding=encoding, **kw).decode(s)
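+# A minimal usage sketch for loads() (illustrative only, not part of the
+# upstream module): the default fast path, and overriding ``parse_float``
+# so JSON floats are decoded as decimal.Decimal instead of float.
+#
+#   >>> loads('[1, 2, 3]')
+#   [1, 2, 3]
+#   >>> from decimal import Decimal
+#   >>> loads('1.1', parse_float=Decimal) == Decimal('1.1')
+#   True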
diff --git a/lang/py/lib/simplejson/_speedups.c b/lang/py/lib/simplejson/_speedups.c
new file mode 100644
index 0000000..23b5f4a
--- /dev/null
+++ b/lang/py/lib/simplejson/_speedups.c
@@ -0,0 +1,2329 @@
+#include "Python.h"
+#include "structmember.h"
+#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
+#define Py_TYPE(ob)     (((PyObject*)(ob))->ob_type)
+#endif
+#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
+typedef int Py_ssize_t;
+#define PY_SSIZE_T_MAX INT_MAX
+#define PY_SSIZE_T_MIN INT_MIN
+#define PyInt_FromSsize_t PyInt_FromLong
+#define PyInt_AsSsize_t PyInt_AsLong
+#endif
+#ifndef Py_IS_FINITE
+#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
+#endif
+
+#ifdef __GNUC__
+#define UNUSED __attribute__((__unused__))
+#else
+#define UNUSED
+#endif
+
+#define DEFAULT_ENCODING "utf-8"
+
+#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
+#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
+#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
+#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
+
+static PyTypeObject PyScannerType;
+static PyTypeObject PyEncoderType;
+
+typedef struct _PyScannerObject {
+    PyObject_HEAD
+    PyObject *encoding;
+    PyObject *strict;
+    PyObject *object_hook;
+    PyObject *parse_float;
+    PyObject *parse_int;
+    PyObject *parse_constant;
+} PyScannerObject;
+
+static PyMemberDef scanner_members[] = {
+    {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
+    {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
+    {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
+    {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
+    {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
+    {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
+    {NULL}
+};
+
+typedef struct _PyEncoderObject {
+    PyObject_HEAD
+    PyObject *markers;
+    PyObject *defaultfn;
+    PyObject *encoder;
+    PyObject *indent;
+    PyObject *key_separator;
+    PyObject *item_separator;
+    PyObject *sort_keys;
+    PyObject *skipkeys;
+    int fast_encode;
+    int allow_nan;
+} PyEncoderObject;
+
+static PyMemberDef encoder_members[] = {
+    {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
+    {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
+    {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
+    {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
+    {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
+    {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
+    {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
+    {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
+    {NULL}
+};
+
+static Py_ssize_t
+ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
+static PyObject *
+ascii_escape_unicode(PyObject *pystr);
+static PyObject *
+ascii_escape_str(PyObject *pystr);
+static PyObject *
+py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
+void init_speedups(void);
+static PyObject *
+scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
+static PyObject *
+scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
+static PyObject *
+_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
+static PyObject *
+scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
+static int
+scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
+static void
+scanner_dealloc(PyObject *self);
+static int
+scanner_clear(PyObject *self);
+static PyObject *
+encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
+static int
+encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
+static void
+encoder_dealloc(PyObject *self);
+static int
+encoder_clear(PyObject *self);
+static int
+encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
+static int
+encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
+static int
+encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
+static PyObject *
+_encoded_const(PyObject *const);
+static void
+raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
+static PyObject *
+encoder_encode_string(PyEncoderObject *s, PyObject *obj);
+static int
+_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
+static PyObject *
+_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
+static PyObject *
+encoder_encode_float(PyEncoderObject *s, PyObject *obj);
+
+#define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"')
+#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
+
+#define MIN_EXPANSION 6
+#ifdef Py_UNICODE_WIDE
+#define MAX_EXPANSION (2 * MIN_EXPANSION)
+#else
+#define MAX_EXPANSION MIN_EXPANSION
+#endif
+
+static int
+_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
+{
+    /* PyObject to Py_ssize_t converter */
+    *size_ptr = PyInt_AsSsize_t(o);
+    if (*size_ptr == -1 && PyErr_Occurred())
+        return 0;
+    return 1;
+}
+
+static PyObject *
+_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
+{
+    /* Py_ssize_t to PyObject converter */
+    return PyInt_FromSsize_t(*size_ptr);
+}
+
+static Py_ssize_t
+ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
+{
+    /* Escape unicode code point c to ASCII escape sequences
+    in char *output. output must have at least 12 bytes unused to
+    accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
+    output[chars++] = '\\';
+    switch (c) {
+        case '\\': output[chars++] = (char)c; break;
+        case '"': output[chars++] = (char)c; break;
+        case '\b': output[chars++] = 'b'; break;
+        case '\f': output[chars++] = 'f'; break;
+        case '\n': output[chars++] = 'n'; break;
+        case '\r': output[chars++] = 'r'; break;
+        case '\t': output[chars++] = 't'; break;
+        default:
+#ifdef Py_UNICODE_WIDE
+            if (c >= 0x10000) {
+                /* UTF-16 surrogate pair */
+                Py_UNICODE v = c - 0x10000;
+                c = 0xd800 | ((v >> 10) & 0x3ff);
+                output[chars++] = 'u';
+                output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
+                output[chars++] = "0123456789abcdef"[(c >>  8) & 0xf];
+                output[chars++] = "0123456789abcdef"[(c >>  4) & 0xf];
+                output[chars++] = "0123456789abcdef"[(c      ) & 0xf];
+                c = 0xdc00 | (v & 0x3ff);
+                output[chars++] = '\\';
+            }
+#endif
+            output[chars++] = 'u';
+            output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
+            output[chars++] = "0123456789abcdef"[(c >>  8) & 0xf];
+            output[chars++] = "0123456789abcdef"[(c >>  4) & 0xf];
+            output[chars++] = "0123456789abcdef"[(c      ) & 0xf];
+    }
+    return chars;
+}
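+/* Illustrative examples (added commentary, not in the upstream source):
+   for c == '\n' this writes the two characters "\n"; on wide unicode
+   builds, c == 0x1F600 (outside the BMP) writes the twelve-character
+   surrogate-pair escape "\ud83d\ude00". */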
+
+static PyObject *
+ascii_escape_unicode(PyObject *pystr)
+{
+    /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
+    Py_ssize_t i;
+    Py_ssize_t input_chars;
+    Py_ssize_t output_size;
+    Py_ssize_t max_output_size;
+    Py_ssize_t chars;
+    PyObject *rval;
+    char *output;
+    Py_UNICODE *input_unicode;
+
+    input_chars = PyUnicode_GET_SIZE(pystr);
+    input_unicode = PyUnicode_AS_UNICODE(pystr);
+
+    /* One char input can be up to 6 chars output, estimate 4 of these */
+    output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
+    max_output_size = 2 + (input_chars * MAX_EXPANSION);
+    rval = PyString_FromStringAndSize(NULL, output_size);
+    if (rval == NULL) {
+        return NULL;
+    }
+    output = PyString_AS_STRING(rval);
+    chars = 0;
+    output[chars++] = '"';
+    for (i = 0; i < input_chars; i++) {
+        Py_UNICODE c = input_unicode[i];
+        if (S_CHAR(c)) {
+            output[chars++] = (char)c;
+        }
+        else {
+            chars = ascii_escape_char(c, output, chars);
+        }
+        if (output_size - chars < (1 + MAX_EXPANSION)) {
+            /* There's more than four, so let's resize by a lot */
+            Py_ssize_t new_output_size = output_size * 2;
+            /* This is an upper bound */
+            if (new_output_size > max_output_size) {
+                new_output_size = max_output_size;
+            }
+            /* Make sure that the output size changed before resizing */
+            if (new_output_size != output_size) {
+                output_size = new_output_size;
+                if (_PyString_Resize(&rval, output_size) == -1) {
+                    return NULL;
+                }
+                output = PyString_AS_STRING(rval);
+            }
+        }
+    }
+    output[chars++] = '"';
+    if (_PyString_Resize(&rval, chars) == -1) {
+        return NULL;
+    }
+    return rval;
+}
+
+static PyObject *
+ascii_escape_str(PyObject *pystr)
+{
+    /* Take a PyString pystr and return a new ASCII-only escaped PyString */
+    Py_ssize_t i;
+    Py_ssize_t input_chars;
+    Py_ssize_t output_size;
+    Py_ssize_t chars;
+    PyObject *rval;
+    char *output;
+    char *input_str;
+
+    input_chars = PyString_GET_SIZE(pystr);
+    input_str = PyString_AS_STRING(pystr);
+
+    /* Fast path for a string that's already ASCII */
+    for (i = 0; i < input_chars; i++) {
+        Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
+        if (!S_CHAR(c)) {
+            /* If we have to escape something, scan the string for unicode */
+            Py_ssize_t j;
+            for (j = i; j < input_chars; j++) {
+                c = (Py_UNICODE)(unsigned char)input_str[j];
+                if (c > 0x7f) {
+                    /* We hit a non-ASCII character, bail to unicode mode */
+                    PyObject *uni;
+                    uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
+                    if (uni == NULL) {
+                        return NULL;
+                    }
+                    rval = ascii_escape_unicode(uni);
+                    Py_DECREF(uni);
+                    return rval;
+                }
+            }
+            break;
+        }
+    }
+
+    if (i == input_chars) {
+        /* Input is already ASCII */
+        output_size = 2 + input_chars;
+    }
+    else {
+        /* One char input can be up to 6 chars output, estimate 4 of these */
+        output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
+    }
+    rval = PyString_FromStringAndSize(NULL, output_size);
+    if (rval == NULL) {
+        return NULL;
+    }
+    output = PyString_AS_STRING(rval);
+    output[0] = '"';
+
+    /* We know that everything up to i is ASCII already */
+    chars = i + 1;
+    memcpy(&output[1], input_str, i);
+
+    for (; i < input_chars; i++) {
+        Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
+        if (S_CHAR(c)) {
+            output[chars++] = (char)c;
+        }
+        else {
+            chars = ascii_escape_char(c, output, chars);
+        }
+        /* An ASCII char can't possibly expand to a surrogate! */
+        if (output_size - chars < (1 + MIN_EXPANSION)) {
+            /* There's more than four, so let's resize by a lot */
+            output_size *= 2;
+            if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
+                output_size = 2 + (input_chars * MIN_EXPANSION);
+            }
+            if (_PyString_Resize(&rval, output_size) == -1) {
+                return NULL;
+            }
+            output = PyString_AS_STRING(rval);
+        }
+    }
+    output[chars++] = '"';
+    if (_PyString_Resize(&rval, chars) == -1) {
+        return NULL;
+    }
+    return rval;
+}
+
+static void
+raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
+{
+    /* Use the Python function simplejson.decoder.errmsg to raise a nice
+    looking ValueError exception */
+    static PyObject *errmsg_fn = NULL;
+    PyObject *pymsg;
+    if (errmsg_fn == NULL) {
+        PyObject *decoder = PyImport_ImportModule("simplejson.decoder");
+        if (decoder == NULL)
+            return;
+        errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
+        Py_DECREF(decoder);
+        if (errmsg_fn == NULL)
+            return;
+    }
+    pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
+    if (pymsg) {
+        PyErr_SetObject(PyExc_ValueError, pymsg);
+        Py_DECREF(pymsg);
+    }
+}
+
+static PyObject *
+join_list_unicode(PyObject *lst)
+{
+    /* return u''.join(lst) */
+    static PyObject *joinfn = NULL;
+    if (joinfn == NULL) {
+        PyObject *ustr = PyUnicode_FromUnicode(NULL, 0);
+        if (ustr == NULL)
+            return NULL;
+
+        joinfn = PyObject_GetAttrString(ustr, "join");
+        Py_DECREF(ustr);
+        if (joinfn == NULL)
+            return NULL;
+    }
+    return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
+}
+
+static PyObject *
+join_list_string(PyObject *lst)
+{
+    /* return ''.join(lst) */
+    static PyObject *joinfn = NULL;
+    if (joinfn == NULL) {
+        PyObject *ustr = PyString_FromStringAndSize(NULL, 0);
+        if (ustr == NULL)
+            return NULL;
+
+        joinfn = PyObject_GetAttrString(ustr, "join");
+        Py_DECREF(ustr);
+        if (joinfn == NULL)
+            return NULL;
+    }
+    return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
+}
+
+static PyObject *
+_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
+    /* return (rval, idx) tuple, stealing reference to rval */
+    PyObject *tpl;
+    PyObject *pyidx;
+    /*
+    steal a reference to rval, returns (rval, idx)
+    */
+    if (rval == NULL) {
+        return NULL;
+    }
+    pyidx = PyInt_FromSsize_t(idx);
+    if (pyidx == NULL) {
+        Py_DECREF(rval);
+        return NULL;
+    }
+    tpl = PyTuple_New(2);
+    if (tpl == NULL) {
+        Py_DECREF(pyidx);
+        Py_DECREF(rval);
+        return NULL;
+    }
+    PyTuple_SET_ITEM(tpl, 0, rval);
+    PyTuple_SET_ITEM(tpl, 1, pyidx);
+    return tpl;
+}
+
+static PyObject *
+scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
+{
+    /* Read the JSON string from PyString pystr.
+    end is the index of the first character after the quote.
+    encoding is the encoding of pystr (must be an ASCII superset)
+    if strict is zero then literal control characters are allowed
+    *next_end_ptr is a return-by-reference index of the character
+        after the end quote
+
+    Return value is a new PyString (if ASCII-only) or PyUnicode
+    */
+    PyObject *rval;
+    Py_ssize_t len = PyString_GET_SIZE(pystr);
+    Py_ssize_t begin = end - 1;
+    Py_ssize_t next = begin;
+    int has_unicode = 0;
+    char *buf = PyString_AS_STRING(pystr);
+    PyObject *chunks = PyList_New(0);
+    if (chunks == NULL) {
+        goto bail;
+    }
+    if (end < 0 || len <= end) {
+        PyErr_SetString(PyExc_ValueError, "end is out of bounds");
+        goto bail;
+    }
+    while (1) {
+        /* Find the end of the string or the next escape */
+        Py_UNICODE c = 0;
+        PyObject *chunk = NULL;
+        for (next = end; next < len; next++) {
+            c = (unsigned char)buf[next];
+            if (c == '"' || c == '\\') {
+                break;
+            }
+            else if (strict && c <= 0x1f) {
+                raise_errmsg("Invalid control character at", pystr, next);
+                goto bail;
+            }
+            else if (c > 0x7f) {
+                has_unicode = 1;
+            }
+        }
+        if (!(c == '"' || c == '\\')) {
+            raise_errmsg("Unterminated string starting at", pystr, begin);
+            goto bail;
+        }
+        /* Pick up this chunk if it's not zero length */
+        if (next != end) {
+            PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end);
+            if (strchunk == NULL) {
+                goto bail;
+            }
+            if (has_unicode) {
+                chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
+                Py_DECREF(strchunk);
+                if (chunk == NULL) {
+                    goto bail;
+                }
+            }
+            else {
+                chunk = strchunk;
+            }
+            if (PyList_Append(chunks, chunk)) {
+                Py_DECREF(chunk);
+                goto bail;
+            }
+            Py_DECREF(chunk);
+        }
+        next++;
+        if (c == '"') {
+            end = next;
+            break;
+        }
+        if (next == len) {
+            raise_errmsg("Unterminated string starting at", pystr, begin);
+            goto bail;
+        }
+        c = buf[next];
+        if (c != 'u') {
+            /* Non-unicode backslash escapes */
+            end = next + 1;
+            switch (c) {
+                case '"': break;
+                case '\\': break;
+                case '/': break;
+                case 'b': c = '\b'; break;
+                case 'f': c = '\f'; break;
+                case 'n': c = '\n'; break;
+                case 'r': c = '\r'; break;
+                case 't': c = '\t'; break;
+                default: c = 0;
+            }
+            if (c == 0) {
+                raise_errmsg("Invalid \\escape", pystr, end - 2);
+                goto bail;
+            }
+        }
+        else {
+            c = 0;
+            next++;
+            end = next + 4;
+            if (end >= len) {
+                raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
+                goto bail;
+            }
+            /* Decode 4 hex digits */
+            for (; next < end; next++) {
+                Py_UNICODE digit = buf[next];
+                c <<= 4;
+                switch (digit) {
+                    case '0': case '1': case '2': case '3': case '4':
+                    case '5': case '6': case '7': case '8': case '9':
+                        c |= (digit - '0'); break;
+                    case 'a': case 'b': case 'c': case 'd': case 'e':
+                    case 'f':
+                        c |= (digit - 'a' + 10); break;
+                    case 'A': case 'B': case 'C': case 'D': case 'E':
+                    case 'F':
+                        c |= (digit - 'A' + 10); break;
+                    default:
+                        raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
+                        goto bail;
+                }
+            }
+#ifdef Py_UNICODE_WIDE
+            /* Surrogate pair */
+            if ((c & 0xfc00) == 0xd800) {
+                Py_UNICODE c2 = 0;
+                if (end + 6 >= len) {
+                    raise_errmsg("Unpaired high surrogate", pystr, end - 5);
+                    goto bail;
+                }
+                if (buf[next++] != '\\' || buf[next++] != 'u') {
+                    raise_errmsg("Unpaired high surrogate", pystr, end - 5);
+                    goto bail;
+                }
+                end += 6;
+                /* Decode 4 hex digits */
+                for (; next < end; next++) {
+                    Py_UNICODE digit = buf[next];
+                    c2 <<= 4;
+                    switch (digit) {
+                        case '0': case '1': case '2': case '3': case '4':
+                        case '5': case '6': case '7': case '8': case '9':
+                            c2 |= (digit - '0'); break;
+                        case 'a': case 'b': case 'c': case 'd': case 'e':
+                        case 'f':
+                            c2 |= (digit - 'a' + 10); break;
+                        case 'A': case 'B': case 'C': case 'D': case 'E':
+                        case 'F':
+                            c2 |= (digit - 'A' + 10); break;
+                        default:
+                            raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
+                            goto bail;
+                    }
+                }
+                if ((c2 & 0xfc00) != 0xdc00) {
+                    raise_errmsg("Unpaired high surrogate", pystr, end - 5);
+                    goto bail;
+                }
+                c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
+            }
+            else if ((c & 0xfc00) == 0xdc00) {
+                raise_errmsg("Unpaired low surrogate", pystr, end - 5);
+                goto bail;
+            }
+#endif
+        }
+        if (c > 0x7f) {
+            has_unicode = 1;
+        }
+        if (has_unicode) {
+            chunk = PyUnicode_FromUnicode(&c, 1);
+            if (chunk == NULL) {
+                goto bail;
+            }
+        }
+        else {
+            char c_char = Py_CHARMASK(c);
+            chunk = PyString_FromStringAndSize(&c_char, 1);
+            if (chunk == NULL) {
+                goto bail;
+            }
+        }
+        if (PyList_Append(chunks, chunk)) {
+            Py_DECREF(chunk);
+            goto bail;
+        }
+        Py_DECREF(chunk);
+    }
+
+    rval = join_list_string(chunks);
+    if (rval == NULL) {
+        goto bail;
+    }
+    Py_CLEAR(chunks);
+    *next_end_ptr = end;
+    return rval;
+bail:
+    *next_end_ptr = -1;
+    Py_XDECREF(chunks);
+    return NULL;
+}
+
+
+static PyObject *
+scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
+{
+    /* Read the JSON string from PyUnicode pystr.
+    end is the index of the first character after the quote.
+    if strict is zero then literal control characters are allowed
+    *next_end_ptr is a return-by-reference index of the character
+        after the end quote
+
+    Return value is a new PyUnicode
+    */
+    PyObject *rval;
+    Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
+    Py_ssize_t begin = end - 1;
+    Py_ssize_t next = begin;
+    const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
+    PyObject *chunks = PyList_New(0);
+    if (chunks == NULL) {
+        goto bail;
+    }
+    if (end < 0 || len <= end) {
+        PyErr_SetString(PyExc_ValueError, "end is out of bounds");
+        goto bail;
+    }
+    while (1) {
+        /* Find the end of the string or the next escape */
+        Py_UNICODE c = 0;
+        PyObject *chunk = NULL;
+        for (next = end; next < len; next++) {
+            c = buf[next];
+            if (c == '"' || c == '\\') {
+                break;
+            }
+            else if (strict && c <= 0x1f) {
+                raise_errmsg("Invalid control character at", pystr, next);
+                goto bail;
+            }
+        }
+        if (!(c == '"' || c == '\\')) {
+            raise_errmsg("Unterminated string starting at", pystr, begin);
+            goto bail;
+        }
+        /* Pick up this chunk if it's not zero length */
+        if (next != end) {
+            chunk = PyUnicode_FromUnicode(&buf[end], next - end);
+            if (chunk == NULL) {
+                goto bail;
+            }
+            if (PyList_Append(chunks, chunk)) {
+                Py_DECREF(chunk);
+                goto bail;
+            }
+            Py_DECREF(chunk);
+        }
+        next++;
+        if (c == '"') {
+            end = next;
+            break;
+        }
+        if (next == len) {
+            raise_errmsg("Unterminated string starting at", pystr, begin);
+            goto bail;
+        }
+        c = buf[next];
+        if (c != 'u') {
+            /* Non-unicode backslash escapes */
+            end = next + 1;
+            switch (c) {
+                case '"': break;
+                case '\\': break;
+                case '/': break;
+                case 'b': c = '\b'; break;
+                case 'f': c = '\f'; break;
+                case 'n': c = '\n'; break;
+                case 'r': c = '\r'; break;
+                case 't': c = '\t'; break;
+                default: c = 0;
+            }
+            if (c == 0) {
+                raise_errmsg("Invalid \\escape", pystr, end - 2);
+                goto bail;
+            }
+        }
+        else {
+            c = 0;
+            next++;
+            end = next + 4;
+            if (end >= len) {
+                raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
+                goto bail;
+            }
+            /* Decode 4 hex digits */
+            for (; next < end; next++) {
+                Py_UNICODE digit = buf[next];
+                c <<= 4;
+                switch (digit) {
+                    case '0': case '1': case '2': case '3': case '4':
+                    case '5': case '6': case '7': case '8': case '9':
+                        c |= (digit - '0'); break;
+                    case 'a': case 'b': case 'c': case 'd': case 'e':
+                    case 'f':
+                        c |= (digit - 'a' + 10); break;
+                    case 'A': case 'B': case 'C': case 'D': case 'E':
+                    case 'F':
+                        c |= (digit - 'A' + 10); break;
+                    default:
+                        raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
+                        goto bail;
+                }
+            }
+#ifdef Py_UNICODE_WIDE
+            /* Surrogate pair */
+            if ((c & 0xfc00) == 0xd800) {
+                Py_UNICODE c2 = 0;
+                if (end + 6 >= len) {
+                    raise_errmsg("Unpaired high surrogate", pystr, end - 5);
+                    goto bail;
+                }
+                if (buf[next++] != '\\' || buf[next++] != 'u') {
+                    raise_errmsg("Unpaired high surrogate", pystr, end - 5);
+                    goto bail;
+                }
+                end += 6;
+                /* Decode 4 hex digits */
+                for (; next < end; next++) {
+                    Py_UNICODE digit = buf[next];
+                    c2 <<= 4;
+                    switch (digit) {
+                        case '0': case '1': case '2': case '3': case '4':
+                        case '5': case '6': case '7': case '8': case '9':
+                            c2 |= (digit - '0'); break;
+                        case 'a': case 'b': case 'c': case 'd': case 'e':
+                        case 'f':
+                            c2 |= (digit - 'a' + 10); break;
+                        case 'A': case 'B': case 'C': case 'D': case 'E':
+                        case 'F':
+                            c2 |= (digit - 'A' + 10); break;
+                        default:
+                            raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
+                            goto bail;
+                    }
+                }
+                if ((c2 & 0xfc00) != 0xdc00) {
+                    raise_errmsg("Unpaired high surrogate", pystr, end - 5);
+                    goto bail;
+                }
+                c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
+            }
+            else if ((c & 0xfc00) == 0xdc00) {
+                raise_errmsg("Unpaired low surrogate", pystr, end - 5);
+                goto bail;
+            }
+#endif
+        }
+        chunk = PyUnicode_FromUnicode(&c, 1);
+        if (chunk == NULL) {
+            goto bail;
+        }
+        if (PyList_Append(chunks, chunk)) {
+            Py_DECREF(chunk);
+            goto bail;
+        }
+        Py_DECREF(chunk);
+    }
+
+    rval = join_list_unicode(chunks);
+    if (rval == NULL) {
+        goto bail;
+    }
+    Py_DECREF(chunks);
+    *next_end_ptr = end;
+    return rval;
+bail:
+    *next_end_ptr = -1;
+    Py_XDECREF(chunks);
+    return NULL;
+}
+
+PyDoc_STRVAR(pydoc_scanstring,
+    "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
+    "\n"
+    "Scan the string s for a JSON string. End is the index of the\n"
+    "character in s after the quote that started the JSON string.\n"
+    "Unescapes all valid JSON string escape sequences and raises ValueError\n"
+    "on attempt to decode an invalid string. If strict is False then literal\n"
+    "control characters are allowed in the string.\n"
+    "\n"
+    "Returns a tuple of the decoded string and the index of the character in s\n"
+    "after the end quote."
+);
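+/* Illustrative call (added commentary, not in the upstream source):
+   scanstring('"foo"', 1) decodes the string opened by the quote at index 0
+   and returns ('foo', 5), where 5 is the index just past the closing
+   quote. */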
+
+static PyObject *
+py_scanstring(PyObject* self UNUSED, PyObject *args)
+{
+    PyObject *pystr;
+    PyObject *rval;
+    Py_ssize_t end;
+    Py_ssize_t next_end = -1;
+    char *encoding = NULL;
+    int strict = 1;
+    if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
+        return NULL;
+    }
+    if (encoding == NULL) {
+        encoding = DEFAULT_ENCODING;
+    }
+    if (PyString_Check(pystr)) {
+        rval = scanstring_str(pystr, end, encoding, strict, &next_end);
+    }
+    else if (PyUnicode_Check(pystr)) {
+        rval = scanstring_unicode(pystr, end, strict, &next_end);
+    }
+    else {
+        PyErr_Format(PyExc_TypeError,
+                     "first argument must be a string, not %.80s",
+                     Py_TYPE(pystr)->tp_name);
+        return NULL;
+    }
+    return _build_rval_index_tuple(rval, next_end);
+}
+
+PyDoc_STRVAR(pydoc_encode_basestring_ascii,
+    "encode_basestring_ascii(basestring) -> str\n"
+    "\n"
+    "Return an ASCII-only JSON representation of a Python string"
+);
+
+static PyObject *
+py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
+{
+    /* Return an ASCII-only JSON representation of a Python string */
+    /* METH_O */
+    if (PyString_Check(pystr)) {
+        return ascii_escape_str(pystr);
+    }
+    else if (PyUnicode_Check(pystr)) {
+        return ascii_escape_unicode(pystr);
+    }
+    else {
+        PyErr_Format(PyExc_TypeError,
+                     "first argument must be a string, not %.80s",
+                     Py_TYPE(pystr)->tp_name);
+        return NULL;
+    }
+}
+
+static void
+scanner_dealloc(PyObject *self)
+{
+    /* Deallocate scanner object */
+    scanner_clear(self);
+    Py_TYPE(self)->tp_free(self);
+}
+
+static int
+scanner_traverse(PyObject *self, visitproc visit, void *arg)
+{
+    PyScannerObject *s;
+    assert(PyScanner_Check(self));
+    s = (PyScannerObject *)self;
+    Py_VISIT(s->encoding);
+    Py_VISIT(s->strict);
+    Py_VISIT(s->object_hook);
+    Py_VISIT(s->parse_float);
+    Py_VISIT(s->parse_int);
+    Py_VISIT(s->parse_constant);
+    return 0;
+}
+
+static int
+scanner_clear(PyObject *self)
+{
+    PyScannerObject *s;
+    assert(PyScanner_Check(self));
+    s = (PyScannerObject *)self;
+    Py_CLEAR(s->encoding);
+    Py_CLEAR(s->strict);
+    Py_CLEAR(s->object_hook);
+    Py_CLEAR(s->parse_float);
+    Py_CLEAR(s->parse_int);
+    Py_CLEAR(s->parse_constant);
+    return 0;
+}
+
+static PyObject *
+_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+    /* Read a JSON object from PyString pystr.
+    idx is the index of the first character after the opening curly brace.
+    *next_idx_ptr is a return-by-reference index to the first character after
+        the closing curly brace.
+
+    Returns a new PyObject (usually a dict, but object_hook can change that)
+    */
+    char *str = PyString_AS_STRING(pystr);
+    Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
+    PyObject *rval = PyDict_New();
+    PyObject *key = NULL;
+    PyObject *val = NULL;
+    char *encoding = PyString_AS_STRING(s->encoding);
+    int strict = PyObject_IsTrue(s->strict);
+    Py_ssize_t next_idx;
+    if (rval == NULL)
+        return NULL;
+
+    /* skip whitespace after { */
+    while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+    /* only loop if the object is non-empty */
+    if (idx <= end_idx && str[idx] != '}') {
+        while (idx <= end_idx) {
+            /* read key */
+            if (str[idx] != '"') {
+                raise_errmsg("Expecting property name", pystr, idx);
+                goto bail;
+            }
+            key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
+            if (key == NULL)
+                goto bail;
+            idx = next_idx;
+
+            /* skip whitespace between key and : delimiter, read :, skip whitespace */
+            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+            if (idx > end_idx || str[idx] != ':') {
+                raise_errmsg("Expecting : delimiter", pystr, idx);
+                goto bail;
+            }
+            idx++;
+            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+            /* read any JSON data type */
+            val = scan_once_str(s, pystr, idx, &next_idx);
+            if (val == NULL)
+                goto bail;
+
+            if (PyDict_SetItem(rval, key, val) == -1)
+                goto bail;
+
+            Py_CLEAR(key);
+            Py_CLEAR(val);
+            idx = next_idx;
+
+            /* skip whitespace before } or , */
+            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+            /* bail if the object is closed or we didn't get the , delimiter */
+            if (idx > end_idx) break;
+            if (str[idx] == '}') {
+                break;
+            }
+            else if (str[idx] != ',') {
+                raise_errmsg("Expecting , delimiter", pystr, idx);
+                goto bail;
+            }
+            idx++;
+
+            /* skip whitespace after , delimiter */
+            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+        }
+    }
+    /* verify that idx < end_idx, str[idx] should be '}' */
+    if (idx > end_idx || str[idx] != '}') {
+        raise_errmsg("Expecting object", pystr, end_idx);
+        goto bail;
+    }
+    /* if object_hook is not None: rval = object_hook(rval) */
+    if (s->object_hook != Py_None) {
+        val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
+        if (val == NULL)
+            goto bail;
+        Py_DECREF(rval);
+        rval = val;
+        val = NULL;
+    }
+    *next_idx_ptr = idx + 1;
+    return rval;
+bail:
+    Py_XDECREF(key);
+    Py_XDECREF(val);
+    Py_DECREF(rval);
+    return NULL;
+}
+
+static PyObject *
+_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+    /* Read a JSON object from PyUnicode pystr.
+    idx is the index of the first character after the opening curly brace.
+    *next_idx_ptr is a return-by-reference index to the first character after
+        the closing curly brace.
+
+    Returns a new PyObject (usually a dict, but object_hook can change that)
+    */
+    Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
+    Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
+    PyObject *val = NULL;
+    PyObject *rval = PyDict_New();
+    PyObject *key = NULL;
+    int strict = PyObject_IsTrue(s->strict);
+    Py_ssize_t next_idx;
+    if (rval == NULL)
+        return NULL;
+
+    /* skip whitespace after { */
+    while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+    /* only loop if the object is non-empty */
+    if (idx <= end_idx && str[idx] != '}') {
+        while (idx <= end_idx) {
+            /* read key */
+            if (str[idx] != '"') {
+                raise_errmsg("Expecting property name", pystr, idx);
+                goto bail;
+            }
+            key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
+            if (key == NULL)
+                goto bail;
+            idx = next_idx;
+
+            /* skip whitespace between key and : delimiter, read :, skip whitespace */
+            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+            if (idx > end_idx || str[idx] != ':') {
+                raise_errmsg("Expecting : delimiter", pystr, idx);
+                goto bail;
+            }
+            idx++;
+            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+            /* read any JSON term */
+            val = scan_once_unicode(s, pystr, idx, &next_idx);
+            if (val == NULL)
+                goto bail;
+
+            if (PyDict_SetItem(rval, key, val) == -1)
+                goto bail;
+
+            Py_CLEAR(key);
+            Py_CLEAR(val);
+            idx = next_idx;
+
+            /* skip whitespace before } or , */
+            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+            /* bail if the object is closed or we didn't get the , delimiter */
+            if (idx > end_idx) break;
+            if (str[idx] == '}') {
+                break;
+            }
+            else if (str[idx] != ',') {
+                raise_errmsg("Expecting , delimiter", pystr, idx);
+                goto bail;
+            }
+            idx++;
+
+            /* skip whitespace after , delimiter */
+            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+        }
+    }
+
+    /* verify that idx < end_idx, str[idx] should be '}' */
+    if (idx > end_idx || str[idx] != '}') {
+        raise_errmsg("Expecting object", pystr, end_idx);
+        goto bail;
+    }
+
+    /* if object_hook is not None: rval = object_hook(rval) */
+    if (s->object_hook != Py_None) {
+        val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
+        if (val == NULL)
+            goto bail;
+        Py_DECREF(rval);
+        rval = val;
+        val = NULL;
+    }
+    *next_idx_ptr = idx + 1;
+    return rval;
+bail:
+    Py_XDECREF(key);
+    Py_XDECREF(val);
+    Py_DECREF(rval);
+    return NULL;
+}
+
+static PyObject *
+_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+    /* Read a JSON array from PyString pystr.
+    idx is the index of the first character after the opening brace.
+    *next_idx_ptr is a return-by-reference index to the first character after
+        the closing brace.
+
+    Returns a new PyList
+    */
+    char *str = PyString_AS_STRING(pystr);
+    Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
+    PyObject *val = NULL;
+    PyObject *rval = PyList_New(0);
+    Py_ssize_t next_idx;
+    if (rval == NULL)
+        return NULL;
+
+    /* skip whitespace after [ */
+    while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+    /* only loop if the array is non-empty */
+    if (idx <= end_idx && str[idx] != ']') {
+        while (idx <= end_idx) {
+
+            /* read any JSON term and de-tuplefy the (rval, idx) */
+            val = scan_once_str(s, pystr, idx, &next_idx);
+            if (val == NULL)
+                goto bail;
+
+            if (PyList_Append(rval, val) == -1)
+                goto bail;
+
+            Py_CLEAR(val);
+            idx = next_idx;
+
+            /* skip whitespace between term and , */
+            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+            /* bail if the array is closed or we didn't get the , delimiter */
+            if (idx > end_idx) break;
+            if (str[idx] == ']') {
+                break;
+            }
+            else if (str[idx] != ',') {
+                raise_errmsg("Expecting , delimiter", pystr, idx);
+                goto bail;
+            }
+            idx++;
+
+            /* skip whitespace after , */
+            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+        }
+    }
+
+    /* verify that idx < end_idx, str[idx] should be ']' */
+    if (idx > end_idx || str[idx] != ']') {
+        raise_errmsg("Expecting object", pystr, end_idx);
+        goto bail;
+    }
+    *next_idx_ptr = idx + 1;
+    return rval;
+bail:
+    Py_XDECREF(val);
+    Py_DECREF(rval);
+    return NULL;
+}
+
+static PyObject *
+_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+    /* Read a JSON array from PyString pystr.
+    idx is the index of the first character after the opening brace.
+    *next_idx_ptr is a return-by-reference index to the first character after
+        the closing brace.
+
+    Returns a new PyList
+    */
+    Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
+    Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
+    PyObject *val = NULL;
+    PyObject *rval = PyList_New(0);
+    Py_ssize_t next_idx;
+    if (rval == NULL)
+        return NULL;
+
+    /* skip whitespace after [ */
+    while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+    /* only loop if the array is non-empty */
+    if (idx <= end_idx && str[idx] != ']') {
+        while (idx <= end_idx) {
+
+            /* read any JSON term  */
+            val = scan_once_unicode(s, pystr, idx, &next_idx);
+            if (val == NULL)
+                goto bail;
+
+            if (PyList_Append(rval, val) == -1)
+                goto bail;
+
+            Py_CLEAR(val);
+            idx = next_idx;
+
+            /* skip whitespace between term and , */
+            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+            /* bail if the array is closed or we didn't get the , delimiter */
+            if (idx > end_idx) break;
+            if (str[idx] == ']') {
+                break;
+            }
+            else if (str[idx] != ',') {
+                raise_errmsg("Expecting , delimiter", pystr, idx);
+                goto bail;
+            }
+            idx++;
+
+            /* skip whitespace after , */
+            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+        }
+    }
+
+    /* verify that idx < end_idx, str[idx] should be ']' */
+    if (idx > end_idx || str[idx] != ']') {
+        raise_errmsg("Expecting object", pystr, end_idx);
+        goto bail;
+    }
+    *next_idx_ptr = idx + 1;
+    return rval;
+bail:
+    Py_XDECREF(val);
+    Py_DECREF(rval);
+    return NULL;
+}
+
+static PyObject *
+_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+    /* Read a JSON constant from PyString pystr.
+    constant is the constant string that was found
+        ("NaN", "Infinity", "-Infinity").
+    idx is the index of the first character of the constant
+    *next_idx_ptr is a return-by-reference index to the first character after
+        the constant.
+
+    Returns the result of parse_constant
+    */
+    PyObject *cstr;
+    PyObject *rval;
+    /* constant is "NaN", "Infinity", or "-Infinity" */
+    cstr = PyString_InternFromString(constant);
+    if (cstr == NULL)
+        return NULL;
+
+    /* rval = parse_constant(constant) */
+    rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
+    idx += PyString_GET_SIZE(cstr);
+    Py_DECREF(cstr);
+    *next_idx_ptr = idx;
+    return rval;
+}
+
+static PyObject *
+_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
+    /* Read a JSON number from PyString pystr.
+    idx is the index of the first character of the number
+    *next_idx_ptr is a return-by-reference index to the first character after
+        the number.
+
+    Returns a new PyObject representation of that number:
+        PyInt, PyLong, or PyFloat.
+        May return other types if parse_int or parse_float are set
+    */
+    char *str = PyString_AS_STRING(pystr);
+    Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
+    Py_ssize_t idx = start;
+    int is_float = 0;
+    PyObject *rval;
+    PyObject *numstr;
+
+    /* read a sign if it's there, make sure it's not the end of the string */
+    if (str[idx] == '-') {
+        idx++;
+        if (idx > end_idx) {
+            PyErr_SetNone(PyExc_StopIteration);
+            return NULL;
+        }
+    }
+
+    /* read as many integer digits as we find as long as it doesn't start with 0 */
+    if (str[idx] >= '1' && str[idx] <= '9') {
+        idx++;
+        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
+    }
+    /* if it starts with 0 we only expect one integer digit */
+    else if (str[idx] == '0') {
+        idx++;
+    }
+    /* no integer digits, error */
+    else {
+        PyErr_SetNone(PyExc_StopIteration);
+        return NULL;
+    }
+
+    /* if the next char is '.' followed by a digit then read all float digits */
+    if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
+        is_float = 1;
+        idx += 2;
+        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
+    }
+
+    /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
+    if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
+
+        /* save the index of the 'e' or 'E' just in case we need to backtrack */
+        Py_ssize_t e_start = idx;
+        idx++;
+
+        /* read an exponent sign if present */
+        if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
+
+        /* read all digits */
+        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
+
+        /* if we got a digit, then parse as float. if not, backtrack */
+        if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
+            is_float = 1;
+        }
+        else {
+            idx = e_start;
+        }
+    }
+
+    /* copy the section we determined to be a number */
+    numstr = PyString_FromStringAndSize(&str[start], idx - start);
+    if (numstr == NULL)
+        return NULL;
+    if (is_float) {
+        /* parse as a float using a fast path if available, otherwise call user defined method */
+        if (s->parse_float != (PyObject *)&PyFloat_Type) {
+            rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
+        }
+        else {
+            rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr)));
+        }
+    }
+    else {
+        /* parse as an int using a fast path if available, otherwise call user defined method */
+        if (s->parse_int != (PyObject *)&PyInt_Type) {
+            rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
+        }
+        else {
+            rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
+        }
+    }
+    Py_DECREF(numstr);
+    *next_idx_ptr = idx;
+    return rval;
+}
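+/* Illustrative trace (added commentary, not in the upstream source):
+   for pystr == "12.5e3," and start == 0 this matches the span "12.5e3",
+   flags it as a float, and sets *next_idx_ptr to 6, the index of the
+   trailing ','. */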
+
+static PyObject *
+_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
+    /* Read a JSON number from PyUnicode pystr.
+    idx is the index of the first character of the number
+    *next_idx_ptr is a return-by-reference index to the first character after
+        the number.
+
+    Returns a new PyObject representation of that number:
+        PyInt, PyLong, or PyFloat.
+        May return other types if parse_int or parse_float are set
+    */
+    Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
+    Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
+    Py_ssize_t idx = start;
+    int is_float = 0;
+    PyObject *rval;
+    PyObject *numstr;
+
+    /* read a sign if it's there, make sure it's not the end of the string */
+    if (str[idx] == '-') {
+        idx++;
+        if (idx > end_idx) {
+            PyErr_SetNone(PyExc_StopIteration);
+            return NULL;
+        }
+    }
+
+    /* read as many integer digits as we find as long as it doesn't start with 0 */
+    if (str[idx] >= '1' && str[idx] <= '9') {
+        idx++;
+        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
+    }
+    /* if it starts with 0 we only expect one integer digit */
+    else if (str[idx] == '0') {
+        idx++;
+    }
+    /* no integer digits, error */
+    else {
+        PyErr_SetNone(PyExc_StopIteration);
+        return NULL;
+    }
+
+    /* if the next char is '.' followed by a digit then read all float digits */
+    if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
+        is_float = 1;
+        idx += 2;
+        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
+    }
+
+    /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
+    if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
+        Py_ssize_t e_start = idx;
+        idx++;
+
+        /* read an exponent sign if present */
+        if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
+
+        /* read all digits */
+        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
+
+        /* if we got a digit, then parse as float. if not, backtrack */
+        if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
+            is_float = 1;
+        }
+        else {
+            idx = e_start;
+        }
+    }
+
+    /* copy the section we determined to be a number */
+    numstr = PyUnicode_FromUnicode(&str[start], idx - start);
+    if (numstr == NULL)
+        return NULL;
+    if (is_float) {
+        /* parse as a float using a fast path if available, otherwise call user defined method */
+        if (s->parse_float != (PyObject *)&PyFloat_Type) {
+            rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
+        }
+        else {
+            rval = PyFloat_FromString(numstr, NULL);
+        }
+    }
+    else {
+        /* no fast path for unicode -> int, just call */
+        rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
+    }
+    Py_DECREF(numstr);
+    *next_idx_ptr = idx;
+    return rval;
+}
+
+static PyObject *
+scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
+{
+    /* Read one JSON term (of any kind) from PyString pystr.
+    idx is the index of the first character of the term
+    *next_idx_ptr is a return-by-reference index to the first character after
+        the number.
+
+    Returns a new PyObject representation of the term.
+    */
+    char *str = PyString_AS_STRING(pystr);
+    Py_ssize_t length = PyString_GET_SIZE(pystr);
+    if (idx >= length) {
+        PyErr_SetNone(PyExc_StopIteration);
+        return NULL;
+    }
+    switch (str[idx]) {
+        case '"':
+            /* string */
+            return scanstring_str(pystr, idx + 1,
+                PyString_AS_STRING(s->encoding),
+                PyObject_IsTrue(s->strict),
+                next_idx_ptr);
+        case '{':
+            /* object */
+            return _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
+        case '[':
+            /* array */
+            return _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
+        case 'n':
+            /* null */
+            if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
+                Py_INCREF(Py_None);
+                *next_idx_ptr = idx + 4;
+                return Py_None;
+            }
+            break;
+        case 't':
+            /* true */
+            if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
+                Py_INCREF(Py_True);
+                *next_idx_ptr = idx + 4;
+                return Py_True;
+            }
+            break;
+        case 'f':
+            /* false */
+            if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
+                Py_INCREF(Py_False);
+                *next_idx_ptr = idx + 5;
+                return Py_False;
+            }
+            break;
+        case 'N':
+            /* NaN */
+            if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
+                return _parse_constant(s, "NaN", idx, next_idx_ptr);
+            }
+            break;
+        case 'I':
+            /* Infinity */
+            if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
+                return _parse_constant(s, "Infinity", idx, next_idx_ptr);
+            }
+            break;
+        case '-':
+            /* -Infinity */
+            if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
+                return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
+            }
+            break;
+    }
+    /* Didn't find a string, object, array, or named constant. Look for a number. */
+    return _match_number_str(s, pystr, idx, next_idx_ptr);
+}
+
+static PyObject *
+scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
+{
+    /* Read one JSON term (of any kind) from PyUnicode pystr.
+    idx is the index of the first character of the term
+    *next_idx_ptr is a return-by-reference index to the first character after
+        the number.
+
+    Returns a new PyObject representation of the term.
+    */
+    Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
+    Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
+    if (idx >= length) {
+        PyErr_SetNone(PyExc_StopIteration);
+        return NULL;
+    }
+    switch (str[idx]) {
+        case '"':
+            /* string */
+            return scanstring_unicode(pystr, idx + 1,
+                PyObject_IsTrue(s->strict),
+                next_idx_ptr);
+        case '{':
+            /* object */
+            return _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
+        case '[':
+            /* array */
+            return _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
+        case 'n':
+            /* null */
+            if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
+                Py_INCREF(Py_None);
+                *next_idx_ptr = idx + 4;
+                return Py_None;
+            }
+            break;
+        case 't':
+            /* true */
+            if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
+                Py_INCREF(Py_True);
+                *next_idx_ptr = idx + 4;
+                return Py_True;
+            }
+            break;
+        case 'f':
+            /* false */
+            if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
+                Py_INCREF(Py_False);
+                *next_idx_ptr = idx + 5;
+                return Py_False;
+            }
+            break;
+        case 'N':
+            /* NaN */
+            if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
+                return _parse_constant(s, "NaN", idx, next_idx_ptr);
+            }
+            break;
+        case 'I':
+            /* Infinity */
+            if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
+                return _parse_constant(s, "Infinity", idx, next_idx_ptr);
+            }
+            break;
+        case '-':
+            /* -Infinity */
+            if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
+                return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
+            }
+            break;
+    }
+    /* Didn't find a string, object, array, or named constant. Look for a number. */
+    return _match_number_unicode(s, pystr, idx, next_idx_ptr);
+}
+
+static PyObject *
+scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    /* Python callable interface to scan_once_{str,unicode} */
+    PyObject *pystr;
+    PyObject *rval;
+    Py_ssize_t idx;
+    Py_ssize_t next_idx = -1;
+    static char *kwlist[] = {"string", "idx", NULL};
+    PyScannerObject *s;
+    assert(PyScanner_Check(self));
+    s = (PyScannerObject *)self;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
+        return NULL;
+
+    if (PyString_Check(pystr)) {
+        rval = scan_once_str(s, pystr, idx, &next_idx);
+    }
+    else if (PyUnicode_Check(pystr)) {
+        rval = scan_once_unicode(s, pystr, idx, &next_idx);
+    }
+    else {
+        PyErr_Format(PyExc_TypeError,
+                 "first argument must be a string, not %.80s",
+                 Py_TYPE(pystr)->tp_name);
+        return NULL;
+    }
+    return _build_rval_index_tuple(rval, next_idx);
+}
+
+static PyObject *
+scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+    PyScannerObject *s;
+    s = (PyScannerObject *)type->tp_alloc(type, 0);
+    if (s != NULL) {
+        s->encoding = NULL;
+        s->strict = NULL;
+        s->object_hook = NULL;
+        s->parse_float = NULL;
+        s->parse_int = NULL;
+        s->parse_constant = NULL;
+    }
+    return (PyObject *)s;
+}
+
+static int
+scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    /* Initialize Scanner object */
+    PyObject *ctx;
+    static char *kwlist[] = {"context", NULL};
+    PyScannerObject *s;
+
+    assert(PyScanner_Check(self));
+    s = (PyScannerObject *)self;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
+        return -1;
+
+    /* PyString_AS_STRING is used on encoding */
+    s->encoding = PyObject_GetAttrString(ctx, "encoding");
+    if (s->encoding == Py_None) {
+        Py_DECREF(Py_None);
+        s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
+    }
+    else if (PyUnicode_Check(s->encoding)) {
+        PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
+        Py_DECREF(s->encoding);
+        s->encoding = tmp;
+    }
+    if (s->encoding == NULL || !PyString_Check(s->encoding))
+        goto bail;
+
+    /* All of these will fail "gracefully" so we don't need to verify them */
+    s->strict = PyObject_GetAttrString(ctx, "strict");
+    if (s->strict == NULL)
+        goto bail;
+    s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
+    if (s->object_hook == NULL)
+        goto bail;
+    s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
+    if (s->parse_float == NULL)
+        goto bail;
+    s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
+    if (s->parse_int == NULL)
+        goto bail;
+    s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
+    if (s->parse_constant == NULL)
+        goto bail;
+
+    return 0;
+
+bail:
+    Py_CLEAR(s->encoding);
+    Py_CLEAR(s->strict);
+    Py_CLEAR(s->object_hook);
+    Py_CLEAR(s->parse_float);
+    Py_CLEAR(s->parse_int);
+    Py_CLEAR(s->parse_constant);
+    return -1;
+}
+
+PyDoc_STRVAR(scanner_doc, "JSON scanner object");
+
+static
+PyTypeObject PyScannerType = {
+    PyObject_HEAD_INIT(NULL)
+    0,                    /* ob_size */
+    "simplejson._speedups.Scanner",       /* tp_name */
+    sizeof(PyScannerObject), /* tp_basicsize */
+    0,                    /* tp_itemsize */
+    scanner_dealloc, /* tp_dealloc */
+    0,                    /* tp_print */
+    0,                    /* tp_getattr */
+    0,                    /* tp_setattr */
+    0,                    /* tp_compare */
+    0,                    /* tp_repr */
+    0,                    /* tp_as_number */
+    0,                    /* tp_as_sequence */
+    0,                    /* tp_as_mapping */
+    0,                    /* tp_hash */
+    scanner_call,         /* tp_call */
+    0,                    /* tp_str */
+    0,/* PyObject_GenericGetAttr, */                    /* tp_getattro */
+    0,/* PyObject_GenericSetAttr, */                    /* tp_setattro */
+    0,                    /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
+    scanner_doc,          /* tp_doc */
+    scanner_traverse,                    /* tp_traverse */
+    scanner_clear,                    /* tp_clear */
+    0,                    /* tp_richcompare */
+    0,                    /* tp_weaklistoffset */
+    0,                    /* tp_iter */
+    0,                    /* tp_iternext */
+    0,                    /* tp_methods */
+    scanner_members,                    /* tp_members */
+    0,                    /* tp_getset */
+    0,                    /* tp_base */
+    0,                    /* tp_dict */
+    0,                    /* tp_descr_get */
+    0,                    /* tp_descr_set */
+    0,                    /* tp_dictoffset */
+    scanner_init,                    /* tp_init */
+    0,/* PyType_GenericAlloc, */        /* tp_alloc */
+    scanner_new,          /* tp_new */
+    0,/* PyObject_GC_Del, */              /* tp_free */
+};
+
+static PyObject *
+encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+    PyEncoderObject *s;
+    s = (PyEncoderObject *)type->tp_alloc(type, 0);
+    if (s != NULL) {
+        s->markers = NULL;
+        s->defaultfn = NULL;
+        s->encoder = NULL;
+        s->indent = NULL;
+        s->key_separator = NULL;
+        s->item_separator = NULL;
+        s->sort_keys = NULL;
+        s->skipkeys = NULL;
+    }
+    return (PyObject *)s;
+}
+
+static int
+encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    /* initialize Encoder object */
+    static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
+
+    PyEncoderObject *s;
+    PyObject *allow_nan;
+
+    assert(PyEncoder_Check(self));
+    s = (PyEncoderObject *)self;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
+        &s->markers, &s->defaultfn, &s->encoder, &s->indent, &s->key_separator, &s->item_separator, &s->sort_keys, &s->skipkeys, &allow_nan))
+        return -1;
+
+    Py_INCREF(s->markers);
+    Py_INCREF(s->defaultfn);
+    Py_INCREF(s->encoder);
+    Py_INCREF(s->indent);
+    Py_INCREF(s->key_separator);
+    Py_INCREF(s->item_separator);
+    Py_INCREF(s->sort_keys);
+    Py_INCREF(s->skipkeys);
+    s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
+    s->allow_nan = PyObject_IsTrue(allow_nan);
+    return 0;
+}
+
+static PyObject *
+encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    /* Python callable interface to encoder_listencode_obj */
+    static char *kwlist[] = {"obj", "_current_indent_level", NULL};
+    PyObject *obj;
+    PyObject *rval;
+    Py_ssize_t indent_level;
+    PyEncoderObject *s;
+    assert(PyEncoder_Check(self));
+    s = (PyEncoderObject *)self;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
+        &obj, _convertPyInt_AsSsize_t, &indent_level))
+        return NULL;
+    rval = PyList_New(0);
+    if (rval == NULL)
+        return NULL;
+    if (encoder_listencode_obj(s, rval, obj, indent_level)) {
+        Py_DECREF(rval);
+        return NULL;
+    }
+    return rval;
+}
+
+static PyObject *
+_encoded_const(PyObject *obj)
+{
+    /* Return the JSON string representation of None, True, False */
+    if (obj == Py_None) {
+        static PyObject *s_null = NULL;
+        if (s_null == NULL) {
+            s_null = PyString_InternFromString("null");
+        }
+        Py_INCREF(s_null);
+        return s_null;
+    }
+    else if (obj == Py_True) {
+        static PyObject *s_true = NULL;
+        if (s_true == NULL) {
+            s_true = PyString_InternFromString("true");
+        }
+        Py_INCREF(s_true);
+        return s_true;
+    }
+    else if (obj == Py_False) {
+        static PyObject *s_false = NULL;
+        if (s_false == NULL) {
+            s_false = PyString_InternFromString("false");
+        }
+        Py_INCREF(s_false);
+        return s_false;
+    }
+    else {
+        PyErr_SetString(PyExc_ValueError, "not a const");
+        return NULL;
+    }
+}
+
+static PyObject *
+encoder_encode_float(PyEncoderObject *s, PyObject *obj)
+{
+    /* Return the JSON representation of a PyFloat */
+    double i = PyFloat_AS_DOUBLE(obj);
+    if (!Py_IS_FINITE(i)) {
+        if (!s->allow_nan) {
+            PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
+            return NULL;
+        }
+        if (i > 0) {
+            return PyString_FromString("Infinity");
+        }
+        else if (i < 0) {
+            return PyString_FromString("-Infinity");
+        }
+        else {
+            return PyString_FromString("NaN");
+        }
+    }
+    /* Use a better float format here? */
+    return PyObject_Repr(obj);
+}
+
+static PyObject *
+encoder_encode_string(PyEncoderObject *s, PyObject *obj)
+{
+    /* Return the JSON representation of a string */
+    if (s->fast_encode)
+        return py_encode_basestring_ascii(NULL, obj);
+    else
+        return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
+}
+
+static int
+_steal_list_append(PyObject *lst, PyObject *stolen)
+{
+    /* Append stolen and then decrement its reference count */
+    int rval = PyList_Append(lst, stolen);
+    Py_DECREF(stolen);
+    return rval;
+}
+
+static int
+encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
+{
+    /* Encode Python object obj to a JSON term, rval is a PyList */
+    PyObject *newobj;
+    int rv;
+
+    if (obj == Py_None || obj == Py_True || obj == Py_False) {
+        PyObject *cstr = _encoded_const(obj);
+        if (cstr == NULL)
+            return -1;
+        return _steal_list_append(rval, cstr);
+    }
+    else if (PyString_Check(obj) || PyUnicode_Check(obj))
+    {
+        PyObject *encoded = encoder_encode_string(s, obj);
+        if (encoded == NULL)
+            return -1;
+        return _steal_list_append(rval, encoded);
+    }
+    else if (PyInt_Check(obj) || PyLong_Check(obj)) {
+        PyObject *encoded = PyObject_Str(obj);
+        if (encoded == NULL)
+            return -1;
+        return _steal_list_append(rval, encoded);
+    }
+    else if (PyFloat_Check(obj)) {
+        PyObject *encoded = encoder_encode_float(s, obj);
+        if (encoded == NULL)
+            return -1;
+        return _steal_list_append(rval, encoded);
+    }
+    else if (PyList_Check(obj) || PyTuple_Check(obj)) {
+        return encoder_listencode_list(s, rval, obj, indent_level);
+    }
+    else if (PyDict_Check(obj)) {
+        return encoder_listencode_dict(s, rval, obj, indent_level);
+    }
+    else {
+        PyObject *ident = NULL;
+        if (s->markers != Py_None) {
+            int has_key;
+            ident = PyLong_FromVoidPtr(obj);
+            if (ident == NULL)
+                return -1;
+            has_key = PyDict_Contains(s->markers, ident);
+            if (has_key) {
+                if (has_key != -1)
+                    PyErr_SetString(PyExc_ValueError, "Circular reference detected");
+                Py_DECREF(ident);
+                return -1;
+            }
+            if (PyDict_SetItem(s->markers, ident, obj)) {
+                Py_DECREF(ident);
+                return -1;
+            }
+        }
+        newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
+        if (newobj == NULL) {
+            Py_XDECREF(ident);
+            return -1;
+        }
+        rv = encoder_listencode_obj(s, rval, newobj, indent_level);
+        Py_DECREF(newobj);
+        if (rv) {
+            Py_XDECREF(ident);
+            return -1;
+        }
+        if (ident != NULL) {
+            if (PyDict_DelItem(s->markers, ident)) {
+                Py_XDECREF(ident);
+                return -1;
+            }
+            Py_XDECREF(ident);
+        }
+        return rv;
+    }
+}
+
+static int
+encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
+{
+    /* Encode Python dict dct to a JSON term, rval is a PyList */
+    static PyObject *open_dict = NULL;
+    static PyObject *close_dict = NULL;
+    static PyObject *empty_dict = NULL;
+    PyObject *kstr = NULL;
+    PyObject *ident = NULL;
+    PyObject *key, *value;
+    Py_ssize_t pos;
+    int skipkeys;
+    Py_ssize_t idx;
+
+    if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
+        open_dict = PyString_InternFromString("{");
+        close_dict = PyString_InternFromString("}");
+        empty_dict = PyString_InternFromString("{}");
+        if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
+            return -1;
+    }
+    if (PyDict_Size(dct) == 0)
+        return PyList_Append(rval, empty_dict);
+
+    if (s->markers != Py_None) {
+        int has_key;
+        ident = PyLong_FromVoidPtr(dct);
+        if (ident == NULL)
+            goto bail;
+        has_key = PyDict_Contains(s->markers, ident);
+        if (has_key) {
+            if (has_key != -1)
+                PyErr_SetString(PyExc_ValueError, "Circular reference detected");
+            goto bail;
+        }
+        if (PyDict_SetItem(s->markers, ident, dct)) {
+            goto bail;
+        }
+    }
+
+    if (PyList_Append(rval, open_dict))
+        goto bail;
+
+    if (s->indent != Py_None) {
+        /* TODO: DOES NOT RUN */
+        indent_level += 1;
+        /*
+            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
+            separator = _item_separator + newline_indent
+            buf += newline_indent
+        */
+    }
+
+    /* TODO: C speedup not implemented for sort_keys */
+
+    pos = 0;
+    skipkeys = PyObject_IsTrue(s->skipkeys);
+    idx = 0;
+    while (PyDict_Next(dct, &pos, &key, &value)) {
+        PyObject *encoded;
+
+        if (PyString_Check(key) || PyUnicode_Check(key)) {
+            Py_INCREF(key);
+            kstr = key;
+        }
+        else if (PyFloat_Check(key)) {
+            kstr = encoder_encode_float(s, key);
+            if (kstr == NULL)
+                goto bail;
+        }
+        else if (PyInt_Check(key) || PyLong_Check(key)) {
+            kstr = PyObject_Str(key);
+            if (kstr == NULL)
+                goto bail;
+        }
+        else if (key == Py_True || key == Py_False || key == Py_None) {
+            kstr = _encoded_const(key);
+            if (kstr == NULL)
+                goto bail;
+        }
+        else if (skipkeys) {
+            continue;
+        }
+        else {
+            /* TODO: include repr of key */
+            PyErr_SetString(PyExc_ValueError, "keys must be a string");
+            goto bail;
+        }
+
+        if (idx) {
+            if (PyList_Append(rval, s->item_separator))
+                goto bail;
+        }
+
+        encoded = encoder_encode_string(s, kstr);
+        Py_CLEAR(kstr);
+        if (encoded == NULL)
+            goto bail;
+        if (PyList_Append(rval, encoded)) {
+            Py_DECREF(encoded);
+            goto bail;
+        }
+        Py_DECREF(encoded);
+        if (PyList_Append(rval, s->key_separator))
+            goto bail;
+        if (encoder_listencode_obj(s, rval, value, indent_level))
+            goto bail;
+        idx += 1;
+    }
+    if (ident != NULL) {
+        if (PyDict_DelItem(s->markers, ident))
+            goto bail;
+        Py_CLEAR(ident);
+    }
+    if (s->indent != Py_None) {
+        /* TODO: DOES NOT RUN */
+        indent_level -= 1;
+        /*
+            yield '\n' + (' ' * (_indent * _current_indent_level))
+        */
+    }
+    if (PyList_Append(rval, close_dict))
+        goto bail;
+    return 0;
+
+bail:
+    Py_XDECREF(kstr);
+    Py_XDECREF(ident);
+    return -1;
+}
+
+
+static int
+encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
+{
+    /* Encode Python list seq to a JSON term, rval is a PyList */
+    static PyObject *open_array = NULL;
+    static PyObject *close_array = NULL;
+    static PyObject *empty_array = NULL;
+    PyObject *ident = NULL;
+    PyObject *s_fast = NULL;
+    Py_ssize_t num_items;
+    PyObject **seq_items;
+    Py_ssize_t i;
+
+    if (open_array == NULL || close_array == NULL || empty_array == NULL) {
+        open_array = PyString_InternFromString("[");
+        close_array = PyString_InternFromString("]");
+        empty_array = PyString_InternFromString("[]");
+        if (open_array == NULL || close_array == NULL || empty_array == NULL)
+            return -1;
+    }
+    ident = NULL;
+    s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
+    if (s_fast == NULL)
+        return -1;
+    num_items = PySequence_Fast_GET_SIZE(s_fast);
+    if (num_items == 0) {
+        Py_DECREF(s_fast);
+        return PyList_Append(rval, empty_array);
+    }
+
+    if (s->markers != Py_None) {
+        int has_key;
+        ident = PyLong_FromVoidPtr(seq);
+        if (ident == NULL)
+            goto bail;
+        has_key = PyDict_Contains(s->markers, ident);
+        if (has_key) {
+            if (has_key != -1)
+                PyErr_SetString(PyExc_ValueError, "Circular reference detected");
+            goto bail;
+        }
+        if (PyDict_SetItem(s->markers, ident, seq)) {
+            goto bail;
+        }
+    }
+
+    seq_items = PySequence_Fast_ITEMS(s_fast);
+    if (PyList_Append(rval, open_array))
+        goto bail;
+    if (s->indent != Py_None) {
+        /* TODO: DOES NOT RUN */
+        indent_level += 1;
+        /*
+            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
+            separator = _item_separator + newline_indent
+            buf += newline_indent
+        */
+    }
+    for (i = 0; i < num_items; i++) {
+        PyObject *obj = seq_items[i];
+        if (i) {
+            if (PyList_Append(rval, s->item_separator))
+                goto bail;
+        }
+        if (encoder_listencode_obj(s, rval, obj, indent_level))
+            goto bail;
+    }
+    if (ident != NULL) {
+        if (PyDict_DelItem(s->markers, ident))
+            goto bail;
+        Py_CLEAR(ident);
+    }
+    if (s->indent != Py_None) {
+        /* TODO: DOES NOT RUN */
+        indent_level -= 1;
+        /*
+            yield '\n' + (' ' * (_indent * _current_indent_level))
+        */
+    }
+    if (PyList_Append(rval, close_array))
+        goto bail;
+    Py_DECREF(s_fast);
+    return 0;
+
+bail:
+    Py_XDECREF(ident);
+    Py_DECREF(s_fast);
+    return -1;
+}
+
+static void
+encoder_dealloc(PyObject *self)
+{
+    /* Deallocate Encoder */
+    encoder_clear(self);
+    Py_TYPE(self)->tp_free(self);
+}
+
+static int
+encoder_traverse(PyObject *self, visitproc visit, void *arg)
+{
+    PyEncoderObject *s;
+    assert(PyEncoder_Check(self));
+    s = (PyEncoderObject *)self;
+    Py_VISIT(s->markers);
+    Py_VISIT(s->defaultfn);
+    Py_VISIT(s->encoder);
+    Py_VISIT(s->indent);
+    Py_VISIT(s->key_separator);
+    Py_VISIT(s->item_separator);
+    Py_VISIT(s->sort_keys);
+    Py_VISIT(s->skipkeys);
+    return 0;
+}
+
+static int
+encoder_clear(PyObject *self)
+{
+    /* Clear the Encoder's references (used by dealloc and the GC) */
+    PyEncoderObject *s;
+    assert(PyEncoder_Check(self));
+    s = (PyEncoderObject *)self;
+    Py_CLEAR(s->markers);
+    Py_CLEAR(s->defaultfn);
+    Py_CLEAR(s->encoder);
+    Py_CLEAR(s->indent);
+    Py_CLEAR(s->key_separator);
+    Py_CLEAR(s->item_separator);
+    Py_CLEAR(s->sort_keys);
+    Py_CLEAR(s->skipkeys);
+    return 0;
+}
+
+PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
+
+static
+PyTypeObject PyEncoderType = {
+    PyObject_HEAD_INIT(NULL)
+    0,                    /* ob_size */
+    "simplejson._speedups.Encoder",       /* tp_name */
+    sizeof(PyEncoderObject), /* tp_basicsize */
+    0,                    /* tp_itemsize */
+    encoder_dealloc, /* tp_dealloc */
+    0,                    /* tp_print */
+    0,                    /* tp_getattr */
+    0,                    /* tp_setattr */
+    0,                    /* tp_compare */
+    0,                    /* tp_repr */
+    0,                    /* tp_as_number */
+    0,                    /* tp_as_sequence */
+    0,                    /* tp_as_mapping */
+    0,                    /* tp_hash */
+    encoder_call,         /* tp_call */
+    0,                    /* tp_str */
+    0,                    /* tp_getattro */
+    0,                    /* tp_setattro */
+    0,                    /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
+    encoder_doc,          /* tp_doc */
+    encoder_traverse,     /* tp_traverse */
+    encoder_clear,        /* tp_clear */
+    0,                    /* tp_richcompare */
+    0,                    /* tp_weaklistoffset */
+    0,                    /* tp_iter */
+    0,                    /* tp_iternext */
+    0,                    /* tp_methods */
+    encoder_members,      /* tp_members */
+    0,                    /* tp_getset */
+    0,                    /* tp_base */
+    0,                    /* tp_dict */
+    0,                    /* tp_descr_get */
+    0,                    /* tp_descr_set */
+    0,                    /* tp_dictoffset */
+    encoder_init,         /* tp_init */
+    0,                    /* tp_alloc */
+    encoder_new,          /* tp_new */
+    0,                    /* tp_free */
+};
+
+static PyMethodDef speedups_methods[] = {
+    {"encode_basestring_ascii",
+        (PyCFunction)py_encode_basestring_ascii,
+        METH_O,
+        pydoc_encode_basestring_ascii},
+    {"scanstring",
+        (PyCFunction)py_scanstring,
+        METH_VARARGS,
+        pydoc_scanstring},
+    {NULL, NULL, 0, NULL}
+};
+
+PyDoc_STRVAR(module_doc,
+"simplejson speedups\n");
+
+void
+init_speedups(void)
+{
+    PyObject *m;
+    PyScannerType.tp_new = PyType_GenericNew;
+    if (PyType_Ready(&PyScannerType) < 0)
+        return;
+    PyEncoderType.tp_new = PyType_GenericNew;
+    if (PyType_Ready(&PyEncoderType) < 0)
+        return;
+    m = Py_InitModule3("_speedups", speedups_methods, module_doc);
+    Py_INCREF((PyObject*)&PyScannerType);
+    PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
+    Py_INCREF((PyObject*)&PyEncoderType);
+    PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);
+}
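
For reference, the Scanner type registered above is constructed from a decoder
context and then called as scan_once(string, idx); it returns an (object,
end_index) pair and raises StopIteration when no term starts at idx. A minimal
sketch, assuming the _speedups extension was built and imports cleanly:

    from simplejson.decoder import JSONDecoder
    from simplejson._speedups import make_scanner

    scan_once = make_scanner(JSONDecoder())   # Scanner is built from a decoder context
    value, end = scan_once('  {"a": 1}', 2)   # -> ({u'a': 1}, 10)
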
diff --git a/lang/py/lib/simplejson/decoder.py b/lang/py/lib/simplejson/decoder.py
new file mode 100644
index 0000000..b769ea4
--- /dev/null
+++ b/lang/py/lib/simplejson/decoder.py
@@ -0,0 +1,354 @@
+"""Implementation of JSONDecoder
+"""
+import re
+import sys
+import struct
+
+from simplejson.scanner import make_scanner
+try:
+    from simplejson._speedups import scanstring as c_scanstring
+except ImportError:
+    c_scanstring = None
+
+__all__ = ['JSONDecoder']
+
+FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
+
+def _floatconstants():
+    _BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
+    if sys.byteorder != 'big':
+        _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
+    nan, inf = struct.unpack('dd', _BYTES)
+    return nan, inf, -inf
+
+NaN, PosInf, NegInf = _floatconstants()
+
+
+def linecol(doc, pos):
+    lineno = doc.count('\n', 0, pos) + 1
+    if lineno == 1:
+        colno = pos
+    else:
+        colno = pos - doc.rindex('\n', 0, pos)
+    return lineno, colno
+
+
+def errmsg(msg, doc, pos, end=None):
+    # Note that this function is called from _speedups
+    lineno, colno = linecol(doc, pos)
+    if end is None:
+        #fmt = '{0}: line {1} column {2} (char {3})'
+        #return fmt.format(msg, lineno, colno, pos)
+        fmt = '%s: line %d column %d (char %d)'
+        return fmt % (msg, lineno, colno, pos)
+    endlineno, endcolno = linecol(doc, end)
+    #fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
+    #return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end)
+    fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
+    return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)
+
+
+_CONSTANTS = {
+    '-Infinity': NegInf,
+    'Infinity': PosInf,
+    'NaN': NaN,
+}
+
+STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
+BACKSLASH = {
+    '"': u'"', '\\': u'\\', '/': u'/',
+    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
+}
+
+DEFAULT_ENCODING = "utf-8"
+
+def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match):
+    """Scan the string s for a JSON string. End is the index of the
+    character in s after the quote that started the JSON string.
+    Unescapes all valid JSON string escape sequences and raises ValueError
+    on an attempt to decode an invalid string. If strict is False then literal
+    control characters are allowed in the string.
+    
+    Returns a tuple of the decoded string and the index of the character in s
+    after the end quote."""
+    if encoding is None:
+        encoding = DEFAULT_ENCODING
+    chunks = []
+    _append = chunks.append
+    begin = end - 1
+    while 1:
+        chunk = _m(s, end)
+        if chunk is None:
+            raise ValueError(
+                errmsg("Unterminated string starting at", s, begin))
+        end = chunk.end()
+        content, terminator = chunk.groups()
+        # Content contains zero or more unescaped string characters
+        if content:
+            if not isinstance(content, unicode):
+                content = unicode(content, encoding)
+            _append(content)
+        # Terminator is the end of string, a literal control character,
+        # or a backslash denoting that an escape sequence follows
+        if terminator == '"':
+            break
+        elif terminator != '\\':
+            if strict:
+                msg = "Invalid control character %r at" % (terminator,)
+                #msg = "Invalid control character {0!r} at".format(terminator)
+                raise ValueError(errmsg(msg, s, end))
+            else:
+                _append(terminator)
+                continue
+        try:
+            esc = s[end]
+        except IndexError:
+            raise ValueError(
+                errmsg("Unterminated string starting at", s, begin))
+        # If not a unicode escape sequence, must be in the lookup table
+        if esc != 'u':
+            try:
+                char = _b[esc]
+            except KeyError:
+                msg = "Invalid \\escape: " + repr(esc)
+                raise ValueError(errmsg(msg, s, end))
+            end += 1
+        else:
+            # Unicode escape sequence
+            esc = s[end + 1:end + 5]
+            next_end = end + 5
+            if len(esc) != 4:
+                msg = "Invalid \\uXXXX escape"
+                raise ValueError(errmsg(msg, s, end))
+            uni = int(esc, 16)
+            # Check for surrogate pair on UCS-4 systems
+            if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
+                msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
+                if not s[end + 5:end + 7] == '\\u':
+                    raise ValueError(errmsg(msg, s, end))
+                esc2 = s[end + 7:end + 11]
+                if len(esc2) != 4:
+                    raise ValueError(errmsg(msg, s, end))
+                uni2 = int(esc2, 16)
+                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
+                next_end += 6
+            char = unichr(uni)
+            end = next_end
+        # Append the unescaped character
+        _append(char)
+    return u''.join(chunks), end
+
+
+# Use speedup if available
+scanstring = c_scanstring or py_scanstring
+
+WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
+WHITESPACE_STR = ' \t\n\r'
+
+def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
+    pairs = {}
+    # Use a slice to prevent IndexError from being raised; the following
+    # check will raise a more specific ValueError if the string is empty
+    nextchar = s[end:end + 1]
+    # Normally we expect nextchar == '"'
+    if nextchar != '"':
+        if nextchar in _ws:
+            end = _w(s, end).end()
+            nextchar = s[end:end + 1]
+        # Trivial empty object
+        if nextchar == '}':
+            return pairs, end + 1
+        elif nextchar != '"':
+            raise ValueError(errmsg("Expecting property name", s, end))
+    end += 1
+    while True:
+        key, end = scanstring(s, end, encoding, strict)
+
+        # To skip some function call overhead, we optimize the fast paths where
+        # the JSON key separator is ": " or just ":".
+        if s[end:end + 1] != ':':
+            end = _w(s, end).end()
+            if s[end:end + 1] != ':':
+                raise ValueError(errmsg("Expecting : delimiter", s, end))
+
+        end += 1
+
+        try:
+            if s[end] in _ws:
+                end += 1
+                if s[end] in _ws:
+                    end = _w(s, end + 1).end()
+        except IndexError:
+            pass
+
+        try:
+            value, end = scan_once(s, end)
+        except StopIteration:
+            raise ValueError(errmsg("Expecting object", s, end))
+        pairs[key] = value
+
+        try:
+            nextchar = s[end]
+            if nextchar in _ws:
+                end = _w(s, end + 1).end()
+                nextchar = s[end]
+        except IndexError:
+            nextchar = ''
+        end += 1
+
+        if nextchar == '}':
+            break
+        elif nextchar != ',':
+            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
+
+        try:
+            nextchar = s[end]
+            if nextchar in _ws:
+                end += 1
+                nextchar = s[end]
+                if nextchar in _ws:
+                    end = _w(s, end + 1).end()
+                    nextchar = s[end]
+        except IndexError:
+            nextchar = ''
+
+        end += 1
+        if nextchar != '"':
+            raise ValueError(errmsg("Expecting property name", s, end - 1))
+
+    if object_hook is not None:
+        pairs = object_hook(pairs)
+    return pairs, end
+
+def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
+    values = []
+    nextchar = s[end:end + 1]
+    if nextchar in _ws:
+        end = _w(s, end + 1).end()
+        nextchar = s[end:end + 1]
+    # Look-ahead for trivial empty array
+    if nextchar == ']':
+        return values, end + 1
+    _append = values.append
+    while True:
+        try:
+            value, end = scan_once(s, end)
+        except StopIteration:
+            raise ValueError(errmsg("Expecting object", s, end))
+        _append(value)
+        nextchar = s[end:end + 1]
+        if nextchar in _ws:
+            end = _w(s, end + 1).end()
+            nextchar = s[end:end + 1]
+        end += 1
+        if nextchar == ']':
+            break
+        elif nextchar != ',':
+            raise ValueError(errmsg("Expecting , delimiter", s, end))
+
+        try:
+            if s[end] in _ws:
+                end += 1
+                if s[end] in _ws:
+                    end = _w(s, end + 1).end()
+        except IndexError:
+            pass
+
+    return values, end
+
+class JSONDecoder(object):
+    """Simple JSON <http://json.org> decoder
+
+    Performs the following translations in decoding by default:
+
+    +---------------+-------------------+
+    | JSON          | Python            |
+    +===============+===================+
+    | object        | dict              |
+    +---------------+-------------------+
+    | array         | list              |
+    +---------------+-------------------+
+    | string        | unicode           |
+    +---------------+-------------------+
+    | number (int)  | int, long         |
+    +---------------+-------------------+
+    | number (real) | float             |
+    +---------------+-------------------+
+    | true          | True              |
+    +---------------+-------------------+
+    | false         | False             |
+    +---------------+-------------------+
+    | null          | None              |
+    +---------------+-------------------+
+
+    It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
+    their corresponding ``float`` values, which is outside the JSON spec.
+
+    """
+
+    def __init__(self, encoding=None, object_hook=None, parse_float=None,
+            parse_int=None, parse_constant=None, strict=True):
+        """``encoding`` determines the encoding used to interpret any ``str``
+        objects decoded by this instance (utf-8 by default).  It has no
+        effect when decoding ``unicode`` objects.
+
+        Note that currently only encodings that are a superset of ASCII work;
+        strings of other encodings should be passed in as ``unicode``.
+
+        ``object_hook``, if specified, will be called with the result
+        of every JSON object decoded and its return value will be used in
+        place of the given ``dict``.  This can be used to provide custom
+        deserializations (e.g. to support JSON-RPC class hinting).
+
+        ``parse_float``, if specified, will be called with the string
+        of every JSON float to be decoded. By default this is equivalent to
+        float(num_str). This can be used to parse JSON floats with another
+        datatype or parser (e.g. decimal.Decimal).
+
+        ``parse_int``, if specified, will be called with the string
+        of every JSON int to be decoded. By default this is equivalent to
+        int(num_str). This can be used to parse JSON integers with another
+        datatype or parser (e.g. float).
+
+        ``parse_constant``, if specified, will be called with one of the
+        following strings: -Infinity, Infinity, NaN.
+        This can be used to raise an exception if invalid JSON numbers
+        are encountered.
+
+        """
+        self.encoding = encoding
+        self.object_hook = object_hook
+        self.parse_float = parse_float or float
+        self.parse_int = parse_int or int
+        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
+        self.strict = strict
+        self.parse_object = JSONObject
+        self.parse_array = JSONArray
+        self.parse_string = scanstring
+        self.scan_once = make_scanner(self)
+
+    def decode(self, s, _w=WHITESPACE.match):
+        """Return the Python representation of ``s`` (a ``str`` or ``unicode``
+        instance containing a JSON document)
+
+        """
+        obj, end = self.raw_decode(s, idx=_w(s, 0).end())
+        end = _w(s, end).end()
+        if end != len(s):
+            raise ValueError(errmsg("Extra data", s, end, len(s)))
+        return obj
+
+    def raw_decode(self, s, idx=0):
+        """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning
+        with a JSON document) and return a 2-tuple of the Python
+        representation and the index in ``s`` where the document ended.
+
+        This can be used to decode a JSON document from a string that may
+        have extraneous data at the end.
+
+        """
+        try:
+            obj, end = self.scan_once(s, idx)
+        except StopIteration:
+            raise ValueError("No JSON object could be decoded")
+        return obj, end
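
A brief usage sketch of the decoder defined above (illustrative values only):

    from simplejson.decoder import JSONDecoder

    d = JSONDecoder()
    d.decode('{"key": [1, 2.5, null]}')      # -> {u'key': [1, 2.5, None]}
    d.raw_decode('true and trailing text')   # -> (True, 4); the remainder is left to the caller
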
diff --git a/lang/py/lib/simplejson/encoder.py b/lang/py/lib/simplejson/encoder.py
new file mode 100644
index 0000000..cf58290
--- /dev/null
+++ b/lang/py/lib/simplejson/encoder.py
@@ -0,0 +1,440 @@
+"""Implementation of JSONEncoder
+"""
+import re
+
+try:
+    from simplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii
+except ImportError:
+    c_encode_basestring_ascii = None
+try:
+    from simplejson._speedups import make_encoder as c_make_encoder
+except ImportError:
+    c_make_encoder = None
+
+ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
+ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
+HAS_UTF8 = re.compile(r'[\x80-\xff]')
+ESCAPE_DCT = {
+    '\\': '\\\\',
+    '"': '\\"',
+    '\b': '\\b',
+    '\f': '\\f',
+    '\n': '\\n',
+    '\r': '\\r',
+    '\t': '\\t',
+}
+for i in range(0x20):
+    #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
+    ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
+
+# Assume this produces an infinity on all machines (probably not guaranteed)
+INFINITY = float('1e66666')
+FLOAT_REPR = repr
+
+def encode_basestring(s):
+    """Return a JSON representation of a Python string
+
+    """
+    def replace(match):
+        return ESCAPE_DCT[match.group(0)]
+    return '"' + ESCAPE.sub(replace, s) + '"'
+
+
+def py_encode_basestring_ascii(s):
+    """Return an ASCII-only JSON representation of a Python string
+
+    """
+    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
+        s = s.decode('utf-8')
+    def replace(match):
+        s = match.group(0)
+        try:
+            return ESCAPE_DCT[s]
+        except KeyError:
+            n = ord(s)
+            if n < 0x10000:
+                #return '\\u{0:04x}'.format(n)
+                return '\\u%04x' % (n,)
+            else:
+                # surrogate pair
+                n -= 0x10000
+                s1 = 0xd800 | ((n >> 10) & 0x3ff)
+                s2 = 0xdc00 | (n & 0x3ff)
+                #return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
+                return '\\u%04x\\u%04x' % (s1, s2)
+    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
+
+
+encode_basestring_ascii = c_encode_basestring_ascii or py_encode_basestring_ascii
+
+class JSONEncoder(object):
+    """Extensible JSON <http://json.org> encoder for Python data structures.
+
+    Supports the following objects and types by default:
+
+    +-------------------+---------------+
+    | Python            | JSON          |
+    +===================+===============+
+    | dict              | object        |
+    +-------------------+---------------+
+    | list, tuple       | array         |
+    +-------------------+---------------+
+    | str, unicode      | string        |
+    +-------------------+---------------+
+    | int, long, float  | number        |
+    +-------------------+---------------+
+    | True              | true          |
+    +-------------------+---------------+
+    | False             | false         |
+    +-------------------+---------------+
+    | None              | null          |
+    +-------------------+---------------+
+
+    To extend this to recognize other objects, subclass and implement a
+    ``.default()`` method that returns a serializable object for ``o`` if
+    possible; otherwise it should call the superclass implementation (to
+    raise ``TypeError``).
+
+    """
+    item_separator = ', '
+    key_separator = ': '
+    def __init__(self, skipkeys=False, ensure_ascii=True,
+            check_circular=True, allow_nan=True, sort_keys=False,
+            indent=None, separators=None, encoding='utf-8', default=None):
+        """Constructor for JSONEncoder, with sensible defaults.
+
+        If skipkeys is false, then it is a TypeError to attempt
+        encoding of keys that are not str, int, long, float or None.  If
+        skipkeys is True, such items are simply skipped.
+
+        If ensure_ascii is true, the output is guaranteed to be str
+        objects with all incoming unicode characters escaped.  If
+        ensure_ascii is false, the output will be a unicode object.
+
+        If check_circular is true, then lists, dicts, and custom encoded
+        objects will be checked for circular references during encoding to
+        prevent an infinite recursion (which would cause an OverflowError).
+        Otherwise, no such check takes place.
+
+        If allow_nan is true, then NaN, Infinity, and -Infinity will be
+        encoded as such.  This behavior is not JSON specification compliant,
+        but is consistent with most JavaScript based encoders and decoders.
+        Otherwise, it will be a ValueError to encode such floats.
+
+        If sort_keys is true, then the output of dictionaries will be
+        sorted by key; this is useful for regression tests to ensure
+        that JSON serializations can be compared on a day-to-day basis.
+
+        If indent is a non-negative integer, then JSON array
+        elements and object members will be pretty-printed with that
+        indent level.  An indent level of 0 will only insert newlines.
+        None is the most compact representation.
+
+        If specified, separators should be a (item_separator, key_separator)
+        tuple.  The default is (', ', ': ').  To get the most compact JSON
+        representation you should specify (',', ':') to eliminate whitespace.
+
+        If specified, default is a function that gets called for objects
+        that can't otherwise be serialized.  It should return a JSON encodable
+        version of the object or raise a ``TypeError``.
+
+        If encoding is not None, then all input strings will be
+        transformed into unicode using that encoding prior to JSON-encoding.
+        The default is UTF-8.
+
+        """
+
+        self.skipkeys = skipkeys
+        self.ensure_ascii = ensure_ascii
+        self.check_circular = check_circular
+        self.allow_nan = allow_nan
+        self.sort_keys = sort_keys
+        self.indent = indent
+        if separators is not None:
+            self.item_separator, self.key_separator = separators
+        if default is not None:
+            self.default = default
+        self.encoding = encoding
+
+    def default(self, o):
+        """Implement this method in a subclass such that it returns
+        a serializable object for ``o``, or calls the base implementation
+        (to raise a ``TypeError``).
+
+        For example, to support arbitrary iterators, you could
+        implement default like this::
+
+            def default(self, o):
+                try:
+                    iterable = iter(o)
+                except TypeError:
+                    pass
+                else:
+                    return list(iterable)
+                return JSONEncoder.default(self, o)
+
+        """
+        raise TypeError(repr(o) + " is not JSON serializable")
+
+    def encode(self, o):
+        """Return a JSON string representation of a Python data structure.
+
+        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
+        '{"foo": ["bar", "baz"]}'
+
+        """
+        # This is for extremely simple cases and benchmarks.
+        if isinstance(o, basestring):
+            if isinstance(o, str):
+                _encoding = self.encoding
+                if (_encoding is not None
+                        and not (_encoding == 'utf-8')):
+                    o = o.decode(_encoding)
+            if self.ensure_ascii:
+                return encode_basestring_ascii(o)
+            else:
+                return encode_basestring(o)
+        # This doesn't pass the iterator directly to ''.join() because the
+        # exceptions aren't as detailed.  The list call should be roughly
+        # equivalent to the PySequence_Fast that ''.join() would do.
+        chunks = self.iterencode(o, _one_shot=True)
+        if not isinstance(chunks, (list, tuple)):
+            chunks = list(chunks)
+        return ''.join(chunks)
+
+    def iterencode(self, o, _one_shot=False):
+        """Encode the given object and yield each string
+        representation as available.
+
+        For example::
+
+            for chunk in JSONEncoder().iterencode(bigobject):
+                mysocket.write(chunk)
+
+        """
+        if self.check_circular:
+            markers = {}
+        else:
+            markers = None
+        if self.ensure_ascii:
+            _encoder = encode_basestring_ascii
+        else:
+            _encoder = encode_basestring
+        if self.encoding != 'utf-8':
+            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
+                if isinstance(o, str):
+                    o = o.decode(_encoding)
+                return _orig_encoder(o)
+
+        def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
+            # Check for specials.  Note that this type of test is processor- and/or
+            # platform-specific, so do tests which don't depend on the internals.
+
+            if o != o:
+                text = 'NaN'
+            elif o == _inf:
+                text = 'Infinity'
+            elif o == _neginf:
+                text = '-Infinity'
+            else:
+                return _repr(o)
+
+            if not allow_nan:
+                raise ValueError(
+                    "Out of range float values are not JSON compliant: " +
+                    repr(o))
+
+            return text
+
+
+        if _one_shot and c_make_encoder is not None and not self.indent and not self.sort_keys:
+            _iterencode = c_make_encoder(
+                markers, self.default, _encoder, self.indent,
+                self.key_separator, self.item_separator, self.sort_keys,
+                self.skipkeys, self.allow_nan)
+        else:
+            _iterencode = _make_iterencode(
+                markers, self.default, _encoder, self.indent, floatstr,
+                self.key_separator, self.item_separator, self.sort_keys,
+                self.skipkeys, _one_shot)
+        return _iterencode(o, 0)
+
+def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
+        ## HACK: hand-optimized bytecode; turn globals into locals
+        False=False,
+        True=True,
+        ValueError=ValueError,
+        basestring=basestring,
+        dict=dict,
+        float=float,
+        id=id,
+        int=int,
+        isinstance=isinstance,
+        list=list,
+        long=long,
+        str=str,
+        tuple=tuple,
+    ):
+
+    def _iterencode_list(lst, _current_indent_level):
+        if not lst:
+            yield '[]'
+            return
+        if markers is not None:
+            markerid = id(lst)
+            if markerid in markers:
+                raise ValueError("Circular reference detected")
+            markers[markerid] = lst
+        buf = '['
+        if _indent is not None:
+            _current_indent_level += 1
+            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
+            separator = _item_separator + newline_indent
+            buf += newline_indent
+        else:
+            newline_indent = None
+            separator = _item_separator
+        first = True
+        for value in lst:
+            if first:
+                first = False
+            else:
+                buf = separator
+            if isinstance(value, basestring):
+                yield buf + _encoder(value)
+            elif value is None:
+                yield buf + 'null'
+            elif value is True:
+                yield buf + 'true'
+            elif value is False:
+                yield buf + 'false'
+            elif isinstance(value, (int, long)):
+                yield buf + str(value)
+            elif isinstance(value, float):
+                yield buf + _floatstr(value)
+            else:
+                yield buf
+                if isinstance(value, (list, tuple)):
+                    chunks = _iterencode_list(value, _current_indent_level)
+                elif isinstance(value, dict):
+                    chunks = _iterencode_dict(value, _current_indent_level)
+                else:
+                    chunks = _iterencode(value, _current_indent_level)
+                for chunk in chunks:
+                    yield chunk
+        if newline_indent is not None:
+            _current_indent_level -= 1
+            yield '\n' + (' ' * (_indent * _current_indent_level))
+        yield ']'
+        if markers is not None:
+            del markers[markerid]
+
+    def _iterencode_dict(dct, _current_indent_level):
+        if not dct:
+            yield '{}'
+            return
+        if markers is not None:
+            markerid = id(dct)
+            if markerid in markers:
+                raise ValueError("Circular reference detected")
+            markers[markerid] = dct
+        yield '{'
+        if _indent is not None:
+            _current_indent_level += 1
+            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
+            item_separator = _item_separator + newline_indent
+            yield newline_indent
+        else:
+            newline_indent = None
+            item_separator = _item_separator
+        first = True
+        if _sort_keys:
+            items = dct.items()
+            items.sort(key=lambda kv: kv[0])
+        else:
+            items = dct.iteritems()
+        for key, value in items:
+            if isinstance(key, basestring):
+                pass
+            # JavaScript is weakly typed for these, so it makes sense to
+            # also allow them.  Many encoders seem to do something like this.
+            elif isinstance(key, float):
+                key = _floatstr(key)
+            elif key is True:
+                key = 'true'
+            elif key is False:
+                key = 'false'
+            elif key is None:
+                key = 'null'
+            elif isinstance(key, (int, long)):
+                key = str(key)
+            elif _skipkeys:
+                continue
+            else:
+                raise TypeError("key " + repr(key) + " is not a string")
+            if first:
+                first = False
+            else:
+                yield item_separator
+            yield _encoder(key)
+            yield _key_separator
+            if isinstance(value, basestring):
+                yield _encoder(value)
+            elif value is None:
+                yield 'null'
+            elif value is True:
+                yield 'true'
+            elif value is False:
+                yield 'false'
+            elif isinstance(value, (int, long)):
+                yield str(value)
+            elif isinstance(value, float):
+                yield _floatstr(value)
+            else:
+                if isinstance(value, (list, tuple)):
+                    chunks = _iterencode_list(value, _current_indent_level)
+                elif isinstance(value, dict):
+                    chunks = _iterencode_dict(value, _current_indent_level)
+                else:
+                    chunks = _iterencode(value, _current_indent_level)
+                for chunk in chunks:
+                    yield chunk
+        if newline_indent is not None:
+            _current_indent_level -= 1
+            yield '\n' + (' ' * (_indent * _current_indent_level))
+        yield '}'
+        if markers is not None:
+            del markers[markerid]
+
+    def _iterencode(o, _current_indent_level):
+        if isinstance(o, basestring):
+            yield _encoder(o)
+        elif o is None:
+            yield 'null'
+        elif o is True:
+            yield 'true'
+        elif o is False:
+            yield 'false'
+        elif isinstance(o, (int, long)):
+            yield str(o)
+        elif isinstance(o, float):
+            yield _floatstr(o)
+        elif isinstance(o, (list, tuple)):
+            for chunk in _iterencode_list(o, _current_indent_level):
+                yield chunk
+        elif isinstance(o, dict):
+            for chunk in _iterencode_dict(o, _current_indent_level):
+                yield chunk
+        else:
+            if markers is not None:
+                markerid = id(o)
+                if markerid in markers:
+                    raise ValueError("Circular reference detected")
+                markers[markerid] = o
+            o = _default(o)
+            for chunk in _iterencode(o, _current_indent_level):
+                yield chunk
+            if markers is not None:
+                del markers[markerid]
+
+    return _iterencode
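
A matching sketch for the encoder above; sort_keys=True makes the output order
deterministic, and iterencode() streams the result chunk by chunk:

    from simplejson.encoder import JSONEncoder

    JSONEncoder(sort_keys=True).encode({"b": [True, None], "a": 1})
    # -> '{"a": 1, "b": [true, null]}'

    for chunk in JSONEncoder(indent=2).iterencode({"a": 1}):
        pass   # yields '{', '\n  ', '"a"', ': ', '1', '\n', '}' in turn
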
diff --git a/lang/py/lib/simplejson/scanner.py b/lang/py/lib/simplejson/scanner.py
new file mode 100644
index 0000000..adbc6ec
--- /dev/null
+++ b/lang/py/lib/simplejson/scanner.py
@@ -0,0 +1,65 @@
+"""JSON token scanner
+"""
+import re
+try:
+    from simplejson._speedups import make_scanner as c_make_scanner
+except ImportError:
+    c_make_scanner = None
+
+__all__ = ['make_scanner']
+
+NUMBER_RE = re.compile(
+    r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?',
+    (re.VERBOSE | re.MULTILINE | re.DOTALL))
+
+def py_make_scanner(context):
+    parse_object = context.parse_object
+    parse_array = context.parse_array
+    parse_string = context.parse_string
+    match_number = NUMBER_RE.match
+    encoding = context.encoding
+    strict = context.strict
+    parse_float = context.parse_float
+    parse_int = context.parse_int
+    parse_constant = context.parse_constant
+    object_hook = context.object_hook
+
+    def _scan_once(string, idx):
+        try:
+            nextchar = string[idx]
+        except IndexError:
+            raise StopIteration
+
+        if nextchar == '"':
+            return parse_string(string, idx + 1, encoding, strict)
+        elif nextchar == '{':
+            return parse_object((string, idx + 1), encoding, strict, _scan_once, object_hook)
+        elif nextchar == '[':
+            return parse_array((string, idx + 1), _scan_once)
+        elif nextchar == 'n' and string[idx:idx + 4] == 'null':
+            return None, idx + 4
+        elif nextchar == 't' and string[idx:idx + 4] == 'true':
+            return True, idx + 4
+        elif nextchar == 'f' and string[idx:idx + 5] == 'false':
+            return False, idx + 5
+
+        m = match_number(string, idx)
+        if m is not None:
+            integer, frac, exp = m.groups()
+            if frac or exp:
+                res = parse_float(integer + (frac or '') + (exp or ''))
+            else:
+                res = parse_int(integer)
+            return res, m.end()
+        elif nextchar == 'N' and string[idx:idx + 3] == 'NaN':
+            return parse_constant('NaN'), idx + 3
+        elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity':
+            return parse_constant('Infinity'), idx + 8
+        elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
+            return parse_constant('-Infinity'), idx + 9
+        else:
+            raise StopIteration
+
+    return _scan_once
+
+make_scanner = c_make_scanner or py_make_scanner
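+
+# --- Illustrative sketch (editorial addition, not part of the upstream file).
+# It shows how the three NUMBER_RE groups (integer part, fraction, exponent)
+# drive the int/float dispatch inside _scan_once above; the sample text and
+# the helper name are made up for illustration.
+def _demo_number_dispatch(text='-12.5e3'):
+    m = NUMBER_RE.match(text, 0)
+    integer, frac, exp = m.groups()                        # ('-12', '.5', 'e3')
+    if frac or exp:
+        return float(integer + (frac or '') + (exp or ''))  # -> -12500.0
+    return int(integer)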
diff --git a/lang/py/lib/simplejson/tool.py b/lang/py/lib/simplejson/tool.py
new file mode 100644
index 0000000..9044331
--- /dev/null
+++ b/lang/py/lib/simplejson/tool.py
@@ -0,0 +1,37 @@
+r"""Command-line tool to validate and pretty-print JSON
+
+Usage::
+
+    $ echo '{"json":"obj"}' | python -m simplejson.tool
+    {
+        "json": "obj"
+    }
+    $ echo '{ 1.2:3.4}' | python -m simplejson.tool
+    Expecting property name: line 1 column 2 (char 2)
+
+"""
+import sys
+import simplejson
+
+def main():
+    if len(sys.argv) == 1:
+        infile = sys.stdin
+        outfile = sys.stdout
+    elif len(sys.argv) == 2:
+        infile = open(sys.argv[1], 'rb')
+        outfile = sys.stdout
+    elif len(sys.argv) == 3:
+        infile = open(sys.argv[1], 'rb')
+        outfile = open(sys.argv[2], 'wb')
+    else:
+        raise SystemExit(sys.argv[0] + " [infile [outfile]]")
+    try:
+        obj = simplejson.load(infile)
+    except ValueError, e:
+        raise SystemExit(e)
+    simplejson.dump(obj, outfile, sort_keys=True, indent=4)
+    outfile.write('\n')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/lang/py/scripts/avro b/lang/py/scripts/avro
new file mode 100644
index 0000000..2e53afd
--- /dev/null
+++ b/lang/py/scripts/avro
@@ -0,0 +1,262 @@
+#!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Command line utlity for reading and writing Avro files."""
+
+from avro.io import DatumReader, DatumWriter
+from avro.datafile import DataFileReader, DataFileWriter
+import avro.schema
+
+try:
+    import json
+except ImportError:
+    import simplejson as json
+import csv
+from sys import stdout, stdin
+from itertools import ifilter, imap
+from functools import partial
+from os.path import splitext
+
+class AvroError(Exception):
+    pass
+
+def print_json(row):
+    print(json.dumps(row))
+
+def print_json_pretty(row):
+    print(json.dumps(row, indent=4))
+
+_write_row = csv.writer(stdout).writerow
+_encoding = stdout.encoding or "UTF-8"
+def _encode(v, encoding=_encoding):
+    if not isinstance(v, basestring):
+        return v
+    return v.encode(encoding)
+
+def print_csv(row):
+    # We sort the keys so the fields will be in the same place
+    # FIXME: Do we want to do it in schema order?
+    _write_row([_encode(row[key]) for key in sorted(row)])
+
+def select_printer(format):
+    return {
+        "json" : print_json,
+        "json-pretty" : print_json_pretty,
+        "csv" : print_csv
+    }[format]
+
+def record_match(expr, record):
+    return eval(expr, None, {"r" : record})
+
+def parse_fields(fields):
+    fields = fields or ''
+    if not fields.strip():
+        return None
+
+    return [field.strip() for field in fields.split(',') if field.strip()]
+
+def field_selector(fields):
+    fields = set(fields)
+    def keys_filter(obj):
+        return dict((k, obj[k]) for k in (set(obj) & fields))
+    return keys_filter
+
+def print_avro(avro, opts):
+    if opts.header and (opts.format != "csv"):
+        raise AvroError("--header applies only to CSV format")
+
+    # Apply filter first
+    if opts.filter:
+        avro = ifilter(partial(record_match, opts.filter), avro)
+
+    for i in xrange(opts.skip):
+        try:
+            next(avro)
+        except StopIteration:
+            return
+
+    fields = parse_fields(opts.fields)
+    if fields:
+        avro = imap(field_selector(fields), avro)
+
+    printer = select_printer(opts.format)
+    for i, record in enumerate(avro):
+        if i == 0 and opts.header:
+            _write_row(sorted(record.keys()))
+        if i >= opts.count:
+            break
+        printer(record)
+
+def print_schema(avro):
+    schema = avro.meta["avro.schema"]
+    # Pretty print
+    print json.dumps(json.loads(schema), indent=4)
+
+def cat(opts, args):
+    if not args:
+        raise AvroError("No files to show")
+
+    for filename in args:
+        try:
+            fo = open(filename, "rb")
+        except (OSError, IOError), e:
+            raise AvroError("Can't open %s - %s" % (filename, e))
+
+        avro = DataFileReader(fo, DatumReader())
+
+        if opts.print_schema:
+            print_schema(avro)
+            continue
+
+        print_avro(avro, opts)
+
+def _open(filename, mode):
+    if filename == "-":
+        return {
+            "rb" : stdin,
+            "wb" : stdout
+        }[mode]
+
+    return open(filename, mode)
+
+def iter_json(info, _):
+    return imap(json.loads, info)
+
+_PRIMITIVE_CONVERTERS = {
+    "int" : int,
+    "long" : long,
+    "float" : float,
+    "double" : float,
+    "string" : str,
+    "bytes" : str,
+    "boolean" : bool,
+    "null" : lambda _: None,
+}
+
+def convert(value, field):
+    type = field.type.type
+    if type == "union":
+        return convert_union(value, field)
+
+    return _PRIMITIVE_CONVERTERS[type](value)
+
+def convert_union(value, field):
+    # Try each branch of the union until one of the primitive converters
+    # accepts the value.
+    for sub_schema in field.type.schemas:
+        try:
+            return _PRIMITIVE_CONVERTERS[sub_schema.type](value)
+        except (KeyError, ValueError):
+            continue
+
+def iter_csv(info, schema):
+    header = [field.name for field in schema.fields]
+    for row in csv.reader(info):
+        values = [convert(v, f) for v, f in zip(row, schema.fields)]
+        yield dict(zip(header, values))
+
+def guess_input_type(files):
+    if not files:
+        return None
+
+    ext = splitext(files[0])[1].lower()
+    if ext in (".json", ".js"):
+        return "json"
+    elif ext in (".csv",):
+        return "csv"
+
+    return None
+
+def write(opts, files):
+    if not opts.schema:
+        raise AvroError("No schema specified")
+
+    input_type = opts.input_type or guess_input_type(files)
+    if not input_type:
+        raise AvroError("Can't guess input file type (not .json or .csv)")
+
+    try:
+        schema = avro.schema.parse(open(opts.schema, "rb").read())
+        out = _open(opts.output, "wb")
+    except (IOError, OSError), e:
+        raise AvroError("Can't open file - %s" % e)
+
+    writer = DataFileWriter(out, DatumWriter(), schema)
+
+    iter_records = {"json" : iter_json, "csv" : iter_csv}[input_type]
+    for filename in (files or ["-"]):
+        info = _open(filename, "rb")
+        for record in iter_records(info, schema):
+            writer.append(record)
+
+    writer.close()
+
+def main(argv=None):
+    import sys
+    from optparse import OptionParser, OptionGroup
+
+    argv = argv or sys.argv
+
+    parser = OptionParser(description="Display or write Avro files",
+                      version="@AVRO_VERSION@",
+                      usage="usage: %prog cat|write [options] FILE [FILE...]")
+    # cat options
+
+    cat_options = OptionGroup(parser, "cat options")
+    cat_options.add_option("-n", "--count", default=float("Infinity"),
+                    help="number of records to print", type=int)
+    cat_options.add_option("-s", "--skip", help="number of records to skip",
+                           type=int, default=0)
+    cat_options.add_option("-f", "--format", help="record format",
+                           default="json",
+                           choices=["json", "csv", "json-pretty"])
+    cat_options.add_option("--header", help="print CSV header", default=False,
+                   action="store_true")
+    cat_options.add_option("--filter", help="filter records (e.g. r['age']>1)",
+                    default=None)
+    cat_options.add_option("--print-schema", help="print schema",
+                      action="store_true", default=False)
+    cat_options.add_option('--fields', default=None,
+                help='fields to show, comma separated (show all by default)')
+    parser.add_option_group(cat_options)
+
+    # write options
+    write_options = OptionGroup(parser, "write options")
+    write_options.add_option("--schema", help="schema file (required)")
+    write_options.add_option("--input-type",
+                             help="input file(s) type (json or csv)",
+                             choices=["json", "csv"], default=None)
+    write_options.add_option("-o", "--output", help="output file", default="-")
+    parser.add_option_group(write_options)
+
+    opts, args = parser.parse_args(argv[1:])
+    if len(args) < 1:
+        parser.error("You much specify `cat` or `write`")  # Will exit
+
+    command = args.pop(0)
+    try:
+        if command == "cat":
+            cat(opts, args)
+        elif command == "write":
+            write(opts, args)
+        else:
+            raise AvroError("Unknown command - %s" % command)
+    except AvroError, e:
+        parser.error("%s" % e) # Will exit
+    except Exception, e:
+        raise SystemExit("panic: %s" % e)
+
+if __name__ == "__main__":
+    main()
+
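+# Editorial note (illustrative, not part of the upstream file): hypothetical
+# invocations of this script, assuming files named users.avro, user.avsc and
+# users.csv exist; the option names come from the parser built in main() above.
+#
+#   avro cat users.avro --format json-pretty --fields name,age
+#   avro cat users.avro --filter "r['age'] > 1" -n 10 --skip 2
+#   avro cat users.avro --print-schema
+#   avro write --schema user.avsc --input-type csv -o users.avro users.csv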
diff --git a/lang/py/setup.py b/lang/py/setup.py
new file mode 100755
index 0000000..94f2c3a
--- /dev/null
+++ b/lang/py/setup.py
@@ -0,0 +1,49 @@
+#! /usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+try:
+  from setuptools import setup
+except ImportError:
+  from distutils.core import setup
+from sys import version_info
+
+install_requires = []
+if version_info[:2] <= (2, 5):
+    install_requires.append('simplejson >= 2.0.9')
+
+setup(
+  name = 'avro',
+  version = '@AVRO_VERSION@',
+  packages = ['avro',],
+  package_dir = {'avro': 'src/avro'},
+  scripts = ["./scripts/avro"],
+
+  # Project uses simplejson, so ensure that it gets installed or upgraded
+  # on the target machine
+  install_requires = install_requires,
+
+  # metadata for upload to PyPI
+  author = 'Apache Avro',
+  author_email = 'avro-dev at hadoop.apache.org',
+  description = 'Avro is a serialization and RPC framework.',
+  license = 'Apache License 2.0',
+  keywords = 'avro serialization rpc',
+  url = 'http://hadoop.apache.org/avro',
+  extras_require = {
+    'snappy': ['python-snappy'],
+  },
+)
diff --git a/lang/py/src/avro/__init__.py b/lang/py/src/avro/__init__.py
new file mode 100644
index 0000000..da51d9b
--- /dev/null
+++ b/lang/py/src/avro/__init__.py
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__all__ = ['schema', 'io', 'datafile', 'protocol', 'ipc']
+
diff --git a/lang/py/src/avro/datafile.py b/lang/py/src/avro/datafile.py
new file mode 100644
index 0000000..e12a68f
--- /dev/null
+++ b/lang/py/src/avro/datafile.py
@@ -0,0 +1,376 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Read/Write Avro File Object Containers.
+"""
+import zlib
+try:
+  from cStringIO import StringIO
+except ImportError:
+  from StringIO import StringIO
+from avro import schema
+from avro import io
+try:
+  import snappy
+  has_snappy = True
+except ImportError:
+  has_snappy = False
+#
+# Constants
+#
+
+VERSION = 1
+MAGIC = 'Obj' + chr(VERSION)
+MAGIC_SIZE = len(MAGIC)
+SYNC_SIZE = 16
+SYNC_INTERVAL = 4000 * SYNC_SIZE # TODO(hammer): make configurable
+META_SCHEMA = schema.parse("""\
+{"type": "record", "name": "org.apache.avro.file.Header",
+ "fields" : [
+   {"name": "magic", "type": {"type": "fixed", "name": "magic", "size": %d}},
+   {"name": "meta", "type": {"type": "map", "values": "bytes"}},
+   {"name": "sync", "type": {"type": "fixed", "name": "sync", "size": %d}}]}
+""" % (MAGIC_SIZE, SYNC_SIZE))
+VALID_CODECS = ['null', 'deflate']
+if has_snappy:
+    VALID_CODECS.append('snappy')
+VALID_ENCODINGS = ['binary'] # not used yet
+
+CODEC_KEY = "avro.codec"
+SCHEMA_KEY = "avro.schema"
+
+#
+# Exceptions
+#
+
+class DataFileException(schema.AvroException):
+  """
+  Raised when there's a problem reading or writing file object containers.
+  """
+  def __init__(self, fail_msg):
+    schema.AvroException.__init__(self, fail_msg)
+
+#
+# Write Path
+#
+
+class DataFileWriter(object):
+  @staticmethod
+  def generate_sync_marker():
+    return generate_sixteen_random_bytes()
+
+  # TODO(hammer): make 'encoder' a metadata property
+  def __init__(self, writer, datum_writer, writers_schema=None, codec='null'):
+    """
+    If the schema is not present, presume we're appending.
+
+    @param writer: File-like object to write into.
+    """
+    self._writer = writer
+    self._encoder = io.BinaryEncoder(writer)
+    self._datum_writer = datum_writer
+    self._buffer_writer = StringIO()
+    self._buffer_encoder = io.BinaryEncoder(self._buffer_writer)
+    self._block_count = 0
+    self._meta = {}
+    self._header_written = False
+
+    if writers_schema is not None:
+      if codec not in VALID_CODECS:
+        raise DataFileException("Unknown codec: %r" % codec)
+      self._sync_marker = DataFileWriter.generate_sync_marker()
+      self.set_meta('avro.codec', codec)
+      self.set_meta('avro.schema', str(writers_schema))
+      self.datum_writer.writers_schema = writers_schema
+    else:
+      # open writer for reading to collect metadata
+      dfr = DataFileReader(writer, io.DatumReader())
+      
+      # TODO(hammer): collect arbitrary metadata
+      # collect metadata
+      self._sync_marker = dfr.sync_marker
+      self.set_meta('avro.codec', dfr.get_meta('avro.codec'))
+
+      # get schema used to write existing file
+      schema_from_file = dfr.get_meta('avro.schema')
+      self.set_meta('avro.schema', schema_from_file)
+      self.datum_writer.writers_schema = schema.parse(schema_from_file)
+
+      # seek to the end of the file and prepare for writing
+      writer.seek(0, 2)
+      self._header_written = True
+
+  # read-only properties
+  writer = property(lambda self: self._writer)
+  encoder = property(lambda self: self._encoder)
+  datum_writer = property(lambda self: self._datum_writer)
+  buffer_writer = property(lambda self: self._buffer_writer)
+  buffer_encoder = property(lambda self: self._buffer_encoder)
+  sync_marker = property(lambda self: self._sync_marker)
+  meta = property(lambda self: self._meta)
+
+  def __enter__(self):
+    return self
+
+  def __exit__(self, type, value, traceback):
+    # Perform a close if there's no exception
+    if type is None:
+      self.close()
+
+  # read/write properties
+  def set_block_count(self, new_val):
+    self._block_count = new_val
+  block_count = property(lambda self: self._block_count, set_block_count)
+
+  # utility functions to read/write metadata entries
+  def get_meta(self, key):
+    return self._meta.get(key)
+  def set_meta(self, key, val):
+    self._meta[key] = val
+
+  def _write_header(self):
+    header = {'magic': MAGIC,
+              'meta': self.meta,
+              'sync': self.sync_marker}
+    self.datum_writer.write_data(META_SCHEMA, header, self.encoder)
+    self._header_written = True
+
+  # TODO(hammer): make a schema for blocks and use datum_writer
+  def _write_block(self):
+    if not self._header_written:
+      self._write_header()
+
+    if self.block_count > 0:
+      # write number of items in block
+      self.encoder.write_long(self.block_count)
+
+      # write block contents
+      uncompressed_data = self.buffer_writer.getvalue()
+      if self.get_meta(CODEC_KEY) == 'null':
+        compressed_data = uncompressed_data
+        compressed_data_length = len(compressed_data)
+      elif self.get_meta(CODEC_KEY) == 'deflate':
+        # The first two characters and last character are zlib
+        # wrappers around deflate data.
+        compressed_data = zlib.compress(uncompressed_data)[2:-1]
+        compressed_data_length = len(compressed_data)
+      elif self.get_meta(CODEC_KEY) == 'snappy':
+        compressed_data = snappy.compress(uncompressed_data)
+        compressed_data_length = len(compressed_data) + 4 # crc32
+      else:
+        fail_msg = '"%s" codec is not supported.' % self.get_meta(CODEC_KEY)
+        raise DataFileException(fail_msg)
+
+      # Write length of block
+      self.encoder.write_long(compressed_data_length)
+
+      # Write block
+      self.writer.write(compressed_data)
+      
+      # Write CRC32 checksum for Snappy
+      if self.get_meta(CODEC_KEY) == 'snappy':
+        self.encoder.write_crc32(uncompressed_data)
+
+      # write sync marker
+      self.writer.write(self.sync_marker)
+
+      # reset buffer
+      self.buffer_writer.truncate(0) 
+      self.block_count = 0
+
+  def append(self, datum):
+    """Append a datum to the file."""
+    self.datum_writer.write(datum, self.buffer_encoder)
+    self.block_count += 1
+
+    # if the data to write is larger than the sync interval, write the block
+    if self.buffer_writer.tell() >= SYNC_INTERVAL:
+      self._write_block()
+
+  def sync(self):
+    """
+    Return the current position as a value that may be passed to
+    DataFileReader.seek(long). Forces the end of the current block,
+    emitting a synchronization marker.
+    """
+    self._write_block()
+    return self.writer.tell()
+
+  def flush(self):
+    """Flush the current state of the file, including metadata."""
+    self._write_block()
+    self.writer.flush()
+
+  def close(self):
+    """Close the file."""
+    self.flush()
+    self.writer.close()
+
+class DataFileReader(object):
+  """Read files written by DataFileWriter."""
+  # TODO(hammer): allow user to specify expected schema?
+  # TODO(hammer): allow user to specify the encoder
+  def __init__(self, reader, datum_reader):
+    self._reader = reader
+    self._raw_decoder = io.BinaryDecoder(reader)
+    self._datum_decoder = None # Maybe reset at every block.
+    self._datum_reader = datum_reader
+    
+    # read the header: magic, meta, sync
+    self._read_header()
+
+    # ensure codec is valid
+    self.codec = self.get_meta('avro.codec')
+    if self.codec is None:
+      self.codec = "null"
+    if self.codec not in VALID_CODECS:
+      raise DataFileException('Unknown codec: %s.' % self.codec)
+
+    # get file length
+    self._file_length = self.determine_file_length()
+
+    # get ready to read
+    self._block_count = 0
+    self.datum_reader.writers_schema = schema.parse(self.get_meta(SCHEMA_KEY))
+
+  def __enter__(self):
+    return self
+
+  def __exit__(self, type, value, traceback):
+    # Perform a close if there's no exception
+    if type is None:
+      self.close()
+
+  def __iter__(self):
+    return self
+
+  # read-only properties
+  reader = property(lambda self: self._reader)
+  raw_decoder = property(lambda self: self._raw_decoder)
+  datum_decoder = property(lambda self: self._datum_decoder)
+  datum_reader = property(lambda self: self._datum_reader)
+  sync_marker = property(lambda self: self._sync_marker)
+  meta = property(lambda self: self._meta)
+  file_length = property(lambda self: self._file_length)
+
+  # read/write properties
+  def set_block_count(self, new_val):
+    self._block_count = new_val
+  block_count = property(lambda self: self._block_count, set_block_count)
+
+  # utility functions to read/write metadata entries
+  def get_meta(self, key):
+    return self._meta.get(key)
+  def set_meta(self, key, val):
+    self._meta[key] = val
+
+  def determine_file_length(self):
+    """
+    Get file length and leave file cursor where we found it.
+    """
+    remember_pos = self.reader.tell()
+    self.reader.seek(0, 2)
+    file_length = self.reader.tell()
+    self.reader.seek(remember_pos)
+    return file_length
+
+  def is_EOF(self):
+    return self.reader.tell() == self.file_length
+
+  def _read_header(self):
+    # seek to the beginning of the file to get magic block
+    self.reader.seek(0, 0) 
+
+    # read header into a dict
+    header = self.datum_reader.read_data(
+      META_SCHEMA, META_SCHEMA, self.raw_decoder)
+
+    # check magic number
+    if header.get('magic') != MAGIC:
+      fail_msg = "Not an Avro data file: %s doesn't match %s."\
+                 % (header.get('magic'), MAGIC)
+      raise schema.AvroException(fail_msg)
+
+    # set metadata
+    self._meta = header['meta']
+
+    # set sync marker
+    self._sync_marker = header['sync']
+
+  def _read_block_header(self):
+    self.block_count = self.raw_decoder.read_long()
+    if self.codec == "null":
+      # Skip a long; we don't need to use the length.
+      self.raw_decoder.skip_long()
+      self._datum_decoder = self._raw_decoder
+    elif self.codec == 'deflate':
+      # Compressed data is stored as (length, data), which
+      # corresponds to how the "bytes" type is encoded.
+      data = self.raw_decoder.read_bytes()
+      # -15 is the log of the window size; negative indicates
+      # "raw" (no zlib headers) decompression.  See zlib.h.
+      uncompressed = zlib.decompress(data, -15)
+      self._datum_decoder = io.BinaryDecoder(StringIO(uncompressed))
+    elif self.codec == 'snappy':
+      # Compressed data includes a 4-byte CRC32 checksum
+      length = self.raw_decoder.read_long()
+      data = self.raw_decoder.read(length - 4)
+      uncompressed = snappy.decompress(data)
+      self._datum_decoder = io.BinaryDecoder(StringIO(uncompressed))
+      self.raw_decoder.check_crc32(uncompressed)
+    else:
+      raise DataFileException("Unknown codec: %r" % self.codec)
+
+  def _skip_sync(self):
+    """
+    Read the length of the sync marker; if it matches the sync marker,
+    return True. Otherwise, seek back to where we started and return False.
+    """
+    proposed_sync_marker = self.reader.read(SYNC_SIZE)
+    if proposed_sync_marker != self.sync_marker:
+      self.reader.seek(-SYNC_SIZE, 1)
+      return False
+    else:
+      return True
+
+  # TODO(hammer): handle block of length zero
+  # TODO(hammer): clean this up with recursion
+  def next(self):
+    """Return the next datum in the file."""
+    if self.block_count == 0:
+      if self.is_EOF():
+        raise StopIteration
+      elif self._skip_sync():
+        if self.is_EOF(): raise StopIteration
+        self._read_block_header()
+      else:
+        self._read_block_header()
+
+    datum = self.datum_reader.read(self.datum_decoder) 
+    self.block_count -= 1
+    return datum
+
+  def close(self):
+    """Close this reader."""
+    self.reader.close()
+
+def generate_sixteen_random_bytes():
+  try:
+    import os
+    return os.urandom(16)
+  except (ImportError, NotImplementedError):
+    # os.urandom is not available on every platform; fall back to the
+    # (non-cryptographic) random module and return a 16-byte string.
+    import random
+    return ''.join([chr(random.randrange(256)) for i in range(16)])
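+
+# --- Illustrative sketch (editorial addition, not part of the upstream file).
+# A minimal in-memory round trip through DataFileWriter and DataFileReader;
+# the record schema, the field value and the helper name are made up for
+# illustration only.
+def _demo_container_round_trip():
+  writers_schema = schema.parse(
+      '{"type": "record", "name": "User",'
+      ' "fields": [{"name": "name", "type": "string"}]}')
+  buf = StringIO()
+  writer = DataFileWriter(buf, io.DatumWriter(), writers_schema)
+  writer.append({'name': u'alyssa'})
+  writer.flush()  # write the pending block but keep buf open for reading
+  reader = DataFileReader(buf, io.DatumReader())
+  return [datum for datum in reader]  # one record: {'name': u'alyssa'}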
diff --git a/lang/py/src/avro/io.py b/lang/py/src/avro/io.py
new file mode 100644
index 0000000..b2fd2f9
--- /dev/null
+++ b/lang/py/src/avro/io.py
@@ -0,0 +1,890 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Input/Output utilities, including:
+
+ * i/o-specific constants
+ * i/o-specific exceptions
+ * schema validation
+ * leaf value encoding and decoding
+ * datum reader/writer stuff (?)
+
+Also includes a generic representation for data, which
+uses the following mapping:
+
+  * Schema records are implemented as dict.
+  * Schema arrays are implemented as list.
+  * Schema maps are implemented as dict.
+  * Schema strings are implemented as unicode.
+  * Schema bytes are implemented as str.
+  * Schema ints are implemented as int.
+  * Schema longs are implemented as long.
+  * Schema floats are implemented as float.
+  * Schema doubles are implemented as float.
+  * Schema booleans are implemented as bool. 
+"""
+import struct
+from avro import schema
+import sys
+from binascii import crc32
+
+try:
+  import json
+except ImportError:
+  import simplejson as json
+
+#
+# Constants
+#
+
+INT_MIN_VALUE = -(1 << 31)
+INT_MAX_VALUE = (1 << 31) - 1
+LONG_MIN_VALUE = -(1 << 63)
+LONG_MAX_VALUE = (1 << 63) - 1
+
+# TODO(hammer): shouldn't ! be < for little-endian (according to spec?)
+if sys.version_info >= (2, 5, 0):
+  struct_class = struct.Struct
+else:
+  class SimpleStruct(object):
+    def __init__(self, format):
+      self.format = format
+    def pack(self, *args):
+      return struct.pack(self.format, *args)
+    def unpack(self, *args):
+      return struct.unpack(self.format, *args)
+  struct_class = SimpleStruct
+
+STRUCT_INT = struct_class('!I')     # big-endian unsigned int
+STRUCT_LONG = struct_class('!Q')    # big-endian unsigned long long
+STRUCT_FLOAT = struct_class('!f')   # big-endian float
+STRUCT_DOUBLE = struct_class('!d')  # big-endian double
+STRUCT_CRC32 = struct_class('>I')   # big-endian unsigned int
+
+#
+# Exceptions
+#
+
+class AvroTypeException(schema.AvroException):
+  """Raised when datum is not an example of schema."""
+  def __init__(self, expected_schema, datum):
+    pretty_expected = json.dumps(json.loads(str(expected_schema)), indent=2)
+    fail_msg = "The datum %s is not an example of the schema %s"\
+               % (datum, pretty_expected)
+    schema.AvroException.__init__(self, fail_msg)
+
+class SchemaResolutionException(schema.AvroException):
+  def __init__(self, fail_msg, writers_schema=None, readers_schema=None):
+    pretty_writers = json.dumps(json.loads(str(writers_schema)), indent=2)
+    pretty_readers = json.dumps(json.loads(str(readers_schema)), indent=2)
+    if writers_schema: fail_msg += "\nWriter's Schema: %s" % pretty_writers
+    if readers_schema: fail_msg += "\nReader's Schema: %s" % pretty_readers
+    schema.AvroException.__init__(self, fail_msg)
+
+#
+# Validate
+#
+
+def validate(expected_schema, datum):
+  """Determine if a python datum is an instance of a schema."""
+  schema_type = expected_schema.type
+  if schema_type == 'null':
+    return datum is None
+  elif schema_type == 'boolean':
+    return isinstance(datum, bool)
+  elif schema_type == 'string':
+    return isinstance(datum, basestring)
+  elif schema_type == 'bytes':
+    return isinstance(datum, str)
+  elif schema_type == 'int':
+    return ((isinstance(datum, int) or isinstance(datum, long)) 
+            and INT_MIN_VALUE <= datum <= INT_MAX_VALUE)
+  elif schema_type == 'long':
+    return ((isinstance(datum, int) or isinstance(datum, long)) 
+            and LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE)
+  elif schema_type in ['float', 'double']:
+    return (isinstance(datum, int) or isinstance(datum, long)
+            or isinstance(datum, float))
+  elif schema_type == 'fixed':
+    return isinstance(datum, str) and len(datum) == expected_schema.size
+  elif schema_type == 'enum':
+    return datum in expected_schema.symbols
+  elif schema_type == 'array':
+    return (isinstance(datum, list) and
+      False not in [validate(expected_schema.items, d) for d in datum])
+  elif schema_type == 'map':
+    return (isinstance(datum, dict) and
+      False not in [isinstance(k, basestring) for k in datum.keys()] and
+      False not in
+        [validate(expected_schema.values, v) for v in datum.values()])
+  elif schema_type in ['union', 'error_union']:
+    return True in [validate(s, datum) for s in expected_schema.schemas]
+  elif schema_type in ['record', 'error', 'request']:
+    return (isinstance(datum, dict) and
+      False not in
+        [validate(f.type, datum.get(f.name)) for f in expected_schema.fields])
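+
+# Editorial note (illustrative, not part of the upstream file): for example,
+# against the schema {"type": "array", "items": "int"} the datum [1, 2, 3]
+# validates, while [1, "two"] does not, because every item must satisfy the
+# per-item check in the 'array' branch above.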
+
+#
+# Decoder/Encoder
+#
+
+class BinaryDecoder(object):
+  """Read leaf values."""
+  def __init__(self, reader):
+    """
+    reader is a Python object on which we can call read, seek, and tell.
+    """
+    self._reader = reader
+
+  # read-only properties
+  reader = property(lambda self: self._reader)
+
+  def read(self, n):
+    """
+    Read n bytes.
+    """
+    return self.reader.read(n)
+
+  def read_null(self):
+    """
+    null is written as zero bytes
+    """
+    return None
+
+  def read_boolean(self):
+    """
+    a boolean is written as a single byte 
+    whose value is either 0 (false) or 1 (true).
+    """
+    return ord(self.read(1)) == 1
+
+  def read_int(self):
+    """
+    int and long values are written using variable-length, zig-zag coding.
+    """
+    return self.read_long()
+
+  def read_long(self):
+    """
+    int and long values are written using variable-length, zig-zag coding.
+    """
+    b = ord(self.read(1))
+    n = b & 0x7F
+    shift = 7
+    while (b & 0x80) != 0:
+      b = ord(self.read(1))
+      n |= (b & 0x7F) << shift
+      shift += 7
+    datum = (n >> 1) ^ -(n & 1)
+    return datum
+
+  def read_float(self):
+    """
+    A float is written as 4 bytes.
+    The float is converted into a 32-bit integer using a method equivalent to
+    Java's floatToIntBits and then encoded in little-endian format.
+    """
+    bits = (((ord(self.read(1)) & 0xffL)) |
+      ((ord(self.read(1)) & 0xffL) <<  8) |
+      ((ord(self.read(1)) & 0xffL) << 16) |
+      ((ord(self.read(1)) & 0xffL) << 24))
+    return STRUCT_FLOAT.unpack(STRUCT_INT.pack(bits))[0]
+
+  def read_double(self):
+    """
+    A double is written as 8 bytes.
+    The double is converted into a 64-bit integer using a method equivalent to
+    Java's doubleToLongBits and then encoded in little-endian format.
+    """
+    bits = (((ord(self.read(1)) & 0xffL)) |
+      ((ord(self.read(1)) & 0xffL) <<  8) |
+      ((ord(self.read(1)) & 0xffL) << 16) |
+      ((ord(self.read(1)) & 0xffL) << 24) |
+      ((ord(self.read(1)) & 0xffL) << 32) |
+      ((ord(self.read(1)) & 0xffL) << 40) |
+      ((ord(self.read(1)) & 0xffL) << 48) |
+      ((ord(self.read(1)) & 0xffL) << 56))
+    return STRUCT_DOUBLE.unpack(STRUCT_LONG.pack(bits))[0]
+
+  def read_bytes(self):
+    """
+    Bytes are encoded as a long followed by that many bytes of data. 
+    """
+    return self.read(self.read_long())
+
+  def read_utf8(self):
+    """
+    A string is encoded as a long followed by
+    that many bytes of UTF-8 encoded character data.
+    """
+    return unicode(self.read_bytes(), "utf-8")
+
+  def check_crc32(self, bytes):
+    checksum = STRUCT_CRC32.unpack(self.read(4))[0]
+    if crc32(bytes) & 0xffffffff != checksum:
+      raise schema.AvroException("Checksum failure")
+
+  def skip_null(self):
+    pass
+
+  def skip_boolean(self):
+    self.skip(1)
+
+  def skip_int(self):
+    self.skip_long()
+
+  def skip_long(self):
+    b = ord(self.read(1))
+    while (b & 0x80) != 0:
+      b = ord(self.read(1))
+
+  def skip_float(self):
+    self.skip(4)
+
+  def skip_double(self):
+    self.skip(8)
+
+  def skip_bytes(self):
+    self.skip(self.read_long())
+
+  def skip_utf8(self):
+    self.skip_bytes()
+
+  def skip(self, n):
+    self.reader.seek(self.reader.tell() + n)
+
+class BinaryEncoder(object):
+  """Write leaf values."""
+  def __init__(self, writer):
+    """
+    writer is a Python object on which we can call write.
+    """
+    self._writer = writer
+
+  # read-only properties
+  writer = property(lambda self: self._writer)
+
+  def write(self, datum):
+    """Write an abritrary datum."""
+    self.writer.write(datum)
+
+  def write_null(self, datum):
+    """
+    null is written as zero bytes
+    """
+    pass
+  
+  def write_boolean(self, datum):
+    """
+    a boolean is written as a single byte 
+    whose value is either 0 (false) or 1 (true).
+    """
+    if datum:
+      self.write(chr(1))
+    else:
+      self.write(chr(0))
+
+  def write_int(self, datum):
+    """
+    int and long values are written using variable-length, zig-zag coding.    
+    """
+    self.write_long(datum)
+
+  def write_long(self, datum):
+    """
+    int and long values are written using variable-length, zig-zag coding.
+    """
+    datum = (datum << 1) ^ (datum >> 63)
+    while (datum & ~0x7F) != 0:
+      self.write(chr((datum & 0x7f) | 0x80))
+      datum >>= 7
+    self.write(chr(datum))
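+
+  # Editorial worked example (illustrative, not part of the upstream file):
+  # the zig-zag step (datum << 1) ^ (datum >> 63) interleaves small negative
+  # and positive values so they stay short after the 7-bit varint loop above:
+  #   value   zig-zag   bytes written
+  #     0        0      0x00
+  #    -1        1      0x01
+  #     1        2      0x02
+  #   -64      127      0x7f
+  #    64      128      0x80 0x01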
+
+  def write_float(self, datum):
+    """
+    A float is written as 4 bytes.
+    The float is converted into a 32-bit integer using a method equivalent to
+    Java's floatToIntBits and then encoded in little-endian format.
+    """
+    bits = STRUCT_INT.unpack(STRUCT_FLOAT.pack(datum))[0]
+    self.write(chr((bits) & 0xFF))
+    self.write(chr((bits >> 8) & 0xFF))
+    self.write(chr((bits >> 16) & 0xFF))
+    self.write(chr((bits >> 24) & 0xFF))
+
+  def write_double(self, datum):
+    """
+    A double is written as 8 bytes.
+    The double is converted into a 64-bit integer using a method equivalent to
+    Java's doubleToLongBits and then encoded in little-endian format.
+    """
+    bits = STRUCT_LONG.unpack(STRUCT_DOUBLE.pack(datum))[0]
+    self.write(chr((bits) & 0xFF))
+    self.write(chr((bits >> 8) & 0xFF))
+    self.write(chr((bits >> 16) & 0xFF))
+    self.write(chr((bits >> 24) & 0xFF))
+    self.write(chr((bits >> 32) & 0xFF))
+    self.write(chr((bits >> 40) & 0xFF))
+    self.write(chr((bits >> 48) & 0xFF))
+    self.write(chr((bits >> 56) & 0xFF))
+
+  def write_bytes(self, datum):
+    """
+    Bytes are encoded as a long followed by that many bytes of data. 
+    """
+    self.write_long(len(datum))
+    self.write(struct.pack('%ds' % len(datum), datum))
+
+  def write_utf8(self, datum):
+    """
+    A string is encoded as a long followed by
+    that many bytes of UTF-8 encoded character data.
+    """
+    datum = datum.encode("utf-8")
+    self.write_bytes(datum)
+
+  def write_crc32(self, bytes):
+    """
+    A 4-byte, big-endian CRC32 checksum
+    """
+    self.write(STRUCT_CRC32.pack(crc32(bytes) & 0xffffffff))
+
+#
+# DatumReader/Writer
+#
+
+class DatumReader(object):
+  """Deserialize Avro-encoded data into a Python data structure."""
+  @staticmethod
+  def check_props(schema_one, schema_two, prop_list):
+    for prop in prop_list:
+      if getattr(schema_one, prop) != getattr(schema_two, prop):
+        return False
+    return True
+
+  @staticmethod
+  def match_schemas(writers_schema, readers_schema):
+    w_type = writers_schema.type
+    r_type = readers_schema.type
+    if 'union' in [w_type, r_type] or 'error_union' in [w_type, r_type]:
+      return True
+    elif (w_type in schema.PRIMITIVE_TYPES and r_type in schema.PRIMITIVE_TYPES
+          and w_type == r_type):
+      return True
+    elif (w_type == r_type == 'record' and
+          DatumReader.check_props(writers_schema, readers_schema, 
+                                  ['fullname'])):
+      return True
+    elif (w_type == r_type == 'error' and
+          DatumReader.check_props(writers_schema, readers_schema, 
+                                  ['fullname'])):
+      return True
+    elif (w_type == r_type == 'request'):
+      return True
+    elif (w_type == r_type == 'fixed' and 
+          DatumReader.check_props(writers_schema, readers_schema, 
+                                  ['fullname', 'size'])):
+      return True
+    elif (w_type == r_type == 'enum' and 
+          DatumReader.check_props(writers_schema, readers_schema, 
+                                  ['fullname'])):
+      return True
+    elif (w_type == r_type == 'map' and 
+          DatumReader.check_props(writers_schema.values,
+                                  readers_schema.values, ['type'])):
+      return True
+    elif (w_type == r_type == 'array' and 
+          DatumReader.check_props(writers_schema.items,
+                                  readers_schema.items, ['type'])):
+      return True
+    
+    # Handle schema promotion
+    if w_type == 'int' and r_type in ['long', 'float', 'double']:
+      return True
+    elif w_type == 'long' and r_type in ['float', 'double']:
+      return True
+    elif w_type == 'float' and r_type == 'double':
+      return True
+    return False
+
+  def __init__(self, writers_schema=None, readers_schema=None):
+    """
+    As defined in the Avro specification, we call the schema encoded
+    in the data the "writer's schema", and the schema expected by the
+    reader the "reader's schema".
+    """
+    self._writers_schema = writers_schema
+    self._readers_schema = readers_schema 
+
+  # read/write properties
+  def set_writers_schema(self, writers_schema):
+    self._writers_schema = writers_schema
+  writers_schema = property(lambda self: self._writers_schema,
+                            set_writers_schema)
+  def set_readers_schema(self, readers_schema):
+    self._readers_schema = readers_schema
+  readers_schema = property(lambda self: self._readers_schema,
+                            set_readers_schema)
+  
+  def read(self, decoder):
+    if self.readers_schema is None:
+      self.readers_schema = self.writers_schema
+    return self.read_data(self.writers_schema, self.readers_schema, decoder)
+
+  def read_data(self, writers_schema, readers_schema, decoder):
+    # schema matching
+    if not DatumReader.match_schemas(writers_schema, readers_schema):
+      fail_msg = 'Schemas do not match.'
+      raise SchemaResolutionException(fail_msg, writers_schema, readers_schema)
+
+    # schema resolution: reader's schema is a union, writer's schema is not
+    if (writers_schema.type not in ['union', 'error_union']
+        and readers_schema.type in ['union', 'error_union']):
+      for s in readers_schema.schemas:
+        if DatumReader.match_schemas(writers_schema, s):
+          return self.read_data(writers_schema, s, decoder)
+      fail_msg = 'Schemas do not match.'
+      raise SchemaResolutionException(fail_msg, writers_schema, readers_schema)
+
+    # function dispatch for reading data based on type of writer's schema
+    if writers_schema.type == 'null':
+      return decoder.read_null()
+    elif writers_schema.type == 'boolean':
+      return decoder.read_boolean()
+    elif writers_schema.type == 'string':
+      return decoder.read_utf8()
+    elif writers_schema.type == 'int':
+      return decoder.read_int()
+    elif writers_schema.type == 'long':
+      return decoder.read_long()
+    elif writers_schema.type == 'float':
+      return decoder.read_float()
+    elif writers_schema.type == 'double':
+      return decoder.read_double()
+    elif writers_schema.type == 'bytes':
+      return decoder.read_bytes()
+    elif writers_schema.type == 'fixed':
+      return self.read_fixed(writers_schema, readers_schema, decoder)
+    elif writers_schema.type == 'enum':
+      return self.read_enum(writers_schema, readers_schema, decoder)
+    elif writers_schema.type == 'array':
+      return self.read_array(writers_schema, readers_schema, decoder)
+    elif writers_schema.type == 'map':
+      return self.read_map(writers_schema, readers_schema, decoder)
+    elif writers_schema.type in ['union', 'error_union']:
+      return self.read_union(writers_schema, readers_schema, decoder)
+    elif writers_schema.type in ['record', 'error', 'request']:
+      return self.read_record(writers_schema, readers_schema, decoder)
+    else:
+      fail_msg = "Cannot read unknown schema type: %s" % writers_schema.type
+      raise schema.AvroException(fail_msg)
+
+  def skip_data(self, writers_schema, decoder):
+    if writers_schema.type == 'null':
+      return decoder.skip_null()
+    elif writers_schema.type == 'boolean':
+      return decoder.skip_boolean()
+    elif writers_schema.type == 'string':
+      return decoder.skip_utf8()
+    elif writers_schema.type == 'int':
+      return decoder.skip_int()
+    elif writers_schema.type == 'long':
+      return decoder.skip_long()
+    elif writers_schema.type == 'float':
+      return decoder.skip_float()
+    elif writers_schema.type == 'double':
+      return decoder.skip_double()
+    elif writers_schema.type == 'bytes':
+      return decoder.skip_bytes()
+    elif writers_schema.type == 'fixed':
+      return self.skip_fixed(writers_schema, decoder)
+    elif writers_schema.type == 'enum':
+      return self.skip_enum(writers_schema, decoder)
+    elif writers_schema.type == 'array':
+      return self.skip_array(writers_schema, decoder)
+    elif writers_schema.type == 'map':
+      return self.skip_map(writers_schema, decoder)
+    elif writers_schema.type in ['union', 'error_union']:
+      return self.skip_union(writers_schema, decoder)
+    elif writers_schema.type in ['record', 'error', 'request']:
+      return self.skip_record(writers_schema, decoder)
+    else:
+      fail_msg = "Unknown schema type: %s" % writers_schema.type
+      raise schema.AvroException(fail_msg)
+
+  def read_fixed(self, writers_schema, readers_schema, decoder):
+    """
+    Fixed instances are encoded using the number of bytes declared
+    in the schema.
+    """
+    return decoder.read(writers_schema.size)
+
+  def skip_fixed(self, writers_schema, decoder):
+    return decoder.skip(writers_schema.size)
+
+  def read_enum(self, writers_schema, readers_schema, decoder):
+    """
+    An enum is encoded by an int, representing the zero-based position
+    of the symbol in the schema.
+    """
+    # read data
+    index_of_symbol = decoder.read_int()
+    if index_of_symbol >= len(writers_schema.symbols):
+      fail_msg = "Can't access enum index %d for enum with %d symbols"\
+                 % (index_of_symbol, len(writers_schema.symbols))
+      raise SchemaResolutionException(fail_msg, writers_schema, readers_schema)
+    read_symbol = writers_schema.symbols[index_of_symbol]
+
+    # schema resolution
+    if read_symbol not in readers_schema.symbols:
+      fail_msg = "Symbol %s not present in Reader's Schema" % read_symbol
+      raise SchemaResolutionException(fail_msg, writers_schema, readers_schema)
+
+    return read_symbol
+
+  def skip_enum(self, writers_schema, decoder):
+    return decoder.skip_int()
+
+  def read_array(self, writers_schema, readers_schema, decoder):
+    """
+    Arrays are encoded as a series of blocks.
+
+    Each block consists of a long count value,
+    followed by that many array items.
+    A block with count zero indicates the end of the array.
+    Each item is encoded per the array's item schema.
+
+    If a block's count is negative,
+    then the count is followed immediately by a long block size,
+    indicating the number of bytes in the block.
+    The actual count in this case
+    is the absolute value of the count written.
+    """
+    read_items = []
+    block_count = decoder.read_long()
+    while block_count != 0:
+      if block_count < 0:
+        block_count = -block_count
+        block_size = decoder.read_long()
+      for i in range(block_count):
+        read_items.append(self.read_data(writers_schema.items,
+                                         readers_schema.items, decoder))
+      block_count = decoder.read_long()
+    return read_items
+
+  def skip_array(self, writers_schema, decoder):
+    block_count = decoder.read_long()
+    while block_count != 0:
+      if block_count < 0:
+        block_size = decoder.read_long()
+        decoder.skip(block_size)
+      else:
+        for i in range(block_count):
+          self.skip_data(writers_schema.items, decoder)
+      block_count = decoder.read_long()
+
+  def read_map(self, writers_schema, readers_schema, decoder):
+    """
+    Maps are encoded as a series of blocks.
+
+    Each block consists of a long count value,
+    followed by that many key/value pairs.
+    A block with count zero indicates the end of the map.
+    Each item is encoded per the map's value schema.
+
+    If a block's count is negative,
+    then the count is followed immediately by a long block size,
+    indicating the number of bytes in the block.
+    The actual count in this case
+    is the absolute value of the count written.
+    """
+    read_items = {}
+    block_count = decoder.read_long()
+    while block_count != 0:
+      if block_count < 0:
+        block_count = -block_count
+        block_size = decoder.read_long()
+      for i in range(block_count):
+        key = decoder.read_utf8()
+        read_items[key] = self.read_data(writers_schema.values,
+                                         readers_schema.values, decoder)
+      block_count = decoder.read_long()
+    return read_items
+
+  def skip_map(self, writers_schema, decoder):
+    block_count = decoder.read_long()
+    while block_count != 0:
+      if block_count < 0:
+        block_size = decoder.read_long()
+        decoder.skip(block_size)
+      else:
+        for i in range(block_count):
+          decoder.skip_utf8()
+          self.skip_data(writers_schema.values, decoder)
+      block_count = decoder.read_long()
+
+  def read_union(self, writers_schema, readers_schema, decoder):
+    """
+    A union is encoded by first writing a long value indicating
+    the zero-based position within the union of the schema of its value.
+    The value is then encoded per the indicated schema within the union.
+    """
+    # schema resolution
+    index_of_schema = int(decoder.read_long())
+    if index_of_schema >= len(writers_schema.schemas):
+      fail_msg = "Can't access branch index %d for union with %d branches"\
+                 % (index_of_schema, len(writers_schema.schemas))
+      raise SchemaResolutionException(fail_msg, writers_schema, readers_schema)
+    selected_writers_schema = writers_schema.schemas[index_of_schema]
+    
+    # read data
+    return self.read_data(selected_writers_schema, readers_schema, decoder)
+
+  def skip_union(self, writers_schema, decoder):
+    index_of_schema = int(decoder.read_long())
+    if index_of_schema >= len(writers_schema.schemas):
+      fail_msg = "Can't access branch index %d for union with %d branches"\
+                 % (index_of_schema, len(writers_schema.schemas))
+      raise SchemaResolutionException(fail_msg, writers_schema)
+    return self.skip_data(writers_schema.schemas[index_of_schema], decoder)
+
+  def read_record(self, writers_schema, readers_schema, decoder):
+    """
+    A record is encoded by encoding the values of its fields
+    in the order that they are declared. In other words, a record
+    is encoded as just the concatenation of the encodings of its fields.
+    Field values are encoded per their schema.
+
+    Schema Resolution:
+     * the ordering of fields may be different: fields are matched by name.
+     * schemas for fields with the same name in both records are resolved
+       recursively.
+     * if the writer's record contains a field with a name not present in the
+       reader's record, the writer's value for that field is ignored.
+     * if the reader's record schema has a field that contains a default value,
+       and writer's schema does not have a field with the same name, then the
+       reader should use the default value from its field.
+     * if the reader's record schema has a field with no default value, and 
+       writer's schema does not have a field with the same name, then the
+       field's value is unset.
+    """
+    # schema resolution
+    readers_fields_dict = readers_schema.fields_dict
+    read_record = {}
+    for field in writers_schema.fields:
+      readers_field = readers_fields_dict.get(field.name)
+      if readers_field is not None:
+        field_val = self.read_data(field.type, readers_field.type, decoder)
+        read_record[field.name] = field_val
+      else:
+        self.skip_data(field.type, decoder)
+
+    # fill in default values
+    if len(readers_fields_dict) > len(read_record):
+      writers_fields_dict = writers_schema.fields_dict
+      for field_name, field in readers_fields_dict.items():
+        if not writers_fields_dict.has_key(field_name):
+          if field.has_default:
+            field_val = self._read_default_value(field.type, field.default)
+            read_record[field.name] = field_val
+          else:
+            fail_msg = 'No default value for field %s' % field_name
+            raise SchemaResolutionException(fail_msg, writers_schema,
+                                            readers_schema)
+    return read_record
+
+  def skip_record(self, writers_schema, decoder):
+    for field in writers_schema.fields:
+      self.skip_data(field.type, decoder)
+
+  def _read_default_value(self, field_schema, default_value):
+    """
+    Basically a JSON Decoder?
+    """
+    if field_schema.type == 'null':
+      return None
+    elif field_schema.type == 'boolean':
+      return bool(default_value)
+    elif field_schema.type == 'int':
+      return int(default_value)
+    elif field_schema.type == 'long':
+      return long(default_value)
+    elif field_schema.type in ['float', 'double']:
+      return float(default_value)
+    elif field_schema.type in ['enum', 'fixed', 'string', 'bytes']:
+      return default_value
+    elif field_schema.type == 'array':
+      read_array = []
+      for json_val in default_value:
+        item_val = self._read_default_value(field_schema.items, json_val)
+        read_array.append(item_val)
+      return read_array
+    elif field_schema.type == 'map':
+      read_map = {}
+      for key, json_val in default_value.items():
+        map_val = self._read_default_value(field_schema.values, json_val)
+        read_map[key] = map_val
+      return read_map
+    elif field_schema.type in ['union', 'error_union']:
+      return self._read_default_value(field_schema.schemas[0], default_value)
+    elif field_schema.type == 'record':
+      read_record = {}
+      for field in field_schema.fields:
+        json_val = default_value.get(field.name)
+        if json_val is None: json_val = field.default
+        field_val = self._read_default_value(field.type, json_val)
+        read_record[field.name] = field_val
+      return read_record
+    else:
+      fail_msg = 'Unknown type: %s' % field_schema.type
+      raise schema.AvroException(fail_msg)
+
+class DatumWriter(object):
+  """DatumWriter for generic python objects."""
+  def __init__(self, writers_schema=None):
+    self._writers_schema = writers_schema
+
+  # read/write properties
+  def set_writers_schema(self, writers_schema):
+    self._writers_schema = writers_schema
+  writers_schema = property(lambda self: self._writers_schema,
+                            set_writers_schema)
+
+  def write(self, datum, encoder):
+    # validate datum
+    if not validate(self.writers_schema, datum):
+      raise AvroTypeException(self.writers_schema, datum)
+    
+    self.write_data(self.writers_schema, datum, encoder)
+
+  def write_data(self, writers_schema, datum, encoder):
+    # function dispatch to write datum
+    if writers_schema.type == 'null':
+      encoder.write_null(datum)
+    elif writers_schema.type == 'boolean':
+      encoder.write_boolean(datum)
+    elif writers_schema.type == 'string':
+      encoder.write_utf8(datum)
+    elif writers_schema.type == 'int':
+      encoder.write_int(datum)
+    elif writers_schema.type == 'long':
+      encoder.write_long(datum)
+    elif writers_schema.type == 'float':
+      encoder.write_float(datum)
+    elif writers_schema.type == 'double':
+      encoder.write_double(datum)
+    elif writers_schema.type == 'bytes':
+      encoder.write_bytes(datum)
+    elif writers_schema.type == 'fixed':
+      self.write_fixed(writers_schema, datum, encoder)
+    elif writers_schema.type == 'enum':
+      self.write_enum(writers_schema, datum, encoder)
+    elif writers_schema.type == 'array':
+      self.write_array(writers_schema, datum, encoder)
+    elif writers_schema.type == 'map':
+      self.write_map(writers_schema, datum, encoder)
+    elif writers_schema.type in ['union', 'error_union']:
+      self.write_union(writers_schema, datum, encoder)
+    elif writers_schema.type in ['record', 'error', 'request']:
+      self.write_record(writers_schema, datum, encoder)
+    else:
+      fail_msg = 'Unknown type: %s' % writers_schema.type
+      raise schema.AvroException(fail_msg)
+
+  def write_fixed(self, writers_schema, datum, encoder):
+    """
+    Fixed instances are encoded using the number of bytes declared
+    in the schema.
+    """
+    encoder.write(datum)
+
+  def write_enum(self, writers_schema, datum, encoder):
+    """
+    An enum is encoded by an int, representing the zero-based position
+    of the symbol in the schema.
+    """
+    index_of_datum = writers_schema.symbols.index(datum)
+    encoder.write_int(index_of_datum)
+
+  def write_array(self, writers_schema, datum, encoder):
+    """
+    Arrays are encoded as a series of blocks.
+
+    Each block consists of a long count value,
+    followed by that many array items.
+    A block with count zero indicates the end of the array.
+    Each item is encoded per the array's item schema.
+
+    If a block's count is negative,
+    then the count is followed immediately by a long block size,
+    indicating the number of bytes in the block.
+    The actual count in this case
+    is the absolute value of the count written.
+    """
+    if len(datum) > 0:
+      encoder.write_long(len(datum))
+      for item in datum:
+        self.write_data(writers_schema.items, item, encoder)
+    encoder.write_long(0)
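+
+  # Editorial worked example (illustrative, not part of the upstream file):
+  # for an array of longs [3, 27], the code above emits one block followed by
+  # the end marker: 0x04 (count 2, zig-zag), 0x06 (3), 0x36 (27), 0x00 (end).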
+
+  def write_map(self, writers_schema, datum, encoder):
+    """
+    Maps are encoded as a series of blocks.
+
+    Each block consists of a long count value,
+    followed by that many key/value pairs.
+    A block with count zero indicates the end of the map.
+    Each item is encoded per the map's value schema.
+
+    If a block's count is negative,
+    then the count is followed immediately by a long block size,
+    indicating the number of bytes in the block.
+    The actual count in this case
+    is the absolute value of the count written.
+    """
+    if len(datum) > 0:
+      encoder.write_long(len(datum))
+      for key, val in datum.items():
+        encoder.write_utf8(key)
+        self.write_data(writers_schema.values, val, encoder)
+    encoder.write_long(0)
+
+  def write_union(self, writers_schema, datum, encoder):
+    """
+    A union is encoded by first writing a long value indicating
+    the zero-based position within the union of the schema of its value.
+    The value is then encoded per the indicated schema within the union.
+    """
+    # resolve union
+    index_of_schema = -1
+    for i, candidate_schema in enumerate(writers_schema.schemas):
+      if validate(candidate_schema, datum):
+        index_of_schema = i
+    if index_of_schema < 0: raise AvroTypeException(writers_schema, datum)
+
+    # write data
+    encoder.write_long(index_of_schema)
+    self.write_data(writers_schema.schemas[index_of_schema], datum, encoder)
+
+  def write_record(self, writers_schema, datum, encoder):
+    """
+    A record is encoded by encoding the values of its fields
+    in the order that they are declared. In other words, a record
+    is encoded as just the concatenation of the encodings of its fields.
+    Field values are encoded per their schema.
+    """
+    for field in writers_schema.fields:
+      self.write_data(field.type, datum.get(field.name), encoder)
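+
+# Illustrative sketch of DatumWriter usage (the schema and datum below are
+# made-up examples, not part of the library):
+#
+#   from StringIO import StringIO
+#   from avro import schema, io
+#
+#   writers_schema = schema.parse(
+#       '{"type": "record", "name": "User",'
+#       ' "fields": [{"name": "name", "type": "string"}]}')
+#   buf = StringIO()
+#   io.DatumWriter(writers_schema).write({"name": "Alice"}, io.BinaryEncoder(buf))
+#   raw_bytes = buf.getvalue()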
diff --git a/lang/py/src/avro/ipc.py b/lang/py/src/avro/ipc.py
new file mode 100644
index 0000000..d59baf7
--- /dev/null
+++ b/lang/py/src/avro/ipc.py
@@ -0,0 +1,485 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Support for inter-process calls.
+"""
+import httplib
+try:
+  from cStringIO import StringIO
+except ImportError:
+  from StringIO import StringIO
+from avro import io
+from avro import protocol
+from avro import schema
+
+#
+# Constants
+#
+
+# Handshake schema is pulled in during build
+HANDSHAKE_REQUEST_SCHEMA = schema.parse("""
+ at HANDSHAKE_REQUEST_SCHEMA@
+""")
+
+HANDSHAKE_RESPONSE_SCHEMA = schema.parse("""
+ at HANDSHAKE_RESPONSE_SCHEMA@
+""")
+
+HANDSHAKE_REQUESTOR_WRITER = io.DatumWriter(HANDSHAKE_REQUEST_SCHEMA)
+HANDSHAKE_REQUESTOR_READER = io.DatumReader(HANDSHAKE_RESPONSE_SCHEMA)
+HANDSHAKE_RESPONDER_WRITER = io.DatumWriter(HANDSHAKE_RESPONSE_SCHEMA)
+HANDSHAKE_RESPONDER_READER = io.DatumReader(HANDSHAKE_REQUEST_SCHEMA)
+
+META_SCHEMA = schema.parse('{"type": "map", "values": "bytes"}')
+META_WRITER = io.DatumWriter(META_SCHEMA)
+META_READER = io.DatumReader(META_SCHEMA)
+
+SYSTEM_ERROR_SCHEMA = schema.parse('["string"]')
+
+# protocol cache
+REMOTE_HASHES = {}
+REMOTE_PROTOCOLS = {}
+
+BIG_ENDIAN_INT_STRUCT = io.struct_class('!I')
+BUFFER_HEADER_LENGTH = 4
+BUFFER_SIZE = 8192
+
+#
+# Exceptions
+#
+
+class AvroRemoteException(schema.AvroException):
+  """
+  Raised when an error message is sent by an Avro requestor or responder.
+  """
+  def __init__(self, fail_msg=None):
+    schema.AvroException.__init__(self, fail_msg)
+
+class ConnectionClosedException(schema.AvroException):
+  pass
+
+#
+# Base IPC Classes (Requestor/Responder)
+#
+
+class BaseRequestor(object):
+  """Base class for the client side of a protocol interaction."""
+  def __init__(self, local_protocol, transceiver):
+    self._local_protocol = local_protocol
+    self._transceiver = transceiver
+    self._remote_protocol = None
+    self._remote_hash = None
+    self._send_protocol = None
+
+  # read-only properties
+  local_protocol = property(lambda self: self._local_protocol)
+  transceiver = property(lambda self: self._transceiver)
+
+  # read/write properties
+  def set_remote_protocol(self, new_remote_protocol):
+    self._remote_protocol = new_remote_protocol
+    REMOTE_PROTOCOLS[self.transceiver.remote_name] = self.remote_protocol
+  remote_protocol = property(lambda self: self._remote_protocol,
+                             set_remote_protocol)
+
+  def set_remote_hash(self, new_remote_hash):
+    self._remote_hash = new_remote_hash
+    REMOTE_HASHES[self.transceiver.remote_name] = self.remote_hash
+  remote_hash = property(lambda self: self._remote_hash, set_remote_hash)
+
+  def set_send_protocol(self, new_send_protocol):
+    self._send_protocol = new_send_protocol
+  send_protocol = property(lambda self: self._send_protocol, set_send_protocol)
+
+  def request(self, message_name, request_datum):
+    """
+    Writes a request message and reads a response or error message.
+    """
+    # build handshake and call request
+    buffer_writer = StringIO()
+    buffer_encoder = io.BinaryEncoder(buffer_writer)
+    self.write_handshake_request(buffer_encoder)
+    self.write_call_request(message_name, request_datum, buffer_encoder)
+
+    # send the handshake and call request; block until call response
+    call_request = buffer_writer.getvalue()
+    return self.issue_request(call_request, message_name, request_datum)
+
+  def write_handshake_request(self, encoder):
+    local_hash = self.local_protocol.md5
+    remote_name = self.transceiver.remote_name
+    remote_hash = REMOTE_HASHES.get(remote_name)
+    if remote_hash is None:
+      remote_hash = local_hash
+      self.remote_protocol = self.local_protocol
+    request_datum = {}
+    request_datum['clientHash'] = local_hash
+    request_datum['serverHash'] = remote_hash
+    if self.send_protocol:
+      request_datum['clientProtocol'] = str(self.local_protocol)
+    HANDSHAKE_REQUESTOR_WRITER.write(request_datum, encoder)
+
+  def write_call_request(self, message_name, request_datum, encoder):
+    """
+    The format of a call request is:
+      * request metadata, a map with values of type bytes
+      * the message name, an Avro string, followed by
+      * the message parameters. Parameters are serialized according to
+        the message's request declaration.
+    """
+    # request metadata (not yet implemented)
+    request_metadata = {}
+    META_WRITER.write(request_metadata, encoder)
+
+    # message name
+    message = self.local_protocol.messages.get(message_name)
+    if message is None:
+      raise schema.AvroException('Unknown message: %s' % message_name)
+    encoder.write_utf8(message.name)
+
+    # message parameters
+    self.write_request(message.request, request_datum, encoder)
+
+  def write_request(self, request_schema, request_datum, encoder):
+    datum_writer = io.DatumWriter(request_schema)
+    datum_writer.write(request_datum, encoder)
+
+  def read_handshake_response(self, decoder):
+    handshake_response = HANDSHAKE_REQUESTOR_READER.read(decoder)
+    match = handshake_response.get('match')
+    if match == 'BOTH':
+      self.send_protocol = False
+      return True
+    elif match == 'CLIENT':
+      if self.send_protocol:
+        raise schema.AvroException('Handshake failure.')
+      self.remote_protocol = protocol.parse(
+                             handshake_response.get('serverProtocol'))
+      self.remote_hash = handshake_response.get('serverHash')
+      self.send_protocol = False
+      return True
+    elif match == 'NONE':
+      if self.send_protocol:
+        raise schema.AvroException('Handshake failure.')
+      self.remote_protocol = protocol.parse(
+                             handshake_response.get('serverProtocol'))
+      self.remote_hash = handshake_response.get('serverHash')
+      self.send_protocol = True
+      return False
+    else:
+      raise schema.AvroException('Unexpected match: %s' % match)
+
+  def read_call_response(self, message_name, decoder):
+    """
+    The format of a call response is:
+      * response metadata, a map with values of type bytes
+      * a one-byte error flag boolean, followed by either:
+        o if the error flag is false,
+          the message response, serialized per the message's response schema.
+        o if the error flag is true, 
+          the error, serialized per the message's error union schema.
+    """
+    # response metadata
+    response_metadata = META_READER.read(decoder)
+
+    # remote response schema
+    remote_message_schema = self.remote_protocol.messages.get(message_name)
+    if remote_message_schema is None:
+      raise schema.AvroException('Unknown remote message: %s' % message_name)
+
+    # local response schema
+    local_message_schema = self.local_protocol.messages.get(message_name)
+    if local_message_schema is None:
+      raise schema.AvroException('Unknown local message: %s' % message_name)
+
+    # error flag
+    if not decoder.read_boolean():
+      writers_schema = remote_message_schema.response
+      readers_schema = local_message_schema.response
+      return self.read_response(writers_schema, readers_schema, decoder)
+    else:
+      writers_schema = remote_message_schema.errors
+      readers_schema = local_message_schema.errors
+      raise self.read_error(writers_schema, readers_schema, decoder)
+
+  def read_response(self, writers_schema, readers_schema, decoder):
+    datum_reader = io.DatumReader(writers_schema, readers_schema)
+    result = datum_reader.read(decoder)
+    return result
+
+  def read_error(self, writers_schema, readers_schema, decoder):
+    datum_reader = io.DatumReader(writers_schema, readers_schema)
+    return AvroRemoteException(datum_reader.read(decoder))
+
+class Requestor(BaseRequestor):
+
+  def issue_request(self, call_request, message_name, request_datum):
+    call_response = self.transceiver.transceive(call_request)
+
+    # process the handshake and call response
+    buffer_decoder = io.BinaryDecoder(StringIO(call_response))
+    call_response_exists = self.read_handshake_response(buffer_decoder)
+    if call_response_exists:
+      return self.read_call_response(message_name, buffer_decoder)
+    else:
+      return self.request(message_name, request_datum)
+
+class Responder(object):
+  """Base class for the server side of a protocol interaction."""
+  def __init__(self, local_protocol):
+    self._local_protocol = local_protocol
+    self._local_hash = self.local_protocol.md5
+    self._protocol_cache = {}
+    self.set_protocol_cache(self.local_hash, self.local_protocol)
+
+  # read-only properties
+  local_protocol = property(lambda self: self._local_protocol)
+  local_hash = property(lambda self: self._local_hash)
+  protocol_cache = property(lambda self: self._protocol_cache)
+
+  # utility functions to manipulate protocol cache
+  def get_protocol_cache(self, hash):
+    return self.protocol_cache.get(hash)
+  def set_protocol_cache(self, hash, protocol):
+    self.protocol_cache[hash] = protocol
+
+  def respond(self, call_request):
+    """
+    Called by a server to deserialize a request, compute and serialize
+    a response or error. Compare to 'handle()' in Thrift.
+    """
+    buffer_reader = StringIO(call_request)
+    buffer_decoder = io.BinaryDecoder(buffer_reader)
+    buffer_writer = StringIO()
+    buffer_encoder = io.BinaryEncoder(buffer_writer)
+    error = None
+    response_metadata = {}
+    
+    try:
+      remote_protocol = self.process_handshake(buffer_decoder, buffer_encoder)
+      # handshake failure
+      if remote_protocol is None:  
+        return buffer_writer.getvalue()
+
+      # read request using remote protocol
+      request_metadata = META_READER.read(buffer_decoder)
+      remote_message_name = buffer_decoder.read_utf8()
+
+      # get remote and local request schemas so we can do
+      # schema resolution (one fine day)
+      remote_message = remote_protocol.messages.get(remote_message_name)
+      if remote_message is None:
+        fail_msg = 'Unknown remote message: %s' % remote_message_name
+        raise schema.AvroException(fail_msg)
+      local_message = self.local_protocol.messages.get(remote_message_name)
+      if local_message is None:
+        fail_msg = 'Unknown local message: %s' % remote_message_name
+        raise schema.AvroException(fail_msg)
+      writers_schema = remote_message.request
+      readers_schema = local_message.request
+      request = self.read_request(writers_schema, readers_schema,
+                                  buffer_decoder)
+
+      # perform server logic
+      try:
+        response = self.invoke(local_message, request)
+      except AvroRemoteException, e:
+        error = e
+      except Exception, e:
+        error = AvroRemoteException(str(e))
+
+      # write response using local protocol
+      META_WRITER.write(response_metadata, buffer_encoder)
+      buffer_encoder.write_boolean(error is not None)
+      if error is None:
+        writers_schema = local_message.response
+        self.write_response(writers_schema, response, buffer_encoder)
+      else:
+        writers_schema = local_message.errors
+        self.write_error(writers_schema, error, buffer_encoder)
+    except schema.AvroException, e:
+      error = AvroRemoteException(str(e))
+      buffer_encoder = io.BinaryEncoder(StringIO())
+      META_WRITER.write(response_metadata, buffer_encoder)
+      buffer_encoder.write_boolean(True)
+      self.write_error(SYSTEM_ERROR_SCHEMA, error, buffer_encoder)
+    return buffer_writer.getvalue()
+
+  def process_handshake(self, decoder, encoder):
+    handshake_request = HANDSHAKE_RESPONDER_READER.read(decoder)
+    handshake_response = {}
+
+    # determine the remote protocol
+    client_hash = handshake_request.get('clientHash')
+    client_protocol = handshake_request.get('clientProtocol')
+    remote_protocol = self.get_protocol_cache(client_hash)
+    if remote_protocol is None and client_protocol is not None:
+      remote_protocol = protocol.parse(client_protocol)
+      self.set_protocol_cache(client_hash, remote_protocol)
+
+    # evaluate remote's guess of the local protocol
+    server_hash = handshake_request.get('serverHash')
+    if self.local_hash == server_hash:
+      if remote_protocol is None:
+        handshake_response['match'] = 'NONE'
+      else:
+        handshake_response['match'] = 'BOTH'
+    else:
+      if remote_protocol is None:
+        handshake_response['match'] = 'NONE'
+      else:
+        handshake_response['match'] = 'CLIENT'
+
+    if handshake_response['match'] != 'BOTH':
+      handshake_response['serverProtocol'] = str(self.local_protocol)
+      handshake_response['serverHash'] = self.local_hash
+
+    HANDSHAKE_RESPONDER_WRITER.write(handshake_response, encoder)
+    return remote_protocol
+
+  def invoke(self, local_message, request):
+    """
+    Actual work done by the server; cf. the handler in Thrift.
+    """
+    pass
+
+  def read_request(self, writers_schema, readers_schema, decoder):
+    datum_reader = io.DatumReader(writers_schema, readers_schema)
+    return datum_reader.read(decoder)
+
+  def write_response(self, writers_schema, response_datum, encoder):
+    datum_writer = io.DatumWriter(writers_schema)
+    datum_writer.write(response_datum, encoder)
+
+  def write_error(self, writers_schema, error_exception, encoder):
+    datum_writer = io.DatumWriter(writers_schema)
+    datum_writer.write(str(error_exception), encoder)
+
+#
+# Utility classes
+#
+
+class FramedReader(object):
+  """Wrapper around a file-like object to read framed data."""
+  def __init__(self, reader):
+    self._reader = reader
+
+  # read-only properties
+  reader = property(lambda self: self._reader)
+
+  def read_framed_message(self):
+    message = []
+    while True:
+      buffer = StringIO()
+      buffer_length = self._read_buffer_length()
+      if buffer_length == 0:
+        return ''.join(message)
+      while buffer.tell() < buffer_length:
+        chunk = self.reader.read(buffer_length - buffer.tell())
+        if chunk == '':
+          raise ConnectionClosedException("Reader read 0 bytes.")
+        buffer.write(chunk)
+      message.append(buffer.getvalue())
+
+  def _read_buffer_length(self):
+    read = self.reader.read(BUFFER_HEADER_LENGTH)
+    if read == '':
+      raise ConnectionClosedException("Reader read 0 bytes.")
+    return BIG_ENDIAN_INT_STRUCT.unpack(read)[0]
+
+class FramedWriter(object):
+  """Wrapper around a file-like object to write framed data."""
+  def __init__(self, writer):
+    self._writer = writer
+
+  # read-only properties
+  writer = property(lambda self: self._writer)
+
+  def write_framed_message(self, message):
+    message_length = len(message)
+    total_bytes_sent = 0
+    while message_length - total_bytes_sent > 0:
+      if message_length - total_bytes_sent > BUFFER_SIZE:
+        buffer_length = BUFFER_SIZE
+      else:
+        buffer_length = message_length - total_bytes_sent
+      self.write_buffer(message[total_bytes_sent:
+                                (total_bytes_sent + buffer_length)])
+      total_bytes_sent += buffer_length
+    # A message is always terminated by a zero-length buffer.
+    self.write_buffer_length(0)
+
+  def write_buffer(self, chunk):
+    buffer_length = len(chunk)
+    self.write_buffer_length(buffer_length)
+    self.writer.write(chunk)
+
+  def write_buffer_length(self, n):
+    self.writer.write(BIG_ENDIAN_INT_STRUCT.pack(n))
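+
+# Framing example (illustrative): writing "abc" through a FramedWriter produces
+# a 4-byte big-endian length header, the payload, and a zero-length terminator:
+#
+#   buf = StringIO()
+#   FramedWriter(buf).write_framed_message('abc')
+#   assert buf.getvalue() == '\x00\x00\x00\x03abc\x00\x00\x00\x00'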
+
+#
+# Transceiver Implementations
+#
+
+class HTTPTransceiver(object):
+  """
+  A simple HTTP-based transceiver implementation.
+  Useful for clients but not for servers.
+  """
+  def __init__(self, host, port, req_resource='/'):
+    self.req_resource = req_resource
+    self.conn = httplib.HTTPConnection(host, port)
+    self.conn.connect()
+
+  # read-only properties
+  sock = property(lambda self: self.conn.sock)
+  remote_name = property(lambda self: self.sock.getsockname())
+
+  # read/write properties
+  def set_conn(self, new_conn):
+    self._conn = new_conn
+  conn = property(lambda self: self._conn, set_conn)
+  req_resource = '/'
+
+  def transceive(self, request):
+    self.write_framed_message(request)
+    result = self.read_framed_message()
+    return result
+
+  def read_framed_message(self):
+    response = self.conn.getresponse()
+    response_reader = FramedReader(response)
+    framed_message = response_reader.read_framed_message()
+    response.read()    # ensure we're ready for subsequent requests
+    return framed_message
+
+  def write_framed_message(self, message):
+    req_method = 'POST'
+    req_headers = {'Content-Type': 'avro/binary'}
+
+    req_body_buffer = FramedWriter(StringIO())
+    req_body_buffer.write_framed_message(message)
+    req_body = req_body_buffer.writer.getvalue()
+
+    self.conn.request(req_method, self.req_resource, req_body, req_headers)
+
+  def close(self):
+    self.conn.close()
+
+#
+# Server Implementations (none yet)
+#
+
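+# Example client usage (illustrative; the protocol file, host, port, message
+# name and request payload below are hypothetical):
+#
+#   from avro import ipc, protocol
+#
+#   proto = protocol.parse(open('mail.avpr').read())
+#   client = ipc.HTTPTransceiver('localhost', 8080)
+#   requestor = ipc.Requestor(proto, client)
+#   result = requestor.request('send', {'message': {'to': 'a', 'from': 'b', 'body': 'hi'}})
+#   client.close()
+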
diff --git a/lang/py/src/avro/protocol.py b/lang/py/src/avro/protocol.py
new file mode 100644
index 0000000..68666cc
--- /dev/null
+++ b/lang/py/src/avro/protocol.py
@@ -0,0 +1,224 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Protocol implementation.
+"""
+try:
+  from hashlib import md5
+except ImportError:
+  from md5 import md5
+try:
+  import json
+except ImportError:
+  import simplejson as json
+from avro import schema
+
+#
+# Constants
+#
+
+# TODO(hammer): confirmed 'fixed' with Doug
+VALID_TYPE_SCHEMA_TYPES = ('enum', 'record', 'error', 'fixed')
+
+#
+# Exceptions
+#
+
+class ProtocolParseException(schema.AvroException):
+  pass
+
+#
+# Base Classes
+#
+
+class Protocol(object):
+  """An application protocol."""
+  def _parse_types(self, types, type_names):
+    type_objects = []
+    for type in types:
+      type_object = schema.make_avsc_object(type, type_names)
+      if type_object.type not in VALID_TYPE_SCHEMA_TYPES:
+        fail_msg = 'Type %s not an enum, fixed, record, or error.' % type
+        raise ProtocolParseException(fail_msg)
+      type_objects.append(type_object)
+    return type_objects
+
+  def _parse_messages(self, messages, names):
+    message_objects = {}
+    for name, body in messages.iteritems():
+      if message_objects.has_key(name):
+        fail_msg = 'Message name "%s" repeated.' % name
+        raise ProtocolParseException(fail_msg)
+      elif not(hasattr(body, 'get') and callable(body.get)):
+        fail_msg = 'Message name "%s" has non-object body %s.' % (name, body)
+        raise ProtocolParseException(fail_msg)
+      request = body.get('request')
+      response = body.get('response')
+      errors = body.get('errors')
+      message_objects[name] = Message(name, request, response, errors, names)
+    return message_objects
+
+  def __init__(self, name, namespace=None, types=None, messages=None):
+    # Ensure valid ctor args
+    if not name:
+      fail_msg = 'Protocols must have a non-empty name.'
+      raise ProtocolParseException(fail_msg)
+    elif not isinstance(name, basestring):
+      fail_msg = 'The name property must be a string.'
+      raise ProtocolParseException(fail_msg)
+    elif namespace is not None and not isinstance(namespace, basestring):
+      fail_msg = 'The namespace property must be a string.'
+      raise ProtocolParseException(fail_msg)
+    elif types is not None and not isinstance(types, list):
+      fail_msg = 'The types property must be a list.'
+      raise ProtocolParseException(fail_msg)
+    elif (messages is not None and 
+          not(hasattr(messages, 'get') and callable(messages.get))):
+      fail_msg = 'The messages property must be a JSON object.'
+      raise ProtocolParseException(fail_msg)
+
+    self._props = {}
+    self.set_prop('name', name)
+    type_names = schema.Names()
+    if namespace is not None: 
+      self.set_prop('namespace', namespace)
+      type_names.default_namespace = namespace
+    if types is not None:
+      self.set_prop('types', self._parse_types(types, type_names))
+    if messages is not None:
+      self.set_prop('messages', self._parse_messages(messages, type_names))
+    self._md5 = md5(str(self)).digest()
+
+  # read-only properties
+  name = property(lambda self: self.get_prop('name'))
+  namespace = property(lambda self: self.get_prop('namespace'))
+  fullname = property(lambda self:
+                      schema.Name(self.name, self.namespace, None).fullname)
+  types = property(lambda self: self.get_prop('types'))
+  types_dict = property(lambda self: dict([(type.name, type)
+                                           for type in self.types]))
+  messages = property(lambda self: self.get_prop('messages'))
+  md5 = property(lambda self: self._md5)
+  props = property(lambda self: self._props)
+
+  # utility functions to manipulate properties dict
+  def get_prop(self, key):
+    return self.props.get(key)
+  def set_prop(self, key, value):
+    self.props[key] = value  
+
+  def to_json(self):
+    to_dump = {}
+    to_dump['protocol'] = self.name
+    names = schema.Names(default_namespace=self.namespace)
+    if self.namespace: 
+      to_dump['namespace'] = self.namespace
+    if self.types:
+      to_dump['types'] = [ t.to_json(names) for t in self.types ]
+    if self.messages:
+      messages_dict = {}
+      for name, body in self.messages.iteritems():
+        messages_dict[name] = body.to_json(names)
+      to_dump['messages'] = messages_dict
+    return to_dump
+
+  def __str__(self):
+    return json.dumps(self.to_json())
+
+  def __eq__(self, that):
+    to_cmp = json.loads(str(self))
+    return to_cmp == json.loads(str(that))
+
+class Message(object):
+  """A Protocol message."""
+  def _parse_request(self, request, names):
+    if not isinstance(request, list):
+      fail_msg = 'Request property not a list: %s' % request
+      raise ProtocolParseException(fail_msg)
+    return schema.RecordSchema(None, None, request, names, 'request')
+  
+  def _parse_response(self, response, names):
+    if isinstance(response, basestring) and names.has_name(response, None):
+      return names.get_name(response, None)
+    else:
+      return schema.make_avsc_object(response, names)
+
+  def _parse_errors(self, errors, names):
+    if not isinstance(errors, list):
+      fail_msg = 'Errors property not a list: %s' % errors
+      raise ProtocolParseException(fail_msg)
+    errors_for_parsing = {'type': 'error_union', 'declared_errors': errors}
+    return schema.make_avsc_object(errors_for_parsing, names)
+
+  def __init__(self,  name, request, response, errors=None, names=None):
+    self._name = name
+
+    self._props = {}
+    self.set_prop('request', self._parse_request(request, names))
+    self.set_prop('response', self._parse_response(response, names))
+    if errors is not None:
+      self.set_prop('errors', self._parse_errors(errors, names))
+
+  # read-only properties
+  name = property(lambda self: self._name)
+  request = property(lambda self: self.get_prop('request'))
+  response = property(lambda self: self.get_prop('response'))
+  errors = property(lambda self: self.get_prop('errors'))
+  props = property(lambda self: self._props)
+
+  # utility functions to manipulate properties dict
+  def get_prop(self, key):
+    return self.props.get(key)
+  def set_prop(self, key, value):
+    self.props[key] = value  
+
+  def __str__(self):
+    return json.dumps(self.to_json())
+
+  def to_json(self, names=None):
+    if names is None:
+      names = schema.Names()
+    to_dump = {}
+    to_dump['request'] = self.request.to_json(names)
+    to_dump['response'] = self.response.to_json(names)
+    if self.errors:
+      to_dump['errors'] = self.errors.to_json(names)
+    return to_dump
+
+  def __eq__(self, that):
+    return self.name == that.name and self.props == that.props
+      
+def make_avpr_object(json_data):
+  """Build Avro Protocol from data parsed out of JSON string."""
+  if hasattr(json_data, 'get') and callable(json_data.get):
+    name = json_data.get('protocol')
+    namespace = json_data.get('namespace')
+    types = json_data.get('types')
+    messages = json_data.get('messages')
+    return Protocol(name, namespace, types, messages)
+  else:
+    raise ProtocolParseException('Not a JSON object: %s' % json_data)
+
+def parse(json_string):
+  """Constructs the Protocol from the JSON text."""
+  try:
+    json_data = json.loads(json_string)
+  except:
+    raise ProtocolParseException('Error parsing JSON: %s' % json_string)
+
+  # construct the Avro Protocol object
+  return make_avpr_object(json_data)
+
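+# Example (illustrative): parsing a minimal, made-up protocol definition:
+#
+#   p = parse('{"protocol": "Echo", "namespace": "example", "types": [], "messages": {}}')
+#   print p.name       # Echo
+#   print len(p.md5)   # 16 (MD5 digest of the protocol's JSON text)
+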
diff --git a/lang/py/src/avro/schema.py b/lang/py/src/avro/schema.py
new file mode 100644
index 0000000..86ce86a
--- /dev/null
+++ b/lang/py/src/avro/schema.py
@@ -0,0 +1,784 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Contains the Schema classes.
+
+A schema may be one of:
+  A record, mapping field names to field value data;
+  An error, equivalent to a record;
+  An enum, containing one of a small set of symbols;
+  An array of values, all of the same schema;
+  A map containing string/value pairs, each of a declared schema;
+  A union of other schemas;
+  A fixed sized binary object;
+  A unicode string;
+  A sequence of bytes;
+  A 32-bit signed int;
+  A 64-bit signed long;
+  A 32-bit floating-point float;
+  A 64-bit floating-point double;
+  A boolean; or
+  Null.
+"""
+try:
+  import json
+except ImportError:
+  import simplejson as json
+
+#
+# Constants
+#
+
+PRIMITIVE_TYPES = (
+  'null',
+  'boolean',
+  'string',
+  'bytes',
+  'int',
+  'long',
+  'float',
+  'double',
+)
+
+NAMED_TYPES = (
+  'fixed',
+  'enum',
+  'record',
+  'error',
+)
+
+VALID_TYPES = PRIMITIVE_TYPES + NAMED_TYPES + (
+  'array',
+  'map',
+  'union',
+  'request',
+  'error_union'
+)
+
+SCHEMA_RESERVED_PROPS = (
+  'type',
+  'name',
+  'namespace',
+  'fields',     # Record
+  'items',      # Array
+  'size',       # Fixed
+  'symbols',    # Enum
+  'values',     # Map
+  'doc',
+)
+
+FIELD_RESERVED_PROPS = (
+  'default',
+  'name',
+  'doc',
+  'order',
+  'type',
+)
+
+VALID_FIELD_SORT_ORDERS = (
+  'ascending',
+  'descending',
+  'ignore',
+)
+
+#
+# Exceptions
+#
+
+class AvroException(Exception):
+  pass
+
+class SchemaParseException(AvroException):
+  pass
+
+#
+# Base Classes
+#
+
+class Schema(object):
+  """Base class for all Schema classes."""
+  def __init__(self, type, other_props=None):
+    # Ensure valid ctor args
+    if not isinstance(type, basestring):
+      fail_msg = 'Schema type must be a string.'
+      raise SchemaParseException(fail_msg)
+    elif type not in VALID_TYPES:
+      fail_msg = '%s is not a valid type.' % type
+      raise SchemaParseException(fail_msg)
+
+    # add members
+    if not hasattr(self, '_props'): self._props = {}
+    self.set_prop('type', type)
+    self.type = type
+    self._props.update(other_props or {})
+
+  # Read-only properties dict. Printing schemas
+  # creates JSON properties directly from this dict. 
+  props = property(lambda self: self._props)
+
+  # Read-only property dict. Non-reserved properties
+  other_props = property(lambda self: get_other_props(self._props, SCHEMA_RESERVED_PROPS),
+                         doc="dictionary of non-reserved properties")
+
+  # utility functions to manipulate properties dict
+  def get_prop(self, key):
+    return self._props.get(key)
+
+  def set_prop(self, key, value):
+    self._props[key] = value
+
+  def __str__(self):
+    return json.dumps(self.to_json())
+
+  def to_json(self, names):
+    """
+    Converts the schema object into its AVRO specification representation.
+
+    Schema types that have names (records, enums, and fixed) must
+    be aware of not re-defining schemas that are already listed
+    in the parameter names.
+    """
+    raise Exception("Must be implemented by subclasses.")
+
+class Name(object):
+  """Class to describe Avro name."""
+  
+  def __init__(self, name_attr, space_attr, default_space):
+    """
+    Formulate full name according to the specification.
+    
+    @arg name_attr: name value read in schema or None.
+    @arg space_attr: namespace value read in schema or None.
+    @arg default_space: the current default space or None.
+    """
+    # Ensure valid ctor args
+    if not (isinstance(name_attr, basestring) or (name_attr is None)):
+      fail_msg = 'Name must be non-empty string or None.'
+      raise SchemaParseException(fail_msg)
+    elif name_attr == "":
+      fail_msg = 'Name must be non-empty string or None.'
+      raise SchemaParseException(fail_msg)
+
+    if not (isinstance(space_attr, basestring) or (space_attr is None)):
+      fail_msg = 'Space must be non-empty string or None.'
+      raise SchemaParseException(fail_msg)
+    elif name_attr == "":
+      fail_msg = 'Space must be non-empty string or None.'
+      raise SchemaParseException(fail_msg)
+  
+    if not (isinstance(default_space, basestring) or (default_space is None)):
+      fail_msg = 'Default space must be non-empty string or None.'
+      raise SchemaParseException(fail_msg)
+    elif name_attr == "":
+      fail_msg = 'Default space must be non-empty string or None.'
+      raise SchemaParseException(fail_msg)
+    
+    self._full = None; 
+    
+    if name_attr is None or name_attr == "":
+        return;
+    
+    if (name_attr.find('.') < 0):
+      if (space_attr is not None) and (space_attr != ""):
+        self._full = "%s.%s" % (space_attr, name_attr)
+      else:
+        if (default_space is not None) and (default_space != ""):
+           self._full = "%s.%s" % (default_space, name_attr)
+        else:
+          self._full = name_attr
+    else:
+        self._full = name_attr         
+    
+  def __eq__(self, other):
+    if not isinstance(other, Name):
+        return False
+    return (self.fullname == other.fullname)
+      
+  fullname = property(lambda self: self._full)
+
+  def get_space(self):
+    """Back out a namespace from full name."""
+    if self._full is None:
+        return None
+    
+    if (self._full.find('.') > 0):
+      return self._full.rsplit(".", 1)[0]
+    else:
+      return ""
+
+class Names(object):
+  """Track name set and default namespace during parsing."""
+  def __init__(self, default_namespace=None):
+      self.names = {}
+      self.default_namespace = default_namespace
+      
+  def has_name(self, name_attr, space_attr):
+      test = Name(name_attr, space_attr, self.default_namespace).fullname
+      return self.names.has_key(test)
+  
+  def get_name(self, name_attr, space_attr):    
+      test = Name(name_attr, space_attr, self.default_namespace).fullname
+      if not self.names.has_key(test):
+          return None
+      return self.names[test]
+  
+  def prune_namespace(self, properties):
+    """given a properties, return properties with namespace removed if
+    it matches the own default namespace"""
+    if self.default_namespace is None:
+      # no default namespace -- no change
+      return properties
+    if 'namespace' not in properties:
+      # the properties carry no namespace -- no change
+      return properties
+    if properties['namespace'] != self.default_namespace:
+      # the namespaces differ -- leave the properties alone
+      return properties
+    # the namespace matches the default and is redundant; drop it from a copy
+    prunable = properties.copy()
+    del(prunable['namespace'])
+    return prunable
+
+  def add_name(self, name_attr, space_attr, new_schema):
+    """
+    Add a new schema object to the name set.
+    
+      @arg name_attr: name value read in schema
+      @arg space_attr: namespace value read in schema.
+      
+      @return: the Name that was just added.
+    """
+    to_add = Name(name_attr, space_attr, self.default_namespace)
+    
+    if to_add.fullname in VALID_TYPES:
+      fail_msg = '%s is a reserved type name.' % to_add.fullname
+      raise SchemaParseException(fail_msg)
+    elif self.names.has_key(to_add.fullname):
+      fail_msg = 'The name "%s" is already in use.' % to_add.fullname
+      raise SchemaParseException(fail_msg)
+
+    self.names[to_add.fullname] = new_schema
+    return to_add
+
+class NamedSchema(Schema):
+  """Named Schemas specified in NAMED_TYPES."""
+  def __init__(self, type, name, namespace=None, names=None, other_props=None):
+    # Ensure valid ctor args
+    if not name:
+      fail_msg = 'Named Schemas must have a non-empty name.'
+      raise SchemaParseException(fail_msg)
+    elif not isinstance(name, basestring):
+      fail_msg = 'The name property must be a string.'
+      raise SchemaParseException(fail_msg)
+    elif namespace is not None and not isinstance(namespace, basestring):
+      fail_msg = 'The namespace property must be a string.'
+      raise SchemaParseException(fail_msg)
+
+    # Call parent ctor
+    Schema.__init__(self, type, other_props)
+
+    # Add class members
+    new_name = names.add_name(name, namespace, self)
+
+    # Store name and namespace as they were read in origin schema
+    self.set_prop('name', name)
+    if namespace is not None: 
+      self.set_prop('namespace', new_name.get_space())
+
+    # Store full name as calculated from name, namespace
+    self._fullname = new_name.fullname
+    
+  def name_ref(self, names):
+    if self.namespace == names.default_namespace:
+      return self.name
+    else:
+      return self.fullname
+
+  # read-only properties
+  name = property(lambda self: self.get_prop('name'))
+  namespace = property(lambda self: self.get_prop('namespace'))
+  fullname = property(lambda self: self._fullname)
+
+class Field(object):
+  def __init__(self, type, name, has_default, default=None,
+               order=None,names=None, doc=None, other_props=None):
+    # Ensure valid ctor args
+    if not name:
+      fail_msg = 'Fields must have a non-empty name.'
+      raise SchemaParseException(fail_msg)
+    elif not isinstance(name, basestring):
+      fail_msg = 'The name property must be a string.'
+      raise SchemaParseException(fail_msg)
+    elif order is not None and order not in VALID_FIELD_SORT_ORDERS:
+      fail_msg = 'The order property %s is not valid.' % order
+      raise SchemaParseException(fail_msg)
+
+    # add members
+    self._props = {}
+    self._has_default = has_default
+    self._props.update(other_props or {})
+
+    if (isinstance(type, basestring) and names is not None
+        and names.has_name(type, None)):
+      type_schema = names.get_name(type, None)
+    else:
+      try:
+        type_schema = make_avsc_object(type, names)
+      except Exception, e:
+        fail_msg = 'Type property "%s" not a valid Avro schema: %s' % (type, e)
+        raise SchemaParseException(fail_msg)
+    self.set_prop('type', type_schema)
+    self.set_prop('name', name)
+    self.type = type_schema
+    self.name = name
+    # TODO(hammer): check to ensure default is valid
+    if has_default: self.set_prop('default', default)
+    if order is not None: self.set_prop('order', order)
+    if doc is not None: self.set_prop('doc', doc)
+
+  # read-only properties
+  default = property(lambda self: self.get_prop('default'))
+  has_default = property(lambda self: self._has_default)
+  order = property(lambda self: self.get_prop('order'))
+  doc = property(lambda self: self.get_prop('doc'))
+  props = property(lambda self: self._props)
+
+  # Read-only property dict. Non-reserved properties
+  other_props = property(lambda self: get_other_props(self._props, FIELD_RESERVED_PROPS),
+                         doc="dictionary of non-reserved properties")
+
+  # utility functions to manipulate properties dict
+  def get_prop(self, key):
+    return self._props.get(key)
+  def set_prop(self, key, value):
+    self._props[key] = value
+
+  def __str__(self):
+    return json.dumps(self.to_json())
+
+  def to_json(self, names=None):
+    if names is None:
+      names = Names()
+    to_dump = self.props.copy()
+    to_dump['type'] = self.type.to_json(names)
+    return to_dump
+
+  def __eq__(self, that):
+    to_cmp = json.loads(str(self))
+    return to_cmp == json.loads(str(that))
+
+#
+# Primitive Types
+#
+class PrimitiveSchema(Schema):
+  """Valid primitive types are in PRIMITIVE_TYPES."""
+  def __init__(self, type):
+    # Ensure valid ctor args
+    if type not in PRIMITIVE_TYPES:
+      raise AvroException("%s is not a valid primitive type." % type)
+
+    # Call parent ctor
+    Schema.__init__(self, type)
+
+    self.fullname = type
+
+  def to_json(self, names=None):
+    if len(self.props) == 1:
+      return self.fullname
+    else:
+      return self.props
+
+  def __eq__(self, that):
+    return self.props == that.props
+
+#
+# Complex Types (non-recursive)
+#
+
+class FixedSchema(NamedSchema):
+  def __init__(self, name, namespace, size, names=None, other_props=None):
+    # Ensure valid ctor args
+    if not isinstance(size, int):
+      fail_msg = 'Fixed Schema requires a valid integer for size property.'
+      raise AvroException(fail_msg)
+
+    # Call parent ctor
+    NamedSchema.__init__(self, 'fixed', name, namespace, names, other_props)
+
+    # Add class members
+    self.set_prop('size', size)
+
+  # read-only properties
+  size = property(lambda self: self.get_prop('size'))
+
+  def to_json(self, names=None):
+    if names is None:
+      names = Names()
+    if self.fullname in names.names:
+      return self.name_ref(names)
+    else:
+      names.names[self.fullname] = self
+      return names.prune_namespace(self.props)
+
+  def __eq__(self, that):
+    return self.props == that.props
+
+class EnumSchema(NamedSchema):
+  def __init__(self, name, namespace, symbols, names=None, doc=None, other_props=None):
+    # Ensure valid ctor args
+    if not isinstance(symbols, list):
+      fail_msg = 'Enum Schema requires a JSON array for the symbols property.'
+      raise AvroException(fail_msg)
+    elif False in [isinstance(s, basestring) for s in symbols]:
+      fail_msg = 'Enum Schema requires all symbols to be JSON strings.'
+      raise AvroException(fail_msg)
+    elif len(set(symbols)) < len(symbols):
+      fail_msg = 'Duplicate symbol: %s' % symbols
+      raise AvroException(fail_msg)
+
+    # Call parent ctor
+    NamedSchema.__init__(self, 'enum', name, namespace, names, other_props)
+
+    # Add class members
+    self.set_prop('symbols', symbols)
+    if doc is not None: self.set_prop('doc', doc)
+
+  # read-only properties
+  symbols = property(lambda self: self.get_prop('symbols'))
+  doc = property(lambda self: self.get_prop('doc'))
+
+  def to_json(self, names=None):
+    if names is None:
+      names = Names()
+    if self.fullname in names.names:
+      return self.name_ref(names)
+    else:
+      names.names[self.fullname] = self
+      return names.prune_namespace(self.props)
+
+  def __eq__(self, that):
+    return self.props == that.props
+
+#
+# Complex Types (recursive)
+#
+
+class ArraySchema(Schema):
+  def __init__(self, items, names=None, other_props=None):
+    # Call parent ctor
+    Schema.__init__(self, 'array', other_props)
+    # Add class members
+
+    if isinstance(items, basestring) and names.has_name(items, None):
+      items_schema = names.get_name(items, None)
+    else:
+      try:
+        items_schema = make_avsc_object(items, names)
+      except SchemaParseException, e:
+        fail_msg = 'Items schema (%s) not a valid Avro schema: %s (known names: %s)' % (items, e, names.names.keys())
+        raise SchemaParseException(fail_msg)
+
+    self.set_prop('items', items_schema)
+
+  # read-only properties
+  items = property(lambda self: self.get_prop('items'))
+
+  def to_json(self, names=None):
+    if names is None:
+      names = Names()
+    to_dump = self.props.copy()
+    item_schema = self.get_prop('items')
+    to_dump['items'] = item_schema.to_json(names)
+    return to_dump
+
+  def __eq__(self, that):
+    to_cmp = json.loads(str(self))
+    return to_cmp == json.loads(str(that))
+
+class MapSchema(Schema):
+  def __init__(self, values, names=None, other_props=None):
+    # Call parent ctor
+    Schema.__init__(self, 'map',other_props)
+
+    # Add class members
+    if isinstance(values, basestring) and names.has_name(values, None):
+      values_schema = names.get_name(values, None)
+    else:
+      try:
+        values_schema = make_avsc_object(values, names)
+      except:
+        fail_msg = 'Values schema not a valid Avro schema.'
+        raise SchemaParseException(fail_msg)
+
+    self.set_prop('values', values_schema)
+
+  # read-only properties
+  values = property(lambda self: self.get_prop('values'))
+
+  def to_json(self, names=None):
+    if names is None:
+      names = Names()
+    to_dump = self.props.copy()
+    to_dump['values'] = self.get_prop('values').to_json(names)
+    return to_dump
+
+  def __eq__(self, that):
+    to_cmp = json.loads(str(self))
+    return to_cmp == json.loads(str(that))
+
+class UnionSchema(Schema):
+  """
+  names is a Names object used to resolve named schemas
+  """
+  def __init__(self, schemas, names=None):
+    # Ensure valid ctor args
+    if not isinstance(schemas, list):
+      fail_msg = 'Union schema requires a list of schemas.'
+      raise SchemaParseException(fail_msg)
+
+    # Call parent ctor
+    Schema.__init__(self, 'union')
+
+    # Add class members
+    schema_objects = []
+    for schema in schemas:
+      if isinstance(schema, basestring) and names.has_name(schema, None):
+        new_schema = names.get_name(schema, None)
+      else:
+        try:
+          new_schema = make_avsc_object(schema, names)
+        except Exception, e:
+          raise SchemaParseException('Union item must be a valid Avro schema: %s' % str(e))
+      # check the new schema
+      if (new_schema.type in VALID_TYPES and new_schema.type not in NAMED_TYPES
+          and new_schema.type in [schema.type for schema in schema_objects]):
+        raise SchemaParseException('%s type already in Union' % new_schema.type)
+      elif new_schema.type == 'union':
+        raise SchemaParseException('Unions cannot contain other unions.')
+      else:
+        schema_objects.append(new_schema)
+    self._schemas = schema_objects
+
+  # read-only properties
+  schemas = property(lambda self: self._schemas)
+
+  def to_json(self, names=None):
+    if names is None:
+      names = Names()
+    to_dump = []
+    for schema in self.schemas:
+      to_dump.append(schema.to_json(names))
+    return to_dump
+
+  def __eq__(self, that):
+    to_cmp = json.loads(str(self))
+    return to_cmp == json.loads(str(that))
+
+class ErrorUnionSchema(UnionSchema):
+  def __init__(self, schemas, names=None):
+    # Prepend "string" to handle system errors
+    UnionSchema.__init__(self, ['string'] + schemas, names)
+
+  def to_json(self, names=None):
+    if names is None:
+      names = Names()
+    to_dump = []
+    for schema in self.schemas:
+      # Don't print the system error schema
+      if schema.type == 'string': continue
+      to_dump.append(schema.to_json(names))
+    return to_dump
+
+class RecordSchema(NamedSchema):
+  @staticmethod
+  def make_field_objects(field_data, names):
+    """We're going to need to make message parameters too."""
+    field_objects = []
+    field_names = []
+    for i, field in enumerate(field_data):
+      if hasattr(field, 'get') and callable(field.get):
+        type = field.get('type')        
+        name = field.get('name')
+
+        # null values can have a default value of None
+        has_default = False
+        default = None
+        if field.has_key('default'):
+          has_default = True
+          default = field.get('default')
+
+        order = field.get('order')
+        doc = field.get('doc')
+        other_props = get_other_props(field, FIELD_RESERVED_PROPS)
+        new_field = Field(type, name, has_default, default, order, names, doc,
+                         other_props)
+        # make sure field name has not been used yet
+        if new_field.name in field_names:
+          fail_msg = 'Field name %s already in use.' % new_field.name
+          raise SchemaParseException(fail_msg)
+        field_names.append(new_field.name)
+      else:
+        raise SchemaParseException('Not a valid field: %s' % field)
+      field_objects.append(new_field)
+    return field_objects
+
+  def __init__(self, name, namespace, fields, names=None, schema_type='record',
+               doc=None, other_props=None):
+    # Ensure valid ctor args
+    if fields is None:
+      fail_msg = 'Record schema requires a non-empty fields property.'
+      raise SchemaParseException(fail_msg)
+    elif not isinstance(fields, list):
+      fail_msg = 'Fields property must be a list of Avro schemas.'
+      raise SchemaParseException(fail_msg)
+
+    # Call parent ctor (adds own name to namespace, too)
+    if schema_type == 'request':
+      Schema.__init__(self, schema_type, other_props)
+    else:
+      NamedSchema.__init__(self, schema_type, name, namespace, names,
+                           other_props)
+
+    if schema_type == 'record': 
+      old_default = names.default_namespace
+      names.default_namespace = Name(name, namespace,
+                                     names.default_namespace).get_space()
+
+    # Add class members
+    field_objects = RecordSchema.make_field_objects(fields, names)
+    self.set_prop('fields', field_objects)
+    if doc is not None: self.set_prop('doc', doc)
+
+    if schema_type == 'record':
+      names.default_namespace = old_default
+
+  # read-only properties
+  fields = property(lambda self: self.get_prop('fields'))
+  doc = property(lambda self: self.get_prop('doc'))
+
+  @property
+  def fields_dict(self):
+    fields_dict = {}
+    for field in self.fields:
+      fields_dict[field.name] = field
+    return fields_dict
+
+  def to_json(self, names=None):
+    if names is None:
+      names = Names()
+    # Request records don't have names
+    if self.type == 'request':
+      return [ f.to_json(names) for f in self.fields ]
+
+    if self.fullname in names.names:
+      return self.name_ref(names)
+    else:
+      names.names[self.fullname] = self
+
+    to_dump = names.prune_namespace(self.props.copy())
+    to_dump['fields'] = [ f.to_json(names) for f in self.fields ]
+    return to_dump
+
+  def __eq__(self, that):
+    to_cmp = json.loads(str(self))
+    return to_cmp == json.loads(str(that))
+
+#
+# Module Methods
+#
+def get_other_props(all_props,reserved_props):
+  """
+  Retrieve the non-reserved properties from a dictionary of properties.
+
+  @arg all_props: the full dictionary of properties
+  @arg reserved_props: the set of reserved properties to exclude
+  """
+  if hasattr(all_props, 'items') and callable(all_props.items):
+    return dict([(k,v) for (k,v) in all_props.items() if k not in
+                 reserved_props ])
+
+
+def make_avsc_object(json_data, names=None):
+  """
+  Build Avro Schema from data parsed out of JSON string.
+
+  @arg names: A Name object (tracks seen names and default space)
+  """
+  if names == None:
+    names = Names()
+  
+  # JSON object (non-union)
+  if hasattr(json_data, 'get') and callable(json_data.get):
+    type = json_data.get('type')
+    other_props = get_other_props(json_data, SCHEMA_RESERVED_PROPS)
+    if type in PRIMITIVE_TYPES:
+      return PrimitiveSchema(type)
+    elif type in NAMED_TYPES:
+      name = json_data.get('name')
+      namespace = json_data.get('namespace', names.default_namespace)
+      if type == 'fixed':
+        size = json_data.get('size')
+        return FixedSchema(name, namespace, size, names, other_props)
+      elif type == 'enum':
+        symbols = json_data.get('symbols')
+        doc = json_data.get('doc')
+        return EnumSchema(name, namespace, symbols, names, doc, other_props)
+      elif type in ['record', 'error']:
+        fields = json_data.get('fields')
+        doc = json_data.get('doc')
+        return RecordSchema(name, namespace, fields, names, type, doc, other_props)
+      else:
+        raise SchemaParseException('Unknown Named Type: %s' % type)
+    elif type in VALID_TYPES:
+      if type == 'array':
+        items = json_data.get('items')
+        return ArraySchema(items, names, other_props)
+      elif type == 'map':
+        values = json_data.get('values')
+        return MapSchema(values, names, other_props)
+      elif type == 'error_union':
+        declared_errors = json_data.get('declared_errors')
+        return ErrorUnionSchema(declared_errors, names)
+      else:
+        raise SchemaParseException('Unknown Valid Type: %s' % type)
+    elif type is None:
+      raise SchemaParseException('No "type" property: %s' % json_data)
+    else:
+      raise SchemaParseException('Undefined type: %s' % type)
+  # JSON array (union)
+  elif isinstance(json_data, list):
+    return UnionSchema(json_data, names)
+  # JSON string (primitive)
+  elif json_data in PRIMITIVE_TYPES:
+    return PrimitiveSchema(json_data)
+  # not for us!
+  else:
+    fail_msg = "Could not make an Avro Schema object from %s." % json_data
+    raise SchemaParseException(fail_msg)
+
+# TODO(hammer): make method for reading from a file?
+def parse(json_string):
+  """Constructs the Schema from the JSON text."""
+  # TODO(hammer): preserve stack trace from JSON parse
+  # parse the JSON
+  try:
+    json_data = json.loads(json_string)
+  except:
+    raise SchemaParseException('Error parsing JSON: %s' % json_string)
+
+  # Initialize the names object
+  names = Names()
+
+  # construct the Avro Schema object
+  return make_avsc_object(json_data, names)
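+
+# Example (illustrative): parsing a schema and inspecting it:
+#
+#   s = parse('{"type": "array", "items": "long"}')
+#   print s.type          # array
+#   print s.items.type    # long
+#   print str(s)          # the schema rendered back as JSON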
diff --git a/lang/py/src/avro/tool.py b/lang/py/src/avro/tool.py
new file mode 100644
index 0000000..edd6f18
--- /dev/null
+++ b/lang/py/src/avro/tool.py
@@ -0,0 +1,160 @@
+#! /usr/bin/env python
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Command-line tool
+
+NOTE: The API for the command-line tool is experimental.
+"""
+import sys
+from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
+import urlparse
+from avro import io
+from avro import datafile
+from avro import protocol
+from avro import ipc
+
+class GenericResponder(ipc.Responder):
+  def __init__(self, proto, msg, datum):
+    proto_json = file(proto, 'r').read()
+    ipc.Responder.__init__(self, protocol.parse(proto_json))
+    self.msg = msg
+    self.datum = datum
+
+  def invoke(self, message, request):
+    if message.name == self.msg:
+      print >> sys.stderr, "Message: %s Datum: %s" % (message.name, self.datum)
+      # server will shut down after processing a single Avro request
+      global server_should_shutdown
+      server_should_shutdown = True
+      return self.datum
+
+class GenericHandler(BaseHTTPRequestHandler):
+  def do_POST(self):
+    self.responder = responder
+    call_request_reader = ipc.FramedReader(self.rfile)
+    call_request = call_request_reader.read_framed_message()
+    resp_body = self.responder.respond(call_request)
+    self.send_response(200)
+    self.send_header('Content-Type', 'avro/binary')
+    self.end_headers()
+    resp_writer = ipc.FramedWriter(self.wfile)
+    resp_writer.write_framed_message(resp_body)
+    if server_should_shutdown:
+      print >> sys.stderr, "Shutting down server."
+      self.server.force_stop()
+
+class StoppableHTTPServer(HTTPServer):
+  """HTTPServer.shutdown added in Python 2.6. FML."""
+  stopped = False
+  allow_reuse_address = True
+  def __init__(self, *args, **kw):
+    HTTPServer.__init__(self, *args, **kw)
+    self.allow_reuse_address = True
+
+  def serve_forever(self):
+    while not self.stopped:
+      self.handle_request()
+
+  def force_stop(self):
+    self.server_close()
+    self.stopped = True
+    self.serve_forever()
+
+def run_server(uri, proto, msg, datum):
+  url_obj = urlparse.urlparse(uri)
+  server_addr = (url_obj.hostname, url_obj.port)
+  global responder
+  global server_should_shutdown
+  server_should_shutdown = False
+  responder = GenericResponder(proto, msg, datum)
+  server = StoppableHTTPServer(server_addr, GenericHandler)
+  print "Port: %s" % server.server_port
+  sys.stdout.flush()
+  server.allow_reuse_address = True
+  print >> sys.stderr, "Starting server."
+  server.serve_forever()
+
+def send_message(uri, proto, msg, datum):
+  url_obj = urlparse.urlparse(uri)
+  client = ipc.HTTPTransceiver(url_obj.hostname, url_obj.port)
+  proto_json = file(proto, 'r').read()
+  requestor = ipc.Requestor(protocol.parse(proto_json), client)
+  print requestor.request(msg, datum)
+
+def file_or_stdin(f):
+  if f == "-":
+    return sys.stdin
+  else:
+    return file(f, 'rb')
+
+def main(args=sys.argv):
+  if len(args) == 1:
+    print "Usage: %s [dump|rpcreceive|rpcsend]" % args[0]
+    return 1
+
+  if args[1] == "dump":
+    if len(args) != 3:
+      print "Usage: %s dump input_file" % args[0]
+      return 1
+    for d in datafile.DataFileReader(file_or_stdin(args[2]), io.DatumReader()):
+      print repr(d)
+  elif args[1] == "rpcreceive":
+    usage_str = "Usage: %s rpcreceive uri protocol_file " % args[0]
+    usage_str += "message_name (-data d | -file f)"
+    if len(args) not in [5, 7]:
+      print usage_str
+      return 1
+    uri, proto, msg = args[2:5]
+    datum = None
+    if len(args) > 5:
+      if args[5] == "-file":
+        reader = open(args[6], 'rb')
+        datum_reader = io.DatumReader()
+        dfr = datafile.DataFileReader(reader, datum_reader)
+        datum = dfr.next()
+      elif args[5] == "-data":
+        print "JSON Decoder not yet implemented."
+        return 1
+      else:
+        print usage_str
+        return 1
+    run_server(uri, proto, msg, datum)
+  elif args[1] == "rpcsend":
+    usage_str = "Usage: %s rpcsend uri protocol_file " % args[0]
+    usage_str += "message_name (-data d | -file f)"
+    if len(args) not in [5, 7]:
+      print usage_str
+      return 1
+    uri, proto, msg = args[2:5]
+    datum = None
+    if len(args) > 5:
+      if args[5] == "-file":
+        reader = open(args[6], 'rb')
+        datum_reader = io.DatumReader()
+        dfr = datafile.DataFileReader(reader, datum_reader)
+        datum = dfr.next()
+      elif args[5] == "-data":
+        print "JSON Decoder not yet implemented."
+        return 1
+      else:
+        print usage_str
+        return 1
+    send_message(uri, proto, msg, datum)
+  return 0
+  
+if __name__ == "__main__":
+  sys.exit(main(sys.argv))
diff --git a/lang/py/src/avro/txipc.py b/lang/py/src/avro/txipc.py
new file mode 100644
index 0000000..6a4d8b7
--- /dev/null
+++ b/lang/py/src/avro/txipc.py
@@ -0,0 +1,222 @@
+#!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+try:
+  from cStringIO import StringIO
+except ImportError:
+  from StringIO import StringIO
+from avro import ipc
+from avro import io
+
+from zope.interface import implements
+
+from twisted.web.client import Agent
+from twisted.web.http_headers import Headers
+from twisted.internet.defer import maybeDeferred, Deferred
+from twisted.web.iweb import IBodyProducer
+from twisted.web import resource, server
+from twisted.internet.protocol import Protocol
+
+class TwistedRequestor(ipc.BaseRequestor):
+  """A Twisted-compatible requestor. Returns a Deferred that will fire with the
+     returning value, instead of blocking until the request completes."""
+  def _process_handshake(self, call_response, message_name, request_datum):
+    # process the handshake and call response
+    buffer_decoder = io.BinaryDecoder(StringIO(call_response))
+    call_response_exists = self.read_handshake_response(buffer_decoder)
+    if call_response_exists:
+      return self.read_call_response(message_name, buffer_decoder)
+    else:
+      return self.request(message_name, request_datum)
+
+  def issue_request(self, call_request, message_name, request_datum):
+    d = self.transceiver.transceive(call_request)
+    d.addCallback(self._process_handshake, message_name, request_datum)
+    return d
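+
+# A minimal usage sketch (MAIL_PROTOCOL, params and handle_result are
+# hypothetical names): request() returns a Deferred instead of blocking, so
+# the result is consumed via a callback, e.g.
+#
+#   requestor = TwistedRequestor(MAIL_PROTOCOL, TwistedHTTPTransceiver('localhost', 9090))
+#   d = requestor.request('send', params)
+#   d.addCallback(handle_result)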
+
+class RequestStreamingProducer(object):
+  """A streaming producer for issuing requests with the Twisted.web Agent."""
+  implements(IBodyProducer)
+
+  paused = False
+  stopped = False
+  started = False
+
+  def __init__(self, message):
+    self._message = message
+    self._length = len(message)
+    # We need a buffer length header for every buffer and an additional
+    # zero-length buffer as the message terminator
+    self._length += (self._length / ipc.BUFFER_SIZE + 2) \
+      * ipc.BUFFER_HEADER_LENGTH
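+    # Worked example (assuming a 4-byte buffer header): a 10-byte message
+    # smaller than ipc.BUFFER_SIZE frames to 10 + 2 * 4 = 18 bytes, i.e. one
+    # header for the single data buffer plus one for the empty terminator.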
+    self._total_bytes_sent = 0
+    self._deferred = Deferred()
+
+  # read-only properties
+  message = property(lambda self: self._message)
+  length = property(lambda self: self._length)
+  consumer = property(lambda self: self._consumer)
+  deferred = property(lambda self: self._deferred)
+
+  def _get_total_bytes_sent(self):
+    return self._total_bytes_sent
+
+  def _set_total_bytes_sent(self, bytes_sent):
+    self._total_bytes_sent = bytes_sent
+
+  total_bytes_sent = property(_get_total_bytes_sent, _set_total_bytes_sent)
+
+  def startProducing(self, consumer):
+    if self.started:
+      return
+
+    self.started = True
+    self._consumer = consumer
+    # Keep writing data to the consumer until we're finished,
+    # paused (pauseProducing()) or stopped (stopProducing())
+    while self.length - self.total_bytes_sent > 0 and \
+      not self.paused and not self.stopped:
+      self.write()
+    # self.write will fire this deferred once it has written
+    # the entire message to the consumer
+    return self.deferred
+
+  def resumeProducing(self):
+    self.paused = False
+    self.write()
+
+  def pauseProducing(self):
+    self.paused = True
+
+  def stopProducing(self):
+    self.stopped = True
+
+  def write(self):
+    if self.length - self.total_bytes_sent > ipc.BUFFER_SIZE:
+      buffer_length = ipc.BUFFER_SIZE
+    else:
+      buffer_length = self.length - self.total_bytes_sent
+    self.write_buffer(self.message[self.total_bytes_sent:
+                              (self.total_bytes_sent + buffer_length)])
+    self.total_bytes_sent += buffer_length
+    # Make sure we wrote the entire message
+    if self.total_bytes_sent == self.length and not self.stopped:
+      self.stopProducing()
+      # A message is always terminated by a zero-length buffer.
+      self.write_buffer_length(0)
+      self.deferred.callback(None)
+
+  def write_buffer(self, chunk):
+    buffer_length = len(chunk)
+    self.write_buffer_length(buffer_length)
+    self.consumer.write(chunk)
+
+  def write_buffer_length(self, n):
+    self.consumer.write(ipc.BIG_ENDIAN_INT_STRUCT.pack(n))
+
+class AvroProtocol(Protocol):
+
+  recvd = ''
+  done = False
+
+  def __init__(self, finished):
+    self.finished = finished
+    self.message = []
+
+  def dataReceived(self, data):
+    self.recvd = self.recvd + data
+    while len(self.recvd) >= ipc.BUFFER_HEADER_LENGTH:
+      buffer_length, = ipc.BIG_ENDIAN_INT_STRUCT.unpack(
+        self.recvd[:ipc.BUFFER_HEADER_LENGTH])
+      if buffer_length == 0:
+        response = ''.join(self.message)
+        self.done = True
+        self.finished.callback(response)
+        break
+      if len(self.recvd) < buffer_length + ipc.BUFFER_HEADER_LENGTH:
+        break
+      buffer = self.recvd[ipc.BUFFER_HEADER_LENGTH:buffer_length + ipc.BUFFER_HEADER_LENGTH]
+      self.recvd = self.recvd[buffer_length + ipc.BUFFER_HEADER_LENGTH:]
+      self.message.append(buffer)
+
+  def connectionLost(self, reason):
+    if not self.done:
+      self.finished.errback(ipc.ConnectionClosedException("Reader read 0 bytes."))
+
+class TwistedHTTPTransceiver(object):
+  """This transceiver uses the Agent class present in Twisted.web >= 9.0
+     for issuing requests to the remote endpoint."""
+  def __init__(self, host, port, remote_name=None, reactor=None):
+    self.url = "http://%s:%d/" % (host, port)
+
+    if remote_name is None:
+      # There's no easy way to get this peer's remote address
+      # in Twisted, so a random UUID is used to identify ourselves.
+      import uuid
+      self.remote_name = uuid.uuid4()
+    else:
+      self.remote_name = remote_name
+
+    if reactor is None:
+      from twisted.internet import reactor
+    self.agent = Agent(reactor)
+
+  def read_framed_message(self, response):
+    finished = Deferred()
+    response.deliverBody(AvroProtocol(finished))
+    return finished
+
+  def transceive(self, request):
+    req_method = 'POST'
+    req_headers = {
+      'Content-Type': ['avro/binary'],
+      'Accept-Encoding': ['identity'],
+    }
+
+    body_producer = RequestStreamingProducer(request)
+    d = self.agent.request(
+      req_method,
+      self.url,
+      headers=Headers(req_headers),
+      bodyProducer=body_producer)
+    return d.addCallback(self.read_framed_message)
+
+class AvroResponderResource(resource.Resource):
+  """This Twisted.web resource can be placed anywhere in a URL hierarchy
+     to provide an Avro endpoint. Different Avro protocols can be served
+     by the same web server as long as they are in different resources in
+     a URL hierarchy."""
+  isLeaf = True
+
+  def __init__(self, responder):
+    resource.Resource.__init__(self)
+    self.responder = responder
+
+  def cb_render_POST(self, resp_body, request):
+    request.setResponseCode(200)
+    request.setHeader('Content-Type', 'avro/binary')
+    resp_writer = ipc.FramedWriter(request)
+    resp_writer.write_framed_message(resp_body)
+    request.finish()
+
+  def render_POST(self, request):
+    # Unfortunately, Twisted.web doesn't support incoming
+    # streamed input yet, the whole payload must be kept in-memory
+    request.content.seek(0, 0)
+    call_request_reader = ipc.FramedReader(request.content)
+    call_request = call_request_reader.read_framed_message()
+    d = maybeDeferred(self.responder.respond, call_request)
+    d.addCallback(self.cb_render_POST, request)
+    return server.NOT_DONE_YET
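+
+# A minimal serving sketch (MailResponder is a hypothetical ipc.Responder
+# subclass; any concrete responder works):
+#
+#   from twisted.internet import reactor
+#   root = AvroResponderResource(MailResponder())
+#   reactor.listenTCP(9090, server.Site(root))
+#   reactor.run()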
diff --git a/lang/py/test/av_bench.py b/lang/py/test/av_bench.py
new file mode 100644
index 0000000..5725997
--- /dev/null
+++ b/lang/py/test/av_bench.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import time
+from random import sample, choice, randint
+from string import lowercase
+
+import avro.datafile
+import avro.schema
+import avro.io
+
+
+types = ["A", "CNAME"]
+
+def rand_name():
+    return ''.join(sample(lowercase, 15))
+
+def rand_ip():
+    return "%s.%s.%s.%s" %(randint(0,255), randint(0,255), randint(0,255), randint(0,255))
+
+def write(n):
+    schema_s="""
+    { "type": "record",
+      "name": "Query",
+    "fields" : [
+        {"name": "query", "type": "string"},
+        {"name": "response", "type": "string"},
+        {"name": "type", "type": "string", "default": "A"}
+    ]}"""
+    out = open("datafile.avr",'w')
+
+    schema = avro.schema.parse(schema_s)
+    writer = avro.io.DatumWriter(schema)
+    dw = avro.datafile.DataFileWriter(out, writer, schema) #,codec='deflate')
+    for _ in xrange(n):
+        response = rand_ip()
+        query = rand_name()
+        type = choice(types)
+        dw.append({'query': query, 'response': response, 'type': type})
+
+    dw.close()
+
+def read():
+    f = open("datafile.avr", 'rb')
+    reader = avro.io.DatumReader()
+    af = avro.datafile.DataFileReader(f, reader)
+
+    # iterate over every datum to measure read time
+    for _ in af:
+        pass
+
+def t(f, *args):
+    s = time.time()
+    f(*args)
+    e = time.time()
+    return e-s
+
+if __name__ == "__main__":
+    n = int(sys.argv[1])
+    print "Write %0.4f" % t(write, n)
+    print "Read %0.4f" % t(read)
diff --git a/lang/py/test/gen_interop_data.py b/lang/py/test/gen_interop_data.py
new file mode 100644
index 0000000..579505a
--- /dev/null
+++ b/lang/py/test/gen_interop_data.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import sys
+from avro import schema
+from avro import io
+from avro import datafile
+
+DATUM = {
+  'intField': 12,
+  'longField': 15234324L,
+  'stringField': unicode('hey'),
+  'boolField': True,
+  'floatField': 1234.0,
+  'doubleField': -1234.0,
+  'bytesField': '12312adf',
+  'nullField': None,
+  'arrayField': [5.0, 0.0, 12.0],
+  'mapField': {'a': {'label': 'a'}, 'bee': {'label': 'cee'}},
+  'unionField': 12.0,
+  'enumField': 'C',
+  'fixedField': '1019181716151413',
+  'recordField': {'label': 'blah', 'children': [{'label': 'inner', 'children': []}]},
+}
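+
+# Invoked with a schema path and an output path (illustrative file names):
+#   python gen_interop_data.py interop.avsc py_interop.avro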
+
+if __name__ == "__main__":
+  interop_schema = schema.parse(open(sys.argv[1], 'r').read())
+  writer = open(sys.argv[2], 'wb')
+  datum_writer = io.DatumWriter()
+  # NB: not using compression
+  dfw = datafile.DataFileWriter(writer, datum_writer, interop_schema)
+  dfw.append(DATUM)
+  dfw.close()
diff --git a/lang/py/test/sample_http_client.py b/lang/py/test/sample_http_client.py
new file mode 100644
index 0000000..86942d8
--- /dev/null
+++ b/lang/py/test/sample_http_client.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import sys
+
+from avro import ipc
+from avro import protocol
+
+MAIL_PROTOCOL_JSON = """\
+{"namespace": "example.proto",
+ "protocol": "Mail",
+
+ "types": [
+     {"name": "Message", "type": "record",
+      "fields": [
+          {"name": "to",   "type": "string"},
+          {"name": "from", "type": "string"},
+          {"name": "body", "type": "string"}
+      ]
+     }
+ ],
+
+ "messages": {
+     "send": {
+         "request": [{"name": "message", "type": "Message"}],
+         "response": "string"
+     },
+     "replay": {
+         "request": [],
+         "response": "string"
+     }
+ }
+}
+"""
+MAIL_PROTOCOL = protocol.parse(MAIL_PROTOCOL_JSON)
+SERVER_HOST = 'localhost'
+SERVER_PORT = 9090
+
+class UsageError(Exception):
+  def __init__(self, value):
+    self.value = value
+  def __str__(self):
+    return repr(self.value)
+
+def make_requestor(server_host, server_port, protocol):
+  client = ipc.HTTPTransceiver(server_host, server_port)
+  return ipc.Requestor(protocol, client)
+
+if __name__ == '__main__':
+  if len(sys.argv) not in [4, 5]:
+    raise UsageError("Usage: <to> <from> <body> [<count>]")
+
+  # client code - attach to the server and send a message
+  # fill in the Message record
+  message = dict()
+  message['to'] = sys.argv[1]
+  message['from'] = sys.argv[2]
+  message['body'] = sys.argv[3]
+
+  try:
+    num_messages = int(sys.argv[4])
+  except (IndexError, ValueError):
+    num_messages = 1
+
+  # build the parameters for the request
+  params = {}
+  params['message'] = message
+   
+  # send the requests and print the result
+  for msg_count in range(num_messages):
+    requestor = make_requestor(SERVER_HOST, SERVER_PORT, MAIL_PROTOCOL)
+    result = requestor.request('send', params)
+    print("Result: " + result)
+
+  # try out a replay message
+  requestor = make_requestor(SERVER_HOST, SERVER_PORT, MAIL_PROTOCOL)
+  result = requestor.request('replay', dict())
+  print("Replay Result: " + result)
diff --git a/lang/py/test/sample_http_server.py b/lang/py/test/sample_http_server.py
new file mode 100644
index 0000000..53f6928
--- /dev/null
+++ b/lang/py/test/sample_http_server.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
+from avro import ipc
+from avro import protocol
+
+MAIL_PROTOCOL_JSON = """\
+{"namespace": "example.proto",
+ "protocol": "Mail",
+
+ "types": [
+     {"name": "Message", "type": "record",
+      "fields": [
+          {"name": "to",   "type": "string"},
+          {"name": "from", "type": "string"},
+          {"name": "body", "type": "string"}
+      ]
+     }
+ ],
+
+ "messages": {
+     "send": {
+         "request": [{"name": "message", "type": "Message"}],
+         "response": "string"
+     },
+     "replay": {
+         "request": [],
+         "response": "string"
+     }
+ }
+}
+"""
+MAIL_PROTOCOL = protocol.parse(MAIL_PROTOCOL_JSON)
+SERVER_ADDRESS = ('localhost', 9090)
+
+class MailResponder(ipc.Responder):
+  def __init__(self):
+    ipc.Responder.__init__(self, MAIL_PROTOCOL)
+
+  def invoke(self, message, request):
+    if message.name == 'send':
+      request_content = request['message']
+      response = "Sent message to %(to)s from %(from)s with body %(body)s" % \
+                 request_content
+      return response
+    elif message.name == 'replay':
+      return 'replay'
+
+class MailHandler(BaseHTTPRequestHandler):
+  def do_POST(self):
+    self.responder = MailResponder()
+    call_request_reader = ipc.FramedReader(self.rfile)
+    call_request = call_request_reader.read_framed_message()
+    resp_body = self.responder.respond(call_request)
+    self.send_response(200)
+    self.send_header('Content-Type', 'avro/binary')
+    self.end_headers()
+    resp_writer = ipc.FramedWriter(self.wfile)
+    resp_writer.write_framed_message(resp_body)
+
+if __name__ == '__main__':
+  mail_server = HTTPServer(SERVER_ADDRESS, MailHandler)
+  mail_server.allow_reuse_address = True
+  mail_server.serve_forever()
diff --git a/lang/py/test/test_datafile.py b/lang/py/test/test_datafile.py
new file mode 100644
index 0000000..b3ce692
--- /dev/null
+++ b/lang/py/test/test_datafile.py
@@ -0,0 +1,202 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import unittest
+from avro import schema
+from avro import io
+from avro import datafile
+
+SCHEMAS_TO_VALIDATE = (
+  ('"null"', None),
+  ('"boolean"', True),
+  ('"string"', unicode('adsfasdf09809dsf-=adsf')),
+  ('"bytes"', '12345abcd'),
+  ('"int"', 1234),
+  ('"long"', 1234),
+  ('"float"', 1234.0),
+  ('"double"', 1234.0),
+  ('{"type": "fixed", "name": "Test", "size": 1}', 'B'),
+  ('{"type": "enum", "name": "Test", "symbols": ["A", "B"]}', 'B'),
+  ('{"type": "array", "items": "long"}', [1, 3, 2]),
+  ('{"type": "map", "values": "long"}', {'a': 1, 'b': 3, 'c': 2}),
+  ('["string", "null", "long"]', None),
+  ("""\
+   {"type": "record",
+    "name": "Test",
+    "fields": [{"name": "f", "type": "long"}]}
+   """, {'f': 5}),
+  ("""\
+   {"type": "record",
+    "name": "Lisp",
+    "fields": [{"name": "value",
+                "type": ["null", "string",
+                         {"type": "record",
+                          "name": "Cons",
+                          "fields": [{"name": "car", "type": "Lisp"},
+                                     {"name": "cdr", "type": "Lisp"}]}]}]}
+   """, {'value': {'car': {'value': 'head'}, 'cdr': {'value': None}}}),
+)
+
+FILENAME = 'test_datafile.out'
+CODECS_TO_VALIDATE = ('null', 'deflate')
+try:
+  import snappy
+  CODECS_TO_VALIDATE += ('snappy',)
+except ImportError:
+  print 'Snappy not present, will skip testing it.'
+
+# TODO(hammer): clean up written files with ant, not os.remove
+class TestDataFile(unittest.TestCase):
+  def test_round_trip(self):
+    print ''
+    print 'TEST ROUND TRIP'
+    print '==============='
+    print ''
+    correct = 0
+    for i, (example_schema, datum) in enumerate(SCHEMAS_TO_VALIDATE):
+      for codec in CODECS_TO_VALIDATE:
+        print ''
+        print 'SCHEMA NUMBER %d' % (i + 1)
+        print '================'
+        print ''
+        print 'Schema: %s' % example_schema
+        print 'Datum: %s' % datum
+        print 'Codec: %s' % codec
+
+        # write data in binary to file 10 times
+        writer = open(FILENAME, 'wb')
+        datum_writer = io.DatumWriter()
+        schema_object = schema.parse(example_schema)
+        dfw = datafile.DataFileWriter(writer, datum_writer, schema_object, codec=codec)
+        for i in range(10):
+          dfw.append(datum)
+        dfw.close()
+
+        # read data in binary from file
+        reader = open(FILENAME, 'rb')
+        datum_reader = io.DatumReader()
+        dfr = datafile.DataFileReader(reader, datum_reader)
+        round_trip_data = []
+        for datum in dfr:
+          round_trip_data.append(datum)
+
+        print 'Round Trip Data: %s' % round_trip_data
+        print 'Round Trip Data Length: %d' % len(round_trip_data)
+        is_correct = [datum] * 10 == round_trip_data
+        if is_correct: correct += 1
+        print 'Correct Round Trip: %s' % is_correct
+        print ''
+    os.remove(FILENAME)
+    self.assertEquals(correct, len(CODECS_TO_VALIDATE)*len(SCHEMAS_TO_VALIDATE))
+
+  def test_append(self):
+    print ''
+    print 'TEST APPEND'
+    print '==========='
+    print ''
+    correct = 0
+    for i, (example_schema, datum) in enumerate(SCHEMAS_TO_VALIDATE):
+      for codec in CODECS_TO_VALIDATE:
+        print ''
+        print 'SCHEMA NUMBER %d' % (i + 1)
+        print '================'
+        print ''
+        print 'Schema: %s' % example_schema
+        print 'Datum: %s' % datum
+        print 'Codec: %s' % codec
+
+        # write data in binary to file once
+        writer = open(FILENAME, 'wb')
+        datum_writer = io.DatumWriter()
+        schema_object = schema.parse(example_schema)
+        dfw = datafile.DataFileWriter(writer, datum_writer, schema_object, codec=codec)
+        dfw.append(datum)
+        dfw.close()
+
+        # open file, write, and close nine times
+        for i in range(9):
+          writer = open(FILENAME, 'ab+')
+          dfw = datafile.DataFileWriter(writer, io.DatumWriter())
+          dfw.append(datum)
+          dfw.close()
+
+        # read data in binary from file
+        reader = open(FILENAME, 'rb')
+        datum_reader = io.DatumReader()
+        dfr = datafile.DataFileReader(reader, datum_reader)
+        appended_data = []
+        for datum in dfr:
+          appended_data.append(datum)
+
+        print 'Appended Data: %s' % appended_data
+        print 'Appended Data Length: %d' % len(appended_data)
+        is_correct = [datum] * 10 == appended_data
+        if is_correct: correct += 1
+        print 'Correct Appended: %s' % is_correct
+        print ''
+    os.remove(FILENAME)
+    self.assertEquals(correct, len(CODECS_TO_VALIDATE)*len(SCHEMAS_TO_VALIDATE))
+
+  def test_context_manager(self):
+    # Context manager was introduced as a first class
+    # member only in Python 2.6 and above.
+    import sys
+    if sys.version_info < (2,6):
+      print 'Skipping context manager tests on this Python version.'
+      return
+    # Test the writer with a 'with' statement.
+    writer = open(FILENAME, 'wb')
+    datum_writer = io.DatumWriter()
+    sample_schema, sample_datum = SCHEMAS_TO_VALIDATE[1]
+    schema_object = schema.parse(sample_schema)
+    with datafile.DataFileWriter(writer, datum_writer, schema_object) as dfw:
+      dfw.append(sample_datum)
+    self.assertTrue(writer.closed)
+
+    # Test the reader with a 'with' statement.
+    datums = []
+    reader = open(FILENAME, 'rb')
+    datum_reader = io.DatumReader()
+    with datafile.DataFileReader(reader, datum_reader) as dfr:
+      for datum in dfr:
+        datums.append(datum)
+    self.assertTrue(reader.closed)
+
+  def test_metadata(self):
+    # Test the writer with a 'with' statement.
+    writer = open(FILENAME, 'wb')
+    datum_writer = io.DatumWriter()
+    sample_schema, sample_datum = SCHEMAS_TO_VALIDATE[1]
+    schema_object = schema.parse(sample_schema)
+    with datafile.DataFileWriter(writer, datum_writer, schema_object) as dfw:
+      dfw.set_meta('test.string', 'foo')
+      dfw.set_meta('test.number', '1')
+      dfw.append(sample_datum)
+    self.assertTrue(writer.closed)
+
+    # Test the reader with a 'with' statement.
+    datums = []
+    reader = open(FILENAME, 'rb')
+    datum_reader = io.DatumReader()
+    with datafile.DataFileReader(reader, datum_reader) as dfr:
+      self.assertEquals('foo', dfr.get_meta('test.string'))
+      self.assertEquals('1', dfr.get_meta('test.number'))
+      for datum in dfr:
+        datums.append(datum)
+    self.assertTrue(reader.closed)
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/lang/py/test/test_datafile_interop.py b/lang/py/test/test_datafile_interop.py
new file mode 100644
index 0000000..8f4e883
--- /dev/null
+++ b/lang/py/test/test_datafile_interop.py
@@ -0,0 +1,39 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import unittest
+from avro import io
+from avro import datafile
+
+class TestDataFileInterop(unittest.TestCase):
+  def test_interop(self):
+    print ''
+    print 'TEST INTEROP'
+    print '============'
+    print ''
+    for f in os.listdir('@INTEROP_DATA_DIR@'):
+      print 'READING %s' % f
+      print ''
+
+      # read data in binary from file
+      reader = open(os.path.join('@INTEROP_DATA_DIR@', f), 'rb')
+      datum_reader = io.DatumReader()
+      dfr = datafile.DataFileReader(reader, datum_reader)
+      for datum in dfr:
+        assert datum is not None
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/lang/py/test/test_io.py b/lang/py/test/test_io.py
new file mode 100644
index 0000000..05a6f80
--- /dev/null
+++ b/lang/py/test/test_io.py
@@ -0,0 +1,337 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import unittest
+try:
+  from cStringIO import StringIO
+except ImportError:
+  from StringIO import StringIO
+from binascii import hexlify
+from avro import schema
+from avro import io
+
+SCHEMAS_TO_VALIDATE = (
+  ('"null"', None),
+  ('"boolean"', True),
+  ('"string"', unicode('adsfasdf09809dsf-=adsf')),
+  ('"bytes"', '12345abcd'),
+  ('"int"', 1234),
+  ('"long"', 1234),
+  ('"float"', 1234.0),
+  ('"double"', 1234.0),
+  ('{"type": "fixed", "name": "Test", "size": 1}', 'B'),
+  ('{"type": "enum", "name": "Test", "symbols": ["A", "B"]}', 'B'),
+  ('{"type": "array", "items": "long"}', [1, 3, 2]),
+  ('{"type": "map", "values": "long"}', {'a': 1, 'b': 3, 'c': 2}),
+  ('["string", "null", "long"]', None),
+  ("""\
+   {"type": "record",
+    "name": "Test",
+    "fields": [{"name": "f", "type": "long"}]}
+   """, {'f': 5}),
+  ("""\
+   {"type": "record",
+    "name": "Lisp",
+    "fields": [{"name": "value",
+                "type": ["null", "string",
+                         {"type": "record",
+                          "name": "Cons",
+                          "fields": [{"name": "car", "type": "Lisp"},
+                                     {"name": "cdr", "type": "Lisp"}]}]}]}
+   """, {'value': {'car': {'value': 'head'}, 'cdr': {'value': None}}}),
+)
+
+BINARY_ENCODINGS = (
+  (0, '00'),
+  (-1, '01'),
+  (1, '02'),
+  (-2, '03'),
+  (2, '04'),
+  (-64, '7f'),
+  (64, '80 01'),
+  (8192, '80 80 01'),
+  (-8193, '81 80 01'),
+)
+
+DEFAULT_VALUE_EXAMPLES = (
+  ('"null"', 'null', None),
+  ('"boolean"', 'true', True),
+  ('"string"', '"foo"', u'foo'),
+  ('"bytes"', '"\u00FF\u00FF"', u'\xff\xff'),
+  ('"int"', '5', 5),
+  ('"long"', '5', 5L),
+  ('"float"', '1.1', 1.1),
+  ('"double"', '1.1', 1.1),
+  ('{"type": "fixed", "name": "F", "size": 2}', '"\u00FF\u00FF"', u'\xff\xff'),
+  ('{"type": "enum", "name": "F", "symbols": ["FOO", "BAR"]}', '"FOO"', 'FOO'),
+  ('{"type": "array", "items": "int"}', '[1, 2, 3]', [1, 2, 3]),
+  ('{"type": "map", "values": "int"}', '{"a": 1, "b": 2}', {'a': 1, 'b': 2}),
+  ('["int", "null"]', '5', 5),
+  ('{"type": "record", "name": "F", "fields": [{"name": "A", "type": "int"}]}',
+   '{"A": 5}', {'A': 5}),
+)
+
+LONG_RECORD_SCHEMA = schema.parse("""\
+  {"type": "record",
+   "name": "Test",
+   "fields": [{"name": "A", "type": "int"},
+              {"name": "B", "type": "int"},
+              {"name": "C", "type": "int"},
+              {"name": "D", "type": "int"},
+              {"name": "E", "type": "int"},
+              {"name": "F", "type": "int"},
+              {"name": "G", "type": "int"}]}""")
+
+LONG_RECORD_DATUM = {'A': 1, 'B': 2, 'C': 3, 'D': 4, 'E': 5, 'F': 6, 'G': 7}
+
+def avro_hexlify(reader):
+  """Return the hex value, as a string, of a binary-encoded int or long."""
+  bytes = []
+  current_byte = reader.read(1)
+  bytes.append(hexlify(current_byte))
+  while (ord(current_byte) & 0x80) != 0:
+    current_byte = reader.read(1)
+    bytes.append(hexlify(current_byte))
+  return ' '.join(bytes)
+
+def print_test_name(test_name):
+  print ''
+  print test_name
+  print '=' * len(test_name)
+  print ''
+
+def write_datum(datum, writers_schema):
+  writer = StringIO()
+  encoder = io.BinaryEncoder(writer)
+  datum_writer = io.DatumWriter(writers_schema)
+  datum_writer.write(datum, encoder)
+  return writer, encoder, datum_writer
+
+def read_datum(buffer, writers_schema, readers_schema=None):
+  reader = StringIO(buffer.getvalue())
+  decoder = io.BinaryDecoder(reader)
+  datum_reader = io.DatumReader(writers_schema, readers_schema)
+  return datum_reader.read(decoder)
+
+def check_binary_encoding(number_type):
+  print_test_name('TEST BINARY %s ENCODING' % number_type.upper())
+  correct = 0
+  for datum, hex_encoding in BINARY_ENCODINGS:
+    print 'Datum: %d' % datum
+    print 'Correct Encoding: %s' % hex_encoding
+
+    writers_schema = schema.parse('"%s"' % number_type.lower())
+    writer, encoder, datum_writer = write_datum(datum, writers_schema)
+    writer.seek(0)
+    hex_val = avro_hexlify(writer)
+
+    print 'Read Encoding: %s' % hex_val
+    if hex_encoding == hex_val: correct += 1
+    print ''
+  return correct
+
+def check_skip_number(number_type):
+  print_test_name('TEST SKIP %s' % number_type.upper())
+  correct = 0
+  for value_to_skip, hex_encoding in BINARY_ENCODINGS:
+    VALUE_TO_READ = 6253
+    print 'Value to Skip: %d' % value_to_skip
+
+    # write the value to skip and a known value
+    writers_schema = schema.parse('"%s"' % number_type.lower())
+    writer, encoder, datum_writer = write_datum(value_to_skip, writers_schema)
+    datum_writer.write(VALUE_TO_READ, encoder)
+
+    # skip the value
+    reader = StringIO(writer.getvalue())
+    decoder = io.BinaryDecoder(reader)
+    decoder.skip_long()
+
+    # read data from string buffer
+    datum_reader = io.DatumReader(writers_schema)
+    read_value = datum_reader.read(decoder)
+
+    print 'Read Value: %d' % read_value
+    if read_value == VALUE_TO_READ: correct += 1
+    print ''
+  return correct
+    
+class TestIO(unittest.TestCase):
+  #
+  # BASIC FUNCTIONALITY
+  #
+
+  def test_validate(self):
+    print_test_name('TEST VALIDATE')
+    passed = 0
+    for example_schema, datum in SCHEMAS_TO_VALIDATE:
+      print 'Schema: %s' % example_schema
+      print 'Datum: %s' % datum
+      validated = io.validate(schema.parse(example_schema), datum)
+      print 'Valid: %s' % validated
+      if validated: passed += 1
+    self.assertEquals(passed, len(SCHEMAS_TO_VALIDATE))
+
+  def test_round_trip(self):
+    print_test_name('TEST ROUND TRIP')
+    correct = 0
+    for example_schema, datum in SCHEMAS_TO_VALIDATE:
+      print 'Schema: %s' % example_schema
+      print 'Datum: %s' % datum
+
+      writers_schema = schema.parse(example_schema)
+      writer, encoder, datum_writer = write_datum(datum, writers_schema)
+      round_trip_datum = read_datum(writer, writers_schema)
+
+      print 'Round Trip Datum: %s' % round_trip_datum
+      if datum == round_trip_datum: correct += 1
+    self.assertEquals(correct, len(SCHEMAS_TO_VALIDATE))
+
+  #
+  # BINARY ENCODING OF INT AND LONG
+  #
+
+  def test_binary_int_encoding(self):
+    correct = check_binary_encoding('int')
+    self.assertEquals(correct, len(BINARY_ENCODINGS))
+
+  def test_binary_long_encoding(self):
+    correct = check_binary_encoding('long')
+    self.assertEquals(correct, len(BINARY_ENCODINGS))
+
+  def test_skip_int(self):
+    correct = check_skip_number('int')
+    self.assertEquals(correct, len(BINARY_ENCODINGS))
+
+  def test_skip_long(self):
+    correct = check_skip_number('long')
+    self.assertEquals(correct, len(BINARY_ENCODINGS))
+
+  #
+  # SCHEMA RESOLUTION
+  #
+
+  def test_schema_promotion(self):
+    print_test_name('TEST SCHEMA PROMOTION')
+    # note that checking writers_schema.type in read_data
+    # allows us to handle promotion correctly
+    promotable_schemas = ['"int"', '"long"', '"float"', '"double"']
+    incorrect = 0
+    for i, ws in enumerate(promotable_schemas):
+      writers_schema = schema.parse(ws)
+      datum_to_write = 219
+      for rs in promotable_schemas[i + 1:]:
+        readers_schema = schema.parse(rs)
+        writer, enc, dw = write_datum(datum_to_write, writers_schema)
+        datum_read = read_datum(writer, writers_schema, readers_schema)
+        print 'Writer: %s Reader: %s' % (writers_schema, readers_schema)
+        print 'Datum Read: %s' % datum_read
+        if datum_read != datum_to_write: incorrect += 1
+    self.assertEquals(incorrect, 0)
+
+  def test_unknown_symbol(self):
+    print_test_name('TEST UNKNOWN SYMBOL')
+    writers_schema = schema.parse("""\
+      {"type": "enum", "name": "Test",
+       "symbols": ["FOO", "BAR"]}""")
+    datum_to_write = 'FOO'
+
+    readers_schema = schema.parse("""\
+      {"type": "enum", "name": "Test",
+       "symbols": ["BAR", "BAZ"]}""")
+
+    writer, encoder, datum_writer = write_datum(datum_to_write, writers_schema)
+    reader = StringIO(writer.getvalue())
+    decoder = io.BinaryDecoder(reader)
+    datum_reader = io.DatumReader(writers_schema, readers_schema)
+    self.assertRaises(io.SchemaResolutionException, datum_reader.read, decoder)
+
+  def test_default_value(self):
+    print_test_name('TEST DEFAULT VALUE')
+    writers_schema = LONG_RECORD_SCHEMA
+    datum_to_write = LONG_RECORD_DATUM
+
+    correct = 0
+    for field_type, default_json, default_datum in DEFAULT_VALUE_EXAMPLES:
+      readers_schema = schema.parse("""\
+        {"type": "record", "name": "Test",
+         "fields": [{"name": "H", "type": %s, "default": %s}]}
+        """ % (field_type, default_json))
+      datum_to_read = {'H': default_datum}
+
+      writer, encoder, datum_writer = write_datum(datum_to_write, writers_schema)
+      datum_read = read_datum(writer, writers_schema, readers_schema)
+      print 'Datum Read: %s' % datum_read
+      if datum_to_read == datum_read: correct += 1
+    self.assertEquals(correct, len(DEFAULT_VALUE_EXAMPLES))
+
+  def test_no_default_value(self):
+    print_test_name('TEST NO DEFAULT VALUE')
+    writers_schema = LONG_RECORD_SCHEMA
+    datum_to_write = LONG_RECORD_DATUM
+
+    readers_schema = schema.parse("""\
+      {"type": "record", "name": "Test",
+       "fields": [{"name": "H", "type": "int"}]}""")
+
+    writer, encoder, datum_writer = write_datum(datum_to_write, writers_schema)
+    reader = StringIO(writer.getvalue())
+    decoder = io.BinaryDecoder(reader)
+    datum_reader = io.DatumReader(writers_schema, readers_schema)
+    self.assertRaises(io.SchemaResolutionException, datum_reader.read, decoder)
+
+  def test_projection(self):
+    print_test_name('TEST PROJECTION')
+    writers_schema = LONG_RECORD_SCHEMA
+    datum_to_write = LONG_RECORD_DATUM
+
+    readers_schema = schema.parse("""\
+      {"type": "record", "name": "Test",
+       "fields": [{"name": "E", "type": "int"},
+                  {"name": "F", "type": "int"}]}""")
+    datum_to_read = {'E': 5, 'F': 6}
+
+    writer, encoder, datum_writer = write_datum(datum_to_write, writers_schema)
+    datum_read = read_datum(writer, writers_schema, readers_schema)
+    print 'Datum Read: %s' % datum_read
+    self.assertEquals(datum_to_read, datum_read)
+
+  def test_field_order(self):
+    print_test_name('TEST FIELD ORDER')
+    writers_schema = LONG_RECORD_SCHEMA
+    datum_to_write = LONG_RECORD_DATUM
+
+    readers_schema = schema.parse("""\
+      {"type": "record", "name": "Test",
+       "fields": [{"name": "F", "type": "int"},
+                  {"name": "E", "type": "int"}]}""")
+    datum_to_read = {'E': 5, 'F': 6}
+
+    writer, encoder, datum_writer = write_datum(datum_to_write, writers_schema)
+    datum_read = read_datum(writer, writers_schema, readers_schema)
+    print 'Datum Read: %s' % datum_read
+    self.assertEquals(datum_to_read, datum_read)
+
+  def test_type_exception(self):
+    print_test_name('TEST TYPE EXCEPTION')
+    writers_schema = schema.parse("""\
+      {"type": "record", "name": "Test",
+       "fields": [{"name": "F", "type": "int"},
+                  {"name": "E", "type": "int"}]}""")
+    datum_to_write = {'E': 5, 'F': 'Bad'}
+    self.assertRaises(io.AvroTypeException, write_datum, datum_to_write, writers_schema)
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/lang/py/test/test_ipc.py b/lang/py/test/test_ipc.py
new file mode 100644
index 0000000..2545b15
--- /dev/null
+++ b/lang/py/test/test_ipc.py
@@ -0,0 +1,38 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+There are currently no IPC tests within python, in part because there are no
+servers yet available.
+"""
+import unittest
+
+# This test does import this code, to make sure it at least passes
+# compilation.
+from avro import ipc
+
+class TestIPC(unittest.TestCase):
+  def test_placeholder(self):
+    pass
+
+  def test_server_with_path(self):
+    client_with_custom_path = ipc.HTTPTransceiver('dummyserver.net', 80, '/service/article')
+    self.assertEqual('/service/article', client_with_custom_path.req_resource)
+
+    client_with_default_path = ipc.HTTPTransceiver('dummyserver.net', 80)
+    self.assertEqual('/', client_with_default_path.req_resource)
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/lang/py/test/test_protocol.py b/lang/py/test/test_protocol.py
new file mode 100644
index 0000000..8da8db1
--- /dev/null
+++ b/lang/py/test/test_protocol.py
@@ -0,0 +1,439 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Test the protocol parsing logic.
+"""
+import unittest
+from avro import protocol
+
+class ExampleProtocol(object):
+  def __init__(self, protocol_string, valid, name='', comment=''):
+    self._protocol_string = protocol_string
+    self._valid = valid
+    self._name = name or protocol_string # default to protocol_string for name
+    self._comment = comment
+
+  # read-only properties
+  protocol_string = property(lambda self: self._protocol_string)
+  valid = property(lambda self: self._valid)
+  name = property(lambda self: self._name)
+
+  # read/write properties
+  def set_comment(self, new_comment): self._comment = new_comment
+  comment = property(lambda self: self._comment, set_comment)
+
+#
+# Example Protocols
+#
+HELLO_WORLD = ExampleProtocol("""\
+{
+  "namespace": "com.acme",
+  "protocol": "HelloWorld",
+
+  "types": [
+    {"name": "Greeting", "type": "record", "fields": [
+      {"name": "message", "type": "string"}]},
+    {"name": "Curse", "type": "error", "fields": [
+      {"name": "message", "type": "string"}]}
+  ],
+
+  "messages": {
+    "hello": {
+      "request": [{"name": "greeting", "type": "Greeting" }],
+      "response": "Greeting",
+      "errors": ["Curse"]
+    }
+  }
+}
+    """, True)
+EXAMPLES = [
+  HELLO_WORLD,
+  ExampleProtocol("""\
+{"namespace": "org.apache.avro.test",
+ "protocol": "Simple",
+
+ "types": [
+     {"name": "Kind", "type": "enum", "symbols": ["FOO","BAR","BAZ"]},
+
+     {"name": "MD5", "type": "fixed", "size": 16},
+
+     {"name": "TestRecord", "type": "record",
+      "fields": [
+          {"name": "name", "type": "string", "order": "ignore"},
+          {"name": "kind", "type": "Kind", "order": "descending"},
+          {"name": "hash", "type": "MD5"}
+      ]
+     },
+
+     {"name": "TestError", "type": "error", "fields": [
+         {"name": "message", "type": "string"}
+      ]
+     }
+
+ ],
+
+ "messages": {
+
+     "hello": {
+         "request": [{"name": "greeting", "type": "string"}],
+         "response": "string"
+     },
+
+     "echo": {
+         "request": [{"name": "record", "type": "TestRecord"}],
+         "response": "TestRecord"
+     },
+
+     "add": {
+         "request": [{"name": "arg1", "type": "int"}, {"name": "arg2", "type": "int"}],
+         "response": "int"
+     },
+
+     "echoBytes": {
+         "request": [{"name": "data", "type": "bytes"}],
+         "response": "bytes"
+     },
+
+     "error": {
+         "request": [],
+         "response": "null",
+         "errors": ["TestError"]
+     }
+ }
+
+}
+    """, True),
+  ExampleProtocol("""\
+{"namespace": "org.apache.avro.test.namespace",
+ "protocol": "TestNamespace",
+
+ "types": [
+     {"name": "org.apache.avro.test.util.MD5", "type": "fixed", "size": 16},
+     {"name": "TestRecord", "type": "record",
+      "fields": [ {"name": "hash", "type": "org.apache.avro.test.util.MD5"} ]
+     },
+     {"name": "TestError", "namespace": "org.apache.avro.test.errors",
+      "type": "error", "fields": [ {"name": "message", "type": "string"} ]
+     }
+ ],
+
+ "messages": {
+     "echo": {
+         "request": [{"name": "record", "type": "TestRecord"}],
+         "response": "TestRecord"
+     },
+
+     "error": {
+         "request": [],
+         "response": "null",
+         "errors": ["org.apache.avro.test.errors.TestError"]
+     }
+
+ }
+
+}
+    """, True),
+ExampleProtocol("""\
+{"namespace": "org.apache.avro.test.namespace",
+ "protocol": "TestImplicitNamespace",
+
+ "types": [
+     {"name": "org.apache.avro.test.util.MD5", "type": "fixed", "size": 16},
+     {"name": "ReferencedRecord", "type": "record", 
+       "fields": [ {"name": "foo", "type": "string"} ] },
+     {"name": "TestRecord", "type": "record",
+      "fields": [ {"name": "hash", "type": "org.apache.avro.test.util.MD5"},
+                  {"name": "unqalified", "type": "ReferencedRecord"} ]
+     },
+     {"name": "TestError",
+      "type": "error", "fields": [ {"name": "message", "type": "string"} ]
+     }
+ ],
+
+ "messages": {
+     "echo": {
+         "request": [{"name": "qualified", 
+             "type": "org.apache.avro.test.namespace.TestRecord"}],
+         "response": "TestRecord"
+     },
+
+     "error": {
+         "request": [],
+         "response": "null",
+         "errors": ["org.apache.avro.test.namespace.TestError"]
+     }
+
+ }
+
+}
+    """, True),
+ExampleProtocol("""\
+{"namespace": "org.apache.avro.test.namespace",
+ "protocol": "TestNamespaceTwo",
+
+ "types": [
+     {"name": "org.apache.avro.test.util.MD5", "type": "fixed", "size": 16},
+     {"name": "ReferencedRecord", "type": "record", 
+       "namespace": "org.apache.avro.other.namespace", 
+       "fields": [ {"name": "foo", "type": "string"} ] },
+     {"name": "TestRecord", "type": "record",
+      "fields": [ {"name": "hash", "type": "org.apache.avro.test.util.MD5"},
+                  {"name": "qualified", 
+                    "type": "org.apache.avro.other.namespace.ReferencedRecord"} 
+                ]
+     },
+     {"name": "TestError",
+      "type": "error", "fields": [ {"name": "message", "type": "string"} ]
+     }
+ ],
+
+ "messages": {
+     "echo": {
+         "request": [{"name": "qualified", 
+             "type": "org.apache.avro.test.namespace.TestRecord"}],
+         "response": "TestRecord"
+     },
+
+     "error": {
+         "request": [],
+         "response": "null",
+         "errors": ["org.apache.avro.test.namespace.TestError"]
+     }
+
+ }
+
+}
+    """, True),
+ExampleProtocol("""\
+{"namespace": "org.apache.avro.test.namespace",
+ "protocol": "TestValidRepeatedName",
+
+ "types": [
+     {"name": "org.apache.avro.test.util.MD5", "type": "fixed", "size": 16},
+     {"name": "ReferencedRecord", "type": "record", 
+       "namespace": "org.apache.avro.other.namespace", 
+       "fields": [ {"name": "foo", "type": "string"} ] },
+     {"name": "ReferencedRecord", "type": "record", 
+       "fields": [ {"name": "bar", "type": "double"} ] },
+     {"name": "TestError",
+      "type": "error", "fields": [ {"name": "message", "type": "string"} ]
+     }
+ ],
+
+ "messages": {
+     "echo": {
+         "request": [{"name": "qualified", 
+             "type": "ReferencedRecord"}],
+         "response": "org.apache.avro.other.namespace.ReferencedRecord"
+     },
+
+     "error": {
+         "request": [],
+         "response": "null",
+         "errors": ["org.apache.avro.test.namespace.TestError"]
+     }
+
+ }
+
+}
+    """, True),
+ExampleProtocol("""\
+{"namespace": "org.apache.avro.test.namespace",
+ "protocol": "TestInvalidRepeatedName",
+
+ "types": [
+     {"name": "org.apache.avro.test.util.MD5", "type": "fixed", "size": 16},
+     {"name": "ReferencedRecord", "type": "record", 
+       "fields": [ {"name": "foo", "type": "string"} ] },
+     {"name": "ReferencedRecord", "type": "record", 
+       "fields": [ {"name": "bar", "type": "double"} ] },
+     {"name": "TestError",
+      "type": "error", "fields": [ {"name": "message", "type": "string"} ]
+     }
+ ],
+
+ "messages": {
+     "echo": {
+         "request": [{"name": "qualified", 
+             "type": "ReferencedRecord"}],
+         "response": "org.apache.avro.other.namespace.ReferencedRecord"
+     },
+
+     "error": {
+         "request": [],
+         "response": "null",
+         "errors": ["org.apache.avro.test.namespace.TestError"]
+     }
+
+ }
+
+}
+    """, False),
+  ExampleProtocol("""\
+{"namespace": "org.apache.avro.test",
+ "protocol": "BulkData",
+
+ "types": [],
+
+ "messages": {
+
+     "read": {
+         "request": [],
+         "response": "bytes"
+     },
+
+     "write": {
+         "request": [ {"name": "data", "type": "bytes"} ],
+         "response": "null"
+     }
+
+ }
+
+}
+    """, True),
+  ExampleProtocol("""\
+{
+  "protocol" : "API",
+  "namespace" : "xyz.api",
+  "types" : [ {
+    "type" : "enum",
+    "name" : "Symbology",
+    "namespace" : "xyz.api.product",
+    "symbols" : [ "OPRA", "CUSIP", "ISIN", "SEDOL" ]
+  }, {
+    "type" : "record",
+    "name" : "Symbol",
+    "namespace" : "xyz.api.product",
+    "fields" : [ {
+      "name" : "symbology",
+      "type" : "xyz.api.product.Symbology"
+    }, {
+      "name" : "symbol",
+      "type" : "string"
+    } ]
+  }, {
+    "type" : "record",
+    "name" : "MultiSymbol",
+    "namespace" : "xyz.api.product",
+    "fields" : [ {
+      "name" : "symbols",
+      "type" : {
+        "type" : "map",
+        "values" : "xyz.api.product.Symbol"
+      }
+    } ]
+  } ],
+  "messages" : {
+  }
+}
+    """, True),
+]
+
+VALID_EXAMPLES = [e for e in EXAMPLES if e.valid]
+
+class TestProtocol(unittest.TestCase):
+  def test_parse(self):
+    num_correct = 0
+    for example in EXAMPLES:
+      try:
+        protocol.parse(example.protocol_string)
+        if example.valid: 
+          num_correct += 1
+        else:
+          self.fail("Parsed invalid protocol: %s" % (example.name,))
+      except Exception, e:
+        if not example.valid: 
+          num_correct += 1
+        else:
+          self.fail("Coudl not parse valid protocol: %s" % (example.name,))
+
+    fail_msg = "Parse behavior correct on %d out of %d protocols." % \
+      (num_correct, len(EXAMPLES))
+    self.assertEqual(num_correct, len(EXAMPLES), fail_msg)
+
+  def test_inner_namespace_set(self):
+    print ''
+    print 'TEST INNER NAMESPACE'
+    print '===================='
+    print ''
+    proto = protocol.parse(HELLO_WORLD.protocol_string)
+    self.assertEqual(proto.namespace, "com.acme")
+    greeting_type = proto.types_dict['Greeting']
+    self.assertEqual(greeting_type.namespace, 'com.acme')
+
+  def test_inner_namespace_not_rendered(self):
+    proto = protocol.parse(HELLO_WORLD.protocol_string)
+    self.assertEqual('com.acme.Greeting', proto.types[0].fullname)
+    self.assertEqual('Greeting', proto.types[0].name)
+    # but there shouldn't be 'namespace' rendered to json on the inner type
+    self.assertFalse('namespace' in proto.to_json()['types'][0])
+
+  def test_valid_cast_to_string_after_parse(self):
+    """
+    Test that the string generated by an Avro Protocol object
+    is, in fact, a valid Avro protocol.
+    """
+    print ''
+    print 'TEST CAST TO STRING'
+    print '==================='
+    print ''
+
+    num_correct = 0
+    for example in VALID_EXAMPLES:
+      protocol_data = protocol.parse(example.protocol_string)
+      try:
+        try:
+          protocol.parse(str(protocol_data))
+          debug_msg = "%s: STRING CAST SUCCESS" % example.name
+          num_correct += 1
+        except:
+          debug_msg = "%s: STRING CAST FAILURE" % example.name
+      finally:
+        print debug_msg
+
+    fail_msg = "Cast to string success on %d out of %d protocols" % \
+      (num_correct, len(VALID_EXAMPLES))
+    self.assertEqual(num_correct, len(VALID_EXAMPLES), fail_msg)
+
+  def test_equivalence_after_round_trip(self):
+    """
+    1. Given a string, parse it to get Avro protocol "original".
+    2. Serialize "original" to a string and parse that string
+         to generate Avro protocol "round trip".
+    3. Ensure "original" and "round trip" protocols are equivalent.
+    """
+    print ''
+    print 'TEST ROUND TRIP'
+    print '==============='
+    print ''
+
+    num_correct = 0
+    for example in VALID_EXAMPLES:
+      original_protocol = protocol.parse(example.protocol_string)
+      round_trip_protocol = protocol.parse(str(original_protocol))
+
+      if original_protocol == round_trip_protocol:
+        num_correct += 1
+        debug_msg = "%s: ROUND TRIP SUCCESS" % example.name
+      else:
+        self.fail("Round trip failure: %s %s %s" %
+                  (example.name, example.protocol_string, str(original_protocol)))
+
+    fail_msg = "Round trip success on %d out of %d protocols" % \
+      (num_correct, len(VALID_EXAMPLES))
+    self.assertEqual(num_correct, len(VALID_EXAMPLES), fail_msg)
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/lang/py/test/test_schema.py b/lang/py/test/test_schema.py
new file mode 100644
index 0000000..b9c84b3
--- /dev/null
+++ b/lang/py/test/test_schema.py
@@ -0,0 +1,475 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Test the schema parsing logic.
+"""
+import unittest
+from avro import schema
+
+def print_test_name(test_name):
+  print ''
+  print test_name
+  print '=' * len(test_name)
+  print ''
+
+class ExampleSchema(object):
+  def __init__(self, schema_string, valid, name='', comment=''):
+    self._schema_string = schema_string
+    self._valid = valid
+    self._name = name or schema_string # default to schema_string for name
+    self.comment = comment
+
+  @property
+  def schema_string(self):
+    return self._schema_string
+
+  @property
+  def valid(self):
+    return self._valid
+
+  @property
+  def name(self):
+    return self._name
+
+#
+# Example Schemas
+#
+
+def make_primitive_examples():
+  examples = []
+  for type in schema.PRIMITIVE_TYPES:
+    examples.append(ExampleSchema('"%s"' % type, True))
+    examples.append(ExampleSchema('{"type": "%s"}' % type, True))
+  return examples
+
+PRIMITIVE_EXAMPLES = [
+  ExampleSchema('"True"', False),
+  ExampleSchema('True', False),
+  ExampleSchema('{"no_type": "test"}', False),
+  ExampleSchema('{"type": "panther"}', False),
+] + make_primitive_examples()
+
+FIXED_EXAMPLES = [
+  ExampleSchema('{"type": "fixed", "name": "Test", "size": 1}', True),
+  ExampleSchema("""\
+    {"type": "fixed",
+     "name": "MyFixed",
+     "namespace": "org.apache.hadoop.avro",
+     "size": 1}
+    """, True),
+  ExampleSchema("""\
+    {"type": "fixed",
+     "name": "Missing size"}
+    """, False),
+  ExampleSchema("""\
+    {"type": "fixed",
+     "size": 314}
+    """, False),
+]
+
+ENUM_EXAMPLES = [
+  ExampleSchema('{"type": "enum", "name": "Test", "symbols": ["A", "B"]}', True),
+  ExampleSchema("""\
+    {"type": "enum",
+     "name": "Status",
+     "symbols": "Normal Caution Critical"}
+    """, False),
+  ExampleSchema("""\
+    {"type": "enum",
+     "name": [ 0, 1, 1, 2, 3, 5, 8 ],
+     "symbols": ["Golden", "Mean"]}
+    """, False),
+  ExampleSchema("""\
+    {"type": "enum",
+     "symbols" : ["I", "will", "fail", "no", "name"]}
+    """, False),
+  ExampleSchema("""\
+    {"type": "enum",
+     "name": "Test"
+     "symbols" : ["AA", "AA"]}
+    """, False),
+]
+
+ARRAY_EXAMPLES = [
+  ExampleSchema('{"type": "array", "items": "long"}', True),
+  ExampleSchema("""\
+    {"type": "array",
+     "items": {"type": "enum", "name": "Test", "symbols": ["A", "B"]}}
+    """, True),
+]
+
+MAP_EXAMPLES = [
+  ExampleSchema('{"type": "map", "values": "long"}', True),
+  ExampleSchema("""\
+    {"type": "map",
+     "values": {"type": "enum", "name": "Test", "symbols": ["A", "B"]}}
+    """, True),
+]
+
+UNION_EXAMPLES = [
+  ExampleSchema('["string", "null", "long"]', True),
+  ExampleSchema('["null", "null"]', False),
+  ExampleSchema('["long", "long"]', False),
+  ExampleSchema("""\
+    [{"type": "array", "items": "long"}
+     {"type": "array", "items": "string"}]
+    """, False),
+]
+
+RECORD_EXAMPLES = [
+  ExampleSchema("""\
+    {"type": "record",
+     "name": "Test",
+     "fields": [{"name": "f",
+                 "type": "long"}]}
+    """, True),
+  ExampleSchema("""\
+    {"type": "error",
+     "name": "Test",
+     "fields": [{"name": "f",
+                 "type": "long"}]}
+    """, True),
+  ExampleSchema("""\
+    {"type": "record",
+     "name": "Node",
+     "fields": [{"name": "label", "type": "string"},
+                {"name": "children",
+                 "type": {"type": "array", "items": "Node"}}]}
+    """, True),
+  ExampleSchema("""\
+    {"type": "record",
+     "name": "Lisp",
+     "fields": [{"name": "value",
+                 "type": ["null", "string",
+                          {"type": "record",
+                           "name": "Cons",
+                           "fields": [{"name": "car", "type": "Lisp"},
+                                      {"name": "cdr", "type": "Lisp"}]}]}]}
+    """, True),
+  ExampleSchema("""\
+    {"type": "record",
+     "name": "HandshakeRequest",
+     "namespace": "org.apache.avro.ipc",
+     "fields": [{"name": "clientHash",
+                 "type": {"type": "fixed", "name": "MD5", "size": 16}},
+                {"name": "clientProtocol", "type": ["null", "string"]},
+                {"name": "serverHash", "type": "MD5"},
+                {"name": "meta", 
+                 "type": ["null", {"type": "map", "values": "bytes"}]}]}
+    """, True),
+  ExampleSchema("""\
+    {"type": "record",
+     "name": "HandshakeResponse",
+     "namespace": "org.apache.avro.ipc",
+     "fields": [{"name": "match",
+                 "type": {"type": "enum",
+                          "name": "HandshakeMatch",
+                          "symbols": ["BOTH", "CLIENT", "NONE"]}},
+                {"name": "serverProtocol", "type": ["null", "string"]},
+                {"name": "serverHash",
+                 "type": ["null",
+                          {"name": "MD5", "size": 16, "type": "fixed"}]},
+                {"name": "meta",
+                 "type": ["null", {"type": "map", "values": "bytes"}]}]}
+    """, True),
+  ExampleSchema("""\
+    {"type": "record",
+     "name": "Interop",
+     "namespace": "org.apache.avro",
+     "fields": [{"name": "intField", "type": "int"},
+                {"name": "longField", "type": "long"},
+                {"name": "stringField", "type": "string"},
+                {"name": "boolField", "type": "boolean"},
+                {"name": "floatField", "type": "float"},
+                {"name": "doubleField", "type": "double"},
+                {"name": "bytesField", "type": "bytes"},
+                {"name": "nullField", "type": "null"},
+                {"name": "arrayField",
+                 "type": {"type": "array", "items": "double"}},
+                {"name": "mapField",
+                 "type": {"type": "map",
+                          "values": {"name": "Foo",
+                                     "type": "record",
+                                     "fields": [{"name": "label",
+                                                 "type": "string"}]}}},
+                {"name": "unionField",
+                 "type": ["boolean",
+                          "double",
+                          {"type": "array", "items": "bytes"}]},
+                {"name": "enumField",
+                 "type": {"type": "enum",
+                          "name": "Kind",
+                          "symbols": ["A", "B", "C"]}},
+                {"name": "fixedField",
+                 "type": {"type": "fixed", "name": "MD5", "size": 16}},
+                {"name": "recordField",
+                 "type": {"type": "record",
+                          "name": "Node",
+                          "fields": [{"name": "label", "type": "string"},
+                                     {"name": "children",
+                                      "type": {"type": "array",
+                                               "items": "Node"}}]}}]}
+    """, True),
+  ExampleSchema("""\
+    {"type": "record",
+     "name": "ipAddr",
+     "fields": [{"name": "addr", 
+                 "type": [{"name": "IPv6", "type": "fixed", "size": 16},
+                          {"name": "IPv4", "type": "fixed", "size": 4}]}]}
+    """, True),
+  ExampleSchema("""\
+    {"type": "record",
+     "name": "Address",
+     "fields": [{"type": "string"},
+                {"type": "string", "name": "City"}]}
+    """, False),
+  ExampleSchema("""\
+    {"type": "record",
+     "name": "Event",
+     "fields": [{"name": "Sponsor"},
+                {"name": "City", "type": "string"}]}
+    """, False),
+  ExampleSchema("""\
+    {"type": "record",
+     "fields": "His vision, from the constantly passing bars,"
+     "name", "Rainer"}
+    """, False),
+  ExampleSchema("""\
+    {"name": ["Tom", "Jerry"],
+     "type": "record",
+     "fields": [{"name": "name", "type": "string"}]}
+    """, False),
+]
+
+DOC_EXAMPLES = [
+  ExampleSchema("""\
+    {"type": "record",
+     "name": "TestDoc",
+     "doc":  "Doc string",
+     "fields": [{"name": "name", "type": "string", 
+                 "doc" : "Doc String"}]}
+    """, True),
+  ExampleSchema("""\
+    {"type": "enum", "name": "Test", "symbols": ["A", "B"],
+     "doc": "Doc String"}
+    """, True),
+]
+
+OTHER_PROP_EXAMPLES = [
+  ExampleSchema("""\
+    {"type": "record",
+     "name": "TestRecord",
+     "cp_string": "string",
+     "cp_int": 1,
+     "cp_array": [ 1, 2, 3, 4],
+     "fields": [ {"name": "f1", "type": "string", "cp_object": {"a":1,"b":2} },
+                 {"name": "f2", "type": "long", "cp_null": null} ]}
+    """, True),
+  ExampleSchema("""\
+     {"type": "map", "values": "long", "cp_boolean": true}
+    """, True),
+  ExampleSchema("""\
+    {"type": "enum",
+     "name": "TestEnum",
+     "symbols": [ "one", "two", "three" ],
+     "cp_float" : 1.0 }
+    """,True),
+]
+
+EXAMPLES = PRIMITIVE_EXAMPLES
+EXAMPLES += FIXED_EXAMPLES
+EXAMPLES += ENUM_EXAMPLES
+EXAMPLES += ARRAY_EXAMPLES
+EXAMPLES += MAP_EXAMPLES
+EXAMPLES += UNION_EXAMPLES
+EXAMPLES += RECORD_EXAMPLES
+EXAMPLES += DOC_EXAMPLES
+
+VALID_EXAMPLES = [e for e in EXAMPLES if e.valid]
+
+# TODO(hammer): refactor into harness for examples
+# TODO(hammer): pretty-print detailed output
+# TODO(hammer): make verbose flag
+# TODO(hammer): show stack trace to user
+# TODO(hammer): use logging module?
+class TestSchema(unittest.TestCase):
+
+  def test_correct_recursive_extraction(self):
+    s = schema.parse('{"type": "record", "name": "X", "fields": [{"name": "y", "type": {"type": "record", "name": "Y", "fields": [{"name": "Z", "type": "X"}]}}]}')
+    t = schema.parse(str(s.fields[0].type))
+    # If we've made it this far, the subschema was reasonably stringified; it could be reparsed.
+    self.assertEqual("X", t.fields[0].type.name)
+
+  def test_parse(self):
+    correct = 0
+    for example in EXAMPLES:
+      try:
+        schema.parse(example.schema_string)
+        if example.valid:
+          correct += 1
+        else:
+          self.fail("Invalid schema was parsed: " + example.schema_string)
+      except:
+        if not example.valid: 
+          correct += 1
+        else:
+          self.fail("Valid schema failed to parse: " + example.schema_string)
+
+    fail_msg = "Parse behavior correct on %d out of %d schemas." % \
+      (correct, len(EXAMPLES))
+    self.assertEqual(correct, len(EXAMPLES), fail_msg)
+
+  def test_valid_cast_to_string_after_parse(self):
+    """
+    Test that the string generated by an Avro Schema object
+    is, in fact, a valid Avro schema.
+    """
+    print_test_name('TEST CAST TO STRING AFTER PARSE')
+    correct = 0
+    for example in VALID_EXAMPLES:
+      schema_data = schema.parse(example.schema_string)
+      schema.parse(str(schema_data))
+      correct += 1
+
+    fail_msg = "Cast to string success on %d out of %d schemas" % \
+      (correct, len(VALID_EXAMPLES))
+    self.assertEqual(correct, len(VALID_EXAMPLES), fail_msg)
+
+  def test_equivalence_after_round_trip(self):
+    """
+    1. Given a string, parse it to get Avro schema "original".
+    2. Serialize "original" to a string and parse that string
+         to generate Avro schema "round trip".
+    3. Ensure "original" and "round trip" schemas are equivalent.
+    """
+    print_test_name('TEST ROUND TRIP')
+    correct = 0
+    for example in VALID_EXAMPLES:
+      original_schema = schema.parse(example.schema_string)
+      round_trip_schema = schema.parse(str(original_schema))
+      if original_schema == round_trip_schema:
+        correct += 1
+        debug_msg = "%s: ROUND TRIP SUCCESS" % example.name
+      else:
+        debug_msg = "%s: ROUND TRIP FAILURE" % example.name
+        self.fail("Round trip failure: %s, %s, %s" % (example.name, original_schema, round_trip_schema))
+
+    fail_msg = "Round trip success on %d out of %d schemas" % \
+      (correct, len(VALID_EXAMPLES))
+    self.assertEqual(correct, len(VALID_EXAMPLES), fail_msg)
+
+  # TODO(hammer): more tests
+  def test_fullname(self):
+    """
+    The fullname is determined in one of the following ways:
+     * A name and namespace are both specified.  For example,
+       one might use "name": "X", "namespace": "org.foo"
+       to indicate the fullname "org.foo.X".
+     * A fullname is specified.  If the name specified contains
+       a dot, then it is assumed to be a fullname, and any
+       namespace also specified is ignored.  For example,
+       use "name": "org.foo.X" to indicate the
+       fullname "org.foo.X".
+     * A name only is specified, i.e., a name that contains no
+       dots.  In this case the namespace is taken from the most
+       tightly enclosing schema or protocol.  For example,
+       if "name": "X" is specified, and this occurs
+       within a field of the record definition
+       of "org.foo.Y", then the fullname is "org.foo.X".
+
+    References to previously defined names are as in the latter
+    two cases above: if they contain a dot they are a fullname, if
+    they do not contain a dot, the namespace is the namespace of
+    the enclosing definition.
+
+    Primitive type names have no namespace and their names may
+    not be defined in any namespace.  A schema may only contain
+    multiple definitions of a fullname if the definitions are
+    equivalent.
+    """
+    print_test_name('TEST FULLNAME')
+
+    # name and namespace specified    
+    fullname = schema.Name('a', 'o.a.h', None).fullname
+    self.assertEqual(fullname, 'o.a.h.a')
+
+    # fullname and namespace specified
+    fullname = schema.Name('a.b.c.d', 'o.a.h', None).fullname
+    self.assertEqual(fullname, 'a.b.c.d')
+    
+    # name and default namespace specified
+    fullname = schema.Name('a', None, 'b.c.d').fullname
+    self.assertEqual(fullname, 'b.c.d.a')
+
+    # fullname and default namespace specified
+    fullname = schema.Name('a.b.c.d', None, 'o.a.h').fullname
+    self.assertEqual(fullname, 'a.b.c.d')
+
+    # fullname, namespace, default namespace specified
+    fullname = schema.Name('a.b.c.d', 'o.a.a', 'o.a.h').fullname
+    self.assertEqual(fullname, 'a.b.c.d')
+
+    # name, namespace, default namespace specified
+    fullname = schema.Name('a', 'o.a.a', 'o.a.h').fullname
+    self.assertEqual(fullname, 'o.a.a.a')
+
+  def test_doc_attributes(self):
+    print_test_name('TEST DOC ATTRIBUTES')
+    correct = 0
+    for example in DOC_EXAMPLES:
+      original_schema = schema.parse(example.schema_string)
+      if original_schema.doc is not None:
+        correct += 1
+      if original_schema.type == 'record':
+        for f in original_schema.fields:
+          if f.doc is None:
+            self.fail("Failed to preserve 'doc' in fields: " + example.schema_string)
+    self.assertEqual(correct,len(DOC_EXAMPLES))
+
+  def test_other_attributes(self):
+    print_test_name('TEST OTHER ATTRIBUTES')
+    correct = 0
+    props = {}
+    for example in OTHER_PROP_EXAMPLES:
+      original_schema = schema.parse(example.schema_string)
+      round_trip_schema = schema.parse(str(original_schema))
+      self.assertEqual(original_schema.other_props,round_trip_schema.other_props)
+      if original_schema.type == "record":
+        field_props = 0
+        for f in original_schema.fields:
+          if f.other_props:
+            props.update(f.other_props)
+            field_props += 1
+        self.assertEqual(field_props,len(original_schema.fields))
+      if original_schema.other_props:
+        props.update(original_schema.other_props)
+        correct += 1
+    for k in props:
+      v = props[k]
+      if k == "cp_boolean":
+        self.assertEqual(type(v), bool)
+      elif k == "cp_int":
+        self.assertEqual(type(v), int)
+      elif k == "cp_object":
+        self.assertEqual(type(v), dict)
+      elif k == "cp_float":
+        self.assertEqual(type(v), float)
+      elif k == "cp_array":
+        self.assertEqual(type(v), list)
+    self.assertEqual(correct,len(OTHER_PROP_EXAMPLES))
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/lang/py/test/test_script.py b/lang/py/test/test_script.py
new file mode 100644
index 0000000..d506828
--- /dev/null
+++ b/lang/py/test/test_script.py
@@ -0,0 +1,256 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import unittest
+import csv
+from cStringIO import StringIO
+try:
+    import json
+except ImportError:
+    import simplejson as json
+from tempfile import NamedTemporaryFile
+import avro.schema
+from avro.io import DatumWriter
+from avro.datafile import DataFileWriter
+from os.path import dirname, join, isfile
+from os import remove
+from operator import itemgetter
+
+NUM_RECORDS = 7
+
+try:
+    from subprocess import check_output
+except ImportError:
+    from subprocess import Popen, PIPE
+
+    def check_output(args):
+        pipe = Popen(args, stdout=PIPE)
+        if pipe.wait() != 0:
+            raise ValueError
+        return pipe.stdout.read()
+
+try:
+    from subprocess import check_call
+except ImportError:
+    def check_call(args, **kw):
+        pipe = Popen(args, **kw)
+        assert pipe.wait() == 0
+
+SCHEMA = '''
+{
+    "namespace": "test.avro",
+        "name": "LooneyTunes",
+        "type": "record",
+        "fields": [
+            {"name": "first", "type": "string"},
+            {"name": "last", "type": "string"},
+            {"name": "type", "type": "string"}
+        ]
+}
+'''
+
+LOONIES = (
+    ("daffy", "duck", "duck"),
+    ("bugs", "bunny", "bunny"),
+    ("tweety", "", "bird"),
+    ("road", "runner", "bird"),
+    ("wile", "e", "coyote"),
+    ("pepe", "le pew", "skunk"),
+    ("foghorn", "leghorn", "rooster"),
+)
+
+def looney_records():
+    for f, l, t in LOONIES:
+        yield {"first": f, "last" : l, "type" : t}
+
+SCRIPT = join(dirname(__file__), "..", "scripts", "avro")
+
+_JSON_PRETTY = '''{
+    "type": "duck", 
+    "last": "duck", 
+    "first": "daffy"
+}'''
+
+def gen_avro(filename):
+    schema = avro.schema.parse(SCHEMA)
+    fo = open(filename, "wb")
+    writer = DataFileWriter(fo, DatumWriter(), schema)
+    for record in looney_records():
+        writer.append(record)
+    writer.close()
+    fo.close()
+
+def tempfile():
+    return NamedTemporaryFile(delete=False).name
+
+class TestCat(unittest.TestCase):
+    def setUp(self):
+        self.avro_file = tempfile()
+        gen_avro(self.avro_file)
+
+    def tearDown(self):
+        if isfile(self.avro_file):
+            remove(self.avro_file)
+
+    def _run(self, *args, **kw):
+        out = check_output([SCRIPT, "cat", self.avro_file] + list(args))
+        if kw.get("raw"):
+            return out
+        else:
+            return out.splitlines()
+
+    def test_print(self):
+        assert len(self._run()) == NUM_RECORDS
+
+    def test_filter(self):
+        assert len(self._run("--filter", "r['type']=='bird'")) == 2
+
+    def test_skip(self):
+        skip = 3
+        assert len(self._run("--skip", str(skip))) == NUM_RECORDS - skip
+
+    def test_csv(self):
+        reader = csv.reader(StringIO(self._run("-f", "csv", raw=True)))
+        assert len(list(reader)) == NUM_RECORDS
+
+    def test_csv_header(self):
+        io = StringIO(self._run("-f", "csv", "--header", raw=True))
+        reader = csv.DictReader(io)
+        r = {"type": "duck", "last": "duck", "first": "daffy"}
+        assert next(reader) == r
+
+    def test_print_schema(self):
+        out = self._run("--print-schema", raw=True)
+        assert json.loads(out)["namespace"] == "test.avro"
+
+    def test_help(self):
+        # Just see we have these
+        self._run("-h")
+        self._run("--help")
+
+    def test_json_pretty(self):
+        out = self._run("--format", "json-pretty", "-n", "1", raw=1)
+        assert out.strip() == _JSON_PRETTY.strip()
+
+    def test_version(self):
+        check_output([SCRIPT, "cat", "--version"])
+
+    def test_files(self):
+        out = self._run(self.avro_file)
+        assert len(out) == 2 * NUM_RECORDS
+
+    def test_fields(self):
+        # One field selection (no comma)
+        out = self._run('--fields', 'last')
+        assert json.loads(out[0]) == {'last': 'duck'}
+
+        # Field selection (with comma and space)
+        out = self._run('--fields', 'first, last')
+        assert json.loads(out[0]) == {'first': 'daffy', 'last': 'duck'}
+
+        # Empty fields should get all
+        out = self._run('--fields', '')
+        assert json.loads(out[0]) == \
+                {'first': 'daffy', 'last': 'duck', 'type': 'duck'}
+
+        # Non existing fields are ignored
+        out = self._run('--fields', 'first,last,age')
+        assert json.loads(out[0]) == {'first': 'daffy', 'last': 'duck'}
+
+class TestWrite(unittest.TestCase):
+    def setUp(self):
+        self.json_file = tempfile() + ".json"
+        fo = open(self.json_file, "w")
+        for record in looney_records():
+            json.dump(record, fo)
+            fo.write("\n")
+        fo.close()
+
+        self.csv_file = tempfile() + ".csv"
+        fo = open(self.csv_file, "w")
+        write = csv.writer(fo).writerow
+        get = itemgetter("first", "last", "type")
+        for record in looney_records():
+            write(get(record))
+        fo.close()
+
+        self.schema_file = tempfile()
+        fo = open(self.schema_file, "w")
+        fo.write(SCHEMA)
+        fo.close()
+
+    def tearDown(self):
+        for filename in (self.csv_file, self.json_file, self.schema_file):
+            try:
+                remove(filename)
+            except OSError:
+                continue
+
+    def _run(self, *args, **kw):
+        args = [SCRIPT, "write", "--schema", self.schema_file] + list(args)
+        check_call(args, **kw)
+
+    def load_avro(self, filename):
+        out = check_output([SCRIPT, "cat", filename])
+        return map(json.loads, out.splitlines())
+
+    def test_version(self):
+        check_call([SCRIPT, "write", "--version"])
+
+    def format_check(self, format, filename):
+        tmp = tempfile()
+        fo = open(tmp, "wb")
+        self._run(filename, "-f", format, stdout=fo)
+        fo.close()
+
+        records = self.load_avro(tmp)
+        assert len(records) == NUM_RECORDS
+        assert records[0]["first"] == "daffy"
+
+        remove(tmp)
+
+    def test_write_json(self):
+        self.format_check("json", self.json_file)
+
+    def test_write_csv(self):
+        self.format_check("csv", self.csv_file)
+
+    def test_outfile(self):
+        tmp = tempfile()
+        remove(tmp)
+        self._run(self.json_file, "-o", tmp)
+
+        assert len(self.load_avro(tmp)) == NUM_RECORDS
+        remove(tmp)
+
+    def test_multi_file(self):
+        tmp = tempfile()
+        fo = open(tmp, "wb")
+        self._run(self.json_file, self.json_file, stdout=fo)
+        fo.close()
+
+        assert len(self.load_avro(tmp)) == 2 * NUM_RECORDS
+        remove(tmp)
+
+    def test_stdin(self):
+        tmp = tempfile()
+
+        info = open(self.json_file, "rb")
+        fo = open(tmp, "wb")
+        self._run("--input-type", "json", stdin=info, stdout=fo)
+        fo.close()
+
+        assert len(self.load_avro(tmp)) == NUM_RECORDS
+        remove(tmp)
diff --git a/lang/py/test/txsample_http_client.py b/lang/py/test/txsample_http_client.py
new file mode 100644
index 0000000..ca03c4d
--- /dev/null
+++ b/lang/py/test/txsample_http_client.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import sys
+
+from twisted.internet import reactor, defer
+from twisted.python.util import println
+
+from avro import protocol
+from avro import txipc
+
+MAIL_PROTOCOL_JSON = """\
+{"namespace": "example.proto",
+ "protocol": "Mail",
+
+ "types": [
+     {"name": "Message", "type": "record",
+      "fields": [
+          {"name": "to",   "type": "string"},
+          {"name": "from", "type": "string"},
+          {"name": "body", "type": "string"}
+      ]
+     }
+ ],
+
+ "messages": {
+     "send": {
+         "request": [{"name": "message", "type": "Message"}],
+         "response": "string"
+     },
+     "replay": {
+         "request": [],
+         "response": "string"
+     }
+ }
+}
+"""
+MAIL_PROTOCOL = protocol.parse(MAIL_PROTOCOL_JSON)
+SERVER_HOST = 'localhost'
+SERVER_PORT = 9090
+
+class UsageError(Exception):
+  def __init__(self, value):
+    self.value = value
+  def __str__(self):
+    return repr(self.value)
+
+def make_requestor(server_host, server_port, protocol):
+  client = txipc.TwistedHTTPTransceiver(SERVER_HOST, SERVER_PORT)
+  return txipc.TwistedRequestor(protocol, client)
+
+if __name__ == '__main__':
+  if len(sys.argv) not in [4, 5]:
+    raise UsageError("Usage: <to> <from> <body> [<count>]")
+
+  # client code - attach to the server and send a message
+  # fill in the Message record
+  message = dict()
+  message['to'] = sys.argv[1]
+  message['from'] = sys.argv[2]
+  message['body'] = sys.argv[3]
+
+  try:
+    num_messages = int(sys.argv[4])
+  except (IndexError, ValueError):
+    num_messages = 1
+
+  # build the parameters for the request
+  params = {}
+  params['message'] = message
+
+  requests = []
+  # send the requests and print the result
+  for msg_count in range(num_messages):
+    requestor = make_requestor(SERVER_HOST, SERVER_PORT, MAIL_PROTOCOL)
+    d = requestor.request('send', params)
+    d.addCallback(lambda result: println("Result: " + result))
+    requests.append(d)
+  results = defer.gatherResults(requests)
+
+  def replay_cb(result):
+    print("Replay Result: " + result)
+    reactor.stop()
+
+  def replay(_):
+    # try out a replay message
+    requestor = make_requestor(SERVER_HOST, SERVER_PORT, MAIL_PROTOCOL)
+    d = requestor.request('replay', dict())
+    d.addCallback(replay_cb)
+
+  results.addCallback(replay)
+  reactor.run()
diff --git a/lang/py/test/txsample_http_server.py b/lang/py/test/txsample_http_server.py
new file mode 100644
index 0000000..e1d910d
--- /dev/null
+++ b/lang/py/test/txsample_http_server.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from twisted.web import server
+from twisted.internet import reactor
+
+from avro import ipc
+from avro import protocol
+from avro import txipc
+
+MAIL_PROTOCOL_JSON = """\
+{"namespace": "example.proto",
+ "protocol": "Mail",
+
+ "types": [
+     {"name": "Message", "type": "record",
+      "fields": [
+          {"name": "to",   "type": "string"},
+          {"name": "from", "type": "string"},
+          {"name": "body", "type": "string"}
+      ]
+     }
+ ],
+
+ "messages": {
+     "send": {
+         "request": [{"name": "message", "type": "Message"}],
+         "response": "string"
+     },
+     "replay": {
+         "request": [],
+         "response": "string"
+     }
+ }
+}
+"""
+MAIL_PROTOCOL = protocol.parse(MAIL_PROTOCOL_JSON)
+SERVER_ADDRESS = ('localhost', 9090)
+
+class MailResponder(ipc.Responder):
+  def __init__(self):
+    ipc.Responder.__init__(self, MAIL_PROTOCOL)
+
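+  # For the 'send' message declared in MAIL_PROTOCOL_JSON above, 'request'
+  # arrives as a dict of request parameters, e.g.
+  # {'message': {'to': 'a', 'from': 'b', 'body': 'hi'}} (values illustrative),
+  # and the returned string becomes the RPC response.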
+  def invoke(self, message, request):
+    if message.name == 'send':
+      request_content = request['message']
+      response = "Sent message to %(to)s from %(from)s with body %(body)s" % \
+                 request_content
+      return response
+    elif message.name == 'replay':
+      return 'replay'
+
+if __name__ == '__main__':
+  root = server.Site(txipc.AvroResponderResource(MailResponder()))
+  reactor.listenTCP(9090, root)
+  reactor.run()
diff --git a/lang/py3/avro/__init__.py b/lang/py3/avro/__init__.py
new file mode 100644
index 0000000..3e3222f
--- /dev/null
+++ b/lang/py3/avro/__init__.py
@@ -0,0 +1,34 @@
+#!/usr/bin/env python3
+# -*- mode: python -*-
+# -*- coding: utf-8 -*-
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__all__ = ('schema', 'io', 'datafile', 'protocol', 'ipc')
+
+
+import os
+
+
+def LoadResource(name):
+  dir_path = os.path.dirname(__file__)
+  rsrc_path = os.path.join(dir_path, name)
+  with open(rsrc_path, 'r') as f:
+    return f.read()
+
+
+VERSION = LoadResource('VERSION.txt').strip()
diff --git a/lang/py3/avro/datafile.py b/lang/py3/avro/datafile.py
new file mode 100644
index 0000000..fc93fe1
--- /dev/null
+++ b/lang/py3/avro/datafile.py
@@ -0,0 +1,532 @@
+#!/usr/bin/env python3
+# -*- mode: python -*-
+# -*- coding: utf-8 -*-
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Read/Write Avro File Object Containers."""
+
+import io
+import logging
+import os
+import zlib
+
+from avro import schema
+from avro import io as avro_io
+
+try:
+  import snappy
+  has_snappy = True
+except ImportError:
+  has_snappy = False
+
+
+# ------------------------------------------------------------------------------
+# Constants
+
+# Version of the container file:
+VERSION = 1
+
+# Magic code that starts a data container file:
+MAGIC = b'Obj' + bytes([VERSION])
+
+# Size of the magic code, in number of bytes:
+MAGIC_SIZE = len(MAGIC)
+
+# Size of the synchronization marker, in number of bytes:
+SYNC_SIZE = 16
+
+# Interval between synchronization markers, in number of bytes:
+# TODO: make configurable
+SYNC_INTERVAL = 1000 * SYNC_SIZE
+
+# Schema of the container header:
+META_SCHEMA = schema.Parse("""
+{
+  "type": "record", "name": "org.apache.avro.file.Header",
+  "fields": [{
+    "name": "magic",
+    "type": {"type": "fixed", "name": "magic", "size": %(magic_size)d}
+  }, {
+    "name": "meta",
+    "type": {"type": "map", "values": "bytes"}
+  }, {
+    "name": "sync",
+    "type": {"type": "fixed", "name": "sync", "size": %(sync_size)d}
+  }]
+}
+""" % {
+    'magic_size': MAGIC_SIZE,
+    'sync_size': SYNC_SIZE,
+})
+
+# Codecs supported by container files:
+VALID_CODECS = frozenset(['null', 'deflate'])
+if has_snappy:
+  VALID_CODECS = frozenset.union(VALID_CODECS, ['snappy'])
+
+# Not used yet
+VALID_ENCODINGS = frozenset(['binary'])
+
+# Metadata key associated to the codec:
+CODEC_KEY = "avro.codec"
+
+# Metadata key associated to the schema:
+SCHEMA_KEY = "avro.schema"
+
+
+# ------------------------------------------------------------------------------
+# Exceptions
+
+
+class DataFileException(schema.AvroException):
+  """Problem reading or writing file object containers."""
+
+  def __init__(self, msg):
+    super(DataFileException, self).__init__(msg)
+
+
+# ------------------------------------------------------------------------------
+
+
+class DataFileWriter(object):
+  """Writes Avro data files."""
+
+  @staticmethod
+  def GenerateSyncMarker():
+    """Generates a random synchronization marker."""
+    return os.urandom(SYNC_SIZE)
+
+  # TODO: make 'encoder' a metadata property
+  def __init__(
+      self,
+      writer,
+      datum_writer,
+      writer_schema=None,
+      codec='null',
+  ):
+    """Constructs a new DataFileWriter instance.
+
+    If the schema is not present, presume we're appending.
+
+    Args:
+      writer: File-like object to write into.
+      datum_writer: DatumWriter used to serialize datums into the file.
+      writer_schema: Schema of the datums to write; omit (None) when
+          appending to an existing container file.
+      codec: Name of the compression codec; must be one of VALID_CODECS.
+    """
+    self._writer = writer
+    self._encoder = avro_io.BinaryEncoder(writer)
+    self._datum_writer = datum_writer
+    self._buffer_writer = io.BytesIO()
+    self._buffer_encoder = avro_io.BinaryEncoder(self._buffer_writer)
+    self._block_count = 0
+    self._meta = {}
+
+    # Ensure we have a writer that accepts bytes:
+    self._writer.write(b'')
+
+    # Whether the header has already been written:
+    self._header_written = False
+
+    if writer_schema is not None:
+      if codec not in VALID_CODECS:
+        raise DataFileException('Unknown codec: %r' % codec)
+      self._sync_marker = DataFileWriter.GenerateSyncMarker()
+      self.SetMeta('avro.codec', codec)
+      self.SetMeta('avro.schema', str(writer_schema).encode('utf-8'))
+      self.datum_writer.writer_schema = writer_schema
+    else:
+      # open writer for reading to collect metadata
+      dfr = DataFileReader(writer, avro_io.DatumReader())
+
+      # TODO: collect arbitrary metadata
+      # collect metadata
+      self._sync_marker = dfr.sync_marker
+      self.SetMeta('avro.codec', dfr.GetMeta('avro.codec'))
+
+      # get schema used to write existing file
+      schema_from_file = dfr.GetMeta('avro.schema').decode('utf-8')
+      self.SetMeta('avro.schema', schema_from_file)
+      self.datum_writer.writer_schema = schema.Parse(schema_from_file)
+
+      # seek to the end of the file and prepare for writing
+      writer.seek(0, 2)
+      self._header_written = True
+
+  # read-only properties
+
+  @property
+  def writer(self):
+    return self._writer
+
+  @property
+  def encoder(self):
+    return self._encoder
+
+  @property
+  def datum_writer(self):
+    return self._datum_writer
+
+  @property
+  def buffer_encoder(self):
+    return self._buffer_encoder
+
+  @property
+  def sync_marker(self):
+    return self._sync_marker
+
+  @property
+  def meta(self):
+    return self._meta
+
+  def __enter__(self):
+    return self
+
+  def __exit__(self, type, value, traceback):
+    # Perform a close if there's no exception
+    if type is None:
+      self.close()
+
+  @property
+  def block_count(self):
+    return self._block_count
+
+  def GetMeta(self, key):
+    """Reports the metadata associated to the given key.
+
+    Args:
+      key: Key of the metadata to report the value of.
+    Returns:
+      The metadata value, as bytes, or None if the key does not exist.
+    """
+    return self._meta.get(key)
+
+  def SetMeta(self, key, value):
+    """Sets the metadata value for the given key.
+
+    Note: metadata is persisted and retrieved as bytes.
+
+    Args:
+      key: Key of the metadata to set.
+      value: Value of the metadata, as bytes or str.
+          Strings are automatically converted to bytes.
+    """
+    if isinstance(value, str):
+      value = value.encode('utf-8')
+    assert isinstance(value, bytes), (
+        'Invalid metadata value for key %r: %r' % (key, value))
+    self._meta[key] = value
+
+  def _WriteHeader(self):
+    header = {
+        'magic': MAGIC,
+        'meta': self.meta,
+        'sync': self.sync_marker,
+    }
+    logging.debug(
+        'Writing Avro data file header:\n%s\nAvro header schema:\n%s',
+        header, META_SCHEMA)
+    self.datum_writer.write_data(META_SCHEMA, header, self.encoder)
+    self._header_written = True
+
+  # TODO: make a schema for blocks and use datum_writer
+  def _WriteBlock(self):
+    if not self._header_written:
+      self._WriteHeader()
+
+    if self.block_count <= 0:
+      logging.info('Current block is empty, nothing to write.')
+      return
+
+    # write number of items in block
+    self.encoder.write_long(self.block_count)
+
+    # write block contents
+    uncompressed_data = self._buffer_writer.getvalue()
+    codec = self.GetMeta(CODEC_KEY).decode('utf-8')
+    if codec == 'null':
+      compressed_data = uncompressed_data
+      compressed_data_length = len(compressed_data)
+    elif codec == 'deflate':
+      # The first two characters and last character are zlib
+      # wrappers around deflate data.
+      compressed_data = zlib.compress(uncompressed_data)[2:-1]
+      compressed_data_length = len(compressed_data)
+    elif codec == 'snappy':
+      compressed_data = snappy.compress(uncompressed_data)
+      compressed_data_length = len(compressed_data) + 4 # crc32
+    else:
+      fail_msg = '"%s" codec is not supported.' % codec
+      raise DataFileException(fail_msg)
+
+    # Write length of block
+    self.encoder.write_long(compressed_data_length)
+
+    # Write block
+    self.writer.write(compressed_data)
+
+    # Write CRC32 checksum for Snappy
+    if codec == 'snappy':
+      self.encoder.write_crc32(uncompressed_data)
+
+    # write sync marker
+    self.writer.write(self.sync_marker)
+
+    logging.debug(
+        'Writing block with count=%d nbytes=%d sync=%r',
+        self.block_count, compressed_data_length, self.sync_marker)
+
+    # reset buffer
+    self._buffer_writer.seek(0)
+    self._buffer_writer.truncate()
+    self._block_count = 0
+
+  def append(self, datum):
+    """Append a datum to the file."""
+    self.datum_writer.write(datum, self.buffer_encoder)
+    self._block_count += 1
+
+    # if the data to write is larger than the sync interval, write the block
+    if self._buffer_writer.tell() >= SYNC_INTERVAL:
+      self._WriteBlock()
+
+  def sync(self):
+    """
+    Return the current position as a value that may be passed to
+    DataFileReader.seek(long). Forces the end of the current block,
+    emitting a synchronization marker.
+    """
+    self._WriteBlock()
+    return self.writer.tell()
+
+  def flush(self):
+    """Flush the current state of the file, including metadata."""
+    self._WriteBlock()
+    self.writer.flush()
+
+  def close(self):
+    """Close the file."""
+    self.flush()
+    self.writer.close()
+
+
+# ------------------------------------------------------------------------------
+
+
+class DataFileReader(object):
+  """Read files written by DataFileWriter."""
+
+  # TODO: allow user to specify expected schema?
+  # TODO: allow user to specify the encoder
+  def __init__(self, reader, datum_reader):
+    """Initializes a new data file reader.
+
+    Args:
+      reader: Open file to read from.
+      datum_reader: Avro datum reader.
+    """
+    self._reader = reader
+    self._raw_decoder = avro_io.BinaryDecoder(reader)
+    self._datum_decoder = None # Maybe reset at every block.
+    self._datum_reader = datum_reader
+
+    # read the header: magic, meta, sync
+    self._read_header()
+
+    # ensure codec is valid
+    codec = self.GetMeta('avro.codec')
+    self.codec = codec.decode('utf-8') if codec is not None else 'null'
+    if self.codec not in VALID_CODECS:
+      raise DataFileException('Unknown codec: %s.' % self.codec)
+
+    self._file_length = self._GetInputFileLength()
+
+    # get ready to read
+    self._block_count = 0
+    self.datum_reader.writer_schema = (
+        schema.Parse(self.GetMeta(SCHEMA_KEY).decode('utf-8')))
+
+  def __enter__(self):
+    return self
+
+  def __exit__(self, type, value, traceback):
+    # Perform a close if there's no exception
+    if type is None:
+      self.close()
+
+  def __iter__(self):
+    return self
+
+  def __next__(self):
+    """Implements the iterator interface."""
+    return next(self)
+
+  # read-only properties
+  @property
+  def reader(self):
+    return self._reader
+
+  @property
+  def raw_decoder(self):
+    return self._raw_decoder
+
+  @property
+  def datum_decoder(self):
+    return self._datum_decoder
+
+  @property
+  def datum_reader(self):
+    return self._datum_reader
+
+  @property
+  def sync_marker(self):
+    return self._sync_marker
+
+  @property
+  def meta(self):
+    return self._meta
+
+  @property
+  def file_length(self):
+    """Length of the input file, in bytes."""
+    return self._file_length
+
+  # read/write properties
+  @property
+  def block_count(self):
+    return self._block_count
+
+  def GetMeta(self, key):
+    """Reports the value of a given metadata key.
+
+    Args:
+      key: Metadata key (string) to report the value of.
+    Returns:
+      Value associated to the metadata key, as bytes.
+    """
+    return self._meta.get(key)
+
+  def SetMeta(self, key, value):
+    """Sets a metadata.
+
+    Args:
+      key: Metadata key (string) to set.
+      value: Metadata value to set, as bytes.
+    """
+    if isinstance(value, str):
+      value = value.encode('utf-8')
+    self._meta[key] = value
+
+  def _GetInputFileLength(self):
+    """Reports the length of the input file, in bytes.
+
+    Leaves the current position unmodified.
+
+    Returns:
+      The length of the input file, in bytes.
+    """
+    current_pos = self.reader.tell()
+    self.reader.seek(0, 2)
+    file_length = self.reader.tell()
+    self.reader.seek(current_pos)
+    return file_length
+
+  def is_EOF(self):
+    return self.reader.tell() == self.file_length
+
+  def _read_header(self):
+    # seek to the beginning of the file to get magic block
+    self.reader.seek(0, 0)
+
+    # read header into a dict
+    header = self.datum_reader.read_data(
+      META_SCHEMA, META_SCHEMA, self.raw_decoder)
+
+    # check magic number
+    if header.get('magic') != MAGIC:
+      fail_msg = "Not an Avro data file: %s doesn't match %s."\
+                 % (header.get('magic'), MAGIC)
+      raise schema.AvroException(fail_msg)
+
+    # set metadata
+    self._meta = header['meta']
+
+    # set sync marker
+    self._sync_marker = header['sync']
+
+  def _read_block_header(self):
+    self._block_count = self.raw_decoder.read_long()
+    if self.codec == "null":
+      # Skip a long; we don't need to use the length.
+      self.raw_decoder.skip_long()
+      self._datum_decoder = self._raw_decoder
+    elif self.codec == 'deflate':
+      # Compressed data is stored as (length, data), which
+      # corresponds to how the "bytes" type is encoded.
+      data = self.raw_decoder.read_bytes()
+      # -15 is the log of the window size; negative indicates
+      # "raw" (no zlib headers) decompression.  See zlib.h.
+      uncompressed = zlib.decompress(data, -15)
+      self._datum_decoder = avro_io.BinaryDecoder(io.BytesIO(uncompressed))
+    elif self.codec == 'snappy':
+      # Compressed data includes a 4-byte CRC32 checksum
+      length = self.raw_decoder.read_long()
+      data = self.raw_decoder.read(length - 4)
+      uncompressed = snappy.decompress(data)
+      self._datum_decoder = avro_io.BinaryDecoder(io.BytesIO(uncompressed))
+      self.raw_decoder.check_crc32(uncompressed)
+    else:
+      raise DataFileException("Unknown codec: %r" % self.codec)
+
+  def _skip_sync(self):
+    """
+    Read the length of the sync marker; if it matches the sync marker,
+    return True. Otherwise, seek back to where we started and return False.
+    """
+    proposed_sync_marker = self.reader.read(SYNC_SIZE)
+    if proposed_sync_marker != self.sync_marker:
+      self.reader.seek(-SYNC_SIZE, 1)
+      return False
+    else:
+      return True
+
+  # TODO: handle block of length zero
+  # TODO: clean this up with recursion
+  def __next__(self):
+    """Return the next datum in the file."""
+    if self.block_count == 0:
+      if self.is_EOF():
+        raise StopIteration
+      elif self._skip_sync():
+        if self.is_EOF(): raise StopIteration
+        self._read_block_header()
+      else:
+        self._read_block_header()
+
+    datum = self.datum_reader.read(self.datum_decoder)
+    self._block_count -= 1
+    return datum
+
+  def close(self):
+    """Close this reader."""
+    self.reader.close()
+
+
+if __name__ == '__main__':
+  raise Exception('Not a standalone module')
diff --git a/lang/py3/avro/io.py b/lang/py3/avro/io.py
new file mode 100644
index 0000000..f417b66
--- /dev/null
+++ b/lang/py3/avro/io.py
@@ -0,0 +1,933 @@
+#!/usr/bin/env python3
+# -*- mode: python -*-
+# -*- coding: utf-8 -*-
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Input/output utilities.
+
+Includes:
+ - i/o-specific constants
+ - i/o-specific exceptions
+ - schema validation
+ - leaf value encoding and decoding
+ - datum reader/writer machinery
+
+Also includes a generic representation for data, which uses the
+following mapping:
+ - Schema records are implemented as dict.
+ - Schema arrays are implemented as list.
+ - Schema maps are implemented as dict.
+ - Schema strings are implemented as str.
+ - Schema bytes are implemented as bytes.
+ - Schema ints are implemented as int.
+ - Schema longs are implemented as int.
+ - Schema floats are implemented as float.
+ - Schema doubles are implemented as float.
+ - Schema booleans are implemented as bool.
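+
+For example, a datum matching the (illustrative) record schema
+  {"type": "record", "name": "Point",
+   "fields": [{"name": "x", "type": "int"}, {"name": "y", "type": "int"}]}
+is the plain dict {'x': 1, 'y': 2}; Validate() below accepts it and rejects
+{'x': 'one', 'y': 2}.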
+"""
+
+import binascii
+import json
+import logging
+import struct
+import sys
+
+from avro import schema
+
+
+# ------------------------------------------------------------------------------
+# Constants
+
+
+INT_MIN_VALUE = -(1 << 31)
+INT_MAX_VALUE = (1 << 31) - 1
+LONG_MIN_VALUE = -(1 << 63)
+LONG_MAX_VALUE = (1 << 63) - 1
+
+STRUCT_INT = struct.Struct('!I')     # big-endian unsigned int
+STRUCT_LONG = struct.Struct('!Q')    # big-endian unsigned long long
+STRUCT_FLOAT = struct.Struct('!f')   # big-endian float
+STRUCT_DOUBLE = struct.Struct('!d')  # big-endian double
+STRUCT_CRC32 = struct.Struct('>I')   # big-endian unsigned int
+
+
+# ------------------------------------------------------------------------------
+# Exceptions
+
+
+class AvroTypeException(schema.AvroException):
+  """Raised when datum is not an example of schema."""
+  def __init__(self, expected_schema, datum):
+    pretty_expected = json.dumps(json.loads(str(expected_schema)), indent=2)
+    fail_msg = "The datum %s is not an example of the schema %s"\
+               % (datum, pretty_expected)
+    schema.AvroException.__init__(self, fail_msg)
+
+
+class SchemaResolutionException(schema.AvroException):
+  def __init__(self, fail_msg, writer_schema=None, reader_schema=None):
+    pretty_writers = json.dumps(json.loads(str(writer_schema)), indent=2)
+    pretty_readers = json.dumps(json.loads(str(reader_schema)), indent=2)
+    if writer_schema: fail_msg += "\nWriter's Schema: %s" % pretty_writers
+    if reader_schema: fail_msg += "\nReader's Schema: %s" % pretty_readers
+    schema.AvroException.__init__(self, fail_msg)
+
+
+# ------------------------------------------------------------------------------
+# Validate
+
+
+def Validate(expected_schema, datum):
+  """Determines if a python datum is an instance of a schema.
+
+  Args:
+    expected_schema: Schema to validate against.
+    datum: Datum to validate.
+  Returns:
+    True if the datum is an instance of the schema.
+  """
+  schema_type = expected_schema.type
+  if schema_type == 'null':
+    return datum is None
+  elif schema_type == 'boolean':
+    return isinstance(datum, bool)
+  elif schema_type == 'string':
+    return isinstance(datum, str)
+  elif schema_type == 'bytes':
+    return isinstance(datum, bytes)
+  elif schema_type == 'int':
+    return (isinstance(datum, int)
+        and (INT_MIN_VALUE <= datum <= INT_MAX_VALUE))
+  elif schema_type == 'long':
+    return (isinstance(datum, int)
+        and (LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE))
+  elif schema_type in ['float', 'double']:
+    return (isinstance(datum, int) or isinstance(datum, float))
+  elif schema_type == 'fixed':
+    return isinstance(datum, bytes) and (len(datum) == expected_schema.size)
+  elif schema_type == 'enum':
+    return datum in expected_schema.symbols
+  elif schema_type == 'array':
+    return (isinstance(datum, list)
+        and all(Validate(expected_schema.items, item) for item in datum))
+  elif schema_type == 'map':
+    return (isinstance(datum, dict)
+        and all(isinstance(key, str) for key in datum.keys())
+        and all(Validate(expected_schema.values, value)
+                for value in datum.values()))
+  elif schema_type in ['union', 'error_union']:
+    return any(Validate(union_branch, datum)
+               for union_branch in expected_schema.schemas)
+  elif schema_type in ['record', 'error', 'request']:
+    return (isinstance(datum, dict)
+        and all(Validate(field.type, datum.get(field.name))
+                for field in expected_schema.fields))
+  else:
+    raise AvroTypeException('Unknown Avro schema type: %r' % schema_type)
+
+
+# ------------------------------------------------------------------------------
+# Decoder/Encoder
+
+
+class BinaryDecoder(object):
+  """Read leaf values."""
+  def __init__(self, reader):
+    """
+    reader is a Python object on which we can call read, seek, and tell.
+    """
+    self._reader = reader
+
+  @property
+  def reader(self):
+    """Reports the reader used by this decoder."""
+    return self._reader
+
+  def read(self, n):
+    """Read n bytes.
+
+    Args:
+      n: Number of bytes to read.
+    Returns:
+      The next n bytes from the input.
+    """
+    assert (n >= 0), n
+    input_bytes = self.reader.read(n)
+    assert (len(input_bytes) == n), input_bytes
+    return input_bytes
+
+  def read_null(self):
+    """
+    null is written as zero bytes
+    """
+    return None
+
+  def read_boolean(self):
+    """
+    a boolean is written as a single byte
+    whose value is either 0 (false) or 1 (true).
+    """
+    return ord(self.read(1)) == 1
+
+  def read_int(self):
+    """
+    int and long values are written using variable-length, zig-zag coding.
+    """
+    return self.read_long()
+
+  def read_long(self):
+    """
+    int and long values are written using variable-length, zig-zag coding.
+    """
+    b = ord(self.read(1))
+    n = b & 0x7F
+    shift = 7
+    while (b & 0x80) != 0:
+      b = ord(self.read(1))
+      n |= (b & 0x7F) << shift
+      shift += 7
+    datum = (n >> 1) ^ -(n & 1)
+    return datum
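+
+  # Worked example (illustrative): the variable-length zig-zag bytes
+  # b'\xac\x02' decode as 0xAC -> low 7 bits 0x2C (44) with the continuation
+  # bit set, then 0x02 -> 2 << 7 = 256, so n = 300; un-zig-zagging gives
+  # (300 >> 1) ^ -(300 & 1) = 150. Likewise b'\x01' decodes to -1 and
+  # b'\x04' decodes to 2.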
+
+  def read_float(self):
+    """
+    A float is written as 4 bytes.
+    The float is converted into a 32-bit integer using a method equivalent to
+    Java's floatToIntBits and then encoded in little-endian format.
+    """
+    bits = (((ord(self.read(1)) & 0xff)) |
+      ((ord(self.read(1)) & 0xff) <<  8) |
+      ((ord(self.read(1)) & 0xff) << 16) |
+      ((ord(self.read(1)) & 0xff) << 24))
+    return STRUCT_FLOAT.unpack(STRUCT_INT.pack(bits))[0]
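+
+  # Example (illustrative): the little-endian bytes b'\x00\x00\x80\x3f'
+  # reassemble to the bit pattern 0x3f800000, which unpacks to the float 1.0.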
+
+  def read_double(self):
+    """
+    A double is written as 8 bytes.
+    The double is converted into a 64-bit integer using a method equivalent to
+    Java's doubleToLongBits and then encoded in little-endian format.
+    """
+    bits = (((ord(self.read(1)) & 0xff)) |
+      ((ord(self.read(1)) & 0xff) <<  8) |
+      ((ord(self.read(1)) & 0xff) << 16) |
+      ((ord(self.read(1)) & 0xff) << 24) |
+      ((ord(self.read(1)) & 0xff) << 32) |
+      ((ord(self.read(1)) & 0xff) << 40) |
+      ((ord(self.read(1)) & 0xff) << 48) |
+      ((ord(self.read(1)) & 0xff) << 56))
+    return STRUCT_DOUBLE.unpack(STRUCT_LONG.pack(bits))[0]
+
+  def read_bytes(self):
+    """
+    Bytes are encoded as a long followed by that many bytes of data.
+    """
+    nbytes = self.read_long()
+    assert (nbytes >= 0), nbytes
+    return self.read(nbytes)
+
+  def read_utf8(self):
+    """
+    A string is encoded as a long followed by
+    that many bytes of UTF-8 encoded character data.
+    """
+    input_bytes = self.read_bytes()
+    try:
+      return input_bytes.decode('utf-8')
+    except UnicodeDecodeError as exn:
+      logging.error('Invalid UTF-8 input bytes: %r', input_bytes)
+      raise exn
+
+  def check_crc32(self, bytes):
+    checksum = STRUCT_CRC32.unpack(self.read(4))[0]
+    if binascii.crc32(bytes) & 0xffffffff != checksum:
+      raise schema.AvroException("Checksum failure")
+
+  def skip_null(self):
+    pass
+
+  def skip_boolean(self):
+    self.skip(1)
+
+  def skip_int(self):
+    self.skip_long()
+
+  def skip_long(self):
+    b = ord(self.read(1))
+    while (b & 0x80) != 0:
+      b = ord(self.read(1))
+
+  def skip_float(self):
+    self.skip(4)
+
+  def skip_double(self):
+    self.skip(8)
+
+  def skip_bytes(self):
+    self.skip(self.read_long())
+
+  def skip_utf8(self):
+    self.skip_bytes()
+
+  def skip(self, n):
+    self.reader.seek(self.reader.tell() + n)
+
+
+# ------------------------------------------------------------------------------
+
+
+class BinaryEncoder(object):
+  """Write leaf values."""
+
+  def __init__(self, writer):
+    """
+    writer is a Python object on which we can call write.
+    """
+    self._writer = writer
+
+  @property
+  def writer(self):
+    """Reports the writer used by this encoder."""
+    return self._writer
+
+
+  def write(self, datum):
+    """Write a sequence of bytes.
+
+    Args:
+      datum: Byte array, as a Python bytes.
+    """
+    assert isinstance(datum, bytes), ('Expecting bytes, got %r' % datum)
+    self.writer.write(datum)
+
+  def WriteByte(self, byte):
+    self.writer.write(bytes((byte,)))
+
+  def write_null(self, datum):
+    """
+    null is written as zero bytes
+    """
+    pass
+
+  def write_boolean(self, datum):
+    """
+    a boolean is written as a single byte
+    whose value is either 0 (false) or 1 (true).
+    """
+    # Python maps True to 1 and False to 0.
+    self.WriteByte(int(bool(datum)))
+
+  def write_int(self, datum):
+    """
+    int and long values are written using variable-length, zig-zag coding.
+    """
+    self.write_long(datum)
+
+  def write_long(self, datum):
+    """
+    int and long values are written using variable-length, zig-zag coding.
+    """
+    datum = (datum << 1) ^ (datum >> 63)
+    while (datum & ~0x7F) != 0:
+      self.WriteByte((datum & 0x7f) | 0x80)
+      datum >>= 7
+    self.WriteByte(datum)
+
+  def write_float(self, datum):
+    """
+    A float is written as 4 bytes.
+    The float is converted into a 32-bit integer using a method equivalent to
+    Java's floatToIntBits and then encoded in little-endian format.
+    """
+    bits = STRUCT_INT.unpack(STRUCT_FLOAT.pack(datum))[0]
+    self.WriteByte((bits) & 0xFF)
+    self.WriteByte((bits >> 8) & 0xFF)
+    self.WriteByte((bits >> 16) & 0xFF)
+    self.WriteByte((bits >> 24) & 0xFF)
+
+  def write_double(self, datum):
+    """
+    A double is written as 8 bytes.
+    The double is converted into a 64-bit integer using a method equivalent to
+    Java's doubleToLongBits and then encoded in little-endian format.
+    """
+    bits = STRUCT_LONG.unpack(STRUCT_DOUBLE.pack(datum))[0]
+    self.WriteByte((bits) & 0xFF)
+    self.WriteByte((bits >> 8) & 0xFF)
+    self.WriteByte((bits >> 16) & 0xFF)
+    self.WriteByte((bits >> 24) & 0xFF)
+    self.WriteByte((bits >> 32) & 0xFF)
+    self.WriteByte((bits >> 40) & 0xFF)
+    self.WriteByte((bits >> 48) & 0xFF)
+    self.WriteByte((bits >> 56) & 0xFF)
+
+  def write_bytes(self, datum):
+    """
+    Bytes are encoded as a long followed by that many bytes of data.
+    """
+    self.write_long(len(datum))
+    self.write(datum)
+
+  def write_utf8(self, datum):
+    """
+    A string is encoded as a long followed by
+    that many bytes of UTF-8 encoded character data.
+    """
+    datum = datum.encode("utf-8")
+    self.write_bytes(datum)
+
+  def write_crc32(self, bytes):
+    """
+    A 4-byte, big-endian CRC32 checksum
+    """
+    self.write(STRUCT_CRC32.pack(binascii.crc32(bytes) & 0xffffffff))
+
+
+# ------------------------------------------------------------------------------
+# DatumReader/Writer
+
+
+class DatumReader(object):
+  """Deserialize Avro-encoded data into a Python data structure."""
+  @staticmethod
+  def check_props(schema_one, schema_two, prop_list):
+    for prop in prop_list:
+      if getattr(schema_one, prop) != getattr(schema_two, prop):
+        return False
+    return True
+
+  @staticmethod
+  def match_schemas(writer_schema, reader_schema):
+    w_type = writer_schema.type
+    r_type = reader_schema.type
+    if 'union' in [w_type, r_type] or 'error_union' in [w_type, r_type]:
+      return True
+    elif (w_type in schema.PRIMITIVE_TYPES and r_type in schema.PRIMITIVE_TYPES
+          and w_type == r_type):
+      return True
+    elif (w_type == r_type == 'record' and
+          DatumReader.check_props(writer_schema, reader_schema,
+                                  ['fullname'])):
+      return True
+    elif (w_type == r_type == 'error' and
+          DatumReader.check_props(writer_schema, reader_schema,
+                                  ['fullname'])):
+      return True
+    elif (w_type == r_type == 'request'):
+      return True
+    elif (w_type == r_type == 'fixed' and
+          DatumReader.check_props(writer_schema, reader_schema,
+                                  ['fullname', 'size'])):
+      return True
+    elif (w_type == r_type == 'enum' and
+          DatumReader.check_props(writer_schema, reader_schema,
+                                  ['fullname'])):
+      return True
+    elif (w_type == r_type == 'map' and
+          DatumReader.check_props(writer_schema.values,
+                                  reader_schema.values, ['type'])):
+      return True
+    elif (w_type == r_type == 'array' and
+          DatumReader.check_props(writer_schema.items,
+                                  reader_schema.items, ['type'])):
+      return True
+
+    # Handle schema promotion
+    if w_type == 'int' and r_type in ['long', 'float', 'double']:
+      return True
+    elif w_type == 'long' and r_type in ['float', 'double']:
+      return True
+    elif w_type == 'float' and r_type == 'double':
+      return True
+    return False
+
+  def __init__(self, writer_schema=None, reader_schema=None):
+    """
+    As defined in the Avro specification, we call the schema encoded
+    in the data the "writer's schema", and the schema expected by the
+    reader the "reader's schema".
+    """
+    self._writer_schema = writer_schema
+    self._reader_schema = reader_schema
+
+  # read/write properties
+  def set_writer_schema(self, writer_schema):
+    self._writer_schema = writer_schema
+  writer_schema = property(lambda self: self._writer_schema,
+                            set_writer_schema)
+  def set_reader_schema(self, reader_schema):
+    self._reader_schema = reader_schema
+  reader_schema = property(lambda self: self._reader_schema,
+                            set_reader_schema)
+
+  def read(self, decoder):
+    if self.reader_schema is None:
+      self.reader_schema = self.writer_schema
+    return self.read_data(self.writer_schema, self.reader_schema, decoder)
+
+  def read_data(self, writer_schema, reader_schema, decoder):
+    # schema matching
+    if not DatumReader.match_schemas(writer_schema, reader_schema):
+      fail_msg = 'Schemas do not match.'
+      raise SchemaResolutionException(fail_msg, writer_schema, reader_schema)
+
+    # schema resolution: reader's schema is a union, writer's schema is not
+    if (writer_schema.type not in ['union', 'error_union']
+        and reader_schema.type in ['union', 'error_union']):
+      for s in reader_schema.schemas:
+        if DatumReader.match_schemas(writer_schema, s):
+          return self.read_data(writer_schema, s, decoder)
+      fail_msg = 'Schemas do not match.'
+      raise SchemaResolutionException(fail_msg, writer_schema, reader_schema)
+
+    # function dispatch for reading data based on type of writer's schema
+    if writer_schema.type == 'null':
+      return decoder.read_null()
+    elif writer_schema.type == 'boolean':
+      return decoder.read_boolean()
+    elif writer_schema.type == 'string':
+      return decoder.read_utf8()
+    elif writer_schema.type == 'int':
+      return decoder.read_int()
+    elif writer_schema.type == 'long':
+      return decoder.read_long()
+    elif writer_schema.type == 'float':
+      return decoder.read_float()
+    elif writer_schema.type == 'double':
+      return decoder.read_double()
+    elif writer_schema.type == 'bytes':
+      return decoder.read_bytes()
+    elif writer_schema.type == 'fixed':
+      return self.read_fixed(writer_schema, reader_schema, decoder)
+    elif writer_schema.type == 'enum':
+      return self.read_enum(writer_schema, reader_schema, decoder)
+    elif writer_schema.type == 'array':
+      return self.read_array(writer_schema, reader_schema, decoder)
+    elif writer_schema.type == 'map':
+      return self.read_map(writer_schema, reader_schema, decoder)
+    elif writer_schema.type in ['union', 'error_union']:
+      return self.read_union(writer_schema, reader_schema, decoder)
+    elif writer_schema.type in ['record', 'error', 'request']:
+      return self.read_record(writer_schema, reader_schema, decoder)
+    else:
+      fail_msg = "Cannot read unknown schema type: %s" % writer_schema.type
+      raise schema.AvroException(fail_msg)
+
+  def skip_data(self, writer_schema, decoder):
+    if writer_schema.type == 'null':
+      return decoder.skip_null()
+    elif writer_schema.type == 'boolean':
+      return decoder.skip_boolean()
+    elif writer_schema.type == 'string':
+      return decoder.skip_utf8()
+    elif writer_schema.type == 'int':
+      return decoder.skip_int()
+    elif writer_schema.type == 'long':
+      return decoder.skip_long()
+    elif writer_schema.type == 'float':
+      return decoder.skip_float()
+    elif writer_schema.type == 'double':
+      return decoder.skip_double()
+    elif writer_schema.type == 'bytes':
+      return decoder.skip_bytes()
+    elif writer_schema.type == 'fixed':
+      return self.skip_fixed(writer_schema, decoder)
+    elif writer_schema.type == 'enum':
+      return self.skip_enum(writer_schema, decoder)
+    elif writer_schema.type == 'array':
+      return self.skip_array(writer_schema, decoder)
+    elif writer_schema.type == 'map':
+      return self.skip_map(writer_schema, decoder)
+    elif writer_schema.type in ['union', 'error_union']:
+      return self.skip_union(writer_schema, decoder)
+    elif writer_schema.type in ['record', 'error', 'request']:
+      return self.skip_record(writer_schema, decoder)
+    else:
+      fail_msg = "Unknown schema type: %s" % writer_schema.type
+      raise schema.AvroException(fail_msg)
+
+  def read_fixed(self, writer_schema, reader_schema, decoder):
+    """
+    Fixed instances are encoded using the number of bytes declared
+    in the schema.
+    """
+    return decoder.read(writer_schema.size)
+
+  def skip_fixed(self, writer_schema, decoder):
+    return decoder.skip(writer_schema.size)
+
+  def read_enum(self, writer_schema, reader_schema, decoder):
+    """
+    An enum is encoded by an int, representing the zero-based position
+    of the symbol in the schema.
+    """
+    # read data
+    index_of_symbol = decoder.read_int()
+    if index_of_symbol >= len(writer_schema.symbols):
+      fail_msg = "Can't access enum index %d for enum with %d symbols"\
+                 % (index_of_symbol, len(writer_schema.symbols))
+      raise SchemaResolutionException(fail_msg, writer_schema, reader_schema)
+    read_symbol = writer_schema.symbols[index_of_symbol]
+
+    # schema resolution
+    if read_symbol not in reader_schema.symbols:
+      fail_msg = "Symbol %s not present in Reader's Schema" % read_symbol
+      raise SchemaResolutionException(fail_msg, writer_schema, reader_schema)
+
+    return read_symbol
+
+  def skip_enum(self, writer_schema, decoder):
+    return decoder.skip_int()
+
+  def read_array(self, writer_schema, reader_schema, decoder):
+    """
+    Arrays are encoded as a series of blocks.
+
+    Each block consists of a long count value,
+    followed by that many array items.
+    A block with count zero indicates the end of the array.
+    Each item is encoded per the array's item schema.
+
+    If a block's count is negative,
+    then the count is followed immediately by a long block size,
+    indicating the number of bytes in the block.
+    The actual count in this case
+    is the absolute value of the count written.
+    """
+    read_items = []
+    block_count = decoder.read_long()
+    while block_count != 0:
+      if block_count < 0:
+        block_count = -block_count
+        block_size = decoder.read_long()
+      for i in range(block_count):
+        read_items.append(self.read_data(writer_schema.items,
+                                         reader_schema.items, decoder))
+      block_count = decoder.read_long()
+    return read_items
+
+  def skip_array(self, writer_schema, decoder):
+    block_count = decoder.read_long()
+    while block_count != 0:
+      if block_count < 0:
+        block_size = decoder.read_long()
+        decoder.skip(block_size)
+      else:
+        for i in range(block_count):
+          self.skip_data(writer_schema.items, decoder)
+      block_count = decoder.read_long()
+
+  def read_map(self, writer_schema, reader_schema, decoder):
+    """
+    Maps are encoded as a series of blocks.
+
+    Each block consists of a long count value,
+    followed by that many key/value pairs.
+    A block with count zero indicates the end of the map.
+    Each item is encoded per the map's value schema.
+
+    If a block's count is negative,
+    then the count is followed immediately by a long block size,
+    indicating the number of bytes in the block.
+    The actual count in this case
+    is the absolute value of the count written.
+    """
+    read_items = {}
+    block_count = decoder.read_long()
+    while block_count != 0:
+      if block_count < 0:
+        block_count = -block_count
+        block_size = decoder.read_long()
+      for i in range(block_count):
+        key = decoder.read_utf8()
+        read_items[key] = self.read_data(writer_schema.values,
+                                         reader_schema.values, decoder)
+      block_count = decoder.read_long()
+    return read_items
+
+  def skip_map(self, writer_schema, decoder):
+    block_count = decoder.read_long()
+    while block_count != 0:
+      if block_count < 0:
+        block_size = decoder.read_long()
+        decoder.skip(block_size)
+      else:
+        for i in range(block_count):
+          decoder.skip_utf8()
+          self.skip_data(writer_schema.values, decoder)
+      block_count = decoder.read_long()
+
+  def read_union(self, writer_schema, reader_schema, decoder):
+    """
+    A union is encoded by first writing a long value indicating
+    the zero-based position within the union of the schema of its value.
+    The value is then encoded per the indicated schema within the union.
+    """
+    # schema resolution
+    index_of_schema = int(decoder.read_long())
+    if index_of_schema >= len(writer_schema.schemas):
+      fail_msg = "Can't access branch index %d for union with %d branches"\
+                 % (index_of_schema, len(writer_schema.schemas))
+      raise SchemaResolutionException(fail_msg, writer_schema, reader_schema)
+    selected_writer_schema = writer_schema.schemas[index_of_schema]
+
+    # read data
+    return self.read_data(selected_writer_schema, reader_schema, decoder)
+
+  def skip_union(self, writer_schema, decoder):
+    index_of_schema = int(decoder.read_long())
+    if index_of_schema >= len(writer_schema.schemas):
+      fail_msg = "Can't access branch index %d for union with %d branches"\
+                 % (index_of_schema, len(writer_schema.schemas))
+      raise SchemaResolutionException(fail_msg, writer_schema)
+    return self.skip_data(writer_schema.schemas[index_of_schema], decoder)
+
+  def read_record(self, writer_schema, reader_schema, decoder):
+    """
+    A record is encoded by encoding the values of its fields
+    in the order that they are declared. In other words, a record
+    is encoded as just the concatenation of the encodings of its fields.
+    Field values are encoded per their schema.
+
+    Schema Resolution:
+     * the ordering of fields may be different: fields are matched by name.
+     * schemas for fields with the same name in both records are resolved
+       recursively.
+     * if the writer's record contains a field with a name not present in the
+       reader's record, the writer's value for that field is ignored.
+     * if the reader's record schema has a field that contains a default value,
+       and writer's schema does not have a field with the same name, then the
+       reader should use the default value from its field.
+     * if the reader's record schema has a field with no default value, and
+       writer's schema does not have a field with the same name, then the
+       field's value is unset.
+    """
+    # schema resolution
+    readers_fields_dict = reader_schema.field_map
+    read_record = {}
+    for field in writer_schema.fields:
+      readers_field = readers_fields_dict.get(field.name)
+      if readers_field is not None:
+        field_val = self.read_data(field.type, readers_field.type, decoder)
+        read_record[field.name] = field_val
+      else:
+        self.skip_data(field.type, decoder)
+
+    # fill in default values
+    if len(readers_fields_dict) > len(read_record):
+      writers_fields_dict = writer_schema.field_map
+      for field_name, field in readers_fields_dict.items():
+        if field_name not in writers_fields_dict:
+          if field.has_default:
+            field_val = self._read_default_value(field.type, field.default)
+            read_record[field.name] = field_val
+          else:
+            fail_msg = 'No default value for field %s' % field_name
+            raise SchemaResolutionException(fail_msg, writer_schema,
+                                            reader_schema)
+    return read_record
+
+  def skip_record(self, writer_schema, decoder):
+    for field in writer_schema.fields:
+      self.skip_data(field.type, decoder)
+
+  def _read_default_value(self, field_schema, default_value):
+    """
+    Decodes a default value from its JSON representation, per the field schema.
+    """
+    if field_schema.type == 'null':
+      return None
+    elif field_schema.type == 'boolean':
+      return bool(default_value)
+    elif field_schema.type == 'int':
+      return int(default_value)
+    elif field_schema.type == 'long':
+      return int(default_value)
+    elif field_schema.type in ['float', 'double']:
+      return float(default_value)
+    elif field_schema.type in ['enum', 'fixed', 'string', 'bytes']:
+      return default_value
+    elif field_schema.type == 'array':
+      read_array = []
+      for json_val in default_value:
+        item_val = self._read_default_value(field_schema.items, json_val)
+        read_array.append(item_val)
+      return read_array
+    elif field_schema.type == 'map':
+      read_map = {}
+      for key, json_val in default_value.items():
+        map_val = self._read_default_value(field_schema.values, json_val)
+        read_map[key] = map_val
+      return read_map
+    elif field_schema.type in ['union', 'error_union']:
+      return self._read_default_value(field_schema.schemas[0], default_value)
+    elif field_schema.type == 'record':
+      read_record = {}
+      for field in field_schema.fields:
+        json_val = default_value.get(field.name)
+        if json_val is None: json_val = field.default
+        field_val = self._read_default_value(field.type, json_val)
+        read_record[field.name] = field_val
+      return read_record
+    else:
+      fail_msg = 'Unknown type: %s' % field_schema.type
+      raise schema.AvroException(fail_msg)
+
+
+# ------------------------------------------------------------------------------
+
+
+class DatumWriter(object):
+  """DatumWriter for generic python objects."""
+  def __init__(self, writer_schema=None):
+    self._writer_schema = writer_schema
+
+  # read/write properties
+  def set_writer_schema(self, writer_schema):
+    self._writer_schema = writer_schema
+  writer_schema = property(lambda self: self._writer_schema,
+                            set_writer_schema)
+
+  def write(self, datum, encoder):
+    # validate datum
+    if not Validate(self.writer_schema, datum):
+      raise AvroTypeException(self.writer_schema, datum)
+
+    self.write_data(self.writer_schema, datum, encoder)
+
+  def write_data(self, writer_schema, datum, encoder):
+    # function dispatch to write datum
+    if writer_schema.type == 'null':
+      encoder.write_null(datum)
+    elif writer_schema.type == 'boolean':
+      encoder.write_boolean(datum)
+    elif writer_schema.type == 'string':
+      encoder.write_utf8(datum)
+    elif writer_schema.type == 'int':
+      encoder.write_int(datum)
+    elif writer_schema.type == 'long':
+      encoder.write_long(datum)
+    elif writer_schema.type == 'float':
+      encoder.write_float(datum)
+    elif writer_schema.type == 'double':
+      encoder.write_double(datum)
+    elif writer_schema.type == 'bytes':
+      encoder.write_bytes(datum)
+    elif writer_schema.type == 'fixed':
+      self.write_fixed(writer_schema, datum, encoder)
+    elif writer_schema.type == 'enum':
+      self.write_enum(writer_schema, datum, encoder)
+    elif writer_schema.type == 'array':
+      self.write_array(writer_schema, datum, encoder)
+    elif writer_schema.type == 'map':
+      self.write_map(writer_schema, datum, encoder)
+    elif writer_schema.type in ['union', 'error_union']:
+      self.write_union(writer_schema, datum, encoder)
+    elif writer_schema.type in ['record', 'error', 'request']:
+      self.write_record(writer_schema, datum, encoder)
+    else:
+      fail_msg = 'Unknown type: %s' % writer_schema.type
+      raise schema.AvroException(fail_msg)
+
+  def write_fixed(self, writer_schema, datum, encoder):
+    """
+    Fixed instances are encoded using the number of bytes declared
+    in the schema.
+    """
+    encoder.write(datum)
+
+  def write_enum(self, writer_schema, datum, encoder):
+    """
+    An enum is encoded by an int, representing the zero-based position
+    of the symbol in the schema.
+    """
+    index_of_datum = writer_schema.symbols.index(datum)
+    encoder.write_int(index_of_datum)
+
+  def write_array(self, writer_schema, datum, encoder):
+    """
+    Arrays are encoded as a series of blocks.
+
+    Each block consists of a long count value,
+    followed by that many array items.
+    A block with count zero indicates the end of the array.
+    Each item is encoded per the array's item schema.
+
+    If a block's count is negative,
+    then the count is followed immediately by a long block size,
+    indicating the number of bytes in the block.
+    The actual count in this case
+    is the absolute value of the count written.
+    """
+    if len(datum) > 0:
+      encoder.write_long(len(datum))
+      for item in datum:
+        self.write_data(writer_schema.items, item, encoder)
+    encoder.write_long(0)
+
+  def write_map(self, writer_schema, datum, encoder):
+    """
+    Maps are encoded as a series of blocks.
+
+    Each block consists of a long count value,
+    followed by that many key/value pairs.
+    A block with count zero indicates the end of the map.
+    Each item is encoded per the map's value schema.
+
+    If a block's count is negative,
+    then the count is followed immediately by a long block size,
+    indicating the number of bytes in the block.
+    The actual count in this case
+    is the absolute value of the count written.
+    """
+    if len(datum) > 0:
+      encoder.write_long(len(datum))
+      for key, val in datum.items():
+        encoder.write_utf8(key)
+        self.write_data(writer_schema.values, val, encoder)
+    encoder.write_long(0)
+
+  def write_union(self, writer_schema, datum, encoder):
+    """
+    A union is encoded by first writing a long value indicating
+    the zero-based position within the union of the schema of its value.
+    The value is then encoded per the indicated schema within the union.
+    """
+    # resolve union
+    index_of_schema = -1
+    for i, candidate_schema in enumerate(writer_schema.schemas):
+      if Validate(candidate_schema, datum):
+        index_of_schema = i
+    if index_of_schema < 0: raise AvroTypeException(writer_schema, datum)
+
+    # write data
+    encoder.write_long(index_of_schema)
+    self.write_data(writer_schema.schemas[index_of_schema], datum, encoder)
+
+  def write_record(self, writer_schema, datum, encoder):
+    """
+    A record is encoded by encoding the values of its fields
+    in the order that they are declared. In other words, a record
+    is encoded as just the concatenation of the encodings of its fields.
+    Field values are encoded per their schema.
+    """
+    for field in writer_schema.fields:
+      self.write_data(field.type, datum.get(field.name), encoder)
+
+
+if __name__ == '__main__':
+  raise Exception('Not a standalone module')
diff --git a/lang/py3/avro/ipc.py b/lang/py3/avro/ipc.py
new file mode 100644
index 0000000..973e674
--- /dev/null
+++ b/lang/py3/avro/ipc.py
@@ -0,0 +1,694 @@
+#!/usr/bin/env python3
+# -*- mode: python -*-
+# -*- coding: utf-8 -*-
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""RPC/IPC support."""
+
+import abc
+import http.client
+import http.server
+import io
+import logging
+import os
+import socketserver
+
+from avro import io as avro_io
+from avro import protocol
+from avro import schema
+
+# ------------------------------------------------------------------------------
+# Constants
+
+def LoadResource(name):
+  dir_path = os.path.dirname(__file__)
+  rsrc_path = os.path.join(dir_path, name)
+  with open(rsrc_path, 'r') as f:
+    return f.read()
+
+
+# Handshake schema is pulled in during build
+HANDSHAKE_REQUEST_SCHEMA_JSON = LoadResource('HandshakeRequest.avsc')
+HANDSHAKE_RESPONSE_SCHEMA_JSON = LoadResource('HandshakeResponse.avsc')
+
+HANDSHAKE_REQUEST_SCHEMA = schema.Parse(HANDSHAKE_REQUEST_SCHEMA_JSON)
+HANDSHAKE_RESPONSE_SCHEMA = schema.Parse(HANDSHAKE_RESPONSE_SCHEMA_JSON)
+
+HANDSHAKE_REQUESTOR_WRITER = avro_io.DatumWriter(HANDSHAKE_REQUEST_SCHEMA)
+HANDSHAKE_REQUESTOR_READER = avro_io.DatumReader(HANDSHAKE_RESPONSE_SCHEMA)
+HANDSHAKE_RESPONDER_WRITER = avro_io.DatumWriter(HANDSHAKE_RESPONSE_SCHEMA)
+HANDSHAKE_RESPONDER_READER = avro_io.DatumReader(HANDSHAKE_REQUEST_SCHEMA)
+
+META_SCHEMA = schema.Parse('{"type": "map", "values": "bytes"}')
+META_WRITER = avro_io.DatumWriter(META_SCHEMA)
+META_READER = avro_io.DatumReader(META_SCHEMA)
+
+SYSTEM_ERROR_SCHEMA = schema.Parse('["string"]')
+
+AVRO_RPC_MIME = 'avro/binary'
+
+# protocol cache
+
+# Map: remote name -> remote MD5 hash
+_REMOTE_HASHES = {}
+
+# Decoder/encoder for a 32-bit big-endian integer.
+UINT32_BE = avro_io.STRUCT_INT
+
+# Default size of the buffers used to frame messages:
+BUFFER_SIZE = 8192
+
+
+# ------------------------------------------------------------------------------
+# Exceptions
+
+
+class AvroRemoteException(schema.AvroException):
+  """
+  Raised when an error message is sent by an Avro requestor or responder.
+  """
+  def __init__(self, fail_msg=None):
+    schema.AvroException.__init__(self, fail_msg)
+
+class ConnectionClosedException(schema.AvroException):
+  pass
+
+
+# ------------------------------------------------------------------------------
+# Base IPC Classes (Requestor/Responder)
+
+
+class BaseRequestor(object, metaclass=abc.ABCMeta):
+  """Base class for the client side of a protocol interaction."""
+
+  def __init__(self, local_protocol, transceiver):
+    """Initializes a new requestor object.
+
+    Args:
+      local_protocol: Avro Protocol describing the messages sent and received.
+      transceiver: Transceiver instance to channel messages through.
+    """
+    self._local_protocol = local_protocol
+    self._transceiver = transceiver
+    self._remote_protocol = None
+    self._remote_hash = None
+    self._send_protocol = None
+
+  @property
+  def local_protocol(self):
+    """Returns: the Avro Protocol describing the messages sent and received."""
+    return self._local_protocol
+
+  @property
+  def transceiver(self):
+    """Returns: the underlying channel used by this requestor."""
+    return self._transceiver
+
+  @abc.abstractmethod
+  def _IssueRequest(self, call_request, message_name, request_datum):
+    """TODO: Document this method.
+
+    Args:
+      call_request: ???
+      message_name: Name of the message.
+      request_datum: ???
+    Returns:
+      ???
+    """
+    raise NotImplementedError('Abstract method')
+
+  def Request(self, message_name, request_datum):
+    """Writes a request message and reads a response or error message.
+
+    Args:
+      message_name: Name of the IPC method.
+      request_datum: IPC request.
+    Returns:
+      The IPC response.
+    """
+    # build handshake and call request
+    buffer_writer = io.BytesIO()
+    buffer_encoder = avro_io.BinaryEncoder(buffer_writer)
+    self._WriteHandshakeRequest(buffer_encoder)
+    self._WriteCallRequest(message_name, request_datum, buffer_encoder)
+
+    # send the handshake and call request; block until call response
+    call_request = buffer_writer.getvalue()
+    return self._IssueRequest(call_request, message_name, request_datum)
+
+  def _WriteHandshakeRequest(self, encoder):
+    """Emits the handshake request.
+
+    Args:
+      encoder: Encoder to write the handshake request into.
+    """
+    local_hash = self._local_protocol.md5
+
+    # if self._remote_hash is None:
+    #   remote_name = self.transceiver.remote_name
+    #   self._remote_hash = _REMOTE_HASHES.get(remote_name)
+
+    if self._remote_hash is None:
+      self._remote_hash = local_hash
+      self._remote_protocol = self._local_protocol
+
+    request_datum = {
+      'clientHash': local_hash,
+      'serverHash': self._remote_hash,
+    }
+    if self._send_protocol:
+      request_datum['clientProtocol'] = str(self._local_protocol)
+
+    logging.info('Sending handshake request: %s', request_datum)
+    HANDSHAKE_REQUESTOR_WRITER.write(request_datum, encoder)
+
+  def _WriteCallRequest(self, message_name, request_datum, encoder):
+    """
+    The format of a call request is:
+      * request metadata, a map with values of type bytes
+      * the message name, an Avro string, followed by
+      * the message parameters. Parameters are serialized according to
+        the message's request declaration.
+    """
+    # request metadata (not yet implemented)
+    request_metadata = {}
+    META_WRITER.write(request_metadata, encoder)
+
+    # Identify message to send:
+    message = self.local_protocol.message_map.get(message_name)
+    if message is None:
+      raise schema.AvroException('Unknown message: %s' % message_name)
+    encoder.write_utf8(message.name)
+
+    # message parameters
+    self._WriteRequest(message.request, request_datum, encoder)
+
+  def _WriteRequest(self, request_schema, request_datum, encoder):
+    logging.info('writing request: %s', request_datum)
+    datum_writer = avro_io.DatumWriter(request_schema)
+    datum_writer.write(request_datum, encoder)
+
+  def _ReadHandshakeResponse(self, decoder):
+    """Reads and processes the handshake response message.
+
+    Args:
+      decoder: Decoder to read messages from.
+    Returns:
+      True if a call response follows the handshake, False if the request
+      must be re-sent with the full client protocol.
+    Raises:
+      schema.AvroException: if the handshake "match" field has an unexpected value.
+    """
+    handshake_response = HANDSHAKE_REQUESTOR_READER.read(decoder)
+    logging.info('Processing handshake response: %s', handshake_response)
+    match = handshake_response['match']
+    if match == 'BOTH':
+      # Both client and server protocol hashes match:
+      self._send_protocol = False
+      return True
+
+    elif match == 'CLIENT':
+      # Client's side hash mismatch:
+      self._remote_protocol = \
+          protocol.Parse(handshake_response['serverProtocol'])
+      self._remote_hash = handshake_response['serverHash']
+      self._send_protocol = False
+      return True
+
+    elif match == 'NONE':
+      # Neither client nor server match:
+      self._remote_protocol = \
+          protocol.Parse(handshake_response['serverProtocol'])
+      self._remote_hash = handshake_response['serverHash']
+      self._send_protocol = True
+      return False
+    else:
+      raise schema.AvroException('handshake_response.match=%r' % match)
+
+  def _ReadCallResponse(self, message_name, decoder):
+    """Reads and processes a method call response.
+
+    The format of a call response is:
+      - response metadata, a map with values of type bytes
+      - a one-byte error flag boolean, followed by either:
+        - if the error flag is false,
+          the message response, serialized per the message's response schema.
+        - if the error flag is true,
+          the error, serialized per the message's error union schema.
+
+    Args:
+      message_name: Name of the message this response belongs to.
+      decoder: Decoder to read the response from.
+    Returns:
+      The message response datum.
+    Raises:
+      schema.AvroException: if the message is unknown locally or remotely.
+      AvroRemoteException: the decoded error, if the response's error flag is set.
+    """
+    # response metadata
+    response_metadata = META_READER.read(decoder)
+
+    # remote response schema
+    remote_message_schema = self._remote_protocol.message_map.get(message_name)
+    if remote_message_schema is None:
+      raise schema.AvroException('Unknown remote message: %s' % message_name)
+
+    # local response schema
+    local_message_schema = self._local_protocol.message_map.get(message_name)
+    if local_message_schema is None:
+      raise schema.AvroException('Unknown local message: %s' % message_name)
+
+    # error flag
+    if not decoder.read_boolean():
+      writer_schema = remote_message_schema.response
+      reader_schema = local_message_schema.response
+      return self._ReadResponse(writer_schema, reader_schema, decoder)
+    else:
+      writer_schema = remote_message_schema.errors
+      reader_schema = local_message_schema.errors
+      raise self._ReadError(writer_schema, reader_schema, decoder)
+
+  def _ReadResponse(self, writer_schema, reader_schema, decoder):
+    datum_reader = avro_io.DatumReader(writer_schema, reader_schema)
+    result = datum_reader.read(decoder)
+    return result
+
+  def _ReadError(self, writer_schema, reader_schema, decoder):
+    datum_reader = avro_io.DatumReader(writer_schema, reader_schema)
+    return AvroRemoteException(datum_reader.read(decoder))
+
+
+class Requestor(BaseRequestor):
+  """Concrete requestor implementation."""
+
+  def _IssueRequest(self, call_request, message_name, request_datum):
+    call_response = self.transceiver.Transceive(call_request)
+
+    # process the handshake and call response
+    buffer_decoder = avro_io.BinaryDecoder(io.BytesIO(call_response))
+    call_response_exists = self._ReadHandshakeResponse(buffer_decoder)
+    if call_response_exists:
+      return self._ReadCallResponse(message_name, buffer_decoder)
+    else:
+      return self.Request(message_name, request_datum)
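+
+  # Illustrative usage (ECHO_PROTOCOL and the 'echo' message are hypothetical):
+  #   client = HTTPTransceiver('localhost', 8000)
+  #   requestor = Requestor(ECHO_PROTOCOL, client)
+  #   reply = requestor.Request('echo', {'message': 'hello'})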
+
+
+# ------------------------------------------------------------------------------
+
+
+class Responder(object, metaclass=abc.ABCMeta):
+  """Base class for the server side of a protocol interaction."""
+
+  def __init__(self, local_protocol):
+    self._local_protocol = local_protocol
+    self._local_hash = self._local_protocol.md5
+    self._protocol_cache = {}
+
+    self.set_protocol_cache(self._local_hash, self._local_protocol)
+
+  @property
+  def local_protocol(self):
+    return self._local_protocol
+
+  # utility functions to manipulate protocol cache
+  def get_protocol_cache(self, hash):
+    return self._protocol_cache.get(hash)
+
+  def set_protocol_cache(self, hash, protocol):
+    self._protocol_cache[hash] = protocol
+
+  def Respond(self, call_request):
+    """Entry point to process one procedure call.
+
+    Args:
+      call_request: Serialized procedure call request.
+    Returns:
+      Serialized procedure call response.
+    Raises:
+      ???
+    """
+    buffer_reader = io.BytesIO(call_request)
+    buffer_decoder = avro_io.BinaryDecoder(buffer_reader)
+    buffer_writer = io.BytesIO()
+    buffer_encoder = avro_io.BinaryEncoder(buffer_writer)
+    error = None
+    response_metadata = {}
+
+    try:
+      remote_protocol = self._ProcessHandshake(buffer_decoder, buffer_encoder)
+      # handshake failure
+      if remote_protocol is None:
+        return buffer_writer.getvalue()
+
+      # read request using remote protocol
+      request_metadata = META_READER.read(buffer_decoder)
+      remote_message_name = buffer_decoder.read_utf8()
+
+      # get remote and local request schemas so we can do
+      # schema resolution (one fine day)
+      remote_message = remote_protocol.message_map.get(remote_message_name)
+      if remote_message is None:
+        fail_msg = 'Unknown remote message: %s' % remote_message_name
+        raise schema.AvroException(fail_msg)
+      local_message = self.local_protocol.message_map.get(remote_message_name)
+      if local_message is None:
+        fail_msg = 'Unknown local message: %s' % remote_message_name
+        raise schema.AvroException(fail_msg)
+      writer_schema = remote_message.request
+      reader_schema = local_message.request
+      request = self._ReadRequest(writer_schema, reader_schema, buffer_decoder)
+      logging.info('Processing request: %r', request)
+
+      # perform server logic
+      try:
+        response = self.Invoke(local_message, request)
+      except AvroRemoteException as exn:
+        error = exn
+      except Exception as exn:
+        error = AvroRemoteException(str(exn))
+
+      # write response using local protocol
+      META_WRITER.write(response_metadata, buffer_encoder)
+      buffer_encoder.write_boolean(error is not None)
+      if error is None:
+        writer_schema = local_message.response
+        self._WriteResponse(writer_schema, response, buffer_encoder)
+      else:
+        writer_schema = local_message.errors
+        self._WriteError(writer_schema, error, buffer_encoder)
+    except schema.AvroException as exn:
+      error = AvroRemoteException(str(exn))
+      buffer_encoder = avro_io.BinaryEncoder(io.BytesIO())
+      META_WRITER.write(response_metadata, buffer_encoder)
+      buffer_encoder.write_boolean(True)
+      self._WriteError(SYSTEM_ERROR_SCHEMA, error, buffer_encoder)
+    return buffer_writer.getvalue()
+
+  def _ProcessHandshake(self, decoder, encoder):
+    """Processes an RPC handshake.
+
+    Args:
+      decoder: Where to read from.
+      encoder: Where to write to.
+    Returns:
+      The requested Protocol.
+    """
+    handshake_request = HANDSHAKE_RESPONDER_READER.read(decoder)
+    logging.info('Processing handshake request: %s', handshake_request)
+
+    # determine the remote protocol
+    client_hash = handshake_request.get('clientHash')
+    client_protocol = handshake_request.get('clientProtocol')
+    remote_protocol = self.get_protocol_cache(client_hash)
+    if remote_protocol is None and client_protocol is not None:
+      remote_protocol = protocol.Parse(client_protocol)
+      self.set_protocol_cache(client_hash, remote_protocol)
+
+    # evaluate remote's guess of the local protocol
+    server_hash = handshake_request.get('serverHash')
+
+    handshake_response = {}
+    if self._local_hash == server_hash:
+      if remote_protocol is None:
+        handshake_response['match'] = 'NONE'
+      else:
+        handshake_response['match'] = 'BOTH'
+    else:
+      if remote_protocol is None:
+        handshake_response['match'] = 'NONE'
+      else:
+        handshake_response['match'] = 'CLIENT'
+
+    if handshake_response['match'] != 'BOTH':
+      handshake_response['serverProtocol'] = str(self.local_protocol)
+      handshake_response['serverHash'] = self._local_hash
+
+    logging.info('Handshake response: %s', handshake_response)
+    HANDSHAKE_RESPONDER_WRITER.write(handshake_response, encoder)
+    return remote_protocol
+
+  @abc.abstractmethod
+  def Invoke(self, local_message, request):
+    """Processes one procedure call.
+
+    Args:
+      local_message: Avro message specification.
+      request: Call request.
+    Returns:
+      Call response.
+    Raises:
+      ???
+    """
+    raise NotImplementedError('Abstract method')
+
+  def _ReadRequest(self, writer_schema, reader_schema, decoder):
+    datum_reader = avro_io.DatumReader(writer_schema, reader_schema)
+    return datum_reader.read(decoder)
+
+  def _WriteResponse(self, writer_schema, response_datum, encoder):
+    datum_writer = avro_io.DatumWriter(writer_schema)
+    datum_writer.write(response_datum, encoder)
+
+  def _WriteError(self, writer_schema, error_exception, encoder):
+    datum_writer = avro_io.DatumWriter(writer_schema)
+    datum_writer.write(str(error_exception), encoder)
+
+
+# ------------------------------------------------------------------------------
+# Framed message
+
+
+class FramedReader(object):
+  """Wrapper around a file-like object to read framed data."""
+
+  def __init__(self, reader):
+    self._reader = reader
+
+  def Read(self):
+    """Reads one message from the configured reader.
+
+    Returns:
+      The message, as bytes.
+    """
+    message = io.BytesIO()
+    # Read and append frames until we encounter a 0-size frame:
+    while self._ReadFrame(message) > 0: pass
+    return message.getvalue()
+
+  def _ReadFrame(self, message):
+    """Reads and appends one frame into the given message bytes.
+
+    Args:
+      message: Message to append the frame to.
+    Returns:
+      Size of the frame that was read.
+      The empty frame (size 0) indicates the end of a message.
+    """
+    frame_size = self._ReadInt32()
+    remaining = frame_size
+    while remaining > 0:
+      data_bytes = self._reader.read(remaining)
+      if len(data_bytes) == 0:
+        raise ConnectionClosedException(
+            'FramedReader: expecting %d more bytes in frame of size %d, got 0.'
+            % (remaining, frame_size))
+      message.write(data_bytes)
+      remaining -= len(data_bytes)
+    return frame_size
+
+  def _ReadInt32(self):
+    encoded = self._reader.read(UINT32_BE.size)
+    if len(encoded) != UINT32_BE.size:
+      raise ConnectionClosedException('Invalid header: %r' % encoded)
+    return UINT32_BE.unpack(encoded)[0]
+
+
+class FramedWriter(object):
+  """Wrapper around a file-like object to write framed data."""
+
+  def __init__(self, writer):
+    self._writer = writer
+
+  def Write(self, message):
+    """Writes a message.
+
+    Message is chunked into sequences of frames terminated by an empty frame.
+
+    Args:
+      message: Message to write, as bytes.
+    """
+    while len(message) > 0:
+      chunk_size = min(BUFFER_SIZE, len(message))
+      chunk = message[:chunk_size]
+      self._WriteBuffer(chunk)
+      message = message[chunk_size:]
+
+    # A message is always terminated by a zero-length buffer.
+    self._WriteUnsignedInt32(0)
+
+  def _WriteBuffer(self, chunk):
+    self._WriteUnsignedInt32(len(chunk))
+    self._writer.write(chunk)
+
+  def _WriteUnsignedInt32(self, uint32):
+    self._writer.write(UINT32_BE.pack(uint32))
+
+
+# ------------------------------------------------------------------------------
+# Transceiver (send/receive channel)
+
+
+class Transceiver(object, metaclass=abc.ABCMeta):
+  @abc.abstractproperty
+  def remote_name(self):
+    pass
+
+  @abc.abstractmethod
+  def ReadMessage(self):
+    """Reads a single message from the channel.
+
+    Blocks until a message can be read.
+
+    Returns:
+      The message read from the channel.
+    """
+    pass
+
+  @abc.abstractmethod
+  def WriteMessage(self, message):
+    """Writes a message into the channel.
+
+    Blocks until the message has been written.
+
+    Args:
+      message: Message to write.
+    """
+    pass
+
+  def Transceive(self, request):
+    """Processes a single request-reply interaction.
+
+    Synchronous request-reply interaction.
+
+    Args:
+      request: Request message.
+    Returns:
+      The reply message.
+    """
+    self.WriteMessage(request)
+    result = self.ReadMessage()
+    return result
+
+  def Close(self):
+    """Closes this transceiver."""
+    pass
+
+
+class HTTPTransceiver(Transceiver):
+  """HTTP-based transceiver implementation."""
+
+  def __init__(self, host, port, req_resource='/'):
+    """Initializes a new HTTP transceiver.
+
+    Args:
+      host: Name or IP address of the remote host to interact with.
+      port: Port the remote server is listening on.
+      req_resource: Optional HTTP resource path to use, '/' by default.
+    """
+    self._req_resource = req_resource
+    self._conn = http.client.HTTPConnection(host, port)
+    self._conn.connect()
+    self._remote_name = self._conn.sock.getsockname()
+
+  @property
+  def remote_name(self):
+    return self._remote_name
+
+  def ReadMessage(self):
+    response = self._conn.getresponse()
+    response_reader = FramedReader(response)
+    framed_message = response_reader.Read()
+    response.read()    # ensure we're ready for subsequent requests
+    return framed_message
+
+  def WriteMessage(self, message):
+    req_method = 'POST'
+    req_headers = {'Content-Type': AVRO_RPC_MIME}
+
+    bio = io.BytesIO()
+    req_body_buffer = FramedWriter(bio)
+    req_body_buffer.Write(message)
+    req_body = bio.getvalue()
+
+    self._conn.request(req_method, self._req_resource, req_body, req_headers)
+
+  def Close(self):
+    self._conn.close()
+    self._conn = None
+
+
+# ------------------------------------------------------------------------------
+# Server Implementations
+
+
+def _MakeHandlerClass(responder):
+  class AvroHTTPRequestHandler(http.server.BaseHTTPRequestHandler):
+    def do_POST(self):
+      reader = FramedReader(self.rfile)
+      call_request = reader.Read()
+      logging.info('Serialized request: %r', call_request)
+      call_response = responder.Respond(call_request)
+      logging.info('Serialized response: %r', call_response)
+
+      self.send_response(200)
+      self.send_header('Content-type', AVRO_RPC_MIME)
+      self.end_headers()
+
+      framed_writer = FramedWriter(self.wfile)
+      framed_writer.Write(call_response)
+      self.wfile.flush()
+      logging.info('Response sent')
+
+  return AvroHTTPRequestHandler
+
+
+class MultiThreadedHTTPServer(
+    socketserver.ThreadingMixIn,
+    http.server.HTTPServer,
+):
+  """Multi-threaded HTTP server."""
+  pass
+
+
+class AvroIpcHttpServer(MultiThreadedHTTPServer):
+  """Avro IPC server implemented on top of an HTTP server."""
+
+  def __init__(self, interface, port, responder):
+    """Initializes a new Avro IPC server.
+
+    Args:
+      interface: Interface the server listens on, e.g. 'localhost' or '0.0.0.0'.
+      port: TCP port the server listens on, e.g. 8000.
+      responder: Responder implementation to handle RPCs.
+    """
+    super(AvroIpcHttpServer, self).__init__(
+        server_address=(interface, port),
+        RequestHandlerClass=_MakeHandlerClass(responder),
+    )
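+
+  # Illustrative usage (EchoResponder is a hypothetical Responder subclass):
+  #   server = AvroIpcHttpServer('localhost', 8000, EchoResponder())
+  #   server.serve_forever()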
+
+
+if __name__ == '__main__':
+  raise Exception('Not a standalone module')
diff --git a/lang/py3/avro/protocol.py b/lang/py3/avro/protocol.py
new file mode 100644
index 0000000..629d2fd
--- /dev/null
+++ b/lang/py3/avro/protocol.py
@@ -0,0 +1,402 @@
+#!/usr/bin/env python3
+# -*- mode: python -*-
+# -*- coding: utf-8 -*-
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Protocol implementation.
+"""
+
+
+import hashlib
+import json
+import logging
+
+from avro import schema
+
+ImmutableDict = schema.ImmutableDict
+
+# ------------------------------------------------------------------------------
+# Constants
+
+# Allowed top-level schemas in a protocol:
+VALID_TYPE_SCHEMA_TYPES = frozenset(['enum', 'record', 'error', 'fixed'])
+
+
+# ------------------------------------------------------------------------------
+# Exceptions
+
+
+class ProtocolParseException(schema.AvroException):
+  """Error while parsing a JSON protocol descriptor."""
+  pass
+
+
+# ------------------------------------------------------------------------------
+# Base Classes
+
+
+class Protocol(object):
+  """An application protocol."""
+
+  @staticmethod
+  def _ParseTypeDesc(type_desc, names):
+    type_schema = schema.SchemaFromJSONData(type_desc, names=names)
+    if type_schema.type not in VALID_TYPE_SCHEMA_TYPES:
+      raise ProtocolParseException(
+          'Invalid type %r in protocol: '
+          'protocols can only declare types %s.'
+          % (type_schema, ','.join(VALID_TYPE_SCHEMA_TYPES)))
+    return type_schema
+
+  @staticmethod
+  def _ParseMessageDesc(name, message_desc, names):
+    """Parses a protocol message descriptor.
+
+    Args:
+      name: Name of the message.
+      message_desc: Descriptor of the message.
+      names: Tracker of the named Avro schema.
+    Returns:
+      The parsed protocol message.
+    Raises:
+      ProtocolParseException: if the descriptor is invalid.
+    """
+    request_desc = message_desc.get('request')
+    if request_desc is None:
+      raise ProtocolParseException(
+          'Invalid message descriptor with no "request": %r.' % message_desc)
+    request_schema = Message._ParseRequestFromJSONDesc(
+        request_desc=request_desc,
+        names=names,
+    )
+
+    response_desc = message_desc.get('response')
+    if response_desc is None:
+      raise ProtocolParseException(
+          'Invalid message descriptor with no "response": %r.' % message_desc)
+    response_schema = Message._ParseResponseFromJSONDesc(
+        response_desc=response_desc,
+        names=names,
+    )
+
+    # Errors are optional:
+    errors_desc = message_desc.get('errors', tuple())
+    error_union_schema = Message._ParseErrorsFromJSONDesc(
+        errors_desc=errors_desc,
+        names=names,
+    )
+
+    return Message(
+        name=name,
+        request=request_schema,
+        response=response_schema,
+        errors=error_union_schema,
+    )
+
+  @staticmethod
+  def _ParseMessageDescMap(message_desc_map, names):
+    for name, message_desc in message_desc_map.items():
+      yield Protocol._ParseMessageDesc(
+          name=name,
+          message_desc=message_desc,
+          names=names,
+      )
+
+  def __init__(
+      self,
+      name,
+      namespace=None,
+      types=tuple(),
+      messages=tuple(),
+  ):
+    """Initializes a new protocol object.
+
+    Args:
+      name: Protocol name (absolute or relative).
+      namespace: Optional explicit namespace (if name is relative).
+      types: Collection of types in the protocol.
+      messages: Collection of messages in the protocol.
+    """
+    self._avro_name = schema.Name(name=name, namespace=namespace)
+    self._fullname = self._avro_name.fullname
+    self._name = self._avro_name.simple_name
+    self._namespace = self._avro_name.namespace
+
+    self._props = {}
+    self._props['name'] = self._name
+    if self._namespace:
+      self._props['namespace'] = self._namespace
+
+    self._names = schema.Names(default_namespace=self._namespace)
+
+    self._types = tuple(types)
+    # Map: type full name -> type schema
+    self._type_map = (
+        ImmutableDict((type.fullname, type) for type in self._types))
+    # This assertion can only fail if named schemas are not tracked properly:
+    assert (len(self._types) == len(self._type_map)), (
+        'Type list %r does not match type map: %r'
+        % (self._types, self._type_map))
+    # TODO: set props['types']
+
+    self._messages = tuple(messages)
+
+    # Map: message name -> Message
+    # Note that message names are simple names unique within the protocol.
+    self._message_map = ImmutableDict(
+        items=((message.name, message) for message in self._messages))
+    if len(self._messages) != len(self._message_map):
+      raise ProtocolParseException(
+          'Invalid protocol %s with duplicate message name: %r'
+          % (self._avro_name, self._messages))
+    # TODO: set props['messages']
+
+    self._md5 = hashlib.md5(str(self).encode('utf-8')).digest()
+
+  @property
+  def name(self):
+    """Returns: the simple name of the protocol."""
+    return self._name
+
+  @property
+  def namespace(self):
+    """Returns: the namespace this protocol belongs to."""
+    return self._namespace
+
+  @property
+  def fullname(self):
+    """Returns: the fully qualified name of this protocol."""
+    return self._fullname
+
+  @property
+  def types(self):
+    """Returns: the collection of types declared in this protocol."""
+    return self._types
+
+  @property
+  def type_map(self):
+    """Returns: the map of types in this protocol, indexed by their full name."""
+    return self._type_map
+
+  @property
+  def messages(self):
+    """Returns: the collection of messages declared in this protocol."""
+    return self._messages
+
+  @property
+  def message_map(self):
+    """Returns: the map of messages in this protocol, indexed by their name."""
+    return self._message_map
+
+  @property
+  def md5(self):
+    return self._md5
+
+  @property
+  def props(self):
+    return self._props
+
+  def to_json(self):
+    to_dump = {}
+    to_dump['protocol'] = self.name
+    names = schema.Names(default_namespace=self.namespace)
+    if self.namespace:
+      to_dump['namespace'] = self.namespace
+    if self.types:
+      to_dump['types'] = [ t.to_json(names) for t in self.types ]
+    if self.messages:
+      messages_dict = {}
+      for name, body in self.message_map.items():
+        messages_dict[name] = body.to_json(names)
+      to_dump['messages'] = messages_dict
+    return to_dump
+
+  def __str__(self):
+    return json.dumps(self.to_json())
+
+  def __eq__(self, that):
+    to_cmp = json.loads(str(self))
+    return to_cmp == json.loads(str(that))
+
+
+# ------------------------------------------------------------------------------
+
+
+class Message(object):
+  """A Protocol message."""
+
+  @staticmethod
+  def _ParseRequestFromJSONDesc(request_desc, names):
+    """Parses the request descriptor of a protocol message.
+
+    Args:
+      request_desc: Descriptor of the message request.
+          This is a list of fields that defines an unnamed record.
+      names: Tracker for named Avro schemas.
+    Returns:
+      The parsed request schema, as an unnamed record.
+    """
+    fields = schema.RecordSchema._MakeFieldList(request_desc, names=names)
+    return schema.RecordSchema(
+        name=None,
+        namespace=None,
+        fields=fields,
+        names=names,
+        record_type=schema.REQUEST,
+    )
+
+  @staticmethod
+  def _ParseResponseFromJSONDesc(response_desc, names):
+    """Parses the response descriptor of a protocol message.
+
+    Args:
+      response_desc: Descriptor of the message response.
+          This is an arbitrary Avro schema descriptor.
+    Returns:
+      The parsed response schema.
+    """
+    return schema.SchemaFromJSONData(response_desc, names=names)
+
+  @staticmethod
+  def _ParseErrorsFromJSONDesc(errors_desc, names):
+    """Parses the errors descriptor of a protocol message.
+
+    Args:
+      errors_desc: Descriptor of the errors thrown by the protocol message.
+          This is a list of error types understood as an implicit union.
+          Each error type is an arbitrary Avro schema.
+      names: Tracker for named Avro schemas.
+    Returns:
+      The parsed ErrorUnionSchema.
+    """
+    error_union_desc = {
+        'type': schema.ERROR_UNION,
+        'declared_errors': errors_desc,
+    }
+    return schema.SchemaFromJSONData(error_union_desc, names=names)
+
+  def __init__(self, name, request, response, errors=None):
+    self._name = name
+
+    self._props = {}
+    # TODO: set properties
+    self._request = request
+    self._response = response
+    self._errors = errors
+
+  @property
+  def name(self):
+    return self._name
+
+  @property
+  def request(self):
+    return self._request
+
+  @property
+  def response(self):
+    return self._response
+
+  @property
+  def errors(self):
+    return self._errors
+
+  @property
+  def props(self):
+    return self._props
+
+  def __str__(self):
+    return json.dumps(self.to_json())
+
+  def to_json(self, names=None):
+    if names is None:
+      names = schema.Names()
+    to_dump = {}
+    to_dump['request'] = self.request.to_json(names)
+    to_dump['response'] = self.response.to_json(names)
+    if self.errors:
+      to_dump['errors'] = self.errors.to_json(names)
+    return to_dump
+
+  def __eq__(self, that):
+    return self.name == that.name and self.props == that.props
+
+
+# ------------------------------------------------------------------------------
+
+
+def ProtocolFromJSONData(json_data):
+  """Builds an Avro  Protocol from its JSON descriptor.
+
+  Args:
+    json_data: JSON data representing the descriptor of the Avro protocol.
+  Returns:
+    The Avro Protocol parsed from the JSON descriptor.
+  Raises:
+    ProtocolParseException: if the descriptor is invalid.
+  """
+  if type(json_data) != dict:
+    raise ProtocolParseException(
+        'Invalid JSON descriptor for an Avro protocol: %r' % json_data)
+
+  name = json_data.get('protocol')
+  if name is None:
+    raise ProtocolParseException(
+        'Invalid protocol descriptor with no "name": %r' % json_data)
+
+  # Namespace is optional
+  namespace = json_data.get('namespace')
+
+  avro_name = schema.Name(name=name, namespace=namespace)
+  names = schema.Names(default_namespace=avro_name.namespace)
+
+  type_desc_list = json_data.get('types', tuple())
+  types = tuple(map(
+      lambda desc: Protocol._ParseTypeDesc(desc, names=names),
+      type_desc_list))
+
+  message_desc_map = json_data.get('messages', dict())
+  messages = tuple(Protocol._ParseMessageDescMap(message_desc_map, names=names))
+
+  return Protocol(
+      name=name,
+      namespace=namespace,
+      types=types,
+      messages=messages,
+  )
+
+
+def Parse(json_string):
+  """Constructs a Protocol from its JSON descriptor in text form.
+
+  Args:
+    json_string: String representation of the JSON descriptor of the protocol.
+  Returns:
+    The parsed protocol.
+  Raises:
+    ProtocolParseException: on JSON parsing error,
+        or if the JSON descriptor is invalid.
+  """
+  try:
+    json_data = json.loads(json_string)
+  except Exception as exn:
+    raise ProtocolParseException(
+        'Error parsing protocol from JSON: %r. '
+        'Error message: %r.'
+        % (json_string, exn))
+
+  return ProtocolFromJSONData(json_data)
+
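+
+# A minimal usage sketch (illustrative only; the protocol and message names
+# below are hypothetical):
+#
+#   echo = Parse("""
+#       {"protocol": "Echo",
+#        "namespace": "example.proto",
+#        "messages": {"ping": {"request": [], "response": "string"}}}
+#       """)
+#   assert echo.name == 'Echo'
+#   assert 'ping' in echo.message_map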
diff --git a/lang/py3/avro/schema.py b/lang/py3/avro/schema.py
new file mode 100644
index 0000000..b5d17fe
--- /dev/null
+++ b/lang/py3/avro/schema.py
@@ -0,0 +1,1283 @@
+#!/usr/bin/env python3
+# -*- mode: python -*-
+# -*- coding: utf-8 -*-
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Representation of Avro schemas.
+
+A schema may be one of:
+ - A record, mapping field names to field value data;
+ - An error, equivalent to a record;
+ - An enum, containing one of a small set of symbols;
+ - An array of values, all of the same schema;
+ - A map containing string/value pairs, each of a declared schema;
+ - A union of other schemas;
+ - A fixed sized binary object;
+ - A unicode string;
+ - A sequence of bytes;
+ - A 32-bit signed int;
+ - A 64-bit signed long;
+ - A 32-bit floating-point float;
+ - A 64-bit floating-point double;
+ - A boolean;
+ - Null.
+"""
+
+
+import abc
+import collections
+import json
+import logging
+import re
+
+
+# ------------------------------------------------------------------------------
+# Constants
+
+# Log level more verbose than DEBUG=10, INFO=20, etc.
+DEBUG_VERBOSE=5
+
+
+NULL    = 'null'
+BOOLEAN = 'boolean'
+STRING  = 'string'
+BYTES   = 'bytes'
+INT     = 'int'
+LONG    = 'long'
+FLOAT   = 'float'
+DOUBLE  = 'double'
+FIXED   = 'fixed'
+ENUM    = 'enum'
+RECORD  = 'record'
+ERROR   = 'error'
+ARRAY   = 'array'
+MAP     = 'map'
+UNION   = 'union'
+
+# Request and error unions are part of Avro protocols:
+REQUEST = 'request'
+ERROR_UNION = 'error_union'
+
+PRIMITIVE_TYPES = frozenset([
+  NULL,
+  BOOLEAN,
+  STRING,
+  BYTES,
+  INT,
+  LONG,
+  FLOAT,
+  DOUBLE,
+])
+
+NAMED_TYPES = frozenset([
+  FIXED,
+  ENUM,
+  RECORD,
+  ERROR,
+])
+
+VALID_TYPES = frozenset.union(
+  PRIMITIVE_TYPES,
+  NAMED_TYPES,
+  [
+    ARRAY,
+    MAP,
+    UNION,
+    REQUEST,
+    ERROR_UNION,
+  ],
+)
+
+SCHEMA_RESERVED_PROPS = frozenset([
+  'type',
+  'name',
+  'namespace',
+  'fields',     # Record
+  'items',      # Array
+  'size',       # Fixed
+  'symbols',    # Enum
+  'values',     # Map
+  'doc',
+])
+
+FIELD_RESERVED_PROPS = frozenset([
+  'default',
+  'name',
+  'doc',
+  'order',
+  'type',
+])
+
+VALID_FIELD_SORT_ORDERS = frozenset([
+  'ascending',
+  'descending',
+  'ignore',
+])
+
+
+# ------------------------------------------------------------------------------
+# Exceptions
+
+
+class Error(Exception):
+  """Base class for errors in this module."""
+  pass
+
+
+class AvroException(Error):
+  """Generic Avro schema error."""
+  pass
+
+
+class SchemaParseException(AvroException):
+  """Error while parsing a JSON schema descriptor."""
+  pass
+
+
+# ------------------------------------------------------------------------------
+
+
+class ImmutableDict(dict):
+  """Dictionary guaranteed immutable.
+
+  All mutations raise an exception.
+  Behaves exactly as a dict otherwise.
+  """
+
+  def __init__(self, items=None, **kwargs):
+    if items is not None:
+      super(ImmutableDict, self).__init__(items)
+      assert (len(kwargs) == 0)
+    else:
+      super(ImmutableDict, self).__init__(**kwargs)
+
+  def __setitem__(self, key, value):
+    raise Exception(
+        'Attempting to map key %r to value %r in ImmutableDict %r'
+        % (key, value, self))
+
+  def __delitem__(self, key):
+    raise Exception(
+        'Attempting to remove mapping for key %r in ImmutableDict %r'
+        % (key, self))
+
+  def clear(self):
+    raise Exception('Attempting to clear ImmutableDict %r' % self)
+
+  def update(self, items=None, **kwargs):
+    raise Exception(
+        'Attempting to update ImmutableDict %r with items=%r, kwargs=%r'
+        % (self, items, kwargs))
+
+  def pop(self, key, default=None):
+    raise Exception(
+        'Attempting to pop key %r from ImmutableDict %r' % (key, self))
+
+  def popitem(self):
+    raise Exception('Attempting to pop item from ImmutableDict %r' % self)
+
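+# A small illustrative sketch of the intended behavior:
+#
+#   d = ImmutableDict({'name': 'User'})
+#   d['name']          # reads behave like a regular dict -> 'User'
+#   d['doc'] = 'text'  # any mutation raises an Exception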
+
+# ------------------------------------------------------------------------------
+
+
+class Schema(object, metaclass=abc.ABCMeta):
+  """Abstract base class for all Schema classes."""
+
+  def __init__(self, type, other_props=None):
+    """Initializes a new schema object.
+
+    Args:
+      type: Type of the schema to initialize.
+      other_props: Optional dictionary of additional properties.
+    """
+    if type not in VALID_TYPES:
+      raise SchemaParseException('%r is not a valid Avro type.' % type)
+
+    # All properties of this schema, as a map: property name -> property value
+    self._props = {}
+
+    self._props['type'] = type
+    self._type = type
+
+    if other_props:
+      self._props.update(other_props)
+
+  @property
+  def name(self):
+    """Returns: the simple name of this schema."""
+    return self._props['name']
+
+  @property
+  def fullname(self):
+    """Returns: the fully qualified name of this schema."""
+    # By default, the full name is the simple name.
+    # Named schemas override this behavior to include the namespace.
+    return self.name
+
+  @property
+  def namespace(self):
+    """Returns: the namespace this schema belongs to, if any, or None."""
+    return self._props.get('namespace', None)
+
+  @property
+  def type(self):
+    """Returns: the type of this schema."""
+    return self._type
+
+  @property
+  def doc(self):
+    """Returns: the documentation associated to this schema, if any, or None."""
+    return self._props.get('doc', None)
+
+  @property
+  def props(self):
+    """Reports all the properties of this schema.
+
+    Includes all properties, reserved and non reserved.
+    JSON properties of this schema are directly generated from this dict.
+
+    Returns:
+      A read-only dictionary of properties associated to this schema.
+    """
+    return ImmutableDict(self._props)
+
+  @property
+  def other_props(self):
+    """Returns: the dictionary of non-reserved properties."""
+    return dict(FilterKeysOut(items=self._props, keys=SCHEMA_RESERVED_PROPS))
+
+  def __str__(self):
+    """Returns: the JSON representation of this schema."""
+    return json.dumps(self.to_json())
+
+  @abc.abstractmethod
+  def to_json(self, names):
+    """Converts the schema object into its AVRO specification representation.
+
+    Schema types that have names (records, enums, and fixed) must
+    be aware of not re-defining schemas that are already listed
+    in the parameter names.
+    """
+    raise Exception('Cannot run abstract method.')
+
+
+# ------------------------------------------------------------------------------
+
+
+_RE_NAME = re.compile(r'[A-Za-z_][A-Za-z0-9_]*')
+
+_RE_FULL_NAME = re.compile(
+    r'^'
+    r'[.]?(?:[A-Za-z_][A-Za-z0-9_]*[.])*'  # optional namespace
+    r'([A-Za-z_][A-Za-z0-9_]*)'            # name
+    r'$'
+)
+
+class Name(object):
+  """Representation of an Avro name."""
+
+  def __init__(self, name, namespace=None):
+    """Parses an Avro name.
+
+    Args:
+      name: Avro name to parse (relative or absolute).
+      namespace: Optional explicit namespace if the name is relative.
+    """
+    # Normalize: namespace is always defined as a string, possibly empty.
+    if namespace is None: namespace = ''
+
+    if '.' in name:
+      # name is absolute, namespace is ignored:
+      self._fullname = name
+
+      match = _RE_FULL_NAME.match(self._fullname)
+      if match is None:
+        raise SchemaParseException(
+            'Invalid absolute schema name: %r.' % self._fullname)
+
+      self._name = match.group(1)
+      self._namespace = self._fullname[:-(len(self._name) + 1)]
+
+    else:
+      # name is relative, combine with explicit namespace:
+      self._name = name
+      self._namespace = namespace
+      self._fullname = '%s.%s' % (self._namespace, self._name)
+
+      # Validate the fullname:
+      if _RE_FULL_NAME.match(self._fullname) is None:
+        raise SchemaParseException(
+            'Invalid schema name %r inferred from name %r and namespace %r.'
+            % (self._fullname, self._name, self._namespace))
+
+  def __eq__(self, other):
+    if not isinstance(other, Name):
+      return False
+    return (self.fullname == other.fullname)
+
+  @property
+  def simple_name(self):
+    """Returns: the simple name part of this name."""
+    return self._name
+
+  @property
+  def namespace(self):
+    """Returns: this name's namespace, possible the empty string."""
+    return self._namespace
+
+  @property
+  def fullname(self):
+    """Returns: the full name (always contains a period '.')."""
+    return self._fullname
+
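+# Illustrative sketch of name resolution (hypothetical names):
+#
+#   Name('Rec', namespace='org.example').fullname   # -> 'org.example.Rec'
+#   Name('org.example.Rec').simple_name             # -> 'Rec'
+#   Name('org.example.Rec').namespace               # -> 'org.example'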
+
+# ------------------------------------------------------------------------------
+
+
+class Names(object):
+  """Tracks Avro named schemas and default namespace during parsing."""
+
+  def __init__(self, default_namespace=None, names=None):
+    """Initializes a new name tracker.
+
+    Args:
+      default_namespace: Optional default namespace.
+      names: Optional initial mapping of known named schemas.
+    """
+    if names is None:
+      names = {}
+    self._names = names
+    self._default_namespace = default_namespace
+
+  @property
+  def names(self):
+    """Returns: the mapping of known named schemas."""
+    return self._names
+
+  @property
+  def default_namespace(self):
+    """Returns: the default namespace, if any, or None."""
+    return self._default_namespace
+
+  def NewWithDefaultNamespace(self, namespace):
+    """Creates a new name tracker from this tracker, but with a new default ns.
+
+    Args:
+      namespace: New default namespace to use.
+    Returns:
+      New name tracker with the specified default namespace.
+    """
+    return Names(names=self._names, default_namespace=namespace)
+
+  def GetName(self, name, namespace=None):
+    """Resolves the Avro name according to this name tracker's state.
+
+    Args:
+      name: Name to resolve (absolute or relative).
+      namespace: Optional explicit namespace.
+    Returns:
+      The specified name, resolved according to this tracker.
+    """
+    if namespace is None: namespace = self._default_namespace
+    return Name(name=name, namespace=namespace)
+
+  def has_name(self, name, namespace=None):
+    avro_name = self.GetName(name=name, namespace=namespace)
+    return avro_name.fullname in self._names
+
+  def get_name(self, name, namespace=None):
+    avro_name = self.GetName(name=name, namespace=namespace)
+    return self._names.get(avro_name.fullname, None)
+
+  def GetSchema(self, name, namespace=None):
+    """Resolves an Avro schema by name.
+
+    Args:
+      name: Name (relative or absolute) of the Avro schema to look up.
+      namespace: Optional explicit namespace.
+    Returns:
+      The schema with the specified name, if any, or None.
+    """
+    avro_name = self.GetName(name=name, namespace=namespace)
+    return self._names.get(avro_name.fullname, None)
+
+  def prune_namespace(self, properties):
+    """given a properties, return properties with namespace removed if
+    it matches the own default namespace
+    """
+    if self.default_namespace is None:
+      # I have no default -- no change
+      return properties
+    if 'namespace' not in properties:
+      # he has no namespace - no change
+      return properties
+    if properties['namespace'] != self.default_namespace:
+      # we're different - leave his stuff alone
+      return properties
+    # we each have a namespace and it's redundant. delete his.
+    prunable = properties.copy()
+    del(prunable['namespace'])
+    return prunable
+
+  def Register(self, schema):
+    """Registers a new named schema in this tracker.
+
+    Args:
+      schema: Named Avro schema to register in this tracker.
+    """
+    if schema.fullname in VALID_TYPES:
+      raise SchemaParseException(
+          '%s is a reserved type name.' % schema.fullname)
+    if schema.fullname in self.names:
+      raise SchemaParseException(
+          'Avro name %r already exists.' % schema.fullname)
+
+    logging.log(DEBUG_VERBOSE, 'Register new name for %r', schema.fullname)
+    self._names[schema.fullname] = schema
+
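+# Illustrative sketch of default-namespace resolution (hypothetical names):
+#
+#   names = Names(default_namespace='org.example')
+#   names.GetName('Rec').fullname           # -> 'org.example.Rec'
+#   names.GetName('other.ns.Rec').fullname  # -> 'other.ns.Rec' (absolute name wins)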
+
+# ------------------------------------------------------------------------------
+
+
+class NamedSchema(Schema):
+  """Abstract base class for named schemas.
+
+  Named schemas are enumerated in NAMED_TYPES.
+  """
+
+  def __init__(
+      self,
+      type,
+      name,
+      namespace=None,
+      names=None,
+      other_props=None,
+  ):
+    """Initializes a new named schema object.
+
+    Args:
+      type: Type of the named schema.
+      name: Name (absolute or relative) of the schema.
+      namespace: Optional explicit namespace if name is relative.
+      names: Tracker to resolve and register Avro names.
+      other_props: Optional map of additional properties of the schema.
+    """
+    assert (type in NAMED_TYPES), ('Invalid named type: %r' % type)
+    self._avro_name = names.GetName(name=name, namespace=namespace)
+
+    super(NamedSchema, self).__init__(type, other_props)
+
+    names.Register(self)
+
+    self._props['name'] = self.name
+    if self.namespace:
+      self._props['namespace'] = self.namespace
+
+  @property
+  def avro_name(self):
+    """Returns: the Name object describing this schema's name."""
+    return self._avro_name
+
+  @property
+  def name(self):
+    return self._avro_name.simple_name
+
+  @property
+  def namespace(self):
+    return self._avro_name.namespace
+
+  @property
+  def fullname(self):
+    return self._avro_name.fullname
+
+  def name_ref(self, names):
+    """Reports this schema name relative to the specified name tracker.
+
+    Args:
+      names: Avro name tracker to relativise this schema name against.
+    Returns:
+      This schema name, relativised against the specified name tracker.
+    """
+    if self.namespace == names.default_namespace:
+      return self.name
+    else:
+      return self.fullname
+
+
+# ------------------------------------------------------------------------------
+
+
+_NO_DEFAULT = object()
+
+
+class Field(object):
+  """Representation of the schema of a field in a record."""
+
+  def __init__(
+      self,
+      type,
+      name,
+      index,
+      has_default,
+      default=_NO_DEFAULT,
+      order=None,
+      names=None,
+      doc=None,
+      other_props=None
+  ):
+    """Initializes a new Field object.
+
+    Args:
+      type: Avro schema of the field.
+      name: Name of the field.
+      index: 0-based position of the field.
+      has_default: Whether the field has a default value.
+      default: Default value of the field, if any.
+      order: Optional sort order: 'ascending', 'descending' or 'ignore'.
+      names: Optional tracker for named Avro schemas.
+      doc: Optional documentation of the field.
+      other_props: Optional map of additional properties of the field.
+    """
+    if (not isinstance(name, str)) or (len(name) == 0):
+      raise SchemaParseException('Invalid record field name: %r.' % name)
+    if (order is not None) and (order not in VALID_FIELD_SORT_ORDERS):
+      raise SchemaParseException('Invalid record field order: %r.' % order)
+
+    # All properties of this record field:
+    self._props = {}
+
+    self._has_default = has_default
+    if other_props:
+      self._props.update(other_props)
+
+    self._index = index
+    self._type = self._props['type'] = type
+    self._name = self._props['name'] = name
+
+    # TODO: check to ensure default is valid
+    if has_default:
+      self._props['default'] = default
+
+    if order is not None:
+      self._props['order'] = order
+
+    if doc is not None:
+      self._props['doc'] = doc
+
+  @property
+  def type(self):
+    """Returns: the schema of this field."""
+    return self._type
+
+  @property
+  def name(self):
+    """Returns: this field name."""
+    return self._name
+
+  @property
+  def index(self):
+    """Returns: the 0-based index of this field in the record."""
+    return self._index
+
+  @property
+  def default(self):
+    return self._props['default']
+
+  @property
+  def has_default(self):
+    return self._has_default
+
+  @property
+  def order(self):
+    return self._props.get('order', None)
+
+  @property
+  def doc(self):
+    return self._props.get('doc', None)
+
+  @property
+  def props(self):
+    return self._props
+
+  @property
+  def other_props(self):
+    return FilterKeysOut(items=self._props, keys=FIELD_RESERVED_PROPS)
+
+  def __str__(self):
+    return json.dumps(self.to_json())
+
+  def to_json(self, names=None):
+    if names is None:
+      names = Names()
+    to_dump = self.props.copy()
+    to_dump['type'] = self.type.to_json(names)
+    return to_dump
+
+  def __eq__(self, that):
+    to_cmp = json.loads(str(self))
+    return to_cmp == json.loads(str(that))
+
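+# Illustrative sketch of a standalone field (the field name is hypothetical;
+# PrimitiveSchema is defined below in this module):
+#
+#   field = Field(type=PrimitiveSchema(LONG), name='id', index=0, has_default=False)
+#   field.to_json()   # -> {'name': 'id', 'type': 'long'}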
+
+# ------------------------------------------------------------------------------
+# Primitive Types
+
+
+class PrimitiveSchema(Schema):
+  """Schema of a primitive Avro type.
+
+  Valid primitive types are defined in PRIMITIVE_TYPES.
+  """
+
+  def __init__(self, type):
+    """Initializes a new schema object for the specified primitive type.
+
+    Args:
+      type: Type of the schema to construct. Must be primitive.
+    """
+    if type not in PRIMITIVE_TYPES:
+      raise AvroException('%r is not a valid primitive type.' % type)
+    super(PrimitiveSchema, self).__init__(type)
+
+  @property
+  def name(self):
+    """Returns: the simple name of this schema."""
+    # The name of a primitive type is the type itself.
+    return self.type
+
+  def to_json(self, names=None):
+    if len(self.props) == 1:
+      return self.fullname
+    else:
+      return self.props
+
+  def __eq__(self, that):
+    return self.props == that.props
+
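+# Illustrative sketch:
+#
+#   PrimitiveSchema(INT).to_json()   # -> 'int'
+#   str(PrimitiveSchema(STRING))     # -> '"string"' (JSON-encoded)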
+
+# ------------------------------------------------------------------------------
+# Complex Types (non-recursive)
+
+
+class FixedSchema(NamedSchema):
+  def __init__(
+      self,
+      name,
+      namespace,
+      size,
+      names=None,
+      other_props=None,
+  ):
+    # Ensure valid ctor args
+    if not isinstance(size, int):
+      fail_msg = 'Fixed Schema requires a valid integer for size property.'
+      raise AvroException(fail_msg)
+
+    super(FixedSchema, self).__init__(
+        type=FIXED,
+        name=name,
+        namespace=namespace,
+        names=names,
+        other_props=other_props,
+    )
+    self._props['size'] = size
+
+  @property
+  def size(self):
+    """Returns: the size of this fixed schema, in bytes."""
+    return self._props['size']
+
+  def to_json(self, names=None):
+    if names is None:
+      names = Names()
+    if self.fullname in names.names:
+      return self.name_ref(names)
+    else:
+      names.names[self.fullname] = self
+      return names.prune_namespace(self.props)
+
+  def __eq__(self, that):
+    return self.props == that.props
+
+
+# ------------------------------------------------------------------------------
+
+
+class EnumSchema(NamedSchema):
+  def __init__(
+      self,
+      name,
+      namespace,
+      symbols,
+      names=None,
+      doc=None,
+      other_props=None,
+  ):
+    """Initializes a new enumeration schema object.
+
+    Args:
+      name: Simple name of this enumeration.
+      namespace: Optional namespace.
+      symbols: Ordered list of symbols defined in this enumeration.
+      names: Optional tracker for named Avro schemas.
+      doc: Optional documentation of the enumeration.
+      other_props: Optional map of additional properties of the schema.
+    """
+    symbols = tuple(symbols)
+    symbol_set = frozenset(symbols)
+    if (len(symbol_set) != len(symbols)
+        or not all(map(lambda symbol: isinstance(symbol, str), symbols))):
+      raise AvroException(
+          'Invalid symbols for enum schema: %r.' % (symbols,))
+
+    super(EnumSchema, self).__init__(
+        type=ENUM,
+        name=name,
+        namespace=namespace,
+        names=names,
+        other_props=other_props,
+    )
+
+    self._props['symbols'] = tuple(sorted(symbol_set))
+    if doc is not None:
+      self._props['doc'] = doc
+
+  @property
+  def symbols(self):
+    """Returns: the symbols defined in this enum."""
+    return self._props['symbols']
+
+  def to_json(self, names=None):
+    if names is None:
+      names = Names()
+    if self.fullname in names.names:
+      return self.name_ref(names)
+    else:
+      names.names[self.fullname] = self
+      return names.prune_namespace(self.props)
+
+  def __eq__(self, that):
+    return self.props == that.props
+
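+# Illustrative sketch of named-schema tracking (the enum is hypothetical):
+#
+#   names = Names()
+#   suit = EnumSchema('Suit', 'example', ['HEARTS', 'SPADES'], names=names)
+#   suit.to_json()       # full definition, using a fresh name tracker
+#   suit.to_json(names)  # -> 'example.Suit' (already registered: a name reference)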
+
+# ------------------------------------------------------------------------------
+# Complex Types (recursive)
+
+
+class ArraySchema(Schema):
+  """Schema of an array."""
+
+  def __init__(self, items, other_props=None):
+    """Initializes a new array schema object.
+
+    Args:
+      items: Avro schema of the array items.
+      other_props: Optional map of additional properties of the schema.
+    """
+    super(ArraySchema, self).__init__(
+        type=ARRAY,
+        other_props=other_props,
+    )
+    self._items_schema = items
+    self._props['items'] = items
+
+  @property
+  def items(self):
+    """Returns: the schema of the items in this array."""
+    return self._items_schema
+
+  def to_json(self, names=None):
+    if names is None:
+      names = Names()
+    to_dump = self.props.copy()
+    item_schema = self.items
+    to_dump['items'] = item_schema.to_json(names)
+    return to_dump
+
+  def __eq__(self, that):
+    to_cmp = json.loads(str(self))
+    return to_cmp == json.loads(str(that))
+
+
+# ------------------------------------------------------------------------------
+
+
+class MapSchema(Schema):
+  """Schema of a map."""
+
+  def __init__(self, values, other_props=None):
+    """Initializes a new map schema object.
+
+    Args:
+      values: Avro schema of the map values.
+      other_props: Optional map of additional properties of the schema.
+    """
+    super(MapSchema, self).__init__(
+        type=MAP,
+        other_props=other_props,
+    )
+    self._values_schema = values
+    self._props['values'] = values
+
+  @property
+  def values(self):
+    """Returns: the schema of the values in this map."""
+    return self._values_schema
+
+  def to_json(self, names=None):
+    if names is None:
+      names = Names()
+    to_dump = self.props.copy()
+    to_dump['values'] = self.values.to_json(names)
+    return to_dump
+
+  def __eq__(self, that):
+    to_cmp = json.loads(str(self))
+    return to_cmp == json.loads(str(that))
+
+
+# ------------------------------------------------------------------------------
+
+
+class UnionSchema(Schema):
+  """Schema of a union."""
+
+  def __init__(self, schemas):
+    """Initializes a new union schema object.
+
+    Args:
+      schemas: Ordered collection of schema branches in the union.
+    """
+    super(UnionSchema, self).__init__(type=UNION)
+    self._schemas = tuple(schemas)
+
+    # Validate the schema branches:
+
+    # All named schema names are unique:
+    named_branches = tuple(
+        filter(lambda schema: schema.type in NAMED_TYPES, self._schemas))
+    unique_names = frozenset(map(lambda schema: schema.fullname, named_branches))
+    if len(unique_names) != len(named_branches):
+      raise AvroException(
+          'Invalid union branches with duplicate schema name:%s'
+          % ''.join(map(lambda schema: ('\n\t - %s' % schema), self._schemas)))
+
+    # Types are unique within unnamed schemas, and union is not allowed:
+    unnamed_branches = tuple(
+        filter(lambda schema: schema.type not in NAMED_TYPES, self._schemas))
+    unique_types = frozenset(map(lambda schema: schema.type, unnamed_branches))
+    if UNION in unique_types:
+      raise AvroException(
+          'Invalid union branches contain other unions:%s'
+          % ''.join(map(lambda schema: ('\n\t - %s' % schema), self._schemas)))
+    if len(unique_types) != len(unnamed_branches):
+      raise AvroException(
+          'Invalid union branches with duplicate type:%s'
+          % ''.join(map(lambda schema: ('\n\t - %s' % schema), self._schemas)))
+
+  @property
+  def schemas(self):
+    """Returns: the ordered list of schema branches in the union."""
+    return self._schemas
+
+  def to_json(self, names=None):
+    if names is None:
+      names = Names()
+    to_dump = []
+    for schema in self.schemas:
+      to_dump.append(schema.to_json(names))
+    return to_dump
+
+  def __eq__(self, that):
+    to_cmp = json.loads(str(self))
+    return to_cmp == json.loads(str(that))
+
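+# Illustrative sketch:
+#
+#   union = UnionSchema([PrimitiveSchema(NULL), PrimitiveSchema(STRING)])
+#   union.to_json()   # -> ['null', 'string']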
+
+# ------------------------------------------------------------------------------
+
+
+class ErrorUnionSchema(UnionSchema):
+  """Schema representing the declared errors of a protocol message."""
+
+  def __init__(self, schemas):
+    """Initializes an error-union schema.
+
+    Args:
+      schemas: Collection of error schemas.
+    """
+    # TODO: check that string isn't already listed explicitly as an error.
+    # Prepend "string" to handle system errors
+    schemas = [PrimitiveSchema(type=STRING)] + list(schemas)
+    super(ErrorUnionSchema, self).__init__(schemas=schemas)
+
+  def to_json(self, names=None):
+    if names is None:
+      names = Names()
+    to_dump = []
+    for schema in self.schemas:
+      # Don't print the system error schema
+      if schema.type == STRING: continue
+      to_dump.append(schema.to_json(names))
+    return to_dump
+
+
+# ------------------------------------------------------------------------------
+
+
+class RecordSchema(NamedSchema):
+  """Schema of a record."""
+
+  @staticmethod
+  def _MakeField(index, field_desc, names):
+    """Builds field schemas from a list of field JSON descriptors.
+
+    Args:
+      index: 0-based index of the field in the record.
+      field_desc: JSON descriptors of a record field.
+      names: Avro schema tracker.
+    Return:
+      The field schema.
+    """
+    field_schema = SchemaFromJSONData(
+        json_data=field_desc['type'],
+        names=names,
+    )
+    other_props = (
+        dict(FilterKeysOut(items=field_desc, keys=FIELD_RESERVED_PROPS)))
+    return Field(
+        type=field_schema,
+        name=field_desc['name'],
+        index=index,
+        has_default=('default' in field_desc),
+        default=field_desc.get('default', _NO_DEFAULT),
+        order=field_desc.get('order', None),
+        names=names,
+        doc=field_desc.get('doc', None),
+        other_props=other_props,
+    )
+
+  @staticmethod
+  def _MakeFieldList(field_desc_list, names):
+    """Builds field schemas from a list of field JSON descriptors.
+
+    Guarantees field name unicity.
+
+    Args:
+      field_desc_list: collection of field JSON descriptors.
+      names: Avro schema tracker.
+    Yields
+      Field schemas.
+    """
+    for index, field_desc in enumerate(field_desc_list):
+      yield RecordSchema._MakeField(index, field_desc, names)
+
+  @staticmethod
+  def _MakeFieldMap(fields):
+    """Builds the field map.
+
+    Guarantees field name uniqueness.
+
+    Args:
+      fields: Iterable of field schemas.
+    Returns:
+      A read-only map of field schemas, indexed by name.
+    """
+    field_map = {}
+    for field in fields:
+      if field.name in field_map:
+        raise SchemaParseException(
+            'Duplicate record field name %r.' % field.name)
+      field_map[field.name] = field
+    return ImmutableDict(field_map)
+
+  def __init__(
+      self,
+      name,
+      namespace,
+      fields=None,
+      make_fields=None,
+      names=None,
+      record_type=RECORD,
+      doc=None,
+      other_props=None
+  ):
+    """Initializes a new record schema object.
+
+    Args:
+      name: Name of the record (absolute or relative).
+      namespace: Optional namespace the record belongs to, if name is relative.
+      fields: collection of fields to add to this record.
+          Exactly one of fields or make_fields must be specified.
+      make_fields: function creating the fields that belong to the record.
+          The function signature is: make_fields(names) -> ordered field list.
+          Exactly one of fields or make_fields must be specified.
+      names: Optional tracker for named Avro schemas.
+      record_type: Type of the record: one of RECORD, ERROR or REQUEST.
+          Protocol requests are not named.
+      doc: Optional documentation of the record.
+      other_props: Optional map of additional properties of the schema.
+    """
+    if record_type == REQUEST:
+      # Protocol requests are not named:
+      super(NamedSchema, self).__init__(
+          type=REQUEST,
+          other_props=other_props,
+      )
+    elif record_type in [RECORD, ERROR]:
+      # Register this record name in the tracker:
+      super(RecordSchema, self).__init__(
+          type=record_type,
+          name=name,
+          namespace=namespace,
+          names=names,
+          other_props=other_props,
+      )
+    else:
+      raise SchemaParseException(
+          'Invalid record type: %r.' % record_type)
+
+    if record_type in [RECORD, ERROR]:
+      avro_name = names.GetName(name=name, namespace=namespace)
+      nested_names = names.NewWithDefaultNamespace(namespace=avro_name.namespace)
+    elif record_type == REQUEST:
+      # Protocol request has no name: no need to change default namespace:
+      nested_names = names
+
+    if fields is None:
+      fields = make_fields(names=nested_names)
+    else:
+      assert (make_fields is None)
+    self._fields = tuple(fields)
+
+    self._field_map = RecordSchema._MakeFieldMap(self._fields)
+
+    self._props['fields'] = fields
+    if doc is not None:
+      self._props['doc'] = doc
+
+  @property
+  def fields(self):
+    """Returns: the field schemas, as an ordered tuple."""
+    return self._fields
+
+  @property
+  def field_map(self):
+    """Returns: a read-only map of the field schemas index by field names."""
+    return self._field_map
+
+  def to_json(self, names=None):
+    if names is None:
+      names = Names()
+    # Request records don't have names
+    if self.type == REQUEST:
+      return [f.to_json(names) for f in self.fields]
+
+    if self.fullname in names.names:
+      return self.name_ref(names)
+    else:
+      names.names[self.fullname] = self
+
+    to_dump = names.prune_namespace(self.props.copy())
+    to_dump['fields'] = [f.to_json(names) for f in self.fields]
+    return to_dump
+
+  def __eq__(self, that):
+    to_cmp = json.loads(str(self))
+    return to_cmp == json.loads(str(that))
+
+
+# ------------------------------------------------------------------------------
+# Module functions
+
+
+def FilterKeysOut(items, keys):
+  """Filters a collection of (key, value) items.
+
+  Exclude any item whose key belongs to keys.
+
+  Args:
+    items: Dictionary of items to filter the keys out of.
+    keys: Keys to filter out.
+  Yields:
+    Filtered items.
+  """
+  for key, value in items.items():
+    if key in keys: continue
+    yield (key, value)
+
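+# Illustrative sketch (the custom property name is hypothetical):
+#
+#   dict(FilterKeysOut({'type': 'int', 'customProp': 1}, SCHEMA_RESERVED_PROPS))
+#   # -> {'customProp': 1}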
+
+# ------------------------------------------------------------------------------
+
+
+def _SchemaFromJSONString(json_string, names):
+  if json_string in PRIMITIVE_TYPES:
+    return PrimitiveSchema(type=json_string)
+  else:
+    # Look for a known named schema:
+    schema = names.GetSchema(name=json_string)
+    if schema is None:
+      raise SchemaParseException(
+          'Unknown named schema %r, known names: %r.'
+          % (json_string, sorted(names.names)))
+    return schema
+
+
+def _SchemaFromJSONArray(json_array, names):
+  def MakeSchema(desc):
+    return SchemaFromJSONData(json_data=desc, names=names)
+  return UnionSchema(map(MakeSchema, json_array))
+
+
+def _SchemaFromJSONObject(json_object, names):
+  type = json_object.get('type')
+  if type is None:
+    raise SchemaParseException(
+        'Avro schema JSON descriptor has no "type" property: %r' % json_object)
+
+  other_props = dict(
+      FilterKeysOut(items=json_object, keys=SCHEMA_RESERVED_PROPS))
+
+  if type in PRIMITIVE_TYPES:
+    # FIXME should not ignore other properties
+    return PrimitiveSchema(type)
+
+  elif type in NAMED_TYPES:
+    name = json_object.get('name')
+    namespace = json_object.get('namespace', names.default_namespace)
+    if type == FIXED:
+      size = json_object.get('size')
+      return FixedSchema(name, namespace, size, names, other_props)
+    elif type == ENUM:
+      symbols = json_object.get('symbols')
+      doc = json_object.get('doc')
+      return EnumSchema(name, namespace, symbols, names, doc, other_props)
+
+    elif type in [RECORD, ERROR]:
+      field_desc_list = json_object.get('fields', ())
+
+      def MakeFields(names):
+        return tuple(RecordSchema._MakeFieldList(field_desc_list, names))
+
+      return RecordSchema(
+          name=name,
+          namespace=namespace,
+          make_fields=MakeFields,
+          names=names,
+          record_type=type,
+          doc=json_object.get('doc'),
+          other_props=other_props,
+      )
+    else:
+      raise Exception('Internal error: unknown type %r.' % type)
+
+  elif type in VALID_TYPES:
+    # Unnamed, non-primitive Avro type:
+
+    if type == ARRAY:
+      items_desc = json_object.get('items')
+      if items_desc is None:
+        raise SchemaParseException(
+            'Invalid array schema descriptor with no "items" : %r.'
+            % json_object)
+      return ArraySchema(
+          items=SchemaFromJSONData(items_desc, names),
+          other_props=other_props,
+      )
+
+    elif type == MAP:
+      values_desc = json_object.get('values')
+      if values_desc is None:
+        raise SchemaParseException(
+            'Invalid map schema descriptor with no "values" : %r.'
+            % json_object)
+      return MapSchema(
+          values=SchemaFromJSONData(values_desc, names=names),
+          other_props=other_props,
+      )
+
+    elif type == ERROR_UNION:
+      error_desc_list = json_object.get('declared_errors')
+      assert (error_desc_list is not None)
+      error_schemas = map(
+          lambda desc: SchemaFromJSONData(desc, names=names),
+          error_desc_list)
+      return ErrorUnionSchema(schemas=error_schemas)
+
+    else:
+      raise Exception('Internal error: unknown type %r.' % type)
+
+  raise SchemaParseException(
+      'Invalid JSON descriptor for an Avro schema: %r' % json_object)
+
+
+# Parsers for the JSON data types:
+_JSONDataParserTypeMap = {
+  str: _SchemaFromJSONString,
+  list: _SchemaFromJSONArray,
+  dict: _SchemaFromJSONObject,
+}
+
+
+def SchemaFromJSONData(json_data, names=None):
+  """Builds an Avro Schema from its JSON descriptor.
+
+  Args:
+    json_data: JSON data representing the descriptor of the Avro schema.
+    names: Optional tracker for Avro named schemas.
+  Returns:
+    The Avro schema parsed from the JSON descriptor.
+  Raises:
+    SchemaParseException: if the descriptor is invalid.
+  """
+  if names is None:
+    names = Names()
+
+  # Select the appropriate parser based on the JSON data type:
+  parser = _JSONDataParserTypeMap.get(type(json_data))
+  if parser is None:
+    raise SchemaParseException(
+        'Invalid JSON descriptor for an Avro schema: %r.' % json_data)
+  return parser(json_data, names=names)
+
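+# Illustrative sketch:
+#
+#   array_schema = SchemaFromJSONData({'type': 'array', 'items': 'string'})
+#   array_schema.type         # -> 'array'
+#   array_schema.items.type   # -> 'string'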
+
+# ------------------------------------------------------------------------------
+
+
+def Parse(json_string):
+  """Constructs a Schema from its JSON descriptor in text form.
+
+  Args:
+    json_string: String representation of the JSON descriptor of the schema.
+  Returns:
+    The parsed schema.
+  Raises:
+    SchemaParseException: on JSON parsing error,
+        or if the JSON descriptor is invalid.
+  """
+  try:
+    json_data = json.loads(json_string)
+  except Exception as exn:
+    raise SchemaParseException(
+        'Error parsing schema from JSON: %r. '
+        'Error message: %r.'
+        % (json_string, exn))
+
+  # Initialize the names object
+  names = Names()
+
+  # construct the Avro Schema object
+  return SchemaFromJSONData(json_data, names)
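+
+
+# A minimal usage sketch (illustrative only; the record and field names below
+# are hypothetical):
+#
+#   user = Parse("""
+#       {"type": "record", "name": "User", "namespace": "example",
+#        "fields": [{"name": "id", "type": "long"},
+#                   {"name": "name", "type": "string"}]}
+#       """)
+#   user.fullname                    # -> 'example.User'
+#   [f.name for f in user.fields]    # -> ['id', 'name']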
diff --git a/lang/py3/avro/tests/av_bench.py b/lang/py3/avro/tests/av_bench.py
new file mode 100644
index 0000000..fd54230
--- /dev/null
+++ b/lang/py3/avro/tests/av_bench.py
@@ -0,0 +1,119 @@
+#!/usr/bin/env python3
+# -*- mode: python -*-
+# -*- coding: utf-8 -*-
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import random
+import string
+import sys
+import time
+
+import avro.datafile
+import avro.io
+import avro.schema
+
+
+TYPES = ('A', 'CNAME',)
+FILENAME = 'datafile.avr'
+
+
+def GenerateRandomName():
+  return ''.join(random.sample(string.ascii_lowercase, 15))
+
+
+def GenerateRandomIP():
+  return '%s.%s.%s.%s' % (
+      random.randint(0, 255),
+      random.randint(0, 255),
+      random.randint(0, 255),
+      random.randint(0, 255),
+  )
+
+
+def Write(nrecords):
+  """Writes a data file with the specified number of random records.
+
+  Args:
+    nrecords: Number of records to write.
+  """
+  schema_s = """
+  {
+    "type": "record",
+    "name": "Query",
+    "fields" : [
+      {"name": "query", "type": "string"},
+      {"name": "response", "type": "string"},
+      {"name": "type", "type": "string", "default": "A"}
+    ]
+  }
+  """
+  schema = avro.schema.Parse(schema_s)
+  writer = avro.io.DatumWriter(schema)
+
+  with open(FILENAME, 'wb') as out:
+    with avro.datafile.DataFileWriter(
+        out, writer, schema,
+        # codec='deflate'
+    ) as data_writer:
+      for _ in range(nrecords):
+        response = GenerateRandomIP()
+        query = GenerateRandomName()
+        type = random.choice(TYPES)
+        data_writer.append({
+            'query': query,
+            'response': response,
+            'type': type,
+        })
+
+
+def Read(expect_nrecords):
+  """Reads the data file generated by Write()."""
+  with open(FILENAME, 'rb') as f:
+    reader = avro.io.DatumReader()
+    with avro.datafile.DataFileReader(f, reader) as file_reader:
+      nrecords = 0
+      for record in file_reader:
+        nrecords += 1
+      assert (nrecords == expect_nrecords), (
+          'Expecting %d records, got %d.' % (expect_nrecords, nrecords))
+
+
+def Timing(f, *args):
+  s = time.time()
+  f(*args)
+  e = time.time()
+  return e - s
+
+
+def Main(args):
+  nrecords = int(args[1])
+  print('Write %0.4f' % Timing(Write, nrecords))
+  print('Read %0.4f' % Timing(Read, nrecords))
+
+
+if __name__ == '__main__':
+  log_formatter = logging.Formatter(
+      '%(asctime)s %(levelname)s %(filename)s:%(lineno)s : %(message)s')
+  logging.root.setLevel(logging.DEBUG)
+  console_handler = logging.StreamHandler()
+  console_handler.setFormatter(log_formatter)
+  console_handler.setLevel(logging.DEBUG)
+  logging.root.addHandler(console_handler)
+
+  Main(sys.argv)
diff --git a/lang/py3/avro/tests/gen_interop_data.py b/lang/py3/avro/tests/gen_interop_data.py
new file mode 100644
index 0000000..3c170fd
--- /dev/null
+++ b/lang/py3/avro/tests/gen_interop_data.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+# -*- mode: python -*-
+# -*- coding: utf-8 -*-
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+
+from avro import datafile
+from avro import io
+from avro import schema
+
+
+DATUM = {
+    'intField': 12,
+    'longField': 15234324,
+    'stringField': 'hey',
+    'boolField': True,
+    'floatField': 1234.0,
+    'doubleField': -1234.0,
+    'bytesField': b'12312adf',
+    'nullField': None,
+    'arrayField': [5.0, 0.0, 12.0],
+    'mapField': {'a': {'label': 'a'}, 'bee': {'label': 'cee'}},
+    'unionField': 12.0,
+    'enumField': 'C',
+    'fixedField': b'1019181716151413',
+    'recordField': {
+        'label': 'blah',
+        'children': [{'label': 'inner', 'children': []}],
+    },
+}
+
+
+if __name__ == "__main__":
+  interop_schema = schema.Parse(open(sys.argv[1], 'r').read())
+  writer = open(sys.argv[2], 'wb')
+  datum_writer = io.DatumWriter()
+  # NB: not using compression
+  dfw = datafile.DataFileWriter(writer, datum_writer, interop_schema)
+  dfw.append(DATUM)
+  dfw.close()
diff --git a/lang/py3/avro/tests/run_tests.py b/lang/py3/avro/tests/run_tests.py
new file mode 100644
index 0000000..738c8e5
--- /dev/null
+++ b/lang/py3/avro/tests/run_tests.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python3
+# -*- mode: python -*-
+# -*- coding: utf-8 -*-
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Runs all tests.
+
+Usage:
+
+- Run tests from all modules:
+    ./run_tests.py discover [-v]
+
+- Run tests in a specific module:
+    ./run_tests.py test_schema [-v]
+
+- Run a specific test:
+    ./run_tests.py test_schema.TestSchema.testParse [-v]
+
+- Set logging level:
+    PYTHON_LOG_LEVEL=<log-level> ./run_tests.py ...
+    log-level  0 includes all logging.
+    log-level 10 includes debug logging.
+    log-level 20 includes info logging.
+
+- Command-line help:
+  ./run_tests.py -h
+  ./run_tests.py discover -h
+"""
+
+import logging
+import os
+import sys
+import unittest
+
+from avro.tests.test_datafile import *
+from avro.tests.test_datafile_interop import *
+from avro.tests.test_io import *
+from avro.tests.test_ipc import *
+from avro.tests.test_protocol import *
+from avro.tests.test_schema import *
+from avro.tests.test_script import *
+
+
+def SetupLogging():
+  log_level = int(os.environ.get('PYTHON_LOG_LEVEL', logging.INFO))
+
+  log_formatter = logging.Formatter(
+      '%(asctime)s %(levelname)s %(filename)s:%(lineno)s : %(message)s')
+  logging.root.handlers = list()  # list.clear() only exists in python 3.3+
+  logging.root.setLevel(log_level)
+  console_handler = logging.StreamHandler()
+  console_handler.setFormatter(log_formatter)
+  console_handler.setLevel(logging.DEBUG)
+  logging.root.addHandler(console_handler)
+
+
+SetupLogging()
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/lang/py3/avro/tests/sample_http_client.py b/lang/py3/avro/tests/sample_http_client.py
new file mode 100644
index 0000000..10501bd
--- /dev/null
+++ b/lang/py3/avro/tests/sample_http_client.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python3
+# -*- mode: python -*-
+# -*- coding: utf-8 -*-
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import sys
+
+from avro import ipc
+from avro import protocol
+
+MAIL_PROTOCOL_JSON = """\
+{"namespace": "example.proto",
+ "protocol": "Mail",
+
+ "types": [
+     {"name": "Message", "type": "record",
+      "fields": [
+          {"name": "to",   "type": "string"},
+          {"name": "from", "type": "string"},
+          {"name": "body", "type": "string"}
+      ]
+     }
+ ],
+
+ "messages": {
+     "send": {
+         "request": [{"name": "message", "type": "Message"}],
+         "response": "string"
+     },
+     "replay": {
+         "request": [],
+         "response": "string"
+     }
+ }
+}
+"""
+MAIL_PROTOCOL = protocol.Parse(MAIL_PROTOCOL_JSON)
+SERVER_HOST = 'localhost'
+SERVER_PORT = 9090
+
+class UsageError(Exception):
+  def __init__(self, value):
+    self.value = value
+  def __str__(self):
+    return repr(self.value)
+
+def make_requestor(server_host, server_port, protocol):
+  client = ipc.HTTPTransceiver(server_host, server_port)
+  return ipc.Requestor(protocol, client)
+
+if __name__ == '__main__':
+  if len(sys.argv) not in [4, 5]:
+    raise UsageError("Usage: <to> <from> <body> [<count>]")
+
+  # client code - attach to the server and send a message
+  # fill in the Message record
+  message = dict()
+  message['to'] = sys.argv[1]
+  message['from'] = sys.argv[2]
+  message['body'] = sys.argv[3]
+
+  try:
+    num_messages = int(sys.argv[4])
+  except (IndexError, ValueError):
+    num_messages = 1
+
+  # build the parameters for the request
+  params = {}
+  params['message'] = message
+
+  # send the requests and print the result
+  for msg_count in range(num_messages):
+    requestor = make_requestor(SERVER_HOST, SERVER_PORT, MAIL_PROTOCOL)
+    result = requestor.request('send', params)
+    print("Result: " + result)
+
+  # try out a replay message
+  requestor = make_requestor(SERVER_HOST, SERVER_PORT, MAIL_PROTOCOL)
+  result = requestor.request('replay', dict())
+  print("Replay Result: " + result)
diff --git a/lang/py3/avro/tests/sample_http_server.py b/lang/py3/avro/tests/sample_http_server.py
new file mode 100644
index 0000000..ba6ed24
--- /dev/null
+++ b/lang/py3/avro/tests/sample_http_server.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+# -*- mode: python -*-
+# -*- coding: utf-8 -*-
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from http.server import BaseHTTPRequestHandler, HTTPServer
+from avro import ipc
+from avro import protocol
+
+MAIL_PROTOCOL_JSON = """\
+{"namespace": "example.proto",
+ "protocol": "Mail",
+
+ "types": [
+     {"name": "Message", "type": "record",
+      "fields": [
+          {"name": "to",   "type": "string"},
+          {"name": "from", "type": "string"},
+          {"name": "body", "type": "string"}
+      ]
+     }
+ ],
+
+ "messages": {
+     "send": {
+         "request": [{"name": "message", "type": "Message"}],
+         "response": "string"
+     },
+     "replay": {
+         "request": [],
+         "response": "string"
+     }
+ }
+}
+"""
+MAIL_PROTOCOL = protocol.Parse(MAIL_PROTOCOL_JSON)
+SERVER_ADDRESS = ('localhost', 9090)
+
+class MailResponder(ipc.Responder):
+  def __init__(self):
+    ipc.Responder.__init__(self, MAIL_PROTOCOL)
+
+  def invoke(self, message, request):
+    if message.name == 'send':
+      request_content = request['message']
+      response = "Sent message to %(to)s from %(from)s with body %(body)s" % \
+                 request_content
+      return response
+    elif message.name == 'replay':
+      return 'replay'
+
+class MailHandler(BaseHTTPRequestHandler):
+  def do_POST(self):
+    self.responder = MailResponder()
+    call_request_reader = ipc.FramedReader(self.rfile)
+    call_request = call_request_reader.read_framed_message()
+    resp_body = self.responder.respond(call_request)
+    self.send_response(200)
+    self.send_header('Content-Type', 'avro/binary')
+    self.end_headers()
+    resp_writer = ipc.FramedWriter(self.wfile)
+    resp_writer.write_framed_message(resp_body)
+
+if __name__ == '__main__':
+  mail_server = HTTPServer(SERVER_ADDRESS, MailHandler)
+  mail_server.allow_reuse_address = True
+  mail_server.serve_forever()
diff --git a/lang/py3/avro/tests/test_datafile.py b/lang/py3/avro/tests/test_datafile.py
new file mode 100644
index 0000000..044afcb
--- /dev/null
+++ b/lang/py3/avro/tests/test_datafile.py
@@ -0,0 +1,278 @@
+#!/usr/bin/env python3
+# -*- mode: python -*-
+# -*- coding: utf-8 -*-
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import os
+import tempfile
+import unittest
+
+from avro import datafile
+from avro import io
+from avro import schema
+
+
+# ------------------------------------------------------------------------------
+
+
+SCHEMAS_TO_VALIDATE = (
+  ('"null"', None),
+  ('"boolean"', True),
+  ('"string"', 'adsfasdf09809dsf-=adsf'),
+  ('"bytes"', b'12345abcd'),
+  ('"int"', 1234),
+  ('"long"', 1234),
+  ('"float"', 1234.0),
+  ('"double"', 1234.0),
+  ('{"type": "fixed", "name": "Test", "size": 1}', b'B'),
+  ('{"type": "enum", "name": "Test", "symbols": ["A", "B"]}', 'B'),
+  ('{"type": "array", "items": "long"}', [1, 3, 2]),
+  ('{"type": "map", "values": "long"}', {'a': 1, 'b': 3, 'c': 2}),
+  ('["string", "null", "long"]', None),
+
+  ("""
+   {
+     "type": "record",
+     "name": "Test",
+     "fields": [{"name": "f", "type": "long"}]
+   }
+   """,
+   {'f': 5}),
+
+  ("""
+   {
+     "type": "record",
+     "name": "Lisp",
+     "fields": [{
+        "name": "value",
+        "type": [
+          "null",
+          "string",
+          {
+            "type": "record",
+            "name": "Cons",
+            "fields": [{"name": "car", "type": "Lisp"},
+                       {"name": "cdr", "type": "Lisp"}]
+          }
+        ]
+     }]
+   }
+   """,
+   {'value': {'car': {'value': 'head'}, 'cdr': {'value': None}}}),
+)
+
+CODECS_TO_VALIDATE = ('null', 'deflate')
+
+try:
+  import snappy
+  CODECS_TO_VALIDATE += ('snappy',)
+except ImportError:
+  logging.info('Snappy not present, will skip testing it.')
+
+
+# ------------------------------------------------------------------------------
+
+
+class TestDataFile(unittest.TestCase):
+
+  @classmethod
+  def setUpClass(cls):
+    cls._temp_dir = (
+        tempfile.TemporaryDirectory(prefix=cls.__name__, suffix='.tmp'))
+    logging.debug('Created temporary directory: %s', cls._temp_dir.name)
+
+  @classmethod
+  def tearDownClass(cls):
+    logging.debug('Cleaning up temporary directory: %s', cls._temp_dir.name)
+    cls._temp_dir.cleanup()
+
+  def NewTempFile(self):
+    """Creates a new temporary file.
+
+    File is automatically cleaned up after test.
+
+    Returns:
+      The path of the new temporary file.
+    """
+    temp_file = tempfile.NamedTemporaryFile(
+        dir=self._temp_dir.name,
+        prefix='test',
+        suffix='.avro',
+        delete=False,
+    )
+    return temp_file.name
+
+  def testRoundTrip(self):
+    correct = 0
+    for iexample, (writer_schema, datum) in enumerate(SCHEMAS_TO_VALIDATE):
+      for codec in CODECS_TO_VALIDATE:
+        file_path = self.NewTempFile()
+
+        # Write the datum this many times in the data file:
+        nitems = 10
+
+        logging.debug(
+            'Performing round-trip with codec %r in file %s for example #%d\n'
+            'Writing datum: %r using writer schema:\n%s',
+            codec, file_path, iexample,
+            datum, writer_schema)
+
+        logging.debug('Creating data file %r', file_path)
+        with open(file_path, 'wb') as writer:
+          datum_writer = io.DatumWriter()
+          schema_object = schema.Parse(writer_schema)
+          with datafile.DataFileWriter(
+              writer=writer,
+              datum_writer=datum_writer,
+              writer_schema=schema_object,
+              codec=codec,
+          ) as dfw:
+            for _ in range(nitems):
+              dfw.append(datum)
+
+        logging.debug('Reading data from %r', file_path)
+        with open(file_path, 'rb') as reader:
+          datum_reader = io.DatumReader()
+          with datafile.DataFileReader(reader, datum_reader) as dfr:
+            round_trip_data = list(dfr)
+
+        logging.debug(
+            'Round-trip data has %d items: %r',
+            len(round_trip_data), round_trip_data)
+
+        if ([datum] * nitems) == round_trip_data:
+          correct += 1
+        else:
+          logging.error(
+              'Round-trip data does not match:\n'
+              'Expect: %r\n'
+              'Actual: %r',
+              [datum] * nitems,
+              round_trip_data)
+
+    self.assertEqual(
+        correct,
+        len(CODECS_TO_VALIDATE) * len(SCHEMAS_TO_VALIDATE))
+
+  def testAppend(self):
+    correct = 0
+    for iexample, (writer_schema, datum) in enumerate(SCHEMAS_TO_VALIDATE):
+      for codec in CODECS_TO_VALIDATE:
+        file_path = self.NewTempFile()
+
+        logging.debug(
+            'Performing append with codec %r in file %s for example #%d\n'
+            'Writing datum: %r using writer schema:\n%s',
+            codec, file_path, iexample,
+            datum, writer_schema)
+
+        logging.debug('Creating data file %r', file_path)
+        with open(file_path, 'wb') as writer:
+          datum_writer = io.DatumWriter()
+          schema_object = schema.Parse(writer_schema)
+          with datafile.DataFileWriter(
+              writer=writer,
+              datum_writer=datum_writer,
+              writer_schema=schema_object,
+              codec=codec,
+          ) as dfw:
+            dfw.append(datum)
+
+        logging.debug('Appending data to %r', file_path)
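+        # Omitting the writer schema puts DataFileWriter in append mode: it
+        # reuses the header and sync marker already present in the file.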
+        for _ in range(9):
+          with open(file_path, 'ab+') as writer:
+            with datafile.DataFileWriter(writer, io.DatumWriter()) as dfw:
+              dfw.append(datum)
+
+        logging.debug('Reading appended data from %r', file_path)
+        with open(file_path, 'rb') as reader:
+          datum_reader = io.DatumReader()
+          with datafile.DataFileReader(reader, datum_reader) as dfr:
+            appended_data = list(dfr)
+
+        logging.debug(
+            'Appended data has %d items: %r',
+            len(appended_data), appended_data)
+
+        if ([datum] * 10) == appended_data:
+          correct += 1
+        else:
+          logging.error(
+              'Appended data does not match:\n'
+              'Expect: %r\n'
+              'Actual: %r',
+              [datum] * 10,
+              appended_data)
+
+    self.assertEqual(
+        correct,
+        len(CODECS_TO_VALIDATE) * len(SCHEMAS_TO_VALIDATE))
+
+  def testContextManager(self):
+    file_path = self.NewTempFile()
+
+    # Test the writer with a 'with' statement.
+    with open(file_path, 'wb') as writer:
+      datum_writer = io.DatumWriter()
+      sample_schema, sample_datum = SCHEMAS_TO_VALIDATE[1]
+      schema_object = schema.Parse(sample_schema)
+      with datafile.DataFileWriter(writer, datum_writer, schema_object) as dfw:
+        dfw.append(sample_datum)
+      self.assertTrue(writer.closed)
+
+    # Test the reader with a 'with' statement.
+    datums = []
+    with open(file_path, 'rb') as reader:
+      datum_reader = io.DatumReader()
+      with datafile.DataFileReader(reader, datum_reader) as dfr:
+        for datum in dfr:
+          datums.append(datum)
+      self.assertTrue(reader.closed)
+
+  def testMetadata(self):
+    file_path = self.NewTempFile()
+
+    # Test the writer with a 'with' statement.
+    with open(file_path, 'wb') as writer:
+      datum_writer = io.DatumWriter()
+      sample_schema, sample_datum = SCHEMAS_TO_VALIDATE[1]
+      schema_object = schema.Parse(sample_schema)
+      with datafile.DataFileWriter(writer, datum_writer, schema_object) as dfw:
+        dfw.SetMeta('test.string', 'foo')
+        dfw.SetMeta('test.number', '1')
+        dfw.append(sample_datum)
+      self.assertTrue(writer.closed)
+
+    # Test the reader with a 'with' statement.
+    datums = []
+    with open(file_path, 'rb') as reader:
+      datum_reader = io.DatumReader()
+      with datafile.DataFileReader(reader, datum_reader) as dfr:
+        self.assertEqual(b'foo', dfr.GetMeta('test.string'))
+        self.assertEqual(b'1', dfr.GetMeta('test.number'))
+        for datum in dfr:
+          datums.append(datum)
+      self.assertTrue(reader.closed)
+
+
+# ------------------------------------------------------------------------------
+
+
+if __name__ == '__main__':
+  raise Exception('Use run_tests.py')
diff --git a/lang/py3/avro/tests/test_datafile_interop.py b/lang/py3/avro/tests/test_datafile_interop.py
new file mode 100644
index 0000000..93212c8
--- /dev/null
+++ b/lang/py3/avro/tests/test_datafile_interop.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python3
+# -*- mode: python -*-
+# -*- coding: utf-8 -*-
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import os
+import tempfile
+import unittest
+
+from avro import datafile
+from avro import io
+from avro import schema
+
+
+def GetInteropSchema():
+  test_dir = os.path.dirname(os.path.abspath(__file__))
+  schema_json_path = os.path.join(test_dir, 'interop.avsc')
+  with open(schema_json_path, 'r') as f:
+    schema_json = f.read()
+  return schema.Parse(schema_json)
+
+
+INTEROP_SCHEMA = GetInteropSchema()
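+# A sample datum conforming to the interop.avsc schema loaded above;
+# testInterop() writes it to a data file and reads it back unchanged.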
+INTEROP_DATUM = {
+    'intField': 12,
+    'longField': 15234324,
+    'stringField': 'hey',
+    'boolField': True,
+    'floatField': 1234.0,
+    'doubleField': -1234.0,
+    'bytesField': b'12312adf',
+    'nullField': None,
+    'arrayField': [5.0, 0.0, 12.0],
+    'mapField': {'a': {'label': 'a'}, 'bee': {'label': 'cee'}},
+    'unionField': 12.0,
+    'enumField': 'C',
+    'fixedField': b'1019181716151413',
+    'recordField': {
+        'label': 'blah',
+        'children': [{'label': 'inner', 'children': []}],
+    },
+}
+
+
+def WriteDataFile(path, datum, schema):
+  datum_writer = io.DatumWriter()
+  with open(path, 'wb') as writer:
+    # NB: not using compression
+    with datafile.DataFileWriter(writer, datum_writer, schema) as dfw:
+      dfw.append(datum)
+
+
+class TestDataFileInterop(unittest.TestCase):
+  def testInterop(self):
+    with tempfile.NamedTemporaryFile() as temp_path:
+      WriteDataFile(temp_path.name, INTEROP_DATUM, INTEROP_SCHEMA)
+
+      # read data in binary from file
+      datum_reader = io.DatumReader()
+      with open(temp_path.name, 'rb') as reader:
+        dfr = datafile.DataFileReader(reader, datum_reader)
+        for datum in dfr:
+          self.assertEqual(INTEROP_DATUM, datum)
+
+
+if __name__ == '__main__':
+  raise Exception('Use run_tests.py')
diff --git a/lang/py3/avro/tests/test_io.py b/lang/py3/avro/tests/test_io.py
new file mode 100644
index 0000000..8349ce5
--- /dev/null
+++ b/lang/py3/avro/tests/test_io.py
@@ -0,0 +1,351 @@
+#!/usr/bin/env python3
+# -*- mode: python -*-
+# -*- coding: utf-8 -*-
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import binascii
+import io
+import logging
+import sys
+import unittest
+
+from avro import io as avro_io
+from avro import schema
+
+
+SCHEMAS_TO_VALIDATE = (
+  ('"null"', None),
+  ('"boolean"', True),
+  ('"string"', 'adsfasdf09809dsf-=adsf'),
+  ('"bytes"', b'12345abcd'),
+  ('"int"', 1234),
+  ('"long"', 1234),
+  ('"float"', 1234.0),
+  ('"double"', 1234.0),
+  ('{"type": "fixed", "name": "Test", "size": 1}', b'B'),
+  ('{"type": "enum", "name": "Test", "symbols": ["A", "B"]}', 'B'),
+  ('{"type": "array", "items": "long"}', [1, 3, 2]),
+  ('{"type": "map", "values": "long"}', {'a': 1, 'b': 3, 'c': 2}),
+  ('["string", "null", "long"]', None),
+  ("""\
+   {"type": "record",
+    "name": "Test",
+    "fields": [{"name": "f", "type": "long"}]}
+   """, {'f': 5}),
+  ("""
+   {
+     "type": "record",
+     "name": "Lisp",
+     "fields": [{
+       "name": "value",
+       "type": [
+         "null",
+         "string",
+         {
+           "type": "record",
+           "name": "Cons",
+           "fields": [{"name": "car", "type": "Lisp"},
+                      {"name": "cdr", "type": "Lisp"}]
+         }
+       ]
+     }]
+   }
+   """, {'value': {'car': {'value': 'head'}, 'cdr': {'value': None}}}),
+)
+
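+# Expected hex byte sequences for Avro's variable-length zig-zag encoding of
+# ints and longs: the value is zig-zag mapped (0 -> 0, -1 -> 1, 1 -> 2, ...)
+# and then written as a base-128 varint, e.g. 64 -> 128 -> bytes 80 01.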
+BINARY_ENCODINGS = (
+  (0, '00'),
+  (-1, '01'),
+  (1, '02'),
+  (-2, '03'),
+  (2, '04'),
+  (-64, '7f'),
+  (64, '80 01'),
+  (8192, '80 80 01'),
+  (-8193, '81 80 01'),
+)
+
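+# Triples of (field type, default value as JSON text, expected decoded Python
+# value), used by testDefaultValue() to exercise reader-schema defaults.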
+DEFAULT_VALUE_EXAMPLES = (
+  ('"null"', 'null', None),
+  ('"boolean"', 'true', True),
+  ('"string"', '"foo"', 'foo'),
+  ('"bytes"', '"\u00FF\u00FF"', '\xff\xff'),
+  ('"int"', '5', 5),
+  ('"long"', '5', 5),
+  ('"float"', '1.1', 1.1),
+  ('"double"', '1.1', 1.1),
+  ('{"type": "fixed", "name": "F", "size": 2}', '"\u00FF\u00FF"', '\xff\xff'),
+  ('{"type": "enum", "name": "F", "symbols": ["FOO", "BAR"]}', '"FOO"', 'FOO'),
+  ('{"type": "array", "items": "int"}', '[1, 2, 3]', [1, 2, 3]),
+  ('{"type": "map", "values": "int"}', '{"a": 1, "b": 2}', {'a': 1, 'b': 2}),
+  ('["int", "null"]', '5', 5),
+  ('{"type": "record", "name": "F", "fields": [{"name": "A", "type": "int"}]}',
+   '{"A": 5}', {'A': 5}),
+)
+
+LONG_RECORD_SCHEMA = schema.Parse("""
+{
+  "type": "record",
+  "name": "Test",
+  "fields": [
+    {"name": "A", "type": "int"},
+    {"name": "B", "type": "int"},
+    {"name": "C", "type": "int"},
+    {"name": "D", "type": "int"},
+    {"name": "E", "type": "int"},
+    {"name": "F", "type": "int"},
+    {"name": "G", "type": "int"}
+  ]
+}
+""")
+
+LONG_RECORD_DATUM = {'A': 1, 'B': 2, 'C': 3, 'D': 4, 'E': 5, 'F': 6, 'G': 7}
+
+
+def avro_hexlify(reader):
+  """Return the hex value, as a string, of a binary-encoded int or long."""
+  hex_bytes = []
+  current_byte = reader.read(1)
+  hex_bytes.append(binascii.hexlify(current_byte).decode())
+  while (ord(current_byte) & 0x80) != 0:
+    current_byte = reader.read(1)
+    hex_bytes.append(binascii.hexlify(current_byte).decode())
+  return ' '.join(hex_bytes)
+
+
+def write_datum(datum, writer_schema):
+  writer = io.BytesIO()
+  encoder = avro_io.BinaryEncoder(writer)
+  datum_writer = avro_io.DatumWriter(writer_schema)
+  datum_writer.write(datum, encoder)
+  return writer, encoder, datum_writer
+
+
+def read_datum(buffer, writer_schema, reader_schema=None):
+  reader = io.BytesIO(buffer.getvalue())
+  decoder = avro_io.BinaryDecoder(reader)
+  datum_reader = avro_io.DatumReader(writer_schema, reader_schema)
+  return datum_reader.read(decoder)
+
+
+def check_binary_encoding(number_type):
+  logging.debug('Testing binary encoding for type %s', number_type)
+  correct = 0
+  for datum, hex_encoding in BINARY_ENCODINGS:
+    logging.debug('Datum: %d', datum)
+    logging.debug('Correct Encoding: %s', hex_encoding)
+
+    writer_schema = schema.Parse('"%s"' % number_type.lower())
+    writer, encoder, datum_writer = write_datum(datum, writer_schema)
+    writer.seek(0)
+    hex_val = avro_hexlify(writer)
+
+    logging.debug('Read Encoding: %s', hex_val)
+    if hex_encoding == hex_val: correct += 1
+  return correct
+
+
+def check_skip_number(number_type):
+  logging.debug('Testing skip number for %s', number_type)
+  correct = 0
+  for value_to_skip, hex_encoding in BINARY_ENCODINGS:
+    VALUE_TO_READ = 6253
+    logging.debug('Value to Skip: %d', value_to_skip)
+
+    # write the value to skip and a known value
+    writer_schema = schema.Parse('"%s"' % number_type.lower())
+    writer, encoder, datum_writer = write_datum(value_to_skip, writer_schema)
+    datum_writer.write(VALUE_TO_READ, encoder)
+
+    # skip the value
+    reader = io.BytesIO(writer.getvalue())
+    decoder = avro_io.BinaryDecoder(reader)
+    decoder.skip_long()
+
+    # read data from string buffer
+    datum_reader = avro_io.DatumReader(writer_schema)
+    read_value = datum_reader.read(decoder)
+
+    logging.debug('Read Value: %d', read_value)
+    if read_value == VALUE_TO_READ: correct += 1
+  return correct
+
+
+# ------------------------------------------------------------------------------
+
+
+class TestIO(unittest.TestCase):
+  #
+  # BASIC FUNCTIONALITY
+  #
+
+  def testValidate(self):
+    passed = 0
+    for example_schema, datum in SCHEMAS_TO_VALIDATE:
+      logging.debug('Schema: %r', example_schema)
+      logging.debug('Datum: %r', datum)
+      validated = avro_io.Validate(schema.Parse(example_schema), datum)
+      logging.debug('Valid: %s', validated)
+      if validated: passed += 1
+    self.assertEqual(passed, len(SCHEMAS_TO_VALIDATE))
+
+  def testRoundTrip(self):
+    correct = 0
+    for example_schema, datum in SCHEMAS_TO_VALIDATE:
+      logging.debug('Schema: %s', example_schema)
+      logging.debug('Datum: %s', datum)
+
+      writer_schema = schema.Parse(example_schema)
+      writer, encoder, datum_writer = write_datum(datum, writer_schema)
+      round_trip_datum = read_datum(writer, writer_schema)
+
+      logging.debug('Round Trip Datum: %s', round_trip_datum)
+      if datum == round_trip_datum: correct += 1
+    self.assertEqual(correct, len(SCHEMAS_TO_VALIDATE))
+
+  #
+  # BINARY ENCODING OF INT AND LONG
+  #
+
+  def testBinaryIntEncoding(self):
+    correct = check_binary_encoding('int')
+    self.assertEqual(correct, len(BINARY_ENCODINGS))
+
+  def testBinaryLongEncoding(self):
+    correct = check_binary_encoding('long')
+    self.assertEqual(correct, len(BINARY_ENCODINGS))
+
+  def testSkipInt(self):
+    correct = check_skip_number('int')
+    self.assertEqual(correct, len(BINARY_ENCODINGS))
+
+  def testSkipLong(self):
+    correct = check_skip_number('long')
+    self.assertEqual(correct, len(BINARY_ENCODINGS))
+
+  #
+  # SCHEMA RESOLUTION
+  #
+
+  def testSchemaPromotion(self):
+    # note that checking writer_schema.type in read_data
+    # allows us to handle promotion correctly
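+    # Per the Avro spec, int promotes to long, float, or double; long to float
+    # or double; and float to double. The nested loops below pair each writer
+    # schema with every later (wider) reader schema.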
+    promotable_schemas = ['"int"', '"long"', '"float"', '"double"']
+    incorrect = 0
+    for i, ws in enumerate(promotable_schemas):
+      writer_schema = schema.Parse(ws)
+      datum_to_write = 219
+      for rs in promotable_schemas[i + 1:]:
+        reader_schema = schema.Parse(rs)
+        writer, enc, dw = write_datum(datum_to_write, writer_schema)
+        datum_read = read_datum(writer, writer_schema, reader_schema)
+        logging.debug('Writer: %s Reader: %s', writer_schema, reader_schema)
+        logging.debug('Datum Read: %s', datum_read)
+        if datum_read != datum_to_write: incorrect += 1
+    self.assertEqual(incorrect, 0)
+
+  def testUnknownSymbol(self):
+    writer_schema = schema.Parse("""\
+      {"type": "enum", "name": "Test",
+       "symbols": ["FOO", "BAR"]}""")
+    datum_to_write = 'FOO'
+
+    reader_schema = schema.Parse("""\
+      {"type": "enum", "name": "Test",
+       "symbols": ["BAR", "BAZ"]}""")
+
+    writer, encoder, datum_writer = write_datum(datum_to_write, writer_schema)
+    reader = io.BytesIO(writer.getvalue())
+    decoder = avro_io.BinaryDecoder(reader)
+    datum_reader = avro_io.DatumReader(writer_schema, reader_schema)
+    self.assertRaises(avro_io.SchemaResolutionException, datum_reader.read, decoder)
+
+  def testDefaultValue(self):
+    writer_schema = LONG_RECORD_SCHEMA
+    datum_to_write = LONG_RECORD_DATUM
+
+    correct = 0
+    for field_type, default_json, default_datum in DEFAULT_VALUE_EXAMPLES:
+      reader_schema = schema.Parse("""\
+        {"type": "record", "name": "Test",
+         "fields": [{"name": "H", "type": %s, "default": %s}]}
+        """ % (field_type, default_json))
+      datum_to_read = {'H': default_datum}
+
+      writer, encoder, datum_writer = write_datum(datum_to_write, writer_schema)
+      datum_read = read_datum(writer, writer_schema, reader_schema)
+      logging.debug('Datum Read: %s', datum_read)
+      if datum_to_read == datum_read: correct += 1
+    self.assertEqual(correct, len(DEFAULT_VALUE_EXAMPLES))
+
+  def testNoDefaultValue(self):
+    writer_schema = LONG_RECORD_SCHEMA
+    datum_to_write = LONG_RECORD_DATUM
+
+    reader_schema = schema.Parse("""\
+      {"type": "record", "name": "Test",
+       "fields": [{"name": "H", "type": "int"}]}""")
+
+    writer, encoder, datum_writer = write_datum(datum_to_write, writer_schema)
+    reader = io.BytesIO(writer.getvalue())
+    decoder = avro_io.BinaryDecoder(reader)
+    datum_reader = avro_io.DatumReader(writer_schema, reader_schema)
+    self.assertRaises(avro_io.SchemaResolutionException, datum_reader.read, decoder)
+
+  def testProjection(self):
+    writer_schema = LONG_RECORD_SCHEMA
+    datum_to_write = LONG_RECORD_DATUM
+
+    reader_schema = schema.Parse("""\
+      {"type": "record", "name": "Test",
+       "fields": [{"name": "E", "type": "int"},
+                  {"name": "F", "type": "int"}]}""")
+    datum_to_read = {'E': 5, 'F': 6}
+
+    writer, encoder, datum_writer = write_datum(datum_to_write, writer_schema)
+    datum_read = read_datum(writer, writer_schema, reader_schema)
+    logging.debug('Datum Read: %s', datum_read)
+    self.assertEqual(datum_to_read, datum_read)
+
+  def testFieldOrder(self):
+    writer_schema = LONG_RECORD_SCHEMA
+    datum_to_write = LONG_RECORD_DATUM
+
+    reader_schema = schema.Parse("""\
+      {"type": "record", "name": "Test",
+       "fields": [{"name": "F", "type": "int"},
+                  {"name": "E", "type": "int"}]}""")
+    datum_to_read = {'E': 5, 'F': 6}
+
+    writer, encoder, datum_writer = write_datum(datum_to_write, writer_schema)
+    datum_read = read_datum(writer, writer_schema, reader_schema)
+    logging.debug('Datum Read: %s', datum_read)
+    self.assertEqual(datum_to_read, datum_read)
+
+  def testTypeException(self):
+    writer_schema = schema.Parse("""\
+      {"type": "record", "name": "Test",
+       "fields": [{"name": "F", "type": "int"},
+                  {"name": "E", "type": "int"}]}""")
+    datum_to_write = {'E': 5, 'F': 'Bad'}
+    self.assertRaises(
+        avro_io.AvroTypeException, write_datum, datum_to_write, writer_schema)
+
+
+if __name__ == '__main__':
+  raise Exception('Use run_tests.py')
diff --git a/lang/py3/avro/tests/test_ipc.py b/lang/py3/avro/tests/test_ipc.py
new file mode 100644
index 0000000..aa480b8
--- /dev/null
+++ b/lang/py3/avro/tests/test_ipc.py
@@ -0,0 +1,158 @@
+#!/usr/bin/env python3
+# -*- mode: python -*-
+# -*- coding: utf-8 -*-
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+There are currently no IPC tests within python, in part because there are no
+servers yet available.
+"""
+
+import logging
+import threading
+import time
+import unittest
+
+from avro import ipc
+from avro import protocol
+from avro import schema
+
+
+def NowMS():
+  return int(time.time() * 1000)
+
+
+ECHO_PROTOCOL_JSON = """
+{
+  "protocol" : "Echo",
+  "namespace" : "org.apache.avro.ipc.echo",
+  "types" : [ {
+    "type" : "record",
+    "name" : "Ping",
+    "fields" : [ {
+      "name" : "timestamp",
+      "type" : "long",
+      "default" : -1
+    }, {
+      "name" : "text",
+      "type" : "string",
+      "default" : ""
+    } ]
+  }, {
+    "type" : "record",
+    "name" : "Pong",
+    "fields" : [ {
+      "name" : "timestamp",
+      "type" : "long",
+      "default" : -1
+    }, {
+      "name" : "ping",
+      "type" : "Ping"
+    } ]
+  } ],
+  "messages" : {
+    "ping" : {
+      "request" : [ {
+        "name" : "ping",
+        "type" : "Ping"
+      } ],
+      "response" : "Pong"
+    }
+  }
+}
+"""
+
+
+ECHO_PROTOCOL = protocol.Parse(ECHO_PROTOCOL_JSON)
+
+
+class EchoResponder(ipc.Responder):
+  def __init__(self):
+    super(EchoResponder, self).__init__(
+        local_protocol=ECHO_PROTOCOL,
+    )
+
+  def Invoke(self, message, request):
+    logging.info('Message: %s', message)
+    logging.info('Request: %s', request)
+    ping = request['ping']
+    return {'timestamp': NowMS(), 'ping': ping}
+
+
+class TestIPC(unittest.TestCase):
+
+  def __init__(self, *args, **kwargs):
+    super(TestIPC, self).__init__(*args, **kwargs)
+    # Reference to an Echo RPC over HTTP server:
+    self._server = None
+
+  def StartEchoServer(self):
+    self._server = ipc.AvroIpcHttpServer(
+        interface='localhost',
+        port=0,
+        responder=EchoResponder(),
+    )
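+    # Binding to port 0 lets the OS pick a free ephemeral port; the actual
+    # address is read back from self._server.server_address below.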
+
+    def ServerThread():
+      self._server.serve_forever()
+
+    self._server_thread = threading.Thread(target=ServerThread)
+    self._server_thread.start()
+
+    logging.info(
+        'Echo RPC Server listening on %s:%s',
+        *self._server.server_address)
+    logging.info('RPC socket: %s', self._server.socket)
+
+  def StopEchoServer(self):
+    assert (self._server is not None)
+    self._server.shutdown()
+    self._server_thread.join()
+    self._server.server_close()
+    self._server = None
+
+  def testEchoService(self):
+    """Tests client-side of the Echo service."""
+    self.StartEchoServer()
+    try:
+      (server_host, server_port) = self._server.server_address
+
+      transceiver = ipc.HTTPTransceiver(host=server_host, port=server_port)
+      requestor = ipc.Requestor(
+          local_protocol=ECHO_PROTOCOL,
+          transceiver=transceiver,
+      )
+      response = requestor.Request(
+          message_name='ping',
+          request_datum={'ping': {'timestamp': 31415, 'text': 'hello ping'}},
+      )
+      logging.info('Received echo response: %s', response)
+
+      response = requestor.Request(
+          message_name='ping',
+          request_datum={'ping': {'timestamp': 123456, 'text': 'hello again'}},
+      )
+      logging.info('Received echo response: %s', response)
+
+      transceiver.Close()
+
+    finally:
+      self.StopEchoServer()
+
+
+if __name__ == '__main__':
+  raise Exception('Use run_tests.py')
diff --git a/lang/py3/avro/tests/test_protocol.py b/lang/py3/avro/tests/test_protocol.py
new file mode 100644
index 0000000..db6f57e
--- /dev/null
+++ b/lang/py3/avro/tests/test_protocol.py
@@ -0,0 +1,504 @@
+#!/usr/bin/env python3
+# -*- mode: python -*-
+# -*- coding: utf-8 -*-
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Test the protocol parsing logic.
+"""
+
+import logging
+import traceback
+import unittest
+
+from avro import protocol
+
+
+# ------------------------------------------------------------------------------
+
+
+class ExampleProtocol(object):
+  def __init__(
+      self,
+      protocol_string,
+      valid=True,
+  ):
+    self._protocol_string = protocol_string
+    self._valid = valid
+
+  @property
+  def protocol_string(self):
+    return self._protocol_string
+
+  @property
+  def valid(self):
+    return self._valid
+
+
+# ------------------------------------------------------------------------------
+# Protocol test cases:
+
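+# HELLO_WORLD is also used by the namespace tests below; entries in EXAMPLES
+# that are expected to fail parsing are constructed with valid=False.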
+HELLO_WORLD = ExampleProtocol("""
+{
+  "namespace": "com.acme",
+  "protocol": "HelloWorld",
+  "types": [
+    {
+      "name": "Greeting",
+      "type": "record",
+      "fields": [{"name": "message", "type": "string"}]
+    },
+    {
+      "name": "Curse",
+      "type": "error",
+      "fields": [{"name": "message", "type": "string"}]
+    }
+  ],
+  "messages": {
+    "hello": {
+      "request": [{"name": "greeting", "type": "Greeting" }],
+      "response": "Greeting",
+      "errors": ["Curse"]
+    }
+  }
+}
+""")
+
+EXAMPLES = [
+  HELLO_WORLD,
+
+  ExampleProtocol(
+  """
+  {
+    "namespace": "org.apache.avro.test",
+    "protocol": "Simple",
+    "types": [
+      {"name": "Kind", "type": "enum", "symbols": ["FOO","BAR","BAZ"]},
+      {"name": "MD5", "type": "fixed", "size": 16},
+      {"name": "TestRecord", "type": "record",
+       "fields": [
+         {"name": "name", "type": "string", "order": "ignore"},
+         {"name": "kind", "type": "Kind", "order": "descending"},
+         {"name": "hash", "type": "MD5"}
+       ]
+      },
+      {"name": "TestError", "type": "error",
+       "fields": [
+         {"name": "message", "type": "string"}
+       ]
+      }
+    ],
+    "messages": {
+      "hello": {
+        "request": [{"name": "greeting", "type": "string"}],
+        "response": "string"
+      },
+      "echo": {
+        "request": [{"name": "record", "type": "TestRecord"}],
+        "response": "TestRecord"
+      },
+      "add": {
+        "request": [{"name": "arg1", "type": "int"},
+                    {"name": "arg2", "type": "int"}],
+        "response": "int"
+      },
+      "echoBytes": {
+        "request": [{"name": "data", "type": "bytes"}],
+        "response": "bytes"
+      },
+      "error": {
+        "request": [],
+        "response": "null",
+        "errors": ["TestError"]
+      }
+    }
+  }
+  """),
+
+  ExampleProtocol(
+  """
+  {
+    "namespace": "org.apache.avro.test.namespace",
+    "protocol": "TestNamespace",
+    "types": [
+      {"name": "org.apache.avro.test.util.MD5", "type": "fixed", "size": 16},
+      {
+        "name": "TestRecord",
+        "type": "record",
+        "fields": [{"name": "hash", "type": "org.apache.avro.test.util.MD5"}]
+      },
+      {
+        "name": "TestError",
+        "namespace": "org.apache.avro.test.errors",
+        "type": "error",
+        "fields": [{"name": "message", "type": "string"}]
+      }
+    ],
+    "messages": {
+      "echo": {
+        "request": [{"name": "record", "type": "TestRecord"}],
+        "response": "TestRecord"
+      },
+      "error": {
+        "request": [],
+        "response": "null",
+        "errors": ["org.apache.avro.test.errors.TestError"]
+      }
+    }
+  }
+  """),
+
+  ExampleProtocol(
+  """
+  {
+    "namespace": "org.apache.avro.test.namespace",
+    "protocol": "TestImplicitNamespace",
+    "types": [
+      {"name": "org.apache.avro.test.util.MD5", "type": "fixed", "size": 16},
+      {
+        "name": "ReferencedRecord",
+        "type": "record",
+        "fields": [{"name": "foo", "type": "string"}]
+      },
+      {
+        "name": "TestRecord",
+        "type": "record",
+        "fields": [
+          {"name": "hash", "type": "org.apache.avro.test.util.MD5"},
+          {"name": "unqalified", "type": "ReferencedRecord"}
+        ]
+      },
+      {
+        "name": "TestError",
+        "type": "error",
+        "fields": [{"name": "message", "type": "string"}]
+      }
+    ],
+    "messages": {
+      "echo": {
+        "request": [
+          {"name": "qualified", "type": "org.apache.avro.test.namespace.TestRecord"}
+        ],
+        "response": "TestRecord"
+      },
+      "error": {
+        "request": [],
+        "response": "null",
+        "errors": ["org.apache.avro.test.namespace.TestError"]
+      }
+    }
+  }
+  """),
+
+  ExampleProtocol(
+  """
+  {
+    "namespace": "org.apache.avro.test.namespace",
+    "protocol": "TestNamespaceTwo",
+    "types": [
+      {"name": "org.apache.avro.test.util.MD5", "type": "fixed", "size": 16},
+      {
+        "name": "ReferencedRecord",
+        "namespace": "org.apache.avro.other.namespace",
+        "type": "record",
+        "fields": [{"name": "foo", "type": "string"}]
+      },
+      {
+        "name": "TestRecord",
+        "type": "record",
+        "fields": [
+          {"name": "hash", "type": "org.apache.avro.test.util.MD5"},
+          {
+            "name": "qualified",
+            "type": "org.apache.avro.other.namespace.ReferencedRecord"
+          }
+        ]
+      },
+      {
+        "name": "TestError",
+        "type": "error",
+        "fields": [{"name": "message", "type": "string"}]
+      }
+    ],
+    "messages": {
+      "echo": {
+        "request": [
+          {
+            "name": "qualified",
+            "type": "org.apache.avro.test.namespace.TestRecord"
+          }
+        ],
+        "response": "TestRecord"
+      },
+      "error": {
+        "request": [],
+        "response": "null",
+        "errors": ["org.apache.avro.test.namespace.TestError"]
+      }
+    }
+  }
+  """),
+
+  ExampleProtocol(
+  """
+  {
+    "namespace": "org.apache.avro.test.namespace",
+    "protocol": "TestValidRepeatedName",
+    "types": [
+      {"name": "org.apache.avro.test.util.MD5", "type": "fixed", "size": 16},
+      {
+        "name": "ReferencedRecord",
+        "namespace": "org.apache.avro.other.namespace",
+        "type": "record",
+        "fields": [{"name": "foo", "type": "string"}]
+      },
+      {
+        "name": "ReferencedRecord",
+        "type": "record",
+        "fields": [{"name": "bar", "type": "double"}]
+      },
+      {
+        "name": "TestError",
+        "type": "error",
+        "fields": [{"name": "message", "type": "string"}]
+      }
+    ],
+    "messages": {
+      "echo": {
+        "request": [{"name": "qualified", "type": "ReferencedRecord"}],
+        "response": "org.apache.avro.other.namespace.ReferencedRecord"
+      },
+      "error": {
+        "request": [],
+        "response": "null",
+        "errors": ["org.apache.avro.test.namespace.TestError"]
+      }
+    }
+  }
+  """),
+
+  ExampleProtocol(
+  """
+  {
+    "namespace": "org.apache.avro.test.namespace",
+    "protocol": "TestInvalidRepeatedName",
+    "types": [
+      {"name": "org.apache.avro.test.util.MD5", "type": "fixed", "size": 16},
+      {
+        "name": "ReferencedRecord",
+        "type": "record",
+        "fields": [{"name": "foo", "type": "string"}]
+      },
+      {
+        "name": "ReferencedRecord",
+        "type": "record",
+        "fields": [{"name": "bar", "type": "double"}]
+      },
+      {
+        "name": "TestError",
+        "type": "error",
+        "fields": [{"name": "message", "type": "string"}]
+      }
+    ],
+    "messages": {
+      "echo": {
+        "request": [{"name": "qualified", "type": "ReferencedRecord"}],
+        "response": "org.apache.avro.other.namespace.ReferencedRecord"
+      },
+      "error": {
+        "request": [],
+        "response": "null",
+        "errors": ["org.apache.avro.test.namespace.TestError"]
+      }
+    }
+  }
+  """,
+  valid=False),
+
+  ExampleProtocol(
+  """
+  {
+    "namespace": "org.apache.avro.test",
+    "protocol": "BulkData",
+    "types": [],
+    "messages": {
+      "read": {
+        "request": [],
+        "response": "bytes"
+      },
+      "write": {
+        "request": [ {"name": "data", "type": "bytes"} ],
+        "response": "null"
+      }
+    }
+  }
+  """),
+
+  ExampleProtocol(
+  """
+  {
+    "protocol": "API",
+    "namespace": "xyz.api",
+    "types": [
+      {
+        "type": "enum",
+        "name": "Symbology",
+        "namespace": "xyz.api.product",
+        "symbols": [ "OPRA", "CUSIP", "ISIN", "SEDOL" ]
+      },
+      {
+        "type": "record",
+        "name": "Symbol",
+        "namespace": "xyz.api.product",
+        "fields": [ {
+          "name": "symbology",
+          "type": "xyz.api.product.Symbology"
+        }, {
+          "name": "symbol",
+          "type": "string"
+        } ]
+      },
+      {
+        "type": "record",
+        "name": "MultiSymbol",
+        "namespace": "xyz.api.product",
+        "fields": [{
+          "name": "symbols",
+          "type": {
+            "type": "map",
+            "values": "xyz.api.product.Symbol"
+          }
+        }]
+      }
+    ],
+    "messages": {}
+  }
+  """),
+]
+# End of EXAMPLES
+
+
+VALID_EXAMPLES = [e for e in EXAMPLES if e.valid]
+
+
+# ------------------------------------------------------------------------------
+
+
+class TestProtocol(unittest.TestCase):
+
+  def testParse(self):
+    correct = 0
+    for iexample, example in enumerate(EXAMPLES):
+      logging.debug(
+          'Parsing protocol #%d:\n%s',
+          iexample, example.protocol_string)
+      try:
+        parsed = protocol.Parse(example.protocol_string)
+        if example.valid:
+          correct += 1
+        else:
+          self.fail(
+              'Invalid protocol was parsed:\n%s' % example.protocol_string)
+      except Exception as exn:
+        if example.valid:
+          self.fail(
+              'Valid protocol failed to parse: %s\n%s'
+              % (example.protocol_string, traceback.format_exc()))
+        else:
+          if logging.getLogger().getEffectiveLevel() <= 5:
+            logging.debug('Expected error:\n%s', traceback.format_exc())
+          else:
+            logging.debug('Expected error: %r', exn)
+          correct += 1
+
+    self.assertEqual(
+      correct,
+      len(EXAMPLES),
+      'Parse behavior correct on %d out of %d protocols.'
+      % (correct, len(EXAMPLES)))
+
+  def testInnerNamespaceSet(self):
+    proto = protocol.Parse(HELLO_WORLD.protocol_string)
+    self.assertEqual(proto.namespace, 'com.acme')
+    greeting_type = proto.type_map['com.acme.Greeting']
+    self.assertEqual(greeting_type.namespace, 'com.acme')
+
+  def testInnerNamespaceNotRendered(self):
+    proto = protocol.Parse(HELLO_WORLD.protocol_string)
+    self.assertEqual('com.acme.Greeting', proto.types[0].fullname)
+    self.assertEqual('Greeting', proto.types[0].name)
+    # but there shouldn't be 'namespace' rendered to json on the inner type
+    self.assertFalse('namespace' in proto.to_json()['types'][0])
+
+  def testValidCastToStringAfterParse(self):
+    """
+    Test that the string generated by an Avro Protocol object is,
+    in fact, a valid Avro protocol.
+    """
+    num_correct = 0
+    for example in VALID_EXAMPLES:
+      proto = protocol.Parse(example.protocol_string)
+      try:
+        protocol.Parse(str(proto))
+        logging.debug(
+            'Successfully reparsed protocol:\n%s',
+            example.protocol_string)
+        num_correct += 1
+      except Exception:
+        logging.debug(
+            'Failed to reparse protocol:\n%s',
+            example.protocol_string)
+
+    fail_msg = (
+      'Cast to string success on %d out of %d protocols'
+      % (num_correct, len(VALID_EXAMPLES)))
+    self.assertEqual(num_correct, len(VALID_EXAMPLES), fail_msg)
+
+  def testEquivalenceAfterRoundTrip(self):
+    """
+    1. Given a string, parse it to get Avro protocol "original".
+    2. Serialize "original" to a string and parse that string
+         to generate Avro protocol "round trip".
+    3. Ensure "original" and "round trip" protocols are equivalent.
+    """
+    num_correct = 0
+    for example in VALID_EXAMPLES:
+      original_protocol = protocol.Parse(example.protocol_string)
+      round_trip_protocol = protocol.Parse(str(original_protocol))
+
+      if original_protocol == round_trip_protocol:
+        num_correct += 1
+        logging.debug(
+            'Successful round-trip for protocol:\n%s',
+            example.protocol_string)
+      else:
+        self.fail(
+            'Round-trip failure for protocol:\n%s\nOriginal protocol:\n%s'
+            % (example.protocol_string, str(original_protocol)))
+
+    self.assertEqual(
+        num_correct,
+        len(VALID_EXAMPLES),
+        'Round trip success on %d out of %d protocols.'
+        % (num_correct, len(VALID_EXAMPLES)))
+
+
+# ------------------------------------------------------------------------------
+
+
+if __name__ == '__main__':
+  raise Exception('Use run_tests.py')
diff --git a/lang/py3/avro/tests/test_schema.py b/lang/py3/avro/tests/test_schema.py
new file mode 100644
index 0000000..3aaa6b3
--- /dev/null
+++ b/lang/py3/avro/tests/test_schema.py
@@ -0,0 +1,625 @@
+#!/usr/bin/env python3
+# -*- mode: python -*-
+# -*- coding: utf-8 -*-
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Test the schema parsing logic.
+"""
+
+import logging
+import traceback
+import unittest
+
+from avro import schema
+
+
+# ------------------------------------------------------------------------------
+
+
+class ExampleSchema(object):
+  def __init__(self, schema_string, valid, name='', comment=''):
+    self._schema_string = schema_string
+    self._valid = valid
+    self._name = name or schema_string # default to schema_string for name
+    self.comment = comment
+
+  @property
+  def schema_string(self):
+    return self._schema_string
+
+  @property
+  def valid(self):
+    return self._valid
+
+  @property
+  def name(self):
+    return self._name
+
+
+# ------------------------------------------------------------------------------
+# Example Schemas
+
+
+def MakePrimitiveExamples():
+  examples = []
+  for primitive_type in schema.PRIMITIVE_TYPES:
+    examples.append(ExampleSchema('"%s"' % primitive_type, valid=True))
+    examples.append(
+        ExampleSchema('{"type": "%s"}' % primitive_type, valid=True))
+  return examples
+
+PRIMITIVE_EXAMPLES = MakePrimitiveExamples() + [
+  ExampleSchema('"True"', valid=False),
+  ExampleSchema('True', valid=False),
+  ExampleSchema('{"no_type": "test"}', valid=False),
+  ExampleSchema('{"type": "panther"}', valid=False),
+]
+
+FIXED_EXAMPLES = [
+  ExampleSchema('{"type": "fixed", "name": "Test", "size": 1}', valid=True),
+  ExampleSchema("""
+    {
+      "type": "fixed",
+      "name": "MyFixed",
+      "namespace": "org.apache.hadoop.avro",
+      "size": 1
+    }
+    """,
+    valid=True),
+  ExampleSchema("""
+    {
+      "type": "fixed",
+      "name": "Missing size"
+    }
+    """,
+    valid=False),
+  ExampleSchema("""
+    {
+      "type": "fixed",
+      "size": 314
+    }
+    """,
+    valid=False),
+]
+
+ENUM_EXAMPLES = [
+  ExampleSchema(
+    '{"type": "enum", "name": "Test", "symbols": ["A", "B"]}',
+    valid=True),
+  ExampleSchema("""
+    {
+      "type": "enum",
+      "name": "Status",
+      "symbols": "Normal Caution Critical"
+    }
+    """,
+    valid=False),
+  ExampleSchema("""
+    {
+      "type": "enum",
+      "name": [0, 1, 1, 2, 3, 5, 8],
+      "symbols": ["Golden", "Mean"]
+    }
+    """,
+    valid=False),
+  ExampleSchema("""
+    {
+      "type": "enum",
+      "symbols": ["I", "will", "fail", "no", "name"]
+    }
+    """,
+    valid=False),
+  ExampleSchema("""
+    {
+      "type": "enum",
+      "name": "Test"
+      "symbols": ["AA", "AA"]
+    }
+    """,
+    valid=False),
+]
+
+ARRAY_EXAMPLES = [
+  ExampleSchema('{"type": "array", "items": "long"}', valid=True),
+  ExampleSchema("""
+    {
+      "type": "array",
+      "items": {"type": "enum", "name": "Test", "symbols": ["A", "B"]}
+    }
+    """,
+    valid=True),
+]
+
+MAP_EXAMPLES = [
+  ExampleSchema('{"type": "map", "values": "long"}', True),
+  ExampleSchema("""
+    {
+      "type": "map",
+      "values": {"type": "enum", "name": "Test", "symbols": ["A", "B"]}
+    }
+    """,
+    valid=True,
+  ),
+]
+
+UNION_EXAMPLES = [
+  ExampleSchema('["string", "null", "long"]', valid=True),
+  ExampleSchema('["null", "null"]', valid=False),
+  ExampleSchema('["long", "long"]', valid=False),
+  ExampleSchema("""
+    [
+      {"type": "array", "items": "long"},
+      {"type": "array", "items": "string"}
+    ]
+    """,
+    valid=False,
+  ),
+]
+
+RECORD_EXAMPLES = [
+  ExampleSchema("""
+    {
+      "type": "record",
+      "name": "Test",
+      "fields": [{"name": "f", "type": "long"}]
+    }
+    """,
+    valid=True,
+  ),
+  ExampleSchema("""
+    {
+      "type": "error",
+      "name": "Test",
+      "fields": [{"name": "f", "type": "long"}]
+    }
+    """,
+    valid=True,
+  ),
+  ExampleSchema("""
+    {
+      "type": "record",
+      "name": "Node",
+      "fields": [
+        {"name": "label", "type": "string"},
+        {"name": "children", "type": {"type": "array", "items": "Node"}}
+      ]
+    }
+    """,
+    valid=True,
+  ),
+  ExampleSchema("""
+    {
+      "type": "record",
+      "name": "Lisp",
+      "fields": [{
+        "name": "value",
+        "type": [
+          "null",
+          "string",
+          {
+            "type": "record",
+            "name": "Cons",
+            "fields": [{"name": "car", "type": "Lisp"},
+                       {"name": "cdr", "type": "Lisp"}]
+          }
+        ]
+      }]
+    }
+    """,
+    valid=True,
+  ),
+  ExampleSchema("""
+    {
+      "type": "record",
+      "name": "HandshakeRequest",
+      "namespace": "org.apache.avro.ipc",
+      "fields": [
+        {
+          "name": "clientHash",
+          "type": {"type": "fixed", "name": "MD5", "size": 16}
+        },
+        {"name": "clientProtocol", "type": ["null", "string"]},
+        {"name": "serverHash", "type": "MD5"},
+        {
+          "name": "meta",
+          "type": ["null", {"type": "map", "values": "bytes"}]
+        }
+      ]
+    }
+    """,
+    valid=True,
+  ),
+  ExampleSchema("""
+    {
+      "type": "record",
+      "name": "HandshakeResponse",
+      "namespace": "org.apache.avro.ipc",
+      "fields": [
+        {
+          "name": "match",
+          "type": {
+            "type": "enum",
+            "name": "HandshakeMatch",
+            "symbols": ["BOTH", "CLIENT", "NONE"]
+          }
+        },
+        {"name": "serverProtocol", "type": ["null", "string"]},
+        {
+          "name": "serverHash",
+          "type": ["null", {"name": "MD5", "size": 16, "type": "fixed"}]
+        },
+        {
+          "name": "meta",
+          "type": ["null", {"type": "map", "values": "bytes"}]
+        }
+      ]
+    }
+    """,
+    valid=True,
+  ),
+  ExampleSchema("""
+    {
+      "type": "record",
+      "name": "Interop",
+      "namespace": "org.apache.avro",
+      "fields": [
+        {"name": "intField", "type": "int"},
+        {"name": "longField", "type": "long"},
+        {"name": "stringField", "type": "string"},
+        {"name": "boolField", "type": "boolean"},
+        {"name": "floatField", "type": "float"},
+        {"name": "doubleField", "type": "double"},
+        {"name": "bytesField", "type": "bytes"},
+        {"name": "nullField", "type": "null"},
+        {"name": "arrayField", "type": {"type": "array", "items": "double"}},
+        {
+          "name": "mapField",
+          "type": {
+            "type": "map",
+            "values": {"name": "Foo",
+                       "type": "record",
+                       "fields": [{"name": "label", "type": "string"}]}
+          }
+        },
+        {
+          "name": "unionField",
+          "type": ["boolean", "double", {"type": "array", "items": "bytes"}]
+        },
+        {
+          "name": "enumField",
+          "type": {"type": "enum", "name": "Kind", "symbols": ["A", "B", "C"]}
+        },
+        {
+          "name": "fixedField",
+          "type": {"type": "fixed", "name": "MD5", "size": 16}
+        },
+        {
+          "name": "recordField",
+          "type": {"type": "record",
+                   "name": "Node",
+                   "fields": [{"name": "label", "type": "string"},
+                              {"name": "children",
+                               "type": {"type": "array",
+                                        "items": "Node"}}]}
+        }
+      ]
+    }
+    """,
+    valid=True,
+  ),
+  ExampleSchema("""
+    {
+      "type": "record",
+      "name": "ipAddr",
+      "fields": [{
+        "name": "addr",
+        "type": [
+          {"name": "IPv6", "type": "fixed", "size": 16},
+          {"name": "IPv4", "type": "fixed", "size": 4}
+        ]
+      }]
+    }
+    """,
+    valid=True,
+  ),
+  ExampleSchema("""
+    {
+      "type": "record",
+      "name": "Address",
+      "fields": [
+        {"type": "string"},
+        {"type": "string", "name": "City"}
+      ]
+    }
+    """,
+    valid=False,
+  ),
+  ExampleSchema("""
+    {
+      "type": "record",
+      "name": "Event",
+      "fields": [
+        {"name": "Sponsor"},
+        {"name": "City", "type": "string"}
+      ]
+    }
+    """,
+    valid=False,
+  ),
+  ExampleSchema("""
+    {
+      "type": "record",
+      "fields": "His vision, from the constantly passing bars,"
+      "name", "Rainer"
+    }
+    """,
+    valid=False,
+  ),
+  ExampleSchema("""
+    {
+      "name": ["Tom", "Jerry"],
+      "type": "record",
+      "fields": [{"name": "name", "type": "string"}]
+    }
+    """,
+    valid=False,
+  ),
+]
+
+DOC_EXAMPLES = [
+  ExampleSchema("""
+    {
+      "type": "record",
+      "name": "TestDoc",
+      "doc":  "Doc string",
+      "fields": [{"name": "name", "type": "string", "doc": "Doc String"}]
+    }
+    """,
+    valid=True,
+  ),
+  ExampleSchema("""
+    {"type": "enum", "name": "Test", "symbols": ["A", "B"], "doc": "Doc String"}
+    """,
+    valid=True,
+  ),
+]
+
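+# Schemas carrying arbitrary extra ("cp_*") attributes; testOtherAttributes()
+# verifies that such properties survive parsing and round-tripping.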
+OTHER_PROP_EXAMPLES = [
+  ExampleSchema("""
+    {
+      "type": "record",
+      "name": "TestRecord",
+      "cp_string": "string",
+      "cp_int": 1,
+      "cp_array": [ 1, 2, 3, 4],
+      "fields": [
+        {"name": "f1", "type": "string", "cp_object": {"a":1,"b":2}},
+        {"name": "f2", "type": "long", "cp_null": null}
+      ]
+    }
+    """,
+    valid=True,
+  ),
+  ExampleSchema(
+    '{"type": "map", "values": "long", "cp_boolean": true}',
+    valid=True,
+  ),
+  ExampleSchema("""
+    {
+      "type": "enum",
+      "name": "TestEnum",
+      "symbols": ["one", "two", "three"],
+      "cp_float": 1.0
+    }
+    """,
+    valid=True,
+  ),
+]
+
+EXAMPLES = PRIMITIVE_EXAMPLES
+EXAMPLES += FIXED_EXAMPLES
+EXAMPLES += ENUM_EXAMPLES
+EXAMPLES += ARRAY_EXAMPLES
+EXAMPLES += MAP_EXAMPLES
+EXAMPLES += UNION_EXAMPLES
+EXAMPLES += RECORD_EXAMPLES
+EXAMPLES += DOC_EXAMPLES
+
+VALID_EXAMPLES = [e for e in EXAMPLES if e.valid]
+
+
+# ------------------------------------------------------------------------------
+
+
+class TestSchema(unittest.TestCase):
+
+  def testCorrectRecursiveExtraction(self):
+    parsed = schema.Parse("""
+      {
+        "type": "record",
+        "name": "X",
+        "fields": [{
+          "name": "y",
+          "type": {
+            "type": "record",
+            "name": "Y",
+            "fields": [{"name": "Z", "type": "X"}, {"name": "W", "type": "X"}]
+          }
+        }]
+      }
+    """)
+    logging.debug('Parsed schema:\n%s', parsed)
+    logging.debug('Fields: %s', parsed.fields)
+    t = schema.Parse(str(parsed.fields[0].type))
+    # If we've made it this far, the subschema was reasonably stringified;
+    # it could be reparsed.
+    self.assertEqual("X", t.fields[0].type.name)
+
+  def testParse(self):
+    correct = 0
+    for iexample, example in enumerate(EXAMPLES):
+      logging.debug('Testing example #%d\n%s', iexample, example.schema_string)
+      try:
+        schema.Parse(example.schema_string)
+        if example.valid:
+          correct += 1
+        else:
+          self.fail('Invalid schema was parsed:\n%s' % example.schema_string)
+      except Exception as exn:
+        if example.valid:
+          self.fail(
+              'Valid schema failed to parse: %r\n%s'
+              % (example.schema_string, traceback.format_exc()))
+        else:
+          if logging.getLogger().getEffectiveLevel() <= 5:
+            logging.debug('Expected error:\n%s', traceback.format_exc())
+          else:
+            logging.debug('Expected error: %r', exn)
+          correct += 1
+
+    self.assertEqual(
+        correct,
+        len(EXAMPLES),
+        'Parse behavior correct on %d out of %d schemas.'
+        % (correct, len(EXAMPLES)),
+    )
+
+  def testValidCastToStringAfterParse(self):
+    """
+    Test that the string generated by an Avro Schema object
+    is, in fact, a valid Avro schema.
+    """
+    correct = 0
+    for example in VALID_EXAMPLES:
+      schema_data = schema.Parse(example.schema_string)
+      schema.Parse(str(schema_data))
+      correct += 1
+
+    fail_msg = "Cast to string success on %d out of %d schemas" % \
+      (correct, len(VALID_EXAMPLES))
+    self.assertEqual(correct, len(VALID_EXAMPLES), fail_msg)
+
+  def testEquivalenceAfterRoundTrip(self):
+    """
+    1. Given a string, parse it to get Avro schema "original".
+    2. Serialize "original" to a string and parse that string
+         to generate Avro schema "round trip".
+    3. Ensure "original" and "round trip" schemas are equivalent.
+    """
+    correct = 0
+    for example in VALID_EXAMPLES:
+      original_schema = schema.Parse(example.schema_string)
+      round_trip_schema = schema.Parse(str(original_schema))
+      if original_schema == round_trip_schema:
+        correct += 1
+        debug_msg = "%s: ROUND TRIP SUCCESS" % example.name
+      else:
+        debug_msg = "%s: ROUND TRIP FAILURE" % example.name
+        self.fail(
+            "Round trip failure: %s, %s, %s"
+            % (example.name, original_schema, str(original_schema)))
+
+    fail_msg = "Round trip success on %d out of %d schemas" % \
+      (correct, len(VALID_EXAMPLES))
+    self.assertEqual(correct, len(VALID_EXAMPLES), fail_msg)
+
+  def testFullname(self):
+    """The fullname is determined in one of the following ways:
+     * A name and namespace are both specified.  For example,
+       one might use "name": "X", "namespace": "org.foo"
+       to indicate the fullname "org.foo.X".
+     * A fullname is specified.  If the name specified contains
+       a dot, then it is assumed to be a fullname, and any
+       namespace also specified is ignored.  For example,
+       use "name": "org.foo.X" to indicate the
+       fullname "org.foo.X".
+     * A name only is specified, i.e., a name that contains no
+       dots.  In this case the namespace is taken from the most
+       tightly enclosing schema or protocol.  For example,
+       if "name": "X" is specified, and this occurs
+       within a field of the record definition
+       of "org.foo.Y", then the fullname is "org.foo.X".
+
+    References to previously defined names are as in the latter
+    two cases above: if they contain a dot they are a fullname, if
+    they do not contain a dot, the namespace is the namespace of
+    the enclosing definition.
+
+    Primitive type names have no namespace and their names may
+    not be defined in any namespace.  A schema may only contain
+    multiple definitions of a fullname if the definitions are
+    equivalent.
+    """
+    # relative name and namespace specified
+    self.assertEqual(schema.Name('a', 'o.a.h').fullname, 'o.a.h.a')
+
+    # absolute name and namespace specified
+    self.assertEqual(schema.Name('.a', 'o.a.h').fullname, '.a')
+
+    # absolute name and namespace specified
+    fullname = schema.Name('a.b.c.d', 'o.a.h').fullname
+    self.assertEqual(fullname, 'a.b.c.d')
+
+  def testDocAttributes(self):
+    correct = 0
+    for example in DOC_EXAMPLES:
+      original_schema = schema.Parse(example.schema_string)
+      if original_schema.doc is not None:
+        correct += 1
+      if original_schema.type == 'record':
+        for f in original_schema.fields:
+          if f.doc is None:
+            self.fail(
+                "Failed to preserve 'doc' in fields: "
+                + example.schema_string)
+    self.assertEqual(correct, len(DOC_EXAMPLES))
+
+  def testOtherAttributes(self):
+    correct = 0
+    props = {}
+    for example in OTHER_PROP_EXAMPLES:
+      original_schema = schema.Parse(example.schema_string)
+      round_trip_schema = schema.Parse(str(original_schema))
+      self.assertEqual(
+          original_schema.other_props, round_trip_schema.other_props)
+      if original_schema.type == "record":
+        field_props = 0
+        for f in original_schema.fields:
+          if f.other_props:
+            props.update(f.other_props)
+            field_props += 1
+        self.assertEqual(field_props, len(original_schema.fields))
+      if original_schema.other_props:
+        props.update(original_schema.other_props)
+        correct += 1
+    for k in props:
+      v = props[k]
+      if k == "cp_boolean":
+        self.assertEqual(type(v), bool)
+      elif k == "cp_int":
+        self.assertEqual(type(v), int)
+      elif k == "cp_object":
+        self.assertEqual(type(v), dict)
+      elif k == "cp_float":
+        self.assertEqual(type(v), float)
+      elif k == "cp_array":
+        self.assertEqual(type(v), list)
+    self.assertEqual(correct, len(OTHER_PROP_EXAMPLES))
+
+
+# ------------------------------------------------------------------------------
+
+
+if __name__ == '__main__':
+  raise Exception('Use run_tests.py')
diff --git a/lang/py3/avro/tests/test_script.py b/lang/py3/avro/tests/test_script.py
new file mode 100644
index 0000000..8fc7271
--- /dev/null
+++ b/lang/py3/avro/tests/test_script.py
@@ -0,0 +1,321 @@
+#!/usr/bin/env python3
+# -*- mode: python -*-
+# -*- coding: utf-8 -*-
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import csv
+import io
+import json
+import logging
+import operator
+import os
+import subprocess
+import tempfile
+import unittest
+
+import avro.datafile
+import avro.io
+import avro.schema
+
+
+# ------------------------------------------------------------------------------
+
+
+NUM_RECORDS = 7
+
+SCHEMA = """
+{
+  "namespace": "test.avro",
+  "name": "LooneyTunes",
+  "type": "record",
+  "fields": [
+    {"name": "first", "type": "string"},
+    {"name": "last", "type": "string"},
+    {"name": "type", "type": "string"}
+  ]
+}
+"""
+
+LOONIES = (
+    ('daffy', 'duck', 'duck'),
+    ('bugs', 'bunny', 'bunny'),
+    ('tweety', '', 'bird'),
+    ('road', 'runner', 'bird'),
+    ('wile', 'e', 'coyote'),
+    ('pepe', 'le pew', 'skunk'),
+    ('foghorn', 'leghorn', 'rooster'),
+)
+
+
+def looney_records():
+  for f, l, t in LOONIES:
+    yield {'first': f, 'last' : l, 'type' : t}
+
+
+def GetRootDir():
+  test_dir = os.path.dirname(os.path.abspath(__file__))
+  root_dir = os.path.dirname(os.path.dirname(test_dir))
+  return root_dir
+
+
+def GetScriptPath():
+  root_dir = GetRootDir()
+  avro_script_path = os.path.join(root_dir, 'scripts', 'avro')
+  assert os.path.exists(avro_script_path), \
+      ('Avro script not found: %r' % avro_script_path)
+  return avro_script_path
+
+
+# Absolute path of the 'avro' script:
+SCRIPT_PATH = GetScriptPath()
+
+
+def RunScript(*args, stdin=None):
+  command = [SCRIPT_PATH]
+  command.extend(args)
+  env = dict(os.environ)
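+  # Prepend the source tree to PYTHONPATH so the spawned script imports this
+  # in-tree avro package instead of any system-installed copy.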
+  env['PYTHONPATH'] = '%s:%s' % (GetRootDir(), env.get('PYTHONPATH', ''))
+  logging.debug('Running command:\n%s', ' \\\n\t'.join(command))
+  process = subprocess.Popen(
+      args=command,
+      env=env,
+      stdin=stdin,
+      stdout=subprocess.PIPE,
+      stderr=subprocess.PIPE,
+  )
+  (out, err) = process.communicate()
+  assert (process.returncode == os.EX_OK), \
+      ('Command %r failed with exit code %r, output %r and error %r'
+       % (command, process.returncode, out, err))
+  return out
+
+
+_TEST_JSON_VALUE = {
+  'first': 'daffy',
+  'last': 'duck',
+  'type': 'duck',
+}
+
+
+# ------------------------------------------------------------------------------
+
+
+class TestCat(unittest.TestCase):
+
+  @staticmethod
+  def WriteAvroFile(file_path):
+    schema = avro.schema.Parse(SCHEMA)
+    with open(file_path, 'wb') as writer:
+      with avro.datafile.DataFileWriter(
+          writer=writer,
+          datum_writer=avro.io.DatumWriter(),
+          writer_schema=schema,
+      ) as writer:
+        for record in looney_records():
+          writer.append(record)
+
+  def setUp(self):
+    # TODO: flag to not delete the files
+    delete = True
+    self._avro_file = (
+        tempfile.NamedTemporaryFile(prefix='test-', suffix='.avro', delete=delete))
+    TestCat.WriteAvroFile(self._avro_file.name)
+
+  def tearDown(self):
+    self._avro_file.close()
+
+  def _RunCat(self, *args, raw=False):
+    """Runs the specified 'avro cat test-file ...' command.
+
+    Args:
+      *args: extra parameters to the 'avro cat' command.
+      raw: If set, return stdout as raw bytes; otherwise decode it as UTF-8.
+    Returns:
+      The command stdout (as bytes if raw is set, or else as string).
+    """
+    out = RunScript('cat', self._avro_file.name, *args)
+    if raw:
+      return out
+    else:
+      return out.decode('utf-8')
+
+  def testPrint(self):
+    lines = self._RunCat().splitlines()
+    self.assertEqual(len(lines), NUM_RECORDS)
+
+  def testFilter(self):
+    lines = self._RunCat('--filter', "r['type']=='bird'").splitlines()
+    self.assertEqual(len(lines), 2)
+
+  def testSkip(self):
+    skip = 3
+    lines = self._RunCat('--skip', str(skip)).splitlines()
+    self.assertEqual(len(lines), NUM_RECORDS - skip)
+
+  def testCsv(self):
+    reader = csv.reader(io.StringIO(self._RunCat('-f', 'csv')))
+    self.assertEqual(len(list(reader)), NUM_RECORDS)
+
+  def testCsvHeader(self):
+    reader = csv.DictReader(io.StringIO(self._RunCat('-f', 'csv', '--header')))
+    expected = {'type': 'duck', 'last': 'duck', 'first': 'daffy'}
+
+    data = next(reader)
+    self.assertEqual(expected, data)
+
+  def testPrintSchema(self):
+    out = self._RunCat('--print-schema')
+    self.assertEqual(json.loads(out)['namespace'], 'test.avro')
+
+  def testHelp(self):
+    # Just check that these options are accepted
+    self._RunCat('-h')
+    self._RunCat('--help')
+
+  def testJsonPretty(self):
+    out = self._RunCat('--format', 'json-pretty', '-n', '1')
+    self.assertEqual(
+        json.loads(out),
+        _TEST_JSON_VALUE,
+        'Output mismatch\n'
+        'Expect: %r\n'
+        'Actual: %r'
+        % (_TEST_JSON_VALUE, out))
+
+  def testVersion(self):
+    out = RunScript('cat', '--version').decode('utf-8')
+
+  def testFiles(self):
+    lines = self._RunCat(self._avro_file.name).splitlines()
+    self.assertEqual(len(lines), 2 * NUM_RECORDS)
+
+  def testFields(self):
+    # One field selection (no comma)
+    lines = self._RunCat('--fields', 'last').splitlines()
+    self.assertEqual(json.loads(lines[0]), {'last': 'duck'})
+
+    # Field selection (with comma and space)
+    lines = self._RunCat('--fields', 'first, last').splitlines()
+    self.assertEqual(json.loads(lines[0]), {'first': 'daffy', 'last': 'duck'})
+
+    # Empty fields should get all
+    lines = self._RunCat('--fields', '').splitlines()
+    self.assertEqual(
+        json.loads(lines[0]),
+        {'first': 'daffy', 'last': 'duck', 'type': 'duck'})
+
+    # Non-existent fields are ignored
+    lines = self._RunCat('--fields', 'first,last,age').splitlines()
+    self.assertEqual(
+        json.loads(lines[0]),
+        {'first': 'daffy', 'last': 'duck'})
+
+
+# ------------------------------------------------------------------------------
+
+
+class TestWrite(unittest.TestCase):
+
+  def setUp(self):
+    delete = False
+
+    self._json_file = tempfile.NamedTemporaryFile(
+        prefix='test-', suffix='.json', delete=delete)
+    with open(self._json_file.name, 'w') as f:
+      for record in looney_records():
+        json.dump(record, f)
+        f.write('\n')
+
+    self._csv_file = tempfile.NamedTemporaryFile(
+        prefix='test-', suffix='.csv', delete=delete)
+    with open(self._csv_file.name, 'w') as f:
+      writer = csv.writer(f)
+      get = operator.itemgetter('first', 'last', 'type')
+      for record in looney_records():
+        writer.writerow(get(record))
+
+    self._schema_file = tempfile.NamedTemporaryFile(
+        prefix='test-', suffix='.avsc', delete=delete)
+    with open(self._schema_file.name, 'w') as f:
+      f.write(SCHEMA)
+
+  def tearDown(self):
+    self._csv_file.close()
+    self._json_file.close()
+    self._schema_file.close()
+
+  def _RunWrite(self, *args, stdin=None):
+    """Runs the specified 'avro write ...' command.
+
+    Args:
+      *args: extra parameters to the 'avro write' command.
+      stdin: Optional string to feed the 'avro write' command stdin with.
+    Returns:
+      The command stdout as bytes.
+    """
+    return RunScript(
+        'write', '--schema', self._schema_file.name,
+        stdin=stdin, *args
+    )
+
+  def LoadAvro(self, filename):
+    out = RunScript('cat', filename).decode('utf-8')
+    return tuple(map(json.loads, out.splitlines()))
+
+  def testVersion(self):
+    out = RunScript('write', '--version').decode('utf-8')
+
+  def FormatCheck(self, format, filename):
+    with tempfile.NamedTemporaryFile(prefix='test-', suffix='.dat') as temp:
+      with open(temp.name, 'wb') as out:
+        out.write(self._RunWrite(filename, '-f', format))
+
+      records = self.LoadAvro(temp.name)
+      self.assertEqual(len(records), NUM_RECORDS)
+      self.assertEqual(records[0]['first'], 'daffy')
+
+  def testWriteJson(self):
+    self.FormatCheck('json', self._json_file.name)
+
+  def testWriteCsv(self):
+    self.FormatCheck('csv', self._csv_file.name)
+
+  def testOutfile(self):
+    with tempfile.NamedTemporaryFile(prefix='test-', suffix='.dat') as temp:
+      os.remove(temp.name)
+      self._RunWrite(self._json_file.name, '-o', temp.name)
+      self.assertEqual(len(self.LoadAvro(temp.name)), NUM_RECORDS)
+
+  def testMultiFile(self):
+    with tempfile.NamedTemporaryFile(prefix='test-', suffix='.dat') as temp:
+      with open(temp.name, 'wb') as out:
+        out.write(self._RunWrite(self._json_file.name, self._json_file.name))
+
+      self.assertEqual(len(self.LoadAvro(temp.name)), 2 * NUM_RECORDS)
+
+  def testStdin(self):
+    with tempfile.NamedTemporaryFile(prefix='test-', suffix='.dat') as temp:
+      with open(self._json_file.name, 'rb') as input_content:
+        with open(temp.name, 'wb') as out:
+          out.write(self._RunWrite('--input-type', 'json', stdin=input_content))
+
+      self.assertEqual(len(self.LoadAvro(temp.name)), NUM_RECORDS)
+
+
+if __name__ == '__main__':
+  raise Exception('Use run_tests.py')
diff --git a/lang/py3/avro/tests/txsample_http_client.py b/lang/py3/avro/tests/txsample_http_client.py
new file mode 100644
index 0000000..2cfb3c5
--- /dev/null
+++ b/lang/py3/avro/tests/txsample_http_client.py
@@ -0,0 +1,108 @@
+#!/usr/bin/env python3
+# -*- mode: python -*-
+# -*- coding: utf-8 -*-
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import sys
+
+from twisted.internet import reactor, defer
+from twisted.python.util import println
+
+from avro import protocol
+from avro import txipc
+
+MAIL_PROTOCOL_JSON = """\
+{"namespace": "example.proto",
+ "protocol": "Mail",
+
+ "types": [
+     {"name": "Message", "type": "record",
+      "fields": [
+          {"name": "to",   "type": "string"},
+          {"name": "from", "type": "string"},
+          {"name": "body", "type": "string"}
+      ]
+     }
+ ],
+
+ "messages": {
+     "send": {
+         "request": [{"name": "message", "type": "Message"}],
+         "response": "string"
+     },
+     "replay": {
+         "request": [],
+         "response": "string"
+     }
+ }
+}
+"""
+MAIL_PROTOCOL = protocol.Parse(MAIL_PROTOCOL_JSON)
+SERVER_HOST = 'localhost'
+SERVER_PORT = 9090
+
+class UsageError(Exception):
+  def __init__(self, value):
+    self.value = value
+  def __str__(self):
+    return repr(self.value)
+
+def make_requestor(server_host, server_port, protocol):
+  client = txipc.TwistedHTTPTransceiver(SERVER_HOST, SERVER_PORT)
+  return txipc.TwistedRequestor(protocol, client)
+
+if __name__ == '__main__':
+  if len(sys.argv) not in [4, 5]:
+    raise UsageError("Usage: <to> <from> <body> [<count>]")
+
+  # client code - attach to the server and send a message
+  # fill in the Message record
+  message = dict()
+  message['to'] = sys.argv[1]
+  message['from'] = sys.argv[2]
+  message['body'] = sys.argv[3]
+
+  try:
+    num_messages = int(sys.argv[4])
+  except (IndexError, ValueError):
+    num_messages = 1
+
+  # build the parameters for the request
+  params = {}
+  params['message'] = message
+
+  requests = []
+  # send the requests and print the result
+  for msg_count in range(num_messages):
+    requestor = make_requestor(SERVER_HOST, SERVER_PORT, MAIL_PROTOCOL)
+    d = requestor.request('send', params)
+    d.addCallback(lambda result: println("Result: " + result))
+    requests.append(d)
+  results = defer.gatherResults(requests)
+
+  def replay_cb(result):
+    print("Replay Result: " + result)
+    reactor.stop()
+
+  def replay(_):
+    # try out a replay message
+    requestor = make_requestor(SERVER_HOST, SERVER_PORT, MAIL_PROTOCOL)
+    d = requestor.request('replay', dict())
+    d.addCallback(replay_cb)
+
+  results.addCallback(replay)
+  reactor.run()
diff --git a/lang/py3/avro/tests/txsample_http_server.py b/lang/py3/avro/tests/txsample_http_server.py
new file mode 100644
index 0000000..14fa979
--- /dev/null
+++ b/lang/py3/avro/tests/txsample_http_server.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+# -*- mode: python -*-
+# -*- coding: utf-8 -*-
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from twisted.web import server
+from twisted.internet import reactor
+
+from avro import ipc
+from avro import protocol
+from avro import txipc
+
+MAIL_PROTOCOL_JSON = """\
+{"namespace": "example.proto",
+ "protocol": "Mail",
+
+ "types": [
+     {"name": "Message", "type": "record",
+      "fields": [
+          {"name": "to",   "type": "string"},
+          {"name": "from", "type": "string"},
+          {"name": "body", "type": "string"}
+      ]
+     }
+ ],
+
+ "messages": {
+     "send": {
+         "request": [{"name": "message", "type": "Message"}],
+         "response": "string"
+     },
+     "replay": {
+         "request": [],
+         "response": "string"
+     }
+ }
+}
+"""
+MAIL_PROTOCOL = protocol.Parse(MAIL_PROTOCOL_JSON)
+SERVER_ADDRESS = ('localhost', 9090)
+
+class MailResponder(ipc.Responder):
+  def __init__(self):
+    ipc.Responder.__init__(self, MAIL_PROTOCOL)
+
+  def invoke(self, message, request):
+    if message.name == 'send':
+      request_content = request['message']
+      response = "Sent message to %(to)s from %(from)s with body %(body)s" % \
+                 request_content
+      return response
+    elif message.name == 'replay':
+      return 'replay'
+
+if __name__ == '__main__':
+  root = server.Site(txipc.AvroResponderResource(MailResponder()))
+  reactor.listenTCP(9090, root)
+  reactor.run()
diff --git a/lang/py3/avro/tool.py b/lang/py3/avro/tool.py
new file mode 100644
index 0000000..0f41da6
--- /dev/null
+++ b/lang/py3/avro/tool.py
@@ -0,0 +1,166 @@
+#!/usr/bin/env python3
+# -*- mode: python -*-
+# -*- coding: utf-8 -*-
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Command-line tool
+
+NOTE: The API for the command-line tool is experimental.
+"""
+
+import sys
+import urllib
+
+from http.server import HTTPServer, BaseHTTPRequestHandler
+
+from avro import io
+from avro import datafile
+from avro import protocol
+from avro import ipc
+
+class GenericResponder(ipc.Responder):
+  def __init__(self, proto, msg, datum):
+    proto_json = open(proto, 'r').read()
+    ipc.Responder.__init__(self, protocol.Parse(proto_json))
+    self.msg = msg
+    self.datum = datum
+
+  def invoke(self, message, request):
+    if message.name == self.msg:
+      print >> sys.stderr, "Message: %s Datum: %s" % (message.name, self.datum)
+      # server will shut down after processing a single Avro request
+      global server_should_shutdown
+      server_should_shutdown = True
+      return self.datum
+
+class GenericHandler(BaseHTTPRequestHandler):
+  def do_POST(self):
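+    # Read one framed Avro request from the POST body, hand it to the
+    # responder, and write the framed response back to the client.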
+    self.responder = responder
+    call_request_reader = ipc.FramedReader(self.rfile)
+    call_request = call_request_reader.read_framed_message()
+    resp_body = self.responder.respond(call_request)
+    self.send_response(200)
+    self.send_header('Content-Type', 'avro/binary')
+    self.end_headers()
+    resp_writer = ipc.FramedWriter(self.wfile)
+    resp_writer.write_framed_message(resp_body)
+    if server_should_shutdown:
+      print >> sys.stderr, "Shutting down server."
+      self.server.force_stop()
+
+class StoppableHTTPServer(HTTPServer):
+  """HTTPServer.shutdown added in Python 2.6. FML."""
+  stopped = False
+  allow_reuse_address = True
+  def __init__(self, *args, **kw):
+    HTTPServer.__init__(self, *args, **kw)
+    self.allow_reuse_address = True
+
+  def serve_forever(self):
+    while not self.stopped:
+      self.handle_request()
+
+  def force_stop(self):
+    self.server_close()
+    self.stopped = True
+    self.serve_forever()
+
+def run_server(uri, proto, msg, datum):
+  url_obj = urllib.parse.urlparse(uri)
+  server_addr = (url_obj.hostname, url_obj.port)
+  global responder
+  global server_should_shutdown
+  server_should_shutdown = False
+  responder = GenericResponder(proto, msg, datum)
+  server = StoppableHTTPServer(server_addr, GenericHandler)
+  print("Port: %s" % server.server_port)
+  sys.stdout.flush()
+  server.allow_reuse_address = True
+  print >> sys.stderr, "Starting server."
+  server.serve_forever()
+
+def send_message(uri, proto, msg, datum):
+  url_obj = urllib.parse.urlparse(uri)
+  client = ipc.HTTPTransceiver(url_obj.hostname, url_obj.port)
+  proto_json = open(proto, 'r').read()
+  requestor = ipc.Requestor(protocol.Parse(proto_json), client)
+  print(requestor.request(msg, datum))
+
+def file_or_stdin(f):
+  if f == "-":
+    return sys.stdin.buffer
+  else:
+    return open(f, 'rb')
+
+def main(args=sys.argv):
+  if len(args) == 1:
+    print("Usage: %s [dump|rpcreceive|rpcsend]" % args[0])
+    return 1
+
+  if args[1] == "dump":
+    if len(args) != 3:
+      print("Usage: %s dump input_file" % args[0])
+      return 1
+    for d in datafile.DataFileReader(file_or_stdin(args[2]), io.DatumReader()):
+      print(repr(d))
+  elif args[1] == "rpcreceive":
+    usage_str = "Usage: %s rpcreceive uri protocol_file " % args[0]
+    usage_str += "message_name (-data d | -file f)"
+    if len(args) not in [5, 7]:
+      print(usage_str)
+      return 1
+    uri, proto, msg = args[2:5]
+    datum = None
+    if len(args) > 5:
+      if args[5] == "-file":
+        reader = open(args[6], 'rb')
+        datum_reader = io.DatumReader()
+        dfr = datafile.DataFileReader(reader, datum_reader)
+        datum = next(dfr)
+      elif args[5] == "-data":
+        print("JSON Decoder not yet implemented.")
+        return 1
+      else:
+        print(usage_str)
+        return 1
+    run_server(uri, proto, msg, datum)
+  elif args[1] == "rpcsend":
+    usage_str = "Usage: %s rpcsend uri protocol_file " % args[0]
+    usage_str += "message_name (-data d | -file f)"
+    if len(args) not in [5, 7]:
+      print(usage_str)
+      return 1
+    uri, proto, msg = args[2:5]
+    datum = None
+    if len(args) > 5:
+      if args[5] == "-file":
+        reader = open(args[6], 'rb')
+        datum_reader = io.DatumReader()
+        dfr = datafile.DataFileReader(reader, datum_reader)
+        datum = next(dfr)
+      elif args[5] == "-data":
+        print("JSON Decoder not yet implemented.")
+        return 1
+      else:
+        print(usage_str)
+        return 1
+    send_message(uri, proto, msg, datum)
+  return 0
+
+if __name__ == "__main__":
+  sys.exit(main(sys.argv))
diff --git a/lang/py3/avro/txipc.py b/lang/py3/avro/txipc.py
new file mode 100644
index 0000000..3a3e917
--- /dev/null
+++ b/lang/py3/avro/txipc.py
@@ -0,0 +1,224 @@
+#!/usr/bin/env python3
+# -*- mode: python -*-
+# -*- coding: utf-8 -*-
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import io
+
+from avro import io as avro_io
+from avro import ipc
+
+from zope.interface import implementer
+
+from twisted.internet.defer import maybeDeferred, Deferred
+from twisted.internet.protocol import Protocol
+from twisted.web import resource, server
+from twisted.web.client import Agent
+from twisted.web.http_headers import Headers
+from twisted.web.iweb import IBodyProducer
+
+
+class TwistedRequestor(ipc.BaseRequestor):
+  """A Twisted-compatible requestor. Returns a Deferred that will fire with the
+     returning value, instead of blocking until the request completes."""
+  def _process_handshake(self, call_response, message_name, request_datum):
+    # process the handshake and call response
+    buffer_decoder = avro_io.BinaryDecoder(io.StringIO(call_response))
+    call_response_exists = self.read_handshake_response(buffer_decoder)
+    if call_response_exists:
+      return self.read_call_response(message_name, buffer_decoder)
+    else:
+      return self.request(message_name, request_datum)
+
+  def issue_request(self, call_request, message_name, request_datum):
+    d = self.transceiver.transceive(call_request)
+    d.addCallback(self._process_handshake, message_name, request_datum)
+    return d
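+
+# Rough usage sketch (modeled on txsample_http_client.py in this tree; the
+# protocol object and request payload here are illustrative placeholders):
+#   client = TwistedHTTPTransceiver('localhost', 9090)
+#   requestor = TwistedRequestor(MAIL_PROTOCOL, client)
+#   d = requestor.request('send', {'message': message_record})
+#   d.addCallback(handle_result)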
+
+@implementer(IBodyProducer)
+class RequestStreamingProducer(object):
+  """A streaming producer for issuing requests with the Twisted.web Agent."""
+
+  paused = False
+  stopped = False
+  started = False
+
+  def __init__(self, message):
+    self._message = message
+    self._length = len(message)
+    # We need a buffer length header for every buffer and an additional
+    # zero-length buffer as the message terminator
+    self._length += (self._length // ipc.BUFFER_SIZE + 2) \
+      * ipc.BUFFER_HEADER_LENGTH
+    self._total_bytes_sent = 0
+    self._deferred = Deferred()
+
+  # read-only properties
+  message = property(lambda self: self._message)
+  length = property(lambda self: self._length)
+  consumer = property(lambda self: self._consumer)
+  deferred = property(lambda self: self._deferred)
+
+  def _get_total_bytes_sent(self):
+    return self._total_bytes_sent
+
+  def _set_total_bytes_sent(self, bytes_sent):
+    self._total_bytes_sent = bytes_sent
+
+  total_bytes_sent = property(_get_total_bytes_sent, _set_total_bytes_sent)
+
+  def startProducing(self, consumer):
+    if self.started:
+      return
+
+    self.started = True
+    self._consumer = consumer
+    # Keep writing data to the consumer until we're finished,
+    # paused (pauseProducing()) or stopped (stopProducing())
+    while self.length - self.total_bytes_sent > 0 and \
+      not self.paused and not self.stopped:
+      self.write()
+    # self.write will fire this deferred once it has written
+    # the entire message to the consumer
+    return self.deferred
+
+  def resumeProducing(self):
+    self.paused = False
+    self.write()
+
+  def pauseProducing(self):
+    self.paused = True
+
+  def stopProducing(self):
+    self.stopped = True
+
+  def write(self):
+    if self.length - self.total_bytes_sent > ipc.BUFFER_SIZE:
+      buffer_length = ipc.BUFFER_SIZE
+    else:
+      buffer_length = self.length - self.total_bytes_sent
+    self.write_buffer(self.message[self.total_bytes_sent:
+                              (self.total_bytes_sent + buffer_length)])
+    self.total_bytes_sent += buffer_length
+    # Make sure we wrote the entire message
+    if self.total_bytes_sent == self.length and not self.stopped:
+      self.stopProducing()
+      # A message is always terminated by a zero-length buffer.
+      self.write_buffer_length(0)
+      self.deferred.callback(None)
+
+  def write_buffer(self, chunk):
+    buffer_length = len(chunk)
+    self.write_buffer_length(buffer_length)
+    self.consumer.write(chunk)
+
+  def write_buffer_length(self, n):
+    self.consumer.write(ipc.BIG_ENDIAN_INT_STRUCT.pack(n))
+
+class AvroProtocol(Protocol):
+
+  recvd = ''
+  done = False
+
+  def __init__(self, finished):
+    self.finished = finished
+    self.message = []
+
+  def dataReceived(self, data):
+    self.recvd = self.recvd + data
+    while len(self.recvd) >= ipc.BUFFER_HEADER_LENGTH:
+      buffer_length ,= ipc.BIG_ENDIAN_INT_STRUCT.unpack(
+        self.recvd[:ipc.BUFFER_HEADER_LENGTH])
+      if buffer_length == 0:
+        response = ''.join(self.message)
+        self.done = True
+        self.finished.callback(response)
+        break
+      if len(self.recvd) < buffer_length + ipc.BUFFER_HEADER_LENGTH:
+        break
+      buffer = self.recvd[ipc.BUFFER_HEADER_LENGTH:buffer_length + ipc.BUFFER_HEADER_LENGTH]
+      self.recvd = self.recvd[buffer_length + ipc.BUFFER_HEADER_LENGTH:]
+      self.message.append(buffer)
+
+  def connectionLost(self, reason):
+    if not self.done:
+      self.finished.errback(ipc.ConnectionClosedException("Reader read 0 bytes."))
+
+class TwistedHTTPTransceiver(object):
+  """This transceiver uses the Agent class present in Twisted.web >= 9.0
+     for issuing requests to the remote endpoint."""
+  def __init__(self, host, port, remote_name=None, reactor=None):
+    self.url = "http://%s:%d/" % (host, port)
+
+    if remote_name is None:
+      # There's no easy way to get this peer's remote address
+      # in Twisted, so we use a random UUID to identify ourselves.
+      import uuid
+      self.remote_name = uuid.uuid4()
+
+    if reactor is None:
+      from twisted.internet import reactor
+    self.agent = Agent(reactor)
+
+  def read_framed_message(self, response):
+    finished = Deferred()
+    response.deliverBody(AvroProtocol(finished))
+    return finished
+
+  def transceive(self, request):
+    req_method = 'POST'
+    req_headers = {
+      'Content-Type': ['avro/binary'],
+      'Accept-Encoding': ['identity'],
+    }
+
+    body_producer = RequestStreamingProducer(request)
+    d = self.agent.request(
+      req_method,
+      self.url,
+      headers=Headers(req_headers),
+      bodyProducer=body_producer)
+    return d.addCallback(self.read_framed_message)
+
+class AvroResponderResource(resource.Resource):
+  """This Twisted.web resource can be placed anywhere in a URL hierarchy
+     to provide an Avro endpoint. Different Avro protocols can be served
+     by the same web server as long as they are in different resources in
+     a URL hierarchy."""
+  isLeaf = True
+
+  def __init__(self, responder):
+    resource.Resource.__init__(self)
+    self.responder = responder
+
+  def cb_render_POST(self, resp_body, request):
+    request.setResponseCode(200)
+    request.setHeader('Content-Type', 'avro/binary')
+    resp_writer = ipc.FramedWriter(request)
+    resp_writer.write_framed_message(resp_body)
+    request.finish()
+
+  def render_POST(self, request):
+    # Unfortunately, Twisted.web doesn't support incoming streamed input yet;
+    # the whole payload must be kept in memory.
+    request.content.seek(0, 0)
+    call_request_reader = ipc.FramedReader(request.content)
+    call_request = call_request_reader.read_framed_message()
+    d = maybeDeferred(self.responder.respond, call_request)
+    d.addCallback(self.cb_render_POST, request)
+    return server.NOT_DONE_YET
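+
+# Usage sketch (mirrors txsample_http_server.py in this tree; MailResponder is
+# the sample responder defined there):
+#   site = server.Site(AvroResponderResource(MailResponder()))
+#   reactor.listenTCP(9090, site)
+#   reactor.run()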
diff --git a/lang/py3/scripts/avro b/lang/py3/scripts/avro
new file mode 100644
index 0000000..225cef2
--- /dev/null
+++ b/lang/py3/scripts/avro
@@ -0,0 +1,336 @@
+#!/usr/bin/env python3
+# -*- mode: python -*-
+# -*- coding: utf-8 -*-
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Command line utlity for reading and writing Avro files."""
+
+import argparse
+import csv
+import functools
+import itertools
+import json
+import os
+import sys
+import traceback
+
+import avro
+from avro import datafile
+from avro import io as avro_io
+from avro import schema
+
+
+# ------------------------------------------------------------------------------
+
+
+class AvroError(Exception):
+  """Errors in this module."""
+  pass
+
+
+def print_json(row):
+  print(json.dumps(row))
+
+
+def print_json_pretty(row):
+  print(json.dumps(row, indent=4, sort_keys=True))
+
+_csv_writer = csv.writer(sys.stdout)
+
+def _write_row(row):
+  _csv_writer.writerow(row)
+
+
+def print_csv(row):
+  # Sort record fields to ensure consistent ordering:
+  _write_row([row[key] for key in sorted(row)])
+
+
+def select_printer(format):
+  return {
+      'json' : print_json,
+      'json-pretty' : print_json_pretty,
+      'csv' : print_csv
+  }[format]
+
+
+def record_match(expr, record):
+  return eval(expr, None, {'r' : record})
+
+
+def parse_fields(fields):
+  fields = fields or ''
+  if not fields.strip():
+    return None
+
+  return [field.strip() for field in fields.split(',') if field.strip()]
+
+
+def field_selector(fields):
+  fields = set(fields)
+  def keys_filter(obj):
+    return dict((k, obj[k]) for k in (set(obj) & fields))
+  return keys_filter
+
+
+def print_avro(avro, opts):
+  if opts.header and (opts.format != 'csv'):
+    raise AvroError('--header applies only to CSV format')
+
+  # Apply filter first
+  if opts.filter:
+    avro = filter(functools.partial(record_match, opts.filter), avro)
+
+  for i in range(opts.skip):
+    try:
+      next(avro)
+    except StopIteration:
+      return
+
+  fields = parse_fields(opts.fields)
+  if fields:
+    avro = map(field_selector(fields), avro)
+
+  printer = select_printer(opts.format)
+  for i, record in enumerate(avro):
+    if i == 0 and opts.header:
+      _write_row(sorted(record.keys()))
+    if i >= opts.count:
+      break
+    printer(record)
+
+
+def print_schema(avro):
+  schema = avro.meta['avro.schema'].decode('utf-8')
+  print(json.dumps(json.loads(schema), indent=4))
+
+
+def cat(opts, args):
+  if not args:
+    raise AvroError('No files to show')
+
+  for filename in args:
+    try:
+      fo = open(filename, 'rb')
+    except (OSError, IOError) as e:
+      raise AvroError('Cannot open %s - %s' % (filename, e))
+
+    avro = datafile.DataFileReader(fo, avro_io.DatumReader())
+
+    if opts.print_schema:
+      print_schema(avro)
+      continue
+
+    print_avro(avro, opts)
+
+
+# ------------------------------------------------------------------------------
+
+
+def _open(filename, mode):
+  if filename == '-':
+    return {
+        'rt' : sys.stdin,
+        'wb' : sys.stdout.buffer,
+    }[mode]
+
+  return open(filename, mode)
+
+
+def iter_json(info, schema):
+  return map(json.loads, info)
+
+
+# Converters from CSV cell text to Avro primitive values, keyed by type name.
+_PRIMITIVE_CONVERTERS = {
+    'int' : int,
+    'long' : int,
+    'float' : float,
+    'double' : float,
+    'string' : str,
+    'bytes' : bytes,
+    'boolean' : bool,
+    'null' : lambda _: None,
+}
+
+
+def convert(value, field):
+  type = field.type.type
+  if type == 'union':
+    return convert_union(value, field)
+
+  return _PRIMITIVE_CONVERTERS[type](value)
+
+
+def convert_union(value, field):
+  # Try each branch of the union until one converter accepts the value.
+  for branch in field.type.schemas:
+    try:
+      return _PRIMITIVE_CONVERTERS[branch.type](value)
+    except (KeyError, ValueError):
+      continue
+
+
+def iter_csv(info, schema):
+  header = [field.name for field in schema.fields]
+  for row in csv.reader(info):
+    values = [convert(v, f) for v, f in zip(row, schema.fields)]
+    yield dict(zip(header, values))
+
+
+def guess_input_type(files):
+  if not files:
+      return None
+
+  ext = os.path.splitext(files[0])[1].lower()
+  if ext in ('.json', '.js'):
+      return 'json'
+  elif ext in ('.csv',):
+      return 'csv'
+
+  return None
+
+
+def write(opts, files):
+  if not opts.schema:
+      raise AvroError('No schema specified')
+
+  input_type = opts.input_type or guess_input_type(files)
+  if not input_type:
+      raise AvroError('Cannot guess input file type (not .json or .csv)')
+
+  try:
+    with open(opts.schema, 'rt') as f:
+      json_schema = f.read()
+    writer_schema = schema.Parse(json_schema)
+    out = _open(opts.output, 'wb')
+  except (IOError, OSError) as e:
+    raise AvroError('Cannot open file - %s' % e)
+
+  record_parser_map = {
+      'json': iter_json,
+      'csv': iter_csv,
+  }
+
+  with datafile.DataFileWriter(
+      writer=out,
+      datum_writer=avro_io.DatumWriter(),
+      writer_schema=writer_schema,
+  ) as writer:
+    iter_records = record_parser_map[input_type]
+    for filename in (files or ['-']):
+      reader = _open(filename, 'rt')
+      for record in iter_records(reader, writer_schema):
+        writer.append(record)
+
+
+# ------------------------------------------------------------------------------
+
+
+def main(argv=None):
+  argv = argv or sys.argv
+
+  parser = argparse.ArgumentParser(
+      description='Display/write for Avro files',
+      usage='%(prog)s cat|write [options] FILE [FILE...]',
+  )
+
+  parser.add_argument(
+      '--version',
+      action='version',
+      version='%(prog)s ' + avro.VERSION,
+  )
+
+  # cat options:
+  cat_options = parser.add_argument_group(title='cat options')
+  cat_options.add_argument(
+      '-n', '--count',
+      type=int,
+      default=float('Infinity'),
+      help='number of records to print',
+  )
+  cat_options.add_argument(
+      '-s', '--skip',
+      type=int,
+      default=0,
+      help='number of records to skip',
+  )
+  cat_options.add_argument(
+      '-f', '--format',
+      default='json',
+      choices=['json', 'csv', 'json-pretty'],
+      help='record format',
+  )
+  cat_options.add_argument(
+      '--header',
+      default=False,
+      action='store_true',
+      help='print CSV header',
+  )
+  cat_options.add_argument(
+      '--filter',
+      default=None,
+      help='filter records (e.g. r["age"]>1)',
+  )
+  cat_options.add_argument(
+      '--print-schema',
+      default=False,
+      action='store_true',
+      help='print schema',
+  )
+  cat_options.add_argument(
+      '--fields',
+      default=None,
+      help='fields to show, comma separated (show all by default)',
+  )
+
+  # write options
+  write_options = parser.add_argument_group(title='write options')
+  write_options.add_argument(
+      '--schema',
+      help='schema file (required)',
+  )
+  write_options.add_argument(
+      '--input-type',
+      choices=['json', 'csv'],
+      default=None,
+      help='input file(s) type (json or csv)',
+  )
+  write_options.add_argument(
+      '-o', '--output',
+      default='-',
+      help='output file',
+  )
+
+  opts, args = parser.parse_known_args(argv[1:])
+  if len(args) < 1:
+    parser.error('You must specify `cat` or `write`.')
+
+  command = args.pop(0)
+  try:
+    if command == 'cat':
+      cat(opts, args)
+    elif command == 'write':
+      write(opts, args)
+    else:
+      raise AvroError('Unknown command - %s' % command)
+  except AvroError as e:
+    parser.error('%s' % e) # Will exit
+  except Exception as e:
+    traceback.print_exc()
+    raise SystemExit('panic: %s' % e)
+
+
+if __name__ == '__main__':
+  main()
diff --git a/lang/py3/setup.py b/lang/py3/setup.py
new file mode 100644
index 0000000..426ad1d
--- /dev/null
+++ b/lang/py3/setup.py
@@ -0,0 +1,151 @@
+#!/usr/bin/env python3
+# -*- mode: python -*-
+# -*- coding: utf-8 -*-
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import shutil
+import sys
+
+from setuptools import setup
+
+
+VERSION_FILE_NAME = 'VERSION.txt'
+
+
+def RunsFromSourceDist():
+  """Tests whether setup.py is invoked from a source distribution.
+
+  Returns:
+    True if setup.py runs from a source distribution.
+    False otherwise, ie. if setup.py runs from the SVN trunk.
+  """
+  setup_file_path = os.path.abspath(__file__)
+  # If a file PKG-INFO exists as a sibling of setup.py,
+  # assume we are running as source distribution:
+  pkg_info_file_path = \
+      os.path.join(os.path.dirname(setup_file_path), 'PKG-INFO')
+  return os.path.exists(pkg_info_file_path)
+
+
+def SetupSources():
+  """Prepares the source directory.
+
+  Runs when setup.py is invoked from the Avro SVN/Git source.
+  """
+  # Avro lang/py3/ source directory:
+  py3_dir = os.path.dirname(os.path.abspath(__file__))
+
+  # Avro top-level source directory:
+  root_dir = os.path.dirname(os.path.dirname(py3_dir))
+
+  # Copy README.txt from Avro top-level directory:
+  shutil.copy(
+      src=os.path.join(root_dir, 'README.txt'),
+      dst=os.path.join(py3_dir, 'README.txt'),
+  )
+
+  # Read and copy Avro version:
+  version_file_path = os.path.join(root_dir, 'share', VERSION_FILE_NAME)
+  with open(version_file_path, 'r') as f:
+    avro_version = f.read().strip()
+  shutil.copy(
+      src=version_file_path,
+      dst=os.path.join(py3_dir, 'avro', VERSION_FILE_NAME),
+  )
+
+  # Copy necessary avsc files:
+  avsc_file_path = os.path.join(
+      root_dir, 'share', 'schemas',
+      'org', 'apache', 'avro', 'ipc', 'HandshakeRequest.avsc')
+  shutil.copy(
+      src=avsc_file_path,
+      dst=os.path.join(py3_dir, 'avro', 'HandshakeRequest.avsc'),
+  )
+
+  avsc_file_path = os.path.join(
+      root_dir, 'share', 'schemas',
+      'org', 'apache', 'avro', 'ipc', 'HandshakeResponse.avsc')
+  shutil.copy(
+      src=avsc_file_path,
+      dst=os.path.join(py3_dir, 'avro', 'HandshakeResponse.avsc'),
+  )
+
+  avsc_file_path = os.path.join(
+      root_dir, 'share', 'test', 'schemas', 'interop.avsc')
+  shutil.copy(
+      src=avsc_file_path,
+      dst=os.path.join(py3_dir, 'avro', 'tests', 'interop.avsc'),
+  )
+
+  # Make sure the avro shell script is executable:
+  os.chmod(
+      path=os.path.join(py3_dir, 'scripts', 'avro'),
+      mode=0o777,
+  )
+
+
+def ReadVersion():
+  """Returns: the content of the Avro version file."""
+  setup_file_path = os.path.abspath(__file__)
+  install_dir = os.path.dirname(setup_file_path)
+  version_file_path = os.path.join(install_dir, 'avro', VERSION_FILE_NAME)
+  with open(version_file_path, 'rt') as f:
+    avro_version = f.read().strip()
+  return avro_version
+
+
+def Main():
+  assert (sys.version_info[0] >= 3), \
+      ('Python version >= 3 required, got %r' % sys.version_info)
+
+  if not RunsFromSourceDist():
+    SetupSources()
+
+  avro_version = ReadVersion()
+
+  setup(
+      name = 'avro-python3-snapshot',
+      version = avro_version,
+      packages = ['avro'],
+      package_dir = {'avro': 'avro'},
+      scripts = ['scripts/avro'],
+
+      package_data = {
+          'avro': [
+              'HandshakeRequest.avsc',
+              'HandshakeResponse.avsc',
+              VERSION_FILE_NAME,
+          ],
+      },
+
+      test_suite='avro.tests.run_tests',
+      tests_require=[],
+
+      # metadata for upload to PyPI
+      author = 'Apache Avro',
+      author_email = 'avro-dev@hadoop.apache.org',
+      description = 'Avro is a serialization and RPC framework.',
+      license = 'Apache License 2.0',
+      keywords = 'avro serialization rpc',
+      url = 'http://hadoop.apache.org/avro',
+  )
+
+
+if __name__ == '__main__':
+  Main()
diff --git a/pom.xml b/pom.xml
new file mode 100644
index 0000000..e188eb0
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,265 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache</groupId>
+    <artifactId>apache</artifactId>
+    <version>10</version>
+  </parent>
+
+  <groupId>org.apache.avro</groupId>
+  <artifactId>avro-toplevel</artifactId>
+  <version>1.7.7</version>
+  <packaging>pom</packaging>
+
+  <name>Apache Avro Toplevel</name>
+  <url>http://avro.apache.org</url>
+  <description>Avro toplevel pom</description>
+
+  <!-- This project is used for top level build tasks and artifact copying.
+       The RAT task is run to validate licenses.  The Enforcer plugin is used
+       to validate that java projects are the correct version.
+       Java artifacts are copied to the final build destination with a custom profile.
+       -->
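+  <!-- A typical invocation, for illustration only (see BUILD.txt and build.sh
+       for the canonical commands):
+         mvn -Davro.version=1.7.7 -P dist package
+       The avro.version property is required by the enforcer rules below. -->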
+  <properties>
+    <avro.distDir>dist</avro.distDir>
+    <avro.docDir>build/avro-doc-${project.version}/api</avro.docDir>
+    <!-- dependency plugin versions -->
+    <apache-rat-tasks.version>0.7</apache-rat-tasks.version>
+
+    <!-- plugin versions -->
+    <antrun-plugin.version>1.7</antrun-plugin.version>
+    <enforcer-plugin.version>1.0.1</enforcer-plugin.version>
+  </properties>
+
+  <modules>
+    <module>lang/java</module>
+  </modules>
+
+  <scm>
+    <connection>scm:svn:http://svn.apache.org/repos/asf/avro/trunk</connection>
+    <developerConnection>scm:svn:https://svn.apache.org/repos/asf/avro/trunk</developerConnection>
+    <url>http://svn.apache.org/viewvc/avro/trunk</url>
+  </scm>
+
+  <issueManagement>
+    <system>jira</system>
+    <url>http://issues.apache.org/jira/browse/AVRO</url>
+  </issueManagement>
+
+  <inceptionYear>2009</inceptionYear>
+
+  <mailingLists>
+    <mailingList>
+      <name>Avro Developer List</name>
+      <subscribe>dev-subscribe@avro.apache.org</subscribe>
+      <unsubscribe>dev-unsubscribe@avro.apache.org</unsubscribe>
+      <post>dev@avro.apache.org</post>
+      <archive>http://mail-archives.apache.org/mod_mbox/avro-dev/</archive>
+    </mailingList>
+    <mailingList>
+      <name>Avro Users List</name>
+      <subscribe>users-subscribe@avro.apache.org</subscribe>
+      <unsubscribe>users-unsubscribe@avro.apache.org</unsubscribe>
+      <post>users@avro.apache.org</post>
+      <archive>http://mail-archives.apache.org/mod_mbox/avro-users/</archive>
+    </mailingList>
+    <mailingList>
+      <name>Avro Commits List</name>
+      <subscribe>commits-subscribe@avro.apache.org</subscribe>
+      <unsubscribe>commits-unsubscribe@avro.apache.org</unsubscribe>
+      <post>commits@avro.apache.org</post>
+      <archive>http://mail-archives.apache.org/mod_mbox/avro-commits/</archive>
+    </mailingList>
+  </mailingLists>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-enforcer-plugin</artifactId>
+        <version>${enforcer-plugin.version}</version>
+        <configuration>
+          <rules>
+            <requireProperty>
+              <property>avro.version</property>
+              <message>*****!!!!!! Must have property avro.version set to enforce version. !!!!!!*****</message>
+            </requireProperty>
+            <requireProperty>
+              <property>project.version</property>
+              <regex>${avro.version}</regex>
+              <regexMessage>*****!!!!! Version of project must be ${avro.version} !!!!!*****</regexMessage>
+            </requireProperty>
+          </rules>
+          <fail>true</fail>
+        </configuration>
+      </plugin>
+    </plugins>
+  </build>
+
+  <profiles>
+    <profile>
+      <id>dist</id>
+      <!-- Profile for generating all maven artifacts and documentation. -->
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>org.apache.maven.plugins</groupId>
+            <artifactId>maven-javadoc-plugin</artifactId>
+            <executions>
+              <execution>
+                <!-- build javadoc jars per jar for publishing to maven -->
+                <id>module-javadocs</id>
+                <phase>package</phase>
+                <goals>
+                  <goal>jar</goal>
+                </goals>
+              </execution>
+              <execution>
+                <!-- build aggregate javadoc in parent only -->
+                <id>default-cli</id>
+                <goals>
+                  <goal>aggregate</goal>
+                </goals>
+                <inherited>false</inherited>
+                <configuration>
+                  <overview>avro/src/main/java/overview.html</overview>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+          <plugin>
+            <groupId>org.apache.maven.plugins</groupId>
+            <artifactId>maven-source-plugin</artifactId>
+            <executions>
+              <execution>
+                <!-- builds source jars and attaches them to the project for publishing -->
+                <id>avro-java-sources</id>
+                <phase>package</phase>
+                <goals>
+                  <goal>jar-no-fork</goal>
+                </goals>
+              </execution>
+            </executions>
+          </plugin>
+          <plugin>
+            <groupId>org.apache.maven.plugins</groupId>
+            <artifactId>maven-enforcer-plugin</artifactId>
+            <executions>
+              <execution>
+                <phase>package</phase>
+                <goals>
+                  <goal>enforce</goal>
+                </goals>
+              </execution>
+            </executions>
+          </plugin>
+        </plugins>
+      </build>
+    </profile>
+    <profile>
+      <id>sign</id>
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>org.apache.maven.plugins</groupId>
+            <artifactId>maven-gpg-plugin</artifactId>
+            <executions>
+              <execution>
+                <id>sign-artifacts</id>
+                <phase>verify</phase>
+                <goals>
+                  <goal>sign</goal>
+                </goals>
+              </execution>
+            </executions>
+          </plugin>
+        </plugins>
+      </build>
+    </profile>
+    <profile>
+      <id>rat</id>
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>org.apache.maven.plugins</groupId>
+            <artifactId>maven-antrun-plugin</artifactId>
+            <version>${antrun-plugin.version}</version>
+            <configuration>
+              <target name="rat">
+                <rat:report xmlns:rat="antlib:org.apache.rat.anttasks"
+                            reportFile="build/rat-report.log">
+                  <fileset dir="build/avro-src-${project.version}/"
+                           excludesfile="share/rat-excludes.txt"/>
+                </rat:report>
+                <condition property="rat.passed">
+                  <isfileselected file="build/rat-report.log">
+                    <containsregexp expression="^0 Unknown Licenses"/>
+                  </isfileselected>
+                </condition>
+                <fail unless="rat.passed">Unknown licenses: See build/rat-report.log.</fail>
+              </target>
+            </configuration>
+            <dependencies>
+              <dependency>
+                <groupId>org.apache.rat</groupId>
+                <artifactId>apache-rat-tasks</artifactId>
+                <version>${apache-rat-tasks.version}</version>
+              </dependency>
+            </dependencies>
+          </plugin>
+        </plugins>
+      </build>
+    </profile>
+    <profile>
+      <id>copy-artifacts</id>
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>org.apache.maven.plugins</groupId>
+            <artifactId>maven-antrun-plugin</artifactId>
+            <version>${antrun-plugin.version}</version>
+            <configuration>
+              <target name="copy-java-artifacts">
+                <mkdir dir="${avro.distDir}/java"/>
+                <copy todir="${avro.distDir}/java" verbose="true">
+                  <flattenmapper/>
+                  <fileset dir="lang/java/"
+                           includes="**/target/*${project.version}*.jar"
+                           excludes="**/original-*.jar **/*tests.jar"/>
+                </copy>
+
+                <mkdir dir="${avro.docDir}"/>
+                <copy todir="${avro.docDir}/java">
+                  <fileset dir="lang/java/target/site/apidocs"/>
+                </copy>
+                <copy todir="build/avro-doc-${project.version}/trevni">
+                  <fileset dir="lang/java/trevni/doc/target/site"/>
+                </copy>
+              </target>
+            </configuration>
+          </plugin>
+        </plugins>
+      </build>
+    </profile>
+  </profiles>
+
+</project>
diff --git a/share/VERSION.txt b/share/VERSION.txt
new file mode 100644
index 0000000..73c8b4f
--- /dev/null
+++ b/share/VERSION.txt
@@ -0,0 +1 @@
+1.7.7
\ No newline at end of file
diff --git a/share/editors/README.txt b/share/editors/README.txt
new file mode 100644
index 0000000..70dd312
--- /dev/null
+++ b/share/editors/README.txt
@@ -0,0 +1 @@
+Syntax highlighting and config files for working with Avro files in various text editors.
diff --git a/share/editors/avro-idl.vim b/share/editors/avro-idl.vim
new file mode 100644
index 0000000..2eb79d4
--- /dev/null
+++ b/share/editors/avro-idl.vim
@@ -0,0 +1,84 @@
+" Vim syntax file
+" Language: Avro IDL
+" Maintainer: Daniel Lundin <dln at eintr.org>
+" Last Change: 20100924
+" Copy to ~/.vim/syntax/
+" Add to ~/.vimrc
+"  au BufRead,BufNewFile *.avdl setlocal filetype=avro-idl
+"
+" Licensed to the Apache Software Foundation (ASF) under one
+" or more contributor license agreements. See the NOTICE file
+" distributed with this work for additional information
+" regarding copyright ownership. The ASF licenses this file
+" to you under the Apache License, Version 2.0 (the
+" "License"); you may not use this file except in compliance
+" with the License. You may obtain a copy of the License at
+"
+"   http://www.apache.org/licenses/LICENSE-2.0
+"
+" Unless required by applicable law or agreed to in writing,
+" software distributed under the License is distributed on an
+" "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+" KIND, either express or implied. See the License for the
+" specific language governing permissions and limitations
+" under the License.
+"
+
+if version < 600
+  syntax clear
+elseif exists("b:current_syntax")
+  finish
+endif
+
+" Todo
+syn keyword avroTodo TODO todo FIXME fixme XXX xxx contained
+
+" Comments
+syn region avroComment start="/\*" end="\*/" contains=avroTodo
+syn match avroComment "//.\{-}\(?>\|$\)\@=" contains=avroTodo
+
+" Identifiers
+syn region avroIdentifier start="^\s*\(error\|protocol\|record\)" end="{" contains=avroIdentifierType 
+syn keyword avroIdentifierType error protocol record contained nextgroup=avroIdentifierName skipwhite
+syn match avroIdentifierName	"\w\w*" display contained skipwhite
+
+syn region avroEscaped  start=/`/ end=/`/
+
+" Types
+syn match avroNumber "-\=\<\d\+\>" contained
+syn region avroString start=/"/ skip=/\\"/ end=/"/
+syn region avroString start=/'/ skip=/\\'/ end=/'/
+syn region avroArray  start="<" end=">" contains=avroArrayType
+syn match avroArrayType "\w\w*" display contained skipwhite
+
+" Keywords
+syn keyword avroKeyword java-class namespace order
+syn keyword avroKeyword error throws
+syn keyword avroBasicTypes boolean bytes double fixed float int long null string void
+syn keyword avroStructure array enum map union
+
+if version >= 508 || !exists("did_avro_idl_syn_inits")
+  if version < 508
+    let did_avro_idl_syn_inits = 1
+    command! -nargs=+ HiLink hi link <args>
+  else
+    command! -nargs=+ HiLink hi def link <args>
+  endif
+
+  HiLink avroTodo Todo
+  HiLink avroComment Comment
+  HiLink avroNumber Number
+  HiLink avroKeyword Define
+  HiLink avroIdentifierType Special
+  HiLink avroBasicTypes Type
+  HiLink avroArrayType Type
+  HiLink avroString       String
+  HiLink avroStructure Structure
+  HiLink avroArray Structure
+  HiLink avroEscaped Default
+  HiLink avroIdentifierName    Entity
+
+  delcommand HiLink
+endif
+
+let b:current_syntax = "avro-idl"
diff --git a/share/rat-excludes.txt b/share/rat-excludes.txt
new file mode 100644
index 0000000..c123a93
--- /dev/null
+++ b/share/rat-excludes.txt
@@ -0,0 +1,55 @@
+**/*.avpr
+**/*.avro
+**/*.avsc
+**/*.dox
+**/*.gperf
+**/*.html
+**/*.json
+**/*.js
+**/*.la
+**/*.m4
+**/*.md5
+**/*.pom
+**/*.properties
+**/*.sha1
+**/*.txt
+**/.gitignore
+**/Makefile**
+**/configure**
+**/_*
+doc/**
+lang/py/lib/simplejson/**
+lang/c++/Doxyfile
+lang/c++/jsonschemas/**
+lang/c/autom4te.cache/**
+lang/c/config**
+lang/c++/config**
+lang/c/docs/**
+lang/c/json/**
+lang/c/src/avro-c.pc.in
+lang/c/src/avro/msinttypes.h
+lang/c/src/avro/msstdint.h
+lang/c/tests/**
+lang/c/version.sh
+lang/ruby/Manifest
+lang/c/build.sh
+lang/c/jansson/**
+lang/c/src/queue.h
+lang/c/src/st.h
+lang/c/src/st.c
+lang/csharp/Avro.sln
+lang/csharp/Avro.nunit
+lang/csharp/src/apache/ipc/org/apache/avro/ipc/**
+lang/csharp/src/apache/perf/com/foo/**
+lang/csharp/src/apache/test/Ipc/GeneratedFiles/**
+lang/java/avro/src/test/java/org/apache/avro/TypeEnum.java
+lang/java/avro/src/test/java/org/apache/avro/FooBarSpecificRecord.java
+lang/java/ipc/src/main/java/org/apache/avro/ipc/stats/static/*.js
+lang/java/ipc/src/main/java/org/apache/avro/ipc/stats/static/*.css
+lang/java/protobuf/src/test/java/org/apache/avro/protobuf/Test.java
+lang/java/thrift/src/test/java/org/apache/avro/thrift/test/*.java
+lang/java/tools/src/test/compiler/output*/**
+lang/perl/.shipit
+lang/perl/Changes
+lang/perl/MANIFEST*
+share/test/data/test.avro12
diff --git a/share/schemas/org/apache/avro/data/Json.avsc b/share/schemas/org/apache/avro/data/Json.avsc
new file mode 100644
index 0000000..5a34977
--- /dev/null
+++ b/share/schemas/org/apache/avro/data/Json.avsc
@@ -0,0 +1,15 @@
+{"type": "record", "name": "Json", "namespace":"org.apache.avro.data",
+ "fields": [
+     {"name": "value",
+      "type": [
+          "long",
+          "double",
+          "string",
+          "boolean",
+          "null",
+          {"type": "array", "items": "Json"},
+          {"type": "map", "values": "Json"}
+      ]
+     }
+ ]
+}
diff --git a/share/schemas/org/apache/avro/ipc/HandshakeRequest.avsc b/share/schemas/org/apache/avro/ipc/HandshakeRequest.avsc
new file mode 100644
index 0000000..47f0256
--- /dev/null
+++ b/share/schemas/org/apache/avro/ipc/HandshakeRequest.avsc
@@ -0,0 +1,11 @@
+{
+    "type": "record",
+    "name": "HandshakeRequest", "namespace":"org.apache.avro.ipc",
+    "fields": [
+        {"name": "clientHash",
+	 "type": {"type": "fixed", "name": "MD5", "size": 16}},
+        {"name": "clientProtocol", "type": ["null", "string"]},
+        {"name": "serverHash", "type": "MD5"},
+ 	{"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]}
+ ]
+}
diff --git a/share/schemas/org/apache/avro/ipc/HandshakeResponse.avsc b/share/schemas/org/apache/avro/ipc/HandshakeResponse.avsc
new file mode 100644
index 0000000..b54d16d
--- /dev/null
+++ b/share/schemas/org/apache/avro/ipc/HandshakeResponse.avsc
@@ -0,0 +1,15 @@
+{
+    "type": "record",
+    "name": "HandshakeResponse", "namespace": "org.apache.avro.ipc",
+    "fields": [
+        {"name": "match",
+         "type": {"type": "enum", "name": "HandshakeMatch",
+                  "symbols": ["BOTH", "CLIENT", "NONE"]}},
+        {"name": "serverProtocol",
+         "type": ["null", "string"]},
+        {"name": "serverHash",
+         "type": ["null", {"type": "fixed", "name": "MD5", "size": 16}]},
+ 	{"name": "meta",
+         "type": ["null", {"type": "map", "values": "bytes"}]}
+    ]
+}
diff --git a/share/schemas/org/apache/avro/ipc/trace/avroTrace.avdl b/share/schemas/org/apache/avro/ipc/trace/avroTrace.avdl
new file mode 100644
index 0000000..9fd5680
--- /dev/null
+++ b/share/schemas/org/apache/avro/ipc/trace/avroTrace.avdl
@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A Span is our basic unit of tracing. It tracks the critical points
+ * of a single RPC call and records other call meta-data. It also
+ * allows arbitrary string annotations. Both the client and server create
+ * Span objects, each of which is populated with half of the relevant event
+ * data. They share a span ID, which allows us to merge them into one complete
+ * span later on.
+ */
+@namespace("org.apache.avro.ipc.trace")
+
+protocol AvroTrace {
+  enum SpanEvent { SERVER_RECV, SERVER_SEND, CLIENT_RECV, CLIENT_SEND }
+
+  fixed ID(8);
+
+  record TimestampedEvent {
+    long timeStamp; // Unix time, in nanoseconds
+    union { SpanEvent, string} event;
+  }
+
+  /**
+   * An individual span is the basic unit of tracing.
+   * The record is used by both \"client\" and \"server\".
+   */
+  record Span {
+    ID  traceID;  // ID shared by all Spans in a given trace
+    ID spanID;    // Random ID for this Span
+    union { ID, null } parentSpanID; // Parent Span ID (null if root Span)
+    string messageName;       // Function call represented
+    long requestPayloadSize;  // Size (bytes) of the request
+    long responsePayloadSize; // Size (bytes) of the response
+    union { string, null} requestorHostname; // Hostname of requestor
+//    int requestorPort;     // Port of the requestor (currently unused)
+    union { string, null } responderHostname; // Hostname of the responder
+//    int responderPort;     // Port of the responder (currently unused)
+    array<TimestampedEvent> events;  // List of critical events
+    boolean complete; // Whether this span includes data from both sides
+  }
+
+  /**
+   * Get all spans stored on this host.
+   */
+  array<Span> getAllSpans();
+
+  /**
+   * Get spans occurring between start and end. Each is a Unix timestamp
+   * in nanosecond units (for consistency with TimestampedEvent).
+   */
+  array<Span> getSpansInRange(long start, long end);
+}
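
The Span doc comment above notes that the client and server each record half of a trace and share a span ID so the two halves can be merged later. A minimal sketch of that merge step in Python, assuming Spans are handled as plain dicts keyed by the field names of the Span record (merge_spans is a hypothetical helper, not part of the Avro library):

    # Sketch only: merge a client-side and a server-side Span half sharing a spanID.
    def merge_spans(client_half, server_half):
        assert client_half["spanID"] == server_half["spanID"]
        merged = dict(client_half)
        # Combine the critical events recorded on each side, ordered by timestamp.
        merged["events"] = sorted(client_half["events"] + server_half["events"],
                                  key=lambda e: e["timeStamp"])
        # Prefer whichever side knows the responder's hostname.
        merged["responderHostname"] = (client_half.get("responderHostname")
                                       or server_half.get("responderHostname"))
        merged["complete"] = True  # data from both sides is now present
        return merged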
diff --git a/share/schemas/org/apache/avro/ipc/trace/avroTrace.avpr b/share/schemas/org/apache/avro/ipc/trace/avroTrace.avpr
new file mode 100644
index 0000000..041f3e8
--- /dev/null
+++ b/share/schemas/org/apache/avro/ipc/trace/avroTrace.avpr
@@ -0,0 +1,82 @@
+{
+  "protocol" : "AvroTrace",
+  "namespace" : "org.apache.avro.ipc.trace",
+  "types" : [ {
+    "type" : "enum",
+    "name" : "SpanEvent",
+    "symbols" : [ "SERVER_RECV", "SERVER_SEND", "CLIENT_RECV", "CLIENT_SEND" ]
+  }, {
+    "type" : "fixed",
+    "name" : "ID",
+    "size" : 8
+  }, {
+    "type" : "record",
+    "name" : "TimestampedEvent",
+    "fields" : [ {
+      "name" : "timeStamp",
+      "type" : "long"
+    }, {
+      "name" : "event",
+      "type" : [ "SpanEvent", "string" ]
+    } ]
+  }, {
+    "type" : "record",
+    "name" : "Span",
+    "fields" : [ {
+      "name" : "traceID",
+      "type" : "ID"
+    }, {
+      "name" : "spanID",
+      "type" : "ID"
+    }, {
+      "name" : "parentSpanID",
+      "type" : [ "ID", "null" ]
+    }, {
+      "name" : "messageName",
+      "type" : "string"
+    }, {
+      "name" : "requestPayloadSize",
+      "type" : "long"
+    }, {
+      "name" : "responsePayloadSize",
+      "type" : "long"
+    }, {
+      "name" : "requestorHostname",
+      "type" : [ "string", "null" ]
+    }, {
+      "name" : "responderHostname",
+      "type" : [ "string", "null" ]
+    }, {
+      "name" : "events",
+      "type" : {
+        "type" : "array",
+        "items" : "TimestampedEvent"
+      }
+    }, {
+      "name" : "complete",
+      "type" : "boolean"
+    } ]
+  } ],
+  "messages" : {
+    "getAllSpans" : {
+      "request" : [ ],
+      "response" : {
+        "type" : "array",
+        "items" : "Span"
+      }
+    },
+    "getSpansInRange" : {
+      "request" : [ {
+        "name" : "start",
+        "type" : "long"
+      }, {
+        "name" : "end",
+        "type" : "long"
+      } ],
+      "response" : {
+        "type" : "array",
+        "items" : "Span"
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/share/schemas/org/apache/avro/mapred/tether/InputProtocol.avpr b/share/schemas/org/apache/avro/mapred/tether/InputProtocol.avpr
new file mode 100644
index 0000000..d695ffe
--- /dev/null
+++ b/share/schemas/org/apache/avro/mapred/tether/InputProtocol.avpr
@@ -0,0 +1,64 @@
+{"namespace":"org.apache.avro.mapred.tether",
+ "protocol": "InputProtocol",
+ "doc": "Transmit inputs to a map or reduce task sub-process.",
+
+ "types": [
+     {"name": "TaskType", "type": "enum", "symbols": ["MAP","REDUCE"]}
+ ],
+
+ "messages": {
+
+     "configure": {
+         "doc": "Configure the task.  Sent before any other message.",
+         "request": [
+             {"name": "taskType", "type": "TaskType",
+              "doc": "Whether this is a map or reduce task."},
+             {"name": "inSchema", "type": "string",
+              "doc": "The Avro schema for task input data."},
+             {"name": "outSchema", "type": "string",
+              "doc": "The Avro schema for task output data."}
+         ],
+         "response": "null",
+         "one-way": true
+     },
+
+     "partitions": {
+         "doc": "Set the number of map output partitions.",
+         "request": [
+             {"name": "partitions", "type": "int",
+              "doc": "The number of map output partitions."}
+         ],
+         "response": "null",
+         "one-way": true
+     },
+
+     "input": {
+         "doc": "Send a block of input data to a task.",
+         "request": [
+             {"name": "data", "type": "bytes",
+              "doc": "A sequence of instances of the declared schema."},
+             {"name": "count", "type": "long",
+              "default": 1,
+              "doc": "The number of instances in this block."}
+         ],
+         "response": "null",
+         "one-way": true
+     },
+
+     "abort": {
+         "doc": "Called to abort the task.",
+         "request": [],
+         "response": "null",
+         "one-way": true
+     },
+
+     "complete": {
+         "doc": "Called when a task's input is complete.",
+         "request": [],
+         "response": "null",
+         "one-way": true
+     }
+
+ }
+
+}
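
The tether protocol definitions such as the one above are ordinary Avro protocol JSON, so they can be loaded with the Python implementation in this package. A minimal sketch, assuming the avro module is importable, the working directory is the repository root, and that the parsed Protocol object exposes name and messages attributes as in the 1.7-era Python API:

    # Sketch: parse the tether InputProtocol definition with the avro Python library.
    from avro import protocol

    with open("share/schemas/org/apache/avro/mapred/tether/InputProtocol.avpr") as f:
        p = protocol.parse(f.read())

    print(p.name)            # InputProtocol
    for name in p.messages:  # configure, partitions, input, abort, complete (order may vary)
        print(name)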
diff --git a/share/schemas/org/apache/avro/mapred/tether/OutputProtocol.avpr b/share/schemas/org/apache/avro/mapred/tether/OutputProtocol.avpr
new file mode 100644
index 0000000..be755eb
--- /dev/null
+++ b/share/schemas/org/apache/avro/mapred/tether/OutputProtocol.avpr
@@ -0,0 +1,82 @@
+{"namespace":"org.apache.avro.mapred.tether",
+ "protocol": "OutputProtocol",
+ "doc": "Transmit outputs from a map or reduce task to parent.",
+
+ "messages": {
+
+     "configure": {
+         "doc": "Configure task.  Sent before any other message.",
+         "request": [
+             {"name": "port", "type": "int",
+              "doc": "The port to transmit inputs to this task on."}
+         ],
+         "response": "null",
+         "one-way": true
+     },
+
+     "output": {
+         "doc": "Send an output datum.",
+         "request": [
+             {"name": "datum", "type": "bytes",
+              "doc": "A binary-encoded instance of the declared schema."}
+         ],
+         "response": "null",
+         "one-way": true
+     },
+
+     "outputPartitioned": {
+         "doc": "Send map output datum explicitly naming its partition.",
+         "request": [
+             {"name": "partition", "type": "int",
+              "doc": "The map output partition for this datum."},
+             {"name": "datum", "type": "bytes",
+              "doc": "A binary-encoded instance of the declared schema."}
+         ],
+         "response": "null",
+         "one-way": true
+     },
+
+     "status": {
+         "doc": "Update the task's status message.  Also acts as keepalive.",
+         "request": [
+             {"name": "message", "type": "string",
+              "doc": "The new status message for the task."}
+         ],
+         "response": "null",
+         "one-way": true
+     },
+
+     "count": {
+         "doc": "Increment a task/job counter.",
+         "request": [
+             {"name": "group", "type": "string",
+              "doc": "The name of the counter group."},
+             {"name": "name", "type": "string",
+              "doc": "The name of the counter to increment."},
+             {"name": "amount", "type": "long",
+              "doc": "The amount to incrment the counter."}
+         ],
+         "response": "null",
+         "one-way": true
+     },
+
+     "fail": {
+         "doc": "Called by a failing task to abort.",
+         "request": [
+             {"name": "message", "type": "string",
+              "doc": "The reason for failure."}
+         ],
+         "response": "null",
+         "one-way": true
+     },
+
+     "complete": {
+         "doc": "Called when a task's output has completed without error.",
+         "request": [],
+         "response": "null",
+         "one-way": true
+     }
+
+ }
+
+}
diff --git a/share/test/data/schema-tests.txt b/share/test/data/schema-tests.txt
new file mode 100644
index 0000000..18cb6d0
--- /dev/null
+++ b/share/test/data/schema-tests.txt
@@ -0,0 +1,192 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+// NOTE: the Java implementation provides a slow-but-direct implementation
+// of the fingerprinting algorithm, which is used to cross-check the
+// "fingerprint" values below.  Thus, the Java unit tests provide validation
+// for these values, so other languages can just assume they are correct.
+
+
+// 000
+<<INPUT "null"
+<<canonical "null"
+<<fingerprint 7195948357588979594
+
+// 001
+<<INPUT {"type":"null"}
+<<canonical "null"
+
+// 002
+<<INPUT "boolean"
+<<canonical "boolean"
+<<fingerprint -6970731678124411036
+
+// 003
+<<INPUT {"type":"boolean"}
+<<canonical "boolean"
+
+// 004
+<<INPUT "int"
+<<canonical "int"
+<<fingerprint 8247732601305521295
+
+// 005
+<<INPUT {"type":"int"}
+<<canonical "int"
+
+// 006
+<<INPUT "long"
+<<canonical "long"
+<<fingerprint -3434872931120570953
+
+// 007
+<<INPUT {"type":"long"}
+<<canonical "long"
+
+// 008
+<<INPUT "float"
+<<canonical "float"
+<<fingerprint 5583340709985441680
+
+// 009
+<<INPUT {"type":"float"}
+<<canonical "float"
+
+// 010
+<<INPUT "double"
+<<canonical "double"
+<<fingerprint -8181574048448539266
+
+// 011
+<<INPUT {"type":"double"}
+<<canonical "double"
+
+// 012
+<<INPUT "bytes"
+<<canonical "bytes"
+<<fingerprint 5746618253357095269
+
+// 013
+<<INPUT {"type":"bytes"}
+<<canonical "bytes"
+
+// 014
+<<INPUT "string"
+<<canonical "string"
+<<fingerprint -8142146995180207161
+
+// 015
+<<INPUT {"type":"string"}
+<<canonical "string"
+
+// 016
+<<INPUT [  ]
+<<canonical []
+<<fingerprint -1241056759729112623
+
+// 017
+<<INPUT [ "int"  ]
+<<canonical ["int"]
+<<fingerprint -5232228896498058493
+
+// 018
+<<INPUT [ "int" , {"type":"boolean"} ]
+<<canonical ["int","boolean"]
+<<fingerprint 5392556393470105090
+
+// 019
+<<INPUT {"fields":[], "type":"record", "name":"foo"}
+<<canonical {"name":"foo","type":"record","fields":[]}
+<<fingerprint -4824392279771201922
+
+// 020
+<<INPUT {"fields":[], "type":"record", "name":"foo", "namespace":"x.y"}
+<<canonical {"name":"x.y.foo","type":"record","fields":[]}
+<<fingerprint 5916914534497305771
+
+// 021
+<<INPUT {"fields":[], "type":"record", "name":"a.b.foo", "namespace":"x.y"}
+<<canonical {"name":"a.b.foo","type":"record","fields":[]}
+<<fingerprint -4616218487480524110
+
+// 022
+<<INPUT {"fields":[], "type":"record", "name":"foo", "doc":"Useful info"}
+<<canonical {"name":"foo","type":"record","fields":[]}
+<<fingerprint -4824392279771201922
+
+// 023
+<<INPUT {"fields":[], "type":"record", "name":"foo", "aliases":["foo","bar"]}
+<<canonical {"name":"foo","type":"record","fields":[]}
+<<fingerprint -4824392279771201922
+
+// 024
+<<INPUT {"fields":[], "type":"record", "name":"foo", "doc":"foo", "aliases":["foo","bar"]}
+<<canonical {"name":"foo","type":"record","fields":[]}
+<<fingerprint -4824392279771201922
+
+// 025
+<<INPUT {"fields":[{"type":{"type":"boolean"}, "name":"f1"}], "type":"record", "name":"foo"}
+<<canonical {"name":"foo","type":"record","fields":[{"name":"f1","type":"boolean"}]}
+<<fingerprint 7843277075252814651
+
+// 026
+<<INPUT
+{ "fields":[{"type":"boolean", "aliases":[], "name":"f1", "default":true},
+            {"order":"descending","name":"f2","doc":"Hello","type":"int"}],
+  "type":"record", "name":"foo"
+}
+INPUT
+<<canonical {"name":"foo","type":"record","fields":[{"name":"f1","type":"boolean"},{"name":"f2","type":"int"}]}
+<<fingerprint -4860222112080293046
+
+// 027
+<<INPUT {"type":"enum", "name":"foo", "symbols":["A1"]}
+<<canonical {"name":"foo","type":"enum","symbols":["A1"]}
+<<fingerprint -6342190197741309591
+
+// 028
+<<INPUT {"namespace":"x.y.z", "type":"enum", "name":"foo", "doc":"foo bar", "symbols":["A1", "A2"]}
+<<canonical {"name":"x.y.z.foo","type":"enum","symbols":["A1","A2"]}
+<<fingerprint -4448647247586288245
+
+// 029
+// <<INPUT {"type":"fixed", "name":"foo", "size":015} -- Avro parser broken???
+<<INPUT {"name":"foo","type":"fixed","size":15}
+<<canonical {"name":"foo","type":"fixed","size":15}
+<<fingerprint 1756455273707447556
+
+// 030
+<<INPUT {"namespace":"x.y.z", "type":"fixed", "name":"foo", "doc":"foo bar", "size":32}
+<<canonical {"name":"x.y.z.foo","type":"fixed","size":32}
+<<fingerprint -3064184465700546786
+
+// 031
+<<INPUT { "items":{"type":"null"}, "type":"array"}
+<<canonical {"type":"array","items":"null"}
+<<fingerprint -589620603366471059
+
+// 032
+<<INPUT { "values":"string", "type":"map"}
+<<canonical {"type":"map","values":"string"}
+<<fingerprint -8732877298790414990
+
+// 033
+<<INPUT
+  {"name":"PigValue","type":"record",
+   "fields":[{"name":"value", "type":["null", "int", "long", "PigValue"]}]}
+INPUT
+<<canonical {"name":"PigValue","type":"record","fields":[{"name":"value","type":["null","int","long","PigValue"]}]}
+<<fingerprint -1759257747318642341
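
The fingerprint values in this file are 64-bit Rabin fingerprints ("CRC-64-AVRO") of the canonical schema text. A minimal Python sketch of that computation, assuming the table-driven algorithm described in the Avro specification; under that assumption it reproduces values such as 7195948357588979594 for the canonical form "null" (test 000 above):

    # Sketch: CRC-64-AVRO (Rabin) fingerprint of a canonical schema string,
    # following the algorithm given in the Avro specification.
    EMPTY = 0xc15d213aa4d7a795
    MASK64 = (1 << 64) - 1

    _TABLE = []
    for i in range(256):
        fp = i
        for _ in range(8):
            fp = (fp >> 1) ^ (EMPTY if fp & 1 else 0)
        _TABLE.append(fp)

    def fingerprint64(canonical_form):
        """Signed 64-bit fingerprint, matching the <<fingerprint values above."""
        fp = EMPTY
        for byte in bytearray(canonical_form.encode("utf-8")):
            fp = ((fp >> 8) ^ _TABLE[(fp ^ byte) & 0xff]) & MASK64
        return fp - (1 << 64) if fp >= (1 << 63) else fp

    # fingerprint64('"null"') -> 7195948357588979594 (assuming the spec algorithm)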
diff --git a/share/test/data/syncInMeta.avro b/share/test/data/syncInMeta.avro
new file mode 100644
index 0000000..ec701fd
Binary files /dev/null and b/share/test/data/syncInMeta.avro differ
diff --git a/share/test/data/test.avro12 b/share/test/data/test.avro12
new file mode 100644
index 0000000..b113569
Binary files /dev/null and b/share/test/data/test.avro12 differ
diff --git a/share/test/data/weather-snappy.avro b/share/test/data/weather-snappy.avro
new file mode 100644
index 0000000..5751adf
Binary files /dev/null and b/share/test/data/weather-snappy.avro differ
diff --git a/share/test/data/weather-sorted.avro b/share/test/data/weather-sorted.avro
new file mode 100755
index 0000000..80f0536
Binary files /dev/null and b/share/test/data/weather-sorted.avro differ
diff --git a/share/test/data/weather.avro b/share/test/data/weather.avro
new file mode 100644
index 0000000..b5b6b8a
Binary files /dev/null and b/share/test/data/weather.avro differ
diff --git a/share/test/data/weather.json b/share/test/data/weather.json
new file mode 100644
index 0000000..5daa227
--- /dev/null
+++ b/share/test/data/weather.json
@@ -0,0 +1,5 @@
+{"station":"011990-99999","time":-619524000000,"temp":0}
+{"station":"011990-99999","time":-619506000000,"temp":22}
+{"station":"011990-99999","time":-619484400000,"temp":-11}
+{"station":"012650-99999","time":-655531200000,"temp":111}
+{"station":"012650-99999","time":-655509600000,"temp":78}
diff --git a/share/test/interop/bin/test_rpc_interop.sh b/share/test/interop/bin/test_rpc_interop.sh
new file mode 100755
index 0000000..75681be
--- /dev/null
+++ b/share/test/interop/bin/test_rpc_interop.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e						  # exit on error
+
+cd `dirname "$0"`/../../../..			  # connect to root
+
+VERSION=`cat share/VERSION.txt`
+
+#set -x						  # echo commands
+
+java_client="java -jar lang/java/tools/target/avro-tools-$VERSION.jar rpcsend"
+java_server="java -jar lang/java/tools/target/avro-tools-$VERSION.jar rpcreceive"
+
+py_client="python lang/py/build/src/avro/tool.py rpcsend"
+py_server="python lang/py/build/src/avro/tool.py rpcreceive"
+
+ruby_client="ruby -rubygems -Ilang/ruby/lib lang/ruby/test/tool.rb rpcsend"
+ruby_server="ruby -rubygems -Ilang/ruby/lib lang/ruby/test/tool.rb rpcreceive"
+
+export PYTHONPATH=lang/py/build/src	  # path to avro Python module
+
+clients=("$java_client" "$py_client" "$ruby_client")
+servers=("$java_server" "$py_server" "$ruby_server")
+
+proto=share/test/schemas/simple.avpr
+
+portfile=/tmp/interop_$$
+
+function cleanup() {
+    rm -rf $portfile
+    for job in `jobs -p` ; do kill $job; done
+}
+
+trap 'cleanup' EXIT
+
+for server in "${servers[@]}"
+do
+    for msgDir in share/test/interop/rpc/*
+    do
+	msg=`basename "$msgDir"`
+	for c in ${msgDir}/*
+	do
+	    echo TEST: $c
+	    for client in "${clients[@]}"
+	    do
+        rm -rf $portfile
+		$server http://127.0.0.1:0/ $proto $msg -file $c/response.avro \
+		    > $portfile &
+        count=0
+        while [ ! -s $portfile ]
+        do
+            sleep 1
+            if [ $count -ge 10 ]
+            then
+                echo $server did not start.
+                exit 1
+            fi
+            count=`expr $count + 1`
+        done
+		read ignore port < $portfile
+	    	$client http://127.0.0.1:$port $proto $msg -file $c/request.avro
+		wait
+	    done
+	done
+    done
+done
+
+echo RPC INTEROP TESTS PASS
diff --git a/share/test/interop/rpc/add/onePlusOne/request.avro b/share/test/interop/rpc/add/onePlusOne/request.avro
new file mode 100644
index 0000000..172f237
Binary files /dev/null and b/share/test/interop/rpc/add/onePlusOne/request.avro differ
diff --git a/share/test/interop/rpc/add/onePlusOne/response.avro b/share/test/interop/rpc/add/onePlusOne/response.avro
new file mode 100644
index 0000000..62ea90c
Binary files /dev/null and b/share/test/interop/rpc/add/onePlusOne/response.avro differ
diff --git a/share/test/interop/rpc/echo/foo/request.avro b/share/test/interop/rpc/echo/foo/request.avro
new file mode 100644
index 0000000..4d12ecc
Binary files /dev/null and b/share/test/interop/rpc/echo/foo/request.avro differ
diff --git a/share/test/interop/rpc/echo/foo/response.avro b/share/test/interop/rpc/echo/foo/response.avro
new file mode 100644
index 0000000..e2c8d54
Binary files /dev/null and b/share/test/interop/rpc/echo/foo/response.avro differ
diff --git a/share/test/interop/rpc/hello/world/request.avro b/share/test/interop/rpc/hello/world/request.avro
new file mode 100644
index 0000000..71adb63
Binary files /dev/null and b/share/test/interop/rpc/hello/world/request.avro differ
diff --git a/share/test/interop/rpc/hello/world/response.avro b/share/test/interop/rpc/hello/world/response.avro
new file mode 100644
index 0000000..5a79c74
Binary files /dev/null and b/share/test/interop/rpc/hello/world/response.avro differ
diff --git a/share/test/schemas/BulkData.avpr b/share/test/schemas/BulkData.avpr
new file mode 100644
index 0000000..608bf43
--- /dev/null
+++ b/share/test/schemas/BulkData.avpr
@@ -0,0 +1,21 @@
+
+{"namespace": "org.apache.avro.test",
+ "protocol": "BulkData",
+
+ "types": [],
+
+ "messages": {
+
+     "read": {
+         "request": [],
+         "response": "bytes"
+     },
+
+     "write": {
+         "request": [ {"name": "data", "type": "bytes"} ],
+         "response": "null"
+     }
+
+ }
+
+}
diff --git a/share/test/schemas/FooBarSpecificRecord.avsc b/share/test/schemas/FooBarSpecificRecord.avsc
new file mode 100644
index 0000000..08d32b2
--- /dev/null
+++ b/share/test/schemas/FooBarSpecificRecord.avsc
@@ -0,0 +1,22 @@
+{
+    "type": "record",
+    "name": "FooBarSpecificRecord",
+    "namespace": "org.apache.avro",
+    "fields": [
+        {"name": "id", "type": "int"},
+        {"name": "name", "type": "string"},
+        {"name": "nicknames", "type":
+            {"type": "array", "items": "string"}},
+        {"name": "relatedids", "type": 
+            {"type": "array", "items": "int"}},
+        {"name": "typeEnum", "type": 
+            ["null", { 
+                    "type": "enum",
+                    "name": "TypeEnum",
+                    "namespace": "org.apache.avro",
+                    "symbols" : ["a","b", "c"]
+                }],
+            "default": null
+        }
+    ]
+}
diff --git a/share/test/schemas/contexts.avdl b/share/test/schemas/contexts.avdl
new file mode 100644
index 0000000..bcf9e88
--- /dev/null
+++ b/share/test/schemas/contexts.avdl
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+@version("1.0.5")
+@namespace("org.apache.avro.ipc.specific")
+protocol Contexts {
+  record HomePage {
+  }
+  
+  record ProductPage {
+    string product;
+  }
+
+  record CartPage {
+    array<string> productsInCart;
+  }
+
+  record UnknownPage {
+  }
+
+  record PageView {
+    long datetime;
+    union {UnknownPage, HomePage, ProductPage, CartPage} pageContext;
+  }
+
+} 
diff --git a/share/test/schemas/echo.avdl b/share/test/schemas/echo.avdl
new file mode 100644
index 0000000..8f861ca
--- /dev/null
+++ b/share/test/schemas/echo.avdl
@@ -0,0 +1,32 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+@namespace("org.apache.avro.echo")
+protocol Echo {
+  record Ping {
+    long timestamp = -1;
+    string text = "";
+  }
+
+  record Pong {
+    long timestamp = -1;
+    Ping ping;
+  }
+
+  Pong ping(Ping ping);
+}
diff --git a/share/test/schemas/interop.avsc b/share/test/schemas/interop.avsc
new file mode 100644
index 0000000..8cfbba2
--- /dev/null
+++ b/share/test/schemas/interop.avsc
@@ -0,0 +1,28 @@
+{"type": "record", "name":"Interop", "namespace": "org.apache.avro",
+  "fields": [
+      {"name": "intField", "type": "int"},
+      {"name": "longField", "type": "long"},
+      {"name": "stringField", "type": "string"},
+      {"name": "boolField", "type": "boolean"},
+      {"name": "floatField", "type": "float"},
+      {"name": "doubleField", "type": "double"},
+      {"name": "bytesField", "type": "bytes"},
+      {"name": "nullField", "type": "null"},
+      {"name": "arrayField", "type": {"type": "array", "items": "double"}},
+      {"name": "mapField", "type":
+       {"type": "map", "values":
+        {"type": "record", "name": "Foo",
+         "fields": [{"name": "label", "type": "string"}]}}},
+      {"name": "unionField", "type":
+       ["boolean", "double", {"type": "array", "items": "bytes"}]},
+      {"name": "enumField", "type":
+       {"type": "enum", "name": "Kind", "symbols": ["A","B","C"]}},
+      {"name": "fixedField", "type":
+       {"type": "fixed", "name": "MD5", "size": 16}},
+      {"name": "recordField", "type":
+       {"type": "record", "name": "Node",
+        "fields": [
+            {"name": "label", "type": "string"},
+            {"name": "children", "type": {"type": "array", "items": "Node"}}]}}
+  ]
+}
diff --git a/share/test/schemas/mail.avpr b/share/test/schemas/mail.avpr
new file mode 100644
index 0000000..7410592
--- /dev/null
+++ b/share/test/schemas/mail.avpr
@@ -0,0 +1,26 @@
+{"namespace": "org.apache.avro.test",
+ "protocol": "Mail",
+
+ "types": [
+     {"name": "Message", "type": "record",
+      "fields": [
+          {"name": "to",   "type": "string"},
+          {"name": "from", "type": "string"},
+          {"name": "body", "type": "string"}
+      ]
+     }
+ ],
+
+ "messages": {
+     "send": {
+         "request": [{"name": "message", "type": "Message"}],
+         "response": "string"
+     },
+     "fireandforget": {
+         "request": [{"name": "message", "type": "Message"}],
+         "response": "null",
+         "one-way": true
+     }
+
+ }
+}
diff --git a/share/test/schemas/namespace.avpr b/share/test/schemas/namespace.avpr
new file mode 100644
index 0000000..11b6bf1
--- /dev/null
+++ b/share/test/schemas/namespace.avpr
@@ -0,0 +1,28 @@
+{"namespace": "org.apache.avro.test.namespace",
+ "protocol": "TestNamespace",
+
+ "types": [
+     {"name": "org.apache.avro.test.util.MD5", "type": "fixed", "size": 16},
+     {"name": "TestRecord", "type": "record",
+      "fields": [ {"name": "hash", "type": "org.apache.avro.test.util.MD5"} ]
+     },
+     {"name": "TestError", "namespace": "org.apache.avro.test.errors",
+      "type": "error", "fields": [ {"name": "message", "type": "string"} ]
+     }
+ ],
+
+ "messages": {
+     "echo": {	
+         "request": [{"name": "record", "type": "TestRecord"}],
+         "response": "TestRecord"
+     },
+
+     "error": {
+         "request": [],
+         "response": "null",
+         "errors": ["org.apache.avro.test.errors.TestError"]
+     }
+
+ }
+
+}
diff --git a/share/test/schemas/simple.avpr b/share/test/schemas/simple.avpr
new file mode 100644
index 0000000..7165ad8
--- /dev/null
+++ b/share/test/schemas/simple.avpr
@@ -0,0 +1,80 @@
+{"namespace": "org.apache.avro.test",
+ "protocol": "Simple",
+ "doc": "Protocol used for testing.",
+ "version" : "1.6.2",
+ "javaAnnotation": ["javax.annotation.Generated(\"avro\")",
+                    "org.apache.avro.TestAnnotation"],
+
+ "types": [
+     {"name": "Kind", "type": "enum", "symbols": ["FOO","BAR","BAZ"],
+      "javaAnnotation": "org.apache.avro.TestAnnotation"},
+
+     {"name": "MD5", "type": "fixed", "size": 16,
+      "javaAnnotation": "org.apache.avro.TestAnnotation"},
+
+     {"name": "TestRecord", "type": "record",
+      "javaAnnotation": "org.apache.avro.TestAnnotation",
+      "fields": [
+          {"name": "name", "type": "string", "order": "ignore",
+           "javaAnnotation": "org.apache.avro.TestAnnotation"},
+          {"name": "kind", "type": "Kind", "order": "descending"},
+          {"name": "hash", "type": "MD5"}
+      ]
+     },
+
+     {"name": "TestError", "type": "error", "fields": [
+         {"name": "message", "type": "string"}
+      ]
+     },
+
+     {"name": "TestRecordWithUnion", "type": "record",
+      "fields": [
+          {"name": "kind", "type": ["null", "Kind"]},
+          {"name": "value", "type": ["null", "string"]}
+      ]
+     }
+
+ ],
+
+ "messages": {
+
+     "hello": {
+         "doc": "Send a greeting",
+         "request": [{"name": "greeting", "type": "string"}],
+         "response": "string"
+     },
+
+     "echo": {
+         "doc": "Pretend you're in a cave!",
+         "request": [{"name": "record", "type": "TestRecord"}],
+         "response": "TestRecord"
+     },
+
+     "add": {
+         "specialProp" : "test",
+         "request": [{"name": "arg1", "type": "int"}, {"name": "arg2", "type": "int"}],
+         "response": "int"
+     },
+
+     "echoBytes": {
+         "request": [{"name": "data", "type": "bytes"}],
+         "response": "bytes"
+     },
+
+     "error": {
+         "doc": "Always throws an error.",
+         "request": [],
+         "response": "null",
+         "errors": ["TestError"]
+     },
+
+     "ack": {
+         "doc": "Send a one way message",
+         "request": [],
+         "response": "null",
+         "one-way": true,
+         "javaAnnotation": "org.apache.avro.TestAnnotation"
+     }
+ }
+
+}
diff --git a/share/test/schemas/social.avdl b/share/test/schemas/social.avdl
new file mode 100644
index 0000000..3212418
--- /dev/null
+++ b/share/test/schemas/social.avdl
@@ -0,0 +1,33 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+@version("1.0.5")
+@namespace("org.apache.avro.ipc.specific")
+protocol Social {
+  enum PrivacyType { FRIENDS, FRIENDS_OF_FRIENDS, PUBLIC, CUSTOM } 
+  
+  record Person {
+    string name;
+    int year_of_birth;
+    string country = "US";
+    string state;
+    array<Person> friends = [];
+    array<string> languages = [ "English" , "Java" ];
+    PrivacyType defaultPrivacy = "FRIENDS";
+  }
+} 
diff --git a/share/test/schemas/stringables.avdl b/share/test/schemas/stringables.avdl
new file mode 100644
index 0000000..ce6173e
--- /dev/null
+++ b/share/test/schemas/stringables.avdl
@@ -0,0 +1,32 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A test case to exercise the stringable feature on @java-class, @java-key-class and
+ * @java-element-class
+ */
+@namespace("test")
+protocol AnnotatedStringableTypes {
+
+  record StringablesRecord {
+    /** Each field exercises one of the java-class, key-class or element-class. */
+    @java-class("java.math.BigDecimal") string value;
+    @java-key-class("java.math.BigInteger") map<string> mapWithBigIntKeys;
+    map<@java-class("java.math.BigDecimal") string> mapWithBigDecimalElements;
+  }
+}
diff --git a/share/test/schemas/weather.avsc b/share/test/schemas/weather.avsc
new file mode 100644
index 0000000..db3a43f
--- /dev/null
+++ b/share/test/schemas/weather.avsc
@@ -0,0 +1,8 @@
+{"type": "record", "name": "test.Weather",
+ "doc": "A weather reading.",
+ "fields": [
+     {"name": "station", "type": "string", "order": "ignore"},
+     {"name": "time", "type": "long"},
+     {"name": "temp", "type": "int"}
+ ]
+}
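
The records in share/test/data/weather.json (added earlier in this patch) conform to this schema. A minimal sketch of writing them to an Avro container file, assuming the 1.7-era Python API (schema.parse, DataFileWriter) and paths relative to the repository root; the output path is illustrative only:

    # Sketch: encode the weather.json records with weather.avsc using the
    # avro Python library's container-file writer.
    import json
    from avro import schema
    from avro.datafile import DataFileWriter
    from avro.io import DatumWriter

    sch = schema.parse(open("share/test/schemas/weather.avsc").read())

    writer = DataFileWriter(open("/tmp/weather-example.avro", "wb"), DatumWriter(), sch)
    for line in open("share/test/data/weather.json"):
        writer.append(json.loads(line))  # e.g. {"station": "011990-99999", ...}
    writer.close()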

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/python-avro.git


