[Python-modules-commits] [python-scrapy-djangoitem] 01/05: importing python-scrapy-djangoitem_1.1.1.orig.tar.gz

Michael Fladischer fladi at moszumanska.debian.org
Sat Feb 11 09:19:30 UTC 2017


This is an automated email from the git hooks/post-receive script.

fladi pushed a commit to branch master
in repository python-scrapy-djangoitem.

commit af33fe3e278fefacff302871c5100be196856bfb
Author: Michael Fladischer <FladischerMichael at fladi.at>
Date:   Thu Feb 2 20:09:08 2017 +0100

    importing python-scrapy-djangoitem_1.1.1.orig.tar.gz
---
 .bumpversion.cfg              |   8 ++
 .gitignore                    |  57 +++++++++++
 .travis.yml                   |  26 +++++
 LICENSE                       |  28 ++++++
 README.rst                    | 224 ++++++++++++++++++++++++++++++++++++++++++
 requirements-py3.txt          |   3 +
 requirements.txt              |   3 +
 scrapy_djangoitem/__init__.py |  73 ++++++++++++++
 setup.cfg                     |   2 +
 setup.py                      |  34 +++++++
 tests/__init__.py             |   0
 tests/models.py               |  18 ++++
 tests/settings.py             |   8 ++
 tests/test_djangoitem.py      | 102 +++++++++++++++++++
 tox.ini                       |  24 +++++
 15 files changed, 610 insertions(+)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
new file mode 100644
index 0000000..801e5f8
--- /dev/null
+++ b/.bumpversion.cfg
@@ -0,0 +1,8 @@
+[bumpversion]
+current_version = 1.1.1
+commit = True
+tag = True
+tag_name = v{new_version}
+
+[bumpversion:file:setup.py]
+
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ba74660
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,57 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..8147c09
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,26 @@
+language: python
+python: 3.5
+env:
+- TOXENV=py27
+- TOXENV=py34
+- TOXENV=py35
+install:
+- pip install -U tox
+script: tox
+notifications:
+  irc:
+    use_notice: true
+    skip_join: true
+    channels:
+    - irc.freenode.org#scrapy
+deploy:
+  provider: pypi
+  distributions: sdist bdist_wheel
+  user: scrapy
+  password:
+    secure: bUpnSgikr11B4ddmDUlAEg6ujKVM1Lwd7M7mecdXyMDVDobIOIpKA9GfgajfM9Uh9NDGYERvkIzXAikM4uY3Ltz+QtL4qJ14y7hp0Uw2IfoLcRiea315ieNdEQL2cF6EC6GEo49/Ht9iLLZsrlSZdOnYn+HjopYe58cYYuAHyp8=
+  on:
+    tags: true
+    all_branches: true
+    repo: scrapy-plugins/scrapy-djangoitem
+    condition: $TOXENV == py27
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..a30cd00
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,28 @@
+Copyright (c) 2015, Scrapy project
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of scrapy-djangoitem nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..237c2de
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,224 @@
+=================
+scrapy-djangoitem
+=================
+
+.. image:: https://img.shields.io/pypi/v/scrapy-djangoitem.svg
+   :target: https://pypi.python.org/pypi/scrapy-djangoitem
+   :alt: PyPI Version
+
+.. image:: https://img.shields.io/travis/scrapy-plugins/scrapy-djangoitem/master.svg
+   :target: http://travis-ci.org/scrapy-plugins/scrapy-djangoitem
+   :alt: Build Status
+
+.. image:: https://img.shields.io/github/license/scrapy-plugins/scrapy-djangoitem.svg
+   :target: https://github.com/scrapy-plugins/scrapy-djangoitem/blob/master/LICENSE
+   :alt: License
+
+
+``scrapy-djangoitem`` is an extension that allows you to define `Scrapy items
+<http://doc.scrapy.org/en/latest/topics/items.html>`_ using existing `Django
+models <https://docs.djangoproject.com/en/latest/topics/db/models/>`_.
+
+This utility provides a new class, named ``DjangoItem``, that you can use as a
+regular Scrapy item and link it to a Django model with its ``django_model``
+attribute. Start using it right away by importing it from this package::
+
+    from scrapy_djangoitem import DjangoItem
+
+Installation
+============
+
+Starting with ``v1.1`` both ``Python 2.7`` and ``Python 3.4/3.5`` are
+supported. For ``Python 3`` you need ``Scrapy v1.1`` or above.
+
+Latest tested Django version is ``Django 1.9``.
+
+Install from ``PyPI`` using::
+
+  pip install scrapy-djangoitem
+
+
+Introduction
+============
+
+``DjangoItem`` is a class of item that gets its fields definition from a
+Django model, you simply create a ``DjangoItem`` and specify what Django
+model it relates to.
+
+Besides getting the model fields defined on your item, ``DjangoItem``
+provides a method to create and populate a Django model instance with the item
+data.
+
+Usage
+=====
+
+``DjangoItem`` works much like ModelForms in Django, you create a subclass
+and define its ``django_model`` attribute to be a valid Django model. With this
+you will get an item with a field for each Django model field.
+
+In addition, you can define fields that aren't present in the model and even
+override fields that are present in the model defining them in the item.
+
+Let's see some examples:
+
+Creating a Django model for the examples::
+
+    from django.db import models
+
+    class Person(models.Model):
+        name = models.CharField(max_length=255)
+        age = models.IntegerField()
+
+Defining a basic ``DjangoItem``::
+
+    from scrapy_djangoitem import DjangoItem
+
+    class PersonItem(DjangoItem):
+        django_model = Person
+
+``DjangoItem`` works just like Scrapy items::
+
+    >>> p = PersonItem()
+    >>> p['name'] = 'John'
+    >>> p['age'] = '22'
+
+To obtain the Django model from the item, we call the extra method
+``DjangoItem.save()`` of the ``DjangoItem``::
+
+    >>> person = p.save()
+    >>> person.name
+    'John'
+    >>> person.age
+    '22'
+    >>> person.id
+    1
+
+The model is already saved when we call ``DjangoItem.save()``, we
+can prevent this by calling it with ``commit=False``. We can use
+``commit=False`` in ``DjangoItem.save()`` method to obtain an unsaved model::
+
+    >>> person = p.save(commit=False)
+    >>> person.name
+    'John'
+    >>> person.age
+    '22'
+    >>> person.id
+    None
+
+As said before, we can add other fields to the item::
+
+    import scrapy
+    from scrapy_djangoitem import DjangoItem
+
+    class PersonItem(DjangoItem):
+        django_model = Person
+        sex = scrapy.Field()
+
+::
+
+   >>> p = PersonItem()
+   >>> p['name'] = 'John'
+   >>> p['age'] = '22'
+   >>> p['sex'] = 'M'
+
+And we can override the fields of the model with your own::
+
+    class PersonItem(DjangoItem):
+        django_model = Person
+        name = scrapy.Field(default='No Name')
+
+This is useful to provide properties to the field, like a default or any other
+property that your project uses. Those additional fields won't be taken into
+account when doing a ``DjangoItem.save()``.
+
+Caveats
+=======
+
+``DjangoItem`` is a rather convenient way to integrate Scrapy projects with Django
+models, but bear in mind that Django ORM **may not scale well** if you scrape a lot
+of items (i.e. millions) with Scrapy. This is because a relational backend is
+**often not a good choice for write-intensive applications** (such as a web
+crawler), especially if the database is highly normalized and with many indices.
+
+Setup
+=====
+
+To use the Django models outside the Django application you need to set up the
+``DJANGO_SETTINGS_MODULE`` environment variable and --in most cases-- modify
+the ``PYTHONPATH`` environment variable to be able to import the settings
+module.
+
+There are many ways to do this depending on your use case and preferences.
+One of the simplest ways to do it is detailed below.
+
+Suppose your Django project is named ``mysite``, is located in the path
+``/home/projects/mysite`` and you have created an app ``myapp`` with the model
+``Person``. That means your directory structure is something like this::
+
+    /home/projects/mysite
+    ├── manage.py
+    ├── myapp
+    │   ├── __init__.py
+    │   ├── models.py
+    │   ├── tests.py
+    │   └── views.py
+    └── mysite
+        ├── __init__.py
+        ├── settings.py
+        ├── urls.py
+        └── wsgi.py
+
+Then you need to add ``/home/projects/mysite`` to the ``PYTHONPATH``
+environment variable and set up the environment variable
+``DJANGO_SETTINGS_MODULE`` to ``mysite.settings``. That can be done in your
+Scrapy's settings file by adding the lines below::
+
+  import sys
+  sys.path.append('/home/projects/mysite')
+
+  import os
+  os.environ['DJANGO_SETTINGS_MODULE'] = 'mysite.settings'
+
+Notice that we modify the ``sys.path`` variable instead of the ``PYTHONPATH``
+environment variable as we are already within the python runtime. If everything
+is right, you should be able to start the ``scrapy shell`` command and import
+the model ``Person`` (i.e. ``from myapp.models import Person``).
+
+Starting with ``Django 1.8`` you also have to explicitly set up ``Django`` if using
+it outside a ``manage.py`` context
+(see `Django Docs <https://docs.djangoproject.com/en/1.8/intro/tutorial01/#playing-with-the-api>`_)::
+
+  import django
+  django.setup()
+
+
+Development
+===========
+
+Test suite from the ``tests`` directory can be run using ``tox`` by running::
+
+  tox
+
+...using the configuration in ``tox.ini``. The ``Python`` interpreters
+used have to be installed locally on the system.
+
+
+Changelog
+=========
+
+v1.1.1 (2016-05-04)
+-------------------
+
+* Distribute as universal wheel
+* Fix README's markup
+
+v1.1 (2016-05-04)
+-----------------
+
+* ``Python 3.4/3.5`` support
+* Making tests work with ``Django 1.9`` again
+
+v1.0 (2015-04-29)
+-----------------
+
+* Initial version
diff --git a/requirements-py3.txt b/requirements-py3.txt
new file mode 100644
index 0000000..454b454
--- /dev/null
+++ b/requirements-py3.txt
@@ -0,0 +1,3 @@
+Scrapy>=1.1.0rc1
+Django
+six
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..98f4ac2
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+Scrapy>=0.24.5
+Django
+six
diff --git a/scrapy_djangoitem/__init__.py b/scrapy_djangoitem/__init__.py
new file mode 100644
index 0000000..dc28cc8
--- /dev/null
+++ b/scrapy_djangoitem/__init__.py
@@ -0,0 +1,73 @@
+from six import with_metaclass
+from django.core.exceptions import ValidationError
+from scrapy.item import Field, Item, ItemMeta
+
+
+
+class DjangoItemMeta(ItemMeta):
+
+    def __new__(mcs, class_name, bases, attrs):
+        cls = super(DjangoItemMeta, mcs).__new__(mcs, class_name, bases, attrs)
+        cls.fields = cls.fields.copy()
+
+        if cls.django_model:
+            cls._model_fields = []
+            cls._model_meta = cls.django_model._meta
+            for model_field in cls._model_meta.fields:
+                if not model_field.auto_created:
+                    if model_field.name not in cls.fields:
+                        cls.fields[model_field.name] = Field()
+                    cls._model_fields.append(model_field.name)
+        return cls
+
+
+class DjangoItem(with_metaclass(DjangoItemMeta, Item)):
+
+    django_model = None
+
+    def __init__(self, *args, **kwargs):
+        super(DjangoItem, self).__init__(*args, **kwargs)
+        self._instance = None
+        self._errors = None
+
+    def save(self, commit=True):
+        if commit:
+            self.instance.save()
+        return self.instance
+
+    def is_valid(self, exclude=None):
+        self._get_errors(exclude)
+        return not bool(self._errors)
+
+    def _get_errors(self, exclude=None):
+        if self._errors is not None:
+            return self._errors
+
+        self._errors = {}
+        if exclude is None:
+            exclude = []
+
+        try:
+            self.instance.clean_fields(exclude=exclude)
+        except ValidationError as e:
+            self._errors = e.update_error_dict(self._errors)
+
+        try:
+            self.instance.clean()
+        except ValidationError as e:
+            self._errors = e.update_error_dict(self._errors)
+
+        # uniqueness is not checked, because it is faster to check it when
+        # saving object to database. Just beware, that failed save()
+        # raises IntegrityError instead of ValidationError.
+
+        return self._errors
+    errors = property(_get_errors)
+
+    @property
+    def instance(self):
+        if self._instance is None:
+            modelargs = dict((k, self.get(k)) for k in self._values
+                             if k in self._model_fields)
+            self._instance = self.django_model(**modelargs)
+        return self._instance
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..2a9acf1
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,2 @@
+[bdist_wheel]
+universal = 1
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..2aa7bb8
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,34 @@
+from setuptools import setup, find_packages
+
+
+setup(
+    name='scrapy-djangoitem',
+    version='1.1.1',
+    url='https://github.com/scrapy-plugins/scrapy-djangoitem',
+    description='Scrapy extension to write scraped items using Django models',
+    long_description=open('README.rst').read(),
+    author='Scrapy developers',
+    license='BSD',
+    packages=find_packages(exclude=('tests', 'tests.*')),
+    include_package_data=True,
+    zip_safe=False,
+    classifiers=[
+        'Framework :: Scrapy',
+        'Development Status :: 5 - Production/Stable',
+        'Environment :: Console',
+        'Intended Audience :: Developers',
+        'License :: OSI Approved :: BSD License',
+        'Operating System :: OS Independent',
+        'Programming Language :: Python',
+        'Programming Language :: Python :: 2',
+        'Programming Language :: Python :: 2.7',
+        'Programming Language :: Python :: 3',
+        'Programming Language :: Python :: 3.4',
+        'Programming Language :: Python :: 3.5',
+        'Topic :: Utilities',
+        'Framework :: Django',
+        'Framework :: Scrapy',
+    ],
+    install_requires=['six'],
+    requires=['scrapy (>=0.24.5)', 'django'],
+)
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/models.py b/tests/models.py
new file mode 100644
index 0000000..7435727
--- /dev/null
+++ b/tests/models.py
@@ -0,0 +1,18 @@
+from django.db import models
+
+
+class Person(models.Model):
+    name = models.CharField(max_length=255, default='Robot')
+    age = models.IntegerField()
+
+    class Meta:
+        app_label = 'test_djangoitem'
+
+
+class IdentifiedPerson(models.Model):
+    identifier = models.PositiveIntegerField(primary_key=True)
+    name = models.CharField(max_length=255)
+    age = models.IntegerField()
+
+    class Meta:
+        app_label = 'test_djangoitem'
diff --git a/tests/settings.py b/tests/settings.py
new file mode 100644
index 0000000..1bee924
--- /dev/null
+++ b/tests/settings.py
@@ -0,0 +1,8 @@
+DATABASES = {
+    'default': {
+        'ENGINE': 'django.db.backends.sqlite3',
+        'NAME': ':memory:',
+    }
+}
+
+SECRET_KEY = 'top-secret'
diff --git a/tests/test_djangoitem.py b/tests/test_djangoitem.py
new file mode 100644
index 0000000..80897f5
--- /dev/null
+++ b/tests/test_djangoitem.py
@@ -0,0 +1,102 @@
+import os
+import unittest
+
+os.environ['DJANGO_SETTINGS_MODULE'] = 'tests.settings'
+import django
+django.setup()
+
+from scrapy_djangoitem import DjangoItem, Field
+from tests.models import Person, IdentifiedPerson
+
+
+class BasePersonItem(DjangoItem):
+    django_model = Person
+
+
+class NewFieldPersonItem(BasePersonItem):
+    other = Field()
+
+
+class OverrideFieldPersonItem(BasePersonItem):
+    age = Field()
+
+
+class IdentifiedPersonItem(DjangoItem):
+    django_model = IdentifiedPerson
+
+
+class DjangoItemTest(unittest.TestCase):
+
+    def assertSortedEqual(self, first, second, msg=None):
+        return self.assertEqual(sorted(first), sorted(second), msg)
+
+    def test_base(self):
+        i = BasePersonItem()
+        self.assertSortedEqual(i.fields.keys(), ['age', 'name'])
+
+    def test_new_fields(self):
+        i = NewFieldPersonItem()
+        self.assertSortedEqual(i.fields.keys(), ['age', 'other', 'name'])
+
+    def test_override_field(self):
+        i = OverrideFieldPersonItem()
+        self.assertSortedEqual(i.fields.keys(), ['age', 'name'])
+
+    def test_custom_primary_key_field(self):
+        """
+        Test that if a custom primary key exists, it is
+        in the field list.
+        """
+        i = IdentifiedPersonItem()
+        self.assertSortedEqual(i.fields.keys(), ['age', 'identifier', 'name'])
+
+    def test_save(self):
+        i = BasePersonItem()
+        self.assertSortedEqual(i.fields.keys(), ['age', 'name'])
+
+        i['name'] = 'John'
+        i['age'] = '22'
+        person = i.save(commit=False)
+
+        self.assertEqual(person.name, 'John')
+        self.assertEqual(person.age, '22')
+
+    def test_override_save(self):
+        i = OverrideFieldPersonItem()
+
+        i['name'] = 'John'
+        # it is not obvious that "age" should be saved also, since it was
+        # redefined in child class
+        i['age'] = '22'
+        person = i.save(commit=False)
+
+        self.assertEqual(person.name, 'John')
+        self.assertEqual(person.age, '22')
+
+    def test_validation(self):
+        long_name = 'z' * 300
+        i = BasePersonItem(name=long_name)
+        self.assertFalse(i.is_valid())
+        self.assertEqual(set(i.errors), set(['age', 'name']))
+        i = BasePersonItem(name='John')
+        self.assertTrue(i.is_valid(exclude=['age']))
+        self.assertEqual({}, i.errors)
+
+        # once the item is validated, it does not validate again
+        i['name'] = long_name
+        self.assertTrue(i.is_valid())
+
+    def test_override_validation(self):
+        i = OverrideFieldPersonItem()
+        i['name'] = 'John'
+        self.assertFalse(i.is_valid())
+
+        i = i = OverrideFieldPersonItem()
+        i['name'] = 'John'
+        i['age'] = '22'
+        self.assertTrue(i.is_valid())
+
+    def test_default_field_values(self):
+        i = BasePersonItem()
+        person = i.save(commit=False)
+        self.assertEqual(person.name, 'Robot')
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..5bd907d
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,24 @@
+# Tox (http://tox.testrun.org/) is a tool for running tests
+# in multiple virtualenvs. This configuration file will run the
+# test suite on all supported python versions. To use it, "pip install tox"
+# and then run "tox" from this directory.
+
+[tox]
+envlist = py27,py34,py35
+
+[testenv]
+deps =
+    -rrequirements.txt
+    pytest
+commands =
+    py.test {posargs:tests}
+
+[testenv:py34]
+basepython = python3.4
+deps =
+    -rrequirements-py3.txt
+    pytest
+
+[testenv:py35]
+basepython = python3.5
+deps = {[testenv:py34]deps}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-scrapy-djangoitem.git



More information about the Python-modules-commits mailing list