[Python-modules-commits] [python-mechanicalsoup] 01/06: New upstream version 0.7.0

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Tue May 16 13:59:28 UTC 2017


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository python-mechanicalsoup.

commit 56cf5bb7bb4e2d77365fb1a0e3a1670a783b6ffd
Author: Ghislain Antony Vaillant <ghisvail at gmail.com>
Date:   Mon May 15 20:26:07 2017 +0100

    New upstream version 0.7.0
---
 LICENSE                             |  21 +++
 MANIFEST.in                         |   3 +
 MechanicalSoup.egg-info/PKG-INFO    |  38 +++---
 MechanicalSoup.egg-info/SOURCES.txt |  12 +-
 PKG-INFO                            |  38 +++---
 README.md                           | 105 +++++++++++++++
 example.py                          |  40 ++++++
 example_manual.py                   |  44 +++++++
 mechanicalsoup/__init__.py          |   8 +-
 mechanicalsoup/browser.py           | 251 +++++++++++++++++++-----------------
 mechanicalsoup/form.py              | 183 +++++++++++++++++++-------
 mechanicalsoup/stateful_browser.py  | 169 ++++++++++++++++++++++++
 mechanicalsoup/utils.py             |   2 +
 setup.cfg                           |  16 +--
 setup.py                            |  90 ++++++-------
 tests/test_browser.py               | 100 ++++++++++++++
 tests/test_form.py                  | 116 +++++++++++++++++
 tests/test_stateful_browser.py      |  28 ++++
 18 files changed, 1001 insertions(+), 263 deletions(-)

diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..8244556
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2014
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..29eaa01
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,3 @@
+include LICENSE README.md
+recursive-include tests *.py
+include example*.py
diff --git a/MechanicalSoup.egg-info/PKG-INFO b/MechanicalSoup.egg-info/PKG-INFO
index 798ac2a..c59b4c3 100644
--- a/MechanicalSoup.egg-info/PKG-INFO
+++ b/MechanicalSoup.egg-info/PKG-INFO
@@ -1,19 +1,19 @@
-Metadata-Version: 1.1
-Name: MechanicalSoup
-Version: 0.6.0
-Summary: A Python library for automating interaction with websites
-Home-page: https://github.com/hickford/MechanicalSoup
-Author: UNKNOWN
-Author-email: UNKNOWN
-License: MIT
-Description: UNKNOWN
-Platform: UNKNOWN
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Programming Language :: Python :: 2
-Classifier: Programming Language :: Python :: 2.6
-Classifier: Programming Language :: Python :: 2.7
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.2
-Classifier: Programming Language :: Python :: 3.3
-Classifier: Programming Language :: Python :: 3.4
-Classifier: Programming Language :: Python :: 3.5
+Metadata-Version: 1.1
+Name: MechanicalSoup
+Version: 0.7.0
+Summary: A Python library for automating interaction with websites
+Home-page: https://github.com/hickford/MechanicalSoup
+Author: UNKNOWN
+Author-email: UNKNOWN
+License: MIT
+Description: UNKNOWN
+Platform: UNKNOWN
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 2
+Classifier: Programming Language :: Python :: 2.6
+Classifier: Programming Language :: Python :: 2.7
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.3
+Classifier: Programming Language :: Python :: 3.4
+Classifier: Programming Language :: Python :: 3.5
+Classifier: Programming Language :: Python :: 3.6
diff --git a/MechanicalSoup.egg-info/SOURCES.txt b/MechanicalSoup.egg-info/SOURCES.txt
index 3061499..22cdd8b 100644
--- a/MechanicalSoup.egg-info/SOURCES.txt
+++ b/MechanicalSoup.egg-info/SOURCES.txt
@@ -1,3 +1,8 @@
+LICENSE
+MANIFEST.in
+README.md
+example.py
+example_manual.py
 setup.cfg
 setup.py
 MechanicalSoup.egg-info/PKG-INFO
@@ -7,4 +12,9 @@ MechanicalSoup.egg-info/requires.txt
 MechanicalSoup.egg-info/top_level.txt
 mechanicalsoup/__init__.py
 mechanicalsoup/browser.py
-mechanicalsoup/form.py
\ No newline at end of file
+mechanicalsoup/form.py
+mechanicalsoup/stateful_browser.py
+mechanicalsoup/utils.py
+tests/test_browser.py
+tests/test_form.py
+tests/test_stateful_browser.py
\ No newline at end of file
diff --git a/PKG-INFO b/PKG-INFO
index 798ac2a..c59b4c3 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,19 +1,19 @@
-Metadata-Version: 1.1
-Name: MechanicalSoup
-Version: 0.6.0
-Summary: A Python library for automating interaction with websites
-Home-page: https://github.com/hickford/MechanicalSoup
-Author: UNKNOWN
-Author-email: UNKNOWN
-License: MIT
-Description: UNKNOWN
-Platform: UNKNOWN
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Programming Language :: Python :: 2
-Classifier: Programming Language :: Python :: 2.6
-Classifier: Programming Language :: Python :: 2.7
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.2
-Classifier: Programming Language :: Python :: 3.3
-Classifier: Programming Language :: Python :: 3.4
-Classifier: Programming Language :: Python :: 3.5
+Metadata-Version: 1.1
+Name: MechanicalSoup
+Version: 0.7.0
+Summary: A Python library for automating interaction with websites
+Home-page: https://github.com/hickford/MechanicalSoup
+Author: UNKNOWN
+Author-email: UNKNOWN
+License: MIT
+Description: UNKNOWN
+Platform: UNKNOWN
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 2
+Classifier: Programming Language :: Python :: 2.6
+Classifier: Programming Language :: Python :: 2.7
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.3
+Classifier: Programming Language :: Python :: 3.4
+Classifier: Programming Language :: Python :: 3.5
+Classifier: Programming Language :: Python :: 3.6
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..d898d66
--- /dev/null
+++ b/README.md
@@ -0,0 +1,105 @@
+MechanicalSoup
+==============
+
+A Python library for automating interaction with websites. MechanicalSoup automatically stores and sends cookies, follows redirects, and can follow links and submit forms. It doesn't do JavaScript.
+
+I was a fond user of the [Mechanize](https://github.com/jjlee/mechanize) library, but unfortunately it's  [incompatible with Python 3](https://github.com/jjlee/mechanize/issues/96) and development is inactive. MechanicalSoup provides a similar API, built on Python giants [Requests](http://docs.python-requests.org/en/latest/) (for http sessions) and [BeautifulSoup](http://www.crummy.com/software/BeautifulSoup/) (for document navigation).
+
+Installation
+------
+
+[![Latest Version](https://img.shields.io/pypi/v/MechanicalSoup.svg)](https://pypi.python.org/pypi/MechanicalSoup/)
+
+From [PyPI](https://pypi.python.org/pypi/MechanicalSoup/)
+
+     pip install MechanicalSoup
+
+Python versions 2.6-2.7, 3.3-3.6, PyPy and PyPy3 are supported (and tested against).
+
+Example
+------
+
+From [`example.py`](example.py), code to log into the GitHub website:
+
+```python
+"""Example app to login to GitHub using the StatefulBrowser class."""
+
+from __future__ import print_function
+import argparse
+import mechanicalsoup
+from getpass import getpass
+
+parser = argparse.ArgumentParser(description="Login to GitHub.")
+parser.add_argument("username")
+args = parser.parse_args()
+
+args.password = getpass("Please enter your GitHub password: ")
+
+browser = mechanicalsoup.StatefulBrowser()
+# Uncomment for a more verbose output:
+# browser.set_verbose(2)
+
+browser.open("https://github.com")
+browser.follow_link("login")
+browser.select_form('#login form')
+browser["login"] = args.username
+browser["password"] = args.password
+resp = browser.submit_selected()
+
+# Uncomment to launch a web browser on the current page:
+# browser.launch_browser()
+
+# verify we are now logged in
+page = browser.get_current_page()
+messages = page.find("div", class_="flash-messages")
+if messages:
+    print(messages.text)
+assert page.select(".logout-form")
+
+print(page.title.text)
+
+# verify we remain logged in (thanks to cookies) as we browse the rest of
+# the site
+page3 = browser.open("https://github.com/hickford/MechanicalSoup")
+assert page3.soup.select(".logout-form")
+```
+
+For an example with a more complex form (checkboxes, radio buttons and textareas), read [`tests/test_browser.py`](tests/test_browser.py) and [`tests/test_form.py`](tests/test_form.py).
+
+Common problems
+---
+
+### "No parser was explicitly specified"
+
+> UserWarning: No parser was explicitly specified, so I'm using the best available HTML parser for this system ("lxml"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.
+
+Recent versions of BeautifulSoup show a harmless warning to encourage you to specify which HTML parser to use. You can do this in MechanicalSoup:
+
+    mechanicalsoup.Browser(soup_config={'features':'html.parser'})
+
+Or if you have the parser [lxml](http://lxml.de/installation.html) installed:
+
+    mechanicalsoup.Browser(soup_config={'features':'lxml'})
+
+See also https://www.crummy.com/software/BeautifulSoup/bs4/doc/#you-need-a-parser
+
+Development
+---------
+
+[![Build Status](https://travis-ci.org/hickford/MechanicalSoup.svg?branch=master)](https://travis-ci.org/hickford/MechanicalSoup)
+
+### Tests
+
+    py.test
+
+### Roadmap
+
+* Draw [Substack-style](http://substack.net/art) readme art (imagine a steaming bowl of cogs and noodles)
+* [Write docs and publish website](https://github.com/hickford/MechanicalSoup/issues/6)
+
+See also
+------
+
+* [RoboBrowser](https://github.com/jmcarp/robobrowser): a similar library, also based on Requests and BeautifulSoup.
+* [Hacker News post](https://news.ycombinator.com/item?id=8012103)
+* [Reddit discussion](http://www.reddit.com/r/programming/comments/2aa13s/mechanicalsoup_a_python_library_for_automating/)
diff --git a/example.py b/example.py
new file mode 100644
index 0000000..a890d31
--- /dev/null
+++ b/example.py
@@ -0,0 +1,40 @@
+"""Example app to login to GitHub using the StatefulBrowser class."""
+
+from __future__ import print_function
+import argparse
+import mechanicalsoup
+from getpass import getpass
+
+parser = argparse.ArgumentParser(description="Login to GitHub.")
+parser.add_argument("username")
+args = parser.parse_args()
+
+args.password = getpass("Please enter your GitHub password: ")
+
+browser = mechanicalsoup.StatefulBrowser(soup_config={'features': 'lxml'})
+# Uncomment for a more verbose output:
+# browser.set_verbose(2)
+
+browser.open("https://github.com")
+browser.follow_link("login")
+browser.select_form('#login form')
+browser["login"] = args.username
+browser["password"] = args.password
+resp = browser.submit_selected()
+
+# Uncomment to launch a web browser on the current page:
+# browser.launch_browser()
+
+# verify we are now logged in
+page = browser.get_current_page()
+messages = page.find("div", class_="flash-messages")
+if messages:
+    print(messages.text)
+assert page.select(".logout-form")
+
+print(page.title.text)
+
+# verify we remain logged in (thanks to cookies) as we browse the rest of
+# the site
+page3 = browser.open("https://github.com/hickford/MechanicalSoup")
+assert page3.soup.select(".logout-form")
diff --git a/example_manual.py b/example_manual.py
new file mode 100644
index 0000000..5964fc7
--- /dev/null
+++ b/example_manual.py
@@ -0,0 +1,44 @@
+"""Example app to login to GitHub, using the plain Browser class.
+
+See example.py for an example using the more advanced StatefulBrowser."""
+import argparse
+import mechanicalsoup
+
+parser = argparse.ArgumentParser(description="Login to GitHub.")
+parser.add_argument("username")
+parser.add_argument("password")
+args = parser.parse_args()
+
+browser = mechanicalsoup.Browser(soup_config={'features': 'lxml'})
+
+# request github login page. the result is a requests.Response object
+# http://docs.python-requests.org/en/latest/user/quickstart/#response-content
+login_page = browser.get("https://github.com/login")
+
+# similar to assert login_page.ok but with full status code in case of
+# failure.
+login_page.raise_for_status()
+
+# login_page.soup is a BeautifulSoup object
+# http://www.crummy.com/software/BeautifulSoup/bs4/doc/#beautifulsoup
+# we grab the login form
+login_form = mechanicalsoup.Form(login_page.soup.select_one('#login form'))
+
+# specify username and password
+login_form.input({"login": args.username, "password": args.password})
+
+# submit form
+page2 = browser.submit(login_form, login_page.url)
+
+# verify we are now logged in
+messages = page2.soup.find("div", class_="flash-messages")
+if messages:
+    print(messages.text)
+assert page2.soup.select(".logout-form")
+
+print(page2.soup.title.text)
+
+# verify we remain logged in (thanks to cookies) as we browse the rest of
+# the site
+page3 = browser.get("https://github.com/hickford/MechanicalSoup")
+assert page3.soup.select(".logout-form")
diff --git a/mechanicalsoup/__init__.py b/mechanicalsoup/__init__.py
index 05fe111..9e36dde 100644
--- a/mechanicalsoup/__init__.py
+++ b/mechanicalsoup/__init__.py
@@ -1,2 +1,6 @@
-from .browser import Browser
-from .form import Form
\ No newline at end of file
+from .utils import LinkNotFoundError
+from .browser import Browser
+from .form import Form
+from .stateful_browser import StatefulBrowser
+
+__all__ = ['LinkNotFoundError', 'Browser', 'StatefulBrowser', 'Form']
diff --git a/mechanicalsoup/browser.py b/mechanicalsoup/browser.py
index 15f4768..99301c3 100644
--- a/mechanicalsoup/browser.py
+++ b/mechanicalsoup/browser.py
@@ -1,120 +1,131 @@
-import warnings
-import requests
-import bs4
-from six.moves import urllib
-from six import string_types
-from .form import Form
-
-# see https://www.crummy.com/software/BeautifulSoup/bs4/doc/#specifying-the-parser-to-use
-warnings.filterwarnings("ignore", "No parser was explicitly specified", module="bs4")
-
-class Browser(object):
-
-    def __init__(self, session=None, soup_config=None, requests_adapters=None):
-        self.session = session or requests.Session()
-
-        if requests_adapters is not None:
-            for adaptee, adapter in requests_adapters.items():
-                self.session.mount(adaptee, adapter)
-
-        self.soup_config = soup_config or dict()
-
-    @staticmethod
-    def add_soup(response, soup_config):
-        if "text/html" in response.headers.get("Content-Type", ""):
-            response.soup = bs4.BeautifulSoup(
-                response.content, **soup_config)
-
-    def request(self, *args, **kwargs):
-        response = self.session.request(*args, **kwargs)
-        Browser.add_soup(response, self.soup_config)
-        return response
-
-    def get(self, *args, **kwargs):
-        response = self.session.get(*args, **kwargs)
-        Browser.add_soup(response, self.soup_config)
-        return response
-
-    def post(self, *args, **kwargs):
-        response = self.session.post(*args, **kwargs)
-        Browser.add_soup(response, self.soup_config)
-        return response
-
-    def _build_request(self, form, url=None, **kwargs):
-        method = form.get("method", "get")
-        action = form.get("action")
-        url = urllib.parse.urljoin(url, action)
-        if url is None:  # This happens when both `action` and `url` are None.
-            raise ValueError('no URL to submit to')
-
-        # read http://www.w3.org/TR/html5/forms.html
-        data = kwargs.pop("data", dict())
-        files = kwargs.pop("files", dict())
-
-        for input in form.select("input"):
-            name = input.get("name")
-            if not name:
-                continue
-
-            if input.get("type") in ("radio", "checkbox"):
-                if "checked" not in input.attrs:
-                    continue
-                value = input.get("value", "on")
-            else:
-                # web browsers use empty string for inputs with missing values
-                value = input.get("value", "")
-
-            if input.get("type") == "checkbox":
-                data.setdefault(name, []).append(value)
-
-            elif input.get("type") == "file":
-                # read http://www.cs.tut.fi/~jkorpela/forms/file.html
-                # in web browsers, file upload only happens if the form"s (or
-                # submit button"s) enctype attribute is set to
-                # "multipart/form-data". we don"t care, simplify.
-                if not value:
-                    continue
-                if isinstance(value, string_types):
-                    value = open(value, "rb")
-                files[name] = value
-
-            else:
-                data[name] = value
-
-        for textarea in form.select("textarea"):
-            name = textarea.get("name")
-            if not name:
-                continue
-            data[name] = textarea.text
-
-        for select in form.select("select"):
-            name = select.get("name")
-            if not name:
-                continue
-            multiple = "multiple" in select.attrs
-            values = []
-            for i, option in enumerate(select.select("option")):
-                if (i == 0 and not multiple) or "selected" in option.attrs:
-                    values.append(option.get("value", ""))
-            if multiple:
-                data[name] = values
-            elif values:
-                data[name] = values[-1]
-
-        if method.lower() == "get":
-            kwargs["params"] = data
-        else:
-            kwargs["data"] = data
-        return requests.Request(method, url, files=files, **kwargs)
-
-    def _prepare_request(self, form, url=None, **kwargs):
-        request = self._build_request(form, url, **kwargs)
-        return self.session.prepare_request(request)
-
-    def submit(self, form, url=None, **kwargs):
-        if isinstance(form, Form):
-            form = form.form
-        request = self._prepare_request(form, url, **kwargs)
-        response = self.session.send(request)
-        Browser.add_soup(response, self.soup_config)
-        return response
+import warnings
+import requests
+import bs4
+from six.moves import urllib
+from six import string_types
+from .form import Form
+import webbrowser
+import tempfile
+
+# see
+# https://www.crummy.com/software/BeautifulSoup/bs4/doc/#specifying-the-parser-to-use
+warnings.filterwarnings(
+    "ignore", "No parser was explicitly specified", module="bs4")
+
+
+class Browser(object):
+
+    def __init__(self, session=None, soup_config=None, requests_adapters=None):
+        self.session = session or requests.Session()
+
+        if requests_adapters is not None:
+            for adaptee, adapter in requests_adapters.items():
+                self.session.mount(adaptee, adapter)
+
+        self.soup_config = soup_config or dict()
+
+    @staticmethod
+    def add_soup(response, soup_config):
+        if "text/html" in response.headers.get("Content-Type", ""):
+            response.soup = bs4.BeautifulSoup(
+                response.content, **soup_config)
+
+    def request(self, *args, **kwargs):
+        response = self.session.request(*args, **kwargs)
+        Browser.add_soup(response, self.soup_config)
+        return response
+
+    def get(self, *args, **kwargs):
+        response = self.session.get(*args, **kwargs)
+        Browser.add_soup(response, self.soup_config)
+        return response
+
+    def post(self, *args, **kwargs):
+        response = self.session.post(*args, **kwargs)
+        Browser.add_soup(response, self.soup_config)
+        return response
+
+    def _build_request(self, form, url=None, **kwargs):
+        method = str(form.get("method", "get"))
+        action = form.get("action")
+        url = urllib.parse.urljoin(url, action)
+        if url is None:  # This happens when both `action` and `url` are None.
+            raise ValueError('no URL to submit to')
+
+        # read http://www.w3.org/TR/html5/forms.html
+        data = kwargs.pop("data", dict())
+        files = kwargs.pop("files", dict())
+
+        for input in form.select("input"):
+            name = input.get("name")
+            if not name:
+                continue
+
+            if input.get("type") in ("radio", "checkbox"):
+                if "checked" not in input.attrs:
+                    continue
+                value = input.get("value", "on")
+            else:
+                # web browsers use empty string for inputs with missing values
+                value = input.get("value", "")
+
+            if input.get("type") == "checkbox":
+                data.setdefault(name, []).append(value)
+
+            elif input.get("type") == "file":
+                # read http://www.cs.tut.fi/~jkorpela/forms/file.html
+                # in web browsers, file upload only happens if the form's (or
+                # submit button's) enctype attribute is set to
+                # "multipart/form-data". we don't care, simplify.
+                if not value:
+                    continue
+                if isinstance(value, string_types):
+                    value = open(value, "rb")
+                files[name] = value
+
+            else:
+                data[name] = value
+
+        for textarea in form.select("textarea"):
+            name = textarea.get("name")
+            if not name:
+                continue
+            data[name] = textarea.text
+
+        for select in form.select("select"):
+            name = select.get("name")
+            if not name:
+                continue
+            multiple = "multiple" in select.attrs
+            values = []
+            for i, option in enumerate(select.select("option")):
+                if (i == 0 and not multiple) or "selected" in option.attrs:
+                    values.append(option.get("value", ""))
+            if multiple:
+                data[name] = values
+            elif values:
+                data[name] = values[-1]
+
+        if method.lower() == "get":
+            kwargs["params"] = data
+        else:
+            kwargs["data"] = data
+        return requests.Request(method, url, files=files, **kwargs)
+
+    def _prepare_request(self, form, url=None, **kwargs):
+        request = self._build_request(form, url, **kwargs)
+        return self.session.prepare_request(request)
+
+    def submit(self, form, url=None, **kwargs):
+        if isinstance(form, Form):
+            form = form.form
+        request = self._prepare_request(form, url, **kwargs)
+        response = self.session.send(request)
+        Browser.add_soup(response, self.soup_config)
+        return response
+
+    def launch_browser(self, soup):
+        """Launch a browser on the page, for debugging purposes."""
+        with tempfile.NamedTemporaryFile(delete=False) as file:
+            file.write(soup.encode())
+        webbrowser.open('file://' + file.name)
diff --git a/mechanicalsoup/form.py b/mechanicalsoup/form.py
index 618b20e..e9f0293 100644
--- a/mechanicalsoup/form.py
+++ b/mechanicalsoup/form.py
@@ -1,49 +1,134 @@
-class Form(object):
-
-    def __init__(self, form):
-        self.form = form
-
-    def input(self, data):
-        for (name, value) in data.items():
-            self.form.find("input", {"name": name})["value"] = value
-
-    def check(self, data):
-        for (name, value) in data.items():
-            if isinstance(value, list):
-                for choice in value:
-                    self.form.find("input", {"name": name, "value": choice})[
-                        "checked"] = ""
-            else:
-                self.form.find("input", {"name": name, "value": value})[
-                    "checked"] = ""
-
-    def textarea(self, data):
-        for (name, value) in data.items():
-            self.form.find("textarea", {"name": name}).insert(0, value)
-
-    def attach(self, data):
-        for (name, value) in data.items():
-            self.form.find("input", {"name": name})["value"] = value
-
-    def choose_submit(self, el):
-        # In a normal web browser, when a input[type=submit] is clicked,
-        # all other submits aren't sent. You can use simulate this as following:
-
-        # page = browser.get(URL)
-        # form_el = page.soup.form
-        # form = Form(form_el)
-        # submit = page.soup.select(SUBMIT_SELECTOR)[0]
-        # form.choose_submit(submit)
-        # url = BASE_DOMAIN + form_el.attrs['action']
-        # return browser.submit(form, url)
-
-        for inp in self.form.select("input"):
-            if inp.get('type') != 'submit':
-                continue
-            if inp == el:
-                continue
-
-            del inp['name']
-            return True
-
-        return False
+from .utils import LinkNotFoundError
+from bs4 import BeautifulSoup
+
+
+class Form(object):
+
+    def __init__(self, form):
+        self.form = form
+
+    def input(self, data):
+        for (name, value) in data.items():
+            i = self.form.find("input", {"name": name})
+            if not i:
+                raise LinkNotFoundError("No input field named " + name)
+            i["value"] = value
+
+    attach = input
+
+    def uncheck_all(self, name):
+        for option in self.form.find_all("input", {"name": name}):
+            if "checked" in option.attrs:
+                del option.attrs["checked"]
+
+    def check(self, data):
+        for (name, value) in data.items():
+            # Complain if we don't find the name, regardless of the
+            # value
+            inputs = self.form.find_all("input", {"name": name})
+            if inputs == []:
+                raise LinkNotFoundError("No input checkbox named " + name)
+            type = inputs[0].attrs.get('type', 'text')
+            if type == "radio":
+                self.uncheck_all(name)
+
+            # Accept individual values (int, str)
+            # We just wrap them in a 1-value tuple.
+            if not isinstance(value, list) and not isinstance(value, tuple):
+                value = (value,)
+            for choice in value:
+                choice = str(choice)  # Allow for example literal numbers
+                found = False
+                for i in inputs:
+                    if i.attrs.get("value", "on") == choice:
+                        i["checked"] = ""
+                        found = True
+                        break
+                if not found:
+                    print(self.form)
+                    raise LinkNotFoundError(
+                        "No input checkbox named %s with choice %s" %
+                        (name, choice)
+                        )
+
+    def textarea(self, data):
+        for (name, value) in data.items():
+            t = self.form.find("textarea", {"name": name})
+            if not t:
+                raise LinkNotFoundError("No textarea named " + name)
+            t.string = value
+
+    def __setitem__(self, name, value):
+        return self.set(name, value)
+
+    def set(self, name, value, force=False):
+        input = self.form.find("input", {"name": name})
+        if input:
+            if input.attrs.get('type', 'text') in ("radio", "checkbox"):
+                if value is True:
+                    # f["foo"] = True checks the box foo
+                    input.attrs["checked"] = ""
+                else:
+                    self.check({name: value})
+            else:
+                input["value"] = value
+            return
+        textarea = self.form.find("textarea", {"name": name})
+        if textarea:
+            textarea.string = value
+            return
+        select = self.form.find("select", {"name": name})
+        if select:
+            for option in select.find_all("option"):
+                if "selected" in option.attrs:
+                    del option.attrs["selected"]
+            o = select.find("option", {"value": value})
+            o.attrs["selected"] = "selected"
+            return
+        if force:
+            self.new_control('input', name, value=value)
+            return
+        raise LinkNotFoundError()
+
+    def new_control(self, type, name, value, **kwargs):
+        old = self.form.find('input', {'name': name})
+        if old:
+            old.decompose()
+        old = self.form.find('textarea', {'name': name})
+        if old:
+            old.decompose()
+        # We don't have access to the original soup object, so we
+        # instantiate a new BeautifulSoup() to call new_tag().
+        control = BeautifulSoup().new_tag('input')
+        control['type'] = type
+        control['name'] = name
+        control['value'] = value
+        for k, v in kwargs.items():
+            control[k] = v
+        self.form.append(control)
+        return control
+
+    def choose_submit(self, el):
+        # In a normal web browser, when an input[type=submit] is clicked,
+        # all other submits aren't sent. You can simulate this as
+        # follows:
+
+        # page = browser.get(URL)
+        # form_el = page.soup.form
+        # form = Form(form_el)
+        # submit = page.soup.select(SUBMIT_SELECTOR)[0]
+        # form.choose_submit(submit)
+        # url = BASE_DOMAIN + form_el.attrs['action']
+        # return browser.submit(form, url)
+
+        found = False
+        for inp in self.form.select("input"):
+            if inp.get('type') != 'submit':
+                continue
+            if inp == el or inp['name'] == el:
+                continue
+
+            del inp['name']
+            found = True
+
+        return found
diff --git a/mechanicalsoup/stateful_browser.py b/mechanicalsoup/stateful_browser.py
new file mode 100644
index 0000000..5d05c67
--- /dev/null
+++ b/mechanicalsoup/stateful_browser.py
@@ -0,0 +1,169 @@
+from __future__ import print_function
+
+from six.moves import urllib
+from .browser import Browser
+from .utils import LinkNotFoundError
+from .form import Form
+import sys
+import re
+
+
+class StatefulBrowser(Browser):
+    def __init__(self, session=None, soup_config=None, requests_adapters=None):
+        super(StatefulBrowser, self).__init__(
+            session, soup_config, requests_adapters)
+        self.__debug = False
+        self.__verbose = 0
+        self.__current_page = None
+        self.__current_url = None
+        self.__current_form = None
+
+    def set_debug(self, debug):
+        """Set the debug mode (off by default).
+
+        Set to True to enable debug mode. When active, some actions
+        will launch a browser on the current page on failure to let
+        you inspect the page content.
+        """
+        self.__debug = debug
+
+    def get_debug(self):
+        """Get the debug mode (off by default)."""
+        return self.__debug
+
+    def set_verbose(self, verbose):
+        """Set the verbosity level (an integer).
+
+        * 0 means no verbose output.
+
+        * 1 shows one dot per visited page (looks like a progress bar)
+
+        * >= 2 shows each visited URL."""
+        self.__verbose = verbose
+
+    def get_url(self):
+        """Get the URL of the currently visited page."""
+        return self.__current_url
+
+    def get_current_form(self):
+        """Get the currently selected form. See select_form()."""
+        return self.__current_form
+
+    def __setitem__(self, name, value):
+        """Call item assignment on the currently selected form."""
+        self.get_current_form()[name] = value
+
+    def new_control(self, type, name, value, **kwargs):
+        """Call new_control() on the currently selected form."""
+        return self.get_current_form().new_control(type, name, value, **kwargs)
+
+    def get_current_page(self):
+        """Get the current page as a soup object."""
+        return self.__current_page
+
+    def absolute_url(self, url):
+        """Make url absolute. url can be either relative or absolute."""
+        return urllib.parse.urljoin(self.__current_url, url)
+
+    def open(self, url, *args, **kwargs):
+        """Open the URL in this Browser object."""
+        if self.__verbose == 1:
+            sys.stdout.write('.')
+            sys.stdout.flush()
+        elif self.__verbose >= 2:
+            print(url)
+
+        resp = self.get(url, *args, **kwargs)
+        if hasattr(resp, 'soup'):
+            self.__current_page = resp.soup
+        self.__current_url = resp.url
+        self.__current_form = None
+        return resp
+
+    def open_relative(self, url, *args, **kwargs):
+        """Like open, but URL can be relative to the currently visited page."""
+        return self.open(self.absolute_url(url))
+
+    def select_form(self, *args, **kwargs):
+        """Select a form in the current page. Arguments are the same
+        as the select() method for a soup object."""
+        found_forms = self.__current_page.select(*args, **kwargs)
+        if len(found_forms) < 1:
+            if self.__debug:
+                print('select_form failed for', *args)
+                self.launch_browser()
+            raise LinkNotFoundError()
+
+        self.__current_form = Form(found_forms[0])
+        return self.__current_form
+
+    def submit_selected(self, btnName=None, *args, **kwargs):
+        """Submit the form selected with select_form()."""
+        if btnName is not None:
+            if 'data' not in kwargs:
+                kwargs['data'] = dict()
+            kwargs['data'][btnName] = ''
+
+        form = self.get_current_form()
+        if "action" in form.form:
+            url = self.__current_url
+        else:
+            url = self.absolute_url(form.form["action"])
+        resp = self.submit(self.__current_form,
+                           url=url,
+                           *args, **kwargs)
+        self.__current_url = resp.url
+        if hasattr(resp, "soup"):
+            self.__current_page = resp.soup
+        self.__current_form = None
+        return resp
+
+    def list_links(self, *args, **kwargs):
+        """Display the list of links in the current page."""
+        print("Links in the current page:")
+        for l in self.links(*args, **kwargs):
+            print("    ", l)
+
+    def links(self, url_regex=None, link_text=None, *args, **kwargs):
+        """Return links in the page, as a list of bs4.element.Tag objects."""
+        all_links = self.get_current_page().find_all(
+            'a', href=True, *args, **kwargs)
+        if url_regex is not None:
+            all_links = [a for a in all_links
+                         if re.search(url_regex, a['href'])]
+        if link_text is not None:
+            all_links = [a for a in all_links
+                         if a.text == link_text]
+        return all_links
+
+    def find_link(self, url_regex=None, *args, **kwargs):
+        """Find a link whose href property matches url_regex.
+
+        If several links match, return the first one found.
+
+        If url_regex is None, return the first link found on the page."""
+        links = self.links(url_regex, *args, **kwargs)
+        if len(links) == 0:
+            raise LinkNotFoundError()
+        else:
+            return links[0]
+
+    def follow_link(self, url_regex=None, *args, **kwargs):
+        """Find a link whose href property matches url_regex, and follow it.
+
+        If the link is not found, raise LinkNotFoundError.
+        Before raising LinkNotFoundError, if debug is activated, list
... 400 lines suppressed ...

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-mechanicalsoup.git



More information about the Python-modules-commits mailing list