[Python-modules-commits] [python-mechanicalsoup] 01/06: New upstream version 0.7.0
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Tue May 16 13:59:28 UTC 2017
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository python-mechanicalsoup.
commit 56cf5bb7bb4e2d77365fb1a0e3a1670a783b6ffd
Author: Ghislain Antony Vaillant <ghisvail at gmail.com>
Date: Mon May 15 20:26:07 2017 +0100
New upstream version 0.7.0
---
LICENSE | 21 +++
MANIFEST.in | 3 +
MechanicalSoup.egg-info/PKG-INFO | 38 +++---
MechanicalSoup.egg-info/SOURCES.txt | 12 +-
PKG-INFO | 38 +++---
README.md | 105 +++++++++++++++
example.py | 40 ++++++
example_manual.py | 44 +++++++
mechanicalsoup/__init__.py | 8 +-
mechanicalsoup/browser.py | 251 +++++++++++++++++++-----------------
mechanicalsoup/form.py | 183 +++++++++++++++++++-------
mechanicalsoup/stateful_browser.py | 169 ++++++++++++++++++++++++
mechanicalsoup/utils.py | 2 +
setup.cfg | 16 +--
setup.py | 90 ++++++-------
tests/test_browser.py | 100 ++++++++++++++
tests/test_form.py | 116 +++++++++++++++++
tests/test_stateful_browser.py | 28 ++++
18 files changed, 1001 insertions(+), 263 deletions(-)
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..8244556
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2014
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..29eaa01
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,3 @@
+include LICENSE README.md
+recursive-include tests *.py
+include example*.py
diff --git a/MechanicalSoup.egg-info/PKG-INFO b/MechanicalSoup.egg-info/PKG-INFO
index 798ac2a..c59b4c3 100644
--- a/MechanicalSoup.egg-info/PKG-INFO
+++ b/MechanicalSoup.egg-info/PKG-INFO
@@ -1,19 +1,19 @@
-Metadata-Version: 1.1
-Name: MechanicalSoup
-Version: 0.6.0
-Summary: A Python library for automating interaction with websites
-Home-page: https://github.com/hickford/MechanicalSoup
-Author: UNKNOWN
-Author-email: UNKNOWN
-License: MIT
-Description: UNKNOWN
-Platform: UNKNOWN
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Programming Language :: Python :: 2
-Classifier: Programming Language :: Python :: 2.6
-Classifier: Programming Language :: Python :: 2.7
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.2
-Classifier: Programming Language :: Python :: 3.3
-Classifier: Programming Language :: Python :: 3.4
-Classifier: Programming Language :: Python :: 3.5
+Metadata-Version: 1.1
+Name: MechanicalSoup
+Version: 0.7.0
+Summary: A Python library for automating interaction with websites
+Home-page: https://github.com/hickford/MechanicalSoup
+Author: UNKNOWN
+Author-email: UNKNOWN
+License: MIT
+Description: UNKNOWN
+Platform: UNKNOWN
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 2
+Classifier: Programming Language :: Python :: 2.6
+Classifier: Programming Language :: Python :: 2.7
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.3
+Classifier: Programming Language :: Python :: 3.4
+Classifier: Programming Language :: Python :: 3.5
+Classifier: Programming Language :: Python :: 3.6
diff --git a/MechanicalSoup.egg-info/SOURCES.txt b/MechanicalSoup.egg-info/SOURCES.txt
index 3061499..22cdd8b 100644
--- a/MechanicalSoup.egg-info/SOURCES.txt
+++ b/MechanicalSoup.egg-info/SOURCES.txt
@@ -1,3 +1,8 @@
+LICENSE
+MANIFEST.in
+README.md
+example.py
+example_manual.py
setup.cfg
setup.py
MechanicalSoup.egg-info/PKG-INFO
@@ -7,4 +12,9 @@ MechanicalSoup.egg-info/requires.txt
MechanicalSoup.egg-info/top_level.txt
mechanicalsoup/__init__.py
mechanicalsoup/browser.py
-mechanicalsoup/form.py
\ No newline at end of file
+mechanicalsoup/form.py
+mechanicalsoup/stateful_browser.py
+mechanicalsoup/utils.py
+tests/test_browser.py
+tests/test_form.py
+tests/test_stateful_browser.py
\ No newline at end of file
diff --git a/PKG-INFO b/PKG-INFO
index 798ac2a..c59b4c3 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,19 +1,19 @@
-Metadata-Version: 1.1
-Name: MechanicalSoup
-Version: 0.6.0
-Summary: A Python library for automating interaction with websites
-Home-page: https://github.com/hickford/MechanicalSoup
-Author: UNKNOWN
-Author-email: UNKNOWN
-License: MIT
-Description: UNKNOWN
-Platform: UNKNOWN
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Programming Language :: Python :: 2
-Classifier: Programming Language :: Python :: 2.6
-Classifier: Programming Language :: Python :: 2.7
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.2
-Classifier: Programming Language :: Python :: 3.3
-Classifier: Programming Language :: Python :: 3.4
-Classifier: Programming Language :: Python :: 3.5
+Metadata-Version: 1.1
+Name: MechanicalSoup
+Version: 0.7.0
+Summary: A Python library for automating interaction with websites
+Home-page: https://github.com/hickford/MechanicalSoup
+Author: UNKNOWN
+Author-email: UNKNOWN
+License: MIT
+Description: UNKNOWN
+Platform: UNKNOWN
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 2
+Classifier: Programming Language :: Python :: 2.6
+Classifier: Programming Language :: Python :: 2.7
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.3
+Classifier: Programming Language :: Python :: 3.4
+Classifier: Programming Language :: Python :: 3.5
+Classifier: Programming Language :: Python :: 3.6
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..d898d66
--- /dev/null
+++ b/README.md
@@ -0,0 +1,105 @@
+MechanicalSoup
+==============
+
+A Python library for automating interaction with websites. MechanicalSoup automatically stores and sends cookies, follows redirects, and can follow links and submit forms. It doesn't do Javascript.
+
+I was a fond user of the [Mechanize](https://github.com/jjlee/mechanize) library, but unfortunately it's [incompatible with Python 3](https://github.com/jjlee/mechanize/issues/96) and development is inactive. MechanicalSoup provides a similar API, built on Python giants [Requests](http://docs.python-requests.org/en/latest/) (for http sessions) and [BeautifulSoup](http://www.crummy.com/software/BeautifulSoup/) (for document navigation).
+
+Installation
+------
+
+[![Latest Version](https://img.shields.io/pypi/v/MechanicalSoup.svg)](https://pypi.python.org/pypi/MechanicalSoup/)
+
+From [PyPI](https://pypi.python.org/pypi/MechanicalSoup/)
+
+ pip install MechanicalSoup
+
+Python versions 2.6-2.7, 3.3-3.6, PyPy and PyPy3 are supported (and tested against).
+
+Example
+------
+
+From [`example.py`](example.py), code to log into the GitHub website:
+
+```python
+"""Example app to login to GitHub using the StatefulBrowser class."""
+
+from __future__ import print_function
+import argparse
+import mechanicalsoup
+from getpass import getpass
+
+parser = argparse.ArgumentParser(description="Login to GitHub.")
+parser.add_argument("username")
+args = parser.parse_args()
+
+args.password = getpass("Please enter your GitHub password: ")
+
+browser = mechanicalsoup.StatefulBrowser()
+# Uncomment for a more verbose output:
+# browser.set_verbose(2)
+
+browser.open("https://github.com")
+browser.follow_link("login")
+browser.select_form('#login form')
+browser["login"] = args.username
+browser["password"] = args.password
+resp = browser.submit_selected()
+
+# Uncomment to launch a web browser on the current page:
+# browser.launch_browser()
+
+# verify we are now logged in
+page = browser.get_current_page()
+messages = page.find("div", class_="flash-messages")
+if messages:
+ print(messages.text)
+assert page.select(".logout-form")
+
+print(page.title.text)
+
+# verify we remain logged in (thanks to cookies) as we browse the rest of
+# the site
+page3 = browser.open("https://github.com/hickford/MechanicalSoup")
+assert page3.soup.select(".logout-form")
+```
+
+For an example with a more complex form (checkboxes, radio buttons and textareas), read [`tests/test_browser.py`](tests/test_browser.py) and [`tests/test_form.py`](tests/test_form.py).
+
+Common problems
+---
+
+### "No parser was explicitly specified"
+
+> UserWarning: No parser was explicitly specified, so I'm using the best available HTML parser for this system ("lxml"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.
+
+Recent versions of BeautifulSoup show a harmless warning to encourage you to specify which HTML parser to use. You can do this in MechanicalSoup:
+
+ mechanicalsoup.Browser(soup_config={'features':'html.parser'})
+
+Or if you have the parser [lxml](http://lxml.de/installation.html) installed:
+
+ mechanicalsoup.Browser(soup_config={'features':'lxml'})
+
+See also https://www.crummy.com/software/BeautifulSoup/bs4/doc/#you-need-a-parser
+
+Development
+---------
+
+[![Build Status](https://travis-ci.org/hickford/MechanicalSoup.svg?branch=master)](https://travis-ci.org/hickford/MechanicalSoup)
+
+### Tests
+
+ py.test
+
+### Roadmap
+
+* Draw [Substack-style](http://substack.net/art) readme art (imagine a steaming bowl of cogs and noodles)
+* [Write docs and publish website](https://github.com/hickford/MechanicalSoup/issues/6)
+
+See also
+------
+
+* [RoboBrowser](https://github.com/jmcarp/robobrowser): a similar library, also based on Requests and BeautifulSoup.
+* [Hacker News post](https://news.ycombinator.com/item?id=8012103)
+* [Reddit discussion](http://www.reddit.com/r/programming/comments/2aa13s/mechanicalsoup_a_python_library_for_automating/)
diff --git a/example.py b/example.py
new file mode 100644
index 0000000..a890d31
--- /dev/null
+++ b/example.py
@@ -0,0 +1,40 @@
+"""Example app to login to GitHub using the StatefulBrowser class."""
+
+from __future__ import print_function
+import argparse
+import mechanicalsoup
+from getpass import getpass
+
+parser = argparse.ArgumentParser(description="Login to GitHub.")
+parser.add_argument("username")
+args = parser.parse_args()
+
+args.password = getpass("Please enter your GitHub password: ")
+
+browser = mechanicalsoup.StatefulBrowser(soup_config={'features': 'lxml'})
+# Uncomment for a more verbose output:
+# browser.set_verbose(2)
+
+browser.open("https://github.com")
+browser.follow_link("login")
+browser.select_form('#login form')
+browser["login"] = args.username
+browser["password"] = args.password
+resp = browser.submit_selected()
+
+# Uncomment to launch a web browser on the current page:
+# browser.launch_browser()
+
+# verify we are now logged in
+page = browser.get_current_page()
+messages = page.find("div", class_="flash-messages")
+if messages:
+ print(messages.text)
+assert page.select(".logout-form")
+
+print(page.title.text)
+
+# verify we remain logged in (thanks to cookies) as we browse the rest of
+# the site
+page3 = browser.open("https://github.com/hickford/MechanicalSoup")
+assert page3.soup.select(".logout-form")
diff --git a/example_manual.py b/example_manual.py
new file mode 100644
index 0000000..5964fc7
--- /dev/null
+++ b/example_manual.py
@@ -0,0 +1,44 @@
+"""Example app to login to GitHub, using the plain Browser class.
+
+See example.py for an example using the more advanced StatefulBrowser."""
+import argparse
+import mechanicalsoup
+
+parser = argparse.ArgumentParser(description="Login to GitHub.")
+parser.add_argument("username")
+parser.add_argument("password")
+args = parser.parse_args()
+
+browser = mechanicalsoup.Browser(soup_config={'features': 'lxml'})
+
+# request github login page. the result is a requests.Response object
+# http://docs.python-requests.org/en/latest/user/quickstart/#response-content
+login_page = browser.get("https://github.com/login")
+
+# similar to assert login_page.ok but with full status code in case of
+# failure.
+login_page.raise_for_status()
+
+# login_page.soup is a BeautifulSoup object
+# http://www.crummy.com/software/BeautifulSoup/bs4/doc/#beautifulsoup
+# we grab the login form
+login_form = mechanicalsoup.Form(login_page.soup.select_one('#login form'))
+
+# specify username and password
+login_form.input({"login": args.username, "password": args.password})
+
+# submit form
+page2 = browser.submit(login_form, login_page.url)
+
+# verify we are now logged in
+messages = page2.soup.find("div", class_="flash-messages")
+if messages:
+ print(messages.text)
+assert page2.soup.select(".logout-form")
+
+print(page2.soup.title.text)
+
+# verify we remain logged in (thanks to cookies) as we browse the rest of
+# the site
+page3 = browser.get("https://github.com/hickford/MechanicalSoup")
+assert page3.soup.select(".logout-form")
diff --git a/mechanicalsoup/__init__.py b/mechanicalsoup/__init__.py
index 05fe111..9e36dde 100644
--- a/mechanicalsoup/__init__.py
+++ b/mechanicalsoup/__init__.py
@@ -1,2 +1,6 @@
-from .browser import Browser
-from .form import Form
\ No newline at end of file
+from .utils import LinkNotFoundError
+from .browser import Browser
+from .form import Form
+from .stateful_browser import StatefulBrowser
+
+__all__ = ['LinkNotFoundError', 'Browser', 'StatefulBrowser', 'Form']
diff --git a/mechanicalsoup/browser.py b/mechanicalsoup/browser.py
index 15f4768..99301c3 100644
--- a/mechanicalsoup/browser.py
+++ b/mechanicalsoup/browser.py
@@ -1,120 +1,131 @@
-import warnings
-import requests
-import bs4
-from six.moves import urllib
-from six import string_types
-from .form import Form
-
-# see https://www.crummy.com/software/BeautifulSoup/bs4/doc/#specifying-the-parser-to-use
-warnings.filterwarnings("ignore", "No parser was explicitly specified", module="bs4")
-
-class Browser(object):
-
- def __init__(self, session=None, soup_config=None, requests_adapters=None):
- self.session = session or requests.Session()
-
- if requests_adapters is not None:
- for adaptee, adapter in requests_adapters.items():
- self.session.mount(adaptee, adapter)
-
- self.soup_config = soup_config or dict()
-
- @staticmethod
- def add_soup(response, soup_config):
- if "text/html" in response.headers.get("Content-Type", ""):
- response.soup = bs4.BeautifulSoup(
- response.content, **soup_config)
-
- def request(self, *args, **kwargs):
- response = self.session.request(*args, **kwargs)
- Browser.add_soup(response, self.soup_config)
- return response
-
- def get(self, *args, **kwargs):
- response = self.session.get(*args, **kwargs)
- Browser.add_soup(response, self.soup_config)
- return response
-
- def post(self, *args, **kwargs):
- response = self.session.post(*args, **kwargs)
- Browser.add_soup(response, self.soup_config)
- return response
-
- def _build_request(self, form, url=None, **kwargs):
- method = form.get("method", "get")
- action = form.get("action")
- url = urllib.parse.urljoin(url, action)
- if url is None: # This happens when both `action` and `url` are None.
- raise ValueError('no URL to submit to')
-
- # read http://www.w3.org/TR/html5/forms.html
- data = kwargs.pop("data", dict())
- files = kwargs.pop("files", dict())
-
- for input in form.select("input"):
- name = input.get("name")
- if not name:
- continue
-
- if input.get("type") in ("radio", "checkbox"):
- if "checked" not in input.attrs:
- continue
- value = input.get("value", "on")
- else:
- # web browsers use empty string for inputs with missing values
- value = input.get("value", "")
-
- if input.get("type") == "checkbox":
- data.setdefault(name, []).append(value)
-
- elif input.get("type") == "file":
- # read http://www.cs.tut.fi/~jkorpela/forms/file.html
- # in web browsers, file upload only happens if the form"s (or
- # submit button"s) enctype attribute is set to
- # "multipart/form-data". we don"t care, simplify.
- if not value:
- continue
- if isinstance(value, string_types):
- value = open(value, "rb")
- files[name] = value
-
- else:
- data[name] = value
-
- for textarea in form.select("textarea"):
- name = textarea.get("name")
- if not name:
- continue
- data[name] = textarea.text
-
- for select in form.select("select"):
- name = select.get("name")
- if not name:
- continue
- multiple = "multiple" in select.attrs
- values = []
- for i, option in enumerate(select.select("option")):
- if (i == 0 and not multiple) or "selected" in option.attrs:
- values.append(option.get("value", ""))
- if multiple:
- data[name] = values
- elif values:
- data[name] = values[-1]
-
- if method.lower() == "get":
- kwargs["params"] = data
- else:
- kwargs["data"] = data
- return requests.Request(method, url, files=files, **kwargs)
-
- def _prepare_request(self, form, url=None, **kwargs):
- request = self._build_request(form, url, **kwargs)
- return self.session.prepare_request(request)
-
- def submit(self, form, url=None, **kwargs):
- if isinstance(form, Form):
- form = form.form
- request = self._prepare_request(form, url, **kwargs)
- response = self.session.send(request)
- Browser.add_soup(response, self.soup_config)
- return response
+import warnings
+import requests
+import bs4
+from six.moves import urllib
+from six import string_types
+from .form import Form
+import webbrowser
+import tempfile
+
+# see
+# https://www.crummy.com/software/BeautifulSoup/bs4/doc/#specifying-the-parser-to-use
+warnings.filterwarnings(
+ "ignore", "No parser was explicitly specified", module="bs4")
+
+
+class Browser(object):
+
+ def __init__(self, session=None, soup_config=None, requests_adapters=None):
+ self.session = session or requests.Session()
+
+ if requests_adapters is not None:
+ for adaptee, adapter in requests_adapters.items():
+ self.session.mount(adaptee, adapter)
+
+ self.soup_config = soup_config or dict()
+
+ @staticmethod
+ def add_soup(response, soup_config):
+ if "text/html" in response.headers.get("Content-Type", ""):
+ response.soup = bs4.BeautifulSoup(
+ response.content, **soup_config)
+
+ def request(self, *args, **kwargs):
+ response = self.session.request(*args, **kwargs)
+ Browser.add_soup(response, self.soup_config)
+ return response
+
+ def get(self, *args, **kwargs):
+ response = self.session.get(*args, **kwargs)
+ Browser.add_soup(response, self.soup_config)
+ return response
+
+ def post(self, *args, **kwargs):
+ response = self.session.post(*args, **kwargs)
+ Browser.add_soup(response, self.soup_config)
+ return response
+
+ def _build_request(self, form, url=None, **kwargs):
+ method = str(form.get("method", "get"))
+ action = form.get("action")
+ url = urllib.parse.urljoin(url, action)
+ if url is None: # This happens when both `action` and `url` are None.
+ raise ValueError('no URL to submit to')
+
+ # read http://www.w3.org/TR/html5/forms.html
+ data = kwargs.pop("data", dict())
+ files = kwargs.pop("files", dict())
+
+ for input in form.select("input"):
+ name = input.get("name")
+ if not name:
+ continue
+
+ if input.get("type") in ("radio", "checkbox"):
+ if "checked" not in input.attrs:
+ continue
+ value = input.get("value", "on")
+ else:
+ # web browsers use empty string for inputs with missing values
+ value = input.get("value", "")
+
+ if input.get("type") == "checkbox":
+ data.setdefault(name, []).append(value)
+
+ elif input.get("type") == "file":
+ # read http://www.cs.tut.fi/~jkorpela/forms/file.html
+ # in web browsers, file upload only happens if the form"s (or
+ # submit button"s) enctype attribute is set to
+ # "multipart/form-data". we don"t care, simplify.
+ if not value:
+ continue
+ if isinstance(value, string_types):
+ value = open(value, "rb")
+ files[name] = value
+
+ else:
+ data[name] = value
+
+ for textarea in form.select("textarea"):
+ name = textarea.get("name")
+ if not name:
+ continue
+ data[name] = textarea.text
+
+ for select in form.select("select"):
+ name = select.get("name")
+ if not name:
+ continue
+ multiple = "multiple" in select.attrs
+ values = []
+ for i, option in enumerate(select.select("option")):
+ if (i == 0 and not multiple) or "selected" in option.attrs:
+ values.append(option.get("value", ""))
+ if multiple:
+ data[name] = values
+ elif values:
+ data[name] = values[-1]
+
+ if method.lower() == "get":
+ kwargs["params"] = data
+ else:
+ kwargs["data"] = data
+ return requests.Request(method, url, files=files, **kwargs)
+
+ def _prepare_request(self, form, url=None, **kwargs):
+ request = self._build_request(form, url, **kwargs)
+ return self.session.prepare_request(request)
+
+ def submit(self, form, url=None, **kwargs):
+ if isinstance(form, Form):
+ form = form.form
+ request = self._prepare_request(form, url, **kwargs)
+ response = self.session.send(request)
+ Browser.add_soup(response, self.soup_config)
+ return response
+
+ def launch_browser(self, soup):
+ """Launch a browser on the page, for debugging purpose."""
+ with tempfile.NamedTemporaryFile(delete=False) as file:
+ file.write(soup.encode())
+ webbrowser.open('file://' + file.name)
diff --git a/mechanicalsoup/form.py b/mechanicalsoup/form.py
index 618b20e..e9f0293 100644
--- a/mechanicalsoup/form.py
+++ b/mechanicalsoup/form.py
@@ -1,49 +1,134 @@
-class Form(object):
-
- def __init__(self, form):
- self.form = form
-
- def input(self, data):
- for (name, value) in data.items():
- self.form.find("input", {"name": name})["value"] = value
-
- def check(self, data):
- for (name, value) in data.items():
- if isinstance(value, list):
- for choice in value:
- self.form.find("input", {"name": name, "value": choice})[
- "checked"] = ""
- else:
- self.form.find("input", {"name": name, "value": value})[
- "checked"] = ""
-
- def textarea(self, data):
- for (name, value) in data.items():
- self.form.find("textarea", {"name": name}).insert(0, value)
-
- def attach(self, data):
- for (name, value) in data.items():
- self.form.find("input", {"name": name})["value"] = value
-
- def choose_submit(self, el):
- # In a normal web browser, when a input[type=submit] is clicked,
- # all other submits aren't sent. You can use simulate this as following:
-
- # page = browser.get(URL)
- # form_el = page.soup.form
- # form = Form(form_el)
- # submit = page.soup.select(SUBMIT_SELECTOR)[0]
- # form.choose_submit(submit)
- # url = BASE_DOMAIN + form_el.attrs['action']
- # return browser.submit(form, url)
-
- for inp in self.form.select("input"):
- if inp.get('type') != 'submit':
- continue
- if inp == el:
- continue
-
- del inp['name']
- return True
-
- return False
+from .utils import LinkNotFoundError
+from bs4 import BeautifulSoup
+
+
+class Form(object):
+
+ def __init__(self, form):
+ self.form = form
+
+ def input(self, data):
+ for (name, value) in data.items():
+ i = self.form.find("input", {"name": name})
+ if not i:
+ raise LinkNotFoundError("No input field named " + name)
+ i["value"] = value
+
+ attach = input
+
+ def uncheck_all(self, name):
+ for option in self.form.find_all("input", {"name": name}):
+ if "checked" in option.attrs:
+ del option.attrs["checked"]
+
+ def check(self, data):
+ for (name, value) in data.items():
+ # Complain if we don't find the name, regardless of the
+ # value
+ inputs = self.form.find_all("input", {"name": name})
+ if inputs == []:
+ raise LinkNotFoundError("No input checkbox named " + name)
+ type = inputs[0].attrs.get('type', 'text')
+ if type == "radio":
+ self.uncheck_all(name)
+
+ # Accept individual values (int, str)
+ # We just wrap them in a 1-value tuple.
+ if not isinstance(value, list) and not isinstance(value, tuple):
+ value = (value,)
+ for choice in value:
+ choice = str(choice) # Allow for example literal numbers
+ found = False
+ for i in inputs:
+ if i.attrs.get("value", "on") == choice:
+ i["checked"] = ""
+ found = True
+ break
+ if not found:
+ print(self.form)
+ raise LinkNotFoundError(
+ "No input checkbox named %s with choice %s" %
+ (name, choice)
+ )
+
+ def textarea(self, data):
+ for (name, value) in data.items():
+ t = self.form.find("textarea", {"name": name})
+ if not t:
+ raise LinkNotFoundError("No textarea named " + name)
+ t.string = value
+
+ def __setitem__(self, name, value):
+ return self.set(name, value)
+
+ def set(self, name, value, force=False):
+ input = self.form.find("input", {"name": name})
+ if input:
+ if input.attrs.get('type', 'text') in ("radio", "checkbox"):
+ if value is True:
+ # f["foo"] = True checks the box foo
+ input.attrs["checked"] = ""
+ else:
+ self.check({name: value})
+ else:
+ input["value"] = value
+ return
+ textarea = self.form.find("textarea", {"name": name})
+ if textarea:
+ textarea.string = value
+ return
+ select = self.form.find("select", {"name": name})
+ if select:
+ for option in select.find_all("option"):
+ if "selected" in option.attrs:
+ del option.attrs["selected"]
+ o = select.find("option", {"value": value})
+ o.attrs["selected"] = "selected"
+ return
+ if force:
+ self.new_control('input', name, value=value)
+ return
+ raise LinkNotFoundError()
+
+ def new_control(self, type, name, value, **kwargs):
+ old = self.form.find('input', {'name': name})
+ if old:
+ old.decompose()
+ old = self.form.find('textarea', {'name': name})
+ if old:
+ old.decompose()
+ # We don't have access to the original soup object, so we
+ # instantiate a new BeautifulSoup() to call new_tag().
+ control = BeautifulSoup().new_tag('input')
+ control['type'] = type
+ control['name'] = name
+ control['value'] = value
+ for k, v in kwargs.items():
+ control[k] = v
+ self.form.append(control)
+ return control
+
+ def choose_submit(self, el):
+ # In a normal web browser, when an input[type=submit] is clicked,
+ # all other submits aren't sent. You can simulate this as
+ # follows:
+
+ # page = browser.get(URL)
+ # form_el = page.soup.form
+ # form = Form(form_el)
+ # submit = page.soup.select(SUBMIT_SELECTOR)[0]
+ # form.choose_submit(submit)
+ # url = BASE_DOMAIN + form_el.attrs['action']
+ # return browser.submit(form, url)
+
+ found = False
+ for inp in self.form.select("input"):
+ if inp.get('type') != 'submit':
+ continue
+ if inp == el or inp['name'] == el:
+ continue
+
+ del inp['name']
+ found = True
+
+ return found
diff --git a/mechanicalsoup/stateful_browser.py b/mechanicalsoup/stateful_browser.py
new file mode 100644
index 0000000..5d05c67
--- /dev/null
+++ b/mechanicalsoup/stateful_browser.py
@@ -0,0 +1,169 @@
+from __future__ import print_function
+
+from six.moves import urllib
+from .browser import Browser
+from .utils import LinkNotFoundError
+from .form import Form
+import sys
+import re
+
+
+class StatefulBrowser(Browser):
+ def __init__(self, session=None, soup_config=None, requests_adapters=None):
+ super(StatefulBrowser, self).__init__(
+ session, soup_config, requests_adapters)
+ self.__debug = False
+ self.__verbose = 0
+ self.__current_page = None
+ self.__current_url = None
+ self.__current_form = None
+
+ def set_debug(self, debug):
+ """Set the debug mode (off by default).
+
+ Set to True to enable debug mode. When active, some actions
+ will launch a browser on the current page on failure to let
+ you inspect the page content.
+ """
+ self.__debug = debug
+
+ def get_debug(self):
+ """Get the debug mode (off by default)."""
+ return self.__debug
+
+ def set_verbose(self, verbose):
+ """Set the verbosity level (an integer).
+
+ * 0 means no verbose output.
+
+ * 1 shows one dot per visited page (looks like a progress bar)
+
+ * >= 2 shows each visited URL."""
+ self.__verbose = verbose
+
+ def get_url(self):
+ """Get the URL of the currently visited page."""
+ return self.__current_url
+
+ def get_current_form(self):
+ """Get the currently selected form. See select_form()."""
+ return self.__current_form
+
+ def __setitem__(self, name, value):
+ """Call item assignment on the currently selected form."""
+ self.get_current_form()[name] = value
+
+ def new_control(self, type, name, value, **kwargs):
+ """Call new_control() on the currently selected form."""
+ return self.get_current_form().new_control(type, name, value, **kwargs)
+
+ def get_current_page(self):
+ """Get the current page as a soup object."""
+ return self.__current_page
+
+ def absolute_url(self, url):
+ """Make url absolute. url can be either relative or absolute."""
+ return urllib.parse.urljoin(self.__current_url, url)
+
+ def open(self, url, *args, **kwargs):
+ """Open the URL in this Browser object."""
+ if self.__verbose == 1:
+ sys.stdout.write('.')
+ sys.stdout.flush()
+ elif self.__verbose >= 2:
+ print(url)
+
+ resp = self.get(url, *args, **kwargs)
+ if hasattr(resp, 'soup'):
+ self.__current_page = resp.soup
+ self.__current_url = resp.url
+ self.__current_form = None
+ return resp
+
+ def open_relative(self, url, *args, **kwargs):
+ """Like open, but URL can be relative to the currently visited page."""
+ return self.open(self.absolute_url(url))
+
+ def select_form(self, *args, **kwargs):
+ """Select a form in the current page. Arguments are the same
+ as the select() method for a soup object."""
+ found_forms = self.__current_page.select(*args, **kwargs)
+ if len(found_forms) < 1:
+ if self.__debug:
+ print('select_form failed for', *args)
+ self.launch_browser()
+ raise LinkNotFoundError()
+
+ self.__current_form = Form(found_forms[0])
+ return self.__current_form
+
+ def submit_selected(self, btnName=None, *args, **kwargs):
+ """Submit the form selected with select_form()."""
+ if btnName is not None:
+ if 'data' not in kwargs:
+ kwargs['data'] = dict()
+ kwargs['data'][btnName] = ''
+
+ form = self.get_current_form()
+ if "action" in form.form:
+ url = self.__current_url
+ else:
+ url = self.absolute_url(form.form["action"])
+ resp = self.submit(self.__current_form,
+ url=url,
+ *args, **kwargs)
+ self.__current_url = resp.url
+ if hasattr(resp, "soup"):
+ self.__current_page = resp.soup
+ self.__current_form = None
+ return resp
+
+ def list_links(self, *args, **kwargs):
+ """Display the list of links in the current page."""
+ print("Links in the current page:")
+ for l in self.links(*args, **kwargs):
+ print(" ", l)
+
+ def links(self, url_regex=None, link_text=None, *args, **kwargs):
+ """Return links in the page, as a list of bs4.element.Tag object."""
+ all_links = self.get_current_page().find_all(
+ 'a', href=True, *args, **kwargs)
+ if url_regex is not None:
+ all_links = [a for a in all_links
+ if re.search(url_regex, a['href'])]
+ if link_text is not None:
+ all_links = [a for a in all_links
+ if a.text == link_text]
+ return all_links
+
+ def find_link(self, url_regex=None, *args, **kwargs):
+ """Find a link whose href property matches url_regex.
+
+ If several links match, return the first one found.
+
+ If url_regex is None, return the first link found on the page."""
+ links = self.links(url_regex, *args, **kwargs)
+ if len(links) == 0:
+ raise LinkNotFoundError()
+ else:
+ return links[0]
+
+ def follow_link(self, url_regex=None, *args, **kwargs):
+ """Find a link whose href property matches url_regex, and follow it.
+
+ If the link is not found, Raise LinkNotFoundError.
+ Before raising LinkNotFoundError, if debug is activated, list
... 400 lines suppressed ...
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-mechanicalsoup.git
More information about the Python-modules-commits
mailing list