# Copyright(C) 2017 woob project
#
# This file is part of woob.
#
# woob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# woob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with woob. If not, see <http://www.gnu.org/licenses/>.
from functools import wraps
import lxml.html
from woob.exceptions import ParseError
from woob.tools.log import DEBUG_FILTERS, getLogger
from woob.tools.misc import NO_DEFAULT as _NO_DEFAULT
from woob.tools.misc import NoDefaultType
# Public names exported by a star-import of this module.
__all__ = ["FilterError", "ItemNotFound", "Filter"]
# Alias of NoDefaultType, defined for compatibility.
NoDefault = NoDefaultType
class FilterError(ParseError):
    """
    Exception type for filter failures.

    It is a :class:`ParseError` subclass and adds no behaviour of its own.
    """
class ItemNotFound(FilterError):
    """
    Specialised :class:`FilterError`; adds no behaviour of its own.
    """

    # NOTE(review): judging by the name, this is meant for the case where a
    # requested item is missing -- confirm against the code that raises it.
class _Filter:
    """
    Common base of the filter classes.

    An instance stores a fallback ``default`` value, the ``_key`` / ``_obj``
    context consulted by :meth:`highlight_el`, and the value the class-level
    counter had when the instance was created.
    """

    # Class-wide counter, incremented once per instantiation; every instance
    # keeps the value it saw in its own ``_creation_counter`` attribute.
    _creation_counter = 0

    def __init__(self, default=_NO_DEFAULT):
        """
        :param default: value returned by :meth:`default_or_raise` instead of
                        raising; the ``_NO_DEFAULT`` sentinel means "no
                        default"
        """
        self._key = None
        self._obj = None
        self.default = default
        # Snapshot the class counter first, then bump it for the next filter.
        self._creation_counter = _Filter._creation_counter
        _Filter._creation_counter += 1

    def __or__(self, o):
        """``flt | value``: store *value* as the default, return this filter."""
        self.default = o
        return self

    def __and__(self, o):
        """
        ``flt & other``: make this filter the selector of *other*.

        A ``_Filter`` subclass given as a class is instantiated without
        arguments first. The (possibly new) *other* object is returned.
        """
        target = o() if isinstance(o, type) and issubclass(o, _Filter) else o
        target.selector = self
        return target

    def __str__(self):
        return type(self).__name__

    def __call__(self, item):
        """Must be provided by subclasses."""
        raise NotImplementedError()

    def default_or_raise(self, exception):
        """
        Return the configured default, or raise *exception* when none is set.

        :param exception: exception instance to raise if there is no default
        """
        if self.default is _NO_DEFAULT:
            raise exception
        return self.default

    def highlight_el(self, el, item=None):
        """
        Visually mark *el* when highlighting is enabled for the object.

        The object is ``self._obj`` when truthy, else *item*. Nothing happens
        unless it has a ``saved_attrib`` attribute and its
        ``page.browser.highlight_el`` is truthy. Otherwise the element's
        current attributes are saved once in ``saved_attrib``, its ``style``
        is overwritten and, if a key is set, a ``title`` is added.

        :param el: element whose ``attrib`` mapping is modified
        :param item: fallback object used when ``self._obj`` is not set
        """
        obj = self._obj or item
        try:
            active = hasattr(obj, "saved_attrib") and bool(obj.page.browser.highlight_el)
        except AttributeError:
            # Part of the obj.page.browser chain is missing: do not highlight.
            active = False
        if not active:
            return

        # Remember the pristine attributes only the first time we touch el.
        if el not in obj.saved_attrib:
            obj.saved_attrib[el] = dict(el.attrib)

        el.attrib["style"] = "color: white !important; background: red !important;"
        if self._key:
            el.attrib["title"] = "woob field: %s" % self._key
def debug(*args):
    """
    A decorator factory to provide some debug information in Filters.

    When the ``DEBUG_FILTERS`` level is enabled on the
    ``woob.browser.b2filters`` logger, every call of the decorated method
    first logs the filter name, the input value and the filter's instance
    attributes (skipping private ones, ``selector`` and an unset
    ``default``). The decorated method is then called normally.

    :param args: accepted but not used by the implementation
    :return: a decorator for methods with the signature ``(self, value)``
    """

    def decorator(function):
        logger = getLogger("woob.browser.b2filters")

        def describe_value(value):
            """Return the object whose repr is logged as the input value."""
            if not isinstance(value, list):
                return value

            from lxml import etree

            # Lists are flattened to one comma-separated string: serialised
            # markup for ElementBase items, repr() for everything else.
            # repr() is used instead of "%r" % element because the latter
            # fails or unwraps when an element is a tuple.
            return ", ".join(
                etree.tostring(element, encoding="unicode")
                if isinstance(element, etree.ElementBase)
                else repr(element)
                for element in value
            )

        def print_debug(self, value):
            """Build and emit the debug line for one filter call."""
            prefix = ""
            if self._obj is not None:
                prefix += str(self._obj._random_id)
            if self._key is not None:
                prefix += f".{self._key}"

            params = [repr(describe_value(value))]
            for arg in self.__dict__:
                if arg.startswith("_") or arg == "selector":
                    continue
                attr = getattr(self, arg)
                # Identity test, as in _Filter.default_or_raise: "==" would
                # call an arbitrary __eq__ on user-supplied defaults.
                if arg == "default" and attr is _NO_DEFAULT:
                    continue
                params.append(f"{arg}={attr!r}")

            logger.log(DEBUG_FILTERS, f"{prefix} {self}({', '.join(params)})")

        @wraps(function)
        def wrapper(self, value):
            # Only pay the formatting cost when the debug level is enabled.
            if logger.isEnabledFor(DEBUG_FILTERS):
                print_debug(self, value)
            return function(self, value)

        return wrapper

    return decorator
class Filter(_Filter):
    """
    Class used to filter on a HTML element given as call parameter to return
    matching elements.

    Filters can be chained, so the parameter supplied to constructor can be
    either a xpath selector string, or another filter called before.

    >>> from lxml.html import etree
    >>> f = CleanDecimal(CleanText('//p'), replace_dots=True)
    >>> f(etree.fromstring('<html><body><p>blah: <span>229,90</span></p></body></html>'))
    Decimal('229.90')
    """

    def __init__(self, selector=None, default=_NO_DEFAULT):
        """
        :param selector: what :meth:`select` resolves against the item: an
                         xpath string, another filter, any callable, or a
                         plain value used as is
        :param default: default value in case the filter fails to find or parse
                        the requested value
        """
        super().__init__(default=default)
        self.selector = selector

    def select(self, selector, item):
        """
        Resolve *selector* against *item* and return the result.

        * a ``str`` is evaluated with ``item.xpath(selector)``;
        * a ``_Filter`` instance receives this filter's ``_key`` and ``_obj``
          and is then called with *item*;
        * any other callable is called with *item*;
        * anything else is returned unchanged.

        A resulting ``HtmlElement``, or each ``HtmlElement`` inside a
        resulting list, is passed to :meth:`highlight_el`.

        :param selector: selector to resolve (see above)
        :param item: object the selector is applied to
        """
        if isinstance(selector, str):
            ret = item.xpath(selector)
        elif isinstance(selector, _Filter):
            # Hand our context to the inner filter before running it.
            selector._key = self._key
            selector._obj = self._obj
            ret = selector(item)
        elif callable(selector):
            ret = selector(item)
        else:
            ret = selector

        if isinstance(ret, lxml.html.HtmlElement):
            self.highlight_el(ret, item)
        elif isinstance(ret, list):
            for el in ret:
                if isinstance(el, lxml.html.HtmlElement):
                    self.highlight_el(el, item)

        return ret

    def __call__(self, item):
        """Select with ``self.selector`` on *item*, then apply :meth:`filter`."""
        return self.filter(self.select(self.selector, item))

    @debug()
    def filter(self, value):
        """
        This method has to be overridden by children classes.

        :param value: result of :meth:`select`
        :raises NotImplementedError: always, in this base implementation
        """
        raise NotImplementedError()
class _Selector(Filter):
    """
    Filter that returns the selected value unchanged.

    A ``None`` selection falls back to the default value, or raises
    :class:`FilterError` when no default is set.
    """

    def filter(self, elements):
        """
        :param elements: value produced by the selector
        :return: *elements* itself, or the default when *elements* is ``None``
        :raises FilterError: if *elements* is ``None`` and no default is set
        """
        if elements is None:
            not_found = FilterError("Element %r not found" % self.selector)
            return self.default_or_raise(not_found)
        return elements